source: pcp/src/wannier.c@ 4f1369

Last change on this file since 4f1369 was 4f1369, checked in by Frederik Heber <heber@…>, 17 years ago

Wannier-file split up: ComputeMWLF() into smaller functions

This split up was done in the SVN repository and tested there and is working. It was impossible to piece it up into smaller chunks in order to maintain "bisectability", hence the last wannier.* from the SVN rep. was used. However, there was one bug in ParseWannierFile: There is the define "type" and there was a enum variable also of name type which caused errors on reading perturbed densities in the resulting shielding values. Removing the variable completely fixed this.
Note: With regards to the shielding values of c2h4 at 24Ht the resulting values are still absolutely the same as with revision initial_commit

  • Property mode set to 100644
File size: 93.4 KB
Line 
1/** \file wannier.c
2 * Maximally Localized Wannier Functions.
3 *
4 * Contains the on function that minimises the spread of all orbitals in one rush in a parallel
5 * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
6 * calculate the spread of a specific orbital, which may be useful in checking on the change of
7 * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
8 *
9 Project: ParallelCarParrinello
10 \author Frederik Heber
11 \date 2006
12
13 File: wannier.c
14 $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $
15*/
16
17#include <math.h>
18#include <gsl/gsl_math.h>
19#include <gsl/gsl_eigen.h>
20#include <gsl/gsl_matrix.h>
21#include <gsl/gsl_vector.h>
22#include <gsl/gsl_complex.h>
23#include <gsl/gsl_complex_math.h>
24#include <gsl/gsl_sort_vector.h>
25#include <gsl/gsl_heapsort.h>
26#include <gsl/gsl_blas.h>
27#include <string.h>
28
29#include "data.h"
30#include "density.h"
31#include "errors.h"
32#include "gramsch.h"
33#include "helpers.h"
34#include "init.h"
35#include "myfft.h"
36#include "mymath.h"
37#include "output.h"
38#include "perturbed.h"
39#include "wannier.h"
40
41
42#define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
43#define type Occupied
44
45
46/** Converts type fftw_complex to gsl_complex.
47 * \param a complex number
48 * \return b complex number
49 */
50gsl_complex convertComplex (fftw_complex a) {
51 return gsl_complex_rect(c_re(a),c_im(a));
52}
53
54/** "merry go round" implementation for parallel index ordering.
55 * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
56 * performed which generates once each possible pairing.
57 * \param *top index array 1
58 * \param *bot index array 2
59 * \param m N/2, where N is the matrix row/column dimension
60 * \note taken from [Golub, Matrix computations, 1989, p451]
61 */
62void MerryGoRoundIndices(int *top, int *bot, int m)
63{
64 int *old_top, *old_bot;
65 int k;
66 old_top = (int *) Malloc(sizeof(int)*m, "music: old_top");
67 old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot");
68/* fprintf(stderr,"oldtop\t");
69 for (k=0;k<m;k++)
70 fprintf(stderr,"%i\t", top[k]);
71 fprintf(stderr,"\n");
72 fprintf(stderr,"oldbot\t");
73 for (k=0;k<m;k++)
74 fprintf(stderr,"%i\t", bot[k]);
75 fprintf(stderr,"\n");*/
76 // first copy arrays
77 for (k=0;k<m;k++) {
78 old_top[k] = top[k];
79 old_bot[k] = bot[k];
80 }
81 // then let the music play
82 for (k=0;k<m;k++) {
83 if (k==1)
84 top[k] = old_bot[0];
85 else if (k > 1)
86 top[k] = old_top[k-1];
87 if (k==m-1)
88 bot[k] = old_top[k];
89 else
90 bot[k] = old_bot[k+1];
91 }
92/* fprintf(stderr,"top\t");
93 for (k=0;k<m;k++)
94 fprintf(stderr,"%i\t", top[k]);
95 fprintf(stderr,"\n");
96 fprintf(stderr,"bot\t");
97 for (k=0;k<m;k++)
98 fprintf(stderr,"%i\t", bot[k]);
99 fprintf(stderr,"\n");*/
100 // and finito
101 Free(old_top, "MerryGoRoundIndices: old_top");
102 Free(old_bot, "MerryGoRoundIndices: old_bot");
103}
104
105/** merry-go-round for matrix columns.
106 * The trick here is that we must be aware of multiple rotations per process, thus only some of the
107 * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
108 * pointers to the matrix columns within the local array.
109 * \param comm communicator for circulation
110 * \param *Aloc local array of columns
111 * \param Num entries per column
112 * \param max_rounds number of column pairs in \a *Around
113 * \param k offset for tag
114 * \param tagS0 MPI tag for sending left column
115 * \param tagS1 MPI tag for sending right column
116 * \param tagR0 MPI tag for receiving left column
117 * \param tagR1 MPI tag for receiving right column
118 */
119void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
120 //double *A_locS1, *A_locS2; // local columns of A[k]
121 //double *A_locR1, *A_locR2; // local columns of A[k]
122 MPI_Request requestS0, requestS1, requestR0, requestR1;
123 MPI_Status status;
124 int ProcRank, ProcNum;
125 int l;
126 MPI_Comm_size (comm, &ProcNum);
127 MPI_Comm_rank (comm, &ProcRank);
128 double *Abuffer1, *Abuffer2; // mark the columns that are circulated
129
130 //fprintf(stderr,"shifting...");
131 if (ProcRank == 0) {
132 if (max_rounds > 1) {
133 // get last left column
134 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
135 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
136 } else {
137 // get right column
138 Abuffer1 = Aloc[1]; // note down the free column
139 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0);
140 }
141
142 //fprintf(stderr,"...left columns...");
143 for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
144 Aloc[l] = Aloc[l-2];
145
146 if (max_rounds > 1) {
147 //fprintf(stderr,"...first right...");
148 Aloc[2] = Aloc[1]; // get first right column
149 }
150
151 //fprintf(stderr,"...right columns...");
152 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
153 Aloc[l] = Aloc[l+2];
154
155 //fprintf(stderr,"...last right...");
156 Aloc[(2*max_rounds-1)] = Abuffer1;
157 MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
158
159 } else if (ProcRank == ProcNum-1) {
160 //fprintf(stderr,"...first right...");
161 // get first right column
162 Abuffer2 = Aloc[1]; // note down the free column
163 MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
164
165 //fprintf(stderr,"...right columns...");
166 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
167 Aloc[(l)] = Aloc[(l+2)];
168
169 //fprintf(stderr,"...last right...");
170 Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
171
172 //fprintf(stderr,"...left columns...");
173 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
174 Aloc[(l)] = Aloc[(l-2)];
175
176 //fprintf(stderr,"...first left...");
177// if (max_rounds > 1)
178 Aloc[0] = Abuffer2; // get first left column
179 MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
180
181 } else {
182 // get last left column
183 MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
184 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
185
186 //fprintf(stderr,"...first right...");
187 // get first right column
188 MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
189 Abuffer2 = Aloc[1]; // note down the free column
190
191 //fprintf(stderr,"...left columns...");
192 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
193 Aloc[(l)] = Aloc[(l-2)];
194
195 //fprintf(stderr,"...right columns...");
196 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
197 Aloc[(l)] = Aloc[(l+2)];
198
199 //fprintf(stderr,"...first left...");
200 Aloc[0] = Abuffer1; // get first left column
201 MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
202
203 //fprintf(stderr,"...last right...");
204 Aloc[(2*max_rounds-1)] = Abuffer2;
205 MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
206 }
207
208 //fprintf(stderr,"...waiting...");
209 if (ProcRank != ProcNum-1)
210 MPI_Wait(&requestS0, &status);
211 if (ProcRank != 0) // first left column
212 MPI_Wait(&requestR0, &status);
213 if (ProcRank != 0)
214 MPI_Wait(&requestS1, &status);
215 if (ProcRank != ProcNum-1)
216 MPI_Wait(&requestR1, &status);
217 //fprintf(stderr,"...done\n");
218}
219
220/** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group.
221 * \param *P Problem at hand
222 * \param AllocNum number of rows in matrix
223 * \return address of MPI communicator
224 */
225#ifdef HAVE_INLINE
226inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
227#else
228MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
229#endif
230{
231 MPI_Comm *comm = &P->Par.comm_ST;
232
233 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
234 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me);
235 if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel
236 comm = &P->Par.comm_ST;
237 //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n");
238 } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first
239 if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
240 comm = &P->Par.comm_ST_Psi;
241 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
242 } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
243 comm = &P->Par.comm_ST_PsiT;
244 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
245 }
246 } else {
247 if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
248 comm = &P->Par.comm_ST_PsiT;
249 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
250 } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
251 comm = &P->Par.comm_ST_Psi;
252 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
253 }
254 }
255 return comm;
256}
257
258/** Allocates and fills Lookup table for sin/cos values at each grid node.
259 * \param ***cos_table pointer to two-dimensional lookup table for cosine values
260 * \param ***sin_table pointer to two-dimensional lookup table for sine values
261 * \param *N array with number of nodes per \a NDIM axis
262 */
263void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N)
264{
265 int i, j;
266 double argument;
267 double **cos_lookup, **sin_lookup;
268
269 // create lookup table for sin/cos values
270 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
271 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
272 for (i=0;i<NDIM;i++) {
273 // allocate memory
274 cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup");
275 sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup");
276
277 // reset arrays
278 SetArrayToDouble0(cos_lookup[i],N[i]);
279 SetArrayToDouble0(sin_lookup[i],N[i]);
280
281 // create lookup values
282 for (j=0;j<N[i];j++) {
283 argument = 2*PI/(double)N[i]*(double)j;
284 cos_lookup[i][j] = cos(argument);
285 sin_lookup[i][j] = sin(argument);
286 }
287 }
288 *cos_table = cos_lookup;
289 *sin_table = sin_lookup;
290}
291
292/** Frees memory allocated during CreateSinCosLookupTable().
293 * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values
294 * \param ***sin_lookup pointer to two-dimensional lookup table for sine values
295 */
296void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup)
297{
298 int i;
299 // free lookups
300 for (i=0;i<NDIM;i++) {
301 Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]");
302 Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]");
303 }
304 Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup");
305 Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup");
306}
307
308/** Fills the entries of the six variance matrices.
309 * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the
310 * wave function and by diagonalization one obtains the unitary transformation with which the Psis are
311 * treated afterwards.
312 * \param *P Problem at hand
313 * \param AllocNum number of rows/columns
314 * \param **A pointer to variance matrices
315 * \sa ComputeMLWF() - master function.
316 */
317void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A)
318{
319 struct Lattice *Lat = &P->Lat;
320 struct RunStruct *R = &P->R;
321 struct Psis *Psi = &Lat->Psi;
322 struct LatticeLevel *Lev0 = R->Lev0;
323 struct LatticeLevel *LevS = R->LevS;
324 struct Density *Dens0 = Lev0->Dens;
325 struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB;
326 struct fft_plan_3d *plan = Lat->plan;
327 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
328 fftw_real *PsiCR = (fftw_real *)PsiC;
329 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
330 fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array
331 fftw_complex **HGcRC = (fftw_complex**)HGcR;
332 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
333 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
334 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
335 MPI_Status status;
336 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
337 int n[NDIM],n0,i0,iS, Index;
338 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
339 int N0 = LevS->Plan0.plan->local_nx;
340 int *N = LevS->Plan0.plan->N;
341 const int NUpx = LevS->NUp[0];
342 const int NUpy = LevS->NUp[1];
343 const int NUpz = LevS->NUp[2];
344 double argument, PsiAtNode;
345 int e,g,i,j,k,l,m,p,u;
346 double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
347 double **cos_lookup = NULL,**sin_lookup = NULL;
348
349 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
350
351 debug(P, "Creating Lookup Table");
352 CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N);
353
354 debug(P, "Calculating each entry of variance matrices");
355 l=-1; // to access U matrix element (0..Num-1)
356 // fill the matrices
357 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
358 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
359 if (OnePsiA->PsiType == type) { // drop all but occupied ones
360 l++; // increase l if it is non-extra wave function
361 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
362 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
363 else
364 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
365
366 //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
367 if (LPsiDatA != NULL) {
368 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
369 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
370 for (n0=0;n0<N0;n0++)
371 for (n[1]=0;n[1]<N[1];n[1]++)
372 for (n[2]=0;n[2]<N[2];n[2]++) {
373 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
374 iS = n[2]+N[2]*(n[1]+N[1]*n0);
375 n[0] = n0 + LevS->Plan0.plan->start_nx;
376 for (k=0;k<max_operators;k+=2) {
377 e = k/2;
378 argument = 2.*PI/(double)(N[e])*(double)(n[e]);
379 PsiAtNode = PsiCR[i0] /LevS->MaxN;
380 // check lookup
381 if (!l) // perform check on first wave function only
382 if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) {
383 Error(SomeError, "Lookup table does not match real value!");
384 }
385 HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
386 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
387 HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
388 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
389 }
390 }
391 for (u=0;u<max_operators;u++) {
392 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
393 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
394 }
395 }
396 m = -1; // to access U matrix element (0..Num-1)
397 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
398 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
399 if (OnePsiB->PsiType == type) { // drop all but occupied ones
400 m++; // increase m if it is non-extra wave function
401 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
402 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
403 else
404 LOnePsiB = NULL;
405 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
406 RecvSource = OnePsiB->my_color_comm_ST_Psi;
407 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
408 LPsiDatB=LevS->LPsi->TempPsi;
409 } else { // .. otherwise send it to all other processes (Max_me... - 1)
410 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
411 if (p != OnePsiB->my_color_comm_ST_Psi)
412 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
413 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
414 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
415
416 for (u=0;u<max_operators;u++) {
417 a_ij = 0;
418 b_ij = 0;
419 if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
420 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
421 g=0;
422 if (LevS->GArray[0].GSq == 0.0) {
423 Index = LevS->GArray[g].Index;
424 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
425 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
426 g++;
427 }
428 for (; g < LevS->MaxG; g++) {
429 Index = LevS->GArray[g].Index;
430 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
431 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
432 } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
433 // sum up elements from all coefficients sharing processes
434 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
435 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
436 a_ij = A_ij;
437 b_ij = B_ij;
438 // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
439 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
440 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
441 } else { // receive ...
442 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
443 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
444 }
445 // ... and store
446 //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
447 //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
448 gsl_matrix_set(A[u], l, m, A_ij);
449 gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
450 }
451 }
452 }
453 }
454 }
455 // reset extra entries
456 for (u=0;u<=max_operators;u++) {
457 for (i=Num;i<AllocNum;i++)
458 for (j=0;j<AllocNum;j++)
459 gsl_matrix_set(A[u], i,j, 0.);
460 for (i=Num;i<AllocNum;i++)
461 for (j=0;j<AllocNum;j++)
462 gsl_matrix_set(A[u], j,i, 0.);
463 }
464 FreeSinCosLookupTable(cos_lookup, sin_lookup);
465
466/*// print A matrices for debug
467 if (P->Par.me == 0)
468 for (u=0;u<max_operators+1;u++) {
469 fprintf(stderr, "A[%i] = \n",u);
470 for (i=0;i<Num;i++) {
471 for (j=0;j<Num;j++)
472 fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
473 fprintf(stderr, "\n");
474 }
475 }
476*/
477}
478
479/** Calculates reciprocal second order moments of each PsiType#Occupied orbital.
480 * First order is zero due to wave function being real (symmetry condition).
481 * \param *P Problem at hand
482 */
483void CalculateSecondOrderReciprocalMoment(struct Problem *P)
484{
485 struct Lattice *Lat = &P->Lat;
486 struct RunStruct *R = &P->R;
487 struct FileData *F = &P->Files;
488 struct Psis *Psi = &Lat->Psi;
489 struct LatticeLevel *LevS = R->LevS;
490 double result, Result;
491 fftw_complex *LPsiDatA=NULL;
492 struct OnePsiElement *OnePsiA;
493 int i,j,l,g;
494 char spin[12], suffix[18];
495
496 switch (Lat->Psi.PsiST) {
497 case SpinDouble:
498 strcpy(suffix,".recispread.csv");
499 strcpy(spin,"SpinDouble");
500 break;
501 case SpinUp:
502 strcpy(suffix,".recispread_up.csv");
503 strcpy(spin,"SpinUp");
504 break;
505 case SpinDown:
506 strcpy(suffix,".recispread_down.csv");
507 strcpy(spin,"SpinDown");
508 break;
509 }
510 if(P->Par.me_comm_ST == 0) {
511 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me);
512 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
513 OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
514 else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile)
515 OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
516 if (F->ReciSpreadFile == NULL) {
517 Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n");
518 } else {
519 fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin);
520 }
521 }
522
523 // integrate second order moment
524 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
525 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
526 if (OnePsiA->PsiType == type) { // drop all but occupied ones
527 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
528 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
529 else
530 LPsiDatA = NULL;
531
532 if (LPsiDatA != NULL) {
533 if (P->Par.me_comm_ST == 0)
534 fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo);
535 for (i=0;i<NDIM;i++) {
536 for (j=0;j<NDIM;j++) {
537 result = 0.;
538 g = 0;
539 if (LevS->GArray[0].GSq == 0.0) {
540 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re);
541 g++;
542 }
543 for (;g<LevS->MaxG;g++)
544 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im);
545 //result *= Lat->Volume/LevS->MaxG;
546 MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
547 if (P->Par.me_comm_ST == 0)
548 fprintf(F->ReciSpreadFile,"%2.5lg ", Result);
549 }
550 }
551 if (P->Par.me_comm_ST == 0)
552 fprintf(F->ReciSpreadFile,"\n");
553 }
554 }
555 }
556 if(P->Par.me_comm_ST == 0) {
557 fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n");
558 fflush(F->ReciSpreadFile);
559 }
560}
561
562/** Given the unitary matrix the transformation is performed on the Psis.
563 * \param *P Problem at hand
564 * \param *U gsl matrix containing the transformation matrix
565 * \param Num dimension parameter for the matrix, i.e. number of wave functions
566 */
567void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num)
568{
569 struct Lattice *Lat = &P->Lat;
570 struct RunStruct *R = &P->R;
571 struct Psis *Psi = &Lat->Psi;
572 struct LatticeLevel *LevS = R->LevS;
573 MPI_Status status;
574 struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
575 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
576 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
577 int g,i,j,l,k,m,p;
578
579 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me);
580
581 Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
582 fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
583
584 for (l=0;l<Num;l++) // allocate for each local wave function
585 coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
586
587 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
588 l=-1; // to access U matrix element (0..Num-1)
589 k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
590 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
591 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
592 if (OnePsiA->PsiType == type) { // drop all but occupied ones
593 l++; // increase l if it is occupied wave function
594 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
595 k++; // increase k only if it is a local, non-extra orbital wave function
596 LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet
597 SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part
598 } else
599 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
600
601 m = -1; // to access U matrix element (0..Num-1)
602 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
603 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
604 if (OnePsiB->PsiType == type) { // drop all but occupied ones
605 m++; // increase m if it is occupied wave function
606 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
607 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
608 else
609 LOnePsiB = NULL;
610 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
611 RecvSource = OnePsiB->my_color_comm_ST_Psi;
612 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
613 LPsiDatB=LevS->LPsi->TempPsi;
614 } else { // .. otherwise send it to all other processes (Max_me... - 1)
615 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
616 if (p != OnePsiB->my_color_comm_ST_Psi)
617 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
618 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
619 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
620
621 if (LPsiDatA != NULL) {
622 double tmp = gsl_matrix_get(U,l,m);
623 g=0;
624 if (LevS->GArray[0].GSq == 0.0) {
625 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
626 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
627 g++;
628 }
629 for (; g < LevS->MaxG; g++) {
630 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
631 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
632 }
633 }
634 }
635 }
636 }
637 }
638
639 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
640 // now, as all wave functions are updated, swap the buffer
641 l = -1;
642 for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
643 if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
644 l++;
645 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
646 LPsiDatA = (fftw_complex *)coeffs_buffer[l];
647 LPsiDatB = LevS->LPsi->LocalPsi[l];
648 for (g=0;g<LevS->MaxG;g++) {
649 LPsiDatB[g].re = LPsiDatA[g].re;
650 LPsiDatB[g].im = LPsiDatA[g].im;
651 }
652 // recalculating non-local form factors which are coefficient dependent!
653 CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
654 }
655 }
656 // and free allocated buffer memory
657 Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer");
658}
659
660/** Changes Wannier Centres according to RunStruct#CommonWannier.
661 * \param *P Problem at hand.
662 * \param Num number of Psis
663 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
664 * \param *WannierSpread array with wannier spread per wave function
665 */
666void ChangeWannierCentres(struct Problem *P, int Num, double **WannierCentre, double *WannierSpread)
667{
668 struct RunStruct *R = &P->R;
669 struct Lattice *Lat = &P->Lat;
670 struct LatticeLevel *LevS = R->LevS;
671 int *marker, **group;
672 int partner[Num];
673 int i,j,l,k;
674 int totalflag, flag;
675 double q[NDIM], center[NDIM];
676 double Spread;
677 int *N = LevS->Plan0.plan->N;
678
679 switch (R->CommonWannier) {
680 case 4:
681 debug(P,"Shifting each Wannier centers to cell center");
682 for (j=0;j<NDIM;j++) // center point in [0,1]^3
683 center[j] = 0.5;
684 RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates
685 for (i=0; i < Num; i++) { // go through all occupied wave functions
686 for (j=0;j<NDIM;j++) // put into Wannier centres
687 WannierCentre[i][j] = q[j];
688 }
689 break;
690 case 3:
691 debug(P,"Shifting Wannier centers individually to nearest grid point");
692 for (i=0;i < Num; i++) { // go through all wave functions
693 RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]);
694 for (j=0;j<NDIM;j++) { // Recibasis is not true inverse but times 2.*PI
695 q[j] *= (double)N[j]/(2.*PI);
696
697 //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
698 if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j]))
699 q[j] = floor(q[j])/(double)N[j];
700 else
701 q[j] = ceil(q[j])/(double)N[j];
702 }
703 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
704 }
705 break;
706 case 2:
707 debug(P,"Combining individual orbitals according to spread.");
708 //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
709 debug(P,"finding partners");
710 marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
711 group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
712 for (l=0;l<Num;l++) {
713 group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker
714 for (k=0;k<=Num;k++)
715 group[l][k] = -1; // reset partner group
716 }
717 for (k=0;k<Num;k++)
718 partner[k] = 0;
719 debug(P,"mem allocated");
720 // go for each orbital through every other, check distance against the sum of both spreads
721 // if smaller add to group of this orbital
722 for (l=0;l<Num;l++) {
723 j=0; // index for partner group
724 for (k=0;k<Num;k++) { // check this against l
725 Spread = 0.;
726 for (i=0;i<NDIM;i++) {
727 //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);
728 Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
729 }
730 Spread = sqrt(Spread); // distance in Spread
731 //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);
732 if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread
733 group[l][j++] = k; // add k to group of l
734 partner[l]++;
735 //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
736 }
737 }
738 }
739
740 // consistency, for each orbital check if this orbital is also in the group of each referred orbital
741 debug(P,"checking consistency");
742 totalflag = 1;
743 for (l=0;l<Num;l++) // checking l's group
744 for (k=0;k<Num;k++) { // k is partner index
745 if (group[l][k] != -1) { // if current index k is a partner
746 flag = 0;
747 for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists
748 if ((group[ group[l][k] ][j] == l))
749 flag = 1;
750 }
751 //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
752 if (totalflag == 1) totalflag = flag;
753 }
754 }
755 // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
756 debug(P,"weight and calculate new centers for partner groups");
757 for (l=0;l<=Num;l++)
758 marker[l] = 1;
759 if (totalflag) {
760 for (l=0;l<Num;l++) { // go through each orbital
761 if (marker[l] != 0) { // if it hasn't been reweighted
762 marker[l] = 0;
763 for (i=0;i<NDIM;i++)
764 q[i] = 0.;
765 j = 0;
766 while (group[l][j] != -1) {
767 marker[group[l][j]] = 0;
768 for (i=0;i<NDIM;i++) {
769 //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
770 q[i] += WannierCentre[ group[l][j] ][i];
771 }
772 j++;
773 }
774 //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
775 for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
776 q[i] /= (double)(j);
777 }
778
779 // put WannierCentre into own and all partners'
780 for (i=0;i<NDIM;i++)
781 WannierCentre[l][i] = q[i];
782 j = 0;
783 while (group[l][j] != -1) {
784 for (i=0;i<NDIM;i++)
785 WannierCentre[group[l][j]][i] = q[i];
786 j++;
787 }
788 }
789 }
790 }
791 if (P->Call.out[StepLeaderOut]) {
792 fprintf(stderr,"Summary:\n");
793 fprintf(stderr,"========\n");
794 for (i=0;i<Num;i++)
795 fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
796 }
797 debug(P,"done");
798
799 Free(marker, "ChangeWannierCentres: marker");
800 for (l=0;l<Num;l++)
801 Free(group[l], "ChangeWannierCentres: group[l]");
802 Free(group, "ChangeWannierCentres: group");
803 break;
804 case 1:
805 debug(P,"Individual orbitals are changed to center of all.");
806 for (i=0;i<NDIM;i++) // zero center of weight
807 q[i] = 0.;
808 for (k=0;k<Num;k++)
809 for (i=0;i<NDIM;i++) { // sum up all orbitals each component
810 q[i] += WannierCentre[k][i];
811 }
812 for (i=0;i<NDIM;i++) // divide by number
813 q[i] /= Num;
814 for (k=0;k<Num;k++)
815 for (i=0;i<NDIM;i++) { // put into this function's array
816 WannierCentre[k][i] = q[i];
817 }
818 break;
819 case 0:
820 default:
821 break;
822 }
823}
824
825/** From the entries of the variance matrices the spread is calculated.
826 * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$
827 * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$
828 * \param *P Problem at hand
829 * \param *A variance matrices
830 * \param old_spread first term of wannier spread
831 * \param spread second term of wannier spread
832 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
833 * \param *WannierSpread array with wannier spread per wave function
834 */
835void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread)
836{
837 struct Lattice *Lat = &P->Lat;
838 struct Psis *Psi = &Lat->Psi;
839 struct OnePsiElement *OnePsiA;
840 int i,j,k,l;
841 double tmp, q[NDIM];
842
843 *old_spread = 0;
844 *spread = 0;
845
846 // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
847 i=-1;
848 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
849 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
850 if (OnePsiA->PsiType == type) { // drop all but occupied ones
851 i++; // increase l if it is occupied wave function
852 //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
853
854 // calculate Wannier Centre
855 for (j=0;j<NDIM;j++) {
856 q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
857 if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
858 q[j] += 1.;
859 }
860 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
861
862 // store orbital spread and centre in file
863 tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
864 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
865 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
866 WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;
867 //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
868 //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
869 //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);
870 //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
871 //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
872
873 // gather all spreads
874 *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
875 for (k=0;k<max_operators;k++)
876 *spread += pow(gsl_matrix_get(A[k],i,i),2);
877 }
878 }
879}
880
881/** Prints gsl_matrix nicely to screen.
882 * \param *P Problem at hand
883 * \param *U gsl matrix
884 * \param Num number of rows/columns
885 * \param *msg name of matrix, prepended before entries
886 */
887void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg)
888{
889 int k,l;
890 fprintf(stderr,"(%i) %s = \n",P->Par.me, msg);
891 for (k=0;k<Num;k++) {
892 for (l=0;l<Num;l++)
893 fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
894 fprintf(stderr,"\n");
895 }
896}
897
898/** Allocates memory for the DiagonalizationData structure
899 * \param *P Problem at hand
900 * \param DiagData pointer to structure
901 * \param Num number of rows and columns in matrix
902 * \param NumMatrices number of matrices to be simultaneously diagonalized
903 * \param extra number of extra matrices to be passively also diagonalized
904 */
905void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra)
906{
907 int i;
908
909 // store integer values
910 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
911 DiagData->Num = Num;
912 DiagData->AllocNum = ceil((double)Num / 2. ) *2;
913 DiagData->NumMatrices = NumMatrices;
914 DiagData->extra = extra;
915
916 // determine communicator and our rank and its size
917 DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum);
918 MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum));
919 MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank));
920
921 // allocate memory
922 DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
923 gsl_matrix_set_identity(DiagData->U);
924
925 DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A");
926 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) {
927 DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
928 gsl_matrix_set_zero(DiagData->A[i]);
929 }
930
931 // init merry-go-round array for diagonilzation
932 DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top");
933 DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot");
934 for (i=0;i<DiagData->AllocNum/2;i++) {
935 DiagData->top[i] = 2*i;
936 DiagData->bot[i] = 2*i+1;
937 }
938/* // print starting values of index generation tables top and bot
939 fprintf(stderr,"top\t");
940 for (k=0;k<AllocNum/2;k++)
941 fprintf(stderr,"%i\t", top[k]);
942 fprintf(stderr,"\n");
943 fprintf(stderr,"bot\t");
944 for (k=0;k<AllocNum/2;k++)
945 fprintf(stderr,"%i\t", bot[k]);
946 fprintf(stderr,"\n");*/
947}
948
949/** Frees allocated memory for the DiagonalizationData structure.
950 * \param DiagData pointer to structure
951 */
952void FreeDiagonalization(struct DiagonalizationData *DiagData)
953{
954 int i;
955
956 Free(DiagData->top, "FreeDiagonalization: DiagData->top");
957 Free(DiagData->bot, "FreeDiagonalization: DiagData->bot");
958 gsl_matrix_free(DiagData->U);
959 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++)
960 gsl_matrix_free(DiagData->A[i]);
961 Free(DiagData->A, "FreeDiagonalization: DiagData->A");
962}
963
964/** Orthogonalizing PsiType#Occupied wave functions for MD.
965 * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged.
966 * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi
967 * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis.
968 * \param *P Problem at hand
969 * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well
970 * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation
971 * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under
972 * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali-
973 * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the
974 * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a
975 * final orthogonalisation of all Psis, after the CG minimisation.
976 */
977void OrthogonalizePsis(struct Problem *P)
978{
979 struct Lattice *Lat = &P->Lat;
980 struct Psis *Psi = &Lat->Psi;
981 struct LatticeLevel *LevS = P->R.LevS;
982 int Num = Psi->NoOfPsis;
983 int i,j,l;
984 struct DiagonalizationData DiagData;
985 double PsiSP;
986
987 InitDiagonalization(P, &DiagData, Num, 1, 0);
988
989 // Calculate Overlap matrix
990 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
991 CalculateOverlap(P, l, Occupied);
992 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
993 for (j=0;j<Num;j++)
994 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
995 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)");
996
997 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)");
998
999 // diagonalize overlap matrix
1000 Diagonalize(P, &DiagData);
1001
1002 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)");
1003
1004 // apply found unitary transformation to wave functions
1005 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1006
1007 // update GramSch stati
1008 for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) {
1009 GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given...
1010 PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]);
1011 //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP);
1012 Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal;
1013 }
1014 UpdateGramSchAllPsiStatus(P,Psi);
1015/*
1016 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
1017 CalculateOverlap(P, l, Occupied);
1018 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1019 for (j=0;j<Num;j++)
1020 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1021 if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)");
1022 */
1023 // free memory
1024 FreeDiagonalization(&DiagData);
1025}
1026
1027/** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD.
1028 * Ensures that two orthonormality condition are met for Psis:
1029 * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$
1030 * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$
1031 * \param *P Problem at hand
1032 * \note [Payne] states that GramSchmidt is not well-suited, and this stronger
1033 * condition is needed for numerical stability
1034 * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat
1035 */
1036void StrongOrthogonalizePsis(struct Problem *P)
1037{
1038 struct Lattice *Lat = &P->Lat;
1039 struct Psis *Psi = &Lat->Psi;
1040 int Num = Psi->NoOfPsis;
1041 int i,j,l;
1042 struct DiagonalizationData DiagData;
1043
1044 InitDiagonalization(P, &DiagData, Num, 2, 0);
1045
1046 // Calculate Overlap matrix
1047 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) {
1048 CalculateOverlap(P, l, Occupied);
1049 //Calculate1stOverlap(P, l, Occupied);
1050 }
1051 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1052 for (j=0;j<Num;j++) {
1053 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1054 //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]);
1055 }
1056
1057 // diagonalize overlap matrix
1058 Diagonalize(P, &DiagData);
1059
1060 // apply found unitary transformation to wave functions
1061 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1062
1063 // free memory
1064 FreeDiagonalization(&DiagData);
1065}
1066
1067/** Uses either serial or parallel diagonalization.
1068 * \param *P Problem at hand
1069 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1070 * \sa ParallelDiagonalization(), SerialDiagonalization()
1071 */
1072void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData)
1073{
1074 if (DiagData->Num != 1) { // one- or multi-process case?
1075 if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) {
1076 /*
1077 debug(P,"Testing with silly matrix");
1078 ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot);
1079 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1080 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1081 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1082 PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1083 */
1084 debug(P,"ParallelDiagonalization");
1085 ParallelDiagonalization(P, DiagData);
1086 } else {/*
1087 debug(P,"Testing with silly matrix");
1088 SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot);
1089 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1090 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1091 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1092 PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1093 */
1094 debug(P,"SerialDiagonalization");
1095 SerialDiagonalization(P, DiagData);
1096 }
1097
1098 //if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1099 //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U");
1100 }
1101}
1102
1103/** Computation of Maximally Localized Wannier Functions.
1104 * Maximally localized functions are prime when evulating a Hamiltonian with
1105 * magnetic fields under periodic boundary conditions, as the common position
1106 * operator is no longer valid. These can be obtained by orbital rotations, which
1107 * are looked for iteratively and gathered in one transformation matrix, to be
1108 * later applied to the set of orbital wave functions.
1109 *
1110 * In order to obtain these, the following algorithm is applied:
1111 * -# Initialize U (identity) as the sought-for transformation matrix
1112 * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
1113 * \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
1114 * \f$A^{(k)}\f$
1115 * -# For each pair of indices (i,j) (i<j) do the following:
1116 * -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
1117 * where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
1118 * -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
1119 * corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
1120 * -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the
1121 * identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
1122 * \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
1123 * -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
1124 * -# Gather the rotations in \f$U = U R\f$
1125 * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
1126 * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
1127 * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
1128 * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
1129 * OnePsiElementAddData#WannierCentre
1130 *
1131 * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
1132 * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
1133 * time.
1134 *
1135 * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
1136 * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
1137 * may be diagonalized simultaneously and parallely. We use the implementation explained in
1138 * [Golub, Matrix computations, 1989, p451].
1139 *
1140 * \param *P Problem at hand
1141 */
1142void ComputeMLWF(struct Problem *P) {
1143 // variables and allocation
1144 struct Lattice *Lat = &P->Lat;
1145 struct Psis *Psi = &Lat->Psi;
1146 int i;
1147 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
1148 double **WannierCentre;
1149 double *WannierSpread;
1150 double spread, spreadSQ;
1151 struct DiagonalizationData DiagData;
1152
1153 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
1154
1155 InitDiagonalization(P, &DiagData, Num, max_operators, 1);
1156
1157 // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
1158 //debug (P,"Calculatung Variance matrices");
1159 FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A);
1160
1161 //debug (P,"Diagonalizing");
1162 Diagonalize(P, &DiagData);
1163
1164 // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
1165 //debug (P,"Performing Unitary transformation");
1166 UnitaryTransformationOnWavefunctions(P, DiagData.U, Num);
1167
1168 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me);
1169 WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre");
1170 WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread");
1171 for (i=0;i<Num;i++)
1172 WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre");
1173
1174 //debug (P,"Computing Wannier centres from variance matrix");
1175 ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread);
1176
1177 // join Wannier orbital to groups with common centres under certain conditions
1178 //debug (P,"Changing Wannier Centres according to CommonWannier");
1179 ChangeWannierCentres(P, Num, WannierCentre, WannierSpread);
1180
1181 // write to file
1182 //debug (P,"Writing spread file");
1183 WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread);
1184
1185 debug(P,"Free'ing memory");
1186 // free all remaining memory
1187 FreeDiagonalization(&DiagData);
1188 for (i=0;i<Num;i++)
1189 Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]");
1190 Free(WannierCentre, "ComputeMLWF: WannierCentre");
1191 Free(WannierSpread, "ComputeMLWF: WannierSpread");
1192}
1193
1194/** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix.
1195 * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and
1196 * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part.
1197 *
1198 * The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue
1199 * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is
1200 * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then
1201 * derive the other by solving the equation system. Finally, the eigenvector is normalized.
1202 * \param *P Problem at hand
1203 * \param *A matrix whose eigenvalues/-vectors are to be found
1204 * \param *eval vector with eigenvalues
1205 * \param *evec matrix with corresponding eigenvectors in columns
1206 */
1207#ifdef HAVE_INLINE
1208inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
1209#else
1210void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
1211#endif
1212{
1213 double a11,a12,a21,a22;
1214 double ev[2], summand1, summand2, norm;
1215 int i;
1216 // find eigenvalues
1217 a11 = gsl_matrix_get(A,0,0);
1218 a12 = gsl_matrix_get(A,0,1);
1219 a21 = gsl_matrix_get(A,1,0);
1220 a22 = gsl_matrix_get(A,1,1);
1221 summand1 = (a11+a22)/2.;
1222 summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22);
1223 ev[0] = summand1 + summand2;
1224 ev[1] = summand1 - summand2;
1225 gsl_vector_set(eval, 0, ev[0]);
1226 gsl_vector_set(eval, 1, ev[1]);
1227 //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2);
1228
1229 // find eigenvectors
1230 for(i=0;i<2;i++) {
1231 if (fabs(ev[i]) < MYEPSILON) {
1232 gsl_matrix_set(evec, 0,i, 0.);
1233 gsl_matrix_set(evec, 1,i, 0.);
1234 } else if (fabs(a22-ev[i]) > MYEPSILON) {
1235 norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i])));
1236 gsl_matrix_set(evec, 0,i, 1./norm);
1237 gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm);
1238 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1239 } else if (fabs(a12) > MYEPSILON) {
1240 norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12));
1241 gsl_matrix_set(evec, 0,i, 1./norm);
1242 gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm);
1243 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1244 } else {
1245 if (fabs(a11-ev[i]) > MYEPSILON) {
1246 norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i])));
1247 gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm);
1248 gsl_matrix_set(evec, 1,i, 1./norm);
1249 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1250 } else if (fabs(a21) > MYEPSILON) {
1251 norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21));
1252 gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm);
1253 gsl_matrix_set(evec, 1,i, 1./norm);
1254 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1255 } else {
1256 //gsl_matrix_set(evec, 0,i, 0.);
1257 //gsl_matrix_set(evec, 1,i, 1.);
1258 fprintf (stderr,"\t evec %i undetermined\n", i);
1259 }
1260 //gsl_matrix_set(evec, 0,0, 1.);
1261 //gsl_matrix_set(evec, 1,0, 0.);
1262 //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me);
1263 }
1264 }
1265}
1266
1267/** Calculates sine and cosine values for multiple matrix diagonalization.
1268 * \param *evec eigenvectors in columns
1269 * \param *eval corresponding eigenvalues
1270 * \param *c cosine to be returned
1271 * \param *s sine to be returned
1272 */
1273#ifdef HAVE_INLINE
1274inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
1275#else
1276void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
1277#endif
1278{
1279 int index;
1280 double x,y,r;
1281
1282 index = gsl_vector_max_index (eval); // get biggest eigenvalue
1283 //fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index);
1284 x = gsl_matrix_get(evec, 0, index);
1285 y = gsl_matrix_get(evec, 1, index);
1286 if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4
1287 y = -y;
1288 x = -x;
1289 }
1290 //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
1291 //fprintf(stderr,"\tx %lg\ty %lg\n", x,y);
1292
1293 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
1294 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
1295 r = sqrt(x*x + y*y); // + z*z);
1296 if (fabs(r) > MYEPSILON) {
1297 *c = sqrt((x + r) / (2.*r));
1298 *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
1299 } else {
1300 *c = 1.;
1301 *s = 0.;
1302 }
1303}
1304
1305/*
1306 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1307 * \param *U transformation matrix set to unity matrix
1308 * \param NumMatrices number of matrices to be diagonalized simultaneously
1309 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1310 * \param AllocNum number of rows/columns in matrices
1311 * \param Num number of wave functions
1312 * \param ProcRank index in group for this cpu
1313 * \param ProcNum number of cpus in group
1314 * \param *top array with top row indices (merry-go-round)
1315 * \param *bot array with bottom row indices (merry-go-round)
1316 */
1317
1318/** Simultaneous diagonalization of matrices with multiple cpus.
1319 * \param *P Problem at hand
1320 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1321 * \note this is slower given one cpu only than SerialDiagonalization()
1322 */
1323void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
1324{
1325 struct RunStruct *R =&P->R;
1326 int tagR0, tagR1, tagS0, tagS1;
1327 int iloc, jloc;
1328 double *s_all, *c_all;
1329 int round, max_rounds;
1330 int start;
1331 int *rcounts, *rdispls;
1332 double *c, *s;
1333 int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
1334 int left, right; // left or right neighbour for process
1335 double spread = 0., old_spread=0., Spread=0.;
1336 int i,j,k,l,m,u;
1337 int set;
1338 int it_steps; // iteration step counter
1339 double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k]
1340 double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k]
1341 double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k]
1342 double a_i, a_j;
1343 gsl_matrix *G;
1344 gsl_vector *h;
1345 gsl_vector *eval;
1346 gsl_matrix *evec;
1347 //gsl_eigen_symmv_workspace *w;
1348
1349 max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set
1350 if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds);
1351
1352 // allocate column vectors for interchange of columns
1353 debug(P,"allocate column vectors for interchange of columns");
1354 c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
1355 s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
1356 c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all");
1357 s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all");
1358 rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts");
1359 rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls");
1360
1361 // allocate eigenvector stuff
1362 debug(P,"allocate eigenvector stuff");
1363 G = gsl_matrix_calloc (2,2);
1364 h = gsl_vector_alloc (2);
1365 eval = gsl_vector_alloc (2);
1366 evec = gsl_matrix_alloc (2,2);
1367 //w = gsl_eigen_symmv_alloc(2);
1368
1369 // establish communication partners
1370 debug(P,"establish communication partners");
1371 if (DiagData->ProcRank == 0) {
1372 tagS0 = WannierALTag; // left p0 always remains left p0
1373 } else {
1374 tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last
1375 }
1376 tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1
1377 tagR0 = WannierALTag; //
1378 tagR1 = WannierARTag; // first process
1379 if (DiagData->ProcRank == 0) {
1380 left = DiagData->ProcNum-1;
1381 right = 1;
1382 Lsend = 0;
1383 Rsend = 1;
1384 Lrecv = 0;
1385 Rrecv = 1;
1386 } else if (DiagData->ProcRank == DiagData->ProcNum - 1) {
1387 left = DiagData->ProcRank - 1;
1388 right = 0;
1389 Lsend = DiagData->ProcRank;
1390 Rsend = DiagData->ProcRank - 1;
1391 Lrecv = DiagData->ProcRank - 1;
1392 Rrecv = DiagData->ProcRank;
1393 } else {
1394 left = DiagData->ProcRank - 1;
1395 right = DiagData->ProcRank + 1;
1396 Lsend = DiagData->ProcRank+1;
1397 Rsend = DiagData->ProcRank - 1;
1398 Lrecv = DiagData->ProcRank - 1;
1399 Rrecv = DiagData->ProcRank+1;
1400 }
1401 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
1402
1403 // initialise A_loc
1404 debug(P,"initialise A_loc");
1405 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1406 //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
1407 Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
1408 Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]");
1409 Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
1410 //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
1411
1412 for (round=0;round<max_rounds;round++) {
1413 Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)];
1414 Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)];
1415 for (l=0;l<DiagData->AllocNum;l++) {
1416 Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round));
1417 Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1);
1418 //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
1419 }
1420 }
1421 }
1422 // initialise U_loc
1423 debug(P,"initialise U_loc");
1424 //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
1425 Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround");
1426 Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal");
1427 Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
1428 //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
1429 for (round=0;round<max_rounds;round++) {
1430 Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)];
1431 Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)];
1432 for (l=0;l<DiagData->AllocNum;l++) {
1433 Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round));
1434 Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1);
1435 //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
1436 }
1437 }
1438
1439 // now comes the iteration loop
1440 debug(P,"now comes the iteration loop");
1441 it_steps = 0;
1442 do {
1443 it_steps++;
1444 //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
1445 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time
1446 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1447 for (round = 0; round < max_rounds;round++) {
1448 start = DiagData->ProcRank * max_rounds + round;
1449 // get indices
1450 i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices
1451 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1452 j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i
1453 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1454 //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
1455
1456 // calculate rotation angle, i.e. c and s
1457 //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
1458 gsl_matrix_set_zero(G);
1459 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1460 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1461 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
1462 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
1463 gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
1464 gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
1465
1466 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1467 for (l=0;l<2;l++)
1468 for (m=0;m<2;m++)
1469 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1470 }
1471 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1472 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1473 EigensolverFor22Matrix(P,G,eval,evec);
1474 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1475
1476 CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]);
1477 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
1478
1479 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1480 // V_loc = V_loc * V_small
1481 //debug(P,"apply rotation to local U");
1482 for (l=0;l<DiagData->AllocNum;l++) {
1483 a_i = Uloc[2*round+iloc][l];
1484 a_j = Uloc[2*round+jloc][l];
1485 Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j;
1486 Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
1487 }
1488 } // end of round
1489 // circulate the rotation angles
1490 //debug(P,"circulate the rotation angles");
1491 MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation
1492 MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));
1493 //m = start;
1494 for (l=0;l<DiagData->AllocNum/2;l++) { // for each process
1495 // we have V_small from process k
1496 //debug(P,"Apply V_small from other process");
1497 i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices
1498 j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i
1499 iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1;
1500 jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1;
1501 for (m=0;m<max_rounds;m++) {
1502 //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
1503 // apply row rotation to each A[k]
1504 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1505 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
1506 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
1507 a_i = Aloc[k][2*m+iloc][i];
1508 a_j = Aloc[k][2*m+iloc][j];
1509 Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j;
1510 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
1511 a_i = Aloc[k][2*m+jloc][i];
1512 a_j = Aloc[k][2*m+jloc][j];
1513 Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j;
1514 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
1515 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
1516 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
1517 }
1518 }
1519 }
1520 // apply rotation to local operator matrices
1521 // A_loc = A_loc * V_small
1522 //debug(P,"apply rotation to local operator matrices A[k]");
1523 for (m=0;m<max_rounds;m++) {
1524 start = DiagData->ProcRank * max_rounds + m;
1525 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1526 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1527 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix !
1528 for (l=0;l<DiagData->AllocNum;l++) {
1529 // Columns, i and j belong to this process only!
1530 a_i = Aloc[k][2*m+iloc][l];
1531 a_j = Aloc[k][2*m+jloc][l];
1532 Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j;
1533 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
1534 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
1535 }
1536 }
1537 }
1538 // Shuffling of these round's columns to prepare next rotation set
1539 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1540 // extract columns from A
1541 //debug(P,"extract columns from A");
1542 MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
1543
1544 }
1545 // and also for V ...
1546 //debug(P,"extract columns from U");
1547 MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
1548
1549
1550 // and merry-go-round for the indices too
1551 //debug(P,"and merry-go-round for the indices too");
1552 MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1553 }
1554
1555 //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1556 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1557 old_spread = Spread;
1558 spread = 0.;
1559 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1560 for (i=0; i < 2*max_rounds; i++) { // go through all wave functions
1561 spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds];
1562 //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
1563 }
1564 }
1565 MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm));
1566 //Spread = spread;
1567 if (P->Par.me == 0) {
1568 //if(P->Call.out[ReadOut])
1569 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
1570 //else
1571 //fprintf(stderr,"%2.9e\n",Spread);
1572 }
1573 // STEP 5: check change of variance
1574 } while (fabs(old_spread-Spread) >= R->EpsWannier);
1575 // end of iterative diagonalization loop: We have found our final orthogonal U!
1576
1577 // gather local parts of U into complete matrix
1578 for (l=0;l<DiagData->ProcNum;l++)
1579 rcounts[l] = DiagData->AllocNum;
1580 debug(P,"allgather U");
1581 for (round=0;round<2*max_rounds;round++) {
1582 for (l=0;l<DiagData->ProcNum;l++)
1583 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
1584 MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1585 }
1586 for (k=0;k<DiagData->AllocNum;k++) {
1587 for(l=0;l<DiagData->AllocNum;l++) {
1588 gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]);
1589 }
1590 }
1591
1592 // after one set, gather A[k] from all and calculate spread
1593 for (l=0;l<DiagData->ProcNum;l++)
1594 rcounts[l] = DiagData->AllocNum;
1595 debug(P,"gather A[k] for spread");
1596 for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix !
1597 debug(P,"A[k] all gather");
1598 for (round=0;round<2*max_rounds;round++) {
1599 for (l=0;l<DiagData->ProcNum;l++)
1600 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
1601 MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1602 }
1603 for (k=0;k<DiagData->AllocNum;k++) {
1604 for(l=0;l<DiagData->AllocNum;l++) {
1605 gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]);
1606 }
1607 }
1608 }
1609
1610 // free eigenvector stuff
1611 gsl_vector_free(h);
1612 gsl_matrix_free(G);
1613 //gsl_eigen_symmv_free(w);
1614 gsl_vector_free(eval);
1615 gsl_matrix_free(evec);
1616
1617 // Free column vectors
1618 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1619 Free(Atotal[k], "ParallelDiagonalization: Atotal[k]");
1620 Free(Around[k], "ParallelDiagonalization: Around[k]");
1621 }
1622 Free(Uround, "ParallelDiagonalization: Uround");
1623 Free(Utotal, "ParallelDiagonalization: Utotal");
1624 Free(c_all, "ParallelDiagonalization: c_all");
1625 Free(s_all, "ParallelDiagonalization: s_all");
1626 Free(c, "ParallelDiagonalization: c");
1627 Free(s, "ParallelDiagonalization: s");
1628 Free(rcounts, "ParallelDiagonalization: rcounts");
1629 Free(rdispls, "ParallelDiagonalization: rdispls");
1630}
1631/*
1632 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1633 * \param *U transformation matrix set to unity matrix
1634 * \param NumMatrices number of matrices to be diagonalized
1635 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1636 * \param AllocNum number of rows/columns in matrices
1637 * \param Num number of wave functions
1638 * \param *top array with top row indices (merry-go-round)
1639 * \param *bot array with bottom row indices (merry-go-round)
1640 */
1641
1642/** Simultaneous Diagonalization of variances matrices with one cpu.
1643 * \param *P Problem at hand
1644 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1645 * \note this is faster given one cpu only than ParallelDiagonalization()
1646 */
1647void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
1648{
1649 struct RunStruct *R = &P->R;
1650 gsl_matrix *G;
1651 gsl_vector *h;
1652 gsl_vector *eval;
1653 gsl_matrix *evec;
1654 //gsl_eigen_symmv_workspace *w;
1655 double *c,*s,a_i,a_j;
1656 int it_steps, set, ProcRank;
1657 int i,j,k,l,m;
1658 double spread = 0., old_spread = 0.;\
1659
1660 // allocate eigenvector stuff
1661 debug(P,"allocate eigenvector stuff");
1662 G = gsl_matrix_calloc (2,2);
1663 h = gsl_vector_alloc (2);
1664 eval = gsl_vector_alloc (2);
1665 evec = gsl_matrix_alloc (2,2);
1666 //w = gsl_eigen_symmv_alloc(2);
1667
1668 c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
1669 s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
1670 debug(P,"now comes the iteration loop");
1671 it_steps=0;
1672 do {
1673 it_steps++;
1674 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
1675 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
1676 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
1677 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1678 // STEP 3: for all index pairs 0<= i<j <AllocNum
1679 for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) {
1680 // get indices
1681 i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices
1682 j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i
1683 //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
1684 // STEP 3a: Calculate G
1685 gsl_matrix_set_zero(G);
1686
1687 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1688 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1689 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1690 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1691 gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1692 gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1693 //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
1694
1695 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1696 for (l=0;l<2;l++)
1697 for (m=0;m<2;m++)
1698 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1699
1700 //PrintGSLMatrix(P,G,2, "G");
1701 }
1702
1703 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1704 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1705 EigensolverFor22Matrix(P,G,eval,evec);
1706 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1707
1708 //PrintGSLMatrix(P, evec, 2, "Eigenvectors");
1709 CalculateRotationAnglesFromEigenvalues(evec, eval, c, s);
1710 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
1711
1712 //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
1713 // STEP 3d: apply rotation R to rows and columns of A^{(k)}
1714 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1715 //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)");
1716 for (l=0;l<DiagData->AllocNum;l++) {
1717 // Rows
1718 a_i = gsl_matrix_get(DiagData->A[k],i,l);
1719 a_j = gsl_matrix_get(DiagData->A[k],j,l);
1720 gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j);
1721 gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j);
1722 }
1723 for (l=0;l<DiagData->AllocNum;l++) {
1724 // Columns
1725 a_i = gsl_matrix_get(DiagData->A[k],l,i);
1726 a_j = gsl_matrix_get(DiagData->A[k],l,j);
1727 gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j);
1728 gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j);
1729 }
1730 //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)");
1731 }
1732 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1733 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)");
1734 // STEP 3e: apply U = R*U
1735 for (l=0;l<DiagData->AllocNum;l++) {
1736 a_i = gsl_matrix_get(DiagData->U,i,l);
1737 a_j = gsl_matrix_get(DiagData->U,j,l);
1738 gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j);
1739 gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j);
1740 }
1741 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)");
1742 }
1743 // and merry-go-round for the indices too
1744 //debug(P,"and merry-go-round for the indices too");
1745 if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1746 }
1747
1748 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1749 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1750 old_spread = spread;
1751 spread = 0.;
1752 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1753 for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions
1754 spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2);
1755 }
1756 }
1757 if(P->Par.me == 0) {
1758 //if(P->Call.out[ReadOut])
1759 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
1760 //else
1761 //fprintf(stderr,"%2.9e\n",spread);
1762 }
1763 // STEP 5: check change of variance
1764 } while (fabs(old_spread-spread) >= R->EpsWannier);
1765 // end of iterative diagonalization loop: We have found our final orthogonal U!
1766 Free(c, "SerialDiagonalization: c");
1767 Free(s, "SerialDiagonalization: s");
1768 // free eigenvector stuff
1769 gsl_vector_free(h);
1770 gsl_matrix_free(G);
1771 //gsl_eigen_symmv_free(w);
1772 gsl_vector_free(eval);
1773 gsl_matrix_free(evec);
1774}
1775
1776/** Writes the wannier centres and spread to file.
1777 * Also puts centres from array into OnePsiElementAddData structure.
1778 * \param *P Problem at hand
1779 * \param old_spread first term of wannier spread
1780 * \param spread second term of wannier spread
1781 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
1782 * \param *WannierSpread array with wannier spread per wave function
1783 */
1784void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread)
1785{
1786 struct FileData *F = &P->Files;
1787 struct RunStruct *R = &P->R;
1788 struct Lattice *Lat = &P->Lat;
1789 struct Psis *Psi = &Lat->Psi;
1790 struct OnePsiElement *OnePsiA;
1791 char spin[12], suffix[18];
1792 int i,j,l;
1793
1794 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
1795
1796 switch (Lat->Psi.PsiST) {
1797 case SpinDouble:
1798 strcpy(suffix,".spread.csv");
1799 strcpy(spin,"SpinDouble");
1800 break;
1801 case SpinUp:
1802 strcpy(suffix,".spread_up.csv");
1803 strcpy(spin,"SpinUp");
1804 break;
1805 case SpinDown:
1806 strcpy(suffix,".spread_down.csv");
1807 strcpy(spin,"SpinDown");
1808 break;
1809 }
1810 if (P->Par.me_comm_ST == 0) {
1811 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
1812 OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
1813 else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
1814 OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
1815 if (F->SpreadFile == NULL) {
1816 Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n");
1817 } else {
1818 fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
1819 fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
1820 }
1821 }
1822
1823 // put (new) WannierCentres into local ones and into file
1824 i=-1;
1825 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1826 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1827 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1828 i++; // increase l if it is occupied wave function
1829 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
1830 for (j=0;j<NDIM;j++)
1831 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
1832 }
1833 if (P->Par.me_comm_ST == 0)
1834 fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]);
1835 }
1836 }
1837 if (P->Par.me_comm_ST == 0) {
1838 fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
1839 fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
1840 fflush(F->SpreadFile);
1841 }
1842}
1843
1844
1845/** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
1846 * \param *P Problem at hand
1847 * \return 1 - success, 0 - failure
1848 */
1849int ParseWannierFile(struct Problem *P)
1850{
1851 struct Lattice *Lat = &P->Lat;
1852 struct RunStruct *R = &P->R;
1853 struct Psis *Psi = &Lat->Psi;
1854 struct OnePsiElement *OnePsiA;
1855 int i,l,j, msglen;
1856 FILE *SpreadFile;
1857 char tagname[255];
1858 char suffix[18];
1859 double WannierCentre[NDIM+1]; // combined centre and spread
1860 MPI_Status status;
1861 int signal = 0; // 1 - ok, 0 - error
1862
1863 switch (Lat->Psi.PsiST) {
1864 case SpinDouble:
1865 strcpy(suffix,".spread.csv");
1866 break;
1867 case SpinUp:
1868 strcpy(suffix,".spread_up.csv");
1869 break;
1870 case SpinDown:
1871 strcpy(suffix,".spread_down.csv");
1872 break;
1873 }
1874 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me);
1875
1876 if (P->Par.me_comm_ST == 0) {
1877 if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
1878 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1879 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1880 return 0;
1881 //Error(SomeError,"ParseWannierFile: Opening failed\n");
1882 }
1883 signal = 1;
1884 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1885 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1886 } else {
1887 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1888 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1889 if (signal == 0)
1890 return 0;
1891 }
1892 i=-1;
1893 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1894 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1895 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1896 i++; // increase l if it is occupied wave function
1897 if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
1898 sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
1899 signal = 0;
1900 if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
1901 //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
1902 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1903 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1904 return 0;
1905 }
1906 if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
1907 //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
1908 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1909 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1910 return 0;
1911 }
1912 signal = 1;
1913 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1914 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1915 } else {
1916 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1917 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1918 if (signal == 0)
1919 return 0;
1920 }
1921 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
1922 if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
1923 if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
1924 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
1925 //return 0;
1926 MPI_Get_count(&status, MPI_DOUBLE, &msglen);
1927 if (msglen != NDIM+1)
1928 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
1929 //return 0;
1930 }
1931 if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
1932 Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
1933 //return 0;
1934 // and store 'em (for all who have this Psi local)
1935 if ((P->Par.me == 0) && P->Call.out[ValueOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
1936 for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
1937 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
1938 //if (P->Par.me ==0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
1939 } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
1940 if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
1941 Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
1942 //return 0;
1943 }
1944 }
1945 }
1946 if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0))
1947 fclose(SpreadFile);
1948 //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
1949 return 1;
1950}
1951
1952/** Calculates the spread of orbital \a i.
1953 * Stored in OnePsiElementAddData#WannierSpread.
1954 * \param *P Problem at hand
1955 * \param i i-th wave function (note "extra" ones are not counted!)
1956 * \return spread \f$\sigma^2_{A^{(k)}}\f$
1957 */
1958double CalculateSpread(struct Problem *P, int i) {
1959 struct Lattice *Lat = &P->Lat;
1960 struct RunStruct *R = &P->R;
1961 struct Psis *Psi = &Lat->Psi;
1962 struct LatticeLevel *Lev0 = R->Lev0;
1963 struct LatticeLevel *LevS = R->LevS;
1964 struct Density *Dens0 = Lev0->Dens;
1965 struct fft_plan_3d *plan = Lat->plan;
1966 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
1967 fftw_real *PsiCR = (fftw_real *)PsiC;
1968 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
1969 fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
1970 fftw_complex **HGcRC = (fftw_complex**)HGcR;
1971 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
1972 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
1973 MPI_Status status;
1974 struct OnePsiElement *OnePsiA, *LOnePsiA;
1975 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
1976 fftw_complex *LPsiDatA=NULL;
1977 int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
1978 N0 = LevS->Plan0.plan->local_nx;
1979 N = LevS->Plan0.plan->N;
1980 const int NUpx = LevS->NUp[0];
1981 const int NUpy = LevS->NUp[1];
1982 const int NUpz = LevS->NUp[2];
1983 double a_ij, b_ij, A_ij, B_ij;
1984 double tmp, tmp2, spread = 0;
1985 double **cos_lookup, **sin_lookup;
1986
1987 b_ij = 0;
1988
1989 CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N);
1990
1991 // fill matrices
1992 OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA
1993 if (OnePsiA->PsiType != Extra) { // drop if extra one
1994 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
1995 LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
1996 else
1997 LOnePsiA = NULL;
1998 if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi
1999 RecvSource = OnePsiA->my_color_comm_ST_Psi;
2000 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
2001 LPsiDatA=LevS->LPsi->TempPsi;
2002 } else { // .. otherwise send it to all other processes (Max_me... - 1)
2003 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
2004 if (p != OnePsiA->my_color_comm_ST_Psi)
2005 MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
2006 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
2007 } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
2008
2009 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
2010 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
2011 for (n0=0;n0<N0;n0++)
2012 for (n[1]=0;n[1]<N[1];n[1]++)
2013 for (n[2]=0;n[2]<N[2];n[2]++) {
2014 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
2015 iS = n[2]+N[2]*(n[1]+N[1]*n0);
2016 n[0] = n0 + LevS->Plan0.plan->start_nx;
2017 for (k=0;k<max_operators;k+=2) {
2018 tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
2019 tmp2 = PsiCR[i0] /LevS->MaxN;
2020 // check lookup
2021 if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
2022 fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);
2023 Error(SomeError, "Lookup table does not match real value!");
2024 }
2025// HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2026// HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
2027// HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2028// HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
2029 HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
2030 HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
2031 HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
2032 HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
2033 }
2034 }
2035 for (k=0;k<max_operators;k++) {
2036 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
2037 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
2038 }
2039
2040
2041 for (k=0;k<max_operators;k++) {
2042 a_ij = 0;
2043 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
2044 // sum directly in a_ij and b_ij the two desired terms
2045 g=0;
2046 if (LevS->GArray[0].GSq == 0.0) {
2047 Index = LevS->GArray[g].Index;
2048 a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
2049 b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
2050 g++;
2051 }
2052 for (; g < LevS->MaxG; g++) {
2053 Index = LevS->GArray[g].Index;
2054 a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
2055 b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
2056 } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
2057 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2058 spread += pow(A_ij,2);
2059 }
2060 }
2061 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2062
2063 // store spread in OnePsiElementAdd
2064 Psi->AddData[i].WannierSpread = B_ij - spread;
2065
2066 FreeSinCosLookupTable(cos_lookup,sin_lookup);
2067
2068 return (B_ij - spread);
2069}
Note: See TracBrowser for help on using the repository browser.