source: pcp/src/wannier.c@ 807e8a

Last change on this file since 807e8a was 724819, checked in by Frederik Heber <heber@…>, 17 years ago

incount in MPI_Pack_size() is not unsigned int as stated in the reference example, but a signed integer

  • Property mode set to 100644
File size: 99.1 KB
Line 
1/** \file wannier.c
2 * Maximally Localized Wannier Functions.
3 *
4 * Contains the on function that minimises the spread of all orbitals in one rush in a parallel
5 * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
6 * calculate the spread of a specific orbital, which may be useful in checking on the change of
7 * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
8 *
9 Project: ParallelCarParrinello
10 \author Frederik Heber
11 \date 2006
12
13 File: wannier.c
14 $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $
15*/
16
17#include <math.h>
18#include <gsl/gsl_math.h>
19#include <gsl/gsl_eigen.h>
20#include <gsl/gsl_matrix.h>
21#include <gsl/gsl_vector.h>
22#include <gsl/gsl_complex.h>
23#include <gsl/gsl_complex_math.h>
24#include <gsl/gsl_sort_vector.h>
25#include <gsl/gsl_heapsort.h>
26#include <gsl/gsl_blas.h>
27#include <string.h>
28
29#include "data.h"
30#include "density.h"
31#include "errors.h"
32#include "gramsch.h"
33#include "helpers.h"
34#include "init.h"
35#include "myfft.h"
36#include "mymath.h"
37#include "output.h"
38#include "perturbed.h"
39#include "wannier.h"
40
41
42#define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
43#define type Occupied
44
45
46/** Converts type fftw_complex to gsl_complex.
47 * \param a complex number
48 * \return b complex number
49 */
50gsl_complex convertComplex (fftw_complex a) {
51 return gsl_complex_rect(c_re(a),c_im(a));
52}
53
54/** "merry go round" implementation for parallel index ordering.
55 * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
56 * performed which generates once each possible pairing.
57 * \param *top index array 1
58 * \param *bot index array 2
59 * \param m N/2, where N is the matrix row/column dimension
60 * \note taken from [Golub, Matrix computations, 1989, p451]
61 */
62void MerryGoRoundIndices(int *top, int *bot, int m)
63{
64 int *old_top, *old_bot;
65 int k;
66 old_top = (int *) Malloc(sizeof(int)*m, "music: old_top");
67 old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot");
68/* fprintf(stderr,"oldtop\t");
69 for (k=0;k<m;k++)
70 fprintf(stderr,"%i\t", top[k]);
71 fprintf(stderr,"\n");
72 fprintf(stderr,"oldbot\t");
73 for (k=0;k<m;k++)
74 fprintf(stderr,"%i\t", bot[k]);
75 fprintf(stderr,"\n");*/
76 // first copy arrays
77 for (k=0;k<m;k++) {
78 old_top[k] = top[k];
79 old_bot[k] = bot[k];
80 }
81 // then let the music play
82 for (k=0;k<m;k++) {
83 if (k==1)
84 top[k] = old_bot[0];
85 else if (k > 1)
86 top[k] = old_top[k-1];
87 if (k==m-1)
88 bot[k] = old_top[k];
89 else
90 bot[k] = old_bot[k+1];
91 }
92/* fprintf(stderr,"top\t");
93 for (k=0;k<m;k++)
94 fprintf(stderr,"%i\t", top[k]);
95 fprintf(stderr,"\n");
96 fprintf(stderr,"bot\t");
97 for (k=0;k<m;k++)
98 fprintf(stderr,"%i\t", bot[k]);
99 fprintf(stderr,"\n");*/
100 // and finito
101 Free(old_top, "MerryGoRoundIndices: old_top");
102 Free(old_bot, "MerryGoRoundIndices: old_bot");
103}
104
105/** merry-go-round for matrix columns.
106 * The trick here is that we must be aware of multiple rotations per process, thus only some of the
107 * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
108 * pointers to the matrix columns within the local array.
109 * \param comm communicator for circulation
110 * \param *Aloc local array of columns
111 * \param Num entries per column
112 * \param max_rounds number of column pairs in \a *Around
113 * \param k offset for tag
114 * \param tagS0 MPI tag for sending left column
115 * \param tagS1 MPI tag for sending right column
116 * \param tagR0 MPI tag for receiving left column
117 * \param tagR1 MPI tag for receiving right column
118 */
119void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
120 //double *A_locS1, *A_locS2; // local columns of A[k]
121 //double *A_locR1, *A_locR2; // local columns of A[k]
122 MPI_Request requestS0, requestS1, requestR0, requestR1;
123 MPI_Status status;
124 int ProcRank, ProcNum;
125 int l;
126 MPI_Comm_size (comm, &ProcNum);
127 MPI_Comm_rank (comm, &ProcRank);
128 double *Abuffer1, *Abuffer2; // mark the columns that are circulated
129
130 //fprintf(stderr,"shifting...");
131 if (ProcRank == 0) {
132 if (max_rounds > 1) {
133 // get last left column
134 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
135 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
136 } else {
137 // get right column
138 Abuffer1 = Aloc[1]; // note down the free column
139 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0);
140 }
141
142 //fprintf(stderr,"...left columns...");
143 for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
144 Aloc[l] = Aloc[l-2];
145
146 if (max_rounds > 1) {
147 //fprintf(stderr,"...first right...");
148 Aloc[2] = Aloc[1]; // get first right column
149 }
150
151 //fprintf(stderr,"...right columns...");
152 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
153 Aloc[l] = Aloc[l+2];
154
155 //fprintf(stderr,"...last right...");
156 Aloc[(2*max_rounds-1)] = Abuffer1;
157 MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
158
159 } else if (ProcRank == ProcNum-1) {
160 //fprintf(stderr,"...first right...");
161 // get first right column
162 Abuffer2 = Aloc[1]; // note down the free column
163 MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
164
165 //fprintf(stderr,"...right columns...");
166 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
167 Aloc[(l)] = Aloc[(l+2)];
168
169 //fprintf(stderr,"...last right...");
170 Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
171
172 //fprintf(stderr,"...left columns...");
173 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
174 Aloc[(l)] = Aloc[(l-2)];
175
176 //fprintf(stderr,"...first left...");
177// if (max_rounds > 1)
178 Aloc[0] = Abuffer2; // get first left column
179 MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
180
181 } else {
182 // get last left column
183 MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
184 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
185
186 //fprintf(stderr,"...first right...");
187 // get first right column
188 MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
189 Abuffer2 = Aloc[1]; // note down the free column
190
191 //fprintf(stderr,"...left columns...");
192 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
193 Aloc[(l)] = Aloc[(l-2)];
194
195 //fprintf(stderr,"...right columns...");
196 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
197 Aloc[(l)] = Aloc[(l+2)];
198
199 //fprintf(stderr,"...first left...");
200 Aloc[0] = Abuffer1; // get first left column
201 MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
202
203 //fprintf(stderr,"...last right...");
204 Aloc[(2*max_rounds-1)] = Abuffer2;
205 MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
206 }
207
208 //fprintf(stderr,"...waiting...");
209 if (ProcRank != ProcNum-1)
210 MPI_Wait(&requestS0, &status);
211 if (ProcRank != 0) // first left column
212 MPI_Wait(&requestR0, &status);
213 if (ProcRank != 0)
214 MPI_Wait(&requestS1, &status);
215 if (ProcRank != ProcNum-1)
216 MPI_Wait(&requestR1, &status);
217 //fprintf(stderr,"...done\n");
218}
219
220/** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group.
221 * \param *P Problem at hand
222 * \param AllocNum number of rows in matrix
223 * \return address of MPI communicator
224 */
225#ifdef HAVE_INLINE
226inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
227#else
228MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
229#endif
230{
231 MPI_Comm *comm = &P->Par.comm_ST;
232
233 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
234 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me);
235 if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel
236 comm = &P->Par.comm_ST;
237 //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n");
238 } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first
239 if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
240 comm = &P->Par.comm_ST_Psi;
241 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
242 } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
243 comm = &P->Par.comm_ST_PsiT;
244 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
245 }
246 } else {
247 if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
248 comm = &P->Par.comm_ST_PsiT;
249 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
250 } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
251 comm = &P->Par.comm_ST_Psi;
252 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
253 }
254 }
255 return comm;
256}
257
258/** Allocates and fills Lookup table for sin/cos values at each grid node.
259 * \param ***cos_table pointer to two-dimensional lookup table for cosine values
260 * \param ***sin_table pointer to two-dimensional lookup table for sine values
261 * \param *N array with number of nodes per \a NDIM axis
262 */
263void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N)
264{
265 int i, j;
266 double argument;
267 double **cos_lookup, **sin_lookup;
268
269 // create lookup table for sin/cos values
270 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
271 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
272 for (i=0;i<NDIM;i++) {
273 // allocate memory
274 cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup");
275 sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup");
276
277 // reset arrays
278 SetArrayToDouble0(cos_lookup[i],N[i]);
279 SetArrayToDouble0(sin_lookup[i],N[i]);
280
281 // create lookup values
282 for (j=0;j<N[i];j++) {
283 argument = 2*PI/(double)N[i]*(double)j;
284 cos_lookup[i][j] = cos(argument);
285 sin_lookup[i][j] = sin(argument);
286 }
287 }
288 *cos_table = cos_lookup;
289 *sin_table = sin_lookup;
290}
291
292/** Frees memory allocated during CreateSinCosLookupTable().
293 * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values
294 * \param ***sin_lookup pointer to two-dimensional lookup table for sine values
295 */
296void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup)
297{
298 int i;
299 // free lookups
300 for (i=0;i<NDIM;i++) {
301 Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]");
302 Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]");
303 }
304 Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup");
305 Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup");
306}
307
308/** Fills the entries of the six variance matrices.
309 * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the
310 * wave function and by diagonalization one obtains the unitary transformation with which the Psis are
311 * treated afterwards.
312 * \param *P Problem at hand
313 * \param AllocNum number of rows/columns
314 * \param **A pointer to variance matrices
315 * \sa ComputeMLWF() - master function.
316 */
317void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A)
318{
319 struct Lattice *Lat = &P->Lat;
320 struct RunStruct *R = &P->R;
321 struct Psis *Psi = &Lat->Psi;
322 struct LatticeLevel *Lev0 = R->Lev0;
323 struct LatticeLevel *LevS = R->LevS;
324 struct Density *Dens0 = Lev0->Dens;
325 struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB;
326 struct fft_plan_3d *plan = Lat->plan;
327 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
328 fftw_real *PsiCR = (fftw_real *)PsiC;
329 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
330 fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array
331 fftw_complex **HGcRC = (fftw_complex**)HGcR;
332 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
333 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
334 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
335 MPI_Status status;
336 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
337 int n[NDIM],n0,i0,iS, Index;
338 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
339 int N0 = LevS->Plan0.plan->local_nx;
340 int *N = LevS->Plan0.plan->N;
341 const int NUpx = LevS->NUp[0];
342 const int NUpy = LevS->NUp[1];
343 const int NUpz = LevS->NUp[2];
344 double argument, PsiAtNode;
345 int e,g,i,j,k,l,m,p,u;
346 double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
347 double **cos_lookup = NULL,**sin_lookup = NULL;
348
349 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
350
351 debug(P, "Creating Lookup Table");
352 CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N);
353
354 debug(P, "Calculating each entry of variance matrices");
355 l=-1; // to access U matrix element (0..Num-1)
356 // fill the matrices
357 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
358 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
359 if (OnePsiA->PsiType == type) { // drop all but occupied ones
360 l++; // increase l if it is non-extra wave function
361 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
362 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
363 else
364 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
365
366 //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
367 if (LPsiDatA != NULL) {
368 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
369 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
370 for (n0=0;n0<N0;n0++)
371 for (n[1]=0;n[1]<N[1];n[1]++)
372 for (n[2]=0;n[2]<N[2];n[2]++) {
373 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
374 iS = n[2]+N[2]*(n[1]+N[1]*n0);
375 n[0] = n0 + LevS->Plan0.plan->start_nx;
376 for (k=0;k<max_operators;k+=2) {
377 e = k/2;
378 argument = 2.*PI/(double)(N[e])*(double)(n[e]);
379 PsiAtNode = PsiCR[i0] /LevS->MaxN;
380 // check lookup
381 if (!l) // perform check on first wave function only
382 if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) {
383 Error(SomeError, "Lookup table does not match real value!");
384 }
385 HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
386 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
387 HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
388 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
389 }
390 }
391 for (u=0;u<max_operators;u++) {
392 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
393 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
394 }
395 }
396 m = -1; // to access U matrix element (0..Num-1)
397 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
398 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
399 if (OnePsiB->PsiType == type) { // drop all but occupied ones
400 m++; // increase m if it is non-extra wave function
401 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
402 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
403 else
404 LOnePsiB = NULL;
405 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
406 RecvSource = OnePsiB->my_color_comm_ST_Psi;
407 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
408 LPsiDatB=LevS->LPsi->TempPsi;
409 } else { // .. otherwise send it to all other processes (Max_me... - 1)
410 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
411 if (p != OnePsiB->my_color_comm_ST_Psi)
412 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
413 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
414 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
415
416 for (u=0;u<max_operators;u++) {
417 a_ij = 0;
418 b_ij = 0;
419 if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
420 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
421 g=0;
422 if (LevS->GArray[0].GSq == 0.0) {
423 Index = LevS->GArray[g].Index;
424 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
425 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
426 g++;
427 }
428 for (; g < LevS->MaxG; g++) {
429 Index = LevS->GArray[g].Index;
430 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
431 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
432 } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
433 // sum up elements from all coefficients sharing processes
434 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
435 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
436 a_ij = A_ij;
437 b_ij = B_ij;
438 // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
439 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
440 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
441 } else { // receive ...
442 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
443 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
444 }
445 // ... and store
446 //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
447 //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
448 gsl_matrix_set(A[u], l, m, A_ij);
449 gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
450 }
451 }
452 }
453 }
454 }
455 // reset extra entries
456 for (u=0;u<=max_operators;u++) {
457 for (i=Num;i<AllocNum;i++)
458 for (j=0;j<AllocNum;j++)
459 gsl_matrix_set(A[u], i,j, 0.);
460 for (i=Num;i<AllocNum;i++)
461 for (j=0;j<AllocNum;j++)
462 gsl_matrix_set(A[u], j,i, 0.);
463 }
464 FreeSinCosLookupTable(cos_lookup, sin_lookup);
465
466/*// print A matrices for debug
467 if (P->Par.me == 0)
468 for (u=0;u<max_operators+1;u++) {
469 fprintf(stderr, "A[%i] = \n",u);
470 for (i=0;i<Num;i++) {
471 for (j=0;j<Num;j++)
472 fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
473 fprintf(stderr, "\n");
474 }
475 }
476*/
477}
478
479/** Calculates reciprocal second order moments of each PsiType#Occupied orbital.
480 * First order is zero due to wave function being real (symmetry condition).
481 * \param *P Problem at hand
482 */
483void CalculateSecondOrderReciprocalMoment(struct Problem *P)
484{
485 struct Lattice *Lat = &P->Lat;
486 struct RunStruct *R = &P->R;
487 struct FileData *F = &P->Files;
488 struct Psis *Psi = &Lat->Psi;
489 struct LatticeLevel *LevS = R->LevS;
490 double result, Result;
491 fftw_complex *LPsiDatA=NULL;
492 struct OnePsiElement *OnePsiA;
493 int i,j,l,g;
494 char spin[12], suffix[18];
495
496 switch (Lat->Psi.PsiST) {
497 case SpinDouble:
498 strcpy(suffix,".recispread.csv");
499 strcpy(spin,"SpinDouble");
500 break;
501 case SpinUp:
502 strcpy(suffix,".recispread_up.csv");
503 strcpy(spin,"SpinUp");
504 break;
505 case SpinDown:
506 strcpy(suffix,".recispread_down.csv");
507 strcpy(spin,"SpinDown");
508 break;
509 }
510 if(P->Par.me_comm_ST == 0) {
511 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me);
512 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
513 OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
514 else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile)
515 OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
516 if (F->ReciSpreadFile == NULL) {
517 Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n");
518 } else {
519 fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin);
520 }
521 }
522
523 // integrate second order moment
524 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
525 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
526 if (OnePsiA->PsiType == type) { // drop all but occupied ones
527 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
528 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
529 else
530 LPsiDatA = NULL;
531
532 if (LPsiDatA != NULL) {
533 if (P->Par.me_comm_ST == 0)
534 fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo);
535 for (i=0;i<NDIM;i++) {
536 for (j=0;j<NDIM;j++) {
537 result = 0.;
538 g = 0;
539 if (LevS->GArray[0].GSq == 0.0) {
540 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re);
541 g++;
542 }
543 for (;g<LevS->MaxG;g++)
544 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im);
545 //result *= Lat->Volume/LevS->MaxG;
546 MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
547 if (P->Par.me_comm_ST == 0)
548 fprintf(F->ReciSpreadFile,"%2.5lg ", Result);
549 }
550 }
551 if (P->Par.me_comm_ST == 0)
552 fprintf(F->ReciSpreadFile,"\n");
553 }
554 }
555 }
556 if(P->Par.me_comm_ST == 0) {
557 fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n");
558 fflush(F->ReciSpreadFile);
559 }
560}
561
562/** Given the unitary matrix the transformation is performed on the Psis.
563 * \param *P Problem at hand
564 * \param *U gsl matrix containing the transformation matrix
565 * \param Num dimension parameter for the matrix, i.e. number of wave functions
566 */
567void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num)
568{
569 struct Lattice *Lat = &P->Lat;
570 struct RunStruct *R = &P->R;
571 struct Psis *Psi = &Lat->Psi;
572 struct LatticeLevel *LevS = R->LevS;
573 MPI_Status status;
574 struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
575 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
576 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
577 int g,i,j,l,k,m,p;
578
579 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me);
580
581 Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
582 fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
583
584 for (l=0;l<Num;l++) // allocate for each local wave function
585 coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
586
587 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
588 l=-1; // to access U matrix element (0..Num-1)
589 k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
590 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
591 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
592 if (OnePsiA->PsiType == type) { // drop all but occupied ones
593 l++; // increase l if it is occupied wave function
594 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
595 k++; // increase k only if it is a local, non-extra orbital wave function
596 LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet
597 SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part
598 } else
599 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
600
601 m = -1; // to access U matrix element (0..Num-1)
602 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
603 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
604 if (OnePsiB->PsiType == type) { // drop all but occupied ones
605 m++; // increase m if it is occupied wave function
606 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
607 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
608 else
609 LOnePsiB = NULL;
610 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
611 RecvSource = OnePsiB->my_color_comm_ST_Psi;
612 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
613 LPsiDatB=LevS->LPsi->TempPsi;
614 } else { // .. otherwise send it to all other processes (Max_me... - 1)
615 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
616 if (p != OnePsiB->my_color_comm_ST_Psi)
617 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
618 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
619 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
620
621 if (LPsiDatA != NULL) {
622 double tmp = gsl_matrix_get(U,l,m);
623 g=0;
624 if (LevS->GArray[0].GSq == 0.0) {
625 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
626 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
627 g++;
628 }
629 for (; g < LevS->MaxG; g++) {
630 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
631 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
632 }
633 }
634 }
635 }
636 }
637 }
638
639 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
640 // now, as all wave functions are updated, swap the buffer
641 l = -1;
642 for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
643 if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
644 l++;
645 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
646 LPsiDatA = (fftw_complex *)coeffs_buffer[l];
647 LPsiDatB = LevS->LPsi->LocalPsi[l];
648 for (g=0;g<LevS->MaxG;g++) {
649 LPsiDatB[g].re = LPsiDatA[g].re;
650 LPsiDatB[g].im = LPsiDatA[g].im;
651 }
652 // recalculating non-local form factors which are coefficient dependent!
653 CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
654 }
655 }
656 // and free allocated buffer memory
657 Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer");
658}
659
660/** Changes Wannier Centres according to RunStruct#CommonWannier.
661 * \param *P Problem at hand.
662 */
663void ChangeWannierCentres(struct Problem *P)
664{
665 struct RunStruct *R = &P->R;
666 struct Lattice *Lat = &P->Lat;
667 struct LatticeLevel *LevS = R->LevS;
668 struct Psis *Psi = &Lat->Psi;
669 struct OnePsiElement *OnePsiA;
670 int *marker, **group;
671 int Num = Psi->NoOfPsis;
672 double **WannierCentre = NULL;
673 double *WannierSpread = NULL;
674 int partner[Num];
675 int i,j,l,k;
676 int totalflag, flag;
677 double q[NDIM], center[NDIM];
678 double Spread;
679 int *N = LevS->Plan0.plan->N;
680
681 int MPISignal = 0, mpisignal = 0;
682 int membersize, maxsize;
683 int msgsize;
684 double *buffer = NULL;
685 int position;
686 MPI_Status status;
687
688 // determine necessary pack/buffer sizes for the wannier stuff
689 MPI_Pack_size(3, MPI_DOUBLE, P->Par.comm_ST_PsiT, &membersize);
690 maxsize = membersize;
691 MPI_Pack_size(1, MPI_DOUBLE, P->Par.comm_ST_PsiT, &membersize);
692 maxsize += membersize;
693
694 // allocate memory
695 buffer = (double *) Malloc(sizeof(double)*maxsize, "ChangeWannierCentres: *buffer");
696 WannierSpread = (double *) Malloc(sizeof(double)*Num, "ChangeWannierCentres: *WannierSpread");
697 WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ChangeWannierCentres: **WannierCentre");
698 for (j=0;j<Num;j++)
699 WannierCentre[j] = (double *) Malloc(sizeof(double)*(NDIM+1), "ChangeWannierCentres: *WannierCentre[]");
700
701 // exchange wannier centers and spread among processes by going over all wave functions
702 i=-1;
703 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {
704 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
705 if (OnePsiA->PsiType == type) { // drop all but occupied ones
706 i++; // increase l if it is occupied wave function
707
708 // send and receive centres and spread (we use the MPI_Pack'ing scheme to preserve correct order of data)
709 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
710 position = 0;
711 MPI_Pack(Psi->AddData[OnePsiA->MyLocalNo].WannierCentre, 3, MPI_DOUBLE, buffer, maxsize, &position, P->Par.comm_ST_PsiT);
712 MPI_Pack(&Psi->AddData[OnePsiA->MyLocalNo].WannierSpread, 1, MPI_DOUBLE, buffer, maxsize, &position, P->Par.comm_ST_PsiT);
713 for (k=0;k<P->Par.Max_me_comm_ST_PsiT;k++)
714 if (k != P->Par.my_color_comm_ST_Psi) { // send to all other processes
715 fprintf(stderr, "(%i) Send goes from %d to %d\n", P->Par.me, k, P->Par.my_color_comm_ST_Psi);
716 if (MPI_Send(buffer, position, MPI_PACKED, k, WannierTag5, P->Par.comm_ST_PsiT) != MPI_SUCCESS) {
717 mpisignal = 1;
718 }
719 } else { // "send" to us
720 for(j=0;j<NDIM;j++) // copy to own array as well
721 WannierCentre[i][j] = Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j];
722 WannierSpread[i] = Psi->AddData[OnePsiA->MyLocalNo].WannierSpread;
723 }
724 } else {
725 fprintf(stderr, "(%i) Receive is from %d to %d\n", P->Par.me, OnePsiA->my_color_comm_ST_Psi, P->Par.my_color_comm_ST_Psi);
726 if (MPI_Recv(buffer, maxsize, MPI_PACKED, OnePsiA->my_color_comm_ST_Psi, WannierTag5, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
727 mpisignal = 1;
728 position = 0;
729 MPI_Get_count(&status, MPI_PACKED, &msgsize);
730 MPI_Unpack (buffer, msgsize, &position, WannierCentre[i], 3, MPI_DOUBLE, P->Par.comm_ST_PsiT);
731 MPI_Unpack (buffer, msgsize, &position, &WannierSpread[i], 1, MPI_DOUBLE, P->Par.comm_ST_PsiT);
732 }
733
734 // check for MPI errors
735 if (MPI_Allreduce(&mpisignal, &MPISignal, 1, MPI_INT, MPI_SUM, P->Par.comm_ST_PsiT) != MPI_SUCCESS) // allreduce signals
736 Error(SomeError,"ChangeWannierCentres: Bcast of signal failed\n");
737 if (MPISignal) { // greater 0 means there was at least one hickup somewhere
738 Error(SomeError,"ChangeWannierCentres: Scattering of Wanniers failed\n");
739 }
740 }
741 }
742 // free the buffer memory
743 Free(buffer, "ChangeWannierCentres: *buffer");
744
745 if ((P->Call.out[NormalOut]) && (1)) { // root prints gathered wannier centres for debugging
746 fprintf(stderr, "(%i) Printing gathered Wannier centres:\n", P->Par.me);
747 for(l=0;l<Num;l++) {
748 fprintf(stderr, "(%i) ", P->Par.me);
749 for(j=0;j<NDIM;j++)
750 fprintf(stderr, "%lg\t", WannierCentre[l][j]);
751 fprintf(stderr, "-\t%lg\n", WannierSpread[l]);
752 }
753 }
754
755 // change centres
756 switch (R->CommonWannier) {
757 case 4:
758 debug(P,"Shifting each Wannier centers to cell center");
759 for (j=0;j<NDIM;j++) // center point in [0,1]^3
760 center[j] = 0.5;
761 RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates
762 for (i=0; i < Num; i++) { // go through all occupied wave functions
763 for (j=0;j<NDIM;j++) // put into Wannier centres
764 WannierCentre[i][j] = q[j];
765 }
766 break;
767 case 3:
768 debug(P,"Shifting Wannier centers individually to nearest grid point");
769 for (i=0;i < Num; i++) { // go through all wave functions
770 RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]);
771 for (j=0;j<NDIM;j++) { // Recibasis is not true inverse but times 2.*PI
772 q[j] *= (double)N[j]/(2.*PI);
773
774 //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
775 if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j]))
776 q[j] = floor(q[j])/(double)N[j];
777 else
778 q[j] = ceil(q[j])/(double)N[j];
779 }
780 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
781 }
782 break;
783 case 2:
784 debug(P,"Combining individual orbitals according to spread.");
785 //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
786 debug(P,"finding partners");
787 marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
788 group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
789 for (l=0;l<Num;l++) {
790 group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker
791 for (k=0;k<=Num;k++)
792 group[l][k] = -1; // reset partner group
793 }
794 for (k=0;k<Num;k++)
795 partner[k] = 0;
796 debug(P,"mem allocated");
797 // go for each orbital through every other, check distance against the sum of both spreads
798 // if smaller add to group of this orbital
799 for (l=0;l<Num;l++) {
800 j=0; // index for partner group
801 for (k=0;k<Num;k++) { // check this against l
802 Spread = 0.;
803 for (i=0;i<NDIM;i++) {
804 //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);
805 Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
806 }
807 //Spread = sqrt(Spread); // distance in Spread
808 //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);
809 if (Spread < 5*(WannierSpread[l]+WannierSpread[k])*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread times two plus safety
810 group[l][j++] = k; // add k to group of l
811 partner[l]++;
812 //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
813 }
814 }
815 }
816
817 // consistency, for each orbital check if this orbital is also in the group of each referred orbital
818 debug(P,"checking consistency");
819 totalflag = 1;
820 for (l=0;l<Num;l++) // checking l's group
821 for (k=0;k<Num;k++) { // k is partner index
822 if (group[l][k] != -1) { // if current index k is a partner
823 flag = 0;
824 for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists
825 if ((group[ group[l][k] ][j] == l))
826 flag = 1;
827 }
828 //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
829 if (totalflag == 1) totalflag = flag;
830 }
831 }
832 // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
833 debug(P,"weight and calculate new centers for partner groups");
834 for (l=0;l<=Num;l++)
835 marker[l] = 1;
836 if (totalflag) {
837 for (l=0;l<Num;l++) { // go through each orbital
838 if (marker[l] != 0) { // if it hasn't been reweighted
839 marker[l] = 0;
840 for (i=0;i<NDIM;i++)
841 q[i] = 0.;
842 j = 0;
843 while (group[l][j] != -1) {
844 marker[group[l][j]] = 0;
845 for (i=0;i<NDIM;i++) {
846 //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
847 q[i] += WannierCentre[ group[l][j] ][i];
848 }
849 j++;
850 }
851 //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
852 for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
853 q[i] /= (double)(j);
854 }
855
856 // put WannierCentre into own and all partners'
857 for (i=0;i<NDIM;i++)
858 WannierCentre[l][i] = q[i];
859 j = 0;
860 while (group[l][j] != -1) {
861 for (i=0;i<NDIM;i++)
862 WannierCentre[group[l][j]][i] = q[i];
863 j++;
864 }
865 }
866 }
867 }
868 if (P->Call.out[StepLeaderOut]) {
869 fprintf(stderr,"Summary:\n");
870 fprintf(stderr,"========\n");
871 for (i=0;i<Num;i++)
872 fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
873 }
874 debug(P,"done");
875
876 Free(marker, "ChangeWannierCentres: marker");
877 for (l=0;l<Num;l++)
878 Free(group[l], "ChangeWannierCentres: group[l]");
879 Free(group, "ChangeWannierCentres: group");
880 break;
881 case 1:
882 debug(P,"Individual orbitals are changed to center of all.");
883 for (i=0;i<NDIM;i++) // zero center of weight
884 q[i] = 0.;
885 for (k=0;k<Num;k++)
886 for (i=0;i<NDIM;i++) { // sum up all orbitals each component
887 q[i] += WannierCentre[k][i];
888 }
889 for (i=0;i<NDIM;i++) // divide by number
890 q[i] /= Num;
891 for (k=0;k<Num;k++)
892 for (i=0;i<NDIM;i++) { // put into this function's array
893 WannierCentre[k][i] = q[i];
894 }
895 break;
896 case 0:
897 default:
898 break;
899 }
900
901 if ((P->Call.out[NormalOut]) && (1)) { // root prints gathered wannier centres for debugging
902 fprintf(stderr, "(%i) Printing changed Wannier centres:\n", P->Par.me);
903 for(l=0;l<Num;l++) {
904 fprintf(stderr, "(%i) ", P->Par.me);
905 for(j=0;j<NDIM;j++)
906 fprintf(stderr, "%lg\t", WannierCentre[l][j]);
907 fprintf(stderr, "-\t%lg\n", WannierSpread[l]);
908 }
909 }
910
911 // store info into local arrays again
912 i=-1;
913 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
914 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
915 if (OnePsiA->PsiType == type) { // drop all but occupied ones
916 i++; // increase l if it is occupied wave function
917 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local
918 for(j=0;j<NDIM;j++)
919 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
920 }
921 }
922 }
923
924 // show locally stored info for debugging
925 if (P->Call.out[NormalOut]) {
926 i=-1;
927 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
928 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
929 if (OnePsiA->PsiType == type) { // drop all but occupied ones
930 i++; // increase l if it is occupied wave function
931 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local?
932 fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0], Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1], Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2], Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
933 }
934 }
935 }
936
937 // free memory
938 Free(WannierSpread, "ChangeWannierCentres: *WannierSpread");
939 for (j=0;j<Num;j++)
940 Free(WannierCentre[j], "ChangeWannierCentres: *WannierCentre[]");
941 Free(WannierCentre, "ChangeWannierCentres: **WannierCentre");
942}
943
944/** From the entries of the variance matrices the spread is calculated.
945 * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$
946 * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$
947 * \param *P Problem at hand
948 * \param *A variance matrices
949 * \param old_spread first term of wannier spread
950 * \param spread second term of wannier spread
951 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
952 * \param *WannierSpread array with wannier spread per wave function
953 */
954void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread)
955{
956 struct Lattice *Lat = &P->Lat;
957 struct Psis *Psi = &Lat->Psi;
958 struct OnePsiElement *OnePsiA;
959 int i,j,k,l;
960 double tmp, q[NDIM];
961
962 *old_spread = 0;
963 *spread = 0;
964
965 // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
966 i=-1;
967 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
968 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
969 if (OnePsiA->PsiType == type) { // drop all but occupied ones
970 i++; // increase l if it is occupied wave function
971 //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
972
973 // calculate Wannier Centre
974 for (j=0;j<NDIM;j++) {
975 q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
976 if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
977 q[j] += 1.;
978 }
979 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
980
981 // store orbital spread and centre in file
982 tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
983 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
984 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
985 WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;
986 //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
987 //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
988 //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);
989 //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
990 //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
991
992 // gather all spreads
993 *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
994 for (k=0;k<max_operators;k++)
995 *spread += pow(gsl_matrix_get(A[k],i,i),2);
996 }
997 }
998}
999
1000/** Prints gsl_matrix nicely to screen.
1001 * \param *P Problem at hand
1002 * \param *U gsl matrix
1003 * \param Num number of rows/columns
1004 * \param *msg name of matrix, prepended before entries
1005 */
1006void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg)
1007{
1008 int k,l;
1009 fprintf(stderr,"(%i) %s = \n",P->Par.me, msg);
1010 for (k=0;k<Num;k++) {
1011 for (l=0;l<Num;l++)
1012 fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
1013 fprintf(stderr,"\n");
1014 }
1015}
1016
1017/** Allocates memory for the DiagonalizationData structure
1018 * \param *P Problem at hand
1019 * \param DiagData pointer to structure
1020 * \param Num number of rows and columns in matrix
1021 * \param NumMatrices number of matrices to be simultaneously diagonalized
1022 * \param extra number of extra matrices to be passively also diagonalized
1023 */
1024void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra)
1025{
1026 int i;
1027
1028 // store integer values
1029 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
1030 DiagData->Num = Num;
1031 DiagData->AllocNum = ceil((double)Num / 2. ) *2;
1032 DiagData->NumMatrices = NumMatrices;
1033 DiagData->extra = extra;
1034
1035 // determine communicator and our rank and its size
1036 DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum);
1037 MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum));
1038 MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank));
1039
1040 // allocate memory
1041 DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
1042 gsl_matrix_set_identity(DiagData->U);
1043
1044 DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A");
1045 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) {
1046 DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
1047 gsl_matrix_set_zero(DiagData->A[i]);
1048 }
1049
1050 // init merry-go-round array for diagonilzation
1051 DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top");
1052 DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot");
1053 for (i=0;i<DiagData->AllocNum/2;i++) {
1054 DiagData->top[i] = 2*i;
1055 DiagData->bot[i] = 2*i+1;
1056 }
1057/* // print starting values of index generation tables top and bot
1058 fprintf(stderr,"top\t");
1059 for (k=0;k<AllocNum/2;k++)
1060 fprintf(stderr,"%i\t", top[k]);
1061 fprintf(stderr,"\n");
1062 fprintf(stderr,"bot\t");
1063 for (k=0;k<AllocNum/2;k++)
1064 fprintf(stderr,"%i\t", bot[k]);
1065 fprintf(stderr,"\n");*/
1066}
1067
1068/** Frees allocated memory for the DiagonalizationData structure.
1069 * \param DiagData pointer to structure
1070 */
1071void FreeDiagonalization(struct DiagonalizationData *DiagData)
1072{
1073 int i;
1074
1075 Free(DiagData->top, "FreeDiagonalization: DiagData->top");
1076 Free(DiagData->bot, "FreeDiagonalization: DiagData->bot");
1077 gsl_matrix_free(DiagData->U);
1078 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++)
1079 gsl_matrix_free(DiagData->A[i]);
1080 Free(DiagData->A, "FreeDiagonalization: DiagData->A");
1081}
1082
1083/** Orthogonalizing PsiType#Occupied wave functions for MD.
1084 * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged.
1085 * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi
1086 * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis.
1087 * \param *P Problem at hand
1088 * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well
1089 * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation
1090 * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under
1091 * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali-
1092 * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the
1093 * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a
1094 * final orthogonalisation of all Psis, after the CG minimisation.
1095 */
1096void OrthogonalizePsis(struct Problem *P)
1097{
1098 struct Lattice *Lat = &P->Lat;
1099 struct Psis *Psi = &Lat->Psi;
1100 struct LatticeLevel *LevS = P->R.LevS;
1101 int Num = Psi->NoOfPsis;
1102 int i,j,l;
1103 struct DiagonalizationData DiagData;
1104 double PsiSP;
1105
1106 InitDiagonalization(P, &DiagData, Num, 1, 0);
1107
1108 // Calculate Overlap matrix
1109 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
1110 CalculateOverlap(P, l, Occupied);
1111 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1112 for (j=0;j<Num;j++)
1113 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1114 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)");
1115
1116 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)");
1117
1118 // diagonalize overlap matrix
1119 Diagonalize(P, &DiagData);
1120
1121 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)");
1122
1123 // apply found unitary transformation to wave functions
1124 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1125
1126 // update GramSch stati
1127 for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) {
1128 GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given...
1129 PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]);
1130 //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP);
1131 Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal;
1132 }
1133 UpdateGramSchAllPsiStatus(P,Psi);
1134/*
1135 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
1136 CalculateOverlap(P, l, Occupied);
1137 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1138 for (j=0;j<Num;j++)
1139 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1140 if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)");
1141 */
1142 // free memory
1143 FreeDiagonalization(&DiagData);
1144}
1145
1146/** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD.
1147 * Ensures that two orthonormality condition are met for Psis:
1148 * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$
1149 * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$
1150 * \param *P Problem at hand
1151 * \note [Payne] states that GramSchmidt is not well-suited, and this stronger
1152 * condition is needed for numerical stability
1153 * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat
1154 */
1155void StrongOrthogonalizePsis(struct Problem *P)
1156{
1157 struct Lattice *Lat = &P->Lat;
1158 struct Psis *Psi = &Lat->Psi;
1159 int Num = Psi->NoOfPsis;
1160 int i,j,l;
1161 struct DiagonalizationData DiagData;
1162
1163 InitDiagonalization(P, &DiagData, Num, 2, 0);
1164
1165 // Calculate Overlap matrix
1166 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) {
1167 CalculateOverlap(P, l, Occupied);
1168 //Calculate1stOverlap(P, l, Occupied);
1169 }
1170 for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1171 for (j=0;j<Num;j++) {
1172 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1173 //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]);
1174 }
1175
1176 // diagonalize overlap matrix
1177 Diagonalize(P, &DiagData);
1178
1179 // apply found unitary transformation to wave functions
1180 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1181
1182 // free memory
1183 FreeDiagonalization(&DiagData);
1184}
1185
1186/** Uses either serial or parallel diagonalization.
1187 * \param *P Problem at hand
1188 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1189 * \sa ParallelDiagonalization(), SerialDiagonalization()
1190 */
1191void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData)
1192{
1193 if (DiagData->Num != 1) { // one- or multi-process case?
1194 if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) {
1195 /*
1196 debug(P,"Testing with silly matrix");
1197 ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot);
1198 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1199 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1200 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1201 PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1202 */
1203 debug(P,"ParallelDiagonalization");
1204 ParallelDiagonalization(P, DiagData);
1205 } else {/*
1206 debug(P,"Testing with silly matrix");
1207 SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot);
1208 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1209 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1210 if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1211 PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1212 */
1213 debug(P,"SerialDiagonalization");
1214 SerialDiagonalization(P, DiagData);
1215 }
1216
1217 //if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1218 //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U");
1219 }
1220}
1221
1222/** Computation of Maximally Localized Wannier Functions.
1223 * Maximally localized functions are prime when evulating a Hamiltonian with
1224 * magnetic fields under periodic boundary conditions, as the common position
1225 * operator is no longer valid. These can be obtained by orbital rotations, which
1226 * are looked for iteratively and gathered in one transformation matrix, to be
1227 * later applied to the set of orbital wave functions.
1228 *
1229 * In order to obtain these, the following algorithm is applied:
1230 * -# Initialize U (identity) as the sought-for transformation matrix
1231 * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
1232 * \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
1233 * \f$A^{(k)}\f$
1234 * -# For each pair of indices (i,j) (i<j) do the following:
1235 * -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
1236 * where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
1237 * -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
1238 * corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
1239 * -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the
1240 * identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
1241 * \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
1242 * -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
1243 * -# Gather the rotations in \f$U = U R\f$
1244 * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
1245 * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
1246 * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
1247 * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
1248 * OnePsiElementAddData#WannierCentre
1249 *
1250 * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
1251 * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
1252 * time.
1253 *
1254 * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
1255 * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
1256 * may be diagonalized simultaneously and parallely. We use the implementation explained in
1257 * [Golub, Matrix computations, 1989, p451].
1258 *
1259 * \param *P Problem at hand
1260 */
1261void ComputeMLWF(struct Problem *P) {
1262 // variables and allocation
1263 struct Lattice *Lat = &P->Lat;
1264 struct Psis *Psi = &Lat->Psi;
1265 int i;
1266 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
1267 double **WannierCentre;
1268 double *WannierSpread;
1269 double spread, spreadSQ;
1270 struct DiagonalizationData DiagData;
1271
1272 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
1273
1274 InitDiagonalization(P, &DiagData, Num, max_operators, 1);
1275
1276 // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
1277 //debug (P,"Calculatung Variance matrices");
1278 FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A);
1279
1280 //debug (P,"Diagonalizing");
1281 Diagonalize(P, &DiagData);
1282
1283 // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
1284 //debug (P,"Performing Unitary transformation");
1285 UnitaryTransformationOnWavefunctions(P, DiagData.U, Num);
1286
1287 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me);
1288 WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre");
1289 WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread");
1290 for (i=0;i<Num;i++)
1291 WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre");
1292
1293 //debug (P,"Computing Wannier centres from variance matrix");
1294 ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread);
1295
1296 // write to file
1297 //debug (P,"Writing spread file");
1298 WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread);
1299
1300 debug(P,"Free'ing memory");
1301 // free all remaining memory
1302 FreeDiagonalization(&DiagData);
1303 for (i=0;i<Num;i++)
1304 Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]");
1305 Free(WannierCentre, "ComputeMLWF: WannierCentre");
1306 Free(WannierSpread, "ComputeMLWF: WannierSpread");
1307}
1308
1309/** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix.
1310 * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and
1311 * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part.
1312 *
1313 * The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue
1314 * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is
1315 * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then
1316 * derive the other by solving the equation system. Finally, the eigenvector is normalized.
1317 * \param *P Problem at hand
1318 * \param *A matrix whose eigenvalues/-vectors are to be found
1319 * \param *eval vector with eigenvalues
1320 * \param *evec matrix with corresponding eigenvectors in columns
1321 */
1322#ifdef HAVE_INLINE
1323inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
1324#else
1325void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
1326#endif
1327{
1328 double a11,a12,a21,a22;
1329 double ev[2], summand1, summand2, norm;
1330 int i;
1331 // find eigenvalues
1332 a11 = gsl_matrix_get(A,0,0);
1333 a12 = gsl_matrix_get(A,0,1);
1334 a21 = gsl_matrix_get(A,1,0);
1335 a22 = gsl_matrix_get(A,1,1);
1336 summand1 = (a11+a22)/2.;
1337 summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22);
1338 ev[0] = summand1 + summand2;
1339 ev[1] = summand1 - summand2;
1340 gsl_vector_set(eval, 0, ev[0]);
1341 gsl_vector_set(eval, 1, ev[1]);
1342 //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2);
1343
1344 // find eigenvectors
1345 for(i=0;i<2;i++) {
1346 if (fabs(ev[i]) < MYEPSILON) {
1347 gsl_matrix_set(evec, 0,i, 0.);
1348 gsl_matrix_set(evec, 1,i, 0.);
1349 } else if (fabs(a22-ev[i]) > MYEPSILON) {
1350 norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i])));
1351 gsl_matrix_set(evec, 0,i, 1./norm);
1352 gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm);
1353 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1354 } else if (fabs(a12) > MYEPSILON) {
1355 norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12));
1356 gsl_matrix_set(evec, 0,i, 1./norm);
1357 gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm);
1358 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1359 } else {
1360 if (fabs(a11-ev[i]) > MYEPSILON) {
1361 norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i])));
1362 gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm);
1363 gsl_matrix_set(evec, 1,i, 1./norm);
1364 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1365 } else if (fabs(a21) > MYEPSILON) {
1366 norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21));
1367 gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm);
1368 gsl_matrix_set(evec, 1,i, 1./norm);
1369 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1370 } else {
1371 //gsl_matrix_set(evec, 0,i, 0.);
1372 //gsl_matrix_set(evec, 1,i, 1.);
1373 fprintf (stderr,"\t evec %i undetermined\n", i);
1374 }
1375 //gsl_matrix_set(evec, 0,0, 1.);
1376 //gsl_matrix_set(evec, 1,0, 0.);
1377 //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me);
1378 }
1379 }
1380}
1381
1382/** Calculates sine and cosine values for multiple matrix diagonalization.
1383 * \param *evec eigenvectors in columns
1384 * \param *eval corresponding eigenvalues
1385 * \param *c cosine to be returned
1386 * \param *s sine to be returned
1387 */
1388#ifdef HAVE_INLINE
1389inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
1390#else
1391void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
1392#endif
1393{
1394 int index;
1395 double x,y,r;
1396
1397 index = gsl_vector_max_index (eval); // get biggest eigenvalue
1398 //fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index);
1399 x = gsl_matrix_get(evec, 0, index);
1400 y = gsl_matrix_get(evec, 1, index);
1401 if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4
1402 y = -y;
1403 x = -x;
1404 }
1405 //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
1406 //fprintf(stderr,"\tx %lg\ty %lg\n", x,y);
1407
1408 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
1409 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
1410 r = sqrt(x*x + y*y); // + z*z);
1411 if (fabs(r) > MYEPSILON) {
1412 *c = sqrt((x + r) / (2.*r));
1413 *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
1414 } else {
1415 *c = 1.;
1416 *s = 0.;
1417 }
1418}
1419
1420/*
1421 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1422 * \param *U transformation matrix set to unity matrix
1423 * \param NumMatrices number of matrices to be diagonalized simultaneously
1424 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1425 * \param AllocNum number of rows/columns in matrices
1426 * \param Num number of wave functions
1427 * \param ProcRank index in group for this cpu
1428 * \param ProcNum number of cpus in group
1429 * \param *top array with top row indices (merry-go-round)
1430 * \param *bot array with bottom row indices (merry-go-round)
1431 */
1432
1433/** Simultaneous diagonalization of matrices with multiple cpus.
1434 * \param *P Problem at hand
1435 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1436 * \note this is slower given one cpu only than SerialDiagonalization()
1437 */
1438void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
1439{
1440 struct RunStruct *R =&P->R;
1441 int tagR0, tagR1, tagS0, tagS1;
1442 int iloc, jloc;
1443 double *s_all, *c_all;
1444 int round, max_rounds;
1445 int start;
1446 int *rcounts, *rdispls;
1447 double *c, *s;
1448 int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
1449 int left, right; // left or right neighbour for process
1450 double spread = 0., old_spread=0., Spread=0.;
1451 int i,j,k,l,m,u;
1452 int set;
1453 int it_steps; // iteration step counter
1454 double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k]
1455 double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k]
1456 double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k]
1457 double a_i, a_j;
1458 gsl_matrix *G;
1459 gsl_vector *h;
1460 gsl_vector *eval;
1461 gsl_matrix *evec;
1462 //gsl_eigen_symmv_workspace *w;
1463
1464 max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set
1465 if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds);
1466
1467 // allocate column vectors for interchange of columns
1468 debug(P,"allocate column vectors for interchange of columns");
1469 c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
1470 s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
1471 c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all");
1472 s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all");
1473 rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts");
1474 rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls");
1475
1476 // allocate eigenvector stuff
1477 debug(P,"allocate eigenvector stuff");
1478 G = gsl_matrix_calloc (2,2);
1479 h = gsl_vector_alloc (2);
1480 eval = gsl_vector_alloc (2);
1481 evec = gsl_matrix_alloc (2,2);
1482 //w = gsl_eigen_symmv_alloc(2);
1483
1484 // establish communication partners
1485 debug(P,"establish communication partners");
1486 if (DiagData->ProcRank == 0) {
1487 tagS0 = WannierALTag; // left p0 always remains left p0
1488 } else {
1489 tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last
1490 }
1491 tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1
1492 tagR0 = WannierALTag; //
1493 tagR1 = WannierARTag; // first process
1494 if (DiagData->ProcRank == 0) {
1495 left = DiagData->ProcNum-1;
1496 right = 1;
1497 Lsend = 0;
1498 Rsend = 1;
1499 Lrecv = 0;
1500 Rrecv = 1;
1501 } else if (DiagData->ProcRank == DiagData->ProcNum - 1) {
1502 left = DiagData->ProcRank - 1;
1503 right = 0;
1504 Lsend = DiagData->ProcRank;
1505 Rsend = DiagData->ProcRank - 1;
1506 Lrecv = DiagData->ProcRank - 1;
1507 Rrecv = DiagData->ProcRank;
1508 } else {
1509 left = DiagData->ProcRank - 1;
1510 right = DiagData->ProcRank + 1;
1511 Lsend = DiagData->ProcRank+1;
1512 Rsend = DiagData->ProcRank - 1;
1513 Lrecv = DiagData->ProcRank - 1;
1514 Rrecv = DiagData->ProcRank+1;
1515 }
1516 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
1517
1518 // initialise A_loc
1519 debug(P,"initialise A_loc");
1520 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1521 //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
1522 Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
1523 Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]");
1524 Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
1525 //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
1526
1527 for (round=0;round<max_rounds;round++) {
1528 Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)];
1529 Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)];
1530 for (l=0;l<DiagData->AllocNum;l++) {
1531 Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round));
1532 Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1);
1533 //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
1534 }
1535 }
1536 }
1537 // initialise U_loc
1538 debug(P,"initialise U_loc");
1539 //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
1540 Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround");
1541 Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal");
1542 Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
1543 //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
1544 for (round=0;round<max_rounds;round++) {
1545 Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)];
1546 Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)];
1547 for (l=0;l<DiagData->AllocNum;l++) {
1548 Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round));
1549 Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1);
1550 //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
1551 }
1552 }
1553
1554 // now comes the iteration loop
1555 debug(P,"now comes the iteration loop");
1556 it_steps = 0;
1557 do {
1558 it_steps++;
1559 //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
1560 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time
1561 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1562 for (round = 0; round < max_rounds;round++) {
1563 start = DiagData->ProcRank * max_rounds + round;
1564 // get indices
1565 i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices
1566 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1567 j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i
1568 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1569 //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
1570
1571 // calculate rotation angle, i.e. c and s
1572 //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
1573 gsl_matrix_set_zero(G);
1574 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1575 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1576 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
1577 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
1578 gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
1579 gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
1580
1581 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1582 for (l=0;l<2;l++)
1583 for (m=0;m<2;m++)
1584 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1585 }
1586 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1587 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1588 EigensolverFor22Matrix(P,G,eval,evec);
1589 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1590
1591 CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]);
1592 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
1593
1594 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1595 // V_loc = V_loc * V_small
1596 //debug(P,"apply rotation to local U");
1597 for (l=0;l<DiagData->AllocNum;l++) {
1598 a_i = Uloc[2*round+iloc][l];
1599 a_j = Uloc[2*round+jloc][l];
1600 Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j;
1601 Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
1602 }
1603 } // end of round
1604 // circulate the rotation angles
1605 //debug(P,"circulate the rotation angles");
1606 MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation
1607 MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));
1608 //m = start;
1609 for (l=0;l<DiagData->AllocNum/2;l++) { // for each process
1610 // we have V_small from process k
1611 //debug(P,"Apply V_small from other process");
1612 i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices
1613 j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i
1614 iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1;
1615 jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1;
1616 for (m=0;m<max_rounds;m++) {
1617 //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
1618 // apply row rotation to each A[k]
1619 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1620 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
1621 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
1622 a_i = Aloc[k][2*m+iloc][i];
1623 a_j = Aloc[k][2*m+iloc][j];
1624 Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j;
1625 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
1626 a_i = Aloc[k][2*m+jloc][i];
1627 a_j = Aloc[k][2*m+jloc][j];
1628 Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j;
1629 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
1630 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
1631 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
1632 }
1633 }
1634 }
1635 // apply rotation to local operator matrices
1636 // A_loc = A_loc * V_small
1637 //debug(P,"apply rotation to local operator matrices A[k]");
1638 for (m=0;m<max_rounds;m++) {
1639 start = DiagData->ProcRank * max_rounds + m;
1640 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1641 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1642 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix !
1643 for (l=0;l<DiagData->AllocNum;l++) {
1644 // Columns, i and j belong to this process only!
1645 a_i = Aloc[k][2*m+iloc][l];
1646 a_j = Aloc[k][2*m+jloc][l];
1647 Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j;
1648 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
1649 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
1650 }
1651 }
1652 }
1653 // Shuffling of these round's columns to prepare next rotation set
1654 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1655 // extract columns from A
1656 //debug(P,"extract columns from A");
1657 MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
1658
1659 }
1660 // and also for V ...
1661 //debug(P,"extract columns from U");
1662 MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
1663
1664
1665 // and merry-go-round for the indices too
1666 //debug(P,"and merry-go-round for the indices too");
1667 MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1668 }
1669
1670 //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1671 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1672 old_spread = Spread;
1673 spread = 0.;
1674 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1675 for (i=0; i < 2*max_rounds; i++) { // go through all wave functions
1676 spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds];
1677 //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
1678 }
1679 }
1680 MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm));
1681 //Spread = spread;
1682 if (P->Par.me == 0) {
1683 //if(P->Call.out[ReadOut])
1684 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
1685 //else
1686 //fprintf(stderr,"%2.9e\n",Spread);
1687 }
1688 // STEP 5: check change of variance
1689 } while (fabs(old_spread-Spread) >= R->EpsWannier);
1690 // end of iterative diagonalization loop: We have found our final orthogonal U!
1691
1692 // gather local parts of U into complete matrix
1693 for (l=0;l<DiagData->ProcNum;l++)
1694 rcounts[l] = DiagData->AllocNum;
1695 debug(P,"allgather U");
1696 for (round=0;round<2*max_rounds;round++) {
1697 for (l=0;l<DiagData->ProcNum;l++)
1698 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
1699 MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1700 }
1701 for (k=0;k<DiagData->AllocNum;k++) {
1702 for(l=0;l<DiagData->AllocNum;l++) {
1703 gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]);
1704 }
1705 }
1706
1707 // after one set, gather A[k] from all and calculate spread
1708 for (l=0;l<DiagData->ProcNum;l++)
1709 rcounts[l] = DiagData->AllocNum;
1710 debug(P,"gather A[k] for spread");
1711 for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix !
1712 debug(P,"A[k] all gather");
1713 for (round=0;round<2*max_rounds;round++) {
1714 for (l=0;l<DiagData->ProcNum;l++)
1715 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
1716 MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1717 }
1718 for (k=0;k<DiagData->AllocNum;k++) {
1719 for(l=0;l<DiagData->AllocNum;l++) {
1720 gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]);
1721 }
1722 }
1723 }
1724
1725 // free eigenvector stuff
1726 gsl_vector_free(h);
1727 gsl_matrix_free(G);
1728 //gsl_eigen_symmv_free(w);
1729 gsl_vector_free(eval);
1730 gsl_matrix_free(evec);
1731
1732 // Free column vectors
1733 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1734 Free(Atotal[k], "ParallelDiagonalization: Atotal[k]");
1735 Free(Around[k], "ParallelDiagonalization: Around[k]");
1736 }
1737 Free(Uround, "ParallelDiagonalization: Uround");
1738 Free(Utotal, "ParallelDiagonalization: Utotal");
1739 Free(c_all, "ParallelDiagonalization: c_all");
1740 Free(s_all, "ParallelDiagonalization: s_all");
1741 Free(c, "ParallelDiagonalization: c");
1742 Free(s, "ParallelDiagonalization: s");
1743 Free(rcounts, "ParallelDiagonalization: rcounts");
1744 Free(rdispls, "ParallelDiagonalization: rdispls");
1745}
1746/*
1747 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1748 * \param *U transformation matrix set to unity matrix
1749 * \param NumMatrices number of matrices to be diagonalized
1750 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1751 * \param AllocNum number of rows/columns in matrices
1752 * \param Num number of wave functions
1753 * \param *top array with top row indices (merry-go-round)
1754 * \param *bot array with bottom row indices (merry-go-round)
1755 */
1756
1757/** Simultaneous Diagonalization of variances matrices with one cpu.
1758 * \param *P Problem at hand
1759 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1760 * \note this is faster given one cpu only than ParallelDiagonalization()
1761 */
1762void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
1763{
1764 struct RunStruct *R = &P->R;
1765 gsl_matrix *G;
1766 gsl_vector *h;
1767 gsl_vector *eval;
1768 gsl_matrix *evec;
1769 //gsl_eigen_symmv_workspace *w;
1770 double *c,*s,a_i,a_j;
1771 int it_steps, set, ProcRank;
1772 int i,j,k,l,m;
1773 double spread = 0., old_spread = 0.;\
1774
1775 // allocate eigenvector stuff
1776 debug(P,"allocate eigenvector stuff");
1777 G = gsl_matrix_calloc (2,2);
1778 h = gsl_vector_alloc (2);
1779 eval = gsl_vector_alloc (2);
1780 evec = gsl_matrix_alloc (2,2);
1781 //w = gsl_eigen_symmv_alloc(2);
1782
1783 c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
1784 s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
1785 debug(P,"now comes the iteration loop");
1786 it_steps=0;
1787 do {
1788 it_steps++;
1789 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
1790 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
1791 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
1792 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1793 // STEP 3: for all index pairs 0<= i<j <AllocNum
1794 for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) {
1795 // get indices
1796 i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices
1797 j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i
1798 //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
1799 // STEP 3a: Calculate G
1800 gsl_matrix_set_zero(G);
1801
1802 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1803 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1804 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1805 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1806 gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1807 gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1808 //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
1809
1810 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1811 for (l=0;l<2;l++)
1812 for (m=0;m<2;m++)
1813 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1814
1815 //PrintGSLMatrix(P,G,2, "G");
1816 }
1817
1818 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1819 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1820 EigensolverFor22Matrix(P,G,eval,evec);
1821 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1822
1823 //PrintGSLMatrix(P, evec, 2, "Eigenvectors");
1824 CalculateRotationAnglesFromEigenvalues(evec, eval, c, s);
1825 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
1826
1827 //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
1828 // STEP 3d: apply rotation R to rows and columns of A^{(k)}
1829 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1830 //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)");
1831 for (l=0;l<DiagData->AllocNum;l++) {
1832 // Rows
1833 a_i = gsl_matrix_get(DiagData->A[k],i,l);
1834 a_j = gsl_matrix_get(DiagData->A[k],j,l);
1835 gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j);
1836 gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j);
1837 }
1838 for (l=0;l<DiagData->AllocNum;l++) {
1839 // Columns
1840 a_i = gsl_matrix_get(DiagData->A[k],l,i);
1841 a_j = gsl_matrix_get(DiagData->A[k],l,j);
1842 gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j);
1843 gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j);
1844 }
1845 //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)");
1846 }
1847 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1848 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)");
1849 // STEP 3e: apply U = R*U
1850 for (l=0;l<DiagData->AllocNum;l++) {
1851 a_i = gsl_matrix_get(DiagData->U,i,l);
1852 a_j = gsl_matrix_get(DiagData->U,j,l);
1853 gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j);
1854 gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j);
1855 }
1856 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)");
1857 }
1858 // and merry-go-round for the indices too
1859 //debug(P,"and merry-go-round for the indices too");
1860 if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1861 }
1862
1863 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1864 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1865 old_spread = spread;
1866 spread = 0.;
1867 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1868 for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions
1869 spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2);
1870 }
1871 }
1872 if(P->Par.me == 0) {
1873 //if(P->Call.out[ReadOut])
1874 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
1875 //else
1876 //fprintf(stderr,"%2.9e\n",spread);
1877 }
1878 // STEP 5: check change of variance
1879 } while (fabs(old_spread-spread) >= R->EpsWannier);
1880 // end of iterative diagonalization loop: We have found our final orthogonal U!
1881 Free(c, "SerialDiagonalization: c");
1882 Free(s, "SerialDiagonalization: s");
1883 // free eigenvector stuff
1884 gsl_vector_free(h);
1885 gsl_matrix_free(G);
1886 //gsl_eigen_symmv_free(w);
1887 gsl_vector_free(eval);
1888 gsl_matrix_free(evec);
1889}
1890
1891/** Writes the wannier centres and spread to file.
1892 * Also puts centres from array into OnePsiElementAddData structure.
1893 * \param *P Problem at hand
1894 * \param old_spread first term of wannier spread
1895 * \param spread second term of wannier spread
1896 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
1897 * \param *WannierSpread array with wannier spread per wave function
1898 */
1899void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread)
1900{
1901 struct FileData *F = &P->Files;
1902 struct RunStruct *R = &P->R;
1903 struct Lattice *Lat = &P->Lat;
1904 struct Psis *Psi = &Lat->Psi;
1905 struct OnePsiElement *OnePsiA;
1906 char spin[12], suffix[18];
1907 int i,j,l;
1908
1909 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
1910
1911 switch (Lat->Psi.PsiST) {
1912 case SpinDouble:
1913 strcpy(suffix,".spread.csv");
1914 strcpy(spin,"SpinDouble");
1915 break;
1916 case SpinUp:
1917 strcpy(suffix,".spread_up.csv");
1918 strcpy(spin,"SpinUp");
1919 break;
1920 case SpinDown:
1921 strcpy(suffix,".spread_down.csv");
1922 strcpy(spin,"SpinDown");
1923 break;
1924 }
1925 if (P->Par.me_comm_ST == 0) {
1926 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
1927 OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
1928 else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
1929 OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
1930 if (F->SpreadFile == NULL) {
1931 Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n");
1932 } else {
1933 fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
1934 fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
1935 }
1936 }
1937
1938 // put (new) WannierCentres into local ones and into file
1939 i=-1;
1940 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1941 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1942 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1943 i++; // increase l if it is occupied wave function
1944 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
1945 for (j=0;j<NDIM;j++)
1946 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
1947 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierSpread[i];
1948 }
1949 if (P->Par.me_comm_ST == 0)
1950 fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]);
1951 }
1952 }
1953 if (P->Par.me_comm_ST == 0) {
1954 fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
1955 fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
1956 fflush(F->SpreadFile);
1957 }
1958}
1959
1960
1961/** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
1962 * \param *P Problem at hand
1963 * \return 1 - success, 0 - failure
1964 */
1965int ParseWannierFile(struct Problem *P)
1966{
1967 struct Lattice *Lat = &P->Lat;
1968 struct RunStruct *R = &P->R;
1969 struct Psis *Psi = &Lat->Psi;
1970 struct OnePsiElement *OnePsiA;
1971 int i,l,j, msglen;
1972 FILE *SpreadFile;
1973 char *tagname = NULL;
1974 char suffix[18];
1975 double WannierCentre[NDIM+1]; // combined centre and spread
1976 MPI_Status status;
1977 int signal = 0; // 1 - ok, 0 - error
1978
1979 switch (Lat->Psi.PsiST) {
1980 case SpinDouble:
1981 strcpy(suffix,".spread.csv");
1982 break;
1983 case SpinUp:
1984 strcpy(suffix,".spread_up.csv");
1985 break;
1986 case SpinDown:
1987 strcpy(suffix,".spread_down.csv");
1988 break;
1989 }
1990 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me);
1991
1992 if (P->Par.me_comm_ST == 0) {
1993 tagname = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "ParseWannierFile: *tagname");
1994 if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
1995 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1996 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1997 return 0;
1998 //Error(SomeError,"ParseWannierFile: Opening failed\n");
1999 }
2000 signal = 1;
2001 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2002 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2003 } else {
2004 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2005 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2006 if (signal == 0)
2007 return 0;
2008 }
2009 i=-1;
2010 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
2011 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
2012 if (OnePsiA->PsiType == type) { // drop all but occupied ones
2013 i++; // increase l if it is occupied wave function
2014 if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
2015 sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
2016 signal = 0;
2017 if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
2018 //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
2019 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2020 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2021 return 0;
2022 }
2023 if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
2024 //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
2025 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2026 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2027 return 0;
2028 }
2029 signal = 1;
2030 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2031 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2032 } else {
2033 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
2034 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
2035 if (signal == 0)
2036 return 0;
2037 }
2038 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
2039 if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
2040 if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
2041 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
2042 //return 0;
2043 MPI_Get_count(&status, MPI_DOUBLE, &msglen);
2044 if (msglen != NDIM+1)
2045 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
2046 //return 0;
2047 }
2048 if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
2049 Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
2050 //return 0;
2051 // and store 'em (for all who have this Psi local)
2052 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
2053 for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
2054 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
2055 //if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
2056 } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
2057 if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
2058 Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
2059 //return 0;
2060 }
2061 }
2062 }
2063 if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) {
2064 fclose(SpreadFile);
2065 Free(tagname, "ParseWannierFile: *tagname");
2066 }
2067 //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
2068 return 1;
2069}
2070
2071/** Calculates the spread of orbital \a i.
2072 * Stored in OnePsiElementAddData#WannierSpread.
2073 * \param *P Problem at hand
2074 * \param i i-th wave function (note "extra" ones are not counted!)
2075 * \return spread \f$\sigma^2_{A^{(k)}}\f$
2076 */
2077double CalculateSpread(struct Problem *P, int i) {
2078 struct Lattice *Lat = &P->Lat;
2079 struct RunStruct *R = &P->R;
2080 struct Psis *Psi = &Lat->Psi;
2081 struct LatticeLevel *Lev0 = R->Lev0;
2082 struct LatticeLevel *LevS = R->LevS;
2083 struct Density *Dens0 = Lev0->Dens;
2084 struct fft_plan_3d *plan = Lat->plan;
2085 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
2086 fftw_real *PsiCR = (fftw_real *)PsiC;
2087 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
2088 fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
2089 fftw_complex **HGcRC = (fftw_complex**)HGcR;
2090 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
2091 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
2092 MPI_Status status;
2093 struct OnePsiElement *OnePsiA, *LOnePsiA;
2094 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
2095 fftw_complex *LPsiDatA=NULL;
2096 int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
2097 N0 = LevS->Plan0.plan->local_nx;
2098 N = LevS->Plan0.plan->N;
2099 const int NUpx = LevS->NUp[0];
2100 const int NUpy = LevS->NUp[1];
2101 const int NUpz = LevS->NUp[2];
2102 double a_ij, b_ij, A_ij, B_ij;
2103 double tmp, tmp2, spread = 0;
2104 double **cos_lookup, **sin_lookup;
2105
2106 b_ij = 0;
2107
2108 CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N);
2109
2110 // fill matrices
2111 OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA
2112 if (OnePsiA->PsiType != Extra) { // drop if extra one
2113 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
2114 LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
2115 else
2116 LOnePsiA = NULL;
2117 if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi
2118 RecvSource = OnePsiA->my_color_comm_ST_Psi;
2119 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
2120 LPsiDatA=LevS->LPsi->TempPsi;
2121 } else { // .. otherwise send it to all other processes (Max_me... - 1)
2122 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
2123 if (p != OnePsiA->my_color_comm_ST_Psi)
2124 MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
2125 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
2126 } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
2127
2128 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
2129 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
2130 for (n0=0;n0<N0;n0++)
2131 for (n[1]=0;n[1]<N[1];n[1]++)
2132 for (n[2]=0;n[2]<N[2];n[2]++) {
2133 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
2134 iS = n[2]+N[2]*(n[1]+N[1]*n0);
2135 n[0] = n0 + LevS->Plan0.plan->start_nx;
2136 for (k=0;k<max_operators;k+=2) {
2137 tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
2138 tmp2 = PsiCR[i0] /LevS->MaxN;
2139 // check lookup
2140 if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
2141 fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);
2142 Error(SomeError, "Lookup table does not match real value!");
2143 }
2144// HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2145// HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
2146// HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2147// HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
2148 HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
2149 HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
2150 HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
2151 HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
2152 }
2153 }
2154 for (k=0;k<max_operators;k++) {
2155 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
2156 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
2157 }
2158
2159
2160 for (k=0;k<max_operators;k++) {
2161 a_ij = 0;
2162 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
2163 // sum directly in a_ij and b_ij the two desired terms
2164 g=0;
2165 if (LevS->GArray[0].GSq == 0.0) {
2166 Index = LevS->GArray[g].Index;
2167 a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
2168 b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
2169 g++;
2170 }
2171 for (; g < LevS->MaxG; g++) {
2172 Index = LevS->GArray[g].Index;
2173 a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
2174 b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
2175 } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
2176 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2177 spread += pow(A_ij,2);
2178 }
2179 }
2180 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2181
2182 // store spread in OnePsiElementAdd
2183 Psi->AddData[i].WannierSpread = B_ij - spread;
2184
2185 FreeSinCosLookupTable(cos_lookup,sin_lookup);
2186
2187 return (B_ij - spread);
2188}
Note: See TracBrowser for help on using the repository browser.