source: pcp/src/wannier.c@ 774ae8

Last change on this file since 774ae8 was 961b75, checked in by Frederik Heber <heber@…>, 17 years ago

ParseForParameter() has new parameter repetition, which reads the repetition-th occurence of the given keyword

  • Property mode set to 100644
File size: 70.5 KB
Line 
1/** \file wannier.c
2 * Maximally Localized Wannier Functions.
3 *
4 * Contains the on function that minimises the spread of all orbitals in one rush in a parallel
5 * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
6 * calculate the spread of a specific orbital, which may be useful in checking on the change of
7 * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
8 *
9 Project: ParallelCarParrinello
10 \author Frederik Heber
11 \date 2006
12
13 File: wannier.c
14 $Id: wannier.c,v 1.63 2007-02-13 14:15:29 foo Exp $
15*/
16
17#include <math.h>
18#include <gsl/gsl_math.h>
19#include <gsl/gsl_eigen.h>
20#include <gsl/gsl_matrix.h>
21#include <gsl/gsl_vector.h>
22#include <gsl/gsl_complex.h>
23#include <gsl/gsl_complex_math.h>
24#include <gsl/gsl_sort_vector.h>
25#include <gsl/gsl_heapsort.h>
26#include <gsl/gsl_blas.h>
27#include <string.h>
28
29#include "data.h"
30#include "density.h"
31#include "errors.h"
32#include "helpers.h"
33#include "init.h"
34#include "myfft.h"
35#include "mymath.h"
36#include "output.h"
37#include "wannier.h"
38
39
40#define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
41
42
43/** Converts type fftw_complex to gsl_complex.
44 * \param a complex number
45 * \return b complex number
46 */
47gsl_complex convertComplex (fftw_complex a) {
48 return gsl_complex_rect(c_re(a),c_im(a));
49}
50
51/** "merry go round" implementation for parallel index ordering.
52 * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
53 * performed which generates once each possible pairing.
54 * \param *top index array 1
55 * \param *bot index array 2
56 * \param m N/2, where N is the matrix row/column dimension
57 * \note taken from [Golub, Matrix computations, 1989, p451]
58 */
59void music(int *top, int *bot, int m)
60{
61 int *old_top, *old_bot;
62 int k;
63 old_top = (int *) Malloc(sizeof(int)*m, "music: old_top");
64 old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot");
65/* fprintf(stderr,"oldtop\t");
66 for (k=0;k<m;k++)
67 fprintf(stderr,"%i\t", top[k]);
68 fprintf(stderr,"\n");
69 fprintf(stderr,"oldbot\t");
70 for (k=0;k<m;k++)
71 fprintf(stderr,"%i\t", bot[k]);
72 fprintf(stderr,"\n");*/
73 // first copy arrays
74 for (k=0;k<m;k++) {
75 old_top[k] = top[k];
76 old_bot[k] = bot[k];
77 }
78 // then let the music play
79 for (k=0;k<m;k++) {
80 if (k==1)
81 top[k] = old_bot[0];
82 else if (k > 1)
83 top[k] = old_top[k-1];
84 if (k==m-1)
85 bot[k] = old_top[k];
86 else
87 bot[k] = old_bot[k+1];
88 }
89/* fprintf(stderr,"top\t");
90 for (k=0;k<m;k++)
91 fprintf(stderr,"%i\t", top[k]);
92 fprintf(stderr,"\n");
93 fprintf(stderr,"bot\t");
94 for (k=0;k<m;k++)
95 fprintf(stderr,"%i\t", bot[k]);
96 fprintf(stderr,"\n");*/
97 // and finito
98 Free(old_top, "bla");
99 Free(old_bot, "bla");
100}
101
102/** merry-go-round for matrix columns.
103 * The trick here is that we must be aware of multiple rotations per process, thus only some of the
104 * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
105 * pointers to the matrix columns within the local array.
106 * \param comm communicator for circulation
107 * \param *Aloc local array of columns
108 * \param Num entries per column
109 * \param max_rounds number of column pairs in \a *Around
110 * \param k offset for tag
111 * \param tagS0 MPI tag for sending left column
112 * \param tagS1 MPI tag for sending right column
113 * \param tagR0 MPI tag for receiving left column
114 * \param tagR1 MPI tag for receiving right column
115 */
116void shiftcolumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
117 //double *A_locS1, *A_locS2; // local columns of A[k]
118 //double *A_locR1, *A_locR2; // local columns of A[k]
119 MPI_Request requestS0, requestS1, requestR0, requestR1;
120 MPI_Status status;
121 int ProcRank, ProcNum;
122 int l;
123 MPI_Comm_size (comm, &ProcNum);
124 MPI_Comm_rank (comm, &ProcRank);
125 double *Abuffer1, *Abuffer2; // mark the columns that are circulated
126
127 //fprintf(stderr,"shifting...");
128 if (ProcRank == 0) {
129 if (max_rounds > 1) {
130 // get last left column
131 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
132 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
133 } else {
134 // get right column
135 Abuffer1 = Aloc[1]; // note down the free column
136 MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0);
137 }
138
139 //fprintf(stderr,"...left columns...");
140 for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
141 Aloc[l] = Aloc[l-2];
142
143 if (max_rounds > 1) {
144 //fprintf(stderr,"...first right...");
145 Aloc[2] = Aloc[1]; // get first right column
146 }
147
148 //fprintf(stderr,"...right columns...");
149 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
150 Aloc[l] = Aloc[l+2];
151
152 //fprintf(stderr,"...last right...");
153 Aloc[(2*max_rounds-1)] = Abuffer1;
154 MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
155
156 } else if (ProcRank == ProcNum-1) {
157 //fprintf(stderr,"...first right...");
158 // get first right column
159 Abuffer2 = Aloc[1]; // note down the free column
160 MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
161
162 //fprintf(stderr,"...right columns...");
163 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
164 Aloc[(l)] = Aloc[(l+2)];
165
166 //fprintf(stderr,"...last right...");
167 Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
168
169 //fprintf(stderr,"...left columns...");
170 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
171 Aloc[(l)] = Aloc[(l-2)];
172
173 //fprintf(stderr,"...first left...");
174// if (max_rounds > 1)
175 Aloc[0] = Abuffer2; // get first left column
176 MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
177
178 } else {
179 // get last left column
180 MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
181 Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
182
183 //fprintf(stderr,"...first right...");
184 // get first right column
185 MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
186 Abuffer2 = Aloc[1]; // note down the free column
187
188 //fprintf(stderr,"...left columns...");
189 for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
190 Aloc[(l)] = Aloc[(l-2)];
191
192 //fprintf(stderr,"...right columns...");
193 for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
194 Aloc[(l)] = Aloc[(l+2)];
195
196 //fprintf(stderr,"...first left...");
197 Aloc[0] = Abuffer1; // get first left column
198 MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
199
200 //fprintf(stderr,"...last right...");
201 Aloc[(2*max_rounds-1)] = Abuffer2;
202 MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
203 }
204
205 //fprintf(stderr,"...waiting...");
206 if (ProcRank != ProcNum-1)
207 MPI_Wait(&requestS0, &status);
208 if (ProcRank != 0) // first left column
209 MPI_Wait(&requestR0, &status);
210 if (ProcRank != 0)
211 MPI_Wait(&requestS1, &status);
212 if (ProcRank != ProcNum-1)
213 MPI_Wait(&requestR1, &status);
214 //fprintf(stderr,"...done\n");
215}
216
217/** Computation of Maximally Localized Wannier Functions.
218 * Maximally localized functions are prime when evulating a Hamiltonian with
219 * magnetic fields under periodic boundary conditions, as the common position
220 * operator is no longer valid. These can be obtained by orbital rotations, which
221 * are looked for iteratively and gathered in one transformation matrix, to be
222 * later applied to the set of orbital wave functions.
223 *
224 * In order to obtain these, the following algorithm is applied:
225 * -# Initialize U (identity) as the sought-for transformation matrix
226 * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
227 * \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
228 * \f$A^{(k)}\f$
229 * -# For each pair of indices (i,j) (i<j) do the following:
230 * -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
231 * where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
232 * -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
233 * corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
234 * -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the
235 * identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
236 * \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
237 * -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
238 * -# Gather the rotations in \f$U = U R\f$
239 * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
240 * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
241 * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
242 * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
243 * OnePsiElementAddData#WannierCentre
244 *
245 * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
246 * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
247 * time.
248 *
249 * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
250 * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
251 * may be diagonalized simultaneously and parallely. We use the implementation explained in
252 * [Golub, Matrix computations, 1989, p451].
253 *
254 * \param *P Problem at hand
255 */
256void ComputeMLWF(struct Problem *P) {
257 // variables and allocation
258 struct FileData *F = &P->Files;
259 struct Lattice *Lat = &P->Lat;
260 struct RunStruct *R = &P->R;
261 struct Psis *Psi = &Lat->Psi;
262 struct LatticeLevel *Lev0 = R->Lev0;
263 struct LatticeLevel *LevS = R->LevS;
264 struct Density *Dens0 = Lev0->Dens;
265 struct fft_plan_3d *plan = Lat->plan;
266 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
267 fftw_real *PsiCR = (fftw_real *)PsiC;
268 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
269 fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array
270 fftw_complex **HGcRC = (fftw_complex**)HGcR;
271 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
272 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
273 MPI_Status status;
274 struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
275 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
276 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
277 int n[NDIM],n0,i0,iS, Index;
278 int N0;
279 int N[NDIM];
280 const int NUpx = LevS->NUp[0];
281 const int NUpy = LevS->NUp[1];
282 const int NUpz = LevS->NUp[2];
283 int e,i,j,k,l,m,u,p,g;
284 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
285 double x,y,r;
286 double q[NDIM];
287 double *c,*s;
288 int index;
289 double spread = 0., old_spread=0., Spread=0.;
290 double WannierCentre[Num][NDIM];
291 double WannierSpread[Num];
292 double tmp,tmp2;
293 double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
294 double **cos_lookup,**sin_lookup;
295 gsl_matrix *G;
296 gsl_vector *h;
297 gsl_vector *eval;
298 gsl_matrix *evec;
299 gsl_eigen_symmv_workspace *w;
300 int ProcNum, ProcRank, set;
301 int it_steps; // iteration step counter
302 int *top, *bot;
303 int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
304 int left, right; // left or right neighbour for process
305 double **Aloc[max_operators+1], **Uloc; // local columns for one step of A[k]
306 double *Around[max_operators+1], *Uround; // all local columns for one round of A[k]
307 double *Atotal[max_operators+1], *Utotal; // all local columns for one round of A[k]
308 double a_i, a_j;
309 int tagR0, tagR1, tagS0, tagS1;
310 int iloc, jloc;
311 double *s_all, *c_all;
312 int round, max_rounds;
313 int start;
314 int *rcounts, *rdispls;
315 int AllocNum = ceil((double)Num / 2. ) *2;
316 int totalflag, flag;
317 int *marker, **group;
318 int partner[Num];
319 int type = Occupied;
320 MPI_Comm *comm;
321 char spin[12], suffix[18];
322
323 N0 = LevS->Plan0.plan->local_nx;
324 N[0] = LevS->Plan0.plan->N[0];
325 N[1] = LevS->Plan0.plan->N[1];
326 N[2] = LevS->Plan0.plan->N[2];
327
328 comm = &P->Par.comm_ST;
329
330 fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
331 if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel
332 comm = &P->Par.comm_ST;
333 fprintf(stderr,"(%i) Jacobi is done parallely by all\n", P->Par.me);
334 } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first
335 if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
336 comm = &P->Par.comm_ST_Psi;
337 fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me);
338 } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
339 comm = &P->Par.comm_ST_PsiT;
340 fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me);
341 }
342 } else {
343 if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
344 comm = &P->Par.comm_ST_PsiT;
345 fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me);
346 } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
347 comm = &P->Par.comm_ST_Psi;
348 fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me);
349 }
350 }
351
352 MPI_Comm_size (*comm, &ProcNum);
353 MPI_Comm_rank (*comm, &ProcRank);
354
355 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
356
357 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
358
359 // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
360 gsl_matrix *A[max_operators+1]; // one extra for B matrix
361 for (u=0;u<=max_operators;u++)
362 A[u] = gsl_matrix_calloc (AllocNum,AllocNum); // allocate matrix
363
364 // create lookup table for sin/cos values
365 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
366 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
367 for (i=0;i<NDIM;i++) {
368 // allocate memory
369 cos_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: cos_lookup");
370 sin_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: sin_lookup");
371 // reset arrays
372 SetArrayToDouble0(cos_lookup[i],LevS->Plan0.plan->N[i]);
373 SetArrayToDouble0(sin_lookup[i],LevS->Plan0.plan->N[i]);
374 // create lookup values
375 for (j=0;j<LevS->Plan0.plan->N[i];j++) {
376 tmp = 2*PI/(double)LevS->Plan0.plan->N[i]*(double)j;
377 cos_lookup[i][j] = cos(tmp);
378 sin_lookup[i][j] = sin(tmp);
379 }
380 }
381 l=-1; // to access U matrix element (0..Num-1)
382 // fill the matrices
383 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
384 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
385 if (OnePsiA->PsiType == type) { // drop all but occupied ones
386 l++; // increase l if it is non-extra wave function
387 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
388 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
389 else
390 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
391
392 //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
393 if (LPsiDatA != NULL) {
394 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
395 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
396 for (n0=0;n0<N0;n0++)
397 for (n[1]=0;n[1]<N[1];n[1]++)
398 for (n[2]=0;n[2]<N[2];n[2]++) {
399 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
400 iS = n[2]+N[2]*(n[1]+N[1]*n0);
401 n[0] = n0 + LevS->Plan0.plan->start_nx;
402 for (k=0;k<max_operators;k+=2) {
403 e = k/2;
404 tmp = 2*PI/(double)(N[e])*(double)(n[e]);
405 tmp2 = PsiCR[i0] /LevS->MaxN;
406 // check lookup
407 if (!l) // perform check on first wave function only
408 if ((fabs(cos(tmp) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[e][n[e]]) > MYEPSILON)) {
409 Error(SomeError, "Lookup table does not match real value!");
410 }
411 HGcR[k][iS] = cos_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */
412 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
413 HGcR[k+1][iS] = sin_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */
414 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
415 }
416 }
417 for (u=0;u<max_operators;u++) {
418 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
419 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
420 }
421 }
422 m = -1; // to access U matrix element (0..Num-1)
423 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
424 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
425 if (OnePsiB->PsiType == type) { // drop all but occupied ones
426 m++; // increase m if it is non-extra wave function
427 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
428 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
429 else
430 LOnePsiB = NULL;
431 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
432 RecvSource = OnePsiB->my_color_comm_ST_Psi;
433 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
434 LPsiDatB=LevS->LPsi->TempPsi;
435 } else { // .. otherwise send it to all other processes (Max_me... - 1)
436 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
437 if (p != OnePsiB->my_color_comm_ST_Psi)
438 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
439 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
440 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
441
442 for (u=0;u<max_operators;u++) {
443 a_ij = 0;
444 b_ij = 0;
445 if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
446 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
447 g=0;
448 if (LevS->GArray[0].GSq == 0.0) {
449 Index = LevS->GArray[g].Index;
450 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
451 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
452 g++;
453 }
454 for (; g < LevS->MaxG; g++) {
455 Index = LevS->GArray[g].Index;
456 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
457 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
458 } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
459 // sum up elements from all coefficients sharing processes
460 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
461 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
462 a_ij = A_ij;
463 b_ij = B_ij;
464 // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
465 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
466 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
467 } else { // receive ...
468 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
469 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
470 }
471 // ... and store
472 //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
473 //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
474 gsl_matrix_set(A[u], l, m, A_ij);
475 gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
476 }
477 }
478 }
479 }
480 }
481 // reset extra entries
482 for (u=0;u<=max_operators;u++) {
483 for (i=Num;i<AllocNum;i++)
484 for (j=0;j<AllocNum;j++)
485 gsl_matrix_set(A[u], i,j, 0.);
486 for (i=Num;i<AllocNum;i++)
487 for (j=0;j<AllocNum;j++)
488 gsl_matrix_set(A[u], j,i, 0.);
489 }
490 // free lookups
491 for (i=0;i<NDIM;i++) {
492 Free(cos_lookup[i], "bla");
493 Free(sin_lookup[i], "bla");
494 }
495 Free(cos_lookup, "bla");
496 Free(sin_lookup, "bla");
497
498 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
499 // STEP 1: Initialize U = 1
500 gsl_matrix *U = gsl_matrix_alloc (AllocNum,AllocNum);
501 gsl_matrix_set_identity(U);
502
503 // init merry-go-round array
504 top = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: top");
505 bot = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: bot");
506 //debug(P,"init merry-go-round array");
507 for (i=0;i<AllocNum/2;i++) {
508 top[i] = 2*i;
509 bot[i] = 2*i+1;
510 }
511
512/*// print A matrices for debug
513 if (P->Par.me == 0)
514 for (u=0;u<max_operators+1;u++) {
515 fprintf(stderr, "A[%i] = \n",u);
516 for (i=0;i<Num;i++) {
517 for (j=0;j<Num;j++)
518 fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
519 fprintf(stderr, "\n");
520 }
521 }
522*/
523 if (Num != 1) {
524 // one- or multi-process case?
525 if (((AllocNum % 2) == 0) && (ProcNum != 1) && ((AllocNum / 2) % ProcNum == 0)) {
526 max_rounds = (AllocNum / 2)/ProcNum; // each process must perform multiple rotations per step of a set
527 //fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, ProcRank, ProcNum, max_rounds);
528 // allocate column vectors for interchange of columns
529 //debug(P,"allocate column vectors for interchange of columns");
530 c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
531 s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
532 c_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: c_all");
533 s_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: s_all");
534 rcounts = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rcounts");
535 rdispls = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rdispls");
536/* // print starting values of index generation tables top and bot
537 fprintf(stderr,"top\t");
538 for (k=0;k<AllocNum/2;k++)
539 fprintf(stderr,"%i\t", top[k]);
540 fprintf(stderr,"\n");
541 fprintf(stderr,"bot\t");
542 for (k=0;k<AllocNum/2;k++)
543 fprintf(stderr,"%i\t", bot[k]);
544 fprintf(stderr,"\n");*/
545 // establish communication partners
546 //debug(P,"establish communication partners");
547 if (ProcRank == 0) {
548 tagS0 = WannierALTag; // left p0 always remains left p0
549 } else {
550 tagS0 = ProcRank == ProcNum - 1 ? WannierARTag : WannierALTag; // left p_last becomes right p_last
551 }
552 tagS1 = ProcRank == 0 ? WannierALTag : WannierARTag; // right p0 always goes into left p1
553 tagR0 = WannierALTag; //
554 tagR1 = WannierARTag; // first process
555 if (ProcRank == 0) {
556 left = ProcNum-1;
557 right = 1;
558 Lsend = 0;
559 Rsend = 1;
560 Lrecv = 0;
561 Rrecv = 1;
562 } else if (ProcRank == ProcNum - 1) {
563 left = ProcRank - 1;
564 right = 0;
565 Lsend = ProcRank;
566 Rsend = ProcRank - 1;
567 Lrecv = ProcRank - 1;
568 Rrecv = ProcRank;
569 } else {
570 left = ProcRank - 1;
571 right = ProcRank + 1;
572 Lsend = ProcRank+1;
573 Rsend = ProcRank - 1;
574 Lrecv = ProcRank - 1;
575 Rrecv = ProcRank+1;
576 }
577 //fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
578 // allocate eigenvector stuff
579 //debug(P,"allocate eigenvector stuff");
580 G = gsl_matrix_calloc (2,2);
581 h = gsl_vector_alloc (2);
582 eval = gsl_vector_alloc (2);
583 evec = gsl_matrix_alloc (2,2);
584 w = gsl_eigen_symmv_alloc(2);
585 // initialise A_loc
586 //debug(P,"initialise A_loc");
587 for (k=0;k<max_operators+1;k++) {
588 //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
589 Around[k] = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
590 Atotal[k] = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Atotal[k]");
591 Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
592 //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
593
594 for (round=0;round<max_rounds;round++) {
595 Aloc[k][2*round] = &Around[k][AllocNum*(2*round)];
596 Aloc[k][2*round+1] = &Around[k][AllocNum*(2*round+1)];
597 for (l=0;l<AllocNum;l++) {
598 Aloc[k][2*round][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round));
599 Aloc[k][2*round+1][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round)+1);
600 //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
601 }
602 }
603 }
604 // initialise U_loc
605 //debug(P,"initialise U_loc");
606 //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
607 Uround = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Uround");
608 Utotal = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Utotal");
609 Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
610 //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
611 for (round=0;round<max_rounds;round++) {
612 Uloc[2*round] = &Uround[AllocNum*(2*round)];
613 Uloc[2*round+1] = &Uround[AllocNum*(2*round+1)];
614 for (l=0;l<AllocNum;l++) {
615 Uloc[2*round][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round));
616 Uloc[2*round+1][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round)+1);
617 //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
618 }
619 }
620 // now comes the iteration loop
621 //debug(P,"now comes the iteration loop");
622 it_steps = 0;
623 do {
624 it_steps++;
625 fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
626 for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
627 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
628 for (round = 0; round < max_rounds;round++) {
629 start = ProcRank * max_rounds + round;
630 // get indices
631 i = top[start] < bot[start] ? top[start] : bot[start]; // minimum of the two indices
632 iloc = top[start] < bot[start] ? 0 : 1;
633 j = top[start] > bot[start] ? top[start] : bot[start]; // maximum of the two indices: thus j > i
634 jloc = top[start] > bot[start] ? 0 : 1;
635 //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
636
637 // calculate rotation angle, i.e. c and s
638 //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
639 gsl_matrix_set_zero(G);
640 for (k=0;k<max_operators;k++) { // go through all operators ...
641 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
642 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
643 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
644 gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
645 gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
646
647 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
648 for (l=0;l<2;l++)
649 for (m=0;m<2;m++)
650 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
651 }
652 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
653 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
654 gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
655 index = gsl_vector_max_index (eval); // get biggest eigenvalue
656 x = gsl_matrix_get(evec, 0, index);
657 y = gsl_matrix_get(evec, 1, index) * x/fabs(x);
658 x = fabs(x); // ensure x>=0 so that rotation angles remain smaller Pi/4
659 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
660 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
661 r = sqrt(x*x + y*y);
662 c[round] = sqrt((x + r) / (2*r));
663 s[round] = y / sqrt(2*r*(x+r));
664 // [[c,s],[-s,c]]= V_small
665 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
666
667 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
668 // V_loc = V_loc * V_small
669 //debug(P,"apply rotation to local U");
670 for (l=0;l<AllocNum;l++) {
671 a_i = Uloc[2*round+iloc][l];
672 a_j = Uloc[2*round+jloc][l];
673 Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j;
674 Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
675 }
676 } // end of round
677 // circulate the rotation angles
678 //debug(P,"circulate the rotation angles");
679 MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *comm); // MPI_Allgather is waaaaay faster than ring circulation
680 MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *comm);
681 //m = start;
682 for (l=0;l<AllocNum/2;l++) { // for each process
683 // we have V_small from process k
684 //debug(P,"Apply V_small from other process");
685 i = top[l] < bot[l] ? top[l] : bot[l]; // minimum of the two indices
686 j = top[l] > bot[l] ? top[l] : bot[l]; // maximum of the two indices: thus j > i
687 iloc = top[l] < bot[l] ? 0 : 1;
688 jloc = top[l] > bot[l] ? 0 : 1;
689 for (m=0;m<max_rounds;m++) {
690 //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
691 // apply row rotation to each A[k]
692 for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
693 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
694 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
695 a_i = Aloc[k][2*m+iloc][i];
696 a_j = Aloc[k][2*m+iloc][j];
697 Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j;
698 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
699 a_i = Aloc[k][2*m+jloc][i];
700 a_j = Aloc[k][2*m+jloc][j];
701 Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j;
702 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
703 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
704 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
705 }
706 }
707 }
708 // apply rotation to local operator matrices
709 // A_loc = A_loc * V_small
710 //debug(P,"apply rotation to local operator matrices A[k]");
711 for (m=0;m<max_rounds;m++) {
712 start = ProcRank * max_rounds + m;
713 iloc = top[start] < bot[start] ? 0 : 1;
714 jloc = top[start] > bot[start] ? 0 : 1;
715 for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
716 for (l=0;l<AllocNum;l++) {
717 // Columns, i and j belong to this process only!
718 a_i = Aloc[k][2*m+iloc][l];
719 a_j = Aloc[k][2*m+jloc][l];
720 Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j;
721 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
722 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
723 }
724 }
725 }
726 // Shuffling of these round's columns to prepare next rotation set
727 for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
728 // extract columns from A
729 //debug(P,"extract columns from A");
730 shiftcolumns(*comm, Aloc[k], AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
731
732 }
733 // and also for V ...
734 //debug(P,"extract columns from U");
735 shiftcolumns(*comm, Uloc, AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
736
737
738 // and merry-go-round for the indices too
739 //debug(P,"and merry-go-round for the indices too");
740 music(top, bot, AllocNum/2);
741 }
742
743 //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
744 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
745 old_spread = Spread;
746 spread = 0.;
747 for(k=0;k<max_operators;k++) { // go through all self-adjoint operators
748 for (i=0; i < 2*max_rounds; i++) { // go through all wave functions
749 spread += Aloc[k][i][i+ProcRank*2*max_rounds]*Aloc[k][i][i+ProcRank*2*max_rounds];
750 //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
751 }
752 }
753 MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *comm);
754 //Spread = spread;
755 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
756 else fprintf(stderr,"%2.9e\n",Spread);
757 // STEP 5: check change of variance
758 } while (fabs(old_spread-Spread) >= R->EpsWannier);
759 // end of iterative diagonalization loop: We have found our final orthogonal U!
760
761 for (l=0;l<ProcNum;l++)
762 rcounts[l] = AllocNum;
763 //debug(P,"allgather U");
764 for (round=0;round<2*max_rounds;round++) {
765 for (l=0;l<ProcNum;l++)
766 rdispls[l] = (l*2*max_rounds + round)*AllocNum;
767 MPI_Allgatherv(Uloc[round], AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *comm);
768 }
769 for (k=0;k<AllocNum;k++) {
770 for(l=0;l<AllocNum;l++) {
771 gsl_matrix_set(U,k,l, Utotal[l+k*AllocNum]);
772 }
773 }
774
775
776 // after one set, gather A[k] from all and calculate spread
777 for (l=0;l<ProcNum;l++)
778 rcounts[l] = AllocNum;
779 //debug(P,"gather A[k] for spread");
780 for (u=0;u<max_operators+1;u++) {// one extra for B matrix !
781 //debug(P,"A[k] all gather");
782 for (round=0;round<2*max_rounds;round++) {
783 for (l=0;l<ProcNum;l++)
784 rdispls[l] = (l*2*max_rounds + round)*AllocNum;
785 MPI_Allgatherv(Aloc[u][round], AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *comm);
786 }
787 for (k=0;k<AllocNum;k++) {
788 for(l=0;l<AllocNum;l++) {
789 gsl_matrix_set(A[u],k,l, Atotal[u][l+k*AllocNum]);
790 }
791 }
792 }
793
794 // free eigenvector stuff
795 gsl_vector_free(h);
796 gsl_matrix_free(G);
797 gsl_eigen_symmv_free(w);
798 gsl_vector_free(eval);
799 gsl_matrix_free(evec);
800 // Free column vectors
801 for (k=0;k<max_operators+1;k++) {
802 Free(Atotal[k], "bla");
803 Free(Around[k], "bla");
804 }
805 Free(Uround, "bla");
806 Free(Utotal, "bla");
807 Free(c_all, "bla");
808 Free(s_all, "bla");
809 Free(c, "bla");
810 Free(s, "bla");
811 Free(rcounts, "bla");
812 Free(rdispls, "bla");
813
814 } else {
815
816 c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
817 s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
818 G = gsl_matrix_calloc (2,2);
819 h = gsl_vector_alloc (2);
820 eval = gsl_vector_alloc (2);
821 evec = gsl_matrix_alloc (2,2);
822 w = gsl_eigen_symmv_alloc(2);
823 //debug(P,"now comes the iteration loop");
824 it_steps=0;
825 do {
826 it_steps++;
827 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
828 fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
829 for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
830 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
831 // STEP 3: for all index pairs 0<= i<j <AllocNum
832 for (ProcRank=0;ProcRank<AllocNum/2;ProcRank++) {
833 // get indices
834 i = top[ProcRank] < bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // minimum of the two indices
835 j = top[ProcRank] > bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // maximum of the two indices: thus j > i
836 //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
837 // STEP 3a: Calculate G
838 gsl_matrix_set_zero(G);
839
840 for (k=0;k<max_operators;k++) { // go through all operators ...
841 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
842 //fprintf(stderr,"(%i) k%i [a_ii - a_ij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,i), gsl_matrix_get(A[k],j,j),gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j));
843 //fprintf(stderr,"(%i) k%i [a_ij + a_jij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,j), gsl_matrix_get(A[k],j,i),gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i));
844 gsl_vector_set(h, 0, gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j));
845 gsl_vector_set(h, 1, gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i));
846 //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
847
848 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
849 for (l=0;l<2;l++)
850 for (m=0;m<2;m++)
851 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
852 }
853
854 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
855 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
856 gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
857
858 index = gsl_vector_max_index (eval); // get biggest eigenvalue
859 x = gsl_matrix_get(evec, 0, index);
860 y = gsl_matrix_get(evec, 1, index) * x/fabs(x);
861 //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
862 x = fabs(x); // ensure x>=0 so that rotation angles remain smaller Pi/4
863
864 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
865 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
866 r = sqrt(x*x + y*y); // + z*z);
867 c[0] = sqrt((x + r) / (2*r));
868 s[0] = y / sqrt(2*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
869 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
870
871 //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
872 // STEP 3d: apply rotation R to rows and columns of A^{(k)}
873 for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
874 for (l=0;l<AllocNum;l++) {
875 // Rows
876 a_i = gsl_matrix_get(A[k],i,l);
877 a_j = gsl_matrix_get(A[k],j,l);
878 gsl_matrix_set(A[k], i, l, c[0] * a_i + s[0] * a_j);
879 gsl_matrix_set(A[k], j, l, -s[0] * a_i + c[0] * a_j);
880 }
881 for (l=0;l<AllocNum;l++) {
882 // Columns
883 a_i = gsl_matrix_get(A[k],l,i);
884 a_j = gsl_matrix_get(A[k],l,j);
885 gsl_matrix_set(A[k], l, i, c[0] * a_i + s[0] * a_j);
886 gsl_matrix_set(A[k], l, j, -s[0] * a_i + c[0] * a_j);
887 }
888 }
889 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
890 // STEP 3e: apply U = R*U
891 for (l=0;l<AllocNum;l++) {
892 a_i = gsl_matrix_get(U,i,l);
893 a_j = gsl_matrix_get(U,j,l);
894 gsl_matrix_set(U, i, l, c[0] * a_i + s[0] * a_j);
895 gsl_matrix_set(U, j, l, -s[0] * a_i + c[0] * a_j);
896 }
897 }
898 // and merry-go-round for the indices too
899 //debug(P,"and merry-go-round for the indices too");
900 if (AllocNum > 2) music(top, bot, AllocNum/2);
901 }
902
903 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
904 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
905 old_spread = spread;
906 spread = 0;
907 for(k=0;k<max_operators;k++) { // go through all self-adjoint operators
908 for (i=0; i < AllocNum; i++) { // go through all wave functions
909 spread += pow(gsl_matrix_get(A[k],i,i),2);
910 }
911 }
912 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
913 else fprintf(stderr,"%2.9e\n",spread);
914 // STEP 5: check change of variance
915 } while (fabs(old_spread-spread) >= R->EpsWannier);
916 // end of iterative diagonalization loop: We have found our final orthogonal U!
917 gsl_vector_free(h);
918 gsl_matrix_free(G);
919 gsl_eigen_symmv_free(w);
920 gsl_vector_free(eval);
921 gsl_matrix_free(evec);
922 Free(c, "bla");
923 Free(s, "bla");
924 }
925
926 if(P->Call.out[ReadOut]) {// && P->Par.me == 0) {
927 //debug(P,"output total U");
928 fprintf(stderr,"(%i) U_tot = \n",P->Par.me);
929 for (k=0;k<Num;k++) {
930 for (l=0;l<Num;l++)
931 fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
932 fprintf(stderr,"\n");
933 }
934 }
935 }
936 Free(top, "bla");
937 Free(bot, "bla");
938
939 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Allocating buffer mem\n",P->Par.me);
940 // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
941 Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
942 fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
943 for (l=0;l<Num;l++) // allocate for each local wave function
944 coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
945
946 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
947 l=-1; // to access U matrix element (0..Num-1)
948 k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
949 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
950 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
951 if (OnePsiA->PsiType == type) { // drop all but occupied ones
952 l++; // increase l if it is occupied wave function
953 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
954 k++; // increase k only if it is a local, non-extra orbital wave function
955 LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet
956 SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part
957 } else
958 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
959
960 m = -1; // to access U matrix element (0..Num-1)
961 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
962 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
963 if (OnePsiB->PsiType == type) { // drop all but occupied ones
964 m++; // increase m if it is occupied wave function
965 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
966 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
967 else
968 LOnePsiB = NULL;
969 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
970 RecvSource = OnePsiB->my_color_comm_ST_Psi;
971 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
972 LPsiDatB=LevS->LPsi->TempPsi;
973 } else { // .. otherwise send it to all other processes (Max_me... - 1)
974 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
975 if (p != OnePsiB->my_color_comm_ST_Psi)
976 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
977 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
978 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
979
980 if (LPsiDatA != NULL) {
981 double tmp = gsl_matrix_get(U,l,m);
982 g=0;
983 if (LevS->GArray[0].GSq == 0.0) {
984 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
985 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
986 g++;
987 }
988 for (; g < LevS->MaxG; g++) {
989 LPsiDatA[g].re += LPsiDatB[g].re * tmp;
990 LPsiDatA[g].im += LPsiDatB[g].im * tmp;
991 }
992 }
993 }
994 }
995 }
996 }
997 gsl_matrix_free(U);
998
999 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
1000 // now, as all wave functions are updated, swap the buffer
1001 l = -1;
1002 for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
1003 if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
1004 l++;
1005 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
1006 LPsiDatA = (fftw_complex *)coeffs_buffer[l];
1007 LPsiDatB = LevS->LPsi->LocalPsi[l];
1008 for (g=0;g<LevS->MaxG;g++) {
1009 LPsiDatB[g].re = LPsiDatA[g].re;
1010 LPsiDatB[g].im = LPsiDatA[g].im;
1011 }
1012 // recalculating non-local form factors which are coefficient dependent!
1013 CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
1014 }
1015 }
1016 // and free allocated buffer memory
1017 Free(coeffs_buffer, "bla");
1018
1019 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7\n",P->Par.me);
1020 // STEP 7: Compute Wannier centers, spread and printout
1021 // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
1022 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
1023
1024 switch (Lat->Psi.PsiST) {
1025 case SpinDouble:
1026 strncpy(suffix,".spread.csv",18);
1027 strncat(spin,"SpinDouble",12);
1028 break;
1029 case SpinUp:
1030 strncpy(suffix,".spread_up.csv",18);
1031 strncat(spin,"SpinUp",12);
1032 break;
1033 case SpinDown:
1034 strncpy(suffix,".spread_down.csv",18);
1035 strncat(spin,"SpinDown",12);
1036 break;
1037 }
1038 if (P->Par.me_comm_ST == 0) {
1039 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
1040 OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
1041 else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
1042 OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
1043 if (F->SpreadFile == NULL) {
1044 Error(SomeError,"ComputeMLWF: Error opening Wannier File!\n");
1045 } else {
1046 fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
1047 fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
1048 }
1049 }
1050 old_spread = 0;
1051 spread = 0;
1052 i=-1;
1053 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1054 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1055 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1056 i++; // increase l if it is occupied wave function
1057 //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
1058
1059 // calculate Wannier Centre
1060 for (j=0;j<NDIM;j++) {
1061 WannierCentre[i][j] = Lat->RealBasisQ[j]/(2*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
1062 if (WannierCentre[i][j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
1063 WannierCentre[i][j] = Lat->RealBasisQ[j] + WannierCentre[i][j];
1064 }
1065
1066 // store orbital spread and centre in file
1067 tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
1068 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
1069 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
1070 WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;
1071 //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
1072 //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
1073 //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);
1074 //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
1075 //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
1076
1077 // gather all spreads
1078 old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
1079 for (k=0;k<max_operators;k++)
1080 spread += pow(gsl_matrix_get(A[k],i,i),2);
1081
1082 // store calculated Wannier centre
1083 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local?
1084 for (j=0;j<NDIM;j++)
1085 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
1086 }
1087 }
1088
1089 // join Wannier orbital to groups with common centres under certain conditions
1090 switch (R->CommonWannier) {
1091 case 4:
1092 debug(P,"Shifting each Wannier centers to cell center");
1093 for (i=0; i < Num; i++) { // go through all occupied wave functions
1094 for (j=0;j<NDIM;j++)
1095 WannierCentre[i][j] = Lat->RealBasisQ[j]/2.;
1096 }
1097 break;
1098 case 3:
1099 debug(P,"Shifting Wannier centers individually to nearest grid point");
1100 for (i=0;i < Num; i++) { // go through all wave functions
1101 for (j=0;j<NDIM;j++) {
1102 tmp = WannierCentre[i][j]/Lat->RealBasisQ[j]*(double)N[j];
1103 //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
1104 if (fabs((double)floor(tmp) - tmp) < fabs((double)ceil(tmp) - tmp))
1105 WannierCentre[i][j] = (double)floor(tmp)/(double)N[j]*Lat->RealBasisQ[j];
1106 else
1107 WannierCentre[i][j] = (double)ceil(tmp)/(double)N[j]*Lat->RealBasisQ[j];
1108 }
1109 }
1110 break;
1111 case 2:
1112 debug(P,"Combining individual orbitals according to spread.");
1113 //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
1114 //debug(P,"finding partners");
1115 marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
1116 group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
1117 for (l=0;l<Num;l++) {
1118 group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker
1119 for (k=0;k<=Num;k++)
1120 group[l][k] = -1; // reset partner group
1121 }
1122 for (k=0;k<Num;k++)
1123 partner[k] = 0;
1124 //debug(P,"mem allocated");
1125 // go for each orbital through every other, check distance against the sum of both spreads
1126 // if smaller add to group of this orbital
1127 for (l=0;l<Num;l++) {
1128 j=0; // index for partner group
1129 for (k=0;k<Num;k++) { // check this against l
1130 Spread = 0.;
1131 for (i=0;i<NDIM;i++) {
1132 //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);
1133 Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
1134 }
1135 Spread = sqrt(Spread); // distance in Spread
1136 //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);
1137 if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread
1138 group[l][j++] = k; // add k to group of l
1139 partner[l]++;
1140 //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
1141 }
1142 }
1143 }
1144
1145 // consistency, for each orbital check if this orbital is also in the group of each referred orbital
1146 //debug(P,"checking consistency");
1147 totalflag = 1;
1148 for (l=0;l<Num;l++) // checking l's group
1149 for (k=0;k<Num;k++) { // k is partner index
1150 if (group[l][k] != -1) { // if current index k is a partner
1151 flag = 0;
1152 for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists
1153 if ((group[ group[l][k] ][j] == l))
1154 flag = 1;
1155 }
1156 //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
1157 if (totalflag == 1) totalflag = flag;
1158 }
1159 }
1160 // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
1161 //debug(P,"weight and calculate new centers for partner groups");
1162 for (l=0;l<=Num;l++)
1163 marker[l] = 1;
1164 if (totalflag) {
1165 for (l=0;l<Num;l++) { // go through each orbital
1166 if (marker[l] != 0) { // if it hasn't been reweighted
1167 marker[l] = 0;
1168 for (i=0;i<NDIM;i++)
1169 q[i] = 0.;
1170 j = 0;
1171 while (group[l][j] != -1) {
1172 marker[group[l][j]] = 0;
1173 for (i=0;i<NDIM;i++) {
1174 //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
1175 q[i] += WannierCentre[ group[l][j] ][i];
1176 }
1177 j++;
1178 }
1179 //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
1180 for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
1181 q[i] /= (double)(j);
1182 }
1183
1184 // put WannierCentre into own and all partners'
1185 for (i=0;i<NDIM;i++)
1186 WannierCentre[l][i] = q[i];
1187 j = 0;
1188 while (group[l][j] != -1) {
1189 for (i=0;i<NDIM;i++)
1190 WannierCentre[group[l][j]][i] = q[i];
1191 j++;
1192 }
1193 }
1194 }
1195 }
1196 if (P->Call.out[StepLeaderOut]) {
1197 fprintf(stderr,"Summary:\n");
1198 fprintf(stderr,"========\n");
1199 for (i=0;i<Num;i++)
1200 fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
1201 }
1202 //debug(P,"done");
1203
1204 Free(marker, "bla");
1205 for (l=0;l<Num;l++)
1206 Free(group[l], "bla");
1207 Free(group, "bla");
1208 break;
1209 case 1:
1210 debug(P,"Individual orbitals are changed to center of all.");
1211 for (i=0;i<NDIM;i++) // zero center of weight
1212 q[i] = 0.;
1213 for (k=0;k<Num;k++)
1214 for (i=0;i<NDIM;i++) { // sum up all orbitals each component
1215 q[i] += WannierCentre[k][i];
1216 }
1217 for (i=0;i<NDIM;i++) // divide by number
1218 q[i] /= Num;
1219 for (k=0;k<Num;k++)
1220 for (i=0;i<NDIM;i++) { // put into this function's array
1221 WannierCentre[k][i] = q[i];
1222 }
1223 break;
1224 case 0:
1225 default:
1226 break;
1227 }
1228
1229 // put (new) WannierCentres into local ones and into file
1230 i=-1;
1231 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1232 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1233 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1234 i++; // increase l if it is occupied wave function
1235 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
1236 for (j=0;j<NDIM;j++)
1237 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
1238 }
1239 if (P->Par.me_comm_ST == 0)
1240 fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]);
1241 }
1242 }
1243 if (P->Par.me_comm_ST == 0) {
1244 fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
1245 fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
1246 }
1247 fflush(F->SpreadFile);
1248
1249 // and the spread was calculated in the loop above
1250/*
1251 i=-1;
1252 for (l=0;l<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;l++)
1253 if (Psi->AllPsiStatus[l].PsiType == type) {
1254 i++;
1255 spread = CalculateSpread(P,l);
1256 tmp = gsl_matrix_get(A[max_operators],i,i) - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
1257 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
1258 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
1259 if(P->Call.out[ValueOut]) fprintf(stderr, "(%i) Check of spread of %ith wave function: %lg against %lg\n",P->Par.me, i, Psi->AddData[i].WannierSpread, tmp);
1260 }*/
1261 // free all remaining memory
1262 for (k=0;k<max_operators+1;k++)
1263 gsl_matrix_free(A[k]);
1264}
1265
1266/** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
1267 * \param *P Problem at hand
1268 * \return 1 - success, 0 - failure
1269 */
1270int ParseWannierFile(struct Problem *P)
1271{
1272 struct Lattice *Lat = &P->Lat;
1273 struct RunStruct *R = &P->R;
1274 struct Psis *Psi = &Lat->Psi;
1275 struct OnePsiElement *OnePsiA;
1276 int i,l,j, msglen;
1277 FILE *SpreadFile;
1278 char tagname[255];
1279 char suffix[18];
1280 double WannierCentre[NDIM+1]; // combined centre and spread
1281 MPI_Status status;
1282 enum PsiTypeTag type = Occupied;
1283 int signal = 0; // 1 - ok, 0 - error
1284
1285 switch (Lat->Psi.PsiST) {
1286 case SpinDouble:
1287 strncpy(suffix,".spread.csv",18);
1288 break;
1289 case SpinUp:
1290 strncpy(suffix,".spread_up.csv",18);
1291 break;
1292 case SpinDown:
1293 strncpy(suffix,".spread_down.csv",18);
1294 break;
1295 }
1296
1297 if (P->Par.me_comm_ST == 0) {
1298 if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
1299 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1300 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1301 return 0;
1302 //Error(SomeError,"ParseWannierFile: Opening failed\n");
1303 }
1304 signal = 1;
1305 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1306 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1307 } else {
1308 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1309 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1310 if (signal == 0)
1311 return 0;
1312 }
1313 i=-1;
1314 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1315 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1316 if (OnePsiA->PsiType == type) { // drop all but occupied ones
1317 i++; // increase l if it is occupied wave function
1318 if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
1319 sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
1320 signal = 0;
1321 if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
1322 //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
1323 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1324 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1325 return 0;
1326 }
1327 if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
1328 //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
1329 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1330 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1331 return 0;
1332 }
1333 signal = 1;
1334 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1335 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1336 } else {
1337 if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1338 Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1339 if (signal == 0)
1340 return 0;
1341 }
1342 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
1343 if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
1344 if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
1345 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
1346 //return 0;
1347 MPI_Get_count(&status, MPI_DOUBLE, &msglen);
1348 if (msglen != NDIM+1)
1349 Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
1350 //return 0;
1351 }
1352 if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
1353 Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
1354 //return 0;
1355 // and store 'em (for all who have this Psi local)
1356 fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
1357 for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
1358 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
1359 if (P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
1360 } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
1361 if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
1362 Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
1363 //return 0;
1364 }
1365 }
1366 }
1367 if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0))
1368 fclose(SpreadFile);
1369 fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
1370 return 1;
1371}
1372
1373/** Calculates the spread of orbital \a i.
1374 * Stored in OnePsiElementAddData#WannierSpread.
1375 * \param *P Problem at hand
1376 * \param i i-th wave function (note "extra" ones are not counted!)
1377 * \return spread \f$\sigma^2_{A^{(k)}}\f$
1378 */
1379double CalculateSpread(struct Problem *P, int i) {
1380 struct Lattice *Lat = &P->Lat;
1381 struct RunStruct *R = &P->R;
1382 struct Psis *Psi = &Lat->Psi;
1383 struct LatticeLevel *Lev0 = R->Lev0;
1384 struct LatticeLevel *LevS = R->LevS;
1385 struct Density *Dens0 = Lev0->Dens;
1386 struct fft_plan_3d *plan = Lat->plan;
1387 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
1388 fftw_real *PsiCR = (fftw_real *)PsiC;
1389 fftw_complex *work = Dens0->DensityCArray[Temp2Density];
1390 fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
1391 fftw_complex **HGcRC = (fftw_complex**)HGcR;
1392 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
1393 fftw_real **HGcR2 = (fftw_real**)HGcR2C;
1394 MPI_Status status;
1395 struct OnePsiElement *OnePsiA, *LOnePsiA;
1396 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
1397 fftw_complex *LPsiDatA=NULL;
1398 int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
1399 N0 = LevS->Plan0.plan->local_nx;
1400 N = LevS->Plan0.plan->N;
1401 const int NUpx = LevS->NUp[0];
1402 const int NUpy = LevS->NUp[1];
1403 const int NUpz = LevS->NUp[2];
1404 double a_ij, b_ij, A_ij, B_ij;
1405 double tmp, tmp2, spread = 0;
1406 double **cos_lookup, **sin_lookup;
1407
1408 b_ij = 0;
1409
1410 // create lookup table for sin/cos values
1411 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
1412 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
1413 for (k=0;k<NDIM;k++) {
1414 // allocate memory
1415 cos_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: cos_lookup");
1416 sin_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: sin_lookup");
1417 // reset arrays
1418 SetArrayToDouble0(cos_lookup[k],LevS->Plan0.plan->N[k]);
1419 SetArrayToDouble0(sin_lookup[k],LevS->Plan0.plan->N[k]);
1420 // create lookup values
1421 for (g=0;g<LevS->Plan0.plan->N[k];g++) {
1422 tmp = 2*PI/(double)LevS->Plan0.plan->N[k]*(double)g;
1423 cos_lookup[k][g] = cos(tmp);
1424 sin_lookup[k][g] = sin(tmp);
1425 }
1426 }
1427 // fill matrices
1428 OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA
1429 if (OnePsiA->PsiType != Extra) { // drop if extra one
1430 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
1431 LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
1432 else
1433 LOnePsiA = NULL;
1434 if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi
1435 RecvSource = OnePsiA->my_color_comm_ST_Psi;
1436 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
1437 LPsiDatA=LevS->LPsi->TempPsi;
1438 } else { // .. otherwise send it to all other processes (Max_me... - 1)
1439 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
1440 if (p != OnePsiA->my_color_comm_ST_Psi)
1441 MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
1442 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
1443 } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
1444
1445 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
1446 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
1447 for (n0=0;n0<N0;n0++)
1448 for (n[1]=0;n[1]<N[1];n[1]++)
1449 for (n[2]=0;n[2]<N[2];n[2]++) {
1450 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
1451 iS = n[2]+N[2]*(n[1]+N[1]*n0);
1452 n[0] = n0 + LevS->Plan0.plan->start_nx;
1453 for (k=0;k<max_operators;k+=2) {
1454 tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
1455 tmp2 = PsiCR[i0] /LevS->MaxN;
1456 // check lookup
1457 if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
1458 fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);
1459 Error(SomeError, "Lookup table does not match real value!");
1460 }
1461// HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
1462// HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
1463// HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
1464// HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
1465 HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
1466 HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
1467 HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
1468 HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
1469 }
1470 }
1471 for (k=0;k<max_operators;k++) {
1472 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
1473 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
1474 }
1475
1476
1477 for (k=0;k<max_operators;k++) {
1478 a_ij = 0;
1479 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
1480 // sum directly in a_ij and b_ij the two desired terms
1481 g=0;
1482 if (LevS->GArray[0].GSq == 0.0) {
1483 Index = LevS->GArray[g].Index;
1484 a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
1485 b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
1486 g++;
1487 }
1488 for (; g < LevS->MaxG; g++) {
1489 Index = LevS->GArray[g].Index;
1490 a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
1491 b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
1492 } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
1493 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
1494 spread += pow(A_ij,2);
1495 }
1496 }
1497 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
1498
1499 // store spread in OnePsiElementAdd
1500 Psi->AddData[i].WannierSpread = B_ij - spread;
1501 // free lookups
1502 for (k=0;k<NDIM;k++) {
1503 Free(cos_lookup[k], "bla");
1504 Free(sin_lookup[k], "bla");
1505 }
1506 Free(cos_lookup, "bla");
1507 Free(sin_lookup, "bla");
1508
1509 return (B_ij - spread);
1510}
Note: See TracBrowser for help on using the repository browser.