1 | /** \file wannier.c
|
---|
2 | * Maximally Localized Wannier Functions.
|
---|
3 | *
|
---|
4 | * Contains the one function that minimises the spread of all orbitals in one rush in a parallel
|
---|
5 | * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
|
---|
6 | * calculate the spread of a specific orbital, which may be useful in checking on the change of
|
---|
7 | * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
|
---|
8 | *
|
---|
9 | Project: ParallelCarParrinello
|
---|
10 | \author Frederik Heber
|
---|
11 | \date 2006
|
---|
12 |
|
---|
13 | File: wannier.c
|
---|
14 | $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $
|
---|
15 | */
|
---|
16 |
|
---|
17 | #include <math.h>
|
---|
18 | #include <gsl/gsl_math.h>
|
---|
19 | #include <gsl/gsl_eigen.h>
|
---|
20 | #include <gsl/gsl_matrix.h>
|
---|
21 | #include <gsl/gsl_vector.h>
|
---|
22 | #include <gsl/gsl_complex.h>
|
---|
23 | #include <gsl/gsl_complex_math.h>
|
---|
24 | #include <gsl/gsl_sort_vector.h>
|
---|
25 | #include <gsl/gsl_heapsort.h>
|
---|
26 | #include <gsl/gsl_blas.h>
|
---|
27 | #include <string.h>
|
---|
28 |
|
---|
29 | #include "data.h"
|
---|
30 | #include "density.h"
|
---|
31 | #include "errors.h"
|
---|
32 | #include "gramsch.h"
|
---|
33 | #include "helpers.h"
|
---|
34 | #include "init.h"
|
---|
35 | #include "myfft.h"
|
---|
36 | #include "mymath.h"
|
---|
37 | #include "output.h"
|
---|
38 | #include "perturbed.h"
|
---|
39 | #include "wannier.h"
|
---|
40 |
|
---|
41 |
|
---|
/** Number of chosen self-adjoint operators when evaluating the spread (one cosine and one sine per axis).
 * The expansion is parenthesized so that the macro behaves as a single value inside larger
 * expressions (division, modulo, multiplication by a left-hand operand).
 */
#define max_operators (NDIM*2)
/** PsiType the routines in this file compare OnePsiElement::PsiType against. */
#define type Occupied
|
---|
44 |
|
---|
45 |
|
---|
46 | /** Converts type fftw_complex to gsl_complex.
|
---|
47 | * \param a complex number
|
---|
48 | * \return b complex number
|
---|
49 | */
|
---|
50 | gsl_complex convertComplex (fftw_complex a) {
|
---|
51 | return gsl_complex_rect(c_re(a),c_im(a));
|
---|
52 | }
|
---|
53 |
|
---|
/** "Merry-go-round" implementation for parallel index ordering.
 * Performs one step of the index rotation on two arrays, one holding the upper/left matrix
 * column indices and one holding the lower/right ones:
 *  - top[0] stays where it is,
 *  - top[1] receives the former bot[0],
 *  - top[k] (k > 1) receives the former top[k-1],
 *  - bot[k] (k < m-1) receives the former bot[k+1],
 *  - bot[m-1] receives the former top[m-1].
 *
 * The rotation is done in place; only the two entries that would otherwise be overwritten
 * before being read (bot[0] and top[m-1]) are saved, so no scratch memory is allocated.
 * For m == 1 this leaves top[0] untouched and copies it into bot[0]; for m <= 0 nothing is done.
 * \param *top index array 1 (m entries, modified in place)
 * \param *bot index array 2 (m entries, modified in place)
 * \param m N/2, where N is the matrix row/column dimension
 * \note taken from [Golub, Matrix computations, 1989, p451]
 */
void MerryGoRoundIndices(int *top, int *bot, int m)
{
  int k;
  int first_bot, last_top;

  if (m <= 0)   // nothing to rotate
    return;

  // remember the two entries that leave their array
  first_bot = bot[0];
  last_top = top[m-1];

  // upper row: shift right from the back so each source is still the old value
  for (k=m-1;k>1;k--)
    top[k] = top[k-1];
  if (m > 1)
    top[1] = first_bot;

  // lower row: shift left from the front, last slot takes the old end of the upper row
  for (k=0;k<m-1;k++)
    bot[k] = bot[k+1];
  bot[m-1] = last_top;
}
|
---|
104 |
|
---|
105 | /** merry-go-round for matrix columns.
|
---|
106 | * The trick here is that we must be aware of multiple rotations per process, thus only some of the
|
---|
107 | * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
|
---|
108 | * pointers to the matrix columns within the local array.
|
---|
109 | * \param comm communicator for circulation
|
---|
110 | * \param *Aloc local array of columns
|
---|
111 | * \param Num entries per column
|
---|
112 | * \param max_rounds number of column pairs in \a *Around
|
---|
113 | * \param k offset for tag
|
---|
114 | * \param tagS0 MPI tag for sending left column
|
---|
115 | * \param tagS1 MPI tag for sending right column
|
---|
116 | * \param tagR0 MPI tag for receiving left column
|
---|
117 | * \param tagR1 MPI tag for receiving right column
|
---|
118 | */
|
---|
119 | void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
|
---|
120 | //double *A_locS1, *A_locS2; // local columns of A[k]
|
---|
121 | //double *A_locR1, *A_locR2; // local columns of A[k]
|
---|
122 | MPI_Request requestS0, requestS1, requestR0, requestR1;
|
---|
123 | MPI_Status status;
|
---|
124 | int ProcRank, ProcNum;
|
---|
125 | int l;
|
---|
126 | MPI_Comm_size (comm, &ProcNum);
|
---|
127 | MPI_Comm_rank (comm, &ProcRank);
|
---|
128 | double *Abuffer1, *Abuffer2; // mark the columns that are circulated
|
---|
129 |
|
---|
130 | //fprintf(stderr,"shifting...");
|
---|
131 | if (ProcRank == 0) {
|
---|
132 | if (max_rounds > 1) {
|
---|
133 | // get last left column
|
---|
134 | Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
|
---|
135 | MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
|
---|
136 | } else {
|
---|
137 | // get right column
|
---|
138 | Abuffer1 = Aloc[1]; // note down the free column
|
---|
139 | MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0);
|
---|
140 | }
|
---|
141 |
|
---|
142 | //fprintf(stderr,"...left columns...");
|
---|
143 | for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
|
---|
144 | Aloc[l] = Aloc[l-2];
|
---|
145 |
|
---|
146 | if (max_rounds > 1) {
|
---|
147 | //fprintf(stderr,"...first right...");
|
---|
148 | Aloc[2] = Aloc[1]; // get first right column
|
---|
149 | }
|
---|
150 |
|
---|
151 | //fprintf(stderr,"...right columns...");
|
---|
152 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
|
---|
153 | Aloc[l] = Aloc[l+2];
|
---|
154 |
|
---|
155 | //fprintf(stderr,"...last right...");
|
---|
156 | Aloc[(2*max_rounds-1)] = Abuffer1;
|
---|
157 | MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
|
---|
158 |
|
---|
159 | } else if (ProcRank == ProcNum-1) {
|
---|
160 | //fprintf(stderr,"...first right...");
|
---|
161 | // get first right column
|
---|
162 | Abuffer2 = Aloc[1]; // note down the free column
|
---|
163 | MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
|
---|
164 |
|
---|
165 | //fprintf(stderr,"...right columns...");
|
---|
166 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
|
---|
167 | Aloc[(l)] = Aloc[(l+2)];
|
---|
168 |
|
---|
169 | //fprintf(stderr,"...last right...");
|
---|
170 | Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
|
---|
171 |
|
---|
172 | //fprintf(stderr,"...left columns...");
|
---|
173 | for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
|
---|
174 | Aloc[(l)] = Aloc[(l-2)];
|
---|
175 |
|
---|
176 | //fprintf(stderr,"...first left...");
|
---|
177 | // if (max_rounds > 1)
|
---|
178 | Aloc[0] = Abuffer2; // get first left column
|
---|
179 | MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
|
---|
180 |
|
---|
181 | } else {
|
---|
182 | // get last left column
|
---|
183 | MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
|
---|
184 | Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
|
---|
185 |
|
---|
186 | //fprintf(stderr,"...first right...");
|
---|
187 | // get first right column
|
---|
188 | MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
|
---|
189 | Abuffer2 = Aloc[1]; // note down the free column
|
---|
190 |
|
---|
191 | //fprintf(stderr,"...left columns...");
|
---|
192 | for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
|
---|
193 | Aloc[(l)] = Aloc[(l-2)];
|
---|
194 |
|
---|
195 | //fprintf(stderr,"...right columns...");
|
---|
196 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
|
---|
197 | Aloc[(l)] = Aloc[(l+2)];
|
---|
198 |
|
---|
199 | //fprintf(stderr,"...first left...");
|
---|
200 | Aloc[0] = Abuffer1; // get first left column
|
---|
201 | MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
|
---|
202 |
|
---|
203 | //fprintf(stderr,"...last right...");
|
---|
204 | Aloc[(2*max_rounds-1)] = Abuffer2;
|
---|
205 | MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
|
---|
206 | }
|
---|
207 |
|
---|
208 | //fprintf(stderr,"...waiting...");
|
---|
209 | if (ProcRank != ProcNum-1)
|
---|
210 | MPI_Wait(&requestS0, &status);
|
---|
211 | if (ProcRank != 0) // first left column
|
---|
212 | MPI_Wait(&requestR0, &status);
|
---|
213 | if (ProcRank != 0)
|
---|
214 | MPI_Wait(&requestS1, &status);
|
---|
215 | if (ProcRank != ProcNum-1)
|
---|
216 | MPI_Wait(&requestR1, &status);
|
---|
217 | //fprintf(stderr,"...done\n");
|
---|
218 | }
|
---|
219 |
|
---|
220 | /** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group.
|
---|
221 | * \param *P Problem at hand
|
---|
222 | * \param AllocNum number of rows in matrix
|
---|
223 | * \return address of MPI communicator
|
---|
224 | */
|
---|
225 | #ifdef HAVE_INLINE
|
---|
226 | inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
|
---|
227 | #else
|
---|
228 | MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
|
---|
229 | #endif
|
---|
230 | {
|
---|
231 | MPI_Comm *comm = &P->Par.comm_ST;
|
---|
232 |
|
---|
233 | //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
|
---|
234 | //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me);
|
---|
235 | if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel
|
---|
236 | comm = &P->Par.comm_ST;
|
---|
237 | //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n");
|
---|
238 | } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first
|
---|
239 | if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
|
---|
240 | comm = &P->Par.comm_ST_Psi;
|
---|
241 | //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
|
---|
242 | } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
|
---|
243 | comm = &P->Par.comm_ST_PsiT;
|
---|
244 | //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
|
---|
245 | }
|
---|
246 | } else {
|
---|
247 | if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
|
---|
248 | comm = &P->Par.comm_ST_PsiT;
|
---|
249 | //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
|
---|
250 | } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
|
---|
251 | comm = &P->Par.comm_ST_Psi;
|
---|
252 | //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
|
---|
253 | }
|
---|
254 | }
|
---|
255 | return comm;
|
---|
256 | }
|
---|
257 |
|
---|
258 | /** Allocates and fills Lookup table for sin/cos values at each grid node.
|
---|
259 | * \param ***cos_table pointer to two-dimensional lookup table for cosine values
|
---|
260 | * \param ***sin_table pointer to two-dimensional lookup table for sine values
|
---|
261 | * \param *N array with number of nodes per \a NDIM axis
|
---|
262 | */
|
---|
263 | void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N)
|
---|
264 | {
|
---|
265 | int i, j;
|
---|
266 | double argument;
|
---|
267 | double **cos_lookup, **sin_lookup;
|
---|
268 |
|
---|
269 | // create lookup table for sin/cos values
|
---|
270 | cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
|
---|
271 | sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
|
---|
272 | for (i=0;i<NDIM;i++) {
|
---|
273 | // allocate memory
|
---|
274 | cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup");
|
---|
275 | sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup");
|
---|
276 |
|
---|
277 | // reset arrays
|
---|
278 | SetArrayToDouble0(cos_lookup[i],N[i]);
|
---|
279 | SetArrayToDouble0(sin_lookup[i],N[i]);
|
---|
280 |
|
---|
281 | // create lookup values
|
---|
282 | for (j=0;j<N[i];j++) {
|
---|
283 | argument = 2*PI/(double)N[i]*(double)j;
|
---|
284 | cos_lookup[i][j] = cos(argument);
|
---|
285 | sin_lookup[i][j] = sin(argument);
|
---|
286 | }
|
---|
287 | }
|
---|
288 | *cos_table = cos_lookup;
|
---|
289 | *sin_table = sin_lookup;
|
---|
290 | }
|
---|
291 |
|
---|
292 | /** Frees memory allocated during CreateSinCosLookupTable().
|
---|
293 | * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values
|
---|
294 | * \param ***sin_lookup pointer to two-dimensional lookup table for sine values
|
---|
295 | */
|
---|
296 | void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup)
|
---|
297 | {
|
---|
298 | int i;
|
---|
299 | // free lookups
|
---|
300 | for (i=0;i<NDIM;i++) {
|
---|
301 | Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]");
|
---|
302 | Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]");
|
---|
303 | }
|
---|
304 | Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup");
|
---|
305 | Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup");
|
---|
306 | }
|
---|
307 |
|
---|
308 | /** Fills the entries of the six variance matrices.
|
---|
309 | * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the
|
---|
310 | * wave function and by diagonalization one obtains the unitary transformation with which the Psis are
|
---|
311 | * treated afterwards.
|
---|
312 | * \param *P Problem at hand
|
---|
313 | * \param AllocNum number of rows/columns
|
---|
314 | * \param **A pointer to variance matrices
|
---|
315 | * \sa ComputeMLWF() - master function.
|
---|
316 | */
|
---|
317 | void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A)
|
---|
318 | {
|
---|
319 | struct Lattice *Lat = &P->Lat;
|
---|
320 | struct RunStruct *R = &P->R;
|
---|
321 | struct Psis *Psi = &Lat->Psi;
|
---|
322 | struct LatticeLevel *Lev0 = R->Lev0;
|
---|
323 | struct LatticeLevel *LevS = R->LevS;
|
---|
324 | struct Density *Dens0 = Lev0->Dens;
|
---|
325 | struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB;
|
---|
326 | struct fft_plan_3d *plan = Lat->plan;
|
---|
327 | fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
|
---|
328 | fftw_real *PsiCR = (fftw_real *)PsiC;
|
---|
329 | fftw_complex *work = Dens0->DensityCArray[Temp2Density];
|
---|
330 | fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array
|
---|
331 | fftw_complex **HGcRC = (fftw_complex**)HGcR;
|
---|
332 | fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
|
---|
333 | fftw_real **HGcR2 = (fftw_real**)HGcR2C;
|
---|
334 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
|
---|
335 | MPI_Status status;
|
---|
336 | fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
|
---|
337 | int n[NDIM],n0,i0,iS, Index;
|
---|
338 | int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
|
---|
339 | int N0 = LevS->Plan0.plan->local_nx;
|
---|
340 | int *N = LevS->Plan0.plan->N;
|
---|
341 | const int NUpx = LevS->NUp[0];
|
---|
342 | const int NUpy = LevS->NUp[1];
|
---|
343 | const int NUpz = LevS->NUp[2];
|
---|
344 | double argument, PsiAtNode;
|
---|
345 | int e,g,i,j,k,l,m,p,u;
|
---|
346 | double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
|
---|
347 | double **cos_lookup = NULL,**sin_lookup = NULL;
|
---|
348 |
|
---|
349 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
|
---|
350 |
|
---|
351 | debug(P, "Creating Lookup Table");
|
---|
352 | CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N);
|
---|
353 |
|
---|
354 | debug(P, "Calculating each entry of variance matrices");
|
---|
355 | l=-1; // to access U matrix element (0..Num-1)
|
---|
356 | // fill the matrices
|
---|
357 | for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
|
---|
358 | OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
|
---|
359 | if (OnePsiA->PsiType == type) { // drop all but occupied ones
|
---|
360 | l++; // increase l if it is non-extra wave function
|
---|
361 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
|
---|
362 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
|
---|
363 | else
|
---|
364 | LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
|
---|
365 |
|
---|
366 | //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
|
---|
367 | if (LPsiDatA != NULL) {
|
---|
368 | CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
|
---|
369 | // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
|
---|
370 | for (n0=0;n0<N0;n0++)
|
---|
371 | for (n[1]=0;n[1]<N[1];n[1]++)
|
---|
372 | for (n[2]=0;n[2]<N[2];n[2]++) {
|
---|
373 | i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
|
---|
374 | iS = n[2]+N[2]*(n[1]+N[1]*n0);
|
---|
375 | n[0] = n0 + LevS->Plan0.plan->start_nx;
|
---|
376 | for (k=0;k<max_operators;k+=2) {
|
---|
377 | e = k/2;
|
---|
378 | argument = 2.*PI/(double)(N[e])*(double)(n[e]);
|
---|
379 | PsiAtNode = PsiCR[i0] /LevS->MaxN;
|
---|
380 | // check lookup
|
---|
381 | if (!l) // perform check on first wave function only
|
---|
382 | if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) {
|
---|
383 | Error(SomeError, "Lookup table does not match real value!");
|
---|
384 | }
|
---|
385 | HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
|
---|
386 | HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
|
---|
387 | HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
|
---|
388 | HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
|
---|
389 | }
|
---|
390 | }
|
---|
391 | for (u=0;u<max_operators;u++) {
|
---|
392 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
|
---|
393 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
|
---|
394 | }
|
---|
395 | }
|
---|
396 | m = -1; // to access U matrix element (0..Num-1)
|
---|
397 | for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
|
---|
398 | OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
|
---|
399 | if (OnePsiB->PsiType == type) { // drop all but occupied ones
|
---|
400 | m++; // increase m if it is non-extra wave function
|
---|
401 | if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
|
---|
402 | LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
|
---|
403 | else
|
---|
404 | LOnePsiB = NULL;
|
---|
405 | if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
|
---|
406 | RecvSource = OnePsiB->my_color_comm_ST_Psi;
|
---|
407 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
|
---|
408 | LPsiDatB=LevS->LPsi->TempPsi;
|
---|
409 | } else { // .. otherwise send it to all other processes (Max_me... - 1)
|
---|
410 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
|
---|
411 | if (p != OnePsiB->my_color_comm_ST_Psi)
|
---|
412 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
|
---|
413 | LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
|
---|
414 | } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
|
---|
415 |
|
---|
416 | for (u=0;u<max_operators;u++) {
|
---|
417 | a_ij = 0;
|
---|
418 | b_ij = 0;
|
---|
419 | if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
|
---|
420 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
|
---|
421 | g=0;
|
---|
422 | if (LevS->GArray[0].GSq == 0.0) {
|
---|
423 | Index = LevS->GArray[g].Index;
|
---|
424 | a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
|
---|
425 | b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
|
---|
426 | g++;
|
---|
427 | }
|
---|
428 | for (; g < LevS->MaxG; g++) {
|
---|
429 | Index = LevS->GArray[g].Index;
|
---|
430 | a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
|
---|
431 | b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
|
---|
432 | } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
|
---|
433 | // sum up elements from all coefficients sharing processes
|
---|
434 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
|
---|
435 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
|
---|
436 | a_ij = A_ij;
|
---|
437 | b_ij = B_ij;
|
---|
438 | // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
|
---|
439 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
|
---|
440 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
|
---|
441 | } else { // receive ...
|
---|
442 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
|
---|
443 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
|
---|
444 | }
|
---|
445 | // ... and store
|
---|
446 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
|
---|
447 | //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
|
---|
448 | gsl_matrix_set(A[u], l, m, A_ij);
|
---|
449 | gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
|
---|
450 | }
|
---|
451 | }
|
---|
452 | }
|
---|
453 | }
|
---|
454 | }
|
---|
455 | // reset extra entries
|
---|
456 | for (u=0;u<=max_operators;u++) {
|
---|
457 | for (i=Num;i<AllocNum;i++)
|
---|
458 | for (j=0;j<AllocNum;j++)
|
---|
459 | gsl_matrix_set(A[u], i,j, 0.);
|
---|
460 | for (i=Num;i<AllocNum;i++)
|
---|
461 | for (j=0;j<AllocNum;j++)
|
---|
462 | gsl_matrix_set(A[u], j,i, 0.);
|
---|
463 | }
|
---|
464 | FreeSinCosLookupTable(cos_lookup, sin_lookup);
|
---|
465 |
|
---|
466 | /*// print A matrices for debug
|
---|
467 | if (P->Par.me == 0)
|
---|
468 | for (u=0;u<max_operators+1;u++) {
|
---|
469 | fprintf(stderr, "A[%i] = \n",u);
|
---|
470 | for (i=0;i<Num;i++) {
|
---|
471 | for (j=0;j<Num;j++)
|
---|
472 | fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
|
---|
473 | fprintf(stderr, "\n");
|
---|
474 | }
|
---|
475 | }
|
---|
476 | */
|
---|
477 | }
|
---|
478 |
|
---|
479 | /** Calculates reciprocal second order moments of each PsiType#Occupied orbital.
|
---|
480 | * First order is zero due to wave function being real (symmetry condition).
|
---|
481 | * \param *P Problem at hand
|
---|
482 | */
|
---|
483 | void CalculateSecondOrderReciprocalMoment(struct Problem *P)
|
---|
484 | {
|
---|
485 | struct Lattice *Lat = &P->Lat;
|
---|
486 | struct RunStruct *R = &P->R;
|
---|
487 | struct FileData *F = &P->Files;
|
---|
488 | struct Psis *Psi = &Lat->Psi;
|
---|
489 | struct LatticeLevel *LevS = R->LevS;
|
---|
490 | double result, Result;
|
---|
491 | fftw_complex *LPsiDatA=NULL;
|
---|
492 | struct OnePsiElement *OnePsiA;
|
---|
493 | int i,j,l,g;
|
---|
494 | char spin[12], suffix[18];
|
---|
495 |
|
---|
496 | switch (Lat->Psi.PsiST) {
|
---|
497 | case SpinDouble:
|
---|
498 | strcpy(suffix,".recispread.csv");
|
---|
499 | strcpy(spin,"SpinDouble");
|
---|
500 | break;
|
---|
501 | case SpinUp:
|
---|
502 | strcpy(suffix,".recispread_up.csv");
|
---|
503 | strcpy(spin,"SpinUp");
|
---|
504 | break;
|
---|
505 | case SpinDown:
|
---|
506 | strcpy(suffix,".recispread_down.csv");
|
---|
507 | strcpy(spin,"SpinDown");
|
---|
508 | break;
|
---|
509 | }
|
---|
510 | if(P->Par.me_comm_ST == 0) {
|
---|
511 | if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me);
|
---|
512 | if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
|
---|
513 | OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
|
---|
514 | else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile)
|
---|
515 | OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
|
---|
516 | if (F->ReciSpreadFile == NULL) {
|
---|
517 | Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n");
|
---|
518 | } else {
|
---|
519 | fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin);
|
---|
520 | }
|
---|
521 | }
|
---|
522 |
|
---|
523 | // integrate second order moment
|
---|
524 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
|
---|
525 | OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
|
---|
526 | if (OnePsiA->PsiType == type) { // drop all but occupied ones
|
---|
527 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
|
---|
528 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
|
---|
529 | else
|
---|
530 | LPsiDatA = NULL;
|
---|
531 |
|
---|
532 | if (LPsiDatA != NULL) {
|
---|
533 | if (P->Par.me_comm_ST == 0)
|
---|
534 | fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo);
|
---|
535 | for (i=0;i<NDIM;i++) {
|
---|
536 | for (j=0;j<NDIM;j++) {
|
---|
537 | result = 0.;
|
---|
538 | g = 0;
|
---|
539 | if (LevS->GArray[0].GSq == 0.0) {
|
---|
540 | result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re);
|
---|
541 | g++;
|
---|
542 | }
|
---|
543 | for (;g<LevS->MaxG;g++)
|
---|
544 | result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im);
|
---|
545 | //result *= Lat->Volume/LevS->MaxG;
|
---|
546 | MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
|
---|
547 | if (P->Par.me_comm_ST == 0)
|
---|
548 | fprintf(F->ReciSpreadFile,"%2.5lg ", Result);
|
---|
549 | }
|
---|
550 | }
|
---|
551 | if (P->Par.me_comm_ST == 0)
|
---|
552 | fprintf(F->ReciSpreadFile,"\n");
|
---|
553 | }
|
---|
554 | }
|
---|
555 | }
|
---|
556 | if(P->Par.me_comm_ST == 0) {
|
---|
557 | fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n");
|
---|
558 | fflush(F->ReciSpreadFile);
|
---|
559 | }
|
---|
560 | }
|
---|
561 |
|
---|
562 | /** Given the unitary matrix the transformation is performed on the Psis.
|
---|
563 | * \param *P Problem at hand
|
---|
564 | * \param *U gsl matrix containing the transformation matrix
|
---|
565 | * \param Num dimension parameter for the matrix, i.e. number of wave functions
|
---|
566 | */
|
---|
567 | void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num)
|
---|
568 | {
|
---|
569 | struct Lattice *Lat = &P->Lat;
|
---|
570 | struct RunStruct *R = &P->R;
|
---|
571 | struct Psis *Psi = &Lat->Psi;
|
---|
572 | struct LatticeLevel *LevS = R->LevS;
|
---|
573 | MPI_Status status;
|
---|
574 | struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
|
---|
575 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
|
---|
576 | fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
|
---|
577 | int g,i,j,l,k,m,p;
|
---|
578 |
|
---|
579 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me);
|
---|
580 |
|
---|
581 | Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
|
---|
582 | fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
|
---|
583 |
|
---|
584 | for (l=0;l<Num;l++) // allocate for each local wave function
|
---|
585 | coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
|
---|
586 |
|
---|
587 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
|
---|
588 | l=-1; // to access U matrix element (0..Num-1)
|
---|
589 | k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
|
---|
590 | for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
|
---|
591 | OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
|
---|
592 | if (OnePsiA->PsiType == type) { // drop all but occupied ones
|
---|
593 | l++; // increase l if it is occupied wave function
|
---|
594 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
|
---|
595 | k++; // increase k only if it is a local, non-extra orbital wave function
|
---|
596 | LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet
|
---|
597 | SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part
|
---|
598 | } else
|
---|
599 | LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
|
---|
600 |
|
---|
601 | m = -1; // to access U matrix element (0..Num-1)
|
---|
602 | for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
|
---|
603 | OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
|
---|
604 | if (OnePsiB->PsiType == type) { // drop all but occupied ones
|
---|
605 | m++; // increase m if it is occupied wave function
|
---|
606 | if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
|
---|
607 | LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
|
---|
608 | else
|
---|
609 | LOnePsiB = NULL;
|
---|
610 | if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
|
---|
611 | RecvSource = OnePsiB->my_color_comm_ST_Psi;
|
---|
612 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
|
---|
613 | LPsiDatB=LevS->LPsi->TempPsi;
|
---|
614 | } else { // .. otherwise send it to all other processes (Max_me... - 1)
|
---|
615 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
|
---|
616 | if (p != OnePsiB->my_color_comm_ST_Psi)
|
---|
617 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
|
---|
618 | LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
|
---|
619 | } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
|
---|
620 |
|
---|
621 | if (LPsiDatA != NULL) {
|
---|
622 | double tmp = gsl_matrix_get(U,l,m);
|
---|
623 | g=0;
|
---|
624 | if (LevS->GArray[0].GSq == 0.0) {
|
---|
625 | LPsiDatA[g].re += LPsiDatB[g].re * tmp;
|
---|
626 | LPsiDatA[g].im += LPsiDatB[g].im * tmp;
|
---|
627 | g++;
|
---|
628 | }
|
---|
629 | for (; g < LevS->MaxG; g++) {
|
---|
630 | LPsiDatA[g].re += LPsiDatB[g].re * tmp;
|
---|
631 | LPsiDatA[g].im += LPsiDatB[g].im * tmp;
|
---|
632 | }
|
---|
633 | }
|
---|
634 | }
|
---|
635 | }
|
---|
636 | }
|
---|
637 | }
|
---|
638 |
|
---|
639 | //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
|
---|
640 | // now, as all wave functions are updated, swap the buffer
|
---|
641 | l = -1;
|
---|
642 | for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
|
---|
643 | if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
|
---|
644 | l++;
|
---|
645 | //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
|
---|
646 | LPsiDatA = (fftw_complex *)coeffs_buffer[l];
|
---|
647 | LPsiDatB = LevS->LPsi->LocalPsi[l];
|
---|
648 | for (g=0;g<LevS->MaxG;g++) {
|
---|
649 | LPsiDatB[g].re = LPsiDatA[g].re;
|
---|
650 | LPsiDatB[g].im = LPsiDatA[g].im;
|
---|
651 | }
|
---|
652 | // recalculating non-local form factors which are coefficient dependent!
|
---|
653 | CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
|
---|
654 | }
|
---|
655 | }
|
---|
656 | // and free allocated buffer memory
|
---|
657 | Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer");
|
---|
658 | }
|
---|
659 |
|
---|
660 | /** Changes Wannier Centres according to RunStruct#CommonWannier.
|
---|
661 | * \param *P Problem at hand.
|
---|
662 | * \param Num number of Psis
|
---|
663 | * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
|
---|
664 | * \param *WannierSpread array with wannier spread per wave function
|
---|
665 | */
|
---|
666 | void ChangeWannierCentres(struct Problem *P, int Num, double **WannierCentre, double *WannierSpread)
|
---|
667 | {
|
---|
668 | struct RunStruct *R = &P->R;
|
---|
669 | struct Lattice *Lat = &P->Lat;
|
---|
670 | struct LatticeLevel *LevS = R->LevS;
|
---|
671 | int *marker, **group;
|
---|
672 | int partner[Num];
|
---|
673 | int i,j,l,k;
|
---|
674 | int totalflag, flag;
|
---|
675 | double q[NDIM], center[NDIM];
|
---|
676 | double Spread;
|
---|
677 | int *N = LevS->Plan0.plan->N;
|
---|
678 |
|
---|
679 | switch (R->CommonWannier) {
|
---|
680 | case 4:
|
---|
681 | debug(P,"Shifting each Wannier centers to cell center");
|
---|
682 | for (j=0;j<NDIM;j++) // center point in [0,1]^3
|
---|
683 | center[j] = 0.5;
|
---|
684 | RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates
|
---|
685 | for (i=0; i < Num; i++) { // go through all occupied wave functions
|
---|
686 | for (j=0;j<NDIM;j++) // put into Wannier centres
|
---|
687 | WannierCentre[i][j] = q[j];
|
---|
688 | }
|
---|
689 | break;
|
---|
690 | case 3:
|
---|
691 | debug(P,"Shifting Wannier centers individually to nearest grid point");
|
---|
692 | for (i=0;i < Num; i++) { // go through all wave functions
|
---|
693 | RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]);
|
---|
694 | for (j=0;j<NDIM;j++) { // Recibasis is not true inverse but times 2.*PI
|
---|
695 | q[j] *= (double)N[j]/(2.*PI);
|
---|
696 |
|
---|
697 | //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
|
---|
698 | if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j]))
|
---|
699 | q[j] = floor(q[j])/(double)N[j];
|
---|
700 | else
|
---|
701 | q[j] = ceil(q[j])/(double)N[j];
|
---|
702 | }
|
---|
703 | RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
|
---|
704 | }
|
---|
705 | break;
|
---|
706 | case 2:
|
---|
707 | debug(P,"Combining individual orbitals according to spread.");
|
---|
708 | //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
|
---|
709 | debug(P,"finding partners");
|
---|
710 | marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
|
---|
711 | group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
|
---|
712 | for (l=0;l<Num;l++) {
|
---|
713 | group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker
|
---|
714 | for (k=0;k<=Num;k++)
|
---|
715 | group[l][k] = -1; // reset partner group
|
---|
716 | }
|
---|
717 | for (k=0;k<Num;k++)
|
---|
718 | partner[k] = 0;
|
---|
719 | debug(P,"mem allocated");
|
---|
720 | // go for each orbital through every other, check distance against the sum of both spreads
|
---|
721 | // if smaller add to group of this orbital
|
---|
722 | for (l=0;l<Num;l++) {
|
---|
723 | j=0; // index for partner group
|
---|
724 | for (k=0;k<Num;k++) { // check this against l
|
---|
725 | Spread = 0.;
|
---|
726 | for (i=0;i<NDIM;i++) {
|
---|
727 | //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);
|
---|
728 | Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
|
---|
729 | }
|
---|
730 | Spread = sqrt(Spread); // distance in Spread
|
---|
731 | //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);
|
---|
732 | if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread
|
---|
733 | group[l][j++] = k; // add k to group of l
|
---|
734 | partner[l]++;
|
---|
735 | //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
|
---|
736 | }
|
---|
737 | }
|
---|
738 | }
|
---|
739 |
|
---|
740 | // consistency, for each orbital check if this orbital is also in the group of each referred orbital
|
---|
741 | debug(P,"checking consistency");
|
---|
742 | totalflag = 1;
|
---|
743 | for (l=0;l<Num;l++) // checking l's group
|
---|
744 | for (k=0;k<Num;k++) { // k is partner index
|
---|
745 | if (group[l][k] != -1) { // if current index k is a partner
|
---|
746 | flag = 0;
|
---|
747 | for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists
|
---|
748 | if ((group[ group[l][k] ][j] == l))
|
---|
749 | flag = 1;
|
---|
750 | }
|
---|
751 | //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
|
---|
752 | if (totalflag == 1) totalflag = flag;
|
---|
753 | }
|
---|
754 | }
|
---|
755 | // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
|
---|
756 | debug(P,"weight and calculate new centers for partner groups");
|
---|
757 | for (l=0;l<=Num;l++)
|
---|
758 | marker[l] = 1;
|
---|
759 | if (totalflag) {
|
---|
760 | for (l=0;l<Num;l++) { // go through each orbital
|
---|
761 | if (marker[l] != 0) { // if it hasn't been reweighted
|
---|
762 | marker[l] = 0;
|
---|
763 | for (i=0;i<NDIM;i++)
|
---|
764 | q[i] = 0.;
|
---|
765 | j = 0;
|
---|
766 | while (group[l][j] != -1) {
|
---|
767 | marker[group[l][j]] = 0;
|
---|
768 | for (i=0;i<NDIM;i++) {
|
---|
769 | //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
|
---|
770 | q[i] += WannierCentre[ group[l][j] ][i];
|
---|
771 | }
|
---|
772 | j++;
|
---|
773 | }
|
---|
774 | //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
|
---|
775 | for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
|
---|
776 | q[i] /= (double)(j);
|
---|
777 | }
|
---|
778 |
|
---|
779 | // put WannierCentre into own and all partners'
|
---|
780 | for (i=0;i<NDIM;i++)
|
---|
781 | WannierCentre[l][i] = q[i];
|
---|
782 | j = 0;
|
---|
783 | while (group[l][j] != -1) {
|
---|
784 | for (i=0;i<NDIM;i++)
|
---|
785 | WannierCentre[group[l][j]][i] = q[i];
|
---|
786 | j++;
|
---|
787 | }
|
---|
788 | }
|
---|
789 | }
|
---|
790 | }
|
---|
791 | if (P->Call.out[StepLeaderOut]) {
|
---|
792 | fprintf(stderr,"Summary:\n");
|
---|
793 | fprintf(stderr,"========\n");
|
---|
794 | for (i=0;i<Num;i++)
|
---|
795 | fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
|
---|
796 | }
|
---|
797 | debug(P,"done");
|
---|
798 |
|
---|
799 | Free(marker, "ChangeWannierCentres: marker");
|
---|
800 | for (l=0;l<Num;l++)
|
---|
801 | Free(group[l], "ChangeWannierCentres: group[l]");
|
---|
802 | Free(group, "ChangeWannierCentres: group");
|
---|
803 | break;
|
---|
804 | case 1:
|
---|
805 | debug(P,"Individual orbitals are changed to center of all.");
|
---|
806 | for (i=0;i<NDIM;i++) // zero center of weight
|
---|
807 | q[i] = 0.;
|
---|
808 | for (k=0;k<Num;k++)
|
---|
809 | for (i=0;i<NDIM;i++) { // sum up all orbitals each component
|
---|
810 | q[i] += WannierCentre[k][i];
|
---|
811 | }
|
---|
812 | for (i=0;i<NDIM;i++) // divide by number
|
---|
813 | q[i] /= Num;
|
---|
814 | for (k=0;k<Num;k++)
|
---|
815 | for (i=0;i<NDIM;i++) { // put into this function's array
|
---|
816 | WannierCentre[k][i] = q[i];
|
---|
817 | }
|
---|
818 | break;
|
---|
819 | case 0:
|
---|
820 | default:
|
---|
821 | break;
|
---|
822 | }
|
---|
823 | }
|
---|
824 |
|
---|
825 | /** From the entries of the variance matrices the spread is calculated.
|
---|
826 | * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$
|
---|
827 | * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$
|
---|
828 | * \param *P Problem at hand
|
---|
829 | * \param *A variance matrices
|
---|
830 | * \param old_spread first term of wannier spread
|
---|
831 | * \param spread second term of wannier spread
|
---|
832 | * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
|
---|
833 | * \param *WannierSpread array with wannier spread per wave function
|
---|
834 | */
|
---|
835 | void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread)
|
---|
836 | {
|
---|
837 | struct Lattice *Lat = &P->Lat;
|
---|
838 | struct Psis *Psi = &Lat->Psi;
|
---|
839 | struct OnePsiElement *OnePsiA;
|
---|
840 | int i,j,k,l;
|
---|
841 | double tmp, q[NDIM];
|
---|
842 |
|
---|
843 | *old_spread = 0;
|
---|
844 | *spread = 0;
|
---|
845 |
|
---|
846 | // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
|
---|
847 | i=-1;
|
---|
848 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
|
---|
849 | OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
|
---|
850 | if (OnePsiA->PsiType == type) { // drop all but occupied ones
|
---|
851 | i++; // increase l if it is occupied wave function
|
---|
852 | //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
|
---|
853 |
|
---|
854 | // calculate Wannier Centre
|
---|
855 | for (j=0;j<NDIM;j++) {
|
---|
856 | q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
|
---|
857 | if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
|
---|
858 | q[j] += 1.;
|
---|
859 | }
|
---|
860 | RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
|
---|
861 |
|
---|
862 | // store orbital spread and centre in file
|
---|
863 | tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
|
---|
864 | - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
|
---|
865 | - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
|
---|
866 | WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;
|
---|
867 | //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
|
---|
868 | //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
|
---|
869 | //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);
|
---|
870 | //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
|
---|
871 | //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
|
---|
872 |
|
---|
873 | // gather all spreads
|
---|
874 | *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
|
---|
875 | for (k=0;k<max_operators;k++)
|
---|
876 | *spread += pow(gsl_matrix_get(A[k],i,i),2);
|
---|
877 | }
|
---|
878 | }
|
---|
879 | }
|
---|
880 |
|
---|
881 | /** Prints gsl_matrix nicely to screen.
|
---|
882 | * \param *P Problem at hand
|
---|
883 | * \param *U gsl matrix
|
---|
884 | * \param Num number of rows/columns
|
---|
885 | * \param *msg name of matrix, prepended before entries
|
---|
886 | */
|
---|
887 | void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg)
|
---|
888 | {
|
---|
889 | int k,l;
|
---|
890 | fprintf(stderr,"(%i) %s = \n",P->Par.me, msg);
|
---|
891 | for (k=0;k<Num;k++) {
|
---|
892 | for (l=0;l<Num;l++)
|
---|
893 | fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
|
---|
894 | fprintf(stderr,"\n");
|
---|
895 | }
|
---|
896 | }
|
---|
897 |
|
---|
898 | /** Allocates memory for the DiagonalizationData structure
|
---|
899 | * \param *P Problem at hand
|
---|
900 | * \param DiagData pointer to structure
|
---|
901 | * \param Num number of rows and columns in matrix
|
---|
902 | * \param NumMatrices number of matrices to be simultaneously diagonalized
|
---|
903 | * \param extra number of extra matrices to be passively also diagonalized
|
---|
904 | */
|
---|
905 | void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra)
|
---|
906 | {
|
---|
907 | int i;
|
---|
908 |
|
---|
909 | // store integer values
|
---|
910 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
|
---|
911 | DiagData->Num = Num;
|
---|
912 | DiagData->AllocNum = ceil((double)Num / 2. ) *2;
|
---|
913 | DiagData->NumMatrices = NumMatrices;
|
---|
914 | DiagData->extra = extra;
|
---|
915 |
|
---|
916 | // determine communicator and our rank and its size
|
---|
917 | DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum);
|
---|
918 | MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum));
|
---|
919 | MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank));
|
---|
920 |
|
---|
921 | // allocate memory
|
---|
922 | DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
|
---|
923 | gsl_matrix_set_identity(DiagData->U);
|
---|
924 |
|
---|
925 | DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A");
|
---|
926 | for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) {
|
---|
927 | DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
|
---|
928 | gsl_matrix_set_zero(DiagData->A[i]);
|
---|
929 | }
|
---|
930 |
|
---|
931 | // init merry-go-round array for diagonilzation
|
---|
932 | DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top");
|
---|
933 | DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot");
|
---|
934 | for (i=0;i<DiagData->AllocNum/2;i++) {
|
---|
935 | DiagData->top[i] = 2*i;
|
---|
936 | DiagData->bot[i] = 2*i+1;
|
---|
937 | }
|
---|
938 | /* // print starting values of index generation tables top and bot
|
---|
939 | fprintf(stderr,"top\t");
|
---|
940 | for (k=0;k<AllocNum/2;k++)
|
---|
941 | fprintf(stderr,"%i\t", top[k]);
|
---|
942 | fprintf(stderr,"\n");
|
---|
943 | fprintf(stderr,"bot\t");
|
---|
944 | for (k=0;k<AllocNum/2;k++)
|
---|
945 | fprintf(stderr,"%i\t", bot[k]);
|
---|
946 | fprintf(stderr,"\n");*/
|
---|
947 | }
|
---|
948 |
|
---|
949 | /** Frees allocated memory for the DiagonalizationData structure.
|
---|
950 | * \param DiagData pointer to structure
|
---|
951 | */
|
---|
952 | void FreeDiagonalization(struct DiagonalizationData *DiagData)
|
---|
953 | {
|
---|
954 | int i;
|
---|
955 |
|
---|
956 | Free(DiagData->top, "FreeDiagonalization: DiagData->top");
|
---|
957 | Free(DiagData->bot, "FreeDiagonalization: DiagData->bot");
|
---|
958 | gsl_matrix_free(DiagData->U);
|
---|
959 | for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++)
|
---|
960 | gsl_matrix_free(DiagData->A[i]);
|
---|
961 | Free(DiagData->A, "FreeDiagonalization: DiagData->A");
|
---|
962 | }
|
---|
963 |
|
---|
964 | /** Orthogonalizing PsiType#Occupied wave functions for MD.
|
---|
965 | * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged.
|
---|
966 | * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi
|
---|
967 | * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis.
|
---|
968 | * \param *P Problem at hand
|
---|
969 | * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well
|
---|
970 | * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation
|
---|
971 | * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under
|
---|
972 | * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali-
|
---|
973 | * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the
|
---|
974 | * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a
|
---|
975 | * final orthogonalisation of all Psis, after the CG minimisation.
|
---|
976 | */
|
---|
977 | void OrthogonalizePsis(struct Problem *P)
|
---|
978 | {
|
---|
979 | struct Lattice *Lat = &P->Lat;
|
---|
980 | struct Psis *Psi = &Lat->Psi;
|
---|
981 | struct LatticeLevel *LevS = P->R.LevS;
|
---|
982 | int Num = Psi->NoOfPsis;
|
---|
983 | int i,j,l;
|
---|
984 | struct DiagonalizationData DiagData;
|
---|
985 | double PsiSP;
|
---|
986 |
|
---|
987 | InitDiagonalization(P, &DiagData, Num, 1, 0);
|
---|
988 |
|
---|
989 | // Calculate Overlap matrix
|
---|
990 | for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
|
---|
991 | CalculateOverlap(P, l, Occupied);
|
---|
992 | for (i=0;i<Num;i++) // fill gsl matrix with overlap values
|
---|
993 | for (j=0;j<Num;j++)
|
---|
994 | gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
|
---|
995 | //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)");
|
---|
996 |
|
---|
997 | //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)");
|
---|
998 |
|
---|
999 | // diagonalize overlap matrix
|
---|
1000 | Diagonalize(P, &DiagData);
|
---|
1001 |
|
---|
1002 | //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)");
|
---|
1003 |
|
---|
1004 | // apply found unitary transformation to wave functions
|
---|
1005 | UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
|
---|
1006 |
|
---|
1007 | // update GramSch stati
|
---|
1008 | for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) {
|
---|
1009 | GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given...
|
---|
1010 | PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]);
|
---|
1011 | //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP);
|
---|
1012 | Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal;
|
---|
1013 | }
|
---|
1014 | UpdateGramSchAllPsiStatus(P,Psi);
|
---|
1015 | /*
|
---|
1016 | for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
|
---|
1017 | CalculateOverlap(P, l, Occupied);
|
---|
1018 | for (i=0;i<Num;i++) // fill gsl matrix with overlap values
|
---|
1019 | for (j=0;j<Num;j++)
|
---|
1020 | gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
|
---|
1021 | if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)");
|
---|
1022 | */
|
---|
1023 | // free memory
|
---|
1024 | FreeDiagonalization(&DiagData);
|
---|
1025 | }
|
---|
1026 |
|
---|
1027 | /** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD.
|
---|
1028 | * Ensures that two orthonormality condition are met for Psis:
|
---|
1029 | * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$
|
---|
1030 | * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$
|
---|
1031 | * \param *P Problem at hand
|
---|
1032 | * \note [Payne] states that GramSchmidt is not well-suited, and this stronger
|
---|
1033 | * condition is needed for numerical stability
|
---|
1034 | * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat
|
---|
1035 | */
|
---|
1036 | void StrongOrthogonalizePsis(struct Problem *P)
|
---|
1037 | {
|
---|
1038 | struct Lattice *Lat = &P->Lat;
|
---|
1039 | struct Psis *Psi = &Lat->Psi;
|
---|
1040 | int Num = Psi->NoOfPsis;
|
---|
1041 | int i,j,l;
|
---|
1042 | struct DiagonalizationData DiagData;
|
---|
1043 |
|
---|
1044 | InitDiagonalization(P, &DiagData, Num, 2, 0);
|
---|
1045 |
|
---|
1046 | // Calculate Overlap matrix
|
---|
1047 | for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) {
|
---|
1048 | CalculateOverlap(P, l, Occupied);
|
---|
1049 | //Calculate1stOverlap(P, l, Occupied);
|
---|
1050 | }
|
---|
1051 | for (i=0;i<Num;i++) // fill gsl matrix with overlap values
|
---|
1052 | for (j=0;j<Num;j++) {
|
---|
1053 | gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
|
---|
1054 | //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]);
|
---|
1055 | }
|
---|
1056 |
|
---|
1057 | // diagonalize overlap matrix
|
---|
1058 | Diagonalize(P, &DiagData);
|
---|
1059 |
|
---|
1060 | // apply found unitary transformation to wave functions
|
---|
1061 | UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
|
---|
1062 |
|
---|
1063 | // free memory
|
---|
1064 | FreeDiagonalization(&DiagData);
|
---|
1065 | }
|
---|
1066 |
|
---|
1067 | /** Uses either serial or parallel diagonalization.
|
---|
1068 | * \param *P Problem at hand
|
---|
1069 | * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
|
---|
1070 | * \sa ParallelDiagonalization(), SerialDiagonalization()
|
---|
1071 | */
|
---|
1072 | void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData)
|
---|
1073 | {
|
---|
1074 | if (DiagData->Num != 1) { // one- or multi-process case?
|
---|
1075 | if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) {
|
---|
1076 | /*
|
---|
1077 | debug(P,"Testing with silly matrix");
|
---|
1078 | ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot);
|
---|
1079 | if(P->Call.out[ReadOut]) // && P->Par.me == 0)
|
---|
1080 | PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
|
---|
1081 | if(P->Call.out[ReadOut]) // && P->Par.me == 0)
|
---|
1082 | PrintGSLMatrix(P, Utest, 4, "Utest (final)");
|
---|
1083 | */
|
---|
1084 | debug(P,"ParallelDiagonalization");
|
---|
1085 | ParallelDiagonalization(P, DiagData);
|
---|
1086 | } else {/*
|
---|
1087 | debug(P,"Testing with silly matrix");
|
---|
1088 | SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot);
|
---|
1089 | if(P->Call.out[ReadOut]) // && P->Par.me == 0)
|
---|
1090 | PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
|
---|
1091 | if(P->Call.out[ReadOut]) // && P->Par.me == 0)
|
---|
1092 | PrintGSLMatrix(P, Utest, 4, "Utest (final)");
|
---|
1093 | */
|
---|
1094 | debug(P,"SerialDiagonalization");
|
---|
1095 | SerialDiagonalization(P, DiagData);
|
---|
1096 | }
|
---|
1097 |
|
---|
1098 | //if(P->Call.out[ReadOut]) // && P->Par.me == 0)
|
---|
1099 | //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U");
|
---|
1100 | }
|
---|
1101 | }
|
---|
1102 |
|
---|
1103 | /** Computation of Maximally Localized Wannier Functions.
|
---|
1104 | * Maximally localized functions are prime when evulating a Hamiltonian with
|
---|
1105 | * magnetic fields under periodic boundary conditions, as the common position
|
---|
1106 | * operator is no longer valid. These can be obtained by orbital rotations, which
|
---|
1107 | * are looked for iteratively and gathered in one transformation matrix, to be
|
---|
1108 | * later applied to the set of orbital wave functions.
|
---|
1109 | *
|
---|
1110 | * In order to obtain these, the following algorithm is applied:
|
---|
1111 | * -# Initialize U (identity) as the sought-for transformation matrix
|
---|
1112 | * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
|
---|
1113 | * \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
|
---|
1114 | * \f$A^{(k)}\f$
|
---|
1115 | * -# For each pair of indices (i,j) (i<j) do the following:
|
---|
1116 | * -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
|
---|
1117 | * where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
|
---|
1118 | * -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
|
---|
1119 | * corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
|
---|
1120 | * -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the
|
---|
1121 | * identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
|
---|
1122 | * \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
|
---|
1123 | * -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
|
---|
1124 | * -# Gather the rotations in \f$U = U R\f$
|
---|
1125 | * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
|
---|
1126 | * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
|
---|
1127 | * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
|
---|
1128 | * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
|
---|
1129 | * OnePsiElementAddData#WannierCentre
|
---|
1130 | *
|
---|
1131 | * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
|
---|
1132 | * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
|
---|
1133 | * time.
|
---|
1134 | *
|
---|
1135 | * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
|
---|
1136 | * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
|
---|
1137 | * may be diagonalized simultaneously and parallely. We use the implementation explained in
|
---|
1138 | * [Golub, Matrix computations, 1989, p451].
|
---|
1139 | *
|
---|
1140 | * \param *P Problem at hand
|
---|
1141 | */
|
---|
1142 | void ComputeMLWF(struct Problem *P) {
|
---|
1143 | // variables and allocation
|
---|
1144 | struct Lattice *Lat = &P->Lat;
|
---|
1145 | struct Psis *Psi = &Lat->Psi;
|
---|
1146 | int i;
|
---|
1147 | int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
|
---|
1148 | double **WannierCentre;
|
---|
1149 | double *WannierSpread;
|
---|
1150 | double spread, spreadSQ;
|
---|
1151 | struct DiagonalizationData DiagData;
|
---|
1152 |
|
---|
1153 | if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
|
---|
1154 |
|
---|
1155 | InitDiagonalization(P, &DiagData, Num, max_operators, 1);
|
---|
1156 |
|
---|
1157 | // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
|
---|
1158 | //debug (P,"Calculatung Variance matrices");
|
---|
1159 | FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A);
|
---|
1160 |
|
---|
1161 | //debug (P,"Diagonalizing");
|
---|
1162 | Diagonalize(P, &DiagData);
|
---|
1163 |
|
---|
1164 | // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
|
---|
1165 | //debug (P,"Performing Unitary transformation");
|
---|
1166 | UnitaryTransformationOnWavefunctions(P, DiagData.U, Num);
|
---|
1167 |
|
---|
1168 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me);
|
---|
1169 | WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre");
|
---|
1170 | WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread");
|
---|
1171 | for (i=0;i<Num;i++)
|
---|
1172 | WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre");
|
---|
1173 |
|
---|
1174 | //debug (P,"Computing Wannier centres from variance matrix");
|
---|
1175 | ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread);
|
---|
1176 |
|
---|
1177 | // join Wannier orbital to groups with common centres under certain conditions
|
---|
1178 | //debug (P,"Changing Wannier Centres according to CommonWannier");
|
---|
1179 | ChangeWannierCentres(P, Num, WannierCentre, WannierSpread);
|
---|
1180 |
|
---|
1181 | // write to file
|
---|
1182 | //debug (P,"Writing spread file");
|
---|
1183 | WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread);
|
---|
1184 |
|
---|
1185 | debug(P,"Free'ing memory");
|
---|
1186 | // free all remaining memory
|
---|
1187 | FreeDiagonalization(&DiagData);
|
---|
1188 | for (i=0;i<Num;i++)
|
---|
1189 | Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]");
|
---|
1190 | Free(WannierCentre, "ComputeMLWF: WannierCentre");
|
---|
1191 | Free(WannierSpread, "ComputeMLWF: WannierSpread");
|
---|
1192 | }
|
---|
1193 |
|
---|
1194 | /** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix.
|
---|
1195 | * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)} = 0\f$, where \f$I\f$ is the unit matrix and
|
---|
1196 | * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part.
|
---|
1197 | *
|
---|
1198 | * The eigenvectors are then obtained by solving \f$(A-\lambda \cdot I)x = 0\f$ for a given eigenvalue
|
---|
1199 | * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus the equation system is
|
---|
1200 | * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then
|
---|
1201 | * derive the other by solving the equation system. Finally, the eigenvector is normalized.
|
---|
1202 | * \param *P Problem at hand
|
---|
1203 | * \param *A matrix whose eigenvalues/-vectors are to be found
|
---|
1204 | * \param *eval vector with eigenvalues
|
---|
1205 | * \param *evec matrix with corresponding eigenvectors in columns
|
---|
1206 | */
|
---|
1207 | #ifdef HAVE_INLINE
|
---|
1208 | inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
|
---|
1209 | #else
|
---|
1210 | void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
|
---|
1211 | #endif
|
---|
1212 | {
|
---|
1213 | double a11,a12,a21,a22;
|
---|
1214 | double ev[2], summand1, summand2, norm;
|
---|
1215 | int i;
|
---|
1216 | // find eigenvalues
|
---|
1217 | a11 = gsl_matrix_get(A,0,0);
|
---|
1218 | a12 = gsl_matrix_get(A,0,1);
|
---|
1219 | a21 = gsl_matrix_get(A,1,0);
|
---|
1220 | a22 = gsl_matrix_get(A,1,1);
|
---|
1221 | summand1 = (a11+a22)/2.;
|
---|
1222 | summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22);
|
---|
1223 | ev[0] = summand1 + summand2;
|
---|
1224 | ev[1] = summand1 - summand2;
|
---|
1225 | gsl_vector_set(eval, 0, ev[0]);
|
---|
1226 | gsl_vector_set(eval, 1, ev[1]);
|
---|
1227 | //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2);
|
---|
1228 |
|
---|
1229 | // find eigenvectors
|
---|
1230 | for(i=0;i<2;i++) {
|
---|
1231 | if (fabs(ev[i]) < MYEPSILON) {
|
---|
1232 | gsl_matrix_set(evec, 0,i, 0.);
|
---|
1233 | gsl_matrix_set(evec, 1,i, 0.);
|
---|
1234 | } else if (fabs(a22-ev[i]) > MYEPSILON) {
|
---|
1235 | norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i])));
|
---|
1236 | gsl_matrix_set(evec, 0,i, 1./norm);
|
---|
1237 | gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm);
|
---|
1238 | //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
|
---|
1239 | } else if (fabs(a12) > MYEPSILON) {
|
---|
1240 | norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12));
|
---|
1241 | gsl_matrix_set(evec, 0,i, 1./norm);
|
---|
1242 | gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm);
|
---|
1243 | //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
|
---|
1244 | } else {
|
---|
1245 | if (fabs(a11-ev[i]) > MYEPSILON) {
|
---|
1246 | norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i])));
|
---|
1247 | gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm);
|
---|
1248 | gsl_matrix_set(evec, 1,i, 1./norm);
|
---|
1249 | //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
|
---|
1250 | } else if (fabs(a21) > MYEPSILON) {
|
---|
1251 | norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21));
|
---|
1252 | gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm);
|
---|
1253 | gsl_matrix_set(evec, 1,i, 1./norm);
|
---|
1254 | //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
|
---|
1255 | } else {
|
---|
1256 | //gsl_matrix_set(evec, 0,i, 0.);
|
---|
1257 | //gsl_matrix_set(evec, 1,i, 1.);
|
---|
1258 | fprintf (stderr,"\t evec %i undetermined\n", i);
|
---|
1259 | }
|
---|
1260 | //gsl_matrix_set(evec, 0,0, 1.);
|
---|
1261 | //gsl_matrix_set(evec, 1,0, 0.);
|
---|
1262 | //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me);
|
---|
1263 | }
|
---|
1264 | }
|
---|
1265 | }
|
---|
1266 |
|
---|
1267 | /** Calculates sine and cosine values for multiple matrix diagonalization.
|
---|
1268 | * \param *evec eigenvectors in columns
|
---|
1269 | * \param *eval corresponding eigenvalues
|
---|
1270 | * \param *c cosine to be returned
|
---|
1271 | * \param *s sine to be returned
|
---|
1272 | */
|
---|
1273 | #ifdef HAVE_INLINE
|
---|
1274 | inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
|
---|
1275 | #else
|
---|
1276 | void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
|
---|
1277 | #endif
|
---|
1278 | {
|
---|
1279 | int index;
|
---|
1280 | double x,y,r;
|
---|
1281 |
|
---|
1282 | index = gsl_vector_max_index (eval); // get biggest eigenvalue
|
---|
1283 | //fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index);
|
---|
1284 | x = gsl_matrix_get(evec, 0, index);
|
---|
1285 | y = gsl_matrix_get(evec, 1, index);
|
---|
1286 | if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4
|
---|
1287 | y = -y;
|
---|
1288 | x = -x;
|
---|
1289 | }
|
---|
1290 | //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
|
---|
1291 | //fprintf(stderr,"\tx %lg\ty %lg\n", x,y);
|
---|
1292 |
|
---|
1293 | //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
|
---|
1294 | // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
|
---|
1295 | r = sqrt(x*x + y*y); // + z*z);
|
---|
1296 | if (fabs(r) > MYEPSILON) {
|
---|
1297 | *c = sqrt((x + r) / (2.*r));
|
---|
1298 | *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
|
---|
1299 | } else {
|
---|
1300 | *c = 1.;
|
---|
1301 | *s = 0.;
|
---|
1302 | }
|
---|
1303 | }
|
---|
1304 |
|
---|
1305 | /*
|
---|
1306 | * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
|
---|
1307 | * \param *U transformation matrix set to unity matrix
|
---|
1308 | * \param NumMatrices number of matrices to be diagonalized simultaneously
|
---|
1309 | * \param extra number of additional matrices the rotation is applied to, but which are not actively diagonalized (they follow in \a **A)
|
---|
1310 | * \param AllocNum number of rows/columns in matrices
|
---|
1311 | * \param Num number of wave functions
|
---|
1312 | * \param ProcRank index in group for this cpu
|
---|
1313 | * \param ProcNum number of cpus in group
|
---|
1314 | * \param *top array with top row indices (merry-go-round)
|
---|
1315 | * \param *bot array with bottom row indices (merry-go-round)
|
---|
1316 | */
|
---|
1317 |
|
---|
1318 | /** Simultaneous diagonalization of matrices with multiple cpus.
|
---|
1319 | * \param *P Problem at hand
|
---|
1320 | * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
|
---|
1321 | * \note this is slower given one cpu only than SerialDiagonalization()
|
---|
1322 | */
|
---|
1323 | void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
|
---|
1324 | {
|
---|
1325 | struct RunStruct *R =&P->R;
|
---|
1326 | int tagR0, tagR1, tagS0, tagS1;
|
---|
1327 | int iloc, jloc;
|
---|
1328 | double *s_all, *c_all;
|
---|
1329 | int round, max_rounds;
|
---|
1330 | int start;
|
---|
1331 | int *rcounts, *rdispls;
|
---|
1332 | double *c, *s;
|
---|
1333 | int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
|
---|
1334 | int left, right; // left or right neighbour for process
|
---|
1335 | double spread = 0., old_spread=0., Spread=0.;
|
---|
1336 | int i,j,k,l,m,u;
|
---|
1337 | int set;
|
---|
1338 | int it_steps; // iteration step counter
|
---|
1339 | double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k]
|
---|
1340 | double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k]
|
---|
1341 | double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k]
|
---|
1342 | double a_i, a_j;
|
---|
1343 | gsl_matrix *G;
|
---|
1344 | gsl_vector *h;
|
---|
1345 | gsl_vector *eval;
|
---|
1346 | gsl_matrix *evec;
|
---|
1347 | //gsl_eigen_symmv_workspace *w;
|
---|
1348 |
|
---|
1349 | max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set
|
---|
1350 | if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds);
|
---|
1351 |
|
---|
1352 | // allocate column vectors for interchange of columns
|
---|
1353 | debug(P,"allocate column vectors for interchange of columns");
|
---|
1354 | c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
|
---|
1355 | s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
|
---|
1356 | c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all");
|
---|
1357 | s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all");
|
---|
1358 | rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts");
|
---|
1359 | rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls");
|
---|
1360 |
|
---|
1361 | // allocate eigenvector stuff
|
---|
1362 | debug(P,"allocate eigenvector stuff");
|
---|
1363 | G = gsl_matrix_calloc (2,2);
|
---|
1364 | h = gsl_vector_alloc (2);
|
---|
1365 | eval = gsl_vector_alloc (2);
|
---|
1366 | evec = gsl_matrix_alloc (2,2);
|
---|
1367 | //w = gsl_eigen_symmv_alloc(2);
|
---|
1368 |
|
---|
1369 | // establish communication partners
|
---|
1370 | debug(P,"establish communication partners");
|
---|
1371 | if (DiagData->ProcRank == 0) {
|
---|
1372 | tagS0 = WannierALTag; // left p0 always remains left p0
|
---|
1373 | } else {
|
---|
1374 | tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last
|
---|
1375 | }
|
---|
1376 | tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1
|
---|
1377 | tagR0 = WannierALTag; //
|
---|
1378 | tagR1 = WannierARTag; // first process
|
---|
1379 | if (DiagData->ProcRank == 0) {
|
---|
1380 | left = DiagData->ProcNum-1;
|
---|
1381 | right = 1;
|
---|
1382 | Lsend = 0;
|
---|
1383 | Rsend = 1;
|
---|
1384 | Lrecv = 0;
|
---|
1385 | Rrecv = 1;
|
---|
1386 | } else if (DiagData->ProcRank == DiagData->ProcNum - 1) {
|
---|
1387 | left = DiagData->ProcRank - 1;
|
---|
1388 | right = 0;
|
---|
1389 | Lsend = DiagData->ProcRank;
|
---|
1390 | Rsend = DiagData->ProcRank - 1;
|
---|
1391 | Lrecv = DiagData->ProcRank - 1;
|
---|
1392 | Rrecv = DiagData->ProcRank;
|
---|
1393 | } else {
|
---|
1394 | left = DiagData->ProcRank - 1;
|
---|
1395 | right = DiagData->ProcRank + 1;
|
---|
1396 | Lsend = DiagData->ProcRank+1;
|
---|
1397 | Rsend = DiagData->ProcRank - 1;
|
---|
1398 | Lrecv = DiagData->ProcRank - 1;
|
---|
1399 | Rrecv = DiagData->ProcRank+1;
|
---|
1400 | }
|
---|
1401 | //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
|
---|
1402 |
|
---|
1403 | // initialise A_loc
|
---|
1404 | debug(P,"initialise A_loc");
|
---|
1405 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
|
---|
1406 | //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
|
---|
1407 | Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
|
---|
1408 | Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]");
|
---|
1409 | Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
|
---|
1410 | //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
|
---|
1411 |
|
---|
1412 | for (round=0;round<max_rounds;round++) {
|
---|
1413 | Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)];
|
---|
1414 | Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)];
|
---|
1415 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1416 | Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round));
|
---|
1417 | Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1);
|
---|
1418 | //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
|
---|
1419 | }
|
---|
1420 | }
|
---|
1421 | }
|
---|
1422 | // initialise U_loc
|
---|
1423 | debug(P,"initialise U_loc");
|
---|
1424 | //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
|
---|
1425 | Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround");
|
---|
1426 | Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal");
|
---|
1427 | Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
|
---|
1428 | //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
|
---|
1429 | for (round=0;round<max_rounds;round++) {
|
---|
1430 | Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)];
|
---|
1431 | Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)];
|
---|
1432 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1433 | Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round));
|
---|
1434 | Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1);
|
---|
1435 | //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
|
---|
1436 | }
|
---|
1437 | }
|
---|
1438 |
|
---|
1439 | // now comes the iteration loop
|
---|
1440 | debug(P,"now comes the iteration loop");
|
---|
1441 | it_steps = 0;
|
---|
1442 | do {
|
---|
1443 | it_steps++;
|
---|
1444 | //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
|
---|
1445 | for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time
|
---|
1446 | //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
|
---|
1447 | for (round = 0; round < max_rounds;round++) {
|
---|
1448 | start = DiagData->ProcRank * max_rounds + round;
|
---|
1449 | // get indices
|
---|
1450 | i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices
|
---|
1451 | iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
|
---|
1452 | j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i
|
---|
1453 | jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
|
---|
1454 | //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
|
---|
1455 |
|
---|
1456 | // calculate rotation angle, i.e. c and s
|
---|
1457 | //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
|
---|
1458 | gsl_matrix_set_zero(G);
|
---|
1459 | for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
|
---|
1460 | // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
|
---|
1461 | //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
|
---|
1462 | //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
|
---|
1463 | gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
|
---|
1464 | gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
|
---|
1465 |
|
---|
1466 | // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
|
---|
1467 | for (l=0;l<2;l++)
|
---|
1468 | for (m=0;m<2;m++)
|
---|
1469 | gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
|
---|
1470 | }
|
---|
1471 | //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
|
---|
1472 | // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
|
---|
1473 | EigensolverFor22Matrix(P,G,eval,evec);
|
---|
1474 | //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
|
---|
1475 |
|
---|
1476 | CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]);
|
---|
1477 | //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
|
---|
1478 |
|
---|
1479 | //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
|
---|
1480 | // V_loc = V_loc * V_small
|
---|
1481 | //debug(P,"apply rotation to local U");
|
---|
1482 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1483 | a_i = Uloc[2*round+iloc][l];
|
---|
1484 | a_j = Uloc[2*round+jloc][l];
|
---|
1485 | Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j;
|
---|
1486 | Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
|
---|
1487 | }
|
---|
1488 | } // end of round
|
---|
1489 | // circulate the rotation angles
|
---|
1490 | //debug(P,"circulate the rotation angles");
|
---|
1491 | MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation
|
---|
1492 | MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));
|
---|
1493 | //m = start;
|
---|
1494 | for (l=0;l<DiagData->AllocNum/2;l++) { // for each process
|
---|
1495 | // we have V_small from process k
|
---|
1496 | //debug(P,"Apply V_small from other process");
|
---|
1497 | i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices
|
---|
1498 | j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i
|
---|
1499 | iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1;
|
---|
1500 | jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1;
|
---|
1501 | for (m=0;m<max_rounds;m++) {
|
---|
1502 | //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
|
---|
1503 | // apply row rotation to each A[k]
|
---|
1504 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
|
---|
1505 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
|
---|
1506 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
|
---|
1507 | a_i = Aloc[k][2*m+iloc][i];
|
---|
1508 | a_j = Aloc[k][2*m+iloc][j];
|
---|
1509 | Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j;
|
---|
1510 | Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
|
---|
1511 | a_i = Aloc[k][2*m+jloc][i];
|
---|
1512 | a_j = Aloc[k][2*m+jloc][j];
|
---|
1513 | Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j;
|
---|
1514 | Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
|
---|
1515 | //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
|
---|
1516 | //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
|
---|
1517 | }
|
---|
1518 | }
|
---|
1519 | }
|
---|
1520 | // apply rotation to local operator matrices
|
---|
1521 | // A_loc = A_loc * V_small
|
---|
1522 | //debug(P,"apply rotation to local operator matrices A[k]");
|
---|
1523 | for (m=0;m<max_rounds;m++) {
|
---|
1524 | start = DiagData->ProcRank * max_rounds + m;
|
---|
1525 | iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
|
---|
1526 | jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
|
---|
1527 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix !
|
---|
1528 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1529 | // Columns, i and j belong to this process only!
|
---|
1530 | a_i = Aloc[k][2*m+iloc][l];
|
---|
1531 | a_j = Aloc[k][2*m+jloc][l];
|
---|
1532 | Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j;
|
---|
1533 | Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
|
---|
1534 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
|
---|
1535 | }
|
---|
1536 | }
|
---|
1537 | }
|
---|
1538 | // Shuffling of these round's columns to prepare next rotation set
|
---|
1539 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
|
---|
1540 | // extract columns from A
|
---|
1541 | //debug(P,"extract columns from A");
|
---|
1542 | MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
|
---|
1543 |
|
---|
1544 | }
|
---|
1545 | // and also for V ...
|
---|
1546 | //debug(P,"extract columns from U");
|
---|
1547 | MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
|
---|
1548 |
|
---|
1549 |
|
---|
1550 | // and merry-go-round for the indices too
|
---|
1551 | //debug(P,"and merry-go-round for the indices too");
|
---|
1552 | MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
|
---|
1553 | }
|
---|
1554 |
|
---|
1555 | //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
|
---|
1556 | // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
|
---|
1557 | old_spread = Spread;
|
---|
1558 | spread = 0.;
|
---|
1559 | for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
|
---|
1560 | for (i=0; i < 2*max_rounds; i++) { // go through all wave functions
|
---|
1561 | spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds];
|
---|
1562 | //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
|
---|
1563 | }
|
---|
1564 | }
|
---|
1565 | MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm));
|
---|
1566 | //Spread = spread;
|
---|
1567 | if (P->Par.me == 0) {
|
---|
1568 | //if(P->Call.out[ReadOut])
|
---|
1569 | // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
|
---|
1570 | //else
|
---|
1571 | //fprintf(stderr,"%2.9e\n",Spread);
|
---|
1572 | }
|
---|
1573 | // STEP 5: check change of variance
|
---|
1574 | } while (fabs(old_spread-Spread) >= R->EpsWannier);
|
---|
1575 | // end of iterative diagonalization loop: We have found our final orthogonal U!
|
---|
1576 |
|
---|
1577 | // gather local parts of U into complete matrix
|
---|
1578 | for (l=0;l<DiagData->ProcNum;l++)
|
---|
1579 | rcounts[l] = DiagData->AllocNum;
|
---|
1580 | debug(P,"allgather U");
|
---|
1581 | for (round=0;round<2*max_rounds;round++) {
|
---|
1582 | for (l=0;l<DiagData->ProcNum;l++)
|
---|
1583 | rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
|
---|
1584 | MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
|
---|
1585 | }
|
---|
1586 | for (k=0;k<DiagData->AllocNum;k++) {
|
---|
1587 | for(l=0;l<DiagData->AllocNum;l++) {
|
---|
1588 | gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]);
|
---|
1589 | }
|
---|
1590 | }
|
---|
1591 |
|
---|
1592 | // after one set, gather A[k] from all and calculate spread
|
---|
1593 | for (l=0;l<DiagData->ProcNum;l++)
|
---|
1594 | rcounts[l] = DiagData->AllocNum;
|
---|
1595 | debug(P,"gather A[k] for spread");
|
---|
1596 | for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix !
|
---|
1597 | debug(P,"A[k] all gather");
|
---|
1598 | for (round=0;round<2*max_rounds;round++) {
|
---|
1599 | for (l=0;l<DiagData->ProcNum;l++)
|
---|
1600 | rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
|
---|
1601 | MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
|
---|
1602 | }
|
---|
1603 | for (k=0;k<DiagData->AllocNum;k++) {
|
---|
1604 | for(l=0;l<DiagData->AllocNum;l++) {
|
---|
1605 | gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]);
|
---|
1606 | }
|
---|
1607 | }
|
---|
1608 | }
|
---|
1609 |
|
---|
1610 | // free eigenvector stuff
|
---|
1611 | gsl_vector_free(h);
|
---|
1612 | gsl_matrix_free(G);
|
---|
1613 | //gsl_eigen_symmv_free(w);
|
---|
1614 | gsl_vector_free(eval);
|
---|
1615 | gsl_matrix_free(evec);
|
---|
1616 |
|
---|
1617 | // Free column vectors
|
---|
1618 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
|
---|
1619 | Free(Atotal[k], "ParallelDiagonalization: Atotal[k]");
|
---|
1620 | Free(Around[k], "ParallelDiagonalization: Around[k]");
|
---|
1621 | }
|
---|
1622 | Free(Uround, "ParallelDiagonalization: Uround");
|
---|
1623 | Free(Utotal, "ParallelDiagonalization: Utotal");
|
---|
1624 | Free(c_all, "ParallelDiagonalization: c_all");
|
---|
1625 | Free(s_all, "ParallelDiagonalization: s_all");
|
---|
1626 | Free(c, "ParallelDiagonalization: c");
|
---|
1627 | Free(s, "ParallelDiagonalization: s");
|
---|
1628 | Free(rcounts, "ParallelDiagonalization: rcounts");
|
---|
1629 | Free(rdispls, "ParallelDiagonalization: rdispls");
|
---|
1630 | }
|
---|
1631 | /*
|
---|
1632 | * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
|
---|
1633 | * \param *U transformation matrix set to unity matrix
|
---|
1634 | * \param NumMatrices number of matrices to be diagonalized
|
---|
1635 | * \param extra number of additional matrices the rotation is applied to, but which are not actively diagonalized (they follow in \a **A)
|
---|
1636 | * \param AllocNum number of rows/columns in matrices
|
---|
1637 | * \param Num number of wave functions
|
---|
1638 | * \param *top array with top row indices (merry-go-round)
|
---|
1639 | * \param *bot array with bottom row indices (merry-go-round)
|
---|
1640 | */
|
---|
1641 |
|
---|
1642 | /** Simultaneous Diagonalization of variances matrices with one cpu.
|
---|
1643 | * \param *P Problem at hand
|
---|
1644 | * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
|
---|
1645 | * \note this is faster given one cpu only than ParallelDiagonalization()
|
---|
1646 | */
|
---|
1647 | void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
|
---|
1648 | {
|
---|
1649 | struct RunStruct *R = &P->R;
|
---|
1650 | gsl_matrix *G;
|
---|
1651 | gsl_vector *h;
|
---|
1652 | gsl_vector *eval;
|
---|
1653 | gsl_matrix *evec;
|
---|
1654 | //gsl_eigen_symmv_workspace *w;
|
---|
1655 | double *c,*s,a_i,a_j;
|
---|
1656 | int it_steps, set, ProcRank;
|
---|
1657 | int i,j,k,l,m;
|
---|
1658 | double spread = 0., old_spread = 0.;\
|
---|
1659 |
|
---|
1660 | // allocate eigenvector stuff
|
---|
1661 | debug(P,"allocate eigenvector stuff");
|
---|
1662 | G = gsl_matrix_calloc (2,2);
|
---|
1663 | h = gsl_vector_alloc (2);
|
---|
1664 | eval = gsl_vector_alloc (2);
|
---|
1665 | evec = gsl_matrix_alloc (2,2);
|
---|
1666 | //w = gsl_eigen_symmv_alloc(2);
|
---|
1667 |
|
---|
1668 | c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
|
---|
1669 | s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
|
---|
1670 | debug(P,"now comes the iteration loop");
|
---|
1671 | it_steps=0;
|
---|
1672 | do {
|
---|
1673 | it_steps++;
|
---|
1674 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
|
---|
1675 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
|
---|
1676 | for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
|
---|
1677 | //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
|
---|
1678 | // STEP 3: for all index pairs 0<= i<j <AllocNum
|
---|
1679 | for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) {
|
---|
1680 | // get indices
|
---|
1681 | i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices
|
---|
1682 | j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i
|
---|
1683 | //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
|
---|
1684 | // STEP 3a: Calculate G
|
---|
1685 | gsl_matrix_set_zero(G);
|
---|
1686 |
|
---|
1687 | for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
|
---|
1688 | // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
|
---|
1689 | //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
|
---|
1690 | //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
|
---|
1691 | gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
|
---|
1692 | gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
|
---|
1693 | //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
|
---|
1694 |
|
---|
1695 | // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
|
---|
1696 | for (l=0;l<2;l++)
|
---|
1697 | for (m=0;m<2;m++)
|
---|
1698 | gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
|
---|
1699 |
|
---|
1700 | //PrintGSLMatrix(P,G,2, "G");
|
---|
1701 | }
|
---|
1702 |
|
---|
1703 | //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
|
---|
1704 | // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
|
---|
1705 | EigensolverFor22Matrix(P,G,eval,evec);
|
---|
1706 | //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
|
---|
1707 |
|
---|
1708 | //PrintGSLMatrix(P, evec, 2, "Eigenvectors");
|
---|
1709 | CalculateRotationAnglesFromEigenvalues(evec, eval, c, s);
|
---|
1710 | //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
|
---|
1711 |
|
---|
1712 | //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
|
---|
1713 | // STEP 3d: apply rotation R to rows and columns of A^{(k)}
|
---|
1714 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
|
---|
1715 | //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)");
|
---|
1716 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1717 | // Rows
|
---|
1718 | a_i = gsl_matrix_get(DiagData->A[k],i,l);
|
---|
1719 | a_j = gsl_matrix_get(DiagData->A[k],j,l);
|
---|
1720 | gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j);
|
---|
1721 | gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j);
|
---|
1722 | }
|
---|
1723 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1724 | // Columns
|
---|
1725 | a_i = gsl_matrix_get(DiagData->A[k],l,i);
|
---|
1726 | a_j = gsl_matrix_get(DiagData->A[k],l,j);
|
---|
1727 | gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j);
|
---|
1728 | gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j);
|
---|
1729 | }
|
---|
1730 | //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)");
|
---|
1731 | }
|
---|
1732 | //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
|
---|
1733 | //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)");
|
---|
1734 | // STEP 3e: apply U = R*U
|
---|
1735 | for (l=0;l<DiagData->AllocNum;l++) {
|
---|
1736 | a_i = gsl_matrix_get(DiagData->U,i,l);
|
---|
1737 | a_j = gsl_matrix_get(DiagData->U,j,l);
|
---|
1738 | gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j);
|
---|
1739 | gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j);
|
---|
1740 | }
|
---|
1741 | //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)");
|
---|
1742 | }
|
---|
1743 | // and merry-go-round for the indices too
|
---|
1744 | //debug(P,"and merry-go-round for the indices too");
|
---|
1745 | if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
|
---|
1746 | }
|
---|
1747 |
|
---|
1748 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
|
---|
1749 | // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
|
---|
1750 | old_spread = spread;
|
---|
1751 | spread = 0.;
|
---|
1752 | for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
|
---|
1753 | for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions
|
---|
1754 | spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2);
|
---|
1755 | }
|
---|
1756 | }
|
---|
1757 | if(P->Par.me == 0) {
|
---|
1758 | //if(P->Call.out[ReadOut])
|
---|
1759 | // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
|
---|
1760 | //else
|
---|
1761 | //fprintf(stderr,"%2.9e\n",spread);
|
---|
1762 | }
|
---|
1763 | // STEP 5: check change of variance
|
---|
1764 | } while (fabs(old_spread-spread) >= R->EpsWannier);
|
---|
1765 | // end of iterative diagonalization loop: We have found our final orthogonal U!
|
---|
1766 | Free(c, "SerialDiagonalization: c");
|
---|
1767 | Free(s, "SerialDiagonalization: s");
|
---|
1768 | // free eigenvector stuff
|
---|
1769 | gsl_vector_free(h);
|
---|
1770 | gsl_matrix_free(G);
|
---|
1771 | //gsl_eigen_symmv_free(w);
|
---|
1772 | gsl_vector_free(eval);
|
---|
1773 | gsl_matrix_free(evec);
|
---|
1774 | }
|
---|
1775 |
|
---|
1776 | /** Writes the wannier centres and spread to file.
|
---|
1777 | * Also puts centres from array into OnePsiElementAddData structure.
|
---|
1778 | * \param *P Problem at hand
|
---|
1779 | * \param old_spread first term of wannier spread
|
---|
1780 | * \param spread second term of wannier spread
|
---|
1781 | * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
|
---|
1782 | * \param *WannierSpread array with wannier spread per wave function
|
---|
1783 | */
|
---|
1784 | void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread)
|
---|
1785 | {
|
---|
1786 | struct FileData *F = &P->Files;
|
---|
1787 | struct RunStruct *R = &P->R;
|
---|
1788 | struct Lattice *Lat = &P->Lat;
|
---|
1789 | struct Psis *Psi = &Lat->Psi;
|
---|
1790 | struct OnePsiElement *OnePsiA;
|
---|
1791 | char spin[12], suffix[18];
|
---|
1792 | int i,j,l;
|
---|
1793 |
|
---|
1794 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
|
---|
1795 |
|
---|
1796 | switch (Lat->Psi.PsiST) {
|
---|
1797 | case SpinDouble:
|
---|
1798 | strcpy(suffix,".spread.csv");
|
---|
1799 | strcpy(spin,"SpinDouble");
|
---|
1800 | break;
|
---|
1801 | case SpinUp:
|
---|
1802 | strcpy(suffix,".spread_up.csv");
|
---|
1803 | strcpy(spin,"SpinUp");
|
---|
1804 | break;
|
---|
1805 | case SpinDown:
|
---|
1806 | strcpy(suffix,".spread_down.csv");
|
---|
1807 | strcpy(spin,"SpinDown");
|
---|
1808 | break;
|
---|
1809 | }
|
---|
1810 | if (P->Par.me_comm_ST == 0) {
|
---|
1811 | if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
|
---|
1812 | OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
|
---|
1813 | else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
|
---|
1814 | OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
|
---|
1815 | if (F->SpreadFile == NULL) {
|
---|
1816 | Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n");
|
---|
1817 | } else {
|
---|
1818 | fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
|
---|
1819 | fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
|
---|
1820 | }
|
---|
1821 | }
|
---|
1822 |
|
---|
1823 | // put (new) WannierCentres into local ones and into file
|
---|
1824 | i=-1;
|
---|
1825 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
|
---|
1826 | OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
|
---|
1827 | if (OnePsiA->PsiType == type) { // drop all but occupied ones
|
---|
1828 | i++; // increase l if it is occupied wave function
|
---|
1829 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
|
---|
1830 | for (j=0;j<NDIM;j++)
|
---|
1831 | Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
|
---|
1832 | }
|
---|
1833 | if (P->Par.me_comm_ST == 0)
|
---|
1834 | fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]);
|
---|
1835 | }
|
---|
1836 | }
|
---|
1837 | if (P->Par.me_comm_ST == 0) {
|
---|
1838 | fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
|
---|
1839 | fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
|
---|
1840 | fflush(F->SpreadFile);
|
---|
1841 | }
|
---|
1842 | }
|
---|
1843 |
|
---|
1844 |
|
---|
1845 | /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
|
---|
1846 | * \param *P Problem at hand
|
---|
1847 | * \return 1 - success, 0 - failure
|
---|
1848 | */
|
---|
1849 | int ParseWannierFile(struct Problem *P)
|
---|
1850 | {
|
---|
1851 | struct Lattice *Lat = &P->Lat;
|
---|
1852 | struct RunStruct *R = &P->R;
|
---|
1853 | struct Psis *Psi = &Lat->Psi;
|
---|
1854 | struct OnePsiElement *OnePsiA;
|
---|
1855 | int i,l,j, msglen;
|
---|
1856 | FILE *SpreadFile;
|
---|
1857 | char *tagname;
|
---|
1858 | char suffix[18];
|
---|
1859 | double WannierCentre[NDIM+1]; // combined centre and spread
|
---|
1860 | MPI_Status status;
|
---|
1861 | int signal = 0; // 1 - ok, 0 - error
|
---|
1862 |
|
---|
1863 | switch (Lat->Psi.PsiST) {
|
---|
1864 | case SpinDouble:
|
---|
1865 | strcpy(suffix,".spread.csv");
|
---|
1866 | break;
|
---|
1867 | case SpinUp:
|
---|
1868 | strcpy(suffix,".spread_up.csv");
|
---|
1869 | break;
|
---|
1870 | case SpinDown:
|
---|
1871 | strcpy(suffix,".spread_down.csv");
|
---|
1872 | break;
|
---|
1873 | }
|
---|
1874 | if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me);
|
---|
1875 |
|
---|
1876 | if (P->Par.me_comm_ST == 0) {
|
---|
1877 | tagname = (char *) Malloc(sizeof(char)*255, "ParseWannierFile: *tagname");
|
---|
1878 | if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
|
---|
1879 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1880 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1881 | return 0;
|
---|
1882 | //Error(SomeError,"ParseWannierFile: Opening failed\n");
|
---|
1883 | }
|
---|
1884 | signal = 1;
|
---|
1885 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1886 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1887 | } else {
|
---|
1888 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1889 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1890 | if (signal == 0)
|
---|
1891 | return 0;
|
---|
1892 | }
|
---|
1893 | i=-1;
|
---|
1894 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
|
---|
1895 | OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
|
---|
1896 | if (OnePsiA->PsiType == type) { // drop all but occupied ones
|
---|
1897 | i++; // increase l if it is occupied wave function
|
---|
1898 | if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
|
---|
1899 | sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
|
---|
1900 | signal = 0;
|
---|
1901 | if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
|
---|
1902 | //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
|
---|
1903 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1904 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1905 | return 0;
|
---|
1906 | }
|
---|
1907 | if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
|
---|
1908 | //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
|
---|
1909 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1910 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1911 | return 0;
|
---|
1912 | }
|
---|
1913 | signal = 1;
|
---|
1914 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1915 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1916 | } else {
|
---|
1917 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
|
---|
1918 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
|
---|
1919 | if (signal == 0)
|
---|
1920 | return 0;
|
---|
1921 | }
|
---|
1922 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
|
---|
1923 | if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
|
---|
1924 | if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
|
---|
1925 | Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
|
---|
1926 | //return 0;
|
---|
1927 | MPI_Get_count(&status, MPI_DOUBLE, &msglen);
|
---|
1928 | if (msglen != NDIM+1)
|
---|
1929 | Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
|
---|
1930 | //return 0;
|
---|
1931 | }
|
---|
1932 | if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
|
---|
1933 | Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
|
---|
1934 | //return 0;
|
---|
1935 | // and store 'em (for all who have this Psi local)
|
---|
1936 | if ((P->Par.me == 0) && P->Call.out[ValueOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
|
---|
1937 | for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
|
---|
1938 | Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
|
---|
1939 | //if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
|
---|
1940 | } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
|
---|
1941 | if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
|
---|
1942 | Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
|
---|
1943 | //return 0;
|
---|
1944 | }
|
---|
1945 | }
|
---|
1946 | }
|
---|
1947 | if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) {
|
---|
1948 | fclose(SpreadFile);
|
---|
1949 | Free(tagname, "ParseWannierFile: *tagname");
|
---|
1950 | }
|
---|
1951 | //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
|
---|
1952 | return 1;
|
---|
1953 | }
|
---|
1954 |
|
---|
1955 | /** Calculates the spread of orbital \a i.
|
---|
1956 | * Stored in OnePsiElementAddData#WannierSpread.
|
---|
1957 | * \param *P Problem at hand
|
---|
1958 | * \param i i-th wave function (note "extra" ones are not counted!)
|
---|
1959 | * \return spread \f$\sigma^2_{A^{(k)}}\f$
|
---|
1960 | */
|
---|
1961 | double CalculateSpread(struct Problem *P, int i) {
|
---|
1962 | struct Lattice *Lat = &P->Lat;
|
---|
1963 | struct RunStruct *R = &P->R;
|
---|
1964 | struct Psis *Psi = &Lat->Psi;
|
---|
1965 | struct LatticeLevel *Lev0 = R->Lev0;
|
---|
1966 | struct LatticeLevel *LevS = R->LevS;
|
---|
1967 | struct Density *Dens0 = Lev0->Dens;
|
---|
1968 | struct fft_plan_3d *plan = Lat->plan;
|
---|
1969 | fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
|
---|
1970 | fftw_real *PsiCR = (fftw_real *)PsiC;
|
---|
1971 | fftw_complex *work = Dens0->DensityCArray[Temp2Density];
|
---|
1972 | fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
|
---|
1973 | fftw_complex **HGcRC = (fftw_complex**)HGcR;
|
---|
1974 | fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
|
---|
1975 | fftw_real **HGcR2 = (fftw_real**)HGcR2C;
|
---|
1976 | MPI_Status status;
|
---|
1977 | struct OnePsiElement *OnePsiA, *LOnePsiA;
|
---|
1978 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
|
---|
1979 | fftw_complex *LPsiDatA=NULL;
|
---|
1980 | int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
|
---|
1981 | N0 = LevS->Plan0.plan->local_nx;
|
---|
1982 | N = LevS->Plan0.plan->N;
|
---|
1983 | const int NUpx = LevS->NUp[0];
|
---|
1984 | const int NUpy = LevS->NUp[1];
|
---|
1985 | const int NUpz = LevS->NUp[2];
|
---|
1986 | double a_ij, b_ij, A_ij, B_ij;
|
---|
1987 | double tmp, tmp2, spread = 0;
|
---|
1988 | double **cos_lookup, **sin_lookup;
|
---|
1989 |
|
---|
1990 | b_ij = 0;
|
---|
1991 |
|
---|
1992 | CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N);
|
---|
1993 |
|
---|
1994 | // fill matrices
|
---|
1995 | OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA
|
---|
1996 | if (OnePsiA->PsiType != Extra) { // drop if extra one
|
---|
1997 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
|
---|
1998 | LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
|
---|
1999 | else
|
---|
2000 | LOnePsiA = NULL;
|
---|
2001 | if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi
|
---|
2002 | RecvSource = OnePsiA->my_color_comm_ST_Psi;
|
---|
2003 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
|
---|
2004 | LPsiDatA=LevS->LPsi->TempPsi;
|
---|
2005 | } else { // .. otherwise send it to all other processes (Max_me... - 1)
|
---|
2006 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
|
---|
2007 | if (p != OnePsiA->my_color_comm_ST_Psi)
|
---|
2008 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
|
---|
2009 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
|
---|
2010 | } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
|
---|
2011 |
|
---|
2012 | CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
|
---|
2013 | // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
|
---|
2014 | for (n0=0;n0<N0;n0++)
|
---|
2015 | for (n[1]=0;n[1]<N[1];n[1]++)
|
---|
2016 | for (n[2]=0;n[2]<N[2];n[2]++) {
|
---|
2017 | i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
|
---|
2018 | iS = n[2]+N[2]*(n[1]+N[1]*n0);
|
---|
2019 | n[0] = n0 + LevS->Plan0.plan->start_nx;
|
---|
2020 | for (k=0;k<max_operators;k+=2) {
|
---|
2021 | tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
|
---|
2022 | tmp2 = PsiCR[i0] /LevS->MaxN;
|
---|
2023 | // check lookup
|
---|
2024 | if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
|
---|
2025 | fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);
|
---|
2026 | Error(SomeError, "Lookup table does not match real value!");
|
---|
2027 | }
|
---|
2028 | // HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
|
---|
2029 | // HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
|
---|
2030 | // HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
|
---|
2031 | // HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
|
---|
2032 | HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
|
---|
2033 | HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
|
---|
2034 | HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
|
---|
2035 | HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
|
---|
2036 | }
|
---|
2037 | }
|
---|
2038 | for (k=0;k<max_operators;k++) {
|
---|
2039 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
|
---|
2040 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
|
---|
2041 | }
|
---|
2042 |
|
---|
2043 |
|
---|
2044 | for (k=0;k<max_operators;k++) {
|
---|
2045 | a_ij = 0;
|
---|
2046 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
|
---|
2047 | // sum directly in a_ij and b_ij the two desired terms
|
---|
2048 | g=0;
|
---|
2049 | if (LevS->GArray[0].GSq == 0.0) {
|
---|
2050 | Index = LevS->GArray[g].Index;
|
---|
2051 | a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
|
---|
2052 | b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
|
---|
2053 | g++;
|
---|
2054 | }
|
---|
2055 | for (; g < LevS->MaxG; g++) {
|
---|
2056 | Index = LevS->GArray[g].Index;
|
---|
2057 | a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
|
---|
2058 | b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
|
---|
2059 | } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
|
---|
2060 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
|
---|
2061 | spread += pow(A_ij,2);
|
---|
2062 | }
|
---|
2063 | }
|
---|
2064 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
|
---|
2065 |
|
---|
2066 | // store spread in OnePsiElementAdd
|
---|
2067 | Psi->AddData[i].WannierSpread = B_ij - spread;
|
---|
2068 |
|
---|
2069 | FreeSinCosLookupTable(cos_lookup,sin_lookup);
|
---|
2070 |
|
---|
2071 | return (B_ij - spread);
|
---|
2072 | }
|
---|