| 1 | /** \file wannier.c
 | 
|---|
| 2 |  * Maximally Localized Wannier Functions.
 | 
|---|
| 3 |  * 
 | 
|---|
| 4 |  * Contains the on function that minimises the spread of all orbitals in one rush in a parallel
 | 
|---|
| 5 |  * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
 | 
|---|
| 6 |  * calculate the spread of a specific orbital, which may be useful in checking on the change of
 | 
|---|
| 7 |  * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
 | 
|---|
| 8 |  * 
 | 
|---|
| 9 |   Project: ParallelCarParrinello
 | 
|---|
| 10 |  \author Frederik Heber
 | 
|---|
| 11 |  \date 2006
 | 
|---|
| 12 | 
 | 
|---|
| 13 |   File: wannier.c
 | 
|---|
| 14 |   $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $
 | 
|---|
| 15 | */
 | 
|---|
| 16 | 
 | 
|---|
| 17 | #include <math.h>
 | 
|---|
| 18 | #include <gsl/gsl_math.h>
 | 
|---|
| 19 | #include <gsl/gsl_eigen.h>
 | 
|---|
| 20 | #include <gsl/gsl_matrix.h>
 | 
|---|
| 21 | #include <gsl/gsl_vector.h>
 | 
|---|
| 22 | #include <gsl/gsl_complex.h>
 | 
|---|
| 23 | #include <gsl/gsl_complex_math.h>
 | 
|---|
| 24 | #include <gsl/gsl_sort_vector.h>
 | 
|---|
| 25 | #include <gsl/gsl_heapsort.h>
 | 
|---|
| 26 | #include <gsl/gsl_blas.h>
 | 
|---|
| 27 | #include <string.h>
 | 
|---|
| 28 | 
 | 
|---|
| 29 | #include "data.h"
 | 
|---|
| 30 | #include "density.h"
 | 
|---|
| 31 | #include "errors.h"
 | 
|---|
| 32 | #include "gramsch.h"
 | 
|---|
| 33 | #include "helpers.h"
 | 
|---|
| 34 | #include "init.h"
 | 
|---|
| 35 | #include "myfft.h"
 | 
|---|
| 36 | #include "mymath.h"
 | 
|---|
| 37 | #include "output.h"
 | 
|---|
| 38 | #include "perturbed.h"
 | 
|---|
| 39 | #include "wannier.h"
 | 
|---|
| 40 | 
 | 
|---|
| 41 | 
 | 
|---|
| 42 | #define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
 | 
|---|
| 43 | #define type Occupied
 | 
|---|
| 44 | 
 | 
|---|
| 45 | 
 | 
|---|
| 46 | /** Converts type fftw_complex to gsl_complex.
 | 
|---|
| 47 |  * \param a complex number
 | 
|---|
| 48 |  * \return b complex number
 | 
|---|
| 49 |  */
 | 
|---|
| 50 | gsl_complex convertComplex (fftw_complex a) {
 | 
|---|
| 51 |   return gsl_complex_rect(c_re(a),c_im(a));
 | 
|---|
| 52 | }
 | 
|---|
| 53 | 
 | 
|---|
| 54 | /** "merry go round" implementation for parallel index ordering.
 | 
|---|
| 55 |  * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
 | 
|---|
| 56 |  * performed which generates once each possible pairing.  
 | 
|---|
| 57 |  * \param *top index array 1
 | 
|---|
| 58 |  * \param *bot index array 2
 | 
|---|
| 59 |  * \param m N/2, where N is the matrix row/column dimension
 | 
|---|
| 60 |  * \note taken from [Golub, Matrix computations, 1989, p451]
 | 
|---|
| 61 |  */
 | 
|---|
| 62 | void MerryGoRoundIndices(int *top, int *bot, int m)
 | 
|---|
| 63 | {
 | 
|---|
| 64 |   int *old_top, *old_bot;
 | 
|---|
| 65 |   int k;
 | 
|---|
| 66 |   old_top = (int *) Malloc(sizeof(int)*m, "music: old_top");
 | 
|---|
| 67 |   old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot");
 | 
|---|
| 68 | /*  fprintf(stderr,"oldtop\t");
 | 
|---|
| 69 |   for (k=0;k<m;k++)
 | 
|---|
| 70 |     fprintf(stderr,"%i\t", top[k]);
 | 
|---|
| 71 |   fprintf(stderr,"\n");
 | 
|---|
| 72 |   fprintf(stderr,"oldbot\t");
 | 
|---|
| 73 |   for (k=0;k<m;k++)
 | 
|---|
| 74 |     fprintf(stderr,"%i\t", bot[k]);
 | 
|---|
| 75 |   fprintf(stderr,"\n");*/
 | 
|---|
| 76 |   // first copy arrays    
 | 
|---|
| 77 |   for (k=0;k<m;k++) {
 | 
|---|
| 78 |     old_top[k] = top[k];
 | 
|---|
| 79 |     old_bot[k] = bot[k];
 | 
|---|
| 80 |   }
 | 
|---|
| 81 |   // then let the music play
 | 
|---|
| 82 |   for (k=0;k<m;k++) {
 | 
|---|
| 83 |     if (k==1)
 | 
|---|
| 84 |       top[k] = old_bot[0];
 | 
|---|
| 85 |     else if (k > 1)
 | 
|---|
| 86 |       top[k] = old_top[k-1];
 | 
|---|
| 87 |     if (k==m-1)
 | 
|---|
| 88 |       bot[k] = old_top[k];
 | 
|---|
| 89 |     else
 | 
|---|
| 90 |       bot[k] = old_bot[k+1];
 | 
|---|
| 91 |   }
 | 
|---|
| 92 | /*  fprintf(stderr,"top\t");
 | 
|---|
| 93 |   for (k=0;k<m;k++)
 | 
|---|
| 94 |     fprintf(stderr,"%i\t", top[k]);
 | 
|---|
| 95 |   fprintf(stderr,"\n");
 | 
|---|
| 96 |   fprintf(stderr,"bot\t");
 | 
|---|
| 97 |   for (k=0;k<m;k++)
 | 
|---|
| 98 |     fprintf(stderr,"%i\t", bot[k]);
 | 
|---|
| 99 |   fprintf(stderr,"\n");*/
 | 
|---|
| 100 |   // and finito
 | 
|---|
| 101 |   Free(old_top, "MerryGoRoundIndices: old_top");
 | 
|---|
| 102 |   Free(old_bot, "MerryGoRoundIndices: old_bot");
 | 
|---|
| 103 | }
 | 
|---|
| 104 | 
 | 
|---|
| 105 | /** merry-go-round for matrix columns.
 | 
|---|
| 106 |  * The trick here is that we must be aware of multiple rotations per process, thus only some of the
 | 
|---|
| 107 |  * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
 | 
|---|
| 108 |  * pointers to the matrix columns within the local array.
 | 
|---|
| 109 |  * \param comm communicator for circulation
 | 
|---|
| 110 |  * \param *Aloc local array of columns
 | 
|---|
| 111 |  * \param Num entries per column
 | 
|---|
| 112 |  * \param max_rounds number of column pairs in \a *Around
 | 
|---|
| 113 |  * \param k offset for tag
 | 
|---|
| 114 |  * \param tagS0 MPI tag for sending left column
 | 
|---|
| 115 |  * \param tagS1 MPI tag for sending right column
 | 
|---|
| 116 |  * \param tagR0 MPI tag for receiving left column
 | 
|---|
| 117 |  * \param tagR1 MPI tag for receiving right column
 | 
|---|
| 118 |  */ 
 | 
|---|
| 119 | void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
 | 
|---|
| 120 |   //double *A_locS1, *A_locS2;  // local columns of A[k]
 | 
|---|
| 121 |   //double *A_locR1, *A_locR2;  // local columns of A[k]
 | 
|---|
| 122 |   MPI_Request requestS0, requestS1, requestR0, requestR1;
 | 
|---|
| 123 |   MPI_Status status;
 | 
|---|
| 124 |   int ProcRank, ProcNum;
 | 
|---|
| 125 |   int l; 
 | 
|---|
| 126 |   MPI_Comm_size (comm, &ProcNum);
 | 
|---|
| 127 |   MPI_Comm_rank (comm, &ProcRank);
 | 
|---|
| 128 |   double *Abuffer1, *Abuffer2;  // mark the columns that are circulated
 | 
|---|
| 129 |   
 | 
|---|
| 130 |   //fprintf(stderr,"shifting...");
 | 
|---|
| 131 |   if (ProcRank == 0) {
 | 
|---|
| 132 |     if (max_rounds > 1) { 
 | 
|---|
| 133 |       // get last left column
 | 
|---|
| 134 |       Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
 | 
|---|
| 135 |       MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
 | 
|---|
| 136 |     } else {
 | 
|---|
| 137 |       // get right column
 | 
|---|
| 138 |       Abuffer1 = Aloc[1]; // note down the free column
 | 
|---|
| 139 |       MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1,  tagS1+2*k, comm, &requestS0);
 | 
|---|
| 140 |     } 
 | 
|---|
| 141 |     
 | 
|---|
| 142 |     //fprintf(stderr,"...left columns...");
 | 
|---|
| 143 |     for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
 | 
|---|
| 144 |       Aloc[l] = Aloc[l-2];
 | 
|---|
| 145 | 
 | 
|---|
| 146 |     if (max_rounds > 1) { 
 | 
|---|
| 147 |       //fprintf(stderr,"...first right...");
 | 
|---|
| 148 |       Aloc[2] = Aloc[1]; // get first right column
 | 
|---|
| 149 |     }
 | 
|---|
| 150 | 
 | 
|---|
| 151 |     //fprintf(stderr,"...right columns...");
 | 
|---|
| 152 |     for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
 | 
|---|
| 153 |       Aloc[l] = Aloc[l+2];
 | 
|---|
| 154 | 
 | 
|---|
| 155 |     //fprintf(stderr,"...last right...");
 | 
|---|
| 156 |     Aloc[(2*max_rounds-1)] = Abuffer1;
 | 
|---|
| 157 |     MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);  
 | 
|---|
| 158 | 
 | 
|---|
| 159 |   } else if (ProcRank == ProcNum-1) {
 | 
|---|
| 160 |     //fprintf(stderr,"...first right...");
 | 
|---|
| 161 |     // get first right column
 | 
|---|
| 162 |     Abuffer2 = Aloc[1]; // note down the free column
 | 
|---|
| 163 |     MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
 | 
|---|
| 164 | 
 | 
|---|
| 165 |     //fprintf(stderr,"...right columns...");
 | 
|---|
| 166 |     for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
 | 
|---|
| 167 |       Aloc[(l)] = Aloc[(l+2)];
 | 
|---|
| 168 | 
 | 
|---|
| 169 |     //fprintf(stderr,"...last right...");
 | 
|---|
| 170 |     Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
 | 
|---|
| 171 | 
 | 
|---|
| 172 |     //fprintf(stderr,"...left columns...");
 | 
|---|
| 173 |     for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
 | 
|---|
| 174 |       Aloc[(l)] = Aloc[(l-2)];
 | 
|---|
| 175 | 
 | 
|---|
| 176 |     //fprintf(stderr,"...first left...");
 | 
|---|
| 177 | //    if (max_rounds > 1)
 | 
|---|
| 178 |       Aloc[0] = Abuffer2; // get first left column
 | 
|---|
| 179 |     MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
 | 
|---|
| 180 | 
 | 
|---|
| 181 |   } else {
 | 
|---|
| 182 |     // get last left column
 | 
|---|
| 183 |     MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
 | 
|---|
| 184 |     Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
 | 
|---|
| 185 | 
 | 
|---|
| 186 |     //fprintf(stderr,"...first right...");
 | 
|---|
| 187 |     // get first right column
 | 
|---|
| 188 |     MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
 | 
|---|
| 189 |     Abuffer2 = Aloc[1]; // note down the free column
 | 
|---|
| 190 | 
 | 
|---|
| 191 |     //fprintf(stderr,"...left columns...");
 | 
|---|
| 192 |     for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
 | 
|---|
| 193 |       Aloc[(l)] = Aloc[(l-2)];
 | 
|---|
| 194 | 
 | 
|---|
| 195 |     //fprintf(stderr,"...right columns...");
 | 
|---|
| 196 |     for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
 | 
|---|
| 197 |       Aloc[(l)] = Aloc[(l+2)];
 | 
|---|
| 198 | 
 | 
|---|
| 199 |     //fprintf(stderr,"...first left...");
 | 
|---|
| 200 |     Aloc[0] = Abuffer1; // get first left column
 | 
|---|
| 201 |     MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
 | 
|---|
| 202 | 
 | 
|---|
| 203 |     //fprintf(stderr,"...last right...");
 | 
|---|
| 204 |     Aloc[(2*max_rounds-1)] = Abuffer2;
 | 
|---|
| 205 |     MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
 | 
|---|
| 206 |   }
 | 
|---|
| 207 |     
 | 
|---|
| 208 |   //fprintf(stderr,"...waiting...");
 | 
|---|
| 209 |   if (ProcRank != ProcNum-1)
 | 
|---|
| 210 |     MPI_Wait(&requestS0, &status);
 | 
|---|
| 211 |   if (ProcRank != 0)  // first left column
 | 
|---|
| 212 |     MPI_Wait(&requestR0, &status);
 | 
|---|
| 213 |   if (ProcRank != 0)
 | 
|---|
| 214 |     MPI_Wait(&requestS1, &status);
 | 
|---|
| 215 |   if (ProcRank != ProcNum-1)
 | 
|---|
| 216 |     MPI_Wait(&requestR1, &status);
 | 
|---|
| 217 |   //fprintf(stderr,"...done\n");
 | 
|---|
| 218 | }
 | 
|---|
| 219 | 
 | 
|---|
| 220 | /** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group.
 | 
|---|
| 221 |  * \param *P Problem at hand
 | 
|---|
| 222 |  * \param AllocNum number of rows in matrix
 | 
|---|
| 223 |  * \return address of MPI communicator
 | 
|---|
| 224 |  */
 | 
|---|
| 225 | #ifdef HAVE_INLINE
 | 
|---|
| 226 | inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
 | 
|---|
| 227 | #else
 | 
|---|
| 228 | MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
 | 
|---|
| 229 | #endif
 | 
|---|
| 230 | {
 | 
|---|
| 231 |   MPI_Comm *comm = &P->Par.comm_ST;
 | 
|---|
| 232 |   
 | 
|---|
| 233 |   //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
 | 
|---|
| 234 |   //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me);
 | 
|---|
| 235 |   if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) {  // all parallel
 | 
|---|
| 236 |     comm = &P->Par.comm_ST;
 | 
|---|
| 237 |     //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n");           
 | 
|---|
| 238 |   } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first  
 | 
|---|
| 239 |     if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel
 | 
|---|
| 240 |       comm = &P->Par.comm_ST_Psi;
 | 
|---|
| 241 |       //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");       
 | 
|---|
| 242 |     } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel
 | 
|---|
| 243 |       comm = &P->Par.comm_ST_PsiT;
 | 
|---|
| 244 |       //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");       
 | 
|---|
| 245 |     }
 | 
|---|
| 246 |   } else {
 | 
|---|
| 247 |     if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel 
 | 
|---|
| 248 |       comm = &P->Par.comm_ST_PsiT;
 | 
|---|
| 249 |       //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");       
 | 
|---|
| 250 |     } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel
 | 
|---|
| 251 |       comm = &P->Par.comm_ST_Psi;
 | 
|---|
| 252 |       //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");       
 | 
|---|
| 253 |     }
 | 
|---|
| 254 |   }
 | 
|---|
| 255 |   return comm;
 | 
|---|
| 256 | }
 | 
|---|
| 257 | 
 | 
|---|
| 258 | /** Allocates and fills Lookup table for sin/cos values at each grid node.
 | 
|---|
| 259 |  * \param ***cos_table pointer to two-dimensional lookup table for cosine values
 | 
|---|
| 260 |  * \param ***sin_table pointer to two-dimensional lookup table for sine values
 | 
|---|
| 261 |  * \param *N array with number of nodes per \a NDIM axis
 | 
|---|
| 262 |  */
 | 
|---|
| 263 | void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N)
 | 
|---|
| 264 | {
 | 
|---|
| 265 |   int i, j;
 | 
|---|
| 266 |   double argument;
 | 
|---|
| 267 |   double **cos_lookup, **sin_lookup;
 | 
|---|
| 268 |   
 | 
|---|
| 269 |   // create lookup table for sin/cos values
 | 
|---|
| 270 |   cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
 | 
|---|
| 271 |   sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");  
 | 
|---|
| 272 |   for (i=0;i<NDIM;i++) {
 | 
|---|
| 273 |     // allocate memory
 | 
|---|
| 274 |     cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup");
 | 
|---|
| 275 |     sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup");
 | 
|---|
| 276 |     
 | 
|---|
| 277 |     // reset arrays
 | 
|---|
| 278 |     SetArrayToDouble0(cos_lookup[i],N[i]);
 | 
|---|
| 279 |     SetArrayToDouble0(sin_lookup[i],N[i]);
 | 
|---|
| 280 |     
 | 
|---|
| 281 |     // create lookup values
 | 
|---|
| 282 |     for (j=0;j<N[i];j++) {
 | 
|---|
| 283 |       argument = 2*PI/(double)N[i]*(double)j; 
 | 
|---|
| 284 |       cos_lookup[i][j] = cos(argument);
 | 
|---|
| 285 |       sin_lookup[i][j] = sin(argument);
 | 
|---|
| 286 |     }
 | 
|---|
| 287 |   }
 | 
|---|
| 288 |   *cos_table = cos_lookup;
 | 
|---|
| 289 |   *sin_table = sin_lookup;
 | 
|---|
| 290 | }
 | 
|---|
| 291 | 
 | 
|---|
| 292 | /** Frees memory allocated during CreateSinCosLookupTable().
 | 
|---|
| 293 |  * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values
 | 
|---|
| 294 |  * \param ***sin_lookup pointer to two-dimensional lookup table for sine values
 | 
|---|
| 295 |  */
 | 
|---|
| 296 | void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup)
 | 
|---|
| 297 | {
 | 
|---|
| 298 |   int i;
 | 
|---|
| 299 |   // free lookups
 | 
|---|
| 300 |   for (i=0;i<NDIM;i++) {
 | 
|---|
| 301 |     Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]");
 | 
|---|
| 302 |     Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]");
 | 
|---|
| 303 |   }
 | 
|---|
| 304 |   Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup");
 | 
|---|
| 305 |   Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup");
 | 
|---|
| 306 | }
 | 
|---|
| 307 | 
 | 
|---|
| 308 | /** Fills the entries of the six variance matrices.
 | 
|---|
| 309 |  * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the
 | 
|---|
| 310 |  * wave function and by diagonalization one obtains the unitary transformation with which the Psis are
 | 
|---|
| 311 |  * treated afterwards.
 | 
|---|
| 312 |  * \param *P Problem at hand
 | 
|---|
| 313 |  * \param AllocNum number of rows/columns
 | 
|---|
| 314 |  * \param **A pointer to variance matrices
 | 
|---|
| 315 |  * \sa ComputeMLWF() - master function. 
 | 
|---|
| 316 |  */
 | 
|---|
| 317 | void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A)
 | 
|---|
| 318 | {
 | 
|---|
| 319 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 320 |   struct RunStruct *R = &P->R;
 | 
|---|
| 321 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 322 |   struct LatticeLevel *Lev0 = R->Lev0;
 | 
|---|
| 323 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
| 324 |   struct Density *Dens0 = Lev0->Dens;
 | 
|---|
| 325 |   struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB;
 | 
|---|
| 326 |   struct fft_plan_3d *plan = Lat->plan; 
 | 
|---|
| 327 |   fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
 | 
|---|
| 328 |   fftw_real *PsiCR = (fftw_real *)PsiC;
 | 
|---|
| 329 |   fftw_complex *work = Dens0->DensityCArray[Temp2Density]; 
 | 
|---|
| 330 |   fftw_real **HGcR = &Dens0->DensityArray[HGDensity];  // use HGDensity, 4x Gap..Density, TempDensity as a storage array
 | 
|---|
| 331 |   fftw_complex **HGcRC = (fftw_complex**)HGcR;
 | 
|---|
| 332 |   fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array
 | 
|---|
| 333 |   fftw_real **HGcR2 = (fftw_real**)HGcR2C;
 | 
|---|
| 334 |   int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
 | 
|---|
| 335 |   MPI_Status status;
 | 
|---|
| 336 |   fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
 | 
|---|
| 337 |   int n[NDIM],n0,i0,iS, Index;
 | 
|---|
| 338 |   int Num = Psi->NoOfPsis;  // is number of occupied plus unoccupied states for rows
 | 
|---|
| 339 |   int N0 = LevS->Plan0.plan->local_nx;
 | 
|---|
| 340 |   int *N = LevS->Plan0.plan->N;
 | 
|---|
| 341 |   const int NUpx = LevS->NUp[0];
 | 
|---|
| 342 |   const int NUpy = LevS->NUp[1];
 | 
|---|
| 343 |   const int NUpz = LevS->NUp[2];
 | 
|---|
| 344 |   double argument, PsiAtNode;
 | 
|---|
| 345 |   int e,g,i,j,k,l,m,p,u;
 | 
|---|
| 346 |   double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
 | 
|---|
| 347 |   double **cos_lookup =  NULL,**sin_lookup = NULL; 
 | 
|---|
| 348 | 
 | 
|---|
| 349 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
 | 
|---|
| 350 | 
 | 
|---|
| 351 |   debug(P, "Creating Lookup Table");
 | 
|---|
| 352 |   CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N);
 | 
|---|
| 353 | 
 | 
|---|
| 354 |   debug(P, "Calculating each entry of variance matrices");
 | 
|---|
| 355 |   l=-1; // to access U matrix element (0..Num-1)
 | 
|---|
| 356 |   // fill the matrices
 | 
|---|
| 357 |   for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions
 | 
|---|
| 358 |     OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA
 | 
|---|
| 359 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 360 |       l++;  // increase l if it is non-extra wave function
 | 
|---|
| 361 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
| 362 |         LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
 | 
|---|
| 363 |       else
 | 
|---|
| 364 |         LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients!
 | 
|---|
| 365 |       
 | 
|---|
| 366 |       //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
 | 
|---|
| 367 |       if (LPsiDatA != NULL) {
 | 
|---|
| 368 |         CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
 | 
|---|
| 369 |         // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
 | 
|---|
| 370 |         for (n0=0;n0<N0;n0++)  
 | 
|---|
| 371 |           for (n[1]=0;n[1]<N[1];n[1]++) 
 | 
|---|
| 372 |             for (n[2]=0;n[2]<N[2];n[2]++) {
 | 
|---|
| 373 |               i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
 | 
|---|
| 374 |               iS = n[2]+N[2]*(n[1]+N[1]*n0);
 | 
|---|
| 375 |               n[0] = n0 + LevS->Plan0.plan->start_nx;
 | 
|---|
| 376 |               for (k=0;k<max_operators;k+=2) {
 | 
|---|
| 377 |                 e = k/2;
 | 
|---|
| 378 |                 argument = 2.*PI/(double)(N[e])*(double)(n[e]);
 | 
|---|
| 379 |                 PsiAtNode = PsiCR[i0] /LevS->MaxN;
 | 
|---|
| 380 |                 // check lookup
 | 
|---|
| 381 |                 if (!l) // perform check on first wave function only
 | 
|---|
| 382 |                   if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) {
 | 
|---|
| 383 |                     Error(SomeError, "Lookup table does not match real value!");
 | 
|---|
| 384 |                   }
 | 
|---|
| 385 |                 HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
 | 
|---|
| 386 |                 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
 | 
|---|
| 387 |                 HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
 | 
|---|
| 388 |                 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
 | 
|---|
| 389 |               }
 | 
|---|
| 390 |             }
 | 
|---|
| 391 |         for (u=0;u<max_operators;u++) {
 | 
|---|
| 392 |           fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
 | 
|---|
| 393 |           fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
 | 
|---|
| 394 |         }
 | 
|---|
| 395 |       }  
 | 
|---|
| 396 |       m = -1; // to access U matrix element (0..Num-1)
 | 
|---|
| 397 |       for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions
 | 
|---|
| 398 |         OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB
 | 
|---|
| 399 |         if (OnePsiB->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 400 |           m++;  // increase m if it is non-extra wave function
 | 
|---|
| 401 |           if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
| 402 |              LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
 | 
|---|
| 403 |           else 
 | 
|---|
| 404 |              LOnePsiB = NULL;
 | 
|---|
| 405 |           if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi
 | 
|---|
| 406 |             RecvSource = OnePsiB->my_color_comm_ST_Psi;
 | 
|---|
| 407 |             MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
 | 
|---|
| 408 |             LPsiDatB=LevS->LPsi->TempPsi;
 | 
|---|
| 409 |           } else {                  // .. otherwise send it to all other processes (Max_me... - 1)
 | 
|---|
| 410 |             for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
 | 
|---|
| 411 |               if (p != OnePsiB->my_color_comm_ST_Psi) 
 | 
|---|
| 412 |                 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
 | 
|---|
| 413 |             LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
 | 
|---|
| 414 |           } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
 | 
|---|
| 415 | 
 | 
|---|
| 416 |           for (u=0;u<max_operators;u++) {
 | 
|---|
| 417 |             a_ij = 0;
 | 
|---|
| 418 |             b_ij = 0;
 | 
|---|
| 419 |             if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
 | 
|---|
| 420 |               //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
 | 
|---|
| 421 |               g=0;
 | 
|---|
| 422 |               if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
| 423 |                 Index = LevS->GArray[g].Index;
 | 
|---|
| 424 |                 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
 | 
|---|
| 425 |                 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
 | 
|---|
| 426 |                 g++;
 | 
|---|
| 427 |               }
 | 
|---|
| 428 |               for (; g < LevS->MaxG; g++) {
 | 
|---|
| 429 |                 Index = LevS->GArray[g].Index;
 | 
|---|
| 430 |                 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
 | 
|---|
| 431 |                 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
 | 
|---|
| 432 |               } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
 | 
|---|
| 433 |               // sum up elements from all coefficients sharing processes
 | 
|---|
| 434 |               MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
| 435 |               MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
| 436 |               a_ij = A_ij;
 | 
|---|
| 437 |               b_ij = B_ij;
 | 
|---|
| 438 |               // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
 | 
|---|
| 439 |               MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
 | 
|---|
| 440 |               MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
 | 
|---|
| 441 |             } else { // receive ...
 | 
|---|
| 442 |               MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
 | 
|---|
| 443 |               MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);              
 | 
|---|
| 444 |             }
 | 
|---|
| 445 |             // ... and store
 | 
|---|
| 446 |             //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
 | 
|---|
| 447 |             //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
 | 
|---|
| 448 |             gsl_matrix_set(A[u], l, m, A_ij);
 | 
|---|
| 449 |             gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
 | 
|---|
| 450 |           }
 | 
|---|
| 451 |         }
 | 
|---|
| 452 |       }
 | 
|---|
| 453 |     }
 | 
|---|
| 454 |   }
 | 
|---|
| 455 |   // reset extra entries
 | 
|---|
| 456 |   for (u=0;u<=max_operators;u++) {
 | 
|---|
| 457 |     for (i=Num;i<AllocNum;i++)
 | 
|---|
| 458 |       for (j=0;j<AllocNum;j++)
 | 
|---|
| 459 |         gsl_matrix_set(A[u], i,j, 0.);
 | 
|---|
| 460 |     for (i=Num;i<AllocNum;i++)
 | 
|---|
| 461 |       for (j=0;j<AllocNum;j++)
 | 
|---|
| 462 |         gsl_matrix_set(A[u], j,i, 0.);
 | 
|---|
| 463 |   }
 | 
|---|
| 464 |   FreeSinCosLookupTable(cos_lookup, sin_lookup);
 | 
|---|
| 465 | 
 | 
|---|
| 466 | /*// print A matrices for debug
 | 
|---|
| 467 |   if (P->Par.me == 0) 
 | 
|---|
| 468 |     for (u=0;u<max_operators+1;u++) {
 | 
|---|
| 469 |      fprintf(stderr, "A[%i] = \n",u);
 | 
|---|
| 470 |       for (i=0;i<Num;i++) {
 | 
|---|
| 471 |         for (j=0;j<Num;j++)
 | 
|---|
| 472 |           fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
 | 
|---|
| 473 |         fprintf(stderr, "\n");
 | 
|---|
| 474 |       }
 | 
|---|
| 475 |     }  
 | 
|---|
| 476 | */
 | 
|---|
| 477 | }
 | 
|---|
| 478 | 
 | 
|---|
| 479 | /** Calculates reciprocal second order moments of each PsiType#Occupied orbital.
 | 
|---|
| 480 |  * First order is zero due to wave function being real (symmetry condition).
 | 
|---|
| 481 |  * \param *P Problem at hand
 | 
|---|
| 482 |  */
 | 
|---|
| 483 | void CalculateSecondOrderReciprocalMoment(struct Problem *P)
 | 
|---|
| 484 | {
 | 
|---|
| 485 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 486 |   struct RunStruct *R = &P->R;
 | 
|---|
| 487 |   struct FileData *F = &P->Files;
 | 
|---|
| 488 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 489 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
| 490 |   double result, Result; 
 | 
|---|
| 491 |   fftw_complex *LPsiDatA=NULL;
 | 
|---|
| 492 |   struct OnePsiElement *OnePsiA;
 | 
|---|
| 493 |   int i,j,l,g;
 | 
|---|
| 494 |   char spin[12], suffix[18];
 | 
|---|
| 495 | 
 | 
|---|
| 496 |   switch (Lat->Psi.PsiST) {
 | 
|---|
| 497 |   case SpinDouble:
 | 
|---|
| 498 |     strcpy(suffix,".recispread.csv");    
 | 
|---|
| 499 |     strcpy(spin,"SpinDouble");
 | 
|---|
| 500 |     break;
 | 
|---|
| 501 |   case SpinUp:
 | 
|---|
| 502 |     strcpy(suffix,".recispread_up.csv");    
 | 
|---|
| 503 |     strcpy(spin,"SpinUp");
 | 
|---|
| 504 |     break;
 | 
|---|
| 505 |   case SpinDown:
 | 
|---|
| 506 |     strcpy(suffix,".recispread_down.csv");    
 | 
|---|
| 507 |     strcpy(spin,"SpinDown");
 | 
|---|
| 508 |     break;
 | 
|---|
| 509 |   }
 | 
|---|
| 510 |   if(P->Par.me_comm_ST == 0) {
 | 
|---|
| 511 |     if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me);
 | 
|---|
| 512 |     if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level 
 | 
|---|
| 513 |       OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
 | 
|---|
| 514 |     else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile)
 | 
|---|
| 515 |       OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
 | 
|---|
| 516 |     if (F->ReciSpreadFile == NULL) {
 | 
|---|
| 517 |       Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n");
 | 
|---|
| 518 |     } else {   
 | 
|---|
| 519 |       fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin);
 | 
|---|
| 520 |     }
 | 
|---|
| 521 |   }
 | 
|---|
| 522 | 
 | 
|---|
| 523 |   // integrate second order moment
 | 
|---|
| 524 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
| 525 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 526 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 527 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
| 528 |         LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
 | 
|---|
| 529 |       else
 | 
|---|
| 530 |         LPsiDatA = NULL;
 | 
|---|
| 531 |       
 | 
|---|
| 532 |       if (LPsiDatA != NULL) {
 | 
|---|
| 533 |         if (P->Par.me_comm_ST == 0) 
 | 
|---|
| 534 |           fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo); 
 | 
|---|
| 535 |         for (i=0;i<NDIM;i++) {
 | 
|---|
| 536 |           for (j=0;j<NDIM;j++) {
 | 
|---|
| 537 |             result = 0.;
 | 
|---|
| 538 |             g = 0;
 | 
|---|
| 539 |             if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
| 540 |               result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re);
 | 
|---|
| 541 |               g++;
 | 
|---|
| 542 |             }
 | 
|---|
| 543 |             for (;g<LevS->MaxG;g++)
 | 
|---|
| 544 |               result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im);
 | 
|---|
| 545 |             //result *= Lat->Volume/LevS->MaxG;
 | 
|---|
| 546 |             MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
| 547 |             if (P->Par.me_comm_ST == 0) 
 | 
|---|
| 548 |               fprintf(F->ReciSpreadFile,"%2.5lg  ", Result);
 | 
|---|
| 549 |           }
 | 
|---|
| 550 |         }
 | 
|---|
| 551 |         if (P->Par.me_comm_ST == 0) 
 | 
|---|
| 552 |           fprintf(F->ReciSpreadFile,"\n");
 | 
|---|
| 553 |       }
 | 
|---|
| 554 |     }
 | 
|---|
| 555 |   }
 | 
|---|
| 556 |   if(P->Par.me_comm_ST == 0) { 
 | 
|---|
| 557 |     fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n");
 | 
|---|
| 558 |     fflush(F->ReciSpreadFile);
 | 
|---|
| 559 |   }
 | 
|---|
| 560 | }
 | 
|---|
| 561 | 
 | 
|---|
| 562 | /** Given the unitary matrix the transformation is performed on the Psis.
 | 
|---|
| 563 |  * \param *P Problem at hand
 | 
|---|
| 564 |  * \param *U gsl matrix containing the transformation matrix
 | 
|---|
| 565 |  * \param Num dimension parameter for the matrix, i.e. number of wave functions
 | 
|---|
| 566 |  */
 | 
|---|
| 567 | void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num)
 | 
|---|
| 568 | {
 | 
|---|
| 569 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 570 |   struct RunStruct *R = &P->R;
 | 
|---|
| 571 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 572 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
| 573 |   MPI_Status status;
 | 
|---|
| 574 |   struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
 | 
|---|
| 575 |   int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
 | 
|---|
| 576 |   fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
 | 
|---|
| 577 |   int g,i,j,l,k,m,p;
 | 
|---|
| 578 | 
 | 
|---|
| 579 |   //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me); 
 | 
|---|
| 580 |   
 | 
|---|
| 581 |   Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
 | 
|---|
| 582 |   fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
 | 
|---|
| 583 | 
 | 
|---|
| 584 |   for (l=0;l<Num;l++) // allocate for each local wave function
 | 
|---|
| 585 |     coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
 | 
|---|
| 586 |   
 | 
|---|
| 587 |   //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
 | 
|---|
| 588 |   l=-1; // to access U matrix element (0..Num-1)
 | 
|---|
| 589 |   k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
 | 
|---|
| 590 |   for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions
 | 
|---|
| 591 |     OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA
 | 
|---|
| 592 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 593 |       l++;  // increase l if it is occupied wave function
 | 
|---|
| 594 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
 | 
|---|
| 595 |         k++; // increase k only if it is a local, non-extra orbital wave function
 | 
|---|
| 596 |         LPsiDatA = (fftw_complex *) coeffs_buffer[k];    // new coeffs first go to copy buffer, old ones must not be overwritten yet
 | 
|---|
| 597 |         SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG);  // zero buffer part
 | 
|---|
| 598 |       } else
 | 
|---|
| 599 |         LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients!
 | 
|---|
| 600 | 
 | 
|---|
| 601 |       m = -1; // to access U matrix element (0..Num-1)
 | 
|---|
| 602 |       for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions
 | 
|---|
| 603 |         OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB
 | 
|---|
| 604 |         if (OnePsiB->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 605 |           m++;  // increase m if it is occupied wave function
 | 
|---|
| 606 |           if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
| 607 |              LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
 | 
|---|
| 608 |           else 
 | 
|---|
| 609 |              LOnePsiB = NULL;
 | 
|---|
| 610 |           if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi
 | 
|---|
| 611 |             RecvSource = OnePsiB->my_color_comm_ST_Psi;
 | 
|---|
| 612 |             MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
 | 
|---|
| 613 |             LPsiDatB=LevS->LPsi->TempPsi;
 | 
|---|
| 614 |           } else {                  // .. otherwise send it to all other processes (Max_me... - 1)
 | 
|---|
| 615 |             for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
 | 
|---|
| 616 |               if (p != OnePsiB->my_color_comm_ST_Psi) 
 | 
|---|
| 617 |                 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
 | 
|---|
| 618 |             LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
 | 
|---|
| 619 |           } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
 | 
|---|
| 620 | 
 | 
|---|
| 621 |           if (LPsiDatA != NULL) {
 | 
|---|
| 622 |             double tmp = gsl_matrix_get(U,l,m);
 | 
|---|
| 623 |             g=0;
 | 
|---|
| 624 |             if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
| 625 |               LPsiDatA[g].re += LPsiDatB[g].re * tmp;
 | 
|---|
| 626 |               LPsiDatA[g].im += LPsiDatB[g].im * tmp;
 | 
|---|
| 627 |               g++;
 | 
|---|
| 628 |             }
 | 
|---|
| 629 |             for (; g < LevS->MaxG; g++) {
 | 
|---|
| 630 |               LPsiDatA[g].re += LPsiDatB[g].re * tmp;
 | 
|---|
| 631 |               LPsiDatA[g].im += LPsiDatB[g].im * tmp;
 | 
|---|
| 632 |             }
 | 
|---|
| 633 |           }
 | 
|---|
| 634 |         }
 | 
|---|
| 635 |       }
 | 
|---|
| 636 |     }
 | 
|---|
| 637 |   }
 | 
|---|
| 638 | 
 | 
|---|
| 639 |   //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
 | 
|---|
| 640 |   // now, as all wave functions are updated, swap the buffer
 | 
|---|
| 641 |   l = -1;
 | 
|---|
| 642 |   for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
 | 
|---|
| 643 |     if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
 | 
|---|
| 644 |       l++;
 | 
|---|
| 645 |       //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
 | 
|---|
| 646 |       LPsiDatA = (fftw_complex *)coeffs_buffer[l];
 | 
|---|
| 647 |       LPsiDatB = LevS->LPsi->LocalPsi[l];
 | 
|---|
| 648 |       for (g=0;g<LevS->MaxG;g++) {
 | 
|---|
| 649 |         LPsiDatB[g].re = LPsiDatA[g].re;
 | 
|---|
| 650 |         LPsiDatB[g].im = LPsiDatA[g].im;
 | 
|---|
| 651 |       }
 | 
|---|
| 652 |       // recalculating non-local form factors which are coefficient dependent!
 | 
|---|
| 653 |       CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
 | 
|---|
| 654 |     }
 | 
|---|
| 655 |   } 
 | 
|---|
| 656 |   // and free allocated buffer memory
 | 
|---|
| 657 |   Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer");
 | 
|---|
| 658 | }
 | 
|---|
| 659 | 
 | 
|---|
| 660 | /** Changes Wannier Centres according to RunStruct#CommonWannier.
 | 
|---|
| 661 |  * \param *P Problem at hand.
 | 
|---|
| 662 |  */
 | 
|---|
| 663 | void ChangeWannierCentres(struct Problem *P)
 | 
|---|
| 664 | {
 | 
|---|
| 665 |   struct RunStruct *R = &P->R;
 | 
|---|
| 666 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 667 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
| 668 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 669 |   struct OnePsiElement *OnePsiA;
 | 
|---|
| 670 |   int *marker, **group;
 | 
|---|
| 671 |   int Num = Psi->NoOfPsis;
 | 
|---|
| 672 |   double **WannierCentre = NULL;
 | 
|---|
| 673 |   double *WannierSpread = NULL;
 | 
|---|
| 674 |   int partner[Num];
 | 
|---|
| 675 |   int i,j,l,k;
 | 
|---|
| 676 |   int totalflag, flag;
 | 
|---|
| 677 |   double q[NDIM], center[NDIM];
 | 
|---|
| 678 |   double Spread;
 | 
|---|
| 679 |   int *N = LevS->Plan0.plan->N;
 | 
|---|
| 680 | 
 | 
|---|
| 681 |   int MPISignal = 0, mpisignal = 0;
 | 
|---|
| 682 |   int membersize, maxsize;
 | 
|---|
| 683 |   int msgsize;
 | 
|---|
| 684 |   double *buffer = NULL;
 | 
|---|
| 685 |   int position;
 | 
|---|
| 686 |   MPI_Status status;
 | 
|---|
| 687 |   
 | 
|---|
| 688 |   // determine necessary pack/buffer sizes for the wannier stuff
 | 
|---|
| 689 |   MPI_Pack_size(3, MPI_DOUBLE, P->Par.comm_ST_PsiT, &membersize);
 | 
|---|
| 690 |   maxsize = membersize;
 | 
|---|
| 691 |   MPI_Pack_size(1, MPI_DOUBLE, P->Par.comm_ST_PsiT, &membersize);
 | 
|---|
| 692 |   maxsize += membersize;
 | 
|---|
| 693 |   
 | 
|---|
| 694 |   // allocate memory
 | 
|---|
| 695 |   buffer = (double *) Malloc(sizeof(double)*maxsize, "ChangeWannierCentres: *buffer");
 | 
|---|
| 696 |   WannierSpread = (double *) Malloc(sizeof(double)*Num, "ChangeWannierCentres: *WannierSpread");
 | 
|---|
| 697 |   WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ChangeWannierCentres: **WannierCentre");
 | 
|---|
| 698 |   for (j=0;j<Num;j++)
 | 
|---|
| 699 |     WannierCentre[j] = (double *) Malloc(sizeof(double)*(NDIM+1), "ChangeWannierCentres: *WannierCentre[]");
 | 
|---|
| 700 | 
 | 
|---|
| 701 |   // exchange wannier centers and spread among processes by going over all wave functions
 | 
|---|
| 702 |   i=-1;
 | 
|---|
| 703 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {
 | 
|---|
| 704 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 705 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 706 |       i++;  // increase l if it is occupied wave function
 | 
|---|
| 707 |       
 | 
|---|
| 708 |       // send and receive centres and spread (we use the MPI_Pack'ing scheme to preserve correct order of data)
 | 
|---|
| 709 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
 | 
|---|
| 710 |         position = 0;
 | 
|---|
| 711 |         MPI_Pack(Psi->AddData[OnePsiA->MyLocalNo].WannierCentre, 3, MPI_DOUBLE, buffer, maxsize, &position, P->Par.comm_ST_PsiT);
 | 
|---|
| 712 |         MPI_Pack(&Psi->AddData[OnePsiA->MyLocalNo].WannierSpread, 1, MPI_DOUBLE, buffer, maxsize, &position, P->Par.comm_ST_PsiT);
 | 
|---|
| 713 |         for (k=0;k<P->Par.Max_me_comm_ST_PsiT;k++) 
 | 
|---|
| 714 |           if (k != P->Par.my_color_comm_ST_Psi) {  // send to all other processes
 | 
|---|
| 715 |             fprintf(stderr, "(%i) Send goes from %d to %d\n", P->Par.me, k, P->Par.my_color_comm_ST_Psi);
 | 
|---|
| 716 |             if (MPI_Send(buffer, position, MPI_PACKED, k, WannierTag5, P->Par.comm_ST_PsiT) != MPI_SUCCESS) {
 | 
|---|
| 717 |               mpisignal = 1;
 | 
|---|
| 718 |             }
 | 
|---|
| 719 |           } else {  // "send" to us
 | 
|---|
| 720 |             for(j=0;j<NDIM;j++)  // copy to own array as well
 | 
|---|
| 721 |               WannierCentre[i][j] = Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j];
 | 
|---|
| 722 |             WannierSpread[i] = Psi->AddData[OnePsiA->MyLocalNo].WannierSpread;
 | 
|---|
| 723 |           }
 | 
|---|
| 724 |       } else {
 | 
|---|
| 725 |         fprintf(stderr, "(%i) Receive is from %d to %d\n", P->Par.me, OnePsiA->my_color_comm_ST_Psi, P->Par.my_color_comm_ST_Psi);
 | 
|---|
| 726 |         if (MPI_Recv(buffer, maxsize, MPI_PACKED, OnePsiA->my_color_comm_ST_Psi, WannierTag5, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
 | 
|---|
| 727 |           mpisignal = 1;
 | 
|---|
| 728 |         position = 0;
 | 
|---|
| 729 |         MPI_Get_count(&status, MPI_PACKED, &msgsize);
 | 
|---|
| 730 |         MPI_Unpack (buffer, msgsize, &position, WannierCentre[i], 3, MPI_DOUBLE, P->Par.comm_ST_PsiT);
 | 
|---|
| 731 |         MPI_Unpack (buffer, msgsize, &position, &WannierSpread[i], 1, MPI_DOUBLE, P->Par.comm_ST_PsiT);
 | 
|---|
| 732 |       }
 | 
|---|
| 733 |       
 | 
|---|
| 734 |       // check for MPI errors
 | 
|---|
| 735 |       if (MPI_Allreduce(&mpisignal, &MPISignal, 1, MPI_INT, MPI_SUM, P->Par.comm_ST_PsiT) != MPI_SUCCESS) // allreduce signals
 | 
|---|
| 736 |         Error(SomeError,"ChangeWannierCentres: Bcast of signal failed\n");
 | 
|---|
| 737 |       if (MPISignal) { // greater 0 means there was at least one hickup somewhere
 | 
|---|
| 738 |         Error(SomeError,"ChangeWannierCentres: Scattering of Wanniers failed\n");
 | 
|---|
| 739 |       }
 | 
|---|
| 740 |     }
 | 
|---|
| 741 |   }
 | 
|---|
| 742 |   // free the buffer memory
 | 
|---|
| 743 |   Free(buffer, "ChangeWannierCentres: *buffer");
 | 
|---|
| 744 | 
 | 
|---|
| 745 |   if ((P->Call.out[NormalOut]) && (1)) { // root prints gathered wannier centres for debugging
 | 
|---|
| 746 |     fprintf(stderr, "(%i) Printing gathered Wannier centres:\n", P->Par.me);
 | 
|---|
| 747 |     for(l=0;l<Num;l++) {
 | 
|---|
| 748 |       fprintf(stderr, "(%i) ", P->Par.me);
 | 
|---|
| 749 |       for(j=0;j<NDIM;j++)
 | 
|---|
| 750 |         fprintf(stderr, "%lg\t", WannierCentre[l][j]);
 | 
|---|
| 751 |       fprintf(stderr, "-\t%lg\n", WannierSpread[l]);
 | 
|---|
| 752 |     }
 | 
|---|
| 753 |   }
 | 
|---|
| 754 |       
 | 
|---|
| 755 |   // change centres
 | 
|---|
| 756 |   switch (R->CommonWannier) {
 | 
|---|
| 757 |    case 4:
 | 
|---|
| 758 |     debug(P,"Shifting each Wannier centers to cell center");
 | 
|---|
| 759 |     for (j=0;j<NDIM;j++)  // center point in [0,1]^3
 | 
|---|
| 760 |       center[j] = 0.5;
 | 
|---|
| 761 |     RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates
 | 
|---|
| 762 |     for (i=0; i < Num; i++) {  // go through all occupied wave functions
 | 
|---|
| 763 |       for (j=0;j<NDIM;j++)  // put into Wannier centres
 | 
|---|
| 764 |         WannierCentre[i][j] = q[j]; 
 | 
|---|
| 765 |     }    
 | 
|---|
| 766 |     break;
 | 
|---|
| 767 |    case 3:
 | 
|---|
| 768 |     debug(P,"Shifting Wannier centers individually to nearest grid point");
 | 
|---|
| 769 |     for (i=0;i < Num; i++) {  // go through all wave functions
 | 
|---|
| 770 |       RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]);
 | 
|---|
| 771 |       for (j=0;j<NDIM;j++) {  // Recibasis is not true inverse but times 2.*PI
 | 
|---|
| 772 |         q[j] *= (double)N[j]/(2.*PI);
 | 
|---|
| 773 |         
 | 
|---|
| 774 |         //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
 | 
|---|
| 775 |         if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j]))
 | 
|---|
| 776 |           q[j] = floor(q[j])/(double)N[j];
 | 
|---|
| 777 |         else
 | 
|---|
| 778 |           q[j] = ceil(q[j])/(double)N[j];
 | 
|---|
| 779 |       }
 | 
|---|
| 780 |       RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
 | 
|---|
| 781 |     }
 | 
|---|
| 782 |     break;
 | 
|---|
| 783 |    case 2:
 | 
|---|
| 784 |     debug(P,"Combining individual orbitals according to spread.");
 | 
|---|
| 785 |     //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
 | 
|---|
| 786 |     debug(P,"finding partners");
 | 
|---|
| 787 |     marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
 | 
|---|
| 788 |     group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
 | 
|---|
| 789 |     for (l=0;l<Num;l++) {
 | 
|---|
| 790 |       group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]");  // each must group must have one more as end marker
 | 
|---|
| 791 |       for (k=0;k<=Num;k++)
 | 
|---|
| 792 |         group[l][k] = -1; // reset partner group
 | 
|---|
| 793 |     }
 | 
|---|
| 794 |     for (k=0;k<Num;k++)
 | 
|---|
| 795 |       partner[k] = 0;
 | 
|---|
| 796 |     debug(P,"mem allocated");
 | 
|---|
| 797 |     // go for each orbital through every other, check distance against the sum of both spreads
 | 
|---|
| 798 |     // if smaller add to group of this orbital
 | 
|---|
| 799 |     for (l=0;l<Num;l++) {
 | 
|---|
| 800 |       j=0;  // index for partner group
 | 
|---|
| 801 |       for (k=0;k<Num;k++) {  // check this against l
 | 
|---|
| 802 |         Spread = 0.;
 | 
|---|
| 803 |         for (i=0;i<NDIM;i++) {
 | 
|---|
| 804 |           //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]); 
 | 
|---|
| 805 |           Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
 | 
|---|
| 806 |         }
 | 
|---|
| 807 |         //Spread = sqrt(Spread);  // distance in Spread
 | 
|---|
| 808 |         //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]); 
 | 
|---|
| 809 |         if (Spread < 5*(WannierSpread[l]+WannierSpread[k])*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread times two plus safety
 | 
|---|
| 810 |           group[l][j++] = k;  // add k to group of l
 | 
|---|
| 811 |           partner[l]++;
 | 
|---|
| 812 |           //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
 | 
|---|
| 813 |         } 
 | 
|---|
| 814 |       }
 | 
|---|
| 815 |     }
 | 
|---|
| 816 |     
 | 
|---|
| 817 |     // consistency, for each orbital check if this orbital is also in the group of each referred orbital
 | 
|---|
| 818 |     debug(P,"checking consistency");
 | 
|---|
| 819 |     totalflag = 1;
 | 
|---|
| 820 |     for (l=0;l<Num;l++) // checking l's group
 | 
|---|
| 821 |       for (k=0;k<Num;k++) { // k is partner index
 | 
|---|
| 822 |         if (group[l][k] != -1) {  // if current index k is a partner
 | 
|---|
| 823 |           flag = 0;
 | 
|---|
| 824 |           for(j=0;j<Num;j++) {  // go through each entry in l partner's partner group if l exists
 | 
|---|
| 825 |             if ((group[ group[l][k] ][j] == l))
 | 
|---|
| 826 |               flag = 1;
 | 
|---|
| 827 |           }
 | 
|---|
| 828 |           //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
 | 
|---|
| 829 |           if (totalflag == 1) totalflag = flag;
 | 
|---|
| 830 |         }
 | 
|---|
| 831 |       }
 | 
|---|
| 832 |     // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
 | 
|---|
| 833 |     debug(P,"weight and calculate new centers for partner groups");
 | 
|---|
| 834 |     for (l=0;l<=Num;l++)
 | 
|---|
| 835 |       marker[l] = 1;
 | 
|---|
| 836 |     if (totalflag) {
 | 
|---|
| 837 |       for (l=0;l<Num;l++) { // go through each orbital
 | 
|---|
| 838 |         if (marker[l] != 0) { // if it hasn't been reweighted
 | 
|---|
| 839 |           marker[l] = 0;
 | 
|---|
| 840 |           for (i=0;i<NDIM;i++) 
 | 
|---|
| 841 |             q[i] = 0.;
 | 
|---|
| 842 |           j = 0;
 | 
|---|
| 843 |           while (group[l][j] != -1) {
 | 
|---|
| 844 |             marker[group[l][j]] = 0;
 | 
|---|
| 845 |             for (i=0;i<NDIM;i++) {
 | 
|---|
| 846 |               //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
 | 
|---|
| 847 |               q[i] += WannierCentre[ group[l][j] ][i];
 | 
|---|
| 848 |             } 
 | 
|---|
| 849 |             j++;
 | 
|---|
| 850 |           }
 | 
|---|
| 851 |           //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
 | 
|---|
| 852 |           for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
 | 
|---|
| 853 |             q[i] /= (double)(j);
 | 
|---|
| 854 |           }
 | 
|---|
| 855 |   
 | 
|---|
| 856 |           // put WannierCentre into own and all partners'      
 | 
|---|
| 857 |           for (i=0;i<NDIM;i++)
 | 
|---|
| 858 |             WannierCentre[l][i] = q[i];
 | 
|---|
| 859 |           j = 0;  
 | 
|---|
| 860 |           while (group[l][j] != -1) {
 | 
|---|
| 861 |             for (i=0;i<NDIM;i++)
 | 
|---|
| 862 |               WannierCentre[group[l][j]][i] = q[i];
 | 
|---|
| 863 |             j++;
 | 
|---|
| 864 |           }
 | 
|---|
| 865 |         }
 | 
|---|
| 866 |       }
 | 
|---|
| 867 |     }
 | 
|---|
| 868 |     if (P->Call.out[StepLeaderOut]) {
 | 
|---|
| 869 |       fprintf(stderr,"Summary:\n");
 | 
|---|
| 870 |       fprintf(stderr,"========\n");
 | 
|---|
| 871 |       for (i=0;i<Num;i++)
 | 
|---|
| 872 |         fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
 | 
|---|
| 873 |     } 
 | 
|---|
| 874 |     debug(P,"done");
 | 
|---|
| 875 |   
 | 
|---|
| 876 |     Free(marker, "ChangeWannierCentres: marker");
 | 
|---|
| 877 |     for (l=0;l<Num;l++)
 | 
|---|
| 878 |       Free(group[l], "ChangeWannierCentres: group[l]");
 | 
|---|
| 879 |     Free(group, "ChangeWannierCentres: group");
 | 
|---|
| 880 |     break;
 | 
|---|
| 881 |    case 1:
 | 
|---|
| 882 |     debug(P,"Individual orbitals are changed to center of all.");
 | 
|---|
| 883 |     for (i=0;i<NDIM;i++)  // zero center of weight
 | 
|---|
| 884 |       q[i] = 0.;
 | 
|---|
| 885 |     for (k=0;k<Num;k++)
 | 
|---|
| 886 |       for (i=0;i<NDIM;i++) {  // sum up all orbitals each component
 | 
|---|
| 887 |         q[i] += WannierCentre[k][i];
 | 
|---|
| 888 |       }
 | 
|---|
| 889 |     for (i=0;i<NDIM;i++)  // divide by number
 | 
|---|
| 890 |       q[i] /= Num;
 | 
|---|
| 891 |     for (k=0;k<Num;k++)
 | 
|---|
| 892 |       for (i=0;i<NDIM;i++) {  // put into this function's array
 | 
|---|
| 893 |         WannierCentre[k][i] = q[i];
 | 
|---|
| 894 |       }
 | 
|---|
| 895 |     break;
 | 
|---|
| 896 |    case 0:
 | 
|---|
| 897 |    default:
 | 
|---|
| 898 |    break;
 | 
|---|
| 899 |   }
 | 
|---|
| 900 | 
 | 
|---|
| 901 |   if ((P->Call.out[NormalOut]) && (1)) { // root prints gathered wannier centres for debugging
 | 
|---|
| 902 |     fprintf(stderr, "(%i) Printing changed Wannier centres:\n", P->Par.me);
 | 
|---|
| 903 |     for(l=0;l<Num;l++) {
 | 
|---|
| 904 |       fprintf(stderr, "(%i) ", P->Par.me);
 | 
|---|
| 905 |       for(j=0;j<NDIM;j++)
 | 
|---|
| 906 |         fprintf(stderr, "%lg\t", WannierCentre[l][j]);
 | 
|---|
| 907 |       fprintf(stderr, "-\t%lg\n", WannierSpread[l]);
 | 
|---|
| 908 |     }
 | 
|---|
| 909 |   }
 | 
|---|
| 910 |       
 | 
|---|
| 911 |   // store info into local arrays again
 | 
|---|
| 912 |   i=-1;
 | 
|---|
| 913 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
| 914 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 915 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 916 |       i++;  // increase l if it is occupied wave function
 | 
|---|
| 917 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local
 | 
|---|
| 918 |         for(j=0;j<NDIM;j++)
 | 
|---|
| 919 |           Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
 | 
|---|
| 920 |       }
 | 
|---|
| 921 |     }
 | 
|---|
| 922 |   }
 | 
|---|
| 923 | 
 | 
|---|
| 924 |   // show locally stored info for debugging
 | 
|---|
| 925 |   if (P->Call.out[NormalOut]) {
 | 
|---|
| 926 |     i=-1;
 | 
|---|
| 927 |     for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
| 928 |       OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 929 |       if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 930 |         i++;  // increase l if it is occupied wave function
 | 
|---|
| 931 |         if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local?
 | 
|---|
| 932 |            fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0], Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1], Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2], Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
 | 
|---|
| 933 |       }
 | 
|---|
| 934 |     }
 | 
|---|
| 935 |   }
 | 
|---|
| 936 | 
 | 
|---|
| 937 |   // free memory
 | 
|---|
| 938 |   Free(WannierSpread, "ChangeWannierCentres: *WannierSpread");
 | 
|---|
| 939 |   for (j=0;j<Num;j++)
 | 
|---|
| 940 |     Free(WannierCentre[j], "ChangeWannierCentres: *WannierCentre[]");
 | 
|---|
| 941 |   Free(WannierCentre, "ChangeWannierCentres: **WannierCentre");
 | 
|---|
| 942 | }
 | 
|---|
| 943 | 
 | 
|---|
| 944 | /** From the entries of the variance matrices the spread is calculated.
 | 
|---|
| 945 |  * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$
 | 
|---|
| 946 |  * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$
 | 
|---|
| 947 |  * \param *P Problem at hand
 | 
|---|
| 948 |  * \param *A variance matrices
 | 
|---|
| 949 |  * \param old_spread first term of wannier spread
 | 
|---|
| 950 |  * \param spread second term of wannier spread
 | 
|---|
| 951 |  * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
 | 
|---|
| 952 |  * \param *WannierSpread array with wannier spread per wave function
 | 
|---|
| 953 |  */
 | 
|---|
| 954 | void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread)
 | 
|---|
| 955 | {
 | 
|---|
| 956 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 957 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 958 |   struct OnePsiElement *OnePsiA;
 | 
|---|
| 959 |   int i,j,k,l;
 | 
|---|
| 960 |   double tmp, q[NDIM];
 | 
|---|
| 961 |   
 | 
|---|
| 962 |   *old_spread = 0;
 | 
|---|
| 963 |   *spread = 0;
 | 
|---|
| 964 | 
 | 
|---|
| 965 |   // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
 | 
|---|
| 966 |   i=-1;
 | 
|---|
| 967 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
| 968 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 969 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 970 |       i++;  // increase l if it is occupied wave function
 | 
|---|
| 971 |       //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
 | 
|---|
| 972 |       
 | 
|---|
| 973 |       // calculate Wannier Centre
 | 
|---|
| 974 |       for (j=0;j<NDIM;j++) {
 | 
|---|
| 975 |         q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
 | 
|---|
| 976 |         if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
 | 
|---|
| 977 |           q[j] += 1.;
 | 
|---|
| 978 |       }
 | 
|---|
| 979 |       RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
 | 
|---|
| 980 |       
 | 
|---|
| 981 |       // store orbital spread and centre in file
 | 
|---|
| 982 |       tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
 | 
|---|
| 983 |             - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
 | 
|---|
| 984 |             - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);          
 | 
|---|
| 985 |       WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp; 
 | 
|---|
| 986 |       //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
 | 
|---|
| 987 |       //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
 | 
|---|
| 988 |         //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]); 
 | 
|---|
| 989 |       //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
 | 
|---|
| 990 |         //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
 | 
|---|
| 991 |       
 | 
|---|
| 992 |       // gather all spreads
 | 
|---|
| 993 |       *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
 | 
|---|
| 994 |       for (k=0;k<max_operators;k++)
 | 
|---|
| 995 |         *spread += pow(gsl_matrix_get(A[k],i,i),2);
 | 
|---|
| 996 |     }
 | 
|---|
| 997 |   }
 | 
|---|
| 998 | }
 | 
|---|
| 999 | 
 | 
|---|
| 1000 | /** Prints gsl_matrix nicely to screen.
 | 
|---|
| 1001 |  * \param *P Problem at hand
 | 
|---|
| 1002 |  * \param *U gsl matrix
 | 
|---|
| 1003 |  * \param Num number of rows/columns
 | 
|---|
| 1004 |  * \param *msg name of matrix, prepended before entries
 | 
|---|
| 1005 |  */
 | 
|---|
| 1006 | void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg)
 | 
|---|
| 1007 | {
 | 
|---|
| 1008 |   int k,l;
 | 
|---|
| 1009 |   fprintf(stderr,"(%i) %s = \n",P->Par.me, msg);
 | 
|---|
| 1010 |   for (k=0;k<Num;k++) {    
 | 
|---|
| 1011 |     for (l=0;l<Num;l++)
 | 
|---|
| 1012 |       fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
 | 
|---|
| 1013 |     fprintf(stderr,"\n");
 | 
|---|
| 1014 |   }
 | 
|---|
| 1015 | }
 | 
|---|
| 1016 | 
 | 
|---|
| 1017 | /** Allocates memory for the  DiagonalizationData structure
 | 
|---|
| 1018 |  * \param *P Problem at hand
 | 
|---|
| 1019 |  * \param DiagData pointer to structure
 | 
|---|
| 1020 |  * \param Num number of rows and columns in matrix
 | 
|---|
| 1021 |  * \param NumMatrices number of matrices to be simultaneously diagonalized
 | 
|---|
| 1022 |  * \param extra number of extra matrices to be passively also diagonalized
 | 
|---|
| 1023 |  */
 | 
|---|
| 1024 | void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra)
 | 
|---|
| 1025 | {
 | 
|---|
| 1026 |   int i;
 | 
|---|
| 1027 |   
 | 
|---|
| 1028 |   // store integer values
 | 
|---|
| 1029 |   //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
 | 
|---|
| 1030 |   DiagData->Num = Num;
 | 
|---|
| 1031 |   DiagData->AllocNum = ceil((double)Num / 2. ) *2;
 | 
|---|
| 1032 |   DiagData->NumMatrices = NumMatrices;
 | 
|---|
| 1033 |   DiagData->extra = extra;
 | 
|---|
| 1034 |   
 | 
|---|
| 1035 |   // determine communicator and our rank and its size
 | 
|---|
| 1036 |   DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum);
 | 
|---|
| 1037 |   MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum));
 | 
|---|
| 1038 |   MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank));
 | 
|---|
| 1039 |   
 | 
|---|
| 1040 |   // allocate memory
 | 
|---|
| 1041 |   DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
 | 
|---|
| 1042 |   gsl_matrix_set_identity(DiagData->U);
 | 
|---|
| 1043 |   
 | 
|---|
| 1044 |   DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A"); 
 | 
|---|
| 1045 |   for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) {
 | 
|---|
| 1046 |     DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
 | 
|---|
| 1047 |     gsl_matrix_set_zero(DiagData->A[i]);
 | 
|---|
| 1048 |   }
 | 
|---|
| 1049 | 
 | 
|---|
| 1050 |   // init merry-go-round array for diagonilzation
 | 
|---|
| 1051 |   DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top");
 | 
|---|
| 1052 |   DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot");
 | 
|---|
| 1053 |   for (i=0;i<DiagData->AllocNum/2;i++) {
 | 
|---|
| 1054 |     DiagData->top[i] = 2*i;
 | 
|---|
| 1055 |     DiagData->bot[i] = 2*i+1;      
 | 
|---|
| 1056 |   }
 | 
|---|
| 1057 | /*    // print starting values of index generation tables top and bot
 | 
|---|
| 1058 |       fprintf(stderr,"top\t");
 | 
|---|
| 1059 |       for (k=0;k<AllocNum/2;k++)
 | 
|---|
| 1060 |         fprintf(stderr,"%i\t", top[k]);
 | 
|---|
| 1061 |       fprintf(stderr,"\n");
 | 
|---|
| 1062 |       fprintf(stderr,"bot\t");
 | 
|---|
| 1063 |       for (k=0;k<AllocNum/2;k++)
 | 
|---|
| 1064 |         fprintf(stderr,"%i\t", bot[k]);
 | 
|---|
| 1065 |       fprintf(stderr,"\n");*/
 | 
|---|
| 1066 | }
 | 
|---|
| 1067 | 
 | 
|---|
| 1068 | /** Frees allocated memory for the DiagonalizationData structure.
 | 
|---|
| 1069 |  * \param DiagData pointer to structure
 | 
|---|
| 1070 |  */
 | 
|---|
| 1071 | void FreeDiagonalization(struct DiagonalizationData *DiagData)
 | 
|---|
| 1072 | {
 | 
|---|
| 1073 |   int i;
 | 
|---|
| 1074 |   
 | 
|---|
| 1075 |   Free(DiagData->top, "FreeDiagonalization: DiagData->top");
 | 
|---|
| 1076 |   Free(DiagData->bot, "FreeDiagonalization: DiagData->bot");
 | 
|---|
| 1077 |   gsl_matrix_free(DiagData->U);
 | 
|---|
| 1078 |   for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++)
 | 
|---|
| 1079 |     gsl_matrix_free(DiagData->A[i]);   
 | 
|---|
| 1080 |   Free(DiagData->A, "FreeDiagonalization: DiagData->A");
 | 
|---|
| 1081 | }
 | 
|---|
| 1082 | 
 | 
|---|
| 1083 | /** Orthogonalizing PsiType#Occupied wave functions for MD.
 | 
|---|
| 1084 |  * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged.
 | 
|---|
| 1085 |  * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi
 | 
|---|
| 1086 |  * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis.
 | 
|---|
| 1087 |  * \param *P Problem at hand
 | 
|---|
| 1088 |  * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well
 | 
|---|
| 1089 |  * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation
 | 
|---|
| 1090 |  * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under
 | 
|---|
| 1091 |  * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali-
 | 
|---|
| 1092 |  * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the
 | 
|---|
| 1093 |  * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a
 | 
|---|
| 1094 |  * final orthogonalisation of all Psis, after the CG minimisation.
 | 
|---|
| 1095 |  */
 | 
|---|
| 1096 | void OrthogonalizePsis(struct Problem *P)
 | 
|---|
| 1097 | {
 | 
|---|
| 1098 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 1099 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 1100 |   struct LatticeLevel *LevS = P->R.LevS;
 | 
|---|
| 1101 |   int Num = Psi->NoOfPsis;
 | 
|---|
| 1102 |   int i,j,l;
 | 
|---|
| 1103 |   struct DiagonalizationData DiagData;
 | 
|---|
| 1104 |   double PsiSP;
 | 
|---|
| 1105 | 
 | 
|---|
| 1106 |   InitDiagonalization(P, &DiagData, Num, 1, 0);
 | 
|---|
| 1107 |     
 | 
|---|
| 1108 |   // Calculate Overlap matrix
 | 
|---|
| 1109 |   for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
 | 
|---|
| 1110 |     CalculateOverlap(P, l, Occupied);  
 | 
|---|
| 1111 |   for (i=0;i<Num;i++) // fill gsl matrix with overlap values
 | 
|---|
| 1112 |     for (j=0;j<Num;j++)
 | 
|---|
| 1113 |       gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
 | 
|---|
| 1114 |   //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)");
 | 
|---|
| 1115 |   
 | 
|---|
| 1116 |   //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)");
 | 
|---|
| 1117 | 
 | 
|---|
| 1118 |   // diagonalize overlap matrix
 | 
|---|
| 1119 |   Diagonalize(P, &DiagData);
 | 
|---|
| 1120 | 
 | 
|---|
| 1121 |   //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)");
 | 
|---|
| 1122 | 
 | 
|---|
| 1123 |   // apply found unitary transformation to wave functions
 | 
|---|
| 1124 |   UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
 | 
|---|
| 1125 | 
 | 
|---|
| 1126 |   // update GramSch stati
 | 
|---|
| 1127 |   for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) {
 | 
|---|
| 1128 |     GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.);   // calculates square product if 0. is given...
 | 
|---|
| 1129 |     PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]);
 | 
|---|
| 1130 |     //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP);
 | 
|---|
| 1131 |     Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal;
 | 
|---|
| 1132 |   }
 | 
|---|
| 1133 |   UpdateGramSchAllPsiStatus(P,Psi);
 | 
|---|
| 1134 | /*
 | 
|---|
| 1135 |   for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
 | 
|---|
| 1136 |     CalculateOverlap(P, l, Occupied);  
 | 
|---|
| 1137 |   for (i=0;i<Num;i++) // fill gsl matrix with overlap values
 | 
|---|
| 1138 |     for (j=0;j<Num;j++)
 | 
|---|
| 1139 |       gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
 | 
|---|
| 1140 |   if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)");
 | 
|---|
| 1141 |   */
 | 
|---|
| 1142 |   // free memory
 | 
|---|
| 1143 |   FreeDiagonalization(&DiagData);
 | 
|---|
| 1144 | }
 | 
|---|
| 1145 | 
 | 
|---|
| 1146 | /** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD.
 | 
|---|
| 1147 |  * Ensures that two orthonormality condition are met for Psis: 
 | 
|---|
| 1148 |  * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$
 | 
|---|
| 1149 |  * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$
 | 
|---|
| 1150 |  * \param *P Problem at hand
 | 
|---|
| 1151 |  * \note [Payne] states that GramSchmidt is not well-suited, and this stronger
 | 
|---|
| 1152 |  *       condition is needed for numerical stability
 | 
|---|
| 1153 |  * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat
 | 
|---|
| 1154 |  */
 | 
|---|
| 1155 | void StrongOrthogonalizePsis(struct Problem *P)
 | 
|---|
| 1156 | {
 | 
|---|
| 1157 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 1158 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 1159 |   int Num = Psi->NoOfPsis;
 | 
|---|
| 1160 |   int i,j,l;
 | 
|---|
| 1161 |   struct DiagonalizationData DiagData;
 | 
|---|
| 1162 | 
 | 
|---|
| 1163 |   InitDiagonalization(P, &DiagData, Num, 2, 0);
 | 
|---|
| 1164 |     
 | 
|---|
| 1165 |   // Calculate Overlap matrix
 | 
|---|
| 1166 |   for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) {
 | 
|---|
| 1167 |     CalculateOverlap(P, l, Occupied);  
 | 
|---|
| 1168 |     //Calculate1stOverlap(P, l, Occupied);
 | 
|---|
| 1169 |   }  
 | 
|---|
| 1170 |   for (i=0;i<Num;i++) // fill gsl matrix with overlap values
 | 
|---|
| 1171 |     for (j=0;j<Num;j++) {
 | 
|---|
| 1172 |       gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
 | 
|---|
| 1173 |       //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]);
 | 
|---|
| 1174 |     }
 | 
|---|
| 1175 |       
 | 
|---|
| 1176 |   // diagonalize overlap matrix
 | 
|---|
| 1177 |   Diagonalize(P, &DiagData);
 | 
|---|
| 1178 |   
 | 
|---|
| 1179 |   // apply found unitary transformation to wave functions
 | 
|---|
| 1180 |   UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
 | 
|---|
| 1181 |   
 | 
|---|
| 1182 |   // free memory
 | 
|---|
| 1183 |   FreeDiagonalization(&DiagData);
 | 
|---|
| 1184 | }
 | 
|---|
| 1185 | 
 | 
|---|
| 1186 | /** Uses either serial or parallel diagonalization.
 | 
|---|
| 1187 |  * \param *P Problem at hand
 | 
|---|
| 1188 |  * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
 | 
|---|
| 1189 |  * \sa ParallelDiagonalization(), SerialDiagonalization()
 | 
|---|
| 1190 |  */
 | 
|---|
| 1191 | void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData)
 | 
|---|
| 1192 | {
 | 
|---|
| 1193 |   if (DiagData->Num != 1) { // one- or multi-process case?
 | 
|---|
| 1194 |     if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) {
 | 
|---|
| 1195 |       /*
 | 
|---|
| 1196 |       debug(P,"Testing with silly matrix");
 | 
|---|
| 1197 |       ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot);
 | 
|---|
| 1198 |       if(P->Call.out[ReadOut]) // && P->Par.me == 0) 
 | 
|---|
| 1199 |         PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
 | 
|---|
| 1200 |       if(P->Call.out[ReadOut]) // && P->Par.me == 0) 
 | 
|---|
| 1201 |         PrintGSLMatrix(P, Utest, 4, "Utest (final)");
 | 
|---|
| 1202 |         */  
 | 
|---|
| 1203 |       debug(P,"ParallelDiagonalization");
 | 
|---|
| 1204 |       ParallelDiagonalization(P, DiagData);
 | 
|---|
| 1205 |     } else {/*
 | 
|---|
| 1206 |       debug(P,"Testing with silly matrix");
 | 
|---|
| 1207 |       SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot);
 | 
|---|
| 1208 |       if(P->Call.out[ReadOut]) // && P->Par.me == 0) 
 | 
|---|
| 1209 |         PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
 | 
|---|
| 1210 |       if(P->Call.out[ReadOut]) // && P->Par.me == 0) 
 | 
|---|
| 1211 |         PrintGSLMatrix(P, Utest, 4, "Utest (final)");
 | 
|---|
| 1212 |          */ 
 | 
|---|
| 1213 |       debug(P,"SerialDiagonalization");
 | 
|---|
| 1214 |       SerialDiagonalization(P, DiagData);
 | 
|---|
| 1215 |     }  
 | 
|---|
| 1216 |     
 | 
|---|
| 1217 |     //if(P->Call.out[ReadOut]) // && P->Par.me == 0) 
 | 
|---|
| 1218 |       //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U");
 | 
|---|
| 1219 |   }
 | 
|---|
| 1220 | }
 | 
|---|
| 1221 | 
 | 
|---|
| 1222 | /** Computation of Maximally Localized Wannier Functions.
 | 
|---|
| 1223 |  * Maximally localized functions are prime when evulating a Hamiltonian with
 | 
|---|
| 1224 |  * magnetic fields under periodic boundary conditions, as the common position
 | 
|---|
| 1225 |  * operator is no longer valid. These can be obtained by orbital rotations, which
 | 
|---|
| 1226 |  * are looked for iteratively and gathered in one transformation matrix, to be
 | 
|---|
| 1227 |  * later applied to the set of orbital wave functions.
 | 
|---|
| 1228 |  * 
 | 
|---|
| 1229 |  * In order to obtain these, the following algorithm is applied:
 | 
|---|
| 1230 |  * -# Initialize U (identity) as the sought-for transformation matrix
 | 
|---|
| 1231 |  * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
 | 
|---|
| 1232 |  *    \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
 | 
|---|
| 1233 |  *    \f$A^{(k)}\f$
 | 
|---|
| 1234 |  * -# For each pair of indices (i,j) (i<j) do the following:
 | 
|---|
| 1235 |  *    -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
 | 
|---|
| 1236 |  *       where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
 | 
|---|
| 1237 |  *    -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
 | 
|---|
| 1238 |  *       corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
 | 
|---|
| 1239 |  *    -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the 
 | 
|---|
| 1240 |  *       identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
 | 
|---|
| 1241 |  *       \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
 | 
|---|
| 1242 |  *    -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
 | 
|---|
| 1243 |  *    -# Gather the rotations in \f$U = U R\f$
 | 
|---|
| 1244 |  * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
 | 
|---|
| 1245 |  * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
 | 
|---|
| 1246 |  * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
 | 
|---|
| 1247 |  * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
 | 
|---|
| 1248 |  *    OnePsiElementAddData#WannierCentre
 | 
|---|
| 1249 |  * 
 | 
|---|
| 1250 |  * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
 | 
|---|
| 1251 |  * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
 | 
|---|
| 1252 |  * time.
 | 
|---|
| 1253 |  * 
 | 
|---|
| 1254 |  * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
 | 
|---|
| 1255 |  * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
 | 
|---|
| 1256 |  * may be diagonalized simultaneously and parallely. We use the implementation explained in
 | 
|---|
| 1257 |  * [Golub, Matrix computations, 1989, p451].
 | 
|---|
| 1258 |  * 
 | 
|---|
| 1259 |  * \param *P Problem at hand
 | 
|---|
| 1260 |  */
 | 
|---|
| 1261 | void ComputeMLWF(struct Problem *P) {
 | 
|---|
| 1262 |   // variables and allocation
 | 
|---|
| 1263 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 1264 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 1265 |   int i;
 | 
|---|
| 1266 |   int Num = Psi->NoOfPsis;  // is number of occupied plus unoccupied states for rows
 | 
|---|
| 1267 |   double **WannierCentre;
 | 
|---|
| 1268 |   double *WannierSpread;
 | 
|---|
| 1269 |   double spread, spreadSQ;
 | 
|---|
| 1270 |   struct DiagonalizationData DiagData;
 | 
|---|
| 1271 | 
 | 
|---|
| 1272 |   if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
 | 
|---|
| 1273 | 
 | 
|---|
| 1274 |   InitDiagonalization(P, &DiagData, Num, max_operators, 1);
 | 
|---|
| 1275 | 
 | 
|---|
| 1276 |   // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
 | 
|---|
| 1277 |   //debug (P,"Calculatung Variance matrices");
 | 
|---|
| 1278 |   FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A);
 | 
|---|
| 1279 | 
 | 
|---|
| 1280 |   //debug (P,"Diagonalizing");
 | 
|---|
| 1281 |   Diagonalize(P, &DiagData);
 | 
|---|
| 1282 | 
 | 
|---|
| 1283 |   // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
 | 
|---|
| 1284 |   //debug (P,"Performing Unitary transformation");
 | 
|---|
| 1285 |   UnitaryTransformationOnWavefunctions(P, DiagData.U, Num);
 | 
|---|
| 1286 | 
 | 
|---|
| 1287 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me);
 | 
|---|
| 1288 |   WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre");
 | 
|---|
| 1289 |   WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread"); 
 | 
|---|
| 1290 |   for (i=0;i<Num;i++)
 | 
|---|
| 1291 |     WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre");
 | 
|---|
| 1292 | 
 | 
|---|
| 1293 |   //debug (P,"Computing Wannier centres from variance matrix"); 
 | 
|---|
| 1294 |   ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread);
 | 
|---|
| 1295 | 
 | 
|---|
| 1296 |   // write to file
 | 
|---|
| 1297 |   //debug (P,"Writing spread file");
 | 
|---|
| 1298 |   WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread);  
 | 
|---|
| 1299 | 
 | 
|---|
| 1300 |   debug(P,"Free'ing memory");
 | 
|---|
| 1301 |   // free all remaining memory
 | 
|---|
| 1302 |   FreeDiagonalization(&DiagData);
 | 
|---|
| 1303 |   for (i=0;i<Num;i++)
 | 
|---|
| 1304 |     Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]");
 | 
|---|
| 1305 |   Free(WannierCentre, "ComputeMLWF: WannierCentre");
 | 
|---|
| 1306 |   Free(WannierSpread, "ComputeMLWF: WannierSpread");
 | 
|---|
| 1307 | }
 | 
|---|
| 1308 | 
 | 
|---|
| 1309 | /** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix.
 | 
|---|
| 1310 |  * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and
 | 
|---|
| 1311 |  * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part.
 | 
|---|
| 1312 |  * 
 | 
|---|
| 1313 |  * The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue
 | 
|---|
| 1314 |  * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is
 | 
|---|
| 1315 |  * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then
 | 
|---|
| 1316 |  * derive the other by solving the equation system. Finally, the eigenvector is normalized.  
 | 
|---|
| 1317 |  * \param *P Problem at hand
 | 
|---|
| 1318 |  * \param *A matrix whose eigenvalues/-vectors are to be found
 | 
|---|
| 1319 |  * \param *eval vector with eigenvalues
 | 
|---|
| 1320 |  * \param *evec matrix with corresponding eigenvectors in columns
 | 
|---|
| 1321 |  */
 | 
|---|
| 1322 | #ifdef HAVE_INLINE 
 | 
|---|
| 1323 | inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
 | 
|---|
| 1324 | #else
 | 
|---|
| 1325 | void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec)
 | 
|---|
| 1326 | #endif
 | 
|---|
| 1327 | {
 | 
|---|
| 1328 |   double a11,a12,a21,a22;
 | 
|---|
| 1329 |   double ev[2], summand1, summand2, norm;
 | 
|---|
| 1330 |   int i;
 | 
|---|
| 1331 |   // find eigenvalues
 | 
|---|
| 1332 |   a11 = gsl_matrix_get(A,0,0);
 | 
|---|
| 1333 |   a12 = gsl_matrix_get(A,0,1);
 | 
|---|
| 1334 |   a21 = gsl_matrix_get(A,1,0);
 | 
|---|
| 1335 |   a22 = gsl_matrix_get(A,1,1);
 | 
|---|
| 1336 |   summand1 = (a11+a22)/2.;
 | 
|---|
| 1337 |   summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22);
 | 
|---|
| 1338 |   ev[0] = summand1 + summand2;
 | 
|---|
| 1339 |   ev[1] = summand1 - summand2;
 | 
|---|
| 1340 |   gsl_vector_set(eval, 0, ev[0]);
 | 
|---|
| 1341 |   gsl_vector_set(eval, 1, ev[1]);
 | 
|---|
| 1342 |   //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2);
 | 
|---|
| 1343 |   
 | 
|---|
| 1344 |   // find eigenvectors
 | 
|---|
| 1345 |   for(i=0;i<2;i++) {
 | 
|---|
| 1346 |     if (fabs(ev[i]) < MYEPSILON) {
 | 
|---|
| 1347 |       gsl_matrix_set(evec, 0,i, 0.);
 | 
|---|
| 1348 |       gsl_matrix_set(evec, 1,i, 0.);
 | 
|---|
| 1349 |     } else if (fabs(a22-ev[i]) > MYEPSILON) {
 | 
|---|
| 1350 |       norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i])));
 | 
|---|
| 1351 |       gsl_matrix_set(evec, 0,i, 1./norm);
 | 
|---|
| 1352 |       gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm);
 | 
|---|
| 1353 |       //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
 | 
|---|
| 1354 |     } else if (fabs(a12) > MYEPSILON) {
 | 
|---|
| 1355 |       norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12));
 | 
|---|
| 1356 |       gsl_matrix_set(evec, 0,i, 1./norm);
 | 
|---|
| 1357 |       gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm);
 | 
|---|
| 1358 |       //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
 | 
|---|
| 1359 |     } else {
 | 
|---|
| 1360 |       if (fabs(a11-ev[i]) > MYEPSILON) {
 | 
|---|
| 1361 |         norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i])));
 | 
|---|
| 1362 |         gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm);
 | 
|---|
| 1363 |         gsl_matrix_set(evec, 1,i, 1./norm);
 | 
|---|
| 1364 |         //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
 | 
|---|
| 1365 |       } else if (fabs(a21) > MYEPSILON) {
 | 
|---|
| 1366 |         norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21));
 | 
|---|
| 1367 |         gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm);
 | 
|---|
| 1368 |         gsl_matrix_set(evec, 1,i, 1./norm);
 | 
|---|
| 1369 |         //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
 | 
|---|
| 1370 |       } else {
 | 
|---|
| 1371 |         //gsl_matrix_set(evec, 0,i, 0.);
 | 
|---|
| 1372 |         //gsl_matrix_set(evec, 1,i, 1.);
 | 
|---|
| 1373 |         fprintf (stderr,"\t evec %i undetermined\n", i);
 | 
|---|
| 1374 |       }
 | 
|---|
| 1375 |       //gsl_matrix_set(evec, 0,0, 1.);
 | 
|---|
| 1376 |       //gsl_matrix_set(evec, 1,0, 0.);
 | 
|---|
| 1377 |       //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me);
 | 
|---|
| 1378 |     }
 | 
|---|
| 1379 |   }
 | 
|---|
| 1380 | }
 | 
|---|
| 1381 | 
 | 
|---|
| 1382 | /** Calculates sine and cosine values for multiple matrix diagonalization.
 | 
|---|
| 1383 |  * \param *evec eigenvectors in columns
 | 
|---|
| 1384 |  * \param *eval corresponding eigenvalues
 | 
|---|
| 1385 |  * \param *c cosine to be returned
 | 
|---|
| 1386 |  * \param *s sine to be returned
 | 
|---|
| 1387 |  */
 | 
|---|
| 1388 | #ifdef HAVE_INLINE
 | 
|---|
| 1389 | inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
 | 
|---|
| 1390 | #else
 | 
|---|
| 1391 | void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s)
 | 
|---|
| 1392 | #endif
 | 
|---|
| 1393 | {
 | 
|---|
| 1394 |   int index;
 | 
|---|
| 1395 |   double x,y,r;
 | 
|---|
| 1396 |   
 | 
|---|
| 1397 |   index = gsl_vector_max_index (eval);  // get biggest eigenvalue
 | 
|---|
| 1398 |   //fprintf(stderr,"\t1st: %lg\t2nd: %lg ---  biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index);
 | 
|---|
| 1399 |   x = gsl_matrix_get(evec, 0, index);
 | 
|---|
| 1400 |   y = gsl_matrix_get(evec, 1, index);
 | 
|---|
| 1401 |   if (x < 0) {  // ensure x>=0 so that rotation angles remain smaller Pi/4
 | 
|---|
| 1402 |     y = -y;
 | 
|---|
| 1403 |     x = -x;  
 | 
|---|
| 1404 |   }
 | 
|---|
| 1405 |   //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
 | 
|---|
| 1406 |   //fprintf(stderr,"\tx %lg\ty %lg\n", x,y);
 | 
|---|
| 1407 |   
 | 
|---|
| 1408 |   //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
 | 
|---|
| 1409 |   // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
 | 
|---|
| 1410 |   r = sqrt(x*x + y*y); // + z*z);
 | 
|---|
| 1411 |   if (fabs(r) > MYEPSILON) {
 | 
|---|
| 1412 |     *c = sqrt((x + r) / (2.*r));
 | 
|---|
| 1413 |     *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
 | 
|---|
| 1414 |   } else {
 | 
|---|
| 1415 |     *c = 1.;
 | 
|---|
| 1416 |     *s = 0.;
 | 
|---|
| 1417 |   }
 | 
|---|
| 1418 | }
 | 
|---|
| 1419 | 
 | 
|---|
| 1420 | /*
 | 
|---|
| 1421 |  * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
 | 
|---|
| 1422 |  * \param *U transformation matrix set to unity matrix
 | 
|---|
| 1423 |  * \param NumMatrices number of matrices to be diagonalized simultaneously
 | 
|---|
| 1424 |  * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
 | 
|---|
| 1425 |  * \param AllocNum number of rows/columns in matrices
 | 
|---|
| 1426 |  * \param Num number of wave functions
 | 
|---|
| 1427 |  * \param ProcRank index in group for this cpu
 | 
|---|
| 1428 |  * \param ProcNum number of cpus in group
 | 
|---|
| 1429 |  * \param *top array with top row indices (merry-go-round)
 | 
|---|
| 1430 |  * \param *bot array with bottom row indices (merry-go-round) 
 | 
|---|
| 1431 |  */
 | 
|---|
| 1432 | 
 | 
|---|
| 1433 | /** Simultaneous diagonalization of matrices with multiple cpus.
 | 
|---|
| 1434 |  * \param *P Problem at hand
 | 
|---|
| 1435 |  * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
 | 
|---|
| 1436 |  * \note this is slower given one cpu only than SerialDiagonalization()
 | 
|---|
| 1437 |  */
 | 
|---|
| 1438 | void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
 | 
|---|
| 1439 | {
 | 
|---|
| 1440 |   struct RunStruct *R =&P->R;
 | 
|---|
| 1441 |   int tagR0, tagR1, tagS0, tagS1;
 | 
|---|
| 1442 |   int iloc, jloc;
 | 
|---|
| 1443 |   double *s_all, *c_all;
 | 
|---|
| 1444 |   int round, max_rounds;
 | 
|---|
| 1445 |   int start;
 | 
|---|
| 1446 |   int *rcounts, *rdispls;
 | 
|---|
| 1447 |   double *c, *s;
 | 
|---|
| 1448 |   int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
 | 
|---|
| 1449 |   int left, right; // left or right neighbour for process
 | 
|---|
| 1450 |   double spread = 0., old_spread=0., Spread=0.;
 | 
|---|
| 1451 |   int i,j,k,l,m,u;
 | 
|---|
| 1452 |   int set;
 | 
|---|
| 1453 |   int it_steps; // iteration step counter
 | 
|---|
| 1454 |   double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k]
 | 
|---|
| 1455 |   double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k]
 | 
|---|
| 1456 |   double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k]
 | 
|---|
| 1457 |   double a_i, a_j;
 | 
|---|
| 1458 |   gsl_matrix *G;
 | 
|---|
| 1459 |   gsl_vector *h;
 | 
|---|
| 1460 |   gsl_vector *eval;
 | 
|---|
| 1461 |   gsl_matrix *evec;
 | 
|---|
| 1462 |   //gsl_eigen_symmv_workspace *w;
 | 
|---|
| 1463 | 
 | 
|---|
| 1464 |   max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum;  // each process must perform multiple rotations per step of a set
 | 
|---|
| 1465 |   if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds);
 | 
|---|
| 1466 |   
 | 
|---|
| 1467 |   // allocate column vectors for interchange of columns
 | 
|---|
| 1468 |   debug(P,"allocate column vectors for interchange of columns");
 | 
|---|
| 1469 |   c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
 | 
|---|
| 1470 |   s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
 | 
|---|
| 1471 |   c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all");
 | 
|---|
| 1472 |   s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all");
 | 
|---|
| 1473 |   rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts");
 | 
|---|
| 1474 |   rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls");
 | 
|---|
| 1475 | 
 | 
|---|
| 1476 |   // allocate eigenvector stuff
 | 
|---|
| 1477 |   debug(P,"allocate eigenvector stuff");
 | 
|---|
| 1478 |   G = gsl_matrix_calloc (2,2);
 | 
|---|
| 1479 |   h = gsl_vector_alloc (2);
 | 
|---|
| 1480 |   eval = gsl_vector_alloc (2);
 | 
|---|
| 1481 |   evec = gsl_matrix_alloc (2,2);
 | 
|---|
| 1482 |   //w = gsl_eigen_symmv_alloc(2);
 | 
|---|
| 1483 |   
 | 
|---|
| 1484 |   // establish communication partners
 | 
|---|
| 1485 |   debug(P,"establish communication partners");
 | 
|---|
| 1486 |   if (DiagData->ProcRank == 0) {
 | 
|---|
| 1487 |     tagS0 = WannierALTag;   // left p0 always remains left p0
 | 
|---|
| 1488 |   } else {
 | 
|---|
| 1489 |     tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last          
 | 
|---|
| 1490 |   }
 | 
|---|
| 1491 |   tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1
 | 
|---|
| 1492 |   tagR0 = WannierALTag; // 
 | 
|---|
| 1493 |   tagR1 = WannierARTag; // first process
 | 
|---|
| 1494 |   if (DiagData->ProcRank == 0) {
 | 
|---|
| 1495 |     left = DiagData->ProcNum-1;
 | 
|---|
| 1496 |     right = 1;
 | 
|---|
| 1497 |     Lsend = 0;
 | 
|---|
| 1498 |     Rsend = 1;
 | 
|---|
| 1499 |     Lrecv = 0;
 | 
|---|
| 1500 |     Rrecv = 1;        
 | 
|---|
| 1501 |   } else if (DiagData->ProcRank == DiagData->ProcNum - 1) {
 | 
|---|
| 1502 |     left = DiagData->ProcRank - 1;
 | 
|---|
| 1503 |     right = 0;
 | 
|---|
| 1504 |     Lsend = DiagData->ProcRank;
 | 
|---|
| 1505 |     Rsend = DiagData->ProcRank - 1;
 | 
|---|
| 1506 |     Lrecv = DiagData->ProcRank - 1;
 | 
|---|
| 1507 |     Rrecv = DiagData->ProcRank;        
 | 
|---|
| 1508 |   } else {
 | 
|---|
| 1509 |     left = DiagData->ProcRank - 1;
 | 
|---|
| 1510 |     right = DiagData->ProcRank + 1;
 | 
|---|
| 1511 |     Lsend = DiagData->ProcRank+1;
 | 
|---|
| 1512 |     Rsend = DiagData->ProcRank - 1;
 | 
|---|
| 1513 |     Lrecv = DiagData->ProcRank - 1;
 | 
|---|
| 1514 |     Rrecv = DiagData->ProcRank+1;
 | 
|---|
| 1515 |   }
 | 
|---|
| 1516 |   //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
 | 
|---|
| 1517 |   
 | 
|---|
| 1518 |   // initialise A_loc
 | 
|---|
| 1519 |   debug(P,"initialise A_loc");
 | 
|---|
| 1520 |   for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
 | 
|---|
| 1521 |     //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
 | 
|---|
| 1522 |     Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
 | 
|---|
| 1523 |     Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]");
 | 
|---|
| 1524 |     Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
 | 
|---|
| 1525 |     //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
 | 
|---|
| 1526 |     
 | 
|---|
| 1527 |     for (round=0;round<max_rounds;round++) {
 | 
|---|
| 1528 |       Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)];
 | 
|---|
| 1529 |       Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)];
 | 
|---|
| 1530 |       for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1531 |         Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round));
 | 
|---|
| 1532 |         Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1);
 | 
|---|
| 1533 |         //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
 | 
|---|
| 1534 |       }
 | 
|---|
| 1535 |     }
 | 
|---|
| 1536 |   }
 | 
|---|
| 1537 |   // initialise U_loc
 | 
|---|
| 1538 |   debug(P,"initialise U_loc");
 | 
|---|
| 1539 |   //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
 | 
|---|
| 1540 |   Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround");
 | 
|---|
| 1541 |   Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal");
 | 
|---|
| 1542 |   Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
 | 
|---|
| 1543 |   //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
 | 
|---|
| 1544 |   for (round=0;round<max_rounds;round++) {
 | 
|---|
| 1545 |     Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)];
 | 
|---|
| 1546 |     Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)];
 | 
|---|
| 1547 |     for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1548 |       Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round));
 | 
|---|
| 1549 |       Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1);
 | 
|---|
| 1550 |       //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
 | 
|---|
| 1551 |     }
 | 
|---|
| 1552 |   }
 | 
|---|
| 1553 |   
 | 
|---|
| 1554 |   // now comes the iteration loop
 | 
|---|
| 1555 |   debug(P,"now comes the iteration loop");
 | 
|---|
| 1556 |   it_steps = 0;
 | 
|---|
| 1557 |   do {
 | 
|---|
| 1558 |     it_steps++;
 | 
|---|
| 1559 |     //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
 | 
|---|
| 1560 |     for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time
 | 
|---|
| 1561 |       //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
 | 
|---|
| 1562 |       for (round = 0; round < max_rounds;round++) {
 | 
|---|
| 1563 |         start = DiagData->ProcRank * max_rounds + round;
 | 
|---|
| 1564 |         // get indices
 | 
|---|
| 1565 |         i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices
 | 
|---|
| 1566 |         iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; 
 | 
|---|
| 1567 |         j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j >  i
 | 
|---|
| 1568 |         jloc =  DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
 | 
|---|
| 1569 |         //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
 | 
|---|
| 1570 |         
 | 
|---|
| 1571 |         // calculate rotation angle, i.e. c and s
 | 
|---|
| 1572 |         //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
 | 
|---|
| 1573 |         gsl_matrix_set_zero(G);        
 | 
|---|
| 1574 |         for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
 | 
|---|
| 1575 |           // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]         
 | 
|---|
| 1576 |           //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);          
 | 
|---|
| 1577 |           //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);          
 | 
|---|
| 1578 |           gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
 | 
|---|
| 1579 |           gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
 | 
|---|
| 1580 |           
 | 
|---|
| 1581 |           // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
 | 
|---|
| 1582 |           for (l=0;l<2;l++)
 | 
|---|
| 1583 |             for (m=0;m<2;m++)
 | 
|---|
| 1584 |                 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
 | 
|---|
| 1585 |         }
 | 
|---|
| 1586 |         //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
 | 
|---|
| 1587 |         // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
 | 
|---|
| 1588 |         EigensolverFor22Matrix(P,G,eval,evec);
 | 
|---|
| 1589 |         //gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G    
 | 
|---|
| 1590 | 
 | 
|---|
| 1591 |         CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]);
 | 
|---|
| 1592 |         //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
 | 
|---|
| 1593 | 
 | 
|---|
| 1594 |         //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
 | 
|---|
| 1595 |         // V_loc = V_loc * V_small
 | 
|---|
| 1596 |         //debug(P,"apply rotation to local U");
 | 
|---|
| 1597 |         for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1598 |           a_i = Uloc[2*round+iloc][l];
 | 
|---|
| 1599 |           a_j = Uloc[2*round+jloc][l];
 | 
|---|
| 1600 |           Uloc[2*round+iloc][l] =  c[round] * a_i + s[round] * a_j;
 | 
|---|
| 1601 |           Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
 | 
|---|
| 1602 |         }
 | 
|---|
| 1603 |       }  // end of round        
 | 
|---|
| 1604 |       // circulate the rotation angles
 | 
|---|
| 1605 |       //debug(P,"circulate the rotation angles");
 | 
|---|
| 1606 |       MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));   // MPI_Allgather is waaaaay faster than ring circulation
 | 
|---|
| 1607 |       MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));
 | 
|---|
| 1608 |       //m = start;
 | 
|---|
| 1609 |       for (l=0;l<DiagData->AllocNum/2;l++) { // for each process
 | 
|---|
| 1610 |         // we have V_small from process k
 | 
|---|
| 1611 |         //debug(P,"Apply V_small from other process");
 | 
|---|
| 1612 |         i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices
 | 
|---|
| 1613 |         j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j >  i
 | 
|---|
| 1614 |         iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1; 
 | 
|---|
| 1615 |         jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1;
 | 
|---|
| 1616 |         for (m=0;m<max_rounds;m++) {
 | 
|---|
| 1617 |           //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
 | 
|---|
| 1618 |           // apply row rotation to each A[k]
 | 
|---|
| 1619 |           for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
 | 
|---|
| 1620 |             //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
 | 
|---|
| 1621 |             //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
 | 
|---|
| 1622 |             a_i = Aloc[k][2*m+iloc][i];
 | 
|---|
| 1623 |             a_j = Aloc[k][2*m+iloc][j];
 | 
|---|
| 1624 |             Aloc[k][2*m+iloc][i] =  c_all[l] * a_i + s_all[l] * a_j;
 | 
|---|
| 1625 |             Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
 | 
|---|
| 1626 |             a_i = Aloc[k][2*m+jloc][i];
 | 
|---|
| 1627 |             a_j = Aloc[k][2*m+jloc][j];
 | 
|---|
| 1628 |             Aloc[k][2*m+jloc][i] =  c_all[l] * a_i + s_all[l] * a_j;
 | 
|---|
| 1629 |             Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
 | 
|---|
| 1630 |             //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
 | 
|---|
| 1631 |             //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
 | 
|---|
| 1632 |           }
 | 
|---|
| 1633 |         }
 | 
|---|
| 1634 |       }
 | 
|---|
| 1635 |       // apply rotation to local operator matrices
 | 
|---|
| 1636 |       // A_loc = A_loc * V_small 
 | 
|---|
| 1637 |       //debug(P,"apply rotation to local operator matrices A[k]");
 | 
|---|
| 1638 |       for (m=0;m<max_rounds;m++) {
 | 
|---|
| 1639 |         start = DiagData->ProcRank * max_rounds + m;
 | 
|---|
| 1640 |         iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; 
 | 
|---|
| 1641 |         jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
 | 
|---|
| 1642 |         for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix !
 | 
|---|
| 1643 |           for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1644 |             // Columns, i and j belong to this process only!
 | 
|---|
| 1645 |             a_i = Aloc[k][2*m+iloc][l];
 | 
|---|
| 1646 |             a_j = Aloc[k][2*m+jloc][l];
 | 
|---|
| 1647 |             Aloc[k][2*m+iloc][l] =  c[m] * a_i + s[m] * a_j;
 | 
|---|
| 1648 |             Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
 | 
|---|
| 1649 |             //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
 | 
|---|
| 1650 |           }
 | 
|---|
| 1651 |         }
 | 
|---|
| 1652 |       }
 | 
|---|
| 1653 |       // Shuffling of these round's columns to prepare next rotation set
 | 
|---|
| 1654 |       for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! 
 | 
|---|
| 1655 |         // extract columns from A
 | 
|---|
| 1656 |         //debug(P,"extract columns from A");
 | 
|---|
| 1657 |         MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
 | 
|---|
| 1658 |                   
 | 
|---|
| 1659 |       }       
 | 
|---|
| 1660 |       // and also for V ...
 | 
|---|
| 1661 |       //debug(P,"extract columns from U");
 | 
|---|
| 1662 |       MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
 | 
|---|
| 1663 | 
 | 
|---|
| 1664 | 
 | 
|---|
| 1665 |       // and merry-go-round for the indices too            
 | 
|---|
| 1666 |       //debug(P,"and merry-go-round for the indices too");
 | 
|---|
| 1667 |       MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
 | 
|---|
| 1668 |     }
 | 
|---|
| 1669 | 
 | 
|---|
| 1670 |     //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
 | 
|---|
| 1671 |     // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
 | 
|---|
| 1672 |     old_spread = Spread;
 | 
|---|
| 1673 |     spread = 0.;
 | 
|---|
| 1674 |     for(k=0;k<DiagData->NumMatrices;k++) {   // go through all self-adjoint operators
 | 
|---|
| 1675 |       for (i=0; i < 2*max_rounds; i++) {  // go through all wave functions
 | 
|---|
| 1676 |           spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds];
 | 
|---|
| 1677 |           //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
 | 
|---|
| 1678 |       }
 | 
|---|
| 1679 |     }
 | 
|---|
| 1680 |     MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm));
 | 
|---|
| 1681 |     //Spread = spread;
 | 
|---|
| 1682 |     if (P->Par.me == 0) { 
 | 
|---|
| 1683 |       //if(P->Call.out[ReadOut]) 
 | 
|---|
| 1684 |       //  fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
 | 
|---|
| 1685 |       //else 
 | 
|---|
| 1686 |         //fprintf(stderr,"%2.9e\n",Spread);
 | 
|---|
| 1687 |     }
 | 
|---|
| 1688 |     // STEP 5: check change of variance
 | 
|---|
| 1689 |   } while (fabs(old_spread-Spread) >= R->EpsWannier);
 | 
|---|
| 1690 |   // end of iterative diagonalization loop: We have found our final orthogonal U!
 | 
|---|
| 1691 | 
 | 
|---|
| 1692 |   // gather local parts of U into complete matrix 
 | 
|---|
| 1693 |   for (l=0;l<DiagData->ProcNum;l++)
 | 
|---|
| 1694 |     rcounts[l] = DiagData->AllocNum;
 | 
|---|
| 1695 |   debug(P,"allgather U");
 | 
|---|
| 1696 |   for (round=0;round<2*max_rounds;round++) {
 | 
|---|
| 1697 |     for (l=0;l<DiagData->ProcNum;l++)
 | 
|---|
| 1698 |       rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
 | 
|---|
| 1699 |     MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
 | 
|---|
| 1700 |   }
 | 
|---|
| 1701 |   for (k=0;k<DiagData->AllocNum;k++) {    
 | 
|---|
| 1702 |     for(l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1703 |       gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]);
 | 
|---|
| 1704 |     }
 | 
|---|
| 1705 |   }
 | 
|---|
| 1706 | 
 | 
|---|
| 1707 |   // after one set, gather A[k] from all and calculate spread
 | 
|---|
| 1708 |   for (l=0;l<DiagData->ProcNum;l++)
 | 
|---|
| 1709 |     rcounts[l] = DiagData->AllocNum;
 | 
|---|
| 1710 |   debug(P,"gather A[k] for spread");
 | 
|---|
| 1711 |   for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix !
 | 
|---|
| 1712 |     debug(P,"A[k] all gather");
 | 
|---|
| 1713 |     for (round=0;round<2*max_rounds;round++) {
 | 
|---|
| 1714 |       for (l=0;l<DiagData->ProcNum;l++)
 | 
|---|
| 1715 |         rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum;
 | 
|---|
| 1716 |       MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
 | 
|---|
| 1717 |     }
 | 
|---|
| 1718 |     for (k=0;k<DiagData->AllocNum;k++) {    
 | 
|---|
| 1719 |       for(l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1720 |         gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]);
 | 
|---|
| 1721 |       }
 | 
|---|
| 1722 |     }
 | 
|---|
| 1723 |   }
 | 
|---|
| 1724 |    
 | 
|---|
| 1725 |   // free eigenvector stuff     
 | 
|---|
| 1726 |   gsl_vector_free(h);
 | 
|---|
| 1727 |   gsl_matrix_free(G);
 | 
|---|
| 1728 |   //gsl_eigen_symmv_free(w);  
 | 
|---|
| 1729 |   gsl_vector_free(eval);
 | 
|---|
| 1730 |   gsl_matrix_free(evec);
 | 
|---|
| 1731 |  
 | 
|---|
| 1732 |   // Free column vectors
 | 
|---|
| 1733 |   for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
 | 
|---|
| 1734 |     Free(Atotal[k], "ParallelDiagonalization: Atotal[k]");
 | 
|---|
| 1735 |     Free(Around[k], "ParallelDiagonalization: Around[k]");
 | 
|---|
| 1736 |   }
 | 
|---|
| 1737 |   Free(Uround, "ParallelDiagonalization: Uround");
 | 
|---|
| 1738 |   Free(Utotal, "ParallelDiagonalization: Utotal");
 | 
|---|
| 1739 |   Free(c_all, "ParallelDiagonalization: c_all");
 | 
|---|
| 1740 |   Free(s_all, "ParallelDiagonalization: s_all");
 | 
|---|
| 1741 |   Free(c, "ParallelDiagonalization: c");
 | 
|---|
| 1742 |   Free(s, "ParallelDiagonalization: s");
 | 
|---|
| 1743 |   Free(rcounts, "ParallelDiagonalization: rcounts");
 | 
|---|
| 1744 |   Free(rdispls, "ParallelDiagonalization: rdispls");
 | 
|---|
| 1745 | }
 | 
|---|
| 1746 | /*
 | 
|---|
| 1747 |  * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
 | 
|---|
| 1748 |  * \param *U transformation matrix set to unity matrix
 | 
|---|
| 1749 |  * \param NumMatrices number of matrices to be diagonalized
 | 
|---|
| 1750 |  * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
 | 
|---|
| 1751 |  * \param AllocNum number of rows/columns in matrices
 | 
|---|
| 1752 |  * \param Num number of wave functions
 | 
|---|
| 1753 |  * \param *top array with top row indices (merry-go-round)
 | 
|---|
| 1754 |  * \param *bot array with bottom row indices (merry-go-round) 
 | 
|---|
| 1755 |  */
 | 
|---|
| 1756 | 
 | 
|---|
| 1757 | /** Simultaneous Diagonalization of variances matrices with one cpu.
 | 
|---|
| 1758 |  * \param *P Problem at hand
 | 
|---|
| 1759 |  * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
 | 
|---|
| 1760 |  * \note this is faster given one cpu only than ParallelDiagonalization()
 | 
|---|
| 1761 |  */
 | 
|---|
| 1762 | void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData)
 | 
|---|
| 1763 | {
 | 
|---|
| 1764 |   struct RunStruct *R = &P->R;
 | 
|---|
| 1765 |   gsl_matrix *G;
 | 
|---|
| 1766 |   gsl_vector *h;
 | 
|---|
| 1767 |   gsl_vector *eval;
 | 
|---|
| 1768 |   gsl_matrix *evec;
 | 
|---|
| 1769 |   //gsl_eigen_symmv_workspace *w;
 | 
|---|
| 1770 |   double *c,*s,a_i,a_j;
 | 
|---|
| 1771 |   int it_steps, set, ProcRank;
 | 
|---|
| 1772 |   int i,j,k,l,m;
 | 
|---|
| 1773 |   double spread = 0., old_spread = 0.;\
 | 
|---|
| 1774 | 
 | 
|---|
| 1775 |   // allocate eigenvector stuff
 | 
|---|
| 1776 |   debug(P,"allocate eigenvector stuff");
 | 
|---|
| 1777 |   G = gsl_matrix_calloc (2,2);
 | 
|---|
| 1778 |   h = gsl_vector_alloc (2);
 | 
|---|
| 1779 |   eval = gsl_vector_alloc (2);
 | 
|---|
| 1780 |   evec = gsl_matrix_alloc (2,2);
 | 
|---|
| 1781 |   //w = gsl_eigen_symmv_alloc(2);
 | 
|---|
| 1782 | 
 | 
|---|
| 1783 |   c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
 | 
|---|
| 1784 |   s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
 | 
|---|
| 1785 |   debug(P,"now comes the iteration loop");
 | 
|---|
| 1786 |   it_steps=0;
 | 
|---|
| 1787 |   do {
 | 
|---|
| 1788 |     it_steps++;
 | 
|---|
| 1789 |     //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
 | 
|---|
| 1790 |     //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
 | 
|---|
| 1791 |     for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
 | 
|---|
| 1792 |       //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
 | 
|---|
| 1793 |       // STEP 3: for all index pairs 0<= i<j <AllocNum
 | 
|---|
| 1794 |       for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) {
 | 
|---|
| 1795 |         // get indices
 | 
|---|
| 1796 |         i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices
 | 
|---|
| 1797 |         j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j >  i 
 | 
|---|
| 1798 |         //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
 | 
|---|
| 1799 |         // STEP 3a: Calculate G
 | 
|---|
| 1800 |         gsl_matrix_set_zero(G);
 | 
|---|
| 1801 |         
 | 
|---|
| 1802 |         for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
 | 
|---|
| 1803 |           // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
 | 
|---|
| 1804 |           //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));          
 | 
|---|
| 1805 |           //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));          
 | 
|---|
| 1806 |           gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
 | 
|---|
| 1807 |           gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
 | 
|---|
| 1808 |           //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
 | 
|---|
| 1809 |           
 | 
|---|
| 1810 |           // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
 | 
|---|
| 1811 |           for (l=0;l<2;l++)
 | 
|---|
| 1812 |             for (m=0;m<2;m++)
 | 
|---|
| 1813 |                 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
 | 
|---|
| 1814 |                 
 | 
|---|
| 1815 |           //PrintGSLMatrix(P,G,2, "G");
 | 
|---|
| 1816 |         }
 | 
|---|
| 1817 |       
 | 
|---|
| 1818 |         //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
 | 
|---|
| 1819 |         // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
 | 
|---|
| 1820 |         EigensolverFor22Matrix(P,G,eval,evec);
 | 
|---|
| 1821 |         //gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G
 | 
|---|
| 1822 | 
 | 
|---|
| 1823 |         //PrintGSLMatrix(P, evec, 2, "Eigenvectors");
 | 
|---|
| 1824 |         CalculateRotationAnglesFromEigenvalues(evec, eval, c, s);
 | 
|---|
| 1825 |         //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
 | 
|---|
| 1826 |      
 | 
|---|
| 1827 |         //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
 | 
|---|
| 1828 |         // STEP 3d: apply rotation R to rows and columns of A^{(k)}
 | 
|---|
| 1829 |         for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
 | 
|---|
| 1830 |           //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)");
 | 
|---|
| 1831 |           for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1832 |             // Rows
 | 
|---|
| 1833 |             a_i = gsl_matrix_get(DiagData->A[k],i,l);
 | 
|---|
| 1834 |             a_j = gsl_matrix_get(DiagData->A[k],j,l);
 | 
|---|
| 1835 |             gsl_matrix_set(DiagData->A[k], i, l,  c[0] * a_i + s[0] * a_j);
 | 
|---|
| 1836 |             gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j);
 | 
|---|
| 1837 |           }
 | 
|---|
| 1838 |           for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1839 |             // Columns
 | 
|---|
| 1840 |             a_i = gsl_matrix_get(DiagData->A[k],l,i);
 | 
|---|
| 1841 |             a_j = gsl_matrix_get(DiagData->A[k],l,j);
 | 
|---|
| 1842 |             gsl_matrix_set(DiagData->A[k], l, i,  c[0] * a_i + s[0] * a_j);
 | 
|---|
| 1843 |             gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j);
 | 
|---|
| 1844 |           }
 | 
|---|
| 1845 |           //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)");
 | 
|---|
| 1846 |         }
 | 
|---|
| 1847 |         //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
 | 
|---|
| 1848 |         //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)");
 | 
|---|
| 1849 |         // STEP 3e: apply U = R*U
 | 
|---|
| 1850 |         for (l=0;l<DiagData->AllocNum;l++) {
 | 
|---|
| 1851 |             a_i = gsl_matrix_get(DiagData->U,i,l);
 | 
|---|
| 1852 |             a_j = gsl_matrix_get(DiagData->U,j,l);
 | 
|---|
| 1853 |             gsl_matrix_set(DiagData->U, i, l,  c[0] * a_i + s[0] * a_j);
 | 
|---|
| 1854 |             gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j);
 | 
|---|
| 1855 |         }
 | 
|---|
| 1856 |         //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)");
 | 
|---|
| 1857 |       }
 | 
|---|
| 1858 |       // and merry-go-round for the indices too            
 | 
|---|
| 1859 |       //debug(P,"and merry-go-round for the indices too");
 | 
|---|
| 1860 |       if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
 | 
|---|
| 1861 |     }
 | 
|---|
| 1862 |    
 | 
|---|
| 1863 |     //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
 | 
|---|
| 1864 |     // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
 | 
|---|
| 1865 |     old_spread = spread;
 | 
|---|
| 1866 |     spread = 0.;
 | 
|---|
| 1867 |     for(k=0;k<DiagData->NumMatrices;k++) {   // go through all self-adjoint operators
 | 
|---|
| 1868 |       for (i=0; i < DiagData->AllocNum; i++) {  // go through all wave functions
 | 
|---|
| 1869 |           spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2);
 | 
|---|
| 1870 |       }
 | 
|---|
| 1871 |     }
 | 
|---|
| 1872 |     if(P->Par.me == 0) {
 | 
|---|
| 1873 |       //if(P->Call.out[ReadOut]) 
 | 
|---|
| 1874 |       //  fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
 | 
|---|
| 1875 |       //else 
 | 
|---|
| 1876 |         //fprintf(stderr,"%2.9e\n",spread);
 | 
|---|
| 1877 |     }
 | 
|---|
| 1878 |     // STEP 5: check change of variance
 | 
|---|
| 1879 |   } while (fabs(old_spread-spread) >= R->EpsWannier);
 | 
|---|
| 1880 |   // end of iterative diagonalization loop: We have found our final orthogonal U!
 | 
|---|
| 1881 |   Free(c, "SerialDiagonalization: c");
 | 
|---|
| 1882 |   Free(s, "SerialDiagonalization: s");
 | 
|---|
| 1883 |   // free eigenvector stuff     
 | 
|---|
| 1884 |   gsl_vector_free(h);
 | 
|---|
| 1885 |   gsl_matrix_free(G);
 | 
|---|
| 1886 |   //gsl_eigen_symmv_free(w);  
 | 
|---|
| 1887 |   gsl_vector_free(eval);
 | 
|---|
| 1888 |   gsl_matrix_free(evec);
 | 
|---|
| 1889 | }
 | 
|---|
| 1890 | 
 | 
|---|
| 1891 | /** Writes the wannier centres and spread to file.
 | 
|---|
| 1892 |  * Also puts centres from array into OnePsiElementAddData structure.
 | 
|---|
| 1893 |  * \param *P Problem at hand
 | 
|---|
| 1894 |  * \param old_spread first term of wannier spread
 | 
|---|
| 1895 |  * \param spread second term of wannier spread
 | 
|---|
| 1896 |  * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
 | 
|---|
| 1897 |  * \param *WannierSpread array with wannier spread per wave function
 | 
|---|
| 1898 |  */
 | 
|---|
| 1899 | void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread)
 | 
|---|
| 1900 | {
 | 
|---|
| 1901 |   struct FileData *F = &P->Files;
 | 
|---|
| 1902 |   struct RunStruct *R = &P->R;
 | 
|---|
| 1903 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 1904 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 1905 |   struct OnePsiElement *OnePsiA;
 | 
|---|
| 1906 |   char spin[12], suffix[18];
 | 
|---|
| 1907 |   int i,j,l;
 | 
|---|
| 1908 | 
 | 
|---|
| 1909 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
 | 
|---|
| 1910 | 
 | 
|---|
| 1911 |   switch (Lat->Psi.PsiST) {
 | 
|---|
| 1912 |   case SpinDouble:
 | 
|---|
| 1913 |     strcpy(suffix,".spread.csv");    
 | 
|---|
| 1914 |     strcpy(spin,"SpinDouble");
 | 
|---|
| 1915 |     break;
 | 
|---|
| 1916 |   case SpinUp:
 | 
|---|
| 1917 |     strcpy(suffix,".spread_up.csv");    
 | 
|---|
| 1918 |     strcpy(spin,"SpinUp");
 | 
|---|
| 1919 |     break;
 | 
|---|
| 1920 |   case SpinDown:
 | 
|---|
| 1921 |     strcpy(suffix,".spread_down.csv");    
 | 
|---|
| 1922 |     strcpy(spin,"SpinDown");
 | 
|---|
| 1923 |     break;
 | 
|---|
| 1924 |   }
 | 
|---|
| 1925 |   if (P->Par.me_comm_ST == 0) {
 | 
|---|
| 1926 |     if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level 
 | 
|---|
| 1927 |       OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
 | 
|---|
| 1928 |     else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
 | 
|---|
| 1929 |       OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
 | 
|---|
| 1930 |     if (F->SpreadFile == NULL) {
 | 
|---|
| 1931 |       Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n");
 | 
|---|
| 1932 |     } else {
 | 
|---|
| 1933 |       fprintf(F->SpreadFile,"#===== W A N N I E R  C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
 | 
|---|
| 1934 |       fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
 | 
|---|
| 1935 |     }
 | 
|---|
| 1936 |   }
 | 
|---|
| 1937 |   
 | 
|---|
| 1938 |   // put (new) WannierCentres into local ones and into file
 | 
|---|
| 1939 |   i=-1;
 | 
|---|
| 1940 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
| 1941 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 1942 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 1943 |       i++;  // increase l if it is occupied wave function
 | 
|---|
| 1944 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
 | 
|---|
| 1945 |         for (j=0;j<NDIM;j++)
 | 
|---|
| 1946 |           Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
 | 
|---|
| 1947 |         Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierSpread[i];
 | 
|---|
| 1948 |       }
 | 
|---|
| 1949 |       if (P->Par.me_comm_ST == 0) 
 | 
|---|
| 1950 |         fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]); 
 | 
|---|
| 1951 |     }
 | 
|---|
| 1952 |   }    
 | 
|---|
| 1953 |   if (P->Par.me_comm_ST == 0) {
 | 
|---|
| 1954 |     fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
 | 
|---|
| 1955 |     fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
 | 
|---|
| 1956 |     fflush(F->SpreadFile);
 | 
|---|
| 1957 |   }
 | 
|---|
| 1958 | }
 | 
|---|
| 1959 | 
 | 
|---|
| 1960 | 
 | 
|---|
| 1961 | /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
 | 
|---|
| 1962 |  * \param *P Problem at hand
 | 
|---|
| 1963 |  * \return 1 - success, 0 - failure
 | 
|---|
| 1964 |  */
 | 
|---|
| 1965 | int ParseWannierFile(struct Problem *P)
 | 
|---|
| 1966 | {
 | 
|---|
| 1967 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 1968 |   struct RunStruct *R = &P->R;
 | 
|---|
| 1969 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 1970 |   struct OnePsiElement *OnePsiA;
 | 
|---|
| 1971 |   int i,l,j, msglen;
 | 
|---|
| 1972 |   FILE *SpreadFile;
 | 
|---|
| 1973 |   char *tagname = NULL;
 | 
|---|
| 1974 |   char suffix[18];
 | 
|---|
| 1975 |   double WannierCentre[NDIM+1]; // combined centre and spread
 | 
|---|
| 1976 |   MPI_Status status;
 | 
|---|
| 1977 |   int signal = 0; // 1 - ok, 0 - error
 | 
|---|
| 1978 | 
 | 
|---|
| 1979 |   switch (Lat->Psi.PsiST) {
 | 
|---|
| 1980 |   case SpinDouble:
 | 
|---|
| 1981 |     strcpy(suffix,".spread.csv");    
 | 
|---|
| 1982 |     break;
 | 
|---|
| 1983 |   case SpinUp:
 | 
|---|
| 1984 |     strcpy(suffix,".spread_up.csv");    
 | 
|---|
| 1985 |     break;
 | 
|---|
| 1986 |   case SpinDown:
 | 
|---|
| 1987 |     strcpy(suffix,".spread_down.csv");    
 | 
|---|
| 1988 |     break;
 | 
|---|
| 1989 |   }
 | 
|---|
| 1990 |   if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me);
 | 
|---|
| 1991 |   
 | 
|---|
| 1992 |   if (P->Par.me_comm_ST == 0) {
 | 
|---|
| 1993 |     tagname = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "ParseWannierFile: *tagname");
 | 
|---|
| 1994 |     if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
 | 
|---|
| 1995 |       if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 1996 |         Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 1997 |       return 0;
 | 
|---|
| 1998 |       //Error(SomeError,"ParseWannierFile: Opening failed\n");
 | 
|---|
| 1999 |     }
 | 
|---|
| 2000 |     signal = 1;
 | 
|---|
| 2001 |     if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 2002 |       Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 2003 |   } else {
 | 
|---|
| 2004 |     if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 2005 |       Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 2006 |     if (signal == 0)
 | 
|---|
| 2007 |       return 0;
 | 
|---|
| 2008 |   }
 | 
|---|
| 2009 |   i=-1;
 | 
|---|
| 2010 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
| 2011 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
| 2012 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
| 2013 |       i++;  // increase l if it is occupied wave function
 | 
|---|
| 2014 |       if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
 | 
|---|
| 2015 |         sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
 | 
|---|
| 2016 |         signal = 0;
 | 
|---|
| 2017 |         if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
 | 
|---|
| 2018 |           //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
 | 
|---|
| 2019 |           if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 2020 |             Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 2021 |           return 0;
 | 
|---|
| 2022 |         }
 | 
|---|
| 2023 |         if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
 | 
|---|
| 2024 |           //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
 | 
|---|
| 2025 |           if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 2026 |             Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 2027 |           return 0;
 | 
|---|
| 2028 |         }
 | 
|---|
| 2029 |         signal = 1;
 | 
|---|
| 2030 |         if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 2031 |           Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 2032 |       } else {
 | 
|---|
| 2033 |         if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
| 2034 |           Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
| 2035 |         if (signal == 0)
 | 
|---|
| 2036 |           return 0;
 | 
|---|
| 2037 |       }
 | 
|---|
| 2038 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
 | 
|---|
| 2039 |         if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
 | 
|---|
| 2040 |           if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
 | 
|---|
| 2041 |             Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
 | 
|---|
| 2042 |             //return 0;
 | 
|---|
| 2043 |           MPI_Get_count(&status, MPI_DOUBLE, &msglen);
 | 
|---|
| 2044 |           if (msglen != NDIM+1) 
 | 
|---|
| 2045 |             Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
 | 
|---|
| 2046 |             //return 0;
 | 
|---|
| 2047 |         }
 | 
|---|
| 2048 |         if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
 | 
|---|
| 2049 |           Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
 | 
|---|
| 2050 |           //return 0;
 | 
|---|
| 2051 |         // and store 'em (for all who have this Psi local)
 | 
|---|
| 2052 |         if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
 | 
|---|
| 2053 |         for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
 | 
|---|
| 2054 |         Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
 | 
|---|
| 2055 |         //if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
 | 
|---|
| 2056 |       } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
 | 
|---|
| 2057 |         if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
 | 
|---|
| 2058 |           Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
 | 
|---|
| 2059 |           //return 0;
 | 
|---|
| 2060 |       }
 | 
|---|
| 2061 |     }
 | 
|---|
| 2062 |   }
 | 
|---|
| 2063 |   if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) { 
 | 
|---|
| 2064 |     fclose(SpreadFile);
 | 
|---|
| 2065 |     Free(tagname, "ParseWannierFile: *tagname");
 | 
|---|
| 2066 |   }
 | 
|---|
| 2067 |   //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
 | 
|---|
| 2068 |   return 1;
 | 
|---|
| 2069 | }
 | 
|---|
| 2070 | 
 | 
|---|
| 2071 | /** Calculates the spread of orbital \a i.
 | 
|---|
| 2072 |  * Stored in OnePsiElementAddData#WannierSpread.
 | 
|---|
| 2073 |  * \param *P Problem at hand
 | 
|---|
| 2074 |  * \param i i-th wave function (note "extra" ones are not counted!)
 | 
|---|
| 2075 |  * \return spread \f$\sigma^2_{A^{(k)}}\f$
 | 
|---|
| 2076 |  */
 | 
|---|
| 2077 | double CalculateSpread(struct Problem *P, int i) {
 | 
|---|
| 2078 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
| 2079 |   struct RunStruct *R = &P->R;
 | 
|---|
| 2080 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
| 2081 |   struct LatticeLevel *Lev0 = R->Lev0;
 | 
|---|
| 2082 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
| 2083 |   struct Density *Dens0 = Lev0->Dens;
 | 
|---|
| 2084 |   struct fft_plan_3d *plan = Lat->plan; 
 | 
|---|
| 2085 |   fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
 | 
|---|
| 2086 |   fftw_real *PsiCR = (fftw_real *)PsiC;
 | 
|---|
| 2087 |   fftw_complex *work = Dens0->DensityCArray[Temp2Density];
 | 
|---|
| 2088 |   fftw_real **HGcR = &Dens0->DensityArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
 | 
|---|
| 2089 |   fftw_complex **HGcRC = (fftw_complex**)HGcR;
 | 
|---|
| 2090 |   fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array
 | 
|---|
| 2091 |   fftw_real **HGcR2 = (fftw_real**)HGcR2C;
 | 
|---|
| 2092 |   MPI_Status status;
 | 
|---|
| 2093 |   struct OnePsiElement *OnePsiA, *LOnePsiA;
 | 
|---|
| 2094 |   int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
 | 
|---|
| 2095 |   fftw_complex *LPsiDatA=NULL;
 | 
|---|
| 2096 |   int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
 | 
|---|
| 2097 |   N0 = LevS->Plan0.plan->local_nx;
 | 
|---|
| 2098 |   N = LevS->Plan0.plan->N;
 | 
|---|
| 2099 |   const int NUpx = LevS->NUp[0];
 | 
|---|
| 2100 |   const int NUpy = LevS->NUp[1];
 | 
|---|
| 2101 |   const int NUpz = LevS->NUp[2];
 | 
|---|
| 2102 |   double a_ij, b_ij, A_ij, B_ij;
 | 
|---|
| 2103 |   double tmp, tmp2, spread = 0;
 | 
|---|
| 2104 |   double **cos_lookup, **sin_lookup;
 | 
|---|
| 2105 |   
 | 
|---|
| 2106 |   b_ij = 0;
 | 
|---|
| 2107 | 
 | 
|---|
| 2108 |   CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N);
 | 
|---|
| 2109 | 
 | 
|---|
| 2110 |   // fill matrices
 | 
|---|
| 2111 |   OnePsiA = &Psi->AllPsiStatus[i];    // grab the desired OnePsiA
 | 
|---|
| 2112 |   if (OnePsiA->PsiType != Extra) {   // drop if extra one
 | 
|---|
| 2113 |     if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
| 2114 |        LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
 | 
|---|
| 2115 |     else 
 | 
|---|
| 2116 |        LOnePsiA = NULL;
 | 
|---|
| 2117 |     if (LOnePsiA == NULL) {   // if it's not local ... receive it from respective process into TempPsi
 | 
|---|
| 2118 |       RecvSource = OnePsiA->my_color_comm_ST_Psi;
 | 
|---|
| 2119 |       MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
 | 
|---|
| 2120 |       LPsiDatA=LevS->LPsi->TempPsi;
 | 
|---|
| 2121 |     } else {                  // .. otherwise send it to all other processes (Max_me... - 1)
 | 
|---|
| 2122 |       for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
 | 
|---|
| 2123 |         if (p != OnePsiA->my_color_comm_ST_Psi) 
 | 
|---|
| 2124 |           MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
 | 
|---|
| 2125 |       LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
 | 
|---|
| 2126 |     } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
 | 
|---|
| 2127 |       
 | 
|---|
| 2128 |     CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
 | 
|---|
| 2129 |     // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
 | 
|---|
| 2130 |     for (n0=0;n0<N0;n0++)  
 | 
|---|
| 2131 |       for (n[1]=0;n[1]<N[1];n[1]++) 
 | 
|---|
| 2132 |         for (n[2]=0;n[2]<N[2];n[2]++) {
 | 
|---|
| 2133 |           i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
 | 
|---|
| 2134 |           iS = n[2]+N[2]*(n[1]+N[1]*n0);
 | 
|---|
| 2135 |           n[0] = n0 + LevS->Plan0.plan->start_nx;
 | 
|---|
| 2136 |           for (k=0;k<max_operators;k+=2) {
 | 
|---|
| 2137 |             tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
 | 
|---|
| 2138 |             tmp2 = PsiCR[i0] /LevS->MaxN;
 | 
|---|
| 2139 |             // check lookup
 | 
|---|
| 2140 |             if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
 | 
|---|
| 2141 |               fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);   
 | 
|---|
| 2142 |               Error(SomeError, "Lookup table does not match real value!");
 | 
|---|
| 2143 |             }
 | 
|---|
| 2144 | //            HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
 | 
|---|
| 2145 | //            HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
 | 
|---|
| 2146 | //            HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
 | 
|---|
| 2147 | //            HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
 | 
|---|
| 2148 |             HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
 | 
|---|
| 2149 |             HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
 | 
|---|
| 2150 |             HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
 | 
|---|
| 2151 |             HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
 | 
|---|
| 2152 |           }
 | 
|---|
| 2153 |         }
 | 
|---|
| 2154 |     for (k=0;k<max_operators;k++) {
 | 
|---|
| 2155 |       fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
 | 
|---|
| 2156 |       fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
 | 
|---|
| 2157 |     }
 | 
|---|
| 2158 | 
 | 
|---|
| 2159 | 
 | 
|---|
| 2160 |     for (k=0;k<max_operators;k++) {
 | 
|---|
| 2161 |       a_ij = 0;
 | 
|---|
| 2162 |       //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
 | 
|---|
| 2163 |       // sum directly in a_ij and b_ij the two desired terms
 | 
|---|
| 2164 |       g=0;
 | 
|---|
| 2165 |       if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
| 2166 |         Index = LevS->GArray[g].Index;
 | 
|---|
| 2167 |         a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
 | 
|---|
| 2168 |         b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
 | 
|---|
| 2169 |         g++;
 | 
|---|
| 2170 |       }
 | 
|---|
| 2171 |       for (; g < LevS->MaxG; g++) {
 | 
|---|
| 2172 |         Index = LevS->GArray[g].Index;
 | 
|---|
| 2173 |         a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
 | 
|---|
| 2174 |         b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
 | 
|---|
| 2175 |       } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
 | 
|---|
| 2176 |       MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
| 2177 |       spread += pow(A_ij,2);
 | 
|---|
| 2178 |     }
 | 
|---|
| 2179 |   }
 | 
|---|
| 2180 |   MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
| 2181 | 
 | 
|---|
| 2182 |   // store spread in OnePsiElementAdd
 | 
|---|
| 2183 |   Psi->AddData[i].WannierSpread = B_ij - spread;
 | 
|---|
| 2184 | 
 | 
|---|
| 2185 |   FreeSinCosLookupTable(cos_lookup,sin_lookup);
 | 
|---|
| 2186 |   
 | 
|---|
| 2187 |   return (B_ij - spread);
 | 
|---|
| 2188 | }
 | 
|---|