| 1 | /** \file wannier.c | 
|---|
| 2 | * Maximally Localized Wannier Functions. | 
|---|
| 3 | * | 
|---|
| 4 | * Contains the on function that minimises the spread of all orbitals in one rush in a parallel | 
|---|
| 5 | * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to | 
|---|
| 6 | * calculate the spread of a specific orbital, which may be useful in checking on the change of | 
|---|
| 7 | * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex. | 
|---|
| 8 | * | 
|---|
| 9 | Project: ParallelCarParrinello | 
|---|
| 10 | \author Frederik Heber | 
|---|
| 11 | \date 2006 | 
|---|
| 12 |  | 
|---|
| 13 | File: wannier.c | 
|---|
| 14 | $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $ | 
|---|
| 15 | */ | 
|---|
| 16 |  | 
|---|
| 17 | #include <math.h> | 
|---|
| 18 | #include <gsl/gsl_math.h> | 
|---|
| 19 | #include <gsl/gsl_eigen.h> | 
|---|
| 20 | #include <gsl/gsl_matrix.h> | 
|---|
| 21 | #include <gsl/gsl_vector.h> | 
|---|
| 22 | #include <gsl/gsl_complex.h> | 
|---|
| 23 | #include <gsl/gsl_complex_math.h> | 
|---|
| 24 | #include <gsl/gsl_sort_vector.h> | 
|---|
| 25 | #include <gsl/gsl_heapsort.h> | 
|---|
| 26 | #include <gsl/gsl_blas.h> | 
|---|
| 27 | #include <string.h> | 
|---|
| 28 |  | 
|---|
| 29 | #include "data.h" | 
|---|
| 30 | #include "density.h" | 
|---|
| 31 | #include "errors.h" | 
|---|
| 32 | #include "gramsch.h" | 
|---|
| 33 | #include "helpers.h" | 
|---|
| 34 | #include "init.h" | 
|---|
| 35 | #include "myfft.h" | 
|---|
| 36 | #include "mymath.h" | 
|---|
| 37 | #include "output.h" | 
|---|
| 38 | #include "perturbed.h" | 
|---|
| 39 | #include "wannier.h" | 
|---|
| 40 |  | 
|---|
| 41 |  | 
|---|
| 42 | #define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread | 
|---|
| 43 | #define type Occupied | 
|---|
| 44 |  | 
|---|
| 45 |  | 
|---|
| 46 | /** Converts type fftw_complex to gsl_complex. | 
|---|
| 47 | * \param a complex number | 
|---|
| 48 | * \return b complex number | 
|---|
| 49 | */ | 
|---|
| 50 | gsl_complex convertComplex (fftw_complex a) { | 
|---|
| 51 | return gsl_complex_rect(c_re(a),c_im(a)); | 
|---|
| 52 | } | 
|---|
| 53 |  | 
|---|
| 54 | /** "merry go round" implementation for parallel index ordering. | 
|---|
| 55 | * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is | 
|---|
| 56 | * performed which generates once each possible pairing. | 
|---|
| 57 | * \param *top index array 1 | 
|---|
| 58 | * \param *bot index array 2 | 
|---|
| 59 | * \param m N/2, where N is the matrix row/column dimension | 
|---|
| 60 | * \note taken from [Golub, Matrix computations, 1989, p451] | 
|---|
| 61 | */ | 
|---|
| 62 | void MerryGoRoundIndices(int *top, int *bot, int m) | 
|---|
| 63 | { | 
|---|
| 64 | int *old_top, *old_bot; | 
|---|
| 65 | int k; | 
|---|
| 66 | old_top = (int *) Malloc(sizeof(int)*m, "music: old_top"); | 
|---|
| 67 | old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot"); | 
|---|
| 68 | /*  fprintf(stderr,"oldtop\t"); | 
|---|
| 69 | for (k=0;k<m;k++) | 
|---|
| 70 | fprintf(stderr,"%i\t", top[k]); | 
|---|
| 71 | fprintf(stderr,"\n"); | 
|---|
| 72 | fprintf(stderr,"oldbot\t"); | 
|---|
| 73 | for (k=0;k<m;k++) | 
|---|
| 74 | fprintf(stderr,"%i\t", bot[k]); | 
|---|
| 75 | fprintf(stderr,"\n");*/ | 
|---|
| 76 | // first copy arrays | 
|---|
| 77 | for (k=0;k<m;k++) { | 
|---|
| 78 | old_top[k] = top[k]; | 
|---|
| 79 | old_bot[k] = bot[k]; | 
|---|
| 80 | } | 
|---|
| 81 | // then let the music play | 
|---|
| 82 | for (k=0;k<m;k++) { | 
|---|
| 83 | if (k==1) | 
|---|
| 84 | top[k] = old_bot[0]; | 
|---|
| 85 | else if (k > 1) | 
|---|
| 86 | top[k] = old_top[k-1]; | 
|---|
| 87 | if (k==m-1) | 
|---|
| 88 | bot[k] = old_top[k]; | 
|---|
| 89 | else | 
|---|
| 90 | bot[k] = old_bot[k+1]; | 
|---|
| 91 | } | 
|---|
| 92 | /*  fprintf(stderr,"top\t"); | 
|---|
| 93 | for (k=0;k<m;k++) | 
|---|
| 94 | fprintf(stderr,"%i\t", top[k]); | 
|---|
| 95 | fprintf(stderr,"\n"); | 
|---|
| 96 | fprintf(stderr,"bot\t"); | 
|---|
| 97 | for (k=0;k<m;k++) | 
|---|
| 98 | fprintf(stderr,"%i\t", bot[k]); | 
|---|
| 99 | fprintf(stderr,"\n");*/ | 
|---|
| 100 | // and finito | 
|---|
| 101 | Free(old_top, "MerryGoRoundIndices: old_top"); | 
|---|
| 102 | Free(old_bot, "MerryGoRoundIndices: old_bot"); | 
|---|
| 103 | } | 
|---|
| 104 |  | 
|---|
| 105 | /** merry-go-round for matrix columns. | 
|---|
| 106 | * The trick here is that we must be aware of multiple rotations per process, thus only some of the | 
|---|
| 107 | * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various | 
|---|
| 108 | * pointers to the matrix columns within the local array. | 
|---|
| 109 | * \param comm communicator for circulation | 
|---|
| 110 | * \param *Aloc local array of columns | 
|---|
| 111 | * \param Num entries per column | 
|---|
| 112 | * \param max_rounds number of column pairs in \a *Around | 
|---|
| 113 | * \param k offset for tag | 
|---|
| 114 | * \param tagS0 MPI tag for sending left column | 
|---|
| 115 | * \param tagS1 MPI tag for sending right column | 
|---|
| 116 | * \param tagR0 MPI tag for receiving left column | 
|---|
| 117 | * \param tagR1 MPI tag for receiving right column | 
|---|
| 118 | */ | 
|---|
| 119 | void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) { | 
|---|
| 120 | //double *A_locS1, *A_locS2;  // local columns of A[k] | 
|---|
| 121 | //double *A_locR1, *A_locR2;  // local columns of A[k] | 
|---|
| 122 | MPI_Request requestS0, requestS1, requestR0, requestR1; | 
|---|
| 123 | MPI_Status status; | 
|---|
| 124 | int ProcRank, ProcNum; | 
|---|
| 125 | int l; | 
|---|
| 126 | MPI_Comm_size (comm, &ProcNum); | 
|---|
| 127 | MPI_Comm_rank (comm, &ProcRank); | 
|---|
| 128 | double *Abuffer1, *Abuffer2;  // mark the columns that are circulated | 
|---|
| 129 |  | 
|---|
| 130 | //fprintf(stderr,"shifting..."); | 
|---|
| 131 | if (ProcRank == 0) { | 
|---|
| 132 | if (max_rounds > 1) { | 
|---|
| 133 | // get last left column | 
|---|
| 134 | Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column | 
|---|
| 135 | MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0); | 
|---|
| 136 | } else { | 
|---|
| 137 | // get right column | 
|---|
| 138 | Abuffer1 = Aloc[1]; // note down the free column | 
|---|
| 139 | MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1,  tagS1+2*k, comm, &requestS0); | 
|---|
| 140 | } | 
|---|
| 141 |  | 
|---|
| 142 | //fprintf(stderr,"...left columns..."); | 
|---|
| 143 | for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right | 
|---|
| 144 | Aloc[l] = Aloc[l-2]; | 
|---|
| 145 |  | 
|---|
| 146 | if (max_rounds > 1) { | 
|---|
| 147 | //fprintf(stderr,"...first right..."); | 
|---|
| 148 | Aloc[2] = Aloc[1]; // get first right column | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | //fprintf(stderr,"...right columns..."); | 
|---|
| 152 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left | 
|---|
| 153 | Aloc[l] = Aloc[l+2]; | 
|---|
| 154 |  | 
|---|
| 155 | //fprintf(stderr,"...last right..."); | 
|---|
| 156 | Aloc[(2*max_rounds-1)] = Abuffer1; | 
|---|
| 157 | MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1); | 
|---|
| 158 |  | 
|---|
| 159 | } else if (ProcRank == ProcNum-1) { | 
|---|
| 160 | //fprintf(stderr,"...first right..."); | 
|---|
| 161 | // get first right column | 
|---|
| 162 | Abuffer2 = Aloc[1]; // note down the free column | 
|---|
| 163 | MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1); | 
|---|
| 164 |  | 
|---|
| 165 | //fprintf(stderr,"...right columns..."); | 
|---|
| 166 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left | 
|---|
| 167 | Aloc[(l)] = Aloc[(l+2)]; | 
|---|
| 168 |  | 
|---|
| 169 | //fprintf(stderr,"...last right..."); | 
|---|
| 170 | Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column | 
|---|
| 171 |  | 
|---|
| 172 | //fprintf(stderr,"...left columns..."); | 
|---|
| 173 | for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right | 
|---|
| 174 | Aloc[(l)] = Aloc[(l-2)]; | 
|---|
| 175 |  | 
|---|
| 176 | //fprintf(stderr,"...first left..."); | 
|---|
| 177 | //    if (max_rounds > 1) | 
|---|
| 178 | Aloc[0] = Abuffer2; // get first left column | 
|---|
| 179 | MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0); | 
|---|
| 180 |  | 
|---|
| 181 | } else { | 
|---|
| 182 | // get last left column | 
|---|
| 183 | MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0); | 
|---|
| 184 | Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column | 
|---|
| 185 |  | 
|---|
| 186 | //fprintf(stderr,"...first right..."); | 
|---|
| 187 | // get first right column | 
|---|
| 188 | MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1); | 
|---|
| 189 | Abuffer2 = Aloc[1]; // note down the free column | 
|---|
| 190 |  | 
|---|
| 191 | //fprintf(stderr,"...left columns..."); | 
|---|
| 192 | for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right | 
|---|
| 193 | Aloc[(l)] = Aloc[(l-2)]; | 
|---|
| 194 |  | 
|---|
| 195 | //fprintf(stderr,"...right columns..."); | 
|---|
| 196 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left | 
|---|
| 197 | Aloc[(l)] = Aloc[(l+2)]; | 
|---|
| 198 |  | 
|---|
| 199 | //fprintf(stderr,"...first left..."); | 
|---|
| 200 | Aloc[0] = Abuffer1; // get first left column | 
|---|
| 201 | MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0); | 
|---|
| 202 |  | 
|---|
| 203 | //fprintf(stderr,"...last right..."); | 
|---|
| 204 | Aloc[(2*max_rounds-1)] = Abuffer2; | 
|---|
| 205 | MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1); | 
|---|
| 206 | } | 
|---|
| 207 |  | 
|---|
| 208 | //fprintf(stderr,"...waiting..."); | 
|---|
| 209 | if (ProcRank != ProcNum-1) | 
|---|
| 210 | MPI_Wait(&requestS0, &status); | 
|---|
| 211 | if (ProcRank != 0)  // first left column | 
|---|
| 212 | MPI_Wait(&requestR0, &status); | 
|---|
| 213 | if (ProcRank != 0) | 
|---|
| 214 | MPI_Wait(&requestS1, &status); | 
|---|
| 215 | if (ProcRank != ProcNum-1) | 
|---|
| 216 | MPI_Wait(&requestR1, &status); | 
|---|
| 217 | //fprintf(stderr,"...done\n"); | 
|---|
| 218 | } | 
|---|
| 219 |  | 
|---|
| 220 | /** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group. | 
|---|
| 221 | * \param *P Problem at hand | 
|---|
| 222 | * \param AllocNum number of rows in matrix | 
|---|
| 223 | * \return address of MPI communicator | 
|---|
| 224 | */ | 
|---|
| 225 | #ifdef HAVE_INLINE | 
|---|
| 226 | inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum) | 
|---|
| 227 | #else | 
|---|
| 228 | MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum) | 
|---|
| 229 | #endif | 
|---|
| 230 | { | 
|---|
| 231 | MPI_Comm *comm = &P->Par.comm_ST; | 
|---|
| 232 |  | 
|---|
| 233 | //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi); | 
|---|
| 234 | //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me); | 
|---|
| 235 | if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) {  // all parallel | 
|---|
| 236 | comm = &P->Par.comm_ST; | 
|---|
| 237 | //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n"); | 
|---|
| 238 | } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first | 
|---|
| 239 | if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel | 
|---|
| 240 | comm = &P->Par.comm_ST_Psi; | 
|---|
| 241 | //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n"); | 
|---|
| 242 | } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel | 
|---|
| 243 | comm = &P->Par.comm_ST_PsiT; | 
|---|
| 244 | //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n"); | 
|---|
| 245 | } | 
|---|
| 246 | } else { | 
|---|
| 247 | if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel | 
|---|
| 248 | comm = &P->Par.comm_ST_PsiT; | 
|---|
| 249 | //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n"); | 
|---|
| 250 | } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel | 
|---|
| 251 | comm = &P->Par.comm_ST_Psi; | 
|---|
| 252 | //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n"); | 
|---|
| 253 | } | 
|---|
| 254 | } | 
|---|
| 255 | return comm; | 
|---|
| 256 | } | 
|---|
| 257 |  | 
|---|
| 258 | /** Allocates and fills Lookup table for sin/cos values at each grid node. | 
|---|
| 259 | * \param ***cos_table pointer to two-dimensional lookup table for cosine values | 
|---|
| 260 | * \param ***sin_table pointer to two-dimensional lookup table for sine values | 
|---|
| 261 | * \param *N array with number of nodes per \a NDIM axis | 
|---|
| 262 | */ | 
|---|
| 263 | void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N) | 
|---|
| 264 | { | 
|---|
| 265 | int i, j; | 
|---|
| 266 | double argument; | 
|---|
| 267 | double **cos_lookup, **sin_lookup; | 
|---|
| 268 |  | 
|---|
| 269 | // create lookup table for sin/cos values | 
|---|
| 270 | cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); | 
|---|
| 271 | sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); | 
|---|
| 272 | for (i=0;i<NDIM;i++) { | 
|---|
| 273 | // allocate memory | 
|---|
| 274 | cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup"); | 
|---|
| 275 | sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup"); | 
|---|
| 276 |  | 
|---|
| 277 | // reset arrays | 
|---|
| 278 | SetArrayToDouble0(cos_lookup[i],N[i]); | 
|---|
| 279 | SetArrayToDouble0(sin_lookup[i],N[i]); | 
|---|
| 280 |  | 
|---|
| 281 | // create lookup values | 
|---|
| 282 | for (j=0;j<N[i];j++) { | 
|---|
| 283 | argument = 2*PI/(double)N[i]*(double)j; | 
|---|
| 284 | cos_lookup[i][j] = cos(argument); | 
|---|
| 285 | sin_lookup[i][j] = sin(argument); | 
|---|
| 286 | } | 
|---|
| 287 | } | 
|---|
| 288 | *cos_table = cos_lookup; | 
|---|
| 289 | *sin_table = sin_lookup; | 
|---|
| 290 | } | 
|---|
| 291 |  | 
|---|
| 292 | /** Frees memory allocated during CreateSinCosLookupTable(). | 
|---|
| 293 | * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values | 
|---|
| 294 | * \param ***sin_lookup pointer to two-dimensional lookup table for sine values | 
|---|
| 295 | */ | 
|---|
| 296 | void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup) | 
|---|
| 297 | { | 
|---|
| 298 | int i; | 
|---|
| 299 | // free lookups | 
|---|
| 300 | for (i=0;i<NDIM;i++) { | 
|---|
| 301 | Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]"); | 
|---|
| 302 | Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]"); | 
|---|
| 303 | } | 
|---|
| 304 | Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup"); | 
|---|
| 305 | Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup"); | 
|---|
| 306 | } | 
|---|
| 307 |  | 
|---|
| 308 | /** Fills the entries of the six variance matrices. | 
|---|
| 309 | * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the | 
|---|
| 310 | * wave function and by diagonalization one obtains the unitary transformation with which the Psis are | 
|---|
| 311 | * treated afterwards. | 
|---|
| 312 | * \param *P Problem at hand | 
|---|
| 313 | * \param AllocNum number of rows/columns | 
|---|
| 314 | * \param **A pointer to variance matrices | 
|---|
| 315 | * \sa ComputeMLWF() - master function. | 
|---|
| 316 | */ | 
|---|
| 317 | void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A) | 
|---|
| 318 | { | 
|---|
| 319 | struct Lattice *Lat = &P->Lat; | 
|---|
| 320 | struct RunStruct *R = &P->R; | 
|---|
| 321 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 322 | struct LatticeLevel *Lev0 = R->Lev0; | 
|---|
| 323 | struct LatticeLevel *LevS = R->LevS; | 
|---|
| 324 | struct Density *Dens0 = Lev0->Dens; | 
|---|
| 325 | struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB; | 
|---|
| 326 | struct fft_plan_3d *plan = Lat->plan; | 
|---|
| 327 | fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; | 
|---|
| 328 | fftw_real *PsiCR = (fftw_real *)PsiC; | 
|---|
| 329 | fftw_complex *work = Dens0->DensityCArray[Temp2Density]; | 
|---|
| 330 | fftw_real **HGcR = &Dens0->DensityArray[HGDensity];  // use HGDensity, 4x Gap..Density, TempDensity as a storage array | 
|---|
| 331 | fftw_complex **HGcRC = (fftw_complex**)HGcR; | 
|---|
| 332 | fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array | 
|---|
| 333 | fftw_real **HGcR2 = (fftw_real**)HGcR2C; | 
|---|
| 334 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; | 
|---|
| 335 | MPI_Status status; | 
|---|
| 336 | fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; | 
|---|
| 337 | int n[NDIM],n0,i0,iS, Index; | 
|---|
| 338 | int Num = Psi->NoOfPsis;  // is number of occupied plus unoccupied states for rows | 
|---|
| 339 | int N0 = LevS->Plan0.plan->local_nx; | 
|---|
| 340 | int *N = LevS->Plan0.plan->N; | 
|---|
| 341 | const int NUpx = LevS->NUp[0]; | 
|---|
| 342 | const int NUpy = LevS->NUp[1]; | 
|---|
| 343 | const int NUpz = LevS->NUp[2]; | 
|---|
| 344 | double argument, PsiAtNode; | 
|---|
| 345 | int e,g,i,j,k,l,m,p,u; | 
|---|
| 346 | double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0; | 
|---|
| 347 | double **cos_lookup =  NULL,**sin_lookup = NULL; | 
|---|
| 348 |  | 
|---|
| 349 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me); | 
|---|
| 350 |  | 
|---|
| 351 | debug(P, "Creating Lookup Table"); | 
|---|
| 352 | CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N); | 
|---|
| 353 |  | 
|---|
| 354 | debug(P, "Calculating each entry of variance matrices"); | 
|---|
| 355 | l=-1; // to access U matrix element (0..Num-1) | 
|---|
| 356 | // fill the matrices | 
|---|
| 357 | for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions | 
|---|
| 358 | OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA | 
|---|
| 359 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
| 360 | l++;  // increase l if it is non-extra wave function | 
|---|
| 361 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
| 362 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; | 
|---|
| 363 | else | 
|---|
| 364 | LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients! | 
|---|
| 365 |  | 
|---|
| 366 | //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0); | 
|---|
| 367 | if (LPsiDatA != NULL) { | 
|---|
| 368 | CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); | 
|---|
| 369 | // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! | 
|---|
| 370 | for (n0=0;n0<N0;n0++) | 
|---|
| 371 | for (n[1]=0;n[1]<N[1];n[1]++) | 
|---|
| 372 | for (n[2]=0;n[2]<N[2];n[2]++) { | 
|---|
| 373 | i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx); | 
|---|
| 374 | iS = n[2]+N[2]*(n[1]+N[1]*n0); | 
|---|
| 375 | n[0] = n0 + LevS->Plan0.plan->start_nx; | 
|---|
| 376 | for (k=0;k<max_operators;k+=2) { | 
|---|
| 377 | e = k/2; | 
|---|
| 378 | argument = 2.*PI/(double)(N[e])*(double)(n[e]); | 
|---|
| 379 | PsiAtNode = PsiCR[i0] /LevS->MaxN; | 
|---|
| 380 | // check lookup | 
|---|
| 381 | if (!l) // perform check on first wave function only | 
|---|
| 382 | if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) { | 
|---|
| 383 | Error(SomeError, "Lookup table does not match real value!"); | 
|---|
| 384 | } | 
|---|
| 385 | HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */ | 
|---|
| 386 | HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */ | 
|---|
| 387 | HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */ | 
|---|
| 388 | HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ | 
|---|
| 389 | } | 
|---|
| 390 | } | 
|---|
| 391 | for (u=0;u<max_operators;u++) { | 
|---|
| 392 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work); | 
|---|
| 393 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work); | 
|---|
| 394 | } | 
|---|
| 395 | } | 
|---|
| 396 | m = -1; // to access U matrix element (0..Num-1) | 
|---|
| 397 | for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions | 
|---|
| 398 | OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB | 
|---|
| 399 | if (OnePsiB->PsiType == type) {   // drop all but occupied ones | 
|---|
| 400 | m++;  // increase m if it is non-extra wave function | 
|---|
| 401 | if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
| 402 | LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; | 
|---|
| 403 | else | 
|---|
| 404 | LOnePsiB = NULL; | 
|---|
| 405 | if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi | 
|---|
| 406 | RecvSource = OnePsiB->my_color_comm_ST_Psi; | 
|---|
| 407 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); | 
|---|
| 408 | LPsiDatB=LevS->LPsi->TempPsi; | 
|---|
| 409 | } else {                  // .. otherwise send it to all other processes (Max_me... - 1) | 
|---|
| 410 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) | 
|---|
| 411 | if (p != OnePsiB->my_color_comm_ST_Psi) | 
|---|
| 412 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); | 
|---|
| 413 | LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; | 
|---|
| 414 | } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received | 
|---|
| 415 |  | 
|---|
| 416 | for (u=0;u<max_operators;u++) { | 
|---|
| 417 | a_ij = 0; | 
|---|
| 418 | b_ij = 0; | 
|---|
| 419 | if (LPsiDatA != NULL) { // calculate, reduce and send to all ... | 
|---|
| 420 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u); | 
|---|
| 421 | g=0; | 
|---|
| 422 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
| 423 | Index = LevS->GArray[g].Index; | 
|---|
| 424 | a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im); | 
|---|
| 425 | b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im); | 
|---|
| 426 | g++; | 
|---|
| 427 | } | 
|---|
| 428 | for (; g < LevS->MaxG; g++) { | 
|---|
| 429 | Index = LevS->GArray[g].Index; | 
|---|
| 430 | a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im); | 
|---|
| 431 | b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im); | 
|---|
| 432 | } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...) | 
|---|
| 433 | // sum up elements from all coefficients sharing processes | 
|---|
| 434 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
| 435 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
| 436 | a_ij = A_ij; | 
|---|
| 437 | b_ij = B_ij; | 
|---|
| 438 | // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!) | 
|---|
| 439 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
| 440 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
| 441 | } else { // receive ... | 
|---|
| 442 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
| 443 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
| 444 | } | 
|---|
| 445 | // ... and store | 
|---|
| 446 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij); | 
|---|
| 447 | //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij); | 
|---|
| 448 | gsl_matrix_set(A[u], l, m, A_ij); | 
|---|
| 449 | gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m)); | 
|---|
| 450 | } | 
|---|
| 451 | } | 
|---|
| 452 | } | 
|---|
| 453 | } | 
|---|
| 454 | } | 
|---|
| 455 | // reset extra entries | 
|---|
| 456 | for (u=0;u<=max_operators;u++) { | 
|---|
| 457 | for (i=Num;i<AllocNum;i++) | 
|---|
| 458 | for (j=0;j<AllocNum;j++) | 
|---|
| 459 | gsl_matrix_set(A[u], i,j, 0.); | 
|---|
| 460 | for (i=Num;i<AllocNum;i++) | 
|---|
| 461 | for (j=0;j<AllocNum;j++) | 
|---|
| 462 | gsl_matrix_set(A[u], j,i, 0.); | 
|---|
| 463 | } | 
|---|
| 464 | FreeSinCosLookupTable(cos_lookup, sin_lookup); | 
|---|
| 465 |  | 
|---|
| 466 | /*// print A matrices for debug | 
|---|
| 467 | if (P->Par.me == 0) | 
|---|
| 468 | for (u=0;u<max_operators+1;u++) { | 
|---|
| 469 | fprintf(stderr, "A[%i] = \n",u); | 
|---|
| 470 | for (i=0;i<Num;i++) { | 
|---|
| 471 | for (j=0;j<Num;j++) | 
|---|
| 472 | fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j)); | 
|---|
| 473 | fprintf(stderr, "\n"); | 
|---|
| 474 | } | 
|---|
| 475 | } | 
|---|
| 476 | */ | 
|---|
| 477 | } | 
|---|
| 478 |  | 
|---|
| 479 | /** Calculates reciprocal second order moments of each PsiType#Occupied orbital. | 
|---|
| 480 | * First order is zero due to wave function being real (symmetry condition). | 
|---|
| 481 | * \param *P Problem at hand | 
|---|
| 482 | */ | 
|---|
| 483 | void CalculateSecondOrderReciprocalMoment(struct Problem *P) | 
|---|
| 484 | { | 
|---|
| 485 | struct Lattice *Lat = &P->Lat; | 
|---|
| 486 | struct RunStruct *R = &P->R; | 
|---|
| 487 | struct FileData *F = &P->Files; | 
|---|
| 488 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 489 | struct LatticeLevel *LevS = R->LevS; | 
|---|
| 490 | double result, Result; | 
|---|
| 491 | fftw_complex *LPsiDatA=NULL; | 
|---|
| 492 | struct OnePsiElement *OnePsiA; | 
|---|
| 493 | int i,j,l,g; | 
|---|
| 494 | char spin[12], suffix[18]; | 
|---|
| 495 |  | 
|---|
| 496 | switch (Lat->Psi.PsiST) { | 
|---|
| 497 | case SpinDouble: | 
|---|
| 498 | strcpy(suffix,".recispread.csv"); | 
|---|
| 499 | strcpy(spin,"SpinDouble"); | 
|---|
| 500 | break; | 
|---|
| 501 | case SpinUp: | 
|---|
| 502 | strcpy(suffix,".recispread_up.csv"); | 
|---|
| 503 | strcpy(spin,"SpinUp"); | 
|---|
| 504 | break; | 
|---|
| 505 | case SpinDown: | 
|---|
| 506 | strcpy(suffix,".recispread_down.csv"); | 
|---|
| 507 | strcpy(spin,"SpinDown"); | 
|---|
| 508 | break; | 
|---|
| 509 | } | 
|---|
| 510 | if(P->Par.me_comm_ST == 0) { | 
|---|
| 511 | if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me); | 
|---|
| 512 | if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level | 
|---|
| 513 | OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level | 
|---|
| 514 | else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile) | 
|---|
| 515 | OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level | 
|---|
| 516 | if (F->ReciSpreadFile == NULL) { | 
|---|
| 517 | Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n"); | 
|---|
| 518 | } else { | 
|---|
| 519 | fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin); | 
|---|
| 520 | } | 
|---|
| 521 | } | 
|---|
| 522 |  | 
|---|
| 523 | // integrate second order moment | 
|---|
| 524 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
| 525 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
| 526 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
| 527 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
| 528 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; | 
|---|
| 529 | else | 
|---|
| 530 | LPsiDatA = NULL; | 
|---|
| 531 |  | 
|---|
| 532 | if (LPsiDatA != NULL) { | 
|---|
| 533 | if (P->Par.me_comm_ST == 0) | 
|---|
| 534 | fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo); | 
|---|
| 535 | for (i=0;i<NDIM;i++) { | 
|---|
| 536 | for (j=0;j<NDIM;j++) { | 
|---|
| 537 | result = 0.; | 
|---|
| 538 | g = 0; | 
|---|
| 539 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
| 540 | result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re); | 
|---|
| 541 | g++; | 
|---|
| 542 | } | 
|---|
| 543 | for (;g<LevS->MaxG;g++) | 
|---|
| 544 | result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im); | 
|---|
| 545 | //result *= Lat->Volume/LevS->MaxG; | 
|---|
| 546 | MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
| 547 | if (P->Par.me_comm_ST == 0) | 
|---|
| 548 | fprintf(F->ReciSpreadFile,"%2.5lg  ", Result); | 
|---|
| 549 | } | 
|---|
| 550 | } | 
|---|
| 551 | if (P->Par.me_comm_ST == 0) | 
|---|
| 552 | fprintf(F->ReciSpreadFile,"\n"); | 
|---|
| 553 | } | 
|---|
| 554 | } | 
|---|
| 555 | } | 
|---|
| 556 | if(P->Par.me_comm_ST == 0) { | 
|---|
| 557 | fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n"); | 
|---|
| 558 | fflush(F->ReciSpreadFile); | 
|---|
| 559 | } | 
|---|
| 560 | } | 
|---|
| 561 |  | 
|---|
| 562 | /** Given the unitary matrix the transformation is performed on the Psis. | 
|---|
| 563 | * \param *P Problem at hand | 
|---|
| 564 | * \param *U gsl matrix containing the transformation matrix | 
|---|
| 565 | * \param Num dimension parameter for the matrix, i.e. number of wave functions | 
|---|
| 566 | */ | 
|---|
| 567 | void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num) | 
|---|
| 568 | { | 
|---|
| 569 | struct Lattice *Lat = &P->Lat; | 
|---|
| 570 | struct RunStruct *R = &P->R; | 
|---|
| 571 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 572 | struct LatticeLevel *LevS = R->LevS; | 
|---|
| 573 | MPI_Status status; | 
|---|
| 574 | struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB; | 
|---|
| 575 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; | 
|---|
| 576 | fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; | 
|---|
| 577 | int g,i,j,l,k,m,p; | 
|---|
| 578 |  | 
|---|
| 579 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me); | 
|---|
| 580 |  | 
|---|
| 581 | Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here | 
|---|
| 582 | fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer"); | 
|---|
| 583 |  | 
|---|
| 584 | for (l=0;l<Num;l++) // allocate for each local wave function | 
|---|
| 585 | coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l]; | 
|---|
| 586 |  | 
|---|
| 587 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me); | 
|---|
| 588 | l=-1; // to access U matrix element (0..Num-1) | 
|---|
| 589 | k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1) | 
|---|
| 590 | for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions | 
|---|
| 591 | OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA | 
|---|
| 592 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
| 593 | l++;  // increase l if it is occupied wave function | 
|---|
| 594 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local? | 
|---|
| 595 | k++; // increase k only if it is a local, non-extra orbital wave function | 
|---|
| 596 | LPsiDatA = (fftw_complex *) coeffs_buffer[k];    // new coeffs first go to copy buffer, old ones must not be overwritten yet | 
|---|
| 597 | SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG);  // zero buffer part | 
|---|
| 598 | } else | 
|---|
| 599 | LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients! | 
|---|
| 600 |  | 
|---|
| 601 | m = -1; // to access U matrix element (0..Num-1) | 
|---|
| 602 | for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions | 
|---|
| 603 | OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB | 
|---|
| 604 | if (OnePsiB->PsiType == type) {   // drop all but occupied ones | 
|---|
| 605 | m++;  // increase m if it is occupied wave function | 
|---|
| 606 | if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
| 607 | LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; | 
|---|
| 608 | else | 
|---|
| 609 | LOnePsiB = NULL; | 
|---|
| 610 | if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi | 
|---|
| 611 | RecvSource = OnePsiB->my_color_comm_ST_Psi; | 
|---|
| 612 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); | 
|---|
| 613 | LPsiDatB=LevS->LPsi->TempPsi; | 
|---|
| 614 | } else {                  // .. otherwise send it to all other processes (Max_me... - 1) | 
|---|
| 615 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) | 
|---|
| 616 | if (p != OnePsiB->my_color_comm_ST_Psi) | 
|---|
| 617 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); | 
|---|
| 618 | LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; | 
|---|
| 619 | } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received | 
|---|
| 620 |  | 
|---|
| 621 | if (LPsiDatA != NULL) { | 
|---|
| 622 | double tmp = gsl_matrix_get(U,l,m); | 
|---|
| 623 | g=0; | 
|---|
| 624 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
| 625 | LPsiDatA[g].re += LPsiDatB[g].re * tmp; | 
|---|
| 626 | LPsiDatA[g].im += LPsiDatB[g].im * tmp; | 
|---|
| 627 | g++; | 
|---|
| 628 | } | 
|---|
| 629 | for (; g < LevS->MaxG; g++) { | 
|---|
| 630 | LPsiDatA[g].re += LPsiDatB[g].re * tmp; | 
|---|
| 631 | LPsiDatA[g].im += LPsiDatB[g].im * tmp; | 
|---|
| 632 | } | 
|---|
| 633 | } | 
|---|
| 634 | } | 
|---|
| 635 | } | 
|---|
| 636 | } | 
|---|
| 637 | } | 
|---|
| 638 |  | 
|---|
| 639 | //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me); | 
|---|
| 640 | // now, as all wave functions are updated, swap the buffer | 
|---|
| 641 | l = -1; | 
|---|
| 642 | for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function | 
|---|
| 643 | if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { | 
|---|
| 644 | l++; | 
|---|
| 645 | //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo); | 
|---|
| 646 | LPsiDatA = (fftw_complex *)coeffs_buffer[l]; | 
|---|
| 647 | LPsiDatB = LevS->LPsi->LocalPsi[l]; | 
|---|
| 648 | for (g=0;g<LevS->MaxG;g++) { | 
|---|
| 649 | LPsiDatB[g].re = LPsiDatA[g].re; | 
|---|
| 650 | LPsiDatB[g].im = LPsiDatA[g].im; | 
|---|
| 651 | } | 
|---|
| 652 | // recalculating non-local form factors which are coefficient dependent! | 
|---|
| 653 | CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo); | 
|---|
| 654 | } | 
|---|
| 655 | } | 
|---|
| 656 | // and free allocated buffer memory | 
|---|
| 657 | Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer"); | 
|---|
| 658 | } | 
|---|
| 659 |  | 
|---|
| 660 | /** Changes Wannier Centres according to RunStruct#CommonWannier. | 
|---|
| 661 | * \param *P Problem at hand. | 
|---|
| 662 | * \param Num number of Psis | 
|---|
| 663 | * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres | 
|---|
| 664 | * \param *WannierSpread array with wannier spread per wave function | 
|---|
| 665 | */ | 
|---|
| 666 | void ChangeWannierCentres(struct Problem *P, int Num, double **WannierCentre, double *WannierSpread) | 
|---|
| 667 | { | 
|---|
| 668 | struct RunStruct *R = &P->R; | 
|---|
| 669 | struct Lattice *Lat = &P->Lat; | 
|---|
| 670 | struct LatticeLevel *LevS = R->LevS; | 
|---|
| 671 | int *marker, **group; | 
|---|
| 672 | int partner[Num]; | 
|---|
| 673 | int i,j,l,k; | 
|---|
| 674 | int totalflag, flag; | 
|---|
| 675 | double q[NDIM], center[NDIM]; | 
|---|
| 676 | double Spread; | 
|---|
| 677 | int *N = LevS->Plan0.plan->N; | 
|---|
| 678 |  | 
|---|
| 679 | switch (R->CommonWannier) { | 
|---|
| 680 | case 4: | 
|---|
| 681 | debug(P,"Shifting each Wannier centers to cell center"); | 
|---|
| 682 | for (j=0;j<NDIM;j++)  // center point in [0,1]^3 | 
|---|
| 683 | center[j] = 0.5; | 
|---|
| 684 | RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates | 
|---|
| 685 | for (i=0; i < Num; i++) {  // go through all occupied wave functions | 
|---|
| 686 | for (j=0;j<NDIM;j++)  // put into Wannier centres | 
|---|
| 687 | WannierCentre[i][j] = q[j]; | 
|---|
| 688 | } | 
|---|
| 689 | break; | 
|---|
| 690 | case 3: | 
|---|
| 691 | debug(P,"Shifting Wannier centers individually to nearest grid point"); | 
|---|
| 692 | for (i=0;i < Num; i++) {  // go through all wave functions | 
|---|
| 693 | RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]); | 
|---|
| 694 | for (j=0;j<NDIM;j++) {  // Recibasis is not true inverse but times 2.*PI | 
|---|
| 695 | q[j] *= (double)N[j]/(2.*PI); | 
|---|
| 696 |  | 
|---|
| 697 | //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp)); | 
|---|
| 698 | if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j])) | 
|---|
| 699 | q[j] = floor(q[j])/(double)N[j]; | 
|---|
| 700 | else | 
|---|
| 701 | q[j] = ceil(q[j])/(double)N[j]; | 
|---|
| 702 | } | 
|---|
| 703 | RMat33Vec3(WannierCentre[i], Lat->RealBasis, q); | 
|---|
| 704 | } | 
|---|
| 705 | break; | 
|---|
| 706 | case 2: | 
|---|
| 707 | debug(P,"Combining individual orbitals according to spread."); | 
|---|
| 708 | //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me); | 
|---|
| 709 | debug(P,"finding partners"); | 
|---|
| 710 | marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker"); | 
|---|
| 711 | group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group"); | 
|---|
| 712 | for (l=0;l<Num;l++) { | 
|---|
| 713 | group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]");  // each must group must have one more as end marker | 
|---|
| 714 | for (k=0;k<=Num;k++) | 
|---|
| 715 | group[l][k] = -1; // reset partner group | 
|---|
| 716 | } | 
|---|
| 717 | for (k=0;k<Num;k++) | 
|---|
| 718 | partner[k] = 0; | 
|---|
| 719 | debug(P,"mem allocated"); | 
|---|
| 720 | // go for each orbital through every other, check distance against the sum of both spreads | 
|---|
| 721 | // if smaller add to group of this orbital | 
|---|
| 722 | for (l=0;l<Num;l++) { | 
|---|
| 723 | j=0;  // index for partner group | 
|---|
| 724 | for (k=0;k<Num;k++) {  // check this against l | 
|---|
| 725 | Spread = 0.; | 
|---|
| 726 | for (i=0;i<NDIM;i++) { | 
|---|
| 727 | //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]); | 
|---|
| 728 | Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]); | 
|---|
| 729 | } | 
|---|
| 730 | Spread = sqrt(Spread);  // distance in Spread | 
|---|
| 731 | //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]); | 
|---|
| 732 | if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread | 
|---|
| 733 | group[l][j++] = k;  // add k to group of l | 
|---|
| 734 | partner[l]++; | 
|---|
| 735 | //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l); | 
|---|
| 736 | } | 
|---|
| 737 | } | 
|---|
| 738 | } | 
|---|
| 739 |  | 
|---|
| 740 | // consistency, for each orbital check if this orbital is also in the group of each referred orbital | 
|---|
| 741 | debug(P,"checking consistency"); | 
|---|
| 742 | totalflag = 1; | 
|---|
| 743 | for (l=0;l<Num;l++) // checking l's group | 
|---|
| 744 | for (k=0;k<Num;k++) { // k is partner index | 
|---|
| 745 | if (group[l][k] != -1) {  // if current index k is a partner | 
|---|
| 746 | flag = 0; | 
|---|
| 747 | for(j=0;j<Num;j++) {  // go through each entry in l partner's partner group if l exists | 
|---|
| 748 | if ((group[ group[l][k] ][j] == l)) | 
|---|
| 749 | flag = 1; | 
|---|
| 750 | } | 
|---|
| 751 | //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]); | 
|---|
| 752 | if (totalflag == 1) totalflag = flag; | 
|---|
| 753 | } | 
|---|
| 754 | } | 
|---|
| 755 | // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres | 
|---|
| 756 | debug(P,"weight and calculate new centers for partner groups"); | 
|---|
| 757 | for (l=0;l<=Num;l++) | 
|---|
| 758 | marker[l] = 1; | 
|---|
| 759 | if (totalflag) { | 
|---|
| 760 | for (l=0;l<Num;l++) { // go through each orbital | 
|---|
| 761 | if (marker[l] != 0) { // if it hasn't been reweighted | 
|---|
| 762 | marker[l] = 0; | 
|---|
| 763 | for (i=0;i<NDIM;i++) | 
|---|
| 764 | q[i] = 0.; | 
|---|
| 765 | j = 0; | 
|---|
| 766 | while (group[l][j] != -1) { | 
|---|
| 767 | marker[group[l][j]] = 0; | 
|---|
| 768 | for (i=0;i<NDIM;i++) { | 
|---|
| 769 | //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]); | 
|---|
| 770 | q[i] += WannierCentre[ group[l][j] ][i]; | 
|---|
| 771 | } | 
|---|
| 772 | j++; | 
|---|
| 773 | } | 
|---|
| 774 | //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j); | 
|---|
| 775 | for (i=0;i<NDIM;i++) {// weight by number of elements in partner group | 
|---|
| 776 | q[i] /= (double)(j); | 
|---|
| 777 | } | 
|---|
| 778 |  | 
|---|
| 779 | // put WannierCentre into own and all partners' | 
|---|
| 780 | for (i=0;i<NDIM;i++) | 
|---|
| 781 | WannierCentre[l][i] = q[i]; | 
|---|
| 782 | j = 0; | 
|---|
| 783 | while (group[l][j] != -1) { | 
|---|
| 784 | for (i=0;i<NDIM;i++) | 
|---|
| 785 | WannierCentre[group[l][j]][i] = q[i]; | 
|---|
| 786 | j++; | 
|---|
| 787 | } | 
|---|
| 788 | } | 
|---|
| 789 | } | 
|---|
| 790 | } | 
|---|
| 791 | if (P->Call.out[StepLeaderOut]) { | 
|---|
| 792 | fprintf(stderr,"Summary:\n"); | 
|---|
| 793 | fprintf(stderr,"========\n"); | 
|---|
| 794 | for (i=0;i<Num;i++) | 
|---|
| 795 | fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]); | 
|---|
| 796 | } | 
|---|
| 797 | debug(P,"done"); | 
|---|
| 798 |  | 
|---|
| 799 | Free(marker, "ChangeWannierCentres: marker"); | 
|---|
| 800 | for (l=0;l<Num;l++) | 
|---|
| 801 | Free(group[l], "ChangeWannierCentres: group[l]"); | 
|---|
| 802 | Free(group, "ChangeWannierCentres: group"); | 
|---|
| 803 | break; | 
|---|
| 804 | case 1: | 
|---|
| 805 | debug(P,"Individual orbitals are changed to center of all."); | 
|---|
| 806 | for (i=0;i<NDIM;i++)  // zero center of weight | 
|---|
| 807 | q[i] = 0.; | 
|---|
| 808 | for (k=0;k<Num;k++) | 
|---|
| 809 | for (i=0;i<NDIM;i++) {  // sum up all orbitals each component | 
|---|
| 810 | q[i] += WannierCentre[k][i]; | 
|---|
| 811 | } | 
|---|
| 812 | for (i=0;i<NDIM;i++)  // divide by number | 
|---|
| 813 | q[i] /= Num; | 
|---|
| 814 | for (k=0;k<Num;k++) | 
|---|
| 815 | for (i=0;i<NDIM;i++) {  // put into this function's array | 
|---|
| 816 | WannierCentre[k][i] = q[i]; | 
|---|
| 817 | } | 
|---|
| 818 | break; | 
|---|
| 819 | case 0: | 
|---|
| 820 | default: | 
|---|
| 821 | break; | 
|---|
| 822 | } | 
|---|
| 823 | } | 
|---|
| 824 |  | 
|---|
| 825 | /** From the entries of the variance matrices the spread is calculated. | 
|---|
| 826 | * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$ | 
|---|
| 827 | * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$ | 
|---|
| 828 | * \param *P Problem at hand | 
|---|
| 829 | * \param *A variance matrices | 
|---|
| 830 | * \param old_spread first term of wannier spread | 
|---|
| 831 | * \param spread second term of wannier spread | 
|---|
| 832 | * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres | 
|---|
| 833 | * \param *WannierSpread array with wannier spread per wave function | 
|---|
| 834 | */ | 
|---|
| 835 | void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread) | 
|---|
| 836 | { | 
|---|
| 837 | struct Lattice *Lat = &P->Lat; | 
|---|
| 838 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 839 | struct OnePsiElement *OnePsiA; | 
|---|
| 840 | int i,j,k,l; | 
|---|
| 841 | double tmp, q[NDIM]; | 
|---|
| 842 |  | 
|---|
| 843 | *old_spread = 0; | 
|---|
| 844 | *spread = 0; | 
|---|
| 845 |  | 
|---|
| 846 | // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital | 
|---|
| 847 | i=-1; | 
|---|
| 848 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
| 849 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
| 850 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
| 851 | i++;  // increase l if it is occupied wave function | 
|---|
| 852 | //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i); | 
|---|
| 853 |  | 
|---|
| 854 | // calculate Wannier Centre | 
|---|
| 855 | for (j=0;j<NDIM;j++) { | 
|---|
| 856 | q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i)))); | 
|---|
| 857 | if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ | 
|---|
| 858 | q[j] += 1.; | 
|---|
| 859 | } | 
|---|
| 860 | RMat33Vec3(WannierCentre[i], Lat->RealBasis, q); | 
|---|
| 861 |  | 
|---|
| 862 | // store orbital spread and centre in file | 
|---|
| 863 | tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2) | 
|---|
| 864 | - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2) | 
|---|
| 865 | - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2); | 
|---|
| 866 | WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp; | 
|---|
| 867 | //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]); | 
|---|
| 868 | //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n", | 
|---|
| 869 | //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]); | 
|---|
| 870 | //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n", | 
|---|
| 871 | //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]); | 
|---|
| 872 |  | 
|---|
| 873 | // gather all spreads | 
|---|
| 874 | *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U) | 
|---|
| 875 | for (k=0;k<max_operators;k++) | 
|---|
| 876 | *spread += pow(gsl_matrix_get(A[k],i,i),2); | 
|---|
| 877 | } | 
|---|
| 878 | } | 
|---|
| 879 | } | 
|---|
| 880 |  | 
|---|
| 881 | /** Prints gsl_matrix nicely to screen. | 
|---|
| 882 | * \param *P Problem at hand | 
|---|
| 883 | * \param *U gsl matrix | 
|---|
| 884 | * \param Num number of rows/columns | 
|---|
| 885 | * \param *msg name of matrix, prepended before entries | 
|---|
| 886 | */ | 
|---|
| 887 | void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg) | 
|---|
| 888 | { | 
|---|
| 889 | int k,l; | 
|---|
| 890 | fprintf(stderr,"(%i) %s = \n",P->Par.me, msg); | 
|---|
| 891 | for (k=0;k<Num;k++) { | 
|---|
| 892 | for (l=0;l<Num;l++) | 
|---|
| 893 | fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k)); | 
|---|
| 894 | fprintf(stderr,"\n"); | 
|---|
| 895 | } | 
|---|
| 896 | } | 
|---|
| 897 |  | 
|---|
| 898 | /** Allocates memory for the  DiagonalizationData structure | 
|---|
| 899 | * \param *P Problem at hand | 
|---|
| 900 | * \param DiagData pointer to structure | 
|---|
| 901 | * \param Num number of rows and columns in matrix | 
|---|
| 902 | * \param NumMatrices number of matrices to be simultaneously diagonalized | 
|---|
| 903 | * \param extra number of extra matrices to be passively also diagonalized | 
|---|
| 904 | */ | 
|---|
| 905 | void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra) | 
|---|
| 906 | { | 
|---|
| 907 | int i; | 
|---|
| 908 |  | 
|---|
| 909 | // store integer values | 
|---|
| 910 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me); | 
|---|
| 911 | DiagData->Num = Num; | 
|---|
| 912 | DiagData->AllocNum = ceil((double)Num / 2. ) *2; | 
|---|
| 913 | DiagData->NumMatrices = NumMatrices; | 
|---|
| 914 | DiagData->extra = extra; | 
|---|
| 915 |  | 
|---|
| 916 | // determine communicator and our rank and its size | 
|---|
| 917 | DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum); | 
|---|
| 918 | MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum)); | 
|---|
| 919 | MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank)); | 
|---|
| 920 |  | 
|---|
| 921 | // allocate memory | 
|---|
| 922 | DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum); | 
|---|
| 923 | gsl_matrix_set_identity(DiagData->U); | 
|---|
| 924 |  | 
|---|
| 925 | DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A"); | 
|---|
| 926 | for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) { | 
|---|
| 927 | DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum); | 
|---|
| 928 | gsl_matrix_set_zero(DiagData->A[i]); | 
|---|
| 929 | } | 
|---|
| 930 |  | 
|---|
| 931 | // init merry-go-round array for diagonilzation | 
|---|
| 932 | DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top"); | 
|---|
| 933 | DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot"); | 
|---|
| 934 | for (i=0;i<DiagData->AllocNum/2;i++) { | 
|---|
| 935 | DiagData->top[i] = 2*i; | 
|---|
| 936 | DiagData->bot[i] = 2*i+1; | 
|---|
| 937 | } | 
|---|
| 938 | /*    // print starting values of index generation tables top and bot | 
|---|
| 939 | fprintf(stderr,"top\t"); | 
|---|
| 940 | for (k=0;k<AllocNum/2;k++) | 
|---|
| 941 | fprintf(stderr,"%i\t", top[k]); | 
|---|
| 942 | fprintf(stderr,"\n"); | 
|---|
| 943 | fprintf(stderr,"bot\t"); | 
|---|
| 944 | for (k=0;k<AllocNum/2;k++) | 
|---|
| 945 | fprintf(stderr,"%i\t", bot[k]); | 
|---|
| 946 | fprintf(stderr,"\n");*/ | 
|---|
| 947 | } | 
|---|
| 948 |  | 
|---|
| 949 | /** Frees allocated memory for the DiagonalizationData structure. | 
|---|
| 950 | * \param DiagData pointer to structure | 
|---|
| 951 | */ | 
|---|
| 952 | void FreeDiagonalization(struct DiagonalizationData *DiagData) | 
|---|
| 953 | { | 
|---|
| 954 | int i; | 
|---|
| 955 |  | 
|---|
| 956 | Free(DiagData->top, "FreeDiagonalization: DiagData->top"); | 
|---|
| 957 | Free(DiagData->bot, "FreeDiagonalization: DiagData->bot"); | 
|---|
| 958 | gsl_matrix_free(DiagData->U); | 
|---|
| 959 | for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) | 
|---|
| 960 | gsl_matrix_free(DiagData->A[i]); | 
|---|
| 961 | Free(DiagData->A, "FreeDiagonalization: DiagData->A"); | 
|---|
| 962 | } | 
|---|
| 963 |  | 
|---|
| 964 | /** Orthogonalizing PsiType#Occupied wave functions for MD. | 
|---|
| 965 | * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged. | 
|---|
| 966 | * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi | 
|---|
| 967 | * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis. | 
|---|
| 968 | * \param *P Problem at hand | 
|---|
| 969 | * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well | 
|---|
| 970 | * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation | 
|---|
| 971 | * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under | 
|---|
| 972 | * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali- | 
|---|
| 973 | * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the | 
|---|
| 974 | * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a | 
|---|
| 975 | * final orthogonalisation of all Psis, after the CG minimisation. | 
|---|
| 976 | */ | 
|---|
| 977 | void OrthogonalizePsis(struct Problem *P) | 
|---|
| 978 | { | 
|---|
| 979 | struct Lattice *Lat = &P->Lat; | 
|---|
| 980 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 981 | struct LatticeLevel *LevS = P->R.LevS; | 
|---|
| 982 | int Num = Psi->NoOfPsis; | 
|---|
| 983 | int i,j,l; | 
|---|
| 984 | struct DiagonalizationData DiagData; | 
|---|
| 985 | double PsiSP; | 
|---|
| 986 |  | 
|---|
| 987 | InitDiagonalization(P, &DiagData, Num, 1, 0); | 
|---|
| 988 |  | 
|---|
| 989 | // Calculate Overlap matrix | 
|---|
| 990 | for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) | 
|---|
| 991 | CalculateOverlap(P, l, Occupied); | 
|---|
| 992 | for (i=0;i<Num;i++) // fill gsl matrix with overlap values | 
|---|
| 993 | for (j=0;j<Num;j++) | 
|---|
| 994 | gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]); | 
|---|
| 995 | //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)"); | 
|---|
| 996 |  | 
|---|
| 997 | //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)"); | 
|---|
| 998 |  | 
|---|
| 999 | // diagonalize overlap matrix | 
|---|
| 1000 | Diagonalize(P, &DiagData); | 
|---|
| 1001 |  | 
|---|
| 1002 | //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)"); | 
|---|
| 1003 |  | 
|---|
| 1004 | // apply found unitary transformation to wave functions | 
|---|
| 1005 | UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum); | 
|---|
| 1006 |  | 
|---|
| 1007 | // update GramSch stati | 
|---|
| 1008 | for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) { | 
|---|
| 1009 | GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.);   // calculates square product if 0. is given... | 
|---|
| 1010 | PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]); | 
|---|
| 1011 | //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP); | 
|---|
| 1012 | Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal; | 
|---|
| 1013 | } | 
|---|
| 1014 | UpdateGramSchAllPsiStatus(P,Psi); | 
|---|
| 1015 | /* | 
|---|
| 1016 | for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) | 
|---|
| 1017 | CalculateOverlap(P, l, Occupied); | 
|---|
| 1018 | for (i=0;i<Num;i++) // fill gsl matrix with overlap values | 
|---|
| 1019 | for (j=0;j<Num;j++) | 
|---|
| 1020 | gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]); | 
|---|
| 1021 | if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)"); | 
|---|
| 1022 | */ | 
|---|
| 1023 | // free memory | 
|---|
| 1024 | FreeDiagonalization(&DiagData); | 
|---|
| 1025 | } | 
|---|
| 1026 |  | 
|---|
| 1027 | /** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD. | 
|---|
| 1028 | * Ensures that two orthonormality condition are met for Psis: | 
|---|
| 1029 | * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$ | 
|---|
| 1030 | * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$ | 
|---|
| 1031 | * \param *P Problem at hand | 
|---|
| 1032 | * \note [Payne] states that GramSchmidt is not well-suited, and this stronger | 
|---|
| 1033 | *       condition is needed for numerical stability | 
|---|
| 1034 | * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat | 
|---|
| 1035 | */ | 
|---|
| 1036 | void StrongOrthogonalizePsis(struct Problem *P) | 
|---|
| 1037 | { | 
|---|
| 1038 | struct Lattice *Lat = &P->Lat; | 
|---|
| 1039 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 1040 | int Num = Psi->NoOfPsis; | 
|---|
| 1041 | int i,j,l; | 
|---|
| 1042 | struct DiagonalizationData DiagData; | 
|---|
| 1043 |  | 
|---|
| 1044 | InitDiagonalization(P, &DiagData, Num, 2, 0); | 
|---|
| 1045 |  | 
|---|
| 1046 | // Calculate Overlap matrix | 
|---|
| 1047 | for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) { | 
|---|
| 1048 | CalculateOverlap(P, l, Occupied); | 
|---|
| 1049 | //Calculate1stOverlap(P, l, Occupied); | 
|---|
| 1050 | } | 
|---|
| 1051 | for (i=0;i<Num;i++) // fill gsl matrix with overlap values | 
|---|
| 1052 | for (j=0;j<Num;j++) { | 
|---|
| 1053 | gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]); | 
|---|
| 1054 | //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]); | 
|---|
| 1055 | } | 
|---|
| 1056 |  | 
|---|
| 1057 | // diagonalize overlap matrix | 
|---|
| 1058 | Diagonalize(P, &DiagData); | 
|---|
| 1059 |  | 
|---|
| 1060 | // apply found unitary transformation to wave functions | 
|---|
| 1061 | UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum); | 
|---|
| 1062 |  | 
|---|
| 1063 | // free memory | 
|---|
| 1064 | FreeDiagonalization(&DiagData); | 
|---|
| 1065 | } | 
|---|
| 1066 |  | 
|---|
| 1067 | /** Uses either serial or parallel diagonalization. | 
|---|
| 1068 | * \param *P Problem at hand | 
|---|
| 1069 | * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization | 
|---|
| 1070 | * \sa ParallelDiagonalization(), SerialDiagonalization() | 
|---|
| 1071 | */ | 
|---|
| 1072 | void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData) | 
|---|
| 1073 | { | 
|---|
| 1074 | if (DiagData->Num != 1) { // one- or multi-process case? | 
|---|
| 1075 | if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) { | 
|---|
| 1076 | /* | 
|---|
| 1077 | debug(P,"Testing with silly matrix"); | 
|---|
| 1078 | ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot); | 
|---|
| 1079 | if(P->Call.out[ReadOut]) // && P->Par.me == 0) | 
|---|
| 1080 | PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)"); | 
|---|
| 1081 | if(P->Call.out[ReadOut]) // && P->Par.me == 0) | 
|---|
| 1082 | PrintGSLMatrix(P, Utest, 4, "Utest (final)"); | 
|---|
| 1083 | */ | 
|---|
| 1084 | debug(P,"ParallelDiagonalization"); | 
|---|
| 1085 | ParallelDiagonalization(P, DiagData); | 
|---|
| 1086 | } else {/* | 
|---|
| 1087 | debug(P,"Testing with silly matrix"); | 
|---|
| 1088 | SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot); | 
|---|
| 1089 | if(P->Call.out[ReadOut]) // && P->Par.me == 0) | 
|---|
| 1090 | PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)"); | 
|---|
| 1091 | if(P->Call.out[ReadOut]) // && P->Par.me == 0) | 
|---|
| 1092 | PrintGSLMatrix(P, Utest, 4, "Utest (final)"); | 
|---|
| 1093 | */ | 
|---|
| 1094 | debug(P,"SerialDiagonalization"); | 
|---|
| 1095 | SerialDiagonalization(P, DiagData); | 
|---|
| 1096 | } | 
|---|
| 1097 |  | 
|---|
| 1098 | //if(P->Call.out[ReadOut]) // && P->Par.me == 0) | 
|---|
| 1099 | //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U"); | 
|---|
| 1100 | } | 
|---|
| 1101 | } | 
|---|
| 1102 |  | 
|---|
| 1103 | /** Computation of Maximally Localized Wannier Functions. | 
|---|
| 1104 | * Maximally localized functions are prime when evulating a Hamiltonian with | 
|---|
| 1105 | * magnetic fields under periodic boundary conditions, as the common position | 
|---|
| 1106 | * operator is no longer valid. These can be obtained by orbital rotations, which | 
|---|
| 1107 | * are looked for iteratively and gathered in one transformation matrix, to be | 
|---|
| 1108 | * later applied to the set of orbital wave functions. | 
|---|
| 1109 | * | 
|---|
| 1110 | * In order to obtain these, the following algorithm is applied: | 
|---|
| 1111 | * -# Initialize U (identity) as the sought-for transformation matrix | 
|---|
| 1112 | * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements | 
|---|
| 1113 | *    \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators | 
|---|
| 1114 | *    \f$A^{(k)}\f$ | 
|---|
| 1115 | * -# For each pair of indices (i,j) (i<j) do the following: | 
|---|
| 1116 | *    -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$ | 
|---|
| 1117 | *       where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$ | 
|---|
| 1118 | *    -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G | 
|---|
| 1119 | *       corresponding to the greatest eigenvalue, such that \f$x\geq0\f$ | 
|---|
| 1120 | *    -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the | 
|---|
| 1121 | *       identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$ | 
|---|
| 1122 | *       \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$ | 
|---|
| 1123 | *    -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$ | 
|---|
| 1124 | *    -# Gather the rotations in \f$U = U R\f$ | 
|---|
| 1125 | * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$ | 
|---|
| 1126 | * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3. | 
|---|
| 1127 | * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$ | 
|---|
| 1128 | * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in | 
|---|
| 1129 | *    OnePsiElementAddData#WannierCentre | 
|---|
| 1130 | * | 
|---|
| 1131 | * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions. | 
|---|
| 1132 | * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same | 
|---|
| 1133 | * time. | 
|---|
| 1134 | * | 
|---|
| 1135 | * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index | 
|---|
| 1136 | * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices | 
|---|
| 1137 | * may be diagonalized simultaneously and parallely. We use the implementation explained in | 
|---|
| 1138 | * [Golub, Matrix computations, 1989, p451]. | 
|---|
| 1139 | * | 
|---|
| 1140 | * \param *P Problem at hand | 
|---|
| 1141 | */ | 
|---|
| 1142 | void ComputeMLWF(struct Problem *P) { | 
|---|
| 1143 | // variables and allocation | 
|---|
| 1144 | struct Lattice *Lat = &P->Lat; | 
|---|
| 1145 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 1146 | int i; | 
|---|
| 1147 | int Num = Psi->NoOfPsis;  // is number of occupied plus unoccupied states for rows | 
|---|
| 1148 | double **WannierCentre; | 
|---|
| 1149 | double *WannierSpread; | 
|---|
| 1150 | double spread, spreadSQ; | 
|---|
| 1151 | struct DiagonalizationData DiagData; | 
|---|
| 1152 |  | 
|---|
| 1153 | if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me); | 
|---|
| 1154 |  | 
|---|
| 1155 | InitDiagonalization(P, &DiagData, Num, max_operators, 1); | 
|---|
| 1156 |  | 
|---|
| 1157 | // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1)) | 
|---|
| 1158 | //debug (P,"Calculatung Variance matrices"); | 
|---|
| 1159 | FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A); | 
|---|
| 1160 |  | 
|---|
| 1161 | //debug (P,"Diagonalizing"); | 
|---|
| 1162 | Diagonalize(P, &DiagData); | 
|---|
| 1163 |  | 
|---|
| 1164 | // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle | 
|---|
| 1165 | //debug (P,"Performing Unitary transformation"); | 
|---|
| 1166 | UnitaryTransformationOnWavefunctions(P, DiagData.U, Num); | 
|---|
| 1167 |  | 
|---|
| 1168 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me); | 
|---|
| 1169 | WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre"); | 
|---|
| 1170 | WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread"); | 
|---|
| 1171 | for (i=0;i<Num;i++) | 
|---|
| 1172 | WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre"); | 
|---|
| 1173 |  | 
|---|
| 1174 | //debug (P,"Computing Wannier centres from variance matrix"); | 
|---|
| 1175 | ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread); | 
|---|
| 1176 |  | 
|---|
| 1177 | // join Wannier orbital to groups with common centres under certain conditions | 
|---|
| 1178 | //debug (P,"Changing Wannier Centres according to CommonWannier"); | 
|---|
| 1179 | ChangeWannierCentres(P, Num, WannierCentre, WannierSpread); | 
|---|
| 1180 |  | 
|---|
| 1181 | // write to file | 
|---|
| 1182 | //debug (P,"Writing spread file"); | 
|---|
| 1183 | WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread); | 
|---|
| 1184 |  | 
|---|
| 1185 | debug(P,"Free'ing memory"); | 
|---|
| 1186 | // free all remaining memory | 
|---|
| 1187 | FreeDiagonalization(&DiagData); | 
|---|
| 1188 | for (i=0;i<Num;i++) | 
|---|
| 1189 | Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]"); | 
|---|
| 1190 | Free(WannierCentre, "ComputeMLWF: WannierCentre"); | 
|---|
| 1191 | Free(WannierSpread, "ComputeMLWF: WannierSpread"); | 
|---|
| 1192 | } | 
|---|
| 1193 |  | 
|---|
| 1194 | /** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix. | 
|---|
| 1195 | * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and | 
|---|
| 1196 | * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part. | 
|---|
| 1197 | * | 
|---|
| 1198 | * The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue | 
|---|
| 1199 | * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is | 
|---|
| 1200 | * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then | 
|---|
| 1201 | * derive the other by solving the equation system. Finally, the eigenvector is normalized. | 
|---|
| 1202 | * \param *P Problem at hand | 
|---|
| 1203 | * \param *A matrix whose eigenvalues/-vectors are to be found | 
|---|
| 1204 | * \param *eval vector with eigenvalues | 
|---|
| 1205 | * \param *evec matrix with corresponding eigenvectors in columns | 
|---|
| 1206 | */ | 
|---|
| 1207 | #ifdef HAVE_INLINE | 
|---|
| 1208 | inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec) | 
|---|
| 1209 | #else | 
|---|
| 1210 | void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec) | 
|---|
| 1211 | #endif | 
|---|
| 1212 | { | 
|---|
| 1213 | double a11,a12,a21,a22; | 
|---|
| 1214 | double ev[2], summand1, summand2, norm; | 
|---|
| 1215 | int i; | 
|---|
| 1216 | // find eigenvalues | 
|---|
| 1217 | a11 = gsl_matrix_get(A,0,0); | 
|---|
| 1218 | a12 = gsl_matrix_get(A,0,1); | 
|---|
| 1219 | a21 = gsl_matrix_get(A,1,0); | 
|---|
| 1220 | a22 = gsl_matrix_get(A,1,1); | 
|---|
| 1221 | summand1 = (a11+a22)/2.; | 
|---|
| 1222 | summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22); | 
|---|
| 1223 | ev[0] = summand1 + summand2; | 
|---|
| 1224 | ev[1] = summand1 - summand2; | 
|---|
| 1225 | gsl_vector_set(eval, 0, ev[0]); | 
|---|
| 1226 | gsl_vector_set(eval, 1, ev[1]); | 
|---|
| 1227 | //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2); | 
|---|
| 1228 |  | 
|---|
| 1229 | // find eigenvectors | 
|---|
| 1230 | for(i=0;i<2;i++) { | 
|---|
| 1231 | if (fabs(ev[i]) < MYEPSILON) { | 
|---|
| 1232 | gsl_matrix_set(evec, 0,i, 0.); | 
|---|
| 1233 | gsl_matrix_set(evec, 1,i, 0.); | 
|---|
| 1234 | } else if (fabs(a22-ev[i]) > MYEPSILON) { | 
|---|
| 1235 | norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i]))); | 
|---|
| 1236 | gsl_matrix_set(evec, 0,i, 1./norm); | 
|---|
| 1237 | gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm); | 
|---|
| 1238 | //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm); | 
|---|
| 1239 | } else if (fabs(a12) > MYEPSILON) { | 
|---|
| 1240 | norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12)); | 
|---|
| 1241 | gsl_matrix_set(evec, 0,i, 1./norm); | 
|---|
| 1242 | gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm); | 
|---|
| 1243 | //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm); | 
|---|
| 1244 | } else { | 
|---|
| 1245 | if (fabs(a11-ev[i]) > MYEPSILON) { | 
|---|
| 1246 | norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i]))); | 
|---|
| 1247 | gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm); | 
|---|
| 1248 | gsl_matrix_set(evec, 1,i, 1./norm); | 
|---|
| 1249 | //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm); | 
|---|
| 1250 | } else if (fabs(a21) > MYEPSILON) { | 
|---|
| 1251 | norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21)); | 
|---|
| 1252 | gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm); | 
|---|
| 1253 | gsl_matrix_set(evec, 1,i, 1./norm); | 
|---|
| 1254 | //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm); | 
|---|
| 1255 | } else { | 
|---|
| 1256 | //gsl_matrix_set(evec, 0,i, 0.); | 
|---|
| 1257 | //gsl_matrix_set(evec, 1,i, 1.); | 
|---|
| 1258 | fprintf (stderr,"\t evec %i undetermined\n", i); | 
|---|
| 1259 | } | 
|---|
| 1260 | //gsl_matrix_set(evec, 0,0, 1.); | 
|---|
| 1261 | //gsl_matrix_set(evec, 1,0, 0.); | 
|---|
| 1262 | //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me); | 
|---|
| 1263 | } | 
|---|
| 1264 | } | 
|---|
| 1265 | } | 
|---|
| 1266 |  | 
|---|
| 1267 | /** Calculates sine and cosine values for multiple matrix diagonalization. | 
|---|
| 1268 | * \param *evec eigenvectors in columns | 
|---|
| 1269 | * \param *eval corresponding eigenvalues | 
|---|
| 1270 | * \param *c cosine to be returned | 
|---|
| 1271 | * \param *s sine to be returned | 
|---|
| 1272 | */ | 
|---|
| 1273 | #ifdef HAVE_INLINE | 
|---|
| 1274 | inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s) | 
|---|
| 1275 | #else | 
|---|
| 1276 | void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s) | 
|---|
| 1277 | #endif | 
|---|
| 1278 | { | 
|---|
| 1279 | int index; | 
|---|
| 1280 | double x,y,r; | 
|---|
| 1281 |  | 
|---|
| 1282 | index = gsl_vector_max_index (eval);  // get biggest eigenvalue | 
|---|
| 1283 | //fprintf(stderr,"\t1st: %lg\t2nd: %lg ---  biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index); | 
|---|
| 1284 | x = gsl_matrix_get(evec, 0, index); | 
|---|
| 1285 | y = gsl_matrix_get(evec, 1, index); | 
|---|
| 1286 | if (x < 0) {  // ensure x>=0 so that rotation angles remain smaller Pi/4 | 
|---|
| 1287 | y = -y; | 
|---|
| 1288 | x = -x; | 
|---|
| 1289 | } | 
|---|
| 1290 | //z = gsl_matrix_get(evec, 2, index) * x/fabs(x); | 
|---|
| 1291 | //fprintf(stderr,"\tx %lg\ty %lg\n", x,y); | 
|---|
| 1292 |  | 
|---|
| 1293 | //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); | 
|---|
| 1294 | // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] | 
|---|
| 1295 | r = sqrt(x*x + y*y); // + z*z); | 
|---|
| 1296 | if (fabs(r) > MYEPSILON) { | 
|---|
| 1297 | *c = sqrt((x + r) / (2.*r)); | 
|---|
| 1298 | *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r))); | 
|---|
| 1299 | } else { | 
|---|
| 1300 | *c = 1.; | 
|---|
| 1301 | *s = 0.; | 
|---|
| 1302 | } | 
|---|
| 1303 | } | 
|---|
| 1304 |  | 
|---|
| 1305 | /* | 
|---|
| 1306 | * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized | 
|---|
| 1307 | * \param *U transformation matrix set to unity matrix | 
|---|
| 1308 | * \param NumMatrices number of matrices to be diagonalized simultaneously | 
|---|
| 1309 | * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A) | 
|---|
| 1310 | * \param AllocNum number of rows/columns in matrices | 
|---|
| 1311 | * \param Num number of wave functions | 
|---|
| 1312 | * \param ProcRank index in group for this cpu | 
|---|
| 1313 | * \param ProcNum number of cpus in group | 
|---|
| 1314 | * \param *top array with top row indices (merry-go-round) | 
|---|
| 1315 | * \param *bot array with bottom row indices (merry-go-round) | 
|---|
| 1316 | */ | 
|---|
| 1317 |  | 
|---|
| 1318 | /** Simultaneous diagonalization of matrices with multiple cpus. | 
|---|
| 1319 | * \param *P Problem at hand | 
|---|
| 1320 | * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization | 
|---|
| 1321 | * \note this is slower given one cpu only than SerialDiagonalization() | 
|---|
| 1322 | */ | 
|---|
| 1323 | void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData) | 
|---|
| 1324 | { | 
|---|
| 1325 | struct RunStruct *R =&P->R; | 
|---|
| 1326 | int tagR0, tagR1, tagS0, tagS1; | 
|---|
| 1327 | int iloc, jloc; | 
|---|
| 1328 | double *s_all, *c_all; | 
|---|
| 1329 | int round, max_rounds; | 
|---|
| 1330 | int start; | 
|---|
| 1331 | int *rcounts, *rdispls; | 
|---|
| 1332 | double *c, *s; | 
|---|
| 1333 | int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from | 
|---|
| 1334 | int left, right; // left or right neighbour for process | 
|---|
| 1335 | double spread = 0., old_spread=0., Spread=0.; | 
|---|
| 1336 | int i,j,k,l,m,u; | 
|---|
| 1337 | int set; | 
|---|
| 1338 | int it_steps; // iteration step counter | 
|---|
| 1339 | double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k] | 
|---|
| 1340 | double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k] | 
|---|
| 1341 | double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k] | 
|---|
| 1342 | double a_i, a_j; | 
|---|
| 1343 | gsl_matrix *G; | 
|---|
| 1344 | gsl_vector *h; | 
|---|
| 1345 | gsl_vector *eval; | 
|---|
| 1346 | gsl_matrix *evec; | 
|---|
| 1347 | //gsl_eigen_symmv_workspace *w; | 
|---|
| 1348 |  | 
|---|
| 1349 | max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum;  // each process must perform multiple rotations per step of a set | 
|---|
| 1350 | if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds); | 
|---|
| 1351 |  | 
|---|
| 1352 | // allocate column vectors for interchange of columns | 
|---|
| 1353 | debug(P,"allocate column vectors for interchange of columns"); | 
|---|
| 1354 | c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c"); | 
|---|
| 1355 | s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s"); | 
|---|
| 1356 | c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all"); | 
|---|
| 1357 | s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all"); | 
|---|
| 1358 | rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts"); | 
|---|
| 1359 | rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls"); | 
|---|
| 1360 |  | 
|---|
| 1361 | // allocate eigenvector stuff | 
|---|
| 1362 | debug(P,"allocate eigenvector stuff"); | 
|---|
| 1363 | G = gsl_matrix_calloc (2,2); | 
|---|
| 1364 | h = gsl_vector_alloc (2); | 
|---|
| 1365 | eval = gsl_vector_alloc (2); | 
|---|
| 1366 | evec = gsl_matrix_alloc (2,2); | 
|---|
| 1367 | //w = gsl_eigen_symmv_alloc(2); | 
|---|
| 1368 |  | 
|---|
| 1369 | // establish communication partners | 
|---|
| 1370 | debug(P,"establish communication partners"); | 
|---|
| 1371 | if (DiagData->ProcRank == 0) { | 
|---|
| 1372 | tagS0 = WannierALTag;   // left p0 always remains left p0 | 
|---|
| 1373 | } else { | 
|---|
| 1374 | tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last | 
|---|
| 1375 | } | 
|---|
| 1376 | tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1 | 
|---|
| 1377 | tagR0 = WannierALTag; // | 
|---|
| 1378 | tagR1 = WannierARTag; // first process | 
|---|
| 1379 | if (DiagData->ProcRank == 0) { | 
|---|
| 1380 | left = DiagData->ProcNum-1; | 
|---|
| 1381 | right = 1; | 
|---|
| 1382 | Lsend = 0; | 
|---|
| 1383 | Rsend = 1; | 
|---|
| 1384 | Lrecv = 0; | 
|---|
| 1385 | Rrecv = 1; | 
|---|
| 1386 | } else if (DiagData->ProcRank == DiagData->ProcNum - 1) { | 
|---|
| 1387 | left = DiagData->ProcRank - 1; | 
|---|
| 1388 | right = 0; | 
|---|
| 1389 | Lsend = DiagData->ProcRank; | 
|---|
| 1390 | Rsend = DiagData->ProcRank - 1; | 
|---|
| 1391 | Lrecv = DiagData->ProcRank - 1; | 
|---|
| 1392 | Rrecv = DiagData->ProcRank; | 
|---|
| 1393 | } else { | 
|---|
| 1394 | left = DiagData->ProcRank - 1; | 
|---|
| 1395 | right = DiagData->ProcRank + 1; | 
|---|
| 1396 | Lsend = DiagData->ProcRank+1; | 
|---|
| 1397 | Rsend = DiagData->ProcRank - 1; | 
|---|
| 1398 | Lrecv = DiagData->ProcRank - 1; | 
|---|
| 1399 | Rrecv = DiagData->ProcRank+1; | 
|---|
| 1400 | } | 
|---|
| 1401 | //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv); | 
|---|
| 1402 |  | 
|---|
| 1403 | // initialise A_loc | 
|---|
| 1404 | debug(P,"initialise A_loc"); | 
|---|
| 1405 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) { | 
|---|
| 1406 | //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]"); | 
|---|
| 1407 | Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]"); | 
|---|
| 1408 | Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]"); | 
|---|
| 1409 | Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]"); | 
|---|
| 1410 | //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds]; | 
|---|
| 1411 |  | 
|---|
| 1412 | for (round=0;round<max_rounds;round++) { | 
|---|
| 1413 | Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)]; | 
|---|
| 1414 | Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)]; | 
|---|
| 1415 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1416 | Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)); | 
|---|
| 1417 | Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1); | 
|---|
| 1418 | //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]); | 
|---|
| 1419 | } | 
|---|
| 1420 | } | 
|---|
| 1421 | } | 
|---|
| 1422 | // initialise U_loc | 
|---|
| 1423 | debug(P,"initialise U_loc"); | 
|---|
| 1424 | //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc"); | 
|---|
| 1425 | Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround"); | 
|---|
| 1426 | Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal"); | 
|---|
| 1427 | Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc"); | 
|---|
| 1428 | //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds]; | 
|---|
| 1429 | for (round=0;round<max_rounds;round++) { | 
|---|
| 1430 | Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)]; | 
|---|
| 1431 | Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)]; | 
|---|
| 1432 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1433 | Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)); | 
|---|
| 1434 | Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1); | 
|---|
| 1435 | //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]); | 
|---|
| 1436 | } | 
|---|
| 1437 | } | 
|---|
| 1438 |  | 
|---|
| 1439 | // now comes the iteration loop | 
|---|
| 1440 | debug(P,"now comes the iteration loop"); | 
|---|
| 1441 | it_steps = 0; | 
|---|
| 1442 | do { | 
|---|
| 1443 | it_steps++; | 
|---|
| 1444 | //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps); | 
|---|
| 1445 | for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time | 
|---|
| 1446 | //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); | 
|---|
| 1447 | for (round = 0; round < max_rounds;round++) { | 
|---|
| 1448 | start = DiagData->ProcRank * max_rounds + round; | 
|---|
| 1449 | // get indices | 
|---|
| 1450 | i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices | 
|---|
| 1451 | iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; | 
|---|
| 1452 | j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j >  i | 
|---|
| 1453 | jloc =  DiagData->top[start] > DiagData->bot[start] ? 0 : 1; | 
|---|
| 1454 | //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc); | 
|---|
| 1455 |  | 
|---|
| 1456 | // calculate rotation angle, i.e. c and s | 
|---|
| 1457 | //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j); | 
|---|
| 1458 | gsl_matrix_set_zero(G); | 
|---|
| 1459 | for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ... | 
|---|
| 1460 | // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] | 
|---|
| 1461 | //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); | 
|---|
| 1462 | //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); | 
|---|
| 1463 | gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); | 
|---|
| 1464 | gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); | 
|---|
| 1465 |  | 
|---|
| 1466 | // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] | 
|---|
| 1467 | for (l=0;l<2;l++) | 
|---|
| 1468 | for (m=0;m<2;m++) | 
|---|
| 1469 | gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); | 
|---|
| 1470 | } | 
|---|
| 1471 | //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); | 
|---|
| 1472 | // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G | 
|---|
| 1473 | EigensolverFor22Matrix(P,G,eval,evec); | 
|---|
| 1474 | //gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G | 
|---|
| 1475 |  | 
|---|
| 1476 | CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]); | 
|---|
| 1477 | //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]); | 
|---|
| 1478 |  | 
|---|
| 1479 | //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); | 
|---|
| 1480 | // V_loc = V_loc * V_small | 
|---|
| 1481 | //debug(P,"apply rotation to local U"); | 
|---|
| 1482 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1483 | a_i = Uloc[2*round+iloc][l]; | 
|---|
| 1484 | a_j = Uloc[2*round+jloc][l]; | 
|---|
| 1485 | Uloc[2*round+iloc][l] =  c[round] * a_i + s[round] * a_j; | 
|---|
| 1486 | Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j; | 
|---|
| 1487 | } | 
|---|
| 1488 | }  // end of round | 
|---|
| 1489 | // circulate the rotation angles | 
|---|
| 1490 | //debug(P,"circulate the rotation angles"); | 
|---|
| 1491 | MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));   // MPI_Allgather is waaaaay faster than ring circulation | 
|---|
| 1492 | MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); | 
|---|
| 1493 | //m = start; | 
|---|
| 1494 | for (l=0;l<DiagData->AllocNum/2;l++) { // for each process | 
|---|
| 1495 | // we have V_small from process k | 
|---|
| 1496 | //debug(P,"Apply V_small from other process"); | 
|---|
| 1497 | i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices | 
|---|
| 1498 | j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j >  i | 
|---|
| 1499 | iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1; | 
|---|
| 1500 | jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1; | 
|---|
| 1501 | for (m=0;m<max_rounds;m++) { | 
|---|
| 1502 | //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j); | 
|---|
| 1503 | // apply row rotation to each A[k] | 
|---|
| 1504 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! | 
|---|
| 1505 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); | 
|---|
| 1506 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); | 
|---|
| 1507 | a_i = Aloc[k][2*m+iloc][i]; | 
|---|
| 1508 | a_j = Aloc[k][2*m+iloc][j]; | 
|---|
| 1509 | Aloc[k][2*m+iloc][i] =  c_all[l] * a_i + s_all[l] * a_j; | 
|---|
| 1510 | Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j; | 
|---|
| 1511 | a_i = Aloc[k][2*m+jloc][i]; | 
|---|
| 1512 | a_j = Aloc[k][2*m+jloc][j]; | 
|---|
| 1513 | Aloc[k][2*m+jloc][i] =  c_all[l] * a_i + s_all[l] * a_j; | 
|---|
| 1514 | Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j; | 
|---|
| 1515 | //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); | 
|---|
| 1516 | //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); | 
|---|
| 1517 | } | 
|---|
| 1518 | } | 
|---|
| 1519 | } | 
|---|
| 1520 | // apply rotation to local operator matrices | 
|---|
| 1521 | // A_loc = A_loc * V_small | 
|---|
| 1522 | //debug(P,"apply rotation to local operator matrices A[k]"); | 
|---|
| 1523 | for (m=0;m<max_rounds;m++) { | 
|---|
| 1524 | start = DiagData->ProcRank * max_rounds + m; | 
|---|
| 1525 | iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; | 
|---|
| 1526 | jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1; | 
|---|
| 1527 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix ! | 
|---|
| 1528 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1529 | // Columns, i and j belong to this process only! | 
|---|
| 1530 | a_i = Aloc[k][2*m+iloc][l]; | 
|---|
| 1531 | a_j = Aloc[k][2*m+jloc][l]; | 
|---|
| 1532 | Aloc[k][2*m+iloc][l] =  c[m] * a_i + s[m] * a_j; | 
|---|
| 1533 | Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j; | 
|---|
| 1534 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]); | 
|---|
| 1535 | } | 
|---|
| 1536 | } | 
|---|
| 1537 | } | 
|---|
| 1538 | // Shuffling of these round's columns to prepare next rotation set | 
|---|
| 1539 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! | 
|---|
| 1540 | // extract columns from A | 
|---|
| 1541 | //debug(P,"extract columns from A"); | 
|---|
| 1542 | MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1); | 
|---|
| 1543 |  | 
|---|
| 1544 | } | 
|---|
| 1545 | // and also for V ... | 
|---|
| 1546 | //debug(P,"extract columns from U"); | 
|---|
| 1547 | MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1); | 
|---|
| 1548 |  | 
|---|
| 1549 |  | 
|---|
| 1550 | // and merry-go-round for the indices too | 
|---|
| 1551 | //debug(P,"and merry-go-round for the indices too"); | 
|---|
| 1552 | MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2); | 
|---|
| 1553 | } | 
|---|
| 1554 |  | 
|---|
| 1555 | //fprintf(stderr,"(%i) STEP 4\n",P->Par.me); | 
|---|
| 1556 | // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 | 
|---|
| 1557 | old_spread = Spread; | 
|---|
| 1558 | spread = 0.; | 
|---|
| 1559 | for(k=0;k<DiagData->NumMatrices;k++) {   // go through all self-adjoint operators | 
|---|
| 1560 | for (i=0; i < 2*max_rounds; i++) {  // go through all wave functions | 
|---|
| 1561 | spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]; | 
|---|
| 1562 | //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i); | 
|---|
| 1563 | } | 
|---|
| 1564 | } | 
|---|
| 1565 | MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm)); | 
|---|
| 1566 | //Spread = spread; | 
|---|
| 1567 | if (P->Par.me == 0) { | 
|---|
| 1568 | //if(P->Call.out[ReadOut]) | 
|---|
| 1569 | //  fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier); | 
|---|
| 1570 | //else | 
|---|
| 1571 | //fprintf(stderr,"%2.9e\n",Spread); | 
|---|
| 1572 | } | 
|---|
| 1573 | // STEP 5: check change of variance | 
|---|
| 1574 | } while (fabs(old_spread-Spread) >= R->EpsWannier); | 
|---|
| 1575 | // end of iterative diagonalization loop: We have found our final orthogonal U! | 
|---|
| 1576 |  | 
|---|
| 1577 | // gather local parts of U into complete matrix | 
|---|
| 1578 | for (l=0;l<DiagData->ProcNum;l++) | 
|---|
| 1579 | rcounts[l] = DiagData->AllocNum; | 
|---|
| 1580 | debug(P,"allgather U"); | 
|---|
| 1581 | for (round=0;round<2*max_rounds;round++) { | 
|---|
| 1582 | for (l=0;l<DiagData->ProcNum;l++) | 
|---|
| 1583 | rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum; | 
|---|
| 1584 | MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm)); | 
|---|
| 1585 | } | 
|---|
| 1586 | for (k=0;k<DiagData->AllocNum;k++) { | 
|---|
| 1587 | for(l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1588 | gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]); | 
|---|
| 1589 | } | 
|---|
| 1590 | } | 
|---|
| 1591 |  | 
|---|
| 1592 | // after one set, gather A[k] from all and calculate spread | 
|---|
| 1593 | for (l=0;l<DiagData->ProcNum;l++) | 
|---|
| 1594 | rcounts[l] = DiagData->AllocNum; | 
|---|
| 1595 | debug(P,"gather A[k] for spread"); | 
|---|
| 1596 | for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix ! | 
|---|
| 1597 | debug(P,"A[k] all gather"); | 
|---|
| 1598 | for (round=0;round<2*max_rounds;round++) { | 
|---|
| 1599 | for (l=0;l<DiagData->ProcNum;l++) | 
|---|
| 1600 | rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum; | 
|---|
| 1601 | MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm)); | 
|---|
| 1602 | } | 
|---|
| 1603 | for (k=0;k<DiagData->AllocNum;k++) { | 
|---|
| 1604 | for(l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1605 | gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]); | 
|---|
| 1606 | } | 
|---|
| 1607 | } | 
|---|
| 1608 | } | 
|---|
| 1609 |  | 
|---|
| 1610 | // free eigenvector stuff | 
|---|
| 1611 | gsl_vector_free(h); | 
|---|
| 1612 | gsl_matrix_free(G); | 
|---|
| 1613 | //gsl_eigen_symmv_free(w); | 
|---|
| 1614 | gsl_vector_free(eval); | 
|---|
| 1615 | gsl_matrix_free(evec); | 
|---|
| 1616 |  | 
|---|
| 1617 | // Free column vectors | 
|---|
| 1618 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) { | 
|---|
| 1619 | Free(Atotal[k], "ParallelDiagonalization: Atotal[k]"); | 
|---|
| 1620 | Free(Around[k], "ParallelDiagonalization: Around[k]"); | 
|---|
| 1621 | } | 
|---|
| 1622 | Free(Uround, "ParallelDiagonalization: Uround"); | 
|---|
| 1623 | Free(Utotal, "ParallelDiagonalization: Utotal"); | 
|---|
| 1624 | Free(c_all, "ParallelDiagonalization: c_all"); | 
|---|
| 1625 | Free(s_all, "ParallelDiagonalization: s_all"); | 
|---|
| 1626 | Free(c, "ParallelDiagonalization: c"); | 
|---|
| 1627 | Free(s, "ParallelDiagonalization: s"); | 
|---|
| 1628 | Free(rcounts, "ParallelDiagonalization: rcounts"); | 
|---|
| 1629 | Free(rdispls, "ParallelDiagonalization: rdispls"); | 
|---|
| 1630 | } | 
|---|
| 1631 | /* | 
|---|
| 1632 | * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized | 
|---|
| 1633 | * \param *U transformation matrix set to unity matrix | 
|---|
| 1634 | * \param NumMatrices number of matrices to be diagonalized | 
|---|
| 1635 | * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A) | 
|---|
| 1636 | * \param AllocNum number of rows/columns in matrices | 
|---|
| 1637 | * \param Num number of wave functions | 
|---|
| 1638 | * \param *top array with top row indices (merry-go-round) | 
|---|
| 1639 | * \param *bot array with bottom row indices (merry-go-round) | 
|---|
| 1640 | */ | 
|---|
| 1641 |  | 
|---|
| 1642 | /** Simultaneous Diagonalization of variances matrices with one cpu. | 
|---|
| 1643 | * \param *P Problem at hand | 
|---|
| 1644 | * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization | 
|---|
| 1645 | * \note this is faster given one cpu only than ParallelDiagonalization() | 
|---|
| 1646 | */ | 
|---|
| 1647 | void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData) | 
|---|
| 1648 | { | 
|---|
| 1649 | struct RunStruct *R = &P->R; | 
|---|
| 1650 | gsl_matrix *G; | 
|---|
| 1651 | gsl_vector *h; | 
|---|
| 1652 | gsl_vector *eval; | 
|---|
| 1653 | gsl_matrix *evec; | 
|---|
| 1654 | //gsl_eigen_symmv_workspace *w; | 
|---|
| 1655 | double *c,*s,a_i,a_j; | 
|---|
| 1656 | int it_steps, set, ProcRank; | 
|---|
| 1657 | int i,j,k,l,m; | 
|---|
| 1658 | double spread = 0., old_spread = 0.;\ | 
|---|
| 1659 |  | 
|---|
| 1660 | // allocate eigenvector stuff | 
|---|
| 1661 | debug(P,"allocate eigenvector stuff"); | 
|---|
| 1662 | G = gsl_matrix_calloc (2,2); | 
|---|
| 1663 | h = gsl_vector_alloc (2); | 
|---|
| 1664 | eval = gsl_vector_alloc (2); | 
|---|
| 1665 | evec = gsl_matrix_alloc (2,2); | 
|---|
| 1666 | //w = gsl_eigen_symmv_alloc(2); | 
|---|
| 1667 |  | 
|---|
| 1668 | c = (double *) Malloc(sizeof(double), "ComputeMLWF: c"); | 
|---|
| 1669 | s = (double *) Malloc(sizeof(double), "ComputeMLWF: s"); | 
|---|
| 1670 | debug(P,"now comes the iteration loop"); | 
|---|
| 1671 | it_steps=0; | 
|---|
| 1672 | do { | 
|---|
| 1673 | it_steps++; | 
|---|
| 1674 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me); | 
|---|
| 1675 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps); | 
|---|
| 1676 | for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time | 
|---|
| 1677 | //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); | 
|---|
| 1678 | // STEP 3: for all index pairs 0<= i<j <AllocNum | 
|---|
| 1679 | for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) { | 
|---|
| 1680 | // get indices | 
|---|
| 1681 | i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices | 
|---|
| 1682 | j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j >  i | 
|---|
| 1683 | //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j); | 
|---|
| 1684 | // STEP 3a: Calculate G | 
|---|
| 1685 | gsl_matrix_set_zero(G); | 
|---|
| 1686 |  | 
|---|
| 1687 | for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ... | 
|---|
| 1688 | // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] | 
|---|
| 1689 | //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j)); | 
|---|
| 1690 | //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i)); | 
|---|
| 1691 | gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j)); | 
|---|
| 1692 | gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i)); | 
|---|
| 1693 | //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1)); | 
|---|
| 1694 |  | 
|---|
| 1695 | // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] | 
|---|
| 1696 | for (l=0;l<2;l++) | 
|---|
| 1697 | for (m=0;m<2;m++) | 
|---|
| 1698 | gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); | 
|---|
| 1699 |  | 
|---|
| 1700 | //PrintGSLMatrix(P,G,2, "G"); | 
|---|
| 1701 | } | 
|---|
| 1702 |  | 
|---|
| 1703 | //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); | 
|---|
| 1704 | // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G | 
|---|
| 1705 | EigensolverFor22Matrix(P,G,eval,evec); | 
|---|
| 1706 | //gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G | 
|---|
| 1707 |  | 
|---|
| 1708 | //PrintGSLMatrix(P, evec, 2, "Eigenvectors"); | 
|---|
| 1709 | CalculateRotationAnglesFromEigenvalues(evec, eval, c, s); | 
|---|
| 1710 | //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]); | 
|---|
| 1711 |  | 
|---|
| 1712 | //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j); | 
|---|
| 1713 | // STEP 3d: apply rotation R to rows and columns of A^{(k)} | 
|---|
| 1714 | for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! | 
|---|
| 1715 | //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)"); | 
|---|
| 1716 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1717 | // Rows | 
|---|
| 1718 | a_i = gsl_matrix_get(DiagData->A[k],i,l); | 
|---|
| 1719 | a_j = gsl_matrix_get(DiagData->A[k],j,l); | 
|---|
| 1720 | gsl_matrix_set(DiagData->A[k], i, l,  c[0] * a_i + s[0] * a_j); | 
|---|
| 1721 | gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j); | 
|---|
| 1722 | } | 
|---|
| 1723 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1724 | // Columns | 
|---|
| 1725 | a_i = gsl_matrix_get(DiagData->A[k],l,i); | 
|---|
| 1726 | a_j = gsl_matrix_get(DiagData->A[k],l,j); | 
|---|
| 1727 | gsl_matrix_set(DiagData->A[k], l, i,  c[0] * a_i + s[0] * a_j); | 
|---|
| 1728 | gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j); | 
|---|
| 1729 | } | 
|---|
| 1730 | //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)"); | 
|---|
| 1731 | } | 
|---|
| 1732 | //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); | 
|---|
| 1733 | //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)"); | 
|---|
| 1734 | // STEP 3e: apply U = R*U | 
|---|
| 1735 | for (l=0;l<DiagData->AllocNum;l++) { | 
|---|
| 1736 | a_i = gsl_matrix_get(DiagData->U,i,l); | 
|---|
| 1737 | a_j = gsl_matrix_get(DiagData->U,j,l); | 
|---|
| 1738 | gsl_matrix_set(DiagData->U, i, l,  c[0] * a_i + s[0] * a_j); | 
|---|
| 1739 | gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j); | 
|---|
| 1740 | } | 
|---|
| 1741 | //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)"); | 
|---|
| 1742 | } | 
|---|
| 1743 | // and merry-go-round for the indices too | 
|---|
| 1744 | //debug(P,"and merry-go-round for the indices too"); | 
|---|
| 1745 | if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2); | 
|---|
| 1746 | } | 
|---|
| 1747 |  | 
|---|
| 1748 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me); | 
|---|
| 1749 | // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 | 
|---|
| 1750 | old_spread = spread; | 
|---|
| 1751 | spread = 0.; | 
|---|
| 1752 | for(k=0;k<DiagData->NumMatrices;k++) {   // go through all self-adjoint operators | 
|---|
| 1753 | for (i=0; i < DiagData->AllocNum; i++) {  // go through all wave functions | 
|---|
| 1754 | spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2); | 
|---|
| 1755 | } | 
|---|
| 1756 | } | 
|---|
| 1757 | if(P->Par.me == 0) { | 
|---|
| 1758 | //if(P->Call.out[ReadOut]) | 
|---|
| 1759 | //  fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier); | 
|---|
| 1760 | //else | 
|---|
| 1761 | //fprintf(stderr,"%2.9e\n",spread); | 
|---|
| 1762 | } | 
|---|
| 1763 | // STEP 5: check change of variance | 
|---|
| 1764 | } while (fabs(old_spread-spread) >= R->EpsWannier); | 
|---|
| 1765 | // end of iterative diagonalization loop: We have found our final orthogonal U! | 
|---|
| 1766 | Free(c, "SerialDiagonalization: c"); | 
|---|
| 1767 | Free(s, "SerialDiagonalization: s"); | 
|---|
| 1768 | // free eigenvector stuff | 
|---|
| 1769 | gsl_vector_free(h); | 
|---|
| 1770 | gsl_matrix_free(G); | 
|---|
| 1771 | //gsl_eigen_symmv_free(w); | 
|---|
| 1772 | gsl_vector_free(eval); | 
|---|
| 1773 | gsl_matrix_free(evec); | 
|---|
| 1774 | } | 
|---|
| 1775 |  | 
|---|
| 1776 | /** Writes the wannier centres and spread to file. | 
|---|
| 1777 | * Also puts centres from array into OnePsiElementAddData structure. | 
|---|
| 1778 | * \param *P Problem at hand | 
|---|
| 1779 | * \param old_spread first term of wannier spread | 
|---|
| 1780 | * \param spread second term of wannier spread | 
|---|
| 1781 | * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres | 
|---|
| 1782 | * \param *WannierSpread array with wannier spread per wave function | 
|---|
| 1783 | */ | 
|---|
| 1784 | void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread) | 
|---|
| 1785 | { | 
|---|
| 1786 | struct FileData *F = &P->Files; | 
|---|
| 1787 | struct RunStruct *R = &P->R; | 
|---|
| 1788 | struct Lattice *Lat = &P->Lat; | 
|---|
| 1789 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 1790 | struct OnePsiElement *OnePsiA; | 
|---|
| 1791 | char spin[12], suffix[18]; | 
|---|
| 1792 | int i,j,l; | 
|---|
| 1793 |  | 
|---|
| 1794 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me); | 
|---|
| 1795 |  | 
|---|
| 1796 | switch (Lat->Psi.PsiST) { | 
|---|
| 1797 | case SpinDouble: | 
|---|
| 1798 | strcpy(suffix,".spread.csv"); | 
|---|
| 1799 | strcpy(spin,"SpinDouble"); | 
|---|
| 1800 | break; | 
|---|
| 1801 | case SpinUp: | 
|---|
| 1802 | strcpy(suffix,".spread_up.csv"); | 
|---|
| 1803 | strcpy(spin,"SpinUp"); | 
|---|
| 1804 | break; | 
|---|
| 1805 | case SpinDown: | 
|---|
| 1806 | strcpy(suffix,".spread_down.csv"); | 
|---|
| 1807 | strcpy(spin,"SpinDown"); | 
|---|
| 1808 | break; | 
|---|
| 1809 | } | 
|---|
| 1810 | if (P->Par.me_comm_ST == 0) { | 
|---|
| 1811 | if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level | 
|---|
| 1812 | OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level | 
|---|
| 1813 | else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile) | 
|---|
| 1814 | OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level | 
|---|
| 1815 | if (F->SpreadFile == NULL) { | 
|---|
| 1816 | Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n"); | 
|---|
| 1817 | } else { | 
|---|
| 1818 | fprintf(F->SpreadFile,"#===== W A N N I E R  C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin); | 
|---|
| 1819 | fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n"); | 
|---|
| 1820 | } | 
|---|
| 1821 | } | 
|---|
| 1822 |  | 
|---|
| 1823 | // put (new) WannierCentres into local ones and into file | 
|---|
| 1824 | i=-1; | 
|---|
| 1825 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
| 1826 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
| 1827 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
| 1828 | i++;  // increase l if it is occupied wave function | 
|---|
| 1829 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local? | 
|---|
| 1830 | for (j=0;j<NDIM;j++) | 
|---|
| 1831 | Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j]; | 
|---|
| 1832 | } | 
|---|
| 1833 | if (P->Par.me_comm_ST == 0) | 
|---|
| 1834 | fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]); | 
|---|
| 1835 | } | 
|---|
| 1836 | } | 
|---|
| 1837 | if (P->Par.me_comm_ST == 0) { | 
|---|
| 1838 | fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n"); | 
|---|
| 1839 | fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread); | 
|---|
| 1840 | fflush(F->SpreadFile); | 
|---|
| 1841 | } | 
|---|
| 1842 | } | 
|---|
| 1843 |  | 
|---|
| 1844 |  | 
|---|
| 1845 | /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre. | 
|---|
| 1846 | * \param *P Problem at hand | 
|---|
| 1847 | * \return 1 - success, 0 - failure | 
|---|
| 1848 | */ | 
|---|
| 1849 | int ParseWannierFile(struct Problem *P) | 
|---|
| 1850 | { | 
|---|
| 1851 | struct Lattice *Lat = &P->Lat; | 
|---|
| 1852 | struct RunStruct *R = &P->R; | 
|---|
| 1853 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 1854 | struct OnePsiElement *OnePsiA; | 
|---|
| 1855 | int i,l,j, msglen; | 
|---|
| 1856 | FILE *SpreadFile; | 
|---|
| 1857 | char *tagname = NULL; | 
|---|
| 1858 | char suffix[18]; | 
|---|
| 1859 | double WannierCentre[NDIM+1]; // combined centre and spread | 
|---|
| 1860 | MPI_Status status; | 
|---|
| 1861 | int signal = 0; // 1 - ok, 0 - error | 
|---|
| 1862 |  | 
|---|
| 1863 | switch (Lat->Psi.PsiST) { | 
|---|
| 1864 | case SpinDouble: | 
|---|
| 1865 | strcpy(suffix,".spread.csv"); | 
|---|
| 1866 | break; | 
|---|
| 1867 | case SpinUp: | 
|---|
| 1868 | strcpy(suffix,".spread_up.csv"); | 
|---|
| 1869 | break; | 
|---|
| 1870 | case SpinDown: | 
|---|
| 1871 | strcpy(suffix,".spread_down.csv"); | 
|---|
| 1872 | break; | 
|---|
| 1873 | } | 
|---|
| 1874 | if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me); | 
|---|
| 1875 |  | 
|---|
| 1876 | if (P->Par.me_comm_ST == 0) { | 
|---|
| 1877 | tagname = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "ParseWannierFile: *tagname"); | 
|---|
| 1878 | if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists | 
|---|
| 1879 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1880 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1881 | return 0; | 
|---|
| 1882 | //Error(SomeError,"ParseWannierFile: Opening failed\n"); | 
|---|
| 1883 | } | 
|---|
| 1884 | signal = 1; | 
|---|
| 1885 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1886 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1887 | } else { | 
|---|
| 1888 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1889 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1890 | if (signal == 0) | 
|---|
| 1891 | return 0; | 
|---|
| 1892 | } | 
|---|
| 1893 | i=-1; | 
|---|
| 1894 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
| 1895 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
| 1896 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
| 1897 | i++;  // increase l if it is occupied wave function | 
|---|
| 1898 | if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file | 
|---|
| 1899 | sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo); | 
|---|
| 1900 | signal = 0; | 
|---|
| 1901 | if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) { | 
|---|
| 1902 | //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed"); | 
|---|
| 1903 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1904 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1905 | return 0; | 
|---|
| 1906 | } | 
|---|
| 1907 | if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) { | 
|---|
| 1908 | //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed"); | 
|---|
| 1909 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1910 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1911 | return 0; | 
|---|
| 1912 | } | 
|---|
| 1913 | signal = 1; | 
|---|
| 1914 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1915 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1916 | } else { | 
|---|
| 1917 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
| 1918 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
| 1919 | if (signal == 0) | 
|---|
| 1920 | return 0; | 
|---|
| 1921 | } | 
|---|
| 1922 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local? | 
|---|
| 1923 | if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em | 
|---|
| 1924 | if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS) | 
|---|
| 1925 | Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed"); | 
|---|
| 1926 | //return 0; | 
|---|
| 1927 | MPI_Get_count(&status, MPI_DOUBLE, &msglen); | 
|---|
| 1928 | if (msglen != NDIM+1) | 
|---|
| 1929 | Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count"); | 
|---|
| 1930 | //return 0; | 
|---|
| 1931 | } | 
|---|
| 1932 | if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader | 
|---|
| 1933 | Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed"); | 
|---|
| 1934 | //return 0; | 
|---|
| 1935 | // and store 'em (for all who have this Psi local) | 
|---|
| 1936 | if ((P->Par.me == 0) && P->Call.out[ValueOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]); | 
|---|
| 1937 | for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j]; | 
|---|
| 1938 | Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM]; | 
|---|
| 1939 | //if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread); | 
|---|
| 1940 | } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group | 
|---|
| 1941 | if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS) | 
|---|
| 1942 | Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed"); | 
|---|
| 1943 | //return 0; | 
|---|
| 1944 | } | 
|---|
| 1945 | } | 
|---|
| 1946 | } | 
|---|
| 1947 | if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) { | 
|---|
| 1948 | fclose(SpreadFile); | 
|---|
| 1949 | Free(tagname, "ParseWannierFile: *tagname"); | 
|---|
| 1950 | } | 
|---|
| 1951 | //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me); | 
|---|
| 1952 | return 1; | 
|---|
| 1953 | } | 
|---|
| 1954 |  | 
|---|
| 1955 | /** Calculates the spread of orbital \a i. | 
|---|
| 1956 | * Stored in OnePsiElementAddData#WannierSpread. | 
|---|
| 1957 | * \param *P Problem at hand | 
|---|
| 1958 | * \param i i-th wave function (note "extra" ones are not counted!) | 
|---|
| 1959 | * \return spread \f$\sigma^2_{A^{(k)}}\f$ | 
|---|
| 1960 | */ | 
|---|
| 1961 | double CalculateSpread(struct Problem *P, int i) { | 
|---|
| 1962 | struct Lattice *Lat = &P->Lat; | 
|---|
| 1963 | struct RunStruct *R = &P->R; | 
|---|
| 1964 | struct Psis *Psi = &Lat->Psi; | 
|---|
| 1965 | struct LatticeLevel *Lev0 = R->Lev0; | 
|---|
| 1966 | struct LatticeLevel *LevS = R->LevS; | 
|---|
| 1967 | struct Density *Dens0 = Lev0->Dens; | 
|---|
| 1968 | struct fft_plan_3d *plan = Lat->plan; | 
|---|
| 1969 | fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; | 
|---|
| 1970 | fftw_real *PsiCR = (fftw_real *)PsiC; | 
|---|
| 1971 | fftw_complex *work = Dens0->DensityCArray[Temp2Density]; | 
|---|
| 1972 | fftw_real **HGcR = &Dens0->DensityArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as a storage array | 
|---|
| 1973 | fftw_complex **HGcRC = (fftw_complex**)HGcR; | 
|---|
| 1974 | fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array | 
|---|
| 1975 | fftw_real **HGcR2 = (fftw_real**)HGcR2C; | 
|---|
| 1976 | MPI_Status status; | 
|---|
| 1977 | struct OnePsiElement *OnePsiA, *LOnePsiA; | 
|---|
| 1978 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; | 
|---|
| 1979 | fftw_complex *LPsiDatA=NULL; | 
|---|
| 1980 | int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index; | 
|---|
| 1981 | N0 = LevS->Plan0.plan->local_nx; | 
|---|
| 1982 | N = LevS->Plan0.plan->N; | 
|---|
| 1983 | const int NUpx = LevS->NUp[0]; | 
|---|
| 1984 | const int NUpy = LevS->NUp[1]; | 
|---|
| 1985 | const int NUpz = LevS->NUp[2]; | 
|---|
| 1986 | double a_ij, b_ij, A_ij, B_ij; | 
|---|
| 1987 | double tmp, tmp2, spread = 0; | 
|---|
| 1988 | double **cos_lookup, **sin_lookup; | 
|---|
| 1989 |  | 
|---|
| 1990 | b_ij = 0; | 
|---|
| 1991 |  | 
|---|
| 1992 | CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N); | 
|---|
| 1993 |  | 
|---|
| 1994 | // fill matrices | 
|---|
| 1995 | OnePsiA = &Psi->AllPsiStatus[i];    // grab the desired OnePsiA | 
|---|
| 1996 | if (OnePsiA->PsiType != Extra) {   // drop if extra one | 
|---|
| 1997 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
| 1998 | LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo]; | 
|---|
| 1999 | else | 
|---|
| 2000 | LOnePsiA = NULL; | 
|---|
| 2001 | if (LOnePsiA == NULL) {   // if it's not local ... receive it from respective process into TempPsi | 
|---|
| 2002 | RecvSource = OnePsiA->my_color_comm_ST_Psi; | 
|---|
| 2003 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status ); | 
|---|
| 2004 | LPsiDatA=LevS->LPsi->TempPsi; | 
|---|
| 2005 | } else {                  // .. otherwise send it to all other processes (Max_me... - 1) | 
|---|
| 2006 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) | 
|---|
| 2007 | if (p != OnePsiA->my_color_comm_ST_Psi) | 
|---|
| 2008 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT); | 
|---|
| 2009 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; | 
|---|
| 2010 | } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received | 
|---|
| 2011 |  | 
|---|
| 2012 | CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); | 
|---|
| 2013 | // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! | 
|---|
| 2014 | for (n0=0;n0<N0;n0++) | 
|---|
| 2015 | for (n[1]=0;n[1]<N[1];n[1]++) | 
|---|
| 2016 | for (n[2]=0;n[2]<N[2];n[2]++) { | 
|---|
| 2017 | i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx); | 
|---|
| 2018 | iS = n[2]+N[2]*(n[1]+N[1]*n0); | 
|---|
| 2019 | n[0] = n0 + LevS->Plan0.plan->start_nx; | 
|---|
| 2020 | for (k=0;k<max_operators;k+=2) { | 
|---|
| 2021 | tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]); | 
|---|
| 2022 | tmp2 = PsiCR[i0] /LevS->MaxN; | 
|---|
| 2023 | // check lookup | 
|---|
| 2024 | if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) { | 
|---|
| 2025 | fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]); | 
|---|
| 2026 | Error(SomeError, "Lookup table does not match real value!"); | 
|---|
| 2027 | } | 
|---|
| 2028 | //            HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */ | 
|---|
| 2029 | //            HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */ | 
|---|
| 2030 | //            HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */ | 
|---|
| 2031 | //            HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ | 
|---|
| 2032 | HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */ | 
|---|
| 2033 | HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */ | 
|---|
| 2034 | HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */ | 
|---|
| 2035 | HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */ | 
|---|
| 2036 | } | 
|---|
| 2037 | } | 
|---|
| 2038 | for (k=0;k<max_operators;k++) { | 
|---|
| 2039 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work); | 
|---|
| 2040 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work); | 
|---|
| 2041 | } | 
|---|
| 2042 |  | 
|---|
| 2043 |  | 
|---|
| 2044 | for (k=0;k<max_operators;k++) { | 
|---|
| 2045 | a_ij = 0; | 
|---|
| 2046 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k); | 
|---|
| 2047 | // sum directly in a_ij and b_ij the two desired terms | 
|---|
| 2048 | g=0; | 
|---|
| 2049 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
| 2050 | Index = LevS->GArray[g].Index; | 
|---|
| 2051 | a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im); | 
|---|
| 2052 | b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im); | 
|---|
| 2053 | g++; | 
|---|
| 2054 | } | 
|---|
| 2055 | for (; g < LevS->MaxG; g++) { | 
|---|
| 2056 | Index = LevS->GArray[g].Index; | 
|---|
| 2057 | a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im); | 
|---|
| 2058 | b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im); | 
|---|
| 2059 | } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...) | 
|---|
| 2060 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
| 2061 | spread += pow(A_ij,2); | 
|---|
| 2062 | } | 
|---|
| 2063 | } | 
|---|
| 2064 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
| 2065 |  | 
|---|
| 2066 | // store spread in OnePsiElementAdd | 
|---|
| 2067 | Psi->AddData[i].WannierSpread = B_ij - spread; | 
|---|
| 2068 |  | 
|---|
| 2069 | FreeSinCosLookupTable(cos_lookup,sin_lookup); | 
|---|
| 2070 |  | 
|---|
| 2071 | return (B_ij - spread); | 
|---|
| 2072 | } | 
|---|