| [a0bcf1] | 1 | /** \file wannier.c
 | 
|---|
 | 2 |  * Maximally Localized Wannier Functions.
 | 
|---|
 | 3 |  * 
 | 
|---|
 | 4 |  * Contains the on function that minimises the spread of all orbitals in one rush in a parallel
 | 
|---|
 | 5 |  * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
 | 
|---|
 | 6 |  * calculate the spread of a specific orbital, which may be useful in checking on the change of
 | 
|---|
 | 7 |  * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
 | 
|---|
 | 8 |  * 
 | 
|---|
 | 9 |   Project: ParallelCarParrinello
 | 
|---|
 | 10 |  \author Frederik Heber
 | 
|---|
 | 11 |  \date 2006
 | 
|---|
 | 12 | 
 | 
|---|
 | 13 |   File: wannier.c
 | 
|---|
 | 14 |   $Id: wannier.c,v 1.63 2007-02-13 14:15:29 foo Exp $
 | 
|---|
 | 15 | */
 | 
|---|
 | 16 | 
 | 
|---|
 | 17 | #include <math.h>
 | 
|---|
 | 18 | #include <gsl/gsl_math.h>
 | 
|---|
 | 19 | #include <gsl/gsl_eigen.h>
 | 
|---|
 | 20 | #include <gsl/gsl_matrix.h>
 | 
|---|
 | 21 | #include <gsl/gsl_vector.h>
 | 
|---|
 | 22 | #include <gsl/gsl_complex.h>
 | 
|---|
 | 23 | #include <gsl/gsl_complex_math.h>
 | 
|---|
 | 24 | #include <gsl/gsl_sort_vector.h>
 | 
|---|
 | 25 | #include <gsl/gsl_heapsort.h>
 | 
|---|
 | 26 | #include <gsl/gsl_blas.h>
 | 
|---|
 | 27 | #include <string.h>
 | 
|---|
 | 28 | 
 | 
|---|
 | 29 | #include "data.h"
 | 
|---|
 | 30 | #include "density.h"
 | 
|---|
 | 31 | #include "errors.h"
 | 
|---|
 | 32 | #include "helpers.h"
 | 
|---|
 | 33 | #include "init.h"
 | 
|---|
 | 34 | #include "myfft.h"
 | 
|---|
 | 35 | #include "mymath.h"
 | 
|---|
 | 36 | #include "output.h"
 | 
|---|
 | 37 | #include "wannier.h"
 | 
|---|
 | 38 | 
 | 
|---|
 | 39 | 
 | 
|---|
 | 40 | #define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
 | 
|---|
 | 41 | 
 | 
|---|
 | 42 | 
 | 
|---|
 | 43 | /** Converts type fftw_complex to gsl_complex.
 | 
|---|
 | 44 |  * \param a complex number
 | 
|---|
 | 45 |  * \return b complex number
 | 
|---|
 | 46 |  */
 | 
|---|
 | 47 | gsl_complex convertComplex (fftw_complex a) {
 | 
|---|
 | 48 |   return gsl_complex_rect(c_re(a),c_im(a));
 | 
|---|
 | 49 | }
 | 
|---|
 | 50 | 
 | 
|---|
 | 51 | /** "merry go round" implementation for parallel index ordering.
 | 
|---|
 | 52 |  * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
 | 
|---|
 | 53 |  * performed which generates once each possible pairing.  
 | 
|---|
 | 54 |  * \param *top index array 1
 | 
|---|
 | 55 |  * \param *bot index array 2
 | 
|---|
 | 56 |  * \param m N/2, where N is the matrix row/column dimension
 | 
|---|
 | 57 |  * \note taken from [Golub, Matrix computations, 1989, p451]
 | 
|---|
 | 58 |  */
 | 
|---|
 | 59 | void music(int *top, int *bot, int m)
 | 
|---|
 | 60 | {
 | 
|---|
 | 61 |   int *old_top, *old_bot;
 | 
|---|
 | 62 |   int k;
 | 
|---|
 | 63 |   old_top = (int *) Malloc(sizeof(int)*m, "music: old_top");
 | 
|---|
 | 64 |   old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot");
 | 
|---|
 | 65 | /*  fprintf(stderr,"oldtop\t");
 | 
|---|
 | 66 |   for (k=0;k<m;k++)
 | 
|---|
 | 67 |     fprintf(stderr,"%i\t", top[k]);
 | 
|---|
 | 68 |   fprintf(stderr,"\n");
 | 
|---|
 | 69 |   fprintf(stderr,"oldbot\t");
 | 
|---|
 | 70 |   for (k=0;k<m;k++)
 | 
|---|
 | 71 |     fprintf(stderr,"%i\t", bot[k]);
 | 
|---|
 | 72 |   fprintf(stderr,"\n");*/
 | 
|---|
 | 73 |   // first copy arrays    
 | 
|---|
 | 74 |   for (k=0;k<m;k++) {
 | 
|---|
 | 75 |     old_top[k] = top[k];
 | 
|---|
 | 76 |     old_bot[k] = bot[k];
 | 
|---|
 | 77 |   }
 | 
|---|
 | 78 |   // then let the music play
 | 
|---|
 | 79 |   for (k=0;k<m;k++) {
 | 
|---|
 | 80 |     if (k==1)
 | 
|---|
 | 81 |       top[k] = old_bot[0];
 | 
|---|
 | 82 |     else if (k > 1)
 | 
|---|
 | 83 |       top[k] = old_top[k-1];
 | 
|---|
 | 84 |     if (k==m-1)
 | 
|---|
 | 85 |       bot[k] = old_top[k];
 | 
|---|
 | 86 |     else
 | 
|---|
 | 87 |       bot[k] = old_bot[k+1];
 | 
|---|
 | 88 |   }
 | 
|---|
 | 89 | /*  fprintf(stderr,"top\t");
 | 
|---|
 | 90 |   for (k=0;k<m;k++)
 | 
|---|
 | 91 |     fprintf(stderr,"%i\t", top[k]);
 | 
|---|
 | 92 |   fprintf(stderr,"\n");
 | 
|---|
 | 93 |   fprintf(stderr,"bot\t");
 | 
|---|
 | 94 |   for (k=0;k<m;k++)
 | 
|---|
 | 95 |     fprintf(stderr,"%i\t", bot[k]);
 | 
|---|
 | 96 |   fprintf(stderr,"\n");*/
 | 
|---|
 | 97 |   // and finito
 | 
|---|
| [64fa9e] | 98 |   Free(old_top, "bla");
 | 
|---|
 | 99 |   Free(old_bot, "bla");
 | 
|---|
| [a0bcf1] | 100 | }
 | 
|---|
 | 101 | 
 | 
|---|
 | 102 | /** merry-go-round for matrix columns.
 | 
|---|
 | 103 |  * The trick here is that we must be aware of multiple rotations per process, thus only some of the
 | 
|---|
 | 104 |  * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
 | 
|---|
 | 105 |  * pointers to the matrix columns within the local array.
 | 
|---|
 | 106 |  * \param comm communicator for circulation
 | 
|---|
 | 107 |  * \param *Aloc local array of columns
 | 
|---|
 | 108 |  * \param Num entries per column
 | 
|---|
 | 109 |  * \param max_rounds number of column pairs in \a *Around
 | 
|---|
 | 110 |  * \param k offset for tag
 | 
|---|
 | 111 |  * \param tagS0 MPI tag for sending left column
 | 
|---|
 | 112 |  * \param tagS1 MPI tag for sending right column
 | 
|---|
 | 113 |  * \param tagR0 MPI tag for receiving left column
 | 
|---|
 | 114 |  * \param tagR1 MPI tag for receiving right column
 | 
|---|
 | 115 |  */ 
 | 
|---|
 | 116 | void shiftcolumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
 | 
|---|
 | 117 |   //double *A_locS1, *A_locS2;  // local columns of A[k]
 | 
|---|
 | 118 |   //double *A_locR1, *A_locR2;  // local columns of A[k]
 | 
|---|
 | 119 |   MPI_Request requestS0, requestS1, requestR0, requestR1;
 | 
|---|
 | 120 |   MPI_Status status;
 | 
|---|
 | 121 |   int ProcRank, ProcNum;
 | 
|---|
 | 122 |   int l; 
 | 
|---|
 | 123 |   MPI_Comm_size (comm, &ProcNum);
 | 
|---|
 | 124 |   MPI_Comm_rank (comm, &ProcRank);
 | 
|---|
 | 125 |   double *Abuffer1, *Abuffer2;  // mark the columns that are circulated
 | 
|---|
 | 126 |   
 | 
|---|
 | 127 |   //fprintf(stderr,"shifting...");
 | 
|---|
 | 128 |   if (ProcRank == 0) {
 | 
|---|
 | 129 |     if (max_rounds > 1) { 
 | 
|---|
 | 130 |       // get last left column
 | 
|---|
 | 131 |       Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
 | 
|---|
 | 132 |       MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
 | 
|---|
 | 133 |     } else {
 | 
|---|
 | 134 |       // get right column
 | 
|---|
 | 135 |       Abuffer1 = Aloc[1]; // note down the free column
 | 
|---|
 | 136 |       MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1,  tagS1+2*k, comm, &requestS0);
 | 
|---|
 | 137 |     } 
 | 
|---|
 | 138 |     
 | 
|---|
 | 139 |     //fprintf(stderr,"...left columns...");
 | 
|---|
 | 140 |     for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
 | 
|---|
 | 141 |       Aloc[l] = Aloc[l-2];
 | 
|---|
 | 142 | 
 | 
|---|
 | 143 |     if (max_rounds > 1) { 
 | 
|---|
 | 144 |       //fprintf(stderr,"...first right...");
 | 
|---|
 | 145 |       Aloc[2] = Aloc[1]; // get first right column
 | 
|---|
 | 146 |     }
 | 
|---|
 | 147 | 
 | 
|---|
 | 148 |     //fprintf(stderr,"...right columns...");
 | 
|---|
 | 149 |     for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
 | 
|---|
 | 150 |       Aloc[l] = Aloc[l+2];
 | 
|---|
 | 151 | 
 | 
|---|
 | 152 |     //fprintf(stderr,"...last right...");
 | 
|---|
 | 153 |     Aloc[(2*max_rounds-1)] = Abuffer1;
 | 
|---|
 | 154 |     MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);  
 | 
|---|
 | 155 | 
 | 
|---|
 | 156 |   } else if (ProcRank == ProcNum-1) {
 | 
|---|
 | 157 |     //fprintf(stderr,"...first right...");
 | 
|---|
 | 158 |     // get first right column
 | 
|---|
 | 159 |     Abuffer2 = Aloc[1]; // note down the free column
 | 
|---|
 | 160 |     MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
 | 
|---|
 | 161 | 
 | 
|---|
 | 162 |     //fprintf(stderr,"...right columns...");
 | 
|---|
 | 163 |     for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
 | 
|---|
 | 164 |       Aloc[(l)] = Aloc[(l+2)];
 | 
|---|
 | 165 | 
 | 
|---|
 | 166 |     //fprintf(stderr,"...last right...");
 | 
|---|
 | 167 |     Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column
 | 
|---|
 | 168 | 
 | 
|---|
 | 169 |     //fprintf(stderr,"...left columns...");
 | 
|---|
 | 170 |     for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
 | 
|---|
 | 171 |       Aloc[(l)] = Aloc[(l-2)];
 | 
|---|
 | 172 | 
 | 
|---|
 | 173 |     //fprintf(stderr,"...first left...");
 | 
|---|
 | 174 | //    if (max_rounds > 1)
 | 
|---|
 | 175 |       Aloc[0] = Abuffer2; // get first left column
 | 
|---|
 | 176 |     MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
 | 
|---|
 | 177 | 
 | 
|---|
 | 178 |   } else {
 | 
|---|
 | 179 |     // get last left column
 | 
|---|
 | 180 |     MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
 | 
|---|
 | 181 |     Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
 | 
|---|
 | 182 | 
 | 
|---|
 | 183 |     //fprintf(stderr,"...first right...");
 | 
|---|
 | 184 |     // get first right column
 | 
|---|
 | 185 |     MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
 | 
|---|
 | 186 |     Abuffer2 = Aloc[1]; // note down the free column
 | 
|---|
 | 187 | 
 | 
|---|
 | 188 |     //fprintf(stderr,"...left columns...");
 | 
|---|
 | 189 |     for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
 | 
|---|
 | 190 |       Aloc[(l)] = Aloc[(l-2)];
 | 
|---|
 | 191 | 
 | 
|---|
 | 192 |     //fprintf(stderr,"...right columns...");
 | 
|---|
 | 193 |     for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
 | 
|---|
 | 194 |       Aloc[(l)] = Aloc[(l+2)];
 | 
|---|
 | 195 | 
 | 
|---|
 | 196 |     //fprintf(stderr,"...first left...");
 | 
|---|
 | 197 |     Aloc[0] = Abuffer1; // get first left column
 | 
|---|
 | 198 |     MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
 | 
|---|
 | 199 | 
 | 
|---|
 | 200 |     //fprintf(stderr,"...last right...");
 | 
|---|
 | 201 |     Aloc[(2*max_rounds-1)] = Abuffer2;
 | 
|---|
 | 202 |     MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
 | 
|---|
 | 203 |   }
 | 
|---|
 | 204 |     
 | 
|---|
 | 205 |   //fprintf(stderr,"...waiting...");
 | 
|---|
 | 206 |   if (ProcRank != ProcNum-1)
 | 
|---|
 | 207 |     MPI_Wait(&requestS0, &status);
 | 
|---|
 | 208 |   if (ProcRank != 0)  // first left column
 | 
|---|
 | 209 |     MPI_Wait(&requestR0, &status);
 | 
|---|
 | 210 |   if (ProcRank != 0)
 | 
|---|
 | 211 |     MPI_Wait(&requestS1, &status);
 | 
|---|
 | 212 |   if (ProcRank != ProcNum-1)
 | 
|---|
 | 213 |     MPI_Wait(&requestR1, &status);
 | 
|---|
 | 214 |   //fprintf(stderr,"...done\n");
 | 
|---|
 | 215 | }
 | 
|---|
 | 216 | 
 | 
|---|
 | 217 | /** Computation of Maximally Localized Wannier Functions.
 | 
|---|
 | 218 |  * Maximally localized functions are prime when evulating a Hamiltonian with
 | 
|---|
 | 219 |  * magnetic fields under periodic boundary conditions, as the common position
 | 
|---|
 | 220 |  * operator is no longer valid. These can be obtained by orbital rotations, which
 | 
|---|
 | 221 |  * are looked for iteratively and gathered in one transformation matrix, to be
 | 
|---|
 | 222 |  * later applied to the set of orbital wave functions.
 | 
|---|
 | 223 |  * 
 | 
|---|
 | 224 |  * In order to obtain these, the following algorithm is applied:
 | 
|---|
 | 225 |  * -# Initialize U (identity) as the sought-for transformation matrix
 | 
|---|
 | 226 |  * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
 | 
|---|
 | 227 |  *    \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators
 | 
|---|
 | 228 |  *    \f$A^{(k)}\f$
 | 
|---|
 | 229 |  * -# For each pair of indices (i,j) (i<j) do the following:
 | 
|---|
 | 230 |  *    -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
 | 
|---|
 | 231 |  *       where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
 | 
|---|
 | 232 |  *    -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
 | 
|---|
 | 233 |  *       corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
 | 
|---|
 | 234 |  *    -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the 
 | 
|---|
 | 235 |  *       identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
 | 
|---|
 | 236 |  *       \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
 | 
|---|
 | 237 |  *    -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
 | 
|---|
 | 238 |  *    -# Gather the rotations in \f$U = U R\f$
 | 
|---|
 | 239 |  * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
 | 
|---|
 | 240 |  * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
 | 
|---|
 | 241 |  * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$
 | 
|---|
 | 242 |  * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
 | 
|---|
 | 243 |  *    OnePsiElementAddData#WannierCentre
 | 
|---|
 | 244 |  * 
 | 
|---|
 | 245 |  * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
 | 
|---|
 | 246 |  * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
 | 
|---|
 | 247 |  * time.
 | 
|---|
 | 248 |  * 
 | 
|---|
 | 249 |  * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
 | 
|---|
 | 250 |  * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
 | 
|---|
 | 251 |  * may be diagonalized simultaneously and parallely. We use the implementation explained in
 | 
|---|
 | 252 |  * [Golub, Matrix computations, 1989, p451].
 | 
|---|
 | 253 |  * 
 | 
|---|
 | 254 |  * \param *P Problem at hand
 | 
|---|
 | 255 |  */
 | 
|---|
 | 256 | void ComputeMLWF(struct Problem *P) {
 | 
|---|
 | 257 |   // variables and allocation
 | 
|---|
 | 258 |   struct FileData *F = &P->Files;
 | 
|---|
 | 259 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
 | 260 |   struct RunStruct *R = &P->R;
 | 
|---|
 | 261 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
 | 262 |   struct LatticeLevel *Lev0 = R->Lev0;
 | 
|---|
 | 263 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
 | 264 |   struct Density *Dens0 = Lev0->Dens;
 | 
|---|
 | 265 |   struct fft_plan_3d *plan = Lat->plan; 
 | 
|---|
 | 266 |   fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
 | 
|---|
 | 267 |   fftw_real *PsiCR = (fftw_real *)PsiC;
 | 
|---|
 | 268 |   fftw_complex *work = Dens0->DensityCArray[Temp2Density]; 
 | 
|---|
 | 269 |   fftw_real **HGcR = &Dens0->DensityArray[HGDensity];  // use HGDensity, 4x Gap..Density, TempDensity as a storage array
 | 
|---|
 | 270 |   fftw_complex **HGcRC = (fftw_complex**)HGcR;
 | 
|---|
 | 271 |   fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array
 | 
|---|
 | 272 |   fftw_real **HGcR2 = (fftw_real**)HGcR2C;
 | 
|---|
 | 273 |   MPI_Status status;
 | 
|---|
 | 274 |   struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB;
 | 
|---|
 | 275 |   int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
 | 
|---|
 | 276 |   fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL;
 | 
|---|
 | 277 |   int n[NDIM],n0,i0,iS, Index;
 | 
|---|
 | 278 |   int N0;
 | 
|---|
 | 279 |   int N[NDIM];
 | 
|---|
 | 280 |   const int NUpx = LevS->NUp[0];
 | 
|---|
 | 281 |   const int NUpy = LevS->NUp[1];
 | 
|---|
 | 282 |   const int NUpz = LevS->NUp[2];
 | 
|---|
 | 283 |   int e,i,j,k,l,m,u,p,g;
 | 
|---|
 | 284 |   int Num = Psi->NoOfPsis;  // is number of occupied plus unoccupied states for rows
 | 
|---|
 | 285 |   double x,y,r;
 | 
|---|
 | 286 |   double q[NDIM];
 | 
|---|
 | 287 |   double *c,*s;
 | 
|---|
 | 288 |   int index;
 | 
|---|
 | 289 |   double spread = 0., old_spread=0., Spread=0.;
 | 
|---|
 | 290 |   double WannierCentre[Num][NDIM];
 | 
|---|
 | 291 |   double WannierSpread[Num];
 | 
|---|
 | 292 |   double tmp,tmp2;
 | 
|---|
 | 293 |   double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
 | 
|---|
 | 294 |   double **cos_lookup,**sin_lookup; 
 | 
|---|
 | 295 |   gsl_matrix *G;
 | 
|---|
 | 296 |   gsl_vector *h;
 | 
|---|
 | 297 |   gsl_vector *eval;
 | 
|---|
 | 298 |   gsl_matrix *evec;
 | 
|---|
 | 299 |   gsl_eigen_symmv_workspace *w;
 | 
|---|
 | 300 |   int ProcNum, ProcRank, set;
 | 
|---|
 | 301 |   int it_steps; // iteration step counter
 | 
|---|
 | 302 |   int *top, *bot;
 | 
|---|
 | 303 |   int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
 | 
|---|
 | 304 |   int left, right; // left or right neighbour for process
 | 
|---|
 | 305 |   double **Aloc[max_operators+1], **Uloc; // local columns for one step of A[k]
 | 
|---|
 | 306 |   double *Around[max_operators+1], *Uround; // all local columns for one round of A[k]
 | 
|---|
 | 307 |   double *Atotal[max_operators+1], *Utotal; // all local columns for one round of A[k]
 | 
|---|
 | 308 |   double a_i, a_j;
 | 
|---|
 | 309 |   int tagR0, tagR1, tagS0, tagS1;
 | 
|---|
 | 310 |   int iloc, jloc;
 | 
|---|
 | 311 |   double *s_all, *c_all;
 | 
|---|
 | 312 |   int round, max_rounds;
 | 
|---|
 | 313 |   int start;
 | 
|---|
 | 314 |   int *rcounts, *rdispls;
 | 
|---|
 | 315 |   int AllocNum = ceil((double)Num / 2. ) *2;
 | 
|---|
 | 316 |   int totalflag, flag;
 | 
|---|
 | 317 |   int *marker, **group;
 | 
|---|
 | 318 |   int partner[Num];
 | 
|---|
 | 319 |   int type = Occupied;
 | 
|---|
 | 320 |   MPI_Comm *comm;
 | 
|---|
 | 321 |   char spin[12], suffix[18];
 | 
|---|
 | 322 |   
 | 
|---|
 | 323 |   N0 = LevS->Plan0.plan->local_nx;
 | 
|---|
 | 324 |   N[0] = LevS->Plan0.plan->N[0];
 | 
|---|
 | 325 |   N[1] = LevS->Plan0.plan->N[1];
 | 
|---|
 | 326 |   N[2] = LevS->Plan0.plan->N[2];
 | 
|---|
 | 327 |   
 | 
|---|
 | 328 |   comm = &P->Par.comm_ST;
 | 
|---|
 | 329 |   
 | 
|---|
 | 330 |   fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
 | 
|---|
 | 331 |   if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) {  // all parallel
 | 
|---|
 | 332 |     comm = &P->Par.comm_ST;
 | 
|---|
 | 333 |     fprintf(stderr,"(%i) Jacobi is done parallely by all\n", P->Par.me);           
 | 
|---|
 | 334 |   } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first  
 | 
|---|
 | 335 |     if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel
 | 
|---|
 | 336 |       comm = &P->Par.comm_ST_Psi;
 | 
|---|
 | 337 |       fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me);       
 | 
|---|
 | 338 |     } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel
 | 
|---|
 | 339 |       comm = &P->Par.comm_ST_PsiT;
 | 
|---|
 | 340 |       fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me);       
 | 
|---|
 | 341 |     }
 | 
|---|
 | 342 |   } else {
 | 
|---|
 | 343 |     if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel 
 | 
|---|
 | 344 |       comm = &P->Par.comm_ST_PsiT;
 | 
|---|
 | 345 |       fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me);       
 | 
|---|
 | 346 |     } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel
 | 
|---|
 | 347 |       comm = &P->Par.comm_ST_Psi;
 | 
|---|
 | 348 |       fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me);       
 | 
|---|
 | 349 |     }
 | 
|---|
 | 350 |   }
 | 
|---|
 | 351 | 
 | 
|---|
 | 352 |   MPI_Comm_size (*comm, &ProcNum);
 | 
|---|
 | 353 |   MPI_Comm_rank (*comm, &ProcRank);
 | 
|---|
 | 354 |   
 | 
|---|
 | 355 |   if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
 | 
|---|
 | 356 |   
 | 
|---|
 | 357 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
 | 
|---|
 | 358 | 
 | 
|---|
 | 359 |   // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
 | 
|---|
 | 360 |   gsl_matrix *A[max_operators+1];   // one extra for B matrix
 | 
|---|
 | 361 |   for (u=0;u<=max_operators;u++)
 | 
|---|
 | 362 |     A[u] = gsl_matrix_calloc (AllocNum,AllocNum);  // allocate matrix    
 | 
|---|
 | 363 | 
 | 
|---|
 | 364 |   // create lookup table for sin/cos values
 | 
|---|
 | 365 |   cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
 | 
|---|
 | 366 |   sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
 | 
|---|
 | 367 |   for (i=0;i<NDIM;i++) {
 | 
|---|
 | 368 |     // allocate memory
 | 
|---|
 | 369 |     cos_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: cos_lookup");
 | 
|---|
 | 370 |     sin_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: sin_lookup");
 | 
|---|
 | 371 |     // reset arrays
 | 
|---|
 | 372 |     SetArrayToDouble0(cos_lookup[i],LevS->Plan0.plan->N[i]);
 | 
|---|
 | 373 |     SetArrayToDouble0(sin_lookup[i],LevS->Plan0.plan->N[i]);
 | 
|---|
 | 374 |     // create lookup values
 | 
|---|
 | 375 |     for (j=0;j<LevS->Plan0.plan->N[i];j++) {
 | 
|---|
 | 376 |       tmp = 2*PI/(double)LevS->Plan0.plan->N[i]*(double)j; 
 | 
|---|
 | 377 |       cos_lookup[i][j] = cos(tmp);
 | 
|---|
 | 378 |       sin_lookup[i][j] = sin(tmp);
 | 
|---|
 | 379 |     }
 | 
|---|
 | 380 |   }
 | 
|---|
 | 381 |   l=-1; // to access U matrix element (0..Num-1)
 | 
|---|
 | 382 |   // fill the matrices
 | 
|---|
 | 383 |   for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions
 | 
|---|
 | 384 |     OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA
 | 
|---|
 | 385 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 386 |       l++;  // increase l if it is non-extra wave function
 | 
|---|
 | 387 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
 | 388 |         LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
 | 
|---|
 | 389 |       else
 | 
|---|
 | 390 |         LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients!
 | 
|---|
 | 391 |       
 | 
|---|
 | 392 |       //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
 | 
|---|
 | 393 |       if (LPsiDatA != NULL) {
 | 
|---|
 | 394 |         CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
 | 
|---|
 | 395 |         // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
 | 
|---|
 | 396 |         for (n0=0;n0<N0;n0++)  
 | 
|---|
 | 397 |           for (n[1]=0;n[1]<N[1];n[1]++) 
 | 
|---|
 | 398 |             for (n[2]=0;n[2]<N[2];n[2]++) {
 | 
|---|
 | 399 |               i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
 | 
|---|
 | 400 |               iS = n[2]+N[2]*(n[1]+N[1]*n0);
 | 
|---|
 | 401 |               n[0] = n0 + LevS->Plan0.plan->start_nx;
 | 
|---|
 | 402 |               for (k=0;k<max_operators;k+=2) {
 | 
|---|
 | 403 |                 e = k/2;
 | 
|---|
 | 404 |                 tmp = 2*PI/(double)(N[e])*(double)(n[e]);
 | 
|---|
 | 405 |                 tmp2 = PsiCR[i0] /LevS->MaxN;
 | 
|---|
 | 406 |                 // check lookup
 | 
|---|
 | 407 |                 if (!l) // perform check on first wave function only
 | 
|---|
 | 408 |                   if ((fabs(cos(tmp) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[e][n[e]]) > MYEPSILON)) {
 | 
|---|
 | 409 |                     Error(SomeError, "Lookup table does not match real value!");
 | 
|---|
 | 410 |                   }
 | 
|---|
 | 411 |                 HGcR[k][iS] = cos_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 412 |                 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
 | 
|---|
 | 413 |                 HGcR[k+1][iS] = sin_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 414 |                 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
 | 
|---|
 | 415 |               }
 | 
|---|
 | 416 |             }
 | 
|---|
 | 417 |         for (u=0;u<max_operators;u++) {
 | 
|---|
 | 418 |           fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
 | 
|---|
 | 419 |           fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
 | 
|---|
 | 420 |         }
 | 
|---|
 | 421 |       }  
 | 
|---|
 | 422 |       m = -1; // to access U matrix element (0..Num-1)
 | 
|---|
 | 423 |       for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions
 | 
|---|
 | 424 |         OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB
 | 
|---|
 | 425 |         if (OnePsiB->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 426 |           m++;  // increase m if it is non-extra wave function
 | 
|---|
 | 427 |           if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
 | 428 |              LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
 | 
|---|
 | 429 |           else 
 | 
|---|
 | 430 |              LOnePsiB = NULL;
 | 
|---|
 | 431 |           if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi
 | 
|---|
 | 432 |             RecvSource = OnePsiB->my_color_comm_ST_Psi;
 | 
|---|
 | 433 |             MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
 | 
|---|
 | 434 |             LPsiDatB=LevS->LPsi->TempPsi;
 | 
|---|
 | 435 |           } else {                  // .. otherwise send it to all other processes (Max_me... - 1)
 | 
|---|
 | 436 |             for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
 | 
|---|
 | 437 |               if (p != OnePsiB->my_color_comm_ST_Psi) 
 | 
|---|
 | 438 |                 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
 | 
|---|
 | 439 |             LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
 | 
|---|
 | 440 |           } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
 | 
|---|
 | 441 | 
 | 
|---|
 | 442 |           for (u=0;u<max_operators;u++) {
 | 
|---|
 | 443 |             a_ij = 0;
 | 
|---|
 | 444 |             b_ij = 0;
 | 
|---|
 | 445 |             if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
 | 
|---|
 | 446 |               //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
 | 
|---|
 | 447 |               g=0;
 | 
|---|
 | 448 |               if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
 | 449 |                 Index = LevS->GArray[g].Index;
 | 
|---|
 | 450 |                 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im);
 | 
|---|
 | 451 |                 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im);
 | 
|---|
 | 452 |                 g++;
 | 
|---|
 | 453 |               }
 | 
|---|
 | 454 |               for (; g < LevS->MaxG; g++) {
 | 
|---|
 | 455 |                 Index = LevS->GArray[g].Index;
 | 
|---|
 | 456 |                 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
 | 
|---|
 | 457 |                 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
 | 
|---|
 | 458 |               } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
 | 
|---|
 | 459 |               // sum up elements from all coefficients sharing processes
 | 
|---|
 | 460 |               MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
 | 461 |               MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
 | 462 |               a_ij = A_ij;
 | 
|---|
 | 463 |               b_ij = B_ij;
 | 
|---|
 | 464 |               // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
 | 
|---|
 | 465 |               MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
 | 
|---|
 | 466 |               MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
 | 
|---|
 | 467 |             } else { // receive ...
 | 
|---|
 | 468 |               MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
 | 
|---|
 | 469 |               MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);              
 | 
|---|
 | 470 |             }
 | 
|---|
 | 471 |             // ... and store
 | 
|---|
 | 472 |             //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
 | 
|---|
 | 473 |             //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
 | 
|---|
 | 474 |             gsl_matrix_set(A[u], l, m, A_ij);
 | 
|---|
 | 475 |             gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
 | 
|---|
 | 476 |           }
 | 
|---|
 | 477 |         }
 | 
|---|
 | 478 |       }
 | 
|---|
 | 479 |     }
 | 
|---|
 | 480 |   }
 | 
|---|
 | 481 |   // reset extra entries
 | 
|---|
 | 482 |   for (u=0;u<=max_operators;u++) {
 | 
|---|
 | 483 |     for (i=Num;i<AllocNum;i++)
 | 
|---|
 | 484 |       for (j=0;j<AllocNum;j++)
 | 
|---|
 | 485 |         gsl_matrix_set(A[u], i,j, 0.);
 | 
|---|
 | 486 |     for (i=Num;i<AllocNum;i++)
 | 
|---|
 | 487 |       for (j=0;j<AllocNum;j++)
 | 
|---|
 | 488 |         gsl_matrix_set(A[u], j,i, 0.);
 | 
|---|
 | 489 |   }
 | 
|---|
 | 490 |   // free lookups
 | 
|---|
 | 491 |   for (i=0;i<NDIM;i++) {
 | 
|---|
| [64fa9e] | 492 |     Free(cos_lookup[i], "bla");
 | 
|---|
 | 493 |     Free(sin_lookup[i], "bla");
 | 
|---|
| [a0bcf1] | 494 |   }
 | 
|---|
| [64fa9e] | 495 |   Free(cos_lookup, "bla");
 | 
|---|
 | 496 |   Free(sin_lookup, "bla");
 | 
|---|
| [a0bcf1] | 497 |   
 | 
|---|
 | 498 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
 | 
|---|
 | 499 |   // STEP 1: Initialize U = 1
 | 
|---|
 | 500 |   gsl_matrix *U = gsl_matrix_alloc (AllocNum,AllocNum);
 | 
|---|
 | 501 |   gsl_matrix_set_identity(U);
 | 
|---|
 | 502 | 
 | 
|---|
 | 503 |   // init merry-go-round array
 | 
|---|
 | 504 |   top = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: top");
 | 
|---|
 | 505 |   bot = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: bot");
 | 
|---|
 | 506 |   //debug(P,"init merry-go-round array");
 | 
|---|
 | 507 |   for (i=0;i<AllocNum/2;i++) {
 | 
|---|
 | 508 |     top[i] = 2*i;
 | 
|---|
 | 509 |     bot[i] = 2*i+1;      
 | 
|---|
 | 510 |   }
 | 
|---|
 | 511 | 
 | 
|---|
 | 512 | /*// print A matrices for debug
 | 
|---|
 | 513 |   if (P->Par.me == 0) 
 | 
|---|
 | 514 |     for (u=0;u<max_operators+1;u++) {
 | 
|---|
 | 515 |      fprintf(stderr, "A[%i] = \n",u);
 | 
|---|
 | 516 |       for (i=0;i<Num;i++) {
 | 
|---|
 | 517 |         for (j=0;j<Num;j++)
 | 
|---|
 | 518 |           fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
 | 
|---|
 | 519 |         fprintf(stderr, "\n");
 | 
|---|
 | 520 |       }
 | 
|---|
 | 521 |     }  
 | 
|---|
 | 522 | */
 | 
|---|
 | 523 |   if (Num != 1) {
 | 
|---|
 | 524 |     // one- or multi-process case?
 | 
|---|
 | 525 |     if (((AllocNum % 2) == 0) && (ProcNum != 1) && ((AllocNum / 2) % ProcNum == 0)) {
 | 
|---|
 | 526 |       max_rounds = (AllocNum / 2)/ProcNum;  // each process must perform multiple rotations per step of a set
 | 
|---|
 | 527 |       //fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, ProcRank, ProcNum, max_rounds);
 | 
|---|
 | 528 |       // allocate column vectors for interchange of columns
 | 
|---|
 | 529 |       //debug(P,"allocate column vectors for interchange of columns");
 | 
|---|
 | 530 |       c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c");
 | 
|---|
 | 531 |       s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s");
 | 
|---|
 | 532 |       c_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: c_all");
 | 
|---|
 | 533 |       s_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: s_all");
 | 
|---|
 | 534 |       rcounts = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rcounts");
 | 
|---|
 | 535 |       rdispls = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rdispls");
 | 
|---|
 | 536 | /*    // print starting values of index generation tables top and bot
 | 
|---|
 | 537 |       fprintf(stderr,"top\t");
 | 
|---|
 | 538 |       for (k=0;k<AllocNum/2;k++)
 | 
|---|
 | 539 |         fprintf(stderr,"%i\t", top[k]);
 | 
|---|
 | 540 |       fprintf(stderr,"\n");
 | 
|---|
 | 541 |       fprintf(stderr,"bot\t");
 | 
|---|
 | 542 |       for (k=0;k<AllocNum/2;k++)
 | 
|---|
 | 543 |         fprintf(stderr,"%i\t", bot[k]);
 | 
|---|
 | 544 |       fprintf(stderr,"\n");*/
 | 
|---|
 | 545 |       // establish communication partners
 | 
|---|
 | 546 |       //debug(P,"establish communication partners");
 | 
|---|
 | 547 |       if (ProcRank == 0) {
 | 
|---|
 | 548 |         tagS0 = WannierALTag;   // left p0 always remains left p0
 | 
|---|
 | 549 |       } else {
 | 
|---|
 | 550 |         tagS0 = ProcRank == ProcNum - 1 ? WannierARTag : WannierALTag; // left p_last becomes right p_last          
 | 
|---|
 | 551 |       }
 | 
|---|
 | 552 |       tagS1 = ProcRank == 0 ? WannierALTag : WannierARTag; // right p0 always goes into left p1
 | 
|---|
 | 553 |       tagR0 = WannierALTag; // 
 | 
|---|
 | 554 |       tagR1 = WannierARTag; // first process
 | 
|---|
 | 555 |       if (ProcRank == 0) {
 | 
|---|
 | 556 |         left = ProcNum-1;
 | 
|---|
 | 557 |         right = 1;
 | 
|---|
 | 558 |         Lsend = 0;
 | 
|---|
 | 559 |         Rsend = 1;
 | 
|---|
 | 560 |         Lrecv = 0;
 | 
|---|
 | 561 |         Rrecv = 1;        
 | 
|---|
 | 562 |       } else if (ProcRank == ProcNum - 1) {
 | 
|---|
 | 563 |         left = ProcRank - 1;
 | 
|---|
 | 564 |         right = 0;
 | 
|---|
 | 565 |         Lsend = ProcRank;
 | 
|---|
 | 566 |         Rsend = ProcRank - 1;
 | 
|---|
 | 567 |         Lrecv = ProcRank - 1;
 | 
|---|
 | 568 |         Rrecv = ProcRank;        
 | 
|---|
 | 569 |       } else {
 | 
|---|
 | 570 |         left = ProcRank - 1;
 | 
|---|
 | 571 |         right = ProcRank + 1;
 | 
|---|
 | 572 |         Lsend = ProcRank+1;
 | 
|---|
 | 573 |         Rsend = ProcRank - 1;
 | 
|---|
 | 574 |         Lrecv = ProcRank - 1;
 | 
|---|
 | 575 |         Rrecv = ProcRank+1;
 | 
|---|
 | 576 |       }
 | 
|---|
 | 577 |       //fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
 | 
|---|
 | 578 |       // allocate eigenvector stuff
 | 
|---|
 | 579 |       //debug(P,"allocate eigenvector stuff");
 | 
|---|
 | 580 |       G = gsl_matrix_calloc (2,2);
 | 
|---|
 | 581 |       h = gsl_vector_alloc (2);
 | 
|---|
 | 582 |       eval = gsl_vector_alloc (2);
 | 
|---|
 | 583 |       evec = gsl_matrix_alloc (2,2);
 | 
|---|
 | 584 |       w = gsl_eigen_symmv_alloc(2);
 | 
|---|
 | 585 |       // initialise A_loc
 | 
|---|
 | 586 |       //debug(P,"initialise A_loc");
 | 
|---|
 | 587 |       for (k=0;k<max_operators+1;k++) {
 | 
|---|
 | 588 |         //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]");
 | 
|---|
 | 589 |         Around[k] = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Around[k]");
 | 
|---|
 | 590 |         Atotal[k] = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Atotal[k]");
 | 
|---|
 | 591 |         Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]");
 | 
|---|
 | 592 |         //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds];
 | 
|---|
 | 593 |         
 | 
|---|
 | 594 |         for (round=0;round<max_rounds;round++) {
 | 
|---|
 | 595 |           Aloc[k][2*round] = &Around[k][AllocNum*(2*round)];
 | 
|---|
 | 596 |           Aloc[k][2*round+1] = &Around[k][AllocNum*(2*round+1)];
 | 
|---|
 | 597 |           for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 598 |             Aloc[k][2*round][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round));
 | 
|---|
 | 599 |             Aloc[k][2*round+1][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round)+1);
 | 
|---|
 | 600 |             //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
 | 
|---|
 | 601 |           }
 | 
|---|
 | 602 |         }
 | 
|---|
 | 603 |       }
 | 
|---|
 | 604 |       // initialise U_loc
 | 
|---|
 | 605 |       //debug(P,"initialise U_loc");
 | 
|---|
 | 606 |       //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc");
 | 
|---|
 | 607 |       Uround = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Uround");
 | 
|---|
 | 608 |       Utotal = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Utotal");
 | 
|---|
 | 609 |       Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc");
 | 
|---|
 | 610 |       //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds];
 | 
|---|
 | 611 |       for (round=0;round<max_rounds;round++) {
 | 
|---|
 | 612 |         Uloc[2*round] = &Uround[AllocNum*(2*round)];
 | 
|---|
 | 613 |         Uloc[2*round+1] = &Uround[AllocNum*(2*round+1)];
 | 
|---|
 | 614 |         for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 615 |           Uloc[2*round][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round));
 | 
|---|
 | 616 |           Uloc[2*round+1][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round)+1);
 | 
|---|
 | 617 |           //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]);
 | 
|---|
 | 618 |         }
 | 
|---|
 | 619 |       }
 | 
|---|
 | 620 |       // now comes the iteration loop
 | 
|---|
 | 621 |       //debug(P,"now comes the iteration loop");
 | 
|---|
 | 622 |       it_steps = 0;
 | 
|---|
 | 623 |       do {
 | 
|---|
 | 624 |         it_steps++;
 | 
|---|
 | 625 |         fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
 | 
|---|
 | 626 |         for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
 | 
|---|
 | 627 |           //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
 | 
|---|
 | 628 |           for (round = 0; round < max_rounds;round++) {
 | 
|---|
 | 629 |             start = ProcRank * max_rounds + round;
 | 
|---|
 | 630 |             // get indices
 | 
|---|
 | 631 |             i = top[start] < bot[start] ? top[start] : bot[start]; // minimum of the two indices
 | 
|---|
 | 632 |             iloc = top[start] < bot[start] ? 0 : 1; 
 | 
|---|
 | 633 |             j = top[start] > bot[start] ? top[start] : bot[start]; // maximum of the two indices: thus j >  i
 | 
|---|
 | 634 |             jloc =  top[start] > bot[start] ? 0 : 1;
 | 
|---|
 | 635 |             //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
 | 
|---|
 | 636 |             
 | 
|---|
 | 637 |             // calculate rotation angle, i.e. c and s
 | 
|---|
 | 638 |             //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
 | 
|---|
 | 639 |             gsl_matrix_set_zero(G);        
 | 
|---|
 | 640 |             for (k=0;k<max_operators;k++) { // go through all operators ...
 | 
|---|
 | 641 |               // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]         
 | 
|---|
 | 642 |               //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);          
 | 
|---|
 | 643 |               //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);          
 | 
|---|
 | 644 |               gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]);
 | 
|---|
 | 645 |               gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]);
 | 
|---|
 | 646 |               
 | 
|---|
 | 647 |               // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
 | 
|---|
 | 648 |               for (l=0;l<2;l++)
 | 
|---|
 | 649 |                 for (m=0;m<2;m++)
 | 
|---|
 | 650 |                     gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
 | 
|---|
 | 651 |             }
 | 
|---|
 | 652 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
 | 
|---|
 | 653 |             // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
 | 
|---|
 | 654 |             gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G    
 | 
|---|
 | 655 |             index = gsl_vector_max_index (eval);  // get biggest eigenvalue
 | 
|---|
 | 656 |             x = gsl_matrix_get(evec, 0, index);
 | 
|---|
 | 657 |             y = gsl_matrix_get(evec, 1, index) * x/fabs(x);
 | 
|---|
 | 658 |             x = fabs(x);  // ensure x>=0 so that rotation angles remain smaller Pi/4     
 | 
|---|
 | 659 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
 | 
|---|
 | 660 |             // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
 | 
|---|
 | 661 |             r = sqrt(x*x + y*y);
 | 
|---|
 | 662 |             c[round] = sqrt((x + r) / (2*r));
 | 
|---|
 | 663 |             s[round] = y / sqrt(2*r*(x+r));
 | 
|---|
 | 664 |             // [[c,s],[-s,c]]= V_small
 | 
|---|
 | 665 |             //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
 | 
|---|
 | 666 |     
 | 
|---|
 | 667 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
 | 
|---|
 | 668 |             // V_loc = V_loc * V_small
 | 
|---|
 | 669 |             //debug(P,"apply rotation to local U");
 | 
|---|
 | 670 |             for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 671 |               a_i = Uloc[2*round+iloc][l];
 | 
|---|
 | 672 |               a_j = Uloc[2*round+jloc][l];
 | 
|---|
 | 673 |               Uloc[2*round+iloc][l] =  c[round] * a_i + s[round] * a_j;
 | 
|---|
 | 674 |               Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j;
 | 
|---|
 | 675 |             }
 | 
|---|
 | 676 |           }  // end of round        
 | 
|---|
 | 677 |           // circulate the rotation angles
 | 
|---|
 | 678 |           //debug(P,"circulate the rotation angles");
 | 
|---|
 | 679 |           MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *comm);   // MPI_Allgather is waaaaay faster than ring circulation
 | 
|---|
 | 680 |           MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *comm);
 | 
|---|
 | 681 |           //m = start;
 | 
|---|
 | 682 |           for (l=0;l<AllocNum/2;l++) { // for each process
 | 
|---|
 | 683 |             // we have V_small from process k
 | 
|---|
 | 684 |             //debug(P,"Apply V_small from other process");
 | 
|---|
 | 685 |             i = top[l] < bot[l] ? top[l] : bot[l]; // minimum of the two indices
 | 
|---|
 | 686 |             j = top[l] > bot[l] ? top[l] : bot[l]; // maximum of the two indices: thus j >  i
 | 
|---|
 | 687 |             iloc = top[l] < bot[l] ? 0 : 1; 
 | 
|---|
 | 688 |             jloc =  top[l] > bot[l] ? 0 : 1;
 | 
|---|
 | 689 |             for (m=0;m<max_rounds;m++) {
 | 
|---|
 | 690 |               //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
 | 
|---|
 | 691 |               // apply row rotation to each A[k]
 | 
|---|
 | 692 |               for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
 | 
|---|
 | 693 |                 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
 | 
|---|
 | 694 |                 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
 | 
|---|
 | 695 |                 a_i = Aloc[k][2*m+iloc][i];
 | 
|---|
 | 696 |                 a_j = Aloc[k][2*m+iloc][j];
 | 
|---|
 | 697 |                 Aloc[k][2*m+iloc][i] =  c_all[l] * a_i + s_all[l] * a_j;
 | 
|---|
 | 698 |                 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
 | 
|---|
 | 699 |                 a_i = Aloc[k][2*m+jloc][i];
 | 
|---|
 | 700 |                 a_j = Aloc[k][2*m+jloc][j];
 | 
|---|
 | 701 |                 Aloc[k][2*m+jloc][i] =  c_all[l] * a_i + s_all[l] * a_j;
 | 
|---|
 | 702 |                 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j;
 | 
|---|
 | 703 |                 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]);
 | 
|---|
 | 704 |                 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]);
 | 
|---|
 | 705 |               }
 | 
|---|
 | 706 |             }
 | 
|---|
 | 707 |           }
 | 
|---|
 | 708 |           // apply rotation to local operator matrices
 | 
|---|
 | 709 |           // A_loc = A_loc * V_small 
 | 
|---|
 | 710 |           //debug(P,"apply rotation to local operator matrices A[k]");
 | 
|---|
 | 711 |           for (m=0;m<max_rounds;m++) {
 | 
|---|
 | 712 |             start = ProcRank * max_rounds + m;
 | 
|---|
 | 713 |             iloc = top[start] < bot[start] ? 0 : 1; 
 | 
|---|
 | 714 |             jloc =  top[start] > bot[start] ? 0 : 1;
 | 
|---|
 | 715 |             for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
 | 
|---|
 | 716 |               for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 717 |                 // Columns, i and j belong to this process only!
 | 
|---|
 | 718 |                 a_i = Aloc[k][2*m+iloc][l];
 | 
|---|
 | 719 |                 a_j = Aloc[k][2*m+jloc][l];
 | 
|---|
 | 720 |                 Aloc[k][2*m+iloc][l] =  c[m] * a_i + s[m] * a_j;
 | 
|---|
 | 721 |                 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j;
 | 
|---|
 | 722 |                 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]);
 | 
|---|
 | 723 |               }
 | 
|---|
 | 724 |             }
 | 
|---|
 | 725 |           }
 | 
|---|
 | 726 |           // Shuffling of these round's columns to prepare next rotation set
 | 
|---|
 | 727 |           for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! 
 | 
|---|
 | 728 |             // extract columns from A
 | 
|---|
 | 729 |             //debug(P,"extract columns from A");
 | 
|---|
 | 730 |             shiftcolumns(*comm, Aloc[k], AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
 | 
|---|
 | 731 |                       
 | 
|---|
 | 732 |           }       
 | 
|---|
 | 733 |           // and also for V ...
 | 
|---|
 | 734 |           //debug(P,"extract columns from U");
 | 
|---|
 | 735 |           shiftcolumns(*comm, Uloc, AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
 | 
|---|
 | 736 | 
 | 
|---|
 | 737 |     
 | 
|---|
 | 738 |           // and merry-go-round for the indices too            
 | 
|---|
 | 739 |           //debug(P,"and merry-go-round for the indices too");
 | 
|---|
 | 740 |           music(top, bot, AllocNum/2);
 | 
|---|
 | 741 |         }
 | 
|---|
 | 742 |   
 | 
|---|
 | 743 |         //fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
 | 
|---|
 | 744 |         // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
 | 
|---|
 | 745 |         old_spread = Spread;
 | 
|---|
 | 746 |         spread = 0.;
 | 
|---|
 | 747 |         for(k=0;k<max_operators;k++) {   // go through all self-adjoint operators
 | 
|---|
 | 748 |           for (i=0; i < 2*max_rounds; i++) {  // go through all wave functions
 | 
|---|
 | 749 |               spread += Aloc[k][i][i+ProcRank*2*max_rounds]*Aloc[k][i][i+ProcRank*2*max_rounds];
 | 
|---|
 | 750 |               //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
 | 
|---|
 | 751 |           }
 | 
|---|
 | 752 |         }
 | 
|---|
 | 753 |         MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *comm);
 | 
|---|
 | 754 |         //Spread = spread;
 | 
|---|
 | 755 |         if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
 | 
|---|
 | 756 |         else fprintf(stderr,"%2.9e\n",Spread);
 | 
|---|
 | 757 |         // STEP 5: check change of variance
 | 
|---|
 | 758 |       } while (fabs(old_spread-Spread) >= R->EpsWannier);
 | 
|---|
 | 759 |       // end of iterative diagonalization loop: We have found our final orthogonal U!
 | 
|---|
 | 760 | 
 | 
|---|
 | 761 |       for (l=0;l<ProcNum;l++)
 | 
|---|
 | 762 |         rcounts[l] = AllocNum;
 | 
|---|
 | 763 |       //debug(P,"allgather U");
 | 
|---|
 | 764 |       for (round=0;round<2*max_rounds;round++) {
 | 
|---|
 | 765 |         for (l=0;l<ProcNum;l++)
 | 
|---|
 | 766 |           rdispls[l] = (l*2*max_rounds + round)*AllocNum;
 | 
|---|
 | 767 |         MPI_Allgatherv(Uloc[round], AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *comm);
 | 
|---|
 | 768 |       }
 | 
|---|
 | 769 |       for (k=0;k<AllocNum;k++) {    
 | 
|---|
 | 770 |         for(l=0;l<AllocNum;l++) {
 | 
|---|
 | 771 |           gsl_matrix_set(U,k,l, Utotal[l+k*AllocNum]);
 | 
|---|
 | 772 |         }
 | 
|---|
 | 773 |       }
 | 
|---|
 | 774 | 
 | 
|---|
 | 775 | 
 | 
|---|
 | 776 |       // after one set, gather A[k] from all and calculate spread
 | 
|---|
 | 777 |       for (l=0;l<ProcNum;l++)
 | 
|---|
 | 778 |         rcounts[l] = AllocNum;
 | 
|---|
 | 779 |       //debug(P,"gather A[k] for spread");
 | 
|---|
 | 780 |       for (u=0;u<max_operators+1;u++) {// one extra for B matrix !
 | 
|---|
 | 781 |         //debug(P,"A[k] all gather");
 | 
|---|
 | 782 |         for (round=0;round<2*max_rounds;round++) {
 | 
|---|
 | 783 |           for (l=0;l<ProcNum;l++)
 | 
|---|
 | 784 |             rdispls[l] = (l*2*max_rounds + round)*AllocNum;
 | 
|---|
 | 785 |           MPI_Allgatherv(Aloc[u][round], AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *comm);
 | 
|---|
 | 786 |         }
 | 
|---|
 | 787 |         for (k=0;k<AllocNum;k++) {    
 | 
|---|
 | 788 |           for(l=0;l<AllocNum;l++) {
 | 
|---|
 | 789 |             gsl_matrix_set(A[u],k,l, Atotal[u][l+k*AllocNum]);
 | 
|---|
 | 790 |           }
 | 
|---|
 | 791 |         }
 | 
|---|
 | 792 |       }
 | 
|---|
 | 793 |      
 | 
|---|
 | 794 |       // free eigenvector stuff     
 | 
|---|
 | 795 |       gsl_vector_free(h);
 | 
|---|
 | 796 |       gsl_matrix_free(G);
 | 
|---|
 | 797 |       gsl_eigen_symmv_free(w);  
 | 
|---|
 | 798 |       gsl_vector_free(eval);
 | 
|---|
 | 799 |       gsl_matrix_free(evec);
 | 
|---|
 | 800 |       // Free column vectors
 | 
|---|
 | 801 |       for (k=0;k<max_operators+1;k++) {
 | 
|---|
| [64fa9e] | 802 |         Free(Atotal[k], "bla");
 | 
|---|
 | 803 |         Free(Around[k], "bla");
 | 
|---|
| [a0bcf1] | 804 |       }
 | 
|---|
| [64fa9e] | 805 |       Free(Uround, "bla");
 | 
|---|
 | 806 |       Free(Utotal, "bla");
 | 
|---|
 | 807 |       Free(c_all, "bla");
 | 
|---|
 | 808 |       Free(s_all, "bla");
 | 
|---|
 | 809 |       Free(c, "bla");
 | 
|---|
 | 810 |       Free(s, "bla");
 | 
|---|
 | 811 |       Free(rcounts, "bla");
 | 
|---|
 | 812 |       Free(rdispls, "bla");
 | 
|---|
| [a0bcf1] | 813 |       
 | 
|---|
 | 814 |     } else {
 | 
|---|
 | 815 |       
 | 
|---|
 | 816 |       c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
 | 
|---|
 | 817 |       s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
 | 
|---|
 | 818 |       G = gsl_matrix_calloc (2,2);
 | 
|---|
 | 819 |       h = gsl_vector_alloc (2);
 | 
|---|
 | 820 |       eval = gsl_vector_alloc (2);
 | 
|---|
 | 821 |       evec = gsl_matrix_alloc (2,2);
 | 
|---|
 | 822 |       w = gsl_eigen_symmv_alloc(2);
 | 
|---|
 | 823 |       //debug(P,"now comes the iteration loop");
 | 
|---|
 | 824 |       it_steps=0;
 | 
|---|
 | 825 |       do {
 | 
|---|
 | 826 |         it_steps++;
 | 
|---|
 | 827 |         //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
 | 
|---|
 | 828 |         fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
 | 
|---|
 | 829 |         for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
 | 
|---|
 | 830 |           //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
 | 
|---|
 | 831 |           // STEP 3: for all index pairs 0<= i<j <AllocNum
 | 
|---|
 | 832 |           for (ProcRank=0;ProcRank<AllocNum/2;ProcRank++) {
 | 
|---|
 | 833 |             // get indices
 | 
|---|
 | 834 |             i = top[ProcRank] < bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // minimum of the two indices
 | 
|---|
 | 835 |             j = top[ProcRank] > bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // maximum of the two indices: thus j >  i 
 | 
|---|
 | 836 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
 | 
|---|
 | 837 |             // STEP 3a: Calculate G
 | 
|---|
 | 838 |             gsl_matrix_set_zero(G);
 | 
|---|
 | 839 |             
 | 
|---|
 | 840 |             for (k=0;k<max_operators;k++) { // go through all operators ...
 | 
|---|
 | 841 |               // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
 | 
|---|
 | 842 |               //fprintf(stderr,"(%i) k%i [a_ii - a_ij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,i), gsl_matrix_get(A[k],j,j),gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j));          
 | 
|---|
 | 843 |               //fprintf(stderr,"(%i) k%i [a_ij + a_jij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,j), gsl_matrix_get(A[k],j,i),gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i));          
 | 
|---|
 | 844 |               gsl_vector_set(h, 0, gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j));
 | 
|---|
 | 845 |               gsl_vector_set(h, 1, gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i));
 | 
|---|
 | 846 |               //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
 | 
|---|
 | 847 |               
 | 
|---|
 | 848 |               // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
 | 
|---|
 | 849 |               for (l=0;l<2;l++)
 | 
|---|
 | 850 |                 for (m=0;m<2;m++)
 | 
|---|
 | 851 |                     gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
 | 
|---|
 | 852 |             }
 | 
|---|
 | 853 |           
 | 
|---|
 | 854 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
 | 
|---|
 | 855 |             // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
 | 
|---|
 | 856 |             gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G
 | 
|---|
 | 857 |         
 | 
|---|
 | 858 |             index = gsl_vector_max_index (eval);  // get biggest eigenvalue
 | 
|---|
 | 859 |             x = gsl_matrix_get(evec, 0, index);
 | 
|---|
 | 860 |             y = gsl_matrix_get(evec, 1, index) * x/fabs(x);
 | 
|---|
 | 861 |             //z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
 | 
|---|
 | 862 |             x = fabs(x);  // ensure x>=0 so that rotation angles remain smaller Pi/4
 | 
|---|
 | 863 |           
 | 
|---|
 | 864 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
 | 
|---|
 | 865 |             // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
 | 
|---|
 | 866 |             r = sqrt(x*x + y*y); // + z*z);
 | 
|---|
 | 867 |             c[0] = sqrt((x + r) / (2*r));
 | 
|---|
 | 868 |             s[0] = y / sqrt(2*r*(x+r)); //, -z / sqrt(2*r*(x+r)));
 | 
|---|
 | 869 |             //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
 | 
|---|
 | 870 |           
 | 
|---|
 | 871 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
 | 
|---|
 | 872 |             // STEP 3d: apply rotation R to rows and columns of A^{(k)}
 | 
|---|
 | 873 |             for (k=0;k<max_operators+1;k++) {// one extra for B matrix !
 | 
|---|
 | 874 |               for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 875 |                 // Rows
 | 
|---|
 | 876 |                 a_i = gsl_matrix_get(A[k],i,l);
 | 
|---|
 | 877 |                 a_j = gsl_matrix_get(A[k],j,l);
 | 
|---|
 | 878 |                 gsl_matrix_set(A[k], i, l,  c[0] * a_i + s[0] * a_j);
 | 
|---|
 | 879 |                 gsl_matrix_set(A[k], j, l, -s[0] * a_i + c[0] * a_j);
 | 
|---|
 | 880 |               }
 | 
|---|
 | 881 |               for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 882 |                 // Columns
 | 
|---|
 | 883 |                 a_i = gsl_matrix_get(A[k],l,i);
 | 
|---|
 | 884 |                 a_j = gsl_matrix_get(A[k],l,j);
 | 
|---|
 | 885 |                 gsl_matrix_set(A[k], l, i,  c[0] * a_i + s[0] * a_j);
 | 
|---|
 | 886 |                 gsl_matrix_set(A[k], l, j, -s[0] * a_i + c[0] * a_j);
 | 
|---|
 | 887 |               }
 | 
|---|
 | 888 |             }
 | 
|---|
 | 889 |             //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
 | 
|---|
 | 890 |             // STEP 3e: apply U = R*U
 | 
|---|
 | 891 |             for (l=0;l<AllocNum;l++) {
 | 
|---|
 | 892 |                 a_i = gsl_matrix_get(U,i,l);
 | 
|---|
 | 893 |                 a_j = gsl_matrix_get(U,j,l);
 | 
|---|
 | 894 |                 gsl_matrix_set(U, i, l,  c[0] * a_i + s[0] * a_j);
 | 
|---|
 | 895 |                 gsl_matrix_set(U, j, l, -s[0] * a_i + c[0] * a_j);
 | 
|---|
 | 896 |             }
 | 
|---|
 | 897 |           }
 | 
|---|
 | 898 |           // and merry-go-round for the indices too            
 | 
|---|
 | 899 |           //debug(P,"and merry-go-round for the indices too");
 | 
|---|
 | 900 |           if (AllocNum > 2) music(top, bot, AllocNum/2);
 | 
|---|
 | 901 |         }
 | 
|---|
 | 902 |        
 | 
|---|
 | 903 |         //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
 | 
|---|
 | 904 |         // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
 | 
|---|
 | 905 |         old_spread = spread;
 | 
|---|
 | 906 |         spread = 0;
 | 
|---|
 | 907 |         for(k=0;k<max_operators;k++) {   // go through all self-adjoint operators
 | 
|---|
 | 908 |           for (i=0; i < AllocNum; i++) {  // go through all wave functions
 | 
|---|
 | 909 |               spread += pow(gsl_matrix_get(A[k],i,i),2);
 | 
|---|
 | 910 |           }
 | 
|---|
 | 911 |         }
 | 
|---|
 | 912 |         if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
 | 
|---|
 | 913 |         else fprintf(stderr,"%2.9e\n",spread);
 | 
|---|
 | 914 |         // STEP 5: check change of variance
 | 
|---|
 | 915 |       } while (fabs(old_spread-spread) >= R->EpsWannier);
 | 
|---|
 | 916 |       // end of iterative diagonalization loop: We have found our final orthogonal U!
 | 
|---|
 | 917 |       gsl_vector_free(h);
 | 
|---|
 | 918 |       gsl_matrix_free(G);
 | 
|---|
 | 919 |       gsl_eigen_symmv_free(w);  
 | 
|---|
 | 920 |       gsl_vector_free(eval);
 | 
|---|
 | 921 |       gsl_matrix_free(evec);
 | 
|---|
| [64fa9e] | 922 |       Free(c, "bla");
 | 
|---|
 | 923 |       Free(s, "bla");
 | 
|---|
| [a0bcf1] | 924 |     }  
 | 
|---|
 | 925 |     
 | 
|---|
 | 926 |     if(P->Call.out[ReadOut]) {// && P->Par.me == 0) { 
 | 
|---|
 | 927 |       //debug(P,"output total U");
 | 
|---|
 | 928 |       fprintf(stderr,"(%i) U_tot = \n",P->Par.me);
 | 
|---|
 | 929 |       for (k=0;k<Num;k++) {    
 | 
|---|
 | 930 |         for (l=0;l<Num;l++)
 | 
|---|
 | 931 |           fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
 | 
|---|
 | 932 |         fprintf(stderr,"\n");
 | 
|---|
 | 933 |       }
 | 
|---|
 | 934 |     }
 | 
|---|
 | 935 |   }
 | 
|---|
| [64fa9e] | 936 |   Free(top, "bla");
 | 
|---|
 | 937 |   Free(bot, "bla");
 | 
|---|
| [a0bcf1] | 938 | 
 | 
|---|
 | 939 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Allocating buffer mem\n",P->Par.me);
 | 
|---|
 | 940 |   // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle
 | 
|---|
 | 941 |   Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
 | 
|---|
 | 942 |   fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer");
 | 
|---|
 | 943 |   for (l=0;l<Num;l++) // allocate for each local wave function
 | 
|---|
 | 944 |     coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
 | 
|---|
 | 945 |   
 | 
|---|
 | 946 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
 | 
|---|
 | 947 |   l=-1; // to access U matrix element (0..Num-1)
 | 
|---|
 | 948 |   k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
 | 
|---|
 | 949 |   for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions
 | 
|---|
 | 950 |     OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA
 | 
|---|
 | 951 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 952 |       l++;  // increase l if it is occupied wave function
 | 
|---|
 | 953 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
 | 
|---|
 | 954 |         k++; // increase k only if it is a local, non-extra orbital wave function
 | 
|---|
 | 955 |         LPsiDatA = (fftw_complex *) coeffs_buffer[k];    // new coeffs first go to copy buffer, old ones must not be overwritten yet
 | 
|---|
 | 956 |         SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG);  // zero buffer part
 | 
|---|
 | 957 |       } else
 | 
|---|
 | 958 |         LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients!
 | 
|---|
 | 959 | 
 | 
|---|
 | 960 |       m = -1; // to access U matrix element (0..Num-1)
 | 
|---|
 | 961 |       for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions
 | 
|---|
 | 962 |         OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB
 | 
|---|
 | 963 |         if (OnePsiB->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 964 |           m++;  // increase m if it is occupied wave function
 | 
|---|
 | 965 |           if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
 | 966 |              LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
 | 
|---|
 | 967 |           else 
 | 
|---|
 | 968 |              LOnePsiB = NULL;
 | 
|---|
 | 969 |           if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi
 | 
|---|
 | 970 |             RecvSource = OnePsiB->my_color_comm_ST_Psi;
 | 
|---|
 | 971 |             MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
 | 
|---|
 | 972 |             LPsiDatB=LevS->LPsi->TempPsi;
 | 
|---|
 | 973 |           } else {                  // .. otherwise send it to all other processes (Max_me... - 1)
 | 
|---|
 | 974 |             for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
 | 
|---|
 | 975 |               if (p != OnePsiB->my_color_comm_ST_Psi) 
 | 
|---|
 | 976 |                 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
 | 
|---|
 | 977 |             LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
 | 
|---|
 | 978 |           } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
 | 
|---|
 | 979 | 
 | 
|---|
 | 980 |           if (LPsiDatA != NULL) {
 | 
|---|
 | 981 |             double tmp = gsl_matrix_get(U,l,m);
 | 
|---|
 | 982 |             g=0;
 | 
|---|
 | 983 |             if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
 | 984 |               LPsiDatA[g].re += LPsiDatB[g].re * tmp;
 | 
|---|
 | 985 |               LPsiDatA[g].im += LPsiDatB[g].im * tmp;
 | 
|---|
 | 986 |               g++;
 | 
|---|
 | 987 |             }
 | 
|---|
 | 988 |             for (; g < LevS->MaxG; g++) {
 | 
|---|
 | 989 |               LPsiDatA[g].re += LPsiDatB[g].re * tmp;
 | 
|---|
 | 990 |               LPsiDatA[g].im += LPsiDatB[g].im * tmp;
 | 
|---|
 | 991 |             }
 | 
|---|
 | 992 |           }
 | 
|---|
 | 993 |         }
 | 
|---|
 | 994 |       }
 | 
|---|
 | 995 |     }
 | 
|---|
 | 996 |   }
 | 
|---|
 | 997 |   gsl_matrix_free(U);
 | 
|---|
 | 998 | 
 | 
|---|
 | 999 |   if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
 | 
|---|
 | 1000 |   // now, as all wave functions are updated, swap the buffer
 | 
|---|
 | 1001 |   l = -1;
 | 
|---|
 | 1002 |   for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
 | 
|---|
 | 1003 |     if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
 | 
|---|
 | 1004 |       l++;
 | 
|---|
 | 1005 |       if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
 | 
|---|
 | 1006 |       LPsiDatA = (fftw_complex *)coeffs_buffer[l];
 | 
|---|
 | 1007 |       LPsiDatB = LevS->LPsi->LocalPsi[l];
 | 
|---|
 | 1008 |       for (g=0;g<LevS->MaxG;g++) {
 | 
|---|
 | 1009 |         LPsiDatB[g].re = LPsiDatA[g].re;
 | 
|---|
 | 1010 |         LPsiDatB[g].im = LPsiDatA[g].im;
 | 
|---|
 | 1011 |       }
 | 
|---|
 | 1012 |       // recalculating non-local form factors which are coefficient dependent!
 | 
|---|
 | 1013 |       CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
 | 
|---|
 | 1014 |     }
 | 
|---|
 | 1015 |   } 
 | 
|---|
 | 1016 |   // and free allocated buffer memory
 | 
|---|
| [64fa9e] | 1017 |   Free(coeffs_buffer, "bla");
 | 
|---|
| [a0bcf1] | 1018 | 
 | 
|---|
 | 1019 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7\n",P->Par.me);
 | 
|---|
 | 1020 |   // STEP 7: Compute Wannier centers, spread and printout
 | 
|---|
 | 1021 |   // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
 | 
|---|
 | 1022 |   if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
 | 
|---|
 | 1023 | 
 | 
|---|
 | 1024 |   switch (Lat->Psi.PsiST) {
 | 
|---|
 | 1025 |   case SpinDouble:
 | 
|---|
 | 1026 |     strncpy(suffix,".spread.csv",18);    
 | 
|---|
 | 1027 |     strncat(spin,"SpinDouble",12);
 | 
|---|
 | 1028 |     break;
 | 
|---|
 | 1029 |   case SpinUp:
 | 
|---|
 | 1030 |     strncpy(suffix,".spread_up.csv",18);    
 | 
|---|
 | 1031 |     strncat(spin,"SpinUp",12);
 | 
|---|
 | 1032 |     break;
 | 
|---|
 | 1033 |   case SpinDown:
 | 
|---|
 | 1034 |     strncpy(suffix,".spread_down.csv",18);    
 | 
|---|
 | 1035 |     strncat(spin,"SpinDown",12);
 | 
|---|
 | 1036 |     break;
 | 
|---|
 | 1037 |   }
 | 
|---|
 | 1038 |   if (P->Par.me_comm_ST == 0) {
 | 
|---|
 | 1039 |     if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level 
 | 
|---|
 | 1040 |       OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
 | 
|---|
 | 1041 |     else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
 | 
|---|
 | 1042 |       OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
 | 
|---|
 | 1043 |     if (F->SpreadFile == NULL) {
 | 
|---|
 | 1044 |       Error(SomeError,"ComputeMLWF: Error opening Wannier File!\n");
 | 
|---|
 | 1045 |     } else {
 | 
|---|
 | 1046 |       fprintf(F->SpreadFile,"#===== W A N N I E R  C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
 | 
|---|
 | 1047 |       fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
 | 
|---|
 | 1048 |     }
 | 
|---|
 | 1049 |   }
 | 
|---|
 | 1050 |   old_spread = 0;
 | 
|---|
 | 1051 |   spread = 0;
 | 
|---|
 | 1052 |   i=-1;
 | 
|---|
 | 1053 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
 | 1054 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
 | 1055 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 1056 |       i++;  // increase l if it is occupied wave function
 | 
|---|
 | 1057 |       //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
 | 
|---|
 | 1058 |       
 | 
|---|
 | 1059 |       // calculate Wannier Centre
 | 
|---|
 | 1060 |       for (j=0;j<NDIM;j++) {
 | 
|---|
| [f5586e] | 1061 |         WannierCentre[i][j] = sqrt(Lat->RealBasisSQ[j])/(2*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));
 | 
|---|
| [a0bcf1] | 1062 |         if (WannierCentre[i][j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
 | 
|---|
| [f5586e] | 1063 |           WannierCentre[i][j] = sqrt(Lat->RealBasisSQ[j]) + WannierCentre[i][j];
 | 
|---|
| [a0bcf1] | 1064 |       }
 | 
|---|
 | 1065 |       
 | 
|---|
 | 1066 |       // store orbital spread and centre in file
 | 
|---|
 | 1067 |       tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
 | 
|---|
 | 1068 |             - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
 | 
|---|
 | 1069 |             - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);          
 | 
|---|
 | 1070 |       WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp; 
 | 
|---|
 | 1071 |       //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
 | 
|---|
 | 1072 |       //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
 | 
|---|
 | 1073 |         //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]); 
 | 
|---|
 | 1074 |       //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
 | 
|---|
 | 1075 |         //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
 | 
|---|
 | 1076 |       
 | 
|---|
 | 1077 |       // gather all spreads
 | 
|---|
 | 1078 |       old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
 | 
|---|
 | 1079 |       for (k=0;k<max_operators;k++)
 | 
|---|
 | 1080 |         spread += pow(gsl_matrix_get(A[k],i,i),2);
 | 
|---|
 | 1081 | 
 | 
|---|
 | 1082 |       // store calculated Wannier centre
 | 
|---|
 | 1083 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local?
 | 
|---|
 | 1084 |         for (j=0;j<NDIM;j++)
 | 
|---|
 | 1085 |           Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
 | 
|---|
 | 1086 |     }
 | 
|---|
 | 1087 |   }
 | 
|---|
 | 1088 |   
 | 
|---|
 | 1089 |   // join Wannier orbital to groups with common centres under certain conditions 
 | 
|---|
 | 1090 |   switch (R->CommonWannier) {
 | 
|---|
 | 1091 |    case 4:
 | 
|---|
 | 1092 |     debug(P,"Shifting each Wannier centers to cell center");
 | 
|---|
 | 1093 |     for (i=0; i < Num; i++) {  // go through all occupied wave functions
 | 
|---|
 | 1094 |       for (j=0;j<NDIM;j++)
 | 
|---|
| [f5586e] | 1095 |         WannierCentre[i][j] = sqrt(Lat->RealBasisSQ[j])/2.; 
 | 
|---|
| [a0bcf1] | 1096 |     }    
 | 
|---|
 | 1097 |     break;
 | 
|---|
 | 1098 |    case 3:
 | 
|---|
 | 1099 |     debug(P,"Shifting Wannier centers individually to nearest grid point");
 | 
|---|
 | 1100 |     for (i=0;i < Num; i++) {  // go through all wave functions
 | 
|---|
 | 1101 |       for (j=0;j<NDIM;j++) {
 | 
|---|
| [f5586e] | 1102 |         tmp = WannierCentre[i][j]/sqrt(Lat->RealBasisSQ[j])*(double)N[j];
 | 
|---|
| [a0bcf1] | 1103 |         //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
 | 
|---|
 | 1104 |         if (fabs((double)floor(tmp) - tmp) < fabs((double)ceil(tmp) - tmp))
 | 
|---|
| [f5586e] | 1105 |           WannierCentre[i][j] = (double)floor(tmp)/(double)N[j]*sqrt(Lat->RealBasisSQ[j]); 
 | 
|---|
| [a0bcf1] | 1106 |         else
 | 
|---|
| [f5586e] | 1107 |           WannierCentre[i][j] = (double)ceil(tmp)/(double)N[j]*sqrt(Lat->RealBasisSQ[j]); 
 | 
|---|
| [a0bcf1] | 1108 |       }
 | 
|---|
 | 1109 |     }
 | 
|---|
 | 1110 |     break;
 | 
|---|
 | 1111 |    case 2:
 | 
|---|
 | 1112 |     debug(P,"Combining individual orbitals according to spread.");
 | 
|---|
 | 1113 |     //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
 | 
|---|
 | 1114 |     //debug(P,"finding partners");
 | 
|---|
 | 1115 |     marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");
 | 
|---|
 | 1116 |     group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");
 | 
|---|
 | 1117 |     for (l=0;l<Num;l++) {
 | 
|---|
 | 1118 |       group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]");  // each must group must have one more as end marker
 | 
|---|
 | 1119 |       for (k=0;k<=Num;k++)
 | 
|---|
 | 1120 |         group[l][k] = -1; // reset partner group
 | 
|---|
 | 1121 |     }
 | 
|---|
 | 1122 |     for (k=0;k<Num;k++)
 | 
|---|
 | 1123 |       partner[k] = 0;
 | 
|---|
 | 1124 |     //debug(P,"mem allocated");
 | 
|---|
 | 1125 |     // go for each orbital through every other, check distance against the sum of both spreads
 | 
|---|
 | 1126 |     // if smaller add to group of this orbital
 | 
|---|
 | 1127 |     for (l=0;l<Num;l++) {
 | 
|---|
 | 1128 |       j=0;  // index for partner group
 | 
|---|
 | 1129 |       for (k=0;k<Num;k++) {  // check this against l
 | 
|---|
 | 1130 |         Spread = 0.;
 | 
|---|
 | 1131 |         for (i=0;i<NDIM;i++) {
 | 
|---|
 | 1132 |           //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]); 
 | 
|---|
 | 1133 |           Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
 | 
|---|
 | 1134 |         }
 | 
|---|
 | 1135 |         Spread = sqrt(Spread);  // distance in Spread
 | 
|---|
 | 1136 |         //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]); 
 | 
|---|
 | 1137 |         if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread
 | 
|---|
 | 1138 |           group[l][j++] = k;  // add k to group of l
 | 
|---|
 | 1139 |           partner[l]++;
 | 
|---|
 | 1140 |           //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
 | 
|---|
 | 1141 |         } 
 | 
|---|
 | 1142 |       }
 | 
|---|
 | 1143 |     }
 | 
|---|
 | 1144 |     
 | 
|---|
 | 1145 |     // consistency, for each orbital check if this orbital is also in the group of each referred orbital
 | 
|---|
 | 1146 |     //debug(P,"checking consistency");
 | 
|---|
 | 1147 |     totalflag = 1;
 | 
|---|
 | 1148 |     for (l=0;l<Num;l++) // checking l's group
 | 
|---|
 | 1149 |       for (k=0;k<Num;k++) { // k is partner index
 | 
|---|
 | 1150 |         if (group[l][k] != -1) {  // if current index k is a partner
 | 
|---|
 | 1151 |           flag = 0;
 | 
|---|
 | 1152 |           for(j=0;j<Num;j++) {  // go through each entry in l partner's partner group if l exists
 | 
|---|
 | 1153 |             if ((group[ group[l][k] ][j] == l))
 | 
|---|
 | 1154 |               flag = 1;
 | 
|---|
 | 1155 |           }
 | 
|---|
 | 1156 |           //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
 | 
|---|
 | 1157 |           if (totalflag == 1) totalflag = flag;
 | 
|---|
 | 1158 |         }
 | 
|---|
 | 1159 |       }
 | 
|---|
 | 1160 |     // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
 | 
|---|
 | 1161 |     //debug(P,"weight and calculate new centers for partner groups");
 | 
|---|
 | 1162 |     for (l=0;l<=Num;l++)
 | 
|---|
 | 1163 |       marker[l] = 1;
 | 
|---|
 | 1164 |     if (totalflag) {
 | 
|---|
 | 1165 |       for (l=0;l<Num;l++) { // go through each orbital
 | 
|---|
 | 1166 |         if (marker[l] != 0) { // if it hasn't been reweighted
 | 
|---|
 | 1167 |           marker[l] = 0;
 | 
|---|
 | 1168 |           for (i=0;i<NDIM;i++) 
 | 
|---|
 | 1169 |             q[i] = 0.;
 | 
|---|
 | 1170 |           j = 0;
 | 
|---|
 | 1171 |           while (group[l][j] != -1) {
 | 
|---|
 | 1172 |             marker[group[l][j]] = 0;
 | 
|---|
 | 1173 |             for (i=0;i<NDIM;i++) {
 | 
|---|
 | 1174 |               //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
 | 
|---|
 | 1175 |               q[i] += WannierCentre[ group[l][j] ][i];
 | 
|---|
 | 1176 |             } 
 | 
|---|
 | 1177 |             j++;
 | 
|---|
 | 1178 |           }
 | 
|---|
 | 1179 |           //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
 | 
|---|
 | 1180 |           for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
 | 
|---|
 | 1181 |             q[i] /= (double)(j);
 | 
|---|
 | 1182 |           }
 | 
|---|
 | 1183 |   
 | 
|---|
 | 1184 |           // put WannierCentre into own and all partners'      
 | 
|---|
 | 1185 |           for (i=0;i<NDIM;i++)
 | 
|---|
 | 1186 |             WannierCentre[l][i] = q[i];
 | 
|---|
 | 1187 |           j = 0;  
 | 
|---|
 | 1188 |           while (group[l][j] != -1) {
 | 
|---|
 | 1189 |             for (i=0;i<NDIM;i++)
 | 
|---|
 | 1190 |               WannierCentre[group[l][j]][i] = q[i];
 | 
|---|
 | 1191 |             j++;
 | 
|---|
 | 1192 |           }
 | 
|---|
 | 1193 |         }
 | 
|---|
 | 1194 |       }
 | 
|---|
 | 1195 |     }
 | 
|---|
 | 1196 |     if (P->Call.out[StepLeaderOut]) {
 | 
|---|
 | 1197 |       fprintf(stderr,"Summary:\n");
 | 
|---|
 | 1198 |       fprintf(stderr,"========\n");
 | 
|---|
 | 1199 |       for (i=0;i<Num;i++)
 | 
|---|
 | 1200 |         fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
 | 
|---|
 | 1201 |     } 
 | 
|---|
 | 1202 |     //debug(P,"done");
 | 
|---|
 | 1203 |   
 | 
|---|
| [64fa9e] | 1204 |     Free(marker, "bla");
 | 
|---|
| [a0bcf1] | 1205 |     for (l=0;l<Num;l++)
 | 
|---|
| [64fa9e] | 1206 |       Free(group[l], "bla");
 | 
|---|
 | 1207 |     Free(group, "bla");
 | 
|---|
| [a0bcf1] | 1208 |     break;
 | 
|---|
 | 1209 |    case 1:
 | 
|---|
 | 1210 |     debug(P,"Individual orbitals are changed to center of all.");
 | 
|---|
 | 1211 |     for (i=0;i<NDIM;i++)  // zero center of weight
 | 
|---|
 | 1212 |       q[i] = 0.;
 | 
|---|
 | 1213 |     for (k=0;k<Num;k++)
 | 
|---|
 | 1214 |       for (i=0;i<NDIM;i++) {  // sum up all orbitals each component
 | 
|---|
 | 1215 |         q[i] += WannierCentre[k][i];
 | 
|---|
 | 1216 |       }
 | 
|---|
 | 1217 |     for (i=0;i<NDIM;i++)  // divide by number
 | 
|---|
 | 1218 |       q[i] /= Num;
 | 
|---|
 | 1219 |     for (k=0;k<Num;k++)
 | 
|---|
 | 1220 |       for (i=0;i<NDIM;i++) {  // put into this function's array
 | 
|---|
 | 1221 |         WannierCentre[k][i] = q[i];
 | 
|---|
 | 1222 |       }
 | 
|---|
 | 1223 |     break;
 | 
|---|
 | 1224 |    case 0:
 | 
|---|
 | 1225 |    default:
 | 
|---|
 | 1226 |    break;
 | 
|---|
 | 1227 |   }
 | 
|---|
 | 1228 |   
 | 
|---|
 | 1229 |   // put (new) WannierCentres into local ones and into file
 | 
|---|
 | 1230 |   i=-1;
 | 
|---|
 | 1231 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
 | 1232 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
 | 1233 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 1234 |       i++;  // increase l if it is occupied wave function
 | 
|---|
 | 1235 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
 | 
|---|
 | 1236 |         for (j=0;j<NDIM;j++)
 | 
|---|
 | 1237 |           Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
 | 
|---|
 | 1238 |       }
 | 
|---|
 | 1239 |       if (P->Par.me_comm_ST == 0) 
 | 
|---|
 | 1240 |         fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]); 
 | 
|---|
 | 1241 |     }
 | 
|---|
 | 1242 |   }    
 | 
|---|
 | 1243 |   if (P->Par.me_comm_ST == 0) {
 | 
|---|
 | 1244 |     fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
 | 
|---|
 | 1245 |     fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
 | 
|---|
 | 1246 |   }
 | 
|---|
 | 1247 |   fflush(F->SpreadFile);
 | 
|---|
 | 1248 | 
 | 
|---|
 | 1249 |   // and the spread was calculated in the loop above
 | 
|---|
 | 1250 | /*
 | 
|---|
 | 1251 |   i=-1;     
 | 
|---|
 | 1252 |   for (l=0;l<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;l++)
 | 
|---|
 | 1253 |     if (Psi->AllPsiStatus[l].PsiType == type) {
 | 
|---|
 | 1254 |       i++;
 | 
|---|
 | 1255 |       spread = CalculateSpread(P,l);
 | 
|---|
 | 1256 |       tmp = gsl_matrix_get(A[max_operators],i,i) - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
 | 
|---|
 | 1257 |           - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
 | 
|---|
 | 1258 |           - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
 | 
|---|
 | 1259 |       if(P->Call.out[ValueOut]) fprintf(stderr, "(%i) Check of spread of %ith wave function: %lg against %lg\n",P->Par.me, i, Psi->AddData[i].WannierSpread, tmp);  
 | 
|---|
 | 1260 |     }*/
 | 
|---|
 | 1261 |   // free all remaining memory
 | 
|---|
 | 1262 |   for (k=0;k<max_operators+1;k++)
 | 
|---|
 | 1263 |     gsl_matrix_free(A[k]);
 | 
|---|
 | 1264 | }
 | 
|---|
 | 1265 | 
 | 
|---|
 | 1266 | /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
 | 
|---|
 | 1267 |  * \param *P Problem at hand
 | 
|---|
 | 1268 |  * \return 1 - success, 0 - failure
 | 
|---|
 | 1269 |  */
 | 
|---|
 | 1270 | int ParseWannierFile(struct Problem *P)
 | 
|---|
 | 1271 | {
 | 
|---|
 | 1272 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
 | 1273 |   struct RunStruct *R = &P->R;
 | 
|---|
 | 1274 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
 | 1275 |   struct OnePsiElement *OnePsiA;
 | 
|---|
 | 1276 |   int i,l,j, msglen;
 | 
|---|
 | 1277 |   FILE *SpreadFile;
 | 
|---|
 | 1278 |   char tagname[255];
 | 
|---|
 | 1279 |   char suffix[18];
 | 
|---|
 | 1280 |   double WannierCentre[NDIM+1]; // combined centre and spread
 | 
|---|
 | 1281 |   MPI_Status status;
 | 
|---|
 | 1282 |   enum PsiTypeTag type = Occupied;
 | 
|---|
 | 1283 |   int signal = 0; // 1 - ok, 0 - error
 | 
|---|
 | 1284 | 
 | 
|---|
 | 1285 |   switch (Lat->Psi.PsiST) {
 | 
|---|
 | 1286 |   case SpinDouble:
 | 
|---|
 | 1287 |     strncpy(suffix,".spread.csv",18);    
 | 
|---|
 | 1288 |     break;
 | 
|---|
 | 1289 |   case SpinUp:
 | 
|---|
 | 1290 |     strncpy(suffix,".spread_up.csv",18);    
 | 
|---|
 | 1291 |     break;
 | 
|---|
 | 1292 |   case SpinDown:
 | 
|---|
 | 1293 |     strncpy(suffix,".spread_down.csv",18);    
 | 
|---|
 | 1294 |     break;
 | 
|---|
 | 1295 |   }
 | 
|---|
 | 1296 |   
 | 
|---|
 | 1297 |   if (P->Par.me_comm_ST == 0) { 
 | 
|---|
 | 1298 |     if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
 | 
|---|
 | 1299 |       if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1300 |         Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1301 |       return 0;
 | 
|---|
 | 1302 |       //Error(SomeError,"ParseWannierFile: Opening failed\n");
 | 
|---|
 | 1303 |     }
 | 
|---|
 | 1304 |     signal = 1;
 | 
|---|
 | 1305 |     if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1306 |       Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1307 |   } else {
 | 
|---|
 | 1308 |     if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1309 |       Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1310 |     if (signal == 0)
 | 
|---|
 | 1311 |       return 0;
 | 
|---|
 | 1312 |   }
 | 
|---|
 | 1313 |   i=-1;
 | 
|---|
 | 1314 |   for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions
 | 
|---|
 | 1315 |     OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA
 | 
|---|
 | 1316 |     if (OnePsiA->PsiType == type) {   // drop all but occupied ones
 | 
|---|
 | 1317 |       i++;  // increase l if it is occupied wave function
 | 
|---|
 | 1318 |       if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
 | 
|---|
 | 1319 |         sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
 | 
|---|
 | 1320 |         signal = 0;
 | 
|---|
| [961b75] | 1321 |         if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
 | 
|---|
| [a0bcf1] | 1322 |           //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
 | 
|---|
 | 1323 |           if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1324 |             Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1325 |           return 0;
 | 
|---|
 | 1326 |         }
 | 
|---|
| [961b75] | 1327 |         if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
 | 
|---|
| [a0bcf1] | 1328 |           //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
 | 
|---|
 | 1329 |           if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1330 |             Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1331 |           return 0;
 | 
|---|
 | 1332 |         }
 | 
|---|
 | 1333 |         signal = 1;
 | 
|---|
 | 1334 |         if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1335 |           Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1336 |       } else {
 | 
|---|
 | 1337 |         if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
 | 
|---|
 | 1338 |           Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
 | 
|---|
 | 1339 |         if (signal == 0)
 | 
|---|
 | 1340 |           return 0;
 | 
|---|
 | 1341 |       }
 | 
|---|
 | 1342 |       if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
 | 
|---|
 | 1343 |         if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
 | 
|---|
 | 1344 |           if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
 | 
|---|
 | 1345 |             Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
 | 
|---|
 | 1346 |             //return 0;
 | 
|---|
 | 1347 |           MPI_Get_count(&status, MPI_DOUBLE, &msglen);
 | 
|---|
 | 1348 |           if (msglen != NDIM+1) 
 | 
|---|
 | 1349 |             Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
 | 
|---|
 | 1350 |             //return 0;
 | 
|---|
 | 1351 |         }
 | 
|---|
 | 1352 |         if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
 | 
|---|
 | 1353 |           Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
 | 
|---|
 | 1354 |           //return 0;
 | 
|---|
 | 1355 |         // and store 'em (for all who have this Psi local)
 | 
|---|
 | 1356 |         fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
 | 
|---|
 | 1357 |         for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
 | 
|---|
 | 1358 |         Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
 | 
|---|
 | 1359 |         if (P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
 | 
|---|
 | 1360 |       } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
 | 
|---|
 | 1361 |         if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
 | 
|---|
 | 1362 |           Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
 | 
|---|
 | 1363 |           //return 0;
 | 
|---|
 | 1364 |       }
 | 
|---|
 | 1365 |     }
 | 
|---|
 | 1366 |   }
 | 
|---|
 | 1367 |   if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) 
 | 
|---|
 | 1368 |     fclose(SpreadFile);
 | 
|---|
 | 1369 |   fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
 | 
|---|
 | 1370 |   return 1;
 | 
|---|
 | 1371 | }
 | 
|---|
 | 1372 | 
 | 
|---|
 | 1373 | /** Calculates the spread of orbital \a i.
 | 
|---|
 | 1374 |  * Stored in OnePsiElementAddData#WannierSpread.
 | 
|---|
 | 1375 |  * \param *P Problem at hand
 | 
|---|
 | 1376 |  * \param i i-th wave function (note "extra" ones are not counted!)
 | 
|---|
 | 1377 |  * \return spread \f$\sigma^2_{A^{(k)}}\f$
 | 
|---|
 | 1378 |  */
 | 
|---|
 | 1379 | double CalculateSpread(struct Problem *P, int i) {
 | 
|---|
 | 1380 |   struct Lattice *Lat = &P->Lat;
 | 
|---|
 | 1381 |   struct RunStruct *R = &P->R;
 | 
|---|
 | 1382 |   struct Psis *Psi = &Lat->Psi;
 | 
|---|
 | 1383 |   struct LatticeLevel *Lev0 = R->Lev0;
 | 
|---|
 | 1384 |   struct LatticeLevel *LevS = R->LevS;
 | 
|---|
 | 1385 |   struct Density *Dens0 = Lev0->Dens;
 | 
|---|
 | 1386 |   struct fft_plan_3d *plan = Lat->plan; 
 | 
|---|
 | 1387 |   fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
 | 
|---|
 | 1388 |   fftw_real *PsiCR = (fftw_real *)PsiC;
 | 
|---|
 | 1389 |   fftw_complex *work = Dens0->DensityCArray[Temp2Density];
 | 
|---|
 | 1390 |   fftw_real **HGcR = &Dens0->DensityArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
 | 
|---|
 | 1391 |   fftw_complex **HGcRC = (fftw_complex**)HGcR;
 | 
|---|
 | 1392 |   fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array
 | 
|---|
 | 1393 |   fftw_real **HGcR2 = (fftw_real**)HGcR2C;
 | 
|---|
 | 1394 |   MPI_Status status;
 | 
|---|
 | 1395 |   struct OnePsiElement *OnePsiA, *LOnePsiA;
 | 
|---|
 | 1396 |   int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
 | 
|---|
 | 1397 |   fftw_complex *LPsiDatA=NULL;
 | 
|---|
 | 1398 |   int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
 | 
|---|
 | 1399 |   N0 = LevS->Plan0.plan->local_nx;
 | 
|---|
 | 1400 |   N = LevS->Plan0.plan->N;
 | 
|---|
 | 1401 |   const int NUpx = LevS->NUp[0];
 | 
|---|
 | 1402 |   const int NUpy = LevS->NUp[1];
 | 
|---|
 | 1403 |   const int NUpz = LevS->NUp[2];
 | 
|---|
 | 1404 |   double a_ij, b_ij, A_ij, B_ij;
 | 
|---|
 | 1405 |   double tmp, tmp2, spread = 0;
 | 
|---|
 | 1406 |   double **cos_lookup, **sin_lookup;
 | 
|---|
 | 1407 |   
 | 
|---|
 | 1408 |   b_ij = 0;
 | 
|---|
 | 1409 | 
 | 
|---|
 | 1410 |   // create lookup table for sin/cos values
 | 
|---|
 | 1411 |   cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup");
 | 
|---|
 | 1412 |   sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup");
 | 
|---|
 | 1413 |   for (k=0;k<NDIM;k++) {
 | 
|---|
 | 1414 |     // allocate memory
 | 
|---|
 | 1415 |     cos_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: cos_lookup");
 | 
|---|
 | 1416 |     sin_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: sin_lookup");
 | 
|---|
 | 1417 |     // reset arrays
 | 
|---|
 | 1418 |     SetArrayToDouble0(cos_lookup[k],LevS->Plan0.plan->N[k]);
 | 
|---|
 | 1419 |     SetArrayToDouble0(sin_lookup[k],LevS->Plan0.plan->N[k]);
 | 
|---|
 | 1420 |     // create lookup values
 | 
|---|
 | 1421 |     for (g=0;g<LevS->Plan0.plan->N[k];g++) {
 | 
|---|
 | 1422 |       tmp = 2*PI/(double)LevS->Plan0.plan->N[k]*(double)g; 
 | 
|---|
 | 1423 |       cos_lookup[k][g] = cos(tmp);
 | 
|---|
 | 1424 |       sin_lookup[k][g] = sin(tmp);
 | 
|---|
 | 1425 |     }
 | 
|---|
 | 1426 |   }  
 | 
|---|
 | 1427 |   // fill matrices
 | 
|---|
 | 1428 |   OnePsiA = &Psi->AllPsiStatus[i];    // grab the desired OnePsiA
 | 
|---|
 | 1429 |   if (OnePsiA->PsiType != Extra) {   // drop if extra one
 | 
|---|
 | 1430 |     if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
 | 
|---|
 | 1431 |        LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
 | 
|---|
 | 1432 |     else 
 | 
|---|
 | 1433 |        LOnePsiA = NULL;
 | 
|---|
 | 1434 |     if (LOnePsiA == NULL) {   // if it's not local ... receive it from respective process into TempPsi
 | 
|---|
 | 1435 |       RecvSource = OnePsiA->my_color_comm_ST_Psi;
 | 
|---|
 | 1436 |       MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
 | 
|---|
 | 1437 |       LPsiDatA=LevS->LPsi->TempPsi;
 | 
|---|
 | 1438 |     } else {                  // .. otherwise send it to all other processes (Max_me... - 1)
 | 
|---|
 | 1439 |       for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
 | 
|---|
 | 1440 |         if (p != OnePsiA->my_color_comm_ST_Psi) 
 | 
|---|
 | 1441 |           MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
 | 
|---|
 | 1442 |       LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
 | 
|---|
 | 1443 |     } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
 | 
|---|
 | 1444 |       
 | 
|---|
 | 1445 |     CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
 | 
|---|
 | 1446 |     // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
 | 
|---|
 | 1447 |     for (n0=0;n0<N0;n0++)  
 | 
|---|
 | 1448 |       for (n[1]=0;n[1]<N[1];n[1]++) 
 | 
|---|
 | 1449 |         for (n[2]=0;n[2]<N[2];n[2]++) {
 | 
|---|
 | 1450 |           i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx);
 | 
|---|
 | 1451 |           iS = n[2]+N[2]*(n[1]+N[1]*n0);
 | 
|---|
 | 1452 |           n[0] = n0 + LevS->Plan0.plan->start_nx;
 | 
|---|
 | 1453 |           for (k=0;k<max_operators;k+=2) {
 | 
|---|
 | 1454 |             tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]);
 | 
|---|
 | 1455 |             tmp2 = PsiCR[i0] /LevS->MaxN;
 | 
|---|
 | 1456 |             // check lookup
 | 
|---|
 | 1457 |             if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
 | 
|---|
 | 1458 |               fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);   
 | 
|---|
 | 1459 |               Error(SomeError, "Lookup table does not match real value!");
 | 
|---|
 | 1460 |             }
 | 
|---|
 | 1461 | //            HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 1462 | //            HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
 | 
|---|
 | 1463 | //            HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 1464 | //            HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
 | 
|---|
 | 1465 |             HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 1466 |             HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 1467 |             HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 1468 |             HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
 | 
|---|
 | 1469 |           }
 | 
|---|
 | 1470 |         }
 | 
|---|
 | 1471 |     for (k=0;k<max_operators;k++) {
 | 
|---|
 | 1472 |       fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
 | 
|---|
 | 1473 |       fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
 | 
|---|
 | 1474 |     }
 | 
|---|
 | 1475 | 
 | 
|---|
 | 1476 | 
 | 
|---|
 | 1477 |     for (k=0;k<max_operators;k++) {
 | 
|---|
 | 1478 |       a_ij = 0;
 | 
|---|
 | 1479 |       //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
 | 
|---|
 | 1480 |       // sum directly in a_ij and b_ij the two desired terms
 | 
|---|
 | 1481 |       g=0;
 | 
|---|
 | 1482 |       if (LevS->GArray[0].GSq == 0.0) {
 | 
|---|
 | 1483 |         Index = LevS->GArray[g].Index;
 | 
|---|
 | 1484 |         a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im);
 | 
|---|
 | 1485 |         b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im);
 | 
|---|
 | 1486 |         g++;
 | 
|---|
 | 1487 |       }
 | 
|---|
 | 1488 |       for (; g < LevS->MaxG; g++) {
 | 
|---|
 | 1489 |         Index = LevS->GArray[g].Index;
 | 
|---|
 | 1490 |         a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
 | 
|---|
 | 1491 |         b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
 | 
|---|
 | 1492 |       } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
 | 
|---|
 | 1493 |       MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
 | 1494 |       spread += pow(A_ij,2);
 | 
|---|
 | 1495 |     }
 | 
|---|
 | 1496 |   }
 | 
|---|
 | 1497 |   MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
 | 
|---|
 | 1498 | 
 | 
|---|
 | 1499 |   // store spread in OnePsiElementAdd
 | 
|---|
 | 1500 |   Psi->AddData[i].WannierSpread = B_ij - spread;
 | 
|---|
 | 1501 |   // free lookups
 | 
|---|
 | 1502 |   for (k=0;k<NDIM;k++) {
 | 
|---|
| [64fa9e] | 1503 |     Free(cos_lookup[k], "bla");
 | 
|---|
 | 1504 |     Free(sin_lookup[k], "bla");
 | 
|---|
| [a0bcf1] | 1505 |   }
 | 
|---|
| [64fa9e] | 1506 |   Free(cos_lookup, "bla");
 | 
|---|
 | 1507 |   Free(sin_lookup, "bla");
 | 
|---|
| [a0bcf1] | 1508 | 
 | 
|---|
 | 1509 |   return (B_ij - spread);
 | 
|---|
 | 1510 | }
 | 
|---|