| [a0bcf1] | 1 | /** \file wannier.c | 
|---|
|  | 2 | * Maximally Localized Wannier Functions. | 
|---|
|  | 3 | * | 
|---|
|  | 4 | * Contains the on function that minimises the spread of all orbitals in one rush in a parallel | 
|---|
|  | 5 | * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to | 
|---|
|  | 6 | * calculate the spread of a specific orbital, which may be useful in checking on the change of | 
|---|
|  | 7 | * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex. | 
|---|
|  | 8 | * | 
|---|
|  | 9 | Project: ParallelCarParrinello | 
|---|
|  | 10 | \author Frederik Heber | 
|---|
|  | 11 | \date 2006 | 
|---|
|  | 12 |  | 
|---|
|  | 13 | File: wannier.c | 
|---|
|  | 14 | $Id: wannier.c,v 1.63 2007-02-13 14:15:29 foo Exp $ | 
|---|
|  | 15 | */ | 
|---|
|  | 16 |  | 
|---|
|  | 17 | #include <math.h> | 
|---|
|  | 18 | #include <gsl/gsl_math.h> | 
|---|
|  | 19 | #include <gsl/gsl_eigen.h> | 
|---|
|  | 20 | #include <gsl/gsl_matrix.h> | 
|---|
|  | 21 | #include <gsl/gsl_vector.h> | 
|---|
|  | 22 | #include <gsl/gsl_complex.h> | 
|---|
|  | 23 | #include <gsl/gsl_complex_math.h> | 
|---|
|  | 24 | #include <gsl/gsl_sort_vector.h> | 
|---|
|  | 25 | #include <gsl/gsl_heapsort.h> | 
|---|
|  | 26 | #include <gsl/gsl_blas.h> | 
|---|
|  | 27 | #include <string.h> | 
|---|
|  | 28 |  | 
|---|
|  | 29 | #include "data.h" | 
|---|
|  | 30 | #include "density.h" | 
|---|
|  | 31 | #include "errors.h" | 
|---|
|  | 32 | #include "helpers.h" | 
|---|
|  | 33 | #include "init.h" | 
|---|
|  | 34 | #include "myfft.h" | 
|---|
|  | 35 | #include "mymath.h" | 
|---|
|  | 36 | #include "output.h" | 
|---|
|  | 37 | #include "wannier.h" | 
|---|
|  | 38 |  | 
|---|
|  | 39 |  | 
|---|
|  | 40 | #define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread | 
|---|
|  | 41 |  | 
|---|
|  | 42 |  | 
|---|
|  | 43 | /** Converts type fftw_complex to gsl_complex. | 
|---|
|  | 44 | * \param a complex number | 
|---|
|  | 45 | * \return b complex number | 
|---|
|  | 46 | */ | 
|---|
|  | 47 | gsl_complex convertComplex (fftw_complex a) { | 
|---|
|  | 48 | return gsl_complex_rect(c_re(a),c_im(a)); | 
|---|
|  | 49 | } | 
|---|
|  | 50 |  | 
|---|
|  | 51 | /** "merry go round" implementation for parallel index ordering. | 
|---|
|  | 52 | * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is | 
|---|
|  | 53 | * performed which generates once each possible pairing. | 
|---|
|  | 54 | * \param *top index array 1 | 
|---|
|  | 55 | * \param *bot index array 2 | 
|---|
|  | 56 | * \param m N/2, where N is the matrix row/column dimension | 
|---|
|  | 57 | * \note taken from [Golub, Matrix computations, 1989, p451] | 
|---|
|  | 58 | */ | 
|---|
|  | 59 | void music(int *top, int *bot, int m) | 
|---|
|  | 60 | { | 
|---|
|  | 61 | int *old_top, *old_bot; | 
|---|
|  | 62 | int k; | 
|---|
|  | 63 | old_top = (int *) Malloc(sizeof(int)*m, "music: old_top"); | 
|---|
|  | 64 | old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot"); | 
|---|
|  | 65 | /*  fprintf(stderr,"oldtop\t"); | 
|---|
|  | 66 | for (k=0;k<m;k++) | 
|---|
|  | 67 | fprintf(stderr,"%i\t", top[k]); | 
|---|
|  | 68 | fprintf(stderr,"\n"); | 
|---|
|  | 69 | fprintf(stderr,"oldbot\t"); | 
|---|
|  | 70 | for (k=0;k<m;k++) | 
|---|
|  | 71 | fprintf(stderr,"%i\t", bot[k]); | 
|---|
|  | 72 | fprintf(stderr,"\n");*/ | 
|---|
|  | 73 | // first copy arrays | 
|---|
|  | 74 | for (k=0;k<m;k++) { | 
|---|
|  | 75 | old_top[k] = top[k]; | 
|---|
|  | 76 | old_bot[k] = bot[k]; | 
|---|
|  | 77 | } | 
|---|
|  | 78 | // then let the music play | 
|---|
|  | 79 | for (k=0;k<m;k++) { | 
|---|
|  | 80 | if (k==1) | 
|---|
|  | 81 | top[k] = old_bot[0]; | 
|---|
|  | 82 | else if (k > 1) | 
|---|
|  | 83 | top[k] = old_top[k-1]; | 
|---|
|  | 84 | if (k==m-1) | 
|---|
|  | 85 | bot[k] = old_top[k]; | 
|---|
|  | 86 | else | 
|---|
|  | 87 | bot[k] = old_bot[k+1]; | 
|---|
|  | 88 | } | 
|---|
|  | 89 | /*  fprintf(stderr,"top\t"); | 
|---|
|  | 90 | for (k=0;k<m;k++) | 
|---|
|  | 91 | fprintf(stderr,"%i\t", top[k]); | 
|---|
|  | 92 | fprintf(stderr,"\n"); | 
|---|
|  | 93 | fprintf(stderr,"bot\t"); | 
|---|
|  | 94 | for (k=0;k<m;k++) | 
|---|
|  | 95 | fprintf(stderr,"%i\t", bot[k]); | 
|---|
|  | 96 | fprintf(stderr,"\n");*/ | 
|---|
|  | 97 | // and finito | 
|---|
| [64fa9e] | 98 | Free(old_top, "bla"); | 
|---|
|  | 99 | Free(old_bot, "bla"); | 
|---|
| [a0bcf1] | 100 | } | 
|---|
|  | 101 |  | 
|---|
|  | 102 | /** merry-go-round for matrix columns. | 
|---|
|  | 103 | * The trick here is that we must be aware of multiple rotations per process, thus only some of the | 
|---|
|  | 104 | * whole lot of local columns get sent/received, most of them are just shifted via exchanging the various | 
|---|
|  | 105 | * pointers to the matrix columns within the local array. | 
|---|
|  | 106 | * \param comm communicator for circulation | 
|---|
|  | 107 | * \param *Aloc local array of columns | 
|---|
|  | 108 | * \param Num entries per column | 
|---|
|  | 109 | * \param max_rounds number of column pairs in \a *Around | 
|---|
|  | 110 | * \param k offset for tag | 
|---|
|  | 111 | * \param tagS0 MPI tag for sending left column | 
|---|
|  | 112 | * \param tagS1 MPI tag for sending right column | 
|---|
|  | 113 | * \param tagR0 MPI tag for receiving left column | 
|---|
|  | 114 | * \param tagR1 MPI tag for receiving right column | 
|---|
|  | 115 | */ | 
|---|
|  | 116 | void shiftcolumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) { | 
|---|
|  | 117 | //double *A_locS1, *A_locS2;  // local columns of A[k] | 
|---|
|  | 118 | //double *A_locR1, *A_locR2;  // local columns of A[k] | 
|---|
|  | 119 | MPI_Request requestS0, requestS1, requestR0, requestR1; | 
|---|
|  | 120 | MPI_Status status; | 
|---|
|  | 121 | int ProcRank, ProcNum; | 
|---|
|  | 122 | int l; | 
|---|
|  | 123 | MPI_Comm_size (comm, &ProcNum); | 
|---|
|  | 124 | MPI_Comm_rank (comm, &ProcRank); | 
|---|
|  | 125 | double *Abuffer1, *Abuffer2;  // mark the columns that are circulated | 
|---|
|  | 126 |  | 
|---|
|  | 127 | //fprintf(stderr,"shifting..."); | 
|---|
|  | 128 | if (ProcRank == 0) { | 
|---|
|  | 129 | if (max_rounds > 1) { | 
|---|
|  | 130 | // get last left column | 
|---|
|  | 131 | Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column | 
|---|
|  | 132 | MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0); | 
|---|
|  | 133 | } else { | 
|---|
|  | 134 | // get right column | 
|---|
|  | 135 | Abuffer1 = Aloc[1]; // note down the free column | 
|---|
|  | 136 | MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1,  tagS1+2*k, comm, &requestS0); | 
|---|
|  | 137 | } | 
|---|
|  | 138 |  | 
|---|
|  | 139 | //fprintf(stderr,"...left columns..."); | 
|---|
|  | 140 | for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right | 
|---|
|  | 141 | Aloc[l] = Aloc[l-2]; | 
|---|
|  | 142 |  | 
|---|
|  | 143 | if (max_rounds > 1) { | 
|---|
|  | 144 | //fprintf(stderr,"...first right..."); | 
|---|
|  | 145 | Aloc[2] = Aloc[1]; // get first right column | 
|---|
|  | 146 | } | 
|---|
|  | 147 |  | 
|---|
|  | 148 | //fprintf(stderr,"...right columns..."); | 
|---|
|  | 149 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left | 
|---|
|  | 150 | Aloc[l] = Aloc[l+2]; | 
|---|
|  | 151 |  | 
|---|
|  | 152 | //fprintf(stderr,"...last right..."); | 
|---|
|  | 153 | Aloc[(2*max_rounds-1)] = Abuffer1; | 
|---|
|  | 154 | MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1); | 
|---|
|  | 155 |  | 
|---|
|  | 156 | } else if (ProcRank == ProcNum-1) { | 
|---|
|  | 157 | //fprintf(stderr,"...first right..."); | 
|---|
|  | 158 | // get first right column | 
|---|
|  | 159 | Abuffer2 = Aloc[1]; // note down the free column | 
|---|
|  | 160 | MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1); | 
|---|
|  | 161 |  | 
|---|
|  | 162 | //fprintf(stderr,"...right columns..."); | 
|---|
|  | 163 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left | 
|---|
|  | 164 | Aloc[(l)] = Aloc[(l+2)]; | 
|---|
|  | 165 |  | 
|---|
|  | 166 | //fprintf(stderr,"...last right..."); | 
|---|
|  | 167 | Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column | 
|---|
|  | 168 |  | 
|---|
|  | 169 | //fprintf(stderr,"...left columns..."); | 
|---|
|  | 170 | for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right | 
|---|
|  | 171 | Aloc[(l)] = Aloc[(l-2)]; | 
|---|
|  | 172 |  | 
|---|
|  | 173 | //fprintf(stderr,"...first left..."); | 
|---|
|  | 174 | //    if (max_rounds > 1) | 
|---|
|  | 175 | Aloc[0] = Abuffer2; // get first left column | 
|---|
|  | 176 | MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0); | 
|---|
|  | 177 |  | 
|---|
|  | 178 | } else { | 
|---|
|  | 179 | // get last left column | 
|---|
|  | 180 | MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0); | 
|---|
|  | 181 | Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column | 
|---|
|  | 182 |  | 
|---|
|  | 183 | //fprintf(stderr,"...first right..."); | 
|---|
|  | 184 | // get first right column | 
|---|
|  | 185 | MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1); | 
|---|
|  | 186 | Abuffer2 = Aloc[1]; // note down the free column | 
|---|
|  | 187 |  | 
|---|
|  | 188 | //fprintf(stderr,"...left columns..."); | 
|---|
|  | 189 | for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right | 
|---|
|  | 190 | Aloc[(l)] = Aloc[(l-2)]; | 
|---|
|  | 191 |  | 
|---|
|  | 192 | //fprintf(stderr,"...right columns..."); | 
|---|
|  | 193 | for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left | 
|---|
|  | 194 | Aloc[(l)] = Aloc[(l+2)]; | 
|---|
|  | 195 |  | 
|---|
|  | 196 | //fprintf(stderr,"...first left..."); | 
|---|
|  | 197 | Aloc[0] = Abuffer1; // get first left column | 
|---|
|  | 198 | MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0); | 
|---|
|  | 199 |  | 
|---|
|  | 200 | //fprintf(stderr,"...last right..."); | 
|---|
|  | 201 | Aloc[(2*max_rounds-1)] = Abuffer2; | 
|---|
|  | 202 | MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1); | 
|---|
|  | 203 | } | 
|---|
|  | 204 |  | 
|---|
|  | 205 | //fprintf(stderr,"...waiting..."); | 
|---|
|  | 206 | if (ProcRank != ProcNum-1) | 
|---|
|  | 207 | MPI_Wait(&requestS0, &status); | 
|---|
|  | 208 | if (ProcRank != 0)  // first left column | 
|---|
|  | 209 | MPI_Wait(&requestR0, &status); | 
|---|
|  | 210 | if (ProcRank != 0) | 
|---|
|  | 211 | MPI_Wait(&requestS1, &status); | 
|---|
|  | 212 | if (ProcRank != ProcNum-1) | 
|---|
|  | 213 | MPI_Wait(&requestR1, &status); | 
|---|
|  | 214 | //fprintf(stderr,"...done\n"); | 
|---|
|  | 215 | } | 
|---|
|  | 216 |  | 
|---|
|  | 217 | /** Computation of Maximally Localized Wannier Functions. | 
|---|
|  | 218 | * Maximally localized functions are prime when evulating a Hamiltonian with | 
|---|
|  | 219 | * magnetic fields under periodic boundary conditions, as the common position | 
|---|
|  | 220 | * operator is no longer valid. These can be obtained by orbital rotations, which | 
|---|
|  | 221 | * are looked for iteratively and gathered in one transformation matrix, to be | 
|---|
|  | 222 | * later applied to the set of orbital wave functions. | 
|---|
|  | 223 | * | 
|---|
|  | 224 | * In order to obtain these, the following algorithm is applied: | 
|---|
|  | 225 | * -# Initialize U (identity) as the sought-for transformation matrix | 
|---|
|  | 226 | * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements | 
|---|
|  | 227 | *    \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators | 
|---|
|  | 228 | *    \f$A^{(k)}\f$ | 
|---|
|  | 229 | * -# For each pair of indices (i,j) (i<j) do the following: | 
|---|
|  | 230 | *    -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$ | 
|---|
|  | 231 | *       where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$ | 
|---|
|  | 232 | *    -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G | 
|---|
|  | 233 | *       corresponding to the greatest eigenvalue, such that \f$x\geq0\f$ | 
|---|
|  | 234 | *    -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the | 
|---|
|  | 235 | *       identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$ | 
|---|
|  | 236 | *       \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$ | 
|---|
|  | 237 | *    -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$ | 
|---|
|  | 238 | *    -# Gather the rotations in \f$U = U R\f$ | 
|---|
|  | 239 | * -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$ | 
|---|
|  | 240 | * -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3. | 
|---|
|  | 241 | * -# Apply transformations to the orbital wavefunctions \f$ | \phi_i \rangle = \sum_j U_{ij} | \phi_j \rangle\f$ | 
|---|
|  | 242 | * -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in | 
|---|
|  | 243 | *    OnePsiElementAddData#WannierCentre | 
|---|
|  | 244 | * | 
|---|
|  | 245 | * Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions. | 
|---|
|  | 246 | * Note that hereby additional memory is needed as old and transformed wave functions must be present at the same | 
|---|
|  | 247 | * time. | 
|---|
|  | 248 | * | 
|---|
|  | 249 | * The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index | 
|---|
|  | 250 | * generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices | 
|---|
|  | 251 | * may be diagonalized simultaneously and parallely. We use the implementation explained in | 
|---|
|  | 252 | * [Golub, Matrix computations, 1989, p451]. | 
|---|
|  | 253 | * | 
|---|
|  | 254 | * \param *P Problem at hand | 
|---|
|  | 255 | */ | 
|---|
|  | 256 | void ComputeMLWF(struct Problem *P) { | 
|---|
|  | 257 | // variables and allocation | 
|---|
|  | 258 | struct FileData *F = &P->Files; | 
|---|
|  | 259 | struct Lattice *Lat = &P->Lat; | 
|---|
|  | 260 | struct RunStruct *R = &P->R; | 
|---|
|  | 261 | struct Psis *Psi = &Lat->Psi; | 
|---|
|  | 262 | struct LatticeLevel *Lev0 = R->Lev0; | 
|---|
|  | 263 | struct LatticeLevel *LevS = R->LevS; | 
|---|
|  | 264 | struct Density *Dens0 = Lev0->Dens; | 
|---|
|  | 265 | struct fft_plan_3d *plan = Lat->plan; | 
|---|
|  | 266 | fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; | 
|---|
|  | 267 | fftw_real *PsiCR = (fftw_real *)PsiC; | 
|---|
|  | 268 | fftw_complex *work = Dens0->DensityCArray[Temp2Density]; | 
|---|
|  | 269 | fftw_real **HGcR = &Dens0->DensityArray[HGDensity];  // use HGDensity, 4x Gap..Density, TempDensity as a storage array | 
|---|
|  | 270 | fftw_complex **HGcRC = (fftw_complex**)HGcR; | 
|---|
|  | 271 | fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array | 
|---|
|  | 272 | fftw_real **HGcR2 = (fftw_real**)HGcR2C; | 
|---|
|  | 273 | MPI_Status status; | 
|---|
|  | 274 | struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB; | 
|---|
|  | 275 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; | 
|---|
|  | 276 | fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; | 
|---|
|  | 277 | int n[NDIM],n0,i0,iS, Index; | 
|---|
|  | 278 | int N0; | 
|---|
|  | 279 | int N[NDIM]; | 
|---|
|  | 280 | const int NUpx = LevS->NUp[0]; | 
|---|
|  | 281 | const int NUpy = LevS->NUp[1]; | 
|---|
|  | 282 | const int NUpz = LevS->NUp[2]; | 
|---|
|  | 283 | int e,i,j,k,l,m,u,p,g; | 
|---|
|  | 284 | int Num = Psi->NoOfPsis;  // is number of occupied plus unoccupied states for rows | 
|---|
|  | 285 | double x,y,r; | 
|---|
|  | 286 | double q[NDIM]; | 
|---|
|  | 287 | double *c,*s; | 
|---|
|  | 288 | int index; | 
|---|
|  | 289 | double spread = 0., old_spread=0., Spread=0.; | 
|---|
|  | 290 | double WannierCentre[Num][NDIM]; | 
|---|
|  | 291 | double WannierSpread[Num]; | 
|---|
|  | 292 | double tmp,tmp2; | 
|---|
|  | 293 | double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0; | 
|---|
|  | 294 | double **cos_lookup,**sin_lookup; | 
|---|
|  | 295 | gsl_matrix *G; | 
|---|
|  | 296 | gsl_vector *h; | 
|---|
|  | 297 | gsl_vector *eval; | 
|---|
|  | 298 | gsl_matrix *evec; | 
|---|
|  | 299 | gsl_eigen_symmv_workspace *w; | 
|---|
|  | 300 | int ProcNum, ProcRank, set; | 
|---|
|  | 301 | int it_steps; // iteration step counter | 
|---|
|  | 302 | int *top, *bot; | 
|---|
|  | 303 | int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from | 
|---|
|  | 304 | int left, right; // left or right neighbour for process | 
|---|
|  | 305 | double **Aloc[max_operators+1], **Uloc; // local columns for one step of A[k] | 
|---|
|  | 306 | double *Around[max_operators+1], *Uround; // all local columns for one round of A[k] | 
|---|
|  | 307 | double *Atotal[max_operators+1], *Utotal; // all local columns for one round of A[k] | 
|---|
|  | 308 | double a_i, a_j; | 
|---|
|  | 309 | int tagR0, tagR1, tagS0, tagS1; | 
|---|
|  | 310 | int iloc, jloc; | 
|---|
|  | 311 | double *s_all, *c_all; | 
|---|
|  | 312 | int round, max_rounds; | 
|---|
|  | 313 | int start; | 
|---|
|  | 314 | int *rcounts, *rdispls; | 
|---|
|  | 315 | int AllocNum = ceil((double)Num / 2. ) *2; | 
|---|
|  | 316 | int totalflag, flag; | 
|---|
|  | 317 | int *marker, **group; | 
|---|
|  | 318 | int partner[Num]; | 
|---|
|  | 319 | int type = Occupied; | 
|---|
|  | 320 | MPI_Comm *comm; | 
|---|
|  | 321 | char spin[12], suffix[18]; | 
|---|
|  | 322 |  | 
|---|
|  | 323 | N0 = LevS->Plan0.plan->local_nx; | 
|---|
|  | 324 | N[0] = LevS->Plan0.plan->N[0]; | 
|---|
|  | 325 | N[1] = LevS->Plan0.plan->N[1]; | 
|---|
|  | 326 | N[2] = LevS->Plan0.plan->N[2]; | 
|---|
|  | 327 |  | 
|---|
|  | 328 | comm = &P->Par.comm_ST; | 
|---|
|  | 329 |  | 
|---|
|  | 330 | fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi); | 
|---|
|  | 331 | if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) {  // all parallel | 
|---|
|  | 332 | comm = &P->Par.comm_ST; | 
|---|
|  | 333 | fprintf(stderr,"(%i) Jacobi is done parallely by all\n", P->Par.me); | 
|---|
|  | 334 | } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first | 
|---|
|  | 335 | if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel | 
|---|
|  | 336 | comm = &P->Par.comm_ST_Psi; | 
|---|
|  | 337 | fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me); | 
|---|
|  | 338 | } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel | 
|---|
|  | 339 | comm = &P->Par.comm_ST_PsiT; | 
|---|
|  | 340 | fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me); | 
|---|
|  | 341 | } | 
|---|
|  | 342 | } else { | 
|---|
|  | 343 | if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) {  // Psis parallel | 
|---|
|  | 344 | comm = &P->Par.comm_ST_PsiT; | 
|---|
|  | 345 | fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me); | 
|---|
|  | 346 | } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) {  // coefficients parallel | 
|---|
|  | 347 | comm = &P->Par.comm_ST_Psi; | 
|---|
|  | 348 | fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me); | 
|---|
|  | 349 | } | 
|---|
|  | 350 | } | 
|---|
|  | 351 |  | 
|---|
|  | 352 | MPI_Comm_size (*comm, &ProcNum); | 
|---|
|  | 353 | MPI_Comm_rank (*comm, &ProcRank); | 
|---|
|  | 354 |  | 
|---|
|  | 355 | if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me); | 
|---|
|  | 356 |  | 
|---|
|  | 357 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me); | 
|---|
|  | 358 |  | 
|---|
|  | 359 | // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1)) | 
|---|
|  | 360 | gsl_matrix *A[max_operators+1];   // one extra for B matrix | 
|---|
|  | 361 | for (u=0;u<=max_operators;u++) | 
|---|
|  | 362 | A[u] = gsl_matrix_calloc (AllocNum,AllocNum);  // allocate matrix | 
|---|
|  | 363 |  | 
|---|
|  | 364 | // create lookup table for sin/cos values | 
|---|
|  | 365 | cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); | 
|---|
|  | 366 | sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); | 
|---|
|  | 367 | for (i=0;i<NDIM;i++) { | 
|---|
|  | 368 | // allocate memory | 
|---|
|  | 369 | cos_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: cos_lookup"); | 
|---|
|  | 370 | sin_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: sin_lookup"); | 
|---|
|  | 371 | // reset arrays | 
|---|
|  | 372 | SetArrayToDouble0(cos_lookup[i],LevS->Plan0.plan->N[i]); | 
|---|
|  | 373 | SetArrayToDouble0(sin_lookup[i],LevS->Plan0.plan->N[i]); | 
|---|
|  | 374 | // create lookup values | 
|---|
|  | 375 | for (j=0;j<LevS->Plan0.plan->N[i];j++) { | 
|---|
|  | 376 | tmp = 2*PI/(double)LevS->Plan0.plan->N[i]*(double)j; | 
|---|
|  | 377 | cos_lookup[i][j] = cos(tmp); | 
|---|
|  | 378 | sin_lookup[i][j] = sin(tmp); | 
|---|
|  | 379 | } | 
|---|
|  | 380 | } | 
|---|
|  | 381 | l=-1; // to access U matrix element (0..Num-1) | 
|---|
|  | 382 | // fill the matrices | 
|---|
|  | 383 | for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions | 
|---|
|  | 384 | OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA | 
|---|
|  | 385 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 386 | l++;  // increase l if it is non-extra wave function | 
|---|
|  | 387 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
|  | 388 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; | 
|---|
|  | 389 | else | 
|---|
|  | 390 | LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients! | 
|---|
|  | 391 |  | 
|---|
|  | 392 | //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0); | 
|---|
|  | 393 | if (LPsiDatA != NULL) { | 
|---|
|  | 394 | CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); | 
|---|
|  | 395 | // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! | 
|---|
|  | 396 | for (n0=0;n0<N0;n0++) | 
|---|
|  | 397 | for (n[1]=0;n[1]<N[1];n[1]++) | 
|---|
|  | 398 | for (n[2]=0;n[2]<N[2];n[2]++) { | 
|---|
|  | 399 | i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx); | 
|---|
|  | 400 | iS = n[2]+N[2]*(n[1]+N[1]*n0); | 
|---|
|  | 401 | n[0] = n0 + LevS->Plan0.plan->start_nx; | 
|---|
|  | 402 | for (k=0;k<max_operators;k+=2) { | 
|---|
|  | 403 | e = k/2; | 
|---|
|  | 404 | tmp = 2*PI/(double)(N[e])*(double)(n[e]); | 
|---|
|  | 405 | tmp2 = PsiCR[i0] /LevS->MaxN; | 
|---|
|  | 406 | // check lookup | 
|---|
|  | 407 | if (!l) // perform check on first wave function only | 
|---|
|  | 408 | if ((fabs(cos(tmp) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[e][n[e]]) > MYEPSILON)) { | 
|---|
|  | 409 | Error(SomeError, "Lookup table does not match real value!"); | 
|---|
|  | 410 | } | 
|---|
|  | 411 | HGcR[k][iS] = cos_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 412 | HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */ | 
|---|
|  | 413 | HGcR[k+1][iS] = sin_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 414 | HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ | 
|---|
|  | 415 | } | 
|---|
|  | 416 | } | 
|---|
|  | 417 | for (u=0;u<max_operators;u++) { | 
|---|
|  | 418 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work); | 
|---|
|  | 419 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work); | 
|---|
|  | 420 | } | 
|---|
|  | 421 | } | 
|---|
|  | 422 | m = -1; // to access U matrix element (0..Num-1) | 
|---|
|  | 423 | for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions | 
|---|
|  | 424 | OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB | 
|---|
|  | 425 | if (OnePsiB->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 426 | m++;  // increase m if it is non-extra wave function | 
|---|
|  | 427 | if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
|  | 428 | LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; | 
|---|
|  | 429 | else | 
|---|
|  | 430 | LOnePsiB = NULL; | 
|---|
|  | 431 | if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi | 
|---|
|  | 432 | RecvSource = OnePsiB->my_color_comm_ST_Psi; | 
|---|
|  | 433 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); | 
|---|
|  | 434 | LPsiDatB=LevS->LPsi->TempPsi; | 
|---|
|  | 435 | } else {                  // .. otherwise send it to all other processes (Max_me... - 1) | 
|---|
|  | 436 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) | 
|---|
|  | 437 | if (p != OnePsiB->my_color_comm_ST_Psi) | 
|---|
|  | 438 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); | 
|---|
|  | 439 | LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; | 
|---|
|  | 440 | } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received | 
|---|
|  | 441 |  | 
|---|
|  | 442 | for (u=0;u<max_operators;u++) { | 
|---|
|  | 443 | a_ij = 0; | 
|---|
|  | 444 | b_ij = 0; | 
|---|
|  | 445 | if (LPsiDatA != NULL) { // calculate, reduce and send to all ... | 
|---|
|  | 446 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u); | 
|---|
|  | 447 | g=0; | 
|---|
|  | 448 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
|  | 449 | Index = LevS->GArray[g].Index; | 
|---|
|  | 450 | a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im); | 
|---|
|  | 451 | b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im); | 
|---|
|  | 452 | g++; | 
|---|
|  | 453 | } | 
|---|
|  | 454 | for (; g < LevS->MaxG; g++) { | 
|---|
|  | 455 | Index = LevS->GArray[g].Index; | 
|---|
|  | 456 | a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im); | 
|---|
|  | 457 | b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im); | 
|---|
|  | 458 | } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...) | 
|---|
|  | 459 | // sum up elements from all coefficients sharing processes | 
|---|
|  | 460 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
|  | 461 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
|  | 462 | a_ij = A_ij; | 
|---|
|  | 463 | b_ij = B_ij; | 
|---|
|  | 464 | // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!) | 
|---|
|  | 465 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
|  | 466 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
|  | 467 | } else { // receive ... | 
|---|
|  | 468 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
|  | 469 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); | 
|---|
|  | 470 | } | 
|---|
|  | 471 | // ... and store | 
|---|
|  | 472 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij); | 
|---|
|  | 473 | //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij); | 
|---|
|  | 474 | gsl_matrix_set(A[u], l, m, A_ij); | 
|---|
|  | 475 | gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m)); | 
|---|
|  | 476 | } | 
|---|
|  | 477 | } | 
|---|
|  | 478 | } | 
|---|
|  | 479 | } | 
|---|
|  | 480 | } | 
|---|
|  | 481 | // reset extra entries | 
|---|
|  | 482 | for (u=0;u<=max_operators;u++) { | 
|---|
|  | 483 | for (i=Num;i<AllocNum;i++) | 
|---|
|  | 484 | for (j=0;j<AllocNum;j++) | 
|---|
|  | 485 | gsl_matrix_set(A[u], i,j, 0.); | 
|---|
|  | 486 | for (i=Num;i<AllocNum;i++) | 
|---|
|  | 487 | for (j=0;j<AllocNum;j++) | 
|---|
|  | 488 | gsl_matrix_set(A[u], j,i, 0.); | 
|---|
|  | 489 | } | 
|---|
|  | 490 | // free lookups | 
|---|
|  | 491 | for (i=0;i<NDIM;i++) { | 
|---|
| [64fa9e] | 492 | Free(cos_lookup[i], "bla"); | 
|---|
|  | 493 | Free(sin_lookup[i], "bla"); | 
|---|
| [a0bcf1] | 494 | } | 
|---|
| [64fa9e] | 495 | Free(cos_lookup, "bla"); | 
|---|
|  | 496 | Free(sin_lookup, "bla"); | 
|---|
| [a0bcf1] | 497 |  | 
|---|
|  | 498 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me); | 
|---|
|  | 499 | // STEP 1: Initialize U = 1 | 
|---|
|  | 500 | gsl_matrix *U = gsl_matrix_alloc (AllocNum,AllocNum); | 
|---|
|  | 501 | gsl_matrix_set_identity(U); | 
|---|
|  | 502 |  | 
|---|
|  | 503 | // init merry-go-round array | 
|---|
|  | 504 | top = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: top"); | 
|---|
|  | 505 | bot = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: bot"); | 
|---|
|  | 506 | //debug(P,"init merry-go-round array"); | 
|---|
|  | 507 | for (i=0;i<AllocNum/2;i++) { | 
|---|
|  | 508 | top[i] = 2*i; | 
|---|
|  | 509 | bot[i] = 2*i+1; | 
|---|
|  | 510 | } | 
|---|
|  | 511 |  | 
|---|
|  | 512 | /*// print A matrices for debug | 
|---|
|  | 513 | if (P->Par.me == 0) | 
|---|
|  | 514 | for (u=0;u<max_operators+1;u++) { | 
|---|
|  | 515 | fprintf(stderr, "A[%i] = \n",u); | 
|---|
|  | 516 | for (i=0;i<Num;i++) { | 
|---|
|  | 517 | for (j=0;j<Num;j++) | 
|---|
|  | 518 | fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j)); | 
|---|
|  | 519 | fprintf(stderr, "\n"); | 
|---|
|  | 520 | } | 
|---|
|  | 521 | } | 
|---|
|  | 522 | */ | 
|---|
|  | 523 | if (Num != 1) { | 
|---|
|  | 524 | // one- or multi-process case? | 
|---|
|  | 525 | if (((AllocNum % 2) == 0) && (ProcNum != 1) && ((AllocNum / 2) % ProcNum == 0)) { | 
|---|
|  | 526 | max_rounds = (AllocNum / 2)/ProcNum;  // each process must perform multiple rotations per step of a set | 
|---|
|  | 527 | //fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, ProcRank, ProcNum, max_rounds); | 
|---|
|  | 528 | // allocate column vectors for interchange of columns | 
|---|
|  | 529 | //debug(P,"allocate column vectors for interchange of columns"); | 
|---|
|  | 530 | c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c"); | 
|---|
|  | 531 | s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s"); | 
|---|
|  | 532 | c_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: c_all"); | 
|---|
|  | 533 | s_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: s_all"); | 
|---|
|  | 534 | rcounts = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rcounts"); | 
|---|
|  | 535 | rdispls = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rdispls"); | 
|---|
|  | 536 | /*    // print starting values of index generation tables top and bot | 
|---|
|  | 537 | fprintf(stderr,"top\t"); | 
|---|
|  | 538 | for (k=0;k<AllocNum/2;k++) | 
|---|
|  | 539 | fprintf(stderr,"%i\t", top[k]); | 
|---|
|  | 540 | fprintf(stderr,"\n"); | 
|---|
|  | 541 | fprintf(stderr,"bot\t"); | 
|---|
|  | 542 | for (k=0;k<AllocNum/2;k++) | 
|---|
|  | 543 | fprintf(stderr,"%i\t", bot[k]); | 
|---|
|  | 544 | fprintf(stderr,"\n");*/ | 
|---|
|  | 545 | // establish communication partners | 
|---|
|  | 546 | //debug(P,"establish communication partners"); | 
|---|
|  | 547 | if (ProcRank == 0) { | 
|---|
|  | 548 | tagS0 = WannierALTag;   // left p0 always remains left p0 | 
|---|
|  | 549 | } else { | 
|---|
|  | 550 | tagS0 = ProcRank == ProcNum - 1 ? WannierARTag : WannierALTag; // left p_last becomes right p_last | 
|---|
|  | 551 | } | 
|---|
|  | 552 | tagS1 = ProcRank == 0 ? WannierALTag : WannierARTag; // right p0 always goes into left p1 | 
|---|
|  | 553 | tagR0 = WannierALTag; // | 
|---|
|  | 554 | tagR1 = WannierARTag; // first process | 
|---|
|  | 555 | if (ProcRank == 0) { | 
|---|
|  | 556 | left = ProcNum-1; | 
|---|
|  | 557 | right = 1; | 
|---|
|  | 558 | Lsend = 0; | 
|---|
|  | 559 | Rsend = 1; | 
|---|
|  | 560 | Lrecv = 0; | 
|---|
|  | 561 | Rrecv = 1; | 
|---|
|  | 562 | } else if (ProcRank == ProcNum - 1) { | 
|---|
|  | 563 | left = ProcRank - 1; | 
|---|
|  | 564 | right = 0; | 
|---|
|  | 565 | Lsend = ProcRank; | 
|---|
|  | 566 | Rsend = ProcRank - 1; | 
|---|
|  | 567 | Lrecv = ProcRank - 1; | 
|---|
|  | 568 | Rrecv = ProcRank; | 
|---|
|  | 569 | } else { | 
|---|
|  | 570 | left = ProcRank - 1; | 
|---|
|  | 571 | right = ProcRank + 1; | 
|---|
|  | 572 | Lsend = ProcRank+1; | 
|---|
|  | 573 | Rsend = ProcRank - 1; | 
|---|
|  | 574 | Lrecv = ProcRank - 1; | 
|---|
|  | 575 | Rrecv = ProcRank+1; | 
|---|
|  | 576 | } | 
|---|
|  | 577 | //fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv); | 
|---|
|  | 578 | // allocate eigenvector stuff | 
|---|
|  | 579 | //debug(P,"allocate eigenvector stuff"); | 
|---|
|  | 580 | G = gsl_matrix_calloc (2,2); | 
|---|
|  | 581 | h = gsl_vector_alloc (2); | 
|---|
|  | 582 | eval = gsl_vector_alloc (2); | 
|---|
|  | 583 | evec = gsl_matrix_alloc (2,2); | 
|---|
|  | 584 | w = gsl_eigen_symmv_alloc(2); | 
|---|
|  | 585 | // initialise A_loc | 
|---|
|  | 586 | //debug(P,"initialise A_loc"); | 
|---|
|  | 587 | for (k=0;k<max_operators+1;k++) { | 
|---|
|  | 588 | //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]"); | 
|---|
|  | 589 | Around[k] = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Around[k]"); | 
|---|
|  | 590 | Atotal[k] = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Atotal[k]"); | 
|---|
|  | 591 | Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]"); | 
|---|
|  | 592 | //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds]; | 
|---|
|  | 593 |  | 
|---|
|  | 594 | for (round=0;round<max_rounds;round++) { | 
|---|
|  | 595 | Aloc[k][2*round] = &Around[k][AllocNum*(2*round)]; | 
|---|
|  | 596 | Aloc[k][2*round+1] = &Around[k][AllocNum*(2*round+1)]; | 
|---|
|  | 597 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 598 | Aloc[k][2*round][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round)); | 
|---|
|  | 599 | Aloc[k][2*round+1][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round)+1); | 
|---|
|  | 600 | //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]); | 
|---|
|  | 601 | } | 
|---|
|  | 602 | } | 
|---|
|  | 603 | } | 
|---|
|  | 604 | // initialise U_loc | 
|---|
|  | 605 | //debug(P,"initialise U_loc"); | 
|---|
|  | 606 | //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc"); | 
|---|
|  | 607 | Uround = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Uround"); | 
|---|
|  | 608 | Utotal = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Utotal"); | 
|---|
|  | 609 | Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc"); | 
|---|
|  | 610 | //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds]; | 
|---|
|  | 611 | for (round=0;round<max_rounds;round++) { | 
|---|
|  | 612 | Uloc[2*round] = &Uround[AllocNum*(2*round)]; | 
|---|
|  | 613 | Uloc[2*round+1] = &Uround[AllocNum*(2*round+1)]; | 
|---|
|  | 614 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 615 | Uloc[2*round][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round)); | 
|---|
|  | 616 | Uloc[2*round+1][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round)+1); | 
|---|
|  | 617 | //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]); | 
|---|
|  | 618 | } | 
|---|
|  | 619 | } | 
|---|
|  | 620 | // now comes the iteration loop | 
|---|
|  | 621 | //debug(P,"now comes the iteration loop"); | 
|---|
|  | 622 | it_steps = 0; | 
|---|
|  | 623 | do { | 
|---|
|  | 624 | it_steps++; | 
|---|
|  | 625 | fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps); | 
|---|
|  | 626 | for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time | 
|---|
|  | 627 | //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); | 
|---|
|  | 628 | for (round = 0; round < max_rounds;round++) { | 
|---|
|  | 629 | start = ProcRank * max_rounds + round; | 
|---|
|  | 630 | // get indices | 
|---|
|  | 631 | i = top[start] < bot[start] ? top[start] : bot[start]; // minimum of the two indices | 
|---|
|  | 632 | iloc = top[start] < bot[start] ? 0 : 1; | 
|---|
|  | 633 | j = top[start] > bot[start] ? top[start] : bot[start]; // maximum of the two indices: thus j >  i | 
|---|
|  | 634 | jloc =  top[start] > bot[start] ? 0 : 1; | 
|---|
|  | 635 | //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc); | 
|---|
|  | 636 |  | 
|---|
|  | 637 | // calculate rotation angle, i.e. c and s | 
|---|
|  | 638 | //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j); | 
|---|
|  | 639 | gsl_matrix_set_zero(G); | 
|---|
|  | 640 | for (k=0;k<max_operators;k++) { // go through all operators ... | 
|---|
|  | 641 | // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] | 
|---|
|  | 642 | //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); | 
|---|
|  | 643 | //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); | 
|---|
|  | 644 | gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); | 
|---|
|  | 645 | gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); | 
|---|
|  | 646 |  | 
|---|
|  | 647 | // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] | 
|---|
|  | 648 | for (l=0;l<2;l++) | 
|---|
|  | 649 | for (m=0;m<2;m++) | 
|---|
|  | 650 | gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); | 
|---|
|  | 651 | } | 
|---|
|  | 652 | //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); | 
|---|
|  | 653 | // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G | 
|---|
|  | 654 | gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G | 
|---|
|  | 655 | index = gsl_vector_max_index (eval);  // get biggest eigenvalue | 
|---|
|  | 656 | x = gsl_matrix_get(evec, 0, index); | 
|---|
|  | 657 | y = gsl_matrix_get(evec, 1, index) * x/fabs(x); | 
|---|
|  | 658 | x = fabs(x);  // ensure x>=0 so that rotation angles remain smaller Pi/4 | 
|---|
|  | 659 | //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); | 
|---|
|  | 660 | // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] | 
|---|
|  | 661 | r = sqrt(x*x + y*y); | 
|---|
|  | 662 | c[round] = sqrt((x + r) / (2*r)); | 
|---|
|  | 663 | s[round] = y / sqrt(2*r*(x+r)); | 
|---|
|  | 664 | // [[c,s],[-s,c]]= V_small | 
|---|
|  | 665 | //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]); | 
|---|
|  | 666 |  | 
|---|
|  | 667 | //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); | 
|---|
|  | 668 | // V_loc = V_loc * V_small | 
|---|
|  | 669 | //debug(P,"apply rotation to local U"); | 
|---|
|  | 670 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 671 | a_i = Uloc[2*round+iloc][l]; | 
|---|
|  | 672 | a_j = Uloc[2*round+jloc][l]; | 
|---|
|  | 673 | Uloc[2*round+iloc][l] =  c[round] * a_i + s[round] * a_j; | 
|---|
|  | 674 | Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j; | 
|---|
|  | 675 | } | 
|---|
|  | 676 | }  // end of round | 
|---|
|  | 677 | // circulate the rotation angles | 
|---|
|  | 678 | //debug(P,"circulate the rotation angles"); | 
|---|
|  | 679 | MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *comm);   // MPI_Allgather is waaaaay faster than ring circulation | 
|---|
|  | 680 | MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *comm); | 
|---|
|  | 681 | //m = start; | 
|---|
|  | 682 | for (l=0;l<AllocNum/2;l++) { // for each process | 
|---|
|  | 683 | // we have V_small from process k | 
|---|
|  | 684 | //debug(P,"Apply V_small from other process"); | 
|---|
|  | 685 | i = top[l] < bot[l] ? top[l] : bot[l]; // minimum of the two indices | 
|---|
|  | 686 | j = top[l] > bot[l] ? top[l] : bot[l]; // maximum of the two indices: thus j >  i | 
|---|
|  | 687 | iloc = top[l] < bot[l] ? 0 : 1; | 
|---|
|  | 688 | jloc =  top[l] > bot[l] ? 0 : 1; | 
|---|
|  | 689 | for (m=0;m<max_rounds;m++) { | 
|---|
|  | 690 | //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j); | 
|---|
|  | 691 | // apply row rotation to each A[k] | 
|---|
|  | 692 | for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! | 
|---|
|  | 693 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); | 
|---|
|  | 694 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); | 
|---|
|  | 695 | a_i = Aloc[k][2*m+iloc][i]; | 
|---|
|  | 696 | a_j = Aloc[k][2*m+iloc][j]; | 
|---|
|  | 697 | Aloc[k][2*m+iloc][i] =  c_all[l] * a_i + s_all[l] * a_j; | 
|---|
|  | 698 | Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j; | 
|---|
|  | 699 | a_i = Aloc[k][2*m+jloc][i]; | 
|---|
|  | 700 | a_j = Aloc[k][2*m+jloc][j]; | 
|---|
|  | 701 | Aloc[k][2*m+jloc][i] =  c_all[l] * a_i + s_all[l] * a_j; | 
|---|
|  | 702 | Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j; | 
|---|
|  | 703 | //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); | 
|---|
|  | 704 | //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); | 
|---|
|  | 705 | } | 
|---|
|  | 706 | } | 
|---|
|  | 707 | } | 
|---|
|  | 708 | // apply rotation to local operator matrices | 
|---|
|  | 709 | // A_loc = A_loc * V_small | 
|---|
|  | 710 | //debug(P,"apply rotation to local operator matrices A[k]"); | 
|---|
|  | 711 | for (m=0;m<max_rounds;m++) { | 
|---|
|  | 712 | start = ProcRank * max_rounds + m; | 
|---|
|  | 713 | iloc = top[start] < bot[start] ? 0 : 1; | 
|---|
|  | 714 | jloc =  top[start] > bot[start] ? 0 : 1; | 
|---|
|  | 715 | for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! | 
|---|
|  | 716 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 717 | // Columns, i and j belong to this process only! | 
|---|
|  | 718 | a_i = Aloc[k][2*m+iloc][l]; | 
|---|
|  | 719 | a_j = Aloc[k][2*m+jloc][l]; | 
|---|
|  | 720 | Aloc[k][2*m+iloc][l] =  c[m] * a_i + s[m] * a_j; | 
|---|
|  | 721 | Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j; | 
|---|
|  | 722 | //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]); | 
|---|
|  | 723 | } | 
|---|
|  | 724 | } | 
|---|
|  | 725 | } | 
|---|
|  | 726 | // Shuffling of these round's columns to prepare next rotation set | 
|---|
|  | 727 | for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! | 
|---|
|  | 728 | // extract columns from A | 
|---|
|  | 729 | //debug(P,"extract columns from A"); | 
|---|
|  | 730 | shiftcolumns(*comm, Aloc[k], AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1); | 
|---|
|  | 731 |  | 
|---|
|  | 732 | } | 
|---|
|  | 733 | // and also for V ... | 
|---|
|  | 734 | //debug(P,"extract columns from U"); | 
|---|
|  | 735 | shiftcolumns(*comm, Uloc, AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1); | 
|---|
|  | 736 |  | 
|---|
|  | 737 |  | 
|---|
|  | 738 | // and merry-go-round for the indices too | 
|---|
|  | 739 | //debug(P,"and merry-go-round for the indices too"); | 
|---|
|  | 740 | music(top, bot, AllocNum/2); | 
|---|
|  | 741 | } | 
|---|
|  | 742 |  | 
|---|
|  | 743 | //fprintf(stderr,"(%i) STEP 4\n",P->Par.me); | 
|---|
|  | 744 | // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 | 
|---|
|  | 745 | old_spread = Spread; | 
|---|
|  | 746 | spread = 0.; | 
|---|
|  | 747 | for(k=0;k<max_operators;k++) {   // go through all self-adjoint operators | 
|---|
|  | 748 | for (i=0; i < 2*max_rounds; i++) {  // go through all wave functions | 
|---|
|  | 749 | spread += Aloc[k][i][i+ProcRank*2*max_rounds]*Aloc[k][i][i+ProcRank*2*max_rounds]; | 
|---|
|  | 750 | //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i); | 
|---|
|  | 751 | } | 
|---|
|  | 752 | } | 
|---|
|  | 753 | MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *comm); | 
|---|
|  | 754 | //Spread = spread; | 
|---|
|  | 755 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier); | 
|---|
|  | 756 | else fprintf(stderr,"%2.9e\n",Spread); | 
|---|
|  | 757 | // STEP 5: check change of variance | 
|---|
|  | 758 | } while (fabs(old_spread-Spread) >= R->EpsWannier); | 
|---|
|  | 759 | // end of iterative diagonalization loop: We have found our final orthogonal U! | 
|---|
|  | 760 |  | 
|---|
|  | 761 | for (l=0;l<ProcNum;l++) | 
|---|
|  | 762 | rcounts[l] = AllocNum; | 
|---|
|  | 763 | //debug(P,"allgather U"); | 
|---|
|  | 764 | for (round=0;round<2*max_rounds;round++) { | 
|---|
|  | 765 | for (l=0;l<ProcNum;l++) | 
|---|
|  | 766 | rdispls[l] = (l*2*max_rounds + round)*AllocNum; | 
|---|
|  | 767 | MPI_Allgatherv(Uloc[round], AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *comm); | 
|---|
|  | 768 | } | 
|---|
|  | 769 | for (k=0;k<AllocNum;k++) { | 
|---|
|  | 770 | for(l=0;l<AllocNum;l++) { | 
|---|
|  | 771 | gsl_matrix_set(U,k,l, Utotal[l+k*AllocNum]); | 
|---|
|  | 772 | } | 
|---|
|  | 773 | } | 
|---|
|  | 774 |  | 
|---|
|  | 775 |  | 
|---|
|  | 776 | // after one set, gather A[k] from all and calculate spread | 
|---|
|  | 777 | for (l=0;l<ProcNum;l++) | 
|---|
|  | 778 | rcounts[l] = AllocNum; | 
|---|
|  | 779 | //debug(P,"gather A[k] for spread"); | 
|---|
|  | 780 | for (u=0;u<max_operators+1;u++) {// one extra for B matrix ! | 
|---|
|  | 781 | //debug(P,"A[k] all gather"); | 
|---|
|  | 782 | for (round=0;round<2*max_rounds;round++) { | 
|---|
|  | 783 | for (l=0;l<ProcNum;l++) | 
|---|
|  | 784 | rdispls[l] = (l*2*max_rounds + round)*AllocNum; | 
|---|
|  | 785 | MPI_Allgatherv(Aloc[u][round], AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *comm); | 
|---|
|  | 786 | } | 
|---|
|  | 787 | for (k=0;k<AllocNum;k++) { | 
|---|
|  | 788 | for(l=0;l<AllocNum;l++) { | 
|---|
|  | 789 | gsl_matrix_set(A[u],k,l, Atotal[u][l+k*AllocNum]); | 
|---|
|  | 790 | } | 
|---|
|  | 791 | } | 
|---|
|  | 792 | } | 
|---|
|  | 793 |  | 
|---|
|  | 794 | // free eigenvector stuff | 
|---|
|  | 795 | gsl_vector_free(h); | 
|---|
|  | 796 | gsl_matrix_free(G); | 
|---|
|  | 797 | gsl_eigen_symmv_free(w); | 
|---|
|  | 798 | gsl_vector_free(eval); | 
|---|
|  | 799 | gsl_matrix_free(evec); | 
|---|
|  | 800 | // Free column vectors | 
|---|
|  | 801 | for (k=0;k<max_operators+1;k++) { | 
|---|
| [64fa9e] | 802 | Free(Atotal[k], "bla"); | 
|---|
|  | 803 | Free(Around[k], "bla"); | 
|---|
| [a0bcf1] | 804 | } | 
|---|
| [64fa9e] | 805 | Free(Uround, "bla"); | 
|---|
|  | 806 | Free(Utotal, "bla"); | 
|---|
|  | 807 | Free(c_all, "bla"); | 
|---|
|  | 808 | Free(s_all, "bla"); | 
|---|
|  | 809 | Free(c, "bla"); | 
|---|
|  | 810 | Free(s, "bla"); | 
|---|
|  | 811 | Free(rcounts, "bla"); | 
|---|
|  | 812 | Free(rdispls, "bla"); | 
|---|
| [a0bcf1] | 813 |  | 
|---|
|  | 814 | } else { | 
|---|
|  | 815 |  | 
|---|
|  | 816 | c = (double *) Malloc(sizeof(double), "ComputeMLWF: c"); | 
|---|
|  | 817 | s = (double *) Malloc(sizeof(double), "ComputeMLWF: s"); | 
|---|
|  | 818 | G = gsl_matrix_calloc (2,2); | 
|---|
|  | 819 | h = gsl_vector_alloc (2); | 
|---|
|  | 820 | eval = gsl_vector_alloc (2); | 
|---|
|  | 821 | evec = gsl_matrix_alloc (2,2); | 
|---|
|  | 822 | w = gsl_eigen_symmv_alloc(2); | 
|---|
|  | 823 | //debug(P,"now comes the iteration loop"); | 
|---|
|  | 824 | it_steps=0; | 
|---|
|  | 825 | do { | 
|---|
|  | 826 | it_steps++; | 
|---|
|  | 827 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me); | 
|---|
|  | 828 | fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps); | 
|---|
|  | 829 | for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time | 
|---|
|  | 830 | //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); | 
|---|
|  | 831 | // STEP 3: for all index pairs 0<= i<j <AllocNum | 
|---|
|  | 832 | for (ProcRank=0;ProcRank<AllocNum/2;ProcRank++) { | 
|---|
|  | 833 | // get indices | 
|---|
|  | 834 | i = top[ProcRank] < bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // minimum of the two indices | 
|---|
|  | 835 | j = top[ProcRank] > bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // maximum of the two indices: thus j >  i | 
|---|
|  | 836 | //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j); | 
|---|
|  | 837 | // STEP 3a: Calculate G | 
|---|
|  | 838 | gsl_matrix_set_zero(G); | 
|---|
|  | 839 |  | 
|---|
|  | 840 | for (k=0;k<max_operators;k++) { // go through all operators ... | 
|---|
|  | 841 | // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] | 
|---|
|  | 842 | //fprintf(stderr,"(%i) k%i [a_ii - a_ij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,i), gsl_matrix_get(A[k],j,j),gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j)); | 
|---|
|  | 843 | //fprintf(stderr,"(%i) k%i [a_ij + a_jij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,j), gsl_matrix_get(A[k],j,i),gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i)); | 
|---|
|  | 844 | gsl_vector_set(h, 0, gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j)); | 
|---|
|  | 845 | gsl_vector_set(h, 1, gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i)); | 
|---|
|  | 846 | //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1)); | 
|---|
|  | 847 |  | 
|---|
|  | 848 | // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] | 
|---|
|  | 849 | for (l=0;l<2;l++) | 
|---|
|  | 850 | for (m=0;m<2;m++) | 
|---|
|  | 851 | gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); | 
|---|
|  | 852 | } | 
|---|
|  | 853 |  | 
|---|
|  | 854 | //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); | 
|---|
|  | 855 | // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G | 
|---|
|  | 856 | gsl_eigen_symmv(G, eval, evec, w);  // calculates eigenvalues and eigenvectors of G | 
|---|
|  | 857 |  | 
|---|
|  | 858 | index = gsl_vector_max_index (eval);  // get biggest eigenvalue | 
|---|
|  | 859 | x = gsl_matrix_get(evec, 0, index); | 
|---|
|  | 860 | y = gsl_matrix_get(evec, 1, index) * x/fabs(x); | 
|---|
|  | 861 | //z = gsl_matrix_get(evec, 2, index) * x/fabs(x); | 
|---|
|  | 862 | x = fabs(x);  // ensure x>=0 so that rotation angles remain smaller Pi/4 | 
|---|
|  | 863 |  | 
|---|
|  | 864 | //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); | 
|---|
|  | 865 | // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] | 
|---|
|  | 866 | r = sqrt(x*x + y*y); // + z*z); | 
|---|
|  | 867 | c[0] = sqrt((x + r) / (2*r)); | 
|---|
|  | 868 | s[0] = y / sqrt(2*r*(x+r)); //, -z / sqrt(2*r*(x+r))); | 
|---|
|  | 869 | //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]); | 
|---|
|  | 870 |  | 
|---|
|  | 871 | //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j); | 
|---|
|  | 872 | // STEP 3d: apply rotation R to rows and columns of A^{(k)} | 
|---|
|  | 873 | for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! | 
|---|
|  | 874 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 875 | // Rows | 
|---|
|  | 876 | a_i = gsl_matrix_get(A[k],i,l); | 
|---|
|  | 877 | a_j = gsl_matrix_get(A[k],j,l); | 
|---|
|  | 878 | gsl_matrix_set(A[k], i, l,  c[0] * a_i + s[0] * a_j); | 
|---|
|  | 879 | gsl_matrix_set(A[k], j, l, -s[0] * a_i + c[0] * a_j); | 
|---|
|  | 880 | } | 
|---|
|  | 881 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 882 | // Columns | 
|---|
|  | 883 | a_i = gsl_matrix_get(A[k],l,i); | 
|---|
|  | 884 | a_j = gsl_matrix_get(A[k],l,j); | 
|---|
|  | 885 | gsl_matrix_set(A[k], l, i,  c[0] * a_i + s[0] * a_j); | 
|---|
|  | 886 | gsl_matrix_set(A[k], l, j, -s[0] * a_i + c[0] * a_j); | 
|---|
|  | 887 | } | 
|---|
|  | 888 | } | 
|---|
|  | 889 | //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); | 
|---|
|  | 890 | // STEP 3e: apply U = R*U | 
|---|
|  | 891 | for (l=0;l<AllocNum;l++) { | 
|---|
|  | 892 | a_i = gsl_matrix_get(U,i,l); | 
|---|
|  | 893 | a_j = gsl_matrix_get(U,j,l); | 
|---|
|  | 894 | gsl_matrix_set(U, i, l,  c[0] * a_i + s[0] * a_j); | 
|---|
|  | 895 | gsl_matrix_set(U, j, l, -s[0] * a_i + c[0] * a_j); | 
|---|
|  | 896 | } | 
|---|
|  | 897 | } | 
|---|
|  | 898 | // and merry-go-round for the indices too | 
|---|
|  | 899 | //debug(P,"and merry-go-round for the indices too"); | 
|---|
|  | 900 | if (AllocNum > 2) music(top, bot, AllocNum/2); | 
|---|
|  | 901 | } | 
|---|
|  | 902 |  | 
|---|
|  | 903 | //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me); | 
|---|
|  | 904 | // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 | 
|---|
|  | 905 | old_spread = spread; | 
|---|
|  | 906 | spread = 0; | 
|---|
|  | 907 | for(k=0;k<max_operators;k++) {   // go through all self-adjoint operators | 
|---|
|  | 908 | for (i=0; i < AllocNum; i++) {  // go through all wave functions | 
|---|
|  | 909 | spread += pow(gsl_matrix_get(A[k],i,i),2); | 
|---|
|  | 910 | } | 
|---|
|  | 911 | } | 
|---|
|  | 912 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier); | 
|---|
|  | 913 | else fprintf(stderr,"%2.9e\n",spread); | 
|---|
|  | 914 | // STEP 5: check change of variance | 
|---|
|  | 915 | } while (fabs(old_spread-spread) >= R->EpsWannier); | 
|---|
|  | 916 | // end of iterative diagonalization loop: We have found our final orthogonal U! | 
|---|
|  | 917 | gsl_vector_free(h); | 
|---|
|  | 918 | gsl_matrix_free(G); | 
|---|
|  | 919 | gsl_eigen_symmv_free(w); | 
|---|
|  | 920 | gsl_vector_free(eval); | 
|---|
|  | 921 | gsl_matrix_free(evec); | 
|---|
| [64fa9e] | 922 | Free(c, "bla"); | 
|---|
|  | 923 | Free(s, "bla"); | 
|---|
| [a0bcf1] | 924 | } | 
|---|
|  | 925 |  | 
|---|
|  | 926 | if(P->Call.out[ReadOut]) {// && P->Par.me == 0) { | 
|---|
|  | 927 | //debug(P,"output total U"); | 
|---|
|  | 928 | fprintf(stderr,"(%i) U_tot = \n",P->Par.me); | 
|---|
|  | 929 | for (k=0;k<Num;k++) { | 
|---|
|  | 930 | for (l=0;l<Num;l++) | 
|---|
|  | 931 | fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k)); | 
|---|
|  | 932 | fprintf(stderr,"\n"); | 
|---|
|  | 933 | } | 
|---|
|  | 934 | } | 
|---|
|  | 935 | } | 
|---|
| [64fa9e] | 936 | Free(top, "bla"); | 
|---|
|  | 937 | Free(bot, "bla"); | 
|---|
| [a0bcf1] | 938 |  | 
|---|
|  | 939 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Allocating buffer mem\n",P->Par.me); | 
|---|
|  | 940 | // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle | 
|---|
|  | 941 | Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here | 
|---|
|  | 942 | fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer"); | 
|---|
|  | 943 | for (l=0;l<Num;l++) // allocate for each local wave function | 
|---|
|  | 944 | coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l]; | 
|---|
|  | 945 |  | 
|---|
|  | 946 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me); | 
|---|
|  | 947 | l=-1; // to access U matrix element (0..Num-1) | 
|---|
|  | 948 | k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1) | 
|---|
|  | 949 | for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) {  // go through all wave functions | 
|---|
|  | 950 | OnePsiA = &Psi->AllPsiStatus[i];    // grab OnePsiA | 
|---|
|  | 951 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 952 | l++;  // increase l if it is occupied wave function | 
|---|
|  | 953 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local? | 
|---|
|  | 954 | k++; // increase k only if it is a local, non-extra orbital wave function | 
|---|
|  | 955 | LPsiDatA = (fftw_complex *) coeffs_buffer[k];    // new coeffs first go to copy buffer, old ones must not be overwritten yet | 
|---|
|  | 956 | SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG);  // zero buffer part | 
|---|
|  | 957 | } else | 
|---|
|  | 958 | LPsiDatA = NULL;  // otherwise processes won't enter second loop, though they're supposed to send coefficients! | 
|---|
|  | 959 |  | 
|---|
|  | 960 | m = -1; // to access U matrix element (0..Num-1) | 
|---|
|  | 961 | for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) {  // go through all wave functions | 
|---|
|  | 962 | OnePsiB = &Psi->AllPsiStatus[j];    // grab OnePsiB | 
|---|
|  | 963 | if (OnePsiB->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 964 | m++;  // increase m if it is occupied wave function | 
|---|
|  | 965 | if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
|  | 966 | LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; | 
|---|
|  | 967 | else | 
|---|
|  | 968 | LOnePsiB = NULL; | 
|---|
|  | 969 | if (LOnePsiB == NULL) {   // if it's not local ... receive it from respective process into TempPsi | 
|---|
|  | 970 | RecvSource = OnePsiB->my_color_comm_ST_Psi; | 
|---|
|  | 971 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); | 
|---|
|  | 972 | LPsiDatB=LevS->LPsi->TempPsi; | 
|---|
|  | 973 | } else {                  // .. otherwise send it to all other processes (Max_me... - 1) | 
|---|
|  | 974 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) | 
|---|
|  | 975 | if (p != OnePsiB->my_color_comm_ST_Psi) | 
|---|
|  | 976 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); | 
|---|
|  | 977 | LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; | 
|---|
|  | 978 | } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received | 
|---|
|  | 979 |  | 
|---|
|  | 980 | if (LPsiDatA != NULL) { | 
|---|
|  | 981 | double tmp = gsl_matrix_get(U,l,m); | 
|---|
|  | 982 | g=0; | 
|---|
|  | 983 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
|  | 984 | LPsiDatA[g].re += LPsiDatB[g].re * tmp; | 
|---|
|  | 985 | LPsiDatA[g].im += LPsiDatB[g].im * tmp; | 
|---|
|  | 986 | g++; | 
|---|
|  | 987 | } | 
|---|
|  | 988 | for (; g < LevS->MaxG; g++) { | 
|---|
|  | 989 | LPsiDatA[g].re += LPsiDatB[g].re * tmp; | 
|---|
|  | 990 | LPsiDatA[g].im += LPsiDatB[g].im * tmp; | 
|---|
|  | 991 | } | 
|---|
|  | 992 | } | 
|---|
|  | 993 | } | 
|---|
|  | 994 | } | 
|---|
|  | 995 | } | 
|---|
|  | 996 | } | 
|---|
|  | 997 | gsl_matrix_free(U); | 
|---|
|  | 998 |  | 
|---|
|  | 999 | if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me); | 
|---|
|  | 1000 | // now, as all wave functions are updated, swap the buffer | 
|---|
|  | 1001 | l = -1; | 
|---|
|  | 1002 | for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function | 
|---|
|  | 1003 | if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { | 
|---|
|  | 1004 | l++; | 
|---|
|  | 1005 | if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo); | 
|---|
|  | 1006 | LPsiDatA = (fftw_complex *)coeffs_buffer[l]; | 
|---|
|  | 1007 | LPsiDatB = LevS->LPsi->LocalPsi[l]; | 
|---|
|  | 1008 | for (g=0;g<LevS->MaxG;g++) { | 
|---|
|  | 1009 | LPsiDatB[g].re = LPsiDatA[g].re; | 
|---|
|  | 1010 | LPsiDatB[g].im = LPsiDatA[g].im; | 
|---|
|  | 1011 | } | 
|---|
|  | 1012 | // recalculating non-local form factors which are coefficient dependent! | 
|---|
|  | 1013 | CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo); | 
|---|
|  | 1014 | } | 
|---|
|  | 1015 | } | 
|---|
|  | 1016 | // and free allocated buffer memory | 
|---|
| [64fa9e] | 1017 | Free(coeffs_buffer, "bla"); | 
|---|
| [a0bcf1] | 1018 |  | 
|---|
|  | 1019 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7\n",P->Par.me); | 
|---|
|  | 1020 | // STEP 7: Compute Wannier centers, spread and printout | 
|---|
|  | 1021 | // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital | 
|---|
|  | 1022 | if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me); | 
|---|
|  | 1023 |  | 
|---|
|  | 1024 | switch (Lat->Psi.PsiST) { | 
|---|
|  | 1025 | case SpinDouble: | 
|---|
|  | 1026 | strncpy(suffix,".spread.csv",18); | 
|---|
|  | 1027 | strncat(spin,"SpinDouble",12); | 
|---|
|  | 1028 | break; | 
|---|
|  | 1029 | case SpinUp: | 
|---|
|  | 1030 | strncpy(suffix,".spread_up.csv",18); | 
|---|
|  | 1031 | strncat(spin,"SpinUp",12); | 
|---|
|  | 1032 | break; | 
|---|
|  | 1033 | case SpinDown: | 
|---|
|  | 1034 | strncpy(suffix,".spread_down.csv",18); | 
|---|
|  | 1035 | strncat(spin,"SpinDown",12); | 
|---|
|  | 1036 | break; | 
|---|
|  | 1037 | } | 
|---|
|  | 1038 | if (P->Par.me_comm_ST == 0) { | 
|---|
|  | 1039 | if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level | 
|---|
|  | 1040 | OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level | 
|---|
|  | 1041 | else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile) | 
|---|
|  | 1042 | OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level | 
|---|
|  | 1043 | if (F->SpreadFile == NULL) { | 
|---|
|  | 1044 | Error(SomeError,"ComputeMLWF: Error opening Wannier File!\n"); | 
|---|
|  | 1045 | } else { | 
|---|
|  | 1046 | fprintf(F->SpreadFile,"#===== W A N N I E R  C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin); | 
|---|
|  | 1047 | fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n"); | 
|---|
|  | 1048 | } | 
|---|
|  | 1049 | } | 
|---|
|  | 1050 | old_spread = 0; | 
|---|
|  | 1051 | spread = 0; | 
|---|
|  | 1052 | i=-1; | 
|---|
|  | 1053 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
|  | 1054 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
|  | 1055 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 1056 | i++;  // increase l if it is occupied wave function | 
|---|
|  | 1057 | //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i); | 
|---|
|  | 1058 |  | 
|---|
|  | 1059 | // calculate Wannier Centre | 
|---|
|  | 1060 | for (j=0;j<NDIM;j++) { | 
|---|
|  | 1061 | WannierCentre[i][j] = Lat->RealBasisQ[j]/(2*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i)))); | 
|---|
|  | 1062 | if (WannierCentre[i][j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ | 
|---|
|  | 1063 | WannierCentre[i][j] = Lat->RealBasisQ[j] + WannierCentre[i][j]; | 
|---|
|  | 1064 | } | 
|---|
|  | 1065 |  | 
|---|
|  | 1066 | // store orbital spread and centre in file | 
|---|
|  | 1067 | tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2) | 
|---|
|  | 1068 | - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2) | 
|---|
|  | 1069 | - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2); | 
|---|
|  | 1070 | WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp; | 
|---|
|  | 1071 | //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]); | 
|---|
|  | 1072 | //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n", | 
|---|
|  | 1073 | //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]); | 
|---|
|  | 1074 | //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n", | 
|---|
|  | 1075 | //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]); | 
|---|
|  | 1076 |  | 
|---|
|  | 1077 | // gather all spreads | 
|---|
|  | 1078 | old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U) | 
|---|
|  | 1079 | for (k=0;k<max_operators;k++) | 
|---|
|  | 1080 | spread += pow(gsl_matrix_get(A[k],i,i),2); | 
|---|
|  | 1081 |  | 
|---|
|  | 1082 | // store calculated Wannier centre | 
|---|
|  | 1083 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local? | 
|---|
|  | 1084 | for (j=0;j<NDIM;j++) | 
|---|
|  | 1085 | Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j]; | 
|---|
|  | 1086 | } | 
|---|
|  | 1087 | } | 
|---|
|  | 1088 |  | 
|---|
|  | 1089 | // join Wannier orbital to groups with common centres under certain conditions | 
|---|
|  | 1090 | switch (R->CommonWannier) { | 
|---|
|  | 1091 | case 4: | 
|---|
|  | 1092 | debug(P,"Shifting each Wannier centers to cell center"); | 
|---|
|  | 1093 | for (i=0; i < Num; i++) {  // go through all occupied wave functions | 
|---|
|  | 1094 | for (j=0;j<NDIM;j++) | 
|---|
|  | 1095 | WannierCentre[i][j] = Lat->RealBasisQ[j]/2.; | 
|---|
|  | 1096 | } | 
|---|
|  | 1097 | break; | 
|---|
|  | 1098 | case 3: | 
|---|
|  | 1099 | debug(P,"Shifting Wannier centers individually to nearest grid point"); | 
|---|
|  | 1100 | for (i=0;i < Num; i++) {  // go through all wave functions | 
|---|
|  | 1101 | for (j=0;j<NDIM;j++) { | 
|---|
|  | 1102 | tmp = WannierCentre[i][j]/Lat->RealBasisQ[j]*(double)N[j]; | 
|---|
|  | 1103 | //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp)); | 
|---|
|  | 1104 | if (fabs((double)floor(tmp) - tmp) < fabs((double)ceil(tmp) - tmp)) | 
|---|
|  | 1105 | WannierCentre[i][j] = (double)floor(tmp)/(double)N[j]*Lat->RealBasisQ[j]; | 
|---|
|  | 1106 | else | 
|---|
|  | 1107 | WannierCentre[i][j] = (double)ceil(tmp)/(double)N[j]*Lat->RealBasisQ[j]; | 
|---|
|  | 1108 | } | 
|---|
|  | 1109 | } | 
|---|
|  | 1110 | break; | 
|---|
|  | 1111 | case 2: | 
|---|
|  | 1112 | debug(P,"Combining individual orbitals according to spread."); | 
|---|
|  | 1113 | //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me); | 
|---|
|  | 1114 | //debug(P,"finding partners"); | 
|---|
|  | 1115 | marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker"); | 
|---|
|  | 1116 | group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group"); | 
|---|
|  | 1117 | for (l=0;l<Num;l++) { | 
|---|
|  | 1118 | group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]");  // each must group must have one more as end marker | 
|---|
|  | 1119 | for (k=0;k<=Num;k++) | 
|---|
|  | 1120 | group[l][k] = -1; // reset partner group | 
|---|
|  | 1121 | } | 
|---|
|  | 1122 | for (k=0;k<Num;k++) | 
|---|
|  | 1123 | partner[k] = 0; | 
|---|
|  | 1124 | //debug(P,"mem allocated"); | 
|---|
|  | 1125 | // go for each orbital through every other, check distance against the sum of both spreads | 
|---|
|  | 1126 | // if smaller add to group of this orbital | 
|---|
|  | 1127 | for (l=0;l<Num;l++) { | 
|---|
|  | 1128 | j=0;  // index for partner group | 
|---|
|  | 1129 | for (k=0;k<Num;k++) {  // check this against l | 
|---|
|  | 1130 | Spread = 0.; | 
|---|
|  | 1131 | for (i=0;i<NDIM;i++) { | 
|---|
|  | 1132 | //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]); | 
|---|
|  | 1133 | Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]); | 
|---|
|  | 1134 | } | 
|---|
|  | 1135 | Spread = sqrt(Spread);  // distance in Spread | 
|---|
|  | 1136 | //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]); | 
|---|
|  | 1137 | if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread | 
|---|
|  | 1138 | group[l][j++] = k;  // add k to group of l | 
|---|
|  | 1139 | partner[l]++; | 
|---|
|  | 1140 | //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l); | 
|---|
|  | 1141 | } | 
|---|
|  | 1142 | } | 
|---|
|  | 1143 | } | 
|---|
|  | 1144 |  | 
|---|
|  | 1145 | // consistency, for each orbital check if this orbital is also in the group of each referred orbital | 
|---|
|  | 1146 | //debug(P,"checking consistency"); | 
|---|
|  | 1147 | totalflag = 1; | 
|---|
|  | 1148 | for (l=0;l<Num;l++) // checking l's group | 
|---|
|  | 1149 | for (k=0;k<Num;k++) { // k is partner index | 
|---|
|  | 1150 | if (group[l][k] != -1) {  // if current index k is a partner | 
|---|
|  | 1151 | flag = 0; | 
|---|
|  | 1152 | for(j=0;j<Num;j++) {  // go through each entry in l partner's partner group if l exists | 
|---|
|  | 1153 | if ((group[ group[l][k] ][j] == l)) | 
|---|
|  | 1154 | flag = 1; | 
|---|
|  | 1155 | } | 
|---|
|  | 1156 | //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]); | 
|---|
|  | 1157 | if (totalflag == 1) totalflag = flag; | 
|---|
|  | 1158 | } | 
|---|
|  | 1159 | } | 
|---|
|  | 1160 | // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres | 
|---|
|  | 1161 | //debug(P,"weight and calculate new centers for partner groups"); | 
|---|
|  | 1162 | for (l=0;l<=Num;l++) | 
|---|
|  | 1163 | marker[l] = 1; | 
|---|
|  | 1164 | if (totalflag) { | 
|---|
|  | 1165 | for (l=0;l<Num;l++) { // go through each orbital | 
|---|
|  | 1166 | if (marker[l] != 0) { // if it hasn't been reweighted | 
|---|
|  | 1167 | marker[l] = 0; | 
|---|
|  | 1168 | for (i=0;i<NDIM;i++) | 
|---|
|  | 1169 | q[i] = 0.; | 
|---|
|  | 1170 | j = 0; | 
|---|
|  | 1171 | while (group[l][j] != -1) { | 
|---|
|  | 1172 | marker[group[l][j]] = 0; | 
|---|
|  | 1173 | for (i=0;i<NDIM;i++) { | 
|---|
|  | 1174 | //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]); | 
|---|
|  | 1175 | q[i] += WannierCentre[ group[l][j] ][i]; | 
|---|
|  | 1176 | } | 
|---|
|  | 1177 | j++; | 
|---|
|  | 1178 | } | 
|---|
|  | 1179 | //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j); | 
|---|
|  | 1180 | for (i=0;i<NDIM;i++) {// weight by number of elements in partner group | 
|---|
|  | 1181 | q[i] /= (double)(j); | 
|---|
|  | 1182 | } | 
|---|
|  | 1183 |  | 
|---|
|  | 1184 | // put WannierCentre into own and all partners' | 
|---|
|  | 1185 | for (i=0;i<NDIM;i++) | 
|---|
|  | 1186 | WannierCentre[l][i] = q[i]; | 
|---|
|  | 1187 | j = 0; | 
|---|
|  | 1188 | while (group[l][j] != -1) { | 
|---|
|  | 1189 | for (i=0;i<NDIM;i++) | 
|---|
|  | 1190 | WannierCentre[group[l][j]][i] = q[i]; | 
|---|
|  | 1191 | j++; | 
|---|
|  | 1192 | } | 
|---|
|  | 1193 | } | 
|---|
|  | 1194 | } | 
|---|
|  | 1195 | } | 
|---|
|  | 1196 | if (P->Call.out[StepLeaderOut]) { | 
|---|
|  | 1197 | fprintf(stderr,"Summary:\n"); | 
|---|
|  | 1198 | fprintf(stderr,"========\n"); | 
|---|
|  | 1199 | for (i=0;i<Num;i++) | 
|---|
|  | 1200 | fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]); | 
|---|
|  | 1201 | } | 
|---|
|  | 1202 | //debug(P,"done"); | 
|---|
|  | 1203 |  | 
|---|
| [64fa9e] | 1204 | Free(marker, "bla"); | 
|---|
| [a0bcf1] | 1205 | for (l=0;l<Num;l++) | 
|---|
| [64fa9e] | 1206 | Free(group[l], "bla"); | 
|---|
|  | 1207 | Free(group, "bla"); | 
|---|
| [a0bcf1] | 1208 | break; | 
|---|
|  | 1209 | case 1: | 
|---|
|  | 1210 | debug(P,"Individual orbitals are changed to center of all."); | 
|---|
|  | 1211 | for (i=0;i<NDIM;i++)  // zero center of weight | 
|---|
|  | 1212 | q[i] = 0.; | 
|---|
|  | 1213 | for (k=0;k<Num;k++) | 
|---|
|  | 1214 | for (i=0;i<NDIM;i++) {  // sum up all orbitals each component | 
|---|
|  | 1215 | q[i] += WannierCentre[k][i]; | 
|---|
|  | 1216 | } | 
|---|
|  | 1217 | for (i=0;i<NDIM;i++)  // divide by number | 
|---|
|  | 1218 | q[i] /= Num; | 
|---|
|  | 1219 | for (k=0;k<Num;k++) | 
|---|
|  | 1220 | for (i=0;i<NDIM;i++) {  // put into this function's array | 
|---|
|  | 1221 | WannierCentre[k][i] = q[i]; | 
|---|
|  | 1222 | } | 
|---|
|  | 1223 | break; | 
|---|
|  | 1224 | case 0: | 
|---|
|  | 1225 | default: | 
|---|
|  | 1226 | break; | 
|---|
|  | 1227 | } | 
|---|
|  | 1228 |  | 
|---|
|  | 1229 | // put (new) WannierCentres into local ones and into file | 
|---|
|  | 1230 | i=-1; | 
|---|
|  | 1231 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
|  | 1232 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
|  | 1233 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 1234 | i++;  // increase l if it is occupied wave function | 
|---|
|  | 1235 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local? | 
|---|
|  | 1236 | for (j=0;j<NDIM;j++) | 
|---|
|  | 1237 | Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j]; | 
|---|
|  | 1238 | } | 
|---|
|  | 1239 | if (P->Par.me_comm_ST == 0) | 
|---|
|  | 1240 | fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]); | 
|---|
|  | 1241 | } | 
|---|
|  | 1242 | } | 
|---|
|  | 1243 | if (P->Par.me_comm_ST == 0) { | 
|---|
|  | 1244 | fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n"); | 
|---|
|  | 1245 | fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread); | 
|---|
|  | 1246 | } | 
|---|
|  | 1247 | fflush(F->SpreadFile); | 
|---|
|  | 1248 |  | 
|---|
|  | 1249 | // and the spread was calculated in the loop above | 
|---|
|  | 1250 | /* | 
|---|
|  | 1251 | i=-1; | 
|---|
|  | 1252 | for (l=0;l<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;l++) | 
|---|
|  | 1253 | if (Psi->AllPsiStatus[l].PsiType == type) { | 
|---|
|  | 1254 | i++; | 
|---|
|  | 1255 | spread = CalculateSpread(P,l); | 
|---|
|  | 1256 | tmp = gsl_matrix_get(A[max_operators],i,i) - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2) | 
|---|
|  | 1257 | - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2) | 
|---|
|  | 1258 | - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2); | 
|---|
|  | 1259 | if(P->Call.out[ValueOut]) fprintf(stderr, "(%i) Check of spread of %ith wave function: %lg against %lg\n",P->Par.me, i, Psi->AddData[i].WannierSpread, tmp); | 
|---|
|  | 1260 | }*/ | 
|---|
|  | 1261 | // free all remaining memory | 
|---|
|  | 1262 | for (k=0;k<max_operators+1;k++) | 
|---|
|  | 1263 | gsl_matrix_free(A[k]); | 
|---|
|  | 1264 | } | 
|---|
|  | 1265 |  | 
|---|
|  | 1266 | /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre. | 
|---|
|  | 1267 | * \param *P Problem at hand | 
|---|
|  | 1268 | * \return 1 - success, 0 - failure | 
|---|
|  | 1269 | */ | 
|---|
|  | 1270 | int ParseWannierFile(struct Problem *P) | 
|---|
|  | 1271 | { | 
|---|
|  | 1272 | struct Lattice *Lat = &P->Lat; | 
|---|
|  | 1273 | struct RunStruct *R = &P->R; | 
|---|
|  | 1274 | struct Psis *Psi = &Lat->Psi; | 
|---|
|  | 1275 | struct OnePsiElement *OnePsiA; | 
|---|
|  | 1276 | int i,l,j, msglen; | 
|---|
|  | 1277 | FILE *SpreadFile; | 
|---|
|  | 1278 | char tagname[255]; | 
|---|
|  | 1279 | char suffix[18]; | 
|---|
|  | 1280 | double WannierCentre[NDIM+1]; // combined centre and spread | 
|---|
|  | 1281 | MPI_Status status; | 
|---|
|  | 1282 | enum PsiTypeTag type = Occupied; | 
|---|
|  | 1283 | int signal = 0; // 1 - ok, 0 - error | 
|---|
|  | 1284 |  | 
|---|
|  | 1285 | switch (Lat->Psi.PsiST) { | 
|---|
|  | 1286 | case SpinDouble: | 
|---|
|  | 1287 | strncpy(suffix,".spread.csv",18); | 
|---|
|  | 1288 | break; | 
|---|
|  | 1289 | case SpinUp: | 
|---|
|  | 1290 | strncpy(suffix,".spread_up.csv",18); | 
|---|
|  | 1291 | break; | 
|---|
|  | 1292 | case SpinDown: | 
|---|
|  | 1293 | strncpy(suffix,".spread_down.csv",18); | 
|---|
|  | 1294 | break; | 
|---|
|  | 1295 | } | 
|---|
|  | 1296 |  | 
|---|
|  | 1297 | if (P->Par.me_comm_ST == 0) { | 
|---|
|  | 1298 | if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists | 
|---|
|  | 1299 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1300 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1301 | return 0; | 
|---|
|  | 1302 | //Error(SomeError,"ParseWannierFile: Opening failed\n"); | 
|---|
|  | 1303 | } | 
|---|
|  | 1304 | signal = 1; | 
|---|
|  | 1305 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1306 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1307 | } else { | 
|---|
|  | 1308 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1309 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1310 | if (signal == 0) | 
|---|
|  | 1311 | return 0; | 
|---|
|  | 1312 | } | 
|---|
|  | 1313 | i=-1; | 
|---|
|  | 1314 | for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) {  // go through all wave functions | 
|---|
|  | 1315 | OnePsiA = &Psi->AllPsiStatus[l];    // grab OnePsiA | 
|---|
|  | 1316 | if (OnePsiA->PsiType == type) {   // drop all but occupied ones | 
|---|
|  | 1317 | i++;  // increase l if it is occupied wave function | 
|---|
|  | 1318 | if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file | 
|---|
|  | 1319 | sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo); | 
|---|
|  | 1320 | signal = 0; | 
|---|
| [961b75] | 1321 | if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) { | 
|---|
| [a0bcf1] | 1322 | //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed"); | 
|---|
|  | 1323 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1324 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1325 | return 0; | 
|---|
|  | 1326 | } | 
|---|
| [961b75] | 1327 | if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) { | 
|---|
| [a0bcf1] | 1328 | //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed"); | 
|---|
|  | 1329 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1330 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1331 | return 0; | 
|---|
|  | 1332 | } | 
|---|
|  | 1333 | signal = 1; | 
|---|
|  | 1334 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1335 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1336 | } else { | 
|---|
|  | 1337 | if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) | 
|---|
|  | 1338 | Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); | 
|---|
|  | 1339 | if (signal == 0) | 
|---|
|  | 1340 | return 0; | 
|---|
|  | 1341 | } | 
|---|
|  | 1342 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local? | 
|---|
|  | 1343 | if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em | 
|---|
|  | 1344 | if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS) | 
|---|
|  | 1345 | Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed"); | 
|---|
|  | 1346 | //return 0; | 
|---|
|  | 1347 | MPI_Get_count(&status, MPI_DOUBLE, &msglen); | 
|---|
|  | 1348 | if (msglen != NDIM+1) | 
|---|
|  | 1349 | Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count"); | 
|---|
|  | 1350 | //return 0; | 
|---|
|  | 1351 | } | 
|---|
|  | 1352 | if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader | 
|---|
|  | 1353 | Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed"); | 
|---|
|  | 1354 | //return 0; | 
|---|
|  | 1355 | // and store 'em (for all who have this Psi local) | 
|---|
|  | 1356 | fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]); | 
|---|
|  | 1357 | for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j]; | 
|---|
|  | 1358 | Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM]; | 
|---|
|  | 1359 | if (P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread); | 
|---|
|  | 1360 | } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group | 
|---|
|  | 1361 | if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS) | 
|---|
|  | 1362 | Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed"); | 
|---|
|  | 1363 | //return 0; | 
|---|
|  | 1364 | } | 
|---|
|  | 1365 | } | 
|---|
|  | 1366 | } | 
|---|
|  | 1367 | if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) | 
|---|
|  | 1368 | fclose(SpreadFile); | 
|---|
|  | 1369 | fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me); | 
|---|
|  | 1370 | return 1; | 
|---|
|  | 1371 | } | 
|---|
|  | 1372 |  | 
|---|
|  | 1373 | /** Calculates the spread of orbital \a i. | 
|---|
|  | 1374 | * Stored in OnePsiElementAddData#WannierSpread. | 
|---|
|  | 1375 | * \param *P Problem at hand | 
|---|
|  | 1376 | * \param i i-th wave function (note "extra" ones are not counted!) | 
|---|
|  | 1377 | * \return spread \f$\sigma^2_{A^{(k)}}\f$ | 
|---|
|  | 1378 | */ | 
|---|
|  | 1379 | double CalculateSpread(struct Problem *P, int i) { | 
|---|
|  | 1380 | struct Lattice *Lat = &P->Lat; | 
|---|
|  | 1381 | struct RunStruct *R = &P->R; | 
|---|
|  | 1382 | struct Psis *Psi = &Lat->Psi; | 
|---|
|  | 1383 | struct LatticeLevel *Lev0 = R->Lev0; | 
|---|
|  | 1384 | struct LatticeLevel *LevS = R->LevS; | 
|---|
|  | 1385 | struct Density *Dens0 = Lev0->Dens; | 
|---|
|  | 1386 | struct fft_plan_3d *plan = Lat->plan; | 
|---|
|  | 1387 | fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; | 
|---|
|  | 1388 | fftw_real *PsiCR = (fftw_real *)PsiC; | 
|---|
|  | 1389 | fftw_complex *work = Dens0->DensityCArray[Temp2Density]; | 
|---|
|  | 1390 | fftw_real **HGcR = &Dens0->DensityArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as a storage array | 
|---|
|  | 1391 | fftw_complex **HGcRC = (fftw_complex**)HGcR; | 
|---|
|  | 1392 | fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity];  // use HGcDensity, 4x Gap..Density, TempDensity as an array | 
|---|
|  | 1393 | fftw_real **HGcR2 = (fftw_real**)HGcR2C; | 
|---|
|  | 1394 | MPI_Status status; | 
|---|
|  | 1395 | struct OnePsiElement *OnePsiA, *LOnePsiA; | 
|---|
|  | 1396 | int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; | 
|---|
|  | 1397 | fftw_complex *LPsiDatA=NULL; | 
|---|
|  | 1398 | int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index; | 
|---|
|  | 1399 | N0 = LevS->Plan0.plan->local_nx; | 
|---|
|  | 1400 | N = LevS->Plan0.plan->N; | 
|---|
|  | 1401 | const int NUpx = LevS->NUp[0]; | 
|---|
|  | 1402 | const int NUpy = LevS->NUp[1]; | 
|---|
|  | 1403 | const int NUpz = LevS->NUp[2]; | 
|---|
|  | 1404 | double a_ij, b_ij, A_ij, B_ij; | 
|---|
|  | 1405 | double tmp, tmp2, spread = 0; | 
|---|
|  | 1406 | double **cos_lookup, **sin_lookup; | 
|---|
|  | 1407 |  | 
|---|
|  | 1408 | b_ij = 0; | 
|---|
|  | 1409 |  | 
|---|
|  | 1410 | // create lookup table for sin/cos values | 
|---|
|  | 1411 | cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); | 
|---|
|  | 1412 | sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); | 
|---|
|  | 1413 | for (k=0;k<NDIM;k++) { | 
|---|
|  | 1414 | // allocate memory | 
|---|
|  | 1415 | cos_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: cos_lookup"); | 
|---|
|  | 1416 | sin_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: sin_lookup"); | 
|---|
|  | 1417 | // reset arrays | 
|---|
|  | 1418 | SetArrayToDouble0(cos_lookup[k],LevS->Plan0.plan->N[k]); | 
|---|
|  | 1419 | SetArrayToDouble0(sin_lookup[k],LevS->Plan0.plan->N[k]); | 
|---|
|  | 1420 | // create lookup values | 
|---|
|  | 1421 | for (g=0;g<LevS->Plan0.plan->N[k];g++) { | 
|---|
|  | 1422 | tmp = 2*PI/(double)LevS->Plan0.plan->N[k]*(double)g; | 
|---|
|  | 1423 | cos_lookup[k][g] = cos(tmp); | 
|---|
|  | 1424 | sin_lookup[k][g] = sin(tmp); | 
|---|
|  | 1425 | } | 
|---|
|  | 1426 | } | 
|---|
|  | 1427 | // fill matrices | 
|---|
|  | 1428 | OnePsiA = &Psi->AllPsiStatus[i];    // grab the desired OnePsiA | 
|---|
|  | 1429 | if (OnePsiA->PsiType != Extra) {   // drop if extra one | 
|---|
|  | 1430 | if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? | 
|---|
|  | 1431 | LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo]; | 
|---|
|  | 1432 | else | 
|---|
|  | 1433 | LOnePsiA = NULL; | 
|---|
|  | 1434 | if (LOnePsiA == NULL) {   // if it's not local ... receive it from respective process into TempPsi | 
|---|
|  | 1435 | RecvSource = OnePsiA->my_color_comm_ST_Psi; | 
|---|
|  | 1436 | MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status ); | 
|---|
|  | 1437 | LPsiDatA=LevS->LPsi->TempPsi; | 
|---|
|  | 1438 | } else {                  // .. otherwise send it to all other processes (Max_me... - 1) | 
|---|
|  | 1439 | for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) | 
|---|
|  | 1440 | if (p != OnePsiA->my_color_comm_ST_Psi) | 
|---|
|  | 1441 | MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT); | 
|---|
|  | 1442 | LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; | 
|---|
|  | 1443 | } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received | 
|---|
|  | 1444 |  | 
|---|
|  | 1445 | CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); | 
|---|
|  | 1446 | // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! | 
|---|
|  | 1447 | for (n0=0;n0<N0;n0++) | 
|---|
|  | 1448 | for (n[1]=0;n[1]<N[1];n[1]++) | 
|---|
|  | 1449 | for (n[2]=0;n[2]<N[2];n[2]++) { | 
|---|
|  | 1450 | i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx); | 
|---|
|  | 1451 | iS = n[2]+N[2]*(n[1]+N[1]*n0); | 
|---|
|  | 1452 | n[0] = n0 + LevS->Plan0.plan->start_nx; | 
|---|
|  | 1453 | for (k=0;k<max_operators;k+=2) { | 
|---|
|  | 1454 | tmp = 2*PI/(double)(N[k/2])*(double)(n[k/2]); | 
|---|
|  | 1455 | tmp2 = PsiCR[i0] /LevS->MaxN; | 
|---|
|  | 1456 | // check lookup | 
|---|
|  | 1457 | if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) { | 
|---|
|  | 1458 | fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]); | 
|---|
|  | 1459 | Error(SomeError, "Lookup table does not match real value!"); | 
|---|
|  | 1460 | } | 
|---|
|  | 1461 | //            HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 1462 | //            HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */ | 
|---|
|  | 1463 | //            HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 1464 | //            HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ | 
|---|
|  | 1465 | HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 1466 | HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 1467 | HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 1468 | HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */ | 
|---|
|  | 1469 | } | 
|---|
|  | 1470 | } | 
|---|
|  | 1471 | for (k=0;k<max_operators;k++) { | 
|---|
|  | 1472 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work); | 
|---|
|  | 1473 | fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work); | 
|---|
|  | 1474 | } | 
|---|
|  | 1475 |  | 
|---|
|  | 1476 |  | 
|---|
|  | 1477 | for (k=0;k<max_operators;k++) { | 
|---|
|  | 1478 | a_ij = 0; | 
|---|
|  | 1479 | //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k); | 
|---|
|  | 1480 | // sum directly in a_ij and b_ij the two desired terms | 
|---|
|  | 1481 | g=0; | 
|---|
|  | 1482 | if (LevS->GArray[0].GSq == 0.0) { | 
|---|
|  | 1483 | Index = LevS->GArray[g].Index; | 
|---|
|  | 1484 | a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im); | 
|---|
|  | 1485 | b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im); | 
|---|
|  | 1486 | g++; | 
|---|
|  | 1487 | } | 
|---|
|  | 1488 | for (; g < LevS->MaxG; g++) { | 
|---|
|  | 1489 | Index = LevS->GArray[g].Index; | 
|---|
|  | 1490 | a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im); | 
|---|
|  | 1491 | b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im); | 
|---|
|  | 1492 | } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...) | 
|---|
|  | 1493 | MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
|  | 1494 | spread += pow(A_ij,2); | 
|---|
|  | 1495 | } | 
|---|
|  | 1496 | } | 
|---|
|  | 1497 | MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); | 
|---|
|  | 1498 |  | 
|---|
|  | 1499 | // store spread in OnePsiElementAdd | 
|---|
|  | 1500 | Psi->AddData[i].WannierSpread = B_ij - spread; | 
|---|
|  | 1501 | // free lookups | 
|---|
|  | 1502 | for (k=0;k<NDIM;k++) { | 
|---|
| [64fa9e] | 1503 | Free(cos_lookup[k], "bla"); | 
|---|
|  | 1504 | Free(sin_lookup[k], "bla"); | 
|---|
| [a0bcf1] | 1505 | } | 
|---|
| [64fa9e] | 1506 | Free(cos_lookup, "bla"); | 
|---|
|  | 1507 | Free(sin_lookup, "bla"); | 
|---|
| [a0bcf1] | 1508 |  | 
|---|
|  | 1509 | return (B_ij - spread); | 
|---|
|  | 1510 | } | 
|---|