/** \file wannier.c * Maximally Localized Wannier Functions. * * Contains the on function that minimises the spread of all orbitals in one rush in a parallel * Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to * calculate the spread of a specific orbital, which may be useful in checking on the change of * spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex. * Project: ParallelCarParrinello \author Frederik Heber \date 2006 File: wannier.c $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include "data.h" #include "density.h" #include "errors.h" #include "gramsch.h" #include "helpers.h" #include "init.h" #include "myfft.h" #include "mymath.h" #include "output.h" #include "perturbed.h" #include "wannier.h" #define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread #define type Occupied /** Converts type fftw_complex to gsl_complex. * \param a complex number * \return b complex number */ gsl_complex convertComplex (fftw_complex a) { return gsl_complex_rect(c_re(a),c_im(a)); } /** "merry go round" implementation for parallel index ordering. * Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is * performed which generates once each possible pairing. * \param *top index array 1 * \param *bot index array 2 * \param m N/2, where N is the matrix row/column dimension * \note taken from [Golub, Matrix computations, 1989, p451] */ void MerryGoRoundIndices(int *top, int *bot, int m) { int *old_top, *old_bot; int k; old_top = (int *) Malloc(sizeof(int)*m, "music: old_top"); old_bot = (int *) Malloc(sizeof(int)*m, "music: old_bot"); /* fprintf(stderr,"oldtop\t"); for (k=0;k 1) top[k] = old_top[k-1]; if (k==m-1) bot[k] = old_top[k]; else bot[k] = old_bot[k+1]; } /* fprintf(stderr,"top\t"); for (k=0;k 1) { // get last left column Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0); } else { // get right column Abuffer1 = Aloc[1]; // note down the free column MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0); } //fprintf(stderr,"...left columns..."); for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right Aloc[l] = Aloc[l-2]; if (max_rounds > 1) { //fprintf(stderr,"...first right..."); Aloc[2] = Aloc[1]; // get first right column } //fprintf(stderr,"...right columns..."); for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left Aloc[l] = Aloc[l+2]; //fprintf(stderr,"...last right..."); Aloc[(2*max_rounds-1)] = Abuffer1; MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1); } else if (ProcRank == ProcNum-1) { //fprintf(stderr,"...first right..."); // get first right column Abuffer2 = Aloc[1]; // note down the free column MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1); //fprintf(stderr,"...right columns..."); for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left Aloc[(l)] = Aloc[(l+2)]; //fprintf(stderr,"...last right..."); Aloc[(2*max_rounds-1)] = Aloc[2*(max_rounds-1)]; // Put last left into last right column //fprintf(stderr,"...left columns..."); for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right Aloc[(l)] = Aloc[(l-2)]; //fprintf(stderr,"...first left..."); // if (max_rounds > 1) Aloc[0] = Abuffer2; // get first left column MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0); } else { // get last left column MPI_Isend(Aloc[2*(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0); Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column //fprintf(stderr,"...first right..."); // get first right column MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1); Abuffer2 = Aloc[1]; // note down the free column //fprintf(stderr,"...left columns..."); for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right Aloc[(l)] = Aloc[(l-2)]; //fprintf(stderr,"...right columns..."); for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left Aloc[(l)] = Aloc[(l+2)]; //fprintf(stderr,"...first left..."); Aloc[0] = Abuffer1; // get first left column MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0); //fprintf(stderr,"...last right..."); Aloc[(2*max_rounds-1)] = Abuffer2; MPI_Irecv(Aloc[(2*max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1); } //fprintf(stderr,"...waiting..."); if (ProcRank != ProcNum-1) MPI_Wait(&requestS0, &status); if (ProcRank != 0) // first left column MPI_Wait(&requestR0, &status); if (ProcRank != 0) MPI_Wait(&requestS1, &status); if (ProcRank != ProcNum-1) MPI_Wait(&requestR1, &status); //fprintf(stderr,"...done\n"); } /** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group. * \param *P Problem at hand * \param AllocNum number of rows in matrix * \return address of MPI communicator */ #ifdef HAVE_INLINE inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum) #else MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum) #endif { MPI_Comm *comm = &P->Par.comm_ST; //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi); //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me); if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel comm = &P->Par.comm_ST; //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n"); } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel comm = &P->Par.comm_ST_Psi; //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n"); } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel comm = &P->Par.comm_ST_PsiT; //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n"); } } else { if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel comm = &P->Par.comm_ST_PsiT; //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n"); } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel comm = &P->Par.comm_ST_Psi; //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n"); } } return comm; } /** Allocates and fills Lookup table for sin/cos values at each grid node. * \param ***cos_table pointer to two-dimensional lookup table for cosine values * \param ***sin_table pointer to two-dimensional lookup table for sine values * \param *N array with number of nodes per \a NDIM axis */ void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N) { int i, j; double argument; double **cos_lookup, **sin_lookup; // create lookup table for sin/cos values cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); for (i=0;iLat; struct RunStruct *R = &P->R; struct Psis *Psi = &Lat->Psi; struct LatticeLevel *Lev0 = R->Lev0; struct LatticeLevel *LevS = R->LevS; struct Density *Dens0 = Lev0->Dens; struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB; struct fft_plan_3d *plan = Lat->plan; fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; fftw_real *PsiCR = (fftw_real *)PsiC; fftw_complex *work = Dens0->DensityCArray[Temp2Density]; fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array fftw_complex **HGcRC = (fftw_complex**)HGcR; fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array fftw_real **HGcR2 = (fftw_real**)HGcR2C; int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; MPI_Status status; fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; int n[NDIM],n0,i0,iS, Index; int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows int N0 = LevS->Plan0.plan->local_nx; int *N = LevS->Plan0.plan->N; const int NUpx = LevS->NUp[0]; const int NUpy = LevS->NUp[1]; const int NUpz = LevS->NUp[2]; double argument, PsiAtNode; int e,g,i,j,k,l,m,p,u; double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0; double **cos_lookup = NULL,**sin_lookup = NULL; if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me); debug(P, "Creating Lookup Table"); CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N); debug(P, "Calculating each entry of variance matrices"); l=-1; // to access U matrix element (0..Num-1) // fill the matrices for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA if (OnePsiA->PsiType == type) { // drop all but occupied ones l++; // increase l if it is non-extra wave function if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; else LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients! //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0); if (LPsiDatA != NULL) { CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! for (n0=0;n0Plan0.plan->start_nx; for (k=0;kMaxN; // check lookup if (!l) // perform check on first wave function only if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) { Error(SomeError, "Lookup table does not match real value!"); } HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */ HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */ HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */ HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ } } for (u=0;uLevelNo, FFTNF1, HGcRC[u], work); fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work); } } m = -1; // to access U matrix element (0..Num-1) for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB if (OnePsiB->PsiType == type) { // drop all but occupied ones m++; // increase m if it is non-extra wave function if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; else LOnePsiB = NULL; if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi RecvSource = OnePsiB->my_color_comm_ST_Psi; MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); LPsiDatB=LevS->LPsi->TempPsi; } else { // .. otherwise send it to all other processes (Max_me... - 1) for (p=0;pPar.Max_me_comm_ST_PsiT;p++) if (p != OnePsiB->my_color_comm_ST_Psi) MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received for (u=0;uPar.me, l,m,u); g=0; if (LevS->GArray[0].GSq == 0.0) { Index = LevS->GArray[g].Index; a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im); b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im); g++; } for (; g < LevS->MaxG; g++) { Index = LevS->GArray[g].Index; a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im); b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im); } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...) // sum up elements from all coefficients sharing processes MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); a_ij = A_ij; b_ij = B_ij; // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!) MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); } else { // receive ... MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); } // ... and store //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij); //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij); gsl_matrix_set(A[u], l, m, A_ij); gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m)); } } } } } // reset extra entries for (u=0;u<=max_operators;u++) { for (i=Num;iPar.me == 0) for (u=0;uLat; struct RunStruct *R = &P->R; struct FileData *F = &P->Files; struct Psis *Psi = &Lat->Psi; struct LatticeLevel *LevS = R->LevS; double result, Result; fftw_complex *LPsiDatA=NULL; struct OnePsiElement *OnePsiA; int i,j,l,g; char spin[12], suffix[18]; switch (Lat->Psi.PsiST) { case SpinDouble: strcpy(suffix,".recispread.csv"); strcpy(spin,"SpinDouble"); break; case SpinUp: strcpy(suffix,".recispread_up.csv"); strcpy(spin,"SpinUp"); break; case SpinDown: strcpy(suffix,".recispread_down.csv"); strcpy(spin,"SpinDown"); break; } if(P->Par.me_comm_ST == 0) { if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me); if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile) OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level if (F->ReciSpreadFile == NULL) { Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n"); } else { fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin); } } // integrate second order moment for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA if (OnePsiA->PsiType == type) { // drop all but occupied ones if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; else LPsiDatA = NULL; if (LPsiDatA != NULL) { if (P->Par.me_comm_ST == 0) fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo); for (i=0;iGArray[0].GSq == 0.0) { result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re); g++; } for (;gMaxG;g++) result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im); //result *= Lat->Volume/LevS->MaxG; MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); if (P->Par.me_comm_ST == 0) fprintf(F->ReciSpreadFile,"%2.5lg ", Result); } } if (P->Par.me_comm_ST == 0) fprintf(F->ReciSpreadFile,"\n"); } } } if(P->Par.me_comm_ST == 0) { fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n"); fflush(F->ReciSpreadFile); } } /** Given the unitary matrix the transformation is performed on the Psis. * \param *P Problem at hand * \param *U gsl matrix containing the transformation matrix * \param Num dimension parameter for the matrix, i.e. number of wave functions */ void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num) { struct Lattice *Lat = &P->Lat; struct RunStruct *R = &P->R; struct Psis *Psi = &Lat->Psi; struct LatticeLevel *LevS = R->LevS; MPI_Status status; struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB; int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; int g,i,j,l,k,m,p; //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me); Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer"); for (l=0;lLPsi->OldLocalPsi[l]; //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me); l=-1; // to access U matrix element (0..Num-1) k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1) for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA if (OnePsiA->PsiType == type) { // drop all but occupied ones l++; // increase l if it is occupied wave function if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local? k++; // increase k only if it is a local, non-extra orbital wave function LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part } else LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients! m = -1; // to access U matrix element (0..Num-1) for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB if (OnePsiB->PsiType == type) { // drop all but occupied ones m++; // increase m if it is occupied wave function if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; else LOnePsiB = NULL; if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi RecvSource = OnePsiB->my_color_comm_ST_Psi; MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); LPsiDatB=LevS->LPsi->TempPsi; } else { // .. otherwise send it to all other processes (Max_me... - 1) for (p=0;pPar.Max_me_comm_ST_PsiT;p++) if (p != OnePsiB->my_color_comm_ST_Psi) MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received if (LPsiDatA != NULL) { double tmp = gsl_matrix_get(U,l,m); g=0; if (LevS->GArray[0].GSq == 0.0) { LPsiDatA[g].re += LPsiDatB[g].re * tmp; LPsiDatA[g].im += LPsiDatB[g].im * tmp; g++; } for (; g < LevS->MaxG; g++) { LPsiDatA[g].re += LPsiDatB[g].re * tmp; LPsiDatA[g].im += LPsiDatB[g].im * tmp; } } } } } } //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me); // now, as all wave functions are updated, swap the buffer l = -1; for (k=0;kMaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { l++; //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo); LPsiDatA = (fftw_complex *)coeffs_buffer[l]; LPsiDatB = LevS->LPsi->LocalPsi[l]; for (g=0;gMaxG;g++) { LPsiDatB[g].re = LPsiDatA[g].re; LPsiDatB[g].im = LPsiDatA[g].im; } // recalculating non-local form factors which are coefficient dependent! CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo); } } // and free allocated buffer memory Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer"); } /** Changes Wannier Centres according to RunStruct#CommonWannier. * \param *P Problem at hand. * \param Num number of Psis * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres * \param *WannierSpread array with wannier spread per wave function */ void ChangeWannierCentres(struct Problem *P, int Num, double **WannierCentre, double *WannierSpread) { struct RunStruct *R = &P->R; struct Lattice *Lat = &P->Lat; struct LatticeLevel *LevS = R->LevS; int *marker, **group; int partner[Num]; int i,j,l,k; int totalflag, flag; double q[NDIM], center[NDIM]; double Spread; int *N = LevS->Plan0.plan->N; switch (R->CommonWannier) { case 4: debug(P,"Shifting each Wannier centers to cell center"); for (j=0;jRealBasis, center); // transform to real coordinates for (i=0; i < Num; i++) { // go through all occupied wave functions for (j=0;jReciBasis, WannierCentre[i]); for (j=0;jPar.me, j, N[j], tmp, floor(tmp), ceil(tmp)); if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j])) q[j] = floor(q[j])/(double)N[j]; else q[j] = ceil(q[j])/(double)N[j]; } RMat33Vec3(WannierCentre[i], Lat->RealBasis, q); } break; case 2: debug(P,"Combining individual orbitals according to spread."); //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me); debug(P,"finding partners"); marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker"); group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group"); for (l=0;lPar.me, WannierCentre[l][i], WannierCentre[k][i]); Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]); } Spread = sqrt(Spread); // distance in Spread //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]); if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread group[l][j++] = k; // add k to group of l partner[l]++; //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l); } } } // consistency, for each orbital check if this orbital is also in the group of each referred orbital debug(P,"checking consistency"); totalflag = 1; for (l=0;lPar.me, l, group[l][k]); if (totalflag == 1) totalflag = flag; } } // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres debug(P,"weight and calculate new centers for partner groups"); for (l=0;l<=Num;l++) marker[l] = 1; if (totalflag) { for (l=0;lPar.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]); q[i] += WannierCentre[ group[l][j] ][i]; } j++; } //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j); for (i=0;iCall.out[StepLeaderOut]) { fprintf(stderr,"Summary:\n"); fprintf(stderr,"========\n"); for (i=0;iLat; struct Psis *Psi = &Lat->Psi; struct OnePsiElement *OnePsiA; int i,j,k,l; double tmp, q[NDIM]; *old_spread = 0; *spread = 0; // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital i=-1; for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA if (OnePsiA->PsiType == type) { // drop all but occupied ones i++; // increase l if it is occupied wave function //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i); // calculate Wannier Centre for (j=0;jRealBasisSQ q[j] += 1.; } RMat33Vec3(WannierCentre[i], Lat->RealBasis, q); // store orbital spread and centre in file tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2) - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2) - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2); WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp; //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]); //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n", //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]); //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n", //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]); // gather all spreads *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U) for (k=0;kPar.me, msg); for (k=0;kCall.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me); DiagData->Num = Num; DiagData->AllocNum = ceil((double)Num / 2. ) *2; DiagData->NumMatrices = NumMatrices; DiagData->extra = extra; // determine communicator and our rank and its size DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum); MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum)); MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank)); // allocate memory DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum); gsl_matrix_set_identity(DiagData->U); DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A"); for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) { DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum); gsl_matrix_set_zero(DiagData->A[i]); } // init merry-go-round array for diagonilzation DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top"); DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot"); for (i=0;iAllocNum/2;i++) { DiagData->top[i] = 2*i; DiagData->bot[i] = 2*i+1; } /* // print starting values of index generation tables top and bot fprintf(stderr,"top\t"); for (k=0;ktop, "FreeDiagonalization: DiagData->top"); Free(DiagData->bot, "FreeDiagonalization: DiagData->bot"); gsl_matrix_free(DiagData->U); for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) gsl_matrix_free(DiagData->A[i]); Free(DiagData->A, "FreeDiagonalization: DiagData->A"); } /** Orthogonalizing PsiType#Occupied wave functions for MD. * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged. * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis. * \param *P Problem at hand * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali- * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a * final orthogonalisation of all Psis, after the CG minimisation. */ void OrthogonalizePsis(struct Problem *P) { struct Lattice *Lat = &P->Lat; struct Psis *Psi = &Lat->Psi; struct LatticeLevel *LevS = P->R.LevS; int Num = Psi->NoOfPsis; int i,j,l; struct DiagonalizationData DiagData; double PsiSP; InitDiagonalization(P, &DiagData, Num, 1, 0); // Calculate Overlap matrix for (l=Psi->TypeStartIndex[Occupied]; lTypeStartIndex[UnOccupied]; l++) CalculateOverlap(P, l, Occupied); for (i=0;iOverlap[i][j]); //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num," (before)"); //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)"); // diagonalize overlap matrix Diagonalize(P, &DiagData); //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)"); // apply found unitary transformation to wave functions UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum); // update GramSch stati for (i=Psi->TypeStartIndex[Occupied];iTypeStartIndex[UnOccupied];i++) { GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given... PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]); //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP); Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal; } UpdateGramSchAllPsiStatus(P,Psi); /* for (l=Psi->TypeStartIndex[Occupied]; lTypeStartIndex[UnOccupied]; l++) CalculateOverlap(P, l, Occupied); for (i=0;iOverlap[i][j]); if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num," (after)"); */ // free memory FreeDiagonalization(&DiagData); } /** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD. * Ensures that two orthonormality condition are met for Psis: * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$ * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$ * \param *P Problem at hand * \note [Payne] states that GramSchmidt is not well-suited, and this stronger * condition is needed for numerical stability * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat */ void StrongOrthogonalizePsis(struct Problem *P) { struct Lattice *Lat = &P->Lat; struct Psis *Psi = &Lat->Psi; int Num = Psi->NoOfPsis; int i,j,l; struct DiagonalizationData DiagData; InitDiagonalization(P, &DiagData, Num, 2, 0); // Calculate Overlap matrix for (l=Psi->TypeStartIndex[Occupied]; lTypeStartIndex[UnOccupied]; l++) { CalculateOverlap(P, l, Occupied); //Calculate1stOverlap(P, l, Occupied); } for (i=0;iOverlap[i][j]); //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]); } // diagonalize overlap matrix Diagonalize(P, &DiagData); // apply found unitary transformation to wave functions UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum); // free memory FreeDiagonalization(&DiagData); } /** Uses either serial or parallel diagonalization. * \param *P Problem at hand * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization * \sa ParallelDiagonalization(), SerialDiagonalization() */ void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData) { if (DiagData->Num != 1) { // one- or multi-process case? if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) { /* debug(P,"Testing with silly matrix"); ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot); if(P->Call.out[ReadOut]) // && P->Par.me == 0) PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)"); if(P->Call.out[ReadOut]) // && P->Par.me == 0) PrintGSLMatrix(P, Utest, 4, "Utest (final)"); */ debug(P,"ParallelDiagonalization"); ParallelDiagonalization(P, DiagData); } else {/* debug(P,"Testing with silly matrix"); SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot); if(P->Call.out[ReadOut]) // && P->Par.me == 0) PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)"); if(P->Call.out[ReadOut]) // && P->Par.me == 0) PrintGSLMatrix(P, Utest, 4, "Utest (final)"); */ debug(P,"SerialDiagonalization"); SerialDiagonalization(P, DiagData); } //if(P->Call.out[ReadOut]) // && P->Par.me == 0) //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U"); } } /** Computation of Maximally Localized Wannier Functions. * Maximally localized functions are prime when evulating a Hamiltonian with * magnetic fields under periodic boundary conditions, as the common position * operator is no longer valid. These can be obtained by orbital rotations, which * are looked for iteratively and gathered in one transformation matrix, to be * later applied to the set of orbital wave functions. * * In order to obtain these, the following algorithm is applied: * -# Initialize U (identity) as the sought-for transformation matrix * -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements * \f$A^{(k)}_{ij} = \langle \phi_i | A^{(k)} | \phi_j \rangle\f$ for the six operators * \f$A^{(k)}\f$ * -# For each pair of indices (i,j) (iLat; struct Psis *Psi = &Lat->Psi; int i; int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows double **WannierCentre; double *WannierSpread; double spread, spreadSQ; struct DiagonalizationData DiagData; if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me); InitDiagonalization(P, &DiagData, Num, max_operators, 1); // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1)) //debug (P,"Calculatung Variance matrices"); FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A); //debug (P,"Diagonalizing"); Diagonalize(P, &DiagData); // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle //debug (P,"Performing Unitary transformation"); UnitaryTransformationOnWavefunctions(P, DiagData.U, Num); if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me); WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre"); WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread"); for (i=0;iPar.me, ev1, ev2); // find eigenvectors for(i=0;i<2;i++) { if (fabs(ev[i]) < MYEPSILON) { gsl_matrix_set(evec, 0,i, 0.); gsl_matrix_set(evec, 1,i, 0.); } else if (fabs(a22-ev[i]) > MYEPSILON) { norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i]))); gsl_matrix_set(evec, 0,i, 1./norm); gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm); //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm); } else if (fabs(a12) > MYEPSILON) { norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12)); gsl_matrix_set(evec, 0,i, 1./norm); gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm); //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm); } else { if (fabs(a11-ev[i]) > MYEPSILON) { norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i]))); gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm); gsl_matrix_set(evec, 1,i, 1./norm); //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm); } else if (fabs(a21) > MYEPSILON) { norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21)); gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm); gsl_matrix_set(evec, 1,i, 1./norm); //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm); } else { //gsl_matrix_set(evec, 0,i, 0.); //gsl_matrix_set(evec, 1,i, 1.); fprintf (stderr,"\t evec %i undetermined\n", i); } //gsl_matrix_set(evec, 0,0, 1.); //gsl_matrix_set(evec, 1,0, 0.); //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me); } } } /** Calculates sine and cosine values for multiple matrix diagonalization. * \param *evec eigenvectors in columns * \param *eval corresponding eigenvalues * \param *c cosine to be returned * \param *s sine to be returned */ #ifdef HAVE_INLINE inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s) #else void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s) #endif { int index; double x,y,r; index = gsl_vector_max_index (eval); // get biggest eigenvalue //fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index); x = gsl_matrix_get(evec, 0, index); y = gsl_matrix_get(evec, 1, index); if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4 y = -y; x = -x; } //z = gsl_matrix_get(evec, 2, index) * x/fabs(x); //fprintf(stderr,"\tx %lg\ty %lg\n", x,y); //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] r = sqrt(x*x + y*y); // + z*z); if (fabs(r) > MYEPSILON) { *c = sqrt((x + r) / (2.*r)); *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r))); } else { *c = 1.; *s = 0.; } } /* * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized * \param *U transformation matrix set to unity matrix * \param NumMatrices number of matrices to be diagonalized simultaneously * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A) * \param AllocNum number of rows/columns in matrices * \param Num number of wave functions * \param ProcRank index in group for this cpu * \param ProcNum number of cpus in group * \param *top array with top row indices (merry-go-round) * \param *bot array with bottom row indices (merry-go-round) */ /** Simultaneous diagonalization of matrices with multiple cpus. * \param *P Problem at hand * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization * \note this is slower given one cpu only than SerialDiagonalization() */ void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData) { struct RunStruct *R =&P->R; int tagR0, tagR1, tagS0, tagS1; int iloc, jloc; double *s_all, *c_all; int round, max_rounds; int start; int *rcounts, *rdispls; double *c, *s; int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from int left, right; // left or right neighbour for process double spread = 0., old_spread=0., Spread=0.; int i,j,k,l,m,u; int set; int it_steps; // iteration step counter double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k] double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k] double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k] double a_i, a_j; gsl_matrix *G; gsl_vector *h; gsl_vector *eval; gsl_matrix *evec; //gsl_eigen_symmv_workspace *w; max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds); // allocate column vectors for interchange of columns debug(P,"allocate column vectors for interchange of columns"); c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c"); s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s"); c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all"); s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all"); rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts"); rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls"); // allocate eigenvector stuff debug(P,"allocate eigenvector stuff"); G = gsl_matrix_calloc (2,2); h = gsl_vector_alloc (2); eval = gsl_vector_alloc (2); evec = gsl_matrix_alloc (2,2); //w = gsl_eigen_symmv_alloc(2); // establish communication partners debug(P,"establish communication partners"); if (DiagData->ProcRank == 0) { tagS0 = WannierALTag; // left p0 always remains left p0 } else { tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last } tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1 tagR0 = WannierALTag; // tagR1 = WannierARTag; // first process if (DiagData->ProcRank == 0) { left = DiagData->ProcNum-1; right = 1; Lsend = 0; Rsend = 1; Lrecv = 0; Rrecv = 1; } else if (DiagData->ProcRank == DiagData->ProcNum - 1) { left = DiagData->ProcRank - 1; right = 0; Lsend = DiagData->ProcRank; Rsend = DiagData->ProcRank - 1; Lrecv = DiagData->ProcRank - 1; Rrecv = DiagData->ProcRank; } else { left = DiagData->ProcRank - 1; right = DiagData->ProcRank + 1; Lsend = DiagData->ProcRank+1; Rsend = DiagData->ProcRank - 1; Lrecv = DiagData->ProcRank - 1; Rrecv = DiagData->ProcRank+1; } //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv); // initialise A_loc debug(P,"initialise A_loc"); for (k=0;kNumMatrices+DiagData->extra;k++) { //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]"); Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]"); Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]"); Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]"); //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds]; for (round=0;roundAllocNum*(2*round)]; Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)]; for (l=0;lAllocNum;l++) { Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)); Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1); //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]); } } } // initialise U_loc debug(P,"initialise U_loc"); //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc"); Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround"); Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal"); Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc"); //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds]; for (round=0;roundAllocNum*(2*round)]; Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)]; for (l=0;lAllocNum;l++) { Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)); Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1); //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]); } } // now comes the iteration loop debug(P,"now comes the iteration loop"); it_steps = 0; do { it_steps++; //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps); for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); for (round = 0; round < max_rounds;round++) { start = DiagData->ProcRank * max_rounds + round; // get indices i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1; //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc); // calculate rotation angle, i.e. c and s //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j); gsl_matrix_set_zero(G); for (k=0;kNumMatrices;k++) { // go through all operators ... // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] for (l=0;l<2;l++) for (m=0;m<2;m++) gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); } //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G EigensolverFor22Matrix(P,G,eval,evec); //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]); //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]); //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); // V_loc = V_loc * V_small //debug(P,"apply rotation to local U"); for (l=0;lAllocNum;l++) { a_i = Uloc[2*round+iloc][l]; a_j = Uloc[2*round+jloc][l]; Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j; Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j; } } // end of round // circulate the rotation angles //debug(P,"circulate the rotation angles"); MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); //m = start; for (l=0;lAllocNum/2;l++) { // for each process // we have V_small from process k //debug(P,"Apply V_small from other process"); i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1; jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1; for (m=0;mPar.me, m,i,j); // apply row rotation to each A[k] for (k=0;kNumMatrices+DiagData->extra;k++) {// one extra for B matrix ! //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); a_i = Aloc[k][2*m+iloc][i]; a_j = Aloc[k][2*m+iloc][j]; Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j; Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j; a_i = Aloc[k][2*m+jloc][i]; a_j = Aloc[k][2*m+jloc][j]; Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j; Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j; //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); } } } // apply rotation to local operator matrices // A_loc = A_loc * V_small //debug(P,"apply rotation to local operator matrices A[k]"); for (m=0;mProcRank * max_rounds + m; iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1; for (k=0;kNumMatrices+DiagData->extra;k++) {// extra for B matrix ! for (l=0;lAllocNum;l++) { // Columns, i and j belong to this process only! a_i = Aloc[k][2*m+iloc][l]; a_j = Aloc[k][2*m+jloc][l]; Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j; Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j; //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]); } } } // Shuffling of these round's columns to prepare next rotation set for (k=0;kNumMatrices+DiagData->extra;k++) {// one extra for B matrix ! // extract columns from A //debug(P,"extract columns from A"); MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1); } // and also for V ... //debug(P,"extract columns from U"); MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1); // and merry-go-round for the indices too //debug(P,"and merry-go-round for the indices too"); MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2); } //fprintf(stderr,"(%i) STEP 4\n",P->Par.me); // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 old_spread = Spread; spread = 0.; for(k=0;kNumMatrices;k++) { // go through all self-adjoint operators for (i=0; i < 2*max_rounds; i++) { // go through all wave functions spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]; //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i); } } MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm)); //Spread = spread; if (P->Par.me == 0) { //if(P->Call.out[ReadOut]) // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier); //else //fprintf(stderr,"%2.9e\n",Spread); } // STEP 5: check change of variance } while (fabs(old_spread-Spread) >= R->EpsWannier); // end of iterative diagonalization loop: We have found our final orthogonal U! // gather local parts of U into complete matrix for (l=0;lProcNum;l++) rcounts[l] = DiagData->AllocNum; debug(P,"allgather U"); for (round=0;round<2*max_rounds;round++) { for (l=0;lProcNum;l++) rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum; MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm)); } for (k=0;kAllocNum;k++) { for(l=0;lAllocNum;l++) { gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]); } } // after one set, gather A[k] from all and calculate spread for (l=0;lProcNum;l++) rcounts[l] = DiagData->AllocNum; debug(P,"gather A[k] for spread"); for (u=0;uNumMatrices+DiagData->extra;u++) {// extra for B matrix ! debug(P,"A[k] all gather"); for (round=0;round<2*max_rounds;round++) { for (l=0;lProcNum;l++) rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum; MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm)); } for (k=0;kAllocNum;k++) { for(l=0;lAllocNum;l++) { gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]); } } } // free eigenvector stuff gsl_vector_free(h); gsl_matrix_free(G); //gsl_eigen_symmv_free(w); gsl_vector_free(eval); gsl_matrix_free(evec); // Free column vectors for (k=0;kNumMatrices+DiagData->extra;k++) { Free(Atotal[k], "ParallelDiagonalization: Atotal[k]"); Free(Around[k], "ParallelDiagonalization: Around[k]"); } Free(Uround, "ParallelDiagonalization: Uround"); Free(Utotal, "ParallelDiagonalization: Utotal"); Free(c_all, "ParallelDiagonalization: c_all"); Free(s_all, "ParallelDiagonalization: s_all"); Free(c, "ParallelDiagonalization: c"); Free(s, "ParallelDiagonalization: s"); Free(rcounts, "ParallelDiagonalization: rcounts"); Free(rdispls, "ParallelDiagonalization: rdispls"); } /* * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized * \param *U transformation matrix set to unity matrix * \param NumMatrices number of matrices to be diagonalized * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A) * \param AllocNum number of rows/columns in matrices * \param Num number of wave functions * \param *top array with top row indices (merry-go-round) * \param *bot array with bottom row indices (merry-go-round) */ /** Simultaneous Diagonalization of variances matrices with one cpu. * \param *P Problem at hand * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization * \note this is faster given one cpu only than ParallelDiagonalization() */ void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData) { struct RunStruct *R = &P->R; gsl_matrix *G; gsl_vector *h; gsl_vector *eval; gsl_matrix *evec; //gsl_eigen_symmv_workspace *w; double *c,*s,a_i,a_j; int it_steps, set, ProcRank; int i,j,k,l,m; double spread = 0., old_spread = 0.;\ // allocate eigenvector stuff debug(P,"allocate eigenvector stuff"); G = gsl_matrix_calloc (2,2); h = gsl_vector_alloc (2); eval = gsl_vector_alloc (2); evec = gsl_matrix_alloc (2,2); //w = gsl_eigen_symmv_alloc(2); c = (double *) Malloc(sizeof(double), "ComputeMLWF: c"); s = (double *) Malloc(sizeof(double), "ComputeMLWF: s"); debug(P,"now comes the iteration loop"); it_steps=0; do { it_steps++; //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me); //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps); for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); // STEP 3: for all index pairs 0<= iAllocNum/2;ProcRank++) { // get indices i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j); // STEP 3a: Calculate G gsl_matrix_set_zero(G); for (k=0;kNumMatrices;k++) { // go through all operators ... // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j)); //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i)); gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j)); gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i)); //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1)); // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] for (l=0;l<2;l++) for (m=0;m<2;m++) gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); //PrintGSLMatrix(P,G,2, "G"); } //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G EigensolverFor22Matrix(P,G,eval,evec); //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G //PrintGSLMatrix(P, evec, 2, "Eigenvectors"); CalculateRotationAnglesFromEigenvalues(evec, eval, c, s); //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]); //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j); // STEP 3d: apply rotation R to rows and columns of A^{(k)} for (k=0;kNumMatrices+DiagData->extra;k++) {// one extra for B matrix ! //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)"); for (l=0;lAllocNum;l++) { // Rows a_i = gsl_matrix_get(DiagData->A[k],i,l); a_j = gsl_matrix_get(DiagData->A[k],j,l); gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j); gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j); } for (l=0;lAllocNum;l++) { // Columns a_i = gsl_matrix_get(DiagData->A[k],l,i); a_j = gsl_matrix_get(DiagData->A[k],l,j); gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j); gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j); } //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)"); } //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)"); // STEP 3e: apply U = R*U for (l=0;lAllocNum;l++) { a_i = gsl_matrix_get(DiagData->U,i,l); a_j = gsl_matrix_get(DiagData->U,j,l); gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j); gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j); } //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)"); } // and merry-go-round for the indices too //debug(P,"and merry-go-round for the indices too"); if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2); } //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me); // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 old_spread = spread; spread = 0.; for(k=0;kNumMatrices;k++) { // go through all self-adjoint operators for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2); } } if(P->Par.me == 0) { //if(P->Call.out[ReadOut]) // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier); //else //fprintf(stderr,"%2.9e\n",spread); } // STEP 5: check change of variance } while (fabs(old_spread-spread) >= R->EpsWannier); // end of iterative diagonalization loop: We have found our final orthogonal U! Free(c, "SerialDiagonalization: c"); Free(s, "SerialDiagonalization: s"); // free eigenvector stuff gsl_vector_free(h); gsl_matrix_free(G); //gsl_eigen_symmv_free(w); gsl_vector_free(eval); gsl_matrix_free(evec); } /** Writes the wannier centres and spread to file. * Also puts centres from array into OnePsiElementAddData structure. * \param *P Problem at hand * \param old_spread first term of wannier spread * \param spread second term of wannier spread * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres * \param *WannierSpread array with wannier spread per wave function */ void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread) { struct FileData *F = &P->Files; struct RunStruct *R = &P->R; struct Lattice *Lat = &P->Lat; struct Psis *Psi = &Lat->Psi; struct OnePsiElement *OnePsiA; char spin[12], suffix[18]; int i,j,l; if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me); switch (Lat->Psi.PsiST) { case SpinDouble: strcpy(suffix,".spread.csv"); strcpy(spin,"SpinDouble"); break; case SpinUp: strcpy(suffix,".spread_up.csv"); strcpy(spin,"SpinUp"); break; case SpinDown: strcpy(suffix,".spread_down.csv"); strcpy(spin,"SpinDown"); break; } if (P->Par.me_comm_ST == 0) { if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile) OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level if (F->SpreadFile == NULL) { Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n"); } else { fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin); fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n"); } } // put (new) WannierCentres into local ones and into file i=-1; for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA if (OnePsiA->PsiType == type) { // drop all but occupied ones i++; // increase l if it is occupied wave function if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local? for (j=0;jAddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j]; } if (P->Par.me_comm_ST == 0) fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]); } } if (P->Par.me_comm_ST == 0) { fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n"); fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread); fflush(F->SpreadFile); } } /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre. * \param *P Problem at hand * \return 1 - success, 0 - failure */ int ParseWannierFile(struct Problem *P) { struct Lattice *Lat = &P->Lat; struct RunStruct *R = &P->R; struct Psis *Psi = &Lat->Psi; struct OnePsiElement *OnePsiA; int i,l,j, msglen; FILE *SpreadFile; char *tagname = NULL; char suffix[18]; double WannierCentre[NDIM+1]; // combined centre and spread MPI_Status status; int signal = 0; // 1 - ok, 0 - error switch (Lat->Psi.PsiST) { case SpinDouble: strcpy(suffix,".spread.csv"); break; case SpinUp: strcpy(suffix,".spread_up.csv"); break; case SpinDown: strcpy(suffix,".spread_down.csv"); break; } if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me); if (P->Par.me_comm_ST == 0) { tagname = (char *) Malloc(sizeof(char)*255, "ParseWannierFile: *tagname"); if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); return 0; //Error(SomeError,"ParseWannierFile: Opening failed\n"); } signal = 1; if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); } else { if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); if (signal == 0) return 0; } i=-1; for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA if (OnePsiA->PsiType == type) { // drop all but occupied ones i++; // increase l if it is occupied wave function if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo); signal = 0; if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) { //Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed"); if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); return 0; } if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) { //Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed"); if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); return 0; } signal = 1; if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); } else { if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: Bcast of signal failed\n"); if (signal == 0) return 0; } if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local? if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed"); //return 0; MPI_Get_count(&status, MPI_DOUBLE, &msglen); if (msglen != NDIM+1) Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count"); //return 0; } if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed"); //return 0; // and store 'em (for all who have this Psi local) if ((P->Par.me == 0) && P->Call.out[ValueOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]); for (j=0;jAddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j]; Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM]; //if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread); } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS) Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed"); //return 0; } } } if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) { fclose(SpreadFile); Free(tagname, "ParseWannierFile: *tagname"); } //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me); return 1; } /** Calculates the spread of orbital \a i. * Stored in OnePsiElementAddData#WannierSpread. * \param *P Problem at hand * \param i i-th wave function (note "extra" ones are not counted!) * \return spread \f$\sigma^2_{A^{(k)}}\f$ */ double CalculateSpread(struct Problem *P, int i) { struct Lattice *Lat = &P->Lat; struct RunStruct *R = &P->R; struct Psis *Psi = &Lat->Psi; struct LatticeLevel *Lev0 = R->Lev0; struct LatticeLevel *LevS = R->LevS; struct Density *Dens0 = Lev0->Dens; struct fft_plan_3d *plan = Lat->plan; fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; fftw_real *PsiCR = (fftw_real *)PsiC; fftw_complex *work = Dens0->DensityCArray[Temp2Density]; fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array fftw_complex **HGcRC = (fftw_complex**)HGcR; fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array fftw_real **HGcR2 = (fftw_real**)HGcR2C; MPI_Status status; struct OnePsiElement *OnePsiA, *LOnePsiA; int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; fftw_complex *LPsiDatA=NULL; int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index; N0 = LevS->Plan0.plan->local_nx; N = LevS->Plan0.plan->N; const int NUpx = LevS->NUp[0]; const int NUpy = LevS->NUp[1]; const int NUpz = LevS->NUp[2]; double a_ij, b_ij, A_ij, B_ij; double tmp, tmp2, spread = 0; double **cos_lookup, **sin_lookup; b_ij = 0; CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N); // fill matrices OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA if (OnePsiA->PsiType != Extra) { // drop if extra one if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo]; else LOnePsiA = NULL; if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi RecvSource = OnePsiA->my_color_comm_ST_Psi; MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status ); LPsiDatA=LevS->LPsi->TempPsi; } else { // .. otherwise send it to all other processes (Max_me... - 1) for (p=0;pPar.Max_me_comm_ST_PsiT;p++) if (p != OnePsiA->my_color_comm_ST_Psi) MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT); LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; } // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! for (n0=0;n0Plan0.plan->start_nx; for (k=0;kMaxN; // check lookup if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) { fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]); Error(SomeError, "Lookup table does not match real value!"); } // HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */ // HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */ // HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */ // HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */ HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */ HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */ HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */ } } for (k=0;kLevelNo, FFTNF1, HGcRC[k], work); fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work); } for (k=0;kPar.me, l,m,k); // sum directly in a_ij and b_ij the two desired terms g=0; if (LevS->GArray[0].GSq == 0.0) { Index = LevS->GArray[g].Index; a_ij += (LPsiDatA[0].re*HGcRC[k][Index].re + LPsiDatA[0].im*HGcRC[k][Index].im); b_ij += (LPsiDatA[0].re*HGcR2C[k][Index].re + LPsiDatA[0].im*HGcR2C[k][Index].im); g++; } for (; g < LevS->MaxG; g++) { Index = LevS->GArray[g].Index; a_ij += 2*(LPsiDatA[g].re*HGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im); b_ij += 2*(LPsiDatA[g].re*HGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im); } // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...) MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); spread += pow(A_ij,2); } } MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); // store spread in OnePsiElementAdd Psi->AddData[i].WannierSpread = B_ij - spread; FreeSinCosLookupTable(cos_lookup,sin_lookup); return (B_ij - spread); }