Changeset 4f1369
- Timestamp:
- Apr 23, 2008, 1:45:37 PM (17 years ago)
- Children:
- 77f1ae
- Parents:
- 9bdd86
- Location:
- pcp/src
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
pcp/src/wannier.c
r9bdd86 r4f1369 12 12 13 13 File: wannier.c 14 $Id: wannier.c,v 1. 63 2007-02-13 14:15:29 fooExp $14 $Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $ 15 15 */ 16 16 … … 30 30 #include "density.h" 31 31 #include "errors.h" 32 #include "gramsch.h" 32 33 #include "helpers.h" 33 34 #include "init.h" … … 35 36 #include "mymath.h" 36 37 #include "output.h" 38 #include "perturbed.h" 37 39 #include "wannier.h" 38 40 39 41 40 42 #define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread 43 #define type Occupied 41 44 42 45 … … 57 60 * \note taken from [Golub, Matrix computations, 1989, p451] 58 61 */ 59 void music(int *top, int *bot, int m)62 void MerryGoRoundIndices(int *top, int *bot, int m) 60 63 { 61 64 int *old_top, *old_bot; … … 96 99 fprintf(stderr,"\n");*/ 97 100 // and finito 98 Free(old_top, " bla");99 Free(old_bot, " bla");101 Free(old_top, "MerryGoRoundIndices: old_top"); 102 Free(old_bot, "MerryGoRoundIndices: old_bot"); 100 103 } 101 104 … … 114 117 * \param tagR1 MPI tag for receiving right column 115 118 */ 116 void shiftcolumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {119 void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) { 117 120 //double *A_locS1, *A_locS2; // local columns of A[k] 118 121 //double *A_locR1, *A_locR2; // local columns of A[k] … … 213 216 MPI_Wait(&requestR1, &status); 214 217 //fprintf(stderr,"...done\n"); 218 } 219 220 /** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group. 221 * \param *P Problem at hand 222 * \param AllocNum number of rows in matrix 223 * \return address of MPI communicator 224 */ 225 #ifdef HAVE_INLINE 226 inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum) 227 #else 228 MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum) 229 #endif 230 { 231 MPI_Comm *comm = &P->Par.comm_ST; 232 233 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi); 234 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me); 235 if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel 236 comm = &P->Par.comm_ST; 237 //if (P->Call.out[ReadOut]) fprintf(stderr,"all\n"); 238 } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first 239 if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel 240 comm = &P->Par.comm_ST_Psi; 241 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n"); 242 } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel 243 comm = &P->Par.comm_ST_PsiT; 244 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n"); 245 } 246 } else { 247 if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel 248 comm = &P->Par.comm_ST_PsiT; 249 //if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n"); 250 } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel 251 comm = &P->Par.comm_ST_Psi; 252 //if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n"); 253 } 254 } 255 return comm; 256 } 257 258 /** Allocates and fills Lookup table for sin/cos values at each grid node. 259 * \param ***cos_table pointer to two-dimensional lookup table for cosine values 260 * \param ***sin_table pointer to two-dimensional lookup table for sine values 261 * \param *N array with number of nodes per \a NDIM axis 262 */ 263 void CreateSinCosLookupTable(double ***cos_table, double ***sin_table, int *N) 264 { 265 int i, j; 266 double argument; 267 double **cos_lookup, **sin_lookup; 268 269 // create lookup table for sin/cos values 270 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); 271 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); 272 for (i=0;i<NDIM;i++) { 273 // allocate memory 274 cos_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: cos_lookup"); 275 sin_lookup[i] = (double *) Malloc(sizeof(double)*N[i], "ComputeMLWF: sin_lookup"); 276 277 // reset arrays 278 SetArrayToDouble0(cos_lookup[i],N[i]); 279 SetArrayToDouble0(sin_lookup[i],N[i]); 280 281 // create lookup values 282 for (j=0;j<N[i];j++) { 283 argument = 2*PI/(double)N[i]*(double)j; 284 cos_lookup[i][j] = cos(argument); 285 sin_lookup[i][j] = sin(argument); 286 } 287 } 288 *cos_table = cos_lookup; 289 *sin_table = sin_lookup; 290 } 291 292 /** Frees memory allocated during CreateSinCosLookupTable(). 293 * \param ***cos_lookup pointer to two-dimensional lookup table for cosine values 294 * \param ***sin_lookup pointer to two-dimensional lookup table for sine values 295 */ 296 void FreeSinCosLookupTable(double **cos_lookup, double **sin_lookup) 297 { 298 int i; 299 // free lookups 300 for (i=0;i<NDIM;i++) { 301 Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]"); 302 Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]"); 303 } 304 Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup"); 305 Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup"); 306 } 307 308 /** Fills the entries of the six variance matrices. 309 * These matrices are parallely diagonalized during Wannier Localization. They are calculated from the 310 * wave function and by diagonalization one obtains the unitary transformation with which the Psis are 311 * treated afterwards. 312 * \param *P Problem at hand 313 * \param AllocNum number of rows/columns 314 * \param **A pointer to variance matrices 315 * \sa ComputeMLWF() - master function. 316 */ 317 void FillHigherOrderRealMomentsMatrices(struct Problem *P, int AllocNum, gsl_matrix **A) 318 { 319 struct Lattice *Lat = &P->Lat; 320 struct RunStruct *R = &P->R; 321 struct Psis *Psi = &Lat->Psi; 322 struct LatticeLevel *Lev0 = R->Lev0; 323 struct LatticeLevel *LevS = R->LevS; 324 struct Density *Dens0 = Lev0->Dens; 325 struct OnePsiElement *OnePsiA, *OnePsiB, *LOnePsiB; 326 struct fft_plan_3d *plan = Lat->plan; 327 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; 328 fftw_real *PsiCR = (fftw_real *)PsiC; 329 fftw_complex *work = Dens0->DensityCArray[Temp2Density]; 330 fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array 331 fftw_complex **HGcRC = (fftw_complex**)HGcR; 332 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array 333 fftw_real **HGcR2 = (fftw_real**)HGcR2C; 334 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; 335 MPI_Status status; 336 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; 337 int n[NDIM],n0,i0,iS, Index; 338 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows 339 int N0 = LevS->Plan0.plan->local_nx; 340 int *N = LevS->Plan0.plan->N; 341 const int NUpx = LevS->NUp[0]; 342 const int NUpy = LevS->NUp[1]; 343 const int NUpz = LevS->NUp[2]; 344 double argument, PsiAtNode; 345 int e,g,i,j,k,l,m,p,u; 346 double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0; 347 double **cos_lookup = NULL,**sin_lookup = NULL; 348 349 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me); 350 351 debug(P, "Creating Lookup Table"); 352 CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N); 353 354 debug(P, "Calculating each entry of variance matrices"); 355 l=-1; // to access U matrix element (0..Num-1) 356 // fill the matrices 357 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions 358 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA 359 if (OnePsiA->PsiType == type) { // drop all but occupied ones 360 l++; // increase l if it is non-extra wave function 361 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 362 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; 363 else 364 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients! 365 366 //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0); 367 if (LPsiDatA != NULL) { 368 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); 369 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! 370 for (n0=0;n0<N0;n0++) 371 for (n[1]=0;n[1]<N[1];n[1]++) 372 for (n[2]=0;n[2]<N[2];n[2]++) { 373 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx); 374 iS = n[2]+N[2]*(n[1]+N[1]*n0); 375 n[0] = n0 + LevS->Plan0.plan->start_nx; 376 for (k=0;k<max_operators;k+=2) { 377 e = k/2; 378 argument = 2.*PI/(double)(N[e])*(double)(n[e]); 379 PsiAtNode = PsiCR[i0] /LevS->MaxN; 380 // check lookup 381 if (!l) // perform check on first wave function only 382 if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) { 383 Error(SomeError, "Lookup table does not match real value!"); 384 } 385 HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */ 386 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */ 387 HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */ 388 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ 389 } 390 } 391 for (u=0;u<max_operators;u++) { 392 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work); 393 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work); 394 } 395 } 396 m = -1; // to access U matrix element (0..Num-1) 397 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions 398 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB 399 if (OnePsiB->PsiType == type) { // drop all but occupied ones 400 m++; // increase m if it is non-extra wave function 401 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 402 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; 403 else 404 LOnePsiB = NULL; 405 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi 406 RecvSource = OnePsiB->my_color_comm_ST_Psi; 407 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); 408 LPsiDatB=LevS->LPsi->TempPsi; 409 } else { // .. otherwise send it to all other processes (Max_me... - 1) 410 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) 411 if (p != OnePsiB->my_color_comm_ST_Psi) 412 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); 413 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; 414 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received 415 416 for (u=0;u<max_operators;u++) { 417 a_ij = 0; 418 b_ij = 0; 419 if (LPsiDatA != NULL) { // calculate, reduce and send to all ... 420 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u); 421 g=0; 422 if (LevS->GArray[0].GSq == 0.0) { 423 Index = LevS->GArray[g].Index; 424 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im); 425 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im); 426 g++; 427 } 428 for (; g < LevS->MaxG; g++) { 429 Index = LevS->GArray[g].Index; 430 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im); 431 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im); 432 } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...) 433 // sum up elements from all coefficients sharing processes 434 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); 435 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); 436 a_ij = A_ij; 437 b_ij = B_ij; 438 // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!) 439 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 440 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 441 } else { // receive ... 442 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 443 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 444 } 445 // ... and store 446 //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij); 447 //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij); 448 gsl_matrix_set(A[u], l, m, A_ij); 449 gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m)); 450 } 451 } 452 } 453 } 454 } 455 // reset extra entries 456 for (u=0;u<=max_operators;u++) { 457 for (i=Num;i<AllocNum;i++) 458 for (j=0;j<AllocNum;j++) 459 gsl_matrix_set(A[u], i,j, 0.); 460 for (i=Num;i<AllocNum;i++) 461 for (j=0;j<AllocNum;j++) 462 gsl_matrix_set(A[u], j,i, 0.); 463 } 464 FreeSinCosLookupTable(cos_lookup, sin_lookup); 465 466 /*// print A matrices for debug 467 if (P->Par.me == 0) 468 for (u=0;u<max_operators+1;u++) { 469 fprintf(stderr, "A[%i] = \n",u); 470 for (i=0;i<Num;i++) { 471 for (j=0;j<Num;j++) 472 fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j)); 473 fprintf(stderr, "\n"); 474 } 475 } 476 */ 477 } 478 479 /** Calculates reciprocal second order moments of each PsiType#Occupied orbital. 480 * First order is zero due to wave function being real (symmetry condition). 481 * \param *P Problem at hand 482 */ 483 void CalculateSecondOrderReciprocalMoment(struct Problem *P) 484 { 485 struct Lattice *Lat = &P->Lat; 486 struct RunStruct *R = &P->R; 487 struct FileData *F = &P->Files; 488 struct Psis *Psi = &Lat->Psi; 489 struct LatticeLevel *LevS = R->LevS; 490 double result, Result; 491 fftw_complex *LPsiDatA=NULL; 492 struct OnePsiElement *OnePsiA; 493 int i,j,l,g; 494 char spin[12], suffix[18]; 495 496 switch (Lat->Psi.PsiST) { 497 case SpinDouble: 498 strcpy(suffix,".recispread.csv"); 499 strcpy(spin,"SpinDouble"); 500 break; 501 case SpinUp: 502 strcpy(suffix,".recispread_up.csv"); 503 strcpy(spin,"SpinUp"); 504 break; 505 case SpinDown: 506 strcpy(suffix,".recispread_down.csv"); 507 strcpy(spin,"SpinDown"); 508 break; 509 } 510 if(P->Par.me_comm_ST == 0) { 511 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me); 512 if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level 513 OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level 514 else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile) 515 OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level 516 if (F->ReciSpreadFile == NULL) { 517 Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n"); 518 } else { 519 fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin); 520 } 521 } 522 523 // integrate second order moment 524 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions 525 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA 526 if (OnePsiA->PsiType == type) { // drop all but occupied ones 527 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 528 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; 529 else 530 LPsiDatA = NULL; 531 532 if (LPsiDatA != NULL) { 533 if (P->Par.me_comm_ST == 0) 534 fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo); 535 for (i=0;i<NDIM;i++) { 536 for (j=0;j<NDIM;j++) { 537 result = 0.; 538 g = 0; 539 if (LevS->GArray[0].GSq == 0.0) { 540 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*(LPsiDatA[0].re * LPsiDatA[0].re); 541 g++; 542 } 543 for (;g<LevS->MaxG;g++) 544 result += LevS->GArray[g].G[i]*LevS->GArray[g].G[j]*2.*(LPsiDatA[g].re * LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im); 545 //result *= Lat->Volume/LevS->MaxG; 546 MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); 547 if (P->Par.me_comm_ST == 0) 548 fprintf(F->ReciSpreadFile,"%2.5lg ", Result); 549 } 550 } 551 if (P->Par.me_comm_ST == 0) 552 fprintf(F->ReciSpreadFile,"\n"); 553 } 554 } 555 } 556 if(P->Par.me_comm_ST == 0) { 557 fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n"); 558 fflush(F->ReciSpreadFile); 559 } 560 } 561 562 /** Given the unitary matrix the transformation is performed on the Psis. 563 * \param *P Problem at hand 564 * \param *U gsl matrix containing the transformation matrix 565 * \param Num dimension parameter for the matrix, i.e. number of wave functions 566 */ 567 void UnitaryTransformationOnWavefunctions(struct Problem *P, gsl_matrix *U, int Num) 568 { 569 struct Lattice *Lat = &P->Lat; 570 struct RunStruct *R = &P->R; 571 struct Psis *Psi = &Lat->Psi; 572 struct LatticeLevel *LevS = R->LevS; 573 MPI_Status status; 574 struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB; 575 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; 576 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; 577 int g,i,j,l,k,m,p; 578 579 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me); 580 581 Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here 582 fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer"); 583 584 for (l=0;l<Num;l++) // allocate for each local wave function 585 coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l]; 586 587 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me); 588 l=-1; // to access U matrix element (0..Num-1) 589 k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1) 590 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions 591 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA 592 if (OnePsiA->PsiType == type) { // drop all but occupied ones 593 l++; // increase l if it is occupied wave function 594 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local? 595 k++; // increase k only if it is a local, non-extra orbital wave function 596 LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet 597 SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part 598 } else 599 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients! 600 601 m = -1; // to access U matrix element (0..Num-1) 602 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions 603 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB 604 if (OnePsiB->PsiType == type) { // drop all but occupied ones 605 m++; // increase m if it is occupied wave function 606 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 607 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; 608 else 609 LOnePsiB = NULL; 610 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi 611 RecvSource = OnePsiB->my_color_comm_ST_Psi; 612 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); 613 LPsiDatB=LevS->LPsi->TempPsi; 614 } else { // .. otherwise send it to all other processes (Max_me... - 1) 615 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) 616 if (p != OnePsiB->my_color_comm_ST_Psi) 617 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); 618 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; 619 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received 620 621 if (LPsiDatA != NULL) { 622 double tmp = gsl_matrix_get(U,l,m); 623 g=0; 624 if (LevS->GArray[0].GSq == 0.0) { 625 LPsiDatA[g].re += LPsiDatB[g].re * tmp; 626 LPsiDatA[g].im += LPsiDatB[g].im * tmp; 627 g++; 628 } 629 for (; g < LevS->MaxG; g++) { 630 LPsiDatA[g].re += LPsiDatB[g].re * tmp; 631 LPsiDatA[g].im += LPsiDatB[g].im * tmp; 632 } 633 } 634 } 635 } 636 } 637 } 638 639 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me); 640 // now, as all wave functions are updated, swap the buffer 641 l = -1; 642 for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function 643 if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { 644 l++; 645 //if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo); 646 LPsiDatA = (fftw_complex *)coeffs_buffer[l]; 647 LPsiDatB = LevS->LPsi->LocalPsi[l]; 648 for (g=0;g<LevS->MaxG;g++) { 649 LPsiDatB[g].re = LPsiDatA[g].re; 650 LPsiDatB[g].im = LPsiDatA[g].im; 651 } 652 // recalculating non-local form factors which are coefficient dependent! 653 CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo); 654 } 655 } 656 // and free allocated buffer memory 657 Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer"); 658 } 659 660 /** Changes Wannier Centres according to RunStruct#CommonWannier. 661 * \param *P Problem at hand. 662 * \param Num number of Psis 663 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres 664 * \param *WannierSpread array with wannier spread per wave function 665 */ 666 void ChangeWannierCentres(struct Problem *P, int Num, double **WannierCentre, double *WannierSpread) 667 { 668 struct RunStruct *R = &P->R; 669 struct Lattice *Lat = &P->Lat; 670 struct LatticeLevel *LevS = R->LevS; 671 int *marker, **group; 672 int partner[Num]; 673 int i,j,l,k; 674 int totalflag, flag; 675 double q[NDIM], center[NDIM]; 676 double Spread; 677 int *N = LevS->Plan0.plan->N; 678 679 switch (R->CommonWannier) { 680 case 4: 681 debug(P,"Shifting each Wannier centers to cell center"); 682 for (j=0;j<NDIM;j++) // center point in [0,1]^3 683 center[j] = 0.5; 684 RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates 685 for (i=0; i < Num; i++) { // go through all occupied wave functions 686 for (j=0;j<NDIM;j++) // put into Wannier centres 687 WannierCentre[i][j] = q[j]; 688 } 689 break; 690 case 3: 691 debug(P,"Shifting Wannier centers individually to nearest grid point"); 692 for (i=0;i < Num; i++) { // go through all wave functions 693 RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]); 694 for (j=0;j<NDIM;j++) { // Recibasis is not true inverse but times 2.*PI 695 q[j] *= (double)N[j]/(2.*PI); 696 697 //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp)); 698 if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j])) 699 q[j] = floor(q[j])/(double)N[j]; 700 else 701 q[j] = ceil(q[j])/(double)N[j]; 702 } 703 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q); 704 } 705 break; 706 case 2: 707 debug(P,"Combining individual orbitals according to spread."); 708 //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me); 709 debug(P,"finding partners"); 710 marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker"); 711 group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group"); 712 for (l=0;l<Num;l++) { 713 group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker 714 for (k=0;k<=Num;k++) 715 group[l][k] = -1; // reset partner group 716 } 717 for (k=0;k<Num;k++) 718 partner[k] = 0; 719 debug(P,"mem allocated"); 720 // go for each orbital through every other, check distance against the sum of both spreads 721 // if smaller add to group of this orbital 722 for (l=0;l<Num;l++) { 723 j=0; // index for partner group 724 for (k=0;k<Num;k++) { // check this against l 725 Spread = 0.; 726 for (i=0;i<NDIM;i++) { 727 //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]); 728 Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]); 729 } 730 Spread = sqrt(Spread); // distance in Spread 731 //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]); 732 if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread 733 group[l][j++] = k; // add k to group of l 734 partner[l]++; 735 //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l); 736 } 737 } 738 } 739 740 // consistency, for each orbital check if this orbital is also in the group of each referred orbital 741 debug(P,"checking consistency"); 742 totalflag = 1; 743 for (l=0;l<Num;l++) // checking l's group 744 for (k=0;k<Num;k++) { // k is partner index 745 if (group[l][k] != -1) { // if current index k is a partner 746 flag = 0; 747 for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists 748 if ((group[ group[l][k] ][j] == l)) 749 flag = 1; 750 } 751 //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]); 752 if (totalflag == 1) totalflag = flag; 753 } 754 } 755 // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres 756 debug(P,"weight and calculate new centers for partner groups"); 757 for (l=0;l<=Num;l++) 758 marker[l] = 1; 759 if (totalflag) { 760 for (l=0;l<Num;l++) { // go through each orbital 761 if (marker[l] != 0) { // if it hasn't been reweighted 762 marker[l] = 0; 763 for (i=0;i<NDIM;i++) 764 q[i] = 0.; 765 j = 0; 766 while (group[l][j] != -1) { 767 marker[group[l][j]] = 0; 768 for (i=0;i<NDIM;i++) { 769 //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]); 770 q[i] += WannierCentre[ group[l][j] ][i]; 771 } 772 j++; 773 } 774 //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j); 775 for (i=0;i<NDIM;i++) {// weight by number of elements in partner group 776 q[i] /= (double)(j); 777 } 778 779 // put WannierCentre into own and all partners' 780 for (i=0;i<NDIM;i++) 781 WannierCentre[l][i] = q[i]; 782 j = 0; 783 while (group[l][j] != -1) { 784 for (i=0;i<NDIM;i++) 785 WannierCentre[group[l][j]][i] = q[i]; 786 j++; 787 } 788 } 789 } 790 } 791 if (P->Call.out[StepLeaderOut]) { 792 fprintf(stderr,"Summary:\n"); 793 fprintf(stderr,"========\n"); 794 for (i=0;i<Num;i++) 795 fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]); 796 } 797 debug(P,"done"); 798 799 Free(marker, "ChangeWannierCentres: marker"); 800 for (l=0;l<Num;l++) 801 Free(group[l], "ChangeWannierCentres: group[l]"); 802 Free(group, "ChangeWannierCentres: group"); 803 break; 804 case 1: 805 debug(P,"Individual orbitals are changed to center of all."); 806 for (i=0;i<NDIM;i++) // zero center of weight 807 q[i] = 0.; 808 for (k=0;k<Num;k++) 809 for (i=0;i<NDIM;i++) { // sum up all orbitals each component 810 q[i] += WannierCentre[k][i]; 811 } 812 for (i=0;i<NDIM;i++) // divide by number 813 q[i] /= Num; 814 for (k=0;k<Num;k++) 815 for (i=0;i<NDIM;i++) { // put into this function's array 816 WannierCentre[k][i] = q[i]; 817 } 818 break; 819 case 0: 820 default: 821 break; 822 } 823 } 824 825 /** From the entries of the variance matrices the spread is calculated. 826 * WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j | \exp{(i \frac{2\pi}{L} X)} | \Psi_j \rangle}\f$ 827 * WannierSpread is: \f$ \sum_j \langle \Psi_j | r^2 | \Psi_j \rangle - \langle \Psi_j | r | psi_j \rangle^2\f$ 828 * \param *P Problem at hand 829 * \param *A variance matrices 830 * \param old_spread first term of wannier spread 831 * \param spread second term of wannier spread 832 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres 833 * \param *WannierSpread array with wannier spread per wave function 834 */ 835 void ComputeWannierCentresfromVarianceMatrices(struct Problem *P, gsl_matrix **A, double *spread, double *old_spread, double **WannierCentre, double *WannierSpread) 836 { 837 struct Lattice *Lat = &P->Lat; 838 struct Psis *Psi = &Lat->Psi; 839 struct OnePsiElement *OnePsiA; 840 int i,j,k,l; 841 double tmp, q[NDIM]; 842 843 *old_spread = 0; 844 *spread = 0; 845 846 // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital 847 i=-1; 848 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions 849 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA 850 if (OnePsiA->PsiType == type) { // drop all but occupied ones 851 i++; // increase l if it is occupied wave function 852 //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i); 853 854 // calculate Wannier Centre 855 for (j=0;j<NDIM;j++) { 856 q[j] = 1./(2.*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i)))); 857 if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ 858 q[j] += 1.; 859 } 860 RMat33Vec3(WannierCentre[i], Lat->RealBasis, q); 861 862 // store orbital spread and centre in file 863 tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2) 864 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2) 865 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2); 866 WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp; 867 //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]); 868 //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n", 869 //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]); 870 //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n", 871 //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]); 872 873 // gather all spreads 874 *old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U) 875 for (k=0;k<max_operators;k++) 876 *spread += pow(gsl_matrix_get(A[k],i,i),2); 877 } 878 } 879 } 880 881 /** Prints gsl_matrix nicely to screen. 882 * \param *P Problem at hand 883 * \param *U gsl matrix 884 * \param Num number of rows/columns 885 * \param *msg name of matrix, prepended before entries 886 */ 887 void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg) 888 { 889 int k,l; 890 fprintf(stderr,"(%i) %s = \n",P->Par.me, msg); 891 for (k=0;k<Num;k++) { 892 for (l=0;l<Num;l++) 893 fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k)); 894 fprintf(stderr,"\n"); 895 } 896 } 897 898 /** Allocates memory for the DiagonalizationData structure 899 * \param *P Problem at hand 900 * \param DiagData pointer to structure 901 * \param Num number of rows and columns in matrix 902 * \param NumMatrices number of matrices to be simultaneously diagonalized 903 * \param extra number of extra matrices to be passively also diagonalized 904 */ 905 void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra) 906 { 907 int i; 908 909 // store integer values 910 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me); 911 DiagData->Num = Num; 912 DiagData->AllocNum = ceil((double)Num / 2. ) *2; 913 DiagData->NumMatrices = NumMatrices; 914 DiagData->extra = extra; 915 916 // determine communicator and our rank and its size 917 DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum); 918 MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum)); 919 MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank)); 920 921 // allocate memory 922 DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum); 923 gsl_matrix_set_identity(DiagData->U); 924 925 DiagData->A = (gsl_matrix **) Malloc((DiagData->NumMatrices+DiagData->extra) * sizeof(gsl_matrix *), "InitDiagonalization: *A"); 926 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) { 927 DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum); 928 gsl_matrix_set_zero(DiagData->A[i]); 929 } 930 931 // init merry-go-round array for diagonilzation 932 DiagData->top = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: top"); 933 DiagData->bot = (int *) Malloc(sizeof(int)*DiagData->AllocNum/2, "InitDiagonalization: bot"); 934 for (i=0;i<DiagData->AllocNum/2;i++) { 935 DiagData->top[i] = 2*i; 936 DiagData->bot[i] = 2*i+1; 937 } 938 /* // print starting values of index generation tables top and bot 939 fprintf(stderr,"top\t"); 940 for (k=0;k<AllocNum/2;k++) 941 fprintf(stderr,"%i\t", top[k]); 942 fprintf(stderr,"\n"); 943 fprintf(stderr,"bot\t"); 944 for (k=0;k<AllocNum/2;k++) 945 fprintf(stderr,"%i\t", bot[k]); 946 fprintf(stderr,"\n");*/ 947 } 948 949 /** Frees allocated memory for the DiagonalizationData structure. 950 * \param DiagData pointer to structure 951 */ 952 void FreeDiagonalization(struct DiagonalizationData *DiagData) 953 { 954 int i; 955 956 Free(DiagData->top, "FreeDiagonalization: DiagData->top"); 957 Free(DiagData->bot, "FreeDiagonalization: DiagData->bot"); 958 gsl_matrix_free(DiagData->U); 959 for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) 960 gsl_matrix_free(DiagData->A[i]); 961 Free(DiagData->A, "FreeDiagonalization: DiagData->A"); 962 } 963 964 /** Orthogonalizing PsiType#Occupied wave functions for MD. 965 * Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged. 966 * To this end, the overlap matrix \f$\langle \Psi_i | \Psi_j \rangle\f$ is created and diagonalised by Jacobi 967 * transformation (product of rotation matrices). The found transformation matrix is applied to all Psis. 968 * \param *P Problem at hand 969 * \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well 970 * suited for our CG minimisation either. If one wave functions is changed in course of the minimsation 971 * procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under 972 * debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali- 973 * stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the 974 * density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a 975 * final orthogonalisation of all Psis, after the CG minimisation. 976 */ 977 void OrthogonalizePsis(struct Problem *P) 978 { 979 struct Lattice *Lat = &P->Lat; 980 struct Psis *Psi = &Lat->Psi; 981 struct LatticeLevel *LevS = P->R.LevS; 982 int Num = Psi->NoOfPsis; 983 int i,j,l; 984 struct DiagonalizationData DiagData; 985 double PsiSP; 986 987 InitDiagonalization(P, &DiagData, Num, 1, 0); 988 989 // Calculate Overlap matrix 990 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) 991 CalculateOverlap(P, l, Occupied); 992 for (i=0;i<Num;i++) // fill gsl matrix with overlap values 993 for (j=0;j<Num;j++) 994 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]); 995 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (before)"); 996 997 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)"); 998 999 // diagonalize overlap matrix 1000 Diagonalize(P, &DiagData); 1001 1002 //if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)"); 1003 1004 // apply found unitary transformation to wave functions 1005 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum); 1006 1007 // update GramSch stati 1008 for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) { 1009 GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given... 1010 PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]); 1011 //if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP); 1012 Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal; 1013 } 1014 UpdateGramSchAllPsiStatus(P,Psi); 1015 /* 1016 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) 1017 CalculateOverlap(P, l, Occupied); 1018 for (i=0;i<Num;i++) // fill gsl matrix with overlap values 1019 for (j=0;j<Num;j++) 1020 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]); 1021 if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i|Psi_J> (after)"); 1022 */ 1023 // free memory 1024 FreeDiagonalization(&DiagData); 1025 } 1026 1027 /** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD. 1028 * Ensures that two orthonormality condition are met for Psis: 1029 * \f$\langle \Psi_i | \Psi_j \rangle = \delta_{ij}\f$ 1030 * \f$\langle \dot{\Psi_i} | \dot{\Psi_j} \rangle = \delta_{ij}\f$ 1031 * \param *P Problem at hand 1032 * \note [Payne] states that GramSchmidt is not well-suited, and this stronger 1033 * condition is needed for numerical stability 1034 * \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat 1035 */ 1036 void StrongOrthogonalizePsis(struct Problem *P) 1037 { 1038 struct Lattice *Lat = &P->Lat; 1039 struct Psis *Psi = &Lat->Psi; 1040 int Num = Psi->NoOfPsis; 1041 int i,j,l; 1042 struct DiagonalizationData DiagData; 1043 1044 InitDiagonalization(P, &DiagData, Num, 2, 0); 1045 1046 // Calculate Overlap matrix 1047 for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) { 1048 CalculateOverlap(P, l, Occupied); 1049 //Calculate1stOverlap(P, l, Occupied); 1050 } 1051 for (i=0;i<Num;i++) // fill gsl matrix with overlap values 1052 for (j=0;j<Num;j++) { 1053 gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]); 1054 //gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]); 1055 } 1056 1057 // diagonalize overlap matrix 1058 Diagonalize(P, &DiagData); 1059 1060 // apply found unitary transformation to wave functions 1061 UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum); 1062 1063 // free memory 1064 FreeDiagonalization(&DiagData); 1065 } 1066 1067 /** Uses either serial or parallel diagonalization. 1068 * \param *P Problem at hand 1069 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization 1070 * \sa ParallelDiagonalization(), SerialDiagonalization() 1071 */ 1072 void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData) 1073 { 1074 if (DiagData->Num != 1) { // one- or multi-process case? 1075 if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) { 1076 /* 1077 debug(P,"Testing with silly matrix"); 1078 ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot); 1079 if(P->Call.out[ReadOut]) // && P->Par.me == 0) 1080 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)"); 1081 if(P->Call.out[ReadOut]) // && P->Par.me == 0) 1082 PrintGSLMatrix(P, Utest, 4, "Utest (final)"); 1083 */ 1084 debug(P,"ParallelDiagonalization"); 1085 ParallelDiagonalization(P, DiagData); 1086 } else {/* 1087 debug(P,"Testing with silly matrix"); 1088 SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot); 1089 if(P->Call.out[ReadOut]) // && P->Par.me == 0) 1090 PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)"); 1091 if(P->Call.out[ReadOut]) // && P->Par.me == 0) 1092 PrintGSLMatrix(P, Utest, 4, "Utest (final)"); 1093 */ 1094 debug(P,"SerialDiagonalization"); 1095 SerialDiagonalization(P, DiagData); 1096 } 1097 1098 //if(P->Call.out[ReadOut]) // && P->Par.me == 0) 1099 //PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U"); 1100 } 215 1101 } 216 1102 … … 256 1142 void ComputeMLWF(struct Problem *P) { 257 1143 // variables and allocation 258 struct FileData *F = &P->Files;259 1144 struct Lattice *Lat = &P->Lat; 260 struct RunStruct *R = &P->R;261 1145 struct Psis *Psi = &Lat->Psi; 262 struct LatticeLevel *Lev0 = R->Lev0; 263 struct LatticeLevel *LevS = R->LevS; 264 struct Density *Dens0 = Lev0->Dens; 265 struct fft_plan_3d *plan = Lat->plan; 266 fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity]; 267 fftw_real *PsiCR = (fftw_real *)PsiC; 268 fftw_complex *work = Dens0->DensityCArray[Temp2Density]; 269 fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array 270 fftw_complex **HGcRC = (fftw_complex**)HGcR; 271 fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array 272 fftw_real **HGcR2 = (fftw_real**)HGcR2C; 273 MPI_Status status; 274 struct OnePsiElement *OnePsiB, *OnePsiA, *LOnePsiB; 275 int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource; 276 fftw_complex *LPsiDatA=NULL, *LPsiDatB=NULL; 277 int n[NDIM],n0,i0,iS, Index; 278 int N0; 279 int N[NDIM]; 280 const int NUpx = LevS->NUp[0]; 281 const int NUpy = LevS->NUp[1]; 282 const int NUpz = LevS->NUp[2]; 283 int e,i,j,k,l,m,u,p,g; 1146 int i; 284 1147 int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows 1148 double **WannierCentre; 1149 double *WannierSpread; 1150 double spread, spreadSQ; 1151 struct DiagonalizationData DiagData; 1152 1153 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me); 1154 1155 InitDiagonalization(P, &DiagData, Num, max_operators, 1); 1156 1157 // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1)) 1158 //debug (P,"Calculatung Variance matrices"); 1159 FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A); 1160 1161 //debug (P,"Diagonalizing"); 1162 Diagonalize(P, &DiagData); 1163 1164 // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle 1165 //debug (P,"Performing Unitary transformation"); 1166 UnitaryTransformationOnWavefunctions(P, DiagData.U, Num); 1167 1168 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me); 1169 WannierCentre = (double **) Malloc(sizeof(double *)*Num, "ComputeMLWF: *WannierCentre"); 1170 WannierSpread = (double *) Malloc(sizeof(double)*Num, "ComputeMLWF: WannierSpread"); 1171 for (i=0;i<Num;i++) 1172 WannierCentre[i] = (double *) Malloc(sizeof(double)*NDIM, "ComputeMLWF: WannierCentre"); 1173 1174 //debug (P,"Computing Wannier centres from variance matrix"); 1175 ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread); 1176 1177 // join Wannier orbital to groups with common centres under certain conditions 1178 //debug (P,"Changing Wannier Centres according to CommonWannier"); 1179 ChangeWannierCentres(P, Num, WannierCentre, WannierSpread); 1180 1181 // write to file 1182 //debug (P,"Writing spread file"); 1183 WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread); 1184 1185 debug(P,"Free'ing memory"); 1186 // free all remaining memory 1187 FreeDiagonalization(&DiagData); 1188 for (i=0;i<Num;i++) 1189 Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]"); 1190 Free(WannierCentre, "ComputeMLWF: WannierCentre"); 1191 Free(WannierSpread, "ComputeMLWF: WannierSpread"); 1192 } 1193 1194 /** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix. 1195 * The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and 1196 * \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part. 1197 * 1198 * The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue 1199 * \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is 1200 * still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then 1201 * derive the other by solving the equation system. Finally, the eigenvector is normalized. 1202 * \param *P Problem at hand 1203 * \param *A matrix whose eigenvalues/-vectors are to be found 1204 * \param *eval vector with eigenvalues 1205 * \param *evec matrix with corresponding eigenvectors in columns 1206 */ 1207 #ifdef HAVE_INLINE 1208 inline void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec) 1209 #else 1210 void EigensolverFor22Matrix(struct Problem *P, gsl_matrix *A, gsl_vector *eval, gsl_matrix *evec) 1211 #endif 1212 { 1213 double a11,a12,a21,a22; 1214 double ev[2], summand1, summand2, norm; 1215 int i; 1216 // find eigenvalues 1217 a11 = gsl_matrix_get(A,0,0); 1218 a12 = gsl_matrix_get(A,0,1); 1219 a21 = gsl_matrix_get(A,1,0); 1220 a22 = gsl_matrix_get(A,1,1); 1221 summand1 = (a11+a22)/2.; 1222 summand2 = sqrt(summand1*summand1 + a12*a21 - a11*a22); 1223 ev[0] = summand1 + summand2; 1224 ev[1] = summand1 - summand2; 1225 gsl_vector_set(eval, 0, ev[0]); 1226 gsl_vector_set(eval, 1, ev[1]); 1227 //fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2); 1228 1229 // find eigenvectors 1230 for(i=0;i<2;i++) { 1231 if (fabs(ev[i]) < MYEPSILON) { 1232 gsl_matrix_set(evec, 0,i, 0.); 1233 gsl_matrix_set(evec, 1,i, 0.); 1234 } else if (fabs(a22-ev[i]) > MYEPSILON) { 1235 norm = sqrt(1*1 + (a21*a21/(a22-ev[i])/(a22-ev[i]))); 1236 gsl_matrix_set(evec, 0,i, 1./norm); 1237 gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm); 1238 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm); 1239 } else if (fabs(a12) > MYEPSILON) { 1240 norm = sqrt(1*1 + (a11-ev[i])*(a11-ev[i])/(a12*a12)); 1241 gsl_matrix_set(evec, 0,i, 1./norm); 1242 gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm); 1243 //fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm); 1244 } else { 1245 if (fabs(a11-ev[i]) > MYEPSILON) { 1246 norm = sqrt(1*1 + (a12*a12/(a11-ev[i])/(a11-ev[i]))); 1247 gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm); 1248 gsl_matrix_set(evec, 1,i, 1./norm); 1249 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm); 1250 } else if (fabs(a21) > MYEPSILON) { 1251 norm = sqrt(1*1 + (a22-ev[i])*(a22-ev[i])/(a21*a21)); 1252 gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm); 1253 gsl_matrix_set(evec, 1,i, 1./norm); 1254 //fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm); 1255 } else { 1256 //gsl_matrix_set(evec, 0,i, 0.); 1257 //gsl_matrix_set(evec, 1,i, 1.); 1258 fprintf (stderr,"\t evec %i undetermined\n", i); 1259 } 1260 //gsl_matrix_set(evec, 0,0, 1.); 1261 //gsl_matrix_set(evec, 1,0, 0.); 1262 //fprintf (stderr,"(%i) evec1 undetermined", P->Par.me); 1263 } 1264 } 1265 } 1266 1267 /** Calculates sine and cosine values for multiple matrix diagonalization. 1268 * \param *evec eigenvectors in columns 1269 * \param *eval corresponding eigenvalues 1270 * \param *c cosine to be returned 1271 * \param *s sine to be returned 1272 */ 1273 #ifdef HAVE_INLINE 1274 inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s) 1275 #else 1276 void CalculateRotationAnglesFromEigenvalues(gsl_matrix *evec, gsl_vector *eval, double *c, double *s) 1277 #endif 1278 { 1279 int index; 285 1280 double x,y,r; 286 double q[NDIM]; 287 double *c,*s; 288 int index; 289 double spread = 0., old_spread=0., Spread=0.; 290 double WannierCentre[Num][NDIM]; 291 double WannierSpread[Num]; 292 double tmp,tmp2; 293 double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0; 294 double **cos_lookup,**sin_lookup; 295 gsl_matrix *G; 296 gsl_vector *h; 297 gsl_vector *eval; 298 gsl_matrix *evec; 299 gsl_eigen_symmv_workspace *w; 300 int ProcNum, ProcRank, set; 301 int it_steps; // iteration step counter 302 int *top, *bot; 303 int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from 304 int left, right; // left or right neighbour for process 305 double **Aloc[max_operators+1], **Uloc; // local columns for one step of A[k] 306 double *Around[max_operators+1], *Uround; // all local columns for one round of A[k] 307 double *Atotal[max_operators+1], *Utotal; // all local columns for one round of A[k] 308 double a_i, a_j; 1281 1282 index = gsl_vector_max_index (eval); // get biggest eigenvalue 1283 //fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index); 1284 x = gsl_matrix_get(evec, 0, index); 1285 y = gsl_matrix_get(evec, 1, index); 1286 if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4 1287 y = -y; 1288 x = -x; 1289 } 1290 //z = gsl_matrix_get(evec, 2, index) * x/fabs(x); 1291 //fprintf(stderr,"\tx %lg\ty %lg\n", x,y); 1292 1293 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); 1294 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] 1295 r = sqrt(x*x + y*y); // + z*z); 1296 if (fabs(r) > MYEPSILON) { 1297 *c = sqrt((x + r) / (2.*r)); 1298 *s = y / sqrt(2.*r*(x+r)); //, -z / sqrt(2*r*(x+r))); 1299 } else { 1300 *c = 1.; 1301 *s = 0.; 1302 } 1303 } 1304 1305 /* 1306 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized 1307 * \param *U transformation matrix set to unity matrix 1308 * \param NumMatrices number of matrices to be diagonalized simultaneously 1309 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A) 1310 * \param AllocNum number of rows/columns in matrices 1311 * \param Num number of wave functions 1312 * \param ProcRank index in group for this cpu 1313 * \param ProcNum number of cpus in group 1314 * \param *top array with top row indices (merry-go-round) 1315 * \param *bot array with bottom row indices (merry-go-round) 1316 */ 1317 1318 /** Simultaneous diagonalization of matrices with multiple cpus. 1319 * \param *P Problem at hand 1320 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization 1321 * \note this is slower given one cpu only than SerialDiagonalization() 1322 */ 1323 void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData) 1324 { 1325 struct RunStruct *R =&P->R; 309 1326 int tagR0, tagR1, tagS0, tagS1; 310 1327 int iloc, jloc; … … 313 1330 int start; 314 1331 int *rcounts, *rdispls; 315 int AllocNum = ceil((double)Num / 2. ) *2; 316 int totalflag, flag; 317 int *marker, **group; 318 int partner[Num]; 319 int type = Occupied; 320 MPI_Comm *comm; 321 char spin[12], suffix[18]; 322 323 N0 = LevS->Plan0.plan->local_nx; 324 N[0] = LevS->Plan0.plan->N[0]; 325 N[1] = LevS->Plan0.plan->N[1]; 326 N[2] = LevS->Plan0.plan->N[2]; 327 328 comm = &P->Par.comm_ST; 329 330 fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi); 331 if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel 332 comm = &P->Par.comm_ST; 333 fprintf(stderr,"(%i) Jacobi is done parallely by all\n", P->Par.me); 334 } else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first 335 if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel 336 comm = &P->Par.comm_ST_Psi; 337 fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me); 338 } else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel 339 comm = &P->Par.comm_ST_PsiT; 340 fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me); 341 } 1332 double *c, *s; 1333 int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from 1334 int left, right; // left or right neighbour for process 1335 double spread = 0., old_spread=0., Spread=0.; 1336 int i,j,k,l,m,u; 1337 int set; 1338 int it_steps; // iteration step counter 1339 double **Aloc[DiagData->NumMatrices+1], **Uloc; // local columns for one step of A[k] 1340 double *Around[DiagData->NumMatrices+1], *Uround; // all local columns for one round of A[k] 1341 double *Atotal[DiagData->NumMatrices+1], *Utotal; // all local columns for one round of A[k] 1342 double a_i, a_j; 1343 gsl_matrix *G; 1344 gsl_vector *h; 1345 gsl_vector *eval; 1346 gsl_matrix *evec; 1347 //gsl_eigen_symmv_workspace *w; 1348 1349 max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set 1350 if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds); 1351 1352 // allocate column vectors for interchange of columns 1353 debug(P,"allocate column vectors for interchange of columns"); 1354 c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c"); 1355 s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s"); 1356 c_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: c_all"); 1357 s_all = (double *) Malloc(sizeof(double)*DiagData->AllocNum/2, "ComputeMLWF: s_all"); 1358 rcounts = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rcounts"); 1359 rdispls = (int *) Malloc(sizeof(int)*DiagData->ProcNum, "ComputeMLWF: rdispls"); 1360 1361 // allocate eigenvector stuff 1362 debug(P,"allocate eigenvector stuff"); 1363 G = gsl_matrix_calloc (2,2); 1364 h = gsl_vector_alloc (2); 1365 eval = gsl_vector_alloc (2); 1366 evec = gsl_matrix_alloc (2,2); 1367 //w = gsl_eigen_symmv_alloc(2); 1368 1369 // establish communication partners 1370 debug(P,"establish communication partners"); 1371 if (DiagData->ProcRank == 0) { 1372 tagS0 = WannierALTag; // left p0 always remains left p0 342 1373 } else { 343 if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel 344 comm = &P->Par.comm_ST_PsiT; 345 fprintf(stderr,"(%i) Jacobi is done parallely by PsiT\n", P->Par.me); 346 } else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel 347 comm = &P->Par.comm_ST_Psi; 348 fprintf(stderr,"(%i) Jacobi is done parallely by Psi\n", P->Par.me); 349 } 350 } 351 352 MPI_Comm_size (*comm, &ProcNum); 353 MPI_Comm_rank (*comm, &ProcRank); 354 355 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me); 356 357 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me); 358 359 // STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1)) 360 gsl_matrix *A[max_operators+1]; // one extra for B matrix 361 for (u=0;u<=max_operators;u++) 362 A[u] = gsl_matrix_calloc (AllocNum,AllocNum); // allocate matrix 363 364 // create lookup table for sin/cos values 365 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); 366 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); 367 for (i=0;i<NDIM;i++) { 368 // allocate memory 369 cos_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: cos_lookup"); 370 sin_lookup[i] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[i], "ComputeMLWF: sin_lookup"); 371 // reset arrays 372 SetArrayToDouble0(cos_lookup[i],LevS->Plan0.plan->N[i]); 373 SetArrayToDouble0(sin_lookup[i],LevS->Plan0.plan->N[i]); 374 // create lookup values 375 for (j=0;j<LevS->Plan0.plan->N[i];j++) { 376 tmp = 2*PI/(double)LevS->Plan0.plan->N[i]*(double)j; 377 cos_lookup[i][j] = cos(tmp); 378 sin_lookup[i][j] = sin(tmp); 379 } 380 } 381 l=-1; // to access U matrix element (0..Num-1) 382 // fill the matrices 383 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions 384 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA 385 if (OnePsiA->PsiType == type) { // drop all but occupied ones 386 l++; // increase l if it is non-extra wave function 387 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 388 LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo]; 389 else 390 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients! 391 392 //fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0); 393 if (LPsiDatA != NULL) { 394 CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1); 395 // note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()! 396 for (n0=0;n0<N0;n0++) 397 for (n[1]=0;n[1]<N[1];n[1]++) 398 for (n[2]=0;n[2]<N[2];n[2]++) { 399 i0 = n[2]*NUpz+N[2]*NUpz*(n[1]*NUpy+N[1]*NUpy*n0*NUpx); 400 iS = n[2]+N[2]*(n[1]+N[1]*n0); 401 n[0] = n0 + LevS->Plan0.plan->start_nx; 402 for (k=0;k<max_operators;k+=2) { 403 e = k/2; 404 tmp = 2*PI/(double)(N[e])*(double)(n[e]); 405 tmp2 = PsiCR[i0] /LevS->MaxN; 406 // check lookup 407 if (!l) // perform check on first wave function only 408 if ((fabs(cos(tmp) - cos_lookup[e][n[e]]) > MYEPSILON) || (fabs(sin(tmp) - sin_lookup[e][n[e]]) > MYEPSILON)) { 409 Error(SomeError, "Lookup table does not match real value!"); 410 } 411 HGcR[k][iS] = cos_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */ 412 HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */ 413 HGcR[k+1][iS] = sin_lookup[e][n[e]] * tmp2; /* Matrix Vector Mult */ 414 HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */ 415 } 416 } 417 for (u=0;u<max_operators;u++) { 418 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work); 419 fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work); 1374 tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last 1375 } 1376 tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1 1377 tagR0 = WannierALTag; // 1378 tagR1 = WannierARTag; // first process 1379 if (DiagData->ProcRank == 0) { 1380 left = DiagData->ProcNum-1; 1381 right = 1; 1382 Lsend = 0; 1383 Rsend = 1; 1384 Lrecv = 0; 1385 Rrecv = 1; 1386 } else if (DiagData->ProcRank == DiagData->ProcNum - 1) { 1387 left = DiagData->ProcRank - 1; 1388 right = 0; 1389 Lsend = DiagData->ProcRank; 1390 Rsend = DiagData->ProcRank - 1; 1391 Lrecv = DiagData->ProcRank - 1; 1392 Rrecv = DiagData->ProcRank; 1393 } else { 1394 left = DiagData->ProcRank - 1; 1395 right = DiagData->ProcRank + 1; 1396 Lsend = DiagData->ProcRank+1; 1397 Rsend = DiagData->ProcRank - 1; 1398 Lrecv = DiagData->ProcRank - 1; 1399 Rrecv = DiagData->ProcRank+1; 1400 } 1401 //if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv); 1402 1403 // initialise A_loc 1404 debug(P,"initialise A_loc"); 1405 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) { 1406 //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]"); 1407 Around[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Around[k]"); 1408 Atotal[k] = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]"); 1409 Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]"); 1410 //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds]; 1411 1412 for (round=0;round<max_rounds;round++) { 1413 Aloc[k][2*round] = &Around[k][DiagData->AllocNum*(2*round)]; 1414 Aloc[k][2*round+1] = &Around[k][DiagData->AllocNum*(2*round+1)]; 1415 for (l=0;l<DiagData->AllocNum;l++) { 1416 Aloc[k][2*round][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)); 1417 Aloc[k][2*round+1][l] = gsl_matrix_get(DiagData->A[k],l,2*(DiagData->ProcRank*max_rounds+round)+1); 1418 //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]); 1419 } 1420 } 1421 } 1422 // initialise U_loc 1423 debug(P,"initialise U_loc"); 1424 //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc"); 1425 Uround = (double *) Malloc(sizeof(double)*DiagData->AllocNum*2*max_rounds, "ComputeMLWF: Uround"); 1426 Utotal = (double *) Malloc(sizeof(double)*DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal"); 1427 Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc"); 1428 //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds]; 1429 for (round=0;round<max_rounds;round++) { 1430 Uloc[2*round] = &Uround[DiagData->AllocNum*(2*round)]; 1431 Uloc[2*round+1] = &Uround[DiagData->AllocNum*(2*round+1)]; 1432 for (l=0;l<DiagData->AllocNum;l++) { 1433 Uloc[2*round][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)); 1434 Uloc[2*round+1][l] = gsl_matrix_get(DiagData->U,l,2*(DiagData->ProcRank*max_rounds+round)+1); 1435 //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]); 1436 } 1437 } 1438 1439 // now comes the iteration loop 1440 debug(P,"now comes the iteration loop"); 1441 it_steps = 0; 1442 do { 1443 it_steps++; 1444 //if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps); 1445 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time 1446 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); 1447 for (round = 0; round < max_rounds;round++) { 1448 start = DiagData->ProcRank * max_rounds + round; 1449 // get indices 1450 i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices 1451 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; 1452 j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i 1453 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1; 1454 //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc); 1455 1456 // calculate rotation angle, i.e. c and s 1457 //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j); 1458 gsl_matrix_set_zero(G); 1459 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ... 1460 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] 1461 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); 1462 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); 1463 gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); 1464 gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); 1465 1466 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] 1467 for (l=0;l<2;l++) 1468 for (m=0;m<2;m++) 1469 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); 420 1470 } 421 } 422 m = -1; // to access U matrix element (0..Num-1) 423 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions 424 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB 425 if (OnePsiB->PsiType == type) { // drop all but occupied ones 426 m++; // increase m if it is non-extra wave function 427 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 428 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; 429 else 430 LOnePsiB = NULL; 431 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi 432 RecvSource = OnePsiB->my_color_comm_ST_Psi; 433 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); 434 LPsiDatB=LevS->LPsi->TempPsi; 435 } else { // .. otherwise send it to all other processes (Max_me... - 1) 436 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) 437 if (p != OnePsiB->my_color_comm_ST_Psi) 438 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); 439 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; 440 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received 441 442 for (u=0;u<max_operators;u++) { 443 a_ij = 0; 444 b_ij = 0; 445 if (LPsiDatA != NULL) { // calculate, reduce and send to all ... 446 //fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u); 447 g=0; 448 if (LevS->GArray[0].GSq == 0.0) { 449 Index = LevS->GArray[g].Index; 450 a_ij = (LPsiDatB[0].re*HGcRC[u][Index].re + LPsiDatB[0].im*HGcRC[u][Index].im); 451 b_ij = (LPsiDatB[0].re*HGcR2C[u][Index].re + LPsiDatB[0].im*HGcR2C[u][Index].im); 452 g++; 453 } 454 for (; g < LevS->MaxG; g++) { 455 Index = LevS->GArray[g].Index; 456 a_ij += 2*(LPsiDatB[g].re*HGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im); 457 b_ij += 2*(LPsiDatB[g].re*HGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im); 458 } // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...) 459 // sum up elements from all coefficients sharing processes 460 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); 461 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi); 462 a_ij = A_ij; 463 b_ij = B_ij; 464 // send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!) 465 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 466 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 467 } else { // receive ... 468 MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 469 MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT); 470 } 471 // ... and store 472 //fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij); 473 //fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij); 474 gsl_matrix_set(A[u], l, m, A_ij); 475 gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m)); 1471 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); 1472 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G 1473 EigensolverFor22Matrix(P,G,eval,evec); 1474 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G 1475 1476 CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]); 1477 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]); 1478 1479 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); 1480 // V_loc = V_loc * V_small 1481 //debug(P,"apply rotation to local U"); 1482 for (l=0;l<DiagData->AllocNum;l++) { 1483 a_i = Uloc[2*round+iloc][l]; 1484 a_j = Uloc[2*round+jloc][l]; 1485 Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j; 1486 Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j; 1487 } 1488 } // end of round 1489 // circulate the rotation angles 1490 //debug(P,"circulate the rotation angles"); 1491 MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation 1492 MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); 1493 //m = start; 1494 for (l=0;l<DiagData->AllocNum/2;l++) { // for each process 1495 // we have V_small from process k 1496 //debug(P,"Apply V_small from other process"); 1497 i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices 1498 j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i 1499 iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1; 1500 jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1; 1501 for (m=0;m<max_rounds;m++) { 1502 //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j); 1503 // apply row rotation to each A[k] 1504 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! 1505 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); 1506 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); 1507 a_i = Aloc[k][2*m+iloc][i]; 1508 a_j = Aloc[k][2*m+iloc][j]; 1509 Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j; 1510 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j; 1511 a_i = Aloc[k][2*m+jloc][i]; 1512 a_j = Aloc[k][2*m+jloc][j]; 1513 Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j; 1514 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j; 1515 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); 1516 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); 476 1517 } 477 1518 } 478 1519 } 479 } 480 } 481 // reset extra entries 482 for (u=0;u<=max_operators;u++) { 483 for (i=Num;i<AllocNum;i++) 484 for (j=0;j<AllocNum;j++) 485 gsl_matrix_set(A[u], i,j, 0.); 486 for (i=Num;i<AllocNum;i++) 487 for (j=0;j<AllocNum;j++) 488 gsl_matrix_set(A[u], j,i, 0.); 489 } 490 // free lookups 491 for (i=0;i<NDIM;i++) { 492 Free(cos_lookup[i], "bla"); 493 Free(sin_lookup[i], "bla"); 494 } 495 Free(cos_lookup, "bla"); 496 Free(sin_lookup, "bla"); 497 498 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me); 499 // STEP 1: Initialize U = 1 500 gsl_matrix *U = gsl_matrix_alloc (AllocNum,AllocNum); 501 gsl_matrix_set_identity(U); 502 503 // init merry-go-round array 504 top = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: top"); 505 bot = (int *) Malloc(sizeof(int)*AllocNum/2, "ComputeMLWF: bot"); 506 //debug(P,"init merry-go-round array"); 507 for (i=0;i<AllocNum/2;i++) { 508 top[i] = 2*i; 509 bot[i] = 2*i+1; 510 } 511 512 /*// print A matrices for debug 513 if (P->Par.me == 0) 514 for (u=0;u<max_operators+1;u++) { 515 fprintf(stderr, "A[%i] = \n",u); 516 for (i=0;i<Num;i++) { 517 for (j=0;j<Num;j++) 518 fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j)); 519 fprintf(stderr, "\n"); 520 } 521 } 522 */ 523 if (Num != 1) { 524 // one- or multi-process case? 525 if (((AllocNum % 2) == 0) && (ProcNum != 1) && ((AllocNum / 2) % ProcNum == 0)) { 526 max_rounds = (AllocNum / 2)/ProcNum; // each process must perform multiple rotations per step of a set 527 //fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, ProcRank, ProcNum, max_rounds); 528 // allocate column vectors for interchange of columns 529 //debug(P,"allocate column vectors for interchange of columns"); 530 c = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: c"); 531 s = (double *) Malloc(sizeof(double)*max_rounds, "ComputeMLWF: s"); 532 c_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: c_all"); 533 s_all = (double *) Malloc(sizeof(double)*AllocNum/2, "ComputeMLWF: s_all"); 534 rcounts = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rcounts"); 535 rdispls = (int *) Malloc(sizeof(int)*ProcNum, "ComputeMLWF: rdispls"); 536 /* // print starting values of index generation tables top and bot 537 fprintf(stderr,"top\t"); 538 for (k=0;k<AllocNum/2;k++) 539 fprintf(stderr,"%i\t", top[k]); 540 fprintf(stderr,"\n"); 541 fprintf(stderr,"bot\t"); 542 for (k=0;k<AllocNum/2;k++) 543 fprintf(stderr,"%i\t", bot[k]); 544 fprintf(stderr,"\n");*/ 545 // establish communication partners 546 //debug(P,"establish communication partners"); 547 if (ProcRank == 0) { 548 tagS0 = WannierALTag; // left p0 always remains left p0 549 } else { 550 tagS0 = ProcRank == ProcNum - 1 ? WannierARTag : WannierALTag; // left p_last becomes right p_last 551 } 552 tagS1 = ProcRank == 0 ? WannierALTag : WannierARTag; // right p0 always goes into left p1 553 tagR0 = WannierALTag; // 554 tagR1 = WannierARTag; // first process 555 if (ProcRank == 0) { 556 left = ProcNum-1; 557 right = 1; 558 Lsend = 0; 559 Rsend = 1; 560 Lrecv = 0; 561 Rrecv = 1; 562 } else if (ProcRank == ProcNum - 1) { 563 left = ProcRank - 1; 564 right = 0; 565 Lsend = ProcRank; 566 Rsend = ProcRank - 1; 567 Lrecv = ProcRank - 1; 568 Rrecv = ProcRank; 569 } else { 570 left = ProcRank - 1; 571 right = ProcRank + 1; 572 Lsend = ProcRank+1; 573 Rsend = ProcRank - 1; 574 Lrecv = ProcRank - 1; 575 Rrecv = ProcRank+1; 576 } 577 //fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv); 578 // allocate eigenvector stuff 579 //debug(P,"allocate eigenvector stuff"); 580 G = gsl_matrix_calloc (2,2); 581 h = gsl_vector_alloc (2); 582 eval = gsl_vector_alloc (2); 583 evec = gsl_matrix_alloc (2,2); 584 w = gsl_eigen_symmv_alloc(2); 585 // initialise A_loc 586 //debug(P,"initialise A_loc"); 587 for (k=0;k<max_operators+1;k++) { 588 //Aloc[k] = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Aloc[k]"); 589 Around[k] = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Around[k]"); 590 Atotal[k] = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Atotal[k]"); 591 Aloc[k] = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Aloc[k]"); 592 //Around[k] = &Atotal[k][ProcRank*AllocNum*2*max_rounds]; 593 594 for (round=0;round<max_rounds;round++) { 595 Aloc[k][2*round] = &Around[k][AllocNum*(2*round)]; 596 Aloc[k][2*round+1] = &Around[k][AllocNum*(2*round+1)]; 597 for (l=0;l<AllocNum;l++) { 598 Aloc[k][2*round][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round)); 599 Aloc[k][2*round+1][l] = gsl_matrix_get(A[k],l,2*(ProcRank*max_rounds+round)+1); 600 //fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]); 1520 // apply rotation to local operator matrices 1521 // A_loc = A_loc * V_small 1522 //debug(P,"apply rotation to local operator matrices A[k]"); 1523 for (m=0;m<max_rounds;m++) { 1524 start = DiagData->ProcRank * max_rounds + m; 1525 iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1; 1526 jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1; 1527 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix ! 1528 for (l=0;l<DiagData->AllocNum;l++) { 1529 // Columns, i and j belong to this process only! 1530 a_i = Aloc[k][2*m+iloc][l]; 1531 a_j = Aloc[k][2*m+jloc][l]; 1532 Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j; 1533 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j; 1534 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]); 601 1535 } 602 1536 } 603 1537 } 604 // initialise U_loc 605 //debug(P,"initialise U_loc"); 606 //Uloc = (double *) Malloc(sizeof(double)*AllocNum*2, "ComputeMLWF: Uloc"); 607 Uround = (double *) Malloc(sizeof(double)*AllocNum*2*max_rounds, "ComputeMLWF: Uround"); 608 Utotal = (double *) Malloc(sizeof(double)*AllocNum*AllocNum, "ComputeMLWF: Utotal"); 609 Uloc = (double **) Malloc(sizeof(double *)*2*max_rounds, "ComputeMLWF: Uloc"); 610 //Uround = &Utotal[ProcRank*AllocNum*2*max_rounds]; 611 for (round=0;round<max_rounds;round++) { 612 Uloc[2*round] = &Uround[AllocNum*(2*round)]; 613 Uloc[2*round+1] = &Uround[AllocNum*(2*round+1)]; 614 for (l=0;l<AllocNum;l++) { 615 Uloc[2*round][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round)); 616 Uloc[2*round+1][l] = gsl_matrix_get(U,l,2*(ProcRank*max_rounds+round)+1); 617 //fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum*0],Uloc[l+AllocNum*1]); 1538 // Shuffling of these round's columns to prepare next rotation set 1539 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! 1540 // extract columns from A 1541 //debug(P,"extract columns from A"); 1542 MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1); 1543 1544 } 1545 // and also for V ... 1546 //debug(P,"extract columns from U"); 1547 MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1); 1548 1549 1550 // and merry-go-round for the indices too 1551 //debug(P,"and merry-go-round for the indices too"); 1552 MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2); 1553 } 1554 1555 //fprintf(stderr,"(%i) STEP 4\n",P->Par.me); 1556 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 1557 old_spread = Spread; 1558 spread = 0.; 1559 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators 1560 for (i=0; i < 2*max_rounds; i++) { // go through all wave functions 1561 spread += Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]*Aloc[k][i][i+DiagData->ProcRank*2*max_rounds]; 1562 //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i); 1563 } 1564 } 1565 MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm)); 1566 //Spread = spread; 1567 if (P->Par.me == 0) { 1568 //if(P->Call.out[ReadOut]) 1569 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier); 1570 //else 1571 //fprintf(stderr,"%2.9e\n",Spread); 1572 } 1573 // STEP 5: check change of variance 1574 } while (fabs(old_spread-Spread) >= R->EpsWannier); 1575 // end of iterative diagonalization loop: We have found our final orthogonal U! 1576 1577 // gather local parts of U into complete matrix 1578 for (l=0;l<DiagData->ProcNum;l++) 1579 rcounts[l] = DiagData->AllocNum; 1580 debug(P,"allgather U"); 1581 for (round=0;round<2*max_rounds;round++) { 1582 for (l=0;l<DiagData->ProcNum;l++) 1583 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum; 1584 MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm)); 1585 } 1586 for (k=0;k<DiagData->AllocNum;k++) { 1587 for(l=0;l<DiagData->AllocNum;l++) { 1588 gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]); 1589 } 1590 } 1591 1592 // after one set, gather A[k] from all and calculate spread 1593 for (l=0;l<DiagData->ProcNum;l++) 1594 rcounts[l] = DiagData->AllocNum; 1595 debug(P,"gather A[k] for spread"); 1596 for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix ! 1597 debug(P,"A[k] all gather"); 1598 for (round=0;round<2*max_rounds;round++) { 1599 for (l=0;l<DiagData->ProcNum;l++) 1600 rdispls[l] = (l*2*max_rounds + round)*DiagData->AllocNum; 1601 MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm)); 1602 } 1603 for (k=0;k<DiagData->AllocNum;k++) { 1604 for(l=0;l<DiagData->AllocNum;l++) { 1605 gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]); 1606 } 1607 } 1608 } 1609 1610 // free eigenvector stuff 1611 gsl_vector_free(h); 1612 gsl_matrix_free(G); 1613 //gsl_eigen_symmv_free(w); 1614 gsl_vector_free(eval); 1615 gsl_matrix_free(evec); 1616 1617 // Free column vectors 1618 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) { 1619 Free(Atotal[k], "ParallelDiagonalization: Atotal[k]"); 1620 Free(Around[k], "ParallelDiagonalization: Around[k]"); 1621 } 1622 Free(Uround, "ParallelDiagonalization: Uround"); 1623 Free(Utotal, "ParallelDiagonalization: Utotal"); 1624 Free(c_all, "ParallelDiagonalization: c_all"); 1625 Free(s_all, "ParallelDiagonalization: s_all"); 1626 Free(c, "ParallelDiagonalization: c"); 1627 Free(s, "ParallelDiagonalization: s"); 1628 Free(rcounts, "ParallelDiagonalization: rcounts"); 1629 Free(rdispls, "ParallelDiagonalization: rdispls"); 1630 } 1631 /* 1632 * \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized 1633 * \param *U transformation matrix set to unity matrix 1634 * \param NumMatrices number of matrices to be diagonalized 1635 * \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A) 1636 * \param AllocNum number of rows/columns in matrices 1637 * \param Num number of wave functions 1638 * \param *top array with top row indices (merry-go-round) 1639 * \param *bot array with bottom row indices (merry-go-round) 1640 */ 1641 1642 /** Simultaneous Diagonalization of variances matrices with one cpu. 1643 * \param *P Problem at hand 1644 * \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization 1645 * \note this is faster given one cpu only than ParallelDiagonalization() 1646 */ 1647 void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData) 1648 { 1649 struct RunStruct *R = &P->R; 1650 gsl_matrix *G; 1651 gsl_vector *h; 1652 gsl_vector *eval; 1653 gsl_matrix *evec; 1654 //gsl_eigen_symmv_workspace *w; 1655 double *c,*s,a_i,a_j; 1656 int it_steps, set, ProcRank; 1657 int i,j,k,l,m; 1658 double spread = 0., old_spread = 0.;\ 1659 1660 // allocate eigenvector stuff 1661 debug(P,"allocate eigenvector stuff"); 1662 G = gsl_matrix_calloc (2,2); 1663 h = gsl_vector_alloc (2); 1664 eval = gsl_vector_alloc (2); 1665 evec = gsl_matrix_alloc (2,2); 1666 //w = gsl_eigen_symmv_alloc(2); 1667 1668 c = (double *) Malloc(sizeof(double), "ComputeMLWF: c"); 1669 s = (double *) Malloc(sizeof(double), "ComputeMLWF: s"); 1670 debug(P,"now comes the iteration loop"); 1671 it_steps=0; 1672 do { 1673 it_steps++; 1674 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me); 1675 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps); 1676 for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time 1677 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); 1678 // STEP 3: for all index pairs 0<= i<j <AllocNum 1679 for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) { 1680 // get indices 1681 i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices 1682 j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i 1683 //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j); 1684 // STEP 3a: Calculate G 1685 gsl_matrix_set_zero(G); 1686 1687 for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ... 1688 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] 1689 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j)); 1690 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i)); 1691 gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j)); 1692 gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i)); 1693 //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1)); 1694 1695 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] 1696 for (l=0;l<2;l++) 1697 for (m=0;m<2;m++) 1698 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); 1699 1700 //PrintGSLMatrix(P,G,2, "G"); 618 1701 } 619 } 620 // now comes the iteration loop 621 //debug(P,"now comes the iteration loop"); 622 it_steps = 0; 623 do { 624 it_steps++; 625 fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps); 626 for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time 627 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); 628 for (round = 0; round < max_rounds;round++) { 629 start = ProcRank * max_rounds + round; 630 // get indices 631 i = top[start] < bot[start] ? top[start] : bot[start]; // minimum of the two indices 632 iloc = top[start] < bot[start] ? 0 : 1; 633 j = top[start] > bot[start] ? top[start] : bot[start]; // maximum of the two indices: thus j > i 634 jloc = top[start] > bot[start] ? 0 : 1; 635 //fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc); 636 637 // calculate rotation angle, i.e. c and s 638 //fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j); 639 gsl_matrix_set_zero(G); 640 for (k=0;k<max_operators;k++) { // go through all operators ... 641 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] 642 //fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+iloc][i], Aloc[k][2*round+jloc][j],Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); 643 //fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2*round+jloc][i], Aloc[k][2*round+iloc][j],Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); 644 gsl_vector_set(h, 0, Aloc[k][2*round+iloc][i] - Aloc[k][2*round+jloc][j]); 645 gsl_vector_set(h, 1, Aloc[k][2*round+jloc][i] + Aloc[k][2*round+iloc][j]); 646 647 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] 648 for (l=0;l<2;l++) 649 for (m=0;m<2;m++) 650 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); 651 } 652 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); 653 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G 654 gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G 655 index = gsl_vector_max_index (eval); // get biggest eigenvalue 656 x = gsl_matrix_get(evec, 0, index); 657 y = gsl_matrix_get(evec, 1, index) * x/fabs(x); 658 x = fabs(x); // ensure x>=0 so that rotation angles remain smaller Pi/4 659 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); 660 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] 661 r = sqrt(x*x + y*y); 662 c[round] = sqrt((x + r) / (2*r)); 663 s[round] = y / sqrt(2*r*(x+r)); 664 // [[c,s],[-s,c]]= V_small 665 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]); 666 667 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); 668 // V_loc = V_loc * V_small 669 //debug(P,"apply rotation to local U"); 670 for (l=0;l<AllocNum;l++) { 671 a_i = Uloc[2*round+iloc][l]; 672 a_j = Uloc[2*round+jloc][l]; 673 Uloc[2*round+iloc][l] = c[round] * a_i + s[round] * a_j; 674 Uloc[2*round+jloc][l] = -s[round] * a_i + c[round] * a_j; 675 } 676 } // end of round 677 // circulate the rotation angles 678 //debug(P,"circulate the rotation angles"); 679 MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *comm); // MPI_Allgather is waaaaay faster than ring circulation 680 MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *comm); 681 //m = start; 682 for (l=0;l<AllocNum/2;l++) { // for each process 683 // we have V_small from process k 684 //debug(P,"Apply V_small from other process"); 685 i = top[l] < bot[l] ? top[l] : bot[l]; // minimum of the two indices 686 j = top[l] > bot[l] ? top[l] : bot[l]; // maximum of the two indices: thus j > i 687 iloc = top[l] < bot[l] ? 0 : 1; 688 jloc = top[l] > bot[l] ? 0 : 1; 689 for (m=0;m<max_rounds;m++) { 690 //fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j); 691 // apply row rotation to each A[k] 692 for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! 693 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); 694 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); 695 a_i = Aloc[k][2*m+iloc][i]; 696 a_j = Aloc[k][2*m+iloc][j]; 697 Aloc[k][2*m+iloc][i] = c_all[l] * a_i + s_all[l] * a_j; 698 Aloc[k][2*m+iloc][j] = -s_all[l] * a_i + c_all[l] * a_j; 699 a_i = Aloc[k][2*m+jloc][i]; 700 a_j = Aloc[k][2*m+jloc][j]; 701 Aloc[k][2*m+jloc][i] = c_all[l] * a_i + s_all[l] * a_j; 702 Aloc[k][2*m+jloc][j] = -s_all[l] * a_i + c_all[l] * a_j; 703 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+iloc][i],j,Aloc[k][2*m+iloc][j]); 704 //fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2*m+jloc][i],j,Aloc[k][2*m+jloc][j]); 705 } 706 } 1702 1703 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); 1704 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G 1705 EigensolverFor22Matrix(P,G,eval,evec); 1706 //gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G 1707 1708 //PrintGSLMatrix(P, evec, 2, "Eigenvectors"); 1709 CalculateRotationAnglesFromEigenvalues(evec, eval, c, s); 1710 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]); 1711 1712 //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j); 1713 // STEP 3d: apply rotation R to rows and columns of A^{(k)} 1714 for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix ! 1715 //PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)"); 1716 for (l=0;l<DiagData->AllocNum;l++) { 1717 // Rows 1718 a_i = gsl_matrix_get(DiagData->A[k],i,l); 1719 a_j = gsl_matrix_get(DiagData->A[k],j,l); 1720 gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j); 1721 gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j); 707 1722 } 708 // apply rotation to local operator matrices 709 // A_loc = A_loc * V_small 710 //debug(P,"apply rotation to local operator matrices A[k]"); 711 for (m=0;m<max_rounds;m++) { 712 start = ProcRank * max_rounds + m; 713 iloc = top[start] < bot[start] ? 0 : 1; 714 jloc = top[start] > bot[start] ? 0 : 1; 715 for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! 716 for (l=0;l<AllocNum;l++) { 717 // Columns, i and j belong to this process only! 718 a_i = Aloc[k][2*m+iloc][l]; 719 a_j = Aloc[k][2*m+jloc][l]; 720 Aloc[k][2*m+iloc][l] = c[m] * a_i + s[m] * a_j; 721 Aloc[k][2*m+jloc][l] = -s[m] * a_i + c[m] * a_j; 722 //fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2*m+iloc][l],l,Aloc[k][2*m+jloc][l]); 723 } 724 } 1723 for (l=0;l<DiagData->AllocNum;l++) { 1724 // Columns 1725 a_i = gsl_matrix_get(DiagData->A[k],l,i); 1726 a_j = gsl_matrix_get(DiagData->A[k],l,j); 1727 gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j); 1728 gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j); 725 1729 } 726 // Shuffling of these round's columns to prepare next rotation set 727 for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! 728 // extract columns from A 729 //debug(P,"extract columns from A"); 730 shiftcolumns(*comm, Aloc[k], AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1); 731 732 } 733 // and also for V ... 734 //debug(P,"extract columns from U"); 735 shiftcolumns(*comm, Uloc, AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1); 736 737 738 // and merry-go-round for the indices too 739 //debug(P,"and merry-go-round for the indices too"); 740 music(top, bot, AllocNum/2); 1730 //PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)"); 741 1731 } 742 743 //fprintf(stderr,"(%i) STEP 4\n",P->Par.me); 744 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 745 old_spread = Spread; 746 spread = 0.; 747 for(k=0;k<max_operators;k++) { // go through all self-adjoint operators 748 for (i=0; i < 2*max_rounds; i++) { // go through all wave functions 749 spread += Aloc[k][i][i+ProcRank*2*max_rounds]*Aloc[k][i][i+ProcRank*2*max_rounds]; 750 //spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i); 751 } 1732 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); 1733 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)"); 1734 // STEP 3e: apply U = R*U 1735 for (l=0;l<DiagData->AllocNum;l++) { 1736 a_i = gsl_matrix_get(DiagData->U,i,l); 1737 a_j = gsl_matrix_get(DiagData->U,j,l); 1738 gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j); 1739 gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j); 752 1740 } 753 MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *comm); 754 //Spread = spread; 755 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier); 756 else fprintf(stderr,"%2.9e\n",Spread); 757 // STEP 5: check change of variance 758 } while (fabs(old_spread-Spread) >= R->EpsWannier); 759 // end of iterative diagonalization loop: We have found our final orthogonal U! 760 761 for (l=0;l<ProcNum;l++) 762 rcounts[l] = AllocNum; 763 //debug(P,"allgather U"); 764 for (round=0;round<2*max_rounds;round++) { 765 for (l=0;l<ProcNum;l++) 766 rdispls[l] = (l*2*max_rounds + round)*AllocNum; 767 MPI_Allgatherv(Uloc[round], AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *comm); 768 } 769 for (k=0;k<AllocNum;k++) { 770 for(l=0;l<AllocNum;l++) { 771 gsl_matrix_set(U,k,l, Utotal[l+k*AllocNum]); 772 } 773 } 774 775 776 // after one set, gather A[k] from all and calculate spread 777 for (l=0;l<ProcNum;l++) 778 rcounts[l] = AllocNum; 779 //debug(P,"gather A[k] for spread"); 780 for (u=0;u<max_operators+1;u++) {// one extra for B matrix ! 781 //debug(P,"A[k] all gather"); 782 for (round=0;round<2*max_rounds;round++) { 783 for (l=0;l<ProcNum;l++) 784 rdispls[l] = (l*2*max_rounds + round)*AllocNum; 785 MPI_Allgatherv(Aloc[u][round], AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *comm); 786 } 787 for (k=0;k<AllocNum;k++) { 788 for(l=0;l<AllocNum;l++) { 789 gsl_matrix_set(A[u],k,l, Atotal[u][l+k*AllocNum]); 790 } 791 } 792 } 793 794 // free eigenvector stuff 795 gsl_vector_free(h); 796 gsl_matrix_free(G); 797 gsl_eigen_symmv_free(w); 798 gsl_vector_free(eval); 799 gsl_matrix_free(evec); 800 // Free column vectors 801 for (k=0;k<max_operators+1;k++) { 802 Free(Atotal[k], "bla"); 803 Free(Around[k], "bla"); 804 } 805 Free(Uround, "bla"); 806 Free(Utotal, "bla"); 807 Free(c_all, "bla"); 808 Free(s_all, "bla"); 809 Free(c, "bla"); 810 Free(s, "bla"); 811 Free(rcounts, "bla"); 812 Free(rdispls, "bla"); 813 814 } else { 815 816 c = (double *) Malloc(sizeof(double), "ComputeMLWF: c"); 817 s = (double *) Malloc(sizeof(double), "ComputeMLWF: s"); 818 G = gsl_matrix_calloc (2,2); 819 h = gsl_vector_alloc (2); 820 eval = gsl_vector_alloc (2); 821 evec = gsl_matrix_alloc (2,2); 822 w = gsl_eigen_symmv_alloc(2); 823 //debug(P,"now comes the iteration loop"); 824 it_steps=0; 825 do { 826 it_steps++; 827 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me); 828 fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps); 829 for (set=0; set < AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time 830 //fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set); 831 // STEP 3: for all index pairs 0<= i<j <AllocNum 832 for (ProcRank=0;ProcRank<AllocNum/2;ProcRank++) { 833 // get indices 834 i = top[ProcRank] < bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // minimum of the two indices 835 j = top[ProcRank] > bot[ProcRank] ? top[ProcRank] : bot[ProcRank]; // maximum of the two indices: thus j > i 836 //fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j); 837 // STEP 3a: Calculate G 838 gsl_matrix_set_zero(G); 839 840 for (k=0;k<max_operators;k++) { // go through all operators ... 841 // Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)] 842 //fprintf(stderr,"(%i) k%i [a_ii - a_ij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,i), gsl_matrix_get(A[k],j,j),gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j)); 843 //fprintf(stderr,"(%i) k%i [a_ij + a_jij] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(A[k],i,j), gsl_matrix_get(A[k],j,i),gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i)); 844 gsl_vector_set(h, 0, gsl_matrix_get(A[k],i,i) - gsl_matrix_get(A[k],j,j)); 845 gsl_vector_set(h, 1, gsl_matrix_get(A[k],i,j) + gsl_matrix_get(A[k],j,i)); 846 //gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1)); 847 848 // Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ] 849 for (l=0;l<2;l++) 850 for (m=0;m<2;m++) 851 gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m)); 852 } 853 854 //fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j); 855 // STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G 856 gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G 857 858 index = gsl_vector_max_index (eval); // get biggest eigenvalue 859 x = gsl_matrix_get(evec, 0, index); 860 y = gsl_matrix_get(evec, 1, index) * x/fabs(x); 861 //z = gsl_matrix_get(evec, 2, index) * x/fabs(x); 862 x = fabs(x); // ensure x>=0 so that rotation angles remain smaller Pi/4 863 864 //fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j); 865 // STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]] 866 r = sqrt(x*x + y*y); // + z*z); 867 c[0] = sqrt((x + r) / (2*r)); 868 s[0] = y / sqrt(2*r*(x+r)); //, -z / sqrt(2*r*(x+r))); 869 //fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]); 870 871 //fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j); 872 // STEP 3d: apply rotation R to rows and columns of A^{(k)} 873 for (k=0;k<max_operators+1;k++) {// one extra for B matrix ! 874 for (l=0;l<AllocNum;l++) { 875 // Rows 876 a_i = gsl_matrix_get(A[k],i,l); 877 a_j = gsl_matrix_get(A[k],j,l); 878 gsl_matrix_set(A[k], i, l, c[0] * a_i + s[0] * a_j); 879 gsl_matrix_set(A[k], j, l, -s[0] * a_i + c[0] * a_j); 880 } 881 for (l=0;l<AllocNum;l++) { 882 // Columns 883 a_i = gsl_matrix_get(A[k],l,i); 884 a_j = gsl_matrix_get(A[k],l,j); 885 gsl_matrix_set(A[k], l, i, c[0] * a_i + s[0] * a_j); 886 gsl_matrix_set(A[k], l, j, -s[0] * a_i + c[0] * a_j); 887 } 888 } 889 //fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j); 890 // STEP 3e: apply U = R*U 891 for (l=0;l<AllocNum;l++) { 892 a_i = gsl_matrix_get(U,i,l); 893 a_j = gsl_matrix_get(U,j,l); 894 gsl_matrix_set(U, i, l, c[0] * a_i + s[0] * a_j); 895 gsl_matrix_set(U, j, l, -s[0] * a_i + c[0] * a_j); 896 } 897 } 898 // and merry-go-round for the indices too 899 //debug(P,"and merry-go-round for the indices too"); 900 if (AllocNum > 2) music(top, bot, AllocNum/2); 901 } 902 903 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me); 904 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 905 old_spread = spread; 906 spread = 0; 907 for(k=0;k<max_operators;k++) { // go through all self-adjoint operators 908 for (i=0; i < AllocNum; i++) { // go through all wave functions 909 spread += pow(gsl_matrix_get(A[k],i,i),2); 910 } 911 } 912 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier); 913 else fprintf(stderr,"%2.9e\n",spread); 914 // STEP 5: check change of variance 915 } while (fabs(old_spread-spread) >= R->EpsWannier); 916 // end of iterative diagonalization loop: We have found our final orthogonal U! 917 gsl_vector_free(h); 918 gsl_matrix_free(G); 919 gsl_eigen_symmv_free(w); 920 gsl_vector_free(eval); 921 gsl_matrix_free(evec); 922 Free(c, "bla"); 923 Free(s, "bla"); 924 } 925 926 if(P->Call.out[ReadOut]) {// && P->Par.me == 0) { 927 //debug(P,"output total U"); 928 fprintf(stderr,"(%i) U_tot = \n",P->Par.me); 929 for (k=0;k<Num;k++) { 930 for (l=0;l<Num;l++) 931 fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k)); 932 fprintf(stderr,"\n"); 933 } 934 } 935 } 936 Free(top, "bla"); 937 Free(bot, "bla"); 938 939 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Allocating buffer mem\n",P->Par.me); 940 // STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji | \phi_i \rangle = | \phi_j^\ast \rangle 941 Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here 942 fftw_complex **coeffs_buffer = Malloc(sizeof(fftw_complex *)*Num, "ComputeMLWF: **coeffs_buffer"); 943 for (l=0;l<Num;l++) // allocate for each local wave function 944 coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l]; 945 946 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me); 947 l=-1; // to access U matrix element (0..Num-1) 948 k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1) 949 for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions 950 OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA 951 if (OnePsiA->PsiType == type) { // drop all but occupied ones 952 l++; // increase l if it is occupied wave function 953 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local? 954 k++; // increase k only if it is a local, non-extra orbital wave function 955 LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet 956 SetArrayToDouble0((double *)LPsiDatA, 2*LevS->MaxG); // zero buffer part 957 } else 958 LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients! 959 960 m = -1; // to access U matrix element (0..Num-1) 961 for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions 962 OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB 963 if (OnePsiB->PsiType == type) { // drop all but occupied ones 964 m++; // increase m if it is occupied wave function 965 if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local? 966 LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo]; 967 else 968 LOnePsiB = NULL; 969 if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi 970 RecvSource = OnePsiB->my_color_comm_ST_Psi; 971 MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status ); 972 LPsiDatB=LevS->LPsi->TempPsi; 973 } else { // .. otherwise send it to all other processes (Max_me... - 1) 974 for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++) 975 if (p != OnePsiB->my_color_comm_ST_Psi) 976 MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT); 977 LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo]; 978 } // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received 979 980 if (LPsiDatA != NULL) { 981 double tmp = gsl_matrix_get(U,l,m); 982 g=0; 983 if (LevS->GArray[0].GSq == 0.0) { 984 LPsiDatA[g].re += LPsiDatB[g].re * tmp; 985 LPsiDatA[g].im += LPsiDatB[g].im * tmp; 986 g++; 987 } 988 for (; g < LevS->MaxG; g++) { 989 LPsiDatA[g].re += LPsiDatB[g].re * tmp; 990 LPsiDatA[g].im += LPsiDatB[g].im * tmp; 991 } 992 } 993 } 994 } 995 } 996 } 997 gsl_matrix_free(U); 998 999 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me); 1000 // now, as all wave functions are updated, swap the buffer 1001 l = -1; 1002 for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function 1003 if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { 1004 l++; 1005 if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo); 1006 LPsiDatA = (fftw_complex *)coeffs_buffer[l]; 1007 LPsiDatB = LevS->LPsi->LocalPsi[l]; 1008 for (g=0;g<LevS->MaxG;g++) { 1009 LPsiDatB[g].re = LPsiDatA[g].re; 1010 LPsiDatB[g].im = LPsiDatA[g].im; 1011 } 1012 // recalculating non-local form factors which are coefficient dependent! 1013 CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo); 1014 } 1015 } 1016 // and free allocated buffer memory 1017 Free(coeffs_buffer, "bla"); 1018 1019 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7\n",P->Par.me); 1020 // STEP 7: Compute Wannier centers, spread and printout 1021 // the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital 1741 //PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)"); 1742 } 1743 // and merry-go-round for the indices too 1744 //debug(P,"and merry-go-round for the indices too"); 1745 if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2); 1746 } 1747 1748 //if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me); 1749 // STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2 1750 old_spread = spread; 1751 spread = 0.; 1752 for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators 1753 for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions 1754 spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2); 1755 } 1756 } 1757 if(P->Par.me == 0) { 1758 //if(P->Call.out[ReadOut]) 1759 // fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier); 1760 //else 1761 //fprintf(stderr,"%2.9e\n",spread); 1762 } 1763 // STEP 5: check change of variance 1764 } while (fabs(old_spread-spread) >= R->EpsWannier); 1765 // end of iterative diagonalization loop: We have found our final orthogonal U! 1766 Free(c, "SerialDiagonalization: c"); 1767 Free(s, "SerialDiagonalization: s"); 1768 // free eigenvector stuff 1769 gsl_vector_free(h); 1770 gsl_matrix_free(G); 1771 //gsl_eigen_symmv_free(w); 1772 gsl_vector_free(eval); 1773 gsl_matrix_free(evec); 1774 } 1775 1776 /** Writes the wannier centres and spread to file. 1777 * Also puts centres from array into OnePsiElementAddData structure. 1778 * \param *P Problem at hand 1779 * \param old_spread first term of wannier spread 1780 * \param spread second term of wannier spread 1781 * \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres 1782 * \param *WannierSpread array with wannier spread per wave function 1783 */ 1784 void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread) 1785 { 1786 struct FileData *F = &P->Files; 1787 struct RunStruct *R = &P->R; 1788 struct Lattice *Lat = &P->Lat; 1789 struct Psis *Psi = &Lat->Psi; 1790 struct OnePsiElement *OnePsiA; 1791 char spin[12], suffix[18]; 1792 int i,j,l; 1793 1022 1794 if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me); 1023 1795 1024 1796 switch (Lat->Psi.PsiST) { 1025 1797 case SpinDouble: 1026 str ncpy(suffix,".spread.csv",18);1027 str ncat(spin,"SpinDouble",12);1798 strcpy(suffix,".spread.csv"); 1799 strcpy(spin,"SpinDouble"); 1028 1800 break; 1029 1801 case SpinUp: 1030 str ncpy(suffix,".spread_up.csv",18);1031 str ncat(spin,"SpinUp",12);1802 strcpy(suffix,".spread_up.csv"); 1803 strcpy(spin,"SpinUp"); 1032 1804 break; 1033 1805 case SpinDown: 1034 str ncpy(suffix,".spread_down.csv",18);1035 str ncat(spin,"SpinDown",12);1806 strcpy(suffix,".spread_down.csv"); 1807 strcpy(spin,"SpinDown"); 1036 1808 break; 1037 1809 } … … 1042 1814 OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level 1043 1815 if (F->SpreadFile == NULL) { 1044 Error(SomeError," ComputeMLWF: Error opening Wannier File!\n");1816 Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n"); 1045 1817 } else { 1046 1818 fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin); 1047 1819 fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n"); 1048 1820 } 1049 }1050 old_spread = 0;1051 spread = 0;1052 i=-1;1053 for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions1054 OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA1055 if (OnePsiA->PsiType == type) { // drop all but occupied ones1056 i++; // increase l if it is occupied wave function1057 //fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);1058 1059 // calculate Wannier Centre1060 for (j=0;j<NDIM;j++) {1061 WannierCentre[i][j] = sqrt(Lat->RealBasisSQ[j])/(2*PI) * GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j*2],i,i),gsl_matrix_get(A[j*2+1],i,i))));1062 if (WannierCentre[i][j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ1063 WannierCentre[i][j] = sqrt(Lat->RealBasisSQ[j]) + WannierCentre[i][j];1064 }1065 1066 // store orbital spread and centre in file1067 tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)1068 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)1069 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);1070 WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;1071 //fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);1072 //if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",1073 //Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);1074 //if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",1075 //WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);1076 1077 // gather all spreads1078 old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)1079 for (k=0;k<max_operators;k++)1080 spread += pow(gsl_matrix_get(A[k],i,i),2);1081 1082 // store calculated Wannier centre1083 if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // is this local?1084 for (j=0;j<NDIM;j++)1085 Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];1086 }1087 }1088 1089 // join Wannier orbital to groups with common centres under certain conditions1090 switch (R->CommonWannier) {1091 case 4:1092 debug(P,"Shifting each Wannier centers to cell center");1093 for (i=0; i < Num; i++) { // go through all occupied wave functions1094 for (j=0;j<NDIM;j++)1095 WannierCentre[i][j] = sqrt(Lat->RealBasisSQ[j])/2.;1096 }1097 break;1098 case 3:1099 debug(P,"Shifting Wannier centers individually to nearest grid point");1100 for (i=0;i < Num; i++) { // go through all wave functions1101 for (j=0;j<NDIM;j++) {1102 tmp = WannierCentre[i][j]/sqrt(Lat->RealBasisSQ[j])*(double)N[j];1103 //fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));1104 if (fabs((double)floor(tmp) - tmp) < fabs((double)ceil(tmp) - tmp))1105 WannierCentre[i][j] = (double)floor(tmp)/(double)N[j]*sqrt(Lat->RealBasisSQ[j]);1106 else1107 WannierCentre[i][j] = (double)ceil(tmp)/(double)N[j]*sqrt(Lat->RealBasisSQ[j]);1108 }1109 }1110 break;1111 case 2:1112 debug(P,"Combining individual orbitals according to spread.");1113 //fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);1114 //debug(P,"finding partners");1115 marker = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: marker");1116 group = (int**) Malloc(sizeof(int *)*Num,"ComputeMLWF: group");1117 for (l=0;l<Num;l++) {1118 group[l] = (int*) Malloc(sizeof(int)*(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker1119 for (k=0;k<=Num;k++)1120 group[l][k] = -1; // reset partner group1121 }1122 for (k=0;k<Num;k++)1123 partner[k] = 0;1124 //debug(P,"mem allocated");1125 // go for each orbital through every other, check distance against the sum of both spreads1126 // if smaller add to group of this orbital1127 for (l=0;l<Num;l++) {1128 j=0; // index for partner group1129 for (k=0;k<Num;k++) { // check this against l1130 Spread = 0.;1131 for (i=0;i<NDIM;i++) {1132 //fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);1133 Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);1134 }1135 Spread = sqrt(Spread); // distance in Spread1136 //fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);1137 if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread1138 group[l][j++] = k; // add k to group of l1139 partner[l]++;1140 //fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);1141 }1142 }1143 }1144 1145 // consistency, for each orbital check if this orbital is also in the group of each referred orbital1146 //debug(P,"checking consistency");1147 totalflag = 1;1148 for (l=0;l<Num;l++) // checking l's group1149 for (k=0;k<Num;k++) { // k is partner index1150 if (group[l][k] != -1) { // if current index k is a partner1151 flag = 0;1152 for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists1153 if ((group[ group[l][k] ][j] == l))1154 flag = 1;1155 }1156 //if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);1157 if (totalflag == 1) totalflag = flag;1158 }1159 }1160 // for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres1161 //debug(P,"weight and calculate new centers for partner groups");1162 for (l=0;l<=Num;l++)1163 marker[l] = 1;1164 if (totalflag) {1165 for (l=0;l<Num;l++) { // go through each orbital1166 if (marker[l] != 0) { // if it hasn't been reweighted1167 marker[l] = 0;1168 for (i=0;i<NDIM;i++)1169 q[i] = 0.;1170 j = 0;1171 while (group[l][j] != -1) {1172 marker[group[l][j]] = 0;1173 for (i=0;i<NDIM;i++) {1174 //fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);1175 q[i] += WannierCentre[ group[l][j] ][i];1176 }1177 j++;1178 }1179 //fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);1180 for (i=0;i<NDIM;i++) {// weight by number of elements in partner group1181 q[i] /= (double)(j);1182 }1183 1184 // put WannierCentre into own and all partners'1185 for (i=0;i<NDIM;i++)1186 WannierCentre[l][i] = q[i];1187 j = 0;1188 while (group[l][j] != -1) {1189 for (i=0;i<NDIM;i++)1190 WannierCentre[group[l][j]][i] = q[i];1191 j++;1192 }1193 }1194 }1195 }1196 if (P->Call.out[StepLeaderOut]) {1197 fprintf(stderr,"Summary:\n");1198 fprintf(stderr,"========\n");1199 for (i=0;i<Num;i++)1200 fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);1201 }1202 //debug(P,"done");1203 1204 Free(marker, "bla");1205 for (l=0;l<Num;l++)1206 Free(group[l], "bla");1207 Free(group, "bla");1208 break;1209 case 1:1210 debug(P,"Individual orbitals are changed to center of all.");1211 for (i=0;i<NDIM;i++) // zero center of weight1212 q[i] = 0.;1213 for (k=0;k<Num;k++)1214 for (i=0;i<NDIM;i++) { // sum up all orbitals each component1215 q[i] += WannierCentre[k][i];1216 }1217 for (i=0;i<NDIM;i++) // divide by number1218 q[i] /= Num;1219 for (k=0;k<Num;k++)1220 for (i=0;i<NDIM;i++) { // put into this function's array1221 WannierCentre[k][i] = q[i];1222 }1223 break;1224 case 0:1225 default:1226 break;1227 1821 } 1228 1822 … … 1244 1838 fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n"); 1245 1839 fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread); 1246 } 1247 fflush(F->SpreadFile); 1248 1249 // and the spread was calculated in the loop above 1250 /* 1251 i=-1; 1252 for (l=0;l<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;l++) 1253 if (Psi->AllPsiStatus[l].PsiType == type) { 1254 i++; 1255 spread = CalculateSpread(P,l); 1256 tmp = gsl_matrix_get(A[max_operators],i,i) - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2) 1257 - pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2) 1258 - pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2); 1259 if(P->Call.out[ValueOut]) fprintf(stderr, "(%i) Check of spread of %ith wave function: %lg against %lg\n",P->Par.me, i, Psi->AddData[i].WannierSpread, tmp); 1260 }*/ 1261 // free all remaining memory 1262 for (k=0;k<max_operators+1;k++) 1263 gsl_matrix_free(A[k]); 1264 } 1840 fflush(F->SpreadFile); 1841 } 1842 } 1843 1265 1844 1266 1845 /** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre. … … 1280 1859 double WannierCentre[NDIM+1]; // combined centre and spread 1281 1860 MPI_Status status; 1282 enum PsiTypeTag type = Occupied;1283 1861 int signal = 0; // 1 - ok, 0 - error 1284 1862 1285 1863 switch (Lat->Psi.PsiST) { 1286 1864 case SpinDouble: 1287 str ncpy(suffix,".spread.csv",18);1865 strcpy(suffix,".spread.csv"); 1288 1866 break; 1289 1867 case SpinUp: 1290 str ncpy(suffix,".spread_up.csv",18);1868 strcpy(suffix,".spread_up.csv"); 1291 1869 break; 1292 1870 case SpinDown: 1293 str ncpy(suffix,".spread_down.csv",18);1871 strcpy(suffix,".spread_down.csv"); 1294 1872 break; 1295 1873 } 1874 if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me); 1296 1875 1297 1876 if (P->Par.me_comm_ST == 0) { … … 1354 1933 //return 0; 1355 1934 // and store 'em (for all who have this Psi local) 1356 fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);1935 if ((P->Par.me == 0) && P->Call.out[ValueOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]); 1357 1936 for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j]; 1358 1937 Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM]; 1359 if (P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);1938 //if (P->Par.me ==0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread); 1360 1939 } else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group 1361 1940 if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS) … … 1367 1946 if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) 1368 1947 fclose(SpreadFile); 1369 fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);1948 //fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me); 1370 1949 return 1; 1371 1950 } … … 1408 1987 b_ij = 0; 1409 1988 1410 // create lookup table for sin/cos values 1411 cos_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *cos_lookup"); 1412 sin_lookup = (double **) Malloc(sizeof(double *)*NDIM, "ComputeMLWF: *sin_lookup"); 1413 for (k=0;k<NDIM;k++) { 1414 // allocate memory 1415 cos_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: cos_lookup"); 1416 sin_lookup[k] = (double *) Malloc(sizeof(double)*LevS->Plan0.plan->N[k], "ComputeMLWF: sin_lookup"); 1417 // reset arrays 1418 SetArrayToDouble0(cos_lookup[k],LevS->Plan0.plan->N[k]); 1419 SetArrayToDouble0(sin_lookup[k],LevS->Plan0.plan->N[k]); 1420 // create lookup values 1421 for (g=0;g<LevS->Plan0.plan->N[k];g++) { 1422 tmp = 2*PI/(double)LevS->Plan0.plan->N[k]*(double)g; 1423 cos_lookup[k][g] = cos(tmp); 1424 sin_lookup[k][g] = sin(tmp); 1425 } 1426 } 1989 CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N); 1990 1427 1991 // fill matrices 1428 1992 OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA … … 1499 2063 // store spread in OnePsiElementAdd 1500 2064 Psi->AddData[i].WannierSpread = B_ij - spread; 1501 // free lookups 1502 for (k=0;k<NDIM;k++) { 1503 Free(cos_lookup[k], "bla"); 1504 Free(sin_lookup[k], "bla"); 1505 } 1506 Free(cos_lookup, "bla"); 1507 Free(sin_lookup, "bla"); 1508 2065 2066 FreeSinCosLookupTable(cos_lookup,sin_lookup); 2067 1509 2068 return (B_ij - spread); 1510 2069 } -
pcp/src/wannier.h
r9bdd86 r4f1369 12 12 13 13 File: wannier.h 14 $Id: wannier.h,v 1. 8 2007/02/05 14:00:58 fooExp $14 $Id: wannier.h,v 1.3 2007-10-08 15:43:29 heber Exp $ 15 15 */ 16 16 17 17 #include <gsl/gsl_complex.h> 18 18 19 /** Structure contains all variables needed for diagonalization of a matrix with 20 * SerialDiagonalization() or ParallelDiagonalization() 21 */ 22 struct DiagonalizationData { 23 int Num; //!< Number of rows/columns 24 int AllocNum; //!< even number of rows/columns 25 int NumMatrices;//!< number of matrices to be simultaneously "actively" diagonalized 26 int extra; //!< number of additional matrices that are also, yet "passively" diagonalized (not considered in rotation angle evaluation) 27 gsl_matrix *U; //!< transformation matrix 28 gsl_matrix **A; //!< matrix to be diagonlized 29 int *top; //!< merry-go-round top row of indices 30 int *bot; //!< merry-go-round bottom row of indices 31 MPI_Comm *comm; //!< MPI communicator for ParallelDiagonalization() 32 int ProcRank; //!< Rank of this process, used in ParallelDiagonalization() 33 int ProcNum; //!< Number of process in communicator, used in ParallelDiagonalization() 34 }; 35 36 void PrintGSLMatrix(struct Problem *P, gsl_matrix *U, int Num, const char *msg); 19 37 void ComputeMLWF(struct Problem *P); 38 void WriteWannierFile(struct Problem *P, double spread, double old_spread, double **WannierCentre, double *WannierSpread); 20 39 int ParseWannierFile(struct Problem *P); 40 void SerialDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData); 41 void ParallelDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData); 21 42 double CalculateSpread(struct Problem *P, int i); 22 43 gsl_complex convertComplex (fftw_complex a); 44 void InitDiagonalization(struct Problem *P, struct DiagonalizationData *DiagData, int Num, int NumMatrices, int extra); 45 void FreeDiagonalization(struct DiagonalizationData *DiagData); 46 void OrthogonalizePsis(struct Problem *P); 47 void StrongOrthogonalizePsis(struct Problem *P); 48 void Diagonalize(struct Problem *P, struct DiagonalizationData *DiagData); 49 50 void CalculateSecondOrderReciprocalMoment(struct Problem *P); 23 51 24 52 #endif /*WANNIER_H_*/
Note:
See TracChangeset
for help on using the changeset viewer.