Context Navigation

wannier.c@ d3482a

Visit:

Last change on this file since d3482a was d3482a, checked in by Frederik Heber <heber@…>, 17 years ago

various suffix...[255] or filename[255] were changed from arrays to pointers

Instead of having fixed array length that are fully represented in the code switched to Malloc/Free.

Property mode set to 100644

File size: 93.5 KB

Line
1	/** \file wannier.c
2	* Maximally Localized Wannier Functions.
3	*
4	* Contains the on function that minimises the spread of all orbitals in one rush in a parallel
5	* Jacobi-Diagonalization implementation, ComputeMLWF(), and one routine CalculateSpread() to
6	* calculate the spread of a specific orbital, which may be useful in checking on the change of
7	* spread during other calculations. convertComplex() helps in typecasting fftw_complex to gsl_complex.
8	*
9	Project: ParallelCarParrinello
10	\author Frederik Heber
11	\date 2006
12
13	File: wannier.c
14	$Id: wannier.c,v 1.7 2007-10-12 15:50:38 heber Exp $
15	*/
16
17	#include <math.h>
18	#include <gsl/gsl_math.h>
19	#include <gsl/gsl_eigen.h>
20	#include <gsl/gsl_matrix.h>
21	#include <gsl/gsl_vector.h>
22	#include <gsl/gsl_complex.h>
23	#include <gsl/gsl_complex_math.h>
24	#include <gsl/gsl_sort_vector.h>
25	#include <gsl/gsl_heapsort.h>
26	#include <gsl/gsl_blas.h>
27	#include <string.h>
28
29	#include "data.h"
30	#include "density.h"
31	#include "errors.h"
32	#include "gramsch.h"
33	#include "helpers.h"
34	#include "init.h"
35	#include "myfft.h"
36	#include "mymath.h"
37	#include "output.h"
38	#include "perturbed.h"
39	#include "wannier.h"
40
41
42	#define max_operators NDIM*2 //!< number of chosen self-adjoint operators when evaluating the spread
43	#define type Occupied
44
45
46	/** Converts type fftw_complex to gsl_complex.
47	* \param a complex number
48	* \return b complex number
49	*/
50	gsl_complex convertComplex (fftw_complex a) {
51	return gsl_complex_rect(c_re(a),c_im(a));
52	}
53
54	/** "merry go round" implementation for parallel index ordering.
55	* Given two arrays, one for the upper/left matrix columns, one for the lower/right ones, one step of an index generation is
56	* performed which generates once each possible pairing.
57	* \param *top index array 1
58	* \param *bot index array 2
59	* \param m N/2, where N is the matrix row/column dimension
60	* \note taken from [Golub, Matrix computations, 1989, p451]
61	*/
62	void MerryGoRoundIndices(int top, int bot, int m)
63	{
64	int old_top, old_bot;
65	int k;
66	old_top = (int ) Malloc(sizeof(int)m, "music: old_top");
67	old_bot = (int ) Malloc(sizeof(int)m, "music: old_bot");
68	/* fprintf(stderr,"oldtop\t");
69	for (k=0;k<m;k++)
70	fprintf(stderr,"%i\t", top[k]);
71	fprintf(stderr,"\n");
72	fprintf(stderr,"oldbot\t");
73	for (k=0;k<m;k++)
74	fprintf(stderr,"%i\t", bot[k]);
75	fprintf(stderr,"\n");*/
76	// first copy arrays
77	for (k=0;k<m;k++) {
78	old_top[k] = top[k];
79	old_bot[k] = bot[k];
80	}
81	// then let the music play
82	for (k=0;k<m;k++) {
83	if (k==1)
84	top[k] = old_bot[0];
85	else if (k > 1)
86	top[k] = old_top[k-1];
87	if (k==m-1)
88	bot[k] = old_top[k];
89	else
90	bot[k] = old_bot[k+1];
91	}
92	/* fprintf(stderr,"top\t");
93	for (k=0;k<m;k++)
94	fprintf(stderr,"%i\t", top[k]);
95	fprintf(stderr,"\n");
96	fprintf(stderr,"bot\t");
97	for (k=0;k<m;k++)
98	fprintf(stderr,"%i\t", bot[k]);
99	fprintf(stderr,"\n");*/
100	// and finito
101	Free(old_top, "MerryGoRoundIndices: old_top");
102	Free(old_bot, "MerryGoRoundIndices: old_bot");
103	}
104
105	/** merry-go-round for matrix columns.
106	* The trick here is that we must be aware of multiple rotations per process, thus only some of the
107	* whole lot of local columns get sent/received, most of them are just shifted via exchanging the various
108	* pointers to the matrix columns within the local array.
109	* \param comm communicator for circulation
110	* \param *Aloc local array of columns
111	* \param Num entries per column
112	* \param max_rounds number of column pairs in \a *Around
113	* \param k offset for tag
114	* \param tagS0 MPI tag for sending left column
115	* \param tagS1 MPI tag for sending right column
116	* \param tagR0 MPI tag for receiving left column
117	* \param tagR1 MPI tag for receiving right column
118	*/
119	void MerryGoRoundColumns(MPI_Comm comm, double **Aloc, int Num, int max_rounds, int k, int tagS0, int tagS1, int tagR0, int tagR1) {
120	//double A_locS1, A_locS2; // local columns of A[k]
121	//double A_locR1, A_locR2; // local columns of A[k]
122	MPI_Request requestS0, requestS1, requestR0, requestR1;
123	MPI_Status status;
124	int ProcRank, ProcNum;
125	int l;
126	MPI_Comm_size (comm, &ProcNum);
127	MPI_Comm_rank (comm, &ProcRank);
128	double Abuffer1, Abuffer2; // mark the columns that are circulated
129
130	//fprintf(stderr,"shifting...");
131	if (ProcRank == 0) {
132	if (max_rounds > 1) {
133	// get last left column
134	Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
135	MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2*k, comm, &requestS0);
136	} else {
137	// get right column
138	Abuffer1 = Aloc[1]; // note down the free column
139	MPI_Isend(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, tagS1+2*k, comm, &requestS0);
140	}
141
142	//fprintf(stderr,"...left columns...");
143	for(l=2*max_rounds-2;l>2;l-=2) // left columns become shifted one place to the right
144	Aloc[l] = Aloc[l-2];
145
146	if (max_rounds > 1) {
147	//fprintf(stderr,"...first right...");
148	Aloc[2] = Aloc[1]; // get first right column
149	}
150
151	//fprintf(stderr,"...right columns...");
152	for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
153	Aloc[l] = Aloc[l+2];
154
155	//fprintf(stderr,"...last right...");
156	Aloc[(2*max_rounds-1)] = Abuffer1;
157	MPI_Irecv(Abuffer1, Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2*k, comm, &requestR1);
158
159	} else if (ProcRank == ProcNum-1) {
160	//fprintf(stderr,"...first right...");
161	// get first right column
162	Abuffer2 = Aloc[1]; // note down the free column
163	MPI_Isend(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
164
165	//fprintf(stderr,"...right columns...");
166	for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
167	Aloc[(l)] = Aloc[(l+2)];
168
169	//fprintf(stderr,"...last right...");
170	Aloc[(2max_rounds-1)] = Aloc[2(max_rounds-1)]; // Put last left into last right column
171
172	//fprintf(stderr,"...left columns...");
173	for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
174	Aloc[(l)] = Aloc[(l-2)];
175
176	//fprintf(stderr,"...first left...");
177	// if (max_rounds > 1)
178	Aloc[0] = Abuffer2; // get first left column
179	MPI_Irecv(Abuffer2, Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
180
181	} else {
182	// get last left column
183	MPI_Isend(Aloc[2(max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierALTag+2k, comm, &requestS0);
184	Abuffer1 = Aloc[2*(max_rounds-1)]; // note down the free column
185
186	//fprintf(stderr,"...first right...");
187	// get first right column
188	MPI_Isend(Aloc[1], Num, MPI_DOUBLE, ProcRank-1, WannierARTag+2*k, comm, &requestS1);
189	Abuffer2 = Aloc[1]; // note down the free column
190
191	//fprintf(stderr,"...left columns...");
192	for(l=2*(max_rounds-1);l>0;l-=2) // left columns become shifted one place to the right
193	Aloc[(l)] = Aloc[(l-2)];
194
195	//fprintf(stderr,"...right columns...");
196	for(l=1;l<2*max_rounds-1;l+=2) // right columns become shifted one place to the left
197	Aloc[(l)] = Aloc[(l+2)];
198
199	//fprintf(stderr,"...first left...");
200	Aloc[0] = Abuffer1; // get first left column
201	MPI_Irecv(Aloc[0], Num, MPI_DOUBLE, ProcRank-1, WannierALTag+2*k, comm, &requestR0);
202
203	//fprintf(stderr,"...last right...");
204	Aloc[(2*max_rounds-1)] = Abuffer2;
205	MPI_Irecv(Aloc[(2max_rounds-1)], Num, MPI_DOUBLE, ProcRank+1, WannierARTag+2k, comm, &requestR1);
206	}
207
208	//fprintf(stderr,"...waiting...");
209	if (ProcRank != ProcNum-1)
210	MPI_Wait(&requestS0, &status);
211	if (ProcRank != 0) // first left column
212	MPI_Wait(&requestR0, &status);
213	if (ProcRank != 0)
214	MPI_Wait(&requestS1, &status);
215	if (ProcRank != ProcNum-1)
216	MPI_Wait(&requestR1, &status);
217	//fprintf(stderr,"...done\n");
218	}
219
220	/** By testing of greatest common divisor of the matrix rows (\a AllocNum) finds suitable parallel cpu group.
221	* \param *P Problem at hand
222	* \param AllocNum number of rows in matrix
223	* \return address of MPI communicator
224	*/
225	#ifdef HAVE_INLINE
226	inline MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
227	#else
228	MPI_Comm * DetermineParallelGroupbyGCD (struct Problem *P, int AllocNum)
229	#endif
230	{
231	MPI_Comm *comm = &P->Par.comm_ST;
232
233	//if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Comparing groups - AllocNum %i --- All %i\t Psi %i\t PsiT %i\n",P->Par.me, AllocNum, P->Par.Max_me_comm_ST, P->Par.Max_me_comm_ST_Psi, P->Par.Max_my_color_comm_ST_Psi);
234	//if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) Jacobi diagonalization is done parallely by ", P->Par.me);
235	if (AllocNum % (P->Par.Max_me_comm_ST*2) == 0) { // all parallel
236	comm = &P->Par.comm_ST;
237	//if (P->Call.out[ReadOut]) fprintf(stderr,"all\n");
238	} else if (P->Par.Max_me_comm_ST_Psi >= P->Par.Max_my_color_comm_ST_Psi) { // always the bigger group comes first
239	if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
240	comm = &P->Par.comm_ST_Psi;
241	//if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
242	} else if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
243	comm = &P->Par.comm_ST_PsiT;
244	//if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
245	}
246	} else {
247	if (AllocNum % (P->Par.Max_my_color_comm_ST_Psi*2) == 0) { // Psis parallel
248	comm = &P->Par.comm_ST_PsiT;
249	//if (P->Call.out[ReadOut]) fprintf(stderr,"PsiT\n");
250	} else if (AllocNum % (P->Par.Max_me_comm_ST_Psi*2) == 0) { // coefficients parallel
251	comm = &P->Par.comm_ST_Psi;
252	//if (P->Call.out[ReadOut]) fprintf(stderr,"Psi\n");
253	}
254	}
255	return comm;
256	}
257
258	/** Allocates and fills Lookup table for sin/cos values at each grid node.
259	* \param ***cos_table pointer to two-dimensional lookup table for cosine values
260	* \param ***sin_table pointer to two-dimensional lookup table for sine values
261	* \param *N array with number of nodes per \a NDIM axis
262	*/
263	void CreateSinCosLookupTable(double *cos_table, double sin_table, int N)
264	{
265	int i, j;
266	double argument;
267	double cos_lookup, sin_lookup;
268
269	// create lookup table for sin/cos values
270	cos_lookup = (double *) Malloc(sizeof(double )NDIM, "ComputeMLWF: cos_lookup");
271	sin_lookup = (double *) Malloc(sizeof(double )NDIM, "ComputeMLWF: sin_lookup");
272	for (i=0;i<NDIM;i++) {
273	// allocate memory
274	cos_lookup[i] = (double ) Malloc(sizeof(double)N[i], "ComputeMLWF: cos_lookup");
275	sin_lookup[i] = (double ) Malloc(sizeof(double)N[i], "ComputeMLWF: sin_lookup");
276
277	// reset arrays
278	SetArrayToDouble0(cos_lookup[i],N[i]);
279	SetArrayToDouble0(sin_lookup[i],N[i]);
280
281	// create lookup values
282	for (j=0;j<N[i];j++) {
283	argument = 2PI/(double)N[i](double)j;
284	cos_lookup[i][j] = cos(argument);
285	sin_lookup[i][j] = sin(argument);
286	}
287	}
288	*cos_table = cos_lookup;
289	*sin_table = sin_lookup;
290	}
291
292	/** Frees memory allocated during CreateSinCosLookupTable().
293	* \param ***cos_lookup pointer to two-dimensional lookup table for cosine values
294	* \param ***sin_lookup pointer to two-dimensional lookup table for sine values
295	*/
296	void FreeSinCosLookupTable(double cos_lookup, double sin_lookup)
297	{
298	int i;
299	// free lookups
300	for (i=0;i<NDIM;i++) {
301	Free(cos_lookup[i], "FreeSinCosLookupTable: cos_lookup[i]");
302	Free(sin_lookup[i], "FreeSinCosLookupTable: sin_lookup[i]");
303	}
304	Free(cos_lookup, "FreeSinCosLookupTable: cos_lookup");
305	Free(sin_lookup, "FreeSinCosLookupTable: sin_lookup");
306	}
307
308	/** Fills the entries of the six variance matrices.
309	* These matrices are parallely diagonalized during Wannier Localization. They are calculated from the
310	* wave function and by diagonalization one obtains the unitary transformation with which the Psis are
311	* treated afterwards.
312	* \param *P Problem at hand
313	* \param AllocNum number of rows/columns
314	* \param **A pointer to variance matrices
315	* \sa ComputeMLWF() - master function.
316	*/
317	void FillHigherOrderRealMomentsMatrices(struct Problem P, int AllocNum, gsl_matrix *A)
318	{
319	struct Lattice *Lat = &P->Lat;
320	struct RunStruct *R = &P->R;
321	struct Psis *Psi = &Lat->Psi;
322	struct LatticeLevel *Lev0 = R->Lev0;
323	struct LatticeLevel *LevS = R->LevS;
324	struct Density *Dens0 = Lev0->Dens;
325	struct OnePsiElement OnePsiA, OnePsiB, *LOnePsiB;
326	struct fft_plan_3d *plan = Lat->plan;
327	fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
328	fftw_real PsiCR = (fftw_real )PsiC;
329	fftw_complex *work = Dens0->DensityCArray[Temp2Density];
330	fftw_real **HGcR = &Dens0->DensityArray[HGDensity]; // use HGDensity, 4x Gap..Density, TempDensity as a storage array
331	fftw_complex HGcRC = (fftw_complex)HGcR;
332	fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
333	fftw_real HGcR2 = (fftw_real)HGcR2C;
334	int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
335	MPI_Status status;
336	fftw_complex LPsiDatA=NULL, LPsiDatB=NULL;
337	int n[NDIM],n0,i0,iS, Index;
338	int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
339	int N0 = LevS->Plan0.plan->local_nx;
340	int *N = LevS->Plan0.plan->N;
341	const int NUpx = LevS->NUp[0];
342	const int NUpy = LevS->NUp[1];
343	const int NUpz = LevS->NUp[2];
344	double argument, PsiAtNode;
345	int e,g,i,j,k,l,m,p,u;
346	double a_ij = 0, b_ij = 0, A_ij = 0, B_ij = 0;
347	double cos_lookup = NULL,sin_lookup = NULL;
348
349	if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 2\n",P->Par.me);
350
351	debug(P, "Creating Lookup Table");
352	CreateSinCosLookupTable(&cos_lookup, &sin_lookup, N);
353
354	debug(P, "Calculating each entry of variance matrices");
355	l=-1; // to access U matrix element (0..Num-1)
356	// fill the matrices
357	for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
358	OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
359	if (OnePsiA->PsiType == type) { // drop all but occupied ones
360	l++; // increase l if it is non-extra wave function
361	if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
362	LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
363	else
364	LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
365
366	//fprintf(stderr,"(%i),(%i,%i): fft'd, A[..] and B, back-fft'd acting on \\phi_A\n",P->Par.me,l,0);
367	if (LPsiDatA != NULL) {
368	CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
369	// note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
370	for (n0=0;n0<N0;n0++)
371	for (n[1]=0;n[1]<N[1];n[1]++)
372	for (n[2]=0;n[2]<N[2];n[2]++) {
373	i0 = n[2]NUpz+N[2]NUpz(n[1]NUpy+N[1]NUpyn0*NUpx);
374	iS = n[2]+N[2](n[1]+N[1]n0);
375	n[0] = n0 + LevS->Plan0.plan->start_nx;
376	for (k=0;k<max_operators;k+=2) {
377	e = k/2;
378	argument = 2.PI/(double)(N[e])(double)(n[e]);
379	PsiAtNode = PsiCR[i0] /LevS->MaxN;
380	// check lookup
381	if (!l) // perform check on first wave function only
382	if ((fabs(cos(argument) - cos_lookup[e][n[e]]) > MYEPSILON) \|\| (fabs(sin(argument) - sin_lookup[e][n[e]]) > MYEPSILON)) {
383	Error(SomeError, "Lookup table does not match real value!");
384	}
385	HGcR[k][iS] = cos_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
386	HGcR2[k][iS] = cos_lookup[e][n[e]] * HGcR[k][iS]; /* Matrix Vector Mult */
387	HGcR[k+1][iS] = sin_lookup[e][n[e]] * PsiAtNode; /* Matrix Vector Mult */
388	HGcR2[k+1][iS] = sin_lookup[e][n[e]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
389	}
390	}
391	for (u=0;u<max_operators;u++) {
392	fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[u], work);
393	fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[u], work);
394	}
395	}
396	m = -1; // to access U matrix element (0..Num-1)
397	for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
398	OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
399	if (OnePsiB->PsiType == type) { // drop all but occupied ones
400	m++; // increase m if it is non-extra wave function
401	if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
402	LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
403	else
404	LOnePsiB = NULL;
405	if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
406	RecvSource = OnePsiB->my_color_comm_ST_Psi;
407	MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
408	LPsiDatB=LevS->LPsi->TempPsi;
409	} else { // .. otherwise send it to all other processes (Max_me... - 1)
410	for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
411	if (p != OnePsiB->my_color_comm_ST_Psi)
412	MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
413	LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
414	} // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
415
416	for (u=0;u<max_operators;u++) {
417	a_ij = 0;
418	b_ij = 0;
419	if (LPsiDatA != NULL) { // calculate, reduce and send to all ...
420	//fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,u);
421	g=0;
422	if (LevS->GArray[0].GSq == 0.0) {
423	Index = LevS->GArray[g].Index;
424	a_ij = (LPsiDatB[0].reHGcRC[u][Index].re + LPsiDatB[0].imHGcRC[u][Index].im);
425	b_ij = (LPsiDatB[0].reHGcR2C[u][Index].re + LPsiDatB[0].imHGcR2C[u][Index].im);
426	g++;
427	}
428	for (; g < LevS->MaxG; g++) {
429	Index = LevS->GArray[g].Index;
430	a_ij += 2(LPsiDatB[g].reHGcRC[u][Index].re + LPsiDatB[g].im*HGcRC[u][Index].im);
431	b_ij += 2(LPsiDatB[g].reHGcR2C[u][Index].re + LPsiDatB[g].im*HGcR2C[u][Index].im);
432	} // due to the symmetry the resulting matrix element is real and symmetric in (i,j) ! (complex multiplication simplifies ...)
433	// sum up elements from all coefficients sharing processes
434	MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
435	MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
436	a_ij = A_ij;
437	b_ij = B_ij;
438	// send element to all Psi-sharing who don't have l local (MPI_Send is a lot slower than AllReduce!)
439	MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
440	MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
441	} else { // receive ...
442	MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
443	MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_PsiT);
444	}
445	// ... and store
446	//fprintf(stderr,"(%i),(%i,%i): A[%i]: setting component (local: %lg, total: %lg)\n",P->Par.me, l,m,u,a_ij,A_ij);
447	//fprintf(stderr,"(%i),(%i,%i): B: adding upon component (local: %lg, total: %lg)\n",P->Par.me, l,m,b_ij,B_ij);
448	gsl_matrix_set(A[u], l, m, A_ij);
449	gsl_matrix_set(A[max_operators], l, m, B_ij + gsl_matrix_get(A[max_operators],l,m));
450	}
451	}
452	}
453	}
454	}
455	// reset extra entries
456	for (u=0;u<=max_operators;u++) {
457	for (i=Num;i<AllocNum;i++)
458	for (j=0;j<AllocNum;j++)
459	gsl_matrix_set(A[u], i,j, 0.);
460	for (i=Num;i<AllocNum;i++)
461	for (j=0;j<AllocNum;j++)
462	gsl_matrix_set(A[u], j,i, 0.);
463	}
464	FreeSinCosLookupTable(cos_lookup, sin_lookup);
465
466	/*// print A matrices for debug
467	if (P->Par.me == 0)
468	for (u=0;u<max_operators+1;u++) {
469	fprintf(stderr, "A[%i] = \n",u);
470	for (i=0;i<Num;i++) {
471	for (j=0;j<Num;j++)
472	fprintf(stderr, "%e\t",gsl_matrix_get(A[u],i,j));
473	fprintf(stderr, "\n");
474	}
475	}
476	*/
477	}
478
479	/** Calculates reciprocal second order moments of each PsiType#Occupied orbital.
480	* First order is zero due to wave function being real (symmetry condition).
481	* \param *P Problem at hand
482	*/
483	void CalculateSecondOrderReciprocalMoment(struct Problem *P)
484	{
485	struct Lattice *Lat = &P->Lat;
486	struct RunStruct *R = &P->R;
487	struct FileData *F = &P->Files;
488	struct Psis *Psi = &Lat->Psi;
489	struct LatticeLevel *LevS = R->LevS;
490	double result, Result;
491	fftw_complex *LPsiDatA=NULL;
492	struct OnePsiElement *OnePsiA;
493	int i,j,l,g;
494	char spin[12], suffix[18];
495
496	switch (Lat->Psi.PsiST) {
497	case SpinDouble:
498	strcpy(suffix,".recispread.csv");
499	strcpy(spin,"SpinDouble");
500	break;
501	case SpinUp:
502	strcpy(suffix,".recispread_up.csv");
503	strcpy(spin,"SpinUp");
504	break;
505	case SpinDown:
506	strcpy(suffix,".recispread_down.csv");
507	strcpy(spin,"SpinDown");
508	break;
509	}
510	if(P->Par.me_comm_ST == 0) {
511	if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Calculating reciprocal moments ...\n",P->Par.me);
512	if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
513	OpenFile(P, &F->ReciSpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
514	else if (F->ReciSpreadFile == NULL) // re-op,18en if not first level and not opened yet (or closed from ParseWannierFile)
515	OpenFile(P, &F->ReciSpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
516	if (F->ReciSpreadFile == NULL) {
517	Error(SomeError,"ComputeMLWF: Error opening Reciprocal spread File!\n");
518	} else {
519	fprintf(F->ReciSpreadFile,"===== Reciprocal Spreads of type %s ==========================================================================\n", spin);
520	}
521	}
522
523	// integrate second order moment
524	for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
525	OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
526	if (OnePsiA->PsiType == type) { // drop all but occupied ones
527	if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
528	LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
529	else
530	LPsiDatA = NULL;
531
532	if (LPsiDatA != NULL) {
533	if (P->Par.me_comm_ST == 0)
534	fprintf(F->ReciSpreadFile,"Psi%d_Lev%d\t", Psi->AllPsiStatus[l].MyGlobalNo, R->LevSNo);
535	for (i=0;i<NDIM;i++) {
536	for (j=0;j<NDIM;j++) {
537	result = 0.;
538	g = 0;
539	if (LevS->GArray[0].GSq == 0.0) {
540	result += LevS->GArray[g].G[i]LevS->GArray[g].G[j](LPsiDatA[0].re * LPsiDatA[0].re);
541	g++;
542	}
543	for (;g<LevS->MaxG;g++)
544	result += LevS->GArray[g].G[i]LevS->GArray[g].G[j]2.(LPsiDatA[g].re LPsiDatA[g].re + LPsiDatA[g].im * LPsiDatA[g].im);
545	//result *= Lat->Volume/LevS->MaxG;
546	MPI_Allreduce ( &result, &Result, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
547	if (P->Par.me_comm_ST == 0)
548	fprintf(F->ReciSpreadFile,"%2.5lg ", Result);
549	}
550	}
551	if (P->Par.me_comm_ST == 0)
552	fprintf(F->ReciSpreadFile,"\n");
553	}
554	}
555	}
556	if(P->Par.me_comm_ST == 0) {
557	fprintf(F->ReciSpreadFile,"====================================================================================================================\n\n");
558	fflush(F->ReciSpreadFile);
559	}
560	}
561
562	/** Given the unitary matrix the transformation is performed on the Psis.
563	* \param *P Problem at hand
564	* \param *U gsl matrix containing the transformation matrix
565	* \param Num dimension parameter for the matrix, i.e. number of wave functions
566	*/
567	void UnitaryTransformationOnWavefunctions(struct Problem P, gsl_matrix U, int Num)
568	{
569	struct Lattice *Lat = &P->Lat;
570	struct RunStruct *R = &P->R;
571	struct Psis *Psi = &Lat->Psi;
572	struct LatticeLevel *LevS = R->LevS;
573	MPI_Status status;
574	struct OnePsiElement OnePsiB, OnePsiA, *LOnePsiB;
575	int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
576	fftw_complex LPsiDatA=NULL, LPsiDatB=NULL;
577	int g,i,j,l,k,m,p;
578
579	//if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation of all wave functions according to U\n",P->Par.me);
580
581	Num = Psi->TypeStartIndex[type+1] - Psi->TypeStartIndex[type]; // recalc Num as we can only work with local Psis from here
582	fftw_complex *coeffs_buffer = Malloc(sizeof(fftw_complex )Num, "ComputeMLWF: *coeffs_buffer");
583
584	for (l=0;l<Num;l++) // allocate for each local wave function
585	coeffs_buffer[l] = LevS->LPsi->OldLocalPsi[l];
586
587	//if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 6: Transformation ...\n",P->Par.me);
588	l=-1; // to access U matrix element (0..Num-1)
589	k=-1; // to access the above swap coeffs_buffer (0..LocalNo-1)
590	for (i=0; i < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; i++) { // go through all wave functions
591	OnePsiA = &Psi->AllPsiStatus[i]; // grab OnePsiA
592	if (OnePsiA->PsiType == type) { // drop all but occupied ones
593	l++; // increase l if it is occupied wave function
594	if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // local?
595	k++; // increase k only if it is a local, non-extra orbital wave function
596	LPsiDatA = (fftw_complex *) coeffs_buffer[k]; // new coeffs first go to copy buffer, old ones must not be overwritten yet
597	SetArrayToDouble0((double )LPsiDatA, 2LevS->MaxG); // zero buffer part
598	} else
599	LPsiDatA = NULL; // otherwise processes won't enter second loop, though they're supposed to send coefficients!
600
601	m = -1; // to access U matrix element (0..Num-1)
602	for (j=0; j < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; j++) { // go through all wave functions
603	OnePsiB = &Psi->AllPsiStatus[j]; // grab OnePsiB
604	if (OnePsiB->PsiType == type) { // drop all but occupied ones
605	m++; // increase m if it is occupied wave function
606	if (OnePsiB->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
607	LOnePsiB = &Psi->LocalPsiStatus[OnePsiB->MyLocalNo];
608	else
609	LOnePsiB = NULL;
610	if (LOnePsiB == NULL) { // if it's not local ... receive it from respective process into TempPsi
611	RecvSource = OnePsiB->my_color_comm_ST_Psi;
612	MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag2, P->Par.comm_ST_PsiT, &status );
613	LPsiDatB=LevS->LPsi->TempPsi;
614	} else { // .. otherwise send it to all other processes (Max_me... - 1)
615	for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
616	if (p != OnePsiB->my_color_comm_ST_Psi)
617	MPI_Send( LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag2, P->Par.comm_ST_PsiT);
618	LPsiDatB=LevS->LPsi->LocalPsi[OnePsiB->MyLocalNo];
619	} // LPsiDatB is now set to the coefficients of OnePsi either stored or MPI_Received
620
621	if (LPsiDatA != NULL) {
622	double tmp = gsl_matrix_get(U,l,m);
623	g=0;
624	if (LevS->GArray[0].GSq == 0.0) {
625	LPsiDatA[g].re += LPsiDatB[g].re * tmp;
626	LPsiDatA[g].im += LPsiDatB[g].im * tmp;
627	g++;
628	}
629	for (; g < LevS->MaxG; g++) {
630	LPsiDatA[g].re += LPsiDatB[g].re * tmp;
631	LPsiDatA[g].im += LPsiDatB[g].im * tmp;
632	}
633	}
634	}
635	}
636	}
637	}
638
639	//if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) STEP 6: Swapping buffer mem\n",P->Par.me);
640	// now, as all wave functions are updated, swap the buffer
641	l = -1;
642	for (k=0;k<Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT;k++) { // go through each local occupied wave function
643	if (Psi->AllPsiStatus[k].PsiType == type && Psi->AllPsiStatus[k].my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {
644	l++;
645	//if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) (k:%i,l:%i) LocalNo = (%i,%i)\t AllPsiNo = (%i,%i)\n", P->Par.me, k,l,Psi->LocalPsiStatus[l].MyLocalNo, Psi->LocalPsiStatus[l].MyGlobalNo, Psi->AllPsiStatus[k].MyLocalNo, Psi->AllPsiStatus[k].MyGlobalNo);
646	LPsiDatA = (fftw_complex *)coeffs_buffer[l];
647	LPsiDatB = LevS->LPsi->LocalPsi[l];
648	for (g=0;g<LevS->MaxG;g++) {
649	LPsiDatB[g].re = LPsiDatA[g].re;
650	LPsiDatB[g].im = LPsiDatA[g].im;
651	}
652	// recalculating non-local form factors which are coefficient dependent!
653	CalculateNonLocalEnergyNoRT(P, Psi->LocalPsiStatus[l].MyLocalNo);
654	}
655	}
656	// and free allocated buffer memory
657	Free(coeffs_buffer, "UnitaryTransformationOnWavefunctions: coeffs_buffer");
658	}
659
660	/** Changes Wannier Centres according to RunStruct#CommonWannier.
661	* \param *P Problem at hand.
662	* \param Num number of Psis
663	* \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
664	* \param *WannierSpread array with wannier spread per wave function
665	*/
666	void ChangeWannierCentres(struct Problem P, int Num, double WannierCentre, double WannierSpread)
667	{
668	struct RunStruct *R = &P->R;
669	struct Lattice *Lat = &P->Lat;
670	struct LatticeLevel *LevS = R->LevS;
671	int marker, *group;
672	int partner[Num];
673	int i,j,l,k;
674	int totalflag, flag;
675	double q[NDIM], center[NDIM];
676	double Spread;
677	int *N = LevS->Plan0.plan->N;
678
679	switch (R->CommonWannier) {
680	case 4:
681	debug(P,"Shifting each Wannier centers to cell center");
682	for (j=0;j<NDIM;j++) // center point in [0,1]^3
683	center[j] = 0.5;
684	RMat33Vec3(q,Lat->RealBasis, center); // transform to real coordinates
685	for (i=0; i < Num; i++) { // go through all occupied wave functions
686	for (j=0;j<NDIM;j++) // put into Wannier centres
687	WannierCentre[i][j] = q[j];
688	}
689	break;
690	case 3:
691	debug(P,"Shifting Wannier centers individually to nearest grid point");
692	for (i=0;i < Num; i++) { // go through all wave functions
693	RMat33Vec3(q, Lat->ReciBasis, WannierCentre[i]);
694	for (j=0;j<NDIM;j++) { // Recibasis is not true inverse but times 2.*PI
695	q[j] = (double)N[j]/(2.PI);
696
697	//fprintf(stderr,"(%i) N[%i]: %i\t tmp %e\t floor %e\t ceil %e\n",P->Par.me, j, N[j], tmp, floor(tmp), ceil(tmp));
698	if (fabs((double)floor(q[j]) - q[j]) < fabs((double)ceil(q[j]) - q[j]))
699	q[j] = floor(q[j])/(double)N[j];
700	else
701	q[j] = ceil(q[j])/(double)N[j];
702	}
703	RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
704	}
705	break;
706	case 2:
707	debug(P,"Combining individual orbitals according to spread.");
708	//fprintf(stderr,"(%i) Finding multiple bindings and Reweighting Wannier centres\n",P->Par.me);
709	debug(P,"finding partners");
710	marker = (int) Malloc(sizeof(int)(Num+1),"ComputeMLWF: marker");
711	group = (int*) Malloc(sizeof(int )*Num,"ComputeMLWF: group");
712	for (l=0;l<Num;l++) {
713	group[l] = (int) Malloc(sizeof(int)(Num+1),"ComputeMLWF: group[l]"); // each must group must have one more as end marker
714	for (k=0;k<=Num;k++)
715	group[l][k] = -1; // reset partner group
716	}
717	for (k=0;k<Num;k++)
718	partner[k] = 0;
719	debug(P,"mem allocated");
720	// go for each orbital through every other, check distance against the sum of both spreads
721	// if smaller add to group of this orbital
722	for (l=0;l<Num;l++) {
723	j=0; // index for partner group
724	for (k=0;k<Num;k++) { // check this against l
725	Spread = 0.;
726	for (i=0;i<NDIM;i++) {
727	//fprintf(stderr,"(%i) Spread += (%e - %e)^2 \n", P->Par.me, WannierCentre[l][i], WannierCentre[k][i]);
728	Spread += (WannierCentre[l][i] - WannierCentre[k][i])*(WannierCentre[l][i] - WannierCentre[k][i]);
729	}
730	Spread = sqrt(Spread); // distance in Spread
731	//fprintf(stderr,"(%i) %i to %i: distance %e, SpreadSum = %e + %e = %e \n", P->Par.me, l, k, Spread, WannierSpread[l], WannierSpread[k], WannierSpread[l]+WannierSpread[k]);
732	if (Spread < 1.5*(WannierSpread[l]+WannierSpread[k])) {// if distance smaller than sum of spread
733	group[l][j++] = k; // add k to group of l
734	partner[l]++;
735	//fprintf(stderr,"(%i) %i added as %i-th member to %i's group.\n", P->Par.me, k, j, l);
736	}
737	}
738	}
739
740	// consistency, for each orbital check if this orbital is also in the group of each referred orbital
741	debug(P,"checking consistency");
742	totalflag = 1;
743	for (l=0;l<Num;l++) // checking l's group
744	for (k=0;k<Num;k++) { // k is partner index
745	if (group[l][k] != -1) { // if current index k is a partner
746	flag = 0;
747	for(j=0;j<Num;j++) { // go through each entry in l partner's partner group if l exists
748	if ((group[ group[l][k] ][j] == l))
749	flag = 1;
750	}
751	//if (flag == 0) fprintf(stderr, "(%i) in %i's group %i is referred as a partner, but not the other way round!\n", P->Par.me, l, group[l][k]);
752	if (totalflag == 1) totalflag = flag;
753	}
754	}
755	// for each orbital group (marker group) weight each center to a total and put this into the local WannierCentres
756	debug(P,"weight and calculate new centers for partner groups");
757	for (l=0;l<=Num;l++)
758	marker[l] = 1;
759	if (totalflag) {
760	for (l=0;l<Num;l++) { // go through each orbital
761	if (marker[l] != 0) { // if it hasn't been reweighted
762	marker[l] = 0;
763	for (i=0;i<NDIM;i++)
764	q[i] = 0.;
765	j = 0;
766	while (group[l][j] != -1) {
767	marker[group[l][j]] = 0;
768	for (i=0;i<NDIM;i++) {
769	//fprintf(stderr,"(%i) Adding to %i's group, %i entry of %i: %e\n", P->Par.me, l, i, group[l][j], WannierCentre[ group[l][j] ][i]);
770	q[i] += WannierCentre[ group[l][j] ][i];
771	}
772	j++;
773	}
774	//fprintf(stderr,"(%i) %i's group: (%e,%e,%e)/%i = (%e,%e,%e)\n", P->Par.me, l, q[0], q[1], q[2], j, q[0]/(double)j, q[1]/(double)j, q[2]/(double)j);
775	for (i=0;i<NDIM;i++) {// weight by number of elements in partner group
776	q[i] /= (double)(j);
777	}
778
779	// put WannierCentre into own and all partners'
780	for (i=0;i<NDIM;i++)
781	WannierCentre[l][i] = q[i];
782	j = 0;
783	while (group[l][j] != -1) {
784	for (i=0;i<NDIM;i++)
785	WannierCentre[group[l][j]][i] = q[i];
786	j++;
787	}
788	}
789	}
790	}
791	if (P->Call.out[StepLeaderOut]) {
792	fprintf(stderr,"Summary:\n");
793	fprintf(stderr,"========\n");
794	for (i=0;i<Num;i++)
795	fprintf(stderr,"%i belongs to a %i-ple binding.\n",i,partner[i]);
796	}
797	debug(P,"done");
798
799	Free(marker, "ChangeWannierCentres: marker");
800	for (l=0;l<Num;l++)
801	Free(group[l], "ChangeWannierCentres: group[l]");
802	Free(group, "ChangeWannierCentres: group");
803	break;
804	case 1:
805	debug(P,"Individual orbitals are changed to center of all.");
806	for (i=0;i<NDIM;i++) // zero center of weight
807	q[i] = 0.;
808	for (k=0;k<Num;k++)
809	for (i=0;i<NDIM;i++) { // sum up all orbitals each component
810	q[i] += WannierCentre[k][i];
811	}
812	for (i=0;i<NDIM;i++) // divide by number
813	q[i] /= Num;
814	for (k=0;k<Num;k++)
815	for (i=0;i<NDIM;i++) { // put into this function's array
816	WannierCentre[k][i] = q[i];
817	}
818	break;
819	case 0:
820	default:
821	break;
822	}
823	}
824
825	/** From the entries of the variance matrices the spread is calculated.
826	* WannierCentres are evaluated according to Resta operator: \f$\langle X \rangle = \frac{L}{2\pi} \sum_j {\cal I} \ln{\langle \Psi_j \| \exp{(i \frac{2\pi}{L} X)} \| \Psi_j \rangle}\f$
827	* WannierSpread is: \f$ \sum_j \langle \Psi_j \| r^2 \| \Psi_j \rangle - \langle \Psi_j \| r \| psi_j \rangle^2\f$
828	* \param *P Problem at hand
829	* \param *A variance matrices
830	* \param old_spread first term of wannier spread
831	* \param spread second term of wannier spread
832	* \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
833	* \param *WannierSpread array with wannier spread per wave function
834	*/
835	void ComputeWannierCentresfromVarianceMatrices(struct Problem P, gsl_matrix A, double spread, double old_spread, double WannierCentre, double WannierSpread)
836	{
837	struct Lattice *Lat = &P->Lat;
838	struct Psis *Psi = &Lat->Psi;
839	struct OnePsiElement *OnePsiA;
840	int i,j,k,l;
841	double tmp, q[NDIM];
842
843	*old_spread = 0;
844	*spread = 0;
845
846	// the spread for x,y,z resides in the respective diagonal element of A_.. for each orbital
847	i=-1;
848	for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
849	OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
850	if (OnePsiA->PsiType == type) { // drop all but occupied ones
851	i++; // increase l if it is occupied wave function
852	//fprintf(stderr,"(%i) Wannier for %i\n", P->Par.me, i);
853
854	// calculate Wannier Centre
855	for (j=0;j<NDIM;j++) {
856	q[j] = 1./(2.PI) GSL_IMAG( gsl_complex_log( gsl_complex_rect(gsl_matrix_get(A[j2],i,i),gsl_matrix_get(A[j2+1],i,i))));
857	if (q[j] < 0) // change wrap around of above operator to smooth 0...Lat->RealBasisSQ
858	q[j] += 1.;
859	}
860	RMat33Vec3(WannierCentre[i], Lat->RealBasis, q);
861
862	// store orbital spread and centre in file
863	tmp = - pow(gsl_matrix_get(A[0],i,i),2) - pow(gsl_matrix_get(A[1],i,i),2)
864	- pow(gsl_matrix_get(A[2],i,i),2) - pow(gsl_matrix_get(A[3],i,i),2)
865	- pow(gsl_matrix_get(A[4],i,i),2) - pow(gsl_matrix_get(A[5],i,i),2);
866	WannierSpread[i] = gsl_matrix_get(A[max_operators],i,i) + tmp;
867	//fprintf(stderr,"(%i) WannierSpread[%i] = %e\n", P->Par.me, i, WannierSpread[i]);
868	//if (P->Par.me == 0) fprintf(F->SpreadFile,"Orbital %d:\t Wannier center (x,y,z)=(%lg,%lg,%lg)\t Spread sigma^2 = %lg - %lg = %lg\n",
869	//Psi->AllPsiStatus[i].MyGlobalNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], gsl_matrix_get(A[max_operators],i,i), -tmp, WannierSpread[i]);
870	//if (P->Par.me == 0) fprintf(F->SpreadFile,"%e\t%e\t%e\n",
871	//WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2]);
872
873	// gather all spreads
874	*old_spread += gsl_matrix_get(A[max_operators],i,i); // tr(U^H B U)
875	for (k=0;k<max_operators;k++)
876	*spread += pow(gsl_matrix_get(A[k],i,i),2);
877	}
878	}
879	}
880
881	/** Prints gsl_matrix nicely to screen.
882	* \param *P Problem at hand
883	* \param *U gsl matrix
884	* \param Num number of rows/columns
885	* \param *msg name of matrix, prepended before entries
886	*/
887	void PrintGSLMatrix(struct Problem P, gsl_matrix U, int Num, const char *msg)
888	{
889	int k,l;
890	fprintf(stderr,"(%i) %s = \n",P->Par.me, msg);
891	for (k=0;k<Num;k++) {
892	for (l=0;l<Num;l++)
893	fprintf(stderr,"%e\t",gsl_matrix_get(U,l,k));
894	fprintf(stderr,"\n");
895	}
896	}
897
898	/** Allocates memory for the DiagonalizationData structure
899	* \param *P Problem at hand
900	* \param DiagData pointer to structure
901	* \param Num number of rows and columns in matrix
902	* \param NumMatrices number of matrices to be simultaneously diagonalized
903	* \param extra number of extra matrices to be passively also diagonalized
904	*/
905	void InitDiagonalization(struct Problem P, struct DiagonalizationData DiagData, int Num, int NumMatrices, int extra)
906	{
907	int i;
908
909	// store integer values
910	//if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 1\n",P->Par.me);
911	DiagData->Num = Num;
912	DiagData->AllocNum = ceil((double)Num / 2. ) *2;
913	DiagData->NumMatrices = NumMatrices;
914	DiagData->extra = extra;
915
916	// determine communicator and our rank and its size
917	DiagData->comm = DetermineParallelGroupbyGCD(P,DiagData->AllocNum);
918	MPI_Comm_size (*(DiagData->comm), &(DiagData->ProcNum));
919	MPI_Comm_rank (*(DiagData->comm), &(DiagData->ProcRank));
920
921	// allocate memory
922	DiagData->U = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
923	gsl_matrix_set_identity(DiagData->U);
924
925	DiagData->A = (gsl_matrix *) Malloc((DiagData->NumMatrices+DiagData->extra) sizeof(gsl_matrix ), "InitDiagonalization: A");
926	for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++) {
927	DiagData->A[i] = gsl_matrix_calloc(DiagData->AllocNum,DiagData->AllocNum);
928	gsl_matrix_set_zero(DiagData->A[i]);
929	}
930
931	// init merry-go-round array for diagonilzation
932	DiagData->top = (int ) Malloc(sizeof(int)DiagData->AllocNum/2, "InitDiagonalization: top");
933	DiagData->bot = (int ) Malloc(sizeof(int)DiagData->AllocNum/2, "InitDiagonalization: bot");
934	for (i=0;i<DiagData->AllocNum/2;i++) {
935	DiagData->top[i] = 2*i;
936	DiagData->bot[i] = 2*i+1;
937	}
938	/* // print starting values of index generation tables top and bot
939	fprintf(stderr,"top\t");
940	for (k=0;k<AllocNum/2;k++)
941	fprintf(stderr,"%i\t", top[k]);
942	fprintf(stderr,"\n");
943	fprintf(stderr,"bot\t");
944	for (k=0;k<AllocNum/2;k++)
945	fprintf(stderr,"%i\t", bot[k]);
946	fprintf(stderr,"\n");*/
947	}
948
949	/** Frees allocated memory for the DiagonalizationData structure.
950	* \param DiagData pointer to structure
951	*/
952	void FreeDiagonalization(struct DiagonalizationData *DiagData)
953	{
954	int i;
955
956	Free(DiagData->top, "FreeDiagonalization: DiagData->top");
957	Free(DiagData->bot, "FreeDiagonalization: DiagData->bot");
958	gsl_matrix_free(DiagData->U);
959	for (i=0;i<(DiagData->NumMatrices+DiagData->extra);i++)
960	gsl_matrix_free(DiagData->A[i]);
961	Free(DiagData->A, "FreeDiagonalization: DiagData->A");
962	}
963
964	/** Orthogonalizing PsiType#Occupied wave functions for MD.
965	* Orthogonalizes wave functions by finding a unitary transformation such that the density remains unchanged.
966	* To this end, the overlap matrix \f$\langle \Psi_i \| \Psi_j \rangle\f$ is created and diagonalised by Jacobi
967	* transformation (product of rotation matrices). The found transformation matrix is applied to all Psis.
968	* \param *P Problem at hand
969	* \note [Payne] states that GramSchmidt is not well-suited. However, this Jacobi diagonalisation is not well
970	* suited for our CG minimisation either. If one wave functions is changed in course of the minimsation
971	* procedure, in a Gram-Schmidt-Orthogonalisation only this wave function is changed (although it remains under
972	* debate whether subsequent Psis are still orthogonal to this changed wave function!), in a Jacobi-Orthogonali-
973	* stion however at least two if not all wave functions are changed. Thus inhibits our fast way of updating the
974	* density after a minimisation step -- UpdateDensityCalculation(). Thus, this routine can only be used for a
975	* final orthogonalisation of all Psis, after the CG minimisation.
976	*/
977	void OrthogonalizePsis(struct Problem *P)
978	{
979	struct Lattice *Lat = &P->Lat;
980	struct Psis *Psi = &Lat->Psi;
981	struct LatticeLevel *LevS = P->R.LevS;
982	int Num = Psi->NoOfPsis;
983	int i,j,l;
984	struct DiagonalizationData DiagData;
985	double PsiSP;
986
987	InitDiagonalization(P, &DiagData, Num, 1, 0);
988
989	// Calculate Overlap matrix
990	for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
991	CalculateOverlap(P, l, Occupied);
992	for (i=0;i<Num;i++) // fill gsl matrix with overlap values
993	for (j=0;j<Num;j++)
994	gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
995	//if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i\|Psi_J> (before)");
996
997	//if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (before)");
998
999	// diagonalize overlap matrix
1000	Diagonalize(P, &DiagData);
1001
1002	//if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.U,Num,"Transformation (after)");
1003
1004	// apply found unitary transformation to wave functions
1005	UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1006
1007	// update GramSch stati
1008	for (i=Psi->TypeStartIndex[Occupied];i<Psi->TypeStartIndex[UnOccupied];i++) {
1009	GramSchNormalize(P,LevS,LevS->LPsi->LocalPsi[i], 0.); // calculates square product if 0. is given...
1010	PsiSP = GramSchGetNorm2(P,LevS,LevS->LPsi->LocalPsi[i]);
1011	//if (P->Par.me_comm_ST_Psi == 0) fprintf(stderr,"(%i) PsiSP[%i] = %lg\n", P->Par.me, i, PsiSP);
1012	Psi->LocalPsiStatus[i].PsiGramSchStatus = (int)IsOrthonormal;
1013	}
1014	UpdateGramSchAllPsiStatus(P,Psi);
1015	/*
1016	for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++)
1017	CalculateOverlap(P, l, Occupied);
1018	for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1019	for (j=0;j<Num;j++)
1020	gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1021	if (P->Call.out[ReadOut]) PrintGSLMatrix(P,DiagData.A[0],Num,"<Psi_i\|Psi_J> (after)");
1022	*/
1023	// free memory
1024	FreeDiagonalization(&DiagData);
1025	}
1026
1027	/** Orthogonalizing PsiType#Occupied wave functions and their time derivatives for MD.
1028	* Ensures that two orthonormality condition are met for Psis:
1029	* \f$\langle \Psi_i \| \Psi_j \rangle = \delta_{ij}\f$
1030	* \f$\langle \dot{\Psi_i} \| \dot{\Psi_j} \rangle = \delta_{ij}\f$
1031	* \param *P Problem at hand
1032	* \note [Payne] states that GramSchmidt is not well-suited, and this stronger
1033	* condition is needed for numerical stability
1034	* \todo create and fill 1stoverlap with finite difference between Psi, OldPsi with R->Deltat
1035	*/
1036	void StrongOrthogonalizePsis(struct Problem *P)
1037	{
1038	struct Lattice *Lat = &P->Lat;
1039	struct Psis *Psi = &Lat->Psi;
1040	int Num = Psi->NoOfPsis;
1041	int i,j,l;
1042	struct DiagonalizationData DiagData;
1043
1044	InitDiagonalization(P, &DiagData, Num, 2, 0);
1045
1046	// Calculate Overlap matrix
1047	for (l=Psi->TypeStartIndex[Occupied]; l<Psi->TypeStartIndex[UnOccupied]; l++) {
1048	CalculateOverlap(P, l, Occupied);
1049	//Calculate1stOverlap(P, l, Occupied);
1050	}
1051	for (i=0;i<Num;i++) // fill gsl matrix with overlap values
1052	for (j=0;j<Num;j++) {
1053	gsl_matrix_set(DiagData.A[0],i,j,Psi->Overlap[i][j]);
1054	//gsl_matrix_set(DiagData.A[1],i,j,Psi->1stOverlap[i][j]);
1055	}
1056
1057	// diagonalize overlap matrix
1058	Diagonalize(P, &DiagData);
1059
1060	// apply found unitary transformation to wave functions
1061	UnitaryTransformationOnWavefunctions(P, DiagData.U, DiagData.AllocNum);
1062
1063	// free memory
1064	FreeDiagonalization(&DiagData);
1065	}
1066
1067	/** Uses either serial or parallel diagonalization.
1068	* \param *P Problem at hand
1069	* \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1070	* \sa ParallelDiagonalization(), SerialDiagonalization()
1071	*/
1072	void Diagonalize(struct Problem P, struct DiagonalizationData DiagData)
1073	{
1074	if (DiagData->Num != 1) { // one- or multi-process case?
1075	if (((DiagData->AllocNum % 2) == 0) && (DiagData->ProcNum != 1) && ((DiagData->AllocNum / 2) % DiagData->ProcNum == 0)) {
1076	/*
1077	debug(P,"Testing with silly matrix");
1078	ParallelDiagonalization(P, test, Utest, 1, 0, 4, Num, comm, ProcRank, ProcNum, top, bot);
1079	if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1080	PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1081	if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1082	PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1083	*/
1084	debug(P,"ParallelDiagonalization");
1085	ParallelDiagonalization(P, DiagData);
1086	} else {/*
1087	debug(P,"Testing with silly matrix");
1088	SerialDiagonalization(P, test, Utest, 1, 0, 4, Num, top, bot);
1089	if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1090	PrintGSLMatrix(P, test[0], 4, "test[0] (diagonalized)");
1091	if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1092	PrintGSLMatrix(P, Utest, 4, "Utest (final)");
1093	*/
1094	debug(P,"SerialDiagonalization");
1095	SerialDiagonalization(P, DiagData);
1096	}
1097
1098	//if(P->Call.out[ReadOut]) // && P->Par.me == 0)
1099	//PrintGSLMatrix(P, DiagData->U, DiagData->AllocNum, "U");
1100	}
1101	}
1102
1103	/** Computation of Maximally Localized Wannier Functions.
1104	* Maximally localized functions are prime when evulating a Hamiltonian with
1105	* magnetic fields under periodic boundary conditions, as the common position
1106	* operator is no longer valid. These can be obtained by orbital rotations, which
1107	* are looked for iteratively and gathered in one transformation matrix, to be
1108	* later applied to the set of orbital wave functions.
1109	*
1110	* In order to obtain these, the following algorithm is applied:
1111	* -# Initialize U (identity) as the sought-for transformation matrix
1112	* -# Compute the real symmetric (due to Gamma point symmetry!) matrix elements
1113	* \f$A^{(k)}_{ij} = \langle \phi_i \| A^{(k)} \| \phi_j \rangle\f$ for the six operators
1114	* \f$A^{(k)}\f$
1115	* -# For each pair of indices (i,j) (i<j) do the following:
1116	* -# Compute the 2x2 matrix \f$G = \Re \Bigl ( \sum_k h^H(A^{(k)}) h(A^{(k)}) \Bigr)\f$
1117	* where \f$h(A) = [a_{ii} - a_{jj}, a_{ij} + a_{ji}]\f$
1118	* -# Obtain eigenvalues and eigenvectors of G. Set \f$[x,y]^T\f$ to the eigenvector of G
1119	* corresponding to the greatest eigenvalue, such that \f$x\geq0\f$
1120	* -# Compute the rotation matrix R elements (ii,ij,ji,jj) \f$[c,s,-s,c]\f$ different from the
1121	* identity matrix by \f$r=\sqrt{x^2+y^2}\f$, \f$c = \sqrt{\frac{x+r}{2r}}\f$
1122	* \f$s=\frac{y}{\sqrt{2r(x+r)}}\f$
1123	* -# Perform the similarity operation \f$A^{(k)} \rightarrow R A^{(k)} R\f$
1124	* -# Gather the rotations in \f$U = U R\f$
1125	* -# Compute the total spread \f$\sigma^2_{A^{(k)}}\f$
1126	* -# Compare the change in spread to a desired minimum RunStruct#EpsWannier, if still greater go to step 3.
1127	* -# Apply transformations to the orbital wavefunctions \f$ \| \phi_i \rangle = \sum_j U_{ij} \| \phi_j \rangle\f$
1128	* -# Compute the position of the Wannier centers from diagonal elements of \f$A^{(k)}\f$, store in
1129	* OnePsiElementAddData#WannierCentre
1130	*
1131	* Afterwards, the routine applies the found unitary rotation to the unperturbed group of wave functions.
1132	* Note that hereby additional memory is needed as old and transformed wave functions must be present at the same
1133	* time.
1134	*
1135	* The routine uses parallelization if possible. A parallel Jacobi-Diagonalization is implemented using the index
1136	* generation in music() and shift-columns() such that the evaluated position operator eigenvalue matrices
1137	* may be diagonalized simultaneously and parallely. We use the implementation explained in
1138	* [Golub, Matrix computations, 1989, p451].
1139	*
1140	* \param *P Problem at hand
1141	*/
1142	void ComputeMLWF(struct Problem *P) {
1143	// variables and allocation
1144	struct Lattice *Lat = &P->Lat;
1145	struct Psis *Psi = &Lat->Psi;
1146	int i;
1147	int Num = Psi->NoOfPsis; // is number of occupied plus unoccupied states for rows
1148	double **WannierCentre;
1149	double *WannierSpread;
1150	double spread, spreadSQ;
1151	struct DiagonalizationData DiagData;
1152
1153	if(P->Call.out[StepLeaderOut]) fprintf(stderr,"(%i) Beginning localization of orbitals ...\n",P->Par.me);
1154
1155	InitDiagonalization(P, &DiagData, Num, max_operators, 1);
1156
1157	// STEP 2: Calculate A[k]_ij = V/N \sum_{G1,G2} C^\ast_{l,G1} c_{m,G2} \sum_R A^{(k)}(R) exp(iR(G2-G1))
1158	//debug (P,"Calculatung Variance matrices");
1159	FillHigherOrderRealMomentsMatrices(P, DiagData.AllocNum, DiagData.A);
1160
1161	//debug (P,"Diagonalizing");
1162	Diagonalize(P, &DiagData);
1163
1164	// STEP 6: apply transformation U to all wave functions \sum_i^Num U_ji \| \phi_i \rangle = \| \phi_j^\ast \rangle
1165	//debug (P,"Performing Unitary transformation");
1166	UnitaryTransformationOnWavefunctions(P, DiagData.U, Num);
1167
1168	if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 7: Compute centres and spread printout\n",P->Par.me);
1169	WannierCentre = (double *) Malloc(sizeof(double )Num, "ComputeMLWF: WannierCentre");
1170	WannierSpread = (double ) Malloc(sizeof(double)Num, "ComputeMLWF: WannierSpread");
1171	for (i=0;i<Num;i++)
1172	WannierCentre[i] = (double ) Malloc(sizeof(double)NDIM, "ComputeMLWF: WannierCentre");
1173
1174	//debug (P,"Computing Wannier centres from variance matrix");
1175	ComputeWannierCentresfromVarianceMatrices(P, DiagData.A, &spread, &spreadSQ, WannierCentre, WannierSpread);
1176
1177	// join Wannier orbital to groups with common centres under certain conditions
1178	//debug (P,"Changing Wannier Centres according to CommonWannier");
1179	ChangeWannierCentres(P, Num, WannierCentre, WannierSpread);
1180
1181	// write to file
1182	//debug (P,"Writing spread file");
1183	WriteWannierFile(P, spread, spreadSQ, WannierCentre, WannierSpread);
1184
1185	debug(P,"Free'ing memory");
1186	// free all remaining memory
1187	FreeDiagonalization(&DiagData);
1188	for (i=0;i<Num;i++)
1189	Free(WannierCentre[i], "ComputeMLWF: WannierCentre[i]");
1190	Free(WannierCentre, "ComputeMLWF: WannierCentre");
1191	Free(WannierSpread, "ComputeMLWF: WannierSpread");
1192	}
1193
1194	/** Solves directly for eigenvectors and eigenvalues of a real 2x2 matrix.
1195	* The eigenvalues are determined by \f$\det{(A-\lambda \cdot I)}\f$, where \f$I\f$ is the unit matrix and
1196	* \f$\lambda\f$ an eigenvalue. This leads to a pq-formula which is easily evaluated in the first part.
1197	*
1198	* The eigenvectors are then obtained by solving \f$A-\lambda \cdot I)x = 0\f$ for a given eigenvalue
1199	* \f$\lambda\f$. However, the eigenvector magnitudes are not specified, thus we the equation system is
1200	* still lacking such an equation. We use this fact to set either coordinate arbitrarily to 1 and then
1201	* derive the other by solving the equation system. Finally, the eigenvector is normalized.
1202	* \param *P Problem at hand
1203	* \param *A matrix whose eigenvalues/-vectors are to be found
1204	* \param *eval vector with eigenvalues
1205	* \param *evec matrix with corresponding eigenvectors in columns
1206	*/
1207	#ifdef HAVE_INLINE
1208	inline void EigensolverFor22Matrix(struct Problem P, gsl_matrix A, gsl_vector eval, gsl_matrix evec)
1209	#else
1210	void EigensolverFor22Matrix(struct Problem P, gsl_matrix A, gsl_vector eval, gsl_matrix evec)
1211	#endif
1212	{
1213	double a11,a12,a21,a22;
1214	double ev[2], summand1, summand2, norm;
1215	int i;
1216	// find eigenvalues
1217	a11 = gsl_matrix_get(A,0,0);
1218	a12 = gsl_matrix_get(A,0,1);
1219	a21 = gsl_matrix_get(A,1,0);
1220	a22 = gsl_matrix_get(A,1,1);
1221	summand1 = (a11+a22)/2.;
1222	summand2 = sqrt(summand1summand1 + a12a21 - a11*a22);
1223	ev[0] = summand1 + summand2;
1224	ev[1] = summand1 - summand2;
1225	gsl_vector_set(eval, 0, ev[0]);
1226	gsl_vector_set(eval, 1, ev[1]);
1227	//fprintf (stderr,"(%i) ev1 %lg \t ev2 %lg\n", P->Par.me, ev1, ev2);
1228
1229	// find eigenvectors
1230	for(i=0;i<2;i++) {
1231	if (fabs(ev[i]) < MYEPSILON) {
1232	gsl_matrix_set(evec, 0,i, 0.);
1233	gsl_matrix_set(evec, 1,i, 0.);
1234	} else if (fabs(a22-ev[i]) > MYEPSILON) {
1235	norm = sqrt(11 + (a21a21/(a22-ev[i])/(a22-ev[i])));
1236	gsl_matrix_set(evec, 0,i, 1./norm);
1237	gsl_matrix_set(evec, 1,i, -(a21/(a22-ev[i]))/norm);
1238	//fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1239	} else if (fabs(a12) > MYEPSILON) {
1240	norm = sqrt(11 + (a11-ev[i])(a11-ev[i])/(a12*a12));
1241	gsl_matrix_set(evec, 0,i, 1./norm);
1242	gsl_matrix_set(evec, 1,i, -((a11-ev[i])/a12)/norm);
1243	//fprintf (stderr,"(%i) evec %i (%lg,%lg)", P->Par.me, i, -(a12/(a11-ev[i]))/norm, 1./norm);
1244	} else {
1245	if (fabs(a11-ev[i]) > MYEPSILON) {
1246	norm = sqrt(11 + (a12a12/(a11-ev[i])/(a11-ev[i])));
1247	gsl_matrix_set(evec, 0,i, -(a12/(a11-ev[i]))/norm);
1248	gsl_matrix_set(evec, 1,i, 1./norm);
1249	//fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1250	} else if (fabs(a21) > MYEPSILON) {
1251	norm = sqrt(11 + (a22-ev[i])(a22-ev[i])/(a21*a21));
1252	gsl_matrix_set(evec, 0,i, -(a22-ev[i])/a21/norm);
1253	gsl_matrix_set(evec, 1,i, 1./norm);
1254	//fprintf (stderr,"\t evec %i (%lg,%lg)\n", i, -(a12/(a11-ev[i]))/norm, 1./norm);
1255	} else {
1256	//gsl_matrix_set(evec, 0,i, 0.);
1257	//gsl_matrix_set(evec, 1,i, 1.);
1258	fprintf (stderr,"\t evec %i undetermined\n", i);
1259	}
1260	//gsl_matrix_set(evec, 0,0, 1.);
1261	//gsl_matrix_set(evec, 1,0, 0.);
1262	//fprintf (stderr,"(%i) evec1 undetermined", P->Par.me);
1263	}
1264	}
1265	}
1266
1267	/** Calculates sine and cosine values for multiple matrix diagonalization.
1268	* \param *evec eigenvectors in columns
1269	* \param *eval corresponding eigenvalues
1270	* \param *c cosine to be returned
1271	* \param *s sine to be returned
1272	*/
1273	#ifdef HAVE_INLINE
1274	inline void CalculateRotationAnglesFromEigenvalues(gsl_matrix evec, gsl_vector eval, double c, double s)
1275	#else
1276	void CalculateRotationAnglesFromEigenvalues(gsl_matrix evec, gsl_vector eval, double c, double s)
1277	#endif
1278	{
1279	int index;
1280	double x,y,r;
1281
1282	index = gsl_vector_max_index (eval); // get biggest eigenvalue
1283	//fprintf(stderr,"\t1st: %lg\t2nd: %lg --- biggest: %i\n", gsl_vector_get(eval, 0), gsl_vector_get(eval, 1), index);
1284	x = gsl_matrix_get(evec, 0, index);
1285	y = gsl_matrix_get(evec, 1, index);
1286	if (x < 0) { // ensure x>=0 so that rotation angles remain smaller Pi/4
1287	y = -y;
1288	x = -x;
1289	}
1290	//z = gsl_matrix_get(evec, 2, index) * x/fabs(x);
1291	//fprintf(stderr,"\tx %lg\ty %lg\n", x,y);
1292
1293	//fprintf(stderr,"(%i),(%i,%i) STEP 3c\n",P->Par.me,i,j);
1294	// STEP 3c: calculate R = [[c,s^\ast],[-s,c^\ast]]
1295	r = sqrt(xx + yy); // + z*z);
1296	if (fabs(r) > MYEPSILON) {
1297	c = sqrt((x + r) / (2.r));
1298	s = y / sqrt(2.r(x+r)); //, -z / sqrt(2r*(x+r)));
1299	} else {
1300	*c = 1.;
1301	*s = 0.;
1302	}
1303	}
1304
1305	/*
1306	* \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1307	* \param *U transformation matrix set to unity matrix
1308	* \param NumMatrices number of matrices to be diagonalized simultaneously
1309	* \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1310	* \param AllocNum number of rows/columns in matrices
1311	* \param Num number of wave functions
1312	* \param ProcRank index in group for this cpu
1313	* \param ProcNum number of cpus in group
1314	* \param *top array with top row indices (merry-go-round)
1315	* \param *bot array with bottom row indices (merry-go-round)
1316	*/
1317
1318	/** Simultaneous diagonalization of matrices with multiple cpus.
1319	* \param *P Problem at hand
1320	* \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1321	* \note this is slower given one cpu only than SerialDiagonalization()
1322	*/
1323	void ParallelDiagonalization(struct Problem P, struct DiagonalizationData DiagData)
1324	{
1325	struct RunStruct *R =&P->R;
1326	int tagR0, tagR1, tagS0, tagS1;
1327	int iloc, jloc;
1328	double s_all, c_all;
1329	int round, max_rounds;
1330	int start;
1331	int rcounts, rdispls;
1332	double c, s;
1333	int Lsend, Rsend, Lrecv, Rrecv; // where left(right) column is sent to or where it originates from
1334	int left, right; // left or right neighbour for process
1335	double spread = 0., old_spread=0., Spread=0.;
1336	int i,j,k,l,m,u;
1337	int set;
1338	int it_steps; // iteration step counter
1339	double Aloc[DiagData->NumMatrices+1], Uloc; // local columns for one step of A[k]
1340	double Around[DiagData->NumMatrices+1], Uround; // all local columns for one round of A[k]
1341	double Atotal[DiagData->NumMatrices+1], Utotal; // all local columns for one round of A[k]
1342	double a_i, a_j;
1343	gsl_matrix *G;
1344	gsl_vector *h;
1345	gsl_vector *eval;
1346	gsl_matrix *evec;
1347	//gsl_eigen_symmv_workspace *w;
1348
1349	max_rounds = (DiagData->AllocNum / 2)/DiagData->ProcNum; // each process must perform multiple rotations per step of a set
1350	if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) start %i\tstep %i\tmax.rounds %i\n",P->Par.me, DiagData->ProcRank, DiagData->ProcNum, max_rounds);
1351
1352	// allocate column vectors for interchange of columns
1353	debug(P,"allocate column vectors for interchange of columns");
1354	c = (double ) Malloc(sizeof(double)max_rounds, "ComputeMLWF: c");
1355	s = (double ) Malloc(sizeof(double)max_rounds, "ComputeMLWF: s");
1356	c_all = (double ) Malloc(sizeof(double)DiagData->AllocNum/2, "ComputeMLWF: c_all");
1357	s_all = (double ) Malloc(sizeof(double)DiagData->AllocNum/2, "ComputeMLWF: s_all");
1358	rcounts = (int ) Malloc(sizeof(int)DiagData->ProcNum, "ComputeMLWF: rcounts");
1359	rdispls = (int ) Malloc(sizeof(int)DiagData->ProcNum, "ComputeMLWF: rdispls");
1360
1361	// allocate eigenvector stuff
1362	debug(P,"allocate eigenvector stuff");
1363	G = gsl_matrix_calloc (2,2);
1364	h = gsl_vector_alloc (2);
1365	eval = gsl_vector_alloc (2);
1366	evec = gsl_matrix_alloc (2,2);
1367	//w = gsl_eigen_symmv_alloc(2);
1368
1369	// establish communication partners
1370	debug(P,"establish communication partners");
1371	if (DiagData->ProcRank == 0) {
1372	tagS0 = WannierALTag; // left p0 always remains left p0
1373	} else {
1374	tagS0 = (DiagData->ProcRank == DiagData->ProcNum - 1) ? WannierARTag : WannierALTag; // left p_last becomes right p_last
1375	}
1376	tagS1 = (DiagData->ProcRank == 0) ? WannierALTag : WannierARTag; // right p0 always goes into left p1
1377	tagR0 = WannierALTag; //
1378	tagR1 = WannierARTag; // first process
1379	if (DiagData->ProcRank == 0) {
1380	left = DiagData->ProcNum-1;
1381	right = 1;
1382	Lsend = 0;
1383	Rsend = 1;
1384	Lrecv = 0;
1385	Rrecv = 1;
1386	} else if (DiagData->ProcRank == DiagData->ProcNum - 1) {
1387	left = DiagData->ProcRank - 1;
1388	right = 0;
1389	Lsend = DiagData->ProcRank;
1390	Rsend = DiagData->ProcRank - 1;
1391	Lrecv = DiagData->ProcRank - 1;
1392	Rrecv = DiagData->ProcRank;
1393	} else {
1394	left = DiagData->ProcRank - 1;
1395	right = DiagData->ProcRank + 1;
1396	Lsend = DiagData->ProcRank+1;
1397	Rsend = DiagData->ProcRank - 1;
1398	Lrecv = DiagData->ProcRank - 1;
1399	Rrecv = DiagData->ProcRank+1;
1400	}
1401	//if (P->Call.out[ReadOut]) fprintf(stderr,"(%i) left %i\t right %i --- Lsend %i\tRsend%i\tLrecv %i\tRrecv%i\n",P->Par.me, left, right, Lsend, Rsend, Lrecv, Rrecv);
1402
1403	// initialise A_loc
1404	debug(P,"initialise A_loc");
1405	for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1406	//Aloc[k] = (double ) Malloc(sizeof(double)AllocNum*2, "ComputeMLWF: Aloc[k]");
1407	Around[k] = (double ) Malloc(sizeof(double)DiagData->AllocNum2max_rounds, "ComputeMLWF: Around[k]");
1408	Atotal[k] = (double ) Malloc(sizeof(double)DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Atotal[k]");
1409	Aloc[k] = (double *) Malloc(sizeof(double )2max_rounds, "ComputeMLWF: Aloc[k]");
1410	//Around[k] = &Atotal[k][ProcRankAllocNum2*max_rounds];
1411
1412	for (round=0;round<max_rounds;round++) {
1413	Aloc[k][2round] = &Around[k][DiagData->AllocNum(2*round)];
1414	Aloc[k][2round+1] = &Around[k][DiagData->AllocNum(2*round+1)];
1415	for (l=0;l<DiagData->AllocNum;l++) {
1416	Aloc[k][2round][l] = gsl_matrix_get(DiagData->A[k],l,2(DiagData->ProcRank*max_rounds+round));
1417	Aloc[k][2round+1][l] = gsl_matrix_get(DiagData->A[k],l,2(DiagData->ProcRank*max_rounds+round)+1);
1418	//fprintf(stderr,"(%i) (%i, 0/1) A_loc1 %e\tA_loc2 %e\n",P->Par.me, l, Aloc[k][l],Aloc[k][l+AllocNum]);
1419	}
1420	}
1421	}
1422	// initialise U_loc
1423	debug(P,"initialise U_loc");
1424	//Uloc = (double ) Malloc(sizeof(double)AllocNum*2, "ComputeMLWF: Uloc");
1425	Uround = (double ) Malloc(sizeof(double)DiagData->AllocNum2max_rounds, "ComputeMLWF: Uround");
1426	Utotal = (double ) Malloc(sizeof(double)DiagData->AllocNum*DiagData->AllocNum, "ComputeMLWF: Utotal");
1427	Uloc = (double *) Malloc(sizeof(double )2max_rounds, "ComputeMLWF: Uloc");
1428	//Uround = &Utotal[ProcRankAllocNum2*max_rounds];
1429	for (round=0;round<max_rounds;round++) {
1430	Uloc[2round] = &Uround[DiagData->AllocNum(2*round)];
1431	Uloc[2round+1] = &Uround[DiagData->AllocNum(2*round+1)];
1432	for (l=0;l<DiagData->AllocNum;l++) {
1433	Uloc[2round][l] = gsl_matrix_get(DiagData->U,l,2(DiagData->ProcRank*max_rounds+round));
1434	Uloc[2round+1][l] = gsl_matrix_get(DiagData->U,l,2(DiagData->ProcRank*max_rounds+round)+1);
1435	//fprintf(stderr,"(%i) (%i, 0/1) U_loc1 %e\tU_loc2 %e\n",P->Par.me, l, Uloc[l+AllocNum0],Uloc[l+AllocNum1]);
1436	}
1437	}
1438
1439	// now comes the iteration loop
1440	debug(P,"now comes the iteration loop");
1441	it_steps = 0;
1442	do {
1443	it_steps++;
1444	//if (P->Par.me == 0) fprintf(stderr,"(%i) Beginning parallel iteration %i ... ",P->Par.me,it_steps);
1445	for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 staying at its place all the time
1446	//fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1447	for (round = 0; round < max_rounds;round++) {
1448	start = DiagData->ProcRank * max_rounds + round;
1449	// get indices
1450	i = DiagData->top[start] < DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // minimum of the two indices
1451	iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1452	j = DiagData->top[start] > DiagData->bot[start] ? DiagData->top[start] : DiagData->bot[start]; // maximum of the two indices: thus j > i
1453	jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1454	//fprintf(stderr,"(%i) my (%i,%i), loc(%i,%i)\n",P->Par.me, i,j, iloc, jloc);
1455
1456	// calculate rotation angle, i.e. c and s
1457	//fprintf(stderr,"(%i),(%i,%i) calculate rotation angle\n",P->Par.me,i,j);
1458	gsl_matrix_set_zero(G);
1459	for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1460	// Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1461	//fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,Aloc[k][2round+iloc][i], Aloc[k][2round+jloc][j],Aloc[k][2round+iloc][i] - Aloc[k][2round+jloc][j]);
1462	//fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,Aloc[k][2round+jloc][i], Aloc[k][2round+iloc][j],Aloc[k][2round+jloc][i] + Aloc[k][2round+iloc][j]);
1463	gsl_vector_set(h, 0, Aloc[k][2round+iloc][i] - Aloc[k][2round+jloc][j]);
1464	gsl_vector_set(h, 1, Aloc[k][2round+jloc][i] + Aloc[k][2round+iloc][j]);
1465
1466	// Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1467	for (l=0;l<2;l++)
1468	for (m=0;m<2;m++)
1469	gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1470	}
1471	//fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1472	// STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1473	EigensolverFor22Matrix(P,G,eval,evec);
1474	//gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1475
1476	CalculateRotationAnglesFromEigenvalues(evec, eval, &c[round], &s[round]);
1477	//fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[round],s[round]);
1478
1479	//fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1480	// V_loc = V_loc * V_small
1481	//debug(P,"apply rotation to local U");
1482	for (l=0;l<DiagData->AllocNum;l++) {
1483	a_i = Uloc[2*round+iloc][l];
1484	a_j = Uloc[2*round+jloc][l];
1485	Uloc[2round+iloc][l] = c[round] a_i + s[round] * a_j;
1486	Uloc[2round+jloc][l] = -s[round] a_i + c[round] * a_j;
1487	}
1488	} // end of round
1489	// circulate the rotation angles
1490	//debug(P,"circulate the rotation angles");
1491	MPI_Allgather(c, max_rounds, MPI_DOUBLE, c_all, max_rounds, MPI_DOUBLE, *(DiagData->comm)); // MPI_Allgather is waaaaay faster than ring circulation
1492	MPI_Allgather(s, max_rounds, MPI_DOUBLE, s_all, max_rounds, MPI_DOUBLE, *(DiagData->comm));
1493	//m = start;
1494	for (l=0;l<DiagData->AllocNum/2;l++) { // for each process
1495	// we have V_small from process k
1496	//debug(P,"Apply V_small from other process");
1497	i = DiagData->top[l] < DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // minimum of the two indices
1498	j = DiagData->top[l] > DiagData->bot[l] ? DiagData->top[l] : DiagData->bot[l]; // maximum of the two indices: thus j > i
1499	iloc = DiagData->top[l] < DiagData->bot[l] ? 0 : 1;
1500	jloc = DiagData->top[l] > DiagData->bot[l] ? 0 : 1;
1501	for (m=0;m<max_rounds;m++) {
1502	//fprintf(stderr,"(%i) %i processes' (%i,%i)\n",P->Par.me, m,i,j);
1503	// apply row rotation to each A[k]
1504	for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1505	//fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2m+iloc][i],j,Aloc[k][2m+iloc][j]);
1506	//fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2m+jloc][i],j,Aloc[k][2m+jloc][j]);
1507	a_i = Aloc[k][2*m+iloc][i];
1508	a_j = Aloc[k][2*m+iloc][j];
1509	Aloc[k][2m+iloc][i] = c_all[l] a_i + s_all[l] * a_j;
1510	Aloc[k][2m+iloc][j] = -s_all[l] a_i + c_all[l] * a_j;
1511	a_i = Aloc[k][2*m+jloc][i];
1512	a_j = Aloc[k][2*m+jloc][j];
1513	Aloc[k][2m+jloc][i] = c_all[l] a_i + s_all[l] * a_j;
1514	Aloc[k][2m+jloc][j] = -s_all[l] a_i + c_all[l] * a_j;
1515	//fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2m+iloc][i],j,Aloc[k][2m+iloc][j]);
1516	//fprintf(stderr,"(%i) A^%i: a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, i, Aloc[k][2m+jloc][i],j,Aloc[k][2m+jloc][j]);
1517	}
1518	}
1519	}
1520	// apply rotation to local operator matrices
1521	// A_loc = A_loc * V_small
1522	//debug(P,"apply rotation to local operator matrices A[k]");
1523	for (m=0;m<max_rounds;m++) {
1524	start = DiagData->ProcRank * max_rounds + m;
1525	iloc = DiagData->top[start] < DiagData->bot[start] ? 0 : 1;
1526	jloc = DiagData->top[start] > DiagData->bot[start] ? 0 : 1;
1527	for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// extra for B matrix !
1528	for (l=0;l<DiagData->AllocNum;l++) {
1529	// Columns, i and j belong to this process only!
1530	a_i = Aloc[k][2*m+iloc][l];
1531	a_j = Aloc[k][2*m+jloc][l];
1532	Aloc[k][2m+iloc][l] = c[m] a_i + s[m] * a_j;
1533	Aloc[k][2m+jloc][l] = -s[m] a_i + c[m] * a_j;
1534	//fprintf(stderr,"(%i) A:(k%i) a_i[%i] %e\ta_j[%i] %e\n",P->Par.me, k, l, Aloc[k][2m+iloc][l],l,Aloc[k][2m+jloc][l]);
1535	}
1536	}
1537	}
1538	// Shuffling of these round's columns to prepare next rotation set
1539	for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1540	// extract columns from A
1541	//debug(P,"extract columns from A");
1542	MerryGoRoundColumns(*(DiagData->comm), Aloc[k], DiagData->AllocNum, max_rounds, k, tagS0, tagS1, tagR0, tagR1);
1543
1544	}
1545	// and also for V ...
1546	//debug(P,"extract columns from U");
1547	MerryGoRoundColumns(*(DiagData->comm), Uloc, DiagData->AllocNum, max_rounds, 0, tagS0, tagS1, tagR0, tagR1);
1548
1549
1550	// and merry-go-round for the indices too
1551	//debug(P,"and merry-go-round for the indices too");
1552	MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1553	}
1554
1555	//fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1556	// STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1557	old_spread = Spread;
1558	spread = 0.;
1559	for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1560	for (i=0; i < 2*max_rounds; i++) { // go through all wave functions
1561	spread += Aloc[k][i][i+DiagData->ProcRank2max_rounds]Aloc[k][i][i+DiagData->ProcRank2*max_rounds];
1562	//spread += gsl_matrix_get(A[k],i,i)*gsl_matrix_get(A[k],i,i);
1563	}
1564	}
1565	MPI_Allreduce(&spread, &Spread, 1, MPI_DOUBLE, MPI_SUM, *(DiagData->comm));
1566	//Spread = spread;
1567	if (P->Par.me == 0) {
1568	//if(P->Call.out[ReadOut])
1569	// fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,Spread,R->EpsWannier);
1570	//else
1571	//fprintf(stderr,"%2.9e\n",Spread);
1572	}
1573	// STEP 5: check change of variance
1574	} while (fabs(old_spread-Spread) >= R->EpsWannier);
1575	// end of iterative diagonalization loop: We have found our final orthogonal U!
1576
1577	// gather local parts of U into complete matrix
1578	for (l=0;l<DiagData->ProcNum;l++)
1579	rcounts[l] = DiagData->AllocNum;
1580	debug(P,"allgather U");
1581	for (round=0;round<2*max_rounds;round++) {
1582	for (l=0;l<DiagData->ProcNum;l++)
1583	rdispls[l] = (l2max_rounds + round)*DiagData->AllocNum;
1584	MPI_Allgatherv(Uloc[round], DiagData->AllocNum, MPI_DOUBLE, Utotal, rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1585	}
1586	for (k=0;k<DiagData->AllocNum;k++) {
1587	for(l=0;l<DiagData->AllocNum;l++) {
1588	gsl_matrix_set(DiagData->U,k,l, Utotal[l+k*DiagData->AllocNum]);
1589	}
1590	}
1591
1592	// after one set, gather A[k] from all and calculate spread
1593	for (l=0;l<DiagData->ProcNum;l++)
1594	rcounts[l] = DiagData->AllocNum;
1595	debug(P,"gather A[k] for spread");
1596	for (u=0;u<DiagData->NumMatrices+DiagData->extra;u++) {// extra for B matrix !
1597	debug(P,"A[k] all gather");
1598	for (round=0;round<2*max_rounds;round++) {
1599	for (l=0;l<DiagData->ProcNum;l++)
1600	rdispls[l] = (l2max_rounds + round)*DiagData->AllocNum;
1601	MPI_Allgatherv(Aloc[u][round], DiagData->AllocNum, MPI_DOUBLE, Atotal[u], rcounts, rdispls, MPI_DOUBLE, *(DiagData->comm));
1602	}
1603	for (k=0;k<DiagData->AllocNum;k++) {
1604	for(l=0;l<DiagData->AllocNum;l++) {
1605	gsl_matrix_set(DiagData->A[u],k,l, Atotal[u][l+k*DiagData->AllocNum]);
1606	}
1607	}
1608	}
1609
1610	// free eigenvector stuff
1611	gsl_vector_free(h);
1612	gsl_matrix_free(G);
1613	//gsl_eigen_symmv_free(w);
1614	gsl_vector_free(eval);
1615	gsl_matrix_free(evec);
1616
1617	// Free column vectors
1618	for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {
1619	Free(Atotal[k], "ParallelDiagonalization: Atotal[k]");
1620	Free(Around[k], "ParallelDiagonalization: Around[k]");
1621	}
1622	Free(Uround, "ParallelDiagonalization: Uround");
1623	Free(Utotal, "ParallelDiagonalization: Utotal");
1624	Free(c_all, "ParallelDiagonalization: c_all");
1625	Free(s_all, "ParallelDiagonalization: s_all");
1626	Free(c, "ParallelDiagonalization: c");
1627	Free(s, "ParallelDiagonalization: s");
1628	Free(rcounts, "ParallelDiagonalization: rcounts");
1629	Free(rdispls, "ParallelDiagonalization: rdispls");
1630	}
1631	/*
1632	* \param **A matrices (pointer array with \a NumMatrices entries) to be diagonalized
1633	* \param *U transformation matrix set to unity matrix
1634	* \param NumMatrices number of matrices to be diagonalized
1635	* \param extra number of additional matrices the rotation is applied to however which actively diagonalized (follow in \a **A)
1636	* \param AllocNum number of rows/columns in matrices
1637	* \param Num number of wave functions
1638	* \param *top array with top row indices (merry-go-round)
1639	* \param *bot array with bottom row indices (merry-go-round)
1640	*/
1641
1642	/** Simultaneous Diagonalization of variances matrices with one cpu.
1643	* \param *P Problem at hand
1644	* \param *DiagData pointer to structure DiagonalizationData containing necessary information for diagonalization
1645	* \note this is faster given one cpu only than ParallelDiagonalization()
1646	*/
1647	void SerialDiagonalization(struct Problem P, struct DiagonalizationData DiagData)
1648	{
1649	struct RunStruct *R = &P->R;
1650	gsl_matrix *G;
1651	gsl_vector *h;
1652	gsl_vector *eval;
1653	gsl_matrix *evec;
1654	//gsl_eigen_symmv_workspace *w;
1655	double c,s,a_i,a_j;
1656	int it_steps, set, ProcRank;
1657	int i,j,k,l,m;
1658	double spread = 0., old_spread = 0.;\
1659
1660	// allocate eigenvector stuff
1661	debug(P,"allocate eigenvector stuff");
1662	G = gsl_matrix_calloc (2,2);
1663	h = gsl_vector_alloc (2);
1664	eval = gsl_vector_alloc (2);
1665	evec = gsl_matrix_alloc (2,2);
1666	//w = gsl_eigen_symmv_alloc(2);
1667
1668	c = (double *) Malloc(sizeof(double), "ComputeMLWF: c");
1669	s = (double *) Malloc(sizeof(double), "ComputeMLWF: s");
1670	debug(P,"now comes the iteration loop");
1671	it_steps=0;
1672	do {
1673	it_steps++;
1674	//if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 3: Iteratively maximize negative spread part\n",P->Par.me);
1675	//if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Beginning iteration %i ... ",P->Par.me,it_steps);
1676	for (set=0; set < DiagData->AllocNum-1; set++) { // one column less due to column 0 stating at its place all the time
1677	//fprintf(stderr,"(%i) Beginning rotation set %i ...\n",P->Par.me,set);
1678	// STEP 3: for all index pairs 0<= i<j <AllocNum
1679	for (ProcRank=0;ProcRank<DiagData->AllocNum/2;ProcRank++) {
1680	// get indices
1681	i = DiagData->top[ProcRank] < DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // minimum of the two indices
1682	j = DiagData->top[ProcRank] > DiagData->bot[ProcRank] ? DiagData->top[ProcRank] : DiagData->bot[ProcRank]; // maximum of the two indices: thus j > i
1683	//fprintf(stderr,"(%i),(%i,%i) STEP 3a\n",P->Par.me,i,j);
1684	// STEP 3a: Calculate G
1685	gsl_matrix_set_zero(G);
1686
1687	for (k=0;k<DiagData->NumMatrices;k++) { // go through all operators ...
1688	// Calculate vector h(a) = [a_ii - a_jj, a_ij + a_ji, i(a_ji - a_ij)]
1689	//fprintf(stderr,"(%i) k%i [a_ii - a_jj] = %e - %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,i), gsl_matrix_get(DiagData->A[k],j,j),gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1690	//fprintf(stderr,"(%i) k%i [a_ij + a_ji] = %e + %e = %e\n",P->Par.me, k,gsl_matrix_get(DiagData->A[k],i,j), gsl_matrix_get(DiagData->A[k],j,i),gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1691	gsl_vector_set(h, 0, gsl_matrix_get(DiagData->A[k],i,i) - gsl_matrix_get(DiagData->A[k],j,j));
1692	gsl_vector_set(h, 1, gsl_matrix_get(DiagData->A[k],i,j) + gsl_matrix_get(DiagData->A[k],j,i));
1693	//gsl_vector_complex_set(h, 2, gsl_complex_mul_imag(gsl_complex_add(gsl_matrix_complex_get(A[k],j,i), gsl_matrix_complex_get(A[k],i,j)),1));
1694
1695	// Calculate G = Re[ \sum_k h^H (A^{(k)}) h(A^{(k)}) ]
1696	for (l=0;l<2;l++)
1697	for (m=0;m<2;m++)
1698	gsl_matrix_set(G,l,m, gsl_vector_get(h,l) * gsl_vector_get(h,m) + gsl_matrix_get(G,l,m));
1699
1700	//PrintGSLMatrix(P,G,2, "G");
1701	}
1702
1703	//fprintf(stderr,"(%i),(%i,%i) STEP 3b\n",P->Par.me,i,j);
1704	// STEP 3b: retrieve eigenvector which belongs to greatest eigenvalue of G
1705	EigensolverFor22Matrix(P,G,eval,evec);
1706	//gsl_eigen_symmv(G, eval, evec, w); // calculates eigenvalues and eigenvectors of G
1707
1708	//PrintGSLMatrix(P, evec, 2, "Eigenvectors");
1709	CalculateRotationAnglesFromEigenvalues(evec, eval, c, s);
1710	//fprintf(stderr,"(%i),(%i,%i) COS %e\t SIN %e\n",P->Par.me,i,j,c[0],s[0]);
1711
1712	//fprintf(stderr,"(%i),(%i,%i) STEP 3d\n",P->Par.me,i,j);
1713	// STEP 3d: apply rotation R to rows and columns of A^{(k)}
1714	for (k=0;k<DiagData->NumMatrices+DiagData->extra;k++) {// one extra for B matrix !
1715	//PrintGSLMatrix(P,A[k],AllocNum, "A (before rot)");
1716	for (l=0;l<DiagData->AllocNum;l++) {
1717	// Rows
1718	a_i = gsl_matrix_get(DiagData->A[k],i,l);
1719	a_j = gsl_matrix_get(DiagData->A[k],j,l);
1720	gsl_matrix_set(DiagData->A[k], i, l, c[0] * a_i + s[0] * a_j);
1721	gsl_matrix_set(DiagData->A[k], j, l, -s[0] * a_i + c[0] * a_j);
1722	}
1723	for (l=0;l<DiagData->AllocNum;l++) {
1724	// Columns
1725	a_i = gsl_matrix_get(DiagData->A[k],l,i);
1726	a_j = gsl_matrix_get(DiagData->A[k],l,j);
1727	gsl_matrix_set(DiagData->A[k], l, i, c[0] * a_i + s[0] * a_j);
1728	gsl_matrix_set(DiagData->A[k], l, j, -s[0] * a_i + c[0] * a_j);
1729	}
1730	//PrintGSLMatrix(P,A[k],AllocNum, "A (after rot)");
1731	}
1732	//fprintf(stderr,"(%i),(%i,%i) STEP 3e\n",P->Par.me,i,j);
1733	//PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (before rot)");
1734	// STEP 3e: apply U = R*U
1735	for (l=0;l<DiagData->AllocNum;l++) {
1736	a_i = gsl_matrix_get(DiagData->U,i,l);
1737	a_j = gsl_matrix_get(DiagData->U,j,l);
1738	gsl_matrix_set(DiagData->U, i, l, c[0] * a_i + s[0] * a_j);
1739	gsl_matrix_set(DiagData->U, j, l, -s[0] * a_i + c[0] * a_j);
1740	}
1741	//PrintGSLMatrix(P,DiagData->U,DiagData->AllocNum, "U (after rot)");
1742	}
1743	// and merry-go-round for the indices too
1744	//debug(P,"and merry-go-round for the indices too");
1745	if (DiagData->AllocNum > 2) MerryGoRoundIndices(DiagData->top, DiagData->bot, DiagData->AllocNum/2);
1746	}
1747
1748	//if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) STEP 4\n",P->Par.me);
1749	// STEP 4: calculate new variance: \sum_{ik} (A^{(k)}_ii)^2
1750	old_spread = spread;
1751	spread = 0.;
1752	for(k=0;k<DiagData->NumMatrices;k++) { // go through all self-adjoint operators
1753	for (i=0; i < DiagData->AllocNum; i++) { // go through all wave functions
1754	spread += pow(gsl_matrix_get(DiagData->A[k],i,i),2);
1755	}
1756	}
1757	if(P->Par.me == 0) {
1758	//if(P->Call.out[ReadOut])
1759	// fprintf(stderr,"(%i) STEP 5: %2.9e - %2.9e <= %lg ?\n",P->Par.me,old_spread,spread,R->EpsWannier);
1760	//else
1761	//fprintf(stderr,"%2.9e\n",spread);
1762	}
1763	// STEP 5: check change of variance
1764	} while (fabs(old_spread-spread) >= R->EpsWannier);
1765	// end of iterative diagonalization loop: We have found our final orthogonal U!
1766	Free(c, "SerialDiagonalization: c");
1767	Free(s, "SerialDiagonalization: s");
1768	// free eigenvector stuff
1769	gsl_vector_free(h);
1770	gsl_matrix_free(G);
1771	//gsl_eigen_symmv_free(w);
1772	gsl_vector_free(eval);
1773	gsl_matrix_free(evec);
1774	}
1775
1776	/** Writes the wannier centres and spread to file.
1777	* Also puts centres from array into OnePsiElementAddData structure.
1778	* \param *P Problem at hand
1779	* \param old_spread first term of wannier spread
1780	* \param spread second term of wannier spread
1781	* \param **WannierCentre 2D array (NDIM, \a Num) with wannier centres
1782	* \param *WannierSpread array with wannier spread per wave function
1783	*/
1784	void WriteWannierFile(struct Problem P, double spread, double old_spread, double WannierCentre, double WannierSpread)
1785	{
1786	struct FileData *F = &P->Files;
1787	struct RunStruct *R = &P->R;
1788	struct Lattice *Lat = &P->Lat;
1789	struct Psis *Psi = &Lat->Psi;
1790	struct OnePsiElement *OnePsiA;
1791	char spin[12], suffix[18];
1792	int i,j,l;
1793
1794	if(P->Call.out[ReadOut]) fprintf(stderr,"(%i) Spread printout\n", P->Par.me);
1795
1796	switch (Lat->Psi.PsiST) {
1797	case SpinDouble:
1798	strcpy(suffix,".spread.csv");
1799	strcpy(spin,"SpinDouble");
1800	break;
1801	case SpinUp:
1802	strcpy(suffix,".spread_up.csv");
1803	strcpy(spin,"SpinUp");
1804	break;
1805	case SpinDown:
1806	strcpy(suffix,".spread_down.csv");
1807	strcpy(spin,"SpinDown");
1808	break;
1809	}
1810	if (P->Par.me_comm_ST == 0) {
1811	if (R->LevSNo == Lat->MaxLevel-1) // open freshly if first level
1812	OpenFile(P, &F->SpreadFile, suffix, "w", P->Call.out[ReadOut]); // only open on starting level
1813	else if (F->SpreadFile == NULL) // re-open if not first level and not opened yet (or closed from ParseWannierFile)
1814	OpenFile(P, &F->SpreadFile, suffix, "a", P->Call.out[ReadOut]); // only open on starting level
1815	if (F->SpreadFile == NULL) {
1816	Error(SomeError,"WriteWannierFile: Error opening Wannier File!\n");
1817	} else {
1818	fprintf(F->SpreadFile,"#===== W A N N I E R C E N T R E S for Level %d of type %s ========================\n", R->LevSNo, spin);
1819	fprintf(F->SpreadFile,"# Orbital+Level\tx\ty\tz\tSpread\n");
1820	}
1821	}
1822
1823	// put (new) WannierCentres into local ones and into file
1824	i=-1;
1825	for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1826	OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1827	if (OnePsiA->PsiType == type) { // drop all but occupied ones
1828	i++; // increase l if it is occupied wave function
1829	if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) {// is this local?
1830	for (j=0;j<NDIM;j++)
1831	Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[i][j];
1832	}
1833	if (P->Par.me_comm_ST == 0)
1834	fprintf(F->SpreadFile,"Psi%d_Lev%d\t%lg\t%lg\t%lg\t%lg\n", Psi->AllPsiStatus[i].MyGlobalNo, R->LevSNo, WannierCentre[i][0], WannierCentre[i][1], WannierCentre[i][2], WannierSpread[i]);
1835	}
1836	}
1837	if (P->Par.me_comm_ST == 0) {
1838	fprintf(F->SpreadFile,"\n#Matrix traces\tB_ii\tA_ii^2\tTotal (B_ii - A_ii^2)\n");
1839	fprintf(F->SpreadFile,"TotalSpread_L%d\t%lg\t%lg\t%lg\n\n",R->LevSNo, old_spread, spread, old_spread - spread);
1840	fflush(F->SpreadFile);
1841	}
1842	}
1843
1844
1845	/** Parses the spread file and puts values into OnePsiElementAddData#WannierCentre.
1846	* \param *P Problem at hand
1847	* \return 1 - success, 0 - failure
1848	*/
1849	int ParseWannierFile(struct Problem *P)
1850	{
1851	struct Lattice *Lat = &P->Lat;
1852	struct RunStruct *R = &P->R;
1853	struct Psis *Psi = &Lat->Psi;
1854	struct OnePsiElement *OnePsiA;
1855	int i,l,j, msglen;
1856	FILE *SpreadFile;
1857	char *tagname;
1858	char suffix[18];
1859	double WannierCentre[NDIM+1]; // combined centre and spread
1860	MPI_Status status;
1861	int signal = 0; // 1 - ok, 0 - error
1862
1863	switch (Lat->Psi.PsiST) {
1864	case SpinDouble:
1865	strcpy(suffix,".spread.csv");
1866	break;
1867	case SpinUp:
1868	strcpy(suffix,".spread_up.csv");
1869	break;
1870	case SpinDown:
1871	strcpy(suffix,".spread_down.csv");
1872	break;
1873	}
1874	if (P->Call.out[NormalOut]) fprintf(stderr,"(%i) Parsing Wannier Centres from file ... \n", P->Par.me);
1875
1876	if (P->Par.me_comm_ST == 0) {
1877	tagname = (char ) Malloc(sizeof(char)255, "ParseWannierFile: *tagname");
1878	if(!OpenFile(P, &SpreadFile, suffix, "r", P->Call.out[ReadOut])) { // check if file exists
1879	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1880	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1881	return 0;
1882	//Error(SomeError,"ParseWannierFile: Opening failed\n");
1883	}
1884	signal = 1;
1885	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1886	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1887	} else {
1888	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1889	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1890	if (signal == 0)
1891	return 0;
1892	}
1893	i=-1;
1894	for (l=0; l < Psi->MaxPsiOfType+P->Par.Max_me_comm_ST_PsiT; l++) { // go through all wave functions
1895	OnePsiA = &Psi->AllPsiStatus[l]; // grab OnePsiA
1896	if (OnePsiA->PsiType == type) { // drop all but occupied ones
1897	i++; // increase l if it is occupied wave function
1898	if (P->Par.me_comm_ST == 0) { // only process 0 may access the spread file
1899	sprintf(tagname,"Psi%d_Lev%d",i,R->LevSNo);
1900	signal = 0;
1901	if (!ParseForParameter(0,SpreadFile,tagname,0,3,1,row_double,WannierCentre,1,optional)) {
1902	//Error(SomeError,"ParseWannierFile: Parsing WannierCentre failed");
1903	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1904	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1905	return 0;
1906	}
1907	if (!ParseForParameter(0,SpreadFile,tagname,0,4,1,double_type,&WannierCentre[NDIM],1,optional)) {
1908	//Error(SomeError,"ParseWannierFile: Parsing WannierSpread failed");
1909	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1910	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1911	return 0;
1912	}
1913	signal = 1;
1914	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1915	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1916	} else {
1917	if (MPI_Bcast(&signal,1,MPI_INT,0,P->Par.comm_ST) != MPI_SUCCESS)
1918	Error(SomeError,"ParseWannierFile: Bcast of signal failed\n");
1919	if (signal == 0)
1920	return 0;
1921	}
1922	if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) { // is this Psi local?
1923	if ((P->Par.me_comm_ST != 0) && (P->Par.me_comm_ST_Psi == 0)) { // if they don't belong to process 0 and we are a leader of a Psi group, receive 'em
1924	if (MPI_Recv(WannierCentre, NDIM+1, MPI_DOUBLE, 0, ParseWannierTag, P->Par.comm_ST_PsiT, &status) != MPI_SUCCESS)
1925	Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed");
1926	//return 0;
1927	MPI_Get_count(&status, MPI_DOUBLE, &msglen);
1928	if (msglen != NDIM+1)
1929	Error(SomeError,"ParseWannierFile: MPI_Recv of WannierCentre/Spread from process 0 failed due to wrong item count");
1930	//return 0;
1931	}
1932	if (MPI_Bcast(WannierCentre, NDIM+1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi) != MPI_SUCCESS) // Bcast to all processes of the Psi group from leader
1933	Error(SomeError,"ParseWannierFile: MPI_Bcast of WannierCentre/Spread to sub process in Psi group failed");
1934	//return 0;
1935	// and store 'em (for all who have this Psi local)
1936	if ((P->Par.me == 0) && P->Call.out[ValueOut]) fprintf(stderr,"(%i) Psi %i, L %i: (x,y,z) = (%lg, %lg, %lg), Spread %lg\n",P->Par.me,i, R->LevSNo, WannierCentre[0], WannierCentre[1], WannierCentre[2], WannierCentre[NDIM]);
1937	for (j=0;j<NDIM;j++) Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[j] = WannierCentre[j];
1938	Psi->AddData[OnePsiA->MyLocalNo].WannierSpread = WannierCentre[NDIM];
1939	//if (P->Par.me == 0 && P->Call.out[ValueOut]) fprintf(stderr,"(%i) %s\t%lg\t%lg\t%lg\t\t%lg\n",P->Par.me, tagname,Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[0],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[1],Psi->AddData[OnePsiA->MyLocalNo].WannierCentre[2],Psi->AddData[OnePsiA->MyLocalNo].WannierSpread);
1940	} else if (P->Par.me_comm_ST == 0) { // if they are not local, yet we are process 0, send 'em to leader of its Psi group
1941	if (MPI_Send(WannierCentre, NDIM+1, MPI_DOUBLE, OnePsiA->my_color_comm_ST_Psi, ParseWannierTag, P->Par.comm_ST_PsiT) != MPI_SUCCESS)
1942	Error(SomeError,"ParseWannierFile: MPI_Send of WannierCentre/Spread to process 0 of owning Psi group failed");
1943	//return 0;
1944	}
1945	}
1946	}
1947	if ((SpreadFile != NULL) && (P->Par.me_comm_ST == 0)) {
1948	fclose(SpreadFile);
1949	Free(tagname, "ParseWannierFile: *tagname");
1950	}
1951	//fprintf(stderr,"(%i) Parsing Wannier files succeeded!\n", P->Par.me);
1952	return 1;
1953	}
1954
1955	/** Calculates the spread of orbital \a i.
1956	* Stored in OnePsiElementAddData#WannierSpread.
1957	* \param *P Problem at hand
1958	* \param i i-th wave function (note "extra" ones are not counted!)
1959	* \return spread \f$\sigma^2_{A^{(k)}}\f$
1960	*/
1961	double CalculateSpread(struct Problem *P, int i) {
1962	struct Lattice *Lat = &P->Lat;
1963	struct RunStruct *R = &P->R;
1964	struct Psis *Psi = &Lat->Psi;
1965	struct LatticeLevel *Lev0 = R->Lev0;
1966	struct LatticeLevel *LevS = R->LevS;
1967	struct Density *Dens0 = Lev0->Dens;
1968	struct fft_plan_3d *plan = Lat->plan;
1969	fftw_complex *PsiC = Dens0->DensityCArray[ActualPsiDensity];
1970	fftw_real PsiCR = (fftw_real )PsiC;
1971	fftw_complex *work = Dens0->DensityCArray[Temp2Density];
1972	fftw_real **HGcR = &Dens0->DensityArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as a storage array
1973	fftw_complex HGcRC = (fftw_complex)HGcR;
1974	fftw_complex **HGcR2C = &Dens0->DensityCArray[HGcDensity]; // use HGcDensity, 4x Gap..Density, TempDensity as an array
1975	fftw_real HGcR2 = (fftw_real)HGcR2C;
1976	MPI_Status status;
1977	struct OnePsiElement OnePsiA, LOnePsiA;
1978	int ElementSize = (sizeof(fftw_complex) / sizeof(double)), RecvSource;
1979	fftw_complex *LPsiDatA=NULL;
1980	int k,n[NDIM],n0, *N,N0, g, p, iS, i0, Index;
1981	N0 = LevS->Plan0.plan->local_nx;
1982	N = LevS->Plan0.plan->N;
1983	const int NUpx = LevS->NUp[0];
1984	const int NUpy = LevS->NUp[1];
1985	const int NUpz = LevS->NUp[2];
1986	double a_ij, b_ij, A_ij, B_ij;
1987	double tmp, tmp2, spread = 0;
1988	double cos_lookup, sin_lookup;
1989
1990	b_ij = 0;
1991
1992	CreateSinCosLookupTable(&cos_lookup,&sin_lookup,N);
1993
1994	// fill matrices
1995	OnePsiA = &Psi->AllPsiStatus[i]; // grab the desired OnePsiA
1996	if (OnePsiA->PsiType != Extra) { // drop if extra one
1997	if (OnePsiA->my_color_comm_ST_Psi == P->Par.my_color_comm_ST_Psi) // local?
1998	LOnePsiA = &Psi->LocalPsiStatus[OnePsiA->MyLocalNo];
1999	else
2000	LOnePsiA = NULL;
2001	if (LOnePsiA == NULL) { // if it's not local ... receive it from respective process into TempPsi
2002	RecvSource = OnePsiA->my_color_comm_ST_Psi;
2003	MPI_Recv( LevS->LPsi->TempPsi, LevS->MaxG*ElementSize, MPI_DOUBLE, RecvSource, WannierTag1, P->Par.comm_ST_PsiT, &status );
2004	LPsiDatA=LevS->LPsi->TempPsi;
2005	} else { // .. otherwise send it to all other processes (Max_me... - 1)
2006	for (p=0;p<P->Par.Max_me_comm_ST_PsiT;p++)
2007	if (p != OnePsiA->my_color_comm_ST_Psi)
2008	MPI_Send( LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo], LevS->MaxG*ElementSize, MPI_DOUBLE, p, WannierTag1, P->Par.comm_ST_PsiT);
2009	LPsiDatA=LevS->LPsi->LocalPsi[OnePsiA->MyLocalNo];
2010	} // LPsiDatA is now set to the coefficients of OnePsi either stored or MPI_Received
2011
2012	CalculateOneDensityR(Lat, LevS, Dens0, LPsiDatA, Dens0->DensityArray[ActualDensity], R->FactorDensityR, 1);
2013	// note: factor is not used when storing result in DensityCArray[ActualPsiDensity] in CalculateOneDensityR()!
2014	for (n0=0;n0<N0;n0++)
2015	for (n[1]=0;n[1]<N[1];n[1]++)
2016	for (n[2]=0;n[2]<N[2];n[2]++) {
2017	i0 = n[2]NUpz+N[2]NUpz(n[1]NUpy+N[1]NUpyn0*NUpx);
2018	iS = n[2]+N[2](n[1]+N[1]n0);
2019	n[0] = n0 + LevS->Plan0.plan->start_nx;
2020	for (k=0;k<max_operators;k+=2) {
2021	tmp = 2PI/(double)(N[k/2])(double)(n[k/2]);
2022	tmp2 = PsiCR[i0] /LevS->MaxN;
2023	// check lookup
2024	if ((fabs(cos(tmp) - cos_lookup[k/2][n[k/2]]) > MYEPSILON) \|\| (fabs(sin(tmp) - sin_lookup[k/2][n[k/2]]) > MYEPSILON)) {
2025	fprintf(stderr,"(%i) (cos) %2.15e against (lookup) %2.15e,\t(sin) %2.15e against (lookup) %2.15e\n", P->Par.me, cos(tmp), cos_lookup[k/2][n[k/2]],sin(tmp),sin_lookup[k/2][n[k/2]]);
2026	Error(SomeError, "Lookup table does not match real value!");
2027	}
2028	// HGcR[k][iS] = cos_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2029	// HGcR2[k][iS] = cos_lookup[k/2][n[k/2]] * HGcR[k][iS]; /* Matrix Vector Mult */
2030	// HGcR[k+1][iS] = sin_lookup[k/2][n[k/2]] * tmp2; /* Matrix Vector Mult */
2031	// HGcR2[k+1][iS] = sin_lookup[k/2][n[k/2]] * HGcR[k+1][iS]; /* Matrix Vector Mult */
2032	HGcR[k][iS] = cos(tmp) * tmp2; /* Matrix Vector Mult */
2033	HGcR2[k][iS] = pow(cos(tmp),2) * tmp2; /* Matrix Vector Mult */
2034	HGcR[k+1][iS] = sin(tmp) * tmp2; /* Matrix Vector Mult */
2035	HGcR2[k+1][iS] = pow(sin(tmp),2) * tmp2; /* Matrix Vector Mult */
2036	}
2037	}
2038	for (k=0;k<max_operators;k++) {
2039	fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcRC[k], work);
2040	fft_3d_real_to_complex(plan, LevS->LevelNo, FFTNF1, HGcR2C[k], work);
2041	}
2042
2043
2044	for (k=0;k<max_operators;k++) {
2045	a_ij = 0;
2046	//fprintf(stderr,"(%i),(%i,%i): A[%i]: multiplying with \\phi_B\n",P->Par.me, l,m,k);
2047	// sum directly in a_ij and b_ij the two desired terms
2048	g=0;
2049	if (LevS->GArray[0].GSq == 0.0) {
2050	Index = LevS->GArray[g].Index;
2051	a_ij += (LPsiDatA[0].reHGcRC[k][Index].re + LPsiDatA[0].imHGcRC[k][Index].im);
2052	b_ij += (LPsiDatA[0].reHGcR2C[k][Index].re + LPsiDatA[0].imHGcR2C[k][Index].im);
2053	g++;
2054	}
2055	for (; g < LevS->MaxG; g++) {
2056	Index = LevS->GArray[g].Index;
2057	a_ij += 2(LPsiDatA[g].reHGcRC[k][Index].re + LPsiDatA[g].im*HGcRC[k][Index].im);
2058	b_ij += 2(LPsiDatA[g].reHGcR2C[k][Index].re + LPsiDatA[g].im*HGcR2C[k][Index].im);
2059	} // due to the symmetry the resulting matrix element is real and symmetric in (i,i) ! (complex multiplication simplifies ...)
2060	MPI_Allreduce ( &a_ij, &A_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2061	spread += pow(A_ij,2);
2062	}
2063	}
2064	MPI_Allreduce ( &b_ij, &B_ij, 1, MPI_DOUBLE, MPI_SUM, P->Par.comm_ST_Psi);
2065
2066	// store spread in OnePsiElementAdd
2067	Psi->AddData[i].WannierSpread = B_ij - spread;
2068
2069	FreeSinCosLookupTable(cos_lookup,sin_lookup);
2070
2071	return (B_ij - spread);
2072	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: pcp/src/wannier.c@ d3482a

Download in other formats: