source: pcp/src/myfft.c@ 64fa9e

Last change on this file since 64fa9e was 64fa9e, checked in by Frederik Heber <heber@…>, 17 years ago

Free(): now takes a debug string to know where free error occured

  • Property mode set to 100644
File size: 33.5 KB
Line 
1/** \file myfft.c
2 * FastFourierTransformations for parallel computing.
3 * Implements FFT on multiple machines using fftw package. The most important
4 * routines are the hither and back transformations: fft_3d_real_to_complex() and
5 * fft_3d_complex_to_real(). For fftw plans are needed, which are created by
6 * fft_3d_create_plan() and destroyed by fft_3d_destroy_plan(). Plan weights
7 * can be copied CopyElementOnepw() and there are sort critera GetKeyOnepw(),
8 * GetKeyOnepw2PZI().\n
9 * Wave functions can - for the purpose of testing - be filled with random values
10 * by InitDataTestR().
11 *
12 Project: ParallelCarParrinello
13 \author Jan Hamaekers
14 \date 2000
15
16 File: myfft.c
17 $Id: myfft.c,v 1.23 2007/02/05 15:28:39 foo Exp $
18*/
19
20#include<stdlib.h>
21#include<stdio.h>
22#include<unistd.h>
23#include<math.h>
24#include<string.h>
25
26// use double precision fft when we have it
27#ifdef HAVE_CONFIG_H
28#include <config.h>
29#endif
30
31#ifdef HAVE_DFFTW_H
32#include "dfftw.h"
33#else
34#include "fftw.h"
35#endif
36
37#ifdef HAVE_DRFFTW_H
38#include "drfftw.h"
39#else
40#include "rfftw.h"
41#endif
42
43#include"data.h"
44#include"helpers.h"
45#include"output.h"
46#include"errors.h"
47#include"mergesort2.h"
48#include"mymath.h"
49#include"myfft.h"
50
51
52/** Returns negative value of the weight of a plan_weight as sort critera.
53 * \param *a plan_weight array
54 * \param i index of element
55 * \param *Args unused (only for contingency)
56 */
57static double GetKeyOnepw(void *a, int i, void *Args) {
58 return(-((struct plan_weight *)a)[i].weight);
59}
60
61/** Returns "index + twice the process number + \a Args[2]" of a plan_weight as sort critera.
62 * \a Args[2] is only added if: Index % \a Args[1] != Args[1] - 1
63 * \param *a plan_weight array
64 * \param i index of element
65 * \param *Args
66 */
67static double GetKeyOnepw2PZI(void *a, int i, void *Args) {
68 return(((struct plan_weight *)a)[i].Index+((int *)Args)[2]*( ( ((struct plan_weight *)a)[i].Index%((int *)Args)[1]!=((int *)Args)[1]-1)+2*(((struct plan_weight *)a)[i].PE)));
69}
70
71/** Copies one element from a plan weight array \a *b to another \a *a.
72 * Copies from \a i-th element of \a *a index, weight and PE to \a *b
73 * \param *a source plan weight array
74 * \param i index of source element
75 * \param *b destination plan weight array
76 * \param j index of destination element
77 */
78static void CopyElementOnepw(void *a, int i, void *b, int j) {
79 ((struct plan_weight *)a)[i].Index = ((struct plan_weight *)b)[j].Index;
80 ((struct plan_weight *)a)[i].weight = ((struct plan_weight *)b)[j].weight;
81 ((struct plan_weight *)a)[i].PE = ((struct plan_weight *)b)[j].PE;
82}
83
84/** Creates a 3d plan for the FFT to use in transforms.
85 * \sa fft_plan_3d
86 * \param *P Problem at hand
87 * \param comm MPI Communicator
88 * \param E_cut Energy cutoff
89 * \param MaxLevel number of lattice levels
90 * \param *LevelSizes size increas factor from level to level for MaxLevel's
91 * \param GBasisSQ[] scalar product of reciprocal basis vectors
92 * \param N[] mesh points per dimension NDIM
93 * \param *MaxNoOfnFields maximum number of NFields per level
94 * \param **NFields NField per level per field
95 */
96struct fft_plan_3d *fft_3d_create_plan(struct Problem *P, MPI_Comm comm, const double E_cut, const int MaxLevel, const int *LevelSizes, const double GBasisSQ[NDIM], const int N[NDIM], const int *MaxNoOfnFields, int **NFields) {
97 struct fft_plan_3d *plan;
98 int *MaxNFields;
99 int i;
100 int j;
101 int ny;
102 int Index;
103 int LevelSize=1; // factorial level increase from 0th to maxlevel
104 int n[NDIM];
105 int Args[3];
106 double weight;
107 // Initialization of plan
108 plan = (struct fft_plan_3d *)
109 Malloc(sizeof(struct fft_plan_3d),"create_plan");
110 plan->P = P;
111 plan->local_data = NULL;
112 plan->work = NULL;
113 plan->MaxLevel = MaxLevel;
114 plan->LevelSizes = (int *)
115 Malloc(sizeof(int)*MaxLevel,"create_plan: MaxLevels");
116 for (i=0; i < MaxLevel; i++) {
117 LevelSize *= LevelSizes[i];
118 plan->LevelSizes[i] = LevelSizes[i];
119 }
120 plan->E_cut = E_cut;
121 plan->LevelSize = LevelSize;
122 if (N[0] % LevelSize != 0 || N[1] % LevelSize != 0 || N[2] % LevelSize != 0)
123 Error(SomeError,"create_plan: N[0] % LevelSize != 0 || N[1] % LevelSize != 0 || N[2] % LevelSize != 0");
124 // MPI part of plan
125 MPI_Comm_dup(comm, &plan->comm); // Duplicates an existing communicator with all its cached information
126 MPI_Comm_size(plan->comm, &plan->MaxPE); // Determines the size of the group associated with a communictor
127 MPI_Comm_rank(plan->comm, &plan->myPE); // Determines the rank of the calling process in the communicator
128 plan->PENo = (int *)
129 Malloc(2*sizeof(int)*plan->MaxPE,"create_plan: PENo"); // Malloc 2 ints per process in communicator group
130 for (i=0; i < 2*plan->MaxPE; i++) // all set to zero
131 plan->PENo[i] = 0;
132
133 for (i=0; i < NDIM; i++) {
134 plan->N[i] = N[i];
135 plan->GBasisSQ[i] = GBasisSQ[i];
136 plan->NLSR[i] = N[i] / LevelSize;
137 }
138 plan->Maxpw = (N[1] / LevelSize) * ((N[2] / LevelSize)/2+1);
139 if (P->Call.out[LeaderOut]) fprintf(stderr,"Create_Plan: E_cut %g, N[] %i %i %i, LS %i, Maxpw %i\n",E_cut, N[0],N[1],N[2], LevelSize, plan->Maxpw);
140 if (plan->NLSR[0] % plan->MaxPE != 0 || plan->Maxpw % plan->MaxPE != 0) // check if dividable among processes
141 Error(SomeError,"create_plan: plan->NLSR[0] % plan->MaxPE != 0 || plan->Maxpw % plan->MaxPE != 0");
142 plan->pw = (struct plan_weight *)
143 Malloc(plan->Maxpw*sizeof(struct plan_weight),"create_plan_weight"); // Allocating plan weight
144 plan->pwFS = (struct plan_weight *)
145 Malloc((plan->Maxpw+1)*sizeof(struct plan_weight),"create_plan_weight_FS");
146 for (n[1]=0; n[1] < plan->NLSR[1]; n[1]++) {
147 for (n[2]=0; n[2] < (plan->NLSR[2]/2+1); n[2]++) {
148 ny = (n[1] >= (plan->NLSR[1]/2+1) ? n[1]-plan->NLSR[1] : n[1]);
149 Index = CalcRowMajor2D(n[1],n[2],plan->NLSR[1],plan->NLSR[2]/2+1);
150 plan->pw[Index].Index = Index;
151 //fprintf(stderr,"(%i) plan->pw[%i] = %i\n",P->Par.me,Index, plan->pw[Index].Index);
152 if (GBasisSQ[0] < MYEPSILON) fprintf(stderr,"fft_3d_create_plan: GBasisSQ[0] = %lg\n",GBasisSQ[0]);
153 weight = (2.*E_cut/(LevelSize*LevelSize)-ny*ny*GBasisSQ[1]-n[2]*n[2]*GBasisSQ[2])/GBasisSQ[0];
154 if (weight > 0.0)
155 plan->pw[Index].weight = sqrt(weight);
156 else
157 plan->pw[Index].weight = 0.0;
158 if (n[2] == plan->NLSR[2]/2) plan->pw[Index].weight += 4.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
159 if (n[2] == 0) plan->pw[Index].weight += 2.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
160 }
161 }
162 naturalmergesort(plan->pw,plan->pwFS,0,plan->Maxpw-1,&GetKeyOnepw,NULL,&CopyElementOnepw);
163 if (plan->Maxpw < plan->MaxPE) Error(SomeError,"create_plan: plan->Maxpw < plan->MaxPE");
164 if (plan->Maxpw / plan->MaxPE < plan->NLSR[1] + plan->NLSR[1]/plan->MaxPE)
165 Error(SomeError,"create_plan: plan->Maxpw / plan->MaxPE < plan->NLSR[1] + plan->NLSR[1]/plan->MaxPE");
166 for (i=0; i < plan->Maxpw; i++) {
167 if (i < plan->NLSR[1]) {
168 plan->pw[i].PE = i % plan->MaxPE;
169 } else {
170 if (i-plan->NLSR[1] < plan->NLSR[1]) {
171 plan->pw[i].PE = 0;
172 } else {
173 if (i-2*plan->NLSR[1] < (plan->MaxPE-1)*plan->NLSR[1]) {
174 plan->pw[i].PE = ((i%(plan->MaxPE-1))+1);
175 } else {
176 plan->pw[i].PE = i%plan->MaxPE;
177 }
178 }
179 }
180 plan->PENo[2*(i % plan->MaxPE)]++;
181 if (plan->pw[i].Index%(plan->NLSR[2]/2+1) == plan->NLSR[2]/2)
182 plan->pw[i].weight -= 4.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
183 if (plan->pw[i].Index%(plan->NLSR[2]/2+1) == 0) {
184 plan->pw[i].weight -= 2.*sqrt(2.*E_cut/(LevelSize*LevelSize)/GBasisSQ[0]);
185 if (plan->pw[i].Index%(plan->NLSR[2]/2+1) >= plan->NLSR[2]/2+1) {
186 plan->pw[i].weight = 0.0;
187 } else {
188 if (plan->pw[i].Index % (plan->NLSR[2]/2+1) == 0) {
189 plan->PENo[2*(plan->pw[i].PE)+1] += floor(plan->pw[i].weight)+1;
190 } else {
191 plan->PENo[2*(plan->pw[i].PE)+1] += 2*floor(plan->pw[i].weight)+1;
192 }
193 }
194 } else {
195 plan->PENo[2*(plan->pw[i].PE)+1] += 2*floor(plan->pw[i].weight)+1;
196 }
197 }
198 plan->LocalMaxpw = plan->PENo[2*plan->myPE];
199 Args[0]=plan->MaxPE; Args[1]=plan->NLSR[2]/2+1; Args[2]=plan->Maxpw;
200 naturalmergesort(plan->pw,plan->pwFS,0,plan->Maxpw-1,&GetKeyOnepw2PZI,Args,&CopyElementOnepw);
201 for (j=0; j < plan->MaxPE; j++) {
202 for (i= plan->NLSR[1]/plan->MaxPE-1; i >= 0; i--) {
203 /*if (i != 0 && i+plan->LocalMaxpw*j >= i*(plan->NLSR[1]/2+1)+plan->LocalMaxpw*j)
204 Error(SomeError,"Createplan: i != 0 && i+plan->LocalMaxpw*j ? i*(plan->NLSR[2]/2+1)+plan->LocalMaxpw*j");*/
205 CopyElementOnepw(plan->pwFS,plan->LocalMaxpw,plan->pw,i+plan->LocalMaxpw*j);
206 CopyElementOnepw(plan->pw,i+plan->LocalMaxpw*j,plan->pw,(i+1)*(plan->NLSR[2]/2+1)-1+plan->LocalMaxpw*j);
207 CopyElementOnepw(plan->pw,(i+1)*(plan->NLSR[2]/2+1)-1+plan->LocalMaxpw*j,plan->pwFS,plan->LocalMaxpw);
208 }
209 }
210 plan->Localpw = &plan->pw[plan->myPE*plan->LocalMaxpw];
211 if (P->Call.out[PsiOut]) {
212 fprintf(stderr,"pw\n");
213 for (i=plan->myPE*plan->LocalMaxpw; i < (plan->myPE+1)*plan->LocalMaxpw; i++) // print my local weights
214 fprintf(stderr,"(%i)W(%g)I(%i)P(%i) ",i,plan->pw[i].weight,plan->pw[i].Index,plan->pw[i].PE);
215 fprintf(stderr,"\n");
216 }
217 if (P->Call.out[LeaderOut]) {
218 fprintf(stderr,"pwPENo\n");
219 for (i=0; i < plan->MaxPE; i++)
220 fprintf(stderr,"(%i)W(%i)X(%i) ",i,plan->PENo[2*i+1],plan->PENo[2*i]);
221 fprintf(stderr,"\n");
222 }
223 Free(plan->pwFS, "fft_3d_create_plan: plan->pwFS");
224 plan->pwFS = NULL;
225
226 plan->Lev_CR_RC = (int *)
227 Malloc(2*sizeof(int)*MaxLevel,"create_plan: Lev_CR_RC");
228 for (i=0; i < 2*MaxLevel; i++) plan->Lev_CR_RC[i] = 1;
229 plan->Levplan = (struct LevelPlan *)
230 Malloc(sizeof(struct LevelPlan)*MaxLevel,"create_plan:Levplan");
231 plan->MaxNoOfnFields = (int *)
232 Malloc(sizeof(int)*MaxLevel,"create_plan:MaxNoOfnFields");
233 for (i=0; i<MaxLevel;i++)
234 plan->MaxNoOfnFields[i] = MaxNoOfnFields[i];
235 plan->NFields = (int **)
236 Malloc(sizeof(int *)*MaxLevel,"create_plan:NFields");
237 MaxNFields = (int *)
238 Malloc(sizeof(int)*MaxLevel,"create_plan:MaxNFields");
239 for (i=0; i<MaxLevel;i++) {
240 plan->NFields[i] = (int *)
241 Malloc(sizeof(int )*MaxNoOfnFields[i],"create_plan:NFields");
242 MaxNFields[i] = 0;
243 for (j=0; j < plan->MaxNoOfnFields[i]; j++) {
244 plan->NFields[i][j] = NFields[i][j];
245 if (NFields[i][j] < 1) Error(SomeError,"Create_plan: NFields[j] < 1");
246 if (NFields[i][j] > MaxNFields[i]) MaxNFields[i] = NFields[i][j];
247 }
248 }
249 plan->LevelBlockSize = plan->LevelSize*plan->LevelSize*plan->LevelSize;
250 for (i=MaxLevel-1; i >= 0; i--) {
251 if (i == MaxLevel-1)
252 plan->Levplan[i].LevelFactor = LevelSizes[MaxLevel-1];
253 else
254 plan->Levplan[i].LevelFactor = plan->Levplan[i+1].LevelFactor*LevelSizes[i];
255 for (j=0; j < NDIM; j++)
256 plan->Levplan[i].N[j] = plan->NLSR[j]*plan->Levplan[i].LevelFactor;
257 plan->Levplan[i].LevelNo = i;
258 plan->LocalDataSize = (plan->Levplan[i].N[0]*plan->Levplan[i].N[1]*(plan->Levplan[i].N[2]/2+1)*MaxNFields[i])/plan->MaxPE;
259 plan->LocalWorkSize = plan->LocalDataSize;
260 plan->local_data = (fftw_complex *)
261 Realloc(plan->local_data, sizeof(fftw_complex)*plan->LocalDataSize,"create_plan: localdata");
262 plan->work = (fftw_complex *)
263 Realloc(plan->work, sizeof(fftw_complex)*plan->LocalWorkSize,"create_plan: work");
264 plan->Levplan[i].ny = plan->Levplan[i].N[1];
265 plan->Levplan[i].local_ny = plan->Levplan[i].N[1]/plan->MaxPE;
266 plan->Levplan[i].nz = plan->Levplan[i].N[2]/2+1;
267 plan->Levplan[i].local_nx = plan->Levplan[i].N[0]/plan->MaxPE;
268 plan->Levplan[i].nx = plan->Levplan[i].N[0];
269 plan->Levplan[i].start_nx = plan->Levplan[i].local_nx * plan->myPE;
270 plan->Levplan[i].local_nyz = plan->Levplan[i].local_ny*plan->Levplan[i].nz;
271 plan->Levplan[i].nyz = plan->Levplan[i].ny*plan->Levplan[i].nz;
272 fprintf(stderr,"(%i) nlocal (x%i, y%i, yz%i), n (%i, %i, %i), N (%i, %i, %i)\n", P->Par.me, plan->Levplan[i].local_nx, plan->Levplan[i].local_ny, plan->Levplan[i].local_nyz, plan->Levplan[i].nx, plan->Levplan[i].ny, plan->Levplan[i].nz, plan->Levplan[i].N[0], plan->Levplan[i].N[1], plan->Levplan[i].N[2]);
273 plan->Levplan[i].LocalSizeC = plan->Levplan[i].nx*plan->Levplan[i].local_nyz;
274 plan->Levplan[i].LocalSizeR = plan->Levplan[i].local_nx*plan->Levplan[i].ny*plan->Levplan[i].N[2];
275 plan->Levplan[i].LevelBlockSize = plan->Levplan[i].LevelFactor* plan->Levplan[i].LevelFactor*plan->Levplan[i].LevelFactor;
276 plan->Levplan[i].SendRecvBlockSize = plan->Levplan[i].local_nyz*plan->Levplan[i].local_nx;
277 plan->Levplan[i].PermBlockSize = plan->Levplan[i].LevelFactor;
278 if (P->Call.out[LeaderOut])
279 fprintf(stderr,"Create_Plan: Level(%i) LevelFactor(%i) LevelBlockSize(%i)\n",i,plan->Levplan[i].LevelFactor, plan->Levplan[i].LevelBlockSize);
280 if (plan->Lev_CR_RC[2*i]) {
281 plan->Levplan[i].xplanCR = (fftw_plan *)
282 Malloc(sizeof(fftw_plan)*plan->MaxNoOfnFields[i],"Create_Plan: xplanCR");
283 plan->Levplan[i].yzplanCR = (rfftwnd_plan *)
284 Malloc(sizeof(rfftwnd_plan)*plan->MaxNoOfnFields[i],"Create_Plan: yzplanCR");
285 for (j=0; j<plan->MaxNoOfnFields[i]; j++) {
286 plan->Levplan[i].xplanCR[j] = fftw_create_plan_specific(plan->Levplan[i].N[0], FFTW_BACKWARD, fftw_flag | FFTW_OUT_OF_PLACE,plan->local_data,plan->NFields[i][j],plan->work,plan->NFields[i][j]*plan->Levplan[i].local_nyz);
287 plan->Levplan[i].yzplanCR[j] = rfftw2d_create_plan_specific(plan->Levplan[i].N[1], plan->Levplan[i].N[2], FFTW_COMPLEX_TO_REAL, fftw_flag | FFTW_OUT_OF_PLACE,(fftw_real *)plan->local_data,plan->NFields[i][j],(fftw_real *)plan->work,plan->NFields[i][j]);
288 }
289 } else {
290 plan->Levplan[i].xplanCR = NULL;
291 plan->Levplan[i].yzplanCR = NULL;
292 }
293 if (plan->Lev_CR_RC[2*i+1]) {
294 plan->Levplan[i].xplanRC = (fftw_plan *)
295 Malloc(sizeof(fftw_plan)*plan->MaxNoOfnFields[i],"Create_Plan: xplanRC");
296 plan->Levplan[i].yzplanRC = (rfftwnd_plan *)
297 Malloc(sizeof(rfftwnd_plan)*plan->MaxNoOfnFields[i],"Create_Plan: yzplanRC");
298 for (j=0; j<plan->MaxNoOfnFields[i]; j++) {
299 plan->Levplan[i].xplanRC[j] = fftw_create_plan_specific(plan->Levplan[i].N[0], FFTW_FORWARD, fftw_flag | FFTW_OUT_OF_PLACE,plan->local_data,plan->NFields[i][j]*plan->Levplan[i].local_nyz,plan->work,plan->NFields[i][j]);
300 plan->Levplan[i].yzplanRC[j] = rfftw2d_create_plan_specific(plan->Levplan[i].N[1], plan->Levplan[i].N[2], FFTW_REAL_TO_COMPLEX, fftw_flag | FFTW_OUT_OF_PLACE,(fftw_real *)plan->local_data,plan->NFields[i][j],(fftw_real *)plan->work,plan->NFields[i][j]);
301 }
302 } else {
303 plan->Levplan[i].xplanRC = NULL;
304 plan->Levplan[i].yzplanRC = NULL;
305 }
306 }
307 if (plan->local_data != NULL) {
308 Free(plan->local_data, "fft_3d_create_plan: plan->local_data");
309 plan->local_data = NULL;
310 }
311 if (plan->work != NULL) {
312 Free(plan->local_data, "fft_3d_create_plan: plan->local_data");
313 plan->work = NULL;
314 }
315 Free(MaxNFields, "fft_3d_create_plan: MaxNFields");
316 return plan;
317}
318
319void fft_3d_destroy_plan(const struct Problem *P, struct fft_plan_3d *plan) {
320 int i,j;
321 //if (P->Call.out[LeaderOut]) fprintf(stderr,"Destroy_Plan\n");
322 for (i=0; i< plan->MaxLevel; i++) {
323 for (j=0; j< plan->MaxNoOfnFields[i]; j++) {
324 fftw_destroy_plan(plan->Levplan[i].xplanCR[j]);
325 fftw_destroy_plan(plan->Levplan[i].xplanRC[j]);
326 rfftwnd_destroy_plan(plan->Levplan[i].yzplanCR[j]);
327 rfftwnd_destroy_plan(plan->Levplan[i].yzplanRC[j]);
328 }
329 Free(plan->Levplan[i].xplanCR, "fft_3d_destroy_plan: plan->Levplan[i].xplanCR");
330 Free(plan->Levplan[i].xplanRC, "fft_3d_destroy_plan: plan->Levplan[i].xplanRC");
331 Free(plan->Levplan[i].yzplanCR, "fft_3d_destroy_plan: plan->Levplan[i].yzplanCR");
332 Free(plan->Levplan[i].yzplanRC, "fft_3d_destroy_plan: plan->Levplan[i].yzplanRC");
333 Free(plan->NFields[i], "fft_3d_destroy_plan: plan->NFields[i]");
334 }
335 Free(plan->MaxNoOfnFields, "fft_3d_destroy_plan: plan->MaxNoOfnFields");
336 Free(plan->NFields, "fft_3d_destroy_plan: plan->NFields");
337 Free(plan->Levplan, "fft_3d_destroy_plan: plan->Levplan");
338 Free(plan->PENo, "fft_3d_destroy_plan: plan->PENo");
339 plan->PENo = NULL;
340 Free(plan->LevelSizes, "fft_3d_destroy_plan: plan->LevelSizes");
341 plan->LevelSizes = NULL;
342 Free(plan->Lev_CR_RC, "fft_3d_destroy_plan: plan->Lev_CR_RC");
343 plan->Lev_CR_RC = NULL;
344 Free(plan->pw, "fft_3d_destroy_plan: plan->pw");
345 plan->pw = NULL;
346 plan->Levplan = NULL;
347 MPI_Comm_free(&plan->comm);
348 Free(plan, "fft_3d_destroy_plan: plan");
349}
350
351static void FirstDimTransCR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local_data -> work */
352 int local_nyz = Lplan->local_nyz;
353 int nx = Lplan->N[0];
354 int fft_iter;
355 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
356 fftw_plan fft_x = Lplan->xplanCR[nFieldsNo];
357 if (nFields > 1) {
358 for (fft_iter = 0; fft_iter < local_nyz; ++fft_iter)
359 fftw(fft_x, nFields,
360 local_data + (nx * nFields) * fft_iter, nFields, 1,
361 work+nFields*fft_iter, nFields*local_nyz, 1);
362 } else
363 fftw(fft_x, local_nyz,
364 local_data, 1, nx,
365 work, local_nyz, 1);
366}
367
368static void OtherDimTransCR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* work -> local_data */
369 int local_nx = Lplan->local_nx;
370 int nyz = Lplan->nyz;
371 int fft_iter;
372 int rnyz = Lplan->N[1]*Lplan->N[2];
373 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
374 rfftwnd_plan fft = Lplan->yzplanCR[nFieldsNo];
375 if (nFields > 1) {
376 for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
377 rfftwnd_complex_to_real(fft, nFields,
378 ((fftw_complex *) work)+ (nyz * nFields) * fft_iter,
379 nFields, 1,
380 ((fftw_real *)local_data)+ (rnyz * nFields) * fft_iter
381 , nFields, 1);
382 } else {
383 rfftwnd_complex_to_real(fft, local_nx,
384 (fftw_complex *) work,
385 1, nyz,
386 (fftw_real *)local_data
387 , 1, rnyz);
388 }
389}
390
391static void TransposeMPICR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* work -> local_data */
392 MPI_Alltoall(work, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
393 local_data, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
394 plan->comm);
395}
396
397static void LocalPermutationCR(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* data -> work */
398 int x,PEy,ly,z,Z,Y,srcIndex,destIndex,Index,cpyes,Ly;
399 int lnx=Lplan->local_nx,lny=plan->NLSR[1]/plan->MaxPE,nz=plan->NLSR[2]/2+1,MaxPE=plan->MaxPE,ny=plan->NLSR[1];
400 int es = el_size*Lplan->PermBlockSize;
401 int LevelFactor = Lplan->LevelFactor;
402 int snz = (nz-1)*es+el_size;
403 struct plan_weight *pw = plan->pw;
404 for (x=0; x < lnx; x++)
405 for (PEy=0; PEy < MaxPE; PEy++)
406 for (ly=0; ly < lny; ly++)
407 for (Ly=0; Ly < LevelFactor;Ly++)
408 for (z=0; z < nz; z++) {
409 Z = z*es;
410 srcIndex = Z+snz*(Ly+LevelFactor*(ly+lny*(x+lnx*(PEy))));
411 Index = pw[z+nz*(ly+lny*PEy)].Index;
412 Z = Index%nz;
413 Z = Z*es;
414 Y = ((Index/nz)*LevelFactor+Ly);
415 destIndex = Z+snz*(Y+LevelFactor*ny*(x));
416 cpyes = (z == nz-1 ? el_size : es);
417 memcpy( &work[destIndex],
418 &local_data[srcIndex],
419 cpyes * sizeof(double));
420 }
421}
422
423// does complex to real 3d fftransformation (reciprocal base -> real base)
424/** Inverse Fast Fourier Transformation of plane wave coefficients.
425 * If given state is \f$\langle \chi_{i,G} | \psi_i (r) \rangle \f$, result is \f$| \psi_i (r) \rangle\f$
426 * Calls subsequently (reverse order of calling as in fft_3d_real_to_complex())
427 * -# FirstDimTransCR()\n
428 * -# TransposeMPICR()\n
429 * -# LocalPermutationCR()\n
430 * -# OtherDimTransCR()\n
431 * \param *plan the transformation plan
432 * \param Level current level number
433 * \param nFieldsNo number of parallel ffts (in case of LevelUp)
434 * \param *local_data real base wave function coefficients, \f$\psi_i (G)\f$, on return \f$\psi_i (r)\f$
435 * \param *work temporary array for coefficients
436 * \warning coefficients in \a *local_date and *work are destroyed!
437 * \note Due to the symmetry of the reciprocal coefficients, not all of them are stored in memory. Thus
438 * the whole set must be rebuild before this inverse transformation can be called. From 0 to
439 * LatticeLevel::MaxG copied to negative counterpart and for 0 to LatticeLevel::MaxDoubleG the
440 * complex conjugate must be formed (see \ref density.c).
441 */
442void fft_3d_complex_to_real(const struct fft_plan_3d *plan, const int Level, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local data -> local_data, work destroyed */
443 int el_size = (sizeof(fftw_complex) / sizeof(double));
444 int nFields = plan->NFields[Level][nFieldsNo];
445 struct LevelPlan *Lplan = &plan->Levplan[Level];
446 if (!plan->Lev_CR_RC[2*Level]) Error(SomeError,"fft_3d_complex_to_real: !plan->Lev_CR_RC[2*i]");
447 if (nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]) Error(SomeError,"fft_3d_complex_to_real: nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]");
448 el_size *= nFields;
449 //if (isnan(work[0].re))
450 //fprintf(stderr,"fft_3d_complex_to_real,FirstDimTransCR: before work %lg\n",work[0].re);
451 FirstDimTransCR(plan, Lplan, nFieldsNo, local_data, work);
452 //if (isnan(work[0].re))
453 //fprintf(stderr,"fft_3d_complex_to_real,FirstDimTransCR: after work %lg\n",work[0].re);
454 TransposeMPICR(plan, Lplan, el_size, (double *)local_data, (double *)work);
455 LocalPermutationCR(plan, Lplan, el_size, (double *)local_data, (double *)work);
456 OtherDimTransCR(plan, Lplan, nFieldsNo, local_data, work);
457}
458
459static void FirstDimTransRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* work -> local_data */
460 int local_nyz = Lplan->local_nyz;
461 int nx = Lplan->N[0];
462 int fft_iter;
463 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
464 fftw_plan fft_x = Lplan->xplanRC[nFieldsNo];
465 if (nFields > 1) {
466 for (fft_iter = 0; fft_iter < local_nyz; ++fft_iter)
467 fftw(fft_x, nFields,
468 work+nFields*fft_iter, nFields*local_nyz, 1,
469 local_data + (nx * nFields) * fft_iter, nFields, 1);
470 } else
471 fftw(fft_x, local_nyz,
472 work, local_nyz, 1,
473 local_data, 1, nx);
474}
475
476static void OtherDimTransRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local_data -> work */
477 int local_nx = Lplan->local_nx;
478 int nyz = Lplan->nyz;
479 int rnyz = Lplan->N[1]*Lplan->N[2];
480 int fft_iter;
481 int nFields = plan->NFields[Lplan->LevelNo][nFieldsNo];
482 rfftwnd_plan fft = Lplan->yzplanRC[nFieldsNo];
483 if (nFields > 1) {
484 for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
485 rfftwnd_real_to_complex(fft, nFields,
486 ((fftw_real *) local_data) + (rnyz * nFields) * fft_iter,
487 nFields, 1,
488 ((fftw_complex *)work)+ (nyz * nFields) * fft_iter
489 , nFields, 1);
490 } else {
491 rfftwnd_real_to_complex(fft, local_nx,
492 (fftw_real *) local_data,
493 1, rnyz,
494 (fftw_complex *) work
495 , 1, nyz);
496 }
497}
498
499static void TransposeMPIRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* local_data -> work */
500 MPI_Alltoall(local_data, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
501 work, Lplan->SendRecvBlockSize * el_size, MPI_DOUBLE,
502 plan->comm);
503}
504
505static void LocalPermutationRC(const struct fft_plan_3d *plan, const struct LevelPlan *Lplan, const int el_size, double *local_data, double *work) { /* work -> data*/
506 int x,PEy,ly,z,Z,Y,srcIndex,destIndex,Index,cpyes,Ly;
507 int lnx=Lplan->local_nx,lny=plan->NLSR[1]/plan->MaxPE,nz=plan->NLSR[2]/2+1,MaxPE=plan->MaxPE,ny=plan->NLSR[1];
508 int es = el_size*Lplan->PermBlockSize;
509 int LevelFactor = Lplan->LevelFactor;
510 int snz = (nz-1)*es+el_size;
511 struct plan_weight *pw = plan->pw;
512 for (x=0; x < lnx; x++)
513 for (PEy=0; PEy < MaxPE; PEy++)
514 for (ly=0; ly < lny; ly++)
515 for (Ly=0; Ly < LevelFactor;Ly++)
516 for (z=0; z < nz; z++) {
517 Z = z*es;
518 destIndex = Z+snz*(Ly+LevelFactor*(ly+lny*(x+lnx*(PEy))));
519 Index = pw[z+nz*(ly+lny*PEy)].Index;
520 Z = Index%nz;
521 Z = Z*es;
522 Y = ((Index/nz)*LevelFactor+Ly);
523 srcIndex = Z+snz*(Y+LevelFactor*ny*(x));
524 cpyes = (z == nz-1 ? el_size : es);
525 memcpy( &local_data[destIndex],
526 &work[srcIndex],
527 cpyes * sizeof(double));
528 }
529}
530
531/** Normal Fast Fourier Transformation of plane wave coefficients.
532 * If given state is \f$| \psi_i (r) \rangle \f$, result is \f$\langle \chi_{i,G} | \psi_i (r) \rangle\f$
533 * Calls subsequently
534 * -# OtherDimTransRC()\n
535 * -# LocalPermutationRC()\n
536 * -# TransposeMPIRC()\n
537 * -# FirstDimTransRC()\n
538 * \param *plan the transformation plan
539 * \param Level current level number
540 * \param nFieldsNo number of parallel ffts (in case of LevelUp)
541 * \param *local_data real base wave function coefficients, \f$\psi_i (r)\f$, on return \f$\psi_i (G)\f$
542 * \param *work temporary array for coefficients
543 * \warning coefficients in \a *local_date and *work are destroyed!
544 */
545void fft_3d_real_to_complex(const struct fft_plan_3d *plan, const int Level, const int nFieldsNo, fftw_complex *local_data, fftw_complex *work) { /* local data -> local_data, work destroyed */
546 int el_size = (sizeof(fftw_complex) / sizeof(double));
547 int nFields = plan->NFields[Level][nFieldsNo];
548 struct LevelPlan *Lplan = &plan->Levplan[Level];
549 if (!plan->Lev_CR_RC[2*Level+1]) Error(SomeError,"fft_3d_real_to_complex: !plan->Lev_CR_RC[2*i+1]");
550 if (nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]) Error(SomeError,"fft_3d_real_to_complex: nFieldsNo < 0 || nFieldsNo >= plan->MaxNoOfnFields[Level]");
551 el_size *= nFields;
552 OtherDimTransRC(plan, Lplan, nFieldsNo, local_data, work);
553 LocalPermutationRC(plan, Lplan, el_size, (double *)local_data, (double *)work);
554 TransposeMPIRC(plan, Lplan, el_size, (double *)local_data, (double *)work);
555 FirstDimTransRC(plan, Lplan, nFieldsNo, local_data, work);
556}
557
558/** Fills \a *local_data with random values.
559 * \param *plan fft plan
560 * \param Level current level number
561 * \param nFields number of nFields (of parallel ffts)
562 * \param *local_data array of coefficients to be set at random values
563 */
564void InitDataTestR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data) {
565 int x,y,z,n,Index,i;
566 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
567 int Ny = plan->Levplan[Level].N[1];
568 int Nz = plan->Levplan[Level].N[2];
569 srand((unsigned int)Level);
570 for (i=0;i<plan->myPE*Nx*Ny*Nz*nFields;i++) rand();
571 for (x=0; x < Nx; x++) {
572 for (y = 0; y < Ny; y++) {
573 for (z = 0; z < Nz; z++) {
574 for (n=0; n < nFields; n++) {
575 Index = n+nFields*(z+Nz*(y+Ny*(x)));
576 local_data[Index]=1.-2.*(rand()/(double)RAND_MAX);
577 }
578 }
579 }
580 }
581}
582
583/*
584static void OutputDataTestWR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data,FILE *target) {
585 int x,y,z,n,Index;
586 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
587 int Ny = plan->Levplan[Level].N[1];
588 int Nz = plan->Levplan[Level].N[2];
589 fprintf(target,"Level=%i\n",Level);
590 for (x=0; x < Nx; x++) {
591 fprintf(target,"x=%i\n",x+Nx*plan->myPE);
592 for (y = 0; y < Ny; y++) {
593 fprintf(target,"y=%5i ",y);
594 for (z = 0; z < Nz; z++) {
595 for (n=0; n < nFields; n++) {
596 Index = n+nFields*(z+2*(Nz/2+1)*(y+Ny*(x)));
597 fprintf(target,"%10.5f ",local_data[Index]);
598 }
599 }
600 fprintf(target,"\n");
601 }
602 }
603}
604
605static void OutputDataTestR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data, double factor,FILE *target) {
606 int x,y,z,n,Index;
607 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
608 int Ny = plan->Levplan[Level].N[1];
609 int Nz = plan->Levplan[Level].N[2];
610 fprintf(target,"Level=%i\n",Level);
611 for (x=0; x < Nx; x++) {
612 fprintf(target,"x=%i\n",x+Nx*plan->myPE);
613 for (y = 0; y < Ny; y++) {
614 fprintf(target,"y=%5i ",y);
615 for (z = 0; z < Nz; z++) {
616 for (n=0; n < nFields; n++) {
617 Index = n+nFields*(z+Nz*(y+Ny*(x)));
618 fprintf(target,"%10.5f ",local_data[Index]*factor);
619 }
620 }
621 fprintf(target,"\n");
622 }
623 }
624}
625
626static void InitDataTestWR(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_real *local_data) {
627 int i,x,y,z,n,Index,Value=0;
628 int Nx = plan->Levplan[Level].N[0]/plan->MaxPE;
629 int Ny = plan->Levplan[Level].N[1];
630 int Nz = plan->Levplan[Level].N[2];
631 srand(Level);
632 for (i=0;i<plan->myPE*Nx*Ny*Nz*nFields;i++) rand();
633 for (x=0; x < Nx; x++) {
634 for (y = 0; y < Ny; y++) {
635 for (z = 0; z < Nz; z++) {
636 for (n=0; n < nFields; n++) {
637 Index = n+nFields*(z+2*(Nz/2+1)*(y+Ny*(x)));
638 local_data[Index] = 1.-2.*(rand()/(double)RAND_MAX);
639 Value++;
640 }
641 }
642 }
643 }
644}
645
646static void OutputDataTestWCA(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data,double factor,FILE *target) {
647 int x,y,z,yz,n,Index,Y,Z,YZ;
648 int Nx = plan->Levplan[Level].N[0];
649 int Ny = plan->Levplan[Level].N[1]/plan->MaxPE;
650 int Nz = plan->Levplan[Level].N[2]/2+1;
651 int NZ = plan->NLSR[2]/2+1;
652 fprintf(target,"Level=%i\n",Level);
653 for (y = 0; y < Ny; y++) {
654 for (z = 0; z < Nz; z++) {
655 Y = (y+plan->myPE*Ny)/plan->Levplan[Level].LevelFactor;
656 Z = z/plan->Levplan[Level].LevelFactor;
657 YZ = Z+NZ*Y;
658 yz = z+Nz*y;
659 fprintf(target,"yz=%i(%i)\n",yz+plan->myPE*Ny*Nz,YZ);
660 for (x=0; x < Nx; x++) {
661 for (n=0; n < nFields; n++) {
662 Index = n+nFields*(z+Nz*(x+Nx*(y)));
663 fprintf(target,"%10.5f %10.5f ",local_data[Index].re*factor,local_data[Index].im*factor);
664 }
665 }
666 fprintf(target,"\n");
667 }
668
669 }
670}
671
672static void OutputDataTestCA(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data,double factor,FILE *target) {
673 int x,yz,n,Index,Y,Z,YZ,y,z;
674 int Nx = plan->Levplan[Level].nx;
675 int Ny = plan->Levplan[Level].local_ny ;
676 int Nz = plan->Levplan[Level].nz;
677 int Nyz = Ny*Nz;
678 int NZ = plan->NLSR[2]/2+1;
679 fprintf(target,"Level=%i\n",Level);
680 for (y = 0; y < Ny; y++) {
681 for (z = 0; z < Nz; z++) {
682 Y = (y+plan->myPE*Ny)/plan->Levplan[Level].LevelFactor;
683 Z = z/plan->Levplan[Level].LevelFactor;
684 YZ = Z+NZ*Y;
685 YZ = plan->pw[YZ].Index;
686 yz = z+Nz*y;
687 fprintf(target,"yz=%i(%i)\n",yz+plan->myPE*Nyz,YZ);
688 for (x=0; x < Nx; x++) {
689 for (n=0; n < nFields; n++) {
690 Index = n+nFields*(x+Nx*(yz));
691 fprintf(target,"%10.5f %10.5f ",((double *)local_data)[2*Index]*factor,((double *)local_data)[2*Index+1]*factor);
692 }
693 }
694 fprintf(target,"\n");
695 }
696 }
697}
698
699static void InitDataTestCA(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data) {
700 int x,yz,n,Index,Y,Z,YZ,y,z,Value=0;
701 int Nx = plan->Levplan[Level].nx;
702 int Ny = plan->Levplan[Level].local_ny ;
703 int Nz = plan->Levplan[Level].nz;
704 int Nyz = plan->Levplan[Level].ny*Nz;
705 int NZ = plan->NLSR[2]/2+1;
706 int YY;
707 for (y = 0; y < Ny; y++) {
708 for (z = 0; z < Nz; z++) {
709 Y = (y+plan->myPE*Ny)/plan->Levplan[Level].LevelFactor;
710 YY = (y+plan->myPE*Ny)%plan->Levplan[Level].LevelFactor;
711 Z = z/plan->Levplan[Level].Level;
712 YZ = Z+NZ*Y;
713 YZ = plan->pw[YZ].Index;
714 Z = (YZ%NZ)*plan->Levplan[Level].LevelFactor+(z)%plan->Levplan[Level].LevelFactor;
715 yz = Z+Nz*(((YZ/NZ)*plan->Levplan[Level].LevelFactor+YY));
716 for (x=0; x < Nx; x++) {
717 for (n=0; n < nFields; n++) {
718 Index = n+nFields*(x+Nx*(z+Nz*y));
719 Value = n+nFields*(yz+Nyz*x);
720 local_data[Index].re = Value*2;
721 local_data[Index].im = Value*2+1;
722 }
723 }
724 }
725 }
726}
727
728static void OutputDataTestCB(const struct fft_plan_3d *plan, const int Level, const int nFields,fftw_complex *local_data,double factor,FILE *target) {
729 int x,yz,n,Index,Y,Z,YZ,y,z;
730 int Nx = plan->Levplan[Level].local_nx;
731 int Ny = plan->Levplan[Level].ny ;
732 int Nz = plan->Levplan[Level].nz;
733 int NZ = plan->NLSR[2]/2+1;
734 fprintf(target,"Level=%i\n",Level);
735 for (x=0; x < Nx; x++) {
736 fprintf(target,"x=%i\n",x+plan->myPE*Nx);
737 for (y = 0; y < Ny; y++) {
738 for (z = 0; z < Nz; z++) {
739 Y = (y)/plan->Levplan[Level].LevelFactor;
740 Z = z/plan->Levplan[Level].LevelFactor;
741 YZ = Z+NZ*Y;
742 YZ = plan->pw[YZ].Index;
743 yz = z+Nz*y;
744 for (n=0; n < nFields; n++) {
745 Index = n+nFields*(yz+Ny*Nz*(x));
746 fprintf(target,"%10.5f %10.5f ",local_data[Index].re*factor,local_data[Index].im*factor);
747 }
748 }
749 fprintf(target,"\n");
750 }
751 }
752}
753
754void fft_3d_c2r_r2c_Test(struct Problem *P, const struct fft_plan_3d *plan, const int nFields) {
755 int i,j,Max=10;
756 double time1, time2, timeCR, timeRC, time1CR, time1RC, factor =1.0;
757 int local_nx, local_x_start, local_ny_after_transpose,local_y_start_after_transpose,total_local_size;
758 FILE *data0,*data1,*data0C,*data1C;
759 OpenFileNo2(P,&data0,".data0",plan->myPE,"w",P->Call.out[ReadOut]);
760 OpenFileNo2(P,&data1,".data1",plan->myPE,"w",P->Call.out[ReadOut]);
761 OpenFileNo2(P,&data0C,".data0C",plan->myPE,"w",P->Call.out[ReadOut]);
762 OpenFileNo2(P,&data1C,".data1C",plan->myPE,"w",P->Call.out[ReadOut]);
763 fprintf(stderr, "(%i)C2RTest: \n", P->Par.me);
764 for (i=plan->MaxLevel-1; i >= 0; i--) {
765 timeCR = 0;
766 timeRC = 0;
767 time1RC = 0;
768 time1CR = 0;
769 fprintf(stderr,"PE(%i) L(%i) Nx(%i) Nyz(%i) nF(%i)\n",plan->myPE,i,plan->Levplan[i].N[0],plan->Levplan[i].local_nyz,nFields);
770 factor = 1./(plan->Levplan[i].N[0]*plan->Levplan[i].N[1]*plan->Levplan[i].N[2]);
771 for (j=0; j < Max; j++) {
772 InitDataTestR(plan,i,nFields,(fftw_real *)plan->local_data);
773 time1 = GetTime();
774 fft_3d_real_to_complex(plan,i,nFields,plan->local_data, plan->work);
775 time2 = GetTime();
776 timeRC += (time2 - time1);
777 time1 = GetTime();
778 fft_3d_complex_to_real(plan,i,nFields,plan->local_data, plan->work);
779 time2 = GetTime();
780 timeCR += (time2 - time1);
781
782 }
783 fprintf(stderr,"PE(%i): L(%i) CR:sec(%g) RC:sec(%g)\n",plan->myPE, i, timeCR, timeRC);
784 }
785 fclose(data0);
786 fclose(data1);
787 fclose(data0C);
788 fclose(data1C);
789}
790*/
Note: See TracBrowser for help on using the repository browser.