1 | /** \file pcp.c
|
---|
2 | * Main routine, Run super-functions and signal handling.
|
---|
3 | *
|
---|
4 | * This file contains just framework:
|
---|
5 | * The main() routine, passing its arguments on to Run(), which does signal
|
---|
6 | * initialization InitSignals() and setting verbosity levels and calls RunSim()
|
---|
7 | * which is then just a wrapper for CalculateMD().
|
---|
8 | * For the signals there is the actual handler SigHandlerCPULIM() for the external
|
---|
9 | * exit signal, routines to check for the signal CheckCPULIM() and give correct
|
---|
10 | * return code to old handler CheckSigReturn().
|
---|
11 | *
|
---|
12 | Project: ParallelCarParrinello
|
---|
13 | \author Jan Hamaekers
|
---|
14 | \date 2000
|
---|
15 |
|
---|
16 | File: pcp.c
|
---|
17 | $Id: pcp.c,v 1.36 2007/02/05 13:59:24 foo Exp $
|
---|
18 | */
|
---|
19 |
|
---|
20 | /*! \mainpage Introductory to Parallel Car Parrinello
|
---|
21 | *
|
---|
22 | * This is a quick overview of the programme pcp, explaining as shallowly as possible
|
---|
23 | * its possble uses. For a more profound introduction be referred to its (german) diploma
|
---|
24 | * thesis paper here: http://wissrech.ins.uni-bonn.de/teaching/diplom/diplom_hamaeker.pdf
|
---|
25 | * (Note: All numbering behind formulas always refer to this thesis paper)
|
---|
26 | *
|
---|
27 | * ParallelCarParrinello - or short pcp - is the parallel implementation of
|
---|
28 | * Car-Parrinello-Molecular-Dynamics, using a density field theory approach. In brief,
|
---|
29 | * a given configuration is minimised in terms of the energy functional. The system
|
---|
30 | * is encapsulated in a cell and thus periodic boundary conditions are applied.
|
---|
31 | * The potential is also evaluated and this yields a force for every moving ion in
|
---|
32 | * the cell. Thereby the molecular dynamics is performed.
|
---|
33 | *
|
---|
34 | * The basic idea behind the parallelisation is most importantly the simultaneous
|
---|
35 | * fast fourier transformation, where the three-dimensional grid of coefficients in
|
---|
36 | * a plane wave approach are subdivided into planes for the hither transformation and
|
---|
37 | * pens for the thither one. These one or two dimensional ffts can be done by multiple
|
---|
38 | * processes and later reassembled.
|
---|
39 | * Second is the parallelisation of the minimisation. Each process holds a certain
|
---|
40 | * "local" part of all electronic wave functions and the others are assumed constant
|
---|
41 | * while minimising over these local ones by a conjugate gradient mechanism.
|
---|
42 | * Lastly, the Gram-Schmidt-Orthonormalisation is also done parallely.
|
---|
43 | *
|
---|
44 | * This Introductory is divided in the following sections:
|
---|
45 | * - \subpage intro Introduction
|
---|
46 | * - \subpage tutorial Hands-on Tutorial
|
---|
47 | *
|
---|
48 | * © 2006 Frederik Heber
|
---|
49 | */
|
---|
50 |
|
---|
51 | /*! \page intro Introduction
|
---|
52 | *
|
---|
53 | *
|
---|
54 | * \section install Installation
|
---|
55 | *
|
---|
56 | *
|
---|
57 | * \subsection reqs Requirements
|
---|
58 | *
|
---|
59 | * This package relies on certain other packages.
|
---|
60 | * - FFTW version 2\n
|
---|
61 | * This is a fast implementation of Fast Fourier Transformations.
|
---|
62 | * Currently, version 3 is not supported. Get it from their webpage, compile
|
---|
63 | * and install the libraries and header files in a path where the linker will
|
---|
64 | * find them.\n
|
---|
65 | * http://www.fftw.org/
|
---|
66 | * - MPICH\n
|
---|
67 | * This is an implementation of the Message Parsing Interface, which enables
|
---|
68 | * simple build of parallel programs by means of exchange data via messages.
|
---|
69 | * Here as well we need the libraries and header files, compile and install
|
---|
70 | * them in an accessible directory, and also the launcher mpirun.\n
|
---|
71 | * http://www-unix.mcs.anl.gov/mpi/mpich/
|
---|
72 | * - OpenDx\n
|
---|
73 | * For optional visualization of the calculations, displaying electronic densities,
|
---|
74 | * ion positions and forces within the cell.\n
|
---|
75 | * http://www.opendx.org/
|
---|
76 | *
|
---|
77 | * \subsection process Installation Process
|
---|
78 | *
|
---|
79 | * Note beforehand that this package manages compilation on various architectures with
|
---|
80 | * help of some GNU tools such as automake (Makefile.am) respectively autconf (configure.ac).
|
---|
81 | *
|
---|
82 | * After unzipping the archive into an appropiate source directory, the programme has
|
---|
83 | * to be compiled by executing consecutively:
|
---|
84 | * - ./configure
|
---|
85 | * - make
|
---|
86 | * - make install
|
---|
87 | *
|
---|
88 | * It is recommended to create two additional directories:\n
|
---|
89 | * One "build" directory which may safely reside in the main source folder,
|
---|
90 | * wherein the actual build process is launched and thus temporarily created object
|
---|
91 | * files removed from the main folder's contents. This happens automatically if
|
---|
92 | * configure is not called from the main folder but within this build folder by:
|
---|
93 | * - "../configure"
|
---|
94 | *
|
---|
95 | * Secondly, a "run" directory. This might either be an already present "/usr/local/bin"
|
---|
96 | * which is taken as default, but also a "/home/foo/pcp/run". If desired this path
|
---|
97 | * should be appended to the configure command as follows:
|
---|
98 | * - "../configure --bindir /home/foo/pcp/run"
|
---|
99 | *
|
---|
100 | * and the executable will be automatically installed therein on calling make install.
|
---|
101 | *
|
---|
102 | * Alternatively, you may more generally pick "--prefix=/home/foo/pcp", which would prefix
|
---|
103 | * all installation directories (such as doc, man, info, lib, ...) with the given directory
|
---|
104 | * home/foo/pcp. Thus, the executable would now be installed upon evocation of 'make install'
|
---|
105 | * in /home/foo/pcp/bin (because ./bin is default for executables). This and more options
|
---|
106 | * are explained when invoking './configure --help'. The subdirectory where the executable is
|
---|
107 | * installed in will from now on be called "bindir".
|
---|
108 | *
|
---|
109 | * Next, we must unpack the archive containing the pseudopotential files pseudopots.tar.gz by
|
---|
110 | * entering 'tar -zxvf pseudopots.tar.gz' in the main dir of the package. Note that the
|
---|
111 | * pseudopotential files now reside in 'defaults/pseudopot/' respectively to the main dir.
|
---|
112 | *
|
---|
113 | * At last, you may re-generate this very documentation of the pcp package with the help of the
|
---|
114 | * 'doxygen' package (http://www.doxygen.org/) by execution of:
|
---|
115 | * - make doxygen-doc (edit the paths in doxygen.cfg beforehand though!)
|
---|
116 | *
|
---|
117 | * Even more, if you are interested in the documentation of the simple molecuilder sub programme,
|
---|
118 | * enter /home/foo/pcp/build/util/molecuilder and execute the same command therein which generates
|
---|
119 | * the documentation in /home/foo/pcp/util/molecuilder/doc/html (and may be viewed by giving
|
---|
120 | * /home/foo/pcp/util/molecuilder/doc/html/index.html to a standard web browser).
|
---|
121 | *
|
---|
122 | * \section use Usage
|
---|
123 | *
|
---|
124 | * Having unpacked, compiled and installed the programme, we are actually set to go
|
---|
125 | * besides the specification of the actual physical problem. If you desire a ste-by-step
|
---|
126 | * introduction to the handling of the simple hydrogen molecule, be referred to the section
|
---|
127 | * \ref tutorial.
|
---|
128 | *
|
---|
129 | * \subsection specify Specifying the Problem
|
---|
130 | *
|
---|
131 | * There are two things necessary:
|
---|
132 | * - a configuration file with various informations from stop conditons to Z number
|
---|
133 | * or the basic cell vectors, giving its volume, number of ions, position and type
|
---|
134 | * and so on ...
|
---|
135 | * The given configuration file in the subdirectory "defaults" is fully documented
|
---|
136 | * and its sense should be easy to grasp thereby - or see below in the section
|
---|
137 | * \ref config.
|
---|
138 | * - pseudopotential files which reside in a certain directory specified in the
|
---|
139 | * config file. Ine is necessary for each different atom (discerned by atomic
|
---|
140 | * number Z) specifying the pseudo potential on a grid, generated by the fhi98PP
|
---|
141 | * package: http://www.fhi-berlin.mpg.de/th/fhi98md/fhi98PP/index.html\n
|
---|
142 | * These are also already present in the directory called "defaults/pseudopot".
|
---|
143 | *
|
---|
144 | * \subsection exec Programme execution with mpirun ...
|
---|
145 | *
|
---|
146 | * Basicially the programme is started as follows:
|
---|
147 | * - mpirun -np 1 -vvv -machinefile /home/foo/.rhosts pcp ../defaults/main_pcp\n
|
---|
148 | * where "-np 1" states the number of processors to use, so here just one, "-vvv"
|
---|
149 | * relates to the verbosity of the intermediate output (more v's, more output),
|
---|
150 | * "-machinefile" points to the rsh text file with allowed hosts (present on all
|
---|
151 | * hosts) and last but by no means least the config file with relative path
|
---|
152 | * "../defaults/main_pcp".\n
|
---|
153 | * Take care that your machinefile contains enough hosts and also that "-np"
|
---|
154 | * matches the product of ProcPEPsi and ProcPEGamma specified in the config file.
|
---|
155 | * You won't see the error messages, when one of clients hangs (mpirun just idles
|
---|
156 | * on all others as they cannot reach the host whose pcp has stopped).
|
---|
157 | *
|
---|
158 | * The important input and output is all done via files. Configuration and PseudoPot
|
---|
159 | * files are read on startup when the Problem structure, with its various parts, is
|
---|
160 | * initialized. Each given amount of made steps forces, energies, ion positions and
|
---|
161 | * the electronic density if desired are written to file, which can be read and
|
---|
162 | * visualized lateron by the Open Data Explorer.
|
---|
163 | *
|
---|
164 | * Let's now come to the parallel execution of the programme on multiple hosts.
|
---|
165 | * Generally, it is recommended to choose between n+1 and 2n processes for n hosts.
|
---|
166 | * There are two ways of connection to the other clients, either via rsh or ssh.
|
---|
167 | * Seemingly, mpich automatically picks the right one, first rsh, then ssh.
|
---|
168 | *
|
---|
169 | * Note:
|
---|
170 | * -# Make sure that the executable with its needed input files are present on all
|
---|
171 | * other machines (Most common way is a shared directory, such as the user's home
|
---|
172 | * residing on a server shared via NFS to all clients)
|
---|
173 | * -# Make sure that the paths are correct and the same on all hosts (also the config paths)
|
---|
174 | * -# Use a homogeneous set of clients (no mixing of architectures or slow with fast
|
---|
175 | * systems!)
|
---|
176 | *
|
---|
177 | * \subsubsection rsh ... with RSH
|
---|
178 | *
|
---|
179 | * Most important in this case is the file .rhosts in your home, where line by line
|
---|
180 | * the hostnames of available computers are included. Via RSH-Login the programme is
|
---|
181 | * launched on all machines by mpirun.\n
|
---|
182 | *
|
---|
183 | * \subsubsection ssh ... with SSH
|
---|
184 | *
|
---|
185 | * Via a passphrase and an ssh agent a much safer - due to encryption - way is used
|
---|
186 | * to connect to other clients, employed as follows
|
---|
187 | * - ssh-keygen -t rsa (enter keyphrase, public and private key is stored in ~/.ssh/)
|
---|
188 | * - cat ~/.ssh/\*.pub >>~/.ssh/authorized_keys (if home is shared via nfs) OR \n
|
---|
189 | * cat ~/.ssh/\*.pub | ssh user\@remote-system 'cat>>.ssh/authorized_keys' (when the
|
---|
190 | * user home is not shared via nfs for each remote-system)
|
---|
191 | *
|
---|
192 | * Afterwards, start an SSH agent and 'ssh-add' the above pass-phrase. It
|
---|
193 | * might be a good idea, to give a limited timespan 'ssh-add -t 86400' (seconds).
|
---|
194 | *
|
---|
195 | * Test it by trying something like 'ssh host /bin/ls /bin'. It should work (just print
|
---|
196 | * the /bin directory) without any queries or halts. Otherwise remove host and its IP
|
---|
197 | * from .ssh/known_hosts and try again. Check this for all hosts in your .rhosts,
|
---|
198 | * or try 'tstmachines -machinefile=/home/foo/.rhosts' which is supplied with mpich.
|
---|
199 | *
|
---|
200 | * The ssh method is especially preferrable if the hosts do not reside within a protected
|
---|
201 | * network, such as shielded by a firewall from the outside internet with internal IP
|
---|
202 | * adresses.
|
---|
203 | *
|
---|
204 | * \subsection results The Results
|
---|
205 | *
|
---|
206 | * Process 0 does all the output and is always started on your localhost.
|
---|
207 | * There a number of different output files if the programme is told to produce
|
---|
208 | * them in the configuration file. They all beginn with "pcp." and most end with
|
---|
209 | * the output step count in 4 digits such as ".0001". In between lies the actual
|
---|
210 | * content information:
|
---|
211 | * - "speed": Time measurements for the various sections of the calculations.
|
---|
212 | * - "density": There are three types: "doc" and "dx" specifiy the file format for
|
---|
213 | * OpenDx, while "data" contains the actual densities. These are only produced
|
---|
214 | * if DoOutVis is set to 1
|
---|
215 | * - "ions": "pos" means position, "force" contains forces from the dynamics
|
---|
216 | * simulation represented by arrows in OpenDx, "datZ", "doc" and "dx" are again
|
---|
217 | * only needed to specifiy the file format.
|
---|
218 | * - "energy.all": Total energy
|
---|
219 | * - "forces.all": Total force
|
---|
220 | *
|
---|
221 | * There are visualization scripts included in the subdirectory "defaults" such
|
---|
222 | * as "visual.cfg" for use with OpenDx.
|
---|
223 | *
|
---|
224 | *
|
---|
225 | * \section start Getting started
|
---|
226 | *
|
---|
227 | * In Order to get a first understanding of the programme, here is a brief explanation of its most
|
---|
228 | * important functions:
|
---|
229 | * -# Init(): Super-function to most of the initialisation subroutines, that allocate memory or
|
---|
230 | * setup the wave functions randomly or calculate first energies and densities.
|
---|
231 | * -# CalculateMD(): Contains the super-loop that calculate the force acting on the ions, moves them,
|
---|
232 | * updates the electronic wave functions and prints results to file.
|
---|
233 | * -# CalculateForce(): On evaluating the force acting on the ions also the actual minimisation of
|
---|
234 | * the wave functions - one after the other - is done, energies and densities calculated on ever
|
---|
235 | * finer grids.
|
---|
236 | * -# CalculateNewWave(): Is the actual (Fletcher&Reeves) conjugate gradient implementation that
|
---|
237 | * minimises one wave function by calculating the gradient, preconditioning the resulting matrix,
|
---|
238 | * retrieving the conjugate direction and doing the line search for the one-dimensional minimum.
|
---|
239 | *
|
---|
240 | * Most of these functions call many others in sequence, so be prompted to their respective
|
---|
241 | * documentation for further explanation and formula.
|
---|
242 | *
|
---|
243 | * \subsection follow Following the course of the programme
|
---|
244 | *
|
---|
245 | * When the programme is launched with the option "-vvv" for greater verbosity a series of output
|
---|
246 | * is printed to the screen, which briefly shall be explained here.
|
---|
247 | *
|
---|
248 | * At first there is the initialization output stating number of nodes on the ever more finely
|
---|
249 | * grids in normal and reciprocal space, density checks, parameters read from the configuration
|
---|
250 | * file and so on, until the minimisation finally commences.
|
---|
251 | *
|
---|
252 | * The most prominent line then looks as follows:
|
---|
253 | * (0,0,0)S(0,0,0): TE: -1.750005e+01 ATE: -1.825983e+01 Diff: 1.000000e+00 --- d: 6.634701e-01 dEdt0: -1.943969e+00 ddEddt0: 9.673485e-01
|
---|
254 | *
|
---|
255 | * The first three numbers refer to process numbers within MPI communicators: SpinUp/Double(0)SpinDown(1),
|
---|
256 | * number within coefficients sharing, within wave functions sharing group.
|
---|
257 | * The next three numbers after the "S" refer to: Number of performed minimisation steps, number of
|
---|
258 | * currently minimised local wave function, number of minimisation steps performed on this wave function.
|
---|
259 | * Afterwards follow: Total energy (TE), approximated total energy (ATE), relative error in the prediction (diff),
|
---|
260 | * delta (the one-dimensional line search parameter \f$cos \Theta\f$), the first (dEdt0) and second (ddEddt0)
|
---|
261 | * derivative of the total energy functional (at parameter = 0).
|
---|
262 | *
|
---|
263 | * Ever and again there appear checks such as:
|
---|
264 | * -# "ARelTE: 1.387160e-02 ARelKE: 3.074929e-03", stating the actual relative difference in total and
|
---|
265 | * kinetic energy between this and the last check
|
---|
266 | * -# "(0)TestGramSchm: Ok !", having tested the desired orthogonality of the wave functions
|
---|
267 | * -# "Beginning minimisation...", starting a perturbed minimisation run.
|
---|
268 | *
|
---|
269 | * At the end the programme will print a number of different energies to the screen: total, Hartree,
|
---|
270 | * ionic, kinetic, ... energies.
|
---|
271 | *
|
---|
272 | *
|
---|
273 | * \section tech Technical Details
|
---|
274 | *
|
---|
275 | * \subsection config The Configuration file explained
|
---|
276 | *
|
---|
277 | * The first column always contains a keyword (for which the programme parses, so
|
---|
278 | * the ordering as given here needn't be replicated) following one or more values
|
---|
279 | * or strings.
|
---|
280 | *
|
---|
281 | * - mainname: The short name of the Programme
|
---|
282 | * - defaultpath: Full path indicating where to put the output files during runtime
|
---|
283 | * - pseudopotpath: Full path to the needed PseudoPot files
|
---|
284 | * - ProcPEPsi: Number of process (groups consisting of ProcPEPsi processes) among which
|
---|
285 | * wave functions are split up (e.g. MaxPsiDouble 16, ProcPEPsi 4, ProcPEGamma 2 results
|
---|
286 | * in 4 wave functions per group and each group has 2 processes sharing coefficients)
|
---|
287 | * - ProcPEGamma: Number of processes which make up one wave functions coefficients sharing group
|
---|
288 | * - DoOutVis: Output files for OpenDX (1 - yes, 0 - no)
|
---|
289 | * - DoOutMes: Output Measurement (1 - yes, 0 - no)
|
---|
290 | * - AddGramSch: Do an additional GramSchmidt-run after a Psi has been minimised (hack)
|
---|
291 | * - Seed: Initialization value for pseudo random number generator on generating wave function coefficients, 0 means
|
---|
292 | * constant 1/V for perturbed wave functions.
|
---|
293 | * - UseSeparation: Minimise unoccupied states in a separate run per level indepedent of occupied states
|
---|
294 | * - UseFermiOccupation: After each minimisation run, occupation of orbitals will be reset according to Fermi distribution
|
---|
295 | * - MaxOuterStep: Number of to be performed Molecular Dynamics steps
|
---|
296 | * - Deltat: time slice per MD step
|
---|
297 | * - OutVisStep: Output visual data at every ..th step
|
---|
298 | * - OutSrcStep: Output measurement data at every ..th step
|
---|
299 | * - TargetTemp: Target temperature in the cell (this medium ion velocity)
|
---|
300 | * - ScaleTempStep: MD temperature is scaled at every ..th step
|
---|
301 | * - MaxPsiStep: number of minimisation steps per state(!) before going on to next local state (0 - default (3))
|
---|
302 | * - MaxMinStep: standard level stop criteria - maximum number of minimisation steps over all states
|
---|
303 | * - RelEpsTotalE: standard level stop criteria - minimum relative change in total energy
|
---|
304 | * - RelEpsKineticE: standard level stop criteria - minimum relative change in kinetic energy
|
---|
305 | * - MaxMinStopStep: standard level stop condition is evaluated at every ..th step, should scale with orbital number
|
---|
306 | * - MaxMinGapStopStep: standard level stop condition for gap calculation is evaluated at every ..th step
|
---|
307 | * - MaxInitMinStep: initial level stop criteria - maximum number of minimisation steps over all states
|
---|
308 | * - InitRelEpsTotalE: initial level stop criteria - minimum relative change in total energy
|
---|
309 | * - InitRelEpsKineticE: initial level stop criteria - minimum relative change in kinetic energy
|
---|
310 | * - InitMaxMinStopStep: initial level stop condition is evaluated at every ..th step
|
---|
311 | * - InitMaxMinGapStopStep: initial level stop condition is evaluated at every ..th step
|
---|
312 | * - BoxLength: Length of the super cell in atomic units (lower triangle matrix: 1 value, 2 values, 3 values)
|
---|
313 | * - ECut: Energy cutoff specifying how man plane waves are used (in Hartrees)
|
---|
314 | * - MaxLevel: number of different mesh levels in the code, >=2
|
---|
315 | * - Level0Factor: Mesh points Factor for the topmost level
|
---|
316 | * - RiemannTensor: Use RiemannTensor
|
---|
317 | * - PsiType: Whether orbits are always doubly occupied or spin up and down is discerned (and completely separated in calculation, needs at least 2 processes!)
|
---|
318 | * - MaxPsiDouble: specifying how many doubly occupied orbits there are (either this or next depends on given PsiType)
|
---|
319 | * - PsiMaxNoUp/PsiMaxNoDown: specifying how many spin up or down occupied orbits there are
|
---|
320 | * - RCut: Radial cutoff in the Ewald summation
|
---|
321 | * - IsAngstroem: length in 0 - Bohr radii or in 1- Angstrᅵm
|
---|
322 | * - MaxTypes: maximum number of different ion types (important for next constructs)
|
---|
323 | * - Ion_TypeNr: 6 values - Number of ions, atomic value, Gauss radius, maximum angular momentum of Pseudopotenials, L_loc and mass of ion (Nr in keyword must go from 1..MaxTypes!)
|
---|
324 | * - Ion_TypeNr_Nr: 4 values - position (x,y,z) in atomic units, IonMoveType (0 - movable, 1 - fixed).For each stated IonType there must such a keywor for each ion (second number goes from 1..Number of ions!)
|
---|
325 | *
|
---|
326 | * \section notes Notes on implementation and code understanding
|
---|
327 | *
|
---|
328 | * \subsection densities Densities and Wave functions
|
---|
329 | *
|
---|
330 | * The coefficients of densities and wave functions both in real and reciprocal space are only in parts stored locally
|
---|
331 | * on the process. Coefficients of one wave functions may be shared under a number of processes, increasing speed when
|
---|
332 | * doing loops over G - reciprocal vector, and the wave functions as a whole may also be shared, increasing Fourier transform
|
---|
333 | * and also minimization - however with the drawback of parallel conjugate gradient minimization which might lead to errors.
|
---|
334 | *
|
---|
335 | * Additionally, the coefficients are structured in levels. The grid is present in ever finer spacings, which is very helpful
|
---|
336 | * in the initial minimization after the wave functions have been randomly initialized. Also, the density is always calculated
|
---|
337 | * on a grid twice as fine as the one containing wave function coefficients, as the fourier transform is then exact. That's
|
---|
338 | * why there are always two levels: LatticeLevel LevS and Lev0. The first being the standard level with the wave functions,
|
---|
339 | * the latter for densities (one often stumbles over "Dens0" as a variable name). Also there two kinds of densities:
|
---|
340 | * DensityArray and DensityCArray, the first contains densities in real space, the latter in reciprocal space. However, both
|
---|
341 | * are densities, hence the scalar product of their respective wave functions.
|
---|
342 | * For this reason one finds often construct which use a position factor in order to transform wave coefficients into ones
|
---|
343 | * on a higher level, followed by a fourier transform and a transposition, see e.g. CalculateOneDensityR(). It is often
|
---|
344 | * encountered throughout the code.
|
---|
345 | *
|
---|
346 | *
|
---|
347 | * © 2006 Frederik Heber
|
---|
348 | */
|
---|
349 |
|
---|
350 |
|
---|
351 | /*! \page tutorial Hands-on Tutorial
|
---|
352 | *
|
---|
353 | *
|
---|
354 | * In this section we present a step-by-step tutorial, which leads you from the creation of a typical config file for a hydrogen
|
---|
355 | * molecule through the actual calcualtion to the analysis with final plots and graphics.
|
---|
356 | *
|
---|
357 | * \section configfile Creating the config file
|
---|
358 | *
|
---|
359 | * First, we need to create the config file. For this we use the utility 'molecuilder' which resides in the util subdirectory
|
---|
360 | * and is compiled upon execution of 'make' as described in Section \ref process and finally installed in the bindir. Go there,
|
---|
361 | * pick and create a directory, such as '/tmp/H2/' and start the utility:
|
---|
362 | * - ./molecuilder /tmp/H2/H2-config
|
---|
363 | *
|
---|
364 | * You might want to change the config file name, here chosen as 'H2-config', according to your personal liking, it is completely
|
---|
365 | * arbitrary. It is however recommended to pick an individual directory for each molecule upon which you choose to perform DFT
|
---|
366 | * calculations as there are a number of files which are produced during the run.
|
---|
367 | *
|
---|
368 | * Molecuilder will first ask for a cell size, which is entered by typing in the six elements of a symmetric matrix, for an cubic
|
---|
369 | * cell of 20 a.u. length, enter 20, 0, 20, 0, 0, 20.
|
---|
370 | *
|
---|
371 | * Next, you see two lists, both are empty, and a menu. So far no elements or atoms have been put into the cell. We want to add an
|
---|
372 | * atom, press 'a' and enter. Now we have to choose in which way we want to add the atom. The choice is either in absolute (x,y,z)
|
---|
373 | * coordinates, or relative to either another absolute point or to an already placed atom. The last option is used in the case when
|
---|
374 | * there are two atoms and you want to add a third according to its angle and distance to the former two.
|
---|
375 | *
|
---|
376 | * For now, we add the first hyodrogen atom. We want to put the molecule which consists of two hydrogen atoms at a distance of 1.4
|
---|
377 | * atomic units symmetrically off the cell's centre. Thus we choose option b, enter 10, 10, 10 as the reference point and then give the
|
---|
378 | * relative coordinates as 0.7, 0, 0. Thus, the molecule will be aligned along the x axis. The Z number is 1, in the case of hydrogen.
|
---|
379 | * Upon the last enter you notice that both the element and the atom list have filled a bit. Let's add the second atom. Choose 'a'
|
---|
380 | * again and option c this time, picking the first already entered atom as the reference point, by entering 0 ('number in molecule')
|
---|
381 | * next. Enter: -1.4, 0 , 0, and again Z = 1.
|
---|
382 | * Afterwards, we see that in the element list, the second entry changed from 1 to 2. It's the number of atoms of this element which
|
---|
383 | * the molecules consists of so far. Next is Z number, R-cut, lmax and lloc, the element's mass and a name and short hand which is
|
---|
384 | * taken from the database in the file 'elements.db' which must reside in the same directory as molecuilder.
|
---|
385 | *
|
---|
386 | * Finally, we need to specify some additional info in the config file that's non-standard. Enter 'e' in the menu to edit the
|
---|
387 | * current configuration. The list is huge, you might have to scroll a bit and don't panic, most of these are trivial and will
|
---|
388 | * become comprehensible with time. First is the paths, option 'B' and 'C'. Upon entering the option, it will give the the
|
---|
389 | * "old" value and request a new one. Enter:
|
---|
390 | * - B: /tmp/H2/
|
---|
391 | * - C: /home/foo/pcp/defaults/pseudopot
|
---|
392 | *
|
---|
393 | * The latter one is the path to the pseudopotential files which reside in the archive 'pseudopots.tar.gz' in /home/foo/pcp or
|
---|
394 | * whereever you have extracted the distributed pcp package to. Unzip it by 'tar -zxvf pseudopots.tar.gz' and it will create an
|
---|
395 | * additional directory called defaults wherein another directory called pseudopot resides containing the files for element with
|
---|
396 | * atomic number 1 up to 80 -- these are the files generated with the FHIMD PseudoPotential Generator.
|
---|
397 | *
|
---|
398 | * As the molecule and all necessary config file settings are entered, enter 's' to save the config file under the filename stated
|
---|
399 | * on the command-line and 'q' to quit.
|
---|
400 | *
|
---|
401 | *
|
---|
402 | * Now, we take a look at the created config file itself. Take your favorite editor and compare it to the explanations given in the
|
---|
403 | * secion \ref config. According to your machine setup, the following entries must be changed:
|
---|
404 | * - ProcPEPGamma/ProcPEPsi: Their product gives number of processes which must be started. As a beginning you might leave 1 here for
|
---|
405 | * both entries.
|
---|
406 | * - VectorPlane: Enter 1 (it's a cut plane for a two-dimensional representation of the current density)
|
---|
407 | * - VectorCut: Enter 10. (cut is done at this coordinate along the by VectorPlane given axis)
|
---|
408 | * - BoxLength: Check the six entries of the symmetric matrix for the correct cell size
|
---|
409 | * - ECut: In general, both the precision and time needed for the calculation. Reasonable values for H2 are 8. to 24.
|
---|
410 | * - MaxPsiDouble: Enter 1 here, it's the number of doubly occupied orbitals which is one in case of the hydrogen molecule
|
---|
411 | * - AddPsis: Leave 0, this is the number of unoccupied orbitals which we do not need.
|
---|
412 | * - RCut: Enter 20., the cell size. It's another numerical cutoff here for the ewald summation of the infinite coulomb potential.
|
---|
413 | *
|
---|
414 | * This sums it up. You might also have noticed three entries, one for the element and another two for each hydrogen atom, very similar
|
---|
415 | * in appearance to the list given by molecuilder. Compare to the explanation given in the config file section \ref config above.
|
---|
416 | * Of course, the above shown changes could also have been made during the edit menu of molecuilder, we just wanted to demonstrate that
|
---|
417 | * it's perfectly safe to use a text editor as well (any more than one white space such as some more spaces and/or tabs are
|
---|
418 | * automatically read over during the parsing of the config file).
|
---|
419 | *
|
---|
420 | * We may now start a calculation.
|
---|
421 | *
|
---|
422 | * \section launching Performing the calculation
|
---|
423 | *
|
---|
424 | * Enter the directory which you specified as the 'bindir' where the executable shall be installed in, let's say for this example
|
---|
425 | * it's '/tmp/pcp/run'. Now, if you have the mpi package installed and are able to launch mpirun, enter this:
|
---|
426 | *
|
---|
427 | * - mpirun -np 1 ./pcp -vvv -w /tmp/H2/H2-config
|
---|
428 | *
|
---|
429 | * Here, '1' is the number of processes needed, the product of ProcPEPGamma and ProcPEPsi. If not, enter this:
|
---|
430 | *
|
---|
431 | * - ./pcp -vvv -w /tmp/H2/H2-config
|
---|
432 | *
|
---|
433 | * More comfortably we may also take the unix shell script 'run', which uses mpirun and evaluates the product by itself. However,
|
---|
434 | * as it is platform-dependent and thus it is provided just as an idea-giver. It is installed in the bindir from subdirectory 'run'
|
---|
435 | * during 'make install'. However, its executable permissions must still be set: e.g. 'chmod 755 run' (The same applies for all
|
---|
436 | * other 'shell scripts' used in this tutorial). Also, the common paths are set in the file 'suffix', also in the bindir and must
|
---|
437 | * be adapted to your local context. Basically, there is always the rather uncomfortable way of doing every by hand, which is
|
---|
438 | * explained as well, or use these scripts if you can make them work.
|
---|
439 | *
|
---|
440 | * - Enter: ./run to see it's options
|
---|
441 | * - Enter: ./run -vvv /tmp/H2/H2-config H2 to run (here a log file pcp.H2.log is written, with the tag 'H2' which may be chosen arbitrarily).
|
---|
442 | *
|
---|
443 | * Possible errors here are as follows:
|
---|
444 | * - paths in 'suffix' are not changed or were overwritten with defaults on make 'install'
|
---|
445 | * - permission denied: 'chmod 755 run' missing (necessary after 'make install')
|
---|
446 | * - pseudopots not unpacked or wrong path given in config file
|
---|
447 | * - charged system: MaxPsiDouble (..SpinUp/Down) does not match required number of orbitals of the system
|
---|
448 | *
|
---|
449 | * Given 8 (hartree) as ECut and a moderately well performing computer numbers should rush by for a few seconds. Afterwards, the calculations
|
---|
450 | * has already finished (this is of course only the case for a simple molecule such as the hydrogen one). The last hundred lines or so
|
---|
451 | * consist of calculated values, such as the susceptibility tensor and among others the isometric values, shielding for each hydrogen atom,
|
---|
452 | * the total energy (though beware, due to the pseudopotential ansatz for a general molecule it is not equivalent to the real absolute binding
|
---|
453 | * energy) and the perturbed energy which are of minor interest.
|
---|
454 | *
|
---|
455 | * A number of file where generated in /tmp/H2/, some general ones are listed in sections above, we want to discuss a chosen few here:
|
---|
456 | *
|
---|
457 | * - LOG: pcp.H2.log contains all the lines which you just might have missed before (only if pcp was started with 'run'-script)
|
---|
458 | * - Cut plane: pcp.current....csv contains (for each process of ProcPEPGamma) it's part of the specified cut plane of the current density.
|
---|
459 | * - Susceptibility: pcp.chi.csv contains first an explanatory line and then one with ten entries, first is the ecut, then follow the nine entries of
|
---|
460 | * the rank 2 tensor.
|
---|
461 | * - shielding: pcp.sigma_i0_H.csv and ...i1... respectively contain the same as pcp.chi.csv only for the shielding tensor.
|
---|
462 | *
|
---|
463 | * And this is it. Of course, \f$H_2\f$ is a very simple molecule. Regard a case where you might want to calculate a (4,3) nanotube with
|
---|
464 | * hundreds of symmetrically aligned carbon atoms. Each of these will produce a pcp.sigma_i..csv, though all should be the same in the
|
---|
465 | * end. Also, we would like to have convergence plots and vector graphs of the current density and its values.
|
---|
466 | *
|
---|
467 | * In order to produce convergence plots, we need to perform the calculations for various cutoff energies (ECut). You may either move all
|
---|
468 | * the files in /tmp/H2 to e.g. /tmp/H2/8Ht/, edit the config file, pick 12Ht now as ECut and re-run the calculations, move the results
|
---|
469 | * and so on for four to six different values. An easier way is the script file 'test_for_ecut.sh'. Delete all pcp.*-files in /tmp/H2/
|
---|
470 | * (Thus it is always good practice not to pick pcp as prefix for the config file!) and enter in the bindir subdirectory:
|
---|
471 | *
|
---|
472 | * - chmod 755 test_for_ecut (as it is platform-dependent as well, otherwise you get an error: permission denied)
|
---|
473 | * - ./test_for_ecut.sh to see it complaining the lack of a config file
|
---|
474 | * - ./test_for_ecut.sh /tmp/H2/H2-config to see its desired options and the current ecut list (it's a string in the script file adapted to
|
---|
475 | * 4 processes sharing the coefficients)
|
---|
476 | * - ./test_for_ecut.sh /tmp/H2/H2-config H2 24 to launch it.
|
---|
477 | *
|
---|
478 | * Notice that it produces no ouput as does pcp. The lines are redirected into the log file whose tag you specified as 'H2'. As we want to
|
---|
479 | * check on what it's doing, we ought to have started it using 'nohup ./test_for...... 24 &' to launch it as a child process. Now,
|
---|
480 | * we need to press CTRL-Z to stop it and enter 'bg' to background it. Look into /tmp/H2. Notice various new subdirectories
|
---|
481 | * resembling the specified ecuts. Enter 'tail -f /tmp/H2/3Ht/pcp.H2.log' to see that the first calculation is already finished.
|
---|
482 | * Continue with 12Ht and furtheron if you are impatient. Otherwise, wait for the shell script to terminate. Now, we have multiple
|
---|
483 | * points for a convergence graph to evaluate. Thus, onward to the analysis.
|
---|
484 | *
|
---|
485 | *
|
---|
486 | * \section analysis Analyzing the results
|
---|
487 | *
|
---|
488 | * First, let's have a look at the cut plane and isospheres of the current density.
|
---|
489 | *
|
---|
490 | * - start gnuplot, enter "plot 'pcp.current_proc0.csv' with vectors" in one of the various cutoff directories. Notice the rotating vector
|
---|
491 | * field which the magnetic field induced in between the two hydrogen atoms.
|
---|
492 | * - start opendx by entering dx, load Programm visual.net which resides in the defaults subdirectory of the archive. It may take a moment
|
---|
493 | * the more complex the grid (the higher the energy cutoff) is and will then render three-dimensional isospheres of the current density
|
---|
494 | * on the screen. You may rotate them and 'Sequence Control' let's you see each the current density of of the three magnetic field
|
---|
495 | * directions.
|
---|
496 | *
|
---|
497 | * Now onward to the real analysis.
|
---|
498 | * Here again, we use some shell scripts which gather the results from the various calculations into one file (as you may guess, they pick
|
---|
499 | * the last lines from each dir from the .csv-files and gather them in one file with the same name in the main config directory. Enter in
|
---|
500 | * the bindir (don't forget to chmod):
|
---|
501 | *
|
---|
502 | * - ./gather_all_results /tmp/ H2, where the first argument gives the absolute directory and H2 the molecules subdirectory.
|
---|
503 | *
|
---|
504 | * It will print some stuff and finish. If you have gnuplot installed, some plots may briefly appear as a flicker on the screen. They
|
---|
505 | * created as a postscript in one file called H2.ps. Its purpose is purely to quickly check on the general convergence of each value and
|
---|
506 | * to pinpoint mavericks. Afterwards, you will find a number of results-...-files in the main config directory /tmp/H2. They contain
|
---|
507 | * the gathered chi and sigmas for the various energy cutoffs that were calculated. They are used in the creation of H2.ps. It is
|
---|
508 | * easy to create your own convergence analysis from these. Note also that in 'pcp.convergence.csv' you find a lot of variables, each
|
---|
509 | * run more ore less adds a new line. Thus, this file might be especially useful to check convergence of lets-say sigma against
|
---|
510 | * ecut or against the lattice constant (if you performed different runs with varyzing constants). Basically, all .csv-Files are
|
---|
511 | * results (the suffix may be changed in bindir/suffix though).
|
---|
512 | *
|
---|
513 | * That completes the magnetic analysis of the hydrogen molecule.
|
---|
514 | *
|
---|
515 | * But there are more complex molecules and there we want to delve more deeply into the topic of parallel computing.
|
---|
516 | *
|
---|
517 | * © 2006 Frederik Heber
|
---|
518 | */
|
---|
519 |
|
---|
520 | #include<signal.h>
|
---|
521 | #include<stdlib.h>
|
---|
522 | #include<stdio.h>
|
---|
523 | #include<math.h>
|
---|
524 | #include"mpi.h"
|
---|
525 | #include"data.h"
|
---|
526 | #include"errors.h"
|
---|
527 | #include"helpers.h"
|
---|
528 | #include"init.h"
|
---|
529 | #include"pcp.h"
|
---|
530 | #include"opt.h"
|
---|
531 | #include"myfft.h"
|
---|
532 | #include"gramsch.h"
|
---|
533 | #include"output.h"
|
---|
534 | #include "run.h"
|
---|
535 | #include "pcp.h"
|
---|
536 |
|
---|
537 | #define MYSTOPSIGNAL SIGALRM //!< external signal which gracefully stops calculations
|
---|
538 |
|
---|
539 | static void InitSignals(void);
|
---|
540 |
|
---|
541 | void (*default_sighandler)(int); //!< deals with external signal, set to \ref SigHandlerCPULIM()
|
---|
542 | volatile sig_atomic_t cpulim = 0; //!< contains external signal
|
---|
543 | int par_cpulim = 0; //!< for all MPI nodes: 0 = continue, 1 = limit reached, end operation
|
---|
544 | int myrank = 0; //!< Rank among the multiple processes
|
---|
545 |
|
---|
546 | /** Handles external Signal CPULimit.
|
---|
547 | * \ref cpulim is set to 1 as a flag
|
---|
548 | * especially if time limit is exceeded.
|
---|
549 | * \param sig contains signal code which is printed to screen
|
---|
550 | */
|
---|
551 | static void SigHandlerCPULIM(int sig) {
|
---|
552 | if(myrank == 0)
|
---|
553 | fprintf(stderr,"process %i received signal %d\n", myrank, sig);
|
---|
554 | cpulim = 1;
|
---|
555 | return;
|
---|
556 | }
|
---|
557 |
|
---|
558 | /** Checks if CPU limit is reached.
|
---|
559 | * is supposed to be called ever and again, checking for \ref cpulim,
|
---|
560 | * then \ref cpulim is broadcasted to all MPI nodes preventing deadlock
|
---|
561 | \note On reaching time limit: save all in datafile, close everything
|
---|
562 | * \param *P \ref Problem at hand
|
---|
563 | * \return is equal to cpulim
|
---|
564 | */
|
---|
565 | int CheckCPULIM(struct Problem *P) {
|
---|
566 | /* Bei Zeitlimit: evtl DataFile ausgeben, alles abschliessen */
|
---|
567 | par_cpulim = cpulim;
|
---|
568 | MPI_Bcast(&par_cpulim, 1, MPI_INT, 0, MPI_COMM_WORLD);
|
---|
569 | if (!par_cpulim) return 0;
|
---|
570 | if (myrank == 0) fprintf(stderr,"cpulim reached, finish data\n");
|
---|
571 | return 1;
|
---|
572 | /* Ende einleiten ! */
|
---|
573 | }
|
---|
574 |
|
---|
575 | /** Initializes signal handling.
|
---|
576 | * Sets \ref myrank to 0 and installs \ref SigHandlerCPULIM
|
---|
577 | */
|
---|
578 | static void InitSignals(void) {
|
---|
579 | myrank = 0/*P->Par.me*/;
|
---|
580 | default_sighandler = signal(MYSTOPSIGNAL, SigHandlerCPULIM);
|
---|
581 | }
|
---|
582 |
|
---|
583 | /** Checks additionally for signal.
|
---|
584 | * If signal has been processed, it gets re-processed by default handler
|
---|
585 | * in order to return correct status to calling script
|
---|
586 | */
|
---|
587 | static void CheckSigReturn(void) {
|
---|
588 | signal(MYSTOPSIGNAL, default_sighandler);
|
---|
589 | if(cpulim)
|
---|
590 | raise(MYSTOPSIGNAL);
|
---|
591 | }
|
---|
592 |
|
---|
593 | #if 0
|
---|
594 | #define testing 1
|
---|
595 | /** FFT Test.
|
---|
596 | * perfoms some tests on fft
|
---|
597 | * \param *P \ref Problem at hand
|
---|
598 | */
|
---|
599 | static double fftTest(struct Problem *P)
|
---|
600 | {
|
---|
601 | int i,d,ii,ix,iy,iz;
|
---|
602 | double MesTime1, MesTime2;
|
---|
603 | const struct Lattice *Lat = &P->Lat;
|
---|
604 | struct LatticeLevel *Lev = &Lat->Lev[STANDARTLEVEL];
|
---|
605 | struct fft_plan_3d *plan = Lat->plan;
|
---|
606 | struct Density *Dens = Lev->Dens;
|
---|
607 | fftw_complex *destC = Dens->DensityCArray[TotalLocalDensity];
|
---|
608 | fftw_real *destCR = (fftw_real *)destC;
|
---|
609 | fftw_real *destCR2 = Dens->DensityArray[TotalLocalDensity];
|
---|
610 | fftw_complex *work = (fftw_complex *)Dens->DensityCArray[TempDensity];
|
---|
611 | const double factor = 1./Lev->MaxN;
|
---|
612 | /* srand(1); */
|
---|
613 | for (d=0; d < (P->Par.me_comm_ST_Psi+P->Par.Max_me_comm_ST_Psi*P->Par.me_comm_ST_PsiT); d++)
|
---|
614 | for (i=0; i<Dens->LocalSizeR; i++)
|
---|
615 | rand();
|
---|
616 | for (i=0; i<Dens->LocalSizeR; i++) {
|
---|
617 | destCR[i] = 1.-2.*(rand()/(double)RAND_MAX);
|
---|
618 | destCR2[i] = destCR[i];
|
---|
619 | }
|
---|
620 | WaitForOtherProcs(P,0);
|
---|
621 | MesTime1 = GetTime();
|
---|
622 | fft_3d_real_to_complex(plan, STANDARTLEVEL, FFTNF1, destC, work);
|
---|
623 | for (i=0; i<Dens->LocalSizeC; i++) {
|
---|
624 | destC[i].re *= factor;
|
---|
625 | destC[i].im *= factor;
|
---|
626 | }
|
---|
627 | fft_3d_complex_to_real(plan, STANDARTLEVEL, FFTNF1, destC, work);
|
---|
628 | MesTime2 = GetTime();
|
---|
629 | for (i=0; i<Dens->LocalSizeR; i++) {
|
---|
630 | if (fabs(destCR2[i] - destCR[i]) >= MYEPSILON) {
|
---|
631 | iz = i % Lev->N[2];
|
---|
632 | ii = i / Lev->N[2];
|
---|
633 | iy = ii % Lev->N[1];
|
---|
634 | ix = ii / Lev->N[1];
|
---|
635 | fprintf(stderr,"FFT(%i)(%i,%i) =(%i)(%i,%i,%i) %g = |%g - %g| eps(%g)\n",P->Par.my_color_comm_ST,P->Par.my_color_comm_ST_Psi, P->Par.me_comm_ST_Psi, i, ix, iy, iz, fabs(destCR2[i] - destCR[i]), destCR2[i], destCR[i], MYEPSILON);
|
---|
636 | }
|
---|
637 | }
|
---|
638 | return(MesTime2-MesTime1);
|
---|
639 | }
|
---|
640 |
|
---|
641 | /** GS Test.
|
---|
642 | * performs test on Gram-Schmidt
|
---|
643 | * \param *P \ref Problem at hand
|
---|
644 | */
|
---|
645 | static double GSTest(struct Problem *P)
|
---|
646 | {
|
---|
647 | struct LatticeLevel *Lev = &P->Lat.Lev[STANDARTLEVEL];
|
---|
648 | int i,j,k;
|
---|
649 | double MesTime1, MesTime2;
|
---|
650 | int l = P->Lat.Psi.AllLocalNo[0]-1;
|
---|
651 | if (P->Par.my_color_comm_ST_Psi == 0) {
|
---|
652 | for (k=0; k<P->Par.me_comm_ST_Psi; k++)
|
---|
653 | for (j=0; j<2*Lev->AllMaxG[k]; j++)
|
---|
654 | rand();
|
---|
655 | l = P->Lat.Psi.LocalNo;
|
---|
656 | P->Lat.Psi.LocalPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal;
|
---|
657 | for (i=0; i < Lev->MaxG; i++) {
|
---|
658 | Lev->LPsi->LocalPsi[l][i].re = 1.-2.*(rand()/(double)RAND_MAX);
|
---|
659 | Lev->LPsi->LocalPsi[l][i].im = 1.-2.*(rand()/(double)RAND_MAX);
|
---|
660 | }
|
---|
661 | }
|
---|
662 | P->Lat.Psi.AllPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal;
|
---|
663 | WaitForOtherProcs(P,0);
|
---|
664 | MesTime1 = GetTime();
|
---|
665 | GramSch(P, &P->Lat.Lev[STANDARTLEVEL], &P->Lat.Psi, Orthonormalize);
|
---|
666 | MesTime2 = GetTime();
|
---|
667 | if (P->Par.my_color_comm_ST_Psi == 0) {
|
---|
668 | for (k=P->Par.me_comm_ST_Psi+1; k<P->Par.Max_me_comm_ST_Psi; k++)
|
---|
669 | for (j=0; j<2*Lev->AllMaxG[k]; j++)
|
---|
670 | rand();
|
---|
671 | }
|
---|
672 | return(MesTime2-MesTime1);
|
---|
673 | }
|
---|
674 | #endif
|
---|
675 |
|
---|
676 |
|
---|
677 | #ifndef MAXTESTRUN
|
---|
678 | #define MAXTESTRUN 100 //!< Maximum runs for a test
|
---|
679 | #endif
|
---|
680 |
|
---|
681 | #if 0
|
---|
682 | /** combined GS fft test.
|
---|
683 | * tests fft and Gram Schmidt combined
|
---|
684 | * \param *P \ref Problem at hand
|
---|
685 | */
|
---|
686 | static void TestGSfft(struct Problem *P)
|
---|
687 | {
|
---|
688 | int i, MaxG=0;
|
---|
689 | double fftTime = 0;
|
---|
690 | double GSTime = 0;
|
---|
691 | double *A = NULL;
|
---|
692 | double *AT = NULL;
|
---|
693 | double avarage, avarage2, stddev;
|
---|
694 | for(i=0; i < P->Par.Max_me_comm_ST_Psi; i++)
|
---|
695 | MaxG += P->Lat.Lev[STANDARTLEVEL].AllMaxG[i];
|
---|
696 | if (!P->Par.me_comm_ST_Psi) A = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_Psi,"TestGSfft");
|
---|
697 | if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) AT = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_PsiT,"TestGSfft");
|
---|
698 | for (i=0;i<MAXTESTRUN;i++)
|
---|
699 | fftTime += fftTest(P);
|
---|
700 | fftTime /= MAXTESTRUN;
|
---|
701 | MPI_Gather( &fftTime, 1, MPI_DOUBLE, A, 1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi );
|
---|
702 | if (!P->Par.me_comm_ST_Psi) {
|
---|
703 | avarage = 0;
|
---|
704 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++)
|
---|
705 | avarage += A[i];
|
---|
706 | avarage /= P->Par.Max_me_comm_ST_Psi;
|
---|
707 | avarage2 = 0;
|
---|
708 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++)
|
---|
709 | avarage2 += A[i]*A[i];
|
---|
710 | avarage2 /= P->Par.Max_me_comm_ST_Psi;
|
---|
711 | stddev = sqrt(fabs(avarage2-avarage*avarage));
|
---|
712 | fprintf(stdout,"fft: Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f std %f\n",
|
---|
713 | P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2],
|
---|
714 | MaxG,
|
---|
715 | P->Par.my_color_comm_ST,
|
---|
716 | P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi,
|
---|
717 | P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi,
|
---|
718 | avarage, stddev);
|
---|
719 | }
|
---|
720 | srand(2);
|
---|
721 | for (i=0;i<MAXTESTRUN;i++)
|
---|
722 | GSTime += GSTest(P);
|
---|
723 | GSTime /= MAXTESTRUN;
|
---|
724 | MPI_Gather( &GSTime, 1, MPI_DOUBLE, A, 1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi );
|
---|
725 | if (!P->Par.me_comm_ST_Psi) {
|
---|
726 | avarage = 0;
|
---|
727 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++)
|
---|
728 | avarage += A[i];
|
---|
729 | avarage /= P->Par.Max_me_comm_ST_Psi;
|
---|
730 | avarage2 = 0;
|
---|
731 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++)
|
---|
732 | avarage2 += A[i]*A[i];
|
---|
733 | avarage2 /= P->Par.Max_me_comm_ST_Psi;
|
---|
734 | stddev = sqrt(fabs(avarage2-avarage*avarage));
|
---|
735 | fprintf(stdout,"GS : Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f std %f\n",
|
---|
736 | P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2],
|
---|
737 | MaxG,
|
---|
738 | P->Par.my_color_comm_ST,
|
---|
739 | P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi,
|
---|
740 | P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi,
|
---|
741 | avarage, stddev);
|
---|
742 | }
|
---|
743 | if (!P->Par.me_comm_ST_Psi) Free(A, "TestGSfft: A");
|
---|
744 | if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) Free(AT, "TestGSfft: AT");
|
---|
745 | }
|
---|
746 | #endif
|
---|
747 |
|
---|
748 | /** Run Simulation.
|
---|
749 | * just calls \ref CalculateMD()
|
---|
750 | \if testing==1
|
---|
751 | * also place to put \ref TestGSfft(P), \ref GSTest(P) or \ref fftTest(P)
|
---|
752 | \endif
|
---|
753 | * \param *P \ref Problem at hand
|
---|
754 | */
|
---|
755 | static void RunSim(struct Problem *P) {
|
---|
756 | if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Running Simulation !\n",P->Par.me);
|
---|
757 | CalculateMD(P);
|
---|
758 | if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Ending Simulation !\n",P->Par.me);
|
---|
759 | /* TestGSfft(P);*/
|
---|
760 | }
|
---|
761 |
|
---|
762 | /** Preparation, running and cleaning of a run.
|
---|
763 | * Allocate Problem structure,
|
---|
764 | * read command line options GetOptions(),
|
---|
765 | * define verbosity of output groups SetOutGroup1(),
|
---|
766 | * maybe StartDebugger(),
|
---|
767 | * parse the configuration files ReadParameters().
|
---|
768 | *
|
---|
769 | * Pay attention to external stop signals InitSignals(),
|
---|
770 | * define verbosity within the MPi communicators SetOutGroup2(),
|
---|
771 | * initialize the timing arrays InitSpeedMeasure(),
|
---|
772 | * WaitForOtherProcs(),
|
---|
773 | * and doe the initialization of the actual problem Init() (SpeedMeasure()'d in InitTime).\n
|
---|
774 | * Runs the Simulation RunSim() and cleans up at the end by calling WaitForOtherProcs(),
|
---|
775 | * CompleteSpeedMeasure() and RemoveEverything()
|
---|
776 | * \param argc argument count from main
|
---|
777 | * \param argv argument array from main
|
---|
778 | */
|
---|
779 | static void Run(int argc, char **argv) {
|
---|
780 | struct Problem *P = (struct Problem *)
|
---|
781 | Malloc(sizeof(struct Problem),"Run - P"); /* contains options, files and data regaring the simulation at hand */
|
---|
782 | GetOptions(&P->Call, argc, argv);
|
---|
783 | SetOutGroup1(&P->Call);
|
---|
784 | if(P->Call.debug == 1) StartDebugger();
|
---|
785 | ReadParameters(P, P->Call.MainParameterFile);
|
---|
786 | StartParallel(P, argv);
|
---|
787 | InitSignals();
|
---|
788 | SetOutGroup2(P, &P->Call);
|
---|
789 | InitSpeedMeasure(P);
|
---|
790 | WaitForOtherProcs(P,1);
|
---|
791 | SpeedMeasure(P, InitTime, StartTimeDo);
|
---|
792 | Init(P);
|
---|
793 | SpeedMeasure(P, InitTime, StopTimeDo);
|
---|
794 | RunSim(P);
|
---|
795 | WaitForOtherProcs(P,1);
|
---|
796 | CompleteSpeedMeasure(P);
|
---|
797 | RemoveEverything(P);
|
---|
798 | }
|
---|
799 |
|
---|
800 | /** Main routine.
|
---|
801 | * Launches MPI environment (Init and Finalize), starts Run() and gives
|
---|
802 | * correct return signal on exit
|
---|
803 | * \param argc argument count
|
---|
804 | * \param argv argument array
|
---|
805 | * \return exit code (always successive)
|
---|
806 | */
|
---|
807 | int main(int argc, char **argv) {
|
---|
808 | MPI_Init(&argc, &argv);
|
---|
809 | Run(argc, argv);
|
---|
810 | MPI_Finalize();
|
---|
811 | CheckSigReturn();
|
---|
812 | return EXIT_SUCCESS;
|
---|
813 | }
|
---|