/** \file pcp.c * Main routine, Run super-functions and signal handling. * * This file contains just framework: * The main() routine, passing its arguments on to Run(), which does signal * initialization InitSignals() and setting verbosity levels and calls RunSim() * which is then just a wrapper for CalculateMD(). * For the signals there is the actual handler SigHandlerCPULIM() for the external * exit signal, routines to check for the signal CheckCPULIM() and give correct * return code to old handler CheckSigReturn(). * Project: ParallelCarParrinello \author Jan Hamaekers \date 2000 File: pcp.c $Id: pcp.c,v 1.36 2007/02/05 13:59:24 foo Exp $ */ /*! \mainpage Introductory to Parallel Car Parrinello * * This is a quick overview of the programme pcp, explaining as shallowly as possible * its possble uses. For a more profound introduction be referred to its (german) diploma * thesis paper here: http://wissrech.ins.uni-bonn.de/teaching/diplom/diplom_hamaeker.pdf * (Note: All numbering behind formulas always refer to this thesis paper) * * ParallelCarParrinello - or short pcp - is the parallel implementation of * Car-Parrinello-Molecular-Dynamics, using a density field theory approach. In brief, * a given configuration is minimised in terms of the energy functional. The system * is encapsulated in a cell and thus periodic boundary conditions are applied. * The potential is also evaluated and this yields a force for every moving ion in * the cell. Thereby the molecular dynamics is performed. * * The basic idea behind the parallelisation is most importantly the simultaneous * fast fourier transformation, where the three-dimensional grid of coefficients in * a plane wave approach are subdivided into planes for the hither transformation and * pens for the thither one. These one or two dimensional ffts can be done by multiple * processes and later reassembled. * Second is the parallelisation of the minimisation. Each process holds a certain * "local" part of all electronic wave functions and the others are assumed constant * while minimising over these local ones by a conjugate gradient mechanism. * Lastly, the Gram-Schmidt-Orthonormalisation is also done parallely. * * This Introductory is divided in the following sections: * - \subpage intro Introduction * - \subpage tutorial Hands-on Tutorial * * © 2006 Frederik Heber */ /*! \page intro Introduction * * * \section install Installation * * * \subsection reqs Requirements * * This package relies on certain other packages. * - FFTW version 2\n * This is a fast implementation of Fast Fourier Transformations. * Currently, version 3 is not supported. Get it from their webpage, compile * and install the libraries and header files in a path where the linker will * find them.\n * http://www.fftw.org/ * - MPICH\n * This is an implementation of the Message Parsing Interface, which enables * simple build of parallel programs by means of exchange data via messages. * Here as well we need the libraries and header files, compile and install * them in an accessible directory, and also the launcher mpirun.\n * http://www-unix.mcs.anl.gov/mpi/mpich/ * - OpenDx\n * For optional visualization of the calculations, displaying electronic densities, * ion positions and forces within the cell.\n * http://www.opendx.org/ * * \subsection process Installation Process * * Note beforehand that this package manages compilation on various architectures with * help of some GNU tools such as automake (Makefile.am) respectively autconf (configure.ac). * * After unzipping the archive into an appropiate source directory, the programme has * to be compiled by executing consecutively: * - ./configure * - make * - make install * * It is recommended to create two additional directories:\n * One "build" directory which may safely reside in the main source folder, * wherein the actual build process is launched and thus temporarily created object * files removed from the main folder's contents. This happens automatically if * configure is not called from the main folder but within this build folder by: * - "../configure" * * Secondly, a "run" directory. This might either be an already present "/usr/local/bin" * which is taken as default, but also a "/home/foo/pcp/run". If desired this path * should be appended to the configure command as follows: * - "../configure --bindir /home/foo/pcp/run" * * and the executable will be automatically installed therein on calling make install. * * Alternatively, you may more generally pick "--prefix=/home/foo/pcp", which would prefix * all installation directories (such as doc, man, info, lib, ...) with the given directory * home/foo/pcp. Thus, the executable would now be installed upon evocation of 'make install' * in /home/foo/pcp/bin (because ./bin is default for executables). This and more options * are explained when invoking './configure --help'. The subdirectory where the executable is * installed in will from now on be called "bindir". * * Next, we must unpack the archive containing the pseudopotential files pseudopots.tar.gz by * entering 'tar -zxvf pseudopots.tar.gz' in the main dir of the package. Note that the * pseudopotential files now reside in 'defaults/pseudopot/' respectively to the main dir. * * At last, you may re-generate this very documentation of the pcp package with the help of the * 'doxygen' package (http://www.doxygen.org/) by execution of: * - make doxygen-doc (edit the paths in doxygen.cfg beforehand though!) * * Even more, if you are interested in the documentation of the simple molecuilder sub programme, * enter /home/foo/pcp/build/util/molecuilder and execute the same command therein which generates * the documentation in /home/foo/pcp/util/molecuilder/doc/html (and may be viewed by giving * /home/foo/pcp/util/molecuilder/doc/html/index.html to a standard web browser). * * \section use Usage * * Having unpacked, compiled and installed the programme, we are actually set to go * besides the specification of the actual physical problem. If you desire a ste-by-step * introduction to the handling of the simple hydrogen molecule, be referred to the section * \ref tutorial. * * \subsection specify Specifying the Problem * * There are two things necessary: * - a configuration file with various informations from stop conditons to Z number * or the basic cell vectors, giving its volume, number of ions, position and type * and so on ... * The given configuration file in the subdirectory "defaults" is fully documented * and its sense should be easy to grasp thereby - or see below in the section * \ref config. * - pseudopotential files which reside in a certain directory specified in the * config file. Ine is necessary for each different atom (discerned by atomic * number Z) specifying the pseudo potential on a grid, generated by the fhi98PP * package: http://www.fhi-berlin.mpg.de/th/fhi98md/fhi98PP/index.html\n * These are also already present in the directory called "defaults/pseudopot". * * \subsection exec Programme execution with mpirun ... * * Basicially the programme is started as follows: * - mpirun -np 1 -vvv -machinefile /home/foo/.rhosts pcp ../defaults/main_pcp\n * where "-np 1" states the number of processors to use, so here just one, "-vvv" * relates to the verbosity of the intermediate output (more v's, more output), * "-machinefile" points to the rsh text file with allowed hosts (present on all * hosts) and last but by no means least the config file with relative path * "../defaults/main_pcp".\n * Take care that your machinefile contains enough hosts and also that "-np" * matches the product of ProcPEPsi and ProcPEGamma specified in the config file. * You won't see the error messages, when one of clients hangs (mpirun just idles * on all others as they cannot reach the host whose pcp has stopped). * * The important input and output is all done via files. Configuration and PseudoPot * files are read on startup when the Problem structure, with its various parts, is * initialized. Each given amount of made steps forces, energies, ion positions and * the electronic density if desired are written to file, which can be read and * visualized lateron by the Open Data Explorer. * * Let's now come to the parallel execution of the programme on multiple hosts. * Generally, it is recommended to choose between n+1 and 2n processes for n hosts. * There are two ways of connection to the other clients, either via rsh or ssh. * Seemingly, mpich automatically picks the right one, first rsh, then ssh. * * Note: * -# Make sure that the executable with its needed input files are present on all * other machines (Most common way is a shared directory, such as the user's home * residing on a server shared via NFS to all clients) * -# Make sure that the paths are correct and the same on all hosts (also the config paths) * -# Use a homogeneous set of clients (no mixing of architectures or slow with fast * systems!) * * \subsubsection rsh ... with RSH * * Most important in this case is the file .rhosts in your home, where line by line * the hostnames of available computers are included. Via RSH-Login the programme is * launched on all machines by mpirun.\n * * \subsubsection ssh ... with SSH * * Via a passphrase and an ssh agent a much safer - due to encryption - way is used * to connect to other clients, employed as follows * - ssh-keygen -t rsa (enter keyphrase, public and private key is stored in ~/.ssh/) * - cat ~/.ssh/\*.pub >>~/.ssh/authorized_keys (if home is shared via nfs) OR \n * cat ~/.ssh/\*.pub | ssh user\@remote-system 'cat>>.ssh/authorized_keys' (when the * user home is not shared via nfs for each remote-system) * * Afterwards, start an SSH agent and 'ssh-add' the above pass-phrase. It * might be a good idea, to give a limited timespan 'ssh-add -t 86400' (seconds). * * Test it by trying something like 'ssh host /bin/ls /bin'. It should work (just print * the /bin directory) without any queries or halts. Otherwise remove host and its IP * from .ssh/known_hosts and try again. Check this for all hosts in your .rhosts, * or try 'tstmachines -machinefile=/home/foo/.rhosts' which is supplied with mpich. * * The ssh method is especially preferrable if the hosts do not reside within a protected * network, such as shielded by a firewall from the outside internet with internal IP * adresses. * * \subsection results The Results * * Process 0 does all the output and is always started on your localhost. * There a number of different output files if the programme is told to produce * them in the configuration file. They all beginn with "pcp." and most end with * the output step count in 4 digits such as ".0001". In between lies the actual * content information: * - "speed": Time measurements for the various sections of the calculations. * - "density": There are three types: "doc" and "dx" specifiy the file format for * OpenDx, while "data" contains the actual densities. These are only produced * if DoOutVis is set to 1 * - "ions": "pos" means position, "force" contains forces from the dynamics * simulation represented by arrows in OpenDx, "datZ", "doc" and "dx" are again * only needed to specifiy the file format. * - "energy.all": Total energy * - "forces.all": Total force * * There are visualization scripts included in the subdirectory "defaults" such * as "visual.cfg" for use with OpenDx. * * * \section start Getting started * * In Order to get a first understanding of the programme, here is a brief explanation of its most * important functions: * -# Init(): Super-function to most of the initialisation subroutines, that allocate memory or * setup the wave functions randomly or calculate first energies and densities. * -# CalculateMD(): Contains the super-loop that calculate the force acting on the ions, moves them, * updates the electronic wave functions and prints results to file. * -# CalculateForce(): On evaluating the force acting on the ions also the actual minimisation of * the wave functions - one after the other - is done, energies and densities calculated on ever * finer grids. * -# CalculateNewWave(): Is the actual (Fletcher&Reeves) conjugate gradient implementation that * minimises one wave function by calculating the gradient, preconditioning the resulting matrix, * retrieving the conjugate direction and doing the line search for the one-dimensional minimum. * * Most of these functions call many others in sequence, so be prompted to their respective * documentation for further explanation and formula. * * \subsection follow Following the course of the programme * * When the programme is launched with the option "-vvv" for greater verbosity a series of output * is printed to the screen, which briefly shall be explained here. * * At first there is the initialization output stating number of nodes on the ever more finely * grids in normal and reciprocal space, density checks, parameters read from the configuration * file and so on, until the minimisation finally commences. * * The most prominent line then looks as follows: * (0,0,0)S(0,0,0): TE: -1.750005e+01 ATE: -1.825983e+01 Diff: 1.000000e+00 --- d: 6.634701e-01 dEdt0: -1.943969e+00 ddEddt0: 9.673485e-01 * * The first three numbers refer to process numbers within MPI communicators: SpinUp/Double(0)SpinDown(1), * number within coefficients sharing, within wave functions sharing group. * The next three numbers after the "S" refer to: Number of performed minimisation steps, number of * currently minimised local wave function, number of minimisation steps performed on this wave function. * Afterwards follow: Total energy (TE), approximated total energy (ATE), relative error in the prediction (diff), * delta (the one-dimensional line search parameter \f$cos \Theta\f$), the first (dEdt0) and second (ddEddt0) * derivative of the total energy functional (at parameter = 0). * * Ever and again there appear checks such as: * -# "ARelTE: 1.387160e-02 ARelKE: 3.074929e-03", stating the actual relative difference in total and * kinetic energy between this and the last check * -# "(0)TestGramSchm: Ok !", having tested the desired orthogonality of the wave functions * -# "Beginning minimisation...", starting a perturbed minimisation run. * * At the end the programme will print a number of different energies to the screen: total, Hartree, * ionic, kinetic, ... energies. * * * \section tech Technical Details * * \subsection config The Configuration file explained * * The first column always contains a keyword (for which the programme parses, so * the ordering as given here needn't be replicated) following one or more values * or strings. * * - mainname: The short name of the Programme * - defaultpath: Full path indicating where to put the output files during runtime * - pseudopotpath: Full path to the needed PseudoPot files * - ProcPEPsi: Number of process (groups consisting of ProcPEPsi processes) among which * wave functions are split up (e.g. MaxPsiDouble 16, ProcPEPsi 4, ProcPEGamma 2 results * in 4 wave functions per group and each group has 2 processes sharing coefficients) * - ProcPEGamma: Number of processes which make up one wave functions coefficients sharing group * - DoOutVis: Output files for OpenDX (1 - yes, 0 - no) * - DoOutMes: Output Measurement (1 - yes, 0 - no) * - AddGramSch: Do an additional GramSchmidt-run after a Psi has been minimised (hack) * - Seed: Initialization value for pseudo random number generator on generating wave function coefficients, 0 means * constant 1/V for perturbed wave functions. * - UseSeparation: Minimise unoccupied states in a separate run per level indepedent of occupied states * - UseFermiOccupation: After each minimisation run, occupation of orbitals will be reset according to Fermi distribution * - MaxOuterStep: Number of to be performed Molecular Dynamics steps * - Deltat: time slice per MD step * - OutVisStep: Output visual data at every ..th step * - OutSrcStep: Output measurement data at every ..th step * - TargetTemp: Target temperature in the cell (this medium ion velocity) * - ScaleTempStep: MD temperature is scaled at every ..th step * - MaxPsiStep: number of minimisation steps per state(!) before going on to next local state (0 - default (3)) * - MaxMinStep: standard level stop criteria - maximum number of minimisation steps over all states * - RelEpsTotalE: standard level stop criteria - minimum relative change in total energy * - RelEpsKineticE: standard level stop criteria - minimum relative change in kinetic energy * - MaxMinStopStep: standard level stop condition is evaluated at every ..th step, should scale with orbital number * - MaxMinGapStopStep: standard level stop condition for gap calculation is evaluated at every ..th step * - MaxInitMinStep: initial level stop criteria - maximum number of minimisation steps over all states * - InitRelEpsTotalE: initial level stop criteria - minimum relative change in total energy * - InitRelEpsKineticE: initial level stop criteria - minimum relative change in kinetic energy * - InitMaxMinStopStep: initial level stop condition is evaluated at every ..th step * - InitMaxMinGapStopStep: initial level stop condition is evaluated at every ..th step * - BoxLength: Length of the super cell in atomic units (lower triangle matrix: 1 value, 2 values, 3 values) * - ECut: Energy cutoff specifying how man plane waves are used (in Hartrees) * - MaxLevel: number of different mesh levels in the code, >=2 * - Level0Factor: Mesh points Factor for the topmost level * - RiemannTensor: Use RiemannTensor * - PsiType: Whether orbits are always doubly occupied or spin up and down is discerned (and completely separated in calculation, needs at least 2 processes!) * - MaxPsiDouble: specifying how many doubly occupied orbits there are (either this or next depends on given PsiType) * - PsiMaxNoUp/PsiMaxNoDown: specifying how many spin up or down occupied orbits there are * - RCut: Radial cutoff in the Ewald summation * - IsAngstroem: length in 0 - Bohr radii or in 1- Angstr�m * - MaxTypes: maximum number of different ion types (important for next constructs) * - Ion_TypeNr: 6 values - Number of ions, atomic value, Gauss radius, maximum angular momentum of Pseudopotenials, L_loc and mass of ion (Nr in keyword must go from 1..MaxTypes!) * - Ion_TypeNr_Nr: 4 values - position (x,y,z) in atomic units, IonMoveType (0 - movable, 1 - fixed).For each stated IonType there must such a keywor for each ion (second number goes from 1..Number of ions!) * * \section notes Notes on implementation and code understanding * * \subsection densities Densities and Wave functions * * The coefficients of densities and wave functions both in real and reciprocal space are only in parts stored locally * on the process. Coefficients of one wave functions may be shared under a number of processes, increasing speed when * doing loops over G - reciprocal vector, and the wave functions as a whole may also be shared, increasing Fourier transform * and also minimization - however with the drawback of parallel conjugate gradient minimization which might lead to errors. * * Additionally, the coefficients are structured in levels. The grid is present in ever finer spacings, which is very helpful * in the initial minimization after the wave functions have been randomly initialized. Also, the density is always calculated * on a grid twice as fine as the one containing wave function coefficients, as the fourier transform is then exact. That's * why there are always two levels: LatticeLevel LevS and Lev0. The first being the standard level with the wave functions, * the latter for densities (one often stumbles over "Dens0" as a variable name). Also there two kinds of densities: * DensityArray and DensityCArray, the first contains densities in real space, the latter in reciprocal space. However, both * are densities, hence the scalar product of their respective wave functions. * For this reason one finds often construct which use a position factor in order to transform wave coefficients into ones * on a higher level, followed by a fourier transform and a transposition, see e.g. CalculateOneDensityR(). It is often * encountered throughout the code. * * * © 2006 Frederik Heber */ /*! \page tutorial Hands-on Tutorial * * * In this section we present a step-by-step tutorial, which leads you from the creation of a typical config file for a hydrogen * molecule through the actual calcualtion to the analysis with final plots and graphics. * * \section configfile Creating the config file * * First, we need to create the config file. For this we use the utility 'molecuilder' which resides in the util subdirectory * and is compiled upon execution of 'make' as described in Section \ref process and finally installed in the bindir. Go there, * pick and create a directory, such as '/tmp/H2/' and start the utility: * - ./molecuilder /tmp/H2/H2-config * * You might want to change the config file name, here chosen as 'H2-config', according to your personal liking, it is completely * arbitrary. It is however recommended to pick an individual directory for each molecule upon which you choose to perform DFT * calculations as there are a number of files which are produced during the run. * * Molecuilder will first ask for a cell size, which is entered by typing in the six elements of a symmetric matrix, for an cubic * cell of 20 a.u. length, enter 20, 0, 20, 0, 0, 20. * * Next, you see two lists, both are empty, and a menu. So far no elements or atoms have been put into the cell. We want to add an * atom, press 'a' and enter. Now we have to choose in which way we want to add the atom. The choice is either in absolute (x,y,z) * coordinates, or relative to either another absolute point or to an already placed atom. The last option is used in the case when * there are two atoms and you want to add a third according to its angle and distance to the former two. * * For now, we add the first hyodrogen atom. We want to put the molecule which consists of two hydrogen atoms at a distance of 1.4 * atomic units symmetrically off the cell's centre. Thus we choose option b, enter 10, 10, 10 as the reference point and then give the * relative coordinates as 0.7, 0, 0. Thus, the molecule will be aligned along the x axis. The Z number is 1, in the case of hydrogen. * Upon the last enter you notice that both the element and the atom list have filled a bit. Let's add the second atom. Choose 'a' * again and option c this time, picking the first already entered atom as the reference point, by entering 0 ('number in molecule') * next. Enter: -1.4, 0 , 0, and again Z = 1. * Afterwards, we see that in the element list, the second entry changed from 1 to 2. It's the number of atoms of this element which * the molecules consists of so far. Next is Z number, R-cut, lmax and lloc, the element's mass and a name and short hand which is * taken from the database in the file 'elements.db' which must reside in the same directory as molecuilder. * * Finally, we need to specify some additional info in the config file that's non-standard. Enter 'e' in the menu to edit the * current configuration. The list is huge, you might have to scroll a bit and don't panic, most of these are trivial and will * become comprehensible with time. First is the paths, option 'B' and 'C'. Upon entering the option, it will give the the * "old" value and request a new one. Enter: * - B: /tmp/H2/ * - C: /home/foo/pcp/defaults/pseudopot * * The latter one is the path to the pseudopotential files which reside in the archive 'pseudopots.tar.gz' in /home/foo/pcp or * whereever you have extracted the distributed pcp package to. Unzip it by 'tar -zxvf pseudopots.tar.gz' and it will create an * additional directory called defaults wherein another directory called pseudopot resides containing the files for element with * atomic number 1 up to 80 -- these are the files generated with the FHIMD PseudoPotential Generator. * * As the molecule and all necessary config file settings are entered, enter 's' to save the config file under the filename stated * on the command-line and 'q' to quit. * * * Now, we take a look at the created config file itself. Take your favorite editor and compare it to the explanations given in the * secion \ref config. According to your machine setup, the following entries must be changed: * - ProcPEPGamma/ProcPEPsi: Their product gives number of processes which must be started. As a beginning you might leave 1 here for * both entries. * - VectorPlane: Enter 1 (it's a cut plane for a two-dimensional representation of the current density) * - VectorCut: Enter 10. (cut is done at this coordinate along the by VectorPlane given axis) * - BoxLength: Check the six entries of the symmetric matrix for the correct cell size * - ECut: In general, both the precision and time needed for the calculation. Reasonable values for H2 are 8. to 24. * - MaxPsiDouble: Enter 1 here, it's the number of doubly occupied orbitals which is one in case of the hydrogen molecule * - AddPsis: Leave 0, this is the number of unoccupied orbitals which we do not need. * - RCut: Enter 20., the cell size. It's another numerical cutoff here for the ewald summation of the infinite coulomb potential. * * This sums it up. You might also have noticed three entries, one for the element and another two for each hydrogen atom, very similar * in appearance to the list given by molecuilder. Compare to the explanation given in the config file section \ref config above. * Of course, the above shown changes could also have been made during the edit menu of molecuilder, we just wanted to demonstrate that * it's perfectly safe to use a text editor as well (any more than one white space such as some more spaces and/or tabs are * automatically read over during the parsing of the config file). * * We may now start a calculation. * * \section launching Performing the calculation * * Enter the directory which you specified as the 'bindir' where the executable shall be installed in, let's say for this example * it's '/tmp/pcp/run'. Now, if you have the mpi package installed and are able to launch mpirun, enter this: * * - mpirun -np 1 ./pcp -vvv -w /tmp/H2/H2-config * * Here, '1' is the number of processes needed, the product of ProcPEPGamma and ProcPEPsi. If not, enter this: * * - ./pcp -vvv -w /tmp/H2/H2-config * * More comfortably we may also take the unix shell script 'run', which uses mpirun and evaluates the product by itself. However, * as it is platform-dependent and thus it is provided just as an idea-giver. It is installed in the bindir from subdirectory 'run' * during 'make install'. However, its executable permissions must still be set: e.g. 'chmod 755 run' (The same applies for all * other 'shell scripts' used in this tutorial). Also, the common paths are set in the file 'suffix', also in the bindir and must * be adapted to your local context. Basically, there is always the rather uncomfortable way of doing every by hand, which is * explained as well, or use these scripts if you can make them work. * * - Enter: ./run to see it's options * - Enter: ./run -vvv /tmp/H2/H2-config H2 to run (here a log file pcp.H2.log is written, with the tag 'H2' which may be chosen arbitrarily). * * Possible errors here are as follows: * - paths in 'suffix' are not changed or were overwritten with defaults on make 'install' * - permission denied: 'chmod 755 run' missing (necessary after 'make install') * - pseudopots not unpacked or wrong path given in config file * - charged system: MaxPsiDouble (..SpinUp/Down) does not match required number of orbitals of the system * * Given 8 (hartree) as ECut and a moderately well performing computer numbers should rush by for a few seconds. Afterwards, the calculations * has already finished (this is of course only the case for a simple molecule such as the hydrogen one). The last hundred lines or so * consist of calculated values, such as the susceptibility tensor and among others the isometric values, shielding for each hydrogen atom, * the total energy (though beware, due to the pseudopotential ansatz for a general molecule it is not equivalent to the real absolute binding * energy) and the perturbed energy which are of minor interest. * * A number of file where generated in /tmp/H2/, some general ones are listed in sections above, we want to discuss a chosen few here: * * - LOG: pcp.H2.log contains all the lines which you just might have missed before (only if pcp was started with 'run'-script) * - Cut plane: pcp.current....csv contains (for each process of ProcPEPGamma) it's part of the specified cut plane of the current density. * - Susceptibility: pcp.chi.csv contains first an explanatory line and then one with ten entries, first is the ecut, then follow the nine entries of * the rank 2 tensor. * - shielding: pcp.sigma_i0_H.csv and ...i1... respectively contain the same as pcp.chi.csv only for the shielding tensor. * * And this is it. Of course, \f$H_2\f$ is a very simple molecule. Regard a case where you might want to calculate a (4,3) nanotube with * hundreds of symmetrically aligned carbon atoms. Each of these will produce a pcp.sigma_i..csv, though all should be the same in the * end. Also, we would like to have convergence plots and vector graphs of the current density and its values. * * In order to produce convergence plots, we need to perform the calculations for various cutoff energies (ECut). You may either move all * the files in /tmp/H2 to e.g. /tmp/H2/8Ht/, edit the config file, pick 12Ht now as ECut and re-run the calculations, move the results * and so on for four to six different values. An easier way is the script file 'test_for_ecut.sh'. Delete all pcp.*-files in /tmp/H2/ * (Thus it is always good practice not to pick pcp as prefix for the config file!) and enter in the bindir subdirectory: * * - chmod 755 test_for_ecut (as it is platform-dependent as well, otherwise you get an error: permission denied) * - ./test_for_ecut.sh to see it complaining the lack of a config file * - ./test_for_ecut.sh /tmp/H2/H2-config to see its desired options and the current ecut list (it's a string in the script file adapted to * 4 processes sharing the coefficients) * - ./test_for_ecut.sh /tmp/H2/H2-config H2 24 to launch it. * * Notice that it produces no ouput as does pcp. The lines are redirected into the log file whose tag you specified as 'H2'. As we want to * check on what it's doing, we ought to have started it using 'nohup ./test_for...... 24 &' to launch it as a child process. Now, * we need to press CTRL-Z to stop it and enter 'bg' to background it. Look into /tmp/H2. Notice various new subdirectories * resembling the specified ecuts. Enter 'tail -f /tmp/H2/3Ht/pcp.H2.log' to see that the first calculation is already finished. * Continue with 12Ht and furtheron if you are impatient. Otherwise, wait for the shell script to terminate. Now, we have multiple * points for a convergence graph to evaluate. Thus, onward to the analysis. * * * \section analysis Analyzing the results * * First, let's have a look at the cut plane and isospheres of the current density. * * - start gnuplot, enter "plot 'pcp.current_proc0.csv' with vectors" in one of the various cutoff directories. Notice the rotating vector * field which the magnetic field induced in between the two hydrogen atoms. * - start opendx by entering dx, load Programm visual.net which resides in the defaults subdirectory of the archive. It may take a moment * the more complex the grid (the higher the energy cutoff) is and will then render three-dimensional isospheres of the current density * on the screen. You may rotate them and 'Sequence Control' let's you see each the current density of of the three magnetic field * directions. * * Now onward to the real analysis. * Here again, we use some shell scripts which gather the results from the various calculations into one file (as you may guess, they pick * the last lines from each dir from the .csv-files and gather them in one file with the same name in the main config directory. Enter in * the bindir (don't forget to chmod): * * - ./gather_all_results /tmp/ H2, where the first argument gives the absolute directory and H2 the molecules subdirectory. * * It will print some stuff and finish. If you have gnuplot installed, some plots may briefly appear as a flicker on the screen. They * created as a postscript in one file called H2.ps. Its purpose is purely to quickly check on the general convergence of each value and * to pinpoint mavericks. Afterwards, you will find a number of results-...-files in the main config directory /tmp/H2. They contain * the gathered chi and sigmas for the various energy cutoffs that were calculated. They are used in the creation of H2.ps. It is * easy to create your own convergence analysis from these. Note also that in 'pcp.convergence.csv' you find a lot of variables, each * run more ore less adds a new line. Thus, this file might be especially useful to check convergence of lets-say sigma against * ecut or against the lattice constant (if you performed different runs with varyzing constants). Basically, all .csv-Files are * results (the suffix may be changed in bindir/suffix though). * * That completes the magnetic analysis of the hydrogen molecule. * * But there are more complex molecules and there we want to delve more deeply into the topic of parallel computing. * * © 2006 Frederik Heber */ #include #include #include #include #include"mpi.h" #include"data.h" #include"errors.h" #include"helpers.h" #include"init.h" #include"pcp.h" #include"opt.h" #include"myfft.h" #include"gramsch.h" #include"output.h" #include "run.h" #include "pcp.h" #define MYSTOPSIGNAL SIGALRM //!< external signal which gracefully stops calculations static void InitSignals(void); void (*default_sighandler)(int); //!< deals with external signal, set to \ref SigHandlerCPULIM() volatile sig_atomic_t cpulim = 0; //!< contains external signal int par_cpulim = 0; //!< for all MPI nodes: 0 = continue, 1 = limit reached, end operation int myrank = 0; //!< Rank among the multiple processes /** Handles external Signal CPULimit. * \ref cpulim is set to 1 as a flag * especially if time limit is exceeded. * \param sig contains signal code which is printed to screen */ static void SigHandlerCPULIM(int sig) { if(myrank == 0) fprintf(stderr,"process %i received signal %d\n", myrank, sig); cpulim = 1; return; } /** Checks if CPU limit is reached. * is supposed to be called ever and again, checking for \ref cpulim, * then \ref cpulim is broadcasted to all MPI nodes preventing deadlock \note On reaching time limit: save all in datafile, close everything * \param *P \ref Problem at hand * \return is equal to cpulim */ int CheckCPULIM(struct Problem *P) { /* Bei Zeitlimit: evtl DataFile ausgeben, alles abschliessen */ par_cpulim = cpulim; MPI_Bcast(&par_cpulim, 1, MPI_INT, 0, MPI_COMM_WORLD); if (!par_cpulim) return 0; if (myrank == 0) fprintf(stderr,"cpulim reached, finish data\n"); return 1; /* Ende einleiten ! */ } /** Initializes signal handling. * Sets \ref myrank to 0 and installs \ref SigHandlerCPULIM */ static void InitSignals(void) { myrank = 0/*P->Par.me*/; default_sighandler = signal(MYSTOPSIGNAL, SigHandlerCPULIM); } /** Checks additionally for signal. * If signal has been processed, it gets re-processed by default handler * in order to return correct status to calling script */ static void CheckSigReturn(void) { signal(MYSTOPSIGNAL, default_sighandler); if(cpulim) raise(MYSTOPSIGNAL); } #if 0 #define testing 1 /** FFT Test. * perfoms some tests on fft * \param *P \ref Problem at hand */ static double fftTest(struct Problem *P) { int i,d,ii,ix,iy,iz; double MesTime1, MesTime2; const struct Lattice *Lat = &P->Lat; struct LatticeLevel *Lev = &Lat->Lev[STANDARTLEVEL]; struct fft_plan_3d *plan = Lat->plan; struct Density *Dens = Lev->Dens; fftw_complex *destC = Dens->DensityCArray[TotalLocalDensity]; fftw_real *destCR = (fftw_real *)destC; fftw_real *destCR2 = Dens->DensityArray[TotalLocalDensity]; fftw_complex *work = (fftw_complex *)Dens->DensityCArray[TempDensity]; const double factor = 1./Lev->MaxN; /* srand(1); */ for (d=0; d < (P->Par.me_comm_ST_Psi+P->Par.Max_me_comm_ST_Psi*P->Par.me_comm_ST_PsiT); d++) for (i=0; iLocalSizeR; i++) rand(); for (i=0; iLocalSizeR; i++) { destCR[i] = 1.-2.*(rand()/(double)RAND_MAX); destCR2[i] = destCR[i]; } WaitForOtherProcs(P,0); MesTime1 = GetTime(); fft_3d_real_to_complex(plan, STANDARTLEVEL, FFTNF1, destC, work); for (i=0; iLocalSizeC; i++) { destC[i].re *= factor; destC[i].im *= factor; } fft_3d_complex_to_real(plan, STANDARTLEVEL, FFTNF1, destC, work); MesTime2 = GetTime(); for (i=0; iLocalSizeR; i++) { if (fabs(destCR2[i] - destCR[i]) >= MYEPSILON) { iz = i % Lev->N[2]; ii = i / Lev->N[2]; iy = ii % Lev->N[1]; ix = ii / Lev->N[1]; fprintf(stderr,"FFT(%i)(%i,%i) =(%i)(%i,%i,%i) %g = |%g - %g| eps(%g)\n",P->Par.my_color_comm_ST,P->Par.my_color_comm_ST_Psi, P->Par.me_comm_ST_Psi, i, ix, iy, iz, fabs(destCR2[i] - destCR[i]), destCR2[i], destCR[i], MYEPSILON); } } return(MesTime2-MesTime1); } /** GS Test. * performs test on Gram-Schmidt * \param *P \ref Problem at hand */ static double GSTest(struct Problem *P) { struct LatticeLevel *Lev = &P->Lat.Lev[STANDARTLEVEL]; int i,j,k; double MesTime1, MesTime2; int l = P->Lat.Psi.AllLocalNo[0]-1; if (P->Par.my_color_comm_ST_Psi == 0) { for (k=0; kPar.me_comm_ST_Psi; k++) for (j=0; j<2*Lev->AllMaxG[k]; j++) rand(); l = P->Lat.Psi.LocalNo; P->Lat.Psi.LocalPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal; for (i=0; i < Lev->MaxG; i++) { Lev->LPsi->LocalPsi[l][i].re = 1.-2.*(rand()/(double)RAND_MAX); Lev->LPsi->LocalPsi[l][i].im = 1.-2.*(rand()/(double)RAND_MAX); } } P->Lat.Psi.AllPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal; WaitForOtherProcs(P,0); MesTime1 = GetTime(); GramSch(P, &P->Lat.Lev[STANDARTLEVEL], &P->Lat.Psi, Orthonormalize); MesTime2 = GetTime(); if (P->Par.my_color_comm_ST_Psi == 0) { for (k=P->Par.me_comm_ST_Psi+1; kPar.Max_me_comm_ST_Psi; k++) for (j=0; j<2*Lev->AllMaxG[k]; j++) rand(); } return(MesTime2-MesTime1); } #endif #ifndef MAXTESTRUN #define MAXTESTRUN 100 //!< Maximum runs for a test #endif #if 0 /** combined GS fft test. * tests fft and Gram Schmidt combined * \param *P \ref Problem at hand */ static void TestGSfft(struct Problem *P) { int i, MaxG=0; double fftTime = 0; double GSTime = 0; double *A = NULL; double *AT = NULL; double avarage, avarage2, stddev; for(i=0; i < P->Par.Max_me_comm_ST_Psi; i++) MaxG += P->Lat.Lev[STANDARTLEVEL].AllMaxG[i]; if (!P->Par.me_comm_ST_Psi) A = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_Psi,"TestGSfft"); if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) AT = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_PsiT,"TestGSfft"); for (i=0;iPar.comm_ST_Psi ); if (!P->Par.me_comm_ST_Psi) { avarage = 0; for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) avarage += A[i]; avarage /= P->Par.Max_me_comm_ST_Psi; avarage2 = 0; for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) avarage2 += A[i]*A[i]; avarage2 /= P->Par.Max_me_comm_ST_Psi; stddev = sqrt(fabs(avarage2-avarage*avarage)); fprintf(stdout,"fft: Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f std %f\n", P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2], MaxG, P->Par.my_color_comm_ST, P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi, P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi, avarage, stddev); } srand(2); for (i=0;iPar.comm_ST_Psi ); if (!P->Par.me_comm_ST_Psi) { avarage = 0; for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) avarage += A[i]; avarage /= P->Par.Max_me_comm_ST_Psi; avarage2 = 0; for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) avarage2 += A[i]*A[i]; avarage2 /= P->Par.Max_me_comm_ST_Psi; stddev = sqrt(fabs(avarage2-avarage*avarage)); fprintf(stdout,"GS : Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f std %f\n", P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2], MaxG, P->Par.my_color_comm_ST, P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi, P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi, avarage, stddev); } if (!P->Par.me_comm_ST_Psi) Free(A); if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) Free(AT); } #endif /** Run Simulation. * just calls \ref CalculateMD() \if testing==1 * also place to put \ref TestGSfft(P), \ref GSTest(P) or \ref fftTest(P) \endif * \param *P \ref Problem at hand */ static void RunSim(struct Problem *P) { if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Running Simulation !\n",P->Par.me); CalculateMD(P); if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Ending Simulation !\n",P->Par.me); /* TestGSfft(P);*/ } /** Preparation, running and cleaning of a run. * Allocate Problem structure, * read command line options GetOptions(), * define verbosity of output groups SetOutGroup1(), * maybe StartDebugger(), * parse the configuration files ReadParameters(). * * Pay attention to external stop signals InitSignals(), * define verbosity within the MPi communicators SetOutGroup2(), * initialize the timing arrays InitSpeedMeasure(), * WaitForOtherProcs(), * and doe the initialization of the actual problem Init() (SpeedMeasure()'d in InitTime).\n * Runs the Simulation RunSim() and cleans up at the end by calling WaitForOtherProcs(), * CompleteSpeedMeasure() and RemoveEverything() * \param argc argument count from main * \param argv argument array from main */ static void Run(int argc, char **argv) { struct Problem *P = (struct Problem *) Malloc(sizeof(struct Problem),"Run - P"); /* contains options, files and data regaring the simulation at hand */ GetOptions(&P->Call, argc, argv); SetOutGroup1(&P->Call); if(P->Call.debug == 1) StartDebugger(); ReadParameters(P, P->Call.MainParameterFile); StartParallel(P, argv); InitSignals(); SetOutGroup2(P, &P->Call); InitSpeedMeasure(P); WaitForOtherProcs(P,1); SpeedMeasure(P, InitTime, StartTimeDo); Init(P); SpeedMeasure(P, InitTime, StopTimeDo); RunSim(P); WaitForOtherProcs(P,1); CompleteSpeedMeasure(P); RemoveEverything(P); } /** Main routine. * Launches MPI environment (Init and Finalize), starts Run() and gives * correct return signal on exit * \param argc argument count * \param argv argument array * \return exit code (always successive) */ int main(int argc, char **argv) { MPI_Init(&argc, &argv); Run(argc, argv); MPI_Finalize(); CheckSigReturn(); return EXIT_SUCCESS; }