| [a0bcf1] | 1 | /** \file pcp.c | 
|---|
|  | 2 | * Main routine, Run super-functions and signal handling. | 
|---|
|  | 3 | * | 
|---|
|  | 4 | * This file contains just framework: | 
|---|
|  | 5 | * The main() routine, passing its arguments on to Run(), which does signal | 
|---|
|  | 6 | * initialization InitSignals() and setting verbosity levels and calls RunSim() | 
|---|
|  | 7 | * which is then just a wrapper for CalculateMD(). | 
|---|
|  | 8 | * For the signals there is the actual handler SigHandlerCPULIM() for the external | 
|---|
|  | 9 | * exit signal, routines to check for the signal CheckCPULIM() and give correct | 
|---|
|  | 10 | * return code to old handler CheckSigReturn(). | 
|---|
|  | 11 | * | 
|---|
|  | 12 | Project: ParallelCarParrinello | 
|---|
|  | 13 | \author Jan Hamaekers | 
|---|
|  | 14 | \date 2000 | 
|---|
|  | 15 |  | 
|---|
|  | 16 | File: pcp.c | 
|---|
|  | 17 | $Id: pcp.c,v 1.36 2007/02/05 13:59:24 foo Exp $ | 
|---|
|  | 18 | */ | 
|---|
|  | 19 |  | 
|---|
|  | 20 | /*! \mainpage Introductory to Parallel Car Parrinello | 
|---|
|  | 21 | * | 
|---|
|  | 22 | * This is a quick overview of the programme pcp, explaining as shallowly as possible | 
|---|
|  | 23 | * its possble uses. For a more profound introduction be referred to its (german) diploma | 
|---|
|  | 24 | * thesis paper here: http://wissrech.ins.uni-bonn.de/teaching/diplom/diplom_hamaeker.pdf | 
|---|
|  | 25 | * (Note: All numbering behind formulas always refer to this thesis paper) | 
|---|
|  | 26 | * | 
|---|
|  | 27 | * ParallelCarParrinello - or short pcp - is the parallel implementation of | 
|---|
|  | 28 | * Car-Parrinello-Molecular-Dynamics, using a density field theory approach. In brief, | 
|---|
|  | 29 | * a given configuration is minimised in terms of the energy functional. The system | 
|---|
|  | 30 | * is encapsulated in a cell and thus periodic boundary conditions are applied. | 
|---|
|  | 31 | * The potential is also evaluated and this yields a force for every moving ion in | 
|---|
|  | 32 | * the cell. Thereby the molecular dynamics is performed. | 
|---|
|  | 33 | * | 
|---|
|  | 34 | * The basic idea behind the parallelisation is most importantly the simultaneous | 
|---|
|  | 35 | * fast fourier transformation, where the three-dimensional grid of coefficients in | 
|---|
|  | 36 | * a plane wave approach are subdivided into planes for the hither transformation and | 
|---|
|  | 37 | * pens for the thither one. These one or two dimensional ffts can be done by multiple | 
|---|
|  | 38 | * processes and later reassembled. | 
|---|
|  | 39 | * Second is the parallelisation of the minimisation. Each process holds a certain | 
|---|
|  | 40 | * "local" part of all electronic wave functions and the others are assumed constant | 
|---|
|  | 41 | * while minimising over these local ones by a conjugate gradient mechanism. | 
|---|
|  | 42 | * Lastly, the Gram-Schmidt-Orthonormalisation is also done parallely. | 
|---|
|  | 43 | * | 
|---|
|  | 44 | * This Introductory is divided in the following sections: | 
|---|
|  | 45 | * - \subpage intro Introduction | 
|---|
|  | 46 | * - \subpage tutorial Hands-on Tutorial | 
|---|
|  | 47 | * | 
|---|
|  | 48 | * © 2006 Frederik Heber | 
|---|
|  | 49 | */ | 
|---|
|  | 50 |  | 
|---|
|  | 51 | /*! \page intro Introduction | 
|---|
|  | 52 | * | 
|---|
|  | 53 | * | 
|---|
|  | 54 | * \section install Installation | 
|---|
|  | 55 | * | 
|---|
|  | 56 | * | 
|---|
|  | 57 | *  \subsection reqs Requirements | 
|---|
|  | 58 | * | 
|---|
|  | 59 | * This package relies on certain other packages. | 
|---|
|  | 60 | * - FFTW version 2\n | 
|---|
|  | 61 | *    This is a fast implementation of Fast Fourier Transformations. | 
|---|
|  | 62 | *    Currently, version 3 is not supported. Get it from their webpage, compile | 
|---|
|  | 63 | *    and install the libraries and header files in a path where the linker will | 
|---|
|  | 64 | *    find them.\n | 
|---|
|  | 65 | *    http://www.fftw.org/ | 
|---|
|  | 66 | * - MPICH\n | 
|---|
|  | 67 | *    This is an implementation of the Message Parsing Interface, which enables | 
|---|
|  | 68 | *    simple build of parallel programs by means of exchange data via messages. | 
|---|
|  | 69 | *    Here as well we need the libraries and header files, compile and install | 
|---|
|  | 70 | *    them in an accessible directory, and also the launcher mpirun.\n | 
|---|
|  | 71 | *    http://www-unix.mcs.anl.gov/mpi/mpich/ | 
|---|
|  | 72 | * - OpenDx\n | 
|---|
|  | 73 | *    For optional visualization of the calculations, displaying electronic densities, | 
|---|
|  | 74 | *    ion positions and forces within the cell.\n | 
|---|
|  | 75 | *    http://www.opendx.org/ | 
|---|
|  | 76 | * | 
|---|
|  | 77 | *  \subsection process Installation Process | 
|---|
|  | 78 | * | 
|---|
|  | 79 | * Note beforehand that this package manages compilation on various architectures with | 
|---|
|  | 80 | * help of some GNU tools such as automake (Makefile.am) respectively autconf (configure.ac). | 
|---|
|  | 81 | * | 
|---|
|  | 82 | * After unzipping the archive into an appropiate source directory, the programme has | 
|---|
|  | 83 | * to be compiled by executing consecutively: | 
|---|
|  | 84 | * - ./configure | 
|---|
|  | 85 | * - make | 
|---|
|  | 86 | * - make install | 
|---|
|  | 87 | * | 
|---|
|  | 88 | * It is recommended to create two additional directories:\n | 
|---|
|  | 89 | * One "build" directory which may safely reside in the main source folder, | 
|---|
|  | 90 | * wherein the actual build process is launched and thus temporarily created object | 
|---|
|  | 91 | * files removed from the main folder's contents. This happens automatically if | 
|---|
|  | 92 | * configure is not called from the main folder but within this build folder by: | 
|---|
|  | 93 | * - "../configure" | 
|---|
|  | 94 | * | 
|---|
|  | 95 | * Secondly, a "run" directory. This might either be an already present "/usr/local/bin" | 
|---|
|  | 96 | * which is taken as default, but also a "/home/foo/pcp/run". If desired this path | 
|---|
|  | 97 | * should be appended to the configure command as follows: | 
|---|
|  | 98 | * - "../configure --bindir /home/foo/pcp/run" | 
|---|
|  | 99 | * | 
|---|
|  | 100 | * and the executable will be automatically installed therein on calling make install. | 
|---|
|  | 101 | * | 
|---|
|  | 102 | * Alternatively, you may more generally pick "--prefix=/home/foo/pcp", which would prefix | 
|---|
|  | 103 | * all installation directories (such as doc, man, info, lib, ...) with the given directory | 
|---|
|  | 104 | * home/foo/pcp. Thus, the executable would now be installed upon evocation of 'make install' | 
|---|
|  | 105 | * in /home/foo/pcp/bin (because ./bin is default for executables). This and more options | 
|---|
|  | 106 | * are explained when invoking './configure --help'. The subdirectory where the executable is | 
|---|
|  | 107 | * installed in will from now on be called "bindir". | 
|---|
|  | 108 | * | 
|---|
|  | 109 | * Next, we must unpack the archive containing the pseudopotential files pseudopots.tar.gz by | 
|---|
|  | 110 | * entering 'tar -zxvf pseudopots.tar.gz' in the main dir of the package. Note that the | 
|---|
|  | 111 | * pseudopotential files now reside in 'defaults/pseudopot/' respectively to the main dir. | 
|---|
|  | 112 | * | 
|---|
|  | 113 | * At last, you may re-generate this very documentation of the pcp package with the help of the | 
|---|
|  | 114 | * 'doxygen' package (http://www.doxygen.org/) by execution of: | 
|---|
|  | 115 | * - make doxygen-doc (edit the paths in doxygen.cfg beforehand though!) | 
|---|
|  | 116 | * | 
|---|
|  | 117 | * Even more, if you are interested in the documentation of the simple molecuilder sub programme, | 
|---|
|  | 118 | * enter /home/foo/pcp/build/util/molecuilder and execute the same command therein which generates | 
|---|
|  | 119 | * the documentation in /home/foo/pcp/util/molecuilder/doc/html (and may be viewed by giving | 
|---|
|  | 120 | * /home/foo/pcp/util/molecuilder/doc/html/index.html to a standard web browser). | 
|---|
|  | 121 | * | 
|---|
|  | 122 | * \section use Usage | 
|---|
|  | 123 | * | 
|---|
|  | 124 | * Having unpacked, compiled and installed the programme, we are actually set to go | 
|---|
|  | 125 | * besides the specification of the actual physical problem. If you desire a ste-by-step | 
|---|
|  | 126 | * introduction to the handling of the simple hydrogen molecule, be referred to the section | 
|---|
|  | 127 | * \ref tutorial. | 
|---|
|  | 128 | * | 
|---|
|  | 129 | *  \subsection specify Specifying the Problem | 
|---|
|  | 130 | * | 
|---|
|  | 131 | *  There are two things necessary: | 
|---|
|  | 132 | *  - a configuration file with various informations from stop conditons to Z number | 
|---|
|  | 133 | *    or the basic cell vectors, giving its volume, number of ions, position and type | 
|---|
|  | 134 | *    and so on ... | 
|---|
|  | 135 | *    The given configuration file in the subdirectory "defaults" is fully documented | 
|---|
|  | 136 | *    and its sense should be easy to grasp thereby - or see below in the section | 
|---|
|  | 137 | *    \ref config. | 
|---|
|  | 138 | *  - pseudopotential files which reside in a certain directory specified in the | 
|---|
|  | 139 | *    config file. Ine is necessary for each different atom (discerned by atomic | 
|---|
|  | 140 | *    number Z) specifying the pseudo potential on a grid, generated by the fhi98PP | 
|---|
|  | 141 | *    package: http://www.fhi-berlin.mpg.de/th/fhi98md/fhi98PP/index.html\n | 
|---|
|  | 142 | *    These are also already present in the directory called "defaults/pseudopot". | 
|---|
|  | 143 | * | 
|---|
|  | 144 | *  \subsection exec Programme execution with mpirun ... | 
|---|
|  | 145 | * | 
|---|
|  | 146 | *  Basicially the programme is started as follows: | 
|---|
|  | 147 | *  - mpirun -np 1 -vvv -machinefile /home/foo/.rhosts pcp ../defaults/main_pcp\n | 
|---|
|  | 148 | *  where "-np 1" states the number of processors to use, so here just one, "-vvv" | 
|---|
|  | 149 | *  relates to the verbosity of the intermediate output (more v's, more output), | 
|---|
|  | 150 | *  "-machinefile" points to the rsh text file with allowed hosts (present on all | 
|---|
|  | 151 | *  hosts) and last but by no means least the config file with relative path | 
|---|
|  | 152 | *  "../defaults/main_pcp".\n | 
|---|
|  | 153 | *  Take care that your machinefile contains enough hosts and also that "-np" | 
|---|
|  | 154 | *  matches the product of ProcPEPsi and ProcPEGamma specified in the config file. | 
|---|
|  | 155 | *  You won't see the error messages, when one of clients hangs (mpirun just idles | 
|---|
|  | 156 | *  on all others as they cannot reach the host whose pcp has stopped). | 
|---|
|  | 157 | * | 
|---|
|  | 158 | *  The important input and output is all done via files. Configuration and PseudoPot | 
|---|
|  | 159 | *  files are read on startup when the Problem structure, with its various parts, is | 
|---|
|  | 160 | *  initialized. Each given amount of made steps forces, energies, ion positions and | 
|---|
|  | 161 | *  the electronic density if desired are written to file, which can be read and | 
|---|
|  | 162 | *  visualized lateron by the Open Data Explorer. | 
|---|
|  | 163 | * | 
|---|
|  | 164 | *  Let's now come to the parallel execution of the programme on multiple hosts. | 
|---|
|  | 165 | *  Generally, it is recommended to choose between n+1 and 2n processes for n hosts. | 
|---|
|  | 166 | *  There are two ways of connection to the other clients, either via rsh or ssh. | 
|---|
|  | 167 | *  Seemingly, mpich automatically picks the right one, first rsh, then ssh. | 
|---|
|  | 168 | * | 
|---|
|  | 169 | *    Note: | 
|---|
|  | 170 | *    -# Make sure that the executable with its needed input files are present on all | 
|---|
|  | 171 | *       other machines (Most common way is a shared directory, such as the user's home | 
|---|
|  | 172 | *       residing on a server shared via NFS to all clients) | 
|---|
|  | 173 | *    -# Make sure that the paths are correct and the same on all hosts (also the config paths) | 
|---|
|  | 174 | *    -# Use a homogeneous set of clients (no mixing of architectures or slow with fast | 
|---|
|  | 175 | *       systems!) | 
|---|
|  | 176 | * | 
|---|
|  | 177 | *    \subsubsection rsh ... with RSH | 
|---|
|  | 178 | * | 
|---|
|  | 179 | *    Most important in this case is the file .rhosts in your home, where line by line | 
|---|
|  | 180 | *    the hostnames of available computers are included. Via RSH-Login the programme is | 
|---|
|  | 181 | *    launched on all machines by mpirun.\n | 
|---|
|  | 182 | * | 
|---|
|  | 183 | *    \subsubsection ssh ... with SSH | 
|---|
|  | 184 | * | 
|---|
|  | 185 | *    Via a passphrase and an ssh agent a much safer - due to encryption - way is used | 
|---|
|  | 186 | *    to connect to other clients, employed as follows | 
|---|
|  | 187 | *    - ssh-keygen -t rsa (enter keyphrase, public and private key is stored in ~/.ssh/) | 
|---|
|  | 188 | *    - cat ~/.ssh/\*.pub >>~/.ssh/authorized_keys (if home is shared via nfs) OR \n | 
|---|
|  | 189 | *      cat ~/.ssh/\*.pub | ssh user\@remote-system 'cat>>.ssh/authorized_keys' (when the | 
|---|
|  | 190 | *      user home is not shared via nfs for each remote-system) | 
|---|
|  | 191 | * | 
|---|
|  | 192 | *    Afterwards, start an SSH agent and 'ssh-add' the above pass-phrase. It | 
|---|
|  | 193 | *    might be a good idea, to give a limited timespan 'ssh-add -t 86400' (seconds). | 
|---|
|  | 194 | * | 
|---|
|  | 195 | *    Test it by trying something like 'ssh host /bin/ls /bin'. It should work (just print | 
|---|
|  | 196 | *    the /bin directory) without any queries or halts. Otherwise remove host and its IP | 
|---|
|  | 197 | *    from .ssh/known_hosts and try again. Check this for all hosts in your .rhosts, | 
|---|
|  | 198 | *    or try 'tstmachines -machinefile=/home/foo/.rhosts' which is supplied with mpich. | 
|---|
|  | 199 | * | 
|---|
|  | 200 | *    The ssh method is especially preferrable if the hosts do not reside within a protected | 
|---|
|  | 201 | *    network, such as shielded by a firewall from the outside internet with internal IP | 
|---|
|  | 202 | *    adresses. | 
|---|
|  | 203 | * | 
|---|
|  | 204 | *  \subsection results The Results | 
|---|
|  | 205 | * | 
|---|
|  | 206 | *  Process 0 does all the output and is always started on your localhost. | 
|---|
|  | 207 | *  There a number of different output files if the programme is told to produce | 
|---|
|  | 208 | *  them in the configuration file. They all beginn with "pcp." and most end with | 
|---|
|  | 209 | *  the output step count in 4 digits such as ".0001". In between lies the actual | 
|---|
|  | 210 | *  content information: | 
|---|
|  | 211 | *  - "speed": Time measurements for the various sections of the calculations. | 
|---|
|  | 212 | *  - "density": There are three types: "doc" and "dx" specifiy the file format for | 
|---|
|  | 213 | *    OpenDx, while "data" contains the actual densities. These are only produced | 
|---|
|  | 214 | *    if DoOutVis is set to 1 | 
|---|
|  | 215 | *  - "ions": "pos" means position, "force" contains forces from the dynamics | 
|---|
|  | 216 | *    simulation represented by arrows in OpenDx, "datZ", "doc" and "dx" are again | 
|---|
|  | 217 | *    only needed to specifiy the file format. | 
|---|
|  | 218 | *  - "energy.all": Total energy | 
|---|
|  | 219 | *  - "forces.all": Total force | 
|---|
|  | 220 | * | 
|---|
|  | 221 | *  There are visualization scripts included in the subdirectory "defaults" such | 
|---|
|  | 222 | *  as "visual.cfg" for use with OpenDx. | 
|---|
|  | 223 | * | 
|---|
|  | 224 | * | 
|---|
|  | 225 | * \section start Getting started | 
|---|
|  | 226 | * | 
|---|
|  | 227 | *    In Order to get a first understanding of the programme, here is a brief explanation of its most | 
|---|
|  | 228 | *    important functions: | 
|---|
|  | 229 | *    -# Init(): Super-function to most of the initialisation subroutines, that allocate memory or | 
|---|
|  | 230 | *      setup the wave functions randomly or calculate first energies and densities. | 
|---|
|  | 231 | *    -# CalculateMD(): Contains the super-loop that calculate the force acting on the ions, moves them, | 
|---|
|  | 232 | *      updates the electronic wave functions and prints results to file. | 
|---|
|  | 233 | *    -# CalculateForce(): On evaluating the force acting on the ions also the actual minimisation of | 
|---|
|  | 234 | *      the wave functions - one after the other - is done, energies and densities calculated on ever | 
|---|
|  | 235 | *      finer grids. | 
|---|
|  | 236 | *    -# CalculateNewWave(): Is the actual (Fletcher&Reeves) conjugate gradient implementation that | 
|---|
|  | 237 | *      minimises one wave function by calculating the gradient, preconditioning the resulting matrix, | 
|---|
|  | 238 | *      retrieving the conjugate direction and doing the line search for the one-dimensional minimum. | 
|---|
|  | 239 | * | 
|---|
|  | 240 | *    Most of these functions call many others in sequence, so be prompted to their respective | 
|---|
|  | 241 | *    documentation for further explanation and formula. | 
|---|
|  | 242 | * | 
|---|
|  | 243 | *  \subsection follow Following the course of the programme | 
|---|
|  | 244 | * | 
|---|
|  | 245 | *    When the programme is launched with the option "-vvv" for greater verbosity a series of output | 
|---|
|  | 246 | *    is printed to the screen, which briefly shall be explained here. | 
|---|
|  | 247 | * | 
|---|
|  | 248 | *    At first there is the initialization output stating number of nodes on the ever more finely | 
|---|
|  | 249 | *    grids in normal and reciprocal space, density checks, parameters read from the configuration | 
|---|
|  | 250 | *    file and so on, until the minimisation finally commences. | 
|---|
|  | 251 | * | 
|---|
|  | 252 | *    The most prominent line then looks as follows: | 
|---|
|  | 253 | *    (0,0,0)S(0,0,0):        TE: -1.750005e+01       ATE: -1.825983e+01       Diff: 1.000000e+00      --- d: 6.634701e-01    dEdt0: -1.943969e+00    ddEddt0: 9.673485e-01 | 
|---|
|  | 254 | * | 
|---|
|  | 255 | *    The first three numbers refer to process numbers within MPI communicators: SpinUp/Double(0)SpinDown(1), | 
|---|
|  | 256 | *    number within coefficients sharing, within wave functions sharing group. | 
|---|
|  | 257 | *    The next three numbers after the "S" refer to: Number of performed minimisation steps, number of | 
|---|
|  | 258 | *    currently minimised local wave function, number of minimisation steps performed on this wave function. | 
|---|
|  | 259 | *    Afterwards follow: Total energy (TE), approximated total energy (ATE), relative error in the prediction (diff), | 
|---|
|  | 260 | *    delta (the one-dimensional line search parameter \f$cos \Theta\f$), the first (dEdt0) and second (ddEddt0) | 
|---|
|  | 261 | *    derivative of the total energy functional (at parameter = 0). | 
|---|
|  | 262 | * | 
|---|
|  | 263 | *    Ever and again there appear checks such as: | 
|---|
|  | 264 | *    -# "ARelTE: 1.387160e-02    ARelKE: 3.074929e-03", stating the actual relative difference in total and | 
|---|
|  | 265 | *       kinetic energy between this and the last check | 
|---|
|  | 266 | *    -# "(0)TestGramSchm: Ok !", having tested the desired orthogonality of the wave functions | 
|---|
|  | 267 | *    -# "Beginning minimisation...", starting a perturbed minimisation run. | 
|---|
|  | 268 | * | 
|---|
|  | 269 | *    At the end the programme will print a number of different energies to the screen: total, Hartree, | 
|---|
|  | 270 | *    ionic, kinetic, ... energies. | 
|---|
|  | 271 | * | 
|---|
|  | 272 | * | 
|---|
|  | 273 | * \section tech Technical Details | 
|---|
|  | 274 | * | 
|---|
|  | 275 | *  \subsection config The Configuration file explained | 
|---|
|  | 276 | * | 
|---|
|  | 277 | *  The first column always contains a keyword (for which the programme parses, so | 
|---|
|  | 278 | *  the ordering as given here needn't be replicated) following one or more values | 
|---|
|  | 279 | *  or strings. | 
|---|
|  | 280 | * | 
|---|
|  | 281 | *  - mainname: The short name of the Programme | 
|---|
|  | 282 | *  - defaultpath: Full path indicating where to put the output files during runtime | 
|---|
|  | 283 | *  - pseudopotpath: Full path to the needed PseudoPot files | 
|---|
|  | 284 | *  - ProcPEPsi: Number of process (groups consisting of ProcPEPsi processes) among which | 
|---|
|  | 285 | *    wave functions are split up (e.g. MaxPsiDouble 16, ProcPEPsi 4, ProcPEGamma 2 results | 
|---|
|  | 286 | *    in 4 wave functions per group and each group has 2 processes sharing coefficients) | 
|---|
|  | 287 | *  - ProcPEGamma: Number of processes which make up one wave functions coefficients sharing group | 
|---|
|  | 288 | *  - DoOutVis: Output files for OpenDX (1 - yes, 0 - no) | 
|---|
|  | 289 | *  - DoOutMes: Output Measurement (1 - yes, 0 - no) | 
|---|
|  | 290 | *  - AddGramSch: Do an additional GramSchmidt-run after a Psi has been minimised (hack) | 
|---|
|  | 291 | *  - Seed: Initialization value for pseudo random number generator on generating wave function coefficients, 0 means | 
|---|
|  | 292 | *    constant 1/V for perturbed wave functions. | 
|---|
|  | 293 | *  - UseSeparation: Minimise unoccupied states in a separate run per level indepedent of occupied states | 
|---|
|  | 294 | *  - UseFermiOccupation: After each minimisation run, occupation of orbitals will be reset according to Fermi distribution | 
|---|
|  | 295 | *  - MaxOuterStep: Number of to be performed Molecular Dynamics steps | 
|---|
|  | 296 | *  - Deltat: time slice per MD step | 
|---|
|  | 297 | *  - OutVisStep: Output visual data at every ..th step | 
|---|
|  | 298 | *  - OutSrcStep: Output measurement data at every ..th step | 
|---|
|  | 299 | *  - TargetTemp: Target temperature in the cell (this medium ion velocity) | 
|---|
|  | 300 | *  - ScaleTempStep: MD temperature is scaled at every ..th step | 
|---|
|  | 301 | *  - MaxPsiStep: number of minimisation steps per state(!) before going on to next local state (0 - default (3)) | 
|---|
|  | 302 | *  - MaxMinStep: standard level stop criteria - maximum number of minimisation steps over all states | 
|---|
|  | 303 | *  - RelEpsTotalE: standard level stop criteria - minimum relative change in total energy | 
|---|
|  | 304 | *  - RelEpsKineticE: standard level stop criteria - minimum relative change in kinetic energy | 
|---|
|  | 305 | *  - MaxMinStopStep: standard level stop condition is evaluated at every ..th step, should scale with orbital number | 
|---|
|  | 306 | *  - MaxMinGapStopStep: standard level stop condition for gap calculation is evaluated at every ..th step | 
|---|
|  | 307 | *  - MaxInitMinStep: initial level stop criteria - maximum number of minimisation steps over all states | 
|---|
|  | 308 | *  - InitRelEpsTotalE: initial level stop criteria - minimum relative change in total energy | 
|---|
|  | 309 | *  - InitRelEpsKineticE: initial level stop criteria - minimum relative change in kinetic energy | 
|---|
|  | 310 | *  - InitMaxMinStopStep: initial level stop condition is evaluated at every ..th step | 
|---|
|  | 311 | *  - InitMaxMinGapStopStep: initial level stop condition is evaluated at every ..th step | 
|---|
|  | 312 | *  - BoxLength: Length of the super cell in atomic units (lower triangle matrix: 1 value, 2 values, 3 values) | 
|---|
|  | 313 | *  - ECut: Energy cutoff specifying how man plane waves are used (in Hartrees) | 
|---|
|  | 314 | *  - MaxLevel: number of different mesh levels in the code, >=2 | 
|---|
|  | 315 | *  - Level0Factor: Mesh points Factor for the topmost level | 
|---|
|  | 316 | *  - RiemannTensor: Use RiemannTensor | 
|---|
|  | 317 | *  - PsiType: Whether orbits are always doubly occupied or spin up and down is discerned (and completely separated in calculation, needs at least 2 processes!) | 
|---|
|  | 318 | *  - MaxPsiDouble: specifying how many doubly occupied orbits there are (either this or next depends on given PsiType) | 
|---|
|  | 319 | *  - PsiMaxNoUp/PsiMaxNoDown: specifying how many spin up or down occupied orbits there are | 
|---|
|  | 320 | *  - RCut: Radial cutoff in the Ewald summation | 
|---|
|  | 321 | *  - IsAngstroem: length in 0 - Bohr radii or in 1- Angstrᅵm | 
|---|
|  | 322 | *  - MaxTypes: maximum number of different ion types (important for next constructs) | 
|---|
|  | 323 | *  - Ion_TypeNr: 6 values - Number of ions, atomic value, Gauss radius, maximum angular momentum of Pseudopotenials, L_loc and mass of ion (Nr in keyword must go from 1..MaxTypes!) | 
|---|
|  | 324 | *  - Ion_TypeNr_Nr: 4 values - position (x,y,z) in atomic units, IonMoveType (0 - movable, 1 - fixed).For each stated IonType there must such a keywor for each ion (second number goes from 1..Number of ions!) | 
|---|
|  | 325 | * | 
|---|
|  | 326 | * \section notes Notes on implementation and code understanding | 
|---|
|  | 327 | * | 
|---|
|  | 328 | *  \subsection densities Densities and Wave functions | 
|---|
|  | 329 | * | 
|---|
|  | 330 | *    The coefficients of densities and wave functions both in real and reciprocal space are only in parts stored locally | 
|---|
|  | 331 | *    on the process. Coefficients of one wave functions may be shared under a number of processes, increasing speed when | 
|---|
|  | 332 | *    doing loops over G - reciprocal vector, and the wave functions as a whole may also be shared, increasing Fourier transform | 
|---|
|  | 333 | *    and also minimization - however with the drawback of parallel conjugate gradient minimization which might lead to errors. | 
|---|
|  | 334 | * | 
|---|
|  | 335 | *    Additionally, the coefficients are structured in levels. The grid is present in ever finer spacings, which is very helpful | 
|---|
|  | 336 | *    in the initial minimization after the wave functions have been randomly initialized. Also, the density is always calculated | 
|---|
|  | 337 | *    on a grid twice as fine as the one containing wave function coefficients, as the fourier transform is then exact. That's | 
|---|
|  | 338 | *    why there are always two levels: LatticeLevel LevS and Lev0. The first being the standard level with the wave functions, | 
|---|
|  | 339 | *    the latter for densities (one often stumbles over "Dens0" as a variable name). Also there two kinds of densities: | 
|---|
|  | 340 | *    DensityArray and DensityCArray, the first contains densities in real space, the latter in reciprocal space. However, both | 
|---|
|  | 341 | *    are densities, hence the scalar product of their respective wave functions. | 
|---|
|  | 342 | *    For this reason one finds often construct which use a position factor in order to transform wave coefficients into ones | 
|---|
|  | 343 | *    on a higher level, followed by a fourier transform and a transposition, see e.g. CalculateOneDensityR(). It is often | 
|---|
|  | 344 | *    encountered throughout the code. | 
|---|
|  | 345 | * | 
|---|
|  | 346 | * | 
|---|
|  | 347 | * © 2006 Frederik Heber | 
|---|
|  | 348 | */ | 
|---|
|  | 349 |  | 
|---|
|  | 350 |  | 
|---|
|  | 351 | /*! \page tutorial Hands-on Tutorial | 
|---|
|  | 352 | * | 
|---|
|  | 353 | * | 
|---|
|  | 354 | *  In this section we present a step-by-step tutorial, which leads you from the creation of a typical config file for a hydrogen | 
|---|
|  | 355 | *  molecule through the actual calcualtion to the analysis with final plots and graphics. | 
|---|
|  | 356 | * | 
|---|
|  | 357 | *  \section configfile Creating the config file | 
|---|
|  | 358 | * | 
|---|
|  | 359 | *    First, we need to create the config file. For this we use the utility 'molecuilder' which resides in the util subdirectory | 
|---|
|  | 360 | *    and is compiled upon execution of 'make' as described in Section \ref process and finally installed in the bindir. Go there, | 
|---|
|  | 361 | *    pick and create a directory, such as '/tmp/H2/' and start the utility: | 
|---|
|  | 362 | *    - ./molecuilder /tmp/H2/H2-config | 
|---|
|  | 363 | * | 
|---|
|  | 364 | *    You might want to change the config file name, here chosen as 'H2-config', according to your personal liking, it is completely | 
|---|
|  | 365 | *    arbitrary. It is however recommended to pick an individual directory for each molecule upon which you choose to perform DFT | 
|---|
|  | 366 | *    calculations as there are a number of files which are produced during the run. | 
|---|
|  | 367 | * | 
|---|
|  | 368 | *    Molecuilder will first ask for a cell size, which is entered by typing in the six elements of a symmetric matrix, for an cubic | 
|---|
|  | 369 | *    cell of 20 a.u. length, enter 20, 0, 20, 0, 0, 20. | 
|---|
|  | 370 | * | 
|---|
|  | 371 | *    Next, you see two lists, both are empty, and a menu. So far no elements or atoms have been put into the cell. We want to add an | 
|---|
|  | 372 | *    atom, press 'a' and enter. Now we have to choose in which way we want to add the atom. The choice is either in absolute (x,y,z) | 
|---|
|  | 373 | *    coordinates, or relative to either another absolute point or to an already placed atom. The last option is used in the case when | 
|---|
|  | 374 | *    there are two atoms and you want to add a third according to its angle and distance to the former two. | 
|---|
|  | 375 | * | 
|---|
|  | 376 | *    For now, we add the first hyodrogen atom. We want to put the molecule which consists of two hydrogen atoms at a distance of 1.4 | 
|---|
|  | 377 | *    atomic units symmetrically off the cell's centre. Thus we choose option b, enter 10, 10, 10 as the reference point and then give the | 
|---|
|  | 378 | *    relative coordinates as 0.7, 0, 0. Thus, the molecule will be aligned along the x axis. The Z number is 1, in the case of hydrogen. | 
|---|
|  | 379 | *    Upon the last enter you notice that both the element and the atom list have filled a bit. Let's add the second atom. Choose 'a' | 
|---|
|  | 380 | *    again and option c this time, picking the first already entered atom as the reference point, by entering 0 ('number in molecule') | 
|---|
|  | 381 | *    next. Enter: -1.4, 0 , 0, and again Z = 1. | 
|---|
|  | 382 | *    Afterwards, we see that in the element list, the second entry changed from 1 to 2. It's the number of atoms of this element which | 
|---|
|  | 383 | *    the molecules consists of so far. Next is Z number, R-cut, lmax and lloc, the element's mass and a name and short hand which is | 
|---|
|  | 384 | *    taken from the database in the file 'elements.db' which must reside in the same directory as molecuilder. | 
|---|
|  | 385 | * | 
|---|
|  | 386 | *    Finally, we need to specify some additional info in the config file that's non-standard. Enter 'e' in the menu to edit the | 
|---|
|  | 387 | *    current configuration. The list is huge, you might have to scroll a bit and don't panic, most of these are trivial and will | 
|---|
|  | 388 | *    become comprehensible with time. First is the paths, option 'B' and 'C'. Upon entering the option, it will give the the | 
|---|
|  | 389 | *    "old" value and request a new one. Enter: | 
|---|
|  | 390 | *    - B: /tmp/H2/ | 
|---|
|  | 391 | *    - C: /home/foo/pcp/defaults/pseudopot | 
|---|
|  | 392 | * | 
|---|
|  | 393 | *    The latter one is the path to the pseudopotential files which reside in the archive 'pseudopots.tar.gz' in /home/foo/pcp or | 
|---|
|  | 394 | *    whereever you have extracted the distributed pcp package to. Unzip it by 'tar -zxvf pseudopots.tar.gz' and it will create an | 
|---|
|  | 395 | *    additional directory called defaults wherein another directory called pseudopot resides containing the files for element with | 
|---|
|  | 396 | *    atomic number 1 up to 80 -- these are the files generated with the FHIMD PseudoPotential Generator. | 
|---|
|  | 397 | * | 
|---|
|  | 398 | *    As the molecule and all necessary config file settings are entered, enter 's' to save the config file under the filename stated | 
|---|
|  | 399 | *    on the command-line and 'q' to quit. | 
|---|
|  | 400 | * | 
|---|
|  | 401 | * | 
|---|
|  | 402 | *    Now, we take a look at the created config file itself. Take your favorite editor and compare it to the explanations given in the | 
|---|
|  | 403 | *    secion \ref config. According to your machine setup, the following entries must be changed: | 
|---|
|  | 404 | *    - ProcPEPGamma/ProcPEPsi: Their product gives number of processes which must be started. As a beginning you might leave 1 here for | 
|---|
|  | 405 | *      both entries. | 
|---|
|  | 406 | *    - VectorPlane: Enter 1 (it's a cut plane for a two-dimensional representation of the current density) | 
|---|
|  | 407 | *    - VectorCut: Enter 10. (cut is done at this coordinate along the by VectorPlane given axis) | 
|---|
|  | 408 | *    - BoxLength: Check the six entries of the symmetric matrix for the correct cell size | 
|---|
|  | 409 | *    - ECut: In general, both the precision and time needed for the calculation. Reasonable values for H2 are 8. to 24. | 
|---|
|  | 410 | *    - MaxPsiDouble: Enter 1 here, it's the number of doubly occupied orbitals which is one in case of the hydrogen molecule | 
|---|
|  | 411 | *    - AddPsis: Leave 0, this is the number of unoccupied orbitals which we do not need. | 
|---|
|  | 412 | *    - RCut: Enter 20., the cell size. It's another numerical cutoff here for the ewald summation of the infinite coulomb potential. | 
|---|
|  | 413 | * | 
|---|
|  | 414 | *    This sums it up. You might also have noticed three entries, one for the element and another two for each hydrogen atom, very similar | 
|---|
|  | 415 | *    in appearance to the list given by molecuilder. Compare to the explanation given in the config file section \ref config above. | 
|---|
|  | 416 | *    Of course, the above shown changes could also have been made during the edit menu of molecuilder, we just wanted to demonstrate that | 
|---|
|  | 417 | *    it's perfectly safe to use a text editor as well (any more than one white space such as some more spaces and/or tabs are | 
|---|
|  | 418 | *    automatically read over during the parsing of the config file). | 
|---|
|  | 419 | * | 
|---|
|  | 420 | *    We may now start a calculation. | 
|---|
|  | 421 | * | 
|---|
|  | 422 | *  \section launching Performing the calculation | 
|---|
|  | 423 | * | 
|---|
|  | 424 | *    Enter the directory which you specified as the 'bindir' where the executable shall be installed in, let's say for this example | 
|---|
|  | 425 | *    it's '/tmp/pcp/run'. Now, if you have the mpi package installed and are able to launch mpirun, enter this: | 
|---|
|  | 426 | * | 
|---|
|  | 427 | *    - mpirun -np 1 ./pcp -vvv -w /tmp/H2/H2-config | 
|---|
|  | 428 | * | 
|---|
|  | 429 | *    Here, '1' is the number of processes needed, the product of ProcPEPGamma and ProcPEPsi. If not, enter this: | 
|---|
|  | 430 | * | 
|---|
|  | 431 | *    - ./pcp -vvv -w /tmp/H2/H2-config | 
|---|
|  | 432 | * | 
|---|
|  | 433 | *    More comfortably we may also take the unix shell script 'run', which uses mpirun and evaluates the product by itself. However, | 
|---|
|  | 434 | *    as it is platform-dependent and thus it is provided just as an idea-giver. It is installed in the bindir from subdirectory 'run' | 
|---|
|  | 435 | *    during 'make install'. However, its executable permissions must still be set: e.g. 'chmod 755 run' (The same applies for all | 
|---|
|  | 436 | *    other 'shell scripts' used in this tutorial). Also, the common paths are set in the file 'suffix', also in the bindir and must | 
|---|
|  | 437 | *    be adapted to your local context.  Basically, there is always the rather uncomfortable way of doing every by hand, which is | 
|---|
|  | 438 | *    explained as well, or use these scripts if you can make them work. | 
|---|
|  | 439 | * | 
|---|
|  | 440 | *    - Enter: ./run to see it's options | 
|---|
|  | 441 | *    - Enter: ./run -vvv /tmp/H2/H2-config H2 to run (here a log file pcp.H2.log is written, with the tag 'H2' which may be chosen arbitrarily). | 
|---|
|  | 442 | * | 
|---|
|  | 443 | *    Possible errors here are as follows: | 
|---|
|  | 444 | *    - paths in 'suffix' are not changed or were overwritten with defaults on make 'install' | 
|---|
|  | 445 | *    - permission denied: 'chmod 755 run' missing (necessary after 'make install') | 
|---|
|  | 446 | *    - pseudopots not unpacked or wrong path given in config file | 
|---|
|  | 447 | *    - charged system: MaxPsiDouble (..SpinUp/Down) does not match required number of orbitals of the system | 
|---|
|  | 448 | * | 
|---|
|  | 449 | *    Given 8 (hartree) as ECut and a moderately well performing computer numbers should rush by for a few seconds. Afterwards, the calculations | 
|---|
|  | 450 | *    has already finished (this is of course only the case for a simple molecule such as the hydrogen one). The last hundred lines or so | 
|---|
|  | 451 | *    consist of calculated values, such as the susceptibility tensor and among others the isometric values, shielding for each hydrogen atom, | 
|---|
|  | 452 | *    the total energy (though beware, due to the pseudopotential ansatz for a general molecule it is not equivalent to the real absolute binding | 
|---|
|  | 453 | *    energy) and the perturbed energy which are of minor interest. | 
|---|
|  | 454 | * | 
|---|
|  | 455 | *    A number of file where generated in /tmp/H2/, some general ones are listed in sections above, we want to discuss a chosen few here: | 
|---|
|  | 456 | * | 
|---|
|  | 457 | *    - LOG: pcp.H2.log contains all the lines which you just might have missed before (only if pcp was started with 'run'-script) | 
|---|
|  | 458 | *    - Cut plane: pcp.current....csv contains (for each process of ProcPEPGamma) it's part of the specified cut plane of the current density. | 
|---|
|  | 459 | *    - Susceptibility: pcp.chi.csv contains first an explanatory line and then one with ten entries, first is the ecut, then follow the nine entries of | 
|---|
|  | 460 | *      the rank 2 tensor. | 
|---|
|  | 461 | *    - shielding: pcp.sigma_i0_H.csv and ...i1... respectively contain the same as pcp.chi.csv only for the shielding tensor. | 
|---|
|  | 462 | * | 
|---|
|  | 463 | *    And this is it. Of course, \f$H_2\f$ is a very simple molecule. Regard a case where you might want to calculate a (4,3) nanotube with | 
|---|
|  | 464 | *    hundreds of symmetrically aligned carbon atoms. Each of these will produce a pcp.sigma_i..csv, though all should be the same in the | 
|---|
|  | 465 | *    end. Also, we would like to have convergence plots and vector graphs of the current density and its values. | 
|---|
|  | 466 | * | 
|---|
|  | 467 | *    In order to produce convergence plots, we need to perform the calculations for various cutoff energies (ECut). You may either move all | 
|---|
|  | 468 | *    the files in /tmp/H2 to e.g. /tmp/H2/8Ht/, edit the config file, pick 12Ht now as ECut and re-run the calculations, move the results | 
|---|
|  | 469 | *    and so on for four to six different values. An easier way is the script file 'test_for_ecut.sh'. Delete all pcp.*-files in /tmp/H2/ | 
|---|
|  | 470 | *    (Thus it is always good practice not to pick pcp as prefix for the config file!) and enter in the bindir subdirectory: | 
|---|
|  | 471 | * | 
|---|
|  | 472 | *    - chmod 755 test_for_ecut (as it is platform-dependent as well, otherwise you get an error: permission denied) | 
|---|
|  | 473 | *    - ./test_for_ecut.sh to see it complaining the lack of a config file | 
|---|
|  | 474 | *    - ./test_for_ecut.sh /tmp/H2/H2-config to see its desired options and the current ecut list (it's a string in the script file adapted to | 
|---|
|  | 475 | *      4 processes sharing the coefficients) | 
|---|
|  | 476 | *    - ./test_for_ecut.sh /tmp/H2/H2-config H2 24 to launch it. | 
|---|
|  | 477 | * | 
|---|
|  | 478 | *    Notice that it produces no ouput as does pcp. The lines are redirected into the log file whose tag you specified as 'H2'. As we want to | 
|---|
|  | 479 | *    check on what it's doing, we ought to have started it using 'nohup ./test_for...... 24 &' to launch it as a child process. Now, | 
|---|
|  | 480 | *    we need to press CTRL-Z to stop it and enter 'bg' to background it. Look into /tmp/H2. Notice various new subdirectories | 
|---|
|  | 481 | *    resembling the specified ecuts. Enter 'tail -f /tmp/H2/3Ht/pcp.H2.log' to see that the first calculation is already finished. | 
|---|
|  | 482 | *    Continue with 12Ht and furtheron if you are impatient. Otherwise, wait for the shell script to terminate. Now, we have multiple | 
|---|
|  | 483 | *    points for a convergence graph to evaluate. Thus, onward to the analysis. | 
|---|
|  | 484 | * | 
|---|
|  | 485 | * | 
|---|
|  | 486 | *  \section analysis Analyzing the results | 
|---|
|  | 487 | * | 
|---|
|  | 488 | *    First, let's have a look at the cut plane and isospheres of the current density. | 
|---|
|  | 489 | * | 
|---|
|  | 490 | *    - start gnuplot, enter "plot 'pcp.current_proc0.csv' with vectors" in one of the various cutoff directories. Notice the rotating vector | 
|---|
|  | 491 | *      field which the magnetic field induced in between the two hydrogen atoms. | 
|---|
|  | 492 | *    - start opendx by entering dx, load Programm visual.net which resides in the defaults subdirectory of the archive. It may take a moment | 
|---|
|  | 493 | *      the more complex the grid (the higher the energy cutoff) is and will then render three-dimensional isospheres of the current density | 
|---|
|  | 494 | *      on the screen. You may rotate them and 'Sequence Control' let's you see each the current density of of the three magnetic field | 
|---|
|  | 495 | *      directions. | 
|---|
|  | 496 | * | 
|---|
|  | 497 | *    Now onward to the real analysis. | 
|---|
|  | 498 | *    Here again, we use some shell scripts which gather the results from the various calculations into one file (as you may guess, they pick | 
|---|
|  | 499 | *    the last lines from each dir from the .csv-files and gather them in one file with the same name in the main config directory. Enter in | 
|---|
|  | 500 | *    the bindir (don't forget to chmod): | 
|---|
|  | 501 | * | 
|---|
|  | 502 | *    - ./gather_all_results /tmp/ H2, where the first argument gives the absolute directory and H2 the molecules subdirectory. | 
|---|
|  | 503 | * | 
|---|
|  | 504 | *    It will print some stuff and finish. If you have gnuplot installed, some plots may briefly appear as a flicker on the screen. They | 
|---|
|  | 505 | *    created as a postscript in one file called H2.ps. Its purpose is purely to quickly check on the general convergence of each value and | 
|---|
|  | 506 | *    to pinpoint mavericks. Afterwards, you will find a number of results-...-files in the main config directory /tmp/H2. They contain | 
|---|
|  | 507 | *    the gathered chi and sigmas for the various energy cutoffs that were calculated. They are used in the creation of H2.ps. It is | 
|---|
|  | 508 | *    easy to create your own convergence analysis from these. Note also that in 'pcp.convergence.csv' you find a lot of variables, each | 
|---|
|  | 509 | *    run more ore less adds a new line. Thus, this file might be especially useful to check convergence of lets-say sigma against | 
|---|
|  | 510 | *    ecut or against the lattice constant (if you performed different runs with varyzing constants). Basically, all .csv-Files are | 
|---|
|  | 511 | *    results (the suffix may be changed in bindir/suffix though). | 
|---|
|  | 512 | * | 
|---|
|  | 513 | *    That completes the magnetic analysis of the hydrogen molecule. | 
|---|
|  | 514 | * | 
|---|
|  | 515 | *    But there are more complex molecules and there we want to delve more deeply into the topic of parallel computing. | 
|---|
|  | 516 | * | 
|---|
|  | 517 | * © 2006 Frederik Heber | 
|---|
|  | 518 | */ | 
|---|
|  | 519 |  | 
|---|
|  | 520 | #include<signal.h> | 
|---|
|  | 521 | #include<stdlib.h> | 
|---|
|  | 522 | #include<stdio.h> | 
|---|
|  | 523 | #include<math.h> | 
|---|
|  | 524 | #include"mpi.h" | 
|---|
|  | 525 | #include"data.h" | 
|---|
|  | 526 | #include"errors.h" | 
|---|
|  | 527 | #include"helpers.h" | 
|---|
|  | 528 | #include"init.h" | 
|---|
|  | 529 | #include"pcp.h" | 
|---|
|  | 530 | #include"opt.h" | 
|---|
|  | 531 | #include"myfft.h" | 
|---|
|  | 532 | #include"gramsch.h" | 
|---|
|  | 533 | #include"output.h" | 
|---|
|  | 534 | #include "run.h" | 
|---|
|  | 535 | #include "pcp.h" | 
|---|
|  | 536 |  | 
|---|
|  | 537 | #define MYSTOPSIGNAL SIGALRM    //!< external signal which gracefully stops calculations | 
|---|
|  | 538 |  | 
|---|
|  | 539 | static void InitSignals(void); | 
|---|
|  | 540 |  | 
|---|
|  | 541 | void (*default_sighandler)(int);        //!< deals with external signal, set to \ref SigHandlerCPULIM() | 
|---|
|  | 542 | volatile sig_atomic_t cpulim = 0;       //!< contains external signal | 
|---|
|  | 543 | int par_cpulim = 0;                             //!< for all MPI nodes: 0 = continue, 1 = limit reached, end operation | 
|---|
|  | 544 | int myrank = 0;                                 //!< Rank among the multiple processes | 
|---|
|  | 545 |  | 
|---|
|  | 546 | /** Handles external Signal CPULimit. | 
|---|
|  | 547 | * \ref cpulim is set to 1 as a flag | 
|---|
|  | 548 | * especially if time limit is exceeded. | 
|---|
|  | 549 | * \param sig contains signal code which is printed to screen | 
|---|
|  | 550 | */ | 
|---|
|  | 551 | static void SigHandlerCPULIM(int sig) { | 
|---|
|  | 552 | if(myrank == 0) | 
|---|
|  | 553 | fprintf(stderr,"process %i received signal %d\n", myrank, sig); | 
|---|
|  | 554 | cpulim = 1; | 
|---|
|  | 555 | return; | 
|---|
|  | 556 | } | 
|---|
|  | 557 |  | 
|---|
|  | 558 | /** Checks if CPU limit is reached. | 
|---|
|  | 559 | * is supposed to be called ever and again, checking for \ref cpulim, | 
|---|
|  | 560 | * then \ref cpulim is broadcasted to all MPI nodes preventing deadlock | 
|---|
|  | 561 | \note On reaching time limit: save all in datafile, close everything | 
|---|
|  | 562 | * \param *P \ref Problem at hand | 
|---|
|  | 563 | * \return is equal to cpulim | 
|---|
|  | 564 | */ | 
|---|
|  | 565 | int CheckCPULIM(struct Problem *P) { | 
|---|
|  | 566 | /* Bei Zeitlimit: evtl DataFile ausgeben, alles abschliessen */ | 
|---|
|  | 567 | par_cpulim = cpulim; | 
|---|
|  | 568 | MPI_Bcast(&par_cpulim, 1, MPI_INT, 0, MPI_COMM_WORLD); | 
|---|
|  | 569 | if (!par_cpulim) return 0; | 
|---|
|  | 570 | if (myrank == 0) fprintf(stderr,"cpulim reached, finish data\n"); | 
|---|
|  | 571 | return 1; | 
|---|
|  | 572 | /* Ende einleiten ! */ | 
|---|
|  | 573 | } | 
|---|
|  | 574 |  | 
|---|
|  | 575 | /** Initializes signal handling. | 
|---|
|  | 576 | * Sets \ref myrank to 0 and installs \ref SigHandlerCPULIM | 
|---|
|  | 577 | */ | 
|---|
|  | 578 | static void InitSignals(void) { | 
|---|
|  | 579 | myrank = 0/*P->Par.me*/; | 
|---|
|  | 580 | default_sighandler = signal(MYSTOPSIGNAL, SigHandlerCPULIM); | 
|---|
|  | 581 | } | 
|---|
|  | 582 |  | 
|---|
|  | 583 | /** Checks additionally for signal. | 
|---|
|  | 584 | * If signal has been processed, it gets re-processed by default handler | 
|---|
|  | 585 | * in order to return correct status to calling script | 
|---|
|  | 586 | */ | 
|---|
|  | 587 | static void CheckSigReturn(void) { | 
|---|
|  | 588 | signal(MYSTOPSIGNAL, default_sighandler); | 
|---|
|  | 589 | if(cpulim) | 
|---|
|  | 590 | raise(MYSTOPSIGNAL); | 
|---|
|  | 591 | } | 
|---|
|  | 592 |  | 
|---|
|  | 593 | #if 0 | 
|---|
|  | 594 | #define testing 1 | 
|---|
|  | 595 | /** FFT Test. | 
|---|
|  | 596 | * perfoms some tests on fft | 
|---|
|  | 597 | * \param *P \ref Problem at hand | 
|---|
|  | 598 | */ | 
|---|
|  | 599 | static double fftTest(struct Problem *P) | 
|---|
|  | 600 | { | 
|---|
|  | 601 | int i,d,ii,ix,iy,iz; | 
|---|
|  | 602 | double MesTime1, MesTime2; | 
|---|
|  | 603 | const struct Lattice *Lat = &P->Lat; | 
|---|
|  | 604 | struct LatticeLevel *Lev = &Lat->Lev[STANDARTLEVEL]; | 
|---|
|  | 605 | struct  fft_plan_3d *plan = Lat->plan; | 
|---|
|  | 606 | struct Density *Dens = Lev->Dens; | 
|---|
|  | 607 | fftw_complex *destC = Dens->DensityCArray[TotalLocalDensity]; | 
|---|
|  | 608 | fftw_real *destCR = (fftw_real *)destC; | 
|---|
|  | 609 | fftw_real *destCR2 = Dens->DensityArray[TotalLocalDensity]; | 
|---|
|  | 610 | fftw_complex *work = (fftw_complex *)Dens->DensityCArray[TempDensity]; | 
|---|
|  | 611 | const double factor = 1./Lev->MaxN; | 
|---|
|  | 612 | /* srand(1); */ | 
|---|
|  | 613 | for (d=0; d < (P->Par.me_comm_ST_Psi+P->Par.Max_me_comm_ST_Psi*P->Par.me_comm_ST_PsiT); d++) | 
|---|
|  | 614 | for (i=0; i<Dens->LocalSizeR; i++) | 
|---|
|  | 615 | rand(); | 
|---|
|  | 616 | for (i=0; i<Dens->LocalSizeR; i++) { | 
|---|
|  | 617 | destCR[i] = 1.-2.*(rand()/(double)RAND_MAX); | 
|---|
|  | 618 | destCR2[i] = destCR[i]; | 
|---|
|  | 619 | } | 
|---|
|  | 620 | WaitForOtherProcs(P,0); | 
|---|
|  | 621 | MesTime1 = GetTime(); | 
|---|
|  | 622 | fft_3d_real_to_complex(plan, STANDARTLEVEL, FFTNF1, destC, work); | 
|---|
|  | 623 | for (i=0; i<Dens->LocalSizeC; i++) { | 
|---|
|  | 624 | destC[i].re *= factor; | 
|---|
|  | 625 | destC[i].im *= factor; | 
|---|
|  | 626 | } | 
|---|
|  | 627 | fft_3d_complex_to_real(plan, STANDARTLEVEL, FFTNF1, destC, work); | 
|---|
|  | 628 | MesTime2 = GetTime(); | 
|---|
|  | 629 | for (i=0; i<Dens->LocalSizeR; i++) { | 
|---|
|  | 630 | if (fabs(destCR2[i] - destCR[i]) >= MYEPSILON) { | 
|---|
|  | 631 | iz = i % Lev->N[2]; | 
|---|
|  | 632 | ii = i / Lev->N[2]; | 
|---|
|  | 633 | iy = ii % Lev->N[1]; | 
|---|
|  | 634 | ix = ii / Lev->N[1]; | 
|---|
|  | 635 | fprintf(stderr,"FFT(%i)(%i,%i) =(%i)(%i,%i,%i) %g = |%g - %g| eps(%g)\n",P->Par.my_color_comm_ST,P->Par.my_color_comm_ST_Psi, P->Par.me_comm_ST_Psi, i, ix, iy, iz, fabs(destCR2[i] - destCR[i]), destCR2[i], destCR[i], MYEPSILON); | 
|---|
|  | 636 | } | 
|---|
|  | 637 | } | 
|---|
|  | 638 | return(MesTime2-MesTime1); | 
|---|
|  | 639 | } | 
|---|
|  | 640 |  | 
|---|
|  | 641 | /** GS Test. | 
|---|
|  | 642 | * performs test on Gram-Schmidt | 
|---|
|  | 643 | * \param *P \ref Problem at hand | 
|---|
|  | 644 | */ | 
|---|
|  | 645 | static double GSTest(struct Problem *P) | 
|---|
|  | 646 | { | 
|---|
|  | 647 | struct LatticeLevel *Lev = &P->Lat.Lev[STANDARTLEVEL]; | 
|---|
|  | 648 | int i,j,k; | 
|---|
|  | 649 | double MesTime1, MesTime2; | 
|---|
|  | 650 | int l = P->Lat.Psi.AllLocalNo[0]-1; | 
|---|
|  | 651 | if (P->Par.my_color_comm_ST_Psi == 0) { | 
|---|
|  | 652 | for (k=0; k<P->Par.me_comm_ST_Psi; k++) | 
|---|
|  | 653 | for (j=0; j<2*Lev->AllMaxG[k]; j++) | 
|---|
|  | 654 | rand(); | 
|---|
|  | 655 | l = P->Lat.Psi.LocalNo; | 
|---|
|  | 656 | P->Lat.Psi.LocalPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal; | 
|---|
|  | 657 | for (i=0; i < Lev->MaxG; i++) { | 
|---|
|  | 658 | Lev->LPsi->LocalPsi[l][i].re = 1.-2.*(rand()/(double)RAND_MAX); | 
|---|
|  | 659 | Lev->LPsi->LocalPsi[l][i].im = 1.-2.*(rand()/(double)RAND_MAX); | 
|---|
|  | 660 | } | 
|---|
|  | 661 | } | 
|---|
|  | 662 | P->Lat.Psi.AllPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal; | 
|---|
|  | 663 | WaitForOtherProcs(P,0); | 
|---|
|  | 664 | MesTime1 = GetTime(); | 
|---|
|  | 665 | GramSch(P, &P->Lat.Lev[STANDARTLEVEL], &P->Lat.Psi, Orthonormalize); | 
|---|
|  | 666 | MesTime2 = GetTime(); | 
|---|
|  | 667 | if (P->Par.my_color_comm_ST_Psi == 0) { | 
|---|
|  | 668 | for (k=P->Par.me_comm_ST_Psi+1; k<P->Par.Max_me_comm_ST_Psi; k++) | 
|---|
|  | 669 | for (j=0; j<2*Lev->AllMaxG[k]; j++) | 
|---|
|  | 670 | rand(); | 
|---|
|  | 671 | } | 
|---|
|  | 672 | return(MesTime2-MesTime1); | 
|---|
|  | 673 | } | 
|---|
|  | 674 | #endif | 
|---|
|  | 675 |  | 
|---|
|  | 676 |  | 
|---|
|  | 677 | #ifndef MAXTESTRUN | 
|---|
|  | 678 | #define MAXTESTRUN 100  //!< Maximum runs for a test | 
|---|
|  | 679 | #endif | 
|---|
|  | 680 |  | 
|---|
|  | 681 | #if 0 | 
|---|
|  | 682 | /** combined GS fft test. | 
|---|
|  | 683 | * tests fft and Gram Schmidt combined | 
|---|
|  | 684 | * \param *P \ref Problem at hand | 
|---|
|  | 685 | */ | 
|---|
|  | 686 | static void TestGSfft(struct Problem *P) | 
|---|
|  | 687 | { | 
|---|
|  | 688 | int i, MaxG=0; | 
|---|
|  | 689 | double fftTime = 0; | 
|---|
|  | 690 | double GSTime = 0; | 
|---|
|  | 691 | double *A = NULL; | 
|---|
|  | 692 | double *AT = NULL; | 
|---|
|  | 693 | double avarage, avarage2, stddev; | 
|---|
|  | 694 | for(i=0; i < P->Par.Max_me_comm_ST_Psi; i++) | 
|---|
|  | 695 | MaxG += P->Lat.Lev[STANDARTLEVEL].AllMaxG[i]; | 
|---|
|  | 696 | if (!P->Par.me_comm_ST_Psi) A = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_Psi,"TestGSfft"); | 
|---|
|  | 697 | if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) AT = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_PsiT,"TestGSfft"); | 
|---|
|  | 698 | for (i=0;i<MAXTESTRUN;i++) | 
|---|
|  | 699 | fftTime += fftTest(P); | 
|---|
|  | 700 | fftTime /= MAXTESTRUN; | 
|---|
|  | 701 | MPI_Gather( &fftTime, 1, MPI_DOUBLE, A, 1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi ); | 
|---|
|  | 702 | if (!P->Par.me_comm_ST_Psi) { | 
|---|
|  | 703 | avarage = 0; | 
|---|
|  | 704 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) | 
|---|
|  | 705 | avarage += A[i]; | 
|---|
|  | 706 | avarage /= P->Par.Max_me_comm_ST_Psi; | 
|---|
|  | 707 | avarage2 = 0; | 
|---|
|  | 708 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) | 
|---|
|  | 709 | avarage2 += A[i]*A[i]; | 
|---|
|  | 710 | avarage2 /= P->Par.Max_me_comm_ST_Psi; | 
|---|
|  | 711 | stddev = sqrt(fabs(avarage2-avarage*avarage)); | 
|---|
|  | 712 | fprintf(stdout,"fft: Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f   std %f\n", | 
|---|
|  | 713 | P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2], | 
|---|
|  | 714 | MaxG, | 
|---|
|  | 715 | P->Par.my_color_comm_ST, | 
|---|
|  | 716 | P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi, | 
|---|
|  | 717 | P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi, | 
|---|
|  | 718 | avarage, stddev); | 
|---|
|  | 719 | } | 
|---|
|  | 720 | srand(2); | 
|---|
|  | 721 | for (i=0;i<MAXTESTRUN;i++) | 
|---|
|  | 722 | GSTime += GSTest(P); | 
|---|
|  | 723 | GSTime /= MAXTESTRUN; | 
|---|
|  | 724 | MPI_Gather( &GSTime, 1, MPI_DOUBLE, A, 1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi ); | 
|---|
|  | 725 | if (!P->Par.me_comm_ST_Psi) { | 
|---|
|  | 726 | avarage = 0; | 
|---|
|  | 727 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) | 
|---|
|  | 728 | avarage += A[i]; | 
|---|
|  | 729 | avarage /= P->Par.Max_me_comm_ST_Psi; | 
|---|
|  | 730 | avarage2 = 0; | 
|---|
|  | 731 | for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) | 
|---|
|  | 732 | avarage2 += A[i]*A[i]; | 
|---|
|  | 733 | avarage2 /= P->Par.Max_me_comm_ST_Psi; | 
|---|
|  | 734 | stddev = sqrt(fabs(avarage2-avarage*avarage)); | 
|---|
|  | 735 | fprintf(stdout,"GS : Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f   std %f\n", | 
|---|
|  | 736 | P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2], | 
|---|
|  | 737 | MaxG, | 
|---|
|  | 738 | P->Par.my_color_comm_ST, | 
|---|
|  | 739 | P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi, | 
|---|
|  | 740 | P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi, | 
|---|
|  | 741 | avarage, stddev); | 
|---|
|  | 742 | } | 
|---|
| [64fa9e] | 743 | if (!P->Par.me_comm_ST_Psi) Free(A, "TestGSfft: A"); | 
|---|
|  | 744 | if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) Free(AT, "TestGSfft: AT"); | 
|---|
| [a0bcf1] | 745 | } | 
|---|
|  | 746 | #endif | 
|---|
|  | 747 |  | 
|---|
|  | 748 | /** Run Simulation. | 
|---|
|  | 749 | * just calls \ref CalculateMD() | 
|---|
|  | 750 | \if testing==1 | 
|---|
|  | 751 | * also place to put \ref TestGSfft(P), \ref GSTest(P) or \ref fftTest(P) | 
|---|
|  | 752 | \endif | 
|---|
|  | 753 | * \param *P \ref Problem at hand | 
|---|
|  | 754 | */ | 
|---|
|  | 755 | static void RunSim(struct Problem *P) { | 
|---|
|  | 756 | if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Running Simulation !\n",P->Par.me); | 
|---|
|  | 757 | CalculateMD(P); | 
|---|
|  | 758 | if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Ending Simulation !\n",P->Par.me); | 
|---|
|  | 759 | /*  TestGSfft(P);*/ | 
|---|
|  | 760 | } | 
|---|
|  | 761 |  | 
|---|
|  | 762 | /** Preparation, running and cleaning of a run. | 
|---|
|  | 763 | * Allocate Problem structure, | 
|---|
|  | 764 | * read command line options GetOptions(), | 
|---|
|  | 765 | * define verbosity of output groups SetOutGroup1(), | 
|---|
|  | 766 | * maybe StartDebugger(), | 
|---|
|  | 767 | * parse the configuration files ReadParameters(). | 
|---|
|  | 768 | * | 
|---|
|  | 769 | * Pay attention to external stop signals InitSignals(), | 
|---|
|  | 770 | * define verbosity within the MPi communicators SetOutGroup2(), | 
|---|
|  | 771 | * initialize the timing arrays InitSpeedMeasure(), | 
|---|
|  | 772 | * WaitForOtherProcs(), | 
|---|
|  | 773 | * and doe the initialization of the actual problem Init() (SpeedMeasure()'d in InitTime).\n | 
|---|
|  | 774 | * Runs the Simulation RunSim() and cleans up at the end by calling WaitForOtherProcs(), | 
|---|
|  | 775 | * CompleteSpeedMeasure() and RemoveEverything() | 
|---|
|  | 776 | * \param argc argument count from main | 
|---|
|  | 777 | * \param argv argument array from main | 
|---|
|  | 778 | */ | 
|---|
|  | 779 | static void Run(int argc, char **argv) { | 
|---|
|  | 780 | struct Problem *P = (struct Problem *) | 
|---|
|  | 781 | Malloc(sizeof(struct Problem),"Run - P");   /* contains options, files and data regaring the simulation at hand */ | 
|---|
|  | 782 | GetOptions(&P->Call, argc, argv); | 
|---|
|  | 783 | SetOutGroup1(&P->Call); | 
|---|
|  | 784 | if(P->Call.debug == 1) StartDebugger(); | 
|---|
|  | 785 | ReadParameters(P, P->Call.MainParameterFile); | 
|---|
|  | 786 | StartParallel(P, argv); | 
|---|
|  | 787 | InitSignals(); | 
|---|
|  | 788 | SetOutGroup2(P, &P->Call); | 
|---|
|  | 789 | InitSpeedMeasure(P); | 
|---|
|  | 790 | WaitForOtherProcs(P,1); | 
|---|
|  | 791 | SpeedMeasure(P, InitTime, StartTimeDo); | 
|---|
|  | 792 | Init(P); | 
|---|
|  | 793 | SpeedMeasure(P, InitTime, StopTimeDo); | 
|---|
|  | 794 | RunSim(P); | 
|---|
|  | 795 | WaitForOtherProcs(P,1); | 
|---|
|  | 796 | CompleteSpeedMeasure(P); | 
|---|
|  | 797 | RemoveEverything(P); | 
|---|
|  | 798 | } | 
|---|
|  | 799 |  | 
|---|
|  | 800 | /** Main routine. | 
|---|
|  | 801 | * Launches MPI environment (Init and Finalize), starts Run() and gives | 
|---|
|  | 802 | * correct return signal on exit | 
|---|
|  | 803 | * \param argc argument count | 
|---|
|  | 804 | * \param argv argument array | 
|---|
|  | 805 | * \return exit code (always successive) | 
|---|
|  | 806 | */ | 
|---|
|  | 807 | int main(int argc, char **argv) { | 
|---|
|  | 808 | MPI_Init(&argc, &argv); | 
|---|
|  | 809 | Run(argc, argv); | 
|---|
|  | 810 | MPI_Finalize(); | 
|---|
|  | 811 | CheckSigReturn(); | 
|---|
|  | 812 | return EXIT_SUCCESS; | 
|---|
|  | 813 | } | 
|---|