| 1 | /** \file pcp.c
 | 
|---|
| 2 |  * Main routine, Run super-functions and signal handling.
 | 
|---|
| 3 |  * 
 | 
|---|
| 4 |  * This file contains just framework:
 | 
|---|
| 5 |  * The main() routine, passing its arguments on to Run(), which does signal
 | 
|---|
| 6 |  * initialization InitSignals() and setting verbosity levels and calls RunSim()
 | 
|---|
| 7 |  * which is then just a wrapper for CalculateMD().
 | 
|---|
| 8 |  * For the signals there is the actual handler SigHandlerCPULIM() for the external
 | 
|---|
| 9 |  * exit signal, routines to check for the signal CheckCPULIM() and give correct
 | 
|---|
| 10 |  * return code to old handler CheckSigReturn().
 | 
|---|
| 11 |  * 
 | 
|---|
| 12 |   Project: ParallelCarParrinello
 | 
|---|
| 13 |  \author Jan Hamaekers
 | 
|---|
| 14 |  \date 2000
 | 
|---|
| 15 | 
 | 
|---|
| 16 |   File: pcp.c
 | 
|---|
| 17 |   $Id: pcp.c,v 1.36 2007/02/05 13:59:24 foo Exp $
 | 
|---|
| 18 | */
 | 
|---|
| 19 | 
 | 
|---|
| 20 | /*! \mainpage Introductory to Parallel Car Parrinello
 | 
|---|
| 21 |  * 
 | 
|---|
| 22 |  * This is a quick overview of the programme pcp, explaining as shallowly as possible
 | 
|---|
| 23 |  * its possble uses. For a more profound introduction be referred to its (german) diploma
 | 
|---|
| 24 |  * thesis paper here: http://wissrech.ins.uni-bonn.de/teaching/diplom/diplom_hamaeker.pdf
 | 
|---|
| 25 |  * (Note: All numbering behind formulas always refer to this thesis paper)
 | 
|---|
| 26 |  * 
 | 
|---|
| 27 |  * ParallelCarParrinello - or short pcp - is the parallel implementation of
 | 
|---|
| 28 |  * Car-Parrinello-Molecular-Dynamics, using a density field theory approach. In brief,
 | 
|---|
| 29 |  * a given configuration is minimised in terms of the energy functional. The system
 | 
|---|
| 30 |  * is encapsulated in a cell and thus periodic boundary conditions are applied.
 | 
|---|
| 31 |  * The potential is also evaluated and this yields a force for every moving ion in
 | 
|---|
| 32 |  * the cell. Thereby the molecular dynamics is performed.
 | 
|---|
| 33 |  * 
 | 
|---|
| 34 |  * The basic idea behind the parallelisation is most importantly the simultaneous
 | 
|---|
| 35 |  * fast fourier transformation, where the three-dimensional grid of coefficients in
 | 
|---|
| 36 |  * a plane wave approach are subdivided into planes for the hither transformation and
 | 
|---|
| 37 |  * pens for the thither one. These one or two dimensional ffts can be done by multiple
 | 
|---|
| 38 |  * processes and later reassembled.
 | 
|---|
| 39 |  * Second is the parallelisation of the minimisation. Each process holds a certain
 | 
|---|
| 40 |  * "local" part of all electronic wave functions and the others are assumed constant
 | 
|---|
| 41 |  * while minimising over these local ones by a conjugate gradient mechanism.
 | 
|---|
| 42 |  * Lastly, the Gram-Schmidt-Orthonormalisation is also done parallely.
 | 
|---|
| 43 |  *
 | 
|---|
| 44 |  * This Introductory is divided in the following sections:
 | 
|---|
| 45 |  * - \subpage intro Introduction
 | 
|---|
| 46 |  * - \subpage tutorial Hands-on Tutorial
 | 
|---|
| 47 |  * 
 | 
|---|
| 48 |  * © 2006 Frederik Heber
 | 
|---|
| 49 |  */
 | 
|---|
| 50 |  
 | 
|---|
| 51 | /*! \page intro Introduction
 | 
|---|
| 52 |  *  
 | 
|---|
| 53 |  * 
 | 
|---|
| 54 |  * \section install Installation
 | 
|---|
| 55 |  * 
 | 
|---|
| 56 |  * 
 | 
|---|
| 57 |  *  \subsection reqs Requirements
 | 
|---|
| 58 |  * 
 | 
|---|
| 59 |  * This package relies on certain other packages.
 | 
|---|
| 60 |  * - FFTW version 2\n
 | 
|---|
| 61 |  *    This is a fast implementation of Fast Fourier Transformations.
 | 
|---|
| 62 |  *    Currently, version 3 is not supported. Get it from their webpage, compile
 | 
|---|
| 63 |  *    and install the libraries and header files in a path where the linker will
 | 
|---|
| 64 |  *    find them.\n
 | 
|---|
| 65 |  *    http://www.fftw.org/
 | 
|---|
| 66 |  * - MPICH\n
 | 
|---|
| 67 |  *    This is an implementation of the Message Parsing Interface, which enables
 | 
|---|
| 68 |  *    simple build of parallel programs by means of exchange data via messages.
 | 
|---|
| 69 |  *    Here as well we need the libraries and header files, compile and install
 | 
|---|
| 70 |  *    them in an accessible directory, and also the launcher mpirun.\n
 | 
|---|
| 71 |  *    http://www-unix.mcs.anl.gov/mpi/mpich/
 | 
|---|
| 72 |  * - OpenDx\n
 | 
|---|
| 73 |  *    For optional visualization of the calculations, displaying electronic densities,
 | 
|---|
| 74 |  *    ion positions and forces within the cell.\n
 | 
|---|
| 75 |  *    http://www.opendx.org/
 | 
|---|
| 76 |  * 
 | 
|---|
| 77 |  *  \subsection process Installation Process
 | 
|---|
| 78 |  * 
 | 
|---|
| 79 |  * Note beforehand that this package manages compilation on various architectures with
 | 
|---|
| 80 |  * help of some GNU tools such as automake (Makefile.am) respectively autconf (configure.ac).
 | 
|---|
| 81 |  * 
 | 
|---|
| 82 |  * After unzipping the archive into an appropiate source directory, the programme has
 | 
|---|
| 83 |  * to be compiled by executing consecutively:
 | 
|---|
| 84 |  * - ./configure
 | 
|---|
| 85 |  * - make
 | 
|---|
| 86 |  * - make install
 | 
|---|
| 87 |  * 
 | 
|---|
| 88 |  * It is recommended to create two additional directories:\n
 | 
|---|
| 89 |  * One "build" directory which may safely reside in the main source folder,
 | 
|---|
| 90 |  * wherein the actual build process is launched and thus temporarily created object
 | 
|---|
| 91 |  * files removed from the main folder's contents. This happens automatically if
 | 
|---|
| 92 |  * configure is not called from the main folder but within this build folder by:
 | 
|---|
| 93 |  * - "../configure"
 | 
|---|
| 94 |  * 
 | 
|---|
| 95 |  * Secondly, a "run" directory. This might either be an already present "/usr/local/bin"
 | 
|---|
| 96 |  * which is taken as default, but also a "/home/foo/pcp/run". If desired this path
 | 
|---|
| 97 |  * should be appended to the configure command as follows:
 | 
|---|
| 98 |  * - "../configure --bindir /home/foo/pcp/run"
 | 
|---|
| 99 |  * 
 | 
|---|
| 100 |  * and the executable will be automatically installed therein on calling make install.
 | 
|---|
| 101 |  *
 | 
|---|
| 102 |  * Alternatively, you may more generally pick "--prefix=/home/foo/pcp", which would prefix
 | 
|---|
| 103 |  * all installation directories (such as doc, man, info, lib, ...) with the given directory
 | 
|---|
| 104 |  * home/foo/pcp. Thus, the executable would now be installed upon evocation of 'make install'
 | 
|---|
| 105 |  * in /home/foo/pcp/bin (because ./bin is default for executables). This and more options
 | 
|---|
| 106 |  * are explained when invoking './configure --help'. The subdirectory where the executable is
 | 
|---|
| 107 |  * installed in will from now on be called "bindir".
 | 
|---|
| 108 |  * 
 | 
|---|
| 109 |  * Next, we must unpack the archive containing the pseudopotential files pseudopots.tar.gz by
 | 
|---|
| 110 |  * entering 'tar -zxvf pseudopots.tar.gz' in the main dir of the package. Note that the 
 | 
|---|
| 111 |  * pseudopotential files now reside in 'defaults/pseudopot/' respectively to the main dir.
 | 
|---|
| 112 |  * 
 | 
|---|
| 113 |  * At last, you may re-generate this very documentation of the pcp package with the help of the
 | 
|---|
| 114 |  * 'doxygen' package (http://www.doxygen.org/) by execution of:
 | 
|---|
| 115 |  * - make doxygen-doc (edit the paths in doxygen.cfg beforehand though!)
 | 
|---|
| 116 |  * 
 | 
|---|
| 117 |  * Even more, if you are interested in the documentation of the simple molecuilder sub programme,
 | 
|---|
| 118 |  * enter /home/foo/pcp/build/util/molecuilder and execute the same command therein which generates 
 | 
|---|
| 119 |  * the documentation in /home/foo/pcp/util/molecuilder/doc/html (and may be viewed by giving
 | 
|---|
| 120 |  * /home/foo/pcp/util/molecuilder/doc/html/index.html to a standard web browser).
 | 
|---|
| 121 |  * 
 | 
|---|
| 122 |  * \section use Usage
 | 
|---|
| 123 |  * 
 | 
|---|
| 124 |  * Having unpacked, compiled and installed the programme, we are actually set to go
 | 
|---|
| 125 |  * besides the specification of the actual physical problem. If you desire a ste-by-step
 | 
|---|
| 126 |  * introduction to the handling of the simple hydrogen molecule, be referred to the section
 | 
|---|
| 127 |  * \ref tutorial.
 | 
|---|
| 128 |  * 
 | 
|---|
| 129 |  *  \subsection specify Specifying the Problem
 | 
|---|
| 130 |  * 
 | 
|---|
| 131 |  *  There are two things necessary:
 | 
|---|
| 132 |  *  - a configuration file with various informations from stop conditons to Z number
 | 
|---|
| 133 |  *    or the basic cell vectors, giving its volume, number of ions, position and type
 | 
|---|
| 134 |  *    and so on ... 
 | 
|---|
| 135 |  *    The given configuration file in the subdirectory "defaults" is fully documented
 | 
|---|
| 136 |  *    and its sense should be easy to grasp thereby - or see below in the section
 | 
|---|
| 137 |  *    \ref config.
 | 
|---|
| 138 |  *  - pseudopotential files which reside in a certain directory specified in the
 | 
|---|
| 139 |  *    config file. Ine is necessary for each different atom (discerned by atomic
 | 
|---|
| 140 |  *    number Z) specifying the pseudo potential on a grid, generated by the fhi98PP
 | 
|---|
| 141 |  *    package: http://www.fhi-berlin.mpg.de/th/fhi98md/fhi98PP/index.html\n
 | 
|---|
| 142 |  *    These are also already present in the directory called "defaults/pseudopot".
 | 
|---|
| 143 |  * 
 | 
|---|
| 144 |  *  \subsection exec Programme execution with mpirun ...
 | 
|---|
| 145 |  * 
 | 
|---|
| 146 |  *  Basicially the programme is started as follows:
 | 
|---|
| 147 |  *  - mpirun -np 1 -vvv -machinefile /home/foo/.rhosts pcp ../defaults/main_pcp\n
 | 
|---|
| 148 |  *  where "-np 1" states the number of processors to use, so here just one, "-vvv"
 | 
|---|
| 149 |  *  relates to the verbosity of the intermediate output (more v's, more output),
 | 
|---|
| 150 |  *  "-machinefile" points to the rsh text file with allowed hosts (present on all
 | 
|---|
| 151 |  *  hosts) and last but by no means least the config file with relative path
 | 
|---|
| 152 |  *  "../defaults/main_pcp".\n
 | 
|---|
| 153 |  *  Take care that your machinefile contains enough hosts and also that "-np"
 | 
|---|
| 154 |  *  matches the product of ProcPEPsi and ProcPEGamma specified in the config file.
 | 
|---|
| 155 |  *  You won't see the error messages, when one of clients hangs (mpirun just idles
 | 
|---|
| 156 |  *  on all others as they cannot reach the host whose pcp has stopped).
 | 
|---|
| 157 |  *
 | 
|---|
| 158 |  *  The important input and output is all done via files. Configuration and PseudoPot
 | 
|---|
| 159 |  *  files are read on startup when the Problem structure, with its various parts, is
 | 
|---|
| 160 |  *  initialized. Each given amount of made steps forces, energies, ion positions and
 | 
|---|
| 161 |  *  the electronic density if desired are written to file, which can be read and
 | 
|---|
| 162 |  *  visualized lateron by the Open Data Explorer.
 | 
|---|
| 163 |  *  
 | 
|---|
| 164 |  *  Let's now come to the parallel execution of the programme on multiple hosts.
 | 
|---|
| 165 |  *  Generally, it is recommended to choose between n+1 and 2n processes for n hosts.
 | 
|---|
| 166 |  *  There are two ways of connection to the other clients, either via rsh or ssh.
 | 
|---|
| 167 |  *  Seemingly, mpich automatically picks the right one, first rsh, then ssh.
 | 
|---|
| 168 |  * 
 | 
|---|
| 169 |  *    Note: 
 | 
|---|
| 170 |  *    -# Make sure that the executable with its needed input files are present on all
 | 
|---|
| 171 |  *       other machines (Most common way is a shared directory, such as the user's home
 | 
|---|
| 172 |  *       residing on a server shared via NFS to all clients)
 | 
|---|
| 173 |  *    -# Make sure that the paths are correct and the same on all hosts (also the config paths)
 | 
|---|
| 174 |  *    -# Use a homogeneous set of clients (no mixing of architectures or slow with fast
 | 
|---|
| 175 |  *       systems!)
 | 
|---|
| 176 |  *
 | 
|---|
| 177 |  *    \subsubsection rsh ... with RSH
 | 
|---|
| 178 |  * 
 | 
|---|
| 179 |  *    Most important in this case is the file .rhosts in your home, where line by line
 | 
|---|
| 180 |  *    the hostnames of available computers are included. Via RSH-Login the programme is
 | 
|---|
| 181 |  *    launched on all machines by mpirun.\n
 | 
|---|
| 182 |  * 
 | 
|---|
| 183 |  *    \subsubsection ssh ... with SSH
 | 
|---|
| 184 |  *    
 | 
|---|
| 185 |  *    Via a passphrase and an ssh agent a much safer - due to encryption - way is used
 | 
|---|
| 186 |  *    to connect to other clients, employed as follows
 | 
|---|
| 187 |  *    - ssh-keygen -t rsa (enter keyphrase, public and private key is stored in ~/.ssh/)
 | 
|---|
| 188 |  *    - cat ~/.ssh/\*.pub >>~/.ssh/authorized_keys (if home is shared via nfs) OR \n
 | 
|---|
| 189 |  *      cat ~/.ssh/\*.pub | ssh user\@remote-system 'cat>>.ssh/authorized_keys' (when the
 | 
|---|
| 190 |  *      user home is not shared via nfs for each remote-system)
 | 
|---|
| 191 |  * 
 | 
|---|
| 192 |  *    Afterwards, start an SSH agent and 'ssh-add' the above pass-phrase. It
 | 
|---|
| 193 |  *    might be a good idea, to give a limited timespan 'ssh-add -t 86400' (seconds).
 | 
|---|
| 194 |  * 
 | 
|---|
| 195 |  *    Test it by trying something like 'ssh host /bin/ls /bin'. It should work (just print
 | 
|---|
| 196 |  *    the /bin directory) without any queries or halts. Otherwise remove host and its IP
 | 
|---|
| 197 |  *    from .ssh/known_hosts and try again. Check this for all hosts in your .rhosts,
 | 
|---|
| 198 |  *    or try 'tstmachines -machinefile=/home/foo/.rhosts' which is supplied with mpich.
 | 
|---|
| 199 |  * 
 | 
|---|
| 200 |  *    The ssh method is especially preferrable if the hosts do not reside within a protected
 | 
|---|
| 201 |  *    network, such as shielded by a firewall from the outside internet with internal IP
 | 
|---|
| 202 |  *    adresses.
 | 
|---|
| 203 |  * 
 | 
|---|
| 204 |  *  \subsection results The Results
 | 
|---|
| 205 |  * 
 | 
|---|
| 206 |  *  Process 0 does all the output and is always started on your localhost.
 | 
|---|
| 207 |  *  There a number of different output files if the programme is told to produce
 | 
|---|
| 208 |  *  them in the configuration file. They all beginn with "pcp." and most end with
 | 
|---|
| 209 |  *  the output step count in 4 digits such as ".0001". In between lies the actual
 | 
|---|
| 210 |  *  content information:
 | 
|---|
| 211 |  *  - "speed": Time measurements for the various sections of the calculations.
 | 
|---|
| 212 |  *  - "density": There are three types: "doc" and "dx" specifiy the file format for
 | 
|---|
| 213 |  *    OpenDx, while "data" contains the actual densities. These are only produced
 | 
|---|
| 214 |  *    if DoOutVis is set to 1
 | 
|---|
| 215 |  *  - "ions": "pos" means position, "force" contains forces from the dynamics
 | 
|---|
| 216 |  *    simulation represented by arrows in OpenDx, "datZ", "doc" and "dx" are again
 | 
|---|
| 217 |  *    only needed to specifiy the file format.
 | 
|---|
| 218 |  *  - "energy.all": Total energy
 | 
|---|
| 219 |  *  - "forces.all": Total force
 | 
|---|
| 220 |  * 
 | 
|---|
| 221 |  *  There are visualization scripts included in the subdirectory "defaults" such
 | 
|---|
| 222 |  *  as "visual.cfg" for use with OpenDx.
 | 
|---|
| 223 |  * 
 | 
|---|
| 224 |  * 
 | 
|---|
| 225 |  * \section start Getting started
 | 
|---|
| 226 |  * 
 | 
|---|
| 227 |  *    In Order to get a first understanding of the programme, here is a brief explanation of its most
 | 
|---|
| 228 |  *    important functions:
 | 
|---|
| 229 |  *    -# Init(): Super-function to most of the initialisation subroutines, that allocate memory or
 | 
|---|
| 230 |  *      setup the wave functions randomly or calculate first energies and densities.
 | 
|---|
| 231 |  *    -# CalculateMD(): Contains the super-loop that calculate the force acting on the ions, moves them,
 | 
|---|
| 232 |  *      updates the electronic wave functions and prints results to file.
 | 
|---|
| 233 |  *    -# CalculateForce(): On evaluating the force acting on the ions also the actual minimisation of
 | 
|---|
| 234 |  *      the wave functions - one after the other - is done, energies and densities calculated on ever
 | 
|---|
| 235 |  *      finer grids.
 | 
|---|
| 236 |  *    -# CalculateNewWave(): Is the actual (Fletcher&Reeves) conjugate gradient implementation that
 | 
|---|
| 237 |  *      minimises one wave function by calculating the gradient, preconditioning the resulting matrix,
 | 
|---|
| 238 |  *      retrieving the conjugate direction and doing the line search for the one-dimensional minimum.
 | 
|---|
| 239 |  * 
 | 
|---|
| 240 |  *    Most of these functions call many others in sequence, so be prompted to their respective
 | 
|---|
| 241 |  *    documentation for further explanation and formula.
 | 
|---|
| 242 |  * 
 | 
|---|
| 243 |  *  \subsection follow Following the course of the programme
 | 
|---|
| 244 |  * 
 | 
|---|
| 245 |  *    When the programme is launched with the option "-vvv" for greater verbosity a series of output
 | 
|---|
| 246 |  *    is printed to the screen, which briefly shall be explained here.
 | 
|---|
| 247 |  * 
 | 
|---|
| 248 |  *    At first there is the initialization output stating number of nodes on the ever more finely
 | 
|---|
| 249 |  *    grids in normal and reciprocal space, density checks, parameters read from the configuration
 | 
|---|
| 250 |  *    file and so on, until the minimisation finally commences.
 | 
|---|
| 251 |  * 
 | 
|---|
| 252 |  *    The most prominent line then looks as follows:
 | 
|---|
| 253 |  *    (0,0,0)S(0,0,0):        TE: -1.750005e+01       ATE: -1.825983e+01       Diff: 1.000000e+00      --- d: 6.634701e-01    dEdt0: -1.943969e+00    ddEddt0: 9.673485e-01 
 | 
|---|
| 254 |  *
 | 
|---|
| 255 |  *    The first three numbers refer to process numbers within MPI communicators: SpinUp/Double(0)SpinDown(1),
 | 
|---|
| 256 |  *    number within coefficients sharing, within wave functions sharing group.
 | 
|---|
| 257 |  *    The next three numbers after the "S" refer to: Number of performed minimisation steps, number of
 | 
|---|
| 258 |  *    currently minimised local wave function, number of minimisation steps performed on this wave function.
 | 
|---|
| 259 |  *    Afterwards follow: Total energy (TE), approximated total energy (ATE), relative error in the prediction (diff),
 | 
|---|
| 260 |  *    delta (the one-dimensional line search parameter \f$cos \Theta\f$), the first (dEdt0) and second (ddEddt0)
 | 
|---|
| 261 |  *    derivative of the total energy functional (at parameter = 0).
 | 
|---|
| 262 |  * 
 | 
|---|
| 263 |  *    Ever and again there appear checks such as:
 | 
|---|
| 264 |  *    -# "ARelTE: 1.387160e-02    ARelKE: 3.074929e-03", stating the actual relative difference in total and
 | 
|---|
| 265 |  *       kinetic energy between this and the last check
 | 
|---|
| 266 |  *    -# "(0)TestGramSchm: Ok !", having tested the desired orthogonality of the wave functions
 | 
|---|
| 267 |  *    -# "Beginning minimisation...", starting a perturbed minimisation run.
 | 
|---|
| 268 |  * 
 | 
|---|
| 269 |  *    At the end the programme will print a number of different energies to the screen: total, Hartree,
 | 
|---|
| 270 |  *    ionic, kinetic, ... energies.
 | 
|---|
| 271 |  * 
 | 
|---|
| 272 |  *
 | 
|---|
| 273 |  * \section tech Technical Details
 | 
|---|
| 274 |  * 
 | 
|---|
| 275 |  *  \subsection config The Configuration file explained
 | 
|---|
| 276 |  *
 | 
|---|
| 277 |  *  The first column always contains a keyword (for which the programme parses, so
 | 
|---|
| 278 |  *  the ordering as given here needn't be replicated) following one or more values
 | 
|---|
| 279 |  *  or strings.
 | 
|---|
| 280 |  * 
 | 
|---|
| 281 |  *  - mainname: The short name of the Programme
 | 
|---|
| 282 |  *  - defaultpath: Full path indicating where to put the output files during runtime
 | 
|---|
| 283 |  *  - pseudopotpath: Full path to the needed PseudoPot files
 | 
|---|
| 284 |  *  - ProcPEPsi: Number of process (groups consisting of ProcPEPsi processes) among which
 | 
|---|
| 285 |  *    wave functions are split up (e.g. MaxPsiDouble 16, ProcPEPsi 4, ProcPEGamma 2 results
 | 
|---|
| 286 |  *    in 4 wave functions per group and each group has 2 processes sharing coefficients)
 | 
|---|
| 287 |  *  - ProcPEGamma: Number of processes which make up one wave functions coefficients sharing group
 | 
|---|
| 288 |  *  - DoOutVis: Output files for OpenDX (1 - yes, 0 - no)
 | 
|---|
| 289 |  *  - DoOutMes: Output Measurement (1 - yes, 0 - no)
 | 
|---|
| 290 |  *  - AddGramSch: Do an additional GramSchmidt-run after a Psi has been minimised (hack)
 | 
|---|
| 291 |  *  - Seed: Initialization value for pseudo random number generator on generating wave function coefficients, 0 means
 | 
|---|
| 292 |  *    constant 1/V for perturbed wave functions.
 | 
|---|
| 293 |  *  - UseSeparation: Minimise unoccupied states in a separate run per level indepedent of occupied states
 | 
|---|
| 294 |  *  - UseFermiOccupation: After each minimisation run, occupation of orbitals will be reset according to Fermi distribution
 | 
|---|
| 295 |  *  - MaxOuterStep: Number of to be performed Molecular Dynamics steps
 | 
|---|
| 296 |  *  - Deltat: time slice per MD step
 | 
|---|
| 297 |  *  - OutVisStep: Output visual data at every ..th step
 | 
|---|
| 298 |  *  - OutSrcStep: Output measurement data at every ..th step
 | 
|---|
| 299 |  *  - TargetTemp: Target temperature in the cell (this medium ion velocity)
 | 
|---|
| 300 |  *  - ScaleTempStep: MD temperature is scaled at every ..th step
 | 
|---|
| 301 |  *  - MaxPsiStep: number of minimisation steps per state(!) before going on to next local state (0 - default (3))
 | 
|---|
| 302 |  *  - MaxMinStep: standard level stop criteria - maximum number of minimisation steps over all states
 | 
|---|
| 303 |  *  - RelEpsTotalE: standard level stop criteria - minimum relative change in total energy
 | 
|---|
| 304 |  *  - RelEpsKineticE: standard level stop criteria - minimum relative change in kinetic energy
 | 
|---|
| 305 |  *  - MaxMinStopStep: standard level stop condition is evaluated at every ..th step, should scale with orbital number
 | 
|---|
| 306 |  *  - MaxMinGapStopStep: standard level stop condition for gap calculation is evaluated at every ..th step
 | 
|---|
| 307 |  *  - MaxInitMinStep: initial level stop criteria - maximum number of minimisation steps over all states
 | 
|---|
| 308 |  *  - InitRelEpsTotalE: initial level stop criteria - minimum relative change in total energy
 | 
|---|
| 309 |  *  - InitRelEpsKineticE: initial level stop criteria - minimum relative change in kinetic energy
 | 
|---|
| 310 |  *  - InitMaxMinStopStep: initial level stop condition is evaluated at every ..th step
 | 
|---|
| 311 |  *  - InitMaxMinGapStopStep: initial level stop condition is evaluated at every ..th step
 | 
|---|
| 312 |  *  - BoxLength: Length of the super cell in atomic units (lower triangle matrix: 1 value, 2 values, 3 values)
 | 
|---|
| 313 |  *  - ECut: Energy cutoff specifying how man plane waves are used (in Hartrees)
 | 
|---|
| 314 |  *  - MaxLevel: number of different mesh levels in the code, >=2
 | 
|---|
| 315 |  *  - Level0Factor: Mesh points Factor for the topmost level
 | 
|---|
| 316 |  *  - RiemannTensor: Use RiemannTensor
 | 
|---|
| 317 |  *  - PsiType: Whether orbits are always doubly occupied or spin up and down is discerned (and completely separated in calculation, needs at least 2 processes!)
 | 
|---|
| 318 |  *  - MaxPsiDouble: specifying how many doubly occupied orbits there are (either this or next depends on given PsiType)
 | 
|---|
| 319 |  *  - PsiMaxNoUp/PsiMaxNoDown: specifying how many spin up or down occupied orbits there are
 | 
|---|
| 320 |  *  - RCut: Radial cutoff in the Ewald summation
 | 
|---|
| 321 |  *  - IsAngstroem: length in 0 - Bohr radii or in 1- Angstrᅵm
 | 
|---|
| 322 |  *  - MaxTypes: maximum number of different ion types (important for next constructs)
 | 
|---|
| 323 |  *  - Ion_TypeNr: 6 values - Number of ions, atomic value, Gauss radius, maximum angular momentum of Pseudopotenials, L_loc and mass of ion (Nr in keyword must go from 1..MaxTypes!)
 | 
|---|
| 324 |  *  - Ion_TypeNr_Nr: 4 values - position (x,y,z) in atomic units, IonMoveType (0 - movable, 1 - fixed).For each stated IonType there must such a keywor for each ion (second number goes from 1..Number of ions!)
 | 
|---|
| 325 |  *  
 | 
|---|
| 326 |  * \section notes Notes on implementation and code understanding
 | 
|---|
| 327 |  * 
 | 
|---|
| 328 |  *  \subsection densities Densities and Wave functions
 | 
|---|
| 329 |  *
 | 
|---|
| 330 |  *    The coefficients of densities and wave functions both in real and reciprocal space are only in parts stored locally
 | 
|---|
| 331 |  *    on the process. Coefficients of one wave functions may be shared under a number of processes, increasing speed when
 | 
|---|
| 332 |  *    doing loops over G - reciprocal vector, and the wave functions as a whole may also be shared, increasing Fourier transform
 | 
|---|
| 333 |  *    and also minimization - however with the drawback of parallel conjugate gradient minimization which might lead to errors.
 | 
|---|
| 334 |  *   
 | 
|---|
| 335 |  *    Additionally, the coefficients are structured in levels. The grid is present in ever finer spacings, which is very helpful
 | 
|---|
| 336 |  *    in the initial minimization after the wave functions have been randomly initialized. Also, the density is always calculated
 | 
|---|
| 337 |  *    on a grid twice as fine as the one containing wave function coefficients, as the fourier transform is then exact. That's
 | 
|---|
| 338 |  *    why there are always two levels: LatticeLevel LevS and Lev0. The first being the standard level with the wave functions,
 | 
|---|
| 339 |  *    the latter for densities (one often stumbles over "Dens0" as a variable name). Also there two kinds of densities:
 | 
|---|
| 340 |  *    DensityArray and DensityCArray, the first contains densities in real space, the latter in reciprocal space. However, both
 | 
|---|
| 341 |  *    are densities, hence the scalar product of their respective wave functions.
 | 
|---|
| 342 |  *    For this reason one finds often construct which use a position factor in order to transform wave coefficients into ones
 | 
|---|
| 343 |  *    on a higher level, followed by a fourier transform and a transposition, see e.g. CalculateOneDensityR(). It is often
 | 
|---|
| 344 |  *    encountered throughout the code.
 | 
|---|
| 345 |  *  
 | 
|---|
| 346 |  * 
 | 
|---|
| 347 |  * © 2006 Frederik Heber
 | 
|---|
| 348 |  */
 | 
|---|
| 349 |  
 | 
|---|
| 350 |  
 | 
|---|
| 351 | /*! \page tutorial Hands-on Tutorial
 | 
|---|
| 352 |  * 
 | 
|---|
| 353 |  * 
 | 
|---|
| 354 |  *  In this section we present a step-by-step tutorial, which leads you from the creation of a typical config file for a hydrogen
 | 
|---|
| 355 |  *  molecule through the actual calcualtion to the analysis with final plots and graphics.
 | 
|---|
| 356 |  * 
 | 
|---|
| 357 |  *  \section configfile Creating the config file
 | 
|---|
| 358 |  * 
 | 
|---|
| 359 |  *    First, we need to create the config file. For this we use the utility 'molecuilder' which resides in the util subdirectory
 | 
|---|
| 360 |  *    and is compiled upon execution of 'make' as described in Section \ref process and finally installed in the bindir. Go there,
 | 
|---|
| 361 |  *    pick and create a directory, such as '/tmp/H2/' and start the utility:
 | 
|---|
| 362 |  *    - ./molecuilder /tmp/H2/H2-config
 | 
|---|
| 363 |  * 
 | 
|---|
| 364 |  *    You might want to change the config file name, here chosen as 'H2-config', according to your personal liking, it is completely
 | 
|---|
| 365 |  *    arbitrary. It is however recommended to pick an individual directory for each molecule upon which you choose to perform DFT
 | 
|---|
| 366 |  *    calculations as there are a number of files which are produced during the run.
 | 
|---|
| 367 |  * 
 | 
|---|
| 368 |  *    Molecuilder will first ask for a cell size, which is entered by typing in the six elements of a symmetric matrix, for an cubic
 | 
|---|
| 369 |  *    cell of 20 a.u. length, enter 20, 0, 20, 0, 0, 20.
 | 
|---|
| 370 |  * 
 | 
|---|
| 371 |  *    Next, you see two lists, both are empty, and a menu. So far no elements or atoms have been put into the cell. We want to add an
 | 
|---|
| 372 |  *    atom, press 'a' and enter. Now we have to choose in which way we want to add the atom. The choice is either in absolute (x,y,z) 
 | 
|---|
| 373 |  *    coordinates, or relative to either another absolute point or to an already placed atom. The last option is used in the case when
 | 
|---|
| 374 |  *    there are two atoms and you want to add a third according to its angle and distance to the former two.
 | 
|---|
| 375 |  * 
 | 
|---|
| 376 |  *    For now, we add the first hyodrogen atom. We want to put the molecule which consists of two hydrogen atoms at a distance of 1.4
 | 
|---|
| 377 |  *    atomic units symmetrically off the cell's centre. Thus we choose option b, enter 10, 10, 10 as the reference point and then give the
 | 
|---|
| 378 |  *    relative coordinates as 0.7, 0, 0. Thus, the molecule will be aligned along the x axis. The Z number is 1, in the case of hydrogen.
 | 
|---|
| 379 |  *    Upon the last enter you notice that both the element and the atom list have filled a bit. Let's add the second atom. Choose 'a'
 | 
|---|
| 380 |  *    again and option c this time, picking the first already entered atom as the reference point, by entering 0 ('number in molecule') 
 | 
|---|
| 381 |  *    next. Enter: -1.4, 0 , 0, and again Z = 1.
 | 
|---|
| 382 |  *    Afterwards, we see that in the element list, the second entry changed from 1 to 2. It's the number of atoms of this element which
 | 
|---|
| 383 |  *    the molecules consists of so far. Next is Z number, R-cut, lmax and lloc, the element's mass and a name and short hand which is
 | 
|---|
| 384 |  *    taken from the database in the file 'elements.db' which must reside in the same directory as molecuilder.
 | 
|---|
| 385 |  * 
 | 
|---|
| 386 |  *    Finally, we need to specify some additional info in the config file that's non-standard. Enter 'e' in the menu to edit the
 | 
|---|
| 387 |  *    current configuration. The list is huge, you might have to scroll a bit and don't panic, most of these are trivial and will
 | 
|---|
| 388 |  *    become comprehensible with time. First is the paths, option 'B' and 'C'. Upon entering the option, it will give the the
 | 
|---|
| 389 |  *    "old" value and request a new one. Enter:
 | 
|---|
| 390 |  *    - B: /tmp/H2/
 | 
|---|
| 391 |  *    - C: /home/foo/pcp/defaults/pseudopot
 | 
|---|
| 392 |  *    
 | 
|---|
| 393 |  *    The latter one is the path to the pseudopotential files which reside in the archive 'pseudopots.tar.gz' in /home/foo/pcp or
 | 
|---|
| 394 |  *    whereever you have extracted the distributed pcp package to. Unzip it by 'tar -zxvf pseudopots.tar.gz' and it will create an
 | 
|---|
| 395 |  *    additional directory called defaults wherein another directory called pseudopot resides containing the files for element with
 | 
|---|
| 396 |  *    atomic number 1 up to 80 -- these are the files generated with the FHIMD PseudoPotential Generator.
 | 
|---|
| 397 |  * 
 | 
|---|
| 398 |  *    As the molecule and all necessary config file settings are entered, enter 's' to save the config file under the filename stated
 | 
|---|
| 399 |  *    on the command-line and 'q' to quit.
 | 
|---|
| 400 |  *  
 | 
|---|
| 401 |  *    
 | 
|---|
| 402 |  *    Now, we take a look at the created config file itself. Take your favorite editor and compare it to the explanations given in the
 | 
|---|
| 403 |  *    secion \ref config. According to your machine setup, the following entries must be changed:
 | 
|---|
| 404 |  *    - ProcPEPGamma/ProcPEPsi: Their product gives number of processes which must be started. As a beginning you might leave 1 here for
 | 
|---|
| 405 |  *      both entries.
 | 
|---|
| 406 |  *    - VectorPlane: Enter 1 (it's a cut plane for a two-dimensional representation of the current density)
 | 
|---|
| 407 |  *    - VectorCut: Enter 10. (cut is done at this coordinate along the by VectorPlane given axis)
 | 
|---|
| 408 |  *    - BoxLength: Check the six entries of the symmetric matrix for the correct cell size
 | 
|---|
| 409 |  *    - ECut: In general, both the precision and time needed for the calculation. Reasonable values for H2 are 8. to 24.
 | 
|---|
| 410 |  *    - MaxPsiDouble: Enter 1 here, it's the number of doubly occupied orbitals which is one in case of the hydrogen molecule 
 | 
|---|
| 411 |  *    - AddPsis: Leave 0, this is the number of unoccupied orbitals which we do not need.
 | 
|---|
| 412 |  *    - RCut: Enter 20., the cell size. It's another numerical cutoff here for the ewald summation of the infinite coulomb potential.
 | 
|---|
| 413 |  * 
 | 
|---|
| 414 |  *    This sums it up. You might also have noticed three entries, one for the element and another two for each hydrogen atom, very similar
 | 
|---|
| 415 |  *    in appearance to the list given by molecuilder. Compare to the explanation given in the config file section \ref config above.
 | 
|---|
| 416 |  *    Of course, the above shown changes could also have been made during the edit menu of molecuilder, we just wanted to demonstrate that
 | 
|---|
| 417 |  *    it's perfectly safe to use a text editor as well (any more than one white space such as some more spaces and/or tabs are 
 | 
|---|
| 418 |  *    automatically read over during the parsing of the config file).
 | 
|---|
| 419 |  * 
 | 
|---|
| 420 |  *    We may now start a calculation.
 | 
|---|
| 421 |  * 
 | 
|---|
| 422 |  *  \section launching Performing the calculation
 | 
|---|
| 423 |  * 
 | 
|---|
| 424 |  *    Enter the directory which you specified as the 'bindir' where the executable shall be installed in, let's say for this example
 | 
|---|
| 425 |  *    it's '/tmp/pcp/run'. Now, if you have the mpi package installed and are able to launch mpirun, enter this:
 | 
|---|
| 426 |  * 
 | 
|---|
| 427 |  *    - mpirun -np 1 ./pcp -vvv -w /tmp/H2/H2-config
 | 
|---|
| 428 |  * 
 | 
|---|
| 429 |  *    Here, '1' is the number of processes needed, the product of ProcPEPGamma and ProcPEPsi. If not, enter this:
 | 
|---|
| 430 |  * 
 | 
|---|
| 431 |  *    - ./pcp -vvv -w /tmp/H2/H2-config
 | 
|---|
| 432 |  * 
 | 
|---|
| 433 |  *    More comfortably we may also take the unix shell script 'run', which uses mpirun and evaluates the product by itself. However,
 | 
|---|
| 434 |  *    as it is platform-dependent and thus it is provided just as an idea-giver. It is installed in the bindir from subdirectory 'run'
 | 
|---|
| 435 |  *    during 'make install'. However, its executable permissions must still be set: e.g. 'chmod 755 run' (The same applies for all
 | 
|---|
| 436 |  *    other 'shell scripts' used in this tutorial). Also, the common paths are set in the file 'suffix', also in the bindir and must
 | 
|---|
| 437 |  *    be adapted to your local context.  Basically, there is always the rather uncomfortable way of doing every by hand, which is
 | 
|---|
| 438 |  *    explained as well, or use these scripts if you can make them work.
 | 
|---|
| 439 |  * 
 | 
|---|
| 440 |  *    - Enter: ./run to see it's options
 | 
|---|
| 441 |  *    - Enter: ./run -vvv /tmp/H2/H2-config H2 to run (here a log file pcp.H2.log is written, with the tag 'H2' which may be chosen arbitrarily).
 | 
|---|
| 442 |  * 
 | 
|---|
| 443 |  *    Possible errors here are as follows:
 | 
|---|
| 444 |  *    - paths in 'suffix' are not changed or were overwritten with defaults on make 'install'
 | 
|---|
| 445 |  *    - permission denied: 'chmod 755 run' missing (necessary after 'make install')
 | 
|---|
| 446 |  *    - pseudopots not unpacked or wrong path given in config file
 | 
|---|
| 447 |  *    - charged system: MaxPsiDouble (..SpinUp/Down) does not match required number of orbitals of the system
 | 
|---|
| 448 |  * 
 | 
|---|
| 449 |  *    Given 8 (hartree) as ECut and a moderately well performing computer numbers should rush by for a few seconds. Afterwards, the calculations
 | 
|---|
| 450 |  *    has already finished (this is of course only the case for a simple molecule such as the hydrogen one). The last hundred lines or so
 | 
|---|
| 451 |  *    consist of calculated values, such as the susceptibility tensor and among others the isometric values, shielding for each hydrogen atom,
 | 
|---|
| 452 |  *    the total energy (though beware, due to the pseudopotential ansatz for a general molecule it is not equivalent to the real absolute binding
 | 
|---|
| 453 |  *    energy) and the perturbed energy which are of minor interest.
 | 
|---|
| 454 |  * 
 | 
|---|
| 455 |  *    A number of file where generated in /tmp/H2/, some general ones are listed in sections above, we want to discuss a chosen few here:
 | 
|---|
| 456 |  * 
 | 
|---|
| 457 |  *    - LOG: pcp.H2.log contains all the lines which you just might have missed before (only if pcp was started with 'run'-script)
 | 
|---|
| 458 |  *    - Cut plane: pcp.current....csv contains (for each process of ProcPEPGamma) it's part of the specified cut plane of the current density.
 | 
|---|
| 459 |  *    - Susceptibility: pcp.chi.csv contains first an explanatory line and then one with ten entries, first is the ecut, then follow the nine entries of
 | 
|---|
| 460 |  *      the rank 2 tensor.
 | 
|---|
| 461 |  *    - shielding: pcp.sigma_i0_H.csv and ...i1... respectively contain the same as pcp.chi.csv only for the shielding tensor.
 | 
|---|
| 462 |  *
 | 
|---|
| 463 |  *    And this is it. Of course, \f$H_2\f$ is a very simple molecule. Regard a case where you might want to calculate a (4,3) nanotube with
 | 
|---|
| 464 |  *    hundreds of symmetrically aligned carbon atoms. Each of these will produce a pcp.sigma_i..csv, though all should be the same in the
 | 
|---|
| 465 |  *    end. Also, we would like to have convergence plots and vector graphs of the current density and its values. 
 | 
|---|
| 466 |  * 
 | 
|---|
| 467 |  *    In order to produce convergence plots, we need to perform the calculations for various cutoff energies (ECut). You may either move all
 | 
|---|
| 468 |  *    the files in /tmp/H2 to e.g. /tmp/H2/8Ht/, edit the config file, pick 12Ht now as ECut and re-run the calculations, move the results
 | 
|---|
| 469 |  *    and so on for four to six different values. An easier way is the script file 'test_for_ecut.sh'. Delete all pcp.*-files in /tmp/H2/
 | 
|---|
| 470 |  *    (Thus it is always good practice not to pick pcp as prefix for the config file!) and enter in the bindir subdirectory:
 | 
|---|
| 471 |  * 
 | 
|---|
| 472 |  *    - chmod 755 test_for_ecut (as it is platform-dependent as well, otherwise you get an error: permission denied)
 | 
|---|
| 473 |  *    - ./test_for_ecut.sh to see it complaining the lack of a config file
 | 
|---|
| 474 |  *    - ./test_for_ecut.sh /tmp/H2/H2-config to see its desired options and the current ecut list (it's a string in the script file adapted to
 | 
|---|
| 475 |  *      4 processes sharing the coefficients)
 | 
|---|
| 476 |  *    - ./test_for_ecut.sh /tmp/H2/H2-config H2 24 to launch it.
 | 
|---|
| 477 |  * 
 | 
|---|
| 478 |  *    Notice that it produces no ouput as does pcp. The lines are redirected into the log file whose tag you specified as 'H2'. As we want to
 | 
|---|
| 479 |  *    check on what it's doing, we ought to have started it using 'nohup ./test_for...... 24 &' to launch it as a child process. Now,
 | 
|---|
| 480 |  *    we need to press CTRL-Z to stop it and enter 'bg' to background it. Look into /tmp/H2. Notice various new subdirectories
 | 
|---|
| 481 |  *    resembling the specified ecuts. Enter 'tail -f /tmp/H2/3Ht/pcp.H2.log' to see that the first calculation is already finished.
 | 
|---|
| 482 |  *    Continue with 12Ht and furtheron if you are impatient. Otherwise, wait for the shell script to terminate. Now, we have multiple
 | 
|---|
| 483 |  *    points for a convergence graph to evaluate. Thus, onward to the analysis.  
 | 
|---|
| 484 |  *
 | 
|---|
| 485 |  *  
 | 
|---|
| 486 |  *  \section analysis Analyzing the results
 | 
|---|
| 487 |  *
 | 
|---|
| 488 |  *    First, let's have a look at the cut plane and isospheres of the current density.
 | 
|---|
| 489 |  * 
 | 
|---|
| 490 |  *    - start gnuplot, enter "plot 'pcp.current_proc0.csv' with vectors" in one of the various cutoff directories. Notice the rotating vector
 | 
|---|
| 491 |  *      field which the magnetic field induced in between the two hydrogen atoms.
 | 
|---|
| 492 |  *    - start opendx by entering dx, load Programm visual.net which resides in the defaults subdirectory of the archive. It may take a moment
 | 
|---|
| 493 |  *      the more complex the grid (the higher the energy cutoff) is and will then render three-dimensional isospheres of the current density
 | 
|---|
| 494 |  *      on the screen. You may rotate them and 'Sequence Control' let's you see each the current density of of the three magnetic field
 | 
|---|
| 495 |  *      directions.
 | 
|---|
| 496 |  * 
 | 
|---|
| 497 |  *    Now onward to the real analysis.
 | 
|---|
| 498 |  *    Here again, we use some shell scripts which gather the results from the various calculations into one file (as you may guess, they pick
 | 
|---|
| 499 |  *    the last lines from each dir from the .csv-files and gather them in one file with the same name in the main config directory. Enter in
 | 
|---|
| 500 |  *    the bindir (don't forget to chmod):
 | 
|---|
| 501 |  * 
 | 
|---|
| 502 |  *    - ./gather_all_results /tmp/ H2, where the first argument gives the absolute directory and H2 the molecules subdirectory.
 | 
|---|
| 503 |  * 
 | 
|---|
| 504 |  *    It will print some stuff and finish. If you have gnuplot installed, some plots may briefly appear as a flicker on the screen. They
 | 
|---|
| 505 |  *    created as a postscript in one file called H2.ps. Its purpose is purely to quickly check on the general convergence of each value and
 | 
|---|
| 506 |  *    to pinpoint mavericks. Afterwards, you will find a number of results-...-files in the main config directory /tmp/H2. They contain
 | 
|---|
| 507 |  *    the gathered chi and sigmas for the various energy cutoffs that were calculated. They are used in the creation of H2.ps. It is
 | 
|---|
| 508 |  *    easy to create your own convergence analysis from these. Note also that in 'pcp.convergence.csv' you find a lot of variables, each
 | 
|---|
| 509 |  *    run more ore less adds a new line. Thus, this file might be especially useful to check convergence of lets-say sigma against
 | 
|---|
| 510 |  *    ecut or against the lattice constant (if you performed different runs with varyzing constants). Basically, all .csv-Files are
 | 
|---|
| 511 |  *    results (the suffix may be changed in bindir/suffix though).
 | 
|---|
| 512 |  * 
 | 
|---|
| 513 |  *    That completes the magnetic analysis of the hydrogen molecule.
 | 
|---|
| 514 |  * 
 | 
|---|
| 515 |  *    But there are more complex molecules and there we want to delve more deeply into the topic of parallel computing. 
 | 
|---|
| 516 |  *  
 | 
|---|
| 517 |  * © 2006 Frederik Heber
 | 
|---|
| 518 |  */
 | 
|---|
| 519 | 
 | 
|---|
| 520 | #include<signal.h>
 | 
|---|
| 521 | #include<stdlib.h>
 | 
|---|
| 522 | #include<stdio.h>
 | 
|---|
| 523 | #include<math.h>
 | 
|---|
| 524 | #include"mpi.h"
 | 
|---|
| 525 | #include"data.h"
 | 
|---|
| 526 | #include"errors.h"
 | 
|---|
| 527 | #include"helpers.h"
 | 
|---|
| 528 | #include"init.h"
 | 
|---|
| 529 | #include"pcp.h"
 | 
|---|
| 530 | #include"opt.h"
 | 
|---|
| 531 | #include"myfft.h"
 | 
|---|
| 532 | #include"gramsch.h"
 | 
|---|
| 533 | #include"output.h"
 | 
|---|
| 534 | #include "run.h"
 | 
|---|
| 535 | #include "pcp.h"
 | 
|---|
| 536 | 
 | 
|---|
| 537 | #define MYSTOPSIGNAL SIGALRM    //!< external signal which gracefully stops calculations 
 | 
|---|
| 538 | 
 | 
|---|
| 539 | static void InitSignals(void);  
 | 
|---|
| 540 | 
 | 
|---|
| 541 | void (*default_sighandler)(int);        //!< deals with external signal, set to \ref SigHandlerCPULIM()
 | 
|---|
| 542 | volatile sig_atomic_t cpulim = 0;       //!< contains external signal
 | 
|---|
| 543 | int par_cpulim = 0;                             //!< for all MPI nodes: 0 = continue, 1 = limit reached, end operation
 | 
|---|
| 544 | int myrank = 0;                                 //!< Rank among the multiple processes
 | 
|---|
| 545 | 
 | 
|---|
| 546 | /** Handles external Signal CPULimit.
 | 
|---|
| 547 |  * \ref cpulim is set to 1 as a flag
 | 
|---|
| 548 |  * especially if time limit is exceeded.
 | 
|---|
| 549 |  * \param sig contains signal code which is printed to screen
 | 
|---|
| 550 |  */
 | 
|---|
| 551 | static void SigHandlerCPULIM(int sig) { 
 | 
|---|
| 552 |   if(myrank == 0)
 | 
|---|
| 553 |     fprintf(stderr,"process %i received signal %d\n", myrank, sig);
 | 
|---|
| 554 |   cpulim = 1;
 | 
|---|
| 555 |   return;
 | 
|---|
| 556 | }
 | 
|---|
| 557 | 
 | 
|---|
| 558 | /** Checks if CPU limit is reached.
 | 
|---|
| 559 |  * is supposed to be called ever and again, checking for \ref cpulim,
 | 
|---|
| 560 |  * then \ref cpulim is broadcasted to all MPI nodes preventing deadlock
 | 
|---|
| 561 |         \note On reaching time limit: save all in datafile, close everything
 | 
|---|
| 562 |  * \param *P \ref Problem at hand
 | 
|---|
| 563 |  * \return is equal to cpulim
 | 
|---|
| 564 |  */
 | 
|---|
| 565 | int CheckCPULIM(struct Problem *P) {
 | 
|---|
| 566 |   /* Bei Zeitlimit: evtl DataFile ausgeben, alles abschliessen */
 | 
|---|
| 567 |   par_cpulim = cpulim;
 | 
|---|
| 568 |   MPI_Bcast(&par_cpulim, 1, MPI_INT, 0, MPI_COMM_WORLD);
 | 
|---|
| 569 |   if (!par_cpulim) return 0;
 | 
|---|
| 570 |   if (myrank == 0) fprintf(stderr,"cpulim reached, finish data\n");
 | 
|---|
| 571 |   return 1;
 | 
|---|
| 572 |   /* Ende einleiten ! */
 | 
|---|
| 573 | }
 | 
|---|
| 574 | 
 | 
|---|
| 575 | /** Initializes signal handling.
 | 
|---|
| 576 |  * Sets \ref myrank to 0 and installs \ref SigHandlerCPULIM
 | 
|---|
| 577 |  */
 | 
|---|
| 578 | static void InitSignals(void) {
 | 
|---|
| 579 |   myrank = 0/*P->Par.me*/;
 | 
|---|
| 580 |   default_sighandler = signal(MYSTOPSIGNAL, SigHandlerCPULIM);
 | 
|---|
| 581 | }
 | 
|---|
| 582 | 
 | 
|---|
| 583 | /** Checks additionally for signal.
 | 
|---|
| 584 |  * If signal has been processed, it gets re-processed by default handler
 | 
|---|
| 585 |  * in order to return correct status to calling script
 | 
|---|
| 586 |  */
 | 
|---|
| 587 | static void CheckSigReturn(void) {
 | 
|---|
| 588 |   signal(MYSTOPSIGNAL, default_sighandler);
 | 
|---|
| 589 |   if(cpulim)
 | 
|---|
| 590 |     raise(MYSTOPSIGNAL);
 | 
|---|
| 591 | }
 | 
|---|
| 592 | 
 | 
|---|
| 593 | #if 0
 | 
|---|
| 594 | #define testing 1
 | 
|---|
| 595 | /** FFT Test.
 | 
|---|
| 596 |  * perfoms some tests on fft
 | 
|---|
| 597 |  * \param *P \ref Problem at hand
 | 
|---|
| 598 |  */
 | 
|---|
| 599 | static double fftTest(struct Problem *P)
 | 
|---|
| 600 | {
 | 
|---|
| 601 |   int i,d,ii,ix,iy,iz;
 | 
|---|
| 602 |   double MesTime1, MesTime2;
 | 
|---|
| 603 |   const struct Lattice *Lat = &P->Lat;
 | 
|---|
| 604 |   struct LatticeLevel *Lev = &Lat->Lev[STANDARTLEVEL];
 | 
|---|
| 605 |   struct  fft_plan_3d *plan = Lat->plan;
 | 
|---|
| 606 |   struct Density *Dens = Lev->Dens;
 | 
|---|
| 607 |   fftw_complex *destC = Dens->DensityCArray[TotalLocalDensity];
 | 
|---|
| 608 |   fftw_real *destCR = (fftw_real *)destC;
 | 
|---|
| 609 |   fftw_real *destCR2 = Dens->DensityArray[TotalLocalDensity];
 | 
|---|
| 610 |   fftw_complex *work = (fftw_complex *)Dens->DensityCArray[TempDensity];
 | 
|---|
| 611 |   const double factor = 1./Lev->MaxN;
 | 
|---|
| 612 |   /* srand(1); */
 | 
|---|
| 613 |   for (d=0; d < (P->Par.me_comm_ST_Psi+P->Par.Max_me_comm_ST_Psi*P->Par.me_comm_ST_PsiT); d++)
 | 
|---|
| 614 |     for (i=0; i<Dens->LocalSizeR; i++)
 | 
|---|
| 615 |       rand();
 | 
|---|
| 616 |   for (i=0; i<Dens->LocalSizeR; i++) {
 | 
|---|
| 617 |     destCR[i] = 1.-2.*(rand()/(double)RAND_MAX);
 | 
|---|
| 618 |     destCR2[i] = destCR[i]; 
 | 
|---|
| 619 |   }
 | 
|---|
| 620 |   WaitForOtherProcs(P,0); 
 | 
|---|
| 621 |   MesTime1 = GetTime();
 | 
|---|
| 622 |   fft_3d_real_to_complex(plan, STANDARTLEVEL, FFTNF1, destC, work);
 | 
|---|
| 623 |   for (i=0; i<Dens->LocalSizeC; i++) {    
 | 
|---|
| 624 |     destC[i].re *= factor;
 | 
|---|
| 625 |     destC[i].im *= factor;
 | 
|---|
| 626 |   }
 | 
|---|
| 627 |   fft_3d_complex_to_real(plan, STANDARTLEVEL, FFTNF1, destC, work);
 | 
|---|
| 628 |   MesTime2 = GetTime();
 | 
|---|
| 629 |   for (i=0; i<Dens->LocalSizeR; i++) {
 | 
|---|
| 630 |     if (fabs(destCR2[i] - destCR[i]) >= MYEPSILON) { 
 | 
|---|
| 631 |       iz = i % Lev->N[2];
 | 
|---|
| 632 |       ii = i / Lev->N[2];
 | 
|---|
| 633 |       iy = ii % Lev->N[1];
 | 
|---|
| 634 |       ix = ii / Lev->N[1];
 | 
|---|
| 635 |       fprintf(stderr,"FFT(%i)(%i,%i) =(%i)(%i,%i,%i) %g = |%g - %g| eps(%g)\n",P->Par.my_color_comm_ST,P->Par.my_color_comm_ST_Psi, P->Par.me_comm_ST_Psi, i, ix, iy, iz, fabs(destCR2[i] - destCR[i]), destCR2[i], destCR[i], MYEPSILON);
 | 
|---|
| 636 |     }
 | 
|---|
| 637 |   }
 | 
|---|
| 638 |   return(MesTime2-MesTime1);
 | 
|---|
| 639 | }
 | 
|---|
| 640 |   
 | 
|---|
| 641 | /** GS Test.
 | 
|---|
| 642 |  * performs test on Gram-Schmidt
 | 
|---|
| 643 |  * \param *P \ref Problem at hand
 | 
|---|
| 644 |  */
 | 
|---|
| 645 | static double GSTest(struct Problem *P)
 | 
|---|
| 646 | {
 | 
|---|
| 647 |   struct LatticeLevel *Lev = &P->Lat.Lev[STANDARTLEVEL];
 | 
|---|
| 648 |   int i,j,k;
 | 
|---|
| 649 |   double MesTime1, MesTime2;
 | 
|---|
| 650 |   int l = P->Lat.Psi.AllLocalNo[0]-1;
 | 
|---|
| 651 |   if (P->Par.my_color_comm_ST_Psi == 0) {
 | 
|---|
| 652 |     for (k=0; k<P->Par.me_comm_ST_Psi; k++)
 | 
|---|
| 653 |       for (j=0; j<2*Lev->AllMaxG[k]; j++)
 | 
|---|
| 654 |         rand();
 | 
|---|
| 655 |     l = P->Lat.Psi.LocalNo;
 | 
|---|
| 656 |     P->Lat.Psi.LocalPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal;
 | 
|---|
| 657 |     for (i=0; i < Lev->MaxG; i++) {
 | 
|---|
| 658 |       Lev->LPsi->LocalPsi[l][i].re = 1.-2.*(rand()/(double)RAND_MAX);
 | 
|---|
| 659 |       Lev->LPsi->LocalPsi[l][i].im = 1.-2.*(rand()/(double)RAND_MAX);
 | 
|---|
| 660 |     }
 | 
|---|
| 661 |   }
 | 
|---|
| 662 |   P->Lat.Psi.AllPsiStatus[Occupied][l].PsiGramSchStatus = (int)NotOrthogonal;
 | 
|---|
| 663 |   WaitForOtherProcs(P,0); 
 | 
|---|
| 664 |   MesTime1 = GetTime();
 | 
|---|
| 665 |   GramSch(P, &P->Lat.Lev[STANDARTLEVEL], &P->Lat.Psi, Orthonormalize);
 | 
|---|
| 666 |   MesTime2 = GetTime();
 | 
|---|
| 667 |   if (P->Par.my_color_comm_ST_Psi == 0) {
 | 
|---|
| 668 |     for (k=P->Par.me_comm_ST_Psi+1; k<P->Par.Max_me_comm_ST_Psi; k++)
 | 
|---|
| 669 |       for (j=0; j<2*Lev->AllMaxG[k]; j++)
 | 
|---|
| 670 |         rand();
 | 
|---|
| 671 |   }
 | 
|---|
| 672 |   return(MesTime2-MesTime1);
 | 
|---|
| 673 | }
 | 
|---|
| 674 | #endif
 | 
|---|
| 675 | 
 | 
|---|
| 676 | 
 | 
|---|
| 677 | #ifndef MAXTESTRUN
 | 
|---|
| 678 | #define MAXTESTRUN 100  //!< Maximum runs for a test
 | 
|---|
| 679 | #endif
 | 
|---|
| 680 | 
 | 
|---|
| 681 | #if 0
 | 
|---|
| 682 | /** combined GS fft test.
 | 
|---|
| 683 |  * tests fft and Gram Schmidt combined
 | 
|---|
| 684 |  * \param *P \ref Problem at hand
 | 
|---|
| 685 |  */
 | 
|---|
| 686 | static void TestGSfft(struct Problem *P)
 | 
|---|
| 687 | {
 | 
|---|
| 688 |   int i, MaxG=0;
 | 
|---|
| 689 |   double fftTime = 0;
 | 
|---|
| 690 |   double GSTime = 0;
 | 
|---|
| 691 |   double *A = NULL;
 | 
|---|
| 692 |   double *AT = NULL;
 | 
|---|
| 693 |   double avarage, avarage2, stddev;
 | 
|---|
| 694 |   for(i=0; i < P->Par.Max_me_comm_ST_Psi; i++)
 | 
|---|
| 695 |     MaxG += P->Lat.Lev[STANDARTLEVEL].AllMaxG[i];
 | 
|---|
| 696 |   if (!P->Par.me_comm_ST_Psi) A = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_Psi,"TestGSfft");
 | 
|---|
| 697 |   if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) AT = Malloc(sizeof(double)*P->Par.Max_me_comm_ST_PsiT,"TestGSfft");
 | 
|---|
| 698 |   for (i=0;i<MAXTESTRUN;i++) 
 | 
|---|
| 699 |     fftTime += fftTest(P);
 | 
|---|
| 700 |   fftTime /= MAXTESTRUN;
 | 
|---|
| 701 |   MPI_Gather( &fftTime, 1, MPI_DOUBLE, A, 1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi );
 | 
|---|
| 702 |   if (!P->Par.me_comm_ST_Psi) {
 | 
|---|
| 703 |     avarage = 0;
 | 
|---|
| 704 |     for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) 
 | 
|---|
| 705 |       avarage += A[i];
 | 
|---|
| 706 |     avarage /= P->Par.Max_me_comm_ST_Psi;
 | 
|---|
| 707 |     avarage2 = 0;
 | 
|---|
| 708 |     for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) 
 | 
|---|
| 709 |       avarage2 += A[i]*A[i];
 | 
|---|
| 710 |     avarage2 /= P->Par.Max_me_comm_ST_Psi;
 | 
|---|
| 711 |     stddev = sqrt(fabs(avarage2-avarage*avarage));
 | 
|---|
| 712 |     fprintf(stdout,"fft: Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f   std %f\n", 
 | 
|---|
| 713 |             P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2],
 | 
|---|
| 714 |             MaxG,
 | 
|---|
| 715 |             P->Par.my_color_comm_ST,
 | 
|---|
| 716 |             P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi,
 | 
|---|
| 717 |             P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi,
 | 
|---|
| 718 |             avarage, stddev);   
 | 
|---|
| 719 |   }
 | 
|---|
| 720 |   srand(2);
 | 
|---|
| 721 |   for (i=0;i<MAXTESTRUN;i++)
 | 
|---|
| 722 |     GSTime += GSTest(P);
 | 
|---|
| 723 |   GSTime /= MAXTESTRUN;
 | 
|---|
| 724 |   MPI_Gather( &GSTime, 1, MPI_DOUBLE, A, 1, MPI_DOUBLE, 0, P->Par.comm_ST_Psi );
 | 
|---|
| 725 |   if (!P->Par.me_comm_ST_Psi) {
 | 
|---|
| 726 |     avarage = 0;
 | 
|---|
| 727 |     for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) 
 | 
|---|
| 728 |       avarage += A[i];
 | 
|---|
| 729 |     avarage /= P->Par.Max_me_comm_ST_Psi;
 | 
|---|
| 730 |     avarage2 = 0;
 | 
|---|
| 731 |     for (i=0; i < P->Par.Max_me_comm_ST_Psi; i++) 
 | 
|---|
| 732 |       avarage2 += A[i]*A[i];
 | 
|---|
| 733 |     avarage2 /= P->Par.Max_me_comm_ST_Psi;
 | 
|---|
| 734 |     stddev = sqrt(fabs(avarage2-avarage*avarage));
 | 
|---|
| 735 |     fprintf(stdout,"GS : Grid(%i, %i, %i)(%i) ST(%i)MProc(%i, %i)=(%i, %i): avg %f   std %f\n", 
 | 
|---|
| 736 |             P->Lat.Lev[0].N[0],P->Lat.Lev[0].N[1],P->Lat.Lev[0].N[2],
 | 
|---|
| 737 |             MaxG,
 | 
|---|
| 738 |             P->Par.my_color_comm_ST,
 | 
|---|
| 739 |             P->Par.Max_me_comm_ST_PsiT, P->Par.Max_me_comm_ST_Psi,
 | 
|---|
| 740 |             P->Par.me_comm_ST_PsiT,P->Par.me_comm_ST_Psi,
 | 
|---|
| 741 |             avarage, stddev);   
 | 
|---|
| 742 |   }
 | 
|---|
| 743 |   if (!P->Par.me_comm_ST_Psi) Free(A, "TestGSfft: A");
 | 
|---|
| 744 |   if (!P->Par.me_comm_ST_PsiT && !P->Par.me_comm_ST_Psi) Free(AT, "TestGSfft: AT");
 | 
|---|
| 745 | }
 | 
|---|
| 746 | #endif
 | 
|---|
| 747 | 
 | 
|---|
| 748 | /** Run Simulation.
 | 
|---|
| 749 |  * just calls \ref CalculateMD()
 | 
|---|
| 750 |  \if testing==1
 | 
|---|
| 751 |  * also place to put \ref TestGSfft(P), \ref GSTest(P) or \ref fftTest(P)
 | 
|---|
| 752 |  \endif
 | 
|---|
| 753 |  * \param *P \ref Problem at hand
 | 
|---|
| 754 |  */
 | 
|---|
| 755 | static void RunSim(struct Problem *P) {
 | 
|---|
| 756 |   if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Running Simulation !\n",P->Par.me);
 | 
|---|
| 757 |   CalculateMD(P);
 | 
|---|
| 758 |   if (P->Call.out[NormalOut]) fprintf(stderr,"Proc %i: Ending Simulation !\n",P->Par.me);
 | 
|---|
| 759 |   /*  TestGSfft(P);*/
 | 
|---|
| 760 | }
 | 
|---|
| 761 | 
 | 
|---|
| 762 | /** Preparation, running and cleaning of a run.
 | 
|---|
| 763 |  * Allocate Problem structure,
 | 
|---|
| 764 |  * read command line options GetOptions(), 
 | 
|---|
| 765 |  * define verbosity of output groups SetOutGroup1(),
 | 
|---|
| 766 |  * maybe StartDebugger(),
 | 
|---|
| 767 |  * parse the configuration files ReadParameters().
 | 
|---|
| 768 |  * 
 | 
|---|
| 769 |  * Pay attention to external stop signals InitSignals(),
 | 
|---|
| 770 |  * define verbosity within the MPi communicators SetOutGroup2(),
 | 
|---|
| 771 |  * initialize the timing arrays InitSpeedMeasure(),
 | 
|---|
| 772 |  * WaitForOtherProcs(),
 | 
|---|
| 773 |  * and doe the initialization of the actual problem Init() (SpeedMeasure()'d in InitTime).\n
 | 
|---|
| 774 |  * Runs the Simulation RunSim() and cleans up at the end by calling WaitForOtherProcs(),
 | 
|---|
| 775 |  * CompleteSpeedMeasure() and RemoveEverything()
 | 
|---|
| 776 |  * \param argc argument count from main
 | 
|---|
| 777 |  * \param argv argument array from main
 | 
|---|
| 778 |  */
 | 
|---|
| 779 | static void Run(int argc, char **argv) {
 | 
|---|
| 780 |   struct Problem *P = (struct Problem *)
 | 
|---|
| 781 |     Malloc(sizeof(struct Problem),"Run - P");   /* contains options, files and data regaring the simulation at hand */
 | 
|---|
| 782 |   GetOptions(&P->Call, argc, argv);
 | 
|---|
| 783 |   SetOutGroup1(&P->Call);
 | 
|---|
| 784 |   if(P->Call.debug == 1) StartDebugger();        
 | 
|---|
| 785 |   ReadParameters(P, P->Call.MainParameterFile);  
 | 
|---|
| 786 |   StartParallel(P, argv); 
 | 
|---|
| 787 |   InitSignals();    
 | 
|---|
| 788 |   SetOutGroup2(P, &P->Call);
 | 
|---|
| 789 |   InitSpeedMeasure(P);
 | 
|---|
| 790 |   WaitForOtherProcs(P,1);
 | 
|---|
| 791 |   SpeedMeasure(P, InitTime, StartTimeDo);
 | 
|---|
| 792 |   Init(P);
 | 
|---|
| 793 |   SpeedMeasure(P, InitTime, StopTimeDo);          
 | 
|---|
| 794 |   RunSim(P);                   
 | 
|---|
| 795 |   WaitForOtherProcs(P,1);        
 | 
|---|
| 796 |   CompleteSpeedMeasure(P);
 | 
|---|
| 797 |   RemoveEverything(P);
 | 
|---|
| 798 | }
 | 
|---|
| 799 | 
 | 
|---|
| 800 | /** Main routine.
 | 
|---|
| 801 |  * Launches MPI environment (Init and Finalize), starts Run() and gives
 | 
|---|
| 802 |  * correct return signal on exit
 | 
|---|
| 803 |  * \param argc argument count
 | 
|---|
| 804 |  * \param argv argument array
 | 
|---|
| 805 |  * \return exit code (always successive)
 | 
|---|
| 806 | */
 | 
|---|
| 807 | int main(int argc, char **argv) {
 | 
|---|
| 808 |   MPI_Init(&argc, &argv);
 | 
|---|
| 809 |   Run(argc, argv);
 | 
|---|
| 810 |   MPI_Finalize();
 | 
|---|
| 811 |   CheckSigReturn();
 | 
|---|
| 812 |   return EXIT_SUCCESS;
 | 
|---|
| 813 | }
 | 
|---|