source: molecuilder/src/molecules.cpp@ 927aba

Last change on this file since 927aba was 927aba, checked in by Frederik Heber <heber@…>, 17 years ago

new function CenterInBox, some char* converted to string

CenterInBox centers the given geometry in a rectangular box of given edge lengths
new g++ that came with ubuntu hardy complained about deprecated use of char* instead of string (for constant char arrays such: "test")

  • Property mode set to 100644
File size: 186.8 KB
Line 
1/** \file molecules.cpp
2 *
3 * Functions for the class molecule.
4 *
5 */
6
7#include "molecules.hpp"
8
9/************************************* Other Functions *************************************/
10
11/** Determines sum of squared distances of \a X to all \a **vectors.
12 * \param *x reference vector
13 * \param *params
14 * \return sum of square distances
15 */
16double LSQ (const gsl_vector * x, void * params)
17{
18 double sum = 0.;
19 struct LSQ_params *par = (struct LSQ_params *)params;
20 vector **vectors = par->vectors;
21 int num = par->num;
22
23 for (int i=0;i<num;i++) {
24 for(int j=0;j<NDIM;j++)
25 sum += (gsl_vector_get(x,j) - (vectors[i])->x[j])*(gsl_vector_get(x,j) - (vectors[i])->x[j]);
26 }
27
28 return sum;
29};
30
31/************************************* Functions for class molecule *********************************/
32
33/** Constructor of class molecule.
34 * Initialises molecule list with correctly referenced start and end, and sets molecule::last_atom to zero.
35 */
36molecule::molecule(periodentafel *teil)
37{
38 // init atom chain list
39 start = new atom;
40 end = new atom;
41 start->father = NULL;
42 end->father = NULL;
43 link(start,end);
44 // init bond chain list
45 first = new bond(start, end, 1, -1);
46 last = new bond(start, end, 1, -1);
47 link(first,last);
48 // other stuff
49 last_atom = 0;
50 elemente = teil;
51 AtomCount = 0;
52 BondCount = 0;
53 NoNonBonds = 0;
54 NoNonHydrogen = 0;
55 NoCyclicBonds = 0;
56 ListOfBondsPerAtom = NULL;
57 NumberOfBondsPerAtom = NULL;
58 ElementCount = 0;
59 for(int i=0;i<MAX_ELEMENTS;i++)
60 ElementsInMolecule[i] = 0;
61 cell_size[0] = cell_size[2] = cell_size[5]= 20.;
62 cell_size[1] = cell_size[3] = cell_size[4]= 0.;
63};
64
65/** Destructor of class molecule.
66 * Initialises molecule list with correctly referenced start and end, and sets molecule::last_atom to zero.
67 */
68molecule::~molecule()
69{
70 if (ListOfBondsPerAtom != NULL)
71 for(int i=0;i<AtomCount;i++)
72 Free((void **)&ListOfBondsPerAtom[i], "molecule::~molecule: ListOfBondsPerAtom[i]");
73 Free((void **)&ListOfBondsPerAtom, "molecule::~molecule: ListOfBondsPerAtom");
74 Free((void **)&NumberOfBondsPerAtom, "molecule::~molecule: NumberOfBondsPerAtom");
75 CleanupMolecule();
76 delete(first);
77 delete(last);
78 delete(end);
79 delete(start);
80};
81
82/** Adds given atom \a *pointer from molecule list.
83 * Increases molecule::last_atom and gives last number to added atom and names it according to its element::abbrev and molecule::AtomCount
84 * \param *pointer allocated and set atom
85 * \return true - succeeded, false - atom not found in list
86 */
87bool molecule::AddAtom(atom *pointer)
88{
89 if (pointer != NULL) {
90 pointer->sort = &pointer->nr;
91 pointer->nr = last_atom++; // increase number within molecule
92 AtomCount++;
93 if (pointer->type != NULL) {
94 if (ElementsInMolecule[pointer->type->Z] == 0)
95 ElementCount++;
96 ElementsInMolecule[pointer->type->Z]++; // increase number of elements
97 if (pointer->type->Z != 1)
98 NoNonHydrogen++;
99 if (pointer->Name == NULL) {
100 Free((void **)&pointer->Name, "molecule::AddAtom: *pointer->Name");
101 pointer->Name = (char *) Malloc(sizeof(char)*6, "molecule::AddAtom: *pointer->Name");
102 sprintf(pointer->Name, "%2s%02d", pointer->type->symbol, pointer->nr+1);
103 }
104 }
105 return add(pointer, end);
106 } else
107 return false;
108};
109
110/** Adds a copy of the given atom \a *pointer from molecule list.
111 * Increases molecule::last_atom and gives last number to added atom.
112 * \param *pointer allocated and set atom
113 * \return true - succeeded, false - atom not found in list
114 */
115atom * molecule::AddCopyAtom(atom *pointer)
116{
117 if (pointer != NULL) {
118 atom *walker = new atom();
119 walker->type = pointer->type; // copy element of atom
120 walker->x.CopyVector(&pointer->x); // copy coordination
121 walker->v.CopyVector(&pointer->v); // copy velocity
122 walker->FixedIon = pointer->FixedIon;
123 walker->sort = &walker->nr;
124 walker->nr = last_atom++; // increase number within molecule
125 walker->father = pointer; //->GetTrueFather();
126 walker->Name = (char *) Malloc(sizeof(char)*strlen(pointer->Name)+1, "molecule::AddCopyAtom: *Name");
127 strcpy (walker->Name, pointer->Name);
128 add(walker, end);
129 if ((pointer->type != NULL) && (pointer->type->Z != 1))
130 NoNonHydrogen++;
131 AtomCount++;
132 return walker;
133 } else
134 return NULL;
135};
136
137/** Adds a Hydrogen atom in replacement for the given atom \a *partner in bond with a *origin.
138 * Here, we have to distinguish between single, double or triple bonds as stated by \a BondDegree, that each demand
139 * a different scheme when adding \a *replacement atom for the given one.
140 * -# Single Bond: Simply add new atom with bond distance rescaled to typical hydrogen one
141 * -# Double Bond: Here, we need the **BondList of the \a *origin atom, by scanning for the other bonds instead of
142 * *Bond, we use the through these connected atoms to determine the plane they lie in, vector::MakeNormalVector().
143 * The orthonormal vector to this plane along with the vector in *Bond direction determines the plane the two
144 * replacing hydrogens shall lie in. Now, all remains to do is take the usual hydrogen double bond angle for the
145 * element of *origin and form the sin/cos admixture of both plane vectors for the new coordinates of the two
146 * hydrogens forming this angle with *origin.
147 * -# Triple Bond: The idea is to set up a tetraoid (C1-H1-H2-H3) (however the lengths \f$b\f$ of the sides of the base
148 * triangle formed by the to be added hydrogens are not equal to the typical bond distance \f$l\f$ but have to be
149 * determined from the typical angle \f$\alpha\f$ for a hydrogen triple connected to the element of *origin):
150 * We have the height \f$d\f$ as the vector in *Bond direction (from triangle C1-H1-H2).
151 * \f[ h = l \cdot \cos{\left (\frac{\alpha}{2} \right )} \qquad b = 2l \cdot \sin{\left (\frac{\alpha}{2} \right)} \quad \rightarrow \quad d = l \cdot \sqrt{\cos^2{\left (\frac{\alpha}{2} \right)}-\frac{1}{3}\cdot\sin^2{\left (\frac{\alpha}{2}\right )}}
152 * \f]
153 * vector::GetNormalVector() creates one orthonormal vector from this *Bond vector and vector::MakeNormalVector creates
154 * the third one from the former two vectors. The latter ones form the plane of the base triangle mentioned above.
155 * The lengths for these are \f$f\f$ and \f$g\f$ (from triangle H1-H2-(center of H1-H2-H3)) with knowledge that
156 * the median lines in an isosceles triangle meet in the center point with a ratio 2:1.
157 * \f[ f = \frac{b}{\sqrt{3}} \qquad g = \frac{b}{2}
158 * \f]
159 * as the coordination of all three atoms in the coordinate system of these three vectors:
160 * \f$\pmatrix{d & f & 0}\f$, \f$\pmatrix{d & -0.5 \cdot f & g}\f$ and \f$\pmatrix{d & -0.5 \cdot f & -g}\f$.
161 *
162 * \param *out output stream for debugging
163 * \param *Bond pointer to bond between \a *origin and \a *replacement
164 * \param *TopOrigin son of \a *origin of upper level molecule (the atom added to this molecule as a copy of \a *origin)
165 * \param *origin pointer to atom which acts as the origin for scaling the added hydrogen to correct bond length
166 * \param *replacement pointer to the atom which shall be copied as a hydrogen atom in this molecule
167 * \param **BondList list of bonds \a *replacement has (necessary to determine plane for double and triple bonds)
168 * \param NumBond number of bonds in \a **BondList
169 * \param isAngstroem whether the coordination of the given atoms is in AtomicLength (false) or Angstrom(true)
170 * \return number of atoms added, if < bond::BondDegree then something went wrong
171 * \todo double and triple bonds splitting (always use the tetraeder angle!)
172 */
173bool molecule::AddHydrogenReplacementAtom(ofstream *out, bond *TopBond, atom *BottomOrigin, atom *TopOrigin, atom *TopReplacement, bond **BondList, int NumBond, bool IsAngstroem)
174{
175 double bondlength; // bond length of the bond to be replaced/cut
176 double bondangle; // bond angle of the bond to be replaced/cut
177 double BondRescale; // rescale value for the hydrogen bond length
178 bool AllWentWell = true; // flag gathering the boolean return value of molecule::AddAtom and other functions, as return value on exit
179 bond *FirstBond = NULL, *SecondBond = NULL; // Other bonds in double bond case to determine "other" plane
180 atom *FirstOtherAtom = NULL, *SecondOtherAtom = NULL, *ThirdOtherAtom = NULL; // pointer to hydrogen atoms to be added
181 int i; // loop variable
182 double b,l,d,f,g, alpha, factors[NDIM]; // hold temporary values in triple bond case for coordination determination
183 vector OrthoVector1, OrthoVector2; // temporary vectors in coordination construction
184 vector InBondVector; // vector in direction of *Bond
185 bond *Binder = NULL;
186 double *matrix;
187
188// *out << Verbose(3) << "Begin of AddHydrogenReplacementAtom." << endl;
189 // create vector in direction of bond
190 InBondVector.CopyVector(&TopReplacement->x);
191 InBondVector.SubtractVector(&TopOrigin->x);
192 bondlength = InBondVector.Norm();
193
194 // is greater than typical bond distance? Then we have to correct periodically
195 // the problem is not the H being out of the box, but InBondVector have the wrong direction
196 // due to TopReplacement or Origin being on the wrong side!
197 if (bondlength > BondDistance) {
198// *out << Verbose(4) << "InBondVector is: ";
199// InBondVector.Output(out);
200// *out << endl;
201 OrthoVector1.Zero();
202 for (int i=0;i<NDIM;i++) {
203 l = TopReplacement->x.x[i] - TopOrigin->x.x[i];
204 if (fabs(l) > BondDistance) { // is component greater than bond distance
205 OrthoVector1.x[i] = (l < 0) ? -1. : +1.;
206 } // (signs are correct, was tested!)
207 }
208 matrix = ReturnFullMatrixforSymmetric(cell_size);
209 OrthoVector1.MatrixMultiplication(matrix);
210 InBondVector.SubtractVector(&OrthoVector1); // subtract just the additional translation
211 Free((void **)&matrix, "molecule::AddHydrogenReplacementAtom: *matrix");
212 bondlength = InBondVector.Norm();
213// *out << Verbose(4) << "Corrected InBondVector is now: ";
214// InBondVector.Output(out);
215// *out << endl;
216 } // periodic correction finished
217
218 InBondVector.Normalize();
219 // get typical bond length and store as scale factor for later
220 BondRescale = TopOrigin->type->HBondDistance[TopBond->BondDegree-1];
221 if (BondRescale == -1) {
222 cerr << Verbose(3) << "WARNING: There is no typical bond distance for bond (" << TopOrigin->Name << "<->" << TopReplacement->Name << ") of degree " << TopBond->BondDegree << "!" << endl;
223 BondRescale = bondlength;
224 } else {
225 if (!IsAngstroem)
226 BondRescale /= (1.*AtomicLengthToAngstroem);
227 }
228
229 // discern single, double and triple bonds
230 switch(TopBond->BondDegree) {
231 case 1:
232 FirstOtherAtom = new atom(); // new atom
233 FirstOtherAtom->type = elemente->FindElement(1); // element is Hydrogen
234 FirstOtherAtom->v.CopyVector(&TopReplacement->v); // copy velocity
235 FirstOtherAtom->FixedIon = TopReplacement->FixedIon;
236 if (TopReplacement->type->Z == 1) { // neither rescale nor replace if it's already hydrogen
237 FirstOtherAtom->father = TopReplacement;
238 BondRescale = bondlength;
239 } else {
240 FirstOtherAtom->father = NULL; // if we replace hydrogen, we mark it as our father, otherwise we are just an added hydrogen with no father
241 }
242 InBondVector.Scale(&BondRescale); // rescale the distance vector to Hydrogen bond length
243 FirstOtherAtom->x.CopyVector(&TopOrigin->x); // set coordination to origin ...
244 FirstOtherAtom->x.AddVector(&InBondVector); // ... and add distance vector to replacement atom
245 AllWentWell = AllWentWell && AddAtom(FirstOtherAtom);
246// *out << Verbose(4) << "Added " << *FirstOtherAtom << " at: ";
247// FirstOtherAtom->x.Output(out);
248// *out << endl;
249 Binder = AddBond(BottomOrigin, FirstOtherAtom, 1);
250 Binder->Cyclic = false;
251 Binder->Type = TreeEdge;
252 break;
253 case 2:
254 // determine two other bonds (warning if there are more than two other) plus valence sanity check
255 for (i=0;i<NumBond;i++) {
256 if (BondList[i] != TopBond) {
257 if (FirstBond == NULL) {
258 FirstBond = BondList[i];
259 FirstOtherAtom = BondList[i]->GetOtherAtom(TopOrigin);
260 } else if (SecondBond == NULL) {
261 SecondBond = BondList[i];
262 SecondOtherAtom = BondList[i]->GetOtherAtom(TopOrigin);
263 } else {
264 *out << Verbose(3) << "WARNING: Detected more than four bonds for atom " << TopOrigin->Name;
265 }
266 }
267 }
268 if (SecondOtherAtom == NULL) { // then we have an atom with valence four, but only 3 bonds: one to replace and one which is TopBond (third is FirstBond)
269 SecondBond = TopBond;
270 SecondOtherAtom = TopReplacement;
271 }
272 if (FirstOtherAtom != NULL) { // then we just have this double bond and the plane does not matter at all
273// *out << Verbose(3) << "Regarding the double bond (" << TopOrigin->Name << "<->" << TopReplacement->Name << ") to be constructed: Taking " << FirstOtherAtom->Name << " and " << SecondOtherAtom->Name << " along with " << TopOrigin->Name << " to determine orthogonal plane." << endl;
274
275 // determine the plane of these two with the *origin
276 AllWentWell = AllWentWell && OrthoVector1.MakeNormalVector(&TopOrigin->x, &FirstOtherAtom->x, &SecondOtherAtom->x);
277 } else {
278 OrthoVector1.GetOneNormalVector(&InBondVector);
279 }
280 //*out << Verbose(3)<< "Orthovector1: ";
281 //OrthoVector1.Output(out);
282 //*out << endl;
283 // orthogonal vector and bond vector between origin and replacement form the new plane
284 OrthoVector1.MakeNormalVector(&InBondVector);
285 OrthoVector1.Normalize();
286 //*out << Verbose(3) << "ReScaleCheck: " << OrthoVector1.Norm() << " and " << InBondVector.Norm() << "." << endl;
287
288 // create the two Hydrogens ...
289 FirstOtherAtom = new atom();
290 SecondOtherAtom = new atom();
291 FirstOtherAtom->type = elemente->FindElement(1);
292 SecondOtherAtom->type = elemente->FindElement(1);
293 FirstOtherAtom->v.CopyVector(&TopReplacement->v); // copy velocity
294 FirstOtherAtom->FixedIon = TopReplacement->FixedIon;
295 SecondOtherAtom->v.CopyVector(&TopReplacement->v); // copy velocity
296 SecondOtherAtom->FixedIon = TopReplacement->FixedIon;
297 FirstOtherAtom->father = NULL; // we are just an added hydrogen with no father
298 SecondOtherAtom->father = NULL; // we are just an added hydrogen with no father
299 bondangle = TopOrigin->type->HBondAngle[1];
300 if (bondangle == -1) {
301 *out << Verbose(3) << "WARNING: There is no typical bond angle for bond (" << TopOrigin->Name << "<->" << TopReplacement->Name << ") of degree " << TopBond->BondDegree << "!" << endl;
302 bondangle = 0;
303 }
304 bondangle *= M_PI/180./2.;
305// *out << Verbose(3) << "ReScaleCheck: InBondVector ";
306// InBondVector.Output(out);
307// *out << endl;
308// *out << Verbose(3) << "ReScaleCheck: Orthovector ";
309// OrthoVector1.Output(out);
310// *out << endl;
311// *out << Verbose(3) << "Half the bond angle is " << bondangle << ", sin and cos of it: " << sin(bondangle) << ", " << cos(bondangle) << endl;
312 FirstOtherAtom->x.Zero();
313 SecondOtherAtom->x.Zero();
314 for(i=0;i<NDIM;i++) { // rotate by half the bond angle in both directions (InBondVector is bondangle = 0 direction)
315 FirstOtherAtom->x.x[i] = InBondVector.x[i] * cos(bondangle) + OrthoVector1.x[i] * (sin(bondangle));
316 SecondOtherAtom->x.x[i] = InBondVector.x[i] * cos(bondangle) + OrthoVector1.x[i] * (-sin(bondangle));
317 }
318 FirstOtherAtom->x.Scale(&BondRescale); // rescale by correct BondDistance
319 SecondOtherAtom->x.Scale(&BondRescale);
320 //*out << Verbose(3) << "ReScaleCheck: " << FirstOtherAtom->x.Norm() << " and " << SecondOtherAtom->x.Norm() << "." << endl;
321 for(i=0;i<NDIM;i++) { // and make relative to origin atom
322 FirstOtherAtom->x.x[i] += TopOrigin->x.x[i];
323 SecondOtherAtom->x.x[i] += TopOrigin->x.x[i];
324 }
325 // ... and add to molecule
326 AllWentWell = AllWentWell && AddAtom(FirstOtherAtom);
327 AllWentWell = AllWentWell && AddAtom(SecondOtherAtom);
328// *out << Verbose(4) << "Added " << *FirstOtherAtom << " at: ";
329// FirstOtherAtom->x.Output(out);
330// *out << endl;
331// *out << Verbose(4) << "Added " << *SecondOtherAtom << " at: ";
332// SecondOtherAtom->x.Output(out);
333// *out << endl;
334 Binder = AddBond(BottomOrigin, FirstOtherAtom, 1);
335 Binder->Cyclic = false;
336 Binder->Type = TreeEdge;
337 Binder = AddBond(BottomOrigin, SecondOtherAtom, 1);
338 Binder->Cyclic = false;
339 Binder->Type = TreeEdge;
340 break;
341 case 3:
342 // take the "usual" tetraoidal angle and add the three Hydrogen in direction of the bond (height of the tetraoid)
343 FirstOtherAtom = new atom();
344 SecondOtherAtom = new atom();
345 ThirdOtherAtom = new atom();
346 FirstOtherAtom->type = elemente->FindElement(1);
347 SecondOtherAtom->type = elemente->FindElement(1);
348 ThirdOtherAtom->type = elemente->FindElement(1);
349 FirstOtherAtom->v.CopyVector(&TopReplacement->v); // copy velocity
350 FirstOtherAtom->FixedIon = TopReplacement->FixedIon;
351 SecondOtherAtom->v.CopyVector(&TopReplacement->v); // copy velocity
352 SecondOtherAtom->FixedIon = TopReplacement->FixedIon;
353 ThirdOtherAtom->v.CopyVector(&TopReplacement->v); // copy velocity
354 ThirdOtherAtom->FixedIon = TopReplacement->FixedIon;
355 FirstOtherAtom->father = NULL; // we are just an added hydrogen with no father
356 SecondOtherAtom->father = NULL; // we are just an added hydrogen with no father
357 ThirdOtherAtom->father = NULL; // we are just an added hydrogen with no father
358
359 // we need to vectors orthonormal the InBondVector
360 AllWentWell = AllWentWell && OrthoVector1.GetOneNormalVector(&InBondVector);
361// *out << Verbose(3) << "Orthovector1: ";
362// OrthoVector1.Output(out);
363// *out << endl;
364 AllWentWell = AllWentWell && OrthoVector2.MakeNormalVector(&InBondVector, &OrthoVector1);
365// *out << Verbose(3) << "Orthovector2: ";
366// OrthoVector2.Output(out);
367// *out << endl;
368
369 // create correct coordination for the three atoms
370 alpha = (TopOrigin->type->HBondAngle[2])/180.*M_PI/2.; // retrieve triple bond angle from database
371 l = BondRescale; // desired bond length
372 b = 2.*l*sin(alpha); // base length of isosceles triangle
373 d = l*sqrt(cos(alpha)*cos(alpha) - sin(alpha)*sin(alpha)/3.); // length for InBondvector
374 f = b/sqrt(3.); // length for OrthVector1
375 g = b/2.; // length for OrthVector2
376// *out << Verbose(3) << "Bond length and half-angle: " << l << ", " << alpha << "\t (b,d,f,g) = " << b << ", " << d << ", " << f << ", " << g << ", " << endl;
377// *out << Verbose(3) << "The three Bond lengths: " << sqrt(d*d+f*f) << ", " << sqrt(d*d+(-0.5*f)*(-0.5*f)+g*g) << ", " << sqrt(d*d+(-0.5*f)*(-0.5*f)+g*g) << endl;
378 factors[0] = d;
379 factors[1] = f;
380 factors[2] = 0.;
381 FirstOtherAtom->x.LinearCombinationOfVectors(&InBondVector, &OrthoVector1, &OrthoVector2, factors);
382 factors[1] = -0.5*f;
383 factors[2] = g;
384 SecondOtherAtom->x.LinearCombinationOfVectors(&InBondVector, &OrthoVector1, &OrthoVector2, factors);
385 factors[2] = -g;
386 ThirdOtherAtom->x.LinearCombinationOfVectors(&InBondVector, &OrthoVector1, &OrthoVector2, factors);
387
388 // rescale each to correct BondDistance
389// FirstOtherAtom->x.Scale(&BondRescale);
390// SecondOtherAtom->x.Scale(&BondRescale);
391// ThirdOtherAtom->x.Scale(&BondRescale);
392
393 // and relative to *origin atom
394 FirstOtherAtom->x.AddVector(&TopOrigin->x);
395 SecondOtherAtom->x.AddVector(&TopOrigin->x);
396 ThirdOtherAtom->x.AddVector(&TopOrigin->x);
397
398 // ... and add to molecule
399 AllWentWell = AllWentWell && AddAtom(FirstOtherAtom);
400 AllWentWell = AllWentWell && AddAtom(SecondOtherAtom);
401 AllWentWell = AllWentWell && AddAtom(ThirdOtherAtom);
402// *out << Verbose(4) << "Added " << *FirstOtherAtom << " at: ";
403// FirstOtherAtom->x.Output(out);
404// *out << endl;
405// *out << Verbose(4) << "Added " << *SecondOtherAtom << " at: ";
406// SecondOtherAtom->x.Output(out);
407// *out << endl;
408// *out << Verbose(4) << "Added " << *ThirdOtherAtom << " at: ";
409// ThirdOtherAtom->x.Output(out);
410// *out << endl;
411 Binder = AddBond(BottomOrigin, FirstOtherAtom, 1);
412 Binder->Cyclic = false;
413 Binder->Type = TreeEdge;
414 Binder = AddBond(BottomOrigin, SecondOtherAtom, 1);
415 Binder->Cyclic = false;
416 Binder->Type = TreeEdge;
417 Binder = AddBond(BottomOrigin, ThirdOtherAtom, 1);
418 Binder->Cyclic = false;
419 Binder->Type = TreeEdge;
420 break;
421 default:
422 cerr << "ERROR: BondDegree does not state single, double or triple bond!" << endl;
423 AllWentWell = false;
424 break;
425 }
426
427// *out << Verbose(3) << "End of AddHydrogenReplacementAtom." << endl;
428 return AllWentWell;
429};
430
431/** Adds given atom \a *pointer from molecule list.
432 * Increases molecule::last_atom and gives last number to added atom.
433 * \param filename name and path of xyz file
434 * \return true - succeeded, false - file not found
435 */
436bool molecule::AddXYZFile(string filename)
437{
438 istringstream *input = NULL;
439 int NumberOfAtoms = 0; // atom number in xyz read
440 int i, j; // loop variables
441 atom *first = NULL; // pointer to added atom
442 char shorthand[3]; // shorthand for atom name
443 ifstream xyzfile; // xyz file
444 string line; // currently parsed line
445 double x[3]; // atom coordinates
446
447 xyzfile.open(filename.c_str());
448 if (!xyzfile)
449 return false;
450
451 getline(xyzfile,line,'\n'); // Read numer of atoms in file
452 input = new istringstream(line);
453 *input >> NumberOfAtoms;
454 cout << Verbose(0) << "Parsing " << NumberOfAtoms << " atoms in file." << endl;
455 getline(xyzfile,line,'\n'); // Read comment
456 cout << Verbose(1) << "Comment: " << line << endl;
457
458 for(i=0;i<NumberOfAtoms;i++){
459 first = new atom;
460 getline(xyzfile,line,'\n');
461 istringstream *item = new istringstream(line);
462 //istringstream input(line);
463 //cout << Verbose(1) << "Reading: " << line << endl;
464 *item >> shorthand;
465 *item >> x[0];
466 *item >> x[1];
467 *item >> x[2];
468 first->type = elemente->FindElement(shorthand);
469 if (first->type == NULL) {
470 cerr << "Could not parse the element at line: '" << line << "', setting to H.";
471 first->type = elemente->FindElement(1);
472 }
473 for(j=0;j<NDIM;j++)
474 first->x.x[j] = x[j];
475 AddAtom(first); // add to molecule
476 delete(item);
477 }
478 xyzfile.close();
479 delete(input);
480 return true;
481};
482
483/** Creates a copy of this molecule.
484 * \return copy of molecule
485 */
486molecule *molecule::CopyMolecule()
487{
488 molecule *copy = new molecule(elemente);
489 atom *CurrentAtom = NULL;
490 atom *LeftAtom = NULL, *RightAtom = NULL;
491 atom *Walker = NULL;
492
493 // copy all atoms
494 Walker = start;
495 while(Walker->next != end) {
496 Walker = Walker->next;
497 CurrentAtom = copy->AddCopyAtom(Walker);
498 }
499
500 // copy all bonds
501 bond *Binder = first;
502 bond *NewBond = NULL;
503 while(Binder->next != last) {
504 Binder = Binder->next;
505 // get the pendant atoms of current bond in the copy molecule
506 LeftAtom = copy->start;
507 while (LeftAtom->next != copy->end) {
508 LeftAtom = LeftAtom->next;
509 if (LeftAtom->father == Binder->leftatom)
510 break;
511 }
512 RightAtom = copy->start;
513 while (RightAtom->next != copy->end) {
514 RightAtom = RightAtom->next;
515 if (RightAtom->father == Binder->rightatom)
516 break;
517 }
518 NewBond = copy->AddBond(LeftAtom, RightAtom, Binder->BondDegree);
519 NewBond->Cyclic = Binder->Cyclic;
520 if (Binder->Cyclic)
521 copy->NoCyclicBonds++;
522 NewBond->Type = Binder->Type;
523 }
524 // correct fathers
525 Walker = copy->start;
526 while(Walker->next != copy->end) {
527 Walker = Walker->next;
528 if (Walker->father->father == Walker->father) // same atom in copy's father points to itself
529 Walker->father = Walker; // set father to itself (copy of a whole molecule)
530 else
531 Walker->father = Walker->father->father; // set father to original's father
532 }
533 // copy values
534 copy->CountAtoms((ofstream *)&cout);
535 copy->CountElements();
536 if (first->next != last) { // if adjaceny list is present
537 copy->BondDistance = BondDistance;
538 copy->CreateListOfBondsPerAtom((ofstream *)&cout);
539 }
540
541 return copy;
542};
543
544/** Adds a bond to a the molecule specified by two atoms, \a *first and \a *second.
545 * Also updates molecule::BondCount and molecule::NoNonBonds.
546 * \param *first first atom in bond
547 * \param *second atom in bond
548 * \return pointer to bond or NULL on failure
549 */
550bond * molecule::AddBond(atom *atom1, atom *atom2, int degree=1)
551{
552 bond *Binder = NULL;
553 if ((atom1 != NULL) && (FindAtom(atom1->nr) != NULL) && (atom2 != NULL) && (FindAtom(atom2->nr) != NULL)) {
554 Binder = new bond(atom1, atom2, degree, BondCount++);
555 if ((atom1->type != NULL) && (atom1->type->Z != 1) && (atom2->type != NULL) && (atom2->type->Z != 1))
556 NoNonBonds++;
557 add(Binder, last);
558 } else {
559 cerr << Verbose(1) << "ERROR: Could not add bond between " << atom1->Name << " and " << atom2->Name << " as one or both are not present in the molecule." << endl;
560 }
561 return Binder;
562};
563
564/** Remove bond from bond chain list.
565 * \todo Function not implemented yet
566 * \param *pointer bond pointer
567 * \return true - bound found and removed, false - bond not found/removed
568 */
569bool molecule::RemoveBond(bond *pointer)
570{
571 //cerr << Verbose(1) << "molecule::RemoveBond: Function not implemented yet." << endl;
572 removewithoutcheck(pointer);
573 return true;
574};
575
576/** Remove every bond from bond chain list that atom \a *BondPartner is a constituent of.
577 * \todo Function not implemented yet
578 * \param *BondPartner atom to be removed
579 * \return true - bounds found and removed, false - bonds not found/removed
580 */
581bool molecule::RemoveBonds(atom *BondPartner)
582{
583 cerr << Verbose(1) << "molecule::RemoveBond: Function not implemented yet." << endl;
584 return false;
585};
586
587/** Sets the molecule::cell_size to the components of \a *dim (rectangular box)
588 * \param *dim vector class
589 */
590void molecule::SetBoxDimension(vector *dim)
591{
592 cell_size[0] = dim->x[0];
593 cell_size[1] = 0.;
594 cell_size[2] = dim->x[1];
595 cell_size[3] = 0.;
596 cell_size[4] = 0.;
597 cell_size[5] = dim->x[2];
598};
599
600/** Centers the molecule in the box whose lengths are defined by vector \a *BoxLengths.
601 * \param *out output stream for debugging
602 * \param *BoxLengths box lengths
603 */
604bool molecule::CenterInBox(ofstream *out, vector *BoxLengths)
605{
606 bool status = true;
607 atom *ptr = NULL;
608 vector *min = new vector;
609 vector *max = new vector;
610
611 // gather min and max for each axis
612 ptr = start->next; // start at first in list
613 if (ptr != end) { //list not empty?
614 for (int i=0;i<NDIM;i++) {
615 max->x[i] = ptr->x.x[i];
616 min->x[i] = ptr->x.x[i];
617 }
618 while (ptr->next != end) { // continue with second if present
619 ptr = ptr->next;
620 //ptr->Output(1,1,out);
621 for (int i=0;i<NDIM;i++) {
622 max->x[i] = (max->x[i] < ptr->x.x[i]) ? ptr->x.x[i] : max->x[i];
623 min->x[i] = (min->x[i] > ptr->x.x[i]) ? ptr->x.x[i] : min->x[i];
624 }
625 }
626 }
627 // sanity check
628 for(int i=0;i<NDIM;i++) {
629 if (max->x[i] - min->x[i] > BoxLengths->x[i])
630 status = false;
631 }
632 // warn if check failed
633 if (!status)
634 *out << "WARNING: molecule is bigger than defined box!" << endl;
635 else { // else center in box
636 ptr = start;
637 while (ptr->next != end) {
638 ptr = ptr->next;
639 for (int i=0;i<NDIM;i++)
640 ptr->x.x[i] += -(max->x[i] + min->x[i])/2. + BoxLengths->x[i]/2.; // first term centers molecule at (0,0,0), second shifts to center of new box
641 }
642 }
643
644 // free and exit
645 delete(min);
646 delete(max);
647 return status;
648};
649
650/** Centers the edge of the atoms at (0,0,0).
651 * \param *out output stream for debugging
652 * \param *max coordinates of other edge, specifying box dimensions.
653 */
654void molecule::CenterEdge(ofstream *out, vector *max)
655{
656 vector *min = new vector;
657
658// *out << Verbose(3) << "Begin of CenterEdge." << endl;
659 atom *ptr = start->next; // start at first in list
660 if (ptr != end) { //list not empty?
661 for (int i=0;i<NDIM;i++) {
662 max->x[i] = ptr->x.x[i];
663 min->x[i] = ptr->x.x[i];
664 }
665 while (ptr->next != end) { // continue with second if present
666 ptr = ptr->next;
667 //ptr->Output(1,1,out);
668 for (int i=0;i<NDIM;i++) {
669 max->x[i] = (max->x[i] < ptr->x.x[i]) ? ptr->x.x[i] : max->x[i];
670 min->x[i] = (min->x[i] > ptr->x.x[i]) ? ptr->x.x[i] : min->x[i];
671 }
672 }
673// *out << Verbose(4) << "Maximum is ";
674// max->Output(out);
675// *out << ", Minimum is ";
676// min->Output(out);
677// *out << endl;
678
679 for (int i=0;i<NDIM;i++) {
680 min->x[i] *= -1.;
681 max->x[i] += min->x[i];
682 }
683 Translate(min);
684 }
685 delete(min);
686// *out << Verbose(3) << "End of CenterEdge." << endl;
687};
688
689/** Centers the center of the atoms at (0,0,0).
690 * \param *out output stream for debugging
691 * \param *center return vector for translation vector
692 */
693void molecule::CenterOrigin(ofstream *out, vector *center)
694{
695 int Num = 0;
696 atom *ptr = start->next; // start at first in list
697
698 for(int i=0;i<NDIM;i++) // zero center vector
699 center->x[i] = 0.;
700
701 if (ptr != end) { //list not empty?
702 while (ptr->next != end) { // continue with second if present
703 ptr = ptr->next;
704 Num++;
705 center->AddVector(&ptr->x);
706 }
707 center->Scale(-1./Num); // divide through total number (and sign for direction)
708 Translate(center);
709 }
710};
711
712/** Centers the center of gravity of the atoms at (0,0,0).
713 * \param *out output stream for debugging
714 * \param *center return vector for translation vector
715 */
716void molecule::CenterGravity(ofstream *out, vector *center)
717{
718 double Num = 0;
719 atom *ptr = start->next; // start at first in list
720 vector tmp;
721
722 for(int i=0;i<NDIM;i++) // zero center vector
723 center->x[i] = 0.;
724
725 if (ptr != end) { //list not empty?
726 while (ptr->next != end) { // continue with second if present
727 ptr = ptr->next;
728 Num += ptr->type->mass;
729 tmp.CopyVector(&ptr->x);
730 tmp.Scale(ptr->type->mass); // scale by mass
731 center->AddVector(&tmp);
732 }
733 center->Scale(-1./Num); // divide through total mass (and sign for direction)
734 Translate(center);
735 }
736};
737
738/** Scales all atoms by \a *factor.
739 * \param *factor pointer to scaling factor
740 */
741void molecule::Scale(double **factor)
742{
743 atom *ptr = start;
744
745 while (ptr->next != end) {
746 ptr = ptr->next;
747 ptr->x.Scale(factor);
748 }
749};
750
751/** Translate all atoms by given vector.
752 * \param trans[] translation vector.
753 */
754void molecule::Translate(const vector *trans)
755{
756 atom *ptr = start;
757
758 while (ptr->next != end) {
759 ptr = ptr->next;
760 ptr->x.Translate(trans);
761 }
762};
763
764/** Mirrors all atoms against a given plane.
765 * \param n[] normal vector of mirror plane.
766 */
767void molecule::Mirror(const vector *n)
768{
769 atom *ptr = start;
770
771 while (ptr->next != end) {
772 ptr = ptr->next;
773 ptr->x.Mirror(n);
774 }
775};
776
777/** Determines center of gravity (yet not considering atom masses).
778 * \param CenterOfGravity reference to return vector
779 */
780void molecule::DetermineCenterOfGravity(vector &CenterOfGravity)
781{
782 atom *Walker = start;
783 bond *Binder = NULL;
784 double *matrix = ReturnFullMatrixforSymmetric(cell_size);
785 double tmp;
786 bool flag;
787 vector TestVector, TranslationVector;
788
789 do {
790 CenterOfGravity.Zero();
791 flag = true;
792 while (Walker->next != end) {
793 Walker = Walker->next;
794#ifdef ADDHYDROGEN
795 if (Walker->type->Z != 1) {
796#endif
797 TestVector.CopyVector(&Walker->x);
798 TestVector.InverseMatrixMultiplication(matrix);
799 TranslationVector.Zero();
800 for (int i=0;i<NumberOfBondsPerAtom[Walker->nr]; i++) {
801 Binder = ListOfBondsPerAtom[Walker->nr][i];
802 if (Walker->nr < Binder->GetOtherAtom(Walker)->nr) // otherwise we shift one to, the other fro and gain nothing
803 for (int j=0;j<NDIM;j++) {
804 tmp = Walker->x.x[j] - Binder->GetOtherAtom(Walker)->x.x[j];
805 if ((fabs(tmp)) > BondDistance) {
806 flag = false;
807 cout << Verbose(0) << "Hit: atom " << Walker->Name << " in bond " << *Binder << " has to be shifted due to " << tmp << "." << endl;
808 if (tmp > 0)
809 TranslationVector.x[j] -= 1.;
810 else
811 TranslationVector.x[j] += 1.;
812 }
813 }
814 }
815 TestVector.AddVector(&TranslationVector);
816 TestVector.MatrixMultiplication(matrix);
817 CenterOfGravity.AddVector(&TestVector);
818 cout << Verbose(1) << "Vector is: ";
819 TestVector.Output((ofstream *)&cout);
820 cout << endl;
821#ifdef ADDHYDROGEN
822 // now also change all hydrogens
823 for (int i=0;i<NumberOfBondsPerAtom[Walker->nr]; i++) {
824 Binder = ListOfBondsPerAtom[Walker->nr][i];
825 if (Binder->GetOtherAtom(Walker)->type->Z == 1) {
826 TestVector.CopyVector(&Binder->GetOtherAtom(Walker)->x);
827 TestVector.InverseMatrixMultiplication(matrix);
828 TestVector.AddVector(&TranslationVector);
829 TestVector.MatrixMultiplication(matrix);
830 CenterOfGravity.AddVector(&TestVector);
831 cout << Verbose(1) << "Hydrogen Vector is: ";
832 TestVector.Output((ofstream *)&cout);
833 cout << endl;
834 }
835 }
836 }
837#endif
838 }
839 } while (!flag);
840 Free((void **)&matrix, "molecule::DetermineCenterOfGravity: *matrix");
841 CenterOfGravity.Scale(1./(double)AtomCount);
842};
843
844/** Align all atoms in such a manner that given vector \a *n is along z axis.
845 * \param n[] alignment vector.
846 */
847void molecule::Align(vector *n)
848{
849 atom *ptr = start;
850 double alpha, tmp;
851 vector z_axis;
852 z_axis.x[0] = 0.;
853 z_axis.x[1] = 0.;
854 z_axis.x[2] = 1.;
855
856 // rotate on z-x plane
857 cout << Verbose(0) << "Begin of Aligning all atoms." << endl;
858 alpha = atan(-n->x[0]/n->x[2]);
859 cout << Verbose(1) << "Z-X-angle: " << alpha << " ... ";
860 while (ptr->next != end) {
861 ptr = ptr->next;
862 tmp = ptr->x.x[0];
863 ptr->x.x[0] = cos(alpha) * tmp + sin(alpha) * ptr->x.x[2];
864 ptr->x.x[2] = -sin(alpha) * tmp + cos(alpha) * ptr->x.x[2];
865 }
866 // rotate n vector
867 tmp = n->x[0];
868 n->x[0] = cos(alpha) * tmp + sin(alpha) * n->x[2];
869 n->x[2] = -sin(alpha) * tmp + cos(alpha) * n->x[2];
870 cout << Verbose(1) << "alignment vector after first rotation: ";
871 n->Output((ofstream *)&cout);
872 cout << endl;
873
874 // rotate on z-y plane
875 ptr = start;
876 alpha = atan(-n->x[1]/n->x[2]);
877 cout << Verbose(1) << "Z-Y-angle: " << alpha << " ... ";
878 while (ptr->next != end) {
879 ptr = ptr->next;
880 tmp = ptr->x.x[1];
881 ptr->x.x[1] = cos(alpha) * tmp + sin(alpha) * ptr->x.x[2];
882 ptr->x.x[2] = -sin(alpha) * tmp + cos(alpha) * ptr->x.x[2];
883 }
884 // rotate n vector (for consistency check)
885 tmp = n->x[1];
886 n->x[1] = cos(alpha) * tmp + sin(alpha) * n->x[2];
887 n->x[2] = -sin(alpha) * tmp + cos(alpha) * n->x[2];
888
889 cout << Verbose(1) << "alignment vector after second rotation: ";
890 n->Output((ofstream *)&cout);
891 cout << Verbose(1) << endl;
892 cout << Verbose(0) << "End of Aligning all atoms." << endl;
893};
894
895/** Removes atom from molecule list.
896 * \param *pointer atom to be removed
897 * \return true - succeeded, false - atom not found in list
898 */
899bool molecule::RemoveAtom(atom *pointer)
900{
901 if (ElementsInMolecule[pointer->type->Z] != 0) // this would indicate an error
902 ElementsInMolecule[pointer->type->Z]--; // decrease number of atom of this element
903 else
904 cerr << "ERROR: Atom " << pointer->Name << " is of element " << pointer->type->Z << " but the entry in the table of the molecule is 0!" << endl;
905 if (ElementsInMolecule[pointer->type->Z] == 0) // was last atom of this element?
906 ElementCount--;
907 return remove(pointer, start, end);
908};
909
910/** Removes every atom from molecule list.
911 * \return true - succeeded, false - atom not found in list
912 */
913bool molecule::CleanupMolecule()
914{
915 return (cleanup(start,end) && cleanup(first,last));
916};
917
918/** Finds an atom specified by its continuous number.
919 * \param Nr number of atom withim molecule
920 * \return pointer to atom or NULL
921 */
922atom * molecule::FindAtom(int Nr) const{
923 atom * walker = find(&Nr, start,end);
924 if (walker != NULL) {
925 //cout << Verbose(0) << "Found Atom Nr. " << walker->nr << endl;
926 return walker;
927 } else {
928 cout << Verbose(0) << "Atom not found in list." << endl;
929 return NULL;
930 }
931};
932
933/** Asks for atom number, and checks whether in list.
934 * \param *text question before entering
935 */
936atom * molecule::AskAtom(string text)
937{
938 int No;
939 atom *ion = NULL;
940 do {
941 //cout << Verbose(0) << "============Atom list==========================" << endl;
942 //mol->Output((ofstream *)&cout);
943 //cout << Verbose(0) << "===============================================" << endl;
944 cout << Verbose(0) << text;
945 cin >> No;
946 ion = this->FindAtom(No);
947 } while (ion == NULL);
948 return ion;
949};
950
951/** Checks if given coordinates are within cell volume.
952 * \param *x array of coordinates
953 * \return true - is within, false - out of cell
954 */
955bool molecule::CheckBounds(const vector *x) const
956{
957 bool result = true;
958 int j =-1;
959 for (int i=0;i<3;i++) {
960 j += i+1;
961 result = result && ((x->x[i] >= 0) && (x->x[i] < cell_size[j]));
962 }
963 //return result;
964 return true; /// probably not gonna use the check no more
965};
966
967/** Calculates sum over least square distance to line hidden in \a *x.
968 * \param *x offset and direction vector
969 * \param *params pointer to lsq_params structure
970 * \return \f$ sum_i^N | y_i - (a + t_i b)|^2\f$
971 */
972double LeastSquareDistance (const gsl_vector * x, void * params)
973{
974 double res = 0, t;
975 vector a,b,c,d;
976 struct lsq_params *par = (struct lsq_params *)params;
977 atom *ptr = par->mol->start;
978
979 // initialize vectors
980 a.x[0] = gsl_vector_get(x,0);
981 a.x[1] = gsl_vector_get(x,1);
982 a.x[2] = gsl_vector_get(x,2);
983 b.x[0] = gsl_vector_get(x,3);
984 b.x[1] = gsl_vector_get(x,4);
985 b.x[2] = gsl_vector_get(x,5);
986 // go through all atoms
987 while (ptr != par->mol->end) {
988 ptr = ptr->next;
989 if (ptr->type == ((struct lsq_params *)params)->type) { // for specific type
990 c.CopyVector(&ptr->x); // copy vector to temporary one
991 c.SubtractVector(&a); // subtract offset vector
992 t = c.ScalarProduct(&b); // get direction parameter
993 d.CopyVector(&b); // and create vector
994 d.Scale(&t);
995 c.SubtractVector(&d); // ... yielding distance vector
996 res += d.ScalarProduct((const vector *)&d); // add squared distance
997 }
998 }
999 return res;
1000};
1001
1002/** By minimizing the least square distance gains alignment vector.
1003 * \bug this is not yet working properly it seems
1004 */
1005void molecule::GetAlignVector(struct lsq_params * par) const
1006{
1007 int np = 6;
1008
1009 const gsl_multimin_fminimizer_type *T =
1010 gsl_multimin_fminimizer_nmsimplex;
1011 gsl_multimin_fminimizer *s = NULL;
1012 gsl_vector *ss;
1013 gsl_multimin_function minex_func;
1014
1015 size_t iter = 0, i;
1016 int status;
1017 double size;
1018
1019 /* Initial vertex size vector */
1020 ss = gsl_vector_alloc (np);
1021
1022 /* Set all step sizes to 1 */
1023 gsl_vector_set_all (ss, 1.0);
1024
1025 /* Starting point */
1026 par->x = gsl_vector_alloc (np);
1027 par->mol = this;
1028
1029 gsl_vector_set (par->x, 0, 0.0); // offset
1030 gsl_vector_set (par->x, 1, 0.0);
1031 gsl_vector_set (par->x, 2, 0.0);
1032 gsl_vector_set (par->x, 3, 0.0); // direction
1033 gsl_vector_set (par->x, 4, 0.0);
1034 gsl_vector_set (par->x, 5, 1.0);
1035
1036 /* Initialize method and iterate */
1037 minex_func.f = &LeastSquareDistance;
1038 minex_func.n = np;
1039 minex_func.params = (void *)par;
1040
1041 s = gsl_multimin_fminimizer_alloc (T, np);
1042 gsl_multimin_fminimizer_set (s, &minex_func, par->x, ss);
1043
1044 do
1045 {
1046 iter++;
1047 status = gsl_multimin_fminimizer_iterate(s);
1048
1049 if (status)
1050 break;
1051
1052 size = gsl_multimin_fminimizer_size (s);
1053 status = gsl_multimin_test_size (size, 1e-2);
1054
1055 if (status == GSL_SUCCESS)
1056 {
1057 printf ("converged to minimum at\n");
1058 }
1059
1060 printf ("%5d ", (int)iter);
1061 for (i = 0; i < (size_t)np; i++)
1062 {
1063 printf ("%10.3e ", gsl_vector_get (s->x, i));
1064 }
1065 printf ("f() = %7.3f size = %.3f\n", s->fval, size);
1066 }
1067 while (status == GSL_CONTINUE && iter < 100);
1068
1069 for (i=0;i<(size_t)np;i++)
1070 gsl_vector_set(par->x, i, gsl_vector_get(s->x, i));
1071 //gsl_vector_free(par->x);
1072 gsl_vector_free(ss);
1073 gsl_multimin_fminimizer_free (s);
1074};
1075
1076/** Prints molecule to *out.
1077 * \param *out output stream
1078 */
1079bool molecule::Output(ofstream *out)
1080{
1081 element *runner = elemente->start;
1082 atom *walker = NULL;
1083 int ElementNo, AtomNo;
1084 CountElements();
1085
1086 if (out == NULL) {
1087 return false;
1088 } else {
1089 *out << "#Ion_TypeNr._Nr.R[0] R[1] R[2] MoveType (0 MoveIon, 1 FixedIon)" << endl;
1090 ElementNo = 0;
1091 while (runner->next != elemente->end) { // go through every element
1092 runner = runner->next;
1093 if (ElementsInMolecule[runner->Z]) { // if this element got atoms
1094 ElementNo++;
1095 AtomNo = 0;
1096 walker = start;
1097 while (walker->next != end) { // go through every atom of this element
1098 walker = walker->next;
1099 if (walker->type == runner) { // if this atom fits to element
1100 AtomNo++;
1101 walker->Output(ElementNo, AtomNo, out);
1102 }
1103 }
1104 }
1105 }
1106 return true;
1107 }
1108};
1109
1110/** Outputs contents of molecule::ListOfBondsPerAtom.
1111 * \param *out output stream
1112 */
1113void molecule::OutputListOfBonds(ofstream *out) const
1114{
1115 *out << Verbose(2) << endl << "From Contents of ListOfBondsPerAtom, all non-hydrogen atoms:" << endl;
1116 atom *Walker = start;
1117 while (Walker->next != end) {
1118 Walker = Walker->next;
1119#ifdef ADDHYDROGEN
1120 if (Walker->type->Z != 1) { // regard only non-hydrogen
1121#endif
1122 *out << Verbose(2) << "Atom " << Walker->Name << " has Bonds: "<<endl;
1123 for(int j=0;j<NumberOfBondsPerAtom[Walker->nr];j++) {
1124 *out << Verbose(3) << *(ListOfBondsPerAtom)[Walker->nr][j] << endl;
1125 }
1126#ifdef ADDHYDROGEN
1127 }
1128#endif
1129 }
1130 *out << endl;
1131};
1132
1133/** Output of element before the actual coordination list.
1134 * \param *out stream pointer
1135 */
1136bool molecule::Checkout(ofstream *out) const
1137{
1138 return elemente->Checkout(out, ElementsInMolecule);
1139};
1140
1141/** Prints molecule to *out as xyz file.
1142 * \param *out output stream
1143 */
1144bool molecule::OutputXYZ(ofstream *out) const
1145{
1146 atom *walker = NULL;
1147 int No = 0;
1148 time_t now;
1149
1150 now = time((time_t *)NULL); // Get the system time and put it into 'now' as 'calender time'
1151 walker = start;
1152 while (walker->next != end) { // go through every atom and count
1153 walker = walker->next;
1154 No++;
1155 }
1156 if (out != NULL) {
1157 *out << No << "\n\tCreated by molecuilder on " << ctime(&now);
1158 walker = start;
1159 while (walker->next != end) { // go through every atom of this element
1160 walker = walker->next;
1161 walker->OutputXYZLine(out);
1162 }
1163 return true;
1164 } else
1165 return false;
1166};
1167
1168/** Brings molecule::AtomCount and atom::*Name up-to-date.
1169 * \param *out output stream for debugging
1170 */
1171void molecule::CountAtoms(ofstream *out)
1172{
1173 int i = 0;
1174 atom *Walker = start;
1175 while (Walker->next != end) {
1176 Walker = Walker->next;
1177 i++;
1178 }
1179 if ((AtomCount == 0) || (i != AtomCount)) {
1180 *out << Verbose(3) << "Mismatch in AtomCount " << AtomCount << " and recounted number " << i << ", renaming all." << endl;
1181 AtomCount = i;
1182
1183 // count NonHydrogen atoms and give each atom a unique name
1184 if (AtomCount != 0) {
1185 i=0;
1186 NoNonHydrogen = 0;
1187 Walker = start;
1188 while (Walker->next != end) {
1189 Walker = Walker->next;
1190 Walker->nr = i; // update number in molecule (for easier referencing in FragmentMolecule lateron)
1191 if (Walker->type->Z != 1) // count non-hydrogen atoms whilst at it
1192 NoNonHydrogen++;
1193 Free((void **)&Walker->Name, "molecule::CountAtoms: *walker->Name");
1194 Walker->Name = (char *) Malloc(sizeof(char)*6, "molecule::CountAtoms: *walker->Name");
1195 sprintf(Walker->Name, "%2s%02d", Walker->type->symbol, Walker->nr+1);
1196 *out << "Naming atom nr. " << Walker->nr << " " << Walker->Name << "." << endl;
1197 i++;
1198 }
1199 } else
1200 *out << Verbose(3) << "AtomCount is still " << AtomCount << ", thus counting nothing." << endl;
1201 }
1202};
1203
1204/** Brings molecule::ElementCount and molecule::ElementsInMolecule up-to-date.
1205 */
1206void molecule::CountElements()
1207{
1208 int i = 0;
1209 for(i=0;i<MAX_ELEMENTS;i++)
1210 ElementsInMolecule[i] = 0;
1211 ElementCount = 0;
1212
1213 atom *walker = start;
1214 while (walker->next != end) {
1215 walker = walker->next;
1216 ElementsInMolecule[walker->type->Z]++;
1217 i++;
1218 }
1219 for(i=0;i<MAX_ELEMENTS;i++)
1220 ElementCount += (ElementsInMolecule[i] != 0 ? 1 : 0);
1221};
1222
1223/** Counts all cyclic bonds and returns their number.
1224 * \note Hydrogen bonds can never by cyclic, thus no check for that
1225 * \param *out output stream for debugging
1226 * \return number opf cyclic bonds
1227 */
1228int molecule::CountCyclicBonds(ofstream *out)
1229{
1230 int No = 0;
1231 int *MinimumRingSize = NULL;
1232 MoleculeLeafClass *Subgraphs = NULL;
1233 bond *Binder = first;
1234 if ((Binder->next != last) && (Binder->next->Type == Undetermined)) {
1235 *out << Verbose(0) << "No Depth-First-Search analysis performed so far, calling ..." << endl;
1236 Subgraphs = DepthFirstSearchAnalysis(out, false, MinimumRingSize);
1237 while (Subgraphs->next != NULL) {
1238 Subgraphs = Subgraphs->next;
1239 delete(Subgraphs->previous);
1240 }
1241 delete(Subgraphs);
1242 delete[](MinimumRingSize);
1243 }
1244 while(Binder->next != last) {
1245 Binder = Binder->next;
1246 if (Binder->Cyclic)
1247 No++;
1248 }
1249 return No;
1250};
1251
1252/** Returns Shading as a char string.
1253 * \param color the Shading
1254 * \return string of the flag
1255 */
1256string molecule::GetColor(enum Shading color)
1257{
1258 switch(color) {
1259 case white:
1260 return "white";
1261 break;
1262 case lightgray:
1263 return "lightgray";
1264 break;
1265 case darkgray:
1266 return "darkgray";
1267 break;
1268 case black:
1269 return "black";
1270 break;
1271 default:
1272 return "uncolored";
1273 break;
1274 };
1275};
1276
1277
1278/** Counts necessary number of valence electrons and returns number and SpinType.
1279 * \param configuration containing everything
1280 */
1281void molecule::CalculateOrbitals(class config &configuration)
1282{
1283 configuration.MaxPsiDouble = configuration.PsiMaxNoDown = configuration.PsiMaxNoUp = configuration.PsiType = 0;
1284 for(int i=0;i<MAX_ELEMENTS;i++) {
1285 if (ElementsInMolecule[i] != 0) {
1286 //cout << "CalculateOrbitals: " << elemente->FindElement(i)->name << " has a valence of " << (int)elemente->FindElement(i)->Valence << " and there are " << ElementsInMolecule[i] << " of it." << endl;
1287 configuration.MaxPsiDouble += ElementsInMolecule[i]*((int)elemente->FindElement(i)->Valence);
1288 }
1289 }
1290 configuration.PsiMaxNoDown = configuration.MaxPsiDouble/2 + (configuration.MaxPsiDouble % 2);
1291 configuration.PsiMaxNoUp = configuration.MaxPsiDouble/2;
1292 configuration.MaxPsiDouble /= 2;
1293 configuration.PsiType = (configuration.PsiMaxNoDown == configuration.PsiMaxNoUp) ? 0 : 1;
1294 if ((configuration.PsiType == 1) && (configuration.ProcPEPsi < 2)) {
1295 configuration.ProcPEGamma /= 2;
1296 configuration.ProcPEPsi *= 2;
1297 } else {
1298 configuration.ProcPEGamma *= configuration.ProcPEPsi;
1299 configuration.ProcPEPsi = 1;
1300 }
1301 configuration.InitMaxMinStopStep = configuration.MaxMinStopStep = configuration.MaxPsiDouble;
1302};
1303
1304/** Creates an adjacency list of the molecule.
1305 * Generally, we use the CSD approach to bond recognition, that is the the distance
1306 * between two atoms A and B must be within [Rcov(A)+Rcov(B)-t,Rcov(A)+Rcov(B)+t] with
1307 * a threshold t = 0.4 Angstroem.
1308 * To make it O(N log N) the function uses the linked-cell technique as follows:
1309 * The procedure is step-wise:
1310 * -# Remove every bond in list
1311 * -# Count the atoms in the molecule with CountAtoms()
1312 * -# partition cell into smaller linked cells of size \a bonddistance
1313 * -# put each atom into its corresponding cell
1314 * -# go through every cell, check the atoms therein against all possible bond partners in the 27 adjacent cells, add bond if true
1315 * -# create the list of bonds via CreateListOfBondsPerAtom()
1316 * -# correct the bond degree iteratively (single->double->triple bond)
1317 * -# finally print the bond list to \a *out if desired
1318 * \param *out out stream for printing the matrix, NULL if no output
1319 * \param bonddistance length of linked cells (i.e. maximum minimal length checked)
1320 */
1321void molecule::CreateAdjacencyList(ofstream *out, double bonddistance)
1322{
1323 atom *Walker = NULL, *OtherWalker = NULL;
1324 int No, NoBonds;
1325 int NumberCells, divisor[NDIM], n[NDIM], N[NDIM], index, Index, j;
1326 molecule **CellList;
1327 double distance, MinDistance, MaxDistance;
1328 double *matrix = ReturnFullMatrixforSymmetric(cell_size);
1329 vector x;
1330
1331 BondDistance = bonddistance;
1332 *out << Verbose(0) << "Begin of CreateAdjacencyList." << endl;
1333 // remove every bond from the list
1334 if ((first->next != last) && (last->previous != first)) { // there are bonds present
1335 cleanup(first,last);
1336 }
1337
1338 // count atoms in molecule = dimension of matrix (also give each unique name and continuous numbering)
1339 CountAtoms(out);
1340 *out << Verbose(1) << "AtomCount " << AtomCount << "." << endl;
1341
1342 if (AtomCount != 0) {
1343 // 1. find divisor for each axis, such that a sphere with radius of at least bonddistance can be placed into each cell
1344 j=-1;
1345 for (int i=0;i<NDIM;i++) {
1346 j += i+1;
1347 divisor[i] = (int)floor(cell_size[j]/bonddistance); // take smaller value such that size of linked cell is at least bonddistance
1348 *out << Verbose(1) << "divisor[" << i << "] = " << divisor[i] << "." << endl;
1349 }
1350 // 2a. allocate memory for the cell list
1351 NumberCells = divisor[0]*divisor[1]*divisor[2];
1352 *out << Verbose(1) << "Allocating " << NumberCells << " cells." << endl;
1353 CellList = (molecule **) Malloc(sizeof(molecule *)*NumberCells, "molecule::CreateAdjacencyList - ** CellList");
1354 for (int i=0;i<NumberCells;i++)
1355 CellList[i] = NULL;
1356
1357 // 2b. put all atoms into its corresponding list
1358 Walker = start;
1359 while(Walker->next != end) {
1360 Walker = Walker->next;
1361 //*out << Verbose(1) << "Current atom is " << *Walker << " with coordinates ";
1362 //Walker->x.Output(out);
1363 //*out << "." << endl;
1364 // compute the cell by the atom's coordinates
1365 j=-1;
1366 for (int i=0;i<NDIM;i++) {
1367 j += i+1;
1368 x.CopyVector(&(Walker->x));
1369 x.KeepPeriodic(out, matrix);
1370 n[i] = (int)floor(x.x[i]/cell_size[j]*(double)divisor[i]);
1371 }
1372 index = n[2] + (n[1] + n[0] * divisor[1]) * divisor[2];
1373 *out << Verbose(1) << "Atom " << *Walker << " goes into cell number [" << n[0] << "," << n[1] << "," << n[2] << "] = " << index << "." << endl;
1374 // add copy atom to this cell
1375 if (CellList[index] == NULL) // allocate molecule if not done
1376 CellList[index] = new molecule(elemente);
1377 OtherWalker = CellList[index]->AddCopyAtom(Walker); // add a copy of walker to this atom, father will be walker for later reference
1378 //*out << Verbose(1) << "Copy Atom is " << *OtherWalker << "." << endl;
1379 }
1380 //for (int i=0;i<NumberCells;i++)
1381 //*out << Verbose(1) << "Cell number " << i << ": " << CellList[i] << "." << endl;
1382
1383 // 3a. go through every cell
1384 for (N[0]=0;N[0]<divisor[0];N[0]++)
1385 for (N[1]=0;N[1]<divisor[1];N[1]++)
1386 for (N[2]=0;N[2]<divisor[2];N[2]++) {
1387 Index = N[2] + (N[1] + N[0] * divisor[1]) * divisor[2];
1388 if (CellList[Index] != NULL) { // if there atoms in this cell
1389 //*out << Verbose(1) << "Current cell is " << Index << "." << endl;
1390 // 3b. for every atom therein
1391 Walker = CellList[Index]->start;
1392 while (Walker->next != CellList[Index]->end) { // go through every atom
1393 Walker = Walker->next;
1394 //*out << Verbose(0) << "Current Atom is " << *Walker << "." << endl;
1395 // 3c. check for possible bond between each atom in this and every one in the 27 cells
1396 for (n[0]=-1;n[0]<=1;n[0]++)
1397 for (n[1]=-1;n[1]<=1;n[1]++)
1398 for (n[2]=-1;n[2]<=1;n[2]++) {
1399 // compute the index of this comparison cell and make it periodic
1400 index = ((N[2]+n[2]+divisor[2])%divisor[2]) + (((N[1]+n[1]+divisor[1])%divisor[1]) + ((N[0]+n[0]+divisor[0])%divisor[0]) * divisor[1]) * divisor[2];
1401 //*out << Verbose(1) << "Number of comparison cell is " << index << "." << endl;
1402 if (CellList[index] != NULL) { // if there are any atoms in this cell
1403 OtherWalker = CellList[index]->start;
1404 while(OtherWalker->next != CellList[index]->end) { // go through every atom in this cell
1405 OtherWalker = OtherWalker->next;
1406 //*out << Verbose(0) << "Current comparison atom is " << *OtherWalker << "." << endl;
1407 /// \todo periodic check is missing here!
1408 //*out << Verbose(1) << "Checking distance " << OtherWalker->x.PeriodicDistance(&(Walker->x), cell_size) << " against typical bond length of " << bonddistance*bonddistance << "." << endl;
1409 MinDistance = OtherWalker->type->CovalentRadius + Walker->type->CovalentRadius;
1410 MaxDistance = MinDistance + BONDTHRESHOLD;
1411 MinDistance -= BONDTHRESHOLD;
1412 distance = OtherWalker->x.PeriodicDistance(&(Walker->x), cell_size);
1413 if ((OtherWalker->father->nr > Walker->father->nr) && (distance <= MaxDistance*MaxDistance) && (distance >= MinDistance*MinDistance)) { // create bond if distance is smaller
1414 *out << Verbose(0) << "Adding Bond between " << *Walker << " and " << *OtherWalker << "." << endl;
1415 AddBond(Walker->father, OtherWalker->father, 1); // also increases molecule::BondCount
1416 } else {
1417 //*out << Verbose(1) << "Not Adding: Wrong label order or distance too great." << endl;
1418 }
1419 }
1420 }
1421 }
1422 }
1423 }
1424 }
1425 // 4. free the cell again
1426 for (int i=0;i<NumberCells;i++)
1427 if (CellList[i] != NULL) {
1428 delete(CellList[i]);
1429 }
1430 Free((void **)&CellList, "molecule::CreateAdjacencyList - ** CellList");
1431
1432 // create the adjacency list per atom
1433 CreateListOfBondsPerAtom(out);
1434
1435 // correct Bond degree of each bond by checking of updated(!) sum of bond degree for an atom match its valence count
1436 // bond degrres are correctled iteratively by one, so that 2-2 instead of 1-3 or 3-1 corrections are favoured: We want
1437 // a rather symmetric distribution of higher bond degrees
1438 if (BondCount != 0) {
1439 NoCyclicBonds = 0;
1440 *out << Verbose(1) << "Correcting Bond degree of each bond ... ";
1441 do {
1442 No = 0; // No acts as breakup flag (if 1 we still continue)
1443 Walker = start;
1444 while (Walker->next != end) { // go through every atom
1445 Walker = Walker->next;
1446 for(int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++) { // go through each of its bond partners
1447 OtherWalker = ListOfBondsPerAtom[Walker->nr][i]->GetOtherAtom(Walker);
1448 // count valence of first partner (updated!), might have changed during last bond partner
1449 NoBonds = 0;
1450 for(j=0;j<NumberOfBondsPerAtom[Walker->nr];j++)
1451 NoBonds += ListOfBondsPerAtom[Walker->nr][j]->BondDegree;
1452 //*out << Verbose(3) << "Walker: " << (int)Walker->type->NoValenceOrbitals << " > " << NoBonds << "?" << endl;
1453 if ((int)(Walker->type->NoValenceOrbitals) > NoBonds) { // we have a mismatch, check NoBonds of other atom
1454 // count valence of second partner
1455 NoBonds = 0;
1456 for(j=0;j<NumberOfBondsPerAtom[OtherWalker->nr];j++)
1457 NoBonds += ListOfBondsPerAtom[OtherWalker->nr][j]->BondDegree;
1458 //*out << Verbose(3) << "OtherWalker: " << (int)OtherWalker->type->NoValenceOrbitals << " > " << NoBonds << "?" << endl;
1459 if ((int)(OtherWalker->type->NoValenceOrbitals) > NoBonds) // increase bond degree by one
1460 ListOfBondsPerAtom[Walker->nr][i]->BondDegree++;
1461 }
1462 }
1463 }
1464 } while (No);
1465 *out << " done." << endl;
1466 } else
1467 *out << Verbose(1) << "BondCount is " << BondCount << ", no bonds between any of the " << AtomCount << " atoms." << endl;
1468 *out << Verbose(1) << "I detected " << BondCount << " bonds in the molecule with distance " << bonddistance << "." << endl;
1469
1470// // output bonds for debugging (if bond chain list was correctly installed)
1471// *out << Verbose(1) << endl << "From contents of bond chain list:";
1472// bond *Binder = first;
1473// while(Binder->next != last) {
1474// Binder = Binder->next;
1475// *out << *Binder << "\t" << endl;
1476// }
1477// *out << endl;
1478 } else
1479 *out << Verbose(1) << "AtomCount is " << AtomCount << ", thus no bonds, no connections!." << endl;
1480 *out << Verbose(0) << "End of CreateAdjacencyList." << endl;
1481 Free((void **)&matrix, "molecule::CreateAdjacencyList: *matrix");
1482};
1483
1484/** Performs a Depth-First search on this molecule.
1485 * Marks bonds in molecule as cyclic, bridge, ... and atoms as
1486 * articulations points, ...
1487 * We use the algorithm from [Even, Graph Algorithms, p.62].
1488 * \param *out output stream for debugging
1489 * \param ReturnStack true - return pointer to atom stack of separable components, false - return NULL
1490 * \param *&MinimumRingSize contains smallest ring size in molecular structure on return or -1 if no rings were found
1491 * \return list of each disconnected subgraph as an individual molecule class structure
1492 */
1493MoleculeLeafClass * molecule::DepthFirstSearchAnalysis(ofstream *out, bool ReturnStack, int *&MinimumRingSize)
1494{
1495 class StackClass<atom *> *AtomStack;
1496 AtomStack = new StackClass<atom *>(AtomCount);
1497 class StackClass<bond *> *BackEdgeStack = new StackClass<bond *> (BondCount);
1498 MoleculeLeafClass *SubGraphs = new MoleculeLeafClass(NULL);
1499 MoleculeLeafClass *LeafWalker = SubGraphs;
1500 int CurrentGraphNr = 0, OldGraphNr;
1501 int ComponentNumber = 0;
1502 atom *Walker = NULL, *OtherAtom = NULL, *Root = start->next;
1503 bond *Binder = NULL;
1504 bool BackStepping = false;
1505
1506 *out << Verbose(0) << "Begin of DepthFirstSearchAnalysis" << endl;
1507
1508 ResetAllBondsToUnused();
1509 ResetAllAtomNumbers();
1510 InitComponentNumbers();
1511 BackEdgeStack->ClearStack();
1512 while (Root != end) { // if there any atoms at all
1513 // (1) mark all edges unused, empty stack, set atom->GraphNr = 0 for all
1514 AtomStack->ClearStack();
1515
1516 // put into new subgraph molecule and add this to list of subgraphs
1517 LeafWalker = new MoleculeLeafClass(LeafWalker);
1518 LeafWalker->Leaf = new molecule(elemente);
1519 LeafWalker->Leaf->AddCopyAtom(Root);
1520
1521 OldGraphNr = CurrentGraphNr;
1522 Walker = Root;
1523 do { // (10)
1524 do { // (2) set number and Lowpoint of Atom to i, increase i, push current atom
1525 if (!BackStepping) { // if we don't just return from (8)
1526 Walker->GraphNr = CurrentGraphNr;
1527 Walker->LowpointNr = CurrentGraphNr;
1528 *out << Verbose(1) << "Setting Walker[" << Walker->Name << "]'s number to " << Walker->GraphNr << " with Lowpoint " << Walker->LowpointNr << "." << endl;
1529 AtomStack->Push(Walker);
1530 CurrentGraphNr++;
1531 }
1532 do { // (3) if Walker has no unused egdes, go to (5)
1533 BackStepping = false; // reset backstepping flag for (8)
1534 if (Binder == NULL) // if we don't just return from (11), Binder is already set to next unused
1535 Binder = FindNextUnused(Walker);
1536 if (Binder == NULL)
1537 break;
1538 *out << Verbose(2) << "Current Unused Bond is " << *Binder << "." << endl;
1539 // (4) Mark Binder used, ...
1540 Binder->MarkUsed(black);
1541 OtherAtom = Binder->GetOtherAtom(Walker);
1542 *out << Verbose(2) << "(4) OtherAtom is " << OtherAtom->Name << "." << endl;
1543 if (OtherAtom->GraphNr != -1) {
1544 // (4a) ... if "other" atom has been visited (GraphNr != 0), set lowpoint to minimum of both, go to (3)
1545 Binder->Type = BackEdge;
1546 BackEdgeStack->Push(Binder);
1547 Walker->LowpointNr = ( Walker->LowpointNr < OtherAtom->GraphNr ) ? Walker->LowpointNr : OtherAtom->GraphNr;
1548 *out << Verbose(3) << "(4a) Visited: Setting Lowpoint of Walker[" << Walker->Name << "] to " << Walker->LowpointNr << "." << endl;
1549 } else {
1550 // (4b) ... otherwise set OtherAtom as Ancestor of Walker and Walker as OtherAtom, go to (2)
1551 Binder->Type = TreeEdge;
1552 OtherAtom->Ancestor = Walker;
1553 Walker = OtherAtom;
1554 *out << Verbose(3) << "(4b) Not Visited: OtherAtom[" << OtherAtom->Name << "]'s Ancestor is now " << OtherAtom->Ancestor->Name << ", Walker is OtherAtom " << OtherAtom->Name << "." << endl;
1555 break;
1556 }
1557 Binder = NULL;
1558 } while (1); // (3)
1559 if (Binder == NULL) {
1560 *out << Verbose(2) << "No more Unused Bonds." << endl;
1561 break;
1562 } else
1563 Binder = NULL;
1564 } while (1); // (2)
1565
1566 // if we came from backstepping, yet there were no more unused bonds, we end up here with no Ancestor, because Walker is Root! Then we are finished!
1567 if ((Walker == Root) && (Binder == NULL))
1568 break;
1569
1570 // (5) if Ancestor of Walker is ...
1571 *out << Verbose(1) << "(5) Number of Walker[" << Walker->Name << "]'s Ancestor[" << Walker->Ancestor->Name << "] is " << Walker->Ancestor->GraphNr << "." << endl;
1572 if (Walker->Ancestor->GraphNr != Root->GraphNr) {
1573 // (6) (Ancestor of Walker is not Root)
1574 if (Walker->LowpointNr < Walker->Ancestor->GraphNr) {
1575 // (6a) set Ancestor's Lowpoint number to minimum of of its Ancestor and itself, go to Step(8)
1576 Walker->Ancestor->LowpointNr = (Walker->Ancestor->LowpointNr < Walker->LowpointNr) ? Walker->Ancestor->LowpointNr : Walker->LowpointNr;
1577 *out << Verbose(2) << "(6) Setting Walker[" << Walker->Name << "]'s Ancestor[" << Walker->Ancestor->Name << "]'s Lowpoint to " << Walker->Ancestor->LowpointNr << "." << endl;
1578 } else {
1579 // (7) (Ancestor of Walker is a separating vertex, remove all from stack till Walker (including), these and Ancestor form a component
1580 Walker->Ancestor->SeparationVertex = true;
1581 *out << Verbose(2) << "(7) Walker[" << Walker->Name << "]'s Ancestor[" << Walker->Ancestor->Name << "]'s is a separating vertex, creating component." << endl;
1582 SetNextComponentNumber(Walker->Ancestor, ComponentNumber);
1583 *out << Verbose(3) << "(7) Walker[" << Walker->Name << "]'s Ancestor's Compont is " << ComponentNumber << "." << endl;
1584 SetNextComponentNumber(Walker, ComponentNumber);
1585 *out << Verbose(3) << "(7) Walker[" << Walker->Name << "]'s Compont is " << ComponentNumber << "." << endl;
1586 do {
1587 OtherAtom = AtomStack->PopLast();
1588 LeafWalker->Leaf->AddCopyAtom(OtherAtom);
1589 SetNextComponentNumber(OtherAtom, ComponentNumber);
1590 *out << Verbose(3) << "(7) Other[" << OtherAtom->Name << "]'s Compont is " << ComponentNumber << "." << endl;
1591 } while (OtherAtom != Walker);
1592 ComponentNumber++;
1593 }
1594 // (8) Walker becomes its Ancestor, go to (3)
1595 *out << Verbose(2) << "(8) Walker[" << Walker->Name << "] is now its Ancestor " << Walker->Ancestor->Name << ", backstepping. " << endl;
1596 Walker = Walker->Ancestor;
1597 BackStepping = true;
1598 }
1599 if (!BackStepping) { // coming from (8) want to go to (3)
1600 // (9) remove all from stack till Walker (including), these and Root form a component
1601 AtomStack->Output(out);
1602 SetNextComponentNumber(Root, ComponentNumber);
1603 *out << Verbose(3) << "(9) Root[" << Root->Name << "]'s Component is " << ComponentNumber << "." << endl;
1604 SetNextComponentNumber(Walker, ComponentNumber);
1605 *out << Verbose(3) << "(9) Walker[" << Walker->Name << "]'s Component is " << ComponentNumber << "." << endl;
1606 do {
1607 OtherAtom = AtomStack->PopLast();
1608 LeafWalker->Leaf->AddCopyAtom(OtherAtom);
1609 SetNextComponentNumber(OtherAtom, ComponentNumber);
1610 *out << Verbose(3) << "(7) Other[" << OtherAtom->Name << "]'s Compont is " << ComponentNumber << "." << endl;
1611 } while (OtherAtom != Walker);
1612 ComponentNumber++;
1613
1614 // (11) Root is separation vertex, set Walker to Root and go to (4)
1615 Walker = Root;
1616 Binder = FindNextUnused(Walker);
1617 *out << Verbose(1) << "(10) Walker is Root[" << Root->Name << "], next Unused Bond is " << Binder << "." << endl;
1618 if (Binder != NULL) { // Root is separation vertex
1619 *out << Verbose(1) << "(11) Root is a separation vertex." << endl;
1620 Walker->SeparationVertex = true;
1621 }
1622 }
1623 } while ((BackStepping) || (Binder != NULL)); // (10) halt only if Root has no unused edges
1624
1625 // From OldGraphNr to CurrentGraphNr ranges an disconnected subgraph
1626 *out << Verbose(0) << "Disconnected subgraph ranges from " << OldGraphNr << " to " << CurrentGraphNr << "." << endl;
1627 LeafWalker->Leaf->Output(out);
1628 *out << endl;
1629
1630 // step on to next root
1631 while ((Root != end) && (Root->GraphNr != -1)) {
1632 //*out << Verbose(1) << "Current next subgraph root candidate is " << Root->Name << "." << endl;
1633 if (Root->GraphNr != -1) // if already discovered, step on
1634 Root = Root->next;
1635 }
1636 }
1637 // set cyclic bond criterium on "same LP" basis
1638 Binder = first;
1639 while(Binder->next != last) {
1640 Binder = Binder->next;
1641 if (Binder->rightatom->LowpointNr == Binder->leftatom->LowpointNr) { // cyclic ??
1642 Binder->Cyclic = true;
1643 NoCyclicBonds++;
1644 }
1645 }
1646
1647 // analysis of the cycles (print rings, get minimum cycle length)
1648 CyclicStructureAnalysis(out, BackEdgeStack, MinimumRingSize);
1649
1650 *out << Verbose(1) << "Final graph info for each atom is:" << endl;
1651 Walker = start;
1652 while (Walker->next != end) {
1653 Walker = Walker->next;
1654 *out << Verbose(2) << "Atom " << Walker->Name << " is " << ((Walker->SeparationVertex) ? "a" : "not a") << " separation vertex, components are ";
1655 OutputComponentNumber(out, Walker);
1656 *out << " with Lowpoint " << Walker->LowpointNr << " and Graph Nr. " << Walker->GraphNr << "." << endl;
1657 }
1658
1659 *out << Verbose(1) << "Final graph info for each bond is:" << endl;
1660 Binder = first;
1661 while(Binder->next != last) {
1662 Binder = Binder->next;
1663 *out << Verbose(2) << ((Binder->Type == TreeEdge) ? "TreeEdge " : "BackEdge ") << *Binder << ": <";
1664 *out << ((Binder->leftatom->SeparationVertex) ? "SP," : "") << "L" << Binder->leftatom->LowpointNr << " G" << Binder->leftatom->GraphNr << " Comp.";
1665 OutputComponentNumber(out, Binder->leftatom);
1666 *out << " === ";
1667 *out << ((Binder->rightatom->SeparationVertex) ? "SP," : "") << "L" << Binder->rightatom->LowpointNr << " G" << Binder->rightatom->GraphNr << " Comp.";
1668 OutputComponentNumber(out, Binder->rightatom);
1669 *out << ">." << endl;
1670 if (Binder->Cyclic) // cyclic ??
1671 *out << Verbose(3) << "Lowpoint at each side are equal: CYCLIC!" << endl;
1672 }
1673
1674 // free all and exit
1675 delete(AtomStack);
1676 *out << Verbose(0) << "End of DepthFirstSearchAnalysis" << endl;
1677 return SubGraphs;
1678};
1679
1680/** Analyses the cycles found and returns minimum of all cycle lengths.
1681 * \param *out output stream for debugging
1682 * \param *BackEdgeStack stack with all back edges found during DFS scan
1683 * \param *&MinimumRingSize contains smallest ring size in molecular structure on return or -1 if no rings were found, if set is maximum search distance
1684 * \todo BFS from the not-same-LP to find back to starting point of tributary cycle over more than one bond
1685 */
1686void molecule::CyclicStructureAnalysis(ofstream *out, class StackClass<bond *> * BackEdgeStack, int *&MinimumRingSize)
1687{
1688 atom **PredecessorList = (atom **) Malloc(sizeof(atom *)*AtomCount, "molecule::CyclicStructureAnalysis: **PredecessorList");
1689 int *ShortestPathList = (int *) Malloc(sizeof(int)*AtomCount, "molecule::CyclicStructureAnalysis: *ShortestPathList");
1690 enum Shading *ColorList = (enum Shading *) Malloc(sizeof(enum Shading)*AtomCount, "molecule::CyclicStructureAnalysis: *ColorList");
1691 class StackClass<atom *> *BFSStack = new StackClass<atom *> (AtomCount); // will hold the current ring
1692 class StackClass<atom *> *TouchedStack = new StackClass<atom *> (AtomCount); // contains all "touched" atoms
1693 atom *Walker = NULL, *OtherAtom = NULL, *Root = NULL;
1694 bond *Binder = NULL, *BackEdge = NULL;
1695 int RingSize, NumCycles, MinRingSize = -1;
1696
1697 // initialise each vertex as white with no predecessor, empty queue, color Root lightgray
1698 for (int i=0;i<AtomCount;i++) {
1699 PredecessorList[i] = NULL;
1700 ShortestPathList[i] = -1;
1701 ColorList[i] = white;
1702 }
1703 MinimumRingSize = new int[AtomCount];
1704 for(int i=0;i<AtomCount;i++)
1705 MinimumRingSize[i] = AtomCount;
1706
1707
1708 *out << Verbose(1) << "Back edge list - ";
1709 BackEdgeStack->Output(out);
1710
1711 *out << Verbose(1) << "Analysing cycles ... " << endl;
1712 NumCycles = 0;
1713 while (!BackEdgeStack->IsEmpty()) {
1714 BackEdge = BackEdgeStack->PopFirst();
1715 // this is the target
1716 Root = BackEdge->leftatom;
1717 // this is the source point
1718 Walker = BackEdge->rightatom;
1719 ShortestPathList[Walker->nr] = 0;
1720 BFSStack->ClearStack(); // start with empty BFS stack
1721 BFSStack->Push(Walker);
1722 TouchedStack->Push(Walker);
1723 //*out << Verbose(1) << "---------------------------------------------------------------------------------------------------------" << endl;
1724 OtherAtom = NULL;
1725 while ((Walker != Root) && ((OtherAtom == NULL) || (ShortestPathList[OtherAtom->nr] < MinimumRingSize[Walker->GetTrueFather()->nr]))) { // look for Root
1726 Walker = BFSStack->PopFirst();
1727 //*out << Verbose(2) << "Current Walker is " << *Walker << ", we look for SP to Root " << *Root << "." << endl;
1728 for(int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++) {
1729 Binder = ListOfBondsPerAtom[Walker->nr][i];
1730 if (Binder != BackEdge) { // only walk along DFS spanning tree (otherwise we always find SP of one being backedge Binder)
1731 OtherAtom = Binder->GetOtherAtom(Walker);
1732 //*out << Verbose(2) << "Current OtherAtom is: " << OtherAtom->Name << " for bond " << *Binder << "." << endl;
1733 if (ColorList[OtherAtom->nr] == white) {
1734 TouchedStack->Push(OtherAtom);
1735 ColorList[OtherAtom->nr] = lightgray;
1736 PredecessorList[OtherAtom->nr] = Walker; // Walker is the predecessor
1737 ShortestPathList[OtherAtom->nr] = ShortestPathList[Walker->nr]+1;
1738 //*out << Verbose(2) << "Coloring OtherAtom " << OtherAtom->Name << " lightgray, its predecessor is " << Walker->Name << " and its Shortest Path is " << ShortestPathList[OtherAtom->nr] << " egde(s) long." << endl;
1739 if (ShortestPathList[OtherAtom->nr] < MinimumRingSize[Walker->GetTrueFather()->nr]) { // Check for maximum distance
1740 //*out << Verbose(3) << "Putting OtherAtom into queue." << endl;
1741 BFSStack->Push(OtherAtom);
1742 }
1743 } else {
1744 //*out << Verbose(3) << "Not Adding, has already been visited." << endl;
1745 }
1746 } else {
1747 //*out << Verbose(3) << "Not Visiting, is a back edge." << endl;
1748 }
1749 }
1750 ColorList[Walker->nr] = black;
1751 //*out << Verbose(1) << "Coloring Walker " << Walker->Name << " black." << endl;
1752 }
1753
1754 if (Walker == Root) {
1755 // now climb back the predecessor list and thus find the cycle members
1756 NumCycles++;
1757 RingSize = 1;
1758 Walker = Root;
1759 *out << Verbose(1) << "Found ring contains: ";
1760 while (Walker != BackEdge->rightatom) {
1761 *out << Walker->Name << " <-> ";
1762 Walker = PredecessorList[Walker->nr];
1763 RingSize++;
1764 }
1765 *out << Walker->Name << " with a length of " << RingSize << "." << endl << endl;
1766 // walk through all and set MinimumRingSize
1767 Walker = Root;
1768 while (Walker != BackEdge->rightatom) {
1769 Walker = PredecessorList[Walker->nr];
1770 if (RingSize < MinimumRingSize[Walker->GetTrueFather()->nr])
1771 MinimumRingSize[Walker->GetTrueFather()->nr] = RingSize;
1772 }
1773 if ((RingSize < MinRingSize) || (MinRingSize == -1))
1774 MinRingSize = RingSize;
1775 } else {
1776 *out << Verbose(1) << "No ring containing " << *Root << " with length equal to or smaller than " << MinimumRingSize[Walker->GetTrueFather()->nr] << " found." << endl;
1777 }
1778
1779 // now clean the lists
1780 while (!TouchedStack->IsEmpty()){
1781 Walker = TouchedStack->PopFirst();
1782 PredecessorList[Walker->nr] = NULL;
1783 ShortestPathList[Walker->nr] = -1;
1784 ColorList[Walker->nr] = white;
1785 }
1786 }
1787 if (MinRingSize != -1) {
1788 // go over all atoms
1789 Root = start;
1790 while(Root->next != end) {
1791 Root = Root->next;
1792
1793 if (MinimumRingSize[Root->GetTrueFather()->nr] == AtomCount) { // check whether MinimumRingSize is set, if not BFS to next where it is
1794 ShortestPathList[Walker->nr] = 0;
1795 BFSStack->ClearStack(); // start with empty BFS stack
1796 BFSStack->Push(Walker);
1797 TouchedStack->Push(Walker);
1798 //*out << Verbose(1) << "---------------------------------------------------------------------------------------------------------" << endl;
1799 OtherAtom = Walker;
1800 while ((Walker != Root) && (OtherAtom != NULL)) { // look for Root
1801 Walker = BFSStack->PopFirst();
1802 //*out << Verbose(2) << "Current Walker is " << *Walker << ", we look for SP to Root " << *Root << "." << endl;
1803 for(int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++) {
1804 Binder = ListOfBondsPerAtom[Walker->nr][i];
1805 if (Binder != BackEdge) { // only walk along DFS spanning tree (otherwise we always find SP of one being backedge Binder)
1806 OtherAtom = Binder->GetOtherAtom(Walker);
1807 //*out << Verbose(2) << "Current OtherAtom is: " << OtherAtom->Name << " for bond " << *Binder << "." << endl;
1808 if (ColorList[OtherAtom->nr] == white) {
1809 TouchedStack->Push(OtherAtom);
1810 ColorList[OtherAtom->nr] = lightgray;
1811 PredecessorList[OtherAtom->nr] = Walker; // Walker is the predecessor
1812 ShortestPathList[OtherAtom->nr] = ShortestPathList[Walker->nr]+1;
1813 //*out << Verbose(2) << "Coloring OtherAtom " << OtherAtom->Name << " lightgray, its predecessor is " << Walker->Name << " and its Shortest Path is " << ShortestPathList[OtherAtom->nr] << " egde(s) long." << endl;
1814 if (MinimumRingSize[OtherAtom->GetTrueFather()->nr] != AtomCount) { // if the other atom is connected to a ring
1815 MinimumRingSize[Root->GetTrueFather()->nr] = ShortestPathList[OtherAtom->nr]+MinimumRingSize[OtherAtom->GetTrueFather()->nr];
1816 OtherAtom = NULL; //break;
1817 break;
1818 } else
1819 BFSStack->Push(OtherAtom);
1820 } else {
1821 //*out << Verbose(3) << "Not Adding, has already been visited." << endl;
1822 }
1823 } else {
1824 //*out << Verbose(3) << "Not Visiting, is a back edge." << endl;
1825 }
1826 }
1827 ColorList[Walker->nr] = black;
1828 //*out << Verbose(1) << "Coloring Walker " << Walker->Name << " black." << endl;
1829 }
1830
1831 // now clean the lists
1832 while (!TouchedStack->IsEmpty()){
1833 Walker = TouchedStack->PopFirst();
1834 PredecessorList[Walker->nr] = NULL;
1835 ShortestPathList[Walker->nr] = -1;
1836 ColorList[Walker->nr] = white;
1837 }
1838 }
1839 *out << Verbose(1) << "Minimum ring size of " << *Root << " is " << MinimumRingSize[Root->GetTrueFather()->nr] << "." << endl;
1840 }
1841 *out << Verbose(1) << "Minimum ring size is " << MinRingSize << ", over " << NumCycles << " cycles total." << endl;
1842 } else
1843 *out << Verbose(1) << "No rings were detected in the molecular structure." << endl;
1844
1845 Free((void **)&PredecessorList, "molecule::CyclicStructureAnalysis: **PredecessorList");
1846 Free((void **)&ShortestPathList, "molecule::CyclicStructureAnalysis: **ShortestPathList");
1847 Free((void **)&ColorList, "molecule::CyclicStructureAnalysis: **ColorList");
1848 delete(BFSStack);
1849};
1850
1851/** Sets the next component number.
1852 * This is O(N) as the number of bonds per atom is bound.
1853 * \param *vertex atom whose next atom::*ComponentNr is to be set
1854 * \param nr number to use
1855 */
1856void molecule::SetNextComponentNumber(atom *vertex, int nr)
1857{
1858 int i=0;
1859 if (vertex != NULL) {
1860 for(;i<NumberOfBondsPerAtom[vertex->nr];i++) {
1861 if (vertex->ComponentNr[i] == -1) { // check if not yet used
1862 vertex->ComponentNr[i] = nr;
1863 break;
1864 }
1865 else if (vertex->ComponentNr[i] == nr) // if number is already present, don't add another time
1866 break; // breaking here will not cause error!
1867 }
1868 if (i == NumberOfBondsPerAtom[vertex->nr])
1869 cerr << "Error: All Component entries are already occupied!" << endl;
1870 } else
1871 cerr << "Error: Given vertex is NULL!" << endl;
1872};
1873
1874/** Output a list of flags, stating whether the bond was visited or not.
1875 * \param *out output stream for debugging
1876 */
1877void molecule::OutputComponentNumber(ofstream *out, atom *vertex)
1878{
1879 for(int i=0;i<NumberOfBondsPerAtom[vertex->nr];i++)
1880 *out << vertex->ComponentNr[i] << " ";
1881};
1882
1883/** Allocates memory for all atom::*ComponentNr in this molecule and sets each entry to -1.
1884 */
1885void molecule::InitComponentNumbers()
1886{
1887 atom *Walker = start;
1888 while(Walker->next != end) {
1889 Walker = Walker->next;
1890 if (Walker->ComponentNr != NULL)
1891 Free((void **)&Walker->ComponentNr, "molecule::InitComponentNumbers: **Walker->ComponentNr");
1892 Walker->ComponentNr = (int *) Malloc(sizeof(int)*NumberOfBondsPerAtom[Walker->nr], "molecule::InitComponentNumbers: *Walker->ComponentNr");
1893 for (int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++)
1894 Walker->ComponentNr[i] = -1;
1895 }
1896};
1897
1898/** Returns next unused bond for this atom \a *vertex or NULL of none exists.
1899 * \param *vertex atom to regard
1900 * \return bond class or NULL
1901 */
1902bond * molecule::FindNextUnused(atom *vertex)
1903{
1904 for(int i=0;i<NumberOfBondsPerAtom[vertex->nr];i++)
1905 if (ListOfBondsPerAtom[vertex->nr][i]->IsUsed() == white)
1906 return(ListOfBondsPerAtom[vertex->nr][i]);
1907 return NULL;
1908};
1909
1910/** Resets bond::Used flag of all bonds in this molecule.
1911 * \return true - success, false - -failure
1912 */
1913void molecule::ResetAllBondsToUnused()
1914{
1915 bond *Binder = first;
1916 while (Binder->next != last) {
1917 Binder = Binder->next;
1918 Binder->ResetUsed();
1919 }
1920};
1921
1922/** Resets atom::nr to -1 of all atoms in this molecule.
1923 */
1924void molecule::ResetAllAtomNumbers()
1925{
1926 atom *Walker = start;
1927 while (Walker->next != end) {
1928 Walker = Walker->next;
1929 Walker->GraphNr = -1;
1930 }
1931};
1932
1933/** Output a list of flags, stating whether the bond was visited or not.
1934 * \param *out output stream for debugging
1935 * \param *list
1936 */
1937void OutputAlreadyVisited(ofstream *out, int *list)
1938{
1939 *out << Verbose(4) << "Already Visited Bonds:\t";
1940 for(int i=1;i<=list[0];i++) *out << Verbose(0) << list[i] << " ";
1941 *out << endl;
1942};
1943
1944/** Estimates by educated guessing (using upper limit) the expected number of fragments.
1945 * The upper limit is
1946 * \f[
1947 * n = N \cdot C^k
1948 * \f]
1949 * where \f$C=2^c\f$ and c is the maximum bond degree over N number of atoms.
1950 * \param *out output stream for debugging
1951 * \param order bond order k
1952 * \return number n of fragments
1953 */
1954int molecule::GuesstimateFragmentCount(ofstream *out, int order)
1955{
1956 int c = 0;
1957 int FragmentCount;
1958 // get maximum bond degree
1959 atom *Walker = start;
1960 while (Walker->next != end) {
1961 Walker = Walker->next;
1962 c = (NumberOfBondsPerAtom[Walker->nr] > c) ? NumberOfBondsPerAtom[Walker->nr] : c;
1963 }
1964 FragmentCount = NoNonHydrogen*(1 << (c*order));
1965 *out << Verbose(1) << "Upper limit for this subgraph is " << FragmentCount << " for " << NoNonHydrogen << " non-H atoms with maximum bond degree of " << c << "." << endl;
1966 return FragmentCount;
1967};
1968
1969/** Scans a single line for number and puts them into \a KeySet.
1970 * \param *out output stream for debugging
1971 * \param *buffer buffer to scan
1972 * \param &CurrentSet filled KeySet on return
1973 * \return true - at least one valid atom id parsed, false - CurrentSet is empty
1974 */
1975bool molecule::ScanBufferIntoKeySet(ofstream *out, char *buffer, KeySet &CurrentSet)
1976{
1977 stringstream line;
1978 int AtomNr;
1979 int status = 0;
1980
1981 line.str(buffer);
1982 while (!line.eof()) {
1983 line >> AtomNr;
1984 if ((AtomNr >= 0) && (AtomNr < AtomCount)) {
1985 CurrentSet.insert(AtomNr); // insert at end, hence in same order as in file!
1986 status++;
1987 } // else it's "-1" or else and thus must not be added
1988 }
1989 *out << Verbose(1) << "The scanned KeySet is ";
1990 for(KeySet::iterator runner = CurrentSet.begin(); runner != CurrentSet.end(); runner++) {
1991 *out << (*runner) << "\t";
1992 }
1993 *out << endl;
1994 return (status != 0);
1995};
1996
1997/** Parses the KeySet file and fills \a *FragmentList from the known molecule structure.
1998 * Does two-pass scanning:
1999 * -# Scans the keyset file and initialises a temporary graph
2000 * -# Scans TEFactors file and sets the TEFactor of each key set in the temporary graph accordingly
2001 * Finally, the temporary graph is inserted into the given \a FragmentList for return.
2002 * \param *out output stream for debugging
2003 * \param *path path to file
2004 * \param *FragmentList empty, filled on return
2005 * \param IsAngstroem whether we have Ansgtroem or bohrradius
2006 * \return true - parsing successfully, false - failure on parsing (FragmentList will be NULL)
2007 */
2008bool molecule::ParseKeySetFile(ofstream *out, char *path, Graph *&FragmentList, bool IsAngstroem)
2009{
2010 bool status = true;
2011 ifstream InputFile;
2012 stringstream line;
2013 GraphTestPair testGraphInsert;
2014 int NumberOfFragments = 0;
2015 double TEFactor;
2016 char *filename = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "molecule::ParseKeySetFile - filename");
2017
2018 if (FragmentList == NULL) { // check list pointer
2019 FragmentList = new Graph;
2020 }
2021
2022 // 1st pass: open file and read
2023 *out << Verbose(1) << "Parsing the KeySet file ... " << endl;
2024 sprintf(filename, "%s/%s%s", path, FRAGMENTPREFIX, KEYSETFILE);
2025 InputFile.open(filename);
2026 if (InputFile != NULL) {
2027 // each line represents a new fragment
2028 char *buffer = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "molecule::ParseKeySetFile - *buffer");
2029 // 1. parse keysets and insert into temp. graph
2030 while (!InputFile.eof()) {
2031 InputFile.getline(buffer, MAXSTRINGSIZE);
2032 KeySet CurrentSet;
2033 if ((strlen(buffer) > 0) && (ScanBufferIntoKeySet(out, buffer, CurrentSet))) { // if at least one valid atom was added, write config
2034 testGraphInsert = FragmentList->insert(GraphPair (CurrentSet,pair<int,double>(NumberOfFragments++,1))); // store fragment number and current factor
2035 if (!testGraphInsert.second) {
2036 cerr << "KeySet file must be corrupt as there are two equal key sets therein!" << endl;
2037 }
2038 //FragmentList->ListOfMolecules[NumberOfFragments++] = StoreFragmentFromKeySet(out, CurrentSet, IsAngstroem);
2039 }
2040 }
2041 // 2. Free and done
2042 InputFile.close();
2043 InputFile.clear();
2044 Free((void **)&buffer, "molecule::ParseKeySetFile - *buffer");
2045 *out << Verbose(1) << "done." << endl;
2046 } else {
2047 *out << Verbose(1) << "File " << filename << " not found." << endl;
2048 status = false;
2049 }
2050
2051 // 2nd pass: open TEFactors file and read
2052 *out << Verbose(1) << "Parsing the TEFactors file ... " << endl;
2053 sprintf(filename, "%s/%s%s", path, FRAGMENTPREFIX, TEFACTORSFILE);
2054 InputFile.open(filename);
2055 if (InputFile != NULL) {
2056 // 3. add found TEFactors to each keyset
2057 NumberOfFragments = 0;
2058 for(Graph::iterator runner = FragmentList->begin();runner != FragmentList->end(); runner++) {
2059 if (!InputFile.eof()) {
2060 InputFile >> TEFactor;
2061 (*runner).second.second = TEFactor;
2062 *out << Verbose(2) << "Setting " << ++NumberOfFragments << " fragment's TEFactor to " << (*runner).second.second << "." << endl;
2063 } else {
2064 status = false;
2065 break;
2066 }
2067 }
2068 // 4. Free and done
2069 InputFile.close();
2070 *out << Verbose(1) << "done." << endl;
2071 } else {
2072 *out << Verbose(1) << "File " << filename << " not found." << endl;
2073 status = false;
2074 }
2075
2076 // free memory
2077 Free((void **)&filename, "molecule::ParseKeySetFile - filename");
2078
2079 return status;
2080};
2081
2082/** Stores keysets and TEFactors to file.
2083 * \param *out output stream for debugging
2084 * \param KeySetList Graph with Keysets and factors
2085 * \param *path path to file
2086 * \return true - file written successfully, false - writing failed
2087 */
2088bool molecule::StoreKeySetFile(ofstream *out, Graph &KeySetList, char *path)
2089{
2090 ofstream output;
2091 bool status = true;
2092 string line;
2093 string::iterator ende;
2094
2095 // open KeySet file
2096 line = path;
2097 line.append("/");
2098 line += FRAGMENTPREFIX;
2099 ende = line.end();
2100 line += KEYSETFILE;
2101 output.open(line.c_str(), ios::out);
2102 *out << Verbose(1) << "Saving key sets of the total graph ... ";
2103 if(output != NULL) {
2104 for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++) {
2105 for (KeySet::iterator sprinter = (*runner).first.begin();sprinter != (*runner).first.end(); sprinter++) {
2106 if (sprinter != (*runner).first.begin())
2107 output << "\t";
2108 output << *sprinter;
2109 }
2110 output << endl;
2111 }
2112 *out << "done." << endl;
2113 } else {
2114 cerr << "Unable to open " << line << " for writing keysets!" << endl;
2115 status = false;
2116 }
2117 output.close();
2118 output.clear();
2119
2120 // open TEFactors file
2121 line.erase(ende, line.end());
2122 line += TEFACTORSFILE;
2123 output.open(line.c_str(), ios::out);
2124 *out << Verbose(1) << "Saving TEFactors of the total graph ... ";
2125 if(output != NULL) {
2126 for(Graph::iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++)
2127 output << (*runner).second.second << endl;
2128 *out << Verbose(1) << "done." << endl;
2129 } else {
2130 *out << Verbose(1) << "failed to open " << line << "." << endl;
2131 status = false;
2132 }
2133 output.close();
2134
2135 return status;
2136};
2137
2138/** Storing the bond structure of a molecule to file.
2139 * Simply stores Atom::nr and then the Atom::nr of all bond partners per line.
2140 * \param *out output stream for debugging
2141 * \param *path path to file
2142 * \return true - file written successfully, false - writing failed
2143 */
2144bool molecule::StoreAdjacencyToFile(ofstream *out, char *path)
2145{
2146 ofstream AdjacencyFile;
2147 atom *Walker = NULL;
2148 stringstream line;
2149 bool status = true;
2150
2151 line << path << "/" << FRAGMENTPREFIX << ADJACENCYFILE;
2152 AdjacencyFile.open(line.str().c_str(), ios::out);
2153 *out << Verbose(1) << "Saving adjacency list ... ";
2154 if (AdjacencyFile != NULL) {
2155 Walker = start;
2156 while(Walker->next != end) {
2157 Walker = Walker->next;
2158 AdjacencyFile << Walker->nr << "\t";
2159 for (int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++)
2160 AdjacencyFile << ListOfBondsPerAtom[Walker->nr][i]->GetOtherAtom(Walker)->nr << "\t";
2161 AdjacencyFile << endl;
2162 }
2163 AdjacencyFile.close();
2164 *out << Verbose(1) << "done." << endl;
2165 } else {
2166 *out << Verbose(1) << "failed to open file " << line.str() << "." << endl;
2167 status = false;
2168 }
2169
2170 return status;
2171};
2172
2173/** Checks contents of adjacency file against bond structure in structure molecule.
2174 * \param *out output stream for debugging
2175 * \param *path path to file
2176 * \param **ListOfAtoms allocated (molecule::AtomCount) and filled lookup table for ids (Atom::nr) to *Atom
2177 * \return true - structure is equal, false - not equivalence
2178 */
2179bool molecule::CheckAdjacencyFileAgainstMolecule(ofstream *out, char *path, atom **ListOfAtoms)
2180{
2181 ifstream File;
2182 stringstream line;
2183 bool status = true;
2184 char *buffer = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "molecule::CheckAdjacencyFileAgainstMolecule: *buffer");
2185
2186 line << path << "/" << FRAGMENTPREFIX << ADJACENCYFILE;
2187 File.open(line.str().c_str(), ios::out);
2188 *out << Verbose(1) << "Looking at bond structure stored in adjacency file and comparing to present one ... ";
2189 if (File != NULL) {
2190 // allocate storage structure
2191 int NonMatchNumber = 0; // will number of atoms with differing bond structure
2192 int *CurrentBonds = (int *) Malloc(sizeof(int)*8, "molecule::CheckAdjacencyFileAgainstMolecule - CurrentBonds"); // contains parsed bonds of current atom
2193 int CurrentBondsOfAtom;
2194
2195 // Parse the file line by line and count the bonds
2196 while (!File.eof()) {
2197 File.getline(buffer, MAXSTRINGSIZE);
2198 stringstream line;
2199 line.str(buffer);
2200 int AtomNr = -1;
2201 line >> AtomNr;
2202 CurrentBondsOfAtom = -1; // we count one too far due to line end
2203 // parse into structure
2204 if ((AtomNr >= 0) && (AtomNr < AtomCount)) {
2205 while (!line.eof())
2206 line >> CurrentBonds[ ++CurrentBondsOfAtom ];
2207 // compare against present bonds
2208 //cout << Verbose(2) << "Walker is " << *Walker << ", bond partners: ";
2209 if (CurrentBondsOfAtom == NumberOfBondsPerAtom[AtomNr]) {
2210 for(int i=0;i<NumberOfBondsPerAtom[AtomNr];i++) {
2211 int id = ListOfBondsPerAtom[AtomNr][i]->GetOtherAtom(ListOfAtoms[AtomNr])->nr;
2212 int j = 0;
2213 for (;(j<CurrentBondsOfAtom) && (CurrentBonds[j++] != id);); // check against all parsed bonds
2214 if (CurrentBonds[j-1] != id) { // no match ? Then mark in ListOfAtoms
2215 ListOfAtoms[AtomNr] = NULL;
2216 NonMatchNumber++;
2217 status = false;
2218 //out << "[" << id << "]\t";
2219 } else {
2220 //out << id << "\t";
2221 }
2222 }
2223 //out << endl;
2224 } else {
2225 *out << "Number of bonds for Atom " << *ListOfAtoms[AtomNr] << " does not match, parsed " << CurrentBondsOfAtom << " against " << NumberOfBondsPerAtom[AtomNr] << "." << endl;
2226 status = false;
2227 }
2228 }
2229 }
2230 File.close();
2231 File.clear();
2232 if (status) { // if equal we parse the KeySetFile
2233 *out << Verbose(1) << "done: Equal." << endl;
2234 status = true;
2235 } else
2236 *out << Verbose(1) << "done: Not equal by " << NonMatchNumber << " atoms." << endl;
2237 Free((void **)&CurrentBonds, "molecule::CheckAdjacencyFileAgainstMolecule - **CurrentBonds");
2238 } else {
2239 *out << Verbose(1) << "Adjacency file not found." << endl;
2240 status = false;
2241 }
2242 *out << endl;
2243 Free((void **)&buffer, "molecule::CheckAdjacencyFileAgainstMolecule: *buffer");
2244
2245 return status;
2246};
2247
2248/** Checks whether the OrderAtSite is still bewloe \a Order at some site.
2249 * \param *out output stream for debugging
2250 * \param *AtomMask defines true/false per global Atom::nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
2251 * \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
2252 * \param Order desired Order if positive, desired exponent in threshold criteria if negative (0 is single-step)
2253 * \param *path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
2254 * \return true - needs further fragmentation, false - does not need fragmentation
2255 */
2256bool molecule::CheckOrderAtSite(ofstream *out, bool *AtomMask, Graph *GlobalKeySetList, int Order, char *path)
2257{
2258 atom *Walker = start;
2259 bool status = false;
2260 ifstream InputFile;
2261
2262 // initialize mask list
2263 for(int i=0;i<AtomCount;i++)
2264 AtomMask[i] = false;
2265
2266 if (Order < 0) { // adaptive increase of BondOrder per site
2267 if (AtomMask[AtomCount] == true) // break after one step
2268 return false;
2269 // parse the EnergyPerFragment file
2270 char *buffer = (char *) Malloc(sizeof(char)*MAXSTRINGSIZE, "molecule::CheckOrderAtSite: *buffer");
2271 sprintf(buffer, "%s/%s%s.dat", path, FRAGMENTPREFIX, ENERGYPERFRAGMENT);
2272 InputFile.open(buffer, ios::in);
2273 if ((InputFile != NULL) && (GlobalKeySetList != NULL)) {
2274 // transmorph graph keyset list into indexed KeySetList
2275 map<int,KeySet> IndexKeySetList;
2276 for(Graph::iterator runner = GlobalKeySetList->begin(); runner != GlobalKeySetList->end(); runner++) {
2277 IndexKeySetList.insert( pair<int,KeySet>(runner->second.first,runner->first) );
2278 }
2279 int lines = 0;
2280 // count the number of lines, i.e. the number of fragments
2281 InputFile.getline(buffer, MAXSTRINGSIZE); // skip comment lines
2282 InputFile.getline(buffer, MAXSTRINGSIZE);
2283 while(!InputFile.eof()) {
2284 InputFile.getline(buffer, MAXSTRINGSIZE);
2285 lines++;
2286 }
2287 *out << Verbose(2) << "Scanned " << lines-1 << " lines." << endl; // one endline too much
2288 InputFile.clear();
2289 InputFile.seekg(ios::beg);
2290 map<int, pair<double,int> > AdaptiveCriteriaList; // (Root No., (Value, Order)) !
2291 int No, FragOrder;
2292 double Value;
2293 // each line represents a fragment root (Atom::nr) id and its energy contribution
2294 InputFile.getline(buffer, MAXSTRINGSIZE); // skip comment lines
2295 InputFile.getline(buffer, MAXSTRINGSIZE);
2296 while(!InputFile.eof()) {
2297 InputFile.getline(buffer, MAXSTRINGSIZE);
2298 if (strlen(buffer) > 2) {
2299 //*out << Verbose(2) << "Scanning: " << buffer;
2300 stringstream line(buffer);
2301 line >> FragOrder;
2302 line >> ws >> No;
2303 line >> ws >> Value; // skip time entry
2304 line >> ws >> Value;
2305 No -= 1; // indices start at 1 in file, not 0
2306 //*out << Verbose(2) << " - yields (" << No << "," << Value << ")" << endl;
2307
2308 // clean the list of those entries that have been superceded by higher order terms already
2309 map<int,KeySet>::iterator marker = IndexKeySetList.find(No); // find keyset to Frag No.
2310 if (marker != IndexKeySetList.end()) { // if found
2311 // as the smallest number in each set has always been the root (we use global id to keep the doubles away), seek smallest and insert into AtomMask
2312 pair <map<int, pair<double,int> >::iterator, bool> InsertedElement = AdaptiveCriteriaList.insert( make_pair(*((*marker).second.begin()), pair<double,int>( Value, Order) ));
2313 map<int, pair<double,int> >::iterator PresentItem = InsertedElement.first;
2314 if (!InsertedElement.second) { // this root is already present
2315 if ((*PresentItem).second.second < FragOrder) // if order there is lower, update entry with higher-order term
2316 //if ((*PresentItem).second.first < (*runner).first) // as higher-order terms are not always better, we skip this part (which would always include this site into adaptive increase)
2317 { // if value is smaller, update value and order
2318 (*PresentItem).second.first = Value;
2319 (*PresentItem).second.second = FragOrder;
2320 *out << Verbose(2) << "Updated element (" << (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl;
2321 }
2322 } else {
2323 *out << Verbose(2) << "Inserted element (" << (*PresentItem).first << ",[" << (*PresentItem).second.first << "," << (*PresentItem).second.second << "])." << endl;
2324 }
2325 } else {
2326 *out << Verbose(1) << "No Fragment under No. " << No << "found." << endl;
2327 }
2328 }
2329 }
2330 // then map back onto (Value, (Root Nr., Order)) (i.e. sorted by value to pick the highest ones)
2331 map<double, pair<int,int> > FinalRootCandidates;
2332 *out << Verbose(1) << "Root candidate list is: " << endl;
2333 for(map<int, pair<double,int> >::iterator runner = AdaptiveCriteriaList.begin(); runner != AdaptiveCriteriaList.end(); runner++) {
2334 Walker = FindAtom((*runner).first);
2335 if (Walker != NULL) {
2336 if ((*runner).second.second >= Walker->AdaptiveOrder) { // only insert if this is an "active" root site for the current order
2337 *out << Verbose(2) << "(" << (*runner).first << ",[" << (*runner).second.first << "," << (*runner).second.second << "])" << endl;
2338 FinalRootCandidates.insert( make_pair( (*runner).second.first, pair<int,int>((*runner).first, (*runner).second.second) ) );
2339 }
2340 } else {
2341 cerr << "Atom No. " << (*runner).second.first << " was not found in this molecule." << endl;
2342 }
2343 }
2344 // pick the ones still below threshold and mark as to be adaptively updated
2345 for(map<double, pair<int,int> >::iterator runner = FinalRootCandidates.upper_bound(pow(10.,Order)); runner != FinalRootCandidates.end(); runner++) {
2346 No = (*runner).second.first;
2347 *out << Verbose(2) << "Root " << No << " is still above threshold (10^{" << Order <<"}: " << runner->first << ", setting entry " << No << " of Atom mask to true." << endl;
2348 AtomMask[No] = true;
2349 status = true;
2350 }
2351 // close and done
2352 InputFile.close();
2353 InputFile.clear();
2354 } else {
2355 cerr << "Unable to parse " << buffer << " file, incrementing all." << endl;
2356 while (Walker->next != end) {
2357 Walker = Walker->next;
2358 #ifdef ADDHYDROGEN
2359 if (Walker->type->Z != 1) // skip hydrogen
2360 #endif
2361 {
2362 AtomMask[Walker->nr] = true; // include all (non-hydrogen) atoms
2363 status = true;
2364 }
2365 }
2366 }
2367 Free((void **)&buffer, "molecule::CheckOrderAtSite: *buffer");
2368 // pick a given number of highest values and set AtomMask
2369 } else { // global increase of Bond Order
2370 while (Walker->next != end) {
2371 Walker = Walker->next;
2372 #ifdef ADDHYDROGEN
2373 if (Walker->type->Z != 1) // skip hydrogen
2374 #endif
2375 {
2376 AtomMask[Walker->nr] = true; // include all (non-hydrogen) atoms
2377 if ((Order != 0) && (Walker->AdaptiveOrder < Order))
2378 status = true;
2379 }
2380 }
2381 if ((Order == 0) && (AtomMask[AtomCount] == true)) // single stepping, just check
2382 status = false;
2383
2384 if (!status)
2385 *out << Verbose(1) << "Order at every site is already equal or above desired order " << Order << "." << endl;
2386 }
2387
2388 // print atom mask for debugging
2389 *out << " ";
2390 for(int i=0;i<AtomCount;i++)
2391 *out << (i % 10);
2392 *out << endl << "Atom mask is: ";
2393 for(int i=0;i<AtomCount;i++)
2394 *out << (AtomMask[i] ? "t" : "f");
2395 *out << endl;
2396
2397 return status;
2398};
2399
2400/** Create a SortIndex to map from BFS labels to the sequence in which the atoms are given in the config file.
2401 * \param *out output stream for debugging
2402 * \param *&SortIndex Mapping array of size molecule::AtomCount
2403 * \return true - success, false - failure of SortIndex alloc
2404 */
2405bool molecule::CreateMappingLabelsToConfigSequence(ofstream *out, int *&SortIndex)
2406{
2407 element *runner = elemente->start;
2408 int AtomNo = 0;
2409 atom *Walker = NULL;
2410
2411 if (SortIndex != NULL) {
2412 *out << Verbose(1) << "SortIndex is " << SortIndex << " and not NULL as expected." << endl;
2413 return false;
2414 }
2415 SortIndex = (int *) Malloc(sizeof(int)*AtomCount, "molecule::FragmentMolecule: *SortIndex");
2416 for(int i=0;i<AtomCount;i++)
2417 SortIndex[i] = -1;
2418 while (runner->next != elemente->end) { // go through every element
2419 runner = runner->next;
2420 if (ElementsInMolecule[runner->Z]) { // if this element got atoms
2421 Walker = start;
2422 while (Walker->next != end) { // go through every atom of this element
2423 Walker = Walker->next;
2424 if (Walker->type->Z == runner->Z) // if this atom fits to element
2425 SortIndex[Walker->nr] = AtomNo++;
2426 }
2427 }
2428 }
2429 return true;
2430};
2431
2432/** Performs a many-body bond order analysis for a given bond order.
2433 * -# parses adjacency, keysets and orderatsite files
2434 * -# performs DFS to find connected subgraphs (to leave this in was a design decision: might be useful later)
2435 * -# RootStack is created for every subgraph (here, later we implement the "update 10 sites with highest energ
2436y contribution", and that's why this consciously not done in the following loop)
2437 * -# in a loop over all subgraphs
2438 * -# calls FragmentBOSSANOVA with this RootStack and within the subgraph molecule structure
2439 * -# creates molecule (fragment)s from the returned keysets (StoreFragmentFromKeySet)
2440 * -# combines the generated molecule lists from all subgraphs
2441 * -# saves to disk: fragment configs, adjacency, orderatsite, keyset files
2442 * Note that as we split "this" molecule up into a list of subgraphs, i.e. a MoleculeListClass, we have two sets
2443 * of vertex indices: Global always means the index in "this" molecule, whereas local refers to the molecule or
2444 * subgraph in the MoleculeListClass.
2445 * \param *out output stream for debugging
2446 * \param Order up to how many neighbouring bonds a fragment contains in BondOrderScheme::BottumUp scheme
2447 * \param *configuration configuration for writing config files for each fragment
2448 */
2449void molecule::FragmentMolecule(ofstream *out, int Order, config *configuration)
2450{
2451 MoleculeListClass *BondFragments = NULL;
2452 int *SortIndex = NULL;
2453 int *MinimumRingSize = NULL;
2454 int FragmentCounter;
2455 MoleculeLeafClass *MolecularWalker = NULL;
2456 MoleculeLeafClass *Subgraphs = NULL; // list of subgraphs from DFS analysis
2457 fstream File;
2458 bool FragmentationToDo = true;
2459 Graph **FragmentList = NULL;
2460 Graph *ParsedFragmentList = NULL;
2461 Graph TotalGraph; // graph with all keysets however local numbers
2462 int TotalNumberOfKeySets = 0;
2463 atom **ListOfAtoms = NULL;
2464 atom ***ListOfLocalAtoms = NULL;
2465 bool *AtomMask = NULL;
2466
2467 *out << endl;
2468#ifdef ADDHYDROGEN
2469 *out << Verbose(0) << "I will treat hydrogen special and saturate dangling bonds with it." << endl;
2470#else
2471 *out << Verbose(0) << "Hydrogen is treated just like the rest of the lot." << endl;
2472#endif
2473
2474 // ++++++++++++++++++++++++++++ INITIAL STUFF: Bond structure analysis, file parsing, ... ++++++++++++++++++++++++++++++++++++++++++
2475
2476 // ===== 1. Check whether bond structure is same as stored in files ====
2477
2478 // fill the adjacency list
2479 CreateListOfBondsPerAtom(out);
2480
2481 // create lookup table for Atom::nr
2482 FragmentationToDo = FragmentationToDo && CreateFatherLookupTable(out, start, end, ListOfAtoms, AtomCount);
2483
2484 // === compare it with adjacency file ===
2485 FragmentationToDo = FragmentationToDo && CheckAdjacencyFileAgainstMolecule(out, configuration->configpath, ListOfAtoms);
2486 Free((void **)&ListOfAtoms, "molecule::FragmentMolecule - **ListOfAtoms");
2487
2488 // ===== 2. perform a DFS analysis to gather info on cyclic structure and a list of disconnected subgraphs =====
2489 Subgraphs = DepthFirstSearchAnalysis(out, false, MinimumRingSize);
2490 // fill the bond structure of the individually stored subgraphs
2491 Subgraphs->next->FillBondStructureFromReference(out, this, (FragmentCounter = 0), ListOfLocalAtoms, false); // we want to keep the created ListOfLocalAtoms
2492
2493 // ===== 3. if structure still valid, parse key set file and others =====
2494 FragmentationToDo = FragmentationToDo && ParseKeySetFile(out, configuration->configpath, ParsedFragmentList, configuration->GetIsAngstroem());
2495
2496 // ===== 4. check globally whether there's something to do actually (first adaptivity check)
2497 FragmentationToDo = FragmentationToDo && ParseOrderAtSiteFromFile(out, configuration->configpath);
2498
2499 // =================================== Begin of FRAGMENTATION ===============================
2500 // ===== 6a. assign each keyset to its respective subgraph =====
2501 Subgraphs->next->AssignKeySetsToFragment(out, this, ParsedFragmentList, ListOfLocalAtoms, FragmentList, (FragmentCounter = 0), false);
2502
2503 KeyStack *RootStack = new KeyStack[Subgraphs->next->Count()];
2504 AtomMask = new bool[AtomCount+1];
2505 while (CheckOrderAtSite(out, AtomMask, ParsedFragmentList, Order, configuration->configpath)) {
2506 AtomMask[AtomCount] = true; // last plus one entry is used as marker that we have been through this loop once already in CheckOrderAtSite()
2507 // ===== 6b. fill RootStack for each subgraph (second adaptivity check) =====
2508 Subgraphs->next->FillRootStackForSubgraphs(out, RootStack, AtomMask, (FragmentCounter = 0));
2509
2510 // ===== 7. fill the bond fragment list =====
2511 FragmentCounter = 0;
2512 MolecularWalker = Subgraphs;
2513 while (MolecularWalker->next != NULL) {
2514 MolecularWalker = MolecularWalker->next;
2515 *out << Verbose(1) << "Fragmenting subgraph " << MolecularWalker << "." << endl;
2516 // output ListOfBondsPerAtom for debugging
2517 MolecularWalker->Leaf->OutputListOfBonds(out);
2518 if (MolecularWalker->Leaf->first->next != MolecularWalker->Leaf->last) {
2519
2520 // call BOSSANOVA method
2521 *out << Verbose(0) << endl << " ========== BOND ENERGY of subgraph " << FragmentCounter << " ========================= " << endl;
2522 MolecularWalker->Leaf->FragmentBOSSANOVA(out, FragmentList[FragmentCounter], RootStack[FragmentCounter], MinimumRingSize);
2523 } else {
2524 cerr << "Subgraph " << MolecularWalker << " has no atoms!" << endl;
2525 }
2526 FragmentCounter++; // next fragment list
2527 }
2528 }
2529 delete[](RootStack);
2530 delete[](AtomMask);
2531 delete(ParsedFragmentList);
2532 delete[](MinimumRingSize);
2533
2534 // free the index lookup list
2535 for (int i=0;i<FragmentCounter;i++)
2536 Free((void **)&ListOfLocalAtoms[i], "molecule::FragmentMolecule - *ListOfLocalAtoms[]");
2537 Free((void **)&ListOfLocalAtoms, "molecule::FragmentMolecule - **ListOfLocalAtoms");
2538
2539 // ==================================== End of FRAGMENTATION ============================================
2540
2541 // ===== 8a. translate list into global numbers (i.e. ones that are valid in "this" molecule, not in MolecularWalker->Leaf)
2542 Subgraphs->next->TranslateIndicesToGlobalIDs(out, FragmentList, (FragmentCounter = 0), TotalNumberOfKeySets, TotalGraph);
2543
2544 // free subgraph memory again
2545 FragmentCounter = 0;
2546 if (Subgraphs != NULL) {
2547 while (Subgraphs->next != NULL) {
2548 Subgraphs = Subgraphs->next;
2549 delete(FragmentList[FragmentCounter++]);
2550 delete(Subgraphs->previous);
2551 }
2552 delete(Subgraphs);
2553 }
2554 Free((void **)&FragmentList, "molecule::FragmentMolecule - **FragmentList");
2555
2556 // ===== 8b. gather keyset lists (graphs) from all subgraphs and transform into MoleculeListClass =====
2557 // allocate memory for the pointer array and transmorph graphs into full molecular fragments
2558 BondFragments = new MoleculeListClass(TotalGraph.size(), AtomCount);
2559 int k=0;
2560 for(Graph::iterator runner = TotalGraph.begin(); runner != TotalGraph.end(); runner++) {
2561 KeySet test = (*runner).first;
2562 *out << "Fragment No." << (*runner).second.first << " with TEFactor " << (*runner).second.second << "." << endl;
2563 BondFragments->ListOfMolecules[k] = StoreFragmentFromKeySet(out, test, configuration);
2564 k++;
2565 }
2566 *out << k << "/" << BondFragments->NumberOfMolecules << " fragments generated from the keysets." << endl;
2567
2568 // ===== 9. Save fragments' configuration and keyset files et al to disk ===
2569 if (BondFragments->NumberOfMolecules != 0) {
2570 // create the SortIndex from BFS labels to order in the config file
2571 CreateMappingLabelsToConfigSequence(out, SortIndex);
2572
2573 *out << Verbose(1) << "Writing " << BondFragments->NumberOfMolecules << " possible bond fragmentation configs" << endl;
2574 if (BondFragments->OutputConfigForListOfFragments(out, configuration, SortIndex))
2575 *out << Verbose(1) << "All configs written." << endl;
2576 else
2577 *out << Verbose(1) << "Some config writing failed." << endl;
2578
2579 // store force index reference file
2580 BondFragments->StoreForcesFile(out, configuration->configpath, SortIndex);
2581
2582 // store keysets file
2583 StoreKeySetFile(out, TotalGraph, configuration->configpath);
2584
2585 // store Adjacency file
2586 StoreAdjacencyToFile(out, configuration->configpath);
2587
2588 // store adaptive orders into file
2589 StoreOrderAtSiteFile(out, configuration->configpath);
2590
2591 // restore orbital and Stop values
2592 CalculateOrbitals(*configuration);
2593
2594 // free memory for bond part
2595 *out << Verbose(1) << "Freeing bond memory" << endl;
2596 delete(FragmentList); // remove bond molecule from memory
2597 Free((void **)&SortIndex, "molecule::FragmentMolecule: *SortIndex");
2598 } else
2599 *out << Verbose(1) << "FragmentList is zero on return, splitting failed." << endl;
2600
2601 *out << Verbose(0) << "End of bond fragmentation." << endl;
2602};
2603
2604/** Stores pairs (Atom::nr, Atom::AdaptiveOrder) into file.
2605 * Atoms not present in the file get "-1".
2606 * \param *out output stream for debugging
2607 * \param *path path to file ORDERATSITEFILE
2608 * \return true - file writable, false - not writable
2609 */
2610bool molecule::StoreOrderAtSiteFile(ofstream *out, char *path)
2611{
2612 stringstream line;
2613 ofstream file;
2614
2615 line << path << "/" << FRAGMENTPREFIX << ORDERATSITEFILE;
2616 file.open(line.str().c_str());
2617 *out << Verbose(1) << "Writing OrderAtSite " << ORDERATSITEFILE << " ... " << endl;
2618 if (file != NULL) {
2619 atom *Walker = start;
2620 while (Walker->next != end) {
2621 Walker = Walker->next;
2622 file << Walker->nr << "\t" << (int)Walker->AdaptiveOrder << endl;
2623 *out << Verbose(2) << "Storing: " << Walker->nr << "\t" << (int)Walker->AdaptiveOrder << "." << endl;
2624 }
2625 file.close();
2626 *out << Verbose(1) << "done." << endl;
2627 return true;
2628 } else {
2629 *out << Verbose(1) << "failed to open file " << line.str() << "." << endl;
2630 return false;
2631 }
2632};
2633
2634/** Parses pairs(Atom::nr, Atom::AdaptiveOrder) from file and stores in molecule's Atom's.
2635 * Atoms not present in the file get "0".
2636 * \param *out output stream for debugging
2637 * \param *path path to file ORDERATSITEFILEe
2638 * \return true - file found and scanned, false - file not found
2639 * \sa ParseKeySetFile() and CheckAdjacencyFileAgainstMolecule() as this is meant to be used in conjunction with the two
2640 */
2641bool molecule::ParseOrderAtSiteFromFile(ofstream *out, char *path)
2642{
2643 unsigned char *OrderArray = (unsigned char *) Malloc(sizeof(unsigned char)*AtomCount, "molecule::ParseOrderAtSiteFromFile - *OrderArray");
2644 bool status;
2645 int AtomNr;
2646 stringstream line;
2647 ifstream file;
2648 int Order;
2649
2650 *out << Verbose(1) << "Begin of ParseOrderAtSiteFromFile" << endl;
2651 for(int i=0;i<AtomCount;i++)
2652 OrderArray[i] = 0;
2653 line << path << "/" << FRAGMENTPREFIX << ORDERATSITEFILE;
2654 file.open(line.str().c_str());
2655 if (file != NULL) {
2656 for (int i=0;i<AtomCount;i++) // initialise with 0
2657 OrderArray[i] = 0;
2658 while (!file.eof()) { // parse from file
2659 file >> AtomNr;
2660 file >> Order;
2661 OrderArray[AtomNr] = (unsigned char) Order;
2662 //*out << Verbose(2) << "AtomNr " << AtomNr << " with order " << (int)OrderArray[AtomNr] << "." << endl;
2663 }
2664 atom *Walker = start;
2665 while (Walker->next != end) { // fill into atom classes
2666 Walker = Walker->next;
2667 Walker->AdaptiveOrder = OrderArray[Walker->nr];
2668 *out << Verbose(2) << *Walker << " gets order " << (int)Walker->AdaptiveOrder << "." << endl;
2669 }
2670 file.close();
2671 *out << Verbose(1) << "done." << endl;
2672 status = true;
2673 } else {
2674 *out << Verbose(1) << "failed to open file " << line.str() << "." << endl;
2675 status = false;
2676 }
2677 Free((void **)&OrderArray, "molecule::ParseOrderAtSiteFromFile - *OrderArray");
2678
2679 *out << Verbose(1) << "End of ParseOrderAtSiteFromFile" << endl;
2680 return status;
2681};
2682
2683/** Creates an 2d array of pointer with an entry for each atom and each bond it has.
2684 * Updates molecule::ListOfBondsPerAtom, molecule::NumberOfBondsPerAtom by parsing through
2685 * bond chain list, using molecule::AtomCount and molecule::BondCount.
2686 * Allocates memory, fills the array and exits
2687 * \param *out output stream for debugging
2688 */
2689void molecule::CreateListOfBondsPerAtom(ofstream *out)
2690{
2691 bond *Binder = NULL;
2692 atom *Walker = NULL;
2693 int TotalDegree;
2694 *out << Verbose(1) << "Begin of Creating ListOfBondsPerAtom: AtomCount = " << AtomCount << "\tBondCount = " << BondCount << "\tNoNonBonds = " << NoNonBonds << "." << endl;
2695
2696 // re-allocate memory
2697 *out << Verbose(2) << "(Re-)Allocating memory." << endl;
2698 if (ListOfBondsPerAtom != NULL) {
2699 for(int i=0;i<AtomCount;i++)
2700 Free((void **)&ListOfBondsPerAtom[i], "molecule::CreateListOfBondsPerAtom: ListOfBondsPerAtom[i]");
2701 Free((void **)&ListOfBondsPerAtom, "molecule::CreateListOfBondsPerAtom: ListOfBondsPerAtom");
2702 }
2703 if (NumberOfBondsPerAtom != NULL)
2704 Free((void **)&NumberOfBondsPerAtom, "molecule::CreateListOfBondsPerAtom: NumberOfBondsPerAtom");
2705 ListOfBondsPerAtom = (bond ***) Malloc(sizeof(bond **)*AtomCount, "molecule::CreateListOfBondsPerAtom: ***ListOfBondsPerAtom");
2706 NumberOfBondsPerAtom = (int *) Malloc(sizeof(int)*AtomCount, "molecule::CreateListOfBondsPerAtom: *NumberOfBondsPerAtom");
2707
2708 // reset bond counts per atom
2709 for(int i=0;i<AtomCount;i++)
2710 NumberOfBondsPerAtom[i] = 0;
2711 // count bonds per atom
2712 Binder = first;
2713 while (Binder->next != last) {
2714 Binder = Binder->next;
2715 NumberOfBondsPerAtom[Binder->leftatom->nr]++;
2716 NumberOfBondsPerAtom[Binder->rightatom->nr]++;
2717 }
2718 // allocate list of bonds per atom
2719 for(int i=0;i<AtomCount;i++)
2720 ListOfBondsPerAtom[i] = (bond **) Malloc(sizeof(bond *)*NumberOfBondsPerAtom[i], "molecule::CreateListOfBondsPerAtom: **ListOfBondsPerAtom[]");
2721 // clear the list again, now each NumberOfBondsPerAtom marks current free field
2722 for(int i=0;i<AtomCount;i++)
2723 NumberOfBondsPerAtom[i] = 0;
2724 // fill the list
2725 Binder = first;
2726 while (Binder->next != last) {
2727 Binder = Binder->next;
2728 ListOfBondsPerAtom[Binder->leftatom->nr][NumberOfBondsPerAtom[Binder->leftatom->nr]++] = Binder;
2729 ListOfBondsPerAtom[Binder->rightatom->nr][NumberOfBondsPerAtom[Binder->rightatom->nr]++] = Binder;
2730 }
2731
2732 // output list for debugging
2733 *out << Verbose(3) << "ListOfBondsPerAtom for each atom:" << endl;
2734 Walker = start;
2735 while (Walker->next != end) {
2736 Walker = Walker->next;
2737 *out << Verbose(4) << "Atom " << Walker->Name << " with " << NumberOfBondsPerAtom[Walker->nr] << " bonds: ";
2738 TotalDegree = 0;
2739 for (int j=0;j<NumberOfBondsPerAtom[Walker->nr];j++) {
2740 *out << *ListOfBondsPerAtom[Walker->nr][j] << "\t";
2741 TotalDegree += ListOfBondsPerAtom[Walker->nr][j]->BondDegree;
2742 }
2743 *out << " -- TotalDegree: " << TotalDegree << endl;
2744 }
2745 *out << Verbose(1) << "End of Creating ListOfBondsPerAtom." << endl << endl;
2746};
2747
2748/** Adds atoms up to \a BondCount distance from \a *Root and notes them down in \a **AddedAtomList.
2749 * Gray vertices are always enqueued in an StackClass<atom *> FIFO queue, the rest is usual BFS with adding vertices found was
2750 * white and putting into queue.
2751 * \param *out output stream for debugging
2752 * \param *Mol Molecule class to add atoms to
2753 * \param **AddedAtomList list with added atom pointers, index is atom father's number
2754 * \param **AddedBondList list with added bond pointers, index is bond father's number
2755 * \param *Root root vertex for BFS
2756 * \param *Bond bond not to look beyond
2757 * \param BondOrder maximum distance for vertices to add
2758 * \param IsAngstroem lengths are in angstroem or bohrradii
2759 */
2760void molecule::BreadthFirstSearchAdd(ofstream *out, molecule *Mol, atom **&AddedAtomList, bond **&AddedBondList, atom *Root, bond *Bond, int BondOrder, bool IsAngstroem)
2761{
2762 atom **PredecessorList = (atom **) Malloc(sizeof(atom *)*AtomCount, "molecule::BreadthFirstSearchAdd: **PredecessorList");
2763 int *ShortestPathList = (int *) Malloc(sizeof(int)*AtomCount, "molecule::BreadthFirstSearchAdd: *ShortestPathList");
2764 enum Shading *ColorList = (enum Shading *) Malloc(sizeof(enum Shading)*AtomCount, "molecule::BreadthFirstSearchAdd: *ColorList");
2765 class StackClass<atom *> *AtomStack = new StackClass<atom *>(AtomCount);
2766 atom *Walker = NULL, *OtherAtom = NULL;
2767 bond *Binder = NULL;
2768
2769 // add Root if not done yet
2770 AtomStack->ClearStack();
2771 if (AddedAtomList[Root->nr] == NULL) // add Root if not yet present
2772 AddedAtomList[Root->nr] = Mol->AddCopyAtom(Root);
2773 AtomStack->Push(Root);
2774
2775 // initialise each vertex as white with no predecessor, empty queue, color Root lightgray
2776 for (int i=0;i<AtomCount;i++) {
2777 PredecessorList[i] = NULL;
2778 ShortestPathList[i] = -1;
2779 if (AddedAtomList[i] != NULL) // mark already present atoms (i.e. Root and maybe others) as visited
2780 ColorList[i] = lightgray;
2781 else
2782 ColorList[i] = white;
2783 }
2784 ShortestPathList[Root->nr] = 0;
2785
2786 // and go on ... Queue always contains all lightgray vertices
2787 while (!AtomStack->IsEmpty()) {
2788 // we have to pop the oldest atom from stack. This keeps the atoms on the stack always of the same ShortestPath distance.
2789 // e.g. if current atom is 2, push to end of stack are of length 3, but first all of length 2 would be popped. They again
2790 // append length of 3 (their neighbours). Thus on stack we have always atoms of a certain length n at bottom of stack and
2791 // followed by n+1 till top of stack.
2792 Walker = AtomStack->PopFirst(); // pop oldest added
2793 *out << Verbose(1) << "Current Walker is: " << Walker->Name << ", and has " << NumberOfBondsPerAtom[Walker->nr] << " bonds." << endl;
2794 for(int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++) {
2795 Binder = ListOfBondsPerAtom[Walker->nr][i];
2796 if (Binder != NULL) { // don't look at bond equal NULL
2797 OtherAtom = Binder->GetOtherAtom(Walker);
2798 *out << Verbose(2) << "Current OtherAtom is: " << OtherAtom->Name << " for bond " << *Binder << "." << endl;
2799 if (ColorList[OtherAtom->nr] == white) {
2800 if (Binder != Bond) // let other atom white if it's via Root bond. In case it's cyclic it has to be reached again (yet Root is from OtherAtom already black, thus no problem)
2801 ColorList[OtherAtom->nr] = lightgray;
2802 PredecessorList[OtherAtom->nr] = Walker; // Walker is the predecessor
2803 ShortestPathList[OtherAtom->nr] = ShortestPathList[Walker->nr]+1;
2804 *out << Verbose(2) << "Coloring OtherAtom " << OtherAtom->Name << " " << ((ColorList[OtherAtom->nr] == white) ? "white" : "lightgray") << ", its predecessor is " << Walker->Name << " and its Shortest Path is " << ShortestPathList[OtherAtom->nr] << " egde(s) long." << endl;
2805 if ((((ShortestPathList[OtherAtom->nr] < BondOrder) && (Binder != Bond))) ) { // Check for maximum distance
2806 *out << Verbose(3);
2807 if (AddedAtomList[OtherAtom->nr] == NULL) { // add if it's not been so far
2808 AddedAtomList[OtherAtom->nr] = Mol->AddCopyAtom(OtherAtom);
2809 *out << "Added OtherAtom " << OtherAtom->Name;
2810 AddedBondList[Binder->nr] = Mol->AddBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder->BondDegree);
2811 AddedBondList[Binder->nr]->Cyclic = Binder->Cyclic;
2812 AddedBondList[Binder->nr]->Type = Binder->Type;
2813 *out << " and bond " << *(AddedBondList[Binder->nr]) << ", ";
2814 } else { // this code should actually never come into play (all white atoms are not yet present in BondMolecule, that's why they are white in the first place)
2815 *out << "Not adding OtherAtom " << OtherAtom->Name;
2816 if (AddedBondList[Binder->nr] == NULL) {
2817 AddedBondList[Binder->nr] = Mol->AddBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder->BondDegree);
2818 AddedBondList[Binder->nr]->Cyclic = Binder->Cyclic;
2819 AddedBondList[Binder->nr]->Type = Binder->Type;
2820 *out << ", added Bond " << *(AddedBondList[Binder->nr]);
2821 } else
2822 *out << ", not added Bond ";
2823 }
2824 *out << ", putting OtherAtom into queue." << endl;
2825 AtomStack->Push(OtherAtom);
2826 } else { // out of bond order, then replace
2827 if ((AddedAtomList[OtherAtom->nr] == NULL) && (Binder->Cyclic))
2828 ColorList[OtherAtom->nr] = white; // unmark if it has not been queued/added, to make it available via its other bonds (cyclic)
2829 if (Binder == Bond)
2830 *out << Verbose(3) << "Not Queueing, is the Root bond";
2831 else if (ShortestPathList[OtherAtom->nr] >= BondOrder)
2832 *out << Verbose(3) << "Not Queueing, is out of Bond Count of " << BondOrder;
2833 if (!Binder->Cyclic)
2834 *out << ", is not part of a cyclic bond, saturating bond with Hydrogen." << endl;
2835 if (AddedBondList[Binder->nr] == NULL) {
2836 if ((AddedAtomList[OtherAtom->nr] != NULL)) { // .. whether we add or saturate
2837 AddedBondList[Binder->nr] = Mol->AddBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder->BondDegree);
2838 AddedBondList[Binder->nr]->Cyclic = Binder->Cyclic;
2839 AddedBondList[Binder->nr]->Type = Binder->Type;
2840 } else {
2841#ifdef ADDHYDROGEN
2842 Mol->AddHydrogenReplacementAtom(out, Binder, AddedAtomList[Walker->nr], Walker, OtherAtom, ListOfBondsPerAtom[Walker->nr], NumberOfBondsPerAtom[Walker->nr], IsAngstroem);
2843#endif
2844 }
2845 }
2846 }
2847 } else {
2848 *out << Verbose(3) << "Not Adding, has already been visited." << endl;
2849 // This has to be a cyclic bond, check whether it's present ...
2850 if (AddedBondList[Binder->nr] == NULL) {
2851 if ((Binder != Bond) && (Binder->Cyclic) && (((ShortestPathList[Walker->nr]+1) < BondOrder))) {
2852 AddedBondList[Binder->nr] = Mol->AddBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder->BondDegree);
2853 AddedBondList[Binder->nr]->Cyclic = Binder->Cyclic;
2854 AddedBondList[Binder->nr]->Type = Binder->Type;
2855 } else { // if it's root bond it has to broken (otherwise we would not create the fragments)
2856#ifdef ADDHYDROGEN
2857 Mol->AddHydrogenReplacementAtom(out, Binder, AddedAtomList[Walker->nr], Walker, OtherAtom, ListOfBondsPerAtom[Walker->nr], NumberOfBondsPerAtom[Walker->nr], IsAngstroem);
2858#endif
2859 }
2860 }
2861 }
2862 }
2863 }
2864 ColorList[Walker->nr] = black;
2865 *out << Verbose(1) << "Coloring Walker " << Walker->Name << " black." << endl;
2866 }
2867 Free((void **)&PredecessorList, "molecule::BreadthFirstSearchAdd: **PredecessorList");
2868 Free((void **)&ShortestPathList, "molecule::BreadthFirstSearchAdd: **ShortestPathList");
2869 Free((void **)&ColorList, "molecule::BreadthFirstSearchAdd: **ColorList");
2870 delete(AtomStack);
2871};
2872
2873/** Adds bond structure to this molecule from \a Father molecule.
2874 * This basically causes this molecule to become an induced subgraph of the \a Father, i.e. for every bond in Father
2875 * with end points present in this molecule, bond is created in this molecule.
2876 * Special care was taken to ensure that this is of complexity O(N), where N is the \a Father's molecule::AtomCount.
2877 * \param *out output stream for debugging
2878 * \param *Father father molecule
2879 * \return true - is induced subgraph, false - there are atoms with fathers not in \a Father
2880 * \todo not checked, not fully working probably
2881 */
2882bool molecule::BuildInducedSubgraph(ofstream *out, const molecule *Father)
2883{
2884 atom *Walker = NULL, *OtherAtom = NULL;
2885 bool status = true;
2886 atom **ParentList = (atom **) Malloc(sizeof(atom *)*Father->AtomCount, "molecule::BuildInducedSubgraph: **ParentList");
2887
2888 *out << Verbose(2) << "Begin of BuildInducedSubgraph." << endl;
2889
2890 // reset parent list
2891 *out << Verbose(3) << "Resetting ParentList." << endl;
2892 for (int i=0;i<Father->AtomCount;i++)
2893 ParentList[i] = NULL;
2894
2895 // fill parent list with sons
2896 *out << Verbose(3) << "Filling Parent List." << endl;
2897 Walker = start;
2898 while (Walker->next != end) {
2899 Walker = Walker->next;
2900 ParentList[Walker->father->nr] = Walker;
2901 // Outputting List for debugging
2902 *out << Verbose(4) << "Son["<< Walker->father->nr <<"] of " << Walker->father << " is " << ParentList[Walker->father->nr] << "." << endl;
2903 }
2904
2905 // check each entry of parent list and if ok (one-to-and-onto matching) create bonds
2906 *out << Verbose(3) << "Creating bonds." << endl;
2907 Walker = Father->start;
2908 while (Walker->next != Father->end) {
2909 Walker = Walker->next;
2910 if (ParentList[Walker->nr] != NULL) {
2911 if (ParentList[Walker->nr]->father != Walker) {
2912 status = false;
2913 } else {
2914 for (int i=0;i<Father->NumberOfBondsPerAtom[Walker->nr];i++) {
2915 OtherAtom = Father->ListOfBondsPerAtom[Walker->nr][i]->GetOtherAtom(Walker);
2916 if (ParentList[OtherAtom->nr] != NULL) { // if otheratom is also a father of an atom on this molecule, create the bond
2917 *out << Verbose(4) << "Endpoints of Bond " << Father->ListOfBondsPerAtom[Walker->nr][i] << " are both present: " << ParentList[Walker->nr]->Name << " and " << ParentList[OtherAtom->nr]->Name << "." << endl;
2918 AddBond(ParentList[Walker->nr], ParentList[OtherAtom->nr], Father->ListOfBondsPerAtom[Walker->nr][i]->BondDegree);
2919 }
2920 }
2921 }
2922 }
2923 }
2924
2925 Free((void **)&ParentList, "molecule::BuildInducedSubgraph: **ParentList");
2926 *out << Verbose(2) << "End of BuildInducedSubgraph." << endl;
2927 return status;
2928};
2929
2930
2931/** Looks through a StackClass<atom *> and returns the likeliest removal candiate.
2932 * \param *out output stream for debugging messages
2933 * \param *&Leaf KeySet to look through
2934 * \param *&ShortestPathList list of the shortest path to decide which atom to suggest as removal candidate in the end
2935 * \param index of the atom suggested for removal
2936 */
2937int molecule::LookForRemovalCandidate(ofstream *&out, KeySet *&Leaf, int *&ShortestPathList)
2938{
2939 atom *Runner = NULL;
2940 int SP, Removal;
2941
2942 *out << Verbose(2) << "Looking for removal candidate." << endl;
2943 SP = -1; //0; // not -1, so that Root is never removed
2944 Removal = -1;
2945 for (KeySet::iterator runner = Leaf->begin(); runner != Leaf->end(); runner++) {
2946 Runner = FindAtom((*runner));
2947 if (Runner->type->Z != 1) { // skip all those added hydrogens when re-filling snake stack
2948 if (ShortestPathList[(*runner)] > SP) { // remove the oldest one with longest shortest path
2949 SP = ShortestPathList[(*runner)];
2950 Removal = (*runner);
2951 }
2952 }
2953 }
2954 return Removal;
2955};
2956
2957/** Stores a fragment from \a KeySet into \a molecule.
2958 * First creates the minimal set of atoms from the KeySet, then creates the bond structure from the complete
2959 * molecule and adds missing hydrogen where bonds were cut.
2960 * \param *out output stream for debugging messages
2961 * \param &Leaflet pointer to KeySet structure
2962 * \param IsAngstroem whether we have Ansgtroem or bohrradius
2963 * \return pointer to constructed molecule
2964 */
2965molecule * molecule::StoreFragmentFromKeySet(ofstream *out, KeySet &Leaflet, bool IsAngstroem)
2966{
2967 atom *Runner = NULL, *FatherOfRunner = NULL, *OtherFather = NULL;
2968 atom **SonList = (atom **) Malloc(sizeof(atom *)*AtomCount, "molecule::StoreFragmentFromStack: **SonList");
2969 molecule *Leaf = new molecule(elemente);
2970
2971// *out << Verbose(1) << "Begin of StoreFragmentFromKeyset." << endl;
2972
2973 Leaf->BondDistance = BondDistance;
2974 for(int i=0;i<NDIM*2;i++)
2975 Leaf->cell_size[i] = cell_size[i];
2976
2977 // initialise SonList (indicates when we need to replace a bond with hydrogen instead)
2978 for(int i=0;i<AtomCount;i++)
2979 SonList[i] = NULL;
2980
2981 // first create the minimal set of atoms from the KeySet
2982 for(KeySet::iterator runner = Leaflet.begin(); runner != Leaflet.end(); runner++) {
2983 FatherOfRunner = FindAtom((*runner)); // find the id
2984 SonList[FatherOfRunner->nr] = Leaf->AddCopyAtom(FatherOfRunner);
2985 }
2986
2987 // create the bonds between all: Make it an induced subgraph and add hydrogen
2988// *out << Verbose(2) << "Creating bonds from father graph (i.e. induced subgraph creation)." << endl;
2989 Runner = Leaf->start;
2990 while (Runner->next != Leaf->end) {
2991 Runner = Runner->next;
2992 FatherOfRunner = Runner->father;
2993 if (SonList[FatherOfRunner->nr] != NULL) { // check if this, our father, is present in list
2994 // create all bonds
2995 for (int i=0;i<NumberOfBondsPerAtom[FatherOfRunner->nr];i++) { // go through every bond of father
2996 OtherFather = ListOfBondsPerAtom[FatherOfRunner->nr][i]->GetOtherAtom(FatherOfRunner);
2997// *out << Verbose(2) << "Father " << *FatherOfRunner << " of son " << *SonList[FatherOfRunner->nr] << " is bound to " << *OtherFather;
2998 if (SonList[OtherFather->nr] != NULL) {
2999// *out << ", whose son is " << *SonList[OtherFather->nr] << "." << endl;
3000 if (OtherFather->nr > FatherOfRunner->nr) { // add bond (nr check is for adding only one of both variants: ab, ba)
3001// *out << Verbose(3) << "Adding Bond: " << Leaf->AddBond(Runner, SonList[OtherFather->nr], ListOfBondsPerAtom[FatherOfRunner->nr][i]->BondDegree) << "." << endl;
3002 //NumBonds[Runner->nr]++;
3003 } else {
3004// *out << Verbose(3) << "Not adding bond, labels in wrong order." << endl;
3005 }
3006 } else {
3007// *out << ", who has no son in this fragment molecule." << endl;
3008#ifdef ADDHYDROGEN
3009// *out << Verbose(3) << "Adding Hydrogen to " << Runner->Name << " and a bond in between." << endl;
3010 Leaf->AddHydrogenReplacementAtom(out, ListOfBondsPerAtom[FatherOfRunner->nr][i], Runner, FatherOfRunner, OtherFather, ListOfBondsPerAtom[FatherOfRunner->nr],NumberOfBondsPerAtom[FatherOfRunner->nr], IsAngstroem);
3011#endif
3012 //NumBonds[Runner->nr] += ListOfBondsPerAtom[FatherOfRunner->nr][i]->BondDegree;
3013 }
3014 }
3015 } else {
3016 *out << Verbose(0) << "ERROR: Son " << Runner->Name << " has father " << FatherOfRunner->Name << " but its entry in SonList is " << SonList[FatherOfRunner->nr] << "!" << endl;
3017 }
3018#ifdef ADDHYDROGEN
3019 while ((Runner->next != Leaf->end) && (Runner->next->type->Z == 1)) // skip added hydrogen
3020 Runner = Runner->next;
3021#endif
3022 }
3023 Leaf->CreateListOfBondsPerAtom(out);
3024 //Leaflet->Leaf->ScanForPeriodicCorrection(out);
3025 Free((void **)&SonList, "molecule::StoreFragmentFromStack: **SonList");
3026// *out << Verbose(1) << "End of StoreFragmentFromKeyset." << endl;
3027 return Leaf;
3028};
3029
3030/** Creates \a MoleculeListClass of all unique fragments of the \a molecule containing \a Order atoms or vertices.
3031 * The picture to have in mind is that of a DFS "snake" of a certain length \a Order, i.e. as in the infamous
3032 * computer game, that winds through the connected graph representing the molecule. Color (white,
3033 * lightgray, darkgray, black) indicates whether a vertex has been discovered so far or not. Labels will help in
3034 * creating only unique fragments and not additional ones with vertices simply in different sequence.
3035 * The Predecessor is always the one that came before in discovering, needed on backstepping. And
3036 * finally, the ShortestPath is needed for removing vertices from the snake stack during the back-
3037 * stepping.
3038 * \param *out output stream for debugging
3039 * \param Order number of atoms in each fragment
3040 * \param *configuration configuration for writing config files for each fragment
3041 * \return List of all unique fragments with \a Order atoms
3042 */
3043/*
3044MoleculeListClass * molecule::CreateListOfUniqueFragmentsOfOrder(ofstream *out, int Order, config *configuration)
3045{
3046 atom **PredecessorList = (atom **) Malloc(sizeof(atom *)*AtomCount, "molecule::CreateListOfUniqueFragmentsOfOrder: **PredecessorList");
3047 int *ShortestPathList = (int *) Malloc(sizeof(int)*AtomCount, "molecule::CreateListOfUniqueFragmentsOfOrder: *ShortestPathList");
3048 int *Labels = (int *) Malloc(sizeof(int)*AtomCount, "molecule::CreateListOfUniqueFragmentsOfOrder: *Labels");
3049 enum Shading *ColorVertexList = (enum Shading *) Malloc(sizeof(enum Shading)*AtomCount, "molecule::CreateListOfUniqueFragmentsOfOrder: *ColorList");
3050 enum Shading *ColorEdgeList = (enum Shading *) Malloc(sizeof(enum Shading)*BondCount, "molecule::CreateListOfUniqueFragmentsOfOrder: *ColorBondList");
3051 StackClass<atom *> *RootStack = new StackClass<atom *>(AtomCount);
3052 StackClass<atom *> *TouchedStack = new StackClass<atom *>((int)pow(4,Order)+2); // number of atoms reached from one with maximal 4 bonds plus Root itself
3053 StackClass<atom *> *SnakeStack = new StackClass<atom *>(Order+1); // equal to Order is not possible, as then the StackClass<atom *> cannot discern between full and empty stack!
3054 MoleculeLeafClass *Leaflet = NULL, *TempLeaf = NULL;
3055 MoleculeListClass *FragmentList = NULL;
3056 atom *Walker = NULL, *OtherAtom = NULL, *Root = NULL, *Removal = NULL;
3057 bond *Binder = NULL;
3058 int RunningIndex = 0, FragmentCounter = 0;
3059
3060 *out << Verbose(1) << "Begin of CreateListOfUniqueFragmentsOfOrder." << endl;
3061
3062 // reset parent list
3063 *out << Verbose(3) << "Resetting labels, parent, predecessor, color and shortest path lists." << endl;
3064 for (int i=0;i<AtomCount;i++) { // reset all atom labels
3065 // initialise each vertex as white with no predecessor, empty queue, color lightgray, not labelled, no sons
3066 Labels[i] = -1;
3067 SonList[i] = NULL;
3068 PredecessorList[i] = NULL;
3069 ColorVertexList[i] = white;
3070 ShortestPathList[i] = -1;
3071 }
3072 for (int i=0;i<BondCount;i++)
3073 ColorEdgeList[i] = white;
3074 RootStack->ClearStack(); // clearstack and push first atom if exists
3075 TouchedStack->ClearStack();
3076 Walker = start->next;
3077 while ((Walker != end)
3078#ifdef ADDHYDROGEN
3079 && (Walker->type->Z == 1)
3080#endif
3081 ) { // search for first non-hydrogen atom
3082 *out << Verbose(4) << "Current Root candidate is " << Walker->Name << "." << endl;
3083 Walker = Walker->next;
3084 }
3085 if (Walker != end)
3086 RootStack->Push(Walker);
3087 else
3088 *out << Verbose(0) << "ERROR: Could not find an appropriate Root atom!" << endl;
3089 *out << Verbose(3) << "Root " << Walker->Name << " is on AtomStack, beginning loop through all vertices ..." << endl;
3090
3091 ///// OUTER LOOP ////////////
3092 while (!RootStack->IsEmpty()) {
3093 // get new root vertex from atom stack
3094 Root = RootStack->PopFirst();
3095 ShortestPathList[Root->nr] = 0;
3096 if (Labels[Root->nr] == -1)
3097 Labels[Root->nr] = RunningIndex++; // prevent it from getting again on AtomStack
3098 PredecessorList[Root->nr] = Root;
3099 TouchedStack->Push(Root);
3100 *out << Verbose(0) << "Root for this loop is: " << Root->Name << ".\n";
3101
3102 // clear snake stack
3103 SnakeStack->ClearStack();
3104 //SnakeStack->TestImplementation(out, start->next);
3105
3106 ///// INNER LOOP ////////////
3107 // Problems:
3108 // - what about cyclic bonds?
3109 Walker = Root;
3110 do {
3111 *out << Verbose(1) << "Current Walker is: " << Walker->Name;
3112 // initial setting of the new Walker: label, color, shortest path and put on stacks
3113 if (Labels[Walker->nr] == -1) { // give atom a unique, monotonely increasing number
3114 Labels[Walker->nr] = RunningIndex++;
3115 RootStack->Push(Walker);
3116 }
3117 *out << ", has label " << Labels[Walker->nr];
3118 if ((ColorVertexList[Walker->nr] == white) || ((Binder != NULL) && (ColorEdgeList[Binder->nr] == white))) { // color it if newly discovered and push on stacks (and if within reach!)
3119 if ((Binder != NULL) && (ColorEdgeList[Binder->nr] == white)) {
3120 // Binder ought to be set still from last neighbour search
3121 *out << ", coloring bond " << *Binder << " black";
3122 ColorEdgeList[Binder->nr] = black; // mark this bond as used
3123 }
3124 if (ShortestPathList[Walker->nr] == -1) {
3125 ShortestPathList[Walker->nr] = ShortestPathList[PredecessorList[Walker->nr]->nr]+1;
3126 TouchedStack->Push(Walker); // mark every atom for lists cleanup later, whose shortest path has been changed
3127 }
3128 if ((ShortestPathList[Walker->nr] < Order) && (ColorVertexList[Walker->nr] != darkgray)) { // if not already on snake stack
3129 SnakeStack->Push(Walker);
3130 ColorVertexList[Walker->nr] = darkgray; // mark as dark gray of on snake stack
3131 }
3132 }
3133 *out << ", SP of " << ShortestPathList[Walker->nr] << " and its color is " << GetColor(ColorVertexList[Walker->nr]) << "." << endl;
3134
3135 // then check the stack for a newly stumbled upon fragment
3136 if (SnakeStack->ItemCount() == Order) { // is stack full?
3137 // store the fragment if it is one and get a removal candidate
3138 Removal = StoreFragmentFromStack(out, Root, Walker, Leaflet, SnakeStack, ShortestPathList, SonList, Labels, &FragmentCounter, configuration);
3139 // remove the candidate if one was found
3140 if (Removal != NULL) {
3141 *out << Verbose(2) << "Removing item " << Removal->Name << " with SP of " << ShortestPathList[Removal->nr] << " from snake stack." << endl;
3142 SnakeStack->RemoveItem(Removal);
3143 ColorVertexList[Removal->nr] = lightgray; // return back to not on snake stack but explored marking
3144 if (Walker == Removal) { // if the current atom is to be removed, we also have to take a step back
3145 Walker = PredecessorList[Removal->nr];
3146 *out << Verbose(2) << "Stepping back to " << Walker->Name << "." << endl;
3147 }
3148 }
3149 } else
3150 Removal = NULL;
3151
3152 // finally, look for a white neighbour as the next Walker
3153 Binder = NULL;
3154 if ((Removal == NULL) || (Walker != PredecessorList[Removal->nr])) { // don't look, if a new walker has been set above
3155 *out << Verbose(2) << "Snake has currently " << SnakeStack->ItemCount() << " item(s)." << endl;
3156 OtherAtom = NULL; // this is actually not needed, every atom has at least one neighbour
3157 if (ShortestPathList[Walker->nr] < Order) {
3158 for(int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++) {
3159 Binder = ListOfBondsPerAtom[Walker->nr][i];
3160 *out << Verbose(2) << "Current bond is " << *Binder << ": ";
3161 OtherAtom = Binder->GetOtherAtom(Walker);
3162 if ((Labels[OtherAtom->nr] != -1) && (Labels[OtherAtom->nr] < Labels[Root->nr])) { // we don't step up to labels bigger than us
3163 *out << "Label " << Labels[OtherAtom->nr] << " is smaller than Root's " << Labels[Root->nr] << "." << endl;
3164 //ColorVertexList[OtherAtom->nr] = lightgray; // mark as explored
3165 } else { // otherwise check its colour and element
3166 if (
3167#ifdef ADDHYDROGEN
3168 (OtherAtom->type->Z != 1) &&
3169#endif
3170 (ColorEdgeList[Binder->nr] == white)) { // skip hydrogen, look for unexplored vertices
3171 *out << "Moving along " << GetColor(ColorEdgeList[Binder->nr]) << " bond " << Binder << " to " << ((ColorVertexList[OtherAtom->nr] == white) ? "unexplored" : "explored") << " item: " << OtherAtom->Name << "." << endl;
3172 // i find it currently rather sensible to always set the predecessor in order to find one's way back
3173 //if (PredecessorList[OtherAtom->nr] == NULL) {
3174 PredecessorList[OtherAtom->nr] = Walker;
3175 *out << Verbose(3) << "Setting Predecessor of " << OtherAtom->Name << " to " << PredecessorList[OtherAtom->nr]->Name << "." << endl;
3176 //} else {
3177 // *out << Verbose(3) << "Predecessor of " << OtherAtom->Name << " is " << PredecessorList[OtherAtom->nr]->Name << "." << endl;
3178 //}
3179 Walker = OtherAtom;
3180 break;
3181 } else {
3182 if (OtherAtom->type->Z == 1)
3183 *out << "Links to a hydrogen atom." << endl;
3184 else
3185 *out << "Bond has not white but " << GetColor(ColorEdgeList[Binder->nr]) << " color." << endl;
3186 }
3187 }
3188 }
3189 } else { // means we have stepped beyond the horizon: Return!
3190 Walker = PredecessorList[Walker->nr];
3191 OtherAtom = Walker;
3192 *out << Verbose(3) << "We have gone too far, stepping back to " << Walker->Name << "." << endl;
3193 }
3194 if (Walker != OtherAtom) { // if no white neighbours anymore, color it black
3195 *out << Verbose(2) << "Coloring " << Walker->Name << " black." << endl;
3196 ColorVertexList[Walker->nr] = black;
3197 Walker = PredecessorList[Walker->nr];
3198 }
3199 }
3200 } while ((Walker != Root) || (ColorVertexList[Root->nr] != black));
3201 *out << Verbose(2) << "Inner Looping is finished." << endl;
3202
3203 // if we reset all AtomCount atoms, we have again technically O(N^2) ...
3204 *out << Verbose(2) << "Resetting lists." << endl;
3205 Walker = NULL;
3206 Binder = NULL;
3207 while (!TouchedStack->IsEmpty()) {
3208 Walker = TouchedStack->PopLast();
3209 *out << Verbose(3) << "Re-initialising entries of " << *Walker << "." << endl;
3210 for(int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++)
3211 ColorEdgeList[ListOfBondsPerAtom[Walker->nr][i]->nr] = white;
3212 PredecessorList[Walker->nr] = NULL;
3213 ColorVertexList[Walker->nr] = white;
3214 ShortestPathList[Walker->nr] = -1;
3215 }
3216 }
3217 *out << Verbose(1) << "Outer Looping over all vertices is done." << endl;
3218
3219 // copy together
3220 *out << Verbose(1) << "Copying all fragments into MoleculeList structure." << endl;
3221 FragmentList = new MoleculeListClass(FragmentCounter, AtomCount);
3222 RunningIndex = 0;
3223 while ((Leaflet != NULL) && (RunningIndex < FragmentCounter)) {
3224 FragmentList->ListOfMolecules[RunningIndex++] = Leaflet->Leaf;
3225 Leaflet->Leaf = NULL; // prevent molecule from being removed
3226 TempLeaf = Leaflet;
3227 Leaflet = Leaflet->previous;
3228 delete(TempLeaf);
3229 };
3230
3231 // free memory and exit
3232 Free((void **)&PredecessorList, "molecule::CreateListOfUniqueFragmentsOfOrder: **PredecessorList");
3233 Free((void **)&ShortestPathList, "molecule::CreateListOfUniqueFragmentsOfOrder: *ShortestPathList");
3234 Free((void **)&Labels, "molecule::CreateListOfUniqueFragmentsOfOrder: *Labels");
3235 Free((void **)&ColorVertexList, "molecule::CreateListOfUniqueFragmentsOfOrder: *ColorList");
3236 delete(RootStack);
3237 delete(TouchedStack);
3238 delete(SnakeStack);
3239
3240 *out << Verbose(1) << "End of CreateListOfUniqueFragmentsOfOrder." << endl;
3241 return FragmentList;
3242};
3243*/
3244
3245/** Structure containing all values in power set combination generation.
3246 */
3247struct UniqueFragments {
3248 config *configuration;
3249 atom *Root;
3250 Graph *Leaflet;
3251 KeySet *FragmentSet;
3252 int ANOVAOrder;
3253 int FragmentCounter;
3254 int CurrentIndex;
3255 int *Labels;
3256 double TEFactor;
3257 int *ShortestPathList;
3258 bool **UsedList;
3259 bond **BondsPerSPList;
3260 int *BondsPerSPCount;
3261};
3262
3263/** From a given set of Bond sorted by Shortest Path distance, create all possible fragments of size \a SetDimension.
3264 * -# loops over every possible combination (2^dimension of edge set)
3265 * -# inserts current set, if there's still space left
3266 * -# yes: calls SPFragmentGenerator with structure, created new edge list and size respective to root dist
3267ance+1
3268 * -# no: stores fragment into keyset list by calling InsertFragmentIntoGraph
3269 * -# removes all items added into the snake stack (in UniqueFragments structure) added during level (root
3270distance) and current set
3271 * \param *out output stream for debugging
3272 * \param FragmentSearch UniqueFragments structure with all values needed
3273 * \param RootDistance current shortest path level, whose set of edges is represented by **BondsSet
3274 * \param SetDimension Number of possible bonds on this level (i.e. size of the array BondsSet[])
3275 * \param SubOrder remaining number of allowed vertices to add
3276 */
3277void molecule::SPFragmentGenerator(ofstream *out, struct UniqueFragments *FragmentSearch, int RootDistance, bond **BondsSet, int SetDimension, int SubOrder)
3278{
3279 atom *OtherWalker = NULL;
3280 int verbosity = 0; //FragmentSearch->ANOVAOrder-SubOrder;
3281 int NumCombinations;
3282 bool bit;
3283 int bits, TouchedIndex, SubSetDimension, SP;
3284 int Removal;
3285 int *TouchedList = (int *) Malloc(sizeof(int)*(SubOrder+1), "molecule::SPFragmentGenerator: *TouchedList");
3286 bond *Binder = NULL;
3287 bond **BondsList = NULL;
3288
3289 NumCombinations = 1 << SetDimension;
3290
3291 // Hier muessen von 1 bis NumberOfBondsPerAtom[Walker->nr] alle Kombinationen
3292 // von Endstuecken (aus den Bonds) hinzugefᅵᅵgt werden und fᅵᅵr verbleibende ANOVAOrder
3293 // rekursiv GraphCrawler in der nᅵᅵchsten Ebene aufgerufen werden
3294
3295 *out << Verbose(1+verbosity) << "Begin of SPFragmentGenerator." << endl;
3296 *out << Verbose(1+verbosity) << "We are " << RootDistance << " away from Root, which is " << *FragmentSearch->Root << ", SubOrder is " << SubOrder << ", SetDimension is " << SetDimension << " and this means " << NumCombinations-1 << " combination(s)." << endl;
3297
3298 // initialised touched list (stores added atoms on this level)
3299 *out << Verbose(1+verbosity) << "Clearing touched list." << endl;
3300 for (TouchedIndex=0;TouchedIndex<=SubOrder;TouchedIndex++) // empty touched list
3301 TouchedList[TouchedIndex] = -1;
3302 TouchedIndex = 0;
3303
3304 // create every possible combination of the endpieces
3305 *out << Verbose(1+verbosity) << "Going through all combinations of the power set." << endl;
3306 for (int i=1;i<NumCombinations;i++) { // sweep through all power set combinations (skip empty set!)
3307 // count the set bit of i
3308 bits = 0;
3309 for (int j=0;j<SetDimension;j++)
3310 bits += (i & (1 << j)) >> j;
3311
3312 *out << Verbose(1+verbosity) << "Current set is " << Binary(i | (1 << SetDimension)) << ", number of bits is " << bits << "." << endl;
3313 if (bits <= SubOrder) { // if not greater than additional atoms allowed on stack, continue
3314 // --1-- add this set of the power set of bond partners to the snake stack
3315 for (int j=0;j<SetDimension;j++) { // pull out every bit by shifting
3316 bit = ((i & (1 << j)) != 0); // mask the bit for the j-th bond
3317 if (bit) { // if bit is set, we add this bond partner
3318 OtherWalker = BondsSet[j]->rightatom; // rightatom is always the one more distant, i.e. the one to add
3319 //*out << Verbose(1+verbosity) << "Current Bond is " << ListOfBondsPerAtom[Walker->nr][i] << ", checking on " << *OtherWalker << "." << endl;
3320 //if ((!FragmentSearch->UsedList[OtherWalker->nr][i]) && (FragmentSearch->Labels[OtherWalker->nr] > FragmentSearch->Labels[FragmentSearch->Root->nr])) {
3321 //*out << Verbose(2+verbosity) << "Not used so far, label " << FragmentSearch->Labels[OtherWalker->nr] << " is bigger than Root's " << FragmentSearch->Labels[FragmentSearch->Root->nr] << "." << endl;
3322 *out << Verbose(2+verbosity) << "Adding " << *OtherWalker << " with nr " << OtherWalker->nr << "." << endl;
3323 TouchedList[TouchedIndex++] = OtherWalker->nr; // note as added
3324 FragmentSearch->FragmentSet->insert(OtherWalker->nr);
3325 //FragmentSearch->UsedList[OtherWalker->nr][i] = true;
3326 //}
3327 } else {
3328 *out << Verbose(2+verbosity) << "Not adding." << endl;
3329 }
3330 }
3331
3332 if (bits < SubOrder) {
3333 *out << Verbose(1+verbosity) << "There's still some space left on stack: " << (SubOrder - bits) << "." << endl;
3334 // --2-- look at all added end pieces of this combination, construct bond subsets and sweep through a power set of these by recursion
3335 SP = RootDistance+1; // this is the next level
3336 // first count the members in the subset
3337 SubSetDimension = 0;
3338 Binder = FragmentSearch->BondsPerSPList[2*SP]; // start node for this level
3339 while (Binder->next != FragmentSearch->BondsPerSPList[2*SP+1]) { // compare to end node of this level
3340 Binder = Binder->next;
3341 for (int k=0;k<TouchedIndex;k++) {
3342 if (Binder->Contains(TouchedList[k])) // if we added this very endpiece
3343 SubSetDimension++;
3344 }
3345 }
3346 // then allocate and fill the list
3347 BondsList = (bond **) Malloc(sizeof(bond *)*SubSetDimension, "molecule::SPFragmentGenerator: **BondsList");
3348 SubSetDimension = 0;
3349 Binder = FragmentSearch->BondsPerSPList[2*SP];
3350 while (Binder->next != FragmentSearch->BondsPerSPList[2*SP+1]) {
3351 Binder = Binder->next;
3352 for (int k=0;k<TouchedIndex;k++) {
3353 if (Binder->leftatom->nr == TouchedList[k]) // leftatom is always the close one
3354 BondsList[SubSetDimension++] = Binder;
3355 }
3356 }
3357 *out << Verbose(2+verbosity) << "Calling subset generator " << SP << " away from root " << *FragmentSearch->Root << " with sub set dimension " << SubSetDimension << "." << endl;
3358 SPFragmentGenerator(out, FragmentSearch, SP, BondsList, SubSetDimension, SubOrder-bits);
3359 Free((void **)&BondsList, "molecule::SPFragmentGenerator: **BondsList");
3360 } else {
3361 // --2-- otherwise store the complete fragment
3362 *out << Verbose(1+verbosity) << "Enough items on stack for a fragment!" << endl;
3363 // store fragment as a KeySet
3364 *out << Verbose(2) << "Found a new fragment[" << FragmentSearch->FragmentCounter << "], local nr.s are: ";
3365 for(KeySet::iterator runner = FragmentSearch->FragmentSet->begin(); runner != FragmentSearch->FragmentSet->end(); runner++)
3366 *out << (*runner) << " ";
3367 InsertFragmentIntoGraph(out, FragmentSearch);
3368 //Removal = LookForRemovalCandidate(out, FragmentSearch->FragmentSet, FragmentSearch->ShortestPathList);
3369 //Removal = StoreFragmentFromStack(out, FragmentSearch->Root, FragmentSearch->Leaflet, FragmentSearch->FragmentStack, FragmentSearch->ShortestPathList,FragmentSearch->Labels, &FragmentSearch->FragmentCounter, FragmentSearch->configuration);
3370 }
3371
3372 // --3-- remove all added items in this level from snake stack
3373 *out << Verbose(1+verbosity) << "Removing all items that were added on this SP level " << RootDistance << "." << endl;
3374 for(int j=0;j<TouchedIndex;j++) {
3375 Removal = TouchedList[j];
3376 *out << Verbose(2+verbosity) << "Removing item nr. " << Removal << " from snake stack." << endl;
3377 FragmentSearch->FragmentSet->erase(Removal);
3378 TouchedList[j] = -1;
3379 }
3380 *out << Verbose(2) << "Remaining local nr.s on snake stack are: ";
3381 for(KeySet::iterator runner = FragmentSearch->FragmentSet->begin(); runner != FragmentSearch->FragmentSet->end(); runner++)
3382 *out << (*runner) << " ";
3383 *out << endl;
3384 TouchedIndex = 0; // set Index to 0 for list of atoms added on this level
3385 } else {
3386 *out << Verbose(2+verbosity) << "More atoms to add for this set (" << bits << ") than space left on stack " << SubOrder << ", skipping this set." << endl;
3387 }
3388 }
3389 Free((void **)&TouchedList, "molecule::SPFragmentGenerator: *TouchedList");
3390 *out << Verbose(1+verbosity) << "End of SPFragmentGenerator, " << RootDistance << " away from Root " << *FragmentSearch->Root << " and SubOrder is " << SubOrder << "." << endl;
3391};
3392
3393/** Creates a list of all unique fragments of certain vertex size from a given graph \a Fragment for a given root vertex in the context of \a this molecule.
3394 * -# initialises UniqueFragments structure
3395 * -# fills edge list via BFS
3396 * -# creates the fragment by calling recursive function SPFragmentGenerator with UniqueFragments structure, 0 as
3397 root distance, the edge set, its dimension and the current suborder
3398 * -# Free'ing structure
3399 * Note that we may use the fact that the atoms are SP-ordered on the atomstack. I.e. when popping always the last, we first get all
3400 * with SP of 2, then those with SP of 3, then those with SP of 4 and so on.
3401 * \param *out output stream for debugging
3402 * \param Order bond order (limits BFS exploration and "number of digits" in power set generation
3403 * \param FragmentSearch UniqueFragments structure containing TEFactor, root atom and so on
3404 * \param RestrictedKeySet Restricted vertex set to use in context of molecule
3405 * \return number of inserted fragments
3406 * \note ShortestPathList in FragmentSearch structure is probably due to NumberOfAtomsSPLevel and SP not needed anymore
3407 */
3408int molecule::PowerSetGenerator(ofstream *out, int Order, struct UniqueFragments &FragmentSearch, KeySet RestrictedKeySet)
3409{
3410 int SP, UniqueIndex, AtomKeyNr;
3411 int *NumberOfAtomsSPLevel = (int *) Malloc(sizeof(int)*Order, "molecule::PowerSetGenerator: *SPLevelCount");
3412 atom *Walker = NULL, *OtherWalker = NULL;
3413 bond *Binder = NULL;
3414 bond **BondsList = NULL;
3415 KeyStack AtomStack;
3416 atom **PredecessorList = (atom **) Malloc(sizeof(atom *)*AtomCount, "molecule::PowerSetGenerator: **PredecessorList");
3417 KeySet::iterator runner;
3418 int RootKeyNr = FragmentSearch.Root->nr;
3419 int Counter = FragmentSearch.FragmentCounter;
3420
3421 for (int i=0;i<AtomCount;i++) {
3422 PredecessorList[i] = NULL;
3423 }
3424
3425 *out << endl;
3426 *out << Verbose(0) << "Begin of PowerSetGenerator with order " << Order << " at Root " << *FragmentSearch.Root << "." << endl;
3427
3428 UniqueIndex = 0;
3429 if (FragmentSearch.Labels[RootKeyNr] == -1)
3430 FragmentSearch.Labels[RootKeyNr] = UniqueIndex++;
3431 FragmentSearch.ShortestPathList[RootKeyNr] = 0;
3432 // prepare the atom stack counters (number of atoms with certain SP on stack)
3433 for (int i=0;i<Order;i++)
3434 NumberOfAtomsSPLevel[i] = 0;
3435 NumberOfAtomsSPLevel[0] = 1; // for root
3436 SP = -1;
3437 *out << endl;
3438 *out << Verbose(0) << "Starting BFS analysis ..." << endl;
3439 // push as first on atom stack and goooo ...
3440 AtomStack.clear();
3441 AtomStack.push_back(RootKeyNr);
3442 *out << Verbose(0) << "Cleared AtomStack and pushed root as first item onto it." << endl;
3443 // do a BFS search to fill the SP lists and label the found vertices
3444 while (!AtomStack.empty()) {
3445 // pop next atom
3446 AtomKeyNr = AtomStack.front();
3447 AtomStack.pop_front();
3448 if (SP != -1)
3449 NumberOfAtomsSPLevel[SP]--;
3450 if ((SP == -1) || (NumberOfAtomsSPLevel[SP] == -1)) {
3451 SP++;
3452 NumberOfAtomsSPLevel[SP]--; // carry over "-1" to next level
3453 *out << Verbose(1) << "New SP level reached: " << SP << ", creating new SP list with 0 item(s)";
3454 if (SP > 0)
3455 *out << ", old level closed with " << FragmentSearch.BondsPerSPCount[SP-1] << " item(s)." << endl;
3456 else
3457 *out << "." << endl;
3458 FragmentSearch.BondsPerSPCount[SP] = 0;
3459 } else {
3460 *out << Verbose(1) << "Still " << NumberOfAtomsSPLevel[SP]+1 << " on this SP (" << SP << ") level, currently having " << FragmentSearch.BondsPerSPCount[SP] << " item(s)." << endl;
3461 }
3462 Walker = FindAtom(AtomKeyNr);
3463 *out << Verbose(0) << "Current Walker is: " << *Walker << " with nr " << Walker->nr << " and label " << FragmentSearch.Labels[AtomKeyNr] << " and SP of " << SP << "." << endl;
3464 // check for new sp level
3465 // go through all its bonds
3466 *out << Verbose(1) << "Going through all bonds of Walker." << endl;
3467 for (int i=0;i<NumberOfBondsPerAtom[AtomKeyNr];i++) {
3468 Binder = ListOfBondsPerAtom[AtomKeyNr][i];
3469 OtherWalker = Binder->GetOtherAtom(Walker);
3470 if ((RestrictedKeySet.find(OtherWalker->nr) != RestrictedKeySet.end())
3471#ifdef ADDHYDROGEN
3472 && (OtherWalker->type->Z != 1)
3473#endif
3474 ) { // skip hydrogens and restrict to fragment
3475 *out << Verbose(2) << "Current partner is " << *OtherWalker << " with nr " << OtherWalker->nr << " in bond " << *Binder << "." << endl;
3476 // set the label if not set (and push on root stack as well)
3477 if (FragmentSearch.Labels[OtherWalker->nr] == -1) {
3478 FragmentSearch.Labels[OtherWalker->nr] = UniqueIndex++;
3479 *out << Verbose(3) << "Set label to " << FragmentSearch.Labels[OtherWalker->nr] << "." << endl;
3480 } else {
3481 *out << Verbose(3) << "Label is already " << FragmentSearch.Labels[OtherWalker->nr] << "." << endl;
3482 }
3483 if ((OtherWalker != PredecessorList[AtomKeyNr]) && (OtherWalker->nr > RootKeyNr)) { // only pass through those with label bigger than Root's
3484 FragmentSearch.ShortestPathList[OtherWalker->nr] = SP+1;
3485 *out << Verbose(3) << "Set Shortest Path to " << FragmentSearch.ShortestPathList[OtherWalker->nr] << "." << endl;
3486 if (FragmentSearch.ShortestPathList[OtherWalker->nr] < Order) { // only pass through those within reach of Order
3487 // push for exploration on stack (only if the SP of OtherWalker is longer than of Walker! (otherwise it has been added already!)
3488 if (FragmentSearch.ShortestPathList[OtherWalker->nr] > SP) {
3489 if (FragmentSearch.ShortestPathList[OtherWalker->nr] < (Order-1)) {
3490 *out << Verbose(3) << "Putting on atom stack for further exploration." << endl;
3491 PredecessorList[OtherWalker->nr] = Walker; // note down the one further up the exploration tree
3492 AtomStack.push_back(OtherWalker->nr);
3493 NumberOfAtomsSPLevel[FragmentSearch.ShortestPathList[OtherWalker->nr]]++;
3494 } else {
3495 *out << Verbose(3) << "Not putting on atom stack, is at end of reach." << endl;
3496 }
3497 // add the bond in between to the SP list
3498 Binder = new bond(Walker, OtherWalker); // create a new bond in such a manner, that bond::rightatom is always the one more distant
3499 add(Binder, FragmentSearch.BondsPerSPList[2*SP+1]);
3500 FragmentSearch.BondsPerSPCount[SP]++;
3501 *out << Verbose(3) << "Added its bond to SP list, having now " << FragmentSearch.BondsPerSPCount[SP] << " item(s)." << endl;
3502 } else *out << Verbose(3) << "Not putting on atom stack, nor adding bond, as " << *OtherWalker << "'s SP is shorter than Walker's." << endl;
3503 } else *out << Verbose(3) << "Not continuing, as " << *OtherWalker << " is out of reach of order " << Order << "." << endl;
3504 } else *out << Verbose(3) << "Not passing on, as index of " << *OtherWalker << " " << OtherWalker->nr << " is smaller than that of Root " << RootKeyNr << " or this is my predecessor." << endl;
3505 } else *out << Verbose(2) << "Is not in the restricted keyset or skipping hydrogen " << *OtherWalker << "." << endl;
3506 }
3507 }
3508 // reset predecessor list
3509 for(int i=0;i<Order;i++) {
3510 Binder = FragmentSearch.BondsPerSPList[2*i];
3511 *out << Verbose(1) << "Current SP level is " << i << "." << endl;
3512 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
3513 Binder = Binder->next;
3514 PredecessorList[Binder->rightatom->nr] = NULL; // By construction "OtherAtom" is always Bond::rightatom!
3515 }
3516 }
3517 *out << endl;
3518
3519 // outputting all list for debugging
3520 *out << Verbose(0) << "Printing all found lists." << endl;
3521 for(int i=0;i<Order;i++) {
3522 Binder = FragmentSearch.BondsPerSPList[2*i];
3523 *out << Verbose(1) << "Current SP level is " << i << "." << endl;
3524 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
3525 Binder = Binder->next;
3526 *out << Verbose(2) << *Binder << endl;
3527 }
3528 }
3529
3530 // creating fragments with the found edge sets
3531 SP = 0;
3532 for(int i=0;i<Order;i++) { // sum up all found edges
3533 Binder = FragmentSearch.BondsPerSPList[2*i];
3534 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
3535 Binder = Binder->next;
3536 SP ++;
3537 }
3538 }
3539 *out << Verbose(0) << "Total number of edges is " << SP << "." << endl;
3540 if (SP >= (Order-1)) {
3541 // start with root (push on fragment stack)
3542 *out << Verbose(0) << "Starting fragment generation with " << *FragmentSearch.Root << ", local nr is " << FragmentSearch.Root->nr << "." << endl;
3543 FragmentSearch.FragmentSet->clear();
3544 FragmentSearch.FragmentSet->insert(FragmentSearch.Root->nr);
3545
3546 if (FragmentSearch.FragmentSet->size() == (unsigned int) Order) {
3547 *out << Verbose(0) << "Enough items on stack already for a fragment!" << endl;
3548 // store fragment as a KeySet
3549 *out << Verbose(2) << "Found a new fragment[" << FragmentSearch.FragmentCounter << "], local nr.s are: ";
3550 for(KeySet::iterator runner = FragmentSearch.FragmentSet->begin(); runner != FragmentSearch.FragmentSet->end(); runner++) {
3551 *out << (*runner) << " ";
3552 }
3553 *out << endl;
3554 InsertFragmentIntoGraph(out, &FragmentSearch);
3555 } else {
3556 *out << Verbose(0) << "Preparing subset for this root and calling generator." << endl;
3557 // prepare the subset and call the generator
3558 BondsList = (bond **) Malloc(sizeof(bond *)*FragmentSearch.BondsPerSPCount[0], "molecule::PowerSetGenerator: **BondsList");
3559 Binder = FragmentSearch.BondsPerSPList[0];
3560 for(int i=0;i<FragmentSearch.BondsPerSPCount[0];i++) {
3561 Binder = Binder->next;
3562 BondsList[i] = Binder;
3563 }
3564 SPFragmentGenerator(out, &FragmentSearch, 0, BondsList, FragmentSearch.BondsPerSPCount[0], Order-1);
3565 Free((void **)&BondsList, "molecule::PowerSetGenerator: **BondsList");
3566 }
3567 } else {
3568 *out << Verbose(0) << "Not enough total number of edges to build " << Order << "-body fragments." << endl;
3569 }
3570
3571 // as FragmentSearch structure is used only once, we don't have to clean it anymore
3572 // remove root from stack
3573 *out << Verbose(0) << "Removing root again from stack." << endl;
3574 FragmentSearch.FragmentSet->erase(FragmentSearch.Root->nr);
3575
3576 // free'ing the bonds lists
3577 *out << Verbose(0) << "Free'ing all found lists. and resetting index lists" << endl;
3578 for(int i=0;i<Order;i++) {
3579 *out << Verbose(1) << "Current SP level is " << i << ": ";
3580 Binder = FragmentSearch.BondsPerSPList[2*i];
3581 while (Binder->next != FragmentSearch.BondsPerSPList[2*i+1]) {
3582 Binder = Binder->next;
3583 // *out << "Removing atom " << Binder->leftatom->nr << " and " << Binder->rightatom->nr << "." << endl; // make sure numbers are local
3584 FragmentSearch.ShortestPathList[Binder->leftatom->nr] = -1;
3585 FragmentSearch.ShortestPathList[Binder->rightatom->nr] = -1;
3586 }
3587 // delete added bonds
3588 cleanup(FragmentSearch.BondsPerSPList[2*i], FragmentSearch.BondsPerSPList[2*i+1]);
3589 // also start and end node
3590 *out << "cleaned." << endl;
3591 }
3592
3593 // free allocated memory
3594 Free((void **)&NumberOfAtomsSPLevel, "molecule::PowerSetGenerator: *SPLevelCount");
3595 Free((void **)&PredecessorList, "molecule::PowerSetGenerator: **PredecessorList");
3596
3597 // return list
3598 *out << Verbose(0) << "End of PowerSetGenerator." << endl;
3599 return (FragmentSearch.FragmentCounter - Counter);
3600};
3601
3602/** Corrects the nuclei position if the fragment was created over the cell borders.
3603 * Scans all bonds, checks the distance, if greater than typical, we have a candidate for the correction.
3604 * We remove the bond whereafter the graph probably separates. Then, we translate the one component periodically
3605 * and re-add the bond. Looping on the distance check.
3606 * \param *out ofstream for debugging messages
3607 */
3608void molecule::ScanForPeriodicCorrection(ofstream *out)
3609{
3610 bond *Binder = NULL;
3611 bond *OtherBinder = NULL;
3612 atom *Walker = NULL;
3613 atom *OtherWalker = NULL;
3614 double *matrix = ReturnFullMatrixforSymmetric(cell_size);
3615 enum Shading *ColorList = NULL;
3616 double tmp;
3617 vector TranslationVector;
3618 //class StackClass<atom *> *CompStack = NULL;
3619 class StackClass<atom *> *AtomStack = new StackClass<atom *>(AtomCount);
3620 bool flag = true;
3621
3622// *out << Verbose(1) << "Begin of ScanForPeriodicCorrection." << endl;
3623
3624 ColorList = (enum Shading *) Malloc(sizeof(enum Shading)*AtomCount, "molecule::ScanForPeriodicCorrection: *ColorList");
3625 while (flag) {
3626 // remove bonds that are beyond bonddistance
3627 for(int i=0;i<NDIM;i++)
3628 TranslationVector.x[i] = 0.;
3629 // scan all bonds
3630 Binder = first;
3631 flag = false;
3632 while ((!flag) && (Binder->next != last)) {
3633 Binder = Binder->next;
3634 for (int i=0;i<NDIM;i++) {
3635 tmp = fabs(Binder->leftatom->x.x[i] - Binder->rightatom->x.x[i]);
3636 //*out << Verbose(3) << "Checking " << i << "th distance of " << *Binder->leftatom << " to " << *Binder->rightatom << ": " << tmp << "." << endl;
3637 if (tmp > BondDistance) {
3638 OtherBinder = Binder->next; // note down binding partner for later re-insertion
3639 unlink(Binder); // unlink bond
3640// *out << Verbose(2) << "Correcting at bond " << *Binder << "." << endl;
3641 flag = true;
3642 break;
3643 }
3644 }
3645 }
3646 if (flag) {
3647 // create translation vector from their periodically modified distance
3648 for (int i=0;i<NDIM;i++) {
3649 tmp = Binder->leftatom->x.x[i] - Binder->rightatom->x.x[i];
3650 if (fabs(tmp) > BondDistance)
3651 TranslationVector.x[i] = (tmp < 0) ? +1. : -1.;
3652 }
3653 TranslationVector.MatrixMultiplication(matrix);
3654 //*out << "Translation vector is ";
3655 //TranslationVector.Output(out);
3656 //*out << endl;
3657 // apply to all atoms of first component via BFS
3658 for (int i=0;i<AtomCount;i++)
3659 ColorList[i] = white;
3660 AtomStack->Push(Binder->leftatom);
3661 while (!AtomStack->IsEmpty()) {
3662 Walker = AtomStack->PopFirst();
3663 //*out << Verbose (3) << "Current Walker is: " << *Walker << "." << endl;
3664 ColorList[Walker->nr] = black; // mark as explored
3665 Walker->x.AddVector(&TranslationVector); // translate
3666 for (int i=0;i<NumberOfBondsPerAtom[Walker->nr];i++) { // go through all binding partners
3667 if (ListOfBondsPerAtom[Walker->nr][i] != Binder) {
3668 OtherWalker = ListOfBondsPerAtom[Walker->nr][i]->GetOtherAtom(Walker);
3669 if (ColorList[OtherWalker->nr] == white) {
3670 AtomStack->Push(OtherWalker); // push if yet unexplored
3671 }
3672 }
3673 }
3674 }
3675 // re-add bond
3676 link(Binder, OtherBinder);
3677 } else {
3678// *out << Verbose(2) << "No corrections for this fragment." << endl;
3679 }
3680 //delete(CompStack);
3681 }
3682
3683 // free allocated space from ReturnFullMatrixforSymmetric()
3684 delete(AtomStack);
3685 Free((void **)&ColorList, "molecule::ScanForPeriodicCorrection: *ColorList");
3686 Free((void **)&matrix, "molecule::ScanForPeriodicCorrection: *matrix");
3687// *out << Verbose(1) << "End of ScanForPeriodicCorrection." << endl;
3688};
3689
3690/** Blows the 6-dimensional \a cell_size array up to a full NDIM by NDIM matrix.
3691 * \param *symm 6-dim array of unique symmetric matrix components
3692 * \return allocated NDIM*NDIM array with the symmetric matrix
3693 */
3694double * molecule::ReturnFullMatrixforSymmetric(double *symm)
3695{
3696 double *matrix = (double *) Malloc(sizeof(double)*NDIM*NDIM, "molecule::ReturnFullMatrixforSymmetric: *matrix");
3697 matrix[0] = symm[0];
3698 matrix[1] = symm[1];
3699 matrix[2] = symm[3];
3700 matrix[3] = symm[1];
3701 matrix[4] = symm[2];
3702 matrix[5] = symm[4];
3703 matrix[6] = symm[3];
3704 matrix[7] = symm[4];
3705 matrix[8] = symm[5];
3706 return matrix;
3707};
3708
3709bool KeyCompare::operator() (const KeySet SubgraphA, const KeySet SubgraphB) const
3710{
3711 //cout << "my check is used." << endl;
3712 if (SubgraphA.size() < SubgraphB.size()) {
3713 return true;
3714 } else {
3715 if (SubgraphA.size() > SubgraphB.size()) {
3716 return false;
3717 } else {
3718 KeySet::iterator IteratorA = SubgraphA.begin();
3719 KeySet::iterator IteratorB = SubgraphB.begin();
3720 while ((IteratorA != SubgraphA.end()) && (IteratorB != SubgraphB.end())) {
3721 if ((*IteratorA) < (*IteratorB))
3722 return true;
3723 else if ((*IteratorA) > (*IteratorB)) {
3724 return false;
3725 } // else, go on to next index
3726 IteratorA++;
3727 IteratorB++;
3728 } // end of while loop
3729 }// end of check in case of equal sizes
3730 }
3731 return false; // if we reach this point, they are equal
3732};
3733
3734//bool operator < (KeySet SubgraphA, KeySet SubgraphB)
3735//{
3736// return KeyCompare(SubgraphA, SubgraphB);
3737//};
3738
3739/** Checking whether KeySet is not already present in Graph, if so just adds factor.
3740 * \param *out output stream for debugging
3741 * \param &set KeySet to insert
3742 * \param &graph Graph to insert into
3743 * \param *counter pointer to unique fragment count
3744 * \param factor energy factor for the fragment
3745 */
3746inline void InsertFragmentIntoGraph(ofstream *out, struct UniqueFragments *Fragment)
3747{
3748 GraphTestPair testGraphInsert;
3749
3750 testGraphInsert = Fragment->Leaflet->insert(GraphPair (*Fragment->FragmentSet,pair<int,double>(Fragment->FragmentCounter,Fragment->TEFactor))); // store fragment number and current factor
3751 if (testGraphInsert.second) {
3752 *out << Verbose(2) << "KeySet " << Fragment->FragmentCounter << " successfully inserted." << endl;
3753 Fragment->FragmentCounter++;
3754 } else {
3755 *out << Verbose(2) << "KeySet " << Fragment->FragmentCounter << " failed to insert, present fragment is " << ((*(testGraphInsert.first)).second).first << endl;
3756 ((*(testGraphInsert.first)).second).second += Fragment->TEFactor; // increase the "created" counter
3757 *out << Verbose(2) << "New factor is " << ((*(testGraphInsert.first)).second).second << "." << endl;
3758 }
3759};
3760//void inline InsertIntoGraph(ofstream *out, KeyStack &stack, Graph &graph, int *counter, double factor)
3761//{
3762// // copy stack contents to set and call overloaded function again
3763// KeySet set;
3764// for(KeyStack::iterator runner = stack.begin(); runner != stack.begin(); runner++)
3765// set.insert((*runner));
3766// InsertIntoGraph(out, set, graph, counter, factor);
3767//};
3768
3769/** Inserts each KeySet in \a graph2 into \a graph1.
3770 * \param *out output stream for debugging
3771 * \param graph1 first (dest) graph
3772 * \param graph2 second (source) graph
3773 * \param *counter keyset counter that gets increased
3774 */
3775inline void InsertGraphIntoGraph(ofstream *out, Graph &graph1, Graph &graph2, int *counter)
3776{
3777 GraphTestPair testGraphInsert;
3778
3779 for(Graph::iterator runner = graph2.begin(); runner != graph2.end(); runner++) {
3780 testGraphInsert = graph1.insert(GraphPair ((*runner).first,pair<int,double>((*counter)++,((*runner).second).second))); // store fragment number and current factor
3781 if (testGraphInsert.second) {
3782 *out << Verbose(2) << "KeySet " << (*counter)-1 << " successfully inserted." << endl;
3783 } else {
3784 *out << Verbose(2) << "KeySet " << (*counter)-1 << " failed to insert, present fragment is " << ((*(testGraphInsert.first)).second).first << endl;
3785 ((*(testGraphInsert.first)).second).second += (*runner).second.second;
3786 *out << Verbose(2) << "New factor is " << (*(testGraphInsert.first)).second.second << "." << endl;
3787 }
3788 }
3789};
3790
3791
3792/** Performs BOSSANOVA decomposition at selected sites, increasing the cutoff by one at these sites.
3793 * -# constructs a complete keyset of the molecule
3794 * -# In a loop over all possible roots from the given rootstack
3795 * -# increases order of root site
3796 * -# calls PowerSetGenerator with this order, the complete keyset and the rootkeynr
3797 * -# for all consecutive lower levels PowerSetGenerator is called with the suborder, the higher order keyset
3798as the restricted one and each site in the set as the root)
3799 * -# these are merged into a fragment list of keysets
3800 * -# All fragment lists (for all orders, i.e. from all destination fields) are merged into one list for return
3801 * Important only is that we create all fragments, it is not important if we create them more than once
3802 * as these copies are filtered out via use of the hash table (KeySet).
3803 * \param *out output stream for debugging
3804 * \param Fragment&*List list of already present keystacks (adaptive scheme) or empty list
3805 * \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
3806 * \param *MinimumRingSize minimum ring size for each atom (molecule::Atomcount)
3807 * \return pointer to Graph list
3808 */
3809void molecule::FragmentBOSSANOVA(ofstream *out, Graph *&FragmentList, KeyStack &RootStack, int *MinimumRingSize)
3810{
3811 Graph ***FragmentLowerOrdersList = NULL;
3812 int Order, NumLevels, NumMolecules, TotalNumMolecules = 0, *NumMoleculesOfOrder = NULL;
3813 int counter = 0;
3814 int UpgradeCount = RootStack.size();
3815 KeyStack FragmentRootStack;
3816 int RootKeyNr, RootNr;
3817 struct UniqueFragments FragmentSearch;
3818
3819 *out << Verbose(0) << "Begin of FragmentBOSSANOVA." << endl;
3820
3821 // FragmentLowerOrdersList is a 2D-array of pointer to MoleculeListClass objects, one dimension represents the ANOVA expansion of a single order (i.e. 5)
3822 // with all needed lower orders that are subtracted, the other dimension is the BondOrder (i.e. from 1 to 5)
3823 NumMoleculesOfOrder = (int *) Malloc(sizeof(int)*UpgradeCount, "molecule::FragmentBOSSANOVA: *NumMoleculesOfOrder");
3824 FragmentLowerOrdersList = (Graph ***) Malloc(sizeof(Graph **)*UpgradeCount, "molecule::FragmentBOSSANOVA: ***FragmentLowerOrdersList");
3825
3826 // initialise the fragments structure
3827 FragmentSearch.BondsPerSPList = (bond **) Malloc(sizeof(bond *)*Order*2, "molecule::PowerSetGenerator: ***BondsPerSPList");
3828 FragmentSearch.BondsPerSPCount = (int *) Malloc(sizeof(int)*Order, "molecule::PowerSetGenerator: *BondsPerSPCount");
3829 FragmentSearch.ShortestPathList = (int *) Malloc(sizeof(int)*AtomCount, "molecule::PowerSetGenerator: *ShortestPathList");
3830 FragmentSearch.Labels = (int *) Malloc(sizeof(int)*AtomCount, "molecule::PowerSetGenerator: *Labels");
3831 FragmentSearch.FragmentCounter = 0;
3832 FragmentSearch.FragmentSet = new KeySet;
3833 FragmentSearch.Root = FindAtom(RootKeyNr);
3834 for (int i=0;i<AtomCount;i++) {
3835 FragmentSearch.Labels[i] = -1;
3836 FragmentSearch.ShortestPathList[i] = -1;
3837 }
3838
3839 // Construct the complete KeySet which we need for topmost level only (but for all Roots)
3840 atom *Walker = start;
3841 KeySet CompleteMolecule;
3842 while (Walker->next != end) {
3843 Walker = Walker->next;
3844 CompleteMolecule.insert(Walker->GetTrueFather()->nr);
3845 }
3846
3847 // this can easily be seen: if Order is 5, then the number of levels for each lower order is the total sum of the number of levels above, as
3848 // each has to be split up. E.g. for the second level we have one from 5th, one from 4th, two from 3th (which in turn is one from 5th, one from 4th),
3849 // hence we have overall four 2th order levels for splitting. This also allows for putting all into a single array (FragmentLowerOrdersList[])
3850 // with the order along the cells as this: 5433222211111111 for BondOrder 5 needing 16=pow(2,5-1) cells (only we use bit-shifting which is faster)
3851 RootNr = 0; // counts through the roots in RootStack
3852 while (RootNr < UpgradeCount) {
3853 RootKeyNr = RootStack.front();
3854 RootStack.pop_front();
3855 Walker = FindAtom(RootKeyNr);
3856 // check cyclic lengths
3857 if ((MinimumRingSize[Walker->GetTrueFather()->nr] != -1) && (Walker->GetTrueFather()->AdaptiveOrder+1 >= MinimumRingSize[Walker->GetTrueFather()->nr])) {
3858 *out << Verbose(0) << "Bond order " << Walker->GetTrueFather()->AdaptiveOrder << " of Root " << *Walker << " greater than or equal to Minimum Ring size of " << MinimumRingSize << " found is not allowed." << endl;
3859 } else {
3860 // increase adaptive order by one
3861 Walker->GetTrueFather()->AdaptiveOrder++;
3862 Order = Walker->AdaptiveOrder = Walker->GetTrueFather()->AdaptiveOrder;
3863
3864 // initialise Order-dependent entries of UniqueFragments structure
3865 FragmentSearch.BondsPerSPList = (bond **) Malloc(sizeof(bond *)*Order*2, "molecule::PowerSetGenerator: ***BondsPerSPList");
3866 FragmentSearch.BondsPerSPCount = (int *) Malloc(sizeof(int)*Order, "molecule::PowerSetGenerator: *BondsPerSPCount");
3867 for (int i=0;i<Order;i++) {
3868 FragmentSearch.BondsPerSPList[2*i] = new bond(); // start node
3869 FragmentSearch.BondsPerSPList[2*i+1] = new bond(); // end node
3870 FragmentSearch.BondsPerSPList[2*i]->next = FragmentSearch.BondsPerSPList[2*i+1]; // intertwine these two
3871 FragmentSearch.BondsPerSPList[2*i+1]->previous = FragmentSearch.BondsPerSPList[2*i];
3872 FragmentSearch.BondsPerSPCount[i] = 0;
3873 }
3874
3875 // allocate memory for all lower level orders in this 1D-array of ptrs
3876 NumLevels = 1 << (Order-1); // (int)pow(2,Order);
3877 FragmentLowerOrdersList[RootNr] = (Graph **) Malloc(sizeof(Graph *)*NumLevels, "molecule::FragmentBOSSANOVA: **FragmentLowerOrdersList[]");
3878
3879 // create top order where nothing is reduced
3880 *out << Verbose(0) << "==============================================================================================================" << endl;
3881 *out << Verbose(0) << "Creating KeySets of Bond Order " << Order << " for " << *Walker << ", NumLevels is " << NumLevels << ", " << (RootStack.size()-RootNr-1) << " Roots remaining." << endl;
3882
3883 // Create list of Graphs of current Bond Order (i.e. F_{ij})
3884 FragmentLowerOrdersList[RootNr][0] = new Graph;
3885 FragmentSearch.TEFactor = 1.;
3886 FragmentSearch.Leaflet = FragmentLowerOrdersList[RootNr][0]; // set to insertion graph
3887 FragmentSearch.Root = Walker;
3888 NumMoleculesOfOrder[RootNr] = PowerSetGenerator(out, Walker->AdaptiveOrder, FragmentSearch, CompleteMolecule);
3889 *out << Verbose(1) << "Number of resulting KeySets is: " << NumMoleculesOfOrder[RootNr] << "." << endl;
3890 NumMolecules = 0;
3891
3892 if ((NumLevels >> 1) > 0) {
3893 // create lower order fragments
3894 *out << Verbose(0) << "Creating list of unique fragments of lower Bond Order terms to be subtracted." << endl;
3895 Order = Walker->AdaptiveOrder;
3896 for (int source=0;source<(NumLevels >> 1);source++) { // 1-terms don't need any more splitting, that's why only half is gone through (shift again)
3897 // step down to next order at (virtual) boundary of powers of 2 in array
3898 while (source >= (1 << (Walker->AdaptiveOrder-Order))) // (int)pow(2,Walker->AdaptiveOrder-Order))
3899 Order--;
3900 *out << Verbose(0) << "Current Order is: " << Order << "." << endl;
3901 for (int SubOrder=Order-1;SubOrder>0;SubOrder--) {
3902 int dest = source + (1 << (Walker->AdaptiveOrder-(SubOrder+1)));
3903 *out << Verbose(0) << "--------------------------------------------------------------------------------------------------------------" << endl;
3904 *out << Verbose(0) << "Current SubOrder is: " << SubOrder << " with source " << source << " to destination " << dest << "." << endl;
3905
3906 // every molecule is split into a list of again (Order - 1) molecules, while counting all molecules
3907 //*out << Verbose(1) << "Splitting the " << (*FragmentLowerOrdersList[RootNr][source]).size() << " molecules of the " << source << "th cell in the array." << endl;
3908 //NumMolecules = 0;
3909 FragmentLowerOrdersList[RootNr][dest] = new Graph;
3910 for(Graph::iterator runner = (*FragmentLowerOrdersList[RootNr][source]).begin();runner != (*FragmentLowerOrdersList[RootNr][source]).end(); runner++) {
3911 for (KeySet::iterator sprinter = (*runner).first.begin();sprinter != (*runner).first.end(); sprinter++) {
3912 Graph TempFragmentList;
3913 FragmentSearch.TEFactor = -(*runner).second.second;
3914 FragmentSearch.Leaflet = &TempFragmentList; // set to insertion graph
3915 FragmentSearch.Root = FindAtom(*sprinter);
3916 NumMoleculesOfOrder[RootNr] += PowerSetGenerator(out, SubOrder, FragmentSearch, (*runner).first);
3917 // insert new keysets FragmentList into FragmentLowerOrdersList[Walker->AdaptiveOrder-1][dest]
3918 *out << Verbose(1) << "Merging resulting key sets with those present in destination " << dest << "." << endl;
3919 InsertGraphIntoGraph(out, *FragmentLowerOrdersList[RootNr][dest], TempFragmentList, &NumMolecules);
3920 }
3921 }
3922 *out << Verbose(1) << "Number of resulting molecules for SubOrder " << SubOrder << " is: " << NumMolecules << "." << endl;
3923 }
3924 }
3925 }
3926 // now, we have completely filled each cell of FragmentLowerOrdersList[] for the current Walker->AdaptiveOrder
3927 //NumMoleculesOfOrder[Walker->AdaptiveOrder-1] = NumMolecules;
3928 TotalNumMolecules += NumMoleculesOfOrder[RootNr];
3929 *out << Verbose(1) << "Number of resulting molecules for Order " << (int)Walker->GetTrueFather()->AdaptiveOrder << " is: " << NumMoleculesOfOrder[RootNr] << "." << endl;
3930 RootStack.push_back(RootKeyNr); // put back on stack
3931 RootNr++;
3932
3933 // free Order-dependent entries of UniqueFragments structure for next loop cycle
3934 Free((void **)&FragmentSearch.BondsPerSPCount, "molecule::PowerSetGenerator: *BondsPerSPCount");
3935 for (int i=0;i<Order;i++) {
3936 delete(FragmentSearch.BondsPerSPList[2*i]);
3937 delete(FragmentSearch.BondsPerSPList[2*i+1]);
3938 }
3939 Free((void **)&FragmentSearch.BondsPerSPList, "molecule::PowerSetGenerator: ***BondsPerSPList");
3940 }
3941 }
3942 *out << Verbose(0) << "==============================================================================================================" << endl;
3943 *out << Verbose(1) << "Total number of resulting molecules is: " << TotalNumMolecules << "." << endl;
3944 *out << Verbose(0) << "==============================================================================================================" << endl;
3945
3946 // cleanup FragmentSearch structure
3947 Free((void **)&FragmentSearch.ShortestPathList, "molecule::PowerSetGenerator: *ShortestPathList");
3948 Free((void **)&FragmentSearch.Labels, "molecule::PowerSetGenerator: *Labels");
3949 delete(FragmentSearch.FragmentSet);
3950
3951 // now, FragmentLowerOrdersList is complete, it looks - for BondOrder 5 - as this (number is the ANOVA Order of the terms therein)
3952 // 5433222211111111
3953 // 43221111
3954 // 3211
3955 // 21
3956 // 1
3957
3958 // Subsequently, we combine all into a single list (FragmentList)
3959
3960 *out << Verbose(0) << "Combining the lists of all orders per order and finally into a single one." << endl;
3961 if (FragmentList == NULL) {
3962 FragmentList = new Graph;
3963 counter = 0;
3964 } else {
3965 counter = FragmentList->size();
3966 }
3967 RootNr = 0;
3968 while (!RootStack.empty()) {
3969 RootKeyNr = RootStack.front();
3970 RootStack.pop_front();
3971 Walker = FindAtom(RootKeyNr);
3972 NumLevels = 1 << (Walker->AdaptiveOrder - 1);
3973 for(int i=0;i<NumLevels;i++) {
3974 InsertGraphIntoGraph(out, *FragmentList, (*FragmentLowerOrdersList[RootNr][i]), &counter);
3975 delete(FragmentLowerOrdersList[RootNr][i]);
3976 }
3977 Free((void **)&FragmentLowerOrdersList[RootNr], "molecule::FragmentBOSSANOVA: **FragmentLowerOrdersList[]");
3978 RootNr++;
3979 }
3980 Free((void **)&FragmentLowerOrdersList, "molecule::FragmentBOSSANOVA: ***FragmentLowerOrdersList");
3981 Free((void **)&NumMoleculesOfOrder, "molecule::FragmentBOSSANOVA: *NumMoleculesOfOrder");
3982
3983 *out << Verbose(0) << "End of FragmentBOSSANOVA." << endl;
3984};
3985
3986/** Comparison function for GSL heapsort on distances in two molecules.
3987 * \param *a
3988 * \param *b
3989 * \return <0, \a *a less than \a *b, ==0 if equal, >0 \a *a greater than \a *b
3990 */
3991int CompareDoubles (const void * a, const void * b)
3992{
3993 if (*(double *)a > *(double *)b)
3994 return -1;
3995 else if (*(double *)a < *(double *)b)
3996 return 1;
3997 else
3998 return 0;
3999};
4000
4001/** Determines whether two molecules actually contain the same atoms and coordination.
4002 * \param *out output stream for debugging
4003 * \param *OtherMolecule the molecule to compare this one to
4004 * \param threshold upper limit of difference when comparing the coordination.
4005 * \return NULL - not equal, otherwise an allocated (molecule::AtomCount) permutation map of the atom numbers (which corresponds to which)
4006 */
4007int * molecule::IsEqualToWithinThreshold(ofstream *out, molecule *OtherMolecule, double threshold)
4008{
4009 int flag;
4010 double *Distances = NULL, *OtherDistances = NULL;
4011 vector CenterOfGravity, OtherCenterOfGravity;
4012 size_t *PermMap = NULL, *OtherPermMap = NULL;
4013 int *PermutationMap = NULL;
4014 atom *Walker = NULL;
4015 bool result = true; // status of comparison
4016
4017 *out << Verbose(3) << "Begin of IsEqualToWithinThreshold." << endl;
4018 /// first count both their atoms and elements and update lists thereby ...
4019 //*out << Verbose(0) << "Counting atoms, updating list" << endl;
4020 CountAtoms(out);
4021 OtherMolecule->CountAtoms(out);
4022 CountElements();
4023 OtherMolecule->CountElements();
4024
4025 /// ... and compare:
4026 /// -# AtomCount
4027 if (result) {
4028 if (AtomCount != OtherMolecule->AtomCount) {
4029 *out << Verbose(4) << "AtomCounts don't match: " << AtomCount << " == " << OtherMolecule->AtomCount << endl;
4030 result = false;
4031 } else *out << Verbose(4) << "AtomCounts match: " << AtomCount << " == " << OtherMolecule->AtomCount << endl;
4032 }
4033 /// -# ElementCount
4034 if (result) {
4035 if (ElementCount != OtherMolecule->ElementCount) {
4036 *out << Verbose(4) << "ElementCount don't match: " << ElementCount << " == " << OtherMolecule->ElementCount << endl;
4037 result = false;
4038 } else *out << Verbose(4) << "ElementCount match: " << ElementCount << " == " << OtherMolecule->ElementCount << endl;
4039 }
4040 /// -# ElementsInMolecule
4041 if (result) {
4042 for (flag=0;flag<MAX_ELEMENTS;flag++) {
4043 //*out << Verbose(5) << "Element " << flag << ": " << ElementsInMolecule[flag] << " <-> " << OtherMolecule->ElementsInMolecule[flag] << "." << endl;
4044 if (ElementsInMolecule[flag] != OtherMolecule->ElementsInMolecule[flag])
4045 break;
4046 }
4047 if (flag < MAX_ELEMENTS) {
4048 *out << Verbose(4) << "ElementsInMolecule don't match." << endl;
4049 result = false;
4050 } else *out << Verbose(4) << "ElementsInMolecule match." << endl;
4051 }
4052 /// then determine and compare center of gravity for each molecule ...
4053 if (result) {
4054 *out << Verbose(5) << "Calculating Centers of Gravity" << endl;
4055 DetermineCenterOfGravity(CenterOfGravity);
4056 OtherMolecule->DetermineCenterOfGravity(OtherCenterOfGravity);
4057 *out << Verbose(5) << "Center of Gravity: ";
4058 CenterOfGravity.Output(out);
4059 *out << endl << Verbose(5) << "Other Center of Gravity: ";
4060 OtherCenterOfGravity.Output(out);
4061 *out << endl;
4062 if (CenterOfGravity.Distance(&OtherCenterOfGravity) > threshold) {
4063 *out << Verbose(4) << "Centers of gravity don't match." << endl;
4064 result = false;
4065 }
4066 }
4067
4068 /// ... then make a list with the euclidian distance to this center for each atom of both molecules
4069 if (result) {
4070 *out << Verbose(5) << "Calculating distances" << endl;
4071 Distances = (double *) Malloc(sizeof(double)*AtomCount, "molecule::IsEqualToWithinThreshold: Distances");
4072 OtherDistances = (double *) Malloc(sizeof(double)*AtomCount, "molecule::IsEqualToWithinThreshold: OtherDistances");
4073 Walker = start;
4074 while (Walker->next != end) {
4075 Walker = Walker->next;
4076 //for (i=0;i<AtomCount;i++) {
4077 Distances[Walker->nr] = CenterOfGravity.Distance(&Walker->x);
4078 }
4079 Walker = OtherMolecule->start;
4080 while (Walker->next != OtherMolecule->end) {
4081 Walker = Walker->next;
4082 OtherDistances[Walker->nr] = OtherCenterOfGravity.Distance(&Walker->x);
4083 }
4084
4085 /// ... sort each list (using heapsort (o(N log N)) from GSL)
4086 *out << Verbose(5) << "Sorting distances" << endl;
4087 PermMap = (size_t *) Malloc(sizeof(size_t)*AtomCount, "molecule::IsEqualToWithinThreshold: *PermMap");
4088 OtherPermMap = (size_t *) Malloc(sizeof(size_t)*AtomCount, "molecule::IsEqualToWithinThreshold: *OtherPermMap");
4089 gsl_heapsort_index (PermMap, Distances, AtomCount, sizeof(double), CompareDoubles);
4090 gsl_heapsort_index (OtherPermMap, OtherDistances, AtomCount, sizeof(double), CompareDoubles);
4091 PermutationMap = (int *) Malloc(sizeof(int)*AtomCount, "molecule::IsEqualToWithinThreshold: *PermutationMap");
4092 *out << Verbose(5) << "Combining Permutation Maps" << endl;
4093 for(int i=0;i<AtomCount;i++)
4094 PermutationMap[PermMap[i]] = (int) OtherPermMap[i];
4095
4096 /// ... and compare them step by step, whether the difference is individiually(!) below \a threshold for all
4097 *out << Verbose(4) << "Comparing distances" << endl;
4098 flag = 0;
4099 for (int i=0;i<AtomCount;i++) {
4100 *out << Verbose(5) << "Distances: |" << Distances[PermMap[i]] << " - " << OtherDistances[OtherPermMap[i]] << "| = " << fabs(Distances[PermMap[i]] - OtherDistances[OtherPermMap[i]]) << " ?<? " << threshold << endl;
4101 if (fabs(Distances[PermMap[i]] - OtherDistances[OtherPermMap[i]]) > threshold)
4102 flag = 1;
4103 }
4104 Free((void **)&PermMap, "molecule::IsEqualToWithinThreshold: *PermMap");
4105 Free((void **)&OtherPermMap, "molecule::IsEqualToWithinThreshold: *OtherPermMap");
4106
4107 /// free memory
4108 Free((void **)&Distances, "molecule::IsEqualToWithinThreshold: Distances");
4109 Free((void **)&OtherDistances, "molecule::IsEqualToWithinThreshold: OtherDistances");
4110 if (flag) { // if not equal
4111 Free((void **)&PermutationMap, "molecule::IsEqualToWithinThreshold: *PermutationMap");
4112 result = false;
4113 }
4114 }
4115 /// return pointer to map if all distances were below \a threshold
4116 *out << Verbose(3) << "End of IsEqualToWithinThreshold." << endl;
4117 if (result) {
4118 *out << Verbose(3) << "Result: Equal." << endl;
4119 return PermutationMap;
4120 } else {
4121 *out << Verbose(3) << "Result: Not equal." << endl;
4122 return NULL;
4123 }
4124};
4125
4126/** Returns an index map for two father-son-molecules.
4127 * The map tells which atom in this molecule corresponds to which one in the other molecul with their fathers.
4128 * \param *out output stream for debugging
4129 * \param *OtherMolecule corresponding molecule with fathers
4130 * \return allocated map of size molecule::AtomCount with map
4131 * \todo make this with a good sort O(n), not O(n^2)
4132 */
4133int * molecule::GetFatherSonAtomicMap(ofstream *out, molecule *OtherMolecule)
4134{
4135 atom *Walker = NULL, *OtherWalker = NULL;
4136 *out << Verbose(3) << "Begin of GetFatherAtomicMap." << endl;
4137 int *AtomicMap = (int *) Malloc(sizeof(int)*AtomCount, "molecule::GetAtomicMap: *AtomicMap"); //Calloc
4138 for (int i=0;i<AtomCount;i++)
4139 AtomicMap[i] = -1;
4140 if (OtherMolecule == this) { // same molecule
4141 for (int i=0;i<AtomCount;i++) // no need as -1 means already that there is trivial correspondence
4142 AtomicMap[i] = i;
4143 *out << Verbose(4) << "Map is trivial." << endl;
4144 } else {
4145 *out << Verbose(4) << "Map is ";
4146 Walker = start;
4147 while (Walker->next != end) {
4148 Walker = Walker->next;
4149 if (Walker->father == NULL) {
4150 AtomicMap[Walker->nr] = -2;
4151 } else {
4152 OtherWalker = OtherMolecule->start;
4153 while (OtherWalker->next != OtherMolecule->end) {
4154 OtherWalker = OtherWalker->next;
4155 //for (int i=0;i<AtomCount;i++) { // search atom
4156 //for (int j=0;j<OtherMolecule->AtomCount;j++) {
4157 //*out << Verbose(4) << "Comparing father " << Walker->father << " with the other one " << OtherWalker->father << "." << endl;
4158 if (Walker->father == OtherWalker)
4159 AtomicMap[Walker->nr] = OtherWalker->nr;
4160 }
4161 }
4162 *out << AtomicMap[Walker->nr] << "\t";
4163 }
4164 *out << endl;
4165 }
4166 *out << Verbose(3) << "End of GetFatherAtomicMap." << endl;
4167 return AtomicMap;
4168};
4169
Note: See TracBrowser for help on using the repository browser.