/* * Project: MoleCuilder * Description: creates and alters molecular systems * Copyright (C) 2010 University of Bonn. All rights reserved. * Please see the LICENSE file or "Copyright notice" in builder.cpp for details. */ /* * molecule_graph.cpp * * Created on: Oct 5, 2009 * Author: heber */ // include config.h #ifdef HAVE_CONFIG_H #include #endif #include "Helpers/MemDebug.hpp" #include #include "atom.hpp" #include "bond.hpp" #include "bondgraph.hpp" #include "config.hpp" #include "Helpers/defs.hpp" #include "element.hpp" #include "Helpers/helpers.hpp" #include "Helpers/Info.hpp" #include "linkedcell.hpp" #include "lists.hpp" #include "Helpers/Verbose.hpp" #include "Helpers/Log.hpp" #include "molecule.hpp" #include "World.hpp" #include "Helpers/fast_functions.hpp" #include "Helpers/Assert.hpp" #include "LinearAlgebra/RealSpaceMatrix.hpp" #include "Box.hpp" struct BFSAccounting { atom **PredecessorList; int *ShortestPathList; enum Shading *ColorList; std::deque *BFSStack; std::deque *TouchedStack; int AtomCount; int BondOrder; atom *Root; bool BackStepping; int CurrentGraphNr; int ComponentNr; }; /** Accounting data for Depth First Search. */ struct DFSAccounting { std::deque *AtomStack; std::deque *BackEdgeStack; int CurrentGraphNr; int ComponentNumber; atom *Root; bool BackStepping; }; /************************************* Functions for class molecule *********************************/ /** Creates an adjacency list of the molecule. * We obtain an outside file with the indices of atoms which are bondmembers. */ void molecule::CreateAdjacencyListFromDbondFile(ifstream *input) { Info FunctionInfo(__func__); // 1 We will parse bonds out of the dbond file created by tremolo. 
int atom1, atom2; atom *Walker, *OtherWalker; char line[MAXSTRINGSIZE]; if (input->fail()) { DoeLog(0) && (eLog() << Verbose(0) << "Opening of bond file failed \n"); performCriticalExit(); }; doCountAtoms(); // skip header input->getline(line,MAXSTRINGSIZE); DoLog(1) && (Log() << Verbose(1) << "Scanning file ... \n"); while (!input->eof()) // Check whether we read everything already { input->getline(line,MAXSTRINGSIZE); stringstream zeile(line); zeile >> atom1; zeile >> atom2; DoLog(2) && (Log() << Verbose(2) << "Looking for atoms " << atom1 << " and " << atom2 << "." << endl); if (atom2 < atom1) //Sort indices of atoms in order flip(atom1, atom2); Walker = FindAtom(atom1); ASSERT(Walker,"Could not find an atom with the ID given in dbond file"); OtherWalker = FindAtom(atom2); ASSERT(OtherWalker,"Could not find an atom with the ID given in dbond file"); AddBond(Walker, OtherWalker); //Add the bond between the two atoms with respective indices. } } ; /** Creates an adjacency list of the molecule. * Generally, we use the CSD approach to bond recognition, that is the the distance * between two atoms A and B must be within [Rcov(A)+Rcov(B)-t,Rcov(A)+Rcov(B)+t] with * a threshold t = 0.4 Angstroem. * To make it O(N log N) the function uses the linked-cell technique as follows: * The procedure is step-wise: * -# Remove every bond in list * -# Count the atoms in the molecule with CountAtoms() * -# partition cell into smaller linked cells of size \a bonddistance * -# put each atom into its corresponding cell * -# go through every cell, check the atoms therein against all possible bond partners in the 27 adjacent cells, add bond if true * -# correct the bond degree iteratively (single->double->triple bond) * -# finally print the bond list to \a *out if desired * \param *out out stream for printing the matrix, NULL if no output * \param bonddistance length of linked cells (i.e. 
maximum minimal length checked) * \param IsAngstroem whether coordinate system is gauged to Angstroem or Bohr radii * \param *minmaxdistance function to give upper and lower bound on whether particle is bonded to some other * \param *BG BondGraph with the member function above or NULL, if just standard covalent should be used. */ void molecule::CreateAdjacencyList(double bonddistance, bool IsAngstroem, void (BondGraph::*minmaxdistance)(BondedParticle * const , BondedParticle * const , double &, double &, bool), BondGraph *BG) { atom *Walker = NULL; atom *OtherWalker = NULL; int n[NDIM]; double MinDistance, MaxDistance; LinkedCell *LC = NULL; bool free_BG = false; Box &domain = World::getInstance().getDomain(); if (BG == NULL) { BG = new BondGraph(IsAngstroem); free_BG = true; } BondDistance = bonddistance; // * ((IsAngstroem) ? 1. : 1./AtomicLengthToAngstroem); DoLog(0) && (Log() << Verbose(0) << "Begin of CreateAdjacencyList." << endl); // remove every bond from the list for(molecule::iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) for(BondList::iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); !(*AtomRunner)->ListOfBonds.empty(); BondRunner = (*AtomRunner)->ListOfBonds.begin()) if ((*BondRunner)->leftatom == *AtomRunner) delete((*BondRunner)); BondCount = 0; // count atoms in molecule = dimension of matrix (also give each unique name and continuous numbering) DoLog(1) && (Log() << Verbose(1) << "AtomCount " << getAtomCount() << " and bonddistance is " << bonddistance << "." << endl); if ((getAtomCount() > 1) && (bonddistance > 0.1)) { DoLog(2) && (Log() << Verbose(2) << "Creating Linked Cell structure ... " << endl); LC = new LinkedCell(*this, bonddistance); // create a list to map Tesselpoint::nr to atom * DoLog(2) && (Log() << Verbose(2) << "Creating TesselPoint to atom map ... 
" << endl); // set numbers for atoms that can later be used int i=0; for(internal_iterator iter = atoms.begin();iter!= atoms.end(); ++iter){ (*iter)->nr = i++; } // 3a. go through every cell DoLog(2) && (Log() << Verbose(2) << "Celling ... " << endl); for (LC->n[0] = 0; LC->n[0] < LC->N[0]; LC->n[0]++) for (LC->n[1] = 0; LC->n[1] < LC->N[1]; LC->n[1]++) for (LC->n[2] = 0; LC->n[2] < LC->N[2]; LC->n[2]++) { const LinkedCell::LinkedNodes *List = LC->GetCurrentCell(); // Log() << Verbose(2) << "Current cell is " << LC->n[0] << ", " << LC->n[1] << ", " << LC->n[2] << " with No. " << LC->index << " containing " << List->size() << " points." << endl; if (List != NULL) { for (LinkedCell::LinkedNodes::const_iterator Runner = List->begin(); Runner != List->end(); Runner++) { Walker = dynamic_cast(*Runner); ASSERT(Walker,"Tesselpoint that was not an atom retrieved from LinkedNode"); //Log() << Verbose(0) << "Current Atom is " << *Walker << "." << endl; // 3c. check for possible bond between each atom in this and every one in the 27 cells for (n[0] = -1; n[0] <= 1; n[0]++) for (n[1] = -1; n[1] <= 1; n[1]++) for (n[2] = -1; n[2] <= 1; n[2]++) { const LinkedCell::LinkedNodes *OtherList = LC->GetRelativeToCurrentCell(n); // Log() << Verbose(2) << "Current relative cell is " << LC->n[0] << ", " << LC->n[1] << ", " << LC->n[2] << " with No. " << LC->index << " containing " << List->size() << " points." << endl; if (OtherList != NULL) { for (LinkedCell::LinkedNodes::const_iterator OtherRunner = OtherList->begin(); OtherRunner != OtherList->end(); OtherRunner++) { if ((*OtherRunner)->nr > Walker->nr) { OtherWalker = dynamic_cast(*OtherRunner); ASSERT(OtherWalker,"TesselPoint that was not an atom retrieved from LinkedNode"); //Log() << Verbose(1) << "Checking distance " << OtherWalker->x.PeriodicDistanceSquared(&(Walker->x), cell_size) << " against typical bond length of " << bonddistance*bonddistance << "." 
<< endl; (BG->*minmaxdistance)(Walker, OtherWalker, MinDistance, MaxDistance, IsAngstroem); const double distance = domain.periodicDistanceSquared(OtherWalker->getPosition(),Walker->getPosition()); const bool status = (distance <= MaxDistance * MaxDistance) && (distance >= MinDistance * MinDistance); // Log() << Verbose(1) << "MinDistance is " << MinDistance << " and MaxDistance is " << MaxDistance << "." << endl; if (OtherWalker->father->nr > Walker->father->nr) { if (status) { // create bond if distance is smaller // Log() << Verbose(1) << "Adding Bond between " << *Walker << " and " << *OtherWalker << " in distance " << sqrt(distance) << "." << endl; AddBond(Walker->father, OtherWalker->father, 1); // also increases molecule::BondCount } else { // Log() << Verbose(1) << "Not Adding: distance too great." << endl; } } else { // Log() << Verbose(1) << "Not Adding: Wrong order of labels." << endl; } } } } } } } } delete (LC); DoLog(1) && (Log() << Verbose(1) << "I detected " << BondCount << " bonds in the molecule with distance " << BondDistance << "." << endl); // correct bond degree by comparing valence and bond degree DoLog(2) && (Log() << Verbose(2) << "Correcting bond degree ... " << endl); CorrectBondDegree(); // output bonds for debugging (if bond chain list was correctly installed) for_each(atoms.begin(),atoms.end(),mem_fun(&atom::OutputBondOfAtom)); } else DoLog(1) && (Log() << Verbose(1) << "AtomCount is " << getAtomCount() << ", thus no bonds, no connections!." << endl); DoLog(0) && (Log() << Verbose(0) << "End of CreateAdjacencyList." << endl); if (free_BG) delete(BG); } ; /** Checks for presence of bonds within atom list. * TODO: more sophisticated check for bond structure (e.g. connected subgraph, ...) 
* \return true - bonds present, false - no bonds */ bool molecule::hasBondStructure() const { for(molecule::const_iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) if (!(*AtomRunner)->ListOfBonds.empty()) return true; return false; } /** Counts the number of present bonds. * \return number of bonds */ unsigned int molecule::CountBonds() const { unsigned int counter = 0; for(molecule::const_iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) for(BondList::const_iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); BondRunner != (*AtomRunner)->ListOfBonds.end(); ++BondRunner) if ((*BondRunner)->leftatom == *AtomRunner) counter++; return counter; } /** Prints a list of all bonds to \a *out. * \param output stream */ void molecule::OutputBondsList() const { DoLog(1) && (Log() << Verbose(1) << endl << "From contents of bond chain list:"); for(molecule::const_iterator AtomRunner = molecule::begin(); AtomRunner != molecule::end(); ++AtomRunner) for(BondList::const_iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); BondRunner != (*AtomRunner)->ListOfBonds.end(); ++BondRunner) if ((*BondRunner)->leftatom == *AtomRunner) { DoLog(0) && (Log() << Verbose(0) << *(*BondRunner) << "\t" << endl); } DoLog(0) && (Log() << Verbose(0) << endl); } ; /** correct bond degree by comparing valence and bond degree. * correct Bond degree of each bond by checking both bond partners for a mismatch between valence and current sum of bond degrees, * iteratively increase the one first where the other bond partner has the fewest number of bonds (i.e. in general bonds oxygene * preferred over carbon bonds). Beforehand, we had picked the first mismatching partner, which lead to oxygenes with single instead of * double bonds as was expected. 
* \param *out output stream for debugging * \return number of bonds that could not be corrected */ int molecule::CorrectBondDegree() const { int No = 0, OldNo = -1; if (BondCount != 0) { DoLog(1) && (Log() << Verbose(1) << "Correcting Bond degree of each bond ... " << endl); do { OldNo = No; No=0; BOOST_FOREACH(atom *atom,atoms){ No+=atom->CorrectBondDegree(); } } while (OldNo != No); DoLog(0) && (Log() << Verbose(0) << " done." << endl); } else { DoLog(1) && (Log() << Verbose(1) << "BondCount is " << BondCount << ", no bonds between any of the " << getAtomCount() << " atoms." << endl); } DoLog(0) && (Log() << Verbose(0) << No << " bonds could not be corrected." << endl); return (No); } ; /** Counts all cyclic bonds and returns their number. * \note Hydrogen bonds can never by cyclic, thus no check for that * \param *out output stream for debugging * \return number opf cyclic bonds */ int molecule::CountCyclicBonds() { NoCyclicBonds = 0; int *MinimumRingSize = NULL; MoleculeLeafClass *Subgraphs = NULL; std::deque *BackEdgeStack = NULL; for(molecule::iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) if ((!(*AtomRunner)->ListOfBonds.empty()) && ((*(*AtomRunner)->ListOfBonds.begin())->Type == Undetermined)) { DoLog(0) && (Log() << Verbose(0) << "No Depth-First-Search analysis performed so far, calling ..." << endl); Subgraphs = DepthFirstSearchAnalysis(BackEdgeStack); while (Subgraphs->next != NULL) { Subgraphs = Subgraphs->next; delete (Subgraphs->previous); } delete (Subgraphs); delete[] (MinimumRingSize); break; } for(molecule::iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) for(BondList::iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); BondRunner != (*AtomRunner)->ListOfBonds.end(); ++BondRunner) if ((*BondRunner)->leftatom == *AtomRunner) if ((*BondRunner)->Cyclic) NoCyclicBonds++; delete (BackEdgeStack); return NoCyclicBonds; } ; /** Returns Shading as a char string. 
* \param color the Shading * \return string of the flag */ string molecule::GetColor(enum Shading color) const { switch (color) { case white: return "white"; break; case lightgray: return "lightgray"; break; case darkgray: return "darkgray"; break; case black: return "black"; break; default: return "uncolored"; break; }; } ; /** Sets atom::GraphNr and atom::LowpointNr to BFSAccounting::CurrentGraphNr. * \param *out output stream for debugging * \param *Walker current node * \param &BFS structure with accounting data for BFS */ void DepthFirstSearchAnalysis_SetWalkersGraphNr(atom *&Walker, struct DFSAccounting &DFS) { if (!DFS.BackStepping) { // if we don't just return from (8) Walker->GraphNr = DFS.CurrentGraphNr; Walker->LowpointNr = DFS.CurrentGraphNr; DoLog(1) && (Log() << Verbose(1) << "Setting Walker[" << Walker->getName() << "]'s number to " << Walker->GraphNr << " with Lowpoint " << Walker->LowpointNr << "." << endl); DFS.AtomStack->push_front(Walker); DFS.CurrentGraphNr++; } } ; /** During DFS goes along unvisited bond and touches other atom. * Sets bond::type, if * -# BackEdge: set atom::LowpointNr and push on \a BackEdgeStack * -# TreeEgde: set atom::Ancestor and continue with Walker along this edge * Continue until molecule::FindNextUnused() finds no more unused bonds. * \param *out output stream for debugging * \param *mol molecule with atoms and finding unused bonds * \param *&Binder current edge * \param &DFS DFS accounting data */ void DepthFirstSearchAnalysis_ProbeAlongUnusedBond(const molecule * const mol, atom *&Walker, bond *&Binder, struct DFSAccounting &DFS) { atom *OtherAtom = NULL; do { // (3) if Walker has no unused egdes, go to (5) DFS.BackStepping = false; // reset backstepping flag for (8) if (Binder == NULL) // if we don't just return from (11), Binder is already set to next unused Binder = mol->FindNextUnused(Walker); if (Binder == NULL) break; DoLog(2) && (Log() << Verbose(2) << "Current Unused Bond is " << *Binder << "." 
<< endl); // (4) Mark Binder used, ... Binder->MarkUsed(black); OtherAtom = Binder->GetOtherAtom(Walker); DoLog(2) && (Log() << Verbose(2) << "(4) OtherAtom is " << OtherAtom->getName() << "." << endl); if (OtherAtom->GraphNr != -1) { // (4a) ... if "other" atom has been visited (GraphNr != 0), set lowpoint to minimum of both, go to (3) Binder->Type = BackEdge; DFS.BackEdgeStack->push_front(Binder); Walker->LowpointNr = (Walker->LowpointNr < OtherAtom->GraphNr) ? Walker->LowpointNr : OtherAtom->GraphNr; DoLog(3) && (Log() << Verbose(3) << "(4a) Visited: Setting Lowpoint of Walker[" << Walker->getName() << "] to " << Walker->LowpointNr << "." << endl); } else { // (4b) ... otherwise set OtherAtom as Ancestor of Walker and Walker as OtherAtom, go to (2) Binder->Type = TreeEdge; OtherAtom->Ancestor = Walker; Walker = OtherAtom; DoLog(3) && (Log() << Verbose(3) << "(4b) Not Visited: OtherAtom[" << OtherAtom->getName() << "]'s Ancestor is now " << OtherAtom->Ancestor->getName() << ", Walker is OtherAtom " << OtherAtom->getName() << "." << endl); break; } Binder = NULL; } while (1); // (3) } ; /** Checks whether we have a new component. * if atom::LowpointNr of \a *&Walker is greater than atom::GraphNr of its atom::Ancestor, we have a new component. * Meaning that if we touch upon a node who suddenly has a smaller atom::LowpointNr than its ancestor, then we * have a found a new branch in the graph tree. * \param *out output stream for debugging * \param *mol molecule with atoms and finding unused bonds * \param *&Walker current node * \param &DFS DFS accounting data */ void DepthFirstSearchAnalysis_CheckForaNewComponent(const molecule * const mol, atom *&Walker, struct DFSAccounting &DFS, MoleculeLeafClass *&LeafWalker) { atom *OtherAtom = NULL; // (5) if Ancestor of Walker is ... DoLog(1) && (Log() << Verbose(1) << "(5) Number of Walker[" << Walker->getName() << "]'s Ancestor[" << Walker->Ancestor->getName() << "] is " << Walker->Ancestor->GraphNr << "." 
<< endl); if (Walker->Ancestor->GraphNr != DFS.Root->GraphNr) { // (6) (Ancestor of Walker is not Root) if (Walker->LowpointNr < Walker->Ancestor->GraphNr) { // (6a) set Ancestor's Lowpoint number to minimum of of its Ancestor and itself, go to Step(8) Walker->Ancestor->LowpointNr = (Walker->Ancestor->LowpointNr < Walker->LowpointNr) ? Walker->Ancestor->LowpointNr : Walker->LowpointNr; DoLog(2) && (Log() << Verbose(2) << "(6) Setting Walker[" << Walker->getName() << "]'s Ancestor[" << Walker->Ancestor->getName() << "]'s Lowpoint to " << Walker->Ancestor->LowpointNr << "." << endl); } else { // (7) (Ancestor of Walker is a separating vertex, remove all from stack till Walker (including), these and Ancestor form a component Walker->Ancestor->SeparationVertex = true; DoLog(2) && (Log() << Verbose(2) << "(7) Walker[" << Walker->getName() << "]'s Ancestor[" << Walker->Ancestor->getName() << "]'s is a separating vertex, creating component." << endl); mol->SetNextComponentNumber(Walker->Ancestor, DFS.ComponentNumber); DoLog(3) && (Log() << Verbose(3) << "(7) Walker[" << Walker->getName() << "]'s Ancestor's Compont is " << DFS.ComponentNumber << "." << endl); mol->SetNextComponentNumber(Walker, DFS.ComponentNumber); DoLog(3) && (Log() << Verbose(3) << "(7) Walker[" << Walker->getName() << "]'s Compont is " << DFS.ComponentNumber << "." << endl); do { ASSERT(!DFS.AtomStack->empty(), "DepthFirstSearchAnalysis_CheckForaNewComponent() - DFS.AtomStack is empty!"); OtherAtom = DFS.AtomStack->front(); DFS.AtomStack->pop_front(); LeafWalker->Leaf->AddCopyAtom(OtherAtom); mol->SetNextComponentNumber(OtherAtom, DFS.ComponentNumber); DoLog(3) && (Log() << Verbose(3) << "(7) Other[" << OtherAtom->getName() << "]'s Compont is " << DFS.ComponentNumber << "." 
<< endl); } while (OtherAtom != Walker); DFS.ComponentNumber++; } // (8) Walker becomes its Ancestor, go to (3) DoLog(2) && (Log() << Verbose(2) << "(8) Walker[" << Walker->getName() << "] is now its Ancestor " << Walker->Ancestor->getName() << ", backstepping. " << endl); Walker = Walker->Ancestor; DFS.BackStepping = true; } } ; /** Cleans the root stack when we have found a component. * If we are not DFSAccounting::BackStepping, then we clear the root stack by putting everything into a * component down till we meet DFSAccounting::Root. * \param *out output stream for debugging * \param *mol molecule with atoms and finding unused bonds * \param *&Walker current node * \param *&Binder current edge * \param &DFS DFS accounting data */ void DepthFirstSearchAnalysis_CleanRootStackDownTillWalker(const molecule * const mol, atom *&Walker, bond *&Binder, struct DFSAccounting &DFS, MoleculeLeafClass *&LeafWalker) { atom *OtherAtom = NULL; if (!DFS.BackStepping) { // coming from (8) want to go to (3) // (9) remove all from stack till Walker (including), these and Root form a component //DFS.AtomStack->Output(out); mol->SetNextComponentNumber(DFS.Root, DFS.ComponentNumber); DoLog(3) && (Log() << Verbose(3) << "(9) Root[" << DFS.Root->getName() << "]'s Component is " << DFS.ComponentNumber << "." << endl); mol->SetNextComponentNumber(Walker, DFS.ComponentNumber); DoLog(3) && (Log() << Verbose(3) << "(9) Walker[" << Walker->getName() << "]'s Component is " << DFS.ComponentNumber << "." << endl); do { ASSERT(!DFS.AtomStack->empty(), "DepthFirstSearchAnalysis_CleanRootStackDownTillWalker() - DFS.AtomStack is empty!"); OtherAtom = DFS.AtomStack->front(); DFS.AtomStack->pop_front(); LeafWalker->Leaf->AddCopyAtom(OtherAtom); mol->SetNextComponentNumber(OtherAtom, DFS.ComponentNumber); DoLog(3) && (Log() << Verbose(3) << "(7) Other[" << OtherAtom->getName() << "]'s Component is " << DFS.ComponentNumber << "." 
<< endl); } while (OtherAtom != Walker); DFS.ComponentNumber++; // (11) Root is separation vertex, set Walker to Root and go to (4) Walker = DFS.Root; Binder = mol->FindNextUnused(Walker); DoLog(1) && (Log() << Verbose(1) << "(10) Walker is Root[" << DFS.Root->getName() << "], next Unused Bond is " << Binder << "." << endl); if (Binder != NULL) { // Root is separation vertex DoLog(1) && (Log() << Verbose(1) << "(11) Root is a separation vertex." << endl); Walker->SeparationVertex = true; } } } ; /** Initializes DFSAccounting structure. * \param *out output stream for debugging * \param &DFS accounting structure to allocate * \param *mol molecule with AtomCount, BondCount and all atoms */ void DepthFirstSearchAnalysis_Init(struct DFSAccounting &DFS, const molecule * const mol) { DFS.AtomStack = new std::deque (mol->getAtomCount()); DFS.CurrentGraphNr = 0; DFS.ComponentNumber = 0; DFS.BackStepping = false; mol->ResetAllBondsToUnused(); DFS.BackEdgeStack->clear(); } ; /** Free's DFSAccounting structure. * \param *out output stream for debugging * \param &DFS accounting structure to free */ void DepthFirstSearchAnalysis_Finalize(struct DFSAccounting &DFS) { delete (DFS.AtomStack); // delete (DFS.BackEdgeStack); // DON'T free, see DepthFirstSearchAnalysis(), is returned as allocated } ; void molecule::init_DFS(struct DFSAccounting &DFS) const{ DepthFirstSearchAnalysis_Init(DFS, this); for_each(atoms.begin(),atoms.end(),mem_fun(&atom::resetGraphNr)); for_each(atoms.begin(),atoms.end(),mem_fun(&atom::InitComponentNr)); } /** Performs a Depth-First search on this molecule. * Marks bonds in molecule as cyclic, bridge, ... and atoms as * articulations points, ... * We use the algorithm from [Even, Graph Algorithms, p.62]. 
* \param *out output stream for debugging * \param *&BackEdgeStack NULL pointer to std::deque with all the found back edges, allocated and filled on return * \return list of each disconnected subgraph as an individual molecule class structure */ MoleculeLeafClass * molecule::DepthFirstSearchAnalysis(std::deque *&BackEdgeStack) const { struct DFSAccounting DFS; BackEdgeStack = new std::deque (BondCount); DFS.BackEdgeStack = BackEdgeStack; MoleculeLeafClass *SubGraphs = new MoleculeLeafClass(NULL); MoleculeLeafClass *LeafWalker = SubGraphs; int OldGraphNr = 0; atom *Walker = NULL; bond *Binder = NULL; if (getAtomCount() == 0) return SubGraphs; DoLog(0) && (Log() << Verbose(0) << "Begin of DepthFirstSearchAnalysis" << endl); init_DFS(DFS); for (molecule::const_iterator iter = begin(); iter != end();) { DFS.Root = *iter; // (1) mark all edges unused, empty stack, set atom->GraphNr = -1 for all DFS.AtomStack->clear(); // put into new subgraph molecule and add this to list of subgraphs LeafWalker = new MoleculeLeafClass(LeafWalker); LeafWalker->Leaf = World::getInstance().createMolecule(); LeafWalker->Leaf->AddCopyAtom(DFS.Root); OldGraphNr = DFS.CurrentGraphNr; Walker = DFS.Root; do { // (10) do { // (2) set number and Lowpoint of Atom to i, increase i, push current atom DepthFirstSearchAnalysis_SetWalkersGraphNr(Walker, DFS); DepthFirstSearchAnalysis_ProbeAlongUnusedBond(this, Walker, Binder, DFS); if (Binder == NULL) { DoLog(2) && (Log() << Verbose(2) << "No more Unused Bonds." << endl); break; } else Binder = NULL; } while (1); // (2) // if we came from backstepping, yet there were no more unused bonds, we end up here with no Ancestor, because Walker is Root! Then we are finished! 
if ((Walker == DFS.Root) && (Binder == NULL)) break; DepthFirstSearchAnalysis_CheckForaNewComponent(this, Walker, DFS, LeafWalker); DepthFirstSearchAnalysis_CleanRootStackDownTillWalker(this, Walker, Binder, DFS, LeafWalker); } while ((DFS.BackStepping) || (Binder != NULL)); // (10) halt only if Root has no unused edges // From OldGraphNr to CurrentGraphNr ranges an disconnected subgraph DoLog(0) && (Log() << Verbose(0) << "Disconnected subgraph ranges from " << OldGraphNr << " to " << DFS.CurrentGraphNr << "." << endl); LeafWalker->Leaf->Output((ofstream *)&(Log() << Verbose(0))); DoLog(0) && (Log() << Verbose(0) << endl); // step on to next root while ((iter != end()) && ((*iter)->GraphNr != -1)) { //Log() << Verbose(1) << "Current next subgraph root candidate is " << (*iter)->Name << "." << endl; if ((*iter)->GraphNr != -1) // if already discovered, step on iter++; } } // set cyclic bond criterium on "same LP" basis CyclicBondAnalysis(); OutputGraphInfoPerAtom(); OutputGraphInfoPerBond(); // free all and exit DepthFirstSearchAnalysis_Finalize(DFS); DoLog(0) && (Log() << Verbose(0) << "End of DepthFirstSearchAnalysis" << endl); return SubGraphs; } ; /** Scans through all bonds and set bond::Cyclic to true where atom::LowpointNr of both ends is equal: LP criterion. */ void molecule::CyclicBondAnalysis() const { NoCyclicBonds = 0; for(molecule::const_iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) for(BondList::const_iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); BondRunner != (*AtomRunner)->ListOfBonds.end(); ++BondRunner) if ((*BondRunner)->leftatom == *AtomRunner) if ((*BondRunner)->rightatom->LowpointNr == (*BondRunner)->leftatom->LowpointNr) { // cyclic ?? (*BondRunner)->Cyclic = true; NoCyclicBonds++; } } ; /** Output graph information per atom. 
* \param *out output stream */ void molecule::OutputGraphInfoPerAtom() const { DoLog(1) && (Log() << Verbose(1) << "Final graph info for each atom is:" << endl); for_each(atoms.begin(),atoms.end(),mem_fun(&atom::OutputGraphInfo)); } ; /** Output graph information per bond. * \param *out output stream */ void molecule::OutputGraphInfoPerBond() const { bond *Binder = NULL; DoLog(1) && (Log() << Verbose(1) << "Final graph info for each bond is:" << endl); for(molecule::const_iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) for(BondList::const_iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); BondRunner != (*AtomRunner)->ListOfBonds.end(); ++BondRunner) if ((*BondRunner)->leftatom == *AtomRunner) { Binder = *BondRunner; if (DoLog(2)) { ostream &out = (Log() << Verbose(2)); out << ((Binder->Type == TreeEdge) ? "TreeEdge " : "BackEdge ") << *Binder << ": <"; out << ((Binder->leftatom->SeparationVertex) ? "SP," : "") << "L" << Binder->leftatom->LowpointNr << " G" << Binder->leftatom->GraphNr << " Comp."; Binder->leftatom->OutputComponentNumber(&out); out << " === "; out << ((Binder->rightatom->SeparationVertex) ? "SP," : "") << "L" << Binder->rightatom->LowpointNr << " G" << Binder->rightatom->GraphNr << " Comp."; Binder->rightatom->OutputComponentNumber(&out); out << ">." << endl; } if (Binder->Cyclic) // cyclic ?? DoLog(3) && (Log() << Verbose(3) << "Lowpoint at each side are equal: CYCLIC!" << endl); } } ; /** Initialise each vertex as white with no predecessor, empty queue, color Root lightgray. 
* \param *out output stream for debugging * \param &BFS accounting structure * \param AtomCount number of entries in the array to allocate */ void InitializeBFSAccounting(struct BFSAccounting &BFS, int AtomCount) { BFS.AtomCount = AtomCount; BFS.PredecessorList = new atom*[AtomCount]; BFS.ShortestPathList = new int[AtomCount]; BFS.ColorList = new enum Shading[AtomCount]; BFS.BFSStack = new std::deque (AtomCount); BFS.TouchedStack = new std::deque (AtomCount); for (int i = AtomCount; i--;) { BFS.ShortestPathList[i] = -1; BFS.PredecessorList[i] = 0; BFS.ColorList[i] = white; } }; /** Free's accounting structure. * \param *out output stream for debugging * \param &BFS accounting structure */ void FinalizeBFSAccounting(struct BFSAccounting &BFS) { delete[](BFS.PredecessorList); delete[](BFS.ShortestPathList); delete[](BFS.ColorList); delete (BFS.BFSStack); delete (BFS.TouchedStack); BFS.AtomCount = 0; }; /** Clean the accounting structure. * \param *out output stream for debugging * \param &BFS accounting structure */ void CleanBFSAccounting(struct BFSAccounting &BFS) { atom *Walker = NULL; while (!BFS.TouchedStack->empty()) { Walker = BFS.TouchedStack->front(); BFS.TouchedStack->pop_front(); BFS.PredecessorList[Walker->nr] = NULL; BFS.ShortestPathList[Walker->nr] = -1; BFS.ColorList[Walker->nr] = white; } }; /** Resets shortest path list and BFSStack. * \param *out output stream for debugging * \param *&Walker current node, pushed onto BFSAccounting::BFSStack and BFSAccounting::TouchedStack * \param &BFS accounting structure */ void ResetBFSAccounting(atom *&Walker, struct BFSAccounting &BFS) { BFS.ShortestPathList[Walker->nr] = 0; BFS.BFSStack->clear(); // start with empty BFS stack BFS.BFSStack->push_front(Walker); BFS.TouchedStack->push_front(Walker); }; /** Performs a BFS from \a *Root, trying to find the same node and hence a cycle. 
* Atoms are taken from the front of BFSAccounting::BFSStack and all their bonds except
 * \a *&BackEdge are followed. Newly reached (white) atoms get colored lightgray, receive
 * predecessor and path length and are pushed to the front of both stacks. Once
 * BFSAccounting::Root is reached, the predecessor chain is checked: if every atom on it is
 * already marked cyclic, the cycle is regarded as known and the atoms reached from the
 * current Walker are taken off the lists again.
 * \param *&BackEdge the edge from root that we don't want to move along
 * \param &BFS accounting structure, BFSStack must not be empty on entry
 */
void CyclicStructureAnalysis_CyclicBFSFromRootToRoot(bond *&BackEdge, struct BFSAccounting &BFS)
{
  atom *Walker = NULL;
  atom *OtherAtom = NULL;
  do { // look for Root
    ASSERT(!BFS.BFSStack->empty(), "CyclicStructureAnalysis_CyclicBFSFromRootToRoot() - BFS.BFSStack is empty!");
    // NOTE(review): atoms are pushed to and taken from the front, i.e. last-in first-out - confirm this order is intended for a BFS
    Walker = BFS.BFSStack->front();
    BFS.BFSStack->pop_front();
    DoLog(2) && (Log() << Verbose(2) << "Current Walker is " << *Walker << ", we look for SP to Root " << *BFS.Root << "." << endl);
    for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) {
      if ((*Runner) != BackEdge) { // only walk along DFS spanning tree (otherwise we always find SP of one being backedge Binder)
        OtherAtom = (*Runner)->GetOtherAtom(Walker);
#ifdef ADDHYDROGEN
        if (OtherAtom->getType()->getAtomicNumber() != 1) {
#endif
        DoLog(2) && (Log() << Verbose(2) << "Current OtherAtom is: " << OtherAtom->getName() << " for bond " << *(*Runner) << "." << endl);
        if (BFS.ColorList[OtherAtom->nr] == white) {
          // first visit: remember it as touched and record how we got here
          BFS.TouchedStack->push_front(OtherAtom);
          BFS.ColorList[OtherAtom->nr] = lightgray;
          BFS.PredecessorList[OtherAtom->nr] = Walker; // Walker is the predecessor
          BFS.ShortestPathList[OtherAtom->nr] = BFS.ShortestPathList[Walker->nr] + 1;
          DoLog(2) && (Log() << Verbose(2) << "Coloring OtherAtom " << OtherAtom->getName() << " lightgray, its predecessor is " << Walker->getName() << " and its Shortest Path is " << BFS.ShortestPathList[OtherAtom->nr] << " egde(s) long." << endl);
          //if (BFS.ShortestPathList[OtherAtom->nr] < MinimumRingSize[Walker->GetTrueFather()->nr]) { // Check for maximum distance
          DoLog(3) && (Log() << Verbose(3) << "Putting OtherAtom into queue." << endl);
          BFS.BFSStack->push_front(OtherAtom);
          //}
        } else {
          DoLog(3) && (Log() << Verbose(3) << "Not Adding, has already been visited." << endl);
        }
        // stop scanning bonds as soon as Root is reached, OtherAtom stays set to Root for the check below
        if (OtherAtom == BFS.Root)
          break;
#ifdef ADDHYDROGEN
      } else {
        DoLog(2) && (Log() << Verbose(2) << "Skipping hydrogen atom " << *OtherAtom << "." << endl);
        BFS.ColorList[OtherAtom->nr] = black;
      }
#endif
      } else {
        DoLog(2) && (Log() << Verbose(2) << "Bond " << *(*Runner) << " not Visiting, is the back edge." << endl);
      }
    }
    BFS.ColorList[Walker->nr] = black;
    DoLog(1) && (Log() << Verbose(1) << "Coloring Walker " << Walker->getName() << " black." << endl);
    if (OtherAtom == BFS.Root) { // if we have found the root, check whether this cycle wasn't already found beforehand
      // step through predecessor list
      while (OtherAtom != BackEdge->rightatom) {
        if (!OtherAtom->GetTrueFather()->IsCyclic) // if one bond in the loop is not marked as cyclic, we haven't found this cycle yet
          break;
        else
          OtherAtom = BFS.PredecessorList[OtherAtom->nr];
      }
      if (OtherAtom == BackEdge->rightatom) { // if each atom in found cycle is cyclic, loop's been found before already
        DoLog(3) && (Log() << Verbose(3) << "This cycle was already found before, skipping and removing seeker from search." << endl);
        // undo everything that was reached via Walker: reset the array entries and take the atoms off the BFS stack
        do {
          ASSERT(!BFS.TouchedStack->empty(), "CyclicStructureAnalysis_CyclicBFSFromRootToRoot() - BFS.TouchedStack is empty!");
          OtherAtom = BFS.TouchedStack->front();
          BFS.TouchedStack->pop_front();
          if (BFS.PredecessorList[OtherAtom->nr] == Walker) {
            DoLog(4) && (Log() << Verbose(4) << "Removing " << *OtherAtom << " from lists and stacks." << endl);
            BFS.PredecessorList[OtherAtom->nr] = NULL;
            BFS.ShortestPathList[OtherAtom->nr] = -1;
            BFS.ColorList[OtherAtom->nr] = white;
            // rats ... deque has no find()
            std::deque::iterator iter = find( BFS.BFSStack->begin(), BFS.BFSStack->end(), OtherAtom);
            ASSERT(iter != BFS.BFSStack->end(), "CyclicStructureAnalysis_CyclicBFSFromRootToRoot() - can't find "+toString(*OtherAtom)+" on stack!");
            BFS.BFSStack->erase(iter);
          }
        } while ((!BFS.TouchedStack->empty()) && (BFS.PredecessorList[OtherAtom->nr] == NULL));
        BFS.TouchedStack->push_front(OtherAtom); // last was wrongly popped
        OtherAtom = BackEdge->rightatom; // set to not Root
      } else
        OtherAtom = BFS.Root;
    }
  } while ((!BFS.BFSStack->empty()) && (OtherAtom != BFS.Root) && (OtherAtom != NULL)); // || (ShortestPathList[OtherAtom->nr] < MinimumRingSize[Walker->GetTrueFather()->nr])));
};

/** Climb back the BFSAccounting::PredecessorList and find cycle members.
 * Only acts if \a *&OtherAtom is BFSAccounting::Root, i.e. the preceding search closed a
 * ring. Then every atom on the predecessor chain from Root to the right atom of
 * \a *&BackEdge has the IsCyclic flag of its true father set, and the ring size is
 * entered into \a *&MinimumRingSize wherever it is smaller than the value stored so far
 * (Root's entry is overwritten unconditionally). Otherwise only a log message is written.
 * \param *&OtherAtom atom the preceding search ended at
 * \param *&BackEdge denotes the edge we did not want to travel along when doing CyclicBFSFromRootToRoot()
 * \param &BFS accounting structure
 * \param *&MinimumRingSize minimum distance from this node possible without encountering oneself, set on return for each atom
 * \param &MinRingSize global minimum distance from one node without encountering oneself, set on return
 */
void CyclicStructureAnalysis_RetrieveCycleMembers(atom *&OtherAtom, bond *&BackEdge, struct BFSAccounting &BFS, int *&MinimumRingSize, int &MinRingSize)
{
  atom *Walker = NULL;
  int NumCycles = 0; // only incremented below, never read
  int RingSize = -1;

  if (OtherAtom == BFS.Root) {
    // now climb back the predecessor list and thus find the cycle members
    NumCycles++;
    RingSize = 1;
    BFS.Root->GetTrueFather()->IsCyclic = true;
    DoLog(1) && (Log() << Verbose(1) << "Found ring contains: ");
    Walker = BFS.Root;
    // first pass: mark the members and count them
    while (Walker != BackEdge->rightatom) {
      DoLog(0) && (Log() << Verbose(0) << Walker->getName() << " <-> ");
      Walker = BFS.PredecessorList[Walker->nr];
      Walker->GetTrueFather()->IsCyclic = true;
      RingSize++;
    }
    DoLog(0) && (Log() << Verbose(0) << Walker->getName() << " with a length of " << RingSize << "." << endl << endl);
    // walk through all and set MinimumRingSize
    Walker = BFS.Root;
    MinimumRingSize[Walker->GetTrueFather()->nr] = RingSize;
    while (Walker != BackEdge->rightatom) {
      Walker = BFS.PredecessorList[Walker->nr];
      if (RingSize < MinimumRingSize[Walker->GetTrueFather()->nr])
        MinimumRingSize[Walker->GetTrueFather()->nr] = RingSize;
    }
    // -1 denotes that no ring has been found so far
    if ((RingSize < MinRingSize) || (MinRingSize == -1))
      MinRingSize = RingSize;
  } else {
    DoLog(1) && (Log() << Verbose(1) << "No ring containing " << *BFS.Root << " with length equal to or smaller than " << MinimumRingSize[BFS.Root->GetTrueFather()->nr] << " found." << endl);
  }
};

/** From a given node performs a BFS to touch the next cycle, for whose nodes \a *&MinimumRingSize is set and set it accordingly.
 * \param *out output stream for debugging
 * \param *&Root node to look for closest cycle from, i.e. \a *&MinimumRingSize is set for this node
 * \param *&MinimumRingSize minimum distance from this node possible without encountering oneself, set on return for each atom
 * \param AtomCount number of nodes in graph
 */
void CyclicStructureAnalysis_BFSToNextCycle(atom *&Root, atom *&Walker, int *&MinimumRingSize, int AtomCount)
{
  struct BFSAccounting BFS;
  atom *OtherAtom = Walker;

  InitializeBFSAccounting(BFS, AtomCount);

  ResetBFSAccounting(Walker, BFS);
  while (OtherAtom != NULL) { // look for Root
    ASSERT(!BFS.BFSStack->empty(), "CyclicStructureAnalysis_BFSToNextCycle() - BFS.BFSStack is empty!");
    Walker = BFS.BFSStack->front();
    BFS.BFSStack->pop_front();
    //Log() << Verbose(2) << "Current Walker is " << *Walker << ", we look for SP to Root " << *Root << "."
<< endl; for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) { // "removed (*Runner) != BackEdge) || " from next if, is u if ((Walker->ListOfBonds.size() == 1)) { // only walk along DFS spanning tree (otherwise we always find SP of 1 being backedge Binder), but terminal hydrogens may be connected via backedge, hence extra check OtherAtom = (*Runner)->GetOtherAtom(Walker); //Log() << Verbose(2) << "Current OtherAtom is: " << OtherAtom->Name << " for bond " << *Binder << "." << endl; if (BFS.ColorList[OtherAtom->nr] == white) { BFS.TouchedStack->push_front(OtherAtom); BFS.ColorList[OtherAtom->nr] = lightgray; BFS.PredecessorList[OtherAtom->nr] = Walker; // Walker is the predecessor BFS.ShortestPathList[OtherAtom->nr] = BFS.ShortestPathList[Walker->nr] + 1; //Log() << Verbose(2) << "Coloring OtherAtom " << OtherAtom->Name << " lightgray, its predecessor is " << Walker->Name << " and its Shortest Path is " << ShortestPathList[OtherAtom->nr] << " egde(s) long." << endl; if (OtherAtom->GetTrueFather()->IsCyclic) { // if the other atom is connected to a ring MinimumRingSize[Root->GetTrueFather()->nr] = BFS.ShortestPathList[OtherAtom->nr] + MinimumRingSize[OtherAtom->GetTrueFather()->nr]; OtherAtom = NULL; //break; break; } else BFS.BFSStack->push_front(OtherAtom); } else { //Log() << Verbose(3) << "Not Adding, has already been visited." << endl; } } else { //Log() << Verbose(3) << "Not Visiting, is a back edge." << endl; } } BFS.ColorList[Walker->nr] = black; //Log() << Verbose(1) << "Coloring Walker " << Walker->Name << " black." << endl; } //CleanAccountingLists(TouchedStack, PredecessorList, ShortestPathList, ColorList); FinalizeBFSAccounting(BFS); } ; /** All nodes that are not in cycles get assigned a \a *&MinimumRingSizeby BFS to next cycle. 
* \param *out output stream for debugging * \param *&MinimumRingSize array with minimum distance without encountering onself for each atom * \param &MinRingSize global minium distance * \param &NumCyles number of cycles in graph * \param *mol molecule with atoms */ void CyclicStructureAnalysis_AssignRingSizetoNonCycleMembers(int *&MinimumRingSize, int &MinRingSize, int &NumCycles, const molecule * const mol) { atom *Root = NULL; atom *Walker = NULL; if (MinRingSize != -1) { // if rings are present // go over all atoms for (molecule::const_iterator iter = mol->begin(); iter != mol->end(); ++iter) { Root = *iter; if (MinimumRingSize[Root->GetTrueFather()->nr] == mol->getAtomCount()) { // check whether MinimumRingSize is set, if not BFS to next where it is Walker = Root; //Log() << Verbose(1) << "---------------------------------------------------------------------------------------------------------" << endl; CyclicStructureAnalysis_BFSToNextCycle(Root, Walker, MinimumRingSize, mol->getAtomCount()); } DoLog(1) && (Log() << Verbose(1) << "Minimum ring size of " << *Root << " is " << MinimumRingSize[Root->GetTrueFather()->nr] << "." << endl); } DoLog(1) && (Log() << Verbose(1) << "Minimum ring size is " << MinRingSize << ", over " << NumCycles << " cycles total." << endl); } else DoLog(1) && (Log() << Verbose(1) << "No rings were detected in the molecular structure." << endl); } ; /** Analyses the cycles found and returns minimum of all cycle lengths. * We begin with a list of Back edges found during DepthFirstSearchAnalysis(). We go through this list - one end is the Root, * the other our initial Walker - and do a Breadth First Search for the Root. We mark down each Predecessor and as soon as * we have found the Root via BFS, we may climb back the closed cycle via the Predecessors. Thereby we mark atoms and bonds * as cyclic and print out the cycles. * \param *out output stream for debugging * \param *BackEdgeStack stack with all back edges found during DFS scan. 
Beware: This stack contains the bonds from the total molecule, not from the subgraph! * \param *&MinimumRingSize contains smallest ring size in molecular structure on return or -1 if no rings were found, if set is maximum search distance * \todo BFS from the not-same-LP to find back to starting point of tributary cycle over more than one bond */ void molecule::CyclicStructureAnalysis(std::deque * BackEdgeStack, int *&MinimumRingSize) const { struct BFSAccounting BFS; atom *Walker = NULL; atom *OtherAtom = NULL; bond *BackEdge = NULL; int NumCycles = 0; int MinRingSize = -1; InitializeBFSAccounting(BFS, getAtomCount()); //Log() << Verbose(1) << "Back edge list - "; //BackEdgeStack->Output(out); DoLog(1) && (Log() << Verbose(1) << "Analysing cycles ... " << endl); NumCycles = 0; while (!BackEdgeStack->empty()) { BackEdge = BackEdgeStack->front(); BackEdgeStack->pop_front(); // this is the target BFS.Root = BackEdge->leftatom; // this is the source point Walker = BackEdge->rightatom; ResetBFSAccounting(Walker, BFS); DoLog(1) && (Log() << Verbose(1) << "---------------------------------------------------------------------------------------------------------" << endl); OtherAtom = NULL; CyclicStructureAnalysis_CyclicBFSFromRootToRoot(BackEdge, BFS); CyclicStructureAnalysis_RetrieveCycleMembers(OtherAtom, BackEdge, BFS, MinimumRingSize, MinRingSize); CleanBFSAccounting(BFS); } FinalizeBFSAccounting(BFS); CyclicStructureAnalysis_AssignRingSizetoNonCycleMembers(MinimumRingSize, MinRingSize, NumCycles, this); }; /** Sets the next component number. * This is O(N) as the number of bonds per atom is bound. 
* \param *vertex atom whose next atom::*ComponentNr is to be set * \param nr number to use */ void molecule::SetNextComponentNumber(atom *vertex, int nr) const { size_t i = 0; if (vertex != NULL) { for (; i < vertex->ListOfBonds.size(); i++) { if (vertex->ComponentNr[i] == -1) { // check if not yet used vertex->ComponentNr[i] = nr; break; } else if (vertex->ComponentNr[i] == nr) // if number is already present, don't add another time break; // breaking here will not cause error! } if (i == vertex->ListOfBonds.size()) { DoeLog(0) && (eLog()<< Verbose(0) << "Error: All Component entries are already occupied!" << endl); performCriticalExit(); } } else { DoeLog(0) && (eLog()<< Verbose(0) << "Error: Given vertex is NULL!" << endl); performCriticalExit(); } } ; /** Returns next unused bond for this atom \a *vertex or NULL of none exists. * \param *vertex atom to regard * \return bond class or NULL */ bond * molecule::FindNextUnused(atom *vertex) const { for (BondList::const_iterator Runner = vertex->ListOfBonds.begin(); Runner != vertex->ListOfBonds.end(); (++Runner)) if ((*Runner)->IsUsed() == white) return ((*Runner)); return NULL; } ; /** Resets bond::Used flag of all bonds in this molecule. * \return true - success, false - -failure */ void molecule::ResetAllBondsToUnused() const { for(molecule::const_iterator AtomRunner = begin(); AtomRunner != end(); ++AtomRunner) for(BondList::const_iterator BondRunner = (*AtomRunner)->ListOfBonds.begin(); BondRunner != (*AtomRunner)->ListOfBonds.end(); ++BondRunner) if ((*BondRunner)->leftatom == *AtomRunner) (*BondRunner)->ResetUsed(); } ; /** Output a list of flags, stating whether the bond was visited or not. 
* \param *out output stream for debugging * \param *list */ void OutputAlreadyVisited(int *list) { DoLog(4) && (Log() << Verbose(4) << "Already Visited Bonds:\t"); for (int i = 1; i <= list[0]; i++) DoLog(0) && (Log() << Verbose(0) << list[i] << " "); DoLog(0) && (Log() << Verbose(0) << endl); } ; /** Storing the bond structure of a molecule to file. * Simply stores Atom::nr and then the Atom::nr of all bond partners per line. * \param &filename name of file * \param path path to file, defaults to empty * \return true - file written successfully, false - writing failed */ bool molecule::StoreAdjacencyToFile(std::string filename, std::string path) { ofstream AdjacencyFile; string line; bool status = true; if (path != "") line = path + "/" + filename; else line = filename; AdjacencyFile.open(line.c_str(), ios::out); DoLog(1) && (Log() << Verbose(1) << "Saving adjacency list ... " << endl); if (AdjacencyFile.good()) { AdjacencyFile << "m\tn" << endl; for_each(atoms.begin(),atoms.end(),bind2nd(mem_fun(&atom::OutputAdjacency),&AdjacencyFile)); AdjacencyFile.close(); DoLog(1) && (Log() << Verbose(1) << "\t... done." << endl); } else { DoLog(1) && (Log() << Verbose(1) << "\t... failed to open file " << line << "." << endl); status = false; } return status; } ; /** Storing the bond structure of a molecule to file. * Simply stores Atom::nr and then the Atom::nr of all bond partners, one per line. * \param &filename name of file * \param path path to file, defaults to empty * \return true - file written successfully, false - writing failed */ bool molecule::StoreBondsToFile(std::string filename, std::string path) { ofstream BondFile; string line; bool status = true; if (path != "") line = path + "/" + filename; else line = filename; BondFile.open(line.c_str(), ios::out); DoLog(1) && (Log() << Verbose(1) << "Saving adjacency list ... 
" << endl); if (BondFile.good()) { BondFile << "m\tn" << endl; for_each(atoms.begin(),atoms.end(),bind2nd(mem_fun(&atom::OutputBonds),&BondFile)); BondFile.close(); DoLog(1) && (Log() << Verbose(1) << "\t... done." << endl); } else { DoLog(1) && (Log() << Verbose(1) << "\t... failed to open file " << line << "." << endl); status = false; } return status; } ; bool CheckAdjacencyFileAgainstMolecule_Init(std::string &path, ifstream &File, int *&CurrentBonds) { string filename; filename = path + ADJACENCYFILE; File.open(filename.c_str(), ios::out); DoLog(1) && (Log() << Verbose(1) << "Looking at bond structure stored in adjacency file and comparing to present one ... " << endl); if (File.fail()) return false; // allocate storage structure CurrentBonds = new int[8]; // contains parsed bonds of current atom for(int i=0;i<8;i++) CurrentBonds[i] = 0; return true; } ; void CheckAdjacencyFileAgainstMolecule_Finalize(ifstream &File, int *&CurrentBonds) { File.close(); File.clear(); delete[](CurrentBonds); } ; void CheckAdjacencyFileAgainstMolecule_CompareBonds(bool &status, int &NonMatchNumber, atom *&Walker, size_t &CurrentBondsOfAtom, int AtomNr, int *&CurrentBonds, atom **ListOfAtoms) { size_t j = 0; int id = -1; //Log() << Verbose(2) << "Walker is " << *Walker << ", bond partners: "; if (CurrentBondsOfAtom == Walker->ListOfBonds.size()) { for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) { id = (*Runner)->GetOtherAtom(Walker)->nr; j = 0; for (; (j < CurrentBondsOfAtom) && (CurrentBonds[j++] != id);) ; // check against all parsed bonds if (CurrentBonds[j - 1] != id) { // no match ? Then mark in ListOfAtoms ListOfAtoms[AtomNr] = NULL; NonMatchNumber++; status = false; DoeLog(2) && (eLog() << Verbose(2) << id << " can not be found in list." 
<< endl); } else { //Log() << Verbose(0) << "[" << id << "]\t"; } } //Log() << Verbose(0) << endl; } else { DoLog(0) && (Log() << Verbose(0) << "Number of bonds for Atom " << *Walker << " does not match, parsed " << CurrentBondsOfAtom << " against " << Walker->ListOfBonds.size() << "." << endl); status = false; } } ; /** Checks contents of adjacency file against bond structure in structure molecule. * \param *out output stream for debugging * \param *path path to file * \param **ListOfAtoms allocated (molecule::AtomCount) and filled lookup table for ids (Atom::nr) to *Atom * \return true - structure is equal, false - not equivalence */ bool molecule::CheckAdjacencyFileAgainstMolecule(std::string &path, atom **ListOfAtoms) { ifstream File; bool status = true; atom *Walker = NULL; int *CurrentBonds = NULL; int NonMatchNumber = 0; // will number of atoms with differing bond structure size_t CurrentBondsOfAtom = -1; const int AtomCount = getAtomCount(); if (!CheckAdjacencyFileAgainstMolecule_Init(path, File, CurrentBonds)) { DoLog(1) && (Log() << Verbose(1) << "Adjacency file not found." << endl); return true; } char buffer[MAXSTRINGSIZE]; // Parse the file line by line and count the bonds while (!File.eof()) { File.getline(buffer, MAXSTRINGSIZE); stringstream line; line.str(buffer); int AtomNr = -1; line >> AtomNr; CurrentBondsOfAtom = -1; // we count one too far due to line end // parse into structure if ((AtomNr >= 0) && (AtomNr < AtomCount)) { Walker = ListOfAtoms[AtomNr]; while (!line.eof()) line >> CurrentBonds[++CurrentBondsOfAtom]; // compare against present bonds CheckAdjacencyFileAgainstMolecule_CompareBonds(status, NonMatchNumber, Walker, CurrentBondsOfAtom, AtomNr, CurrentBonds, ListOfAtoms); } else { if (AtomNr != -1) DoeLog(2) && (eLog() << Verbose(2) << AtomNr << " is not valid in the range of ids [" << 0 << "," << AtomCount << ")." 
<< endl); } } CheckAdjacencyFileAgainstMolecule_Finalize(File, CurrentBonds); if (status) { // if equal we parse the KeySetFile DoLog(1) && (Log() << Verbose(1) << "done: Equal." << endl); } else DoLog(1) && (Log() << Verbose(1) << "done: Not equal by " << NonMatchNumber << " atoms." << endl); return status; } ; /** Picks from a global stack with all back edges the ones in the fragment. * \param *out output stream for debugging * \param **ListOfLocalAtoms array of father atom::nr to local atom::nr (reverse of atom::father) * \param *ReferenceStack stack with all the back egdes * \param *LocalStack stack to be filled * \return true - everything ok, false - ReferenceStack was empty */ bool molecule::PickLocalBackEdges(atom **ListOfLocalAtoms, std::deque *&ReferenceStack, std::deque *&LocalStack) const { bool status = true; if (ReferenceStack->empty()) { DoLog(1) && (Log() << Verbose(1) << "ReferenceStack is empty!" << endl); return false; } bond *Binder = ReferenceStack->front(); ReferenceStack->pop_front(); bond *FirstBond = Binder; // mark the first bond, so that we don't loop through the stack indefinitely atom *Walker = NULL, *OtherAtom = NULL; ReferenceStack->push_front(Binder); do { // go through all bonds and push local ones Walker = ListOfLocalAtoms[Binder->leftatom->nr]; // get one atom in the reference molecule if (Walker != NULL) // if this Walker exists in the subgraph ... for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) { OtherAtom = (*Runner)->GetOtherAtom(Walker); if (OtherAtom == ListOfLocalAtoms[(*Runner)->rightatom->nr]) { // found the bond LocalStack->push_front((*Runner)); DoLog(3) && (Log() << Verbose(3) << "Found local edge " << *(*Runner) << "." 
<< endl); break; } } ASSERT(!ReferenceStack->empty(), "molecule::PickLocalBackEdges() - ReferenceStack is empty!"); Binder = ReferenceStack->front(); // loop the stack for next item ReferenceStack->pop_front(); DoLog(3) && (Log() << Verbose(3) << "Current candidate edge " << Binder << "." << endl); ReferenceStack->push_front(Binder); } while (FirstBond != Binder); return status; } ; void BreadthFirstSearchAdd_Init(struct BFSAccounting &BFS, atom *&Root, int AtomCount, int BondOrder, atom **AddedAtomList = NULL) { BFS.AtomCount = AtomCount; BFS.BondOrder = BondOrder; BFS.PredecessorList = new atom*[AtomCount]; BFS.ShortestPathList = new int[AtomCount]; BFS.ColorList = new enum Shading[AtomCount]; BFS.BFSStack = new std::deque (AtomCount); BFS.Root = Root; BFS.BFSStack->clear(); BFS.BFSStack->push_front(Root); // initialise each vertex as white with no predecessor, empty queue, color Root lightgray for (int i = AtomCount; i--;) { BFS.PredecessorList[i] = NULL; BFS.ShortestPathList[i] = -1; if ((AddedAtomList != NULL) && (AddedAtomList[i] != NULL)) // mark already present atoms (i.e. Root and maybe others) as visited BFS.ColorList[i] = lightgray; else BFS.ColorList[i] = white; } //BFS.ShortestPathList[Root->nr] = 0; // done by Calloc } ; void BreadthFirstSearchAdd_Free(struct BFSAccounting &BFS) { delete[](BFS.PredecessorList); delete[](BFS.ShortestPathList); delete[](BFS.ColorList); delete (BFS.BFSStack); BFS.AtomCount = 0; } ; void BreadthFirstSearchAdd_UnvisitedNode(molecule *Mol, struct BFSAccounting &BFS, atom *&Walker, atom *&OtherAtom, bond *&Binder, bond *&Bond, atom **&AddedAtomList, bond **&AddedBondList, bool IsAngstroem) { if (Binder != Bond) // let other atom white if it's via Root bond. 
In case it's cyclic it has to be reached again (yet Root is from OtherAtom already black, thus no problem) BFS.ColorList[OtherAtom->nr] = lightgray; BFS.PredecessorList[OtherAtom->nr] = Walker; // Walker is the predecessor BFS.ShortestPathList[OtherAtom->nr] = BFS.ShortestPathList[Walker->nr] + 1; DoLog(2) && (Log() << Verbose(2) << "Coloring OtherAtom " << OtherAtom->getName() << " " << ((BFS.ColorList[OtherAtom->nr] == white) ? "white" : "lightgray") << ", its predecessor is " << Walker->getName() << " and its Shortest Path is " << BFS.ShortestPathList[OtherAtom->nr] << " egde(s) long." << endl); if ((((BFS.ShortestPathList[OtherAtom->nr] < BFS.BondOrder) && (Binder != Bond)))) { // Check for maximum distance DoLog(3) && (Log() << Verbose(3)); if (AddedAtomList[OtherAtom->nr] == NULL) { // add if it's not been so far AddedAtomList[OtherAtom->nr] = Mol->AddCopyAtom(OtherAtom); DoLog(0) && (Log() << Verbose(0) << "Added OtherAtom " << OtherAtom->getName()); AddedBondList[Binder->nr] = Mol->CopyBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder); DoLog(0) && (Log() << Verbose(0) << " and bond " << *(AddedBondList[Binder->nr]) << ", "); } else { // this code should actually never come into play (all white atoms are not yet present in BondMolecule, that's why they are white in the first place) DoLog(0) && (Log() << Verbose(0) << "Not adding OtherAtom " << OtherAtom->getName()); if (AddedBondList[Binder->nr] == NULL) { AddedBondList[Binder->nr] = Mol->CopyBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder); DoLog(0) && (Log() << Verbose(0) << ", added Bond " << *(AddedBondList[Binder->nr])); } else DoLog(0) && (Log() << Verbose(0) << ", not added Bond "); } DoLog(0) && (Log() << Verbose(0) << ", putting OtherAtom into queue." 
<< endl); BFS.BFSStack->push_front(OtherAtom); } else { // out of bond order, then replace if ((AddedAtomList[OtherAtom->nr] == NULL) && (Binder->Cyclic)) BFS.ColorList[OtherAtom->nr] = white; // unmark if it has not been queued/added, to make it available via its other bonds (cyclic) if (Binder == Bond) DoLog(3) && (Log() << Verbose(3) << "Not Queueing, is the Root bond"); else if (BFS.ShortestPathList[OtherAtom->nr] >= BFS.BondOrder) DoLog(3) && (Log() << Verbose(3) << "Not Queueing, is out of Bond Count of " << BFS.BondOrder); if (!Binder->Cyclic) DoLog(0) && (Log() << Verbose(0) << ", is not part of a cyclic bond, saturating bond with Hydrogen." << endl); if (AddedBondList[Binder->nr] == NULL) { if ((AddedAtomList[OtherAtom->nr] != NULL)) { // .. whether we add or saturate AddedBondList[Binder->nr] = Mol->CopyBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder); } else { #ifdef ADDHYDROGEN if (!Mol->AddHydrogenReplacementAtom(Binder, AddedAtomList[Walker->nr], Walker, OtherAtom, IsAngstroem)) exit(1); #endif } } } } ; void BreadthFirstSearchAdd_VisitedNode(molecule *Mol, struct BFSAccounting &BFS, atom *&Walker, atom *&OtherAtom, bond *&Binder, bond *&Bond, atom **&AddedAtomList, bond **&AddedBondList, bool IsAngstroem) { DoLog(3) && (Log() << Verbose(3) << "Not Adding, has already been visited." << endl); // This has to be a cyclic bond, check whether it's present ... 
if (AddedBondList[Binder->nr] == NULL) { if ((Binder != Bond) && (Binder->Cyclic) && (((BFS.ShortestPathList[Walker->nr] + 1) < BFS.BondOrder))) { AddedBondList[Binder->nr] = Mol->CopyBond(AddedAtomList[Walker->nr], AddedAtomList[OtherAtom->nr], Binder); } else { // if it's root bond it has to broken (otherwise we would not create the fragments) #ifdef ADDHYDROGEN if(!Mol->AddHydrogenReplacementAtom(Binder, AddedAtomList[Walker->nr], Walker, OtherAtom, IsAngstroem)) exit(1); #endif } } } ; /** Adds atoms up to \a BondCount distance from \a *Root and notes them down in \a **AddedAtomList. * Gray vertices are always enqueued in an std::deque FIFO queue, the rest is usual BFS with adding vertices found was * white and putting into queue. * \param *out output stream for debugging * \param *Mol Molecule class to add atoms to * \param **AddedAtomList list with added atom pointers, index is atom father's number * \param **AddedBondList list with added bond pointers, index is bond father's number * \param *Root root vertex for BFS * \param *Bond bond not to look beyond * \param BondOrder maximum distance for vertices to add * \param IsAngstroem lengths are in angstroem or bohrradii */ void molecule::BreadthFirstSearchAdd(molecule *Mol, atom **&AddedAtomList, bond **&AddedBondList, atom *Root, bond *Bond, int BondOrder, bool IsAngstroem) { struct BFSAccounting BFS; atom *Walker = NULL, *OtherAtom = NULL; bond *Binder = NULL; // add Root if not done yet if (AddedAtomList[Root->nr] == NULL) // add Root if not yet present AddedAtomList[Root->nr] = Mol->AddCopyAtom(Root); BreadthFirstSearchAdd_Init(BFS, Root, BondOrder, getAtomCount(), AddedAtomList); // and go on ... Queue always contains all lightgray vertices while (!BFS.BFSStack->empty()) { // we have to pop the oldest atom from stack. This keeps the atoms on the stack always of the same ShortestPath distance. // e.g. if current atom is 2, push to end of stack are of length 3, but first all of length 2 would be popped. 
They again // append length of 3 (their neighbours). Thus on stack we have always atoms of a certain length n at bottom of stack and // followed by n+1 till top of stack. Walker = BFS.BFSStack->front(); // pop oldest added BFS.BFSStack->pop_front(); DoLog(1) && (Log() << Verbose(1) << "Current Walker is: " << Walker->getName() << ", and has " << Walker->ListOfBonds.size() << " bonds." << endl); for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) { if ((*Runner) != NULL) { // don't look at bond equal NULL Binder = (*Runner); OtherAtom = (*Runner)->GetOtherAtom(Walker); DoLog(2) && (Log() << Verbose(2) << "Current OtherAtom is: " << OtherAtom->getName() << " for bond " << *(*Runner) << "." << endl); if (BFS.ColorList[OtherAtom->nr] == white) { BreadthFirstSearchAdd_UnvisitedNode(Mol, BFS, Walker, OtherAtom, Binder, Bond, AddedAtomList, AddedBondList, IsAngstroem); } else { BreadthFirstSearchAdd_VisitedNode(Mol, BFS, Walker, OtherAtom, Binder, Bond, AddedAtomList, AddedBondList, IsAngstroem); } } } BFS.ColorList[Walker->nr] = black; DoLog(1) && (Log() << Verbose(1) << "Coloring Walker " << Walker->getName() << " black." 
<< endl); } BreadthFirstSearchAdd_Free(BFS); } ; /** Adds a bond as a copy to a given one * \param *left leftatom of new bond * \param *right rightatom of new bond * \param *CopyBond rest of fields in bond are copied from this * \return pointer to new bond */ bond * molecule::CopyBond(atom *left, atom *right, bond *CopyBond) { bond *Binder = AddBond(left, right, CopyBond->BondDegree); Binder->Cyclic = CopyBond->Cyclic; Binder->Type = CopyBond->Type; return Binder; } ; void BuildInducedSubgraph_Init(atom **&ParentList, int AtomCount) { // reset parent list ParentList = new atom*[AtomCount]; for (int i=0;ibegin(); iter != mol->end(); ++iter) { ParentList[(*iter)->father->nr] = (*iter); // Outputting List for debugging DoLog(4) && (Log() << Verbose(4) << "Son[" << (*iter)->father->nr << "] of " << (*iter)->father << " is " << ParentList[(*iter)->father->nr] << "." << endl); } }; void BuildInducedSubgraph_Finalize(atom **&ParentList) { delete[](ParentList); } ; bool BuildInducedSubgraph_CreateBondsFromParent(molecule *mol, const molecule *Father, atom **&ParentList) { bool status = true; atom *OtherAtom = NULL; // check each entry of parent list and if ok (one-to-and-onto matching) create bonds DoLog(3) && (Log() << Verbose(3) << "Creating bonds." << endl); for (molecule::const_iterator iter = Father->begin(); iter != Father->end(); ++iter) { if (ParentList[(*iter)->nr] != NULL) { if (ParentList[(*iter)->nr]->father != (*iter)) { status = false; } else { for (BondList::const_iterator Runner = (*iter)->ListOfBonds.begin(); Runner != (*iter)->ListOfBonds.end(); (++Runner)) { OtherAtom = (*Runner)->GetOtherAtom((*iter)); if (ParentList[OtherAtom->nr] != NULL) { // if otheratom is also a father of an atom on this molecule, create the bond DoLog(4) && (Log() << Verbose(4) << "Endpoints of Bond " << (*Runner) << " are both present: " << ParentList[(*iter)->nr]->getName() << " and " << ParentList[OtherAtom->nr]->getName() << "." 
<< endl); mol->AddBond(ParentList[(*iter)->nr], ParentList[OtherAtom->nr], (*Runner)->BondDegree); } } } } } return status; } ; /** Adds bond structure to this molecule from \a Father molecule. * This basically causes this molecule to become an induced subgraph of the \a Father, i.e. for every bond in Father * with end points present in this molecule, bond is created in this molecule. * Special care was taken to ensure that this is of complexity O(N), where N is the \a Father's molecule::AtomCount. * \param *out output stream for debugging * \param *Father father molecule * \return true - is induced subgraph, false - there are atoms with fathers not in \a Father * \todo not checked, not fully working probably */ bool molecule::BuildInducedSubgraph(const molecule *Father) { bool status = true; atom **ParentList = NULL; DoLog(2) && (Log() << Verbose(2) << "Begin of BuildInducedSubgraph." << endl); BuildInducedSubgraph_Init(ParentList, Father->getAtomCount()); BuildInducedSubgraph_FillParentList(this, Father, ParentList); status = BuildInducedSubgraph_CreateBondsFromParent(this, Father, ParentList); BuildInducedSubgraph_Finalize(ParentList); DoLog(2) && (Log() << Verbose(2) << "End of BuildInducedSubgraph." << endl); return status; } ; /** For a given keyset \a *Fragment, checks whether it is connected in the current molecule. * \param *out output stream for debugging * \param *Fragment Keyset of fragment's vertices * \return true - connected, false - disconnected * \note this is O(n^2) for it's just a bug checker not meant for permanent use! 
*/
bool molecule::CheckForConnectedSubgraph(KeySet *Fragment)
{
  atom *Walker = NULL, *Walker2 = NULL;
  bool BondStatus = false;
  int size;

  DoLog(1) && (Log() << Verbose(1) << "Begin of CheckForConnectedSubgraph" << endl);
  DoLog(2) && (Log() << Verbose(2) << "Disconnected atom: ");

  // count number of atoms in graph
  size = 0;
  for (KeySet::iterator runner = Fragment->begin(); runner != Fragment->end(); runner++)
    size++;
  if (size > 1)
    for (KeySet::iterator runner = Fragment->begin(); runner != Fragment->end(); runner++) {
      Walker = FindAtom(*runner);
      // what is checked: Walker has a bond to at least one other atom of the fragment
      BondStatus = false;
      // stop scanning partners as soon as one bonded partner is found; this early exit
      // used to sit inside the bond loop below, where it could never trigger
      for (KeySet::iterator runners = Fragment->begin(); (!BondStatus) && (runners != Fragment->end()); runners++) {
        Walker2 = FindAtom(*runners);
        for (BondList::const_iterator Runner = Walker->ListOfBonds.begin(); Runner != Walker->ListOfBonds.end(); (++Runner)) {
          if ((*Runner)->GetOtherAtom(Walker) == Walker2) {
            BondStatus = true;
            break;
          }
        }
      }
      if (!BondStatus) {
        DoLog(0) && (Log() << Verbose(0) << (*Walker) << endl);
        return false;
      }
    }
  else {
    // fewer than two atoms: nothing can be disconnected
    DoLog(0) && (Log() << Verbose(0) << "none." << endl);
    return true;
  }
  DoLog(0) && (Log() << Verbose(0) << "none." << endl);

  DoLog(1) && (Log() << Verbose(1) << "End of CheckForConnectedSubgraph" << endl);

  return true;
}