/*
* Project: MoleCuilder
* Description: creates and alters molecular systems
* Copyright (C) 2010-2012 University of Bonn. All rights reserved.
* Copyright (C) 2013 Frederik Heber. All rights reserved.
*
*
* This file is part of MoleCuilder.
*
* MoleCuilder is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* MoleCuilder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MoleCuilder. If not, see .
*/
/*
* Fragmentation.cpp
*
* Created on: Oct 18, 2011
* Author: heber
*/
#ifdef HAVE_CONFIG_H
#include
#endif
#include
#include "CodePatterns/MemDebug.hpp"
#include "Fragmentation.hpp"
#include "CodePatterns/Assert.hpp"
#include "CodePatterns/Info.hpp"
#include "CodePatterns/IteratorAdaptors.hpp"
#include "CodePatterns/Log.hpp"
#include "Atom/atom.hpp"
#include "Bond/bond.hpp"
#include "Descriptors/MoleculeDescriptor.hpp"
#include "Element/element.hpp"
#include "Element/periodentafel.hpp"
#include "Fragmentation/AdaptivityMap.hpp"
#include "Fragmentation/AtomMask.hpp"
#include "Fragmentation/fragmentation_helpers.hpp"
#include "Fragmentation/Graph.hpp"
#include "Fragmentation/helpers.hpp"
#include "Fragmentation/KeySet.hpp"
#include "Fragmentation/PowerSetGenerator.hpp"
#include "Fragmentation/UniqueFragments.hpp"
#include "Graph/BondGraph.hpp"
#include "Graph/AdjacencyList.hpp"
#include "Graph/ListOfLocalAtoms.hpp"
#include "molecule.hpp"
#include "World.hpp"
/** Constructor of class Fragmentation.
*
* \param _mol molecule which we currently fragment
* \param _FileChecker instance contains adjacency parsed from elsewhere
* \param _treatment whether to treat hydrogen special and saturate dangling bonds or not
*/
Fragmentation::Fragmentation(molecule *_mol, AdjacencyList &_FileChecker, const enum HydrogenTreatment _treatment) :
mol(_mol),
treatment(_treatment),
FileChecker(_FileChecker)
{}
/** Destructor of class Fragmentation.
*
*/
Fragmentation::~Fragmentation()
{}
/** Performs a many-body bond order analysis for a given bond order.
*
* \note during fragmentation we switch to so-called local ids, atomic ids
* that are valid only for the specific molecule (representing a connected
* subgraph of the molecular system).
*
* -# create the local id to global id mapping
* -# parse the adjacency file and require the above mapping for translation
* -# initialize a mask for the molecule's atoms, telling which atoms are
* treated and which atoms neglected during fragmentation
* -# parse and orderatsite file and check whether there's something to do. This
allows for iterative calls to fragmentation
* -# fragments from the last fragmentation stored to file are converted into
* keysets (sets of atomic indices that describe one fragment)
* -# in a loop as long as order at site is not correct yet
* -# prepare a stack containing the initial ids where the fragmentation or
* rather the graph algorithms start from
* -# call fragmentBOSSANOVA()
* -# afterwards in case we don't saturate we remove single-atom fragments
* -# translate the local ids of the keysets into global ids.
* -# updated order at site file is written
*
* note that all created fragments or rather their describing key sets are
* contained in the Graph Fragmentation::FragmentList.
*
* \param atomids atomic ids (local to Fragmentation::mol) to fragment, used in AtomMask
* \param Order up to how many neighbouring bonds a fragment contains in BondOrderScheme::BottumUp scheme
* \param prefix prefix string for every fragment file name (may include path)
* \param ParsedFragmentList all already created key sets
* \return 1 - continue, 2 - stop (no fragmentation occured)
*/
int Fragmentation::FragmentMolecule(
const std::vector &atomids,
int Order,
const std::string &prefix,
const Graph &ParsedFragmentList,
const bool _ParseStateFile)
{
std::fstream File;
bool CheckOrder = false;
int TotalNumberOfKeySets = 0;
LOG(0, std::endl);
switch (treatment) {
case ExcludeHydrogen:
LOG(1, "INFO: I will treat hydrogen special.");
break;
case IncludeHydrogen:
LOG(1, "INFO: Hydrogen is treated just like the rest of the lot.");
break;
default:
ASSERT(0, "Fragmentation::FragmentMolecule() - there is a HydrogenTreatment setting which I have no idea about.");
break;
}
// ++++++++++++++++++++++++++++ INITIAL STUFF: Bond structure analysis, file parsing, ... ++++++++++++++++++++++++++++++++++++++++++
bool FragmentationToDo = true;
// ===== 1. Check whether bond structure is same as stored in files ====
// create a lookup global <-> local id for atomic ids valid in World and in molecule
Global_local_bimap_t Global_local_bimap;
for (std::vector::const_iterator iter = atomids.begin();
iter != atomids.end();
++iter) {
const atom * _atom = mol->FindAtom(*iter);
ASSERT( _atom != NULL,
"Fragmentation::FragmentMolecule() - could not find atom "+toString(*iter)+".");
Global_local_bimap.insert(
idpair_t(
(global_t)(_atom->getId()), (local_t)(*iter)
)
);
}
// === compare it with adjacency file ===
{
const std::vector globalids(
MapKeyConstIterator(Global_local_bimap.left.begin()),
MapKeyConstIterator(Global_local_bimap.left.end())
);
AdjacencyList WorldAdjacency(globalids);
FragmentationToDo = FragmentationToDo && (FileChecker > WorldAdjacency);
}
// ===== 2. create AtomMask that takes Saturation condition into account
AtomMask_t AtomMask(atomids);
for (molecule::const_iterator iter = const_cast(mol)->begin();
iter != const_cast(mol)->end();
++iter) {
// remove in hydrogen and we do saturate
if ((treatment == ExcludeHydrogen) && ((*iter)->getType()->getAtomicNumber() == 1)) // skip hydrogen
AtomMask.setFalse((*iter)->getNr());
}
// ===== 4. check globally whether there's something to do actually (first adaptivity check)
if (_ParseStateFile)
FragmentationToDo = FragmentationToDo && ParseOrderAtSiteFromFile(atomids, prefix, Global_local_bimap);
// =================================== Begin of FRAGMENTATION ===============================
// ===== 6a. assign each keyset to its respective subgraph =====
ListOfLocalAtoms_t ListOfLocalAtoms;
Graph FragmentList;
AssignKeySetsToFragment(ParsedFragmentList, ListOfLocalAtoms, FragmentList, true);
// ===== 6b. prepare and go into the adaptive (Order<0), single-step (Order==0) or incremental (Order>0) cycle
KeyStack RootStack;
FragmentationToDo = false; // if CheckOrderAtSite just ones recommends fragmentation, we will save fragments afterwards
bool LoopDoneAlready = false;
while ((CheckOrder = CheckOrderAtSite(AtomMask, ParsedFragmentList, Order, prefix, LoopDoneAlready))) {
FragmentationToDo = FragmentationToDo || CheckOrder;
LoopDoneAlready = true; // last plus one entry is used as marker that we have been through this loop once already in CheckOrderAtSite()
// ===== 6b. fill RootStack for each subgraph (second adaptivity check) =====
FillRootStackForSubgraphs(RootStack, AtomMask);
// call BOSSANOVA method
FragmentBOSSANOVA(mol, FragmentList, RootStack);
}
LOG(3, "DEBUG: CheckOrder is " << CheckOrder << ".");
// ==================================== End of FRAGMENTATION ============================================
// if hydrogen is not treated special, we may have single hydrogens and other
// fragments which are note single-determinant. These need to be removed
if (treatment == IncludeHydrogen) {
// remove all single atom fragments from FragmentList
Graph::iterator iter = FragmentList.begin();
while ( iter != FragmentList.end()) {
if ((*iter).first.size() == 1) {
LOG(1, "INFO: Removing KeySet " << (*iter).first << " as is not single-determinant.");
Graph::iterator eraseiter = iter++;
FragmentList.erase(eraseiter);
} else
++iter;
}
}
// ===== 8a. translate list into global numbers (i.e. ones that are valid in "this" molecule, not in MolecularWalker->Leaf)
TranslateIndicesToGlobalIDs(FragmentList, TotalNumberOfKeySets, TotalGraph);
LOG(1, "STATUS: We have created " << TotalGraph.size() << " fragments.");
// store adaptive orders into file
StoreOrderAtSiteFile(prefix);
return ((int)(!FragmentationToDo)+1); // 1 - continue, 2 - stop (no fragmentation occured)
};
/** Performs BOSSANOVA decomposition at selected sites, increasing the cutoff by one at these sites.
* -# constructs a complete keyset of the molecule
* -# In a loop over all possible roots from the given rootstack
* -# increases order of root site
* -# calls PowerSetGenerator with this order, the complete keyset and the rootkeynr
* -# for all consecutive lower levels PowerSetGenerator is called with the suborder, the higher order keyset
as the restricted one and each site in the set as the root)
* -# these are merged into a fragment list of keysets
* -# All fragment lists (for all orders, i.e. from all destination fields) are merged into one list for return
* Important only is that we create all fragments, it is not important if we create them more than once
* as these copies are filtered out via use of the hash table (KeySet).
* \param *out output stream for debugging
* \param Fragment&*List list of already present keystacks (adaptive scheme) or empty list
* \param &RootStack stack with all root candidates (unequal to each atom in complete molecule if adaptive scheme is applied)
* \return pointer to Graph list
*/
void Fragmentation::FragmentBOSSANOVA(molecule *mol, Graph &FragmentList, KeyStack &RootStack)
{
Info FunctionInfo(__func__);
std::vector *FragmentLowerOrdersList = NULL;
size_t NumLevels = 0;
// size_t NumMolecules = 0;
size_t TotalNumMolecules = 0;
int *NumMoleculesOfOrder = NULL;
int Order = 0;
int UpgradeCount = RootStack.size();
KeyStack FragmentRootStack;
int RootKeyNr = 0;
int RootNr = 0;
// FragmentLowerOrdersList is a 2D-array of pointer to vector of molecule objects, one dimension represents the ANOVA expansion of a single order (i.e. 5)
// with all needed lower orders that are subtracted, the other dimension is the BondOrder (i.e. from 1 to 5)
NumMoleculesOfOrder = new int[UpgradeCount];
FragmentLowerOrdersList = new std::vector[UpgradeCount];
for(int i=0;iFindAtom(RootKeyNr);
// check cyclic lengths
//if ((MinimumRingSize[Walker->GetTrueFather()->getNr()] != -1) && (Walker->GetTrueFather()->getAdaptiveOrder()+1 > MinimumRingSize[Walker->GetTrueFather()->getNr()])) {
// LOG(0, "Bond order " << Walker->GetTrueFather()->getAdaptiveOrder() << " of Root " << *Walker << " greater than or equal to Minimum Ring size of " << MinimumRingSize << " found is not allowed.");
//} else
{
// set adaptive order to desired max order
Walker->GetTrueFather()->setAdaptiveOrder(Walker->GetTrueFather()->getMaxOrder());
Order = Walker->GetTrueFather()->getAdaptiveOrder();
Walker->setAdaptiveOrder(Order);
// allocate memory for all lower level orders
NumLevels = Order;
FragmentLowerOrdersList[RootNr].resize(NumLevels, NULL);
for( size_t i=0;igetAdaptiveOrder());
// create top order where nothing is reduced
LOG(0, "==============================================================================================================");
LOG(0, "Creating KeySets up till Bond Order " << Order << " for " << *Walker << ", " << (RootStack.size()-RootNr) << " Roots remaining."); // , NumLevels is " << NumLevels << "
// Create list of Graphs of current Bond Order (i.e. F_{ij})
NumMoleculesOfOrder[RootNr] = PSG(RestrictedKeyset, treatment);
// output resulting number
LOG(1, "INFO: Number of resulting KeySets is: " << NumMoleculesOfOrder[RootNr] << ".");
// if (NumMoleculesOfOrder[RootNr] != 0) {
// NumMolecules = 0;
// }
// now, we have completely filled each cell of FragmentLowerOrdersList[] for the current Walker->getAdaptiveOrder()
//NumMoleculesOfOrder[Walker->getAdaptiveOrder()-1] = NumMolecules;
TotalNumMolecules += NumMoleculesOfOrder[RootNr];
// LOG(1, "Number of resulting molecules for Order " << (int)Walker->GetTrueFather()->getAdaptiveOrder() << " is: " << NumMoleculesOfOrder[RootNr] << ".");
RootStack.push_back(RootKeyNr); // put back on stack
RootNr++;
}
}
LOG(0, "==============================================================================================================");
LOG(0, "\tTotal number of resulting fragments is: " << TotalNumMolecules << ".");
LOG(0, "==============================================================================================================");
// now, FragmentLowerOrdersList is complete, it looks - for BondOrder 5 - as this (number is the ANOVA Order of the terms therein)
// 5433222211111111
// 43221111
// 3211
// 21
// 1
// Subsequently, we combine all into a single list (FragmentList)
CombineAllOrderListIntoOne(FragmentList, FragmentLowerOrdersList, RootStack, mol);
FreeAllOrdersList(FragmentLowerOrdersList, RootStack, mol);
delete[](NumMoleculesOfOrder);
};
/** Estimates by educated guessing (using upper limit) the expected number of fragments.
* The upper limit is
* \f[
* n = N \cdot C^k
* \f]
* where \f$C=2^c\f$ and c is the maximum bond degree over N number of atoms.
* \param *out output stream for debugging
* \param order bond order k
* \return number n of fragments
*/
int Fragmentation::GuesstimateFragmentCount(int order)
{
size_t c = 0;
int FragmentCount;
// get maximum bond degree
for (molecule::const_iterator iter = const_cast(mol)->begin();
iter != const_cast(mol)->end();
++iter) {
const BondList& ListOfBonds = (*iter)->getListOfBonds();
c = (ListOfBonds.size() > c) ? ListOfBonds.size() : c;
}
FragmentCount = (treatment == ExcludeHydrogen ? mol->getNoNonHydrogen() : mol->getAtomCount()) *(1 << (c*order));
LOG(1, "INFO: Upper limit for this subgraph is " << FragmentCount << " for "
<< mol->getNoNonHydrogen() << " non-H atoms with maximum bond degree of " << c << ".");
return FragmentCount;
};
/** Checks whether the OrderAtSite is still below \a Order at some site.
* \param AtomMask defines true/false per global Atom::Nr to mask in/out each nuclear site, used to activate given number of site to increment order adaptively
* \param *GlobalKeySetList list of keysets with global ids (valid in "this" molecule) needed for adaptive increase
* \param Order desired Order if positive, desired exponent in threshold criteria if negative (0 is single-step)
* \param path path to ENERGYPERFRAGMENT file (may be NULL if Order is non-negative)
* \param LoopDoneAlready indicate whether we have done a fragmentation loop already
* \return true - needs further fragmentation, false - does not need fragmentation
*/
bool Fragmentation::CheckOrderAtSite(AtomMask_t &AtomMask, const Graph &GlobalKeySetList, int Order, const std::string &path, bool LoopDoneAlready)
{
bool status = false;
if (Order < 0) { // adaptive increase of BondOrder per site
if (LoopDoneAlready) // break after one step
return false;
// transmorph graph keyset list into indexed KeySetList
AdaptivityMap * IndexKeySetList = GlobalKeySetList.GraphToAdaptivityMap();
// parse the EnergyPerFragment file
IndexKeySetList->ScanAdaptiveFileIntoMap(path); // (Root No., (Value, Order)) !
// then map back onto (Value, (Root Nr., Order)) (i.e. sorted by value to pick the highest ones)
IndexKeySetList->ReMapAdaptiveCriteriaListToValue(mol);
// pick the ones still below threshold and mark as to be adaptively updated
if (IndexKeySetList->IsAdaptiveCriteriaListEmpty()) {
ELOG(2, "Unable to parse file, incrementing all.");
status = true;
} else {
// mark as false all sites that are below threshold already
status = IndexKeySetList->MarkUpdateCandidates(AtomMask, Order, mol);
}
delete[](IndexKeySetList);
} else { // global increase of Bond Order
for(molecule::iterator iter = mol->begin(); iter != mol->end(); ++iter) {
atom * const Walker = *iter;
if (AtomMask.isTrue(Walker->getNr())) { // skip masked out
Walker->setMaxOrder((Order != 0 ? Order : Walker->getMaxOrder()+1));
// remove all that have reached desired order
if (Walker->getAdaptiveOrder() >= Walker->getMaxOrder()) // && (Walker->getAdaptiveOrder() < MinimumRingSize[Walker->getNr()]))
AtomMask.setFalse(Walker->getNr());
else
status = true;
}
}
if ((!Order) && (!LoopDoneAlready)) // single stepping, just check
status = true;
if (!status) {
if (Order == 0)
LOG(1, "INFO: Single stepping done.");
else
LOG(1, "INFO: Order at every site is already equal or above desired order " << Order << ".");
}
}
PrintAtomMask(AtomMask, AtomMask.size()); // for debugging
return status;
};
/** Stores pairs (Atom::Nr, Atom::AdaptiveOrder) into file.
* Atoms not present in the file get "-1".
* \param &path path to file ORDERATSITEFILE
* \return true - file writable, false - not writable
*/
bool Fragmentation::StoreOrderAtSiteFile(
const std::string &path)
{
string line;
ofstream file;
line = path + ORDERATSITEFILE;
file.open(line.c_str(), std::ofstream::out | std::ofstream::app);
std::stringstream output;
output << "INFO: Writing OrderAtSite " << ORDERATSITEFILE << " ... ";
if (file.good()) {
for (molecule::const_iterator iter = const_cast(mol)->begin();
iter != const_cast(mol)->end();
++iter) {
file << (*iter)->getId()
<< "\t" << (int)(*iter)->getAdaptiveOrder()
<< "\t" << (int)(*iter)->getMaxOrder() << std::endl;
}
file.close();
output << "done.";
return true;
} else {
output << "failed to open file " << line << ".";
return false;
}
LOG(1, output.str());
return true;
};
/** Parses pairs(Atom::Nr, Atom::AdaptiveOrder) from file and stores in molecule's Atom's.
* Atoms not present in the file get "0".
* \param atomids atoms to fragment, used in AtomMask
* \param &path path to file ORDERATSITEFILE
* \param global_local_bimap translate global to local id
* \return true - file found and scanned, false - file not found
* \sa ParseKeySetFile() and CheckAdjacencyFileAgainstMolecule() as this is meant to be used in conjunction with the two
*/
bool Fragmentation::ParseOrderAtSiteFromFile(
const std::vector &atomids,
const std::string &path,
const Global_local_bimap_t &global_local_bimap)
{
// Info FunctionInfo(__func__);
typedef unsigned char order_t;
typedef std::map OrderArray_t;
OrderArray_t OrderArray;
AtomMask_t MaxArray(atomids);
bool status;
int AtomNr, ordervalue, maxvalue;
string line;
ifstream file;
line = path + ORDERATSITEFILE;
file.open(line.c_str());
if (file.good()) {
while (!file.eof()) { // parse from file
AtomNr = -1;
file >> AtomNr;
file >> ordervalue;
file >> maxvalue;
if (AtomNr != -1) { // test whether we really parsed something (this is necessary, otherwise last atom is set twice and to 0 on second time)
// parsed id is global, must be translated to local id
Global_local_bimap_t::left_const_iterator iter = global_local_bimap.left.find(AtomNr);
// skip global ids we don't know about, must be in other molecule
if (iter != global_local_bimap.left.end()) {
const int LocalId = iter->second;
OrderArray[LocalId] = ordervalue;
MaxArray.setValue(LocalId, (bool)maxvalue);
//LOG(2, "AtomNr " << LocalId << " with order " << (int)OrderArray[LocalId] << " and max order set to " << (int)MaxArray[LocalId] << ".");
}
}
}
file.close();
// set atom values
for(molecule::iterator iter=mol->begin();iter!=mol->end();++iter){
(*iter)->setAdaptiveOrder(OrderArray[(*iter)->getNr()]);
(*iter)->setMaxOrder(OrderArray[(*iter)->getNr()]); //MaxArray.isTrue((*iter)->getNr());
}
//SetAtomValueToIndexedArray( OrderArray, &atom::getNr(), &atom::AdaptiveOrder );
//SetAtomValueToIndexedArray( MaxArray, &atom::getNr(), &atom::MaxOrder );
status = true;
} else {
ELOG(1, "Failed to open OrdersAtSite file " << line << ".");
status = false;
}
return status;
};
/** Fills the root stack for sites to be used as root in fragmentation depending on order or adaptivity criteria
* Again, as in \sa FillBondStructureFromReference steps recursively through each Leaf in this chain list of molecule's.
* \param &RootStack stack to be filled
* \param AtomMask defines true/false per global Atom::Nr to mask in/out each nuclear site
* \return true - stack is non-empty, fragmentation necessary, false - stack is empty, no more sites to update
*/
void Fragmentation::FillRootStackForSubgraphs(KeyStack &RootStack, const AtomMask_t &AtomMask)
{
for(molecule::const_iterator iter = const_cast(mol)->begin();
iter != const_cast(mol)->end();
++iter) {
const atom * const Father = (*iter)->GetTrueFather();
if (AtomMask.isTrue(Father->getNr())) // apply mask
if ((treatment == IncludeHydrogen) || ((*iter)->getType()->getAtomicNumber() != 1)) // skip hydrogen
RootStack.push_front((*iter)->getNr());
}
}
/** The indices per keyset are compared to the respective father's Atom::Nr in each subgraph and thus put into \a **&FragmentList.
* \param *KeySetList list with all keysets
* \param ListOfLocalAtoms Lookup table for each subgraph and index of each atom in global molecule, may be NULL on start, then it is filled
* \param **&FragmentList list to be allocated and returned
* \param FreeList true - ***ListOfLocalAtoms is free'd before return, false - it is not
* \retuen true - success, false - failure
*/
bool Fragmentation::AssignKeySetsToFragment(const Graph &KeySetList, ListOfLocalAtoms_t &ListOfLocalAtoms, Graph &FragmentList, bool FreeList)
{
// Info FunctionInfo(__func__);
bool status = true;
size_t KeySetCounter = 0;
// fill ListOfLocalAtoms if NULL was given
if (!mol->FillListOfLocalAtoms(ListOfLocalAtoms, mol->getAtomCount())) {
ELOG(1, "Filling of ListOfLocalAtoms failed.");
return false;
}
if (KeySetList.size() != 0) { // if there are some scanned keysets at all
// assign scanned keysets
KeySet TempSet;
for (Graph::const_iterator runner = KeySetList.begin(); runner != KeySetList.end(); runner++) { // key sets contain global numbers!
if (ListOfLocalAtoms[mol->FindAtom(*((*runner).first.begin()))->getNr()] != NULL) {// as we may assume that that bond structure is unchanged, we only test the first key in each set
// translate keyset to local numbers
for (KeySet::iterator sprinter = (*runner).first.begin(); sprinter != (*runner).first.end(); sprinter++)
TempSet.insert(ListOfLocalAtoms[mol->FindAtom(*sprinter)->getNr()]->getNr());
// insert into FragmentList
FragmentList.insert(GraphPair(TempSet, pair (KeySetCounter++, (*runner).second.second)));
}
TempSet.clear();
}
} else
ELOG(2, "KeySetList is NULL or empty.");
if (FreeList) {
// free the index lookup list
ListOfLocalAtoms.clear();
}
return status;
}
/** Translate list into global numbers (i.e. ones that are valid in "this" molecule, not in MolecularWalker->Leaf)
* \param &FragmentList Graph with local numbers per fragment
* \param &TotalNumberOfKeySets global key set counter
* \param &TotalGraph Graph to be filled with global numbers
*/
void Fragmentation::TranslateIndicesToGlobalIDs(Graph &FragmentList, int &TotalNumberOfKeySets, Graph &TotalGraph)
{
// Info FunctionInfo(__func__);
for (Graph::iterator runner = FragmentList.begin(); runner != FragmentList.end(); runner++) {
KeySet TempSet;
for (KeySet::iterator sprinter = (*runner).first.begin(); sprinter != (*runner).first.end(); sprinter++)
TempSet.insert((mol->FindAtom(*sprinter))->GetTrueFather()->getId());
TotalGraph.insert(GraphPair(TempSet, pair (TotalNumberOfKeySets++, (*runner).second.second)));
}
}