/* * Project: MoleCuilder * Description: creates and alters molecular systems * Copyright (C) 2010-2012 University of Bonn. All rights reserved. * Please see the LICENSE file or "Copyright notice" in builder.cpp for details. */ /* * TremoloParser.cpp * * Created on: Mar 2, 2010 * Author: metzler */ // include config.h #ifdef HAVE_CONFIG_H #include #endif #include "CodePatterns/MemDebug.hpp" #include "CodePatterns/Assert.hpp" #include "CodePatterns/Log.hpp" #include "CodePatterns/toString.hpp" #include "CodePatterns/Verbose.hpp" #include "TremoloParser.hpp" #include "Atom/atom.hpp" #include "Bond/bond.hpp" #include "Box.hpp" #include "Descriptors/AtomIdDescriptor.hpp" #include "Element/element.hpp" #include "Element/periodentafel.hpp" #include "LinearAlgebra/RealSpaceMatrix.hpp" #include "molecule.hpp" #include "MoleculeListClass.hpp" #include "World.hpp" #include "WorldTime.hpp" #include #include #include #include #include #include #include #include // declare specialized static variables const std::string FormatParserTrait::name = "tremolo"; const std::string FormatParserTrait::suffix = "data"; const ParserTypes FormatParserTrait::type = tremolo; /** * Constructor. */ FormatParser< tremolo >::FormatParser() : FormatParser_common(NULL) { knownKeys["x"] = TremoloKey::x; knownKeys["u"] = TremoloKey::u; knownKeys["F"] = TremoloKey::F; knownKeys["stress"] = TremoloKey::stress; knownKeys["Id"] = TremoloKey::Id; knownKeys["neighbors"] = TremoloKey::neighbors; knownKeys["imprData"] = TremoloKey::imprData; knownKeys["GroupMeasureTypeNo"] = TremoloKey::GroupMeasureTypeNo; knownKeys["type"] = TremoloKey::type; knownKeys["extType"] = TremoloKey::extType; knownKeys["name"] = TremoloKey::name; knownKeys["resName"] = TremoloKey::resName; knownKeys["chainID"] = TremoloKey::chainID; knownKeys["resSeq"] = TremoloKey::resSeq; knownKeys["occupancy"] = TremoloKey::occupancy; knownKeys["tempFactor"] = TremoloKey::tempFactor; knownKeys["segID"] = TremoloKey::segID; knownKeys["Charge"] = TremoloKey::Charge; knownKeys["charge"] = TremoloKey::charge; knownKeys["GrpTypeNo"] = TremoloKey::GrpTypeNo; knownKeys["torsion"] = TremoloKey::torsion; createKnownTypesByIdentity(); // default behavior: use all possible keys on output for (std::map::iterator iter = knownKeys.begin(); iter != knownKeys.end(); ++iter) usedFields.push_back(iter->first); // and noKey afterwards(!) such that it is not used in usedFields knownKeys[" "] = TremoloKey::noKey; // with this we can detect invalid keys // invert knownKeys for debug output for (std::map::iterator iter = knownKeys.begin(); iter != knownKeys.end(); ++iter) knownKeyNames.insert( make_pair( iter->second, iter->first) ); additionalAtomData.clear(); } /** * Destructor. */ FormatParser< tremolo >::~FormatParser() { LOG(1, "INFO: Clearing usedFields."); usedFields.clear(); additionalAtomData.clear(); knownKeys.clear(); } /** * Loads atoms from a tremolo-formatted file. * * \param tremolo file */ void FormatParser< tremolo >::load(istream* file) { std::string line; std::string::size_type location; // reset the id maps resetIdAssociations(); LOG(1, "INFO: Clearing usedFields."); usedFields.clear(); molecule *newmol = World::getInstance().createMolecule(); newmol->ActiveFlag = true; // TODO: Remove the insertion into molecule when saving does not depend on them anymore. Also, remove molecule.hpp include World::getInstance().getMolecules()->insert(newmol); while (file->good()) { std::getline(*file, line, '\n'); if (usedFields.empty()) { location = line.find("ATOMDATA", 0); if (location != string::npos) { parseAtomDataKeysLine(line, location + 8); } } if (line.length() > 0 && line.at(0) != '#') { readAtomDataLine(line, newmol); } } LOG(3, "usedFields after load contains: " << usedFields); // refresh atom::nr and atom::name std::vector atoms(newmol->getAtomCount()); std::transform(newmol->begin(), newmol->end(), atoms.begin(), boost::bind(&atom::getId, _1)); processNeighborInformation(atoms); adaptImprData(); adaptTorsion(); } /** * Saves the \a atoms into as a tremolo file. * * \param file where to save the state * \param atoms atoms to store */ void FormatParser< tremolo >::save(ostream* file, const std::vector &AtomList) { LOG(0, "Saving changes to tremolo."); std::vector::const_iterator atomIt; /*vector::iterator it;*/ std::vector::iterator it = unique(usedFields.begin(), usedFields.end()); // skips all duplicates in the vector LOG(3, "INFO: usedFields before save contains: " << usedFields); //LOG(3, "INFO: additionalAtomData contains: " << additionalAtomData); // distribute continuous indices resetIdAssociations(); atomId_t lastid = 0; for (atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) { associateLocaltoGlobalId(++lastid, (*atomIt)->getId()); } // store Atomdata line *file << "# ATOMDATA"; for (it=usedFields.begin(); it < usedFields.end(); it++) { *file << "\t" << *it; } *file << endl; // store Box info *file << "# Box"; const RealSpaceMatrix &M = World::getInstance().getDomain().getM(); for (size_t i=0; i::AtomInserted(atomId_t id) { std::map::iterator iter = additionalAtomData.find(id); ASSERT(iter == additionalAtomData.end(), "FormatParser< tremolo >::AtomInserted() - additionalAtomData already present for newly added atom " +toString(id)+"."); // don't add entry, as this gives a default resSeq of 0 not the molecule id // additionalAtomData.insert( std::make_pair(id, TremoloAtomInfoContainer()) ); } /** Remove additional AtomData info, when atom has been removed from World. * * @param id of atom */ void FormatParser< tremolo >::AtomRemoved(atomId_t id) { std::map::iterator iter = additionalAtomData.find(id); // as we do not insert AtomData on AtomInserted, we cannot be assured of its presence // ASSERT(iter != additionalAtomData.end(), // "FormatParser< tremolo >::AtomRemoved() - additionalAtomData is not present for atom " // +toString(id)+" to remove."); if (iter != additionalAtomData.end()) additionalAtomData.erase(iter); } /** * Sets the keys for which data should be written to the stream when save is * called. * * \param string of field names with the same syntax as for an ATOMDATA line * but without the prexix "ATOMDATA" */ void FormatParser< tremolo >::setFieldsForSave(std::string atomDataLine) { parseAtomDataKeysLine(atomDataLine, 0); } /** * Writes one line of tremolo-formatted data to the provided stream. * * \param stream where to write the line to * \param reference to the atom of which information should be written */ void FormatParser< tremolo >::saveLine(ostream* file, atom* currentAtom) { std::vector::iterator it; TremoloKey::atomDataKey currentField; LOG(4, "INFO: Saving atom " << *currentAtom << ", its father id is " << currentAtom->GetTrueFather()->getId()); for (it = usedFields.begin(); it != usedFields.end(); it++) { currentField = knownKeys[it->substr(0, it->find("="))]; switch (currentField) { case TremoloKey::x : // for the moment, assume there are always three dimensions LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << currentAtom->getPosition()); *file << currentAtom->at(0) << "\t"; *file << currentAtom->at(1) << "\t"; *file << currentAtom->at(2) << "\t"; break; case TremoloKey::u : // for the moment, assume there are always three dimensions LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << currentAtom->getAtomicVelocity()); *file << currentAtom->getAtomicVelocity()[0] << "\t"; *file << currentAtom->getAtomicVelocity()[1] << "\t"; *file << currentAtom->getAtomicVelocity()[2] << "\t"; break; case TremoloKey::type : if (additionalAtomData.count(currentAtom->getId())) { if (additionalAtomData[currentAtom->getId()].get(currentField) != "-") { LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField)); *file << additionalAtomData[currentAtom->getId()].get(currentField) << "\t"; } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " default value: " << currentAtom->getType()->getSymbol()); *file << currentAtom->getType()->getSymbol() << "\t"; } } else if (additionalAtomData.count(currentAtom->GetTrueFather()->getId())) { if (additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField) != "-") { LOG(3, "Writing for type " << knownKeyNames[currentField] << " stuff from father: " << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField)); *file << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField) << "\t"; } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " default value from father: " << currentAtom->GetTrueFather()->getType()->getSymbol()); *file << currentAtom->GetTrueFather()->getType()->getSymbol() << "\t"; } } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " its default value: " << currentAtom->getType()->getSymbol()); *file << currentAtom->getType()->getSymbol() << "\t"; } break; case TremoloKey::Id : LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << currentAtom->getId()+1); *file << getLocalId(currentAtom->getId()) << "\t"; break; case TremoloKey::neighbors : LOG(3, "Writing type " << knownKeyNames[currentField]); writeNeighbors(file, atoi(it->substr(it->find("=") + 1, 1).c_str()), currentAtom); break; case TremoloKey::resSeq : if (additionalAtomData.count(currentAtom->getId())) { LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField)); *file << additionalAtomData[currentAtom->getId()].get(currentField); } else if (currentAtom->getMolecule() != NULL) { LOG(3, "Writing for type " << knownKeyNames[currentField] << " its own id: " << currentAtom->getMolecule()->getId()+1); *file << setw(4) << currentAtom->getMolecule()->getId()+1; } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " default value: " << defaultAdditionalData.get(currentField)); *file << defaultAdditionalData.get(currentField); } *file << "\t"; break; case TremoloKey::charge : if (currentAtom->getCharge() == 0.) { if (additionalAtomData.count(currentAtom->getId())) { LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField)); *file << additionalAtomData[currentAtom->getId()].get(currentField); } else if (additionalAtomData.count(currentAtom->GetTrueFather()->getId())) { LOG(3, "Writing for type " << knownKeyNames[currentField] << " stuff from father: " << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField)); *file << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField); } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " AtomInfo::charge : " << currentAtom->getCharge()); *file << currentAtom->getCharge(); } } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " AtomInfo::charge : " << currentAtom->getCharge()); *file << currentAtom->getCharge(); } *file << "\t"; break; default : if (additionalAtomData.count(currentAtom->getId())) { LOG(3, "Writing for type " << knownKeyNames[currentField] << ": " << additionalAtomData[currentAtom->getId()].get(currentField)); *file << additionalAtomData[currentAtom->getId()].get(currentField); } else if (additionalAtomData.count(currentAtom->GetTrueFather()->getId())) { LOG(3, "Writing for type " << knownKeyNames[currentField] << " stuff from father: " << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField)); *file << additionalAtomData[currentAtom->GetTrueFather()->getId()].get(currentField); } else { LOG(3, "Writing for type " << knownKeyNames[currentField] << " the default: " << defaultAdditionalData.get(currentField)); *file << defaultAdditionalData.get(currentField); } *file << "\t"; break; } } *file << endl; } /** * Writes the neighbor information of one atom to the provided stream. * * Note that ListOfBonds of WorldTime::CurrentTime is used. * * \param stream where to write neighbor information to * \param number of neighbors * \param reference to the atom of which to take the neighbor information */ void FormatParser< tremolo >::writeNeighbors(ostream* file, int numberOfNeighbors, atom* currentAtom) { const BondList& ListOfBonds = currentAtom->getListOfBonds(); // sort bonded indices typedef std::set sortedIndices; sortedIndices sortedBonds; for (BondList::const_iterator iter = ListOfBonds.begin(); iter != ListOfBonds.end(); ++iter) sortedBonds.insert(getLocalId((*iter)->GetOtherAtom(currentAtom)->getId())); // print indices sortedIndices::const_iterator currentBond = sortedBonds.begin(); for (int i = 0; i < numberOfNeighbors; i++) { *file << (currentBond != sortedBonds.end() ? (*currentBond) : 0) << "\t"; if (currentBond != sortedBonds.end()) ++currentBond; } } /** * Stores keys from the ATOMDATA line. * * \param line to parse the keys from * \param with which offset the keys begin within the line */ void FormatParser< tremolo >::parseAtomDataKeysLine(std::string line, int offset) { std::string keyword; std::stringstream lineStream; lineStream << line.substr(offset); LOG(1, "INFO: Clearing usedFields in parseAtomDataKeysLine."); usedFields.clear(); while (lineStream.good()) { lineStream >> keyword; if (knownKeys[keyword.substr(0, keyword.find("="))] == TremoloKey::noKey) { // TODO: throw exception about unknown key cout << "Unknown key: " << keyword << " is not part of the tremolo format specification." << endl; break; } usedFields.push_back(keyword); } //LOG(1, "INFO: " << usedFields); } /** Sets the properties per atom to print to .data file by parsing line from * \a atomdata_string. * * We just call \sa FormatParser< tremolo >::parseAtomDataKeysLine() which is left * private., * * @param atomdata_string line to parse with space-separated values */ void FormatParser< tremolo >::setAtomData(const std::string &atomdata_string) { parseAtomDataKeysLine(atomdata_string, 0); } /** * Reads one data line of a tremolo file and interprets it according to the keys * obtained from the ATOMDATA line. * * \param line to parse as an atom * \param *newmol molecule to add atom to */ void FormatParser< tremolo >::readAtomDataLine(std::string line, molecule *newmol = NULL) { std::vector::iterator it; std::stringstream lineStream; atom* newAtom = World::getInstance().createAtom(); const atomId_t atomid = newAtom->getId(); additionalAtomData[atomid] = TremoloAtomInfoContainer(); // fill with default values TremoloAtomInfoContainer *atomInfo = &additionalAtomData[atomid]; TremoloKey::atomDataKey currentField; ConvertTo toDouble; ConvertTo toInt; Vector tempVector; // setup tokenizer, splitting up white-spaced entries typedef boost::tokenizer > tokenizer; boost::char_separator whitespacesep(" \t"); tokenizer tokens(line, whitespacesep); ASSERT(tokens.begin() != tokens.end(), "FormatParser< tremolo >::readAtomDataLine - empty string, need at least ' '!"); tokenizer::iterator tok_iter = tokens.begin(); // then associate each token to each file for (it = usedFields.begin(); it < usedFields.end(); it++) { const std::string keyName = it->substr(0, it->find("=")); currentField = knownKeys[keyName]; const std::string word = *tok_iter; LOG(4, "INFO: Parsing key " << keyName << " with remaining data " << word); switch (currentField) { case TremoloKey::x : // for the moment, assume there are always three dimensions for (int i=0;i::readAtomDataLine() - no value for x["+toString(i)+"]!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); newAtom->set(i, toDouble(*tok_iter)); tok_iter++; } break; case TremoloKey::u : // for the moment, assume there are always three dimensions for (int i=0;i::readAtomDataLine() - no value for u["+toString(i)+"]!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); tempVector[i] = toDouble(*tok_iter); tok_iter++; } newAtom->setAtomicVelocity(tempVector); break; case TremoloKey::type : { ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); std::string element; try { element = knownTypes.getType(*tok_iter); } catch(IllegalParserKeyException) { // clean up World::getInstance().destroyAtom(newAtom); // give an error ELOG(0, "TremoloParser: I do not understand the element token " << *tok_iter << "."); } // put type name into container for later use atomInfo->set(currentField, *tok_iter); LOG(4, "INFO: Parsing element " << (*tok_iter) << " as " << element << " according to KnownTypes."); tok_iter++; newAtom->setType(World::getInstance().getPeriode()->FindElement(element)); ASSERT(newAtom->getType(), "Type was not set for this atom"); break; } case TremoloKey::Id : ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); associateLocaltoGlobalId(toInt(*tok_iter), atomid); tok_iter++; break; case TremoloKey::neighbors : for (int i=0;isubstr(it->find("=") + 1, 1).c_str());i++) { ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); lineStream << *tok_iter << "\t"; tok_iter++; } readNeighbors(&lineStream, atoi(it->substr(it->find("=") + 1, 1).c_str()), atomid); break; case TremoloKey::charge : ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); atomInfo->set(currentField, *tok_iter); newAtom->setCharge(boost::lexical_cast(*tok_iter)); tok_iter++; break; default : ASSERT(tok_iter != tokens.end(), "FormatParser< tremolo >::readAtomDataLine() - no value for "+keyName+"!"); LOG(4, "INFO: Parsing key " << keyName << " with next token " << *tok_iter); atomInfo->set(currentField, *tok_iter); tok_iter++; break; } } LOG(3, "INFO: Parsed atom " << atomid << "."); if (newmol != NULL) newmol->AddAtom(newAtom); } /** * Reads neighbor information for one atom from the input. * * \param line stream where to read the information from * \param numberOfNeighbors number of neighbors to read * \param atomid world id of the atom the information belongs to */ void FormatParser< tremolo >::readNeighbors(std::stringstream* line, int numberOfNeighbors, int atomId) { int neighborId = 0; for (int i = 0; i < numberOfNeighbors; i++) { *line >> neighborId; // 0 is used to fill empty neighbor positions in the tremolo file. if (neighborId > 0) { LOG(4, "INFO: Atom with global id " << atomId << " has neighbour with serial " << neighborId); additionalAtomData[atomId].neighbors.push_back(neighborId); } } } /** * Checks whether the provided name is within the list of used fields. * * \param field name to check * * \return true if the field name is used */ bool FormatParser< tremolo >::isUsedField(std::string fieldName) { bool fieldNameExists = false; for (std::vector::iterator usedField = usedFields.begin(); usedField != usedFields.end(); usedField++) { if (usedField->substr(0, usedField->find("=")) == fieldName) fieldNameExists = true; } return fieldNameExists; } /** * Adds the collected neighbor information to the atoms in the world. The atoms * are found by their current ID and mapped to the corresponding atoms with the * Id found in the parsed file. * * @param atoms vector with all newly added (global) atomic ids */ void FormatParser< tremolo >::processNeighborInformation(const std::vector &atoms) { if (!isUsedField("neighbors")) { return; } for (std::vector::const_iterator iter = atoms.begin(); iter != atoms.end(); ++iter) { ASSERT(additionalAtomData.count(*iter) != 0, "FormatParser< tremolo >::processNeighborInformation() - global id " +toString(*iter)+" unknown in additionalAtomData."); TremoloAtomInfoContainer ¤tInfo = additionalAtomData[*iter]; ASSERT (!currentInfo.neighbors_processed, "FormatParser< tremolo >::processNeighborInformation() - neighbors of new atom " +toString(*iter)+" are already processed."); for(std::vector::const_iterator neighbor = currentInfo.neighbors.begin(); neighbor != currentInfo.neighbors.end(); neighbor++ ) { LOG(3, "INFO: Creating bond between (" << *iter << ") and (" << getGlobalId(*neighbor) << "|" << *neighbor << ")"); ASSERT(getGlobalId(*neighbor) != -1, "FormatParser< tremolo >::processNeighborInformation() - global id to local id " +toString(*neighbor)+" is unknown."); World::getInstance().getAtom(AtomById(*iter)) ->addBond(WorldTime::getTime(), World::getInstance().getAtom(AtomById(getGlobalId(*neighbor)))); } currentInfo.neighbors_processed = true; } } /** * Replaces atom IDs read from the file by the corresponding world IDs. All IDs * IDs of the input string will be replaced; expected separating characters are * "-" and ",". * * \param string in which atom IDs should be adapted * * \return input string with modified atom IDs */ std::string FormatParser< tremolo >::adaptIdDependentDataString(std::string data) { // there might be no IDs if (data == "-") { return "-"; } char separator; int id; std::stringstream line, result; line << data; line >> id; result << getGlobalId(id); while (line.good()) { line >> separator >> id; result << separator << getGlobalId(id); } return result.str(); } /** * Corrects the atom IDs in each imprData entry to the corresponding world IDs * as they might differ from the originally read IDs. */ void FormatParser< tremolo >::adaptImprData() { if (!isUsedField("imprData")) { return; } for(std::map::iterator currentInfo = additionalAtomData.begin(); currentInfo != additionalAtomData.end(); currentInfo++ ) { currentInfo->second.imprData = adaptIdDependentDataString(currentInfo->second.imprData); } } /** * Corrects the atom IDs in each torsion entry to the corresponding world IDs * as they might differ from the originally read IDs. */ void FormatParser< tremolo >::adaptTorsion() { if (!isUsedField("torsion")) { return; } for(std::map::iterator currentInfo = additionalAtomData.begin(); currentInfo != additionalAtomData.end(); currentInfo++ ) { currentInfo->second.torsion = adaptIdDependentDataString(currentInfo->second.torsion); } }