/* * Project: MoleCuilder * Description: creates and alters molecular systems * Copyright (C) 2021 Frederik Heber. All rights reserved. * * * This file is part of MoleCuilder. * * MoleCuilder is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * MoleCuilder is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with MoleCuilder. If not, see . */ /* * Graph6Writer.cpp * * Created on: Apr 2, 2021 * Author: heber */ // include config.h #ifdef HAVE_CONFIG_H #include #endif #include "Graph6Writer.hpp" #include "CodePatterns/Assert.hpp" #include "CodePatterns/Log.hpp" #include #include #include #include "Atom/atom.hpp" #include "Descriptors/AtomIdDescriptor.hpp" #include "Element/element.hpp" #include "Graph/BoostGraphCreator.hpp" #include "Graph/BreadthFirstSearchGatherer.hpp" #include "World.hpp" //#include "CodePatterns/MemDebug.hpp" Graph6Writer::Graph6Writer(const std::vector atoms): _atoms(atoms) {} void Graph6Writer::write_n(std::ostream& out) { const unsigned long n = _atoms.size(); if (n<62) { out << ((unsigned char)(n+63)); return; } out << ((unsigned char)126); int num_bytes = 2; if (n> 258047) { out << ((unsigned char)126); num_bytes = 3; } for(int value=num_bytes; value>=0; value--) { unsigned char c = 0; int n_pos = 6*(value+1)-1; for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) { c += (n & (1<>((int)n_pos/6); } out << (c+63); } } /* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n) this writes a graph6 representation to out. */ void Graph6Writer::write_graph6(std::ostream& out) { write_n(out); const unsigned long n = _atoms.size(); unsigned char value = 0; int byte_pos = 5; unsigned int bytes_written = 0; for (size_t j=0; jIsBondedTo(_atoms[j]); LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos)); value = value | (bit << byte_pos--); if (byte_pos < 0) { LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]"); ASSERT( (value+63) <= 126, "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63) +" and "+toString((unsigned char)126)); out << (unsigned char)(value+63); bytes_written++; value = 0; byte_pos = 5; } } if (byte_pos!=5) { ASSERT( (value+63) <= 126, "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63) +" and "+toString((unsigned char)126)); LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]"); out << (unsigned char)(value+63); bytes_written++; value=0; } ASSERT( value==0, "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?"); ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f), "Graph6Writer::write_graph6() - unexpected number of bytes written"); } /** * Picks a non-hydrogen from all atoms in the current set of atoms * with lowest non-hydrogen bonds. * * Returns -1 if none could be found. */ atomId_t Graph6Writer::getBoundaryNonHydrogen() const { atomId_t start_atom_id = -1; int lowest_non_hydrogen_count = 16; for(std::vector::const_iterator iter = _atoms.begin(); iter != _atoms.end(); ++iter) { const atom *walker = *iter; if (walker->getElement().getSymbol() != "H") { const BondList& bond_list = walker->getListOfBonds(); int number_of_non_hydrogen_bonds = 0; for (BondList::const_iterator iter = bond_list.begin(); iter != bond_list.end(); ++iter) { number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H"; } if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) { start_atom_id = walker->getId(); lowest_non_hydrogen_count = number_of_non_hydrogen_bonds; } } } if ((start_atom_id == -1) && (!_atoms.empty())) { // we only have hydrogens, just pick the first start_atom_id = (*_atoms.begin())->getId(); } return start_atom_id; } bool OnlyNonHydrogens(const bond &_bond) { return _bond.HydrogenBond == 0; } void Graph6Writer::write_simple_elementlist(std::ostream& out) { bool isFirst = true; for(std::vector::const_iterator iter = _atoms.begin(); iter != _atoms.end(); ++iter) { const atom *walker = *iter; if (walker->getElement().getAtomicNumber() != (atomicNumber_t)1) { if (!isFirst) out << ' '; isFirst = false; out << walker->getElement().getSymbol(); } } } void Graph6Writer::write_elementlist(std::ostream& out) { /** Execute a Breadth-First Search discovery from one terminal atom (e.g., * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen). * Then return the element list in that ordering. * * The graph6 string does not account for the inherent graph symmetries * (e.g., BW having 123<->321 but not 123<->132 symmetry). */ const World& world = World::getConstInstance(); // pick bond neighbor of a hydrogen atom atomId_t start_atom_id = getBoundaryNonHydrogen(); if (start_atom_id == (unsigned int)-1) { // fall back to first atom in list start_atom_id = _atoms.front()->getId(); } const atom* start_atom = world.getAtom(AtomById(start_atom_id)); LOG(1, "INFO: Start atom is " << *start_atom << "."); // do an unlimited BFS and get set of nodes, ordered by discovery level BoostGraphCreator graphCreator; graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens); BreadthFirstSearchGatherer gatherer(graphCreator); gatherer(start_atom_id); // go through distance map and print sorted by discovery level const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances(); using pairtype = std::pair; const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) { return p1.second < p2.second; })->second; bool isFirst = true; /** * This is O(N^2) and a stupid implementation. However, we only intend to * use this for small molecules, so I don't care at the moment. The better * approach is to revert the map into a multimap and then traverse that. */ for (size_t i=0; i<= max_distance; ++i) { for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin(); iter != distances.end(); ++iter) { if (iter->second != i) continue; const atom* walker = world.getAtom(AtomById(iter->first)); assert(walker != NULL); LOG(1, "INFO: Gathered atom " << *walker); if (!isFirst) out << ' '; isFirst = false; out << walker->getElement().getSymbol(); } } }