| 1 | /* | 
|---|
| 2 | * Project: MoleCuilder | 
|---|
| 3 | * Description: creates and alters molecular systems | 
|---|
| 4 | * Copyright (C)  2021 Frederik Heber. All rights reserved. | 
|---|
| 5 | * | 
|---|
| 6 | * | 
|---|
| 7 | *   This file is part of MoleCuilder. | 
|---|
| 8 | * | 
|---|
| 9 | *    MoleCuilder is free software: you can redistribute it and/or modify | 
|---|
| 10 | *    it under the terms of the GNU General Public License as published by | 
|---|
| 11 | *    the Free Software Foundation, either version 2 of the License, or | 
|---|
| 12 | *    (at your option) any later version. | 
|---|
| 13 | * | 
|---|
| 14 | *    MoleCuilder is distributed in the hope that it will be useful, | 
|---|
| 15 | *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
| 16 | *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
| 17 | *    GNU General Public License for more details. | 
|---|
| 18 | * | 
|---|
| 19 | *    You should have received a copy of the GNU General Public License | 
|---|
| 20 | *    along with MoleCuilder.  If not, see <http://www.gnu.org/licenses/>. | 
|---|
| 21 | */ | 
|---|
| 22 |  | 
|---|
| 23 | /* | 
|---|
| 24 | * Graph6Writer.cpp | 
|---|
| 25 | * | 
|---|
| 26 | *  Created on: Apr 2, 2021 | 
|---|
| 27 | *      Author: heber | 
|---|
| 28 | */ | 
|---|
| 29 |  | 
|---|
| 30 |  | 
|---|
| 31 | // include config.h | 
|---|
| 32 | #ifdef HAVE_CONFIG_H | 
|---|
| 33 | #include <config.h> | 
|---|
| 34 | #endif | 
|---|
| 35 |  | 
|---|
| 36 | #include "Graph6Writer.hpp" | 
|---|
| 37 |  | 
|---|
| 38 | #include "CodePatterns/Assert.hpp" | 
|---|
| 39 | #include "CodePatterns/Log.hpp" | 
|---|
| 40 |  | 
|---|
| 41 | #include <cassert> | 
|---|
| 42 | #include <cmath> | 
|---|
| 43 | #include <iostream> | 
|---|
| 44 |  | 
|---|
| 45 | #include "Atom/atom.hpp" | 
|---|
| 46 | #include "Descriptors/AtomIdDescriptor.hpp" | 
|---|
| 47 | #include "Element/element.hpp" | 
|---|
| 48 | #include "Graph/BoostGraphCreator.hpp" | 
|---|
| 49 | #include "Graph/BreadthFirstSearchGatherer.hpp" | 
|---|
| 50 | #include "World.hpp" | 
|---|
| 51 |  | 
|---|
| 52 | //#include "CodePatterns/MemDebug.hpp" | 
|---|
| 53 |  | 
|---|
| 54 | Graph6Writer::Graph6Writer(const std::vector<const atom *> atoms): | 
|---|
| 55 | _atoms(atoms) | 
|---|
| 56 | {} | 
|---|
| 57 |  | 
|---|
| 58 | void Graph6Writer::write_n(std::ostream& out) { | 
|---|
| 59 | const unsigned long n = _atoms.size(); | 
|---|
| 60 |  | 
|---|
| 61 | if (n<62) { | 
|---|
| 62 | out << ((unsigned char)(n+63)); | 
|---|
| 63 | return; | 
|---|
| 64 | } | 
|---|
| 65 |  | 
|---|
| 66 | out << ((unsigned char)126); | 
|---|
| 67 | int num_bytes = 2; | 
|---|
| 68 | if (n> 258047) { | 
|---|
| 69 | out << ((unsigned char)126); | 
|---|
| 70 | num_bytes = 3; | 
|---|
| 71 | } | 
|---|
| 72 | for(int value=num_bytes; value>=0; value--) { | 
|---|
| 73 | unsigned char c = 0; | 
|---|
| 74 | int n_pos = 6*(value+1)-1; | 
|---|
| 75 | for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) { | 
|---|
| 76 | c += (n & (1<<n_pos))>>((int)n_pos/6); | 
|---|
| 77 | } | 
|---|
| 78 | out << (c+63); | 
|---|
| 79 | } | 
|---|
| 80 |  | 
|---|
| 81 | } | 
|---|
| 82 |  | 
|---|
| 83 | /* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n) | 
|---|
| 84 | this writes a graph6 representation to out. */ | 
|---|
| 85 | void Graph6Writer::write_graph6(std::ostream& out) { | 
|---|
| 86 | write_n(out); | 
|---|
| 87 |  | 
|---|
| 88 | const unsigned long n = _atoms.size(); | 
|---|
| 89 |  | 
|---|
| 90 | unsigned char value = 0; | 
|---|
| 91 | int byte_pos = 5; | 
|---|
| 92 | unsigned int bytes_written = 0; | 
|---|
| 93 | for (size_t j=0; j<n; ++j) | 
|---|
| 94 | for (size_t i=0; i<j; ++i) { | 
|---|
| 95 | // std::cout << "\t\n" << (int)value << " " << byte_pos << std::endl; | 
|---|
| 96 |  | 
|---|
| 97 | unsigned int bit = _atoms[i]->IsBondedTo(_atoms[j]); | 
|---|
| 98 | LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos)); | 
|---|
| 99 | value = value | (bit << byte_pos--); | 
|---|
| 100 | if (byte_pos < 0) { | 
|---|
| 101 | LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]"); | 
|---|
| 102 | ASSERT( (value+63) <= 126, | 
|---|
| 103 | "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63) | 
|---|
| 104 | +" and "+toString((unsigned char)126)); | 
|---|
| 105 | out << (unsigned char)(value+63); | 
|---|
| 106 | bytes_written++; | 
|---|
| 107 | value = 0; | 
|---|
| 108 | byte_pos = 5; | 
|---|
| 109 | } | 
|---|
| 110 | } | 
|---|
| 111 | if (byte_pos!=5) { | 
|---|
| 112 | ASSERT( (value+63) <= 126, | 
|---|
| 113 | "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63) | 
|---|
| 114 | +" and "+toString((unsigned char)126)); | 
|---|
| 115 | LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]"); | 
|---|
| 116 | out << (unsigned char)(value+63); | 
|---|
| 117 | bytes_written++; | 
|---|
| 118 | value=0; | 
|---|
| 119 | } | 
|---|
| 120 | ASSERT( value==0, | 
|---|
| 121 | "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?"); | 
|---|
| 122 | ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f), | 
|---|
| 123 | "Graph6Writer::write_graph6() - unexpected number of bytes written"); | 
|---|
| 124 | } | 
|---|
| 125 |  | 
|---|
| 126 | /** | 
|---|
| 127 | * Picks a non-hydrogen from all atoms in the current set of atoms | 
|---|
| 128 | * with lowest non-hydrogen bonds. | 
|---|
| 129 | * | 
|---|
| 130 | * Returns -1 if none could be found. | 
|---|
| 131 | */ | 
|---|
| 132 | atomId_t Graph6Writer::getBoundaryNonHydrogen() const { | 
|---|
| 133 | atomId_t start_atom_id = -1; | 
|---|
| 134 | int lowest_non_hydrogen_count = 16; | 
|---|
| 135 | for(std::vector<const atom *>::const_iterator iter = _atoms.begin(); | 
|---|
| 136 | iter != _atoms.end(); ++iter) { | 
|---|
| 137 | const atom *walker = *iter; | 
|---|
| 138 | if (walker->getElement().getSymbol() != "H") { | 
|---|
| 139 | const BondList& bond_list = walker->getListOfBonds(); | 
|---|
| 140 | int number_of_non_hydrogen_bonds = 0; | 
|---|
| 141 | for (BondList::const_iterator iter = bond_list.begin(); | 
|---|
| 142 | iter != bond_list.end(); ++iter) { | 
|---|
| 143 | number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H"; | 
|---|
| 144 | } | 
|---|
| 145 | if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) { | 
|---|
| 146 | start_atom_id = walker->getId(); | 
|---|
| 147 | lowest_non_hydrogen_count = number_of_non_hydrogen_bonds; | 
|---|
| 148 | } | 
|---|
| 149 | } | 
|---|
| 150 | } | 
|---|
| 151 | if ((start_atom_id == -1) && (!_atoms.empty())) { | 
|---|
| 152 | // we only have hydrogens, just pick the first | 
|---|
| 153 | start_atom_id = (*_atoms.begin())->getId(); | 
|---|
| 154 | } | 
|---|
| 155 | return start_atom_id; | 
|---|
| 156 | } | 
|---|
| 157 |  | 
|---|
| 158 | bool OnlyNonHydrogens(const bond &_bond) { | 
|---|
| 159 | return _bond.HydrogenBond == 0; | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | void Graph6Writer::write_elementlist(std::ostream& out) { | 
|---|
| 163 | /** Execute a Breadth-First Search discovery from one terminal atom (e.g., | 
|---|
| 164 | * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen). | 
|---|
| 165 | * Then return the element list in that ordering. | 
|---|
| 166 | * | 
|---|
| 167 | * The graph6 string does not account for the inherent graph symmetries | 
|---|
| 168 | * (e.g., BW having 123<->321 but not 123<->132 symmetry). | 
|---|
| 169 | */ | 
|---|
| 170 | const World& world = World::getConstInstance(); | 
|---|
| 171 | // pick bond neighbor of a hydrogen atom | 
|---|
| 172 | atomId_t start_atom_id = getBoundaryNonHydrogen(); | 
|---|
| 173 | if (start_atom_id == (unsigned int)-1) { | 
|---|
| 174 | // fall back to first atom in list | 
|---|
| 175 | start_atom_id = _atoms.front()->getId(); | 
|---|
| 176 | } | 
|---|
| 177 | const atom* start_atom = world.getAtom(AtomById(start_atom_id)); | 
|---|
| 178 | LOG(1, "INFO: Start atom is " << *start_atom << "."); | 
|---|
| 179 |  | 
|---|
| 180 | // do an unlimited BFS and get set of nodes, ordered by discovery level | 
|---|
| 181 | BoostGraphCreator graphCreator; | 
|---|
| 182 | graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens); | 
|---|
| 183 | BreadthFirstSearchGatherer gatherer(graphCreator); | 
|---|
| 184 | gatherer(start_atom_id); | 
|---|
| 185 |  | 
|---|
| 186 | // go through distance map and print sorted by discovery level | 
|---|
| 187 | const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances(); | 
|---|
| 188 | using pairtype = std::pair<atomId_t, size_t>; | 
|---|
| 189 | const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) { | 
|---|
| 190 | return p1.second < p2.second; | 
|---|
| 191 | })->second; | 
|---|
| 192 | bool isFirst = true; | 
|---|
| 193 | /** | 
|---|
| 194 | * This is O(N^2) and a stupid implementation. However, we only intend to | 
|---|
| 195 | * use this for small molecules, so I don't care at the moment. The better | 
|---|
| 196 | * approach is to revert the map into a multimap and then traverse that. | 
|---|
| 197 | */ | 
|---|
| 198 | for (size_t i=0; i<= max_distance; ++i) { | 
|---|
| 199 | for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin(); | 
|---|
| 200 | iter != distances.end(); ++iter) { | 
|---|
| 201 | if (iter->second != i) | 
|---|
| 202 | continue; | 
|---|
| 203 | const atom* walker = world.getAtom(AtomById(iter->first)); | 
|---|
| 204 | assert(walker != NULL); | 
|---|
| 205 | LOG(1, "INFO: Gathered atom " << *walker); | 
|---|
| 206 | if (!isFirst) | 
|---|
| 207 | out << ' '; | 
|---|
| 208 | isFirst = false; | 
|---|
| 209 | out << walker->getElement().getSymbol(); | 
|---|
| 210 | } | 
|---|
| 211 | } | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|