| [5061d9] | 1 | /* | 
|---|
|  | 2 | * Project: MoleCuilder | 
|---|
|  | 3 | * Description: creates and alters molecular systems | 
|---|
|  | 4 | * Copyright (C)  2021 Frederik Heber. All rights reserved. | 
|---|
|  | 5 | * | 
|---|
|  | 6 | * | 
|---|
|  | 7 | *   This file is part of MoleCuilder. | 
|---|
|  | 8 | * | 
|---|
|  | 9 | *    MoleCuilder is free software: you can redistribute it and/or modify | 
|---|
|  | 10 | *    it under the terms of the GNU General Public License as published by | 
|---|
|  | 11 | *    the Free Software Foundation, either version 2 of the License, or | 
|---|
|  | 12 | *    (at your option) any later version. | 
|---|
|  | 13 | * | 
|---|
|  | 14 | *    MoleCuilder is distributed in the hope that it will be useful, | 
|---|
|  | 15 | *    but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|---|
|  | 16 | *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|---|
|  | 17 | *    GNU General Public License for more details. | 
|---|
|  | 18 | * | 
|---|
|  | 19 | *    You should have received a copy of the GNU General Public License | 
|---|
|  | 20 | *    along with MoleCuilder.  If not, see <http://www.gnu.org/licenses/>. | 
|---|
|  | 21 | */ | 
|---|
|  | 22 |  | 
|---|
|  | 23 | /* | 
|---|
|  | 24 | * Graph6Writer.cpp | 
|---|
|  | 25 | * | 
|---|
|  | 26 | *  Created on: Apr 2, 2021 | 
|---|
|  | 27 | *      Author: heber | 
|---|
|  | 28 | */ | 
|---|
|  | 29 |  | 
|---|
|  | 30 |  | 
|---|
|  | 31 | // include config.h | 
|---|
|  | 32 | #ifdef HAVE_CONFIG_H | 
|---|
|  | 33 | #include <config.h> | 
|---|
|  | 34 | #endif | 
|---|
|  | 35 |  | 
|---|
|  | 36 | #include "Graph6Writer.hpp" | 
|---|
|  | 37 |  | 
|---|
|  | 38 | #include "CodePatterns/Assert.hpp" | 
|---|
|  | 39 | #include "CodePatterns/Log.hpp" | 
|---|
|  | 40 |  | 
|---|
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>
|---|
|  | 44 |  | 
|---|
|  | 45 | #include "Atom/atom.hpp" | 
|---|
| [1c0b0b] | 46 | #include "Descriptors/AtomIdDescriptor.hpp" | 
|---|
| [5061d9] | 47 | #include "Element/element.hpp" | 
|---|
| [1c0b0b] | 48 | #include "Graph/BoostGraphCreator.hpp" | 
|---|
|  | 49 | #include "Graph/BreadthFirstSearchGatherer.hpp" | 
|---|
|  | 50 | #include "World.hpp" | 
|---|
| [5061d9] | 51 |  | 
|---|
|  | 52 | //#include "CodePatterns/MemDebug.hpp" | 
|---|
|  | 53 |  | 
|---|
|  | 54 | Graph6Writer::Graph6Writer(const std::vector<const atom *> atoms): | 
|---|
|  | 55 | _atoms(atoms) | 
|---|
|  | 56 | {} | 
|---|
|  | 57 |  | 
|---|
|  | 58 | void Graph6Writer::write_n(std::ostream& out) { | 
|---|
|  | 59 | const unsigned long n = _atoms.size(); | 
|---|
|  | 60 |  | 
|---|
|  | 61 | if (n<62) { | 
|---|
|  | 62 | out << ((unsigned char)(n+63)); | 
|---|
|  | 63 | return; | 
|---|
|  | 64 | } | 
|---|
|  | 65 |  | 
|---|
|  | 66 | out << ((unsigned char)126); | 
|---|
|  | 67 | int num_bytes = 2; | 
|---|
|  | 68 | if (n> 258047) { | 
|---|
|  | 69 | out << ((unsigned char)126); | 
|---|
|  | 70 | num_bytes = 3; | 
|---|
|  | 71 | } | 
|---|
|  | 72 | for(int value=num_bytes; value>=0; value--) { | 
|---|
|  | 73 | unsigned char c = 0; | 
|---|
|  | 74 | int n_pos = 6*(value+1)-1; | 
|---|
|  | 75 | for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) { | 
|---|
|  | 76 | c += (n & (1<<n_pos))>>((int)n_pos/6); | 
|---|
|  | 77 | } | 
|---|
|  | 78 | out << (c+63); | 
|---|
|  | 79 | } | 
|---|
|  | 80 |  | 
|---|
|  | 81 | } | 
|---|
|  | 82 |  | 
|---|
|  | 83 | /* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n) | 
|---|
|  | 84 | this writes a graph6 representation to out. */ | 
|---|
|  | 85 | void Graph6Writer::write_graph6(std::ostream& out) { | 
|---|
|  | 86 | write_n(out); | 
|---|
|  | 87 |  | 
|---|
|  | 88 | const unsigned long n = _atoms.size(); | 
|---|
|  | 89 |  | 
|---|
|  | 90 | unsigned char value = 0; | 
|---|
|  | 91 | int byte_pos = 5; | 
|---|
|  | 92 | unsigned int bytes_written = 0; | 
|---|
|  | 93 | for (size_t j=0; j<n; ++j) | 
|---|
|  | 94 | for (size_t i=0; i<j; ++i) { | 
|---|
|  | 95 | // std::cout << "\t\n" << (int)value << " " << byte_pos << std::endl; | 
|---|
|  | 96 |  | 
|---|
|  | 97 | unsigned int bit = _atoms[i]->IsBondedTo(_atoms[j]); | 
|---|
|  | 98 | LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos)); | 
|---|
|  | 99 | value = value | (bit << byte_pos--); | 
|---|
|  | 100 | if (byte_pos < 0) { | 
|---|
|  | 101 | LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]"); | 
|---|
|  | 102 | ASSERT( (value+63) <= 126, | 
|---|
|  | 103 | "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63) | 
|---|
|  | 104 | +" and "+toString((unsigned char)126)); | 
|---|
|  | 105 | out << (unsigned char)(value+63); | 
|---|
|  | 106 | bytes_written++; | 
|---|
|  | 107 | value = 0; | 
|---|
|  | 108 | byte_pos = 5; | 
|---|
|  | 109 | } | 
|---|
|  | 110 | } | 
|---|
|  | 111 | if (byte_pos!=5) { | 
|---|
|  | 112 | ASSERT( (value+63) <= 126, | 
|---|
|  | 113 | "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63) | 
|---|
|  | 114 | +" and "+toString((unsigned char)126)); | 
|---|
|  | 115 | LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]"); | 
|---|
|  | 116 | out << (unsigned char)(value+63); | 
|---|
|  | 117 | bytes_written++; | 
|---|
|  | 118 | value=0; | 
|---|
|  | 119 | } | 
|---|
|  | 120 | ASSERT( value==0, | 
|---|
|  | 121 | "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?"); | 
|---|
| [1c0b0b] | 122 | ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f), | 
|---|
| [5061d9] | 123 | "Graph6Writer::write_graph6() - unexpected number of bytes written"); | 
|---|
|  | 124 | } | 
|---|
|  | 125 |  | 
|---|
| [1c0b0b] | 126 | /** | 
|---|
| [78e5cf] | 127 | * Picks a non-hydrogen from all atoms in the current set of atoms | 
|---|
|  | 128 | * with lowest non-hydrogen bonds. | 
|---|
| [1c0b0b] | 129 | * | 
|---|
|  | 130 | * Returns -1 if none could be found. | 
|---|
|  | 131 | */ | 
|---|
|  | 132 | atomId_t Graph6Writer::getBoundaryNonHydrogen() const { | 
|---|
|  | 133 | atomId_t start_atom_id = -1; | 
|---|
| [78e5cf] | 134 | int lowest_non_hydrogen_count = 16; | 
|---|
| [1c0b0b] | 135 | for(std::vector<const atom *>::const_iterator iter = _atoms.begin(); | 
|---|
| [5061d9] | 136 | iter != _atoms.end(); ++iter) { | 
|---|
| [1c0b0b] | 137 | const atom *walker = *iter; | 
|---|
| [78e5cf] | 138 | if (walker->getElement().getSymbol() != "H") { | 
|---|
| [1c0b0b] | 139 | const BondList& bond_list = walker->getListOfBonds(); | 
|---|
| [78e5cf] | 140 | int number_of_non_hydrogen_bonds = 0; | 
|---|
|  | 141 | for (BondList::const_iterator iter = bond_list.begin(); | 
|---|
|  | 142 | iter != bond_list.end(); ++iter) { | 
|---|
|  | 143 | number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H"; | 
|---|
|  | 144 | } | 
|---|
|  | 145 | if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) { | 
|---|
|  | 146 | start_atom_id = walker->getId(); | 
|---|
|  | 147 | lowest_non_hydrogen_count = number_of_non_hydrogen_bonds; | 
|---|
| [1c0b0b] | 148 | } | 
|---|
|  | 149 | } | 
|---|
|  | 150 | } | 
|---|
| [78e5cf] | 151 | if ((start_atom_id == -1) && (!_atoms.empty())) { | 
|---|
|  | 152 | // we only have hydrogens, just pick the first | 
|---|
|  | 153 | start_atom_id = (*_atoms.begin())->getId(); | 
|---|
|  | 154 | } | 
|---|
| [1c0b0b] | 155 | return start_atom_id; | 
|---|
|  | 156 | } | 
|---|
|  | 157 |  | 
|---|
|  | 158 | bool OnlyNonHydrogens(const bond &_bond) { | 
|---|
|  | 159 | return _bond.HydrogenBond == 0; | 
|---|
|  | 160 | } | 
|---|
|  | 161 |  | 
|---|
|  | 162 | void Graph6Writer::write_elementlist(std::ostream& out) { | 
|---|
|  | 163 | /** Execute a Breadth-First Search discovery from one terminal atom (e.g., | 
|---|
|  | 164 | * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen). | 
|---|
|  | 165 | * Then return the element list in that ordering. | 
|---|
|  | 166 | * | 
|---|
|  | 167 | * The graph6 string does not account for the inherent graph symmetries | 
|---|
|  | 168 | * (e.g., BW having 123<->321 but not 123<->132 symmetry). | 
|---|
|  | 169 | */ | 
|---|
| [78e5cf] | 170 | const World& world = World::getConstInstance(); | 
|---|
| [1c0b0b] | 171 | // pick bond neighbor of a hydrogen atom | 
|---|
|  | 172 | atomId_t start_atom_id = getBoundaryNonHydrogen(); | 
|---|
|  | 173 | if (start_atom_id == (unsigned int)-1) { | 
|---|
|  | 174 | // fall back to first atom in list | 
|---|
|  | 175 | start_atom_id = _atoms.front()->getId(); | 
|---|
|  | 176 | } | 
|---|
| [78e5cf] | 177 | const atom* start_atom = world.getAtom(AtomById(start_atom_id)); | 
|---|
|  | 178 | LOG(1, "INFO: Start atom is " << *start_atom << "."); | 
|---|
| [1c0b0b] | 179 |  | 
|---|
|  | 180 | // do an unlimited BFS and get set of nodes, ordered by discovery level | 
|---|
|  | 181 | BoostGraphCreator graphCreator; | 
|---|
|  | 182 | graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens); | 
|---|
|  | 183 | BreadthFirstSearchGatherer gatherer(graphCreator); | 
|---|
| [78e5cf] | 184 | gatherer(start_atom_id); | 
|---|
|  | 185 |  | 
|---|
|  | 186 | // go through distance map and print sorted by discovery level | 
|---|
|  | 187 | const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances(); | 
|---|
|  | 188 | using pairtype = std::pair<atomId_t, size_t>; | 
|---|
|  | 189 | const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) { | 
|---|
|  | 190 | return p1.second < p2.second; | 
|---|
|  | 191 | })->second; | 
|---|
|  | 192 | bool isFirst = true; | 
|---|
|  | 193 | /** | 
|---|
|  | 194 | * This is O(N^2) and a stupid implementation. However, we only intend to | 
|---|
|  | 195 | * use this for small molecules, so I don't care at the moment. The better | 
|---|
|  | 196 | * approach is to revert the map into a multimap and then traverse that. | 
|---|
|  | 197 | */ | 
|---|
|  | 198 | for (size_t i=0; i<= max_distance; ++i) { | 
|---|
|  | 199 | for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin(); | 
|---|
|  | 200 | iter != distances.end(); ++iter) { | 
|---|
|  | 201 | if (iter->second != i) | 
|---|
|  | 202 | continue; | 
|---|
|  | 203 | const atom* walker = world.getAtom(AtomById(iter->first)); | 
|---|
|  | 204 | assert(walker != NULL); | 
|---|
|  | 205 | LOG(1, "INFO: Gathered atom " << *walker); | 
|---|
|  | 206 | if (!isFirst) | 
|---|
|  | 207 | out << ' '; | 
|---|
|  | 208 | isFirst = false; | 
|---|
|  | 209 | out << walker->getElement().getSymbol(); | 
|---|
|  | 210 | } | 
|---|
| [5061d9] | 211 | } | 
|---|
|  | 212 | } | 
|---|
|  | 213 |  | 
|---|