| 1 | /*
|
|---|
| 2 | * Project: MoleCuilder
|
|---|
| 3 | * Description: creates and alters molecular systems
|
|---|
| 4 | * Copyright (C) 2021 Frederik Heber. All rights reserved.
|
|---|
| 5 | *
|
|---|
| 6 | *
|
|---|
| 7 | * This file is part of MoleCuilder.
|
|---|
| 8 | *
|
|---|
| 9 | * MoleCuilder is free software: you can redistribute it and/or modify
|
|---|
| 10 | * it under the terms of the GNU General Public License as published by
|
|---|
| 11 | * the Free Software Foundation, either version 2 of the License, or
|
|---|
| 12 | * (at your option) any later version.
|
|---|
| 13 | *
|
|---|
| 14 | * MoleCuilder is distributed in the hope that it will be useful,
|
|---|
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|---|
| 17 | * GNU General Public License for more details.
|
|---|
| 18 | *
|
|---|
| 19 | * You should have received a copy of the GNU General Public License
|
|---|
| 20 | * along with MoleCuilder. If not, see <http://www.gnu.org/licenses/>.
|
|---|
| 21 | */
|
|---|
| 22 |
|
|---|
| 23 | /*
|
|---|
| 24 | * Graph6Writer.cpp
|
|---|
| 25 | *
|
|---|
| 26 | * Created on: Apr 2, 2021
|
|---|
| 27 | * Author: heber
|
|---|
| 28 | */
|
|---|
| 29 |
|
|---|
| 30 |
|
|---|
| 31 | // include config.h
|
|---|
| 32 | #ifdef HAVE_CONFIG_H
|
|---|
| 33 | #include <config.h>
|
|---|
| 34 | #endif
|
|---|
| 35 |
|
|---|
| 36 | #include "Graph6Writer.hpp"
|
|---|
| 37 |
|
|---|
| 38 | #include "CodePatterns/Assert.hpp"
|
|---|
| 39 | #include "CodePatterns/Log.hpp"
|
|---|
| 40 |
|
|---|
| 41 | #include <cassert>
|
|---|
| 42 | #include <cmath>
|
|---|
| 43 | #include <iostream>
|
|---|
| 44 |
|
|---|
| 45 | #include "Atom/atom.hpp"
|
|---|
| 46 | #include "Descriptors/AtomIdDescriptor.hpp"
|
|---|
| 47 | #include "Element/element.hpp"
|
|---|
| 48 | #include "Graph/BoostGraphCreator.hpp"
|
|---|
| 49 | #include "Graph/BreadthFirstSearchGatherer.hpp"
|
|---|
| 50 | #include "World.hpp"
|
|---|
| 51 |
|
|---|
| 52 | //#include "CodePatterns/MemDebug.hpp"
|
|---|
| 53 |
|
|---|
| 54 | Graph6Writer::Graph6Writer(const std::vector<const atom *> atoms):
|
|---|
| 55 | _atoms(atoms)
|
|---|
| 56 | {}
|
|---|
| 57 |
|
|---|
| 58 | void Graph6Writer::write_n(std::ostream& out) {
|
|---|
| 59 | const unsigned long n = _atoms.size();
|
|---|
| 60 |
|
|---|
| 61 | if (n<62) {
|
|---|
| 62 | out << ((unsigned char)(n+63));
|
|---|
| 63 | return;
|
|---|
| 64 | }
|
|---|
| 65 |
|
|---|
| 66 | out << ((unsigned char)126);
|
|---|
| 67 | int num_bytes = 2;
|
|---|
| 68 | if (n> 258047) {
|
|---|
| 69 | out << ((unsigned char)126);
|
|---|
| 70 | num_bytes = 3;
|
|---|
| 71 | }
|
|---|
| 72 | for(int value=num_bytes; value>=0; value--) {
|
|---|
| 73 | unsigned char c = 0;
|
|---|
| 74 | int n_pos = 6*(value+1)-1;
|
|---|
| 75 | for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) {
|
|---|
| 76 | c += (n & (1<<n_pos))>>((int)n_pos/6);
|
|---|
| 77 | }
|
|---|
| 78 | out << (c+63);
|
|---|
| 79 | }
|
|---|
| 80 |
|
|---|
| 81 | }
|
|---|
| 82 |
|
|---|
| 83 | /* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n)
|
|---|
| 84 | this writes a graph6 representation to out. */
|
|---|
| 85 | void Graph6Writer::write_graph6(std::ostream& out) {
|
|---|
| 86 | write_n(out);
|
|---|
| 87 |
|
|---|
| 88 | const unsigned long n = _atoms.size();
|
|---|
| 89 |
|
|---|
| 90 | unsigned char value = 0;
|
|---|
| 91 | int byte_pos = 5;
|
|---|
| 92 | unsigned int bytes_written = 0;
|
|---|
| 93 | for (size_t j=0; j<n; ++j)
|
|---|
| 94 | for (size_t i=0; i<j; ++i) {
|
|---|
| 95 | // std::cout << "\t\n" << (int)value << " " << byte_pos << std::endl;
|
|---|
| 96 |
|
|---|
| 97 | unsigned int bit = _atoms[i]->IsBondedTo(_atoms[j]);
|
|---|
| 98 | LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos));
|
|---|
| 99 | value = value | (bit << byte_pos--);
|
|---|
| 100 | if (byte_pos < 0) {
|
|---|
| 101 | LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
|
|---|
| 102 | ASSERT( (value+63) <= 126,
|
|---|
| 103 | "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
|
|---|
| 104 | +" and "+toString((unsigned char)126));
|
|---|
| 105 | out << (unsigned char)(value+63);
|
|---|
| 106 | bytes_written++;
|
|---|
| 107 | value = 0;
|
|---|
| 108 | byte_pos = 5;
|
|---|
| 109 | }
|
|---|
| 110 | }
|
|---|
| 111 | if (byte_pos!=5) {
|
|---|
| 112 | ASSERT( (value+63) <= 126,
|
|---|
| 113 | "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
|
|---|
| 114 | +" and "+toString((unsigned char)126));
|
|---|
| 115 | LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
|
|---|
| 116 | out << (unsigned char)(value+63);
|
|---|
| 117 | bytes_written++;
|
|---|
| 118 | value=0;
|
|---|
| 119 | }
|
|---|
| 120 | ASSERT( value==0,
|
|---|
| 121 | "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?");
|
|---|
| 122 | ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f),
|
|---|
| 123 | "Graph6Writer::write_graph6() - unexpected number of bytes written");
|
|---|
| 124 | }
|
|---|
| 125 |
|
|---|
| 126 | /**
|
|---|
| 127 | * Picks a non-hydrogen from all atoms in the current set of atoms
|
|---|
| 128 | * with lowest non-hydrogen bonds.
|
|---|
| 129 | *
|
|---|
| 130 | * Returns -1 if none could be found.
|
|---|
| 131 | */
|
|---|
| 132 | atomId_t Graph6Writer::getBoundaryNonHydrogen() const {
|
|---|
| 133 | atomId_t start_atom_id = -1;
|
|---|
| 134 | int lowest_non_hydrogen_count = 16;
|
|---|
| 135 | for(std::vector<const atom *>::const_iterator iter = _atoms.begin();
|
|---|
| 136 | iter != _atoms.end(); ++iter) {
|
|---|
| 137 | const atom *walker = *iter;
|
|---|
| 138 | if (walker->getElement().getSymbol() != "H") {
|
|---|
| 139 | const BondList& bond_list = walker->getListOfBonds();
|
|---|
| 140 | int number_of_non_hydrogen_bonds = 0;
|
|---|
| 141 | for (BondList::const_iterator iter = bond_list.begin();
|
|---|
| 142 | iter != bond_list.end(); ++iter) {
|
|---|
| 143 | number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H";
|
|---|
| 144 | }
|
|---|
| 145 | if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) {
|
|---|
| 146 | start_atom_id = walker->getId();
|
|---|
| 147 | lowest_non_hydrogen_count = number_of_non_hydrogen_bonds;
|
|---|
| 148 | }
|
|---|
| 149 | }
|
|---|
| 150 | }
|
|---|
| 151 | if ((start_atom_id == -1) && (!_atoms.empty())) {
|
|---|
| 152 | // we only have hydrogens, just pick the first
|
|---|
| 153 | start_atom_id = (*_atoms.begin())->getId();
|
|---|
| 154 | }
|
|---|
| 155 | return start_atom_id;
|
|---|
| 156 | }
|
|---|
| 157 |
|
|---|
| 158 | bool OnlyNonHydrogens(const bond &_bond) {
|
|---|
| 159 | return _bond.HydrogenBond == 0;
|
|---|
| 160 | }
|
|---|
| 161 |
|
|---|
| 162 | void Graph6Writer::write_elementlist(std::ostream& out) {
|
|---|
| 163 | /** Execute a Breadth-First Search discovery from one terminal atom (e.g.,
|
|---|
| 164 | * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen).
|
|---|
| 165 | * Then return the element list in that ordering.
|
|---|
| 166 | *
|
|---|
| 167 | * The graph6 string does not account for the inherent graph symmetries
|
|---|
| 168 | * (e.g., BW having 123<->321 but not 123<->132 symmetry).
|
|---|
| 169 | */
|
|---|
| 170 | const World& world = World::getConstInstance();
|
|---|
| 171 | // pick bond neighbor of a hydrogen atom
|
|---|
| 172 | atomId_t start_atom_id = getBoundaryNonHydrogen();
|
|---|
| 173 | if (start_atom_id == (unsigned int)-1) {
|
|---|
| 174 | // fall back to first atom in list
|
|---|
| 175 | start_atom_id = _atoms.front()->getId();
|
|---|
| 176 | }
|
|---|
| 177 | const atom* start_atom = world.getAtom(AtomById(start_atom_id));
|
|---|
| 178 | LOG(1, "INFO: Start atom is " << *start_atom << ".");
|
|---|
| 179 |
|
|---|
| 180 | // do an unlimited BFS and get set of nodes, ordered by discovery level
|
|---|
| 181 | BoostGraphCreator graphCreator;
|
|---|
| 182 | graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens);
|
|---|
| 183 | BreadthFirstSearchGatherer gatherer(graphCreator);
|
|---|
| 184 | gatherer(start_atom_id);
|
|---|
| 185 |
|
|---|
| 186 | // go through distance map and print sorted by discovery level
|
|---|
| 187 | const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances();
|
|---|
| 188 | using pairtype = std::pair<atomId_t, size_t>;
|
|---|
| 189 | const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) {
|
|---|
| 190 | return p1.second < p2.second;
|
|---|
| 191 | })->second;
|
|---|
| 192 | bool isFirst = true;
|
|---|
| 193 | /**
|
|---|
| 194 | * This is O(N^2) and a stupid implementation. However, we only intend to
|
|---|
| 195 | * use this for small molecules, so I don't care at the moment. The better
|
|---|
| 196 | * approach is to revert the map into a multimap and then traverse that.
|
|---|
| 197 | */
|
|---|
| 198 | for (size_t i=0; i<= max_distance; ++i) {
|
|---|
| 199 | for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin();
|
|---|
| 200 | iter != distances.end(); ++iter) {
|
|---|
| 201 | if (iter->second != i)
|
|---|
| 202 | continue;
|
|---|
| 203 | const atom* walker = world.getAtom(AtomById(iter->first));
|
|---|
| 204 | assert(walker != NULL);
|
|---|
| 205 | LOG(1, "INFO: Gathered atom " << *walker);
|
|---|
| 206 | if (!isFirst)
|
|---|
| 207 | out << ' ';
|
|---|
| 208 | isFirst = false;
|
|---|
| 209 | out << walker->getElement().getSymbol();
|
|---|
| 210 | }
|
|---|
| 211 | }
|
|---|
| 212 | }
|
|---|
| 213 |
|
|---|