source: src/Graph/Graph6Writer.cpp@ d203d1e

Candidate_v1.7.0 stable
Last change on this file since d203d1e was 5a479d, checked in by Frederik Heber <frederik.heber@…>, 21 months ago

Added python actions to generate all graph6 strings.

  • graph6 strings don't take permutations in the adjacency matrix into account. Hence, we are going through every permutation of non-hydrogens and generate the respective graph6 string.
  • Property mode set to 100644
File size: 7.6 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2021 Frederik Heber. All rights reserved.
5 *
6 *
7 * This file is part of MoleCuilder.
8 *
9 * MoleCuilder is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * MoleCuilder is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with MoleCuilder. If not, see <http://www.gnu.org/licenses/>.
21 */
22
23/*
24 * Graph6Writer.cpp
25 *
26 * Created on: Apr 2, 2021
27 * Author: heber
28 */
29
30
31// include config.h
32#ifdef HAVE_CONFIG_H
33#include <config.h>
34#endif
35
36#include "Graph6Writer.hpp"
37
38#include "CodePatterns/Assert.hpp"
39#include "CodePatterns/Log.hpp"
40
41#include <cassert>
42#include <cmath>
43#include <iostream>
44
45#include "Atom/atom.hpp"
46#include "Descriptors/AtomIdDescriptor.hpp"
47#include "Element/element.hpp"
48#include "Graph/BoostGraphCreator.hpp"
49#include "Graph/BreadthFirstSearchGatherer.hpp"
50#include "World.hpp"
51
52//#include "CodePatterns/MemDebug.hpp"
53
54Graph6Writer::Graph6Writer(const std::vector<const atom *> atoms):
55 _atoms(atoms)
56{}
57
58void Graph6Writer::write_n(std::ostream& out) {
59 const unsigned long n = _atoms.size();
60
61 if (n<62) {
62 out << ((unsigned char)(n+63));
63 return;
64 }
65
66 out << ((unsigned char)126);
67 int num_bytes = 2;
68 if (n> 258047) {
69 out << ((unsigned char)126);
70 num_bytes = 3;
71 }
72 for(int value=num_bytes; value>=0; value--) {
73 unsigned char c = 0;
74 int n_pos = 6*(value+1)-1;
75 for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) {
76 c += (n & (1<<n_pos))>>((int)n_pos/6);
77 }
78 out << (c+63);
79 }
80
81}
82
83/* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n)
84 this writes a graph6 representation to out. */
85void Graph6Writer::write_graph6(std::ostream& out) {
86 write_n(out);
87
88 const unsigned long n = _atoms.size();
89
90 unsigned char value = 0;
91 int byte_pos = 5;
92 unsigned int bytes_written = 0;
93 for (size_t j=0; j<n; ++j)
94 for (size_t i=0; i<j; ++i) {
95 // std::cout << "\t\n" << (int)value << " " << byte_pos << std::endl;
96
97 unsigned int bit = _atoms[i]->IsBondedTo(_atoms[j]);
98 LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos));
99 value = value | (bit << byte_pos--);
100 if (byte_pos < 0) {
101 LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
102 ASSERT( (value+63) <= 126,
103 "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
104 +" and "+toString((unsigned char)126));
105 out << (unsigned char)(value+63);
106 bytes_written++;
107 value = 0;
108 byte_pos = 5;
109 }
110 }
111 if (byte_pos!=5) {
112 ASSERT( (value+63) <= 126,
113 "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
114 +" and "+toString((unsigned char)126));
115 LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
116 out << (unsigned char)(value+63);
117 bytes_written++;
118 value=0;
119 }
120 ASSERT( value==0,
121 "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?");
122 ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f),
123 "Graph6Writer::write_graph6() - unexpected number of bytes written");
124}
125
126/**
127 * Picks a non-hydrogen from all atoms in the current set of atoms
128 * with lowest non-hydrogen bonds.
129 *
130 * Returns -1 if none could be found.
131 */
132atomId_t Graph6Writer::getBoundaryNonHydrogen() const {
133 atomId_t start_atom_id = -1;
134 int lowest_non_hydrogen_count = 16;
135 for(std::vector<const atom *>::const_iterator iter = _atoms.begin();
136 iter != _atoms.end(); ++iter) {
137 const atom *walker = *iter;
138 if (walker->getElement().getSymbol() != "H") {
139 const BondList& bond_list = walker->getListOfBonds();
140 int number_of_non_hydrogen_bonds = 0;
141 for (BondList::const_iterator iter = bond_list.begin();
142 iter != bond_list.end(); ++iter) {
143 number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H";
144 }
145 if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) {
146 start_atom_id = walker->getId();
147 lowest_non_hydrogen_count = number_of_non_hydrogen_bonds;
148 }
149 }
150 }
151 if ((start_atom_id == -1) && (!_atoms.empty())) {
152 // we only have hydrogens, just pick the first
153 start_atom_id = (*_atoms.begin())->getId();
154 }
155 return start_atom_id;
156}
157
158bool OnlyNonHydrogens(const bond &_bond) {
159 return _bond.HydrogenBond == 0;
160}
161
162void Graph6Writer::write_simple_elementlist(std::ostream& out) {
163 bool isFirst = true;
164 for(std::vector<const atom *>::const_iterator iter = _atoms.begin();
165 iter != _atoms.end(); ++iter) {
166 const atom *walker = *iter;
167 if (walker->getElement().getAtomicNumber() != (atomicNumber_t)1) {
168 if (!isFirst)
169 out << ' ';
170 isFirst = false;
171 out << walker->getElement().getSymbol();
172 }
173 }
174}
175
176void Graph6Writer::write_elementlist(std::ostream& out) {
177 /** Execute a Breadth-First Search discovery from one terminal atom (e.g.,
178 * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen).
179 * Then return the element list in that ordering.
180 *
181 * The graph6 string does not account for the inherent graph symmetries
182 * (e.g., BW having 123<->321 but not 123<->132 symmetry).
183 */
184 const World& world = World::getConstInstance();
185 // pick bond neighbor of a hydrogen atom
186 atomId_t start_atom_id = getBoundaryNonHydrogen();
187 if (start_atom_id == (unsigned int)-1) {
188 // fall back to first atom in list
189 start_atom_id = _atoms.front()->getId();
190 }
191 const atom* start_atom = world.getAtom(AtomById(start_atom_id));
192 LOG(1, "INFO: Start atom is " << *start_atom << ".");
193
194 // do an unlimited BFS and get set of nodes, ordered by discovery level
195 BoostGraphCreator graphCreator;
196 graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens);
197 BreadthFirstSearchGatherer gatherer(graphCreator);
198 gatherer(start_atom_id);
199
200 // go through distance map and print sorted by discovery level
201 const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances();
202 using pairtype = std::pair<atomId_t, size_t>;
203 const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) {
204 return p1.second < p2.second;
205 })->second;
206 bool isFirst = true;
207 /**
208 * This is O(N^2) and a stupid implementation. However, we only intend to
209 * use this for small molecules, so I don't care at the moment. The better
210 * approach is to revert the map into a multimap and then traverse that.
211 */
212 for (size_t i=0; i<= max_distance; ++i) {
213 for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin();
214 iter != distances.end(); ++iter) {
215 if (iter->second != i)
216 continue;
217 const atom* walker = world.getAtom(AtomById(iter->first));
218 assert(walker != NULL);
219 LOG(1, "INFO: Gathered atom " << *walker);
220 if (!isFirst)
221 out << ' ';
222 isFirst = false;
223 out << walker->getElement().getSymbol();
224 }
225 }
226}
227
Note: See TracBrowser for help on using the repository browser.