source: src/Graph/Graph6Writer.cpp@ d083cc

Candidate_v1.7.0 stable
Last change on this file since d083cc was 78e5cf, checked in by Frederik Heber <frederik.heber@…>, 3 years ago

Graph6Writer::write_elementlist uses least connected non-hydrogen.

  • this should enforce outermost atom in linear chain.
  • Property mode set to 100644
File size: 7.2 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2021 Frederik Heber. All rights reserved.
5 *
6 *
7 * This file is part of MoleCuilder.
8 *
9 * MoleCuilder is free software: you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation, either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * MoleCuilder is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with MoleCuilder. If not, see <http://www.gnu.org/licenses/>.
21 */
22
23/*
24 * Graph6Writer.cpp
25 *
26 * Created on: Apr 2, 2021
27 * Author: heber
28 */
29
30
31// include config.h
32#ifdef HAVE_CONFIG_H
33#include <config.h>
34#endif
35
36#include "Graph6Writer.hpp"
37
38#include "CodePatterns/Assert.hpp"
39#include "CodePatterns/Log.hpp"
40
#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>
44
45#include "Atom/atom.hpp"
46#include "Descriptors/AtomIdDescriptor.hpp"
47#include "Element/element.hpp"
48#include "Graph/BoostGraphCreator.hpp"
49#include "Graph/BreadthFirstSearchGatherer.hpp"
50#include "World.hpp"
51
52//#include "CodePatterns/MemDebug.hpp"
53
54Graph6Writer::Graph6Writer(const std::vector<const atom *> atoms):
55 _atoms(atoms)
56{}
57
58void Graph6Writer::write_n(std::ostream& out) {
59 const unsigned long n = _atoms.size();
60
61 if (n<62) {
62 out << ((unsigned char)(n+63));
63 return;
64 }
65
66 out << ((unsigned char)126);
67 int num_bytes = 2;
68 if (n> 258047) {
69 out << ((unsigned char)126);
70 num_bytes = 3;
71 }
72 for(int value=num_bytes; value>=0; value--) {
73 unsigned char c = 0;
74 int n_pos = 6*(value+1)-1;
75 for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) {
76 c += (n & (1<<n_pos))>>((int)n_pos/6);
77 }
78 out << (c+63);
79 }
80
81}
82
83/* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n)
84 this writes a graph6 representation to out. */
85void Graph6Writer::write_graph6(std::ostream& out) {
86 write_n(out);
87
88 const unsigned long n = _atoms.size();
89
90 unsigned char value = 0;
91 int byte_pos = 5;
92 unsigned int bytes_written = 0;
93 for (size_t j=0; j<n; ++j)
94 for (size_t i=0; i<j; ++i) {
95 // std::cout << "\t\n" << (int)value << " " << byte_pos << std::endl;
96
97 unsigned int bit = _atoms[i]->IsBondedTo(_atoms[j]);
98 LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos));
99 value = value | (bit << byte_pos--);
100 if (byte_pos < 0) {
101 LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
102 ASSERT( (value+63) <= 126,
103 "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
104 +" and "+toString((unsigned char)126));
105 out << (unsigned char)(value+63);
106 bytes_written++;
107 value = 0;
108 byte_pos = 5;
109 }
110 }
111 if (byte_pos!=5) {
112 ASSERT( (value+63) <= 126,
113 "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
114 +" and "+toString((unsigned char)126));
115 LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
116 out << (unsigned char)(value+63);
117 bytes_written++;
118 value=0;
119 }
120 ASSERT( value==0,
121 "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?");
122 ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f),
123 "Graph6Writer::write_graph6() - unexpected number of bytes written");
124}
125
126/**
127 * Picks a non-hydrogen from all atoms in the current set of atoms
128 * with lowest non-hydrogen bonds.
129 *
130 * Returns -1 if none could be found.
131 */
132atomId_t Graph6Writer::getBoundaryNonHydrogen() const {
133 atomId_t start_atom_id = -1;
134 int lowest_non_hydrogen_count = 16;
135 for(std::vector<const atom *>::const_iterator iter = _atoms.begin();
136 iter != _atoms.end(); ++iter) {
137 const atom *walker = *iter;
138 if (walker->getElement().getSymbol() != "H") {
139 const BondList& bond_list = walker->getListOfBonds();
140 int number_of_non_hydrogen_bonds = 0;
141 for (BondList::const_iterator iter = bond_list.begin();
142 iter != bond_list.end(); ++iter) {
143 number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H";
144 }
145 if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) {
146 start_atom_id = walker->getId();
147 lowest_non_hydrogen_count = number_of_non_hydrogen_bonds;
148 }
149 }
150 }
151 if ((start_atom_id == -1) && (!_atoms.empty())) {
152 // we only have hydrogens, just pick the first
153 start_atom_id = (*_atoms.begin())->getId();
154 }
155 return start_atom_id;
156}
157
158bool OnlyNonHydrogens(const bond &_bond) {
159 return _bond.HydrogenBond == 0;
160}
161
162void Graph6Writer::write_elementlist(std::ostream& out) {
163 /** Execute a Breadth-First Search discovery from one terminal atom (e.g.,
164 * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen).
165 * Then return the element list in that ordering.
166 *
167 * The graph6 string does not account for the inherent graph symmetries
168 * (e.g., BW having 123<->321 but not 123<->132 symmetry).
169 */
170 const World& world = World::getConstInstance();
171 // pick bond neighbor of a hydrogen atom
172 atomId_t start_atom_id = getBoundaryNonHydrogen();
173 if (start_atom_id == (unsigned int)-1) {
174 // fall back to first atom in list
175 start_atom_id = _atoms.front()->getId();
176 }
177 const atom* start_atom = world.getAtom(AtomById(start_atom_id));
178 LOG(1, "INFO: Start atom is " << *start_atom << ".");
179
180 // do an unlimited BFS and get set of nodes, ordered by discovery level
181 BoostGraphCreator graphCreator;
182 graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens);
183 BreadthFirstSearchGatherer gatherer(graphCreator);
184 gatherer(start_atom_id);
185
186 // go through distance map and print sorted by discovery level
187 const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances();
188 using pairtype = std::pair<atomId_t, size_t>;
189 const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) {
190 return p1.second < p2.second;
191 })->second;
192 bool isFirst = true;
193 /**
194 * This is O(N^2) and a stupid implementation. However, we only intend to
195 * use this for small molecules, so I don't care at the moment. The better
196 * approach is to revert the map into a multimap and then traverse that.
197 */
198 for (size_t i=0; i<= max_distance; ++i) {
199 for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin();
200 iter != distances.end(); ++iter) {
201 if (iter->second != i)
202 continue;
203 const atom* walker = world.getAtom(AtomById(iter->first));
204 assert(walker != NULL);
205 LOG(1, "INFO: Gathered atom " << *walker);
206 if (!isFirst)
207 out << ' ';
208 isFirst = false;
209 out << walker->getElement().getSymbol();
210 }
211 }
212}
213
Note: See TracBrowser for help on using the repository browser.