/*
* Project: MoleCuilder
* Description: creates and alters molecular systems
* Copyright (C) 2021 Frederik Heber. All rights reserved.
*
*
* This file is part of MoleCuilder.
*
* MoleCuilder is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* MoleCuilder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MoleCuilder. If not, see .
*/
/*
* Graph6Writer.cpp
*
* Created on: Apr 2, 2021
* Author: heber
*/
// include config.h
#ifdef HAVE_CONFIG_H
#include
#endif
#include "Graph6Writer.hpp"
#include "CodePatterns/Assert.hpp"
#include "CodePatterns/Log.hpp"
#include
#include
#include
#include "Atom/atom.hpp"
#include "Descriptors/AtomIdDescriptor.hpp"
#include "Element/element.hpp"
#include "Graph/BoostGraphCreator.hpp"
#include "Graph/BreadthFirstSearchGatherer.hpp"
#include "World.hpp"
//#include "CodePatterns/MemDebug.hpp"
Graph6Writer::Graph6Writer(const std::vector atoms):
_atoms(atoms)
{}
void Graph6Writer::write_n(std::ostream& out) {
const unsigned long n = _atoms.size();
if (n<62) {
out << ((unsigned char)(n+63));
return;
}
out << ((unsigned char)126);
int num_bytes = 2;
if (n> 258047) {
out << ((unsigned char)126);
num_bytes = 3;
}
for(int value=num_bytes; value>=0; value--) {
unsigned char c = 0;
int n_pos = 6*(value+1)-1;
for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) {
c += (n & (1<>((int)n_pos/6);
}
out << (c+63);
}
}
/* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n)
this writes a graph6 representation to out. */
void Graph6Writer::write_graph6(std::ostream& out) {
write_n(out);
const unsigned long n = _atoms.size();
unsigned char value = 0;
int byte_pos = 5;
unsigned int bytes_written = 0;
for (size_t j=0; jIsBondedTo(_atoms[j]);
LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos));
value = value | (bit << byte_pos--);
if (byte_pos < 0) {
LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
ASSERT( (value+63) <= 126,
"Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
+" and "+toString((unsigned char)126));
out << (unsigned char)(value+63);
bytes_written++;
value = 0;
byte_pos = 5;
}
}
if (byte_pos!=5) {
ASSERT( (value+63) <= 126,
"Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
+" and "+toString((unsigned char)126));
LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
out << (unsigned char)(value+63);
bytes_written++;
value=0;
}
ASSERT( value==0,
"Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?");
ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f),
"Graph6Writer::write_graph6() - unexpected number of bytes written");
}
/**
* Picks a non-hydrogen from all atoms in the current set of atoms
* with lowest non-hydrogen bonds.
*
* Returns -1 if none could be found.
*/
atomId_t Graph6Writer::getBoundaryNonHydrogen() const {
atomId_t start_atom_id = -1;
int lowest_non_hydrogen_count = 16;
for(std::vector::const_iterator iter = _atoms.begin();
iter != _atoms.end(); ++iter) {
const atom *walker = *iter;
if (walker->getElement().getSymbol() != "H") {
const BondList& bond_list = walker->getListOfBonds();
int number_of_non_hydrogen_bonds = 0;
for (BondList::const_iterator iter = bond_list.begin();
iter != bond_list.end(); ++iter) {
number_of_non_hydrogen_bonds += (*iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H";
}
if (lowest_non_hydrogen_count > number_of_non_hydrogen_bonds) {
start_atom_id = walker->getId();
lowest_non_hydrogen_count = number_of_non_hydrogen_bonds;
}
}
}
if ((start_atom_id == -1) && (!_atoms.empty())) {
// we only have hydrogens, just pick the first
start_atom_id = (*_atoms.begin())->getId();
}
return start_atom_id;
}
bool OnlyNonHydrogens(const bond &_bond) {
return _bond.HydrogenBond == 0;
}
void Graph6Writer::write_simple_elementlist(std::ostream& out) {
bool isFirst = true;
for(std::vector::const_iterator iter = _atoms.begin();
iter != _atoms.end(); ++iter) {
const atom *walker = *iter;
if (walker->getElement().getAtomicNumber() != (atomicNumber_t)1) {
if (!isFirst)
out << ' ';
isFirst = false;
out << walker->getElement().getSymbol();
}
}
}
void Graph6Writer::write_elementlist(std::ostream& out) {
/** Execute a Breadth-First Search discovery from one terminal atom (e.g.,
* pick random hydrogen and then it's bond-neighbor if it is non-hydrogen).
* Then return the element list in that ordering.
*
* The graph6 string does not account for the inherent graph symmetries
* (e.g., BW having 123<->321 but not 123<->132 symmetry).
*/
const World& world = World::getConstInstance();
// pick bond neighbor of a hydrogen atom
atomId_t start_atom_id = getBoundaryNonHydrogen();
if (start_atom_id == (unsigned int)-1) {
// fall back to first atom in list
start_atom_id = _atoms.front()->getId();
}
const atom* start_atom = world.getAtom(AtomById(start_atom_id));
LOG(1, "INFO: Start atom is " << *start_atom << ".");
// do an unlimited BFS and get set of nodes, ordered by discovery level
BoostGraphCreator graphCreator;
graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens);
BreadthFirstSearchGatherer gatherer(graphCreator);
gatherer(start_atom_id);
// go through distance map and print sorted by discovery level
const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances();
using pairtype = std::pair;
const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) {
return p1.second < p2.second;
})->second;
bool isFirst = true;
/**
* This is O(N^2) and a stupid implementation. However, we only intend to
* use this for small molecules, so I don't care at the moment. The better
* approach is to revert the map into a multimap and then traverse that.
*/
for (size_t i=0; i<= max_distance; ++i) {
for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin();
iter != distances.end(); ++iter) {
if (iter->second != i)
continue;
const atom* walker = world.getAtom(AtomById(iter->first));
assert(walker != NULL);
LOG(1, "INFO: Gathered atom " << *walker);
if (!isFirst)
out << ' ';
isFirst = false;
out << walker->getElement().getSymbol();
}
}
}