/*
 * Project: MoleCuilder
 * Description: creates and alters molecular systems
 * Copyright (C)  2021 Frederik Heber. All rights reserved.
 *
 *
 *   This file is part of MoleCuilder.
 *
 *    MoleCuilder is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    MoleCuilder is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoleCuilder.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 * Graph6Writer.cpp
 *
 *  Created on: Apr 2, 2021
 *      Author: heber
 */


// include config.h
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "Graph6Writer.hpp"

#include "CodePatterns/Assert.hpp"
#include "CodePatterns/Log.hpp"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <iostream>
#include <utility>
#include <vector>

#include "Atom/atom.hpp"
#include "Descriptors/AtomIdDescriptor.hpp"
#include "Element/element.hpp"
#include "Graph/BoostGraphCreator.hpp"
#include "Graph/BreadthFirstSearchGatherer.hpp"
#include "World.hpp"

//#include "CodePatterns/MemDebug.hpp"

/**
 * Constructs a writer for the given set of atoms.
 *
 * The vector is taken by value and then moved into the member, so it is
 * copied at most once (on the caller's side). The top-level const of the
 * parameter is dropped in this definition only: it is not part of the
 * function's type, and a const parameter could not be moved from.
 *
 * \param atoms atoms whose bond graph and elements are to be written
 */
Graph6Writer::Graph6Writer(std::vector<const atom *> atoms):
  _atoms(std::move(atoms))
{}

void Graph6Writer::write_n(std::ostream& out) {
  const unsigned long n = _atoms.size();

  if (n<62) {
    out << ((unsigned char)(n+63));
    return;
  }

  out << ((unsigned char)126);
  int num_bytes = 2;
  if (n> 258047) {
    out << ((unsigned char)126);
    num_bytes = 3;
  }
  for(int value=num_bytes; value>=0; value--) {
    unsigned char c = 0;
    int n_pos = 6*(value+1)-1;
    for(int c_pos=5; c_pos>=0; n_pos--, c_pos--) {
      c += (n & (1<<n_pos))>>((int)n_pos/6);
    }
    out << (c+63);
  }

}

/* Given an iterator over the adjacency matrix in the order (0,1),(0,2),(1,2),(0,3),(1,3),(2,3),...,(n-1,n)
   this writes a graph6 representation to out. */
void Graph6Writer::write_graph6(std::ostream& out) {
  write_n(out);

  const unsigned long n = _atoms.size();

  unsigned char value = 0;
  int byte_pos = 5;
  unsigned int bytes_written = 0;
  for (size_t j=0; j<n; ++j)
    for (size_t i=0; i<j; ++i) {
      // std::cout << "\t\n" << (int)value << " " << byte_pos << std::endl;

      unsigned int bit = _atoms[i]->IsBondedTo(_atoms[j]);
      LOG(2, "DEBUG: (" << i << "," << j << ") = " << bit << "," << value << " | " << bit << " << " << byte_pos << " = " << (unsigned int)value << " | " << (bit << byte_pos));
      value = value | (bit << byte_pos--);
      if (byte_pos < 0) {
        LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
        ASSERT( (value+63) <= 126,
            "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
            +" and "+toString((unsigned char)126));
        out << (unsigned char)(value+63);
        bytes_written++;
        value = 0;
        byte_pos = 5;
      }
    }
  if (byte_pos!=5) {
    ASSERT( (value+63) <= 126,
        "Graph6Writer::write_graph6() - char to write is outside "+toString((unsigned char)63)
        +" and "+toString((unsigned char)126));
    LOG(2, "DEBUG: Writing byte " << value << " into range [" << (unsigned char)63 << "," << (unsigned char)126 << "]");
    out << (unsigned char)(value+63);
    bytes_written++;
    value=0;
  }
  ASSERT( value==0,
      "Graph6Writer::write_graph6() - byte is not null, i.e. chars left to write?");
  ASSERT( bytes_written == (unsigned int)ceil(n*(n-1)/12.0f),
      "Graph6Writer::write_graph6() - unexpected number of bytes written");
}

/**
 * Picks the non-hydrogen atom from the stored set of atoms that has the
 * lowest number of bonds to non-hydrogen atoms.
 *
 * All bonds of an atom are counted, whether or not the bond partner is part
 * of the stored set. On ties the atom that comes first in the set wins.
 * If the set consists only of hydrogens, the id of its first atom is
 * returned instead.
 *
 * \return id of the picked atom, or -1 (converted to atomId_t) if the set of
 *         atoms is empty
 */
atomId_t Graph6Writer::getBoundaryNonHydrogen() const {
  atomId_t start_atom_id = -1;
  // explicit flag instead of a magic upper bound on the bond count, so that
  // arbitrarily well-connected atoms are still considered
  bool found_non_hydrogen = false;
  int lowest_non_hydrogen_count = 0;
  for (const atom *walker : _atoms) {
    if (walker->getElement().getSymbol() == "H")
      continue;
    const BondList& bond_list = walker->getListOfBonds();
    int number_of_non_hydrogen_bonds = 0;
    for (BondList::const_iterator bond_iter = bond_list.begin();
        bond_iter != bond_list.end(); ++bond_iter) {
      if ((*bond_iter)->GetOtherAtom(walker)->getElement().getSymbol() != "H")
        ++number_of_non_hydrogen_bonds;
    }
    // strict comparison keeps the first atom on ties
    if (!found_non_hydrogen
        || (number_of_non_hydrogen_bonds < lowest_non_hydrogen_count)) {
      found_non_hydrogen = true;
      start_atom_id = walker->getId();
      lowest_non_hydrogen_count = number_of_non_hydrogen_bonds;
    }
  }
  if (!found_non_hydrogen && !_atoms.empty()) {
    // we only have hydrogens, just pick the first
    start_atom_id = _atoms.front()->getId();
  }
  return start_atom_id;
}

/**
 * Bond predicate accepting only bonds whose HydrogenBond member is zero.
 *
 * It is handed to BoostGraphCreator::createFromAtoms() in
 * Graph6Writer::write_elementlist().
 *
 * \param _bond bond to inspect
 * \return true if _bond.HydrogenBond equals 0, false otherwise
 */
bool OnlyNonHydrogens(const bond &_bond) {
  const bool has_no_hydrogen = (_bond.HydrogenBond == 0);
  return has_no_hydrogen;
}

void Graph6Writer::write_elementlist(std::ostream& out) {
  /** Execute a Breadth-First Search discovery from one terminal atom (e.g.,
   * pick random hydrogen and then it's bond-neighbor if it is non-hydrogen).
   * Then return the element list in that ordering.
   *
   * The graph6 string does not account for the inherent graph symmetries
   * (e.g., BW having 123<->321 but not 123<->132 symmetry).
   */
  const World& world = World::getConstInstance();
  // pick bond neighbor of a hydrogen atom
  atomId_t start_atom_id = getBoundaryNonHydrogen();
  if (start_atom_id == (unsigned int)-1) {
    // fall back to first atom in list
    start_atom_id = _atoms.front()->getId();
  }
  const atom* start_atom = world.getAtom(AtomById(start_atom_id));
  LOG(1, "INFO: Start atom is " << *start_atom << ".");

  // do an unlimited BFS and get set of nodes, ordered by discovery level
  BoostGraphCreator graphCreator;
  graphCreator.createFromAtoms(_atoms, OnlyNonHydrogens);
  BreadthFirstSearchGatherer gatherer(graphCreator);
  gatherer(start_atom_id);

  // go through distance map and print sorted by discovery level
  const BreadthFirstSearchGatherer::distance_map_t &distances = gatherer.getDistances();
  using pairtype = std::pair<atomId_t, size_t>;
  const size_t max_distance = std::max_element(distances.begin(), distances.end(), [] (const pairtype & p1, const pairtype & p2) {
    return p1.second < p2.second;
  })->second;
  bool isFirst = true;
  /**
   * This is O(N^2) and a stupid implementation. However, we only intend to
   * use this for small molecules, so I don't care at the moment. The better
   * approach is to revert the map into a multimap and then traverse that.
   */
  for (size_t i=0; i<= max_distance; ++i) {
    for (BreadthFirstSearchGatherer::distance_map_t::const_iterator iter = distances.begin();
        iter != distances.end(); ++iter) {
      if (iter->second != i)
        continue;
      const atom* walker = world.getAtom(AtomById(iter->first));
      assert(walker != NULL);
      LOG(1, "INFO: Gathered atom " << *walker);
      if (!isFirst)
        out << ' ';
      isFirst = false;
      out << walker->getElement().getSymbol();
    }
  }
}

