/*
* Project: MoleCuilder
* Description: creates and alters molecular systems
* Copyright (C) 2014 Frederik Heber. All rights reserved.
*
*
* This file is part of MoleCuilder.
*
* MoleCuilder is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* MoleCuilder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with MoleCuilder. If not, see .
*/
/*
* PotentialTrainer.cpp
*
* Created on: Sep 11, 2014
* Author: heber
*/
// include config.h
#ifdef HAVE_CONFIG_H
#include
#endif
// needs to come before MemDebug due to placement new
#include
#include "CodePatterns/MemDebug.hpp"
#include "PotentialTrainer.hpp"
#include
#include
#include
#include
#include
#include "CodePatterns/Assert.hpp"
#include "CodePatterns/Log.hpp"
#include "Element/element.hpp"
#include "Fragmentation/Homology/HomologyContainer.hpp"
#include "Fragmentation/Homology/HomologyGraph.hpp"
#include "FunctionApproximation/Extractors.hpp"
#include "FunctionApproximation/FunctionApproximation.hpp"
#include "FunctionApproximation/FunctionModel.hpp"
#include "FunctionApproximation/TrainingData.hpp"
#include "FunctionApproximation/writeDistanceEnergyTable.hpp"
#include "Potentials/CompoundPotential.hpp"
#include "Potentials/RegistrySerializer.hpp"
#include "Potentials/SerializablePotential.hpp"
PotentialTrainer::PotentialTrainer()
{}
PotentialTrainer::~PotentialTrainer()
{}
bool PotentialTrainer::operator()(
const HomologyContainer &_homologies,
const HomologyGraph &_graph,
const boost::filesystem::path &_trainingfile,
const unsigned int _maxiterations,
const double _threshold,
const unsigned int _best_of_howmany) const
{
// fit potential
FunctionModel *model = new CompoundPotential(_graph);
ASSERT( model != NULL,
"PotentialTrainer::operator() - model is NULL.");
{
CompoundPotential *compound = static_cast(model);
if (compound->begin() == compound->end()) {
ELOG(1, "Could not find any suitable potentials for the compound potential.");
return false;
}
}
/******************** TRAINING ********************/
// fit potential
FunctionModel::parameters_t bestparams(model->getParameterDimension(), 0.);
{
// Afterwards we go through all of this type and gather the distance and the energy value
TrainingData data(model->getSpecificFilter());
data(_homologies.getHomologousGraphs(_graph));
// check data
const TrainingData::FilteredInputVector_t &inputs = data.getTrainingInputs();
for (TrainingData::FilteredInputVector_t::const_iterator iter = inputs.begin();
iter != inputs.end(); ++iter)
if (((*iter).empty()) || ((*iter).front().empty())) {
ELOG(1, "At least one of the training inputs is empty! Correct fragment and potential charges selected?");
return false;
}
const TrainingData::OutputVector_t &outputs = data.getTrainingOutputs();
for (TrainingData::OutputVector_t::const_iterator iter = outputs.begin();
iter != outputs.end(); ++iter)
if ((*iter).empty()) {
ELOG(1, "At least one of the training outputs is empty! Correct fragment and potential charges selected?");
return false;
}
// print distances and energies if desired for debugging
if (!data.getTrainingInputs().empty()) {
// print which distance is which
size_t counter=1;
if (DoLog(3)) {
const FunctionModel::arguments_t &inputs = data.getAllArguments()[0];
for (FunctionModel::arguments_t::const_iterator iter = inputs.begin();
iter != inputs.end(); ++iter) {
const argument_t &arg = *iter;
LOG(3, "DEBUG: distance " << counter++ << " is between (#"
<< arg.indices.first << "c" << arg.types.first << ","
<< arg.indices.second << "c" << arg.types.second << ").");
}
}
// print table
if (_trainingfile.string().empty()) {
LOG(3, "DEBUG: I gathered the following training data:\n" <<
_detail::writeDistanceEnergyTable(data.getDistanceEnergyTable()));
} else {
std::ofstream trainingstream(_trainingfile.string().c_str());
if (trainingstream.good()) {
LOG(3, "DEBUG: Writing training data to file " <<
_trainingfile.string() << ".");
trainingstream << _detail::writeDistanceEnergyTable(data.getDistanceEnergyTable());
}
trainingstream.close();
}
}
if ((_threshold < 1.) && (_best_of_howmany))
ELOG(2, "threshold parameter always overrules max_runs, both are specified.");
// now perform the function approximation by optimizing the model function
FunctionApproximation approximator(data, *model, _threshold, _maxiterations);
if (model->isBoxConstraint() && approximator.checkParameterDerivatives()) {
double l2error = std::numeric_limits::max();
// seed with current time
srand((unsigned)time(0));
unsigned int runs=0;
// threshold overrules max_runs
const double threshold = _threshold;
const unsigned int max_runs = (threshold >= 1.) ? _best_of_howmany : 1;
LOG(1, "INFO: Maximum runs is " << max_runs << " and threshold set to " << threshold << ".");
do {
// generate new random initial parameter values
model->setParametersToRandomInitialValues(data);
LOG(1, "INFO: Initial parameters of run " << runs << " are "
<< model->getParameters() << ".");
approximator(FunctionApproximation::ParameterDerivative);
LOG(1, "INFO: Final parameters of run " << runs << " are "
<< model->getParameters() << ".");
const double new_l2error = data.getL2Error(*model);
if (new_l2error < l2error) {
// store currently best parameters
l2error = new_l2error;
bestparams = model->getParameters();
LOG(1, "STATUS: New fit from run " << runs
<< " has better error of " << l2error << ".");
}
} while (( ++runs < max_runs) || (l2error > threshold));
// reset parameters from best fit
model->setParameters(bestparams);
LOG(1, "INFO: Best parameters with L2 error of "
<< l2error << " are " << model->getParameters() << ".");
} else {
return false;
}
// create a map of each fragment with error.
HomologyContainer::range_t fragmentrange = _homologies.getHomologousGraphs(_graph);
TrainingData::L2ErrorConfigurationIndexMap_t WorseFragmentMap =
data.getWorstFragmentMap(*model, fragmentrange);
LOG(0, "RESULT: WorstFragmentMap " << WorseFragmentMap << ".");
}
delete model;
return true;
}
HomologyGraph PotentialTrainer::getFirstGraphwithSpecifiedElements(
const HomologyContainer &homologies,
const SerializablePotential::ParticleTypes_t &types)
{
ASSERT( !types.empty(),
"getFirstGraphwithSpecifiedElements() - charges is empty?");
// create charges
Fragment::charges_t charges;
charges.resize(types.size());
std::transform(types.begin(), types.end(),
charges.begin(), boost::lambda::_1);
// convert into count map
Extractors::elementcounts_t counts_per_charge =
Extractors::_detail::getElementCounts(charges);
ASSERT( !counts_per_charge.empty(),
"getFirstGraphwithSpecifiedElements() - charge counts are empty?");
LOG(1, "DEBUG: counts_per_charge is " << counts_per_charge << ".");
// we want to check each (unique) key only once
HomologyContainer::const_key_iterator olditer = homologies.key_end();
for (HomologyContainer::const_key_iterator iter =
homologies.key_begin(); iter != homologies.key_end();
iter = homologies.getNextKey(iter)) {
// if it's the same as the old one, skip it
if (olditer == iter)
continue;
else
olditer = iter;
// check whether we have the same set of atomic numbers
const HomologyGraph::nodes_t &nodes = (*iter).getNodes();
Extractors::elementcounts_t nodes_counts_per_charge;
for (HomologyGraph::nodes_t::const_iterator nodeiter = nodes.begin();
nodeiter != nodes.end(); ++nodeiter) {
const Extractors::element_t elem = nodeiter->first.getAtomicNumber();
const std::pair inserter =
nodes_counts_per_charge.insert( std::make_pair(elem, (Extractors::count_t)nodeiter->second ) );
if (!inserter.second)
inserter.first->second += (Extractors::count_t)nodeiter->second;
}
LOG(1, "DEBUG: Node (" << *iter << ")'s counts_per_charge is " << nodes_counts_per_charge << ".");
if (counts_per_charge == nodes_counts_per_charge)
return *iter;
}
return HomologyGraph();
}
SerializablePotential::ParticleTypes_t PotentialTrainer::getNumbersFromElements(
const std::vector &fragment)
{
SerializablePotential::ParticleTypes_t fragmentnumbers;
std::transform(fragment.begin(), fragment.end(), std::back_inserter(fragmentnumbers),
boost::bind(&element::getAtomicNumber, _1));
return fragmentnumbers;
}