| 1 | /*
 | 
|---|
| 2 |  * Project: MoleCuilder
 | 
|---|
| 3 |  * Description: creates and alters molecular systems
 | 
|---|
| 4 |  * Copyright (C)  2013 University of Bonn. All rights reserved.
 | 
|---|
| 5 |  * Copyright (C)  2013 Frederik Heber. All rights reserved.
 | 
|---|
| 6 |  * 
 | 
|---|
| 7 |  *
 | 
|---|
| 8 |  *   This file is part of MoleCuilder.
 | 
|---|
| 9 |  *
 | 
|---|
| 10 |  *    MoleCuilder is free software: you can redistribute it and/or modify
 | 
|---|
| 11 |  *    it under the terms of the GNU General Public License as published by
 | 
|---|
| 12 |  *    the Free Software Foundation, either version 2 of the License, or
 | 
|---|
| 13 |  *    (at your option) any later version.
 | 
|---|
| 14 |  *
 | 
|---|
| 15 |  *    MoleCuilder is distributed in the hope that it will be useful,
 | 
|---|
| 16 |  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 17 |  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
| 18 |  *    GNU General Public License for more details.
 | 
|---|
| 19 |  *
 | 
|---|
| 20 |  *    You should have received a copy of the GNU General Public License
 | 
|---|
| 21 |  *    along with MoleCuilder.  If not, see <http://www.gnu.org/licenses/>.
 | 
|---|
| 22 |  */
 | 
|---|
| 23 | 
 | 
|---|
| 24 | /*
 | 
|---|
| 25 |  * FitPotentialAction.cpp
 | 
|---|
| 26 |  *
 | 
|---|
| 27 |  *  Created on: Apr 09, 2013
 | 
|---|
| 28 |  *      Author: heber
 | 
|---|
| 29 |  */
 | 
|---|
| 30 | 
 | 
|---|
| 31 | // include config.h
 | 
|---|
| 32 | #ifdef HAVE_CONFIG_H
 | 
|---|
| 33 | #include <config.h>
 | 
|---|
| 34 | #endif
 | 
|---|
| 35 | 
 | 
|---|
| 36 | // needs to come before MemDebug due to placement new
 | 
|---|
| 37 | #include <boost/archive/text_iarchive.hpp>
 | 
|---|
| 38 | 
 | 
|---|
| 39 | #include "CodePatterns/MemDebug.hpp"
 | 
|---|
| 40 | 
 | 
|---|
| 41 | #include <algorithm>
 | 
|---|
| 42 | #include <boost/bind.hpp>
 | 
|---|
| 43 | #include <boost/filesystem.hpp>
 | 
|---|
| 44 | #include <boost/foreach.hpp>
 | 
|---|
| 45 | #include <map>
 | 
|---|
| 46 | #include <string>
 | 
|---|
| 47 | #include <sstream>
 | 
|---|
| 48 | 
 | 
|---|
| 49 | #include "Actions/FragmentationAction/FitPotentialAction.hpp"
 | 
|---|
| 50 | 
 | 
|---|
| 51 | #include "CodePatterns/Log.hpp"
 | 
|---|
| 52 | 
 | 
|---|
| 53 | #include "Element/element.hpp"
 | 
|---|
| 54 | #include "Fragmentation/Homology/HomologyContainer.hpp"
 | 
|---|
| 55 | #include "Fragmentation/Homology/HomologyGraph.hpp"
 | 
|---|
| 56 | #include "Fragmentation/Summation/SetValues/Fragment.hpp"
 | 
|---|
| 57 | #include "FunctionApproximation/Extractors.hpp"
 | 
|---|
| 58 | #include "FunctionApproximation/FunctionApproximation.hpp"
 | 
|---|
| 59 | #include "FunctionApproximation/FunctionModel.hpp"
 | 
|---|
| 60 | #include "FunctionApproximation/TrainingData.hpp"
 | 
|---|
| 61 | #include "FunctionApproximation/writeDistanceEnergyTable.hpp"
 | 
|---|
| 62 | #include "Potentials/PotentialFactory.hpp"
 | 
|---|
| 63 | #include "Potentials/SerializablePotential.hpp"
 | 
|---|
| 64 | 
 | 
|---|
| 65 | using namespace MoleCuilder;
 | 
|---|
| 66 | 
 | 
|---|
| 67 | // and construct the stuff
 | 
|---|
| 68 | #include "FitPotentialAction.def"
 | 
|---|
| 69 | #include "Action_impl_pre.hpp"
 | 
|---|
| 70 | /** =========== define the function ====================== */
 | 
|---|
| 71 | 
 | 
|---|
| 72 | HomologyGraph getFirstGraphwithSpecifiedElements(
 | 
|---|
| 73 |     const HomologyContainer &homologies,
 | 
|---|
| 74 |     const SerializablePotential::ParticleTypes_t &types)
 | 
|---|
| 75 | {
 | 
|---|
| 76 |   ASSERT( !types.empty(),
 | 
|---|
| 77 |       "getFirstGraphwithSpecifiedElements() - charges is empty?");
 | 
|---|
| 78 |   // create charges
 | 
|---|
| 79 |   Fragment::charges_t charges;
 | 
|---|
| 80 |   charges.resize(types.size());
 | 
|---|
| 81 |   std::transform(types.begin(), types.end(),
 | 
|---|
| 82 |       charges.begin(), boost::lambda::_1);
 | 
|---|
| 83 |   // convert into count map
 | 
|---|
| 84 |   Extractors::elementcounts_t counts_per_charge =
 | 
|---|
| 85 |       Extractors::_detail::getElementCounts(charges);
 | 
|---|
| 86 |   ASSERT( !counts_per_charge.empty(),
 | 
|---|
| 87 |       "getFirstGraphwithSpecifiedElements() - charge counts are empty?");
 | 
|---|
| 88 |   LOG(2, "DEBUG: counts_per_charge is " << counts_per_charge << ".");
 | 
|---|
| 89 |   // we want to check each (unique) key only once
 | 
|---|
| 90 |   HomologyContainer::const_key_iterator olditer = homologies.key_end();
 | 
|---|
| 91 |   for (HomologyContainer::const_key_iterator iter =
 | 
|---|
| 92 |       homologies.key_begin(); iter != homologies.key_end(); olditer = iter++) {
 | 
|---|
| 93 |     // if it's the same as the old one, skip it
 | 
|---|
| 94 |     if (*olditer == *iter)
 | 
|---|
| 95 |       continue;
 | 
|---|
| 96 |     // if it's a new key, check if every element has the right number of counts
 | 
|---|
| 97 |     Extractors::elementcounts_t::const_iterator countiter = counts_per_charge.begin();
 | 
|---|
| 98 |     for (; countiter != counts_per_charge.end(); ++countiter)
 | 
|---|
| 99 |       if (!(*iter).hasTimesAtomicNumber(
 | 
|---|
| 100 |           static_cast<size_t>(countiter->first),
 | 
|---|
| 101 |           static_cast<size_t>(countiter->second))
 | 
|---|
| 102 |           )
 | 
|---|
| 103 |         break;
 | 
|---|
| 104 |     if( countiter == counts_per_charge.end())
 | 
|---|
| 105 |       return *iter;
 | 
|---|
| 106 |   }
 | 
|---|
| 107 |   return HomologyGraph();
 | 
|---|
| 108 | }
 | 
|---|
| 109 | 
 | 
|---|
| 110 | Action::state_ptr FragmentationFitPotentialAction::performCall() {
 | 
|---|
| 111 |   // charges specify the potential type
 | 
|---|
| 112 |   SerializablePotential::ParticleTypes_t chargenumbers;
 | 
|---|
| 113 |   {
 | 
|---|
| 114 |     const std::vector<const element *> &charges = params.charges.get();
 | 
|---|
| 115 |     std::transform(charges.begin(), charges.end(), std::back_inserter(chargenumbers),
 | 
|---|
| 116 |         boost::bind(&element::getAtomicNumber, _1));
 | 
|---|
| 117 |   }
 | 
|---|
| 118 |   // fragment specifies the homology fragment to use
 | 
|---|
| 119 |   SerializablePotential::ParticleTypes_t fragmentnumbers;
 | 
|---|
| 120 |   {
 | 
|---|
| 121 |     const std::vector<const element *> &fragment = params.fragment.get();
 | 
|---|
| 122 |     std::transform(fragment.begin(), fragment.end(), std::back_inserter(fragmentnumbers),
 | 
|---|
| 123 |         boost::bind(&element::getAtomicNumber, _1));
 | 
|---|
| 124 |   }
 | 
|---|
| 125 | 
 | 
|---|
| 126 |   // parse homologies into container
 | 
|---|
| 127 |   HomologyContainer homologies;
 | 
|---|
| 128 |   if (boost::filesystem::exists(params.homology_file.get())) {
 | 
|---|
| 129 |     std::ifstream returnstream(params.homology_file.get().string().c_str());
 | 
|---|
| 130 |     if (returnstream.good()) {
 | 
|---|
| 131 |       boost::archive::text_iarchive ia(returnstream);
 | 
|---|
| 132 |       ia >> homologies;
 | 
|---|
| 133 |     } else {
 | 
|---|
| 134 |       ELOG(0, "Failed to parse from " << params.homology_file.get().string() << ".");
 | 
|---|
| 135 |       return Action::failure;
 | 
|---|
| 136 |     }
 | 
|---|
| 137 |     returnstream.close();
 | 
|---|
| 138 |   } else {
 | 
|---|
| 139 |     ELOG(0, params.homology_file.get() << " does not exist.");
 | 
|---|
| 140 |     return Action::failure;
 | 
|---|
| 141 |   }
 | 
|---|
| 142 | 
 | 
|---|
| 143 |   // first we try to look into the HomologyContainer
 | 
|---|
| 144 |   LOG(1, "INFO: Listing all present homologies ...");
 | 
|---|
| 145 |   for (HomologyContainer::container_t::const_iterator iter =
 | 
|---|
| 146 |       homologies.begin(); iter != homologies.end(); ++iter) {
 | 
|---|
| 147 |     LOG(1, "INFO: graph " << iter->first << " has Fragment " << iter->second.first
 | 
|---|
| 148 |         << " and associated energy " << iter->second.second << ".");
 | 
|---|
| 149 |   }
 | 
|---|
| 150 | 
 | 
|---|
| 151 |   LOG(0, "STATUS: I'm training now a " << params.potentialtype.get() << " potential on charges "
 | 
|---|
| 152 |       << chargenumbers << " on data from " << params.homology_file.get() << ".");
 | 
|---|
| 153 | 
 | 
|---|
| 154 |   /******************** TRAINING ********************/
 | 
|---|
| 155 |   // fit potential
 | 
|---|
| 156 |   FunctionModel *model =
 | 
|---|
| 157 |       PotentialFactory::getInstance().createInstance(
 | 
|---|
| 158 |           params.potentialtype.get(),
 | 
|---|
| 159 |           chargenumbers);
 | 
|---|
| 160 |   ASSERT( model != NULL,
 | 
|---|
| 161 |       "main() - model returned from PotentialFactory is NULL.");
 | 
|---|
| 162 |   FunctionModel::parameters_t bestparams(model->getParameterDimension(), 0.);
 | 
|---|
| 163 |   {
 | 
|---|
| 164 |     // then we ought to pick the right HomologyGraph ...
 | 
|---|
| 165 |     const HomologyGraph graph = getFirstGraphwithSpecifiedElements(homologies,fragmentnumbers);
 | 
|---|
| 166 |     if (graph != HomologyGraph()) {
 | 
|---|
| 167 |       LOG(1, "First representative graph containing fragment "
 | 
|---|
| 168 |           << fragmentnumbers << " is " << graph << ".");
 | 
|---|
| 169 | 
 | 
|---|
| 170 |       // Afterwards we go through all of this type and gather the distance and the energy value
 | 
|---|
| 171 |       TrainingData data(model->getFragmentSpecificExtractor());
 | 
|---|
| 172 |       data(homologies.getHomologousGraphs(graph));
 | 
|---|
| 173 | 
 | 
|---|
| 174 |       // print distances and energies if desired for debugging
 | 
|---|
| 175 |       if (!data.getTrainingInputs().empty()) {
 | 
|---|
| 176 |         // print which distance is which
 | 
|---|
| 177 |         size_t counter=1;
 | 
|---|
| 178 |         if (DoLog(3)) {
 | 
|---|
| 179 |           const FunctionModel::arguments_t &inputs = data.getTrainingInputs()[0];
 | 
|---|
| 180 |           for (FunctionModel::arguments_t::const_iterator iter = inputs.begin();
 | 
|---|
| 181 |               iter != inputs.end(); ++iter) {
 | 
|---|
| 182 |             const argument_t &arg = *iter;
 | 
|---|
| 183 |             LOG(3, "DEBUG: distance " << counter++ << " is between (#"
 | 
|---|
| 184 |                 << arg.indices.first << "c" << arg.types.first << ","
 | 
|---|
| 185 |                 << arg.indices.second << "c" << arg.types.second << ").");
 | 
|---|
| 186 |           }
 | 
|---|
| 187 |         }
 | 
|---|
| 188 | 
 | 
|---|
| 189 |         // print table
 | 
|---|
| 190 |         LOG(3, "DEBUG: I gathered the following training data:\n" <<
 | 
|---|
| 191 |             _detail::writeDistanceEnergyTable(data.getDistanceEnergyTable()));
 | 
|---|
| 192 |       }
 | 
|---|
| 193 | 
 | 
|---|
| 194 |       // now perform the function approximation by optimizing the model function
 | 
|---|
| 195 |       FunctionApproximation approximator(data, *model);
 | 
|---|
| 196 |       if (model->isBoxConstraint() && approximator.checkParameterDerivatives()) {
 | 
|---|
| 197 |         double l2error = std::numeric_limits<double>::infinity();
 | 
|---|
| 198 |         // seed with current time
 | 
|---|
| 199 |         srand((unsigned)time(0));
 | 
|---|
| 200 |         for (unsigned int runs=0; runs < params.best_of_howmany.get(); ++runs) {
 | 
|---|
| 201 |           // generate new random initial parameter values
 | 
|---|
| 202 |           model->setParametersToRandomInitialValues(data);
 | 
|---|
| 203 |           LOG(1, "INFO: Initial parameters of run " << runs << " are "
 | 
|---|
| 204 |               << model->getParameters() << ".");
 | 
|---|
| 205 |           approximator(FunctionApproximation::ParameterDerivative);
 | 
|---|
| 206 |           LOG(1, "INFO: Final parameters of run " << runs << " are "
 | 
|---|
| 207 |               << model->getParameters() << ".");
 | 
|---|
| 208 |           const double new_l2error = data.getL2Error(*model);
 | 
|---|
| 209 |           if (new_l2error < l2error) {
 | 
|---|
| 210 |             // store currently best parameters
 | 
|---|
| 211 |             l2error = new_l2error;
 | 
|---|
| 212 |             bestparams = model->getParameters();
 | 
|---|
| 213 |             LOG(1, "STATUS: New fit from run " << runs
 | 
|---|
| 214 |                 << " has better error of " << l2error << ".");
 | 
|---|
| 215 |           }
 | 
|---|
| 216 |         }
 | 
|---|
| 217 |         // reset parameters from best fit
 | 
|---|
| 218 |         model->setParameters(bestparams);
 | 
|---|
| 219 |         LOG(1, "INFO: Best parameters with L2 error of "
 | 
|---|
| 220 |             << l2error << " are " << model->getParameters() << ".");
 | 
|---|
| 221 |       } else {
 | 
|---|
| 222 |         ELOG(0, "We require parameter derivatives for a box constraint minimization.");
 | 
|---|
| 223 |         return Action::failure;
 | 
|---|
| 224 |       }
 | 
|---|
| 225 | 
 | 
|---|
| 226 |       // create a map of each fragment with error.
 | 
|---|
| 227 |       typedef std::multimap< double, size_t > WorseFragmentMap_t;
 | 
|---|
| 228 |       WorseFragmentMap_t WorseFragmentMap;
 | 
|---|
| 229 |       HomologyContainer::range_t fragmentrange = homologies.getHomologousGraphs(graph);
 | 
|---|
| 230 |       // fragments make it into the container in reversed order, hence count from top down
 | 
|---|
| 231 |       size_t index= std::distance(fragmentrange.first, fragmentrange.second)-1;
 | 
|---|
| 232 |       for (HomologyContainer::const_iterator iter = fragmentrange.first;
 | 
|---|
| 233 |           iter != fragmentrange.second;
 | 
|---|
| 234 |           ++iter) {
 | 
|---|
| 235 |         const Fragment& fragment = iter->second.first;
 | 
|---|
| 236 |         const double &energy = iter->second.second;
 | 
|---|
| 237 | 
 | 
|---|
| 238 |         // create arguments from the fragment
 | 
|---|
| 239 |         FunctionModel::extractor_t extractor = model->getFragmentSpecificExtractor();
 | 
|---|
| 240 |         FunctionModel::arguments_t args = extractor(fragment, 1);
 | 
|---|
| 241 | 
 | 
|---|
| 242 |         // calculate value from potential
 | 
|---|
| 243 |         const double fitvalue = (*model)(args)[0];
 | 
|---|
| 244 | 
 | 
|---|
| 245 |         // insert difference into map
 | 
|---|
| 246 |         const double error = fabs(energy - fitvalue);
 | 
|---|
| 247 |         WorseFragmentMap.insert( std::make_pair( error, index-- ) );
 | 
|---|
| 248 | 
 | 
|---|
| 249 |         {
 | 
|---|
| 250 |           // give only the distances in the debugging text
 | 
|---|
| 251 |           std::stringstream streamargs;
 | 
|---|
| 252 |           BOOST_FOREACH (argument_t arg, args) {
 | 
|---|
| 253 |             streamargs << " " << arg.distance*AtomicLengthToAngstroem;
 | 
|---|
| 254 |           }
 | 
|---|
| 255 |           LOG(2, "DEBUG: frag.#" << index+1 << "'s error is |" << energy << " - " << fitvalue
 | 
|---|
| 256 |               << "| = " << error << " for args " << streamargs.str() << ".");
 | 
|---|
| 257 |         }
 | 
|---|
| 258 |       }
 | 
|---|
| 259 |       LOG(0, "RESULT: WorstFragmentMap " << WorseFragmentMap << ".");
 | 
|---|
| 260 | 
 | 
|---|
| 261 |       SerializablePotential *potential = dynamic_cast<SerializablePotential *>(model);
 | 
|---|
| 262 |       if (potential != NULL) {
 | 
|---|
| 263 |         LOG(1, "STATUS: Resulting parameters are " << std::endl << *potential);
 | 
|---|
| 264 |       } else {
 | 
|---|
| 265 |         LOG(1, "INFO: FunctionModel is no serializable potential.");
 | 
|---|
| 266 |       }
 | 
|---|
| 267 |     }
 | 
|---|
| 268 |   }
 | 
|---|
| 269 |   delete model;
 | 
|---|
| 270 | 
 | 
|---|
| 271 |   return Action::success;
 | 
|---|
| 272 | }
 | 
|---|
| 273 | 
 | 
|---|
| 274 | Action::state_ptr FragmentationFitPotentialAction::performUndo(Action::state_ptr _state) {
 | 
|---|
| 275 |   return Action::success;
 | 
|---|
| 276 | }
 | 
|---|
| 277 | 
 | 
|---|
| 278 | Action::state_ptr FragmentationFitPotentialAction::performRedo(Action::state_ptr _state){
 | 
|---|
| 279 |   return Action::success;
 | 
|---|
| 280 | }
 | 
|---|
| 281 | 
 | 
|---|
| 282 | bool FragmentationFitPotentialAction::canUndo() {
 | 
|---|
| 283 |   return false;
 | 
|---|
| 284 | }
 | 
|---|
| 285 | 
 | 
|---|
| 286 | bool FragmentationFitPotentialAction::shouldUndo() {
 | 
|---|
| 287 |   return false;
 | 
|---|
| 288 | }
 | 
|---|
| 289 | /** =========== end of function ====================== */
 | 
|---|