source: src/FunctionApproximation/Extractors.hpp@ a844d8

Candidate_v1.6.1
Last change on this file since a844d8 was f5ea10, checked in by Frederik Heber <frederik.heber@…>, 7 years ago

Added Graph6Reader, extended BoostGraphCreator, added ChemicalSpaceEvaluatorAction.

  • added visible generateAllInducedSubgraphs to Extractors.
  • TESTS: due to new option "graph6" containing a digit we needed to modify moltest_check.py to also scan for digits and not just letters.
  • DOCU: Added evaluate-chemical-space to userguide.
  • Property mode set to 100644
File size: 8.7 KB
Line 
1/*
2 * Extractors.hpp
3 *
4 * Created on: 15.10.2012
5 * Author: heber
6 */
7
8#ifndef TRAININGDATA_EXTRACTORS_HPP_
9#define TRAININGDATA_EXTRACTORS_HPP_
10
11// include config.h
12#ifdef HAVE_CONFIG_H
13#include <config.h>
14#endif
15
16#include <boost/bimap.hpp>
17#include <boost/bimap/set_of.hpp>
18#include <boost/bimap/multiset_of.hpp>
19#include <boost/graph/adjacency_list.hpp>
20#include <boost/graph/breadth_first_search.hpp>
21#include <boost/graph/subgraph.hpp>
22#include <boost/function.hpp>
23
24#include <map>
25#include <set>
26
27#include "Fragmentation/EdgesPerFragment.hpp"
28#include "Fragmentation/Summation/SetValues/Fragment.hpp"
29#include "FunctionApproximation/FunctionModel.hpp"
30
31class BindingModel;
32class Fragment;
33class HomologyGraph;
34
35/** Namespace containing all simple extractor functions.
36 *
37 * Extractor functions extract distances from a given fragment matching with
38 * a given set of particle types (i.e. elements, e.g. H2O).
39 * Filter functions extract a subset of distances from a given set of distances
40 * to be used with a specific model.
41 *
42 * To this end, each FunctionModel has both a filter and an extractor function.
43 *
44 * The functions in this namespace act as helpers or basic building blocks in
45 * constructing such filters and extractors.
46 *
47 */
48namespace Extractors {
49 typedef Fragment::charges_t::const_iterator chargeiter_t;
50 typedef std::vector<chargeiter_t> chargeiters_t;
51
52 typedef size_t count_t;
53 typedef Fragment::atomicNumber_t element_t;
54 typedef std::map< element_t, count_t> elementcounts_t;
55 typedef std::map< element_t, chargeiters_t > elementtargets_t;
56 typedef std::vector< chargeiters_t > targets_per_combination_t;
57 //!> typedef for particle designation
58 typedef unsigned int ParticleType_t;
59 //!> typedef for a vector of particle designations
60 typedef std::vector<ParticleType_t> ParticleTypes_t;
61
62 typedef size_t level_t;
63 typedef size_t node_t;
64 typedef std::multimap< level_t, node_t > nodes_per_level_t;
65 typedef std::set<node_t> nodes_t;
66 typedef std::set<nodes_t> set_of_nodes_t;
67
68 typedef boost::bimap<
69 boost::bimaps::set_of< size_t >,
70 boost::bimaps::multiset_of< Extractors::ParticleType_t >
71 > type_index_lookup_t;
72
73 typedef std::set<node_t> set_type;
74 typedef std::set<set_type> powerset_type;
75
76 typedef boost::adjacency_list < boost::vecS, boost::vecS, boost::undirectedS,
77 boost::property<boost::vertex_name_t, atomId_t>,
78 boost::property<boost::vertex_color_t, boost::default_color_type> /* needed for limited-depth DFS,
79 otherwise the property_map gets full size of graph */
80 > UndirectedGraph;
81 typedef boost::subgraph< UndirectedGraph > UndirectedSubgraph;
82
83 typedef boost::property_map < UndirectedGraph, boost::vertex_index_t >::type index_map_t;
84
85 typedef std::map< node_t, std::pair<Extractors::ParticleType_t, size_t> > node_FragmentNode_map_t;
86
87 typedef std::map< argument_t::indices_t, size_t> argument_placement_map_t;
88
89 typedef std::map<size_t, size_t> argindex_to_nodeindex_t;
90
91 /**
92 * I have no idea why this is so complicated with BGL ...
93 *
94 * This is taken from the book "The Boost Graph Library: User Guide and Reference Manual, Portable Documents",
95 * chapter "Basic Graph Algorithms", example on calculating the bacon number.
96 */
97 template <typename DistanceMap>
98 class distance_recorder : public boost::default_bfs_visitor
99 {
100 public:
101 distance_recorder(DistanceMap dist) : d(dist) {}
102
103 template <typename Edge, typename Graph>
104 void tree_edge(Edge e, const Graph &g) const {
105 typename boost::graph_traits<Graph>::vertex_descriptor u = source(e,g), v = target(e,g);
106 d[v] = d[u] + 1;
107 }
108
109 private:
110 DistanceMap d;
111 };
112
113 template <typename DistanceMap>
114 distance_recorder<DistanceMap> record_distance(DistanceMap d)
115 {
116 return distance_recorder<DistanceMap>(d);
117 }
118
119 HomologyGraph createHomologyGraphFromNodes(
120 const nodes_t &nodes,
121 const type_index_lookup_t &type_index_lookup,
122 const UndirectedGraph &graph,
123 const index_map_t &index_map
124 );
125
126 void generateAllInducedConnectedSubgraphs(
127 const size_t N,
128 const level_t level,
129 const nodes_t &nodes,
130 set_of_nodes_t &set_of_nodes,
131 const nodes_per_level_t &nodes_per_level,
132 const UndirectedGraph &graph,
133 const std::vector<size_t> &_distance,
134 const index_map_t &index_map);
135
136 /** Namespace for some internal helper functions.
137 *
138 */
139 namespace _detail {
140
141 /** Counts all same elements in the vector and places into map of elements.
142 *
143 * \param elements vector of elements
144 * \return count of same element in vector
145 */
146 elementcounts_t getElementCounts(
147 const Fragment::atomicnumbers_t elements
148 );
149
150 }
151
152 /** Gather all distances from a given set of positions.
153 *
154 * Here, we only return one of the two equal distances.
155 *
156 * \param positions all nuclei positions
157 * \param atomicNumber all nuclei atomic numbers
158 * \param edges edges of the fragment's bond graph
159 * \param globalid index to associated in argument_t with
160 * \return vector of argument_ , each with a distance
161 */
162 FunctionModel::arguments_t
163 gatherAllSymmetricDistanceArguments(
164 const Fragment::positions_t& positions,
165 const Fragment::atomicnumbers_t& atomicnumbers,
166 const FragmentationEdges::edges_t &edges,
167 const size_t globalid);
168
169 /** Simple extractor of all unique pair distances of a given \a fragment, where
170 * the first index is less than the second one.
171 *
172 * \param positions all nuclei positions
173 * \param atomicNumber all nuclei atomic numbers
174 * \param edges edges of the fragment's bond graph
175 * \param index index refers to the index within the global set of configurations
176 * \return vector of of argument_t containing all found distances
177 */
178 inline FunctionModel::arguments_t gatherAllSymmetricDistances(
179 const Fragment::positions_t& positions,
180 const Fragment::atomicnumbers_t& atomicnumbers,
181 const FragmentationEdges::edges_t &edges,
182 const size_t index
183 ) {
184 // get distance out of Fragment
185 return gatherAllSymmetricDistanceArguments(positions, atomicnumbers, edges, index);
186 }
187
188 /** Filter the arguments to select only these required by the model.
189 *
190 * \warning this is meant as a faster way of getting the arguments for simple
191 * pair potentials. In any other case, one should use filterArgumentsByBindingModel()
192 *
193 * \param listargs list of arguments to reorder each
194 * \param _graph contains binding model of graph
195 * \param _types particle type vector
196 * \return reordered args
197 */
198 FunctionModel::list_of_arguments_t filterArgumentsByParticleTypes(
199 const FunctionModel::arguments_t &args,
200 const HomologyGraph &_graph,
201 const ParticleTypes_t &_types,
202 const BindingModel &_bindingmodel
203 );
204
205 /** Filter and reorder the arguments to bring adjacent ones together.
206 *
207 * We need to find all matching subgraphs (given by \a _bindingmodel) in the
208 * given homology graph (given by \a _graph) of the fragment molecule.
209 * This means filtering down to the desired particle types and then find
210 * all possible matching subgraphs in each of argument lists, \a eachargs.
211 *
212 * \param listargs list of arguments to filter and order appropriately
213 * \param _graph contains binding model of graph
214 * \param _types particle type vector
215 * \return reordered args
216 */
217 FunctionModel::list_of_arguments_t filterArgumentsByBindingModel(
218 const FunctionModel::arguments_t &args,
219 const HomologyGraph &_graph,
220 const ParticleTypes_t &_types,
221 const BindingModel &_bindingmodel
222 );
223
224 /** Combines two argument lists by sorting and making unique.
225 *
226 * @param firstargs first list of arguments
227 * @param secondargs second list of arguments
228 * @return concatenated lists
229 */
230 FunctionModel::arguments_t combineArguments(
231 const FunctionModel::arguments_t &firstargs,
232 const FunctionModel::arguments_t &secondargs);
233
234 /** Combines two argument lists by concatenation.
235 *
236 * @param firstargs first list of arguments
237 * @param secondargs second list of arguments
238 * @return concatenated lists
239 */
240 FunctionModel::arguments_t concatenateArguments(
241 const FunctionModel::arguments_t &firstargs,
242 const FunctionModel::arguments_t &secondargs);
243
244 /** Combines two argument lists by concatenation.
245 *
246 * @param firstlistargs first list of argument tuples
247 * @param secondlistargs second list of argument tuples
248 * @return concatenated lists
249 */
250 FunctionModel::list_of_arguments_t concatenateListOfArguments(
251 const FunctionModel::list_of_arguments_t &firstlistargs,
252 const FunctionModel::list_of_arguments_t &secondlistargs);
253
254}; /* namespace Extractors */
255
256
257#endif /* TRAININGDATA_EXTRACTORS_HPP_ */
Note: See TracBrowser for help on using the repository browser.