source: src/Parser/PdbParser.cpp@ 3867a7

Action_Thermostats Add_AtomRandomPerturbation Add_FitFragmentPartialChargesAction Add_RotateAroundBondAction Add_SelectAtomByNameAction Added_ParseSaveFragmentResults AddingActions_SaveParseParticleParameters Adding_Graph_to_ChangeBondActions Adding_MD_integration_tests Adding_ParticleName_to_Atom Adding_StructOpt_integration_tests AtomFragments Automaking_mpqc_open AutomationFragmentation_failures Candidate_v1.5.4 Candidate_v1.6.0 Candidate_v1.6.1 ChangeBugEmailaddress ChangingTestPorts ChemicalSpaceEvaluator CombiningParticlePotentialParsing Combining_Subpackages Debian_Package_split Debian_package_split_molecuildergui_only Disabling_MemDebug Docu_Python_wait EmpiricalPotential_contain_HomologyGraph EmpiricalPotential_contain_HomologyGraph_documentation Enable_parallel_make_install Enhance_userguide Enhanced_StructuralOptimization Enhanced_StructuralOptimization_continued Example_ManyWaysToTranslateAtom Exclude_Hydrogens_annealWithBondGraph FitPartialCharges_GlobalError Fix_BoundInBox_CenterInBox_MoleculeActions Fix_ChargeSampling_PBC Fix_ChronosMutex Fix_FitPartialCharges Fix_FitPotential_needs_atomicnumbers Fix_ForceAnnealing Fix_IndependentFragmentGrids Fix_ParseParticles Fix_ParseParticles_split_forward_backward_Actions Fix_PopActions Fix_QtFragmentList_sorted_selection Fix_Restrictedkeyset_FragmentMolecule Fix_StatusMsg Fix_StepWorldTime_single_argument Fix_Verbose_Codepatterns Fix_fitting_potentials Fixes ForceAnnealing_goodresults ForceAnnealing_oldresults ForceAnnealing_tocheck ForceAnnealing_with_BondGraph ForceAnnealing_with_BondGraph_continued ForceAnnealing_with_BondGraph_continued_betteresults ForceAnnealing_with_BondGraph_contraction-expansion FragmentAction_writes_AtomFragments FragmentMolecule_checks_bonddegrees GeometryObjects Gui_Fixes Gui_displays_atomic_force_velocity ImplicitCharges IndependentFragmentGrids IndependentFragmentGrids_IndividualZeroInstances IndependentFragmentGrids_IntegrationTest IndependentFragmentGrids_Sole_NN_Calculation JobMarket_RobustOnKillsSegFaults JobMarket_StableWorkerPool JobMarket_unresolvable_hostname_fix MoreRobust_FragmentAutomation ODR_violation_mpqc_open PartialCharges_OrthogonalSummation PdbParser_setsAtomName PythonUI_with_named_parameters QtGui_reactivate_TimeChanged_changes Recreated_GuiChecks Rewrite_FitPartialCharges RotateToPrincipalAxisSystem_UndoRedo SaturateAtoms_findBestMatching SaturateAtoms_singleDegree StoppableMakroAction Subpackage_CodePatterns Subpackage_JobMarket Subpackage_LinearAlgebra Subpackage_levmar Subpackage_mpqc_open Subpackage_vmg Switchable_LogView ThirdParty_MPQC_rebuilt_buildsystem TrajectoryDependenant_MaxOrder TremoloParser_IncreasedPrecision TremoloParser_MultipleTimesteps TremoloParser_setsAtomName Ubuntu_1604_changes stable
Last change on this file since 3867a7 was 8bf9c6, checked in by Frederik Heber <heber@…>, 13 years ago

All additionalAtomData maps now have const atomId_t as key.

  • Property mode set to 100644
File size: 31.7 KB
Line 
1/*
2 * Project: MoleCuilder
3 * Description: creates and alters molecular systems
4 * Copyright (C) 2010-2012 University of Bonn. All rights reserved.
5 * Please see the LICENSE file or "Copyright notice" in builder.cpp for details.
6 */
7
8/*
9 * PdbParser.cpp
10 *
11 * Created on: Aug 17, 2010
12 * Author: heber
13 */
14
15// include config.h
16#ifdef HAVE_CONFIG_H
17#include <config.h>
18#endif
19
20#include "CodePatterns/MemDebug.hpp"
21
22#include "CodePatterns/Assert.hpp"
23#include "CodePatterns/Log.hpp"
24#include "CodePatterns/toString.hpp"
25#include "CodePatterns/Verbose.hpp"
26
27#include "Atom/atom.hpp"
28#include "Bond/bond.hpp"
29#include "Descriptors/AtomIdDescriptor.hpp"
30#include "Element/element.hpp"
31#include "Element/periodentafel.hpp"
32#include "molecule.hpp"
33#include "MoleculeListClass.hpp"
34#include "Parser/PdbParser.hpp"
35#include "World.hpp"
36#include "WorldTime.hpp"
37
38#include <map>
39#include <vector>
40
41#include <iostream>
42#include <iomanip>
43
44using namespace std;
45
46// declare specialized static variables
47const std::string FormatParserTrait<pdb>::name = "pdb";
48const std::string FormatParserTrait<pdb>::suffix = "pdb";
49const ParserTypes FormatParserTrait<pdb>::type = pdb;
50
51/**
52 * Constructor.
53 */
54FormatParser< pdb >::FormatParser() :
55 FormatParser_common(NULL)
56{
57 knownTokens["ATOM"] = PdbKey::Atom;
58 knownTokens["HETATM"] = PdbKey::Atom;
59 knownTokens["TER"] = PdbKey::Filler;
60 knownTokens["END"] = PdbKey::EndOfTimestep;
61 knownTokens["CONECT"] = PdbKey::Connect;
62 knownTokens["REMARK"] = PdbKey::Remark;
63 knownTokens[""] = PdbKey::EndOfTimestep;
64
65 // argh, why can't just PdbKey::X+(size_t)i
66 PositionEnumMap[0] = PdbKey::X;
67 PositionEnumMap[1] = PdbKey::Y;
68 PositionEnumMap[2] = PdbKey::Z;
69}
70
71/**
72 * Destructor.
73 */
74FormatParser< pdb >::~FormatParser()
75{
76 PdbAtomInfoContainer::clearknownDataKeys();
77 additionalAtomData.clear();
78}
79
80
81/** Parses the initial word of the given \a line and returns the token type.
82 *
83 * @param line line to scan
84 * @return token type
85 */
86enum PdbKey::KnownTokens FormatParser< pdb >::getToken(string &line)
87{
88 // look for first space
89 const size_t space_location = line.find(' ');
90 const size_t tab_location = line.find('\t');
91 size_t location = space_location < tab_location ? space_location : tab_location;
92 string token;
93 if (location != string::npos) {
94 //LOG(1, "Found space at position " << space_location);
95 token = line.substr(0,space_location);
96 } else {
97 token = line;
98 }
99
100 //LOG(1, "Token is " << token);
101 if (knownTokens.count(token) == 0)
102 return PdbKey::NoToken;
103 else
104 return knownTokens[token];
105
106 return PdbKey::NoToken;
107}
108
109/**
110 * Loads atoms from a PDB-formatted file.
111 *
112 * \param PDB file
113 */
114void FormatParser< pdb >::load(istream* file) {
115 string line;
116 size_t linecount = 0;
117 enum PdbKey::KnownTokens token;
118
119 // reset id maps for this file (to correctly parse CONECT entries)
120 resetIdAssociations();
121
122 bool NotEndOfFile = true;
123 molecule *newmol = World::getInstance().createMolecule();
124 newmol->ActiveFlag = true;
125 unsigned int step = 0;
126 // TODO: Remove the insertion into molecule when saving does not depend on them anymore. Also, remove molecule.hpp include
127 World::getInstance().getMolecules()->insert(newmol);
128 while (NotEndOfFile) {
129 bool NotEndOfTimestep = true;
130 while (NotEndOfTimestep && NotEndOfFile) {
131 std::getline(*file, line, '\n');
132 if (!line.empty()) {
133 // extract first token
134 token = getToken(line);
135 switch (token) {
136 case PdbKey::Atom:
137 LOG(3,"INFO: Parsing ATOM entry for time step " << step << ".");
138 readAtomDataLine(step, line, newmol);
139 break;
140 case PdbKey::Remark:
141 LOG(3,"INFO: Parsing REM entry for time step " << step << ".");
142 break;
143 case PdbKey::Connect:
144 LOG(3,"INFO: Parsing CONECT entry for time step " << step << ".");
145 readNeighbors(step, line);
146 break;
147 case PdbKey::Filler:
148 LOG(3,"INFO: Stumbled upon Filler entry for time step " << step << ".");
149 break;
150 case PdbKey::EndOfTimestep:
151 LOG(3,"INFO: Parsing END entry or empty line for time step " << step << ".");
152 NotEndOfTimestep = false;
153 break;
154 default:
155 // TODO: put a throw here
156 ELOG(2, "Unknown token: '" << line << "' for time step " << step << ".");
157 //ASSERT(0, "FormatParser< pdb >::load() - Unknown token in line "+toString(linecount)+": "+line+".");
158 break;
159 }
160 }
161 NotEndOfFile = NotEndOfFile && (file->good());
162 linecount++;
163 }
164 ++step;
165 }
166 BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms())
167 LOG(4, "INFO: Atom " << _atom->getName() << " " << *dynamic_cast<AtomInfo *>(_atom) << ".");
168
169 // refresh atom::nr and atom::name
170 newmol->getAtomCount();
171}
172
173/**
174 * Saves the \a atoms into as a PDB file.
175 *
176 * \param file where to save the state
177 * \param atoms atoms to store
178 */
179void FormatParser< pdb >::save(ostream* file, const std::vector<atom *> &AtomList)
180{
181 LOG(0, "Saving changes to pdb.");
182
183 // check for maximum number of time steps
184 size_t max_timesteps = 0;
185 BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms()) {
186 LOG(4, "INFO: Atom " << _atom->getName() << " " << *dynamic_cast<AtomInfo *>(_atom) << ".");
187 if (_atom->getTrajectorySize() > max_timesteps)
188 max_timesteps = _atom->getTrajectorySize();
189 }
190 LOG(2,"INFO: Found a maximum of " << max_timesteps << " time steps to store.");
191
192 // re-distribute serials
193 resetIdAssociations();
194 // (new atoms might have been added)
195 int AtomNo = 1; // serial number starts at 1 in pdb
196 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
197 PdbAtomInfoContainer &atomInfo = getadditionalAtomData(*atomIt);
198 associateLocaltoGlobalId(AtomNo, (*atomIt)->getId());
199 atomInfo.set(PdbKey::serial, toString(AtomNo));
200 ++AtomNo;
201 }
202
203 // store all time steps (always do first step)
204 for (size_t step = 0; (step == 0) || (step < max_timesteps); ++step) {
205 {
206 // add initial remark
207 *file << "REMARK created by molecuilder on ";
208 time_t now = time((time_t *)NULL); // Get the system time and put it into 'now' as 'calender time'
209 // ctime ends in \n\0, we have to cut away the newline
210 std::string time(ctime(&now));
211 size_t pos = time.find('\n');
212 if (pos != 0)
213 *file << time.substr(0,pos);
214 else
215 *file << time;
216 *file << ", time step " << step;
217 *file << endl;
218 }
219
220 {
221 std::map<size_t,size_t> MolIdMap;
222 size_t MolNo = 1; // residue number starts at 1 in pdb
223 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
224 const molecule *mol = (*atomIt)->getMolecule();
225 if ((mol != NULL) && (MolIdMap.find(mol->getId()) == MolIdMap.end())) {
226 MolIdMap[mol->getId()] = MolNo++;
227 }
228 }
229 const size_t MaxMol = MolNo;
230
231 // have a count per element and per molecule (0 is for all homeless atoms)
232 std::vector<int> **elementNo = new std::vector<int>*[MaxMol];
233 for (size_t i = 0; i < MaxMol; ++i)
234 elementNo[i] = new std::vector<int>(MAX_ELEMENTS,1);
235 char name[MAXSTRINGSIZE];
236 std::string ResidueName;
237
238 // write ATOMs
239 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
240 PdbAtomInfoContainer &atomInfo = getadditionalAtomData(*atomIt);
241 // gather info about residue
242 const molecule *mol = (*atomIt)->getMolecule();
243 if (mol == NULL) {
244 MolNo = 0;
245 atomInfo.set(PdbKey::resSeq, "0");
246 } else {
247 ASSERT(MolIdMap.find(mol->getId()) != MolIdMap.end(),
248 "FormatParser< pdb >::save() - Mol id "+toString(mol->getId())+" not present despite we set it?!");
249 MolNo = MolIdMap[mol->getId()];
250 atomInfo.set(PdbKey::resSeq, toString(MolIdMap[mol->getId()]));
251 if (atomInfo.get<std::string>(PdbKey::resName) == "-")
252 atomInfo.set(PdbKey::resName, mol->getName().substr(0,3));
253 }
254 // get info about atom
255 const size_t Z = (*atomIt)->getType()->getAtomicNumber();
256 if (atomInfo.get<std::string>(PdbKey::name) == "-") { // if no name set, give it a new name
257 sprintf(name, "%2s%02d",(*atomIt)->getType()->getSymbol().c_str(), (*elementNo[MolNo])[Z]);
258 (*elementNo[MolNo])[Z] = ((*elementNo[MolNo])[Z]+1) % 100; // confine to two digits
259 atomInfo.set(PdbKey::name, name);
260 }
261 // set position
262 for (size_t i=0; i<NDIM;++i) {
263 stringstream position;
264 position << setw(8) << fixed << setprecision(3) << (*atomIt)->getPositionAtStep(step).at(i);
265 atomInfo.set(PositionEnumMap[i], position.str());
266 }
267 // change element and charge if changed
268 if (atomInfo.get<std::string>(PdbKey::element) != (*atomIt)->getType()->getSymbol()) {
269 std::string symbol = (*atomIt)->getType()->getSymbol();
270 if ((symbol[1] >= 'a') && (symbol[1] <= 'z'))
271 symbol[1] = (symbol[1] - 'a') + 'A';
272 atomInfo.set(PdbKey::element, symbol);
273 }
274
275 // finally save the line
276 saveLine(file, atomInfo);
277 }
278 for (size_t i = 0; i < MaxMol; ++i)
279 delete elementNo[i];
280 delete elementNo;
281
282 // write CONECTs
283 for (vector<atom *>::const_iterator atomIt = AtomList.begin(); atomIt != AtomList.end(); atomIt++) {
284 writeNeighbors(file, 4, *atomIt);
285 }
286 }
287 // END
288 *file << "END" << endl;
289 }
290
291}
292
293/** Add default info, when new atom is added to World.
294 *
295 * @param id of atom
296 */
297void FormatParser< pdb >::AtomInserted(atomId_t id)
298{
299 //LOG(3, "FormatParser< pdb >::AtomInserted() - notified of atom " << id << "'s insertion.");
300 ASSERT(!isPresentadditionalAtomData(id),
301 "FormatParser< pdb >::AtomInserted() - additionalAtomData already present for newly added atom "
302 +toString(id)+".");
303 // don't insert here as this is our check whether we are in the first time step
304 //additionalAtomData.insert( std::make_pair(id, defaultAdditionalData) );
305}
306
307/** Remove additional AtomData info, when atom has been removed from World.
308 *
309 * @param id of atom
310 */
311void FormatParser< pdb >::AtomRemoved(atomId_t id)
312{
313 //LOG(3, "FormatParser< pdb >::AtomRemoved() - notified of atom " << id << "'s removal.");
314 std::map<const atomId_t, PdbAtomInfoContainer>::iterator iter = additionalAtomData.find(id);
315 // as we do not insert AtomData on AtomInserted, we cannot be assured of its presence
316// ASSERT(iter != additionalAtomData.end(),
317// "FormatParser< pdb >::AtomRemoved() - additionalAtomData is not present for atom "
318// +toString(id)+" to remove.");
319 if (iter != additionalAtomData.end()) {
320 additionalAtomData.erase(iter);
321 }
322}
323
324
325/** Checks whether there is an entry for the given atom's \a _id.
326 *
327 * @param _id atom's id we wish to check on
328 * @return true - entry present, false - only for atom's father or no entry
329 */
330bool FormatParser< pdb >::isPresentadditionalAtomData(const atomId_t _id) const
331{
332 std::map<const atomId_t, PdbAtomInfoContainer>::const_iterator iter = additionalAtomData.find(_id);
333 return (iter != additionalAtomData.end());
334}
335
336
337/** Either returns reference to present entry or creates new with default values.
338 *
339 * @param _atom atom whose entry we desire
340 * @return
341 */
342PdbAtomInfoContainer& FormatParser< pdb >::getadditionalAtomData(atom *_atom)
343{
344 if (additionalAtomData.find(_atom->getId()) != additionalAtomData.end()) {
345 } else if (additionalAtomData.find(_atom->father->getId()) != additionalAtomData.end()) {
346 // use info from direct father
347 additionalAtomData[_atom->getId()] = additionalAtomData[_atom->father->getId()];
348 } else if (additionalAtomData.find(_atom->GetTrueFather()->getId()) != additionalAtomData.end()) {
349 // use info from topmost father
350 additionalAtomData[_atom->getId()] = additionalAtomData[_atom->GetTrueFather()->getId()];
351 } else {
352 // create new entry use default values if nothing else is known
353 additionalAtomData[_atom->getId()] = defaultAdditionalData;
354 }
355 return additionalAtomData[_atom->getId()];
356}
357
358/**
359 * Writes one line of PDB-formatted data to the provided stream.
360 *
361 * \param stream where to write the line to
362 * \param *currentAtom the atom of which information should be written
363 * \param AtomNo serial number of atom
364 * \param *name name of atom, i.e. H01
365 * \param ResidueName Name of molecule
366 * \param ResidueNo number of residue
367 */
368void FormatParser< pdb >::saveLine(
369 ostream* file,
370 const PdbAtomInfoContainer &atomInfo)
371{
372 *file << setfill(' ') << left << setw(6)
373 << atomInfo.get<std::string>(PdbKey::token);
374 *file << setfill(' ') << right << setw(5)
375 << atomInfo.get<int>(PdbKey::serial); /* atom serial number */
376 *file << " "; /* char 12 is empty */
377 *file << setfill(' ') << left << setw(4)
378 << atomInfo.get<std::string>(PdbKey::name); /* atom name */
379 *file << setfill(' ') << left << setw(1)
380 << atomInfo.get<std::string>(PdbKey::altLoc); /* alternate location/conformation */
381 *file << setfill(' ') << left << setw(3)
382 << atomInfo.get<std::string>(PdbKey::resName); /* residue name */
383 *file << " "; /* char 21 is empty */
384 *file << setfill(' ') << left << setw(1)
385 << atomInfo.get<std::string>(PdbKey::chainID); /* chain identifier */
386 *file << setfill(' ') << left << setw(4)
387 << atomInfo.get<int>(PdbKey::resSeq); /* residue sequence number */
388 *file << setfill(' ') << left << setw(1)
389 << atomInfo.get<std::string>(PdbKey::iCode); /* iCode */
390 *file << " "; /* char 28-30 are empty */
391 // have the following operate on stringstreams such that format specifiers
392 // only act on these
393 for (size_t i=0;i<NDIM;++i) {
394 stringstream position;
395 position << fixed << setprecision(3) << showpoint
396 << atomInfo.get<double>(PositionEnumMap[i]);
397 *file << setfill(' ') << right << setw(8) << position.str();
398 }
399 {
400 stringstream occupancy;
401 occupancy << fixed << setprecision(2) << showpoint
402 << atomInfo.get<double>(PdbKey::occupancy); /* occupancy */
403 *file << setfill(' ') << right << setw(6) << occupancy.str();
404 }
405 {
406 stringstream tempFactor;
407 tempFactor << fixed << setprecision(2) << showpoint
408 << atomInfo.get<double>(PdbKey::tempFactor); /* temperature factor */
409 *file << setfill(' ') << right << setw(6) << tempFactor.str();
410 }
411 *file << " "; /* char 68-76 are empty */
412 *file << setfill(' ') << right << setw(2) << atomInfo.get<std::string>(PdbKey::element); /* element */
413 *file << setfill(' ') << right << setw(2) << atomInfo.get<int>(PdbKey::charge); /* charge */
414
415 *file << endl;
416}
417
418/**
419 * Writes the neighbor information of one atom to the provided stream.
420 *
421 * Note that ListOfBonds of WorldTime::CurrentTime is used.
422 *
423 * Also, we fill up the CONECT line to extend over 80 chars.
424 *
425 * \param *file where to write neighbor information to
426 * \param MaxnumberOfNeighbors of neighbors
427 * \param *currentAtom to the atom of which to take the neighbor information
428 */
429void FormatParser< pdb >::writeNeighbors(ostream* file, int MaxnumberOfNeighbors, atom* currentAtom) {
430 int MaxNo = MaxnumberOfNeighbors;
431 int charsleft = 80;
432 const BondList & ListOfBonds = currentAtom->getListOfBonds();
433 if (!ListOfBonds.empty()) {
434 for(BondList::const_iterator currentBond = ListOfBonds.begin(); currentBond != ListOfBonds.end(); ++currentBond) {
435 if (MaxNo >= MaxnumberOfNeighbors) {
436 *file << "CONECT";
437 *file << setw(5) << getLocalId(currentAtom->getId());
438 charsleft = 80-6-5;
439 MaxNo = 0;
440 }
441 *file << setw(5) << getLocalId((*currentBond)->GetOtherAtom(currentAtom)->getId());
442 charsleft -= 5;
443 MaxNo++;
444 if (MaxNo == MaxnumberOfNeighbors) {
445 for (;charsleft > 0; charsleft--)
446 *file << ' ';
447 *file << "\n";
448 }
449 }
450 if (MaxNo != MaxnumberOfNeighbors) {
451 for (;charsleft > 0; charsleft--)
452 *file << ' ';
453 *file << "\n";
454 }
455 }
456}
457
458/** Either returns present atom with given id or a newly created one.
459 *
460 * @param id_string
461 * @return
462 */
463atom* FormatParser< pdb >::getAtomToParse(std::string id_string)
464{
465 // get the local ID
466 ConvertTo<int> toInt;
467 const unsigned int AtomID_local = toInt(id_string);
468 LOG(4, "INFO: Local id is "+toString(AtomID_local)+".");
469 // get the atomic ID if present
470 atom* newAtom = NULL;
471 if (getGlobalId(AtomID_local) != -1) {
472 const unsigned int AtomID_global = getGlobalId(AtomID_local);
473 LOG(4, "INFO: Global id present as " << AtomID_global << ".");
474 // check if atom exists
475 newAtom = World::getInstance().getAtom(AtomById(AtomID_global));
476 LOG(5, "INFO: Listing all present atoms with id.");
477 BOOST_FOREACH(atom *_atom, World::getInstance().getAllAtoms())
478 LOG(5, "INFO: " << *_atom << " with id " << _atom->getId());
479 }
480 // if not exists, create
481 if (newAtom == NULL) {
482 newAtom = World::getInstance().createAtom();
483 //const unsigned int AtomID_global = newAtom->getId();
484 LOG(4, "INFO: No association to global id present, creating atom.");
485 } else {
486 LOG(4, "INFO: Existing atom found: " << *newAtom << ".");
487 }
488 return newAtom;
489}
490
491/** read a line starting with key ATOM.
492 *
493 * We check for line's length and parse only up to this value.
494 *
495 * @param atomInfo container to put information in
496 * @param line line containing key ATOM
497 */
498void FormatParser< pdb >::readPdbAtomInfoContainer(PdbAtomInfoContainer &atomInfo, std::string &line) const
499{
500 const size_t length = line.length();
501 if (length < 80)
502 ELOG(2, "FormatParser< pdb >::readPdbAtomInfoContainer() - pdb is mal-formed, containing less than 80 chars!");
503 if (length >= 6) {
504 LOG(4,"INFO: Parsing token from "+line.substr(0,6)+".");
505 atomInfo.set(PdbKey::token, line.substr(0,6));
506 }
507 if (length >= 11) {
508 LOG(4,"INFO: Parsing serial from "+line.substr(6,5)+".");
509 atomInfo.set(PdbKey::serial, line.substr(6,5));
510 ASSERT(atomInfo.get<int>(PdbKey::serial) != 0,
511 "FormatParser< pdb >::readPdbAtomInfoContainer() - serial 0 is invalid (filler id for conect entries).");
512 }
513
514 if (length >= 16) {
515 LOG(4,"INFO: Parsing name from "+line.substr(12,4)+".");
516 atomInfo.set(PdbKey::name, line.substr(12,4));
517 }
518 if (length >= 17) {
519 LOG(4,"INFO: Parsing altLoc from "+line.substr(16,1)+".");
520 atomInfo.set(PdbKey::altLoc, line.substr(16,1));
521 }
522 if (length >= 20) {
523 LOG(4,"INFO: Parsing resName from "+line.substr(17,3)+".");
524 atomInfo.set(PdbKey::resName, line.substr(17,3));
525 }
526 if (length >= 22) {
527 LOG(4,"INFO: Parsing chainID from "+line.substr(21,1)+".");
528 atomInfo.set(PdbKey::chainID, line.substr(21,1));
529 }
530 if (length >= 26) {
531 LOG(4,"INFO: Parsing resSeq from "+line.substr(22,4)+".");
532 atomInfo.set(PdbKey::resSeq, line.substr(22,4));
533 }
534 if (length >= 27) {
535 LOG(4,"INFO: Parsing iCode from "+line.substr(26,1)+".");
536 atomInfo.set(PdbKey::iCode, line.substr(26,1));
537 }
538
539 if (length >= 60) {
540 LOG(4,"INFO: Parsing occupancy from "+line.substr(54,6)+".");
541 atomInfo.set(PdbKey::occupancy, line.substr(54,6));
542 }
543 if (length >= 66) {
544 LOG(4,"INFO: Parsing tempFactor from "+line.substr(60,6)+".");
545 atomInfo.set(PdbKey::tempFactor, line.substr(60,6));
546 }
547 if (length >= 80) {
548 LOG(4,"INFO: Parsing charge from "+line.substr(78,2)+".");
549 atomInfo.set(PdbKey::charge, line.substr(78,2));
550 }
551 if (length >= 78) {
552 LOG(4,"INFO: Parsing element from "+line.substr(76,2)+".");
553 atomInfo.set(PdbKey::element, line.substr(76,2));
554 } else {
555 LOG(4,"INFO: Trying to parse alternative element from name "+line.substr(12,4)+".");
556 atomInfo.set(PdbKey::element, line.substr(12,4));
557 }
558}
559
560/** Parse an ATOM line from a PDB file.
561 *
562 * Reads one data line of a pdstatus file and interprets it according to the
563 * specifications of the PDB 3.2 format: http://www.wwpdb.org/docs.html
564 *
565 * A new atom is created and filled with available information, non-
566 * standard information is placed in additionalAtomData at the atom's id.
567 *
568 * \param _step time step to use
569 * \param line to parse as an atom
570 * \param newmol molecule to add parsed atoms to
571 */
572void FormatParser< pdb >::readAtomDataLine(const unsigned int _step, std::string &line, molecule *newmol = NULL) {
573 vector<string>::iterator it;
574
575 atom* newAtom = getAtomToParse(line.substr(6,5));
576 LOG(3,"INFO: Parsing END entry or empty line.");
577 bool FirstTimestep = isPresentadditionalAtomData(newAtom->getId()) ? false : true;
578 ASSERT((FirstTimestep && (_step == 0)) || (!FirstTimestep && (_step !=0)),
579 "FormatParser< pdb >::readAtomDataLine() - additionalAtomData present though atom is newly parsed.");
580 if (FirstTimestep) {
581 LOG(3,"INFO: Parsing new atom "+toString(*newAtom)+" "+toString(newAtom->getId())+".");
582 } else {
583 LOG(3,"INFO: Parsing present atom "+toString(*newAtom)+".");
584 }
585 PdbAtomInfoContainer &atomInfo = getadditionalAtomData(newAtom);
586 LOG(4,"INFO: Information in container is "+toString(atomInfo)+".");
587
588 string word;
589 ConvertTo<size_t> toSize_t;
590
591 // check whether serial exists, if so, assign next available
592
593// LOG(2, "Split line:"
594// << line.substr(6,5) << "|"
595// << line.substr(12,4) << "|"
596// << line.substr(16,1) << "|"
597// << line.substr(17,3) << "|"
598// << line.substr(21,1) << "|"
599// << line.substr(22,4) << "|"
600// << line.substr(26,1) << "|"
601// << line.substr(30,8) << "|"
602// << line.substr(38,8) << "|"
603// << line.substr(46,8) << "|"
604// << line.substr(54,6) << "|"
605// << line.substr(60,6) << "|"
606// << line.substr(76,2) << "|"
607// << line.substr(78,2));
608
609 if (FirstTimestep) {
610 // first time step
611 // then fill info container
612 readPdbAtomInfoContainer(atomInfo, line);
613 // associate local with global id
614 associateLocaltoGlobalId(toSize_t(atomInfo.get<std::string>(PdbKey::serial)), newAtom->getId());
615 // set position
616 Vector tempVector;
617 LOG(4,"INFO: Parsing position from ("
618 +line.substr(30,8)+","
619 +line.substr(38,8)+","
620 +line.substr(46,8)+").");
621 PdbAtomInfoContainer::ScanKey(tempVector[0], line.substr(30,8));
622 PdbAtomInfoContainer::ScanKey(tempVector[1], line.substr(38,8));
623 PdbAtomInfoContainer::ScanKey(tempVector[2], line.substr(46,8));
624 newAtom->setPosition(tempVector);
625 // set element
626 std::string value = atomInfo.get<std::string>(PdbKey::element);
627 // make second character lower case if not
628 if ((value[1] >= 'A') && (value[1] <= 'Z'))
629 value[1] = (value[1] - 'A') + 'a';
630 const element *elem = World::getInstance().getPeriode()
631 ->FindElement(value);
632 ASSERT(elem != NULL,
633 "FormatParser< pdb >::readAtomDataLine() - element "+atomInfo.get<std::string>(PdbKey::element)+" is unknown!");
634 newAtom->setType(elem);
635
636 if (newmol != NULL)
637 newmol->AddAtom(newAtom);
638 } else {
639 // not first time step
640 // then parse into different container
641 PdbAtomInfoContainer consistencyInfo;
642 readPdbAtomInfoContainer(consistencyInfo, line);
643 // then check additional info for consistency
644 ASSERT(atomInfo.get<std::string>(PdbKey::token) == consistencyInfo.get<std::string>(PdbKey::token),
645 "FormatParser< pdb >::readAtomDataLine() - difference in token on multiple time step for atom with id "
646 +atomInfo.get<std::string>(PdbKey::serial)+"!");
647 ASSERT(atomInfo.get<std::string>(PdbKey::name) == consistencyInfo.get<std::string>(PdbKey::name),
648 "FormatParser< pdb >::readAtomDataLine() - difference in name on multiple time step for atom with id "
649 +atomInfo.get<std::string>(PdbKey::serial)+":"
650 +atomInfo.get<std::string>(PdbKey::name)+"!="
651 +consistencyInfo.get<std::string>(PdbKey::name)+".");
652 ASSERT(atomInfo.get<std::string>(PdbKey::altLoc) == consistencyInfo.get<std::string>(PdbKey::altLoc),
653 "FormatParser< pdb >::readAtomDataLine() - difference in altLoc on multiple time step for atom with id "
654 +atomInfo.get<std::string>(PdbKey::serial)+"!");
655 ASSERT(atomInfo.get<std::string>(PdbKey::resName) == consistencyInfo.get<std::string>(PdbKey::resName),
656 "FormatParser< pdb >::readAtomDataLine() - difference in resName on multiple time step for atom with id "
657 +atomInfo.get<std::string>(PdbKey::serial)+"!");
658 ASSERT(atomInfo.get<std::string>(PdbKey::chainID) == consistencyInfo.get<std::string>(PdbKey::chainID),
659 "FormatParser< pdb >::readAtomDataLine() - difference in chainID on multiple time step for atom with id "
660 +atomInfo.get<std::string>(PdbKey::serial)+"!");
661 ASSERT(atomInfo.get<std::string>(PdbKey::resSeq) == consistencyInfo.get<std::string>(PdbKey::resSeq),
662 "FormatParser< pdb >::readAtomDataLine() - difference in resSeq on multiple time step for atom with id "
663 +atomInfo.get<std::string>(PdbKey::serial)+"!");
664 ASSERT(atomInfo.get<std::string>(PdbKey::iCode) == consistencyInfo.get<std::string>(PdbKey::iCode),
665 "FormatParser< pdb >::readAtomDataLine() - difference in iCode on multiple time step for atom with id "
666 +atomInfo.get<std::string>(PdbKey::serial)+"!");
667 ASSERT(atomInfo.get<std::string>(PdbKey::occupancy) == consistencyInfo.get<std::string>(PdbKey::occupancy),
668 "FormatParser< pdb >::readAtomDataLine() - difference in occupancy on multiple time step for atom with id "
669 +atomInfo.get<std::string>(PdbKey::serial)+"!");
670 ASSERT(atomInfo.get<std::string>(PdbKey::tempFactor) == consistencyInfo.get<std::string>(PdbKey::tempFactor),
671 "FormatParser< pdb >::readAtomDataLine() - difference in tempFactor on multiple time step for atom with id "
672 +atomInfo.get<std::string>(PdbKey::serial)+"!");
673 ASSERT(atomInfo.get<std::string>(PdbKey::charge) == consistencyInfo.get<std::string>(PdbKey::charge),
674 "FormatParser< pdb >::readAtomDataLine() - difference in charge on multiple time step for atom with id "
675 +atomInfo.get<std::string>(PdbKey::serial)+"!");
676 ASSERT(atomInfo.get<std::string>(PdbKey::element) == consistencyInfo.get<std::string>(PdbKey::element),
677 "FormatParser< pdb >::readAtomDataLine() - difference in element on multiple time step for atom with id "
678 +atomInfo.get<std::string>(PdbKey::serial)+"!");
679 // and parse in trajectory
680 Vector tempVector;
681 LOG(4,"INFO: Parsing trajectory position from ("
682 +line.substr(30,8)+","
683 +line.substr(38,8)+","
684 +line.substr(46,8)+").");
685 PdbAtomInfoContainer::ScanKey(tempVector[0], line.substr(30,8));
686 PdbAtomInfoContainer::ScanKey(tempVector[1], line.substr(38,8));
687 PdbAtomInfoContainer::ScanKey(tempVector[2], line.substr(46,8));
688 LOG(4,"INFO: Adding trajectory point to time step "+toString(_step)+".");
689 // and set position at new time step
690 newAtom->setPositionAtStep(_step, tempVector);
691 }
692
693
694// printAtomInfo(newAtom);
695}
696
697/** Prints all PDB-specific information known about an atom.
698 *
699 */
700void FormatParser< pdb >::printAtomInfo(const atom * const newAtom) const
701{
702 const PdbAtomInfoContainer &atomInfo = additionalAtomData.at(newAtom->getId()); // operator[] const does not exist
703
704 LOG(1, "We know about atom " << newAtom->getId() << ":");
705 LOG(1, "\ttoken is " << atomInfo.get<std::string>(PdbKey::token));
706 LOG(1, "\tserial is " << atomInfo.get<int>(PdbKey::serial));
707 LOG(1, "\tname is " << atomInfo.get<std::string>(PdbKey::name));
708 LOG(1, "\taltLoc is " << atomInfo.get<std::string>(PdbKey::altLoc));
709 LOG(1, "\tresName is " << atomInfo.get<std::string>(PdbKey::resName));
710 LOG(1, "\tchainID is " << atomInfo.get<std::string>(PdbKey::chainID));
711 LOG(1, "\tresSeq is " << atomInfo.get<int>(PdbKey::resSeq));
712 LOG(1, "\tiCode is " << atomInfo.get<std::string>(PdbKey::iCode));
713 LOG(1, "\tX is " << atomInfo.get<double>(PdbKey::X));
714 LOG(1, "\tY is " << atomInfo.get<double>(PdbKey::Y));
715 LOG(1, "\tZ is " << atomInfo.get<double>(PdbKey::Z));
716 LOG(1, "\toccupancy is " << atomInfo.get<double>(PdbKey::occupancy));
717 LOG(1, "\ttempFactor is " << atomInfo.get<double>(PdbKey::tempFactor));
718 LOG(1, "\telement is '" << *(newAtom->getType()) << "'");
719 LOG(1, "\tcharge is " << atomInfo.get<int>(PdbKey::charge));
720}
721
722/**
723 * Reads neighbor information for one atom from the input.
724 *
725 * \param _step time step to use
726 * \param line to parse as an atom
727 */
728void FormatParser< pdb >::readNeighbors(const unsigned int _step, std::string &line)
729{
730 const size_t length = line.length();
731 std::list<size_t> ListOfNeighbors;
732 ConvertTo<size_t> toSize_t;
733
734 // obtain neighbours
735 // show split line for debugging
736 string output;
737 ASSERT(length >=16,
738 "FormatParser< pdb >::readNeighbors() - CONECT entry has not enough entries: "+line+"!");
739// output = "Split line:|";
740// output += line.substr(6,5) + "|";
741 const size_t id = toSize_t(line.substr(6,5));
742 for (size_t index = 11; index <= 26; index+=5) {
743 if (index+5 <= length) {
744 output += line.substr(index,5) + "|";
745 // search for digits
746 int otherid = -1;
747 PdbAtomInfoContainer::ScanKey(otherid, line.substr(index,5));
748 if (otherid > 0)
749 ListOfNeighbors.push_back(otherid);
750 else
751 ELOG(2, "FormatParser< pdb >::readNeighbors() - discarding CONECT entry with id 0.");
752 } else {
753 break;
754 }
755 }
756 LOG(4, output);
757
758 // add neighbours
759 atom *_atom = World::getInstance().getAtom(AtomById(getGlobalId(id)));
760 LOG(2, "STATUS: Atom " << _atom->getId() << " gets " << ListOfNeighbors.size() << " more neighbours.");
761 for (std::list<size_t>::const_iterator iter = ListOfNeighbors.begin();
762 iter != ListOfNeighbors.end();
763 ++iter) {
764 atom * const _Otheratom = World::getInstance().getAtom(AtomById(getGlobalId(*iter)));
765 LOG(3, "INFO: Adding Bond (" << *_atom << "," << *_Otheratom << ")");
766 _atom->addBond(_step, _Otheratom);
767 }
768}
769
770bool FormatParser< pdb >::operator==(const FormatParser< pdb >& b) const
771{
772 bool status = true;
773 World::AtomComposite atoms = World::getInstance().getAllAtoms();
774 for (World::AtomComposite::const_iterator iter = atoms.begin(); iter != atoms.end(); ++iter) {
775 if ((additionalAtomData.find((*iter)->getId()) != additionalAtomData.end())
776 && (b.additionalAtomData.find((*iter)->getId()) != b.additionalAtomData.end())) {
777 const PdbAtomInfoContainer &atomInfo = additionalAtomData.at((*iter)->getId());
778 const PdbAtomInfoContainer &OtheratomInfo = b.additionalAtomData.at((*iter)->getId());
779
780 status = status && (atomInfo.get<std::string>(PdbKey::serial) == OtheratomInfo.get<std::string>(PdbKey::serial));
781 if (!status) ELOG(1, "Mismatch in serials!");
782 status = status && (atomInfo.get<std::string>(PdbKey::name) == OtheratomInfo.get<std::string>(PdbKey::name));
783 if (!status) ELOG(1, "Mismatch in names!");
784 status = status && (atomInfo.get<std::string>(PdbKey::altLoc) == OtheratomInfo.get<std::string>(PdbKey::altLoc));
785 if (!status) ELOG(1, "Mismatch in altLocs!");
786 status = status && (atomInfo.get<std::string>(PdbKey::resName) == OtheratomInfo.get<std::string>(PdbKey::resName));
787 if (!status) ELOG(1, "Mismatch in resNames!");
788 status = status && (atomInfo.get<std::string>(PdbKey::chainID) == OtheratomInfo.get<std::string>(PdbKey::chainID));
789 if (!status) ELOG(1, "Mismatch in chainIDs!");
790 status = status && (atomInfo.get<std::string>(PdbKey::resSeq) == OtheratomInfo.get<std::string>(PdbKey::resSeq));
791 if (!status) ELOG(1, "Mismatch in resSeqs!");
792 status = status && (atomInfo.get<std::string>(PdbKey::iCode) == OtheratomInfo.get<std::string>(PdbKey::iCode));
793 if (!status) ELOG(1, "Mismatch in iCodes!");
794 status = status && (atomInfo.get<std::string>(PdbKey::occupancy) == OtheratomInfo.get<std::string>(PdbKey::occupancy));
795 if (!status) ELOG(1, "Mismatch in occupancies!");
796 status = status && (atomInfo.get<std::string>(PdbKey::tempFactor) == OtheratomInfo.get<std::string>(PdbKey::tempFactor));
797 if (!status) ELOG(1, "Mismatch in tempFactors!");
798 status = status && (atomInfo.get<std::string>(PdbKey::charge) == OtheratomInfo.get<std::string>(PdbKey::charge));
799 if (!status) ELOG(1, "Mismatch in charges!");
800 }
801 }
802
803 return status;
804}
805
Note: See TracBrowser for help on using the repository browser.