ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/applications/atom2md/obmolecformat.cpp
Revision: 1210
Committed: Wed Jan 23 03:45:33 2008 UTC (17 years, 3 months ago) by gezelter
File size: 17487 byte(s)
Log Message:
Removed older version of openbabel from our code.  We now have a
configure check to see if openbabel is installed and then we link to
the stuff we need.  Conversion to OOPSE's md format is handled by only
one application (atom2md), so most of the work went on there.
ElementsTable still needs some work to function in parallel.

File Contents

# Content
1 /**********************************************************************
2 obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3
4 Copyright (C) 2005 Chris Morley
5
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.sourceforge.net/>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 #include <openbabel/babelconfig.h>
19 #include <openbabel/obmolecformat.h>
20 #include <openbabel/obiter.h>
21
22 #ifdef _DEBUG
23 #undef THIS_FILE
24 static char THIS_FILE[]=__FILE__;
25 #define new DEBUG_NEW
26 #endif
27
28 using namespace std;
29 namespace OpenBabel
30 {
31
32 std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
33 OBMol* OBMoleculeFormat::_jmol;
34 std::vector<OBMol> OBMoleculeFormat::MolArray;
35 bool OBMoleculeFormat::StoredMolsReady=false;
36
37 bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
38 {
39 std::istream &ifs = *pConv->GetInStream();
40 if (!ifs.good()) //Possible to omit? ifs.peek() == EOF ||
41 return false;
42
43 OBMol* pmol = new OBMol;
44
45 std::string auditMsg = "OpenBabel::Read molecule ";
46 std::string description(pFormat->Description());
47 auditMsg += description.substr(0,description.find('\n'));
48 obErrorLog.ThrowError(__FUNCTION__,
49 auditMsg,
50 obAuditMsg);
51
52 if(pConv->IsOption("C",OBConversion::GENOPTIONS))
53 return DeferMolOutput(pmol, pConv, pFormat);
54
55 bool ret;
56 if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
57 {
58 //On first call, separate molecule and put fragments in MolArray.
59 //On subsequent calls, remove a fragment from MolArray and send it for writing
60 //Done this way so that each fragment can be written to its own file (with -m option)
61 if(!StoredMolsReady)
62 {
63 ret = pFormat->ReadMolecule(pmol,pConv);
64 if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK)))
65 MolArray = pmol->Separate(); //use un-transformed molecule
66 //Add an appropriate title to each fragment
67 for(unsigned int i=0;i<MolArray.size();++i)
68 {
69 stringstream ss;
70 ss << pmol->GetTitle() << '#' << i+1;
71 string title = ss.str();
72 MolArray[i].SetTitle(title);
73 }
74 reverse(MolArray.begin(),MolArray.end());
75 StoredMolsReady = true;
76 }
77
78 if(MolArray.empty()) //normal end of fragments
79 ret =false;
80 else
81 {
82 // Copying is needed because the OBMol passed to AddChemObject will be deleted.
83 // The OBMol in the vector is deleted here.
84 OBMol* pMolCopy = new OBMol( MolArray.back());
85 MolArray.pop_back();
86 ret = pConv->AddChemObject(
87 pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
88 }
89 if(!ret)
90 StoredMolsReady = false;
91
92 delete pmol;
93 return ret;
94 }
95
96 ret=pFormat->ReadMolecule(pmol,pConv);
97
98 OBMol* ptmol = NULL;
99 //Molecule is valid if it has some atoms
100 //or the format allows zero-atom molecules and it has a title
101 if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle())))
102 {
103 ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
104 if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
105 || pConv->IsOption("join",OBConversion::GENOPTIONS)))
106 {
107 //With j option, accumulate all mols in one stored in this class
108 if(pConv->IsFirstInput())
109 _jmol = new OBMol;
110 pConv->AddChemObject(_jmol);
111 //will be discarded in WriteChemObjectImpl until the last input mol. This complication
112 //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
113 //is zeroed at the end of a file and _jmol is in danger of not being output.
114 *_jmol += *ptmol;
115 delete ptmol;
116 return true;
117 }
118 }
119 else
120 delete pmol;
121
122 // Normal operation - send molecule to be written
123 ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading
124 return ret;
125 }
126
127 bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
128 {
129 if(pConv->IsOption("C",OBConversion::GENOPTIONS))
130 return OutputDeferredMols(pConv);
131 if(pConv->IsOption("j",OBConversion::GENOPTIONS)
132 || pConv->IsOption("join",OBConversion::GENOPTIONS))
133 {
134 //arrives here at the end of a file
135 if(!pConv->IsLast())
136 return true;
137 bool ret=pFormat->WriteMolecule(_jmol,pConv);
138 pConv->SetOutputIndex(1);
139 delete _jmol;
140 return ret;
141 }
142
143
144 //Retrieve the target OBMol
145 OBBase* pOb = pConv->GetChemObject();
146 OBMol* pmol = dynamic_cast<OBMol*> (pOb);
147 bool ret=false;
148 if(pmol)
149 {
150 if(pmol->NumAtoms()==0)
151 {
152 std::string auditMsg = "OpenBabel::Molecule ";
153 auditMsg += pmol->GetTitle();
154 auditMsg += " has 0 atoms";
155 obErrorLog.ThrowError(__FUNCTION__,
156 auditMsg,
157 obInfo);
158 }
159 ret=true;
160
161 std::string auditMsg = "OpenBabel::Write molecule ";
162 std::string description(pFormat->Description());
163 auditMsg += description.substr(0,description.find('\n'));
164 obErrorLog.ThrowError(__FUNCTION__,
165 auditMsg,
166 obAuditMsg);
167
168 ret=pFormat->WriteMolecule(pmol,pConv);
169 }
170 delete pOb; //move so that non-OBMol objects are deleted 9March2006
171 return ret;
172 }
173
174 /*! Instead of sending molecules for output via AddChemObject(), they are
175 saved in here in OBMoleculeFormat or discarded. By default they are
176 saved only if they are in the first input file. Parts of subsequent
177 molecules, such as chemical structure, coordinates and OBGenericData
178 can replace the parts in molecules with the same title that have already
179 been stored, subject to a set of rules. After all input files have been
180 read, the stored molecules (possibly now having augmented properties) are
181 sent to the output format.
182
183 Is a static function with <this> as parameter so that it can be called from other
184 format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
185 */
186 bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
187 {
188 static bool IsFirstFile;
189 bool OnlyMolsInFirstFile=true;
190
191 if(pConv->IsFirstInput())
192 {
193 IsFirstFile=true;
194 IMols.clear();
195 }
196 else
197 {
198 if((std::streamoff)pConv->GetInStream()->tellg()<=0)
199 IsFirstFile=false;//File has changed
200 }
201
202 if (!pF->ReadMolecule(pmol,pConv))
203 {
204 delete pmol;
205 return false;
206 }
207 const char* ptitle = pmol->GetTitle();
208 if(*ptitle==0)
209 obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
210 else
211 {
212 string title(ptitle);
213 string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
214 if(pos!=string::npos)
215 title.erase(pos);
216
217 map<std::string, OBMol*>::iterator itr;
218 itr = IMols.find(title);
219 if(itr!=IMols.end())
220 {
221 //Molecule with the same title has been input previously: update it
222 OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
223 if(pNewMol)
224 {
225 delete itr->second;
226 IMols[title] = pNewMol;
227 }
228 else
229 {
230 //error: cleanup and return false
231 delete pmol;
232 return DeleteDeferredMols();
233 }
234 }
235 else
236 {
237 //Molecule not already saved in IMols: save it if in first file
238 if(!OnlyMolsInFirstFile || IsFirstFile)
239 {
240 IMols[title] = pmol;
241 return true; //don't delete pmol
242 }
243 }
244 }
245 delete pmol;
246 return true;
247 }
248
249 /*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
250 If both have OBGenericData of the same type, or OBPairData with the
251 same attribute, the version from pFirst is used.
252 Returns a pointer to a new OBMol which will need deleting by the calling program
253 (probably by being sent to an output format).
254 If the molecules cannot be regarded as being the same structure a NULL
255 pointer is returned and an error message logged.
256
257 pFirst and pSecond and the objects they point to are not changed. (const
258 modifiers difficult because class OBMol not designed appropriately)
259
260 Combining molecules: rules for each of the three parts
261 Title:
262 Use the title of pFirst unless it has none, when use that of pSecond.
263 Warning if neither molecule has a title.
264
265 Structure
266 - a structure with atoms replaces one with no atoms
267 - a structure with bonds replaces one with no bonds,
268 provided the formula is the same, else an error.
269 - structures with atoms and bonds are compared by InChI; error if not the same.
270 - a structure with 3D coordinates replaces one with 2D coordinates
271 - a structure with 2D coordinates replace one with 0D coordinates
272
273 OBGenericData
274 OBPairData
275 */
276 OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
277 {
278 string title("No title");
279 if(*pFirst->GetTitle()!=0)
280 title = pFirst->GetTitle();
281 else
282 {
283 if(*pSecond->GetTitle()!=0)
284 title = pSecond->GetTitle();
285 else
286 obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
287 }
288
289 bool swap=false;
290 if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
291 swap=true;
292 else
293 {
294 if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
295 {
296 obErrorLog.ThrowError(__FUNCTION__,
297 "Molecules with name = " + title + " have different formula",obError);
298 return NULL;
299 }
300 else
301 {
302 if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
303 swap=true;
304 else
305 {
306 //Compare by inchi; error if different NOT YET IMPLEMENTED
307 //Use the one with the higher dimension
308 if(pSecond->GetDimension() > pFirst->GetDimension())
309 swap=true;
310 }
311 }
312 }
313
314 OBMol* pNewMol = new OBMol;
315 pNewMol->SetTitle(title);
316
317 OBMol* pMain = swap ? pSecond : pFirst;
318 OBMol* pOther = swap ? pFirst : pSecond;
319
320 *pNewMol = *pMain; //Now copies all data
321
322 //Copy some OBGenericData from the OBMol which did not provide the structure
323 vector<OBGenericData*>::iterator igd;
324 for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
325 {
326 //copy only if not already data of the same type from molecule already copied to pNewMol
327 unsigned datatype = (*igd)->GetDataType();
328 OBGenericData* pData = pNewMol->GetData(datatype);
329 if(datatype==OBGenericDataType::PairData)
330 {
331 if(pData->GetAttribute() == (*igd)->GetAttribute())
332 continue;
333 }
334 else if(pNewMol->GetData(datatype)!=NULL)
335 continue;
336
337 OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
338 pNewMol->SetData(pCopiedData);
339 }
340 return pNewMol;
341 }
342
343 bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
344 {
345 std::map<std::string, OBMol*>::iterator itr, lastitr;
346 bool ret=false;
347 int i=1;
348 lastitr = IMols.end();
349 --lastitr;
350 pConv->SetOneObjectOnly(false);
351 for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
352 {
353 if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))
354 continue;
355 pConv->SetOutputIndex(i);
356 if(itr==lastitr)
357 pConv->SetOneObjectOnly(); //to set IsLast
358
359 std::string auditMsg = "OpenBabel::Write molecule ";
360 std::string description((pConv->GetOutFormat())->Description());
361 auditMsg += description.substr(0,description.find('\n'));
362 obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg);
363
364 ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);
365
366 delete itr->second; //always delete OBMol object
367 itr->second = NULL; // so can be deleted in DeleteDeferredMols()
368 if (!ret) break;
369 }
370 DeleteDeferredMols();//cleans up in case there have been errors
371 return ret;
372 }
373
374 bool OBMoleculeFormat::DeleteDeferredMols()
375 {
376 //Empties IMols, deteting the OBMol objects whose pointers are stored there
377 std::map<std::string, OBMol*>::iterator itr;
378 for(itr=IMols.begin();itr!=IMols.end();++itr)
379 {
380 delete itr->second; //usually NULL
381 }
382 IMols.clear();
383 return false;
384 }
385
386 //////////////////////////////////////////////////////////////////
387 /** Attempts to read the index file datafilename.obindx successively
388 from the following directories:
389 - the current directory
390 - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
391 if the environment variable is not set
392 - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
393 An index of type NameIndexType is then constructed. NameIndexType is defined
394 in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
395 In any case it is searched by
396 @code
397 NameIndexType::iterator itr = index.find(molecule_name);
398 if(itr!=index.end())
399 unsigned pos_in_datafile = itr->second;
400 @endcode
401 pos_in_datafile is used as a paramter in seekg() to read from the datafile
402
403 If no index is found, it is constructed from the datafile by reading all of
404 it using the format pInFormat, and written to the directory containing the datafile.
405 This means that this function can be used without worrying whether there is an index.
406 It will be slow to execute the first time, but subsequent uses get the speed benefit
407 of indexed access to the datafile.
408
409 The serialization and de-serialization of the NameIndexType is entirely in
410 this routine and could possibly be improved. Currently re-hashing is done
411 every time the index is read.
412 **/
413 bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
414 const string& datafilename, OBFormat* pInFormat)
415 {
416 struct headertype
417 {
418 char filename[256];
419 unsigned size;
420 } header;
421
422 NameIndexType::iterator itr;
423
424 ifstream indexstream;
425 OpenDatafile(indexstream, datafilename + ".obindx");
426 if(!indexstream)
427 {
428 //Need to prepare the index
429 ifstream datastream;
430 string datafilepath = OpenDatafile(datastream, datafilename);
431 if(!datastream)
432 {
433 obErrorLog.ThrowError(__FUNCTION__,
434 datafilepath + " was not found or could not be opened", obError);
435 return false;
436 }
437
438 OBConversion Conv(&datastream,NULL);
439 Conv.SetInFormat(pInFormat);
440 OBMol mol;
441 streampos pos;
442 while(Conv.Read(&mol))
443 {
444 string name = mol.GetTitle();
445 if(!name.empty())
446 index.insert(make_pair(name, pos));
447 mol.Clear();
448 pos = datastream.tellg();
449 }
450 obErrorLog.ThrowError(__FUNCTION__,
451 "Prepared an index for " + datafilepath, obAuditMsg);
452 //Save index to file
453 ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
454 if(!dofs) return false;
455
456 strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
457 header.filename[sizeof(header.filename) - 1] = '\0';
458 header.size = index.size();
459 dofs.write((const char*)&header, sizeof(headertype));
460
461 for(itr=index.begin();itr!=index.end();++itr)
462 {
463 //#chars; chars; ofset(4bytes).
464 const char n = itr->first.size();
465 dofs.put(n);
466 dofs.write(itr->first.c_str(),n);
467 dofs.write((const char*)&itr->second,sizeof(unsigned));
468 }
469 }
470 else
471 {
472 //Read index data from file and put into hash_map
473 indexstream.read((char*)&header,sizeof(headertype));
474 itr=index.begin(); // for hint
475 for(unsigned int i=0;i<header.size;++i)
476 {
477 char len;
478 indexstream.get(len);
479 string title(len, 0);
480 unsigned pos;
481 indexstream.read(&title[0],len);
482 indexstream.read((char*)&pos,sizeof(unsigned));
483 index.insert(itr, make_pair(title,pos));
484 }
485 }
486 return true;
487 }
488
489
490 } //namespace OpenBabel