ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/applications/atom2md/obmolecformat.cpp
Revision: 1210
Committed: Wed Jan 23 03:45:33 2008 UTC (17 years, 3 months ago) by gezelter
File size: 17487 byte(s)
Log Message:
Removed older version of openbabel from our code.  We now have a
configure check to see if openbabel is installed and then we link to
the stuff we need.  Conversion to OOPSE's md format is handled by only
one application (atom2md), so most of the work went on there.
ElementsTable still needs some work to function in parallel.

File Contents

# User Rev Content
1 gezelter 1210 /**********************************************************************
2     obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3    
4     Copyright (C) 2005 Chris Morley
5    
6     This file is part of the Open Babel project.
7     For more information, see <http://openbabel.sourceforge.net/>
8    
9     This program is free software; you can redistribute it and/or modify
10     it under the terms of the GNU General Public License as published by
11     the Free Software Foundation version 2 of the License.
12    
13     This program is distributed in the hope that it will be useful,
14     but WITHOUT ANY WARRANTY; without even the implied warranty of
15     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16     GNU General Public License for more details.
17     ***********************************************************************/
18     #include <openbabel/babelconfig.h>
19     #include <openbabel/obmolecformat.h>
20     #include <openbabel/obiter.h>
21    
22     #ifdef _DEBUG
23     #undef THIS_FILE
24     static char THIS_FILE[]=__FILE__;
25     #define new DEBUG_NEW
26     #endif
27    
28     using namespace std;
29     namespace OpenBabel
30     {
31    
32     std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
33     OBMol* OBMoleculeFormat::_jmol;
34     std::vector<OBMol> OBMoleculeFormat::MolArray;
35     bool OBMoleculeFormat::StoredMolsReady=false;
36    
37     bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
38     {
39     std::istream &ifs = *pConv->GetInStream();
40     if (!ifs.good()) //Possible to omit? ifs.peek() == EOF ||
41     return false;
42    
43     OBMol* pmol = new OBMol;
44    
45     std::string auditMsg = "OpenBabel::Read molecule ";
46     std::string description(pFormat->Description());
47     auditMsg += description.substr(0,description.find('\n'));
48     obErrorLog.ThrowError(__FUNCTION__,
49     auditMsg,
50     obAuditMsg);
51    
52     if(pConv->IsOption("C",OBConversion::GENOPTIONS))
53     return DeferMolOutput(pmol, pConv, pFormat);
54    
55     bool ret;
56     if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
57     {
58     //On first call, separate molecule and put fragments in MolArray.
59     //On subsequent calls, remove a fragment from MolArray and send it for writing
60     //Done this way so that each fragment can be written to its own file (with -m option)
61     if(!StoredMolsReady)
62     {
63     ret = pFormat->ReadMolecule(pmol,pConv);
64     if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK)))
65     MolArray = pmol->Separate(); //use un-transformed molecule
66     //Add an appropriate title to each fragment
67     for(unsigned int i=0;i<MolArray.size();++i)
68     {
69     stringstream ss;
70     ss << pmol->GetTitle() << '#' << i+1;
71     string title = ss.str();
72     MolArray[i].SetTitle(title);
73     }
74     reverse(MolArray.begin(),MolArray.end());
75     StoredMolsReady = true;
76     }
77    
78     if(MolArray.empty()) //normal end of fragments
79     ret =false;
80     else
81     {
82     // Copying is needed because the OBMol passed to AddChemObject will be deleted.
83     // The OBMol in the vector is deleted here.
84     OBMol* pMolCopy = new OBMol( MolArray.back());
85     MolArray.pop_back();
86     ret = pConv->AddChemObject(
87     pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
88     }
89     if(!ret)
90     StoredMolsReady = false;
91    
92     delete pmol;
93     return ret;
94     }
95    
96     ret=pFormat->ReadMolecule(pmol,pConv);
97    
98     OBMol* ptmol = NULL;
99     //Molecule is valid if it has some atoms
100     //or the format allows zero-atom molecules and it has a title
101     if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle())))
102     {
103     ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
104     if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
105     || pConv->IsOption("join",OBConversion::GENOPTIONS)))
106     {
107     //With j option, accumulate all mols in one stored in this class
108     if(pConv->IsFirstInput())
109     _jmol = new OBMol;
110     pConv->AddChemObject(_jmol);
111     //will be discarded in WriteChemObjectImpl until the last input mol. This complication
112     //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
113     //is zeroed at the end of a file and _jmol is in danger of not being output.
114     *_jmol += *ptmol;
115     delete ptmol;
116     return true;
117     }
118     }
119     else
120     delete pmol;
121    
122     // Normal operation - send molecule to be written
123     ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading
124     return ret;
125     }
126    
127     bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
128     {
129     if(pConv->IsOption("C",OBConversion::GENOPTIONS))
130     return OutputDeferredMols(pConv);
131     if(pConv->IsOption("j",OBConversion::GENOPTIONS)
132     || pConv->IsOption("join",OBConversion::GENOPTIONS))
133     {
134     //arrives here at the end of a file
135     if(!pConv->IsLast())
136     return true;
137     bool ret=pFormat->WriteMolecule(_jmol,pConv);
138     pConv->SetOutputIndex(1);
139     delete _jmol;
140     return ret;
141     }
142    
143    
144     //Retrieve the target OBMol
145     OBBase* pOb = pConv->GetChemObject();
146     OBMol* pmol = dynamic_cast<OBMol*> (pOb);
147     bool ret=false;
148     if(pmol)
149     {
150     if(pmol->NumAtoms()==0)
151     {
152     std::string auditMsg = "OpenBabel::Molecule ";
153     auditMsg += pmol->GetTitle();
154     auditMsg += " has 0 atoms";
155     obErrorLog.ThrowError(__FUNCTION__,
156     auditMsg,
157     obInfo);
158     }
159     ret=true;
160    
161     std::string auditMsg = "OpenBabel::Write molecule ";
162     std::string description(pFormat->Description());
163     auditMsg += description.substr(0,description.find('\n'));
164     obErrorLog.ThrowError(__FUNCTION__,
165     auditMsg,
166     obAuditMsg);
167    
168     ret=pFormat->WriteMolecule(pmol,pConv);
169     }
170     delete pOb; //move so that non-OBMol objects are deleted 9March2006
171     return ret;
172     }
173    
174     /*! Instead of sending molecules for output via AddChemObject(), they are
175     saved in here in OBMoleculeFormat or discarded. By default they are
176     saved only if they are in the first input file. Parts of subsequent
177     molecules, such as chemical structure, coordinates and OBGenericData
178     can replace the parts in molecules with the same title that have already
179     been stored, subject to a set of rules. After all input files have been
180     read, the stored molecules (possibly now having augmented properties) are
181     sent to the output format.
182    
183     Is a static function with <this> as parameter so that it can be called from other
184     format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
185     */
186     bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
187     {
188     static bool IsFirstFile;
189     bool OnlyMolsInFirstFile=true;
190    
191     if(pConv->IsFirstInput())
192     {
193     IsFirstFile=true;
194     IMols.clear();
195     }
196     else
197     {
198     if((std::streamoff)pConv->GetInStream()->tellg()<=0)
199     IsFirstFile=false;//File has changed
200     }
201    
202     if (!pF->ReadMolecule(pmol,pConv))
203     {
204     delete pmol;
205     return false;
206     }
207     const char* ptitle = pmol->GetTitle();
208     if(*ptitle==0)
209     obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
210     else
211     {
212     string title(ptitle);
213     string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
214     if(pos!=string::npos)
215     title.erase(pos);
216    
217     map<std::string, OBMol*>::iterator itr;
218     itr = IMols.find(title);
219     if(itr!=IMols.end())
220     {
221     //Molecule with the same title has been input previously: update it
222     OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
223     if(pNewMol)
224     {
225     delete itr->second;
226     IMols[title] = pNewMol;
227     }
228     else
229     {
230     //error: cleanup and return false
231     delete pmol;
232     return DeleteDeferredMols();
233     }
234     }
235     else
236     {
237     //Molecule not already saved in IMols: save it if in first file
238     if(!OnlyMolsInFirstFile || IsFirstFile)
239     {
240     IMols[title] = pmol;
241     return true; //don't delete pmol
242     }
243     }
244     }
245     delete pmol;
246     return true;
247     }
248    
249     /*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
250     If both have OBGenericData of the same type, or OBPairData with the
251     same attribute, the version from pFirst is used.
252     Returns a pointer to a new OBMol which will need deleting by the calling program
253     (probably by being sent to an output format).
254     If the molecules cannot be regarded as being the same structure a NULL
255     pointer is returned and an error message logged.
256    
257     pFirst and pSecond and the objects they point to are not changed. (const
258     modifiers difficult because class OBMol not designed appropriately)
259    
260     Combining molecules: rules for each of the three parts
261     Title:
262     Use the title of pFirst unless it has none, when use that of pSecond.
263     Warning if neither molecule has a title.
264    
265     Structure
266     - a structure with atoms replaces one with no atoms
267     - a structure with bonds replaces one with no bonds,
268     provided the formula is the same, else an error.
269     - structures with atoms and bonds are compared by InChI; error if not the same.
270     - a structure with 3D coordinates replaces one with 2D coordinates
271     - a structure with 2D coordinates replace one with 0D coordinates
272    
273     OBGenericData
274     OBPairData
275     */
276     OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
277     {
278     string title("No title");
279     if(*pFirst->GetTitle()!=0)
280     title = pFirst->GetTitle();
281     else
282     {
283     if(*pSecond->GetTitle()!=0)
284     title = pSecond->GetTitle();
285     else
286     obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
287     }
288    
289     bool swap=false;
290     if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
291     swap=true;
292     else
293     {
294     if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
295     {
296     obErrorLog.ThrowError(__FUNCTION__,
297     "Molecules with name = " + title + " have different formula",obError);
298     return NULL;
299     }
300     else
301     {
302     if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
303     swap=true;
304     else
305     {
306     //Compare by inchi; error if different NOT YET IMPLEMENTED
307     //Use the one with the higher dimension
308     if(pSecond->GetDimension() > pFirst->GetDimension())
309     swap=true;
310     }
311     }
312     }
313    
314     OBMol* pNewMol = new OBMol;
315     pNewMol->SetTitle(title);
316    
317     OBMol* pMain = swap ? pSecond : pFirst;
318     OBMol* pOther = swap ? pFirst : pSecond;
319    
320     *pNewMol = *pMain; //Now copies all data
321    
322     //Copy some OBGenericData from the OBMol which did not provide the structure
323     vector<OBGenericData*>::iterator igd;
324     for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
325     {
326     //copy only if not already data of the same type from molecule already copied to pNewMol
327     unsigned datatype = (*igd)->GetDataType();
328     OBGenericData* pData = pNewMol->GetData(datatype);
329     if(datatype==OBGenericDataType::PairData)
330     {
331     if(pData->GetAttribute() == (*igd)->GetAttribute())
332     continue;
333     }
334     else if(pNewMol->GetData(datatype)!=NULL)
335     continue;
336    
337     OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
338     pNewMol->SetData(pCopiedData);
339     }
340     return pNewMol;
341     }
342    
343     bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
344     {
345     std::map<std::string, OBMol*>::iterator itr, lastitr;
346     bool ret=false;
347     int i=1;
348     lastitr = IMols.end();
349     --lastitr;
350     pConv->SetOneObjectOnly(false);
351     for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
352     {
353     if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))
354     continue;
355     pConv->SetOutputIndex(i);
356     if(itr==lastitr)
357     pConv->SetOneObjectOnly(); //to set IsLast
358    
359     std::string auditMsg = "OpenBabel::Write molecule ";
360     std::string description((pConv->GetOutFormat())->Description());
361     auditMsg += description.substr(0,description.find('\n'));
362     obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg);
363    
364     ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);
365    
366     delete itr->second; //always delete OBMol object
367     itr->second = NULL; // so can be deleted in DeleteDeferredMols()
368     if (!ret) break;
369     }
370     DeleteDeferredMols();//cleans up in case there have been errors
371     return ret;
372     }
373    
374     bool OBMoleculeFormat::DeleteDeferredMols()
375     {
376     //Empties IMols, deteting the OBMol objects whose pointers are stored there
377     std::map<std::string, OBMol*>::iterator itr;
378     for(itr=IMols.begin();itr!=IMols.end();++itr)
379     {
380     delete itr->second; //usually NULL
381     }
382     IMols.clear();
383     return false;
384     }
385    
386     //////////////////////////////////////////////////////////////////
387     /** Attempts to read the index file datafilename.obindx successively
388     from the following directories:
389     - the current directory
390     - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
391     if the environment variable is not set
392     - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
393     An index of type NameIndexType is then constructed. NameIndexType is defined
394     in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
395     In any case it is searched by
396     @code
397     NameIndexType::iterator itr = index.find(molecule_name);
398     if(itr!=index.end())
399     unsigned pos_in_datafile = itr->second;
400     @endcode
401     pos_in_datafile is used as a paramter in seekg() to read from the datafile
402    
403     If no index is found, it is constructed from the datafile by reading all of
404     it using the format pInFormat, and written to the directory containing the datafile.
405     This means that this function can be used without worrying whether there is an index.
406     It will be slow to execute the first time, but subsequent uses get the speed benefit
407     of indexed access to the datafile.
408    
409     The serialization and de-serialization of the NameIndexType is entirely in
410     this routine and could possibly be improved. Currently re-hashing is done
411     every time the index is read.
412     **/
413     bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
414     const string& datafilename, OBFormat* pInFormat)
415     {
416     struct headertype
417     {
418     char filename[256];
419     unsigned size;
420     } header;
421    
422     NameIndexType::iterator itr;
423    
424     ifstream indexstream;
425     OpenDatafile(indexstream, datafilename + ".obindx");
426     if(!indexstream)
427     {
428     //Need to prepare the index
429     ifstream datastream;
430     string datafilepath = OpenDatafile(datastream, datafilename);
431     if(!datastream)
432     {
433     obErrorLog.ThrowError(__FUNCTION__,
434     datafilepath + " was not found or could not be opened", obError);
435     return false;
436     }
437    
438     OBConversion Conv(&datastream,NULL);
439     Conv.SetInFormat(pInFormat);
440     OBMol mol;
441     streampos pos;
442     while(Conv.Read(&mol))
443     {
444     string name = mol.GetTitle();
445     if(!name.empty())
446     index.insert(make_pair(name, pos));
447     mol.Clear();
448     pos = datastream.tellg();
449     }
450     obErrorLog.ThrowError(__FUNCTION__,
451     "Prepared an index for " + datafilepath, obAuditMsg);
452     //Save index to file
453     ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
454     if(!dofs) return false;
455    
456     strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
457     header.filename[sizeof(header.filename) - 1] = '\0';
458     header.size = index.size();
459     dofs.write((const char*)&header, sizeof(headertype));
460    
461     for(itr=index.begin();itr!=index.end();++itr)
462     {
463     //#chars; chars; ofset(4bytes).
464     const char n = itr->first.size();
465     dofs.put(n);
466     dofs.write(itr->first.c_str(),n);
467     dofs.write((const char*)&itr->second,sizeof(unsigned));
468     }
469     }
470     else
471     {
472     //Read index data from file and put into hash_map
473     indexstream.read((char*)&header,sizeof(headertype));
474     itr=index.begin(); // for hint
475     for(unsigned int i=0;i<header.size;++i)
476     {
477     char len;
478     indexstream.get(len);
479     string title(len, 0);
480     unsigned pos;
481     indexstream.read(&title[0],len);
482     indexstream.read((char*)&pos,sizeof(unsigned));
483     index.insert(itr, make_pair(title,pos));
484     }
485     }
486     return true;
487     }
488    
489    
490     } //namespace OpenBabel