1 |
/********************************************************************** |
2 |
obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol. |
3 |
|
4 |
Copyright (C) 2005 Chris Morley |
5 |
|
6 |
This file is part of the Open Babel project. |
7 |
For more information, see <http://openbabel.sourceforge.net/> |
8 |
|
9 |
This program is free software; you can redistribute it and/or modify |
10 |
it under the terms of the GNU General Public License as published by |
11 |
the Free Software Foundation version 2 of the License. |
12 |
|
13 |
This program is distributed in the hope that it will be useful, |
14 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 |
GNU General Public License for more details. |
17 |
***********************************************************************/ |
18 |
#include <openbabel/babelconfig.h> |
19 |
#include <openbabel/obmolecformat.h> |
20 |
#include <openbabel/obiter.h> |
21 |
|
22 |
#ifdef _DEBUG |
23 |
#undef THIS_FILE |
24 |
static char THIS_FILE[]=__FILE__; |
25 |
#define new DEBUG_NEW |
26 |
#endif |
27 |
|
28 |
using namespace std; |
29 |
namespace OpenBabel |
30 |
{ |
31 |
|
32 |
std::map<std::string, OBMol*> OBMoleculeFormat::IMols; |
33 |
OBMol* OBMoleculeFormat::_jmol; |
34 |
std::vector<OBMol> OBMoleculeFormat::MolArray; |
35 |
bool OBMoleculeFormat::StoredMolsReady=false; |
36 |
|
37 |
bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) |
38 |
{ |
39 |
std::istream &ifs = *pConv->GetInStream(); |
40 |
if (!ifs.good()) //Possible to omit? ifs.peek() == EOF || |
41 |
return false; |
42 |
|
43 |
OBMol* pmol = new OBMol; |
44 |
|
45 |
std::string auditMsg = "OpenBabel::Read molecule "; |
46 |
std::string description(pFormat->Description()); |
47 |
auditMsg += description.substr(0,description.find('\n')); |
48 |
obErrorLog.ThrowError(__FUNCTION__, |
49 |
auditMsg, |
50 |
obAuditMsg); |
51 |
|
52 |
if(pConv->IsOption("C",OBConversion::GENOPTIONS)) |
53 |
return DeferMolOutput(pmol, pConv, pFormat); |
54 |
|
55 |
bool ret; |
56 |
if(pConv->IsOption("separate",OBConversion::GENOPTIONS)) |
57 |
{ |
58 |
//On first call, separate molecule and put fragments in MolArray. |
59 |
//On subsequent calls, remove a fragment from MolArray and send it for writing |
60 |
//Done this way so that each fragment can be written to its own file (with -m option) |
61 |
if(!StoredMolsReady) |
62 |
{ |
63 |
ret = pFormat->ReadMolecule(pmol,pConv); |
64 |
if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK))) |
65 |
MolArray = pmol->Separate(); //use un-transformed molecule |
66 |
//Add an appropriate title to each fragment |
67 |
for(unsigned int i=0;i<MolArray.size();++i) |
68 |
{ |
69 |
stringstream ss; |
70 |
ss << pmol->GetTitle() << '#' << i+1; |
71 |
string title = ss.str(); |
72 |
MolArray[i].SetTitle(title); |
73 |
} |
74 |
reverse(MolArray.begin(),MolArray.end()); |
75 |
StoredMolsReady = true; |
76 |
} |
77 |
|
78 |
if(MolArray.empty()) //normal end of fragments |
79 |
ret =false; |
80 |
else |
81 |
{ |
82 |
// Copying is needed because the OBMol passed to AddChemObject will be deleted. |
83 |
// The OBMol in the vector is deleted here. |
84 |
OBMol* pMolCopy = new OBMol( MolArray.back()); |
85 |
MolArray.pop_back(); |
86 |
ret = pConv->AddChemObject( |
87 |
pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))); |
88 |
} |
89 |
if(!ret) |
90 |
StoredMolsReady = false; |
91 |
|
92 |
delete pmol; |
93 |
return ret; |
94 |
} |
95 |
|
96 |
ret=pFormat->ReadMolecule(pmol,pConv); |
97 |
|
98 |
OBMol* ptmol = NULL; |
99 |
//Molecule is valid if it has some atoms |
100 |
//or the format allows zero-atom molecules and it has a title |
101 |
if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle()))) |
102 |
{ |
103 |
ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))); |
104 |
if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) |
105 |
|| pConv->IsOption("join",OBConversion::GENOPTIONS))) |
106 |
{ |
107 |
//With j option, accumulate all mols in one stored in this class |
108 |
if(pConv->IsFirstInput()) |
109 |
_jmol = new OBMol; |
110 |
pConv->AddChemObject(_jmol); |
111 |
//will be discarded in WriteChemObjectImpl until the last input mol. This complication |
112 |
//is needed to allow joined molecules to be from different files. pOb1 in AddChem Object |
113 |
//is zeroed at the end of a file and _jmol is in danger of not being output. |
114 |
*_jmol += *ptmol; |
115 |
delete ptmol; |
116 |
return true; |
117 |
} |
118 |
} |
119 |
else |
120 |
delete pmol; |
121 |
|
122 |
// Normal operation - send molecule to be written |
123 |
ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading |
124 |
return ret; |
125 |
} |
126 |
|
127 |
bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat) |
128 |
{ |
129 |
if(pConv->IsOption("C",OBConversion::GENOPTIONS)) |
130 |
return OutputDeferredMols(pConv); |
131 |
if(pConv->IsOption("j",OBConversion::GENOPTIONS) |
132 |
|| pConv->IsOption("join",OBConversion::GENOPTIONS)) |
133 |
{ |
134 |
//arrives here at the end of a file |
135 |
if(!pConv->IsLast()) |
136 |
return true; |
137 |
bool ret=pFormat->WriteMolecule(_jmol,pConv); |
138 |
pConv->SetOutputIndex(1); |
139 |
delete _jmol; |
140 |
return ret; |
141 |
} |
142 |
|
143 |
|
144 |
//Retrieve the target OBMol |
145 |
OBBase* pOb = pConv->GetChemObject(); |
146 |
OBMol* pmol = dynamic_cast<OBMol*> (pOb); |
147 |
bool ret=false; |
148 |
if(pmol) |
149 |
{ |
150 |
if(pmol->NumAtoms()==0) |
151 |
{ |
152 |
std::string auditMsg = "OpenBabel::Molecule "; |
153 |
auditMsg += pmol->GetTitle(); |
154 |
auditMsg += " has 0 atoms"; |
155 |
obErrorLog.ThrowError(__FUNCTION__, |
156 |
auditMsg, |
157 |
obInfo); |
158 |
} |
159 |
ret=true; |
160 |
|
161 |
std::string auditMsg = "OpenBabel::Write molecule "; |
162 |
std::string description(pFormat->Description()); |
163 |
auditMsg += description.substr(0,description.find('\n')); |
164 |
obErrorLog.ThrowError(__FUNCTION__, |
165 |
auditMsg, |
166 |
obAuditMsg); |
167 |
|
168 |
ret=pFormat->WriteMolecule(pmol,pConv); |
169 |
} |
170 |
delete pOb; //move so that non-OBMol objects are deleted 9March2006 |
171 |
return ret; |
172 |
} |
173 |
|
174 |
/*! Instead of sending molecules for output via AddChemObject(), they are |
175 |
saved in here in OBMoleculeFormat or discarded. By default they are |
176 |
saved only if they are in the first input file. Parts of subsequent |
177 |
molecules, such as chemical structure, coordinates and OBGenericData |
178 |
can replace the parts in molecules with the same title that have already |
179 |
been stored, subject to a set of rules. After all input files have been |
180 |
read, the stored molecules (possibly now having augmented properties) are |
181 |
sent to the output format. |
182 |
|
183 |
Is a static function with <this> as parameter so that it can be called from other |
184 |
format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat. |
185 |
*/ |
186 |
bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF ) |
187 |
{ |
188 |
static bool IsFirstFile; |
189 |
bool OnlyMolsInFirstFile=true; |
190 |
|
191 |
if(pConv->IsFirstInput()) |
192 |
{ |
193 |
IsFirstFile=true; |
194 |
IMols.clear(); |
195 |
} |
196 |
else |
197 |
{ |
198 |
if((std::streamoff)pConv->GetInStream()->tellg()<=0) |
199 |
IsFirstFile=false;//File has changed |
200 |
} |
201 |
|
202 |
if (!pF->ReadMolecule(pmol,pConv)) |
203 |
{ |
204 |
delete pmol; |
205 |
return false; |
206 |
} |
207 |
const char* ptitle = pmol->GetTitle(); |
208 |
if(*ptitle==0) |
209 |
obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning); |
210 |
else |
211 |
{ |
212 |
string title(ptitle); |
213 |
string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended |
214 |
if(pos!=string::npos) |
215 |
title.erase(pos); |
216 |
|
217 |
map<std::string, OBMol*>::iterator itr; |
218 |
itr = IMols.find(title); |
219 |
if(itr!=IMols.end()) |
220 |
{ |
221 |
//Molecule with the same title has been input previously: update it |
222 |
OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol); |
223 |
if(pNewMol) |
224 |
{ |
225 |
delete itr->second; |
226 |
IMols[title] = pNewMol; |
227 |
} |
228 |
else |
229 |
{ |
230 |
//error: cleanup and return false |
231 |
delete pmol; |
232 |
return DeleteDeferredMols(); |
233 |
} |
234 |
} |
235 |
else |
236 |
{ |
237 |
//Molecule not already saved in IMols: save it if in first file |
238 |
if(!OnlyMolsInFirstFile || IsFirstFile) |
239 |
{ |
240 |
IMols[title] = pmol; |
241 |
return true; //don't delete pmol |
242 |
} |
243 |
} |
244 |
} |
245 |
delete pmol; |
246 |
return true; |
247 |
} |
248 |
|
249 |
/*! Makes a new OBMol on the heap by combining two molecules according to the rule below. |
250 |
If both have OBGenericData of the same type, or OBPairData with the |
251 |
same attribute, the version from pFirst is used. |
252 |
Returns a pointer to a new OBMol which will need deleting by the calling program |
253 |
(probably by being sent to an output format). |
254 |
If the molecules cannot be regarded as being the same structure a NULL |
255 |
pointer is returned and an error message logged. |
256 |
|
257 |
pFirst and pSecond and the objects they point to are not changed. (const |
258 |
modifiers difficult because class OBMol not designed appropriately) |
259 |
|
260 |
Combining molecules: rules for each of the three parts |
261 |
Title: |
262 |
Use the title of pFirst unless it has none, when use that of pSecond. |
263 |
Warning if neither molecule has a title. |
264 |
|
265 |
Structure |
266 |
- a structure with atoms replaces one with no atoms |
267 |
- a structure with bonds replaces one with no bonds, |
268 |
provided the formula is the same, else an error. |
269 |
- structures with atoms and bonds are compared by InChI; error if not the same. |
270 |
- a structure with 3D coordinates replaces one with 2D coordinates |
271 |
- a structure with 2D coordinates replace one with 0D coordinates |
272 |
|
273 |
OBGenericData |
274 |
OBPairData |
275 |
*/ |
276 |
OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond) |
277 |
{ |
278 |
string title("No title"); |
279 |
if(*pFirst->GetTitle()!=0) |
280 |
title = pFirst->GetTitle(); |
281 |
else |
282 |
{ |
283 |
if(*pSecond->GetTitle()!=0) |
284 |
title = pSecond->GetTitle(); |
285 |
else |
286 |
obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning); |
287 |
} |
288 |
|
289 |
bool swap=false; |
290 |
if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0) |
291 |
swap=true; |
292 |
else |
293 |
{ |
294 |
if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula()) |
295 |
{ |
296 |
obErrorLog.ThrowError(__FUNCTION__, |
297 |
"Molecules with name = " + title + " have different formula",obError); |
298 |
return NULL; |
299 |
} |
300 |
else |
301 |
{ |
302 |
if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0) |
303 |
swap=true; |
304 |
else |
305 |
{ |
306 |
//Compare by inchi; error if different NOT YET IMPLEMENTED |
307 |
//Use the one with the higher dimension |
308 |
if(pSecond->GetDimension() > pFirst->GetDimension()) |
309 |
swap=true; |
310 |
} |
311 |
} |
312 |
} |
313 |
|
314 |
OBMol* pNewMol = new OBMol; |
315 |
pNewMol->SetTitle(title); |
316 |
|
317 |
OBMol* pMain = swap ? pSecond : pFirst; |
318 |
OBMol* pOther = swap ? pFirst : pSecond; |
319 |
|
320 |
*pNewMol = *pMain; //Now copies all data |
321 |
|
322 |
//Copy some OBGenericData from the OBMol which did not provide the structure |
323 |
vector<OBGenericData*>::iterator igd; |
324 |
for(igd=pOther->BeginData();igd!=pOther->EndData();++igd) |
325 |
{ |
326 |
//copy only if not already data of the same type from molecule already copied to pNewMol |
327 |
unsigned datatype = (*igd)->GetDataType(); |
328 |
OBGenericData* pData = pNewMol->GetData(datatype); |
329 |
if(datatype==OBGenericDataType::PairData) |
330 |
{ |
331 |
if(pData->GetAttribute() == (*igd)->GetAttribute()) |
332 |
continue; |
333 |
} |
334 |
else if(pNewMol->GetData(datatype)!=NULL) |
335 |
continue; |
336 |
|
337 |
OBGenericData* pCopiedData = (*igd)->Clone(pNewMol); |
338 |
pNewMol->SetData(pCopiedData); |
339 |
} |
340 |
return pNewMol; |
341 |
} |
342 |
|
343 |
bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv) |
344 |
{ |
345 |
std::map<std::string, OBMol*>::iterator itr, lastitr; |
346 |
bool ret=false; |
347 |
int i=1; |
348 |
lastitr = IMols.end(); |
349 |
--lastitr; |
350 |
pConv->SetOneObjectOnly(false); |
351 |
for(itr=IMols.begin();itr!=IMols.end();++itr,++i) |
352 |
{ |
353 |
if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS))) |
354 |
continue; |
355 |
pConv->SetOutputIndex(i); |
356 |
if(itr==lastitr) |
357 |
pConv->SetOneObjectOnly(); //to set IsLast |
358 |
|
359 |
std::string auditMsg = "OpenBabel::Write molecule "; |
360 |
std::string description((pConv->GetOutFormat())->Description()); |
361 |
auditMsg += description.substr(0,description.find('\n')); |
362 |
obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg); |
363 |
|
364 |
ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv); |
365 |
|
366 |
delete itr->second; //always delete OBMol object |
367 |
itr->second = NULL; // so can be deleted in DeleteDeferredMols() |
368 |
if (!ret) break; |
369 |
} |
370 |
DeleteDeferredMols();//cleans up in case there have been errors |
371 |
return ret; |
372 |
} |
373 |
|
374 |
bool OBMoleculeFormat::DeleteDeferredMols() |
375 |
{ |
376 |
//Empties IMols, deteting the OBMol objects whose pointers are stored there |
377 |
std::map<std::string, OBMol*>::iterator itr; |
378 |
for(itr=IMols.begin();itr!=IMols.end();++itr) |
379 |
{ |
380 |
delete itr->second; //usually NULL |
381 |
} |
382 |
IMols.clear(); |
383 |
return false; |
384 |
} |
385 |
|
386 |
////////////////////////////////////////////////////////////////// |
387 |
/** Attempts to read the index file datafilename.obindx successively |
388 |
from the following directories: |
389 |
- the current directory |
390 |
- that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR |
391 |
if the environment variable is not set |
392 |
- in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name |
393 |
An index of type NameIndexType is then constructed. NameIndexType is defined |
394 |
in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map. |
395 |
In any case it is searched by |
396 |
@code |
397 |
NameIndexType::iterator itr = index.find(molecule_name); |
398 |
if(itr!=index.end()) |
399 |
unsigned pos_in_datafile = itr->second; |
400 |
@endcode |
401 |
pos_in_datafile is used as a paramter in seekg() to read from the datafile |
402 |
|
403 |
If no index is found, it is constructed from the datafile by reading all of |
404 |
it using the format pInFormat, and written to the directory containing the datafile. |
405 |
This means that this function can be used without worrying whether there is an index. |
406 |
It will be slow to execute the first time, but subsequent uses get the speed benefit |
407 |
of indexed access to the datafile. |
408 |
|
409 |
The serialization and de-serialization of the NameIndexType is entirely in |
410 |
this routine and could possibly be improved. Currently re-hashing is done |
411 |
every time the index is read. |
412 |
**/ |
413 |
bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index, |
414 |
const string& datafilename, OBFormat* pInFormat) |
415 |
{ |
416 |
struct headertype |
417 |
{ |
418 |
char filename[256]; |
419 |
unsigned size; |
420 |
} header; |
421 |
|
422 |
NameIndexType::iterator itr; |
423 |
|
424 |
ifstream indexstream; |
425 |
OpenDatafile(indexstream, datafilename + ".obindx"); |
426 |
if(!indexstream) |
427 |
{ |
428 |
//Need to prepare the index |
429 |
ifstream datastream; |
430 |
string datafilepath = OpenDatafile(datastream, datafilename); |
431 |
if(!datastream) |
432 |
{ |
433 |
obErrorLog.ThrowError(__FUNCTION__, |
434 |
datafilepath + " was not found or could not be opened", obError); |
435 |
return false; |
436 |
} |
437 |
|
438 |
OBConversion Conv(&datastream,NULL); |
439 |
Conv.SetInFormat(pInFormat); |
440 |
OBMol mol; |
441 |
streampos pos; |
442 |
while(Conv.Read(&mol)) |
443 |
{ |
444 |
string name = mol.GetTitle(); |
445 |
if(!name.empty()) |
446 |
index.insert(make_pair(name, pos)); |
447 |
mol.Clear(); |
448 |
pos = datastream.tellg(); |
449 |
} |
450 |
obErrorLog.ThrowError(__FUNCTION__, |
451 |
"Prepared an index for " + datafilepath, obAuditMsg); |
452 |
//Save index to file |
453 |
ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary); |
454 |
if(!dofs) return false; |
455 |
|
456 |
strncpy(header.filename,datafilename.c_str(), sizeof(header.filename)); |
457 |
header.filename[sizeof(header.filename) - 1] = '\0'; |
458 |
header.size = index.size(); |
459 |
dofs.write((const char*)&header, sizeof(headertype)); |
460 |
|
461 |
for(itr=index.begin();itr!=index.end();++itr) |
462 |
{ |
463 |
//#chars; chars; ofset(4bytes). |
464 |
const char n = itr->first.size(); |
465 |
dofs.put(n); |
466 |
dofs.write(itr->first.c_str(),n); |
467 |
dofs.write((const char*)&itr->second,sizeof(unsigned)); |
468 |
} |
469 |
} |
470 |
else |
471 |
{ |
472 |
//Read index data from file and put into hash_map |
473 |
indexstream.read((char*)&header,sizeof(headertype)); |
474 |
itr=index.begin(); // for hint |
475 |
for(unsigned int i=0;i<header.size;++i) |
476 |
{ |
477 |
char len; |
478 |
indexstream.get(len); |
479 |
string title(len, 0); |
480 |
unsigned pos; |
481 |
indexstream.read(&title[0],len); |
482 |
indexstream.read((char*)&pos,sizeof(unsigned)); |
483 |
index.insert(itr, make_pair(title,pos)); |
484 |
} |
485 |
} |
486 |
return true; |
487 |
} |
488 |
|
489 |
|
490 |
} //namespace OpenBabel |