ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/openbabel/obconversion.cpp
Revision: 1081
Committed: Thu Oct 19 20:49:05 2006 UTC (18 years, 6 months ago) by gezelter
File size: 39394 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# Content
1 /**********************************************************************
2 obconversion.cpp - Declaration of OBFormat and OBConversion
3
4 Copyright (C) 2004 by Chris Morley
5 Some portions Copyright (C) 2005 by Geoffrey Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19 // Definition of OBConversion routines
20
21 #ifdef _WIN32
22 #pragma warning (disable : 4786)
23
24 //using 'this' in base class initializer
25 #pragma warning (disable : 4355)
26
27 #ifdef GUI
28 #undef DATADIR
29 #include "stdafx.hpp" //(includes<windows.h>
30 #endif
31 #endif
32
33 #include <iostream>
34 #include <fstream>
35 #include <sstream>
36 #include <string>
37 #include <map>
38 //#include <dlfcn.h>
39
40 #include "obconversion.hpp"
41
42 #ifdef HAVE_LIBZ
43 #include "zipstream.hpp"
44 #endif
45
46 #if !HAVE_STRNCASECMP
47 extern "C" int strncasecmp(const char *s1, const char *s2, size_t n);
48 #endif
49
50 #ifndef BUFF_SIZE
51 #define BUFF_SIZE 32768
52 #endif
53
54 using namespace std;
55 namespace OpenBabel {
56
57 const char* OBFormat::TargetClassDescription()
58 {
59 //Provides class of default format unless overridden
60 if(OBConversion::GetDefaultFormat())
61 return OBConversion::GetDefaultFormat()->TargetClassDescription();
62 else
63 return "";
64 }
65 const type_info& OBFormat::GetType()
66 {
67 //Provides info on class of default format unless overridden
68 if(OBConversion::GetDefaultFormat())
69 return OBConversion::GetDefaultFormat()->GetType();
70 else
71 return typeid(this); //rubbish return if DefaultFormat not set
72 }
73
74 //***************************************************
75
76 /** @class OBConversion
77 OBConversion maintains a list of the available formats,
78 provides information on them, and controls the conversion process.
79
80 A conversion is carried out by the calling routine, usually in a
81 user interface or an application program, making an instance of
82 OBConversion. It is loaded with the in and out formats, any options
83 and (usually) the default streams for input and output. Then either
84 the Convert() function is called, which allows a single input file
85 to be converted, or the extended functionality of FullConvert()
86 is used. This allows multiple input and output files, allowing:
87 - aggregation - the contents of many input files converted
88 and sent to one output file;
89 - splitting - the molecules from one input file sent to
90 separate output files;
91 - batch conversion - each input file converted to an output file.
92
93 These procedures constitute the "Convert" interface. OBConversion
94 and the user interface or application program do not need to be
95 aware of any other part of OpenBabel - mol.h is not \#included. This
96 allows any chemical object derived from OBBase to be converted;
97 the type of object is decided by the input format class.
98 However, currently, almost all the conversions are for molecules of
99 class OBMol.
100 ///
101 OBConversion can also be used with an "API" interface
102 called from programs which manipulate chemical objects. Input/output is
103 done with the Read() and Write() functions which work with any
104 chemical object, but need to have its type specified. (The
105 ReadMolecule() and WriteMolecule() functions of the format classes
106 can also be used directly.)
107
108
109 Example code using OBConversion
110
111 <b>To read in a molecule, manipulate it and write it out.</b>
112
113 Set up an istream and an ostream, to and from files or elsewhere.
114 (cin and cout are used in the example). Specify the file formats.
115
116 @code
117 OBConversion conv(&cin,&cout);
118 if(conv.SetInAndOutFormats("SMI","MOL"))
119 {
120 OBMol mol;
121 if(conv.Read(&mol))
122 ...manipulate molecule
123
124 conv.Write(&mol);
125 }
126 @endcode
127
128 A two stage construction is used to allow error handling
129 if the format ID is not recognized. This is necessary now that the
130 formats are dynamic and errors are not caught at compile time.
131 OBConversion::Read() is a templated function so that objects derived
132 from OBBase can also be handled, in addition to OBMol, if the format
133 routines are written appropriately.
134
135 <b>To make a molecule from a SMILES string.</b>
136 @code
137 std::string SmilesString;
138 OBMol mol;
139 stringstream ss(SmilesString);
140 OBConversion conv(&ss);
141 if(conv.SetInFormat("smi") && conv.Read(&mol))
142 ...
143 @endcode
144
145 <b>To do a file conversion without manipulating the molecule.</b>
146
147 @code
148 #include "obconversion.hpp" //mol.h is not needed
149 ...set up an istream is and an ostream os
150 OBConversion conv(&is,&os);
151 if(conv.SetInAndOutFormats("SMI","MOL"))
152 {
153 conv.SetOptions("h"); //Optional; (h adds explicit hydrogens)
154 conv.Convert();
155 }
156 @endcode
157
158 <b>To add automatic format conversion to an existing program.</b>
159
160 The existing program inputs from the file identified by the
161 const char* filename into the istream is. The file is assumed to have
162 a format ORIG, but other formats, identified by their file extensions,
163 can now be used.
164
165 @code
166 ifstream ifs(filename); //Original code
167
168 OBConversion conv;
169 OBFormat* inFormat = conv.FormatFromExt(filename);
170 OBFormat* outFormat = conv.GetFormat("ORIG");
171 istream* pIn = &ifs;
172 stringstream newstream;
173 if(inFormat && outFormat)
174 {
175 conv.SetInAndOutFormats(inFormat,outFormat);
176 conv.Convert(pIn,&newstream);
177 pIn=&newstream;
178 }
179 //else error; new features not available; fallback to original functionality
180
181 ...Carry on with original code using pIn
182 @endcode
183
184 In Windows a degree of independence from OpenBabel can be achieved using DLLs.
185 This code would be linked with obconv.lib.
186 At runtime the following DLLs would be in the executable directory:
187 obconv.dll, obdll.dll, one or more *.obf format files.
188 */
189
190 int OBConversion::FormatFilesLoaded = 0;
191
192 OBFormat* OBConversion::pDefaultFormat=NULL;
193
194 OBConversion::OBConversion(istream* is, ostream* os) :
195 pInFormat(NULL),pOutFormat(NULL), Index(0), StartNumber(1),
196 EndNumber(0), Count(-1), m_IsLast(true), MoreFilesToCome(false),
197 OneObjectOnly(false), pOb1(NULL), pAuxConv(NULL)
198 {
199 pInStream=is;
200 pOutStream=os;
201 if (FormatFilesLoaded == 0)
202 FormatFilesLoaded = LoadFormatFiles();
203
204 //These options take a parameter
205 RegisterOptionParam("f", NULL, 1,GENOPTIONS);
206 RegisterOptionParam("l", NULL, 1,GENOPTIONS);
207 }
208
209 ///This static function returns a reference to the FormatsMap
210 ///which, because it is a static local variable is constructed only once.
211 ///This fiddle is to avoid the "static initialization order fiasco"
212 ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
213 FMapType& OBConversion::FormatsMap()
214 {
215 static FMapType* fm = NULL;
216 if (!fm)
217 fm = new FMapType;
218 return *fm;
219 }
220
221 ///This static function returns a reference to the FormatsMIMEMap
222 ///which, because it is a static local variable is constructed only once.
223 ///This fiddle is to avoid the "static initialization order fiasco"
224 ///See Marshall Cline's C++ FAQ Lite document, www.parashift.com/c++-faq-lite/".
225 FMapType& OBConversion::FormatsMIMEMap()
226 {
227 static FMapType* fm = NULL;
228 if (!fm)
229 fm = new FMapType;
230 return *fm;
231 }
232
233 /////////////////////////////////////////////////
234 OBConversion::OBConversion(const OBConversion& o)
235 {
236 Index = o.Index;
237 Count = o.Count;
238 StartNumber = o.StartNumber;
239 EndNumber = o.EndNumber;
240 pInFormat = o.pInFormat;
241 pInStream = o.pInStream;
242 pOutFormat = o.pOutFormat;
243 pOutStream = o.pOutStream;
244 OptionsArray[0]= o.OptionsArray[0];
245 OptionsArray[1]= o.OptionsArray[1];
246 OptionsArray[2]= o.OptionsArray[2];
247 InFilename = o.InFilename;
248 rInpos = o.rInpos;
249 wInpos = o.wInpos;
250 rInlen = o.rInlen;
251 wInlen = o.wInlen;
252 m_IsLast = o.m_IsLast;
253 MoreFilesToCome= o.MoreFilesToCome;
254 OneObjectOnly = o.OneObjectOnly;
255 pOb1 = o.pOb1;
256 ReadyToInput = o.ReadyToInput;
257
258 pAuxConv = NULL;
259 }
260 ////////////////////////////////////////////////
261
262 OBConversion::~OBConversion()
263 {
264 if(pAuxConv!=this)
265 delete pAuxConv;
266 }
267 //////////////////////////////////////////////////////
268
269 /// Class information on formats is collected by making an instance of the class
270 /// derived from OBFormat(only one is usually required). RegisterFormat() is called
271 /// from its constructor.
272 ///
273 /// If the compiled format is stored separately, like in a DLL or shared library,
274 /// the initialization code makes an instance of the imported OBFormat class.
275 int OBConversion::RegisterFormat(const char* ID, OBFormat* pFormat, const char* MIME)
276 {
277 FormatsMap()[ID] = pFormat;
278 if (MIME)
279 FormatsMIMEMap()[MIME] = pFormat;
280 if(pFormat->Flags() & DEFAULTFORMAT)
281 pDefaultFormat=pFormat;
282 return FormatsMap().size();
283 }
284
285 //////////////////////////////////////////////////////
286 int OBConversion::LoadFormatFiles()
287 {
288 int count=0;
289 // if(FormatFilesLoaded) return 0;
290 // FormatFilesLoaded=true; //so will load files only once
291 #ifdef USING_DYNAMIC_LIBS
292 //Depending on availablilty, look successively in
293 //FORMATFILE_DIR, executable directory,or current directory
294 string TargetDir;
295 #ifdef FORMATFILE_DIR
296 TargetDir="FORMATFILE_DIR";
297 #endif
298
299 DLHandler::getConvDirectory(TargetDir);
300
301 vector<string> files;
302 if(!DLHandler::findFiles(files,DLHandler::getFormatFilePattern(),TargetDir)) return 0;
303
304 vector<string>::iterator itr;
305 for(itr=files.begin();itr!=files.end();itr++)
306 {
307 if(DLHandler::openLib(*itr))
308 count++;
309 else
310 cerr << *itr << " did not load properly" << endl;
311 }
312 #else
313 count = 1; //avoid calling this function several times
314 #endif //USING_DYNAMIC_LIBS
315 return count;
316 }
317
318 /**
319 *Returns the ID + the first line of the description in str
320 *and a pointer to the format in pFormat.
321 *If called with str==NULL the first format is returned;
322 *subsequent formats are returned by calling with str!=NULL and the previous value of itr
323 *returns false, and str and pFormat NULL, when there are no more formats.
324 *Use like:
325 *@code
326 * const char* str=NULL;
327 * Formatpos pos;
328 * OBConversion conv; // dummy to make sure static data is available
329 * while(OBConversion::GetNextFormat(pos,str,pFormat))
330 * {
331 * use str and pFormat
332 * }
333 *@endcode
334 *
335 * NOTE: Because of dynamic loading problems, it is usually necessary to
336 * declare a "dummy" OBConversion object to access this static method.
337 * (Not elegant, but will hopefully be fixed in the future.)
338 */
339 bool OBConversion::GetNextFormat(Formatpos& itr, const char*& str,OBFormat*& pFormat)
340 {
341
342 pFormat = NULL;
343 if(str==NULL)
344 itr = FormatsMap().begin();
345 else
346 itr++;
347 if(itr == FormatsMap().end())
348 {
349 str=NULL; pFormat=NULL;
350 return false;
351 }
352 static string s;
353 s =itr->first;
354 pFormat = itr->second;
355 if(pFormat)
356 {
357 string description(pFormat->Description());
358 s += " -- ";
359 s += description.substr(0,description.find('\n'));
360 }
361
362 if(pFormat->Flags() & NOTWRITABLE) s+=" [Read-only]";
363 if(pFormat->Flags() & NOTREADABLE) s+=" [Write-only]";
364
365 str = s.c_str();
366 return true;
367 }
368
369 //////////////////////////////////////////////////////
370 /// Sets the formats from their ids, e g CML.
371 /// If inID is NULL, the input format is left unchanged. Similarly for outID
372 /// Returns true if both formats have been successfully set at sometime
373 bool OBConversion::SetInAndOutFormats(const char* inID, const char* outID)
374 {
375 return SetInFormat(inID) && SetOutFormat(outID);
376 }
377 //////////////////////////////////////////////////////
378
379 bool OBConversion::SetInAndOutFormats(OBFormat* pIn, OBFormat* pOut)
380 {
381 return SetInFormat(pIn) && SetOutFormat(pOut);
382 }
383 //////////////////////////////////////////////////////
384 bool OBConversion::SetInFormat(OBFormat* pIn)
385 {
386 if(pIn==NULL)
387 return true;
388 pInFormat=pIn;
389 return !(pInFormat->Flags() & NOTREADABLE);
390 }
391 //////////////////////////////////////////////////////
392 bool OBConversion::SetOutFormat(OBFormat* pOut)
393 {
394 pOutFormat=pOut;
395 return !(pOutFormat->Flags() & NOTWRITABLE);
396 }
397 //////////////////////////////////////////////////////
398 bool OBConversion::SetInFormat(const char* inID)
399 {
400 if(inID)
401 pInFormat = FindFormat(inID);
402 return pInFormat && !(pInFormat->Flags() & NOTREADABLE);
403 }
404 //////////////////////////////////////////////////////
405
406 bool OBConversion::SetOutFormat(const char* outID)
407 {
408 if(outID)
409 pOutFormat= FindFormat(outID);
410 return pOutFormat && !(pOutFormat->Flags() & NOTWRITABLE);
411 }
412
413 //////////////////////////////////////////////////////
414 int OBConversion::Convert(istream* is, ostream* os)
415 {
416 if(is) pInStream=is;
417 if(os) pOutStream=os;
418 ostream* pOrigOutStream = pOutStream;
419
420 #ifdef HAVE_LIBZ
421 zlib_stream::zip_istream zIn(*pInStream);
422 if(zIn.is_gzip())
423 pInStream = &zIn;
424
425 zlib_stream::zip_ostream zOut(*pOutStream);
426 if(IsOption("z",GENOPTIONS))
427 {
428 // make sure to output the header
429 zOut.make_gzip();
430 pOutStream = &zOut;
431 }
432 #endif
433
434 int count = Convert();
435 pOutStream = pOrigOutStream;
436 return count;
437
438 }
439
440 ////////////////////////////////////////////////////
441 /// Actions the "convert" interface.
442 /// Calls the OBFormat class's ReadMolecule() which
443 /// - makes a new chemical object of its chosen type (e.g. OBMol)
444 /// - reads an object from the input file
445 /// - subjects the chemical object to 'transformations' as specified by the Options
446 /// - calls AddChemObject to add it to a buffer. The previous object is first output
447 /// via the output Format's WriteMolecule(). During the output process calling
448 /// IsFirst() and GetIndex() (the number of objects including the current one already output.
449 /// allows more control, for instance writing \<cml\> and \</cml\> tags for multiple molecule outputs only.
450 ///
451 /// AddChemObject does not save the object passed to it if it is NULL (as a result of a DoTransformation())
452 /// or if the number of the object is outside the range defined by
453 /// StartNumber and EndNumber.This means the start and end counts apply to all chemical objects
454 /// found whether or not they are output.
455 ///
456 /// If ReadMolecule returns false the input conversion loop is exited.
457 ///
458 int OBConversion::Convert()
459 {
460 if(pInStream==NULL || pOutStream==NULL)
461 {
462 cerr << "input or output stream not set" << endl;
463 return 0;
464 }
465
466 if(!pInFormat) return 0;
467 Count=0;//number objects processed
468
469 if(!SetStartAndEnd())
470 return 0;
471
472 ReadyToInput=true;
473 m_IsLast=false;
474 pOb1=NULL;
475 wInlen=0;
476
477 //Input loop
478 while(ReadyToInput && pInStream->peek() != EOF && pInStream->good())
479 {
480 if(pInStream==&cin)
481 {
482 if(pInStream->peek()=='\n')
483 break;
484 }
485 else
486 rInpos = pInStream->tellg();
487
488 bool ret=false;
489 try
490 {
491 ret = pInFormat->ReadChemObject(this);
492 }
493 catch(...)
494 {
495 if(!IsOption("e", GENOPTIONS) && !OneObjectOnly)
496 throw;
497 }
498
499 if(!ret)
500 {
501 //error or termination request: terminate unless
502 // -e option requested and sucessfully can skip past current object
503 if(!IsOption("e", GENOPTIONS) || pInFormat->SkipObjects(0,this)!=1)
504 break;
505 }
506 if(OneObjectOnly)
507 break;
508 // Objects supplied to AddChemObject() which may output them after a delay
509 //ReadyToInput may be made false in AddChemObject()
510 // by WriteMolecule() returning false or by Count==EndNumber
511 }
512
513 //Output last object
514 //if(!MoreFilesToCome)
515 // m_IsLast=true;
516 m_IsLast= !MoreFilesToCome;
517
518 if(pOutFormat)
519 if(!pOutFormat->WriteChemObject(this))
520 Index--;
521
522 //Put AddChemObject() into non-queue mode
523 Count= -1;
524 EndNumber=StartNumber=0; pOb1=NULL;//leave tidy
525 MoreFilesToCome=false;
526 OneObjectOnly=false;
527
528 return Index; //The number actually output
529 }
530 //////////////////////////////////////////////////////
531 bool OBConversion::SetStartAndEnd()
532 {
533 int TempStartNumber=0;
534 const char* p = IsOption("f",GENOPTIONS);
535 if(p)
536 {
537 StartNumber=atoi(p);
538 if(StartNumber>1)
539 {
540 TempStartNumber=StartNumber;
541 //Try to skip objects now
542 int ret = pInFormat->SkipObjects(StartNumber-1,this);
543 if(ret==-1) //error
544 return false;
545 if(ret==1) //success:objects skipped
546 {
547 Count = StartNumber-1;
548 StartNumber=0;
549 }
550 }
551 }
552
553 p = IsOption("l",GENOPTIONS);
554 if(p)
555 {
556 EndNumber=atoi(p);
557 if(TempStartNumber && EndNumber<TempStartNumber)
558 EndNumber=TempStartNumber;
559 }
560
561 return true;
562 }
563
564 //////////////////////////////////////////////////////
565 /// Retrieves an object stored by AddChemObject() during output
566 OBBase* OBConversion::GetChemObject()
567 {
568 Index++;
569 return pOb1;
570 }
571
572 //////////////////////////////////////////////////////
573 /// Called by ReadMolecule() to deliver an object it has read from an input stream.
574 /// Used in two modes:
575 /// - When Count is negative it is left negative and the routine is just a store
576 /// for an OBBase object. The negative value returned tells the calling
577 /// routine that no more objects are required.
578 /// - When count is >=0, probably set by Convert(), it acts as a queue of 2:
579 /// writing the currently stored value before accepting the supplied one. This delay
580 /// allows output routines to respond differently when the written object is the last.
581 /// Count is incremented with each call, even if pOb=NULL.
582 /// Objects are not added to the queue if the count is outside the range
583 /// StartNumber to EndNumber. There is no upper limit if EndNumber is zero.
584 /// The return value is the number of objects, including this one, which have been
585 /// input (but not necessarily output).
586 int OBConversion::AddChemObject(OBBase* pOb)
587 {
588 if(Count<0)
589 {
590 pOb1=pOb;
591 return Count;
592 }
593 Count++;
594 if(Count>=(int)StartNumber)//keeps reading objects but does nothing with them
595 {
596 if(Count==(int)EndNumber)
597 ReadyToInput=false; //stops any more objects being read
598
599 rInlen = pInStream->tellg() - rInpos;
600
601 if(pOb)
602 {
603 if(pOb1 && pOutFormat) //see if there is an object ready to be output
604 {
605 //Output object
606 if (!pOutFormat->WriteChemObject(this))
607 {
608 //faultly write, so finish
609 --Index;
610 ReadyToInput=false;
611 return Count;
612 }
613 //Stop after writing with single object output files
614 if(pOutFormat->Flags() & WRITEONEONLY)
615 {
616 ReadyToInput = false;
617 pOb1 = NULL;
618
619 // if there are more molecules to output, send a warning
620 cerr << "WARNING: You are attempting to convert a file"
621 << " with multiple molecule entries into a format"
622 << " which can only store one molecule. The current"
623 << " output will only contain the first molecule.\n\n";
624
625 cerr << "To convert this input into multiple separate"
626 << " output files, with one molecule per file, try:\n"
627 << "babel [input] [ouptut] -m\n\n";
628
629 cerr << "To pick one particular molecule"
630 << " (e.g., molecule 4), try:\n"
631 << "babel -f 4 -l 4 [input] [output]" << endl;
632
633 return true;
634 }
635 }
636 pOb1=pOb;
637 wInpos = rInpos; //Save the position in the input file to be accessed when writing it
638 wInlen = rInlen;
639 }
640 }
641 return Count;
642 }
643 //////////////////////////////////////////////////////
644 int OBConversion::GetOutputIndex() const
645 {
646 //The number of objects actually written already from this instance of OBConversion
647 return Index;
648 }
649 void OBConversion::SetOutputIndex(int indx)
650 {
651 Index=indx;
652 }
653 //////////////////////////////////////////////////////
654 OBFormat* OBConversion::FindFormat(const char* ID)
655 {
656 //Case insensitive
657 if(FormatsMap().find(ID) == FormatsMap().end())
658 return NULL;
659 else
660 return FormatsMap()[ID];
661 }
662
663 //////////////////////////////////////////////////
664 const char* OBConversion::GetTitle() const
665 {
666 return(InFilename.c_str());
667 }
668
669 void OBConversion::SetMoreFilesToCome()
670 {
671 MoreFilesToCome=true;
672 }
673
674 void OBConversion::SetOneObjectOnly()
675 {
676 OneObjectOnly=true;
677 m_IsLast=true;
678 }
679
680 /////////////////////////////////////////////////////////
681 OBFormat* OBConversion::FormatFromExt(const char* filename)
682 {
683 string file = filename;
684 size_t extPos = file.rfind(".");
685
686 if(extPos!=string::npos)
687 {
688 // only do this if we actually can read .gz files
689 #ifdef HAVE_LIBZ
690 if (file.substr(extPos,3) == ".gz")
691 {
692 file.erase(extPos);
693 extPos = file.rfind(".");
694 if (extPos!=string::npos)
695 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
696 }
697 else
698 #endif
699 return FindFormat( (file.substr(extPos + 1, file.size())).c_str() );
700 }
701 return NULL; //if no extension
702 }
703
704 OBFormat* OBConversion::FormatFromMIME(const char* MIME)
705 {
706 if(FormatsMIMEMap().find(MIME) == FormatsMIMEMap().end())
707 return NULL;
708 else
709 return FormatsMIMEMap()[MIME];
710 }
711
712 bool OBConversion::Read(OBBase* pOb, std::istream* pin)
713 {
714 if(pin)
715 pInStream=pin;
716 if(!pInFormat) return false;
717
718 #ifdef HAVE_LIBZ
719 zlib_stream::zip_istream zIn(*pInStream);
720 if(zIn.is_gzip())
721 pInStream = &zIn;
722 #endif
723
724 return pInFormat->ReadMolecule(pOb, this);
725 }
726 //////////////////////////////////////////////////
727 /// Writes the object pOb but does not delete it afterwards.
728 /// The output stream is lastingly changed if pos is not NULL
729 /// Returns true if successful.
730 bool OBConversion::Write(OBBase* pOb, ostream* pos)
731 {
732 if(pos)
733 pOutStream=pos;
734 if(!pOutFormat) return false;
735
736 ostream* pOrigOutStream = pOutStream;
737 #ifdef HAVE_LIBZ
738 #ifndef _WIN32
739 zlib_stream::zip_ostream zOut(*pOutStream);
740 if(IsOption("z",GENOPTIONS))
741 {
742 // make sure to output the header
743 zOut.make_gzip();
744 pOutStream = &zOut;
745 }
746 #endif
747 #endif
748
749 bool ret = pOutFormat->WriteMolecule(pOb,this);
750 pOutStream = pOrigOutStream;
751 return ret;
752 }
753
754 //////////////////////////////////////////////////
755 /// Writes the object pOb but does not delete it afterwards.
756 /// The output stream not changed (since we cannot write to this string later)
757 /// Returns true if successful.
758 std::string OBConversion::WriteString(OBBase* pOb)
759 {
760 ostream *oldStream = pOutStream; // save old output
761 stringstream newStream;
762
763 if(pOutFormat)
764 {
765 Write(pOb, &newStream);
766 }
767 pOutStream = oldStream;
768
769 return newStream.str();
770 }
771
772 //////////////////////////////////////////////////
773 /// Writes the object pOb but does not delete it afterwards.
774 /// The output stream is lastingly changed to point to the file
775 /// Returns true if successful.
776 bool OBConversion::WriteFile(OBBase* pOb, string filePath)
777 {
778 if(!pOutFormat) return false;
779
780 ofstream *ofs = new ofstream;
781 ios_base::openmode omode =
782 pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
783
784 ofs->open(filePath.c_str(),omode);
785 if(!ofs || !ofs->good())
786 {
787 cerr << "Cannot write to " << filePath <<endl;
788 return false;
789 }
790
791 return Write(pOb, ofs);
792 }
793
794 ////////////////////////////////////////////
795 bool OBConversion::ReadString(OBBase* pOb, std::string input)
796 {
797 stringstream *pin = new stringstream(input);
798 return Read(pOb,pin);
799 }
800
801
802 ////////////////////////////////////////////
803 bool OBConversion::ReadFile(OBBase* pOb, std::string filePath)
804 {
805 if(!pInFormat) return false;
806
807 ifstream *ifs = new ifstream;
808 ios_base::openmode imode =
809 pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
810
811 ifs->open(filePath.c_str(),imode);
812 if(!ifs || !ifs->good())
813 {
814 cerr << "Cannot read from " << filePath << endl;
815 return false;
816 }
817
818 return Read(pOb,ifs);
819 }
820
821
822 ////////////////////////////////////////////
823 const char* OBConversion::Description()
824 {
825 return "Conversion options\n \
826 -f <#> Start import at molecule # specified\n \
827 -l <#> End import at molecule # specified\n \
828 -t All input files describe a single molecule\n \
829 -e Continue with next object after error, if possible\n \
830 -z Compress the output with gzip\n";
831 }
832
833 ////////////////////////////////////////////
834 bool OBConversion::IsLast()
835 {
836 return m_IsLast;
837 }
838 ////////////////////////////////////////////
839 bool OBConversion::IsFirstInput()
840 {
841 return (Count==0);
842 }
843
844 /////////////////////////////////////////////////
845 string OBConversion::BatchFileName(string& BaseName, string& InFile)
846 {
847 //Replaces * in BaseName by InFile without extension and path
848 string ofname(BaseName);
849 string::size_type pos = ofname.find('*');
850 if(pos != string::npos)
851 {
852 //Replace * by input filename
853 string::size_type posdot= InFile.rfind('.');
854 if(posdot == string::npos)
855 posdot = InFile.size();
856 else {
857 #ifdef HAVE_LIBZ
858 if (InFile.substr(posdot,3) == ".gz")
859 {
860 InFile.erase(posdot);
861 posdot = InFile.rfind('.');
862 if (posdot == string::npos)
863 posdot = InFile.size();
864 }
865 #endif
866 }
867
868 int posname= InFile.find_last_of("\\/");
869 ofname.replace(pos,1, InFile, posname+1, posdot-posname-1);
870 }
871 return ofname;
872 }
873
874 ////////////////////////////////////////////////
875 string OBConversion::IncrementedFileName(string& BaseName, const int Count)
876 {
877 //Replaces * in BaseName by Count
878 string ofname(BaseName);
879 int pos = ofname.find('*');
880 if(pos>=0)
881 {
882 char num[33];
883 snprintf(num, 33, "%d", Count);
884 ofname.replace(pos,1, num);
885 }
886 return ofname;
887 }
888 ////////////////////////////////////////////////////
889
890 /**
891 Makes input and output streams, and carries out normal,
892 batch, aggregation, and splitting conversion.
893
894 Normal
895 Done if FileList contains a single file name and OutputFileName
896 does not contain a *.
897
898 Aggregation
899 Done if FileList has more than one file name and OutputFileName does
900 not contain * . All the chemical objects are converted and sent
901 to the single output file.
902
903 Splitting
904 Done if FileList contains a single file name and OutputFileName
905 contains a * . Each chemical object in the input file is converted
906 and sent to a separate file whose name is OutputFileName with the
907 * replaced by 1, 2, 3, etc.
908 For example, if OutputFileName is NEW*.smi then the output files are
909 NEW1.smi, NEW2.smi, etc.
910
911 Batch Conversion
912 Done if FileList has more than one file name and OutputFileName contains a * .
913 Each input file is converted to an output file whose name is
914 OutputFileName with the * replaced by the inputfile name without its
915 path and extension.
916 So if the input files were inpath/First.cml, inpath/Second.cml
917 and OutputFileName was NEW*.mol, the output files would be
918 NEWFirst.mol, NEWSecond.mol.
919
920 If FileList is empty, the input stream that has already been set
921 (usually in the constructor) is used. If OutputFileName is empty,
922 the output stream already set is used.
923
924 On exit, OutputFileList contains the names of the output files.
925
926 Returns the number of Chemical objects converted.
927 */
928 int OBConversion::FullConvert(std::vector<std::string>& FileList, std::string& OutputFileName,
929 std::vector<std::string>& OutputFileList)
930 {
931 ostream* pOs=NULL;
932 istream* pIs=NULL;
933 ifstream is;
934 ofstream os;
935 bool HasMultipleOutputFiles=false;
936 int Count=0;
937 bool CommonInFormat = pInFormat ? true:false; //whether set in calling routine
938 ios_base::openmode omode =
939 pOutFormat->Flags() & WRITEBINARY ? ios_base::out|ios_base::binary : ios_base::out;
940 try
941 {
942 ofstream ofs;
943
944 //OUTPUT
945 if(OutputFileName.empty())
946 pOs = NULL; //use existing stream
947 else
948 {
949 if(OutputFileName.find_first_of('*')!=string::npos) HasMultipleOutputFiles = true;
950 if(!HasMultipleOutputFiles)
951 {
952 os.open(OutputFileName.c_str(),omode);
953 if(!os)
954 {
955 cerr << "Cannot write to " << OutputFileName <<endl;
956 return 0;
957 }
958 OutputFileList.push_back(OutputFileName);
959 pOs=&os;
960 }
961 }
962
963 if(IsOption("t",GENOPTIONS))
964 {
965 //Concatenate input file option (multiple files, single molecule)
966 if(HasMultipleOutputFiles)
967 {
968 cerr << "Cannot have multiple output files and also concatenate input files (-t option)" <<endl;
969 return 0;
970 }
971
972 stringstream allinput;
973 vector<string>::iterator itr;
974 for(itr=FileList.begin();itr!=FileList.end();itr++)
975 {
976 ifstream ifs((*itr).c_str());
977 if(!ifs)
978 {
979 cerr << "Cannot open " << *itr <<endl;
980 continue;
981 }
982 allinput << ifs.rdbuf(); //Copy all file contents
983 ifs.close();
984 }
985 Count = Convert(&allinput,pOs);
986 return Count;
987 }
988
989 //INPUT
990 if(FileList.empty())
991 pIs = NULL;
992 else
993 {
994 if(FileList.size()>1)
995 {
996 //multiple input files
997 vector<string>::iterator itr, tempitr;
998 tempitr = FileList.end();
999 tempitr--;
1000 for(itr=FileList.begin();itr!=FileList.end();itr++)
1001 {
1002 InFilename = *itr;
1003 ifstream ifs;
1004 if(!OpenAndSetFormat(CommonInFormat, &ifs))
1005 continue;
1006
1007 if(HasMultipleOutputFiles)
1008 {
1009 //Batch conversion
1010 string batchfile = BatchFileName(OutputFileName,*itr);
1011 if(ofs.is_open()) ofs.close();
1012 ofs.open(batchfile.c_str(), omode);
1013 if(!ofs)
1014 {
1015 cerr << "Cannot open " << batchfile << endl;
1016 return Count;
1017 }
1018 OutputFileList.push_back(batchfile);
1019 SetOutputIndex(0); //reset for new file
1020 Count += Convert(&ifs,&ofs);
1021 }
1022 else
1023 {
1024 //Aggregation
1025 if(itr!=tempitr) SetMoreFilesToCome();
1026 Count = Convert(&ifs,pOs);
1027 }
1028 }
1029 return Count;
1030 }
1031 else
1032 {
1033 //Single input file
1034 InFilename = FileList[0];
1035 if(!OpenAndSetFormat(CommonInFormat, &is))
1036 return 0;
1037 pIs=&is;
1038
1039 if(HasMultipleOutputFiles)
1040 {
1041 //Splitting
1042 //Output is put in a temporary stream and written to a file
1043 //with an augmenting name only when it contains a valid object.
1044 int Indx=1;
1045 SetInStream(&is);
1046 #ifdef HAVE_LIBZ
1047 zlib_stream::zip_istream zIn(is);
1048 #endif
1049 for(;;)
1050 {
1051 stringstream ss;
1052 SetOutStream(&ss);
1053 SetOutputIndex(0); //reset for new file
1054 SetOneObjectOnly();
1055
1056 #ifdef HAVE_LIBZ
1057 if(Indx==1 && zIn.is_gzip())
1058 SetInStream(&zIn);
1059 #endif
1060
1061 int ThisFileCount = Convert();
1062 if(ThisFileCount==0) break;
1063 Count+=ThisFileCount;
1064
1065 if(ofs.is_open()) ofs.close();
1066 string incrfile = IncrementedFileName(OutputFileName,Indx++);
1067 ofs.open(incrfile.c_str(), omode);
1068 if(!ofs)
1069 {
1070 cerr << "Cannot write to " << incrfile << endl;
1071 return Count;
1072 }
1073
1074 OutputFileList.push_back(incrfile);
1075 #ifdef HAVE_LIBZ
1076 if(IsOption("z",GENOPTIONS))
1077 {
1078 zlib_stream::zip_ostream zOut(ofs);
1079 // make sure to output the header
1080 zOut.make_gzip();
1081 zOut << ss.rdbuf();
1082 }
1083 else
1084 #endif
1085 ofs << ss.rdbuf();
1086
1087 ofs.close();
1088 ss.clear();
1089 }
1090 return Count;
1091 }
1092 }
1093 }
1094
1095 //Single input and output files
1096 Count = Convert(pIs,pOs);
1097 return Count;
1098 }
1099 catch(...)
1100 {
1101 cerr << "Conversion failed with an exception. Count=" << Count <<endl;
1102 return Count;
1103 }
1104 }
1105
1106 bool OBConversion::OpenAndSetFormat(bool SetFormat, ifstream* is)
1107 {
1108 //Opens file using InFilename and sets pInFormat if requested
1109 if(!SetFormat)
1110 {
1111 pInFormat = FormatFromExt(InFilename.c_str());
1112 if(pInFormat==NULL)
1113 {
1114 string::size_type pos = InFilename.rfind('.');
1115 string ext;
1116 if(pos!=string::npos)
1117 ext = InFilename.substr(pos);
1118 cerr << "Cannot read input format \"" << ext << '\"'
1119 << " for file \"" << InFilename << "\"" << endl;
1120 return false;
1121 }
1122 }
1123
1124 ios_base::openmode imode;
1125 #ifdef ALL_READS_BINARY //Makes unix files compatible with VC++6
1126 imode = ios_base::in|ios_base::binary;
1127 #else
1128 imode = pInFormat->Flags() & READBINARY ? ios_base::in|ios_base::binary : ios_base::in;
1129 #endif
1130
1131 is->open(InFilename.c_str(), imode);
1132 if(!is->good())
1133 {
1134 cerr << "Cannot open " << InFilename <<endl;
1135 return false;
1136 }
1137
1138 return true;
1139 }
1140
1141 ///////////////////////////////////////////////
1142 void OBConversion::AddOption(const char* opt, Option_type opttyp, const char* txt)
1143 {
1144 //Also updates an option
1145 if(txt==NULL)
1146 OptionsArray[opttyp][opt]=string();
1147 else
1148 OptionsArray[opttyp][opt]=txt;
1149 }
1150
1151 const char* OBConversion::IsOption(const char* opt, Option_type opttyp)
1152 {
1153 //Returns NULL if option not found or a pointer to the text if it is
1154 map<string,string>::iterator pos;
1155 pos = OptionsArray[opttyp].find(opt);
1156 if(pos==OptionsArray[opttyp].end())
1157 return NULL;
1158 return pos->second.c_str();
1159 }
1160
1161 bool OBConversion::RemoveOption(const char* opt, Option_type opttyp)
1162 {
1163 return OptionsArray[opttyp].erase(opt)!=0;//true if was there
1164 }
1165
1166 void OBConversion::SetOptions(const char* options, Option_type opttyp)
1167 {
1168 while(*options)
1169 {
1170 string ch(1, *options++);
1171 if(*options=='\"')
1172 {
1173 string txt = options+1;
1174 string::size_type pos = txt.find('\"');
1175 if(pos==string::npos)
1176 return; //options is illformed
1177 txt.erase(pos);
1178 OptionsArray[opttyp][ch]= txt;
1179 options += pos+2;
1180 }
1181 else
1182 OptionsArray[opttyp][ch] = string();
1183 }
1184 }
1185
1186 typedef std::map<string,int> OPAMapType;
1187 OPAMapType& OBConversion::OptionParamArray(Option_type typ)
1188 {
1189 static OPAMapType* opa = NULL;
1190 if (!opa)
1191 opa = new OPAMapType[3];
1192 return opa[typ];
1193 }
1194
1195 void OBConversion::RegisterOptionParam(string name, OBFormat* pFormat,
1196 int numberParams, Option_type typ)
1197 {
1198 //Gives error message if the number of parameters conflicts with an existing registration
1199 map<string,int>::iterator pos;
1200 pos = OptionParamArray(typ).find(name);
1201 if(pos!=OptionParamArray(typ).end())
1202 {
1203 if(pos->second!=numberParams)
1204 {
1205 string description("API");
1206 if(pFormat)
1207 description=pFormat->Description();
1208 cerr << "The number of parameters needed by option \"" << name << "\" in "
1209 << description.substr(0,description.find('\n'))
1210 << " differs from an earlier registration." << endl;
1211 return;
1212 }
1213 }
1214 OptionParamArray(typ)[name] = numberParams;
1215 }
1216
1217 int OBConversion::GetOptionParams(string name, Option_type typ)
1218 {
1219 //returns the number of parameters registered for the option, or 0 if not found
1220 map<string,int>::iterator pos;
1221 pos = OptionParamArray(typ).find(name);
1222 if(pos==OptionParamArray(typ).end())
1223 return 0;
1224 return pos->second;
1225 }
1226
1227 }//namespace OpenBabel
1228
1229 //! \file obconversion.cpp
1230 //! \brief Implementation of OBFormat and OBConversion classes.