applications/atom2md/obmolecformat.cpp

/**********************************************************************
obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.

Copyright (C) 2005 Chris Morley

This file is part of the Open Babel project.
For more information, see <http://openbabel.sourceforge.net/>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
***********************************************************************/
#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/obiter.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

using namespace std;
namespace OpenBabel
{

  // Molecules held back by DeferMolOutput() (the "C" general option), keyed by
  // their title cut at the first tab/CR/LF. The map stores owning raw pointers:
  // they are deleted in OutputDeferredMols() and DeleteDeferredMols().
  std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
  // Accumulator for the "j"/"join" general option: allocated in
  // ReadChemObjectImpl() and deleted in WriteChemObjectImpl().
  OBMol* OBMoleculeFormat::_jmol;
  // Fragments produced by the "separate" general option in ReadChemObjectImpl();
  // stored in reverse order so that the next fragment to emit is at the back.
  std::vector<OBMol> OBMoleculeFormat::MolArray;
  // True while MolArray holds fragments of an already-read molecule that are
  // still being handed out one per call to ReadChemObjectImpl().
  bool OBMoleculeFormat::StoredMolsReady=false;

  /*! Reads one molecule from the input stream of pConv using pFormat and
    passes it on for output.

    Depending on the general options set on pConv the molecule is
    - handed to DeferMolOutput() ("C"),
    - split into fragments which are sent out one per call ("separate"),
    - appended to the accumulated molecule _jmol ("j" or "join"), or
    - transformed and sent directly to AddChemObject() (normal operation).

    Returns false when the input stream is not good, and otherwise the
    success of the reading/sending step as described at each return below.
  */
  bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    std::istream& input = *pConv->GetInStream();
    if (!input.good())
      return false;

    OBMol* pmol = new OBMol;

    // Audit message: fixed prefix plus the first line of the format description
    const std::string formatText(pFormat->Description());
    std::string logLine("OpenBabel::Read molecule ");
    logLine += formatText.substr(0, formatText.find('\n'));
    obErrorLog.ThrowError(__FUNCTION__, logLine, obAuditMsg);

    // Deferred output: DeferMolOutput() reads into pmol and either stores or deletes it
    if (pConv->IsOption("C", OBConversion::GENOPTIONS))
      return DeferMolOutput(pmol, pConv, pFormat);

    if (pConv->IsOption("separate", OBConversion::GENOPTIONS))
      {
        // The first call reads a molecule and fills MolArray with its fragments.
        // This call and each following one take a single fragment from MolArray
        // and send it for writing, so that every fragment can go to its own
        // file (with the -m option).
        if (!StoredMolsReady)
          {
            const bool readOk = pFormat->ReadMolecule(pmol, pConv);
            if (readOk && (pmol->NumAtoms() > 0 || (pFormat->Flags() & ZEROATOMSOK)))
              MolArray = pmol->Separate(); // fragments of the un-transformed molecule

            // Title of fragment k is "<parent title>#k", counting from 1
            for (unsigned int idx = 0; idx < MolArray.size(); ++idx)
              {
                stringstream label;
                label << pmol->GetTitle() << '#' << idx + 1;
                string fragTitle = label.str();
                MolArray[idx].SetTitle(fragTitle);
              }
            // Reversed so that fragments can be taken from the back in original order
            reverse(MolArray.begin(), MolArray.end());
            StoredMolsReady = true;
          }

        bool sent = false; // stays false at the normal end of the fragments
        if (!MolArray.empty())
          {
            // A heap copy is sent because the OBMol passed to AddChemObject will
            // be deleted; the element in the vector is removed here.
            OBMol* pFragment = new OBMol(MolArray.back());
            MolArray.pop_back();
            sent = pConv->AddChemObject(
                pFragment->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
          }
        if (!sent)
          StoredMolsReady = false; // the next call reads a new molecule

        delete pmol;
        return sent;
      }

    bool ret = pFormat->ReadMolecule(pmol, pConv);

    // A molecule is accepted if it has some atoms, or if the format allows
    // zero-atom molecules and it has a title.
    const bool accepted =
      ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle()));

    OBMol* ptmol = NULL;
    if (!accepted)
      {
        delete pmol;
      }
    else
      {
        ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
        if (ptmol && (pConv->IsOption("j", OBConversion::GENOPTIONS)
                      || pConv->IsOption("join", OBConversion::GENOPTIONS)))
          {
            // With the j option all molecules are accumulated in _jmol, owned by this class
            if (pConv->IsFirstInput())
              _jmol = new OBMol;
            // _jmol is discarded in WriteChemObjectImpl until the last input mol.
            // This complication is needed to allow joined molecules to be from
            // different files: pOb1 in AddChemObject is zeroed at the end of a
            // file and _jmol is in danger of not being output.
            pConv->AddChemObject(_jmol);
            *_jmol += *ptmol;
            delete ptmol;
            return true;
          }
      }

    // Normal operation - send molecule to be written.
    // The result is the success of both reading and writing.
    return ret && pConv->AddChemObject(ptmol);
  }

  /*! Writes the pending chemical object of pConv using pFormat.

    - With the "C" general option the work is delegated to OutputDeferredMols().
    - With "j" or "join" nothing is written until pConv->IsLast() is true; then
      the accumulated molecule _jmol is written and released.
    - Otherwise the object returned by pConv->GetChemObject() is written if it
      is an OBMol, and is deleted in every case.

    Returns the result of the write, true for the skipped intermediate calls in
    join mode, and false if there was no OBMol to write.
  */
  bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    if(pConv->IsOption("C",OBConversion::GENOPTIONS))
      return OutputDeferredMols(pConv);
    if(pConv->IsOption("j",OBConversion::GENOPTIONS)
        || pConv->IsOption("join",OBConversion::GENOPTIONS))
      {
        //arrives here at the end of a file
        if(!pConv->IsLast())
          return true;

        bool ret=false;
        if(_jmol)
          ret=pFormat->WriteMolecule(_jmol,pConv);
        else
          obErrorLog.ThrowError(__FUNCTION__,
                                "No joined molecule available to write",
                                obWarning);
        pConv->SetOutputIndex(1);
        delete _jmol;
        _jmol = NULL; //static pointer: must not dangle, or a later call deletes it twice
        return ret;
      }


    //Retrieve the target OBMol
    OBBase* pOb = pConv->GetChemObject();
    OBMol* pmol = dynamic_cast<OBMol*> (pOb);
    bool ret=false;
    if(pmol)
      {
        if(pmol->NumAtoms()==0)
          {
            //Not an error: the molecule is still written, the condition is just reported
            std::string auditMsg = "OpenBabel::Molecule ";
            auditMsg += pmol->GetTitle();
            auditMsg += " has 0 atoms";
            obErrorLog.ThrowError(__FUNCTION__,
                                  auditMsg,
                                  obInfo);
          }

        //Audit message: fixed prefix plus the first line of the format description
        std::string auditMsg = "OpenBabel::Write molecule ";
        std::string description(pFormat->Description());
        auditMsg += description.substr(0,description.find('\n'));
        obErrorLog.ThrowError(__FUNCTION__,
                              auditMsg,
                              obAuditMsg);

        ret=pFormat->WriteMolecule(pmol,pConv);
      }
    delete pOb; //deleted here so that non-OBMol objects are deleted too (9March2006)
    return ret;
  }

  /*! Instead of sending molecules for output via AddChemObject(), they are
    saved in here in OBMoleculeFormat or discarded. By default they are
    saved only if they are in the first input file. Parts of subsequent
    molecules, such as chemical structure, coordinates and OBGenericData
    can replace the parts in molecules with the same title that have already
    been stored, subject to a set of rules. After all input files have been
    read, the stored molecules (possibly now having augmented properties) are
    sent to the output format.

    Is a static function with <this> as parameter so that it can be called from other
    format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.

    \param pmol  Heap-allocated molecule to read into. This function always
                 disposes of it: it is either stored in IMols or deleted.
    \param pConv Conversion supplying the input stream and the first-input flag.
    \param pF    Format used to read the molecule.
    \return false if the read fails, or if the molecule cannot be combined with
            the stored one of the same title (all stored molecules are then
            deleted); true otherwise, including when the molecule is ignored.
  */
  bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
  {
    static bool IsFirstFile;
    const bool OnlyMolsInFirstFile=true;

    if(pConv->IsFirstInput())
      {
        IsFirstFile=true;
        //Start with an empty store. The map owns its molecules, so anything
        //left from an earlier conversion is deleted rather than just dropped.
        DeleteDeferredMols();
      }
    else
      {
        if((std::streamoff)pConv->GetInStream()->tellg()<=0)
          IsFirstFile=false;//File has changed
      }

    if (!pF->ReadMolecule(pmol,pConv))
      {
        delete pmol;
        return false;
      }
    const char* ptitle = pmol->GetTitle();
    if(*ptitle==0)
      obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
    else
      {
        string title(ptitle);
        string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
        if(pos!=string::npos)
          title.erase(pos);

        map<std::string, OBMol*>::iterator itr = IMols.find(title);
        if(itr!=IMols.end())
          {
            //Molecule with the same title has been input previously: update it
            OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
            if(pNewMol)
              {
                //Replace the stored molecule in place; pmol is deleted below
                delete itr->second;
                itr->second = pNewMol;
              }
            else
              {
                //error: cleanup and return false
                delete pmol;
                return DeleteDeferredMols();
              }
          }
        else
          {
            //Molecule not already saved in IMols: save it if in first file
            if(!OnlyMolsInFirstFile || IsFirstFile)
              {
                IMols[title] = pmol;
                return true; //don't delete pmol
              }
          }
      }
    delete pmol;
    return true;
  }
  
  /*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
    If both have OBGenericData of the same type, or OBPairData with the
    same attribute,  the version from pFirst is used.
    Returns a pointer to a new OBMol which will need deleting by the calling program
    (probably by being sent to an output format).
    If the molecules cannot be regarded as being the same structure a NULL
    pointer is returned and an error message logged.

    pFirst and pSecond and the objects they point to are not changed. (const
    modifiers difficult because class OBMol not designed appropriately)

    Combining molecules: rules for each of the three parts
    Title:
    Use the title of pFirst unless it has none, when use that of pSecond.
    Warning if neither molecule has a title.

    Structure
    - a structure with atoms replaces one with no atoms
    - a structure with bonds replaces one with no bonds,
    provided the formula is the same, else an error.
    - structures with atoms and bonds are compared by InChI; error if not the same.
      (NOT YET IMPLEMENTED: only the formulae are compared)
    - a structure with 3D coordinates replaces one with 2D coordinates
    - a structure with 2D coordinates replace one with 0D coordinates

    OBGenericData
    OBPairData
    Data from the molecule which did not provide the structure is copied only
    when the new molecule has no data of that type. For OBPairData the
    attribute is compared only with the pair data returned by GetData(type)
    on the new molecule.
  */
  OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
  {
    string title("No title");
    if(*pFirst->GetTitle()!=0)
      title = pFirst->GetTitle();
    else
      {
        if(*pSecond->GetTitle()!=0)
          title = pSecond->GetTitle();
        else
          obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
      }

    //swap is true when pSecond, not pFirst, is to provide the structure
    bool swap=false;
    if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
      swap=true;
    else
      {
        if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
          {
            obErrorLog.ThrowError(__FUNCTION__,
                                  "Molecules with name = " + title + " have different formula",obError);
            return NULL;
          }
        else
          {
            if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
              swap=true;
            else
              {
                //Compare by inchi; error if different NOT YET IMPLEMENTED
                //Use the one with the higher dimension
                if(pSecond->GetDimension() > pFirst->GetDimension())
                  swap=true;
              }
          }
      }

    OBMol* pMain = swap ? pSecond : pFirst;
    OBMol* pOther = swap ? pFirst : pSecond;

    OBMol* pNewMol = new OBMol;
    *pNewMol = *pMain; //Now copies all data

    //The title is applied after the copy so that the whole-object assignment
    //above cannot replace the title chosen by the rule documented for this function.
    pNewMol->SetTitle(title);

    //Copy some OBGenericData from the OBMol which did not provide the structure
    vector<OBGenericData*>::iterator igd;
    for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
      {
        //copy only if not already data of the same type from molecule already copied to pNewMol
        const unsigned datatype = (*igd)->GetDataType();
        OBGenericData* pExisting = pNewMol->GetData(datatype);
        if(datatype==OBGenericDataType::PairData)
          {
            //pExisting is NULL when pNewMol has no pair data at all;
            //the data is then copied rather than dereferencing a null pointer.
            if(pExisting && pExisting->GetAttribute() == (*igd)->GetAttribute())
              continue;
          }
        else if(pExisting!=NULL)
          continue;

        OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
        pNewMol->SetData(pCopiedData);
      }
    return pNewMol;
  }

  /*! Sends every molecule stored in IMols to the output format of pConv, in
    map (title) order, and then empties IMols.

    Each molecule is first passed through DoTransformations(); when that
    returns NULL the molecule is skipped. The output index is set to the
    1-based position of the molecule in IMols (skipped molecules still use up
    an index), and SetOneObjectOnly() is called before writing the final map
    entry so that IsLast is set.

    Returns the result of the last WriteMolecule() call; false if nothing was
    written. Writing stops at the first failure.
  */
  bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
  {
    std::map<std::string, OBMol*>::iterator itr, lastitr;
    bool ret=false;
    int i=1;
    lastitr = IMols.end();
    if(!IMols.empty())
      --lastitr; //decrementing end() of an empty map is undefined behaviour
    pConv->SetOneObjectOnly(false);
    for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
      {
        //NOTE(review): if DoTransformations() deletes the molecule when it returns
        //NULL, the pointer left in the map is deleted again by
        //DeleteDeferredMols() below - confirm against OBMol::DoTransformations.
        if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))
          continue;
        pConv->SetOutputIndex(i);
        if(itr==lastitr)
          pConv->SetOneObjectOnly(); //to set IsLast

        //Audit message: fixed prefix plus the first line of the format description
        std::string auditMsg = "OpenBabel::Write molecule ";
        std::string description((pConv->GetOutFormat())->Description());
        auditMsg += description.substr(0,description.find('\n'));
        obErrorLog.ThrowError(__FUNCTION__, auditMsg,  obAuditMsg);

        ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);

        delete itr->second; //always delete OBMol object
        itr->second = NULL; // so can be deleted in DeleteDeferredMols()
        if (!ret) break;
      }
    DeleteDeferredMols();//cleans up in case there have been errors
    return ret;
  }

  bool OBMoleculeFormat::DeleteDeferredMols()
  {
    //Empties IMols, deteting the OBMol objects whose pointers are stored there 
    std::map<std::string, OBMol*>::iterator itr;
    for(itr=IMols.begin();itr!=IMols.end();++itr)
      {
        delete itr->second; //usually NULL
      }
    IMols.clear();
    return false;
  }

  //////////////////////////////////////////////////////////////////
  /** Attempts to read the index file datafilename.obindx successively
      from the following directories:
      - the current directory
      - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
      if the environment variable is not set
      - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
      An index of type NameIndexType is then constructed. NameIndexType is defined
      in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
      In any case it is searched by
      @code
      NameIndexType::iterator itr = index.find(molecule_name);
      if(itr!=index.end())
      unsigned pos_in_datafile = itr->second;
      @endcode
      pos_in_datafile is used as a parameter in seekg() to read from the datafile

      If no index is found, it is constructed from the datafile by reading all of
      it using the format pInFormat, and written to the directory containing the datafile.
      This means that this function can be used without worrying whether there is an index.
      It will be slow to execute the first time, but subsequent uses get the speed benefit
      of indexed access to the datafile.

      The serialization and de-serialization of the NameIndexType is entirely in
      this routine and could possibly be improved. Currently re-hashing is done
      every time the index is read.

      Index file layout: a header (file name, number of entries) followed, for
      each entry, by one length byte, that many title characters, and the
      offset written as sizeof(unsigned) bytes. The length byte is treated as
      unsigned, so a title longer than 255 characters is stored cut to 255.

      Returns false if the datafile cannot be opened, the index file cannot be
      created, or an existing index file is truncated or unreadable; true otherwise.
  **/
  bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
                                       const string& datafilename, OBFormat* pInFormat)
  {
    struct headertype
    {
      char filename[256];
      unsigned size;
    } header;

    NameIndexType::iterator itr;

    ifstream indexstream;
    OpenDatafile(indexstream, datafilename + ".obindx");
    if(!indexstream)
      {
        //Need to prepare the index
        ifstream datastream;
        string datafilepath = OpenDatafile(datastream, datafilename);
        if(!datastream)
          {
            obErrorLog.ThrowError(__FUNCTION__,
                                  datafilepath + " was not found or could not be opened",  obError);
            return false;
          }

        OBConversion Conv(&datastream,NULL);
        Conv.SetInFormat(pInFormat);
        OBMol mol;
        streampos pos;
        while(Conv.Read(&mol))
          {
            //pos still holds the stream position from before this molecule was read
            string name = mol.GetTitle();
            if(!name.empty())
              index.insert(make_pair(name, pos));
            mol.Clear();
            pos = datastream.tellg();
          }
        obErrorLog.ThrowError(__FUNCTION__,
                              "Prepared an index for " + datafilepath, obAuditMsg);
        //Save index to file
        ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
        if(!dofs) return false;

        strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
        header.filename[sizeof(header.filename) - 1] = '\0';
        header.size = index.size();
        dofs.write((const char*)&header, sizeof(headertype));

        for(itr=index.begin();itr!=index.end();++itr)
          {
            //#chars; chars;  ofset(4bytes).
            //The count is a single unsigned byte: clamp it so that a long title
            //cannot become a negative or wrapped-around length.
            const string::size_type fullsize = itr->first.size();
            const unsigned char n = static_cast<unsigned char>(fullsize > 255 ? 255 : fullsize);
            dofs.put(static_cast<char>(n));
            dofs.write(itr->first.c_str(),n);
            dofs.write((const char*)&itr->second,sizeof(unsigned));
          }
      }
    else
      {
        //Read index data from file and put into hash_map
        indexstream.read((char*)&header,sizeof(headertype));
        if(!indexstream)
          {
            //header.size would be garbage: do not loop on it
            obErrorLog.ThrowError(__FUNCTION__,
                                  "The index file for " + datafilename + " is truncated or unreadable", obError);
            return false;
          }
        for(unsigned int i=0;i<header.size;++i)
          {
            char rawlen = 0;
            indexstream.get(rawlen);
            //The length byte is unsigned in the file, whatever the signedness of char
            const string::size_type len = static_cast<unsigned char>(rawlen);
            string title(len, 0);
            unsigned pos = 0;
            if(len>0)
              indexstream.read(&title[0],len);
            indexstream.read((char*)&pos,sizeof(unsigned));
            if(!indexstream)
              {
                obErrorLog.ThrowError(__FUNCTION__,
                                      "The index file for " + datafilename + " is truncated or unreadable", obError);
                return false;
              }
            //No insertion hint: an iterator taken before the loop may be
            //invalidated when a hashed container re-hashes.
            index.insert(make_pair(title,pos));
          }
      }
    return true;
  }


} //namespace OpenBabel
Revision:	1210
Committed:	Wed Jan 23 03:45:33 2008 UTC (17 years, 3 months ago) by gezelter
File size:	17487 byte(s)
Log Message:	Removed older version of openbabel from our code. We now have a configure check to see if openbabel is installed and then we link to the stuff we need. Conversion to OOPSE's md format is handled by only one application (atom2md), so most of the work went on there. ElementsTable still needs some work to function in parallel.
#	User	Rev	Content
1	gezelter	1210	/**********************************************************************
2			obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3
4			Copyright (C) 2005 Chris Morley
5
6			This file is part of the Open Babel project.
7			For more information, see <http://openbabel.sourceforge.net/>
8
9			This program is free software; you can redistribute it and/or modify
10			it under the terms of the GNU General Public License as published by
11			the Free Software Foundation version 2 of the License.
12
13			This program is distributed in the hope that it will be useful,
14			but WITHOUT ANY WARRANTY; without even the implied warranty of
15			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16			GNU General Public License for more details.
17			***********************************************************************/
18			#include <openbabel/babelconfig.h>
19			#include <openbabel/obmolecformat.h>
20			#include <openbabel/obiter.h>
21
22			#ifdef _DEBUG
23			#undef THIS_FILE
24			static char THIS_FILE[]=__FILE__;
25			#define new DEBUG_NEW
26			#endif
27
28			using namespace std;
29			namespace OpenBabel
30			{
31
32			std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
33			OBMol* OBMoleculeFormat::_jmol;
34			std::vector<OBMol> OBMoleculeFormat::MolArray;
35			bool OBMoleculeFormat::StoredMolsReady=false;
36
37			bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
38			{
39			std::istream &ifs = *pConv->GetInStream();
40			if (!ifs.good()) //Possible to omit? ifs.peek() == EOF \|\|
41			return false;
42
43			OBMol* pmol = new OBMol;
44
45			std::string auditMsg = "OpenBabel::Read molecule ";
46			std::string description(pFormat->Description());
47			auditMsg += description.substr(0,description.find('\n'));
48			obErrorLog.ThrowError(__FUNCTION__,
49			auditMsg,
50			obAuditMsg);
51
52			if(pConv->IsOption("C",OBConversion::GENOPTIONS))
53			return DeferMolOutput(pmol, pConv, pFormat);
54
55			bool ret;
56			if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
57			{
58			//On first call, separate molecule and put fragments in MolArray.
59			//On subsequent calls, remove a fragment from MolArray and send it for writing
60			//Done this way so that each fragment can be written to its own file (with -m option)
61			if(!StoredMolsReady)
62			{
63			ret = pFormat->ReadMolecule(pmol,pConv);
64			if(ret && (pmol->NumAtoms() > 0 \|\| (pFormat->Flags()&ZEROATOMSOK)))
65			MolArray = pmol->Separate(); //use un-transformed molecule
66			//Add an appropriate title to each fragment
67			for(unsigned int i=0;i<MolArray.size();++i)
68			{
69			stringstream ss;
70			ss << pmol->GetTitle() << '#' << i+1;
71			string title = ss.str();
72			MolArray[i].SetTitle(title);
73			}
74			reverse(MolArray.begin(),MolArray.end());
75			StoredMolsReady = true;
76			}
77
78			if(MolArray.empty()) //normal end of fragments
79			ret =false;
80			else
81			{
82			// Copying is needed because the OBMol passed to AddChemObject will be deleted.
83			// The OBMol in the vector is deleted here.
84			OBMol* pMolCopy = new OBMol( MolArray.back());
85			MolArray.pop_back();
86			ret = pConv->AddChemObject(
87			pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
88			}
89			if(!ret)
90			StoredMolsReady = false;
91
92			delete pmol;
93			return ret;
94			}
95
96			ret=pFormat->ReadMolecule(pmol,pConv);
97
98			OBMol* ptmol = NULL;
99			//Molecule is valid if it has some atoms
100			//or the format allows zero-atom molecules and it has a title
101			if(ret && (pmol->NumAtoms() > 0 \|\| (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle())))
102			{
103			ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
104			if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
105			\|\| pConv->IsOption("join",OBConversion::GENOPTIONS)))
106			{
107			//With j option, accumulate all mols in one stored in this class
108			if(pConv->IsFirstInput())
109			_jmol = new OBMol;
110			pConv->AddChemObject(_jmol);
111			//will be discarded in WriteChemObjectImpl until the last input mol. This complication
112			//is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
113			//is zeroed at the end of a file and _jmol is in danger of not being output.
114			_jmol += ptmol;
115			delete ptmol;
116			return true;
117			}
118			}
119			else
120			delete pmol;
121
122			// Normal operation - send molecule to be written
123			ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading
124			return ret;
125			}
126
127			bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
128			{
129			if(pConv->IsOption("C",OBConversion::GENOPTIONS))
130			return OutputDeferredMols(pConv);
131			if(pConv->IsOption("j",OBConversion::GENOPTIONS)
132			\|\| pConv->IsOption("join",OBConversion::GENOPTIONS))
133			{
134			//arrives here at the end of a file
135			if(!pConv->IsLast())
136			return true;
137			bool ret=pFormat->WriteMolecule(_jmol,pConv);
138			pConv->SetOutputIndex(1);
139			delete _jmol;
140			return ret;
141			}
142
143
144			//Retrieve the target OBMol
145			OBBase* pOb = pConv->GetChemObject();
146			OBMol* pmol = dynamic_cast<OBMol*> (pOb);
147			bool ret=false;
148			if(pmol)
149			{
150			if(pmol->NumAtoms()==0)
151			{
152			std::string auditMsg = "OpenBabel::Molecule ";
153			auditMsg += pmol->GetTitle();
154			auditMsg += " has 0 atoms";
155			obErrorLog.ThrowError(__FUNCTION__,
156			auditMsg,
157			obInfo);
158			}
159			ret=true;
160
161			std::string auditMsg = "OpenBabel::Write molecule ";
162			std::string description(pFormat->Description());
163			auditMsg += description.substr(0,description.find('\n'));
164			obErrorLog.ThrowError(__FUNCTION__,
165			auditMsg,
166			obAuditMsg);
167
168			ret=pFormat->WriteMolecule(pmol,pConv);
169			}
170			delete pOb; //move so that non-OBMol objects are deleted 9March2006
171			return ret;
172			}
173
174			/*! Instead of sending molecules for output via AddChemObject(), they are
175			saved in here in OBMoleculeFormat or discarded. By default they are
176			saved only if they are in the first input file. Parts of subsequent
177			molecules, such as chemical structure, coordinates and OBGenericData
178			can replace the parts in molecules with the same title that have already
179			been stored, subject to a set of rules. After all input files have been
180			read, the stored molecules (possibly now having augmented properties) are
181			sent to the output format.
182
183			Is a static function with <this> as parameter so that it can be called from other
184			format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
185			*/
186			bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
187			{
188			static bool IsFirstFile;
189			bool OnlyMolsInFirstFile=true;
190
191			if(pConv->IsFirstInput())
192			{
193			IsFirstFile=true;
194			IMols.clear();
195			}
196			else
197			{
198			if((std::streamoff)pConv->GetInStream()->tellg()<=0)
199			IsFirstFile=false;//File has changed
200			}
201
202			if (!pF->ReadMolecule(pmol,pConv))
203			{
204			delete pmol;
205			return false;
206			}
207			const char* ptitle = pmol->GetTitle();
208			if(*ptitle==0)
209			obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
210			else
211			{
212			string title(ptitle);
213			string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
214			if(pos!=string::npos)
215			title.erase(pos);
216
217			map<std::string, OBMol*>::iterator itr;
218			itr = IMols.find(title);
219			if(itr!=IMols.end())
220			{
221			//Molecule with the same title has been input previously: update it
222			OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
223			if(pNewMol)
224			{
225			delete itr->second;
226			IMols[title] = pNewMol;
227			}
228			else
229			{
230			//error: cleanup and return false
231			delete pmol;
232			return DeleteDeferredMols();
233			}
234			}
235			else
236			{
237			//Molecule not already saved in IMols: save it if in first file
238			if(!OnlyMolsInFirstFile \|\| IsFirstFile)
239			{
240			IMols[title] = pmol;
241			return true; //don't delete pmol
242			}
243			}
244			}
245			delete pmol;
246			return true;
247			}
248
249			/*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
250			If both have OBGenericData of the same type, or OBPairData with the
251			same attribute, the version from pFirst is used.
252			Returns a pointer to a new OBMol which will need deleting by the calling program
253			(probably by being sent to an output format).
254			If the molecules cannot be regarded as being the same structure a NULL
255			pointer is returned and an error message logged.
256
257			pFirst and pSecond and the objects they point to are not changed. (const
258			modifiers difficult because class OBMol not designed appropriately)
259
260			Combining molecules: rules for each of the three parts
261			Title:
262			Use the title of pFirst unless it has none, when use that of pSecond.
263			Warning if neither molecule has a title.
264
265			Structure
266			- a structure with atoms replaces one with no atoms
267			- a structure with bonds replaces one with no bonds,
268			provided the formula is the same, else an error.
269			- structures with atoms and bonds are compared by InChI; error if not the same.
270			- a structure with 3D coordinates replaces one with 2D coordinates
271			- a structure with 2D coordinates replace one with 0D coordinates
272
273			OBGenericData
274			OBPairData
275			*/
276			OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
277			{
278			string title("No title");
279			if(*pFirst->GetTitle()!=0)
280			title = pFirst->GetTitle();
281			else
282			{
283			if(*pSecond->GetTitle()!=0)
284			title = pSecond->GetTitle();
285			else
286			obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
287			}
288
289			bool swap=false;
290			if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
291			swap=true;
292			else
293			{
294			if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
295			{
296			obErrorLog.ThrowError(__FUNCTION__,
297			"Molecules with name = " + title + " have different formula",obError);
298			return NULL;
299			}
300			else
301			{
302			if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
303			swap=true;
304			else
305			{
306			//Compare by inchi; error if different NOT YET IMPLEMENTED
307			//Use the one with the higher dimension
308			if(pSecond->GetDimension() > pFirst->GetDimension())
309			swap=true;
310			}
311			}
312			}
313
314			OBMol* pNewMol = new OBMol;
315			pNewMol->SetTitle(title);
316
317			OBMol* pMain = swap ? pSecond : pFirst;
318			OBMol* pOther = swap ? pFirst : pSecond;
319
320			pNewMol = pMain; //Now copies all data
321
322			//Copy some OBGenericData from the OBMol which did not provide the structure
323			vector<OBGenericData*>::iterator igd;
324			for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
325			{
326			//copy only if not already data of the same type from molecule already copied to pNewMol
327			unsigned datatype = (*igd)->GetDataType();
328			OBGenericData* pData = pNewMol->GetData(datatype);
329			if(datatype==OBGenericDataType::PairData)
330			{
331			if(pData->GetAttribute() == (*igd)->GetAttribute())
332			continue;
333			}
334			else if(pNewMol->GetData(datatype)!=NULL)
335			continue;
336
337			OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
338			pNewMol->SetData(pCopiedData);
339			}
340			return pNewMol;
341			}
342
343			bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
344			{
345			std::map<std::string, OBMol*>::iterator itr, lastitr;
346			bool ret=false;
347			int i=1;
348			lastitr = IMols.end();
349			--lastitr;
350			pConv->SetOneObjectOnly(false);
351			for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
352			{
353			if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))
354			continue;
355			pConv->SetOutputIndex(i);
356			if(itr==lastitr)
357			pConv->SetOneObjectOnly(); //to set IsLast
358
359			std::string auditMsg = "OpenBabel::Write molecule ";
360			std::string description((pConv->GetOutFormat())->Description());
361			auditMsg += description.substr(0,description.find('\n'));
362			obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg);
363
364			ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);
365
366			delete itr->second; //always delete OBMol object
367			itr->second = NULL; // so can be deleted in DeleteDeferredMols()
368			if (!ret) break;
369			}
370			DeleteDeferredMols();//cleans up in case there have been errors
371			return ret;
372			}
373
374			bool OBMoleculeFormat::DeleteDeferredMols()
375			{
376			//Empties IMols, deteting the OBMol objects whose pointers are stored there
377			std::map<std::string, OBMol*>::iterator itr;
378			for(itr=IMols.begin();itr!=IMols.end();++itr)
379			{
380			delete itr->second; //usually NULL
381			}
382			IMols.clear();
383			return false;
384			}
385
386			//////////////////////////////////////////////////////////////////
387			/** Attempts to read the index file datafilename.obindx successively
388			from the following directories:
389			- the current directory
390			- that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
391			if the environment variable is not set
392			- in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
393			An index of type NameIndexType is then constructed. NameIndexType is defined
394			in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
395			In any case it is searched by
396			@code
397			NameIndexType::iterator itr = index.find(molecule_name);
398			if(itr!=index.end())
399			unsigned pos_in_datafile = itr->second;
400			@endcode
401			pos_in_datafile is used as a paramter in seekg() to read from the datafile
402
403			If no index is found, it is constructed from the datafile by reading all of
404			it using the format pInFormat, and written to the directory containing the datafile.
405			This means that this function can be used without worrying whether there is an index.
406			It will be slow to execute the first time, but subsequent uses get the speed benefit
407			of indexed access to the datafile.
408
409			The serialization and de-serialization of the NameIndexType is entirely in
410			this routine and could possibly be improved. Currently re-hashing is done
411			every time the index is read.
412			**/
413			bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
414			const string& datafilename, OBFormat* pInFormat)
415			{
416			struct headertype
417			{
418			char filename[256];
419			unsigned size;
420			} header;
421
422			NameIndexType::iterator itr;
423
424			ifstream indexstream;
425			OpenDatafile(indexstream, datafilename + ".obindx");
426			if(!indexstream)
427			{
428			//Need to prepare the index
429			ifstream datastream;
430			string datafilepath = OpenDatafile(datastream, datafilename);
431			if(!datastream)
432			{
433			obErrorLog.ThrowError(__FUNCTION__,
434			datafilepath + " was not found or could not be opened", obError);
435			return false;
436			}
437
438			OBConversion Conv(&datastream,NULL);
439			Conv.SetInFormat(pInFormat);
440			OBMol mol;
441			streampos pos;
442			while(Conv.Read(&mol))
443			{
444			string name = mol.GetTitle();
445			if(!name.empty())
446			index.insert(make_pair(name, pos));
447			mol.Clear();
448			pos = datastream.tellg();
449			}
450			obErrorLog.ThrowError(__FUNCTION__,
451			"Prepared an index for " + datafilepath, obAuditMsg);
452			//Save index to file
453			ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out\|ios_base::binary);
454			if(!dofs) return false;
455
456			strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
457			header.filename[sizeof(header.filename) - 1] = '\0';
458			header.size = index.size();
459			dofs.write((const char*)&header, sizeof(headertype));
460
461			for(itr=index.begin();itr!=index.end();++itr)
462			{
463			//#chars; chars; ofset(4bytes).
464			const char n = itr->first.size();
465			dofs.put(n);
466			dofs.write(itr->first.c_str(),n);
467			dofs.write((const char*)&itr->second,sizeof(unsigned));
468			}
469			}
470			else
471			{
472			//Read index data from file and put into hash_map
473			indexstream.read((char*)&header,sizeof(headertype));
474			itr=index.begin(); // for hint
475			for(unsigned int i=0;i<header.size;++i)
476			{
477			char len;
478			indexstream.get(len);
479			string title(len, 0);
480			unsigned pos;
481			indexstream.read(&title[0],len);
482			indexstream.read((char*)&pos,sizeof(unsigned));
483			index.insert(itr, make_pair(title,pos));
484			}
485			}
486			return true;
487			}
488
489
490			} //namespace OpenBabel