applications/atom2md/obmolecformat.cpp

/**********************************************************************
obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.

Copyright (C) 2005 Chris Morley

This file is part of the Open Babel project.
For more information, see <http://openbabel.sourceforge.net/>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
***********************************************************************/
#include <openbabel/babelconfig.h>
#include <openbabel/obmolecformat.h>
#include <openbabel/obiter.h>

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif

using namespace std;
namespace OpenBabel
{

  // Molecules held back by DeferMolOutput(), keyed by their (trimmed) title,
  // until OutputDeferredMols() writes them or DeleteDeferredMols() discards them.
  std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
  // Accumulator for the "j"/"join" option: allocated in ReadChemObjectImpl()
  // on the first input, written and deleted in WriteChemObjectImpl().
  OBMol* OBMoleculeFormat::_jmol;
  // Fragments produced for the "separate" option. ReadChemObjectImpl() stores
  // them reversed so that they can be handed out one per call with pop_back().
  std::vector<OBMol> OBMoleculeFormat::MolArray;
  // True while MolArray holds the fragments of the molecule being split; reset
  // in ReadChemObjectImpl() when no fragment could be sent for output.
  bool OBMoleculeFormat::StoredMolsReady=false;

  /*! Reads the next molecule from the input stream of pConv with pFormat and
    hands it to pConv->AddChemObject() so that it can be written.

    General options alter what is done with the molecule:
    - "C": reading and storage are delegated to DeferMolOutput(), which takes
      over the newly allocated OBMol (it either stores or deletes it).
    - "separate": the molecule is split with OBMol::Separate() and one
      fragment is sent for output on each call.
    - "j" or "join": each molecule is appended to the static _jmol, which is
      written later by WriteChemObjectImpl().

    \param pConv   conversion object supplying the input stream and the options
    \param pFormat format whose ReadMolecule() does the parsing
    \return false if the input stream is not good, if reading failed, if there
            are no more fragments, or if AddChemObject() did not accept the
            molecule; otherwise true.
  */
  bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    std::istream &ifs = *pConv->GetInStream();
    if (!ifs.good()) //Possible to omit? ifs.peek() == EOF || 
      return false;

    OBMol* pmol = new OBMol;

    //Audit message uses only the first line of the format description
    std::string auditMsg = "OpenBabel::Read molecule ";
    std::string description(pFormat->Description());
    auditMsg += description.substr(0,description.find('\n'));
    obErrorLog.ThrowError(__FUNCTION__,
                          auditMsg,
                          obAuditMsg);

    //DeferMolOutput either stores pmol in IMols or deletes it
    if(pConv->IsOption("C",OBConversion::GENOPTIONS))
      return DeferMolOutput(pmol, pConv, pFormat);

    bool ret;
    if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
      {
        //On first call, separate molecule and put fragments in MolArray.
        //On subsequent calls, remove a fragment from MolArray and send it for writing
        //Done this way so that each fragment can be written to its own file (with -m option)
        if(!StoredMolsReady)
          {
            //StoredMolsReady is also reset when AddChemObject fails while
            //fragments remain, so drop any leftovers before reading again.
            //Otherwise a failed read would re-title and output stale fragments.
            MolArray.clear();

            ret = pFormat->ReadMolecule(pmol,pConv); 
            if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK)))
              MolArray = pmol->Separate(); //use un-transformed molecule
            //Add an appropriate title to each fragment
            for(unsigned int i=0;i<MolArray.size();++i)
              {
                stringstream ss;
                ss << pmol->GetTitle() << '#' << i+1;
                string title = ss.str();
                MolArray[i].SetTitle(title);
              }
            //Reversed so that pop_back() below yields the fragments in order
            reverse(MolArray.begin(),MolArray.end());
            StoredMolsReady = true;
          }

        if(MolArray.empty()) //normal end of fragments
          ret =false;
        else
          {
            // Copying is needed because the OBMol passed to AddChemObject will be deleted.
            // The OBMol in the vector is deleted here.
            OBMol* pMolCopy = new OBMol( MolArray.back());
            MolArray.pop_back();
            ret = pConv->AddChemObject(
                pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
          }
        if(!ret)
          StoredMolsReady = false;

        //pmol was only needed as the source of the fragments and their titles
        delete pmol;
        return ret;
      }

    ret=pFormat->ReadMolecule(pmol,pConv); 

    OBMol* ptmol = NULL;
    //Molecule is valid if it has some atoms 
    //or the format allows zero-atom molecules and it has a title
    if(ret && (pmol->NumAtoms() > 0 || (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle())))
      {
        ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
        if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS) 
                  || pConv->IsOption("join",OBConversion::GENOPTIONS)))
          {
            //With j option, accumulate all mols in one stored in this class
            if(pConv->IsFirstInput())
              _jmol = new OBMol;
            pConv->AddChemObject(_jmol);
            //will be discarded in WriteChemObjectImpl until the last input mol. This complication
            //is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
            //is zeroed at the end of a file and _jmol is in danger of not being output.
            *_jmol += *ptmol;
            delete ptmol;
            return true;
          }
      }
    else
      delete pmol;

    // Normal operation - send molecule to be written
    // (ptmol is NULL here when the molecule was rejected above)
    ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading
    return ret;
  }

  /*! Writes the molecule currently held by pConv using pFormat.

    - With the "C" option the stored molecules are written by
      OutputDeferredMols().
    - With the "j"/"join" option nothing is written until pConv->IsLast();
      then the accumulated _jmol is written and deleted.
    - Otherwise the object from pConv->GetChemObject() is written if it is an
      OBMol, and is deleted whether or not it was.

    \param pConv   conversion object supplying options and the object to write
    \param pFormat format whose WriteMolecule() produces the output
    \return the result of WriteMolecule(); true for the intermediate calls in
            join mode; false if the retrieved object is not an OBMol.
  */
  bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
  {
    if(pConv->IsOption("C",OBConversion::GENOPTIONS))
      return OutputDeferredMols(pConv);
    if(pConv->IsOption("j",OBConversion::GENOPTIONS)
        || pConv->IsOption("join",OBConversion::GENOPTIONS))
      {
        //arrives here at the end of a file
        if(!pConv->IsLast())
          return true;
        bool ret=pFormat->WriteMolecule(_jmol,pConv);
        pConv->SetOutputIndex(1);
        delete _jmol;
        _jmol = NULL; //the static member must not keep pointing at freed memory
        return ret;
      }


    //Retrieve the target OBMol
    OBBase* pOb = pConv->GetChemObject();
    OBMol* pmol = dynamic_cast<OBMol*> (pOb);
    bool ret=false;
    if(pmol)
      { 
        //An empty molecule is still written; it is only reported
        if(pmol->NumAtoms()==0)
          {
            std::string auditMsg = "OpenBabel::Molecule ";
            auditMsg += pmol->GetTitle();
            auditMsg += " has 0 atoms";
            obErrorLog.ThrowError(__FUNCTION__,
                                  auditMsg,
                                  obInfo);
          }

        //Audit message uses only the first line of the format description
        std::string auditMsg = "OpenBabel::Write molecule ";
        std::string description(pFormat->Description());
        auditMsg += description.substr(0,description.find('\n'));
        obErrorLog.ThrowError(__FUNCTION__,
                              auditMsg,
                              obAuditMsg);
        
        ret=pFormat->WriteMolecule(pmol,pConv);
      }
    delete pOb; //move so that non-OBMol objects are deleted 9March2006
    return ret;
  }

  /*! Instead of sending molecules for output via AddChemObject(), they are
    saved in here in OBMoleculeFormat or discarded. By default they are 
    saved only if they are in the first input file. Parts of subsequent
    molecules, such as chemical structure, coordinates and OBGenericData
    can replace the parts in molecules with the same title that have already
    been stored, subject to a set of rules (see MakeCombinedMolecule()).
    After all input files have been read, the stored molecules (possibly now
    having augmented properties) are sent to the output format.
    
    Is a static function with the format as a parameter so that it can be called
    from other format classes like XMLMoleculeFormat which are not derived from
    OBMoleculeFormat. 

    \param pmol  newly allocated, empty molecule to read into. This function
                 always disposes of it: it is either stored in IMols or deleted.
    \param pConv conversion object supplying the input stream
    \param pF    format whose ReadMolecule() does the parsing
    \return false if reading failed or if the molecule could not be combined
            with the stored one of the same title (all stored molecules are
            then deleted); otherwise true, including when the molecule was
            ignored because it had no title or was not in the first file.
  */
  bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
  {
    static bool IsFirstFile;
    bool OnlyMolsInFirstFile=true;

    if(pConv->IsFirstInput())
      {
        IsFirstFile=true;
        //Start afresh. The map owns its molecules, so delete any left over
        //from an earlier run rather than just forgetting the pointers.
        DeleteDeferredMols();
      }
    else 
      {
        if((std::streamoff)pConv->GetInStream()->tellg()<=0)
          IsFirstFile=false;//File has changed
      }

    if (!pF->ReadMolecule(pmol,pConv))
      {
        delete pmol;
        return false;
      }
    const char* ptitle = pmol->GetTitle();
    if(*ptitle==0)
      obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
    else
      {
        string title(ptitle);
        string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
        if(pos!=string::npos)
          title.erase(pos);
    
        map<std::string, OBMol*>::iterator itr;
        itr = IMols.find(title);
        if(itr!=IMols.end())
          {
            //Molecule with the same title has been input previously: update it
            OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
            if(pNewMol)
              {
                //Replace the stored molecule through the iterator already found
                delete itr->second;
                itr->second = pNewMol;
              }
            else
              {
                //error: cleanup and return false
                delete pmol;
                return DeleteDeferredMols();
              }
          }
        else
          {
            //Molecule not already saved in IMols: save it if in first file
            if(!OnlyMolsInFirstFile || IsFirstFile)
              {
                IMols[title] = pmol;
                return true; //don't delete pmol
              }
          }
      }
    //pmol was untitled, was merged into a combined copy, or was not wanted
    delete pmol;
    return true;
  }
  
  /*! Makes a new OBMol on the heap by combining two molecules according to the rules below. 
    Returns a pointer to a new OBMol which will need deleting by the calling program
    (probably by being sent to an output format). 
    If the molecules cannot be regarded as being the same structure a NULL
    pointer is returned and an error message logged.
    
    pFirst and pSecond and the objects they point to are not changed. (const
    modifiers difficult because class OBMol not designed appropriately)
    
    Combining molecules: rules for each of the three parts
    Title:
    Use the title of pFirst unless it has none, when use that of pSecond.
    Warning if neither molecule has a title ("No title" is then used).
    
    Structure
    - a structure with atoms replaces one with no atoms
    - a structure with bonds replaces one with no bonds,
    provided the formula is the same, else an error.
    - structures with atoms and bonds should be compared by InChI; this is
    NOT YET IMPLEMENTED, only the formula is compared.
    - a structure with higher dimension coordinates replaces one with lower
    (3D over 2D over 0D)
    
    OBGenericData
    The data of the molecule which provides the structure is all copied. Data
    from the other molecule is added only if the new molecule does not already
    have data of the same type.
    OBPairData
    As for OBGenericData, except that the test is for an existing OBPairData
    with the same attribute rather than merely the same type.

    \param pFirst  the molecule already held
    \param pSecond the molecule to be merged with it
    \return the new combined molecule, or NULL if the formulae differ
  */
  OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
  {
    string title("No title");
    if(*pFirst->GetTitle()!=0)
      title = pFirst->GetTitle();
    else
      {
        if(*pSecond->GetTitle()!=0)
          title = pSecond->GetTitle();
        else
          obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
      }

    //Decide which molecule provides the structure
    bool useSecond=false;
    const bool firstHasAtoms = pFirst->NumAtoms()!=0;
    const bool secondHasAtoms = pSecond->NumAtoms()!=0;
    if(firstHasAtoms != secondHasAtoms)
      {
        //Exactly one has atoms: it provides the structure. There is nothing to
        //compare, so this must not be reported as a formula mismatch.
        useSecond = secondHasAtoms;
      }
    else
      {
        if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
          {
            obErrorLog.ThrowError(__FUNCTION__, 
                                  "Molecules with name = " + title + " have different formula",obError);
            return NULL;
          }
        else
          {
            if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
              useSecond=true;
            else
              {
                //Compare by inchi; error if different NOT YET IMPLEMENTED
                //Use the one with the higher dimension
                if(pSecond->GetDimension() > pFirst->GetDimension())
                  useSecond=true;
              }
          }
      }

    OBMol* pMain = useSecond ? pSecond : pFirst;
    OBMol* pOther = useSecond ? pFirst : pSecond;

    OBMol* pNewMol = new OBMol;
    *pNewMol = *pMain; //Now copies all data 
    //Set the title after the copy so that the assignment cannot replace it
    pNewMol->SetTitle(title);

    //Copy some OBGenericData from the OBMol which did not provide the structure
    vector<OBGenericData*>::iterator igd;
    for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
      {
        //copy only if not already data of the same type from molecule already copied to pNewMol
        unsigned datatype = (*igd)->GetDataType();
        if(datatype==OBGenericDataType::PairData)
          {
            //Look through all the data in pNewMol: it may have no pair data
            //at all, or several with different attributes.
            bool alreadyPresent = false;
            vector<OBGenericData*>::iterator jgd;
            for(jgd=pNewMol->BeginData();jgd!=pNewMol->EndData();++jgd)
              {
                if((*jgd)->GetDataType()==datatype
                   && (*jgd)->GetAttribute() == (*igd)->GetAttribute())
                  {
                    alreadyPresent = true;
                    break;
                  }
              }
            if(alreadyPresent)
              continue;
          }
        else if(pNewMol->GetData(datatype)!=NULL)
          continue;

        OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
        pNewMol->SetData(pCopiedData);
      }
    return pNewMol;
  }

  /*! Writes the molecules stored in IMols by DeferMolOutput() to the output
    format of pConv, in the order of the map, deleting each one after it has
    been written. Writing stops at the first failure. IMols is empty when
    the function returns.

    \param pConv conversion object supplying the options and the output format
    \return the result of the last WriteMolecule() call; false if nothing
            was written (for instance because IMols was empty).
  */
  bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
  {
    std::map<std::string, OBMol*>::iterator itr, lastitr;
    bool ret=false;
    int i=1;
    lastitr = IMols.end();
    if(!IMols.empty())
      --lastitr; //decrementing end() of an empty map would be undefined
    pConv->SetOneObjectOnly(false);
    for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
      {
        //Molecules for which DoTransformations() returns NULL are not written
        if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))
          continue;
        pConv->SetOutputIndex(i);
        if(itr==lastitr)
          pConv->SetOneObjectOnly(); //to set IsLast

        //Audit message uses only the first line of the format description
        std::string auditMsg = "OpenBabel::Write molecule ";
        std::string description((pConv->GetOutFormat())->Description());
        auditMsg += description.substr(0,description.find('\n'));
        obErrorLog.ThrowError(__FUNCTION__, auditMsg,  obAuditMsg);

        ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);

        delete itr->second; //always delete OBMol object
        itr->second = NULL; // so can be deleted in DeleteDeferredMols()
        if (!ret) break;
      }
    DeleteDeferredMols();//cleans up in case there have been errors
    return ret;
  }

  bool OBMoleculeFormat::DeleteDeferredMols()
  {
    //Empties IMols, deteting the OBMol objects whose pointers are stored there 
    std::map<std::string, OBMol*>::iterator itr;
    for(itr=IMols.begin();itr!=IMols.end();++itr)
      {
        delete itr->second; //usually NULL
      }
    IMols.clear();
    return false;
  }

  //////////////////////////////////////////////////////////////////
  /** Attempts to read the index file datafilename.obindx successively
      from the following directories:
      - the current directory
      - that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
      if the environment variable is not set
      - in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
      An index of type NameIndexType is then constructed. NameIndexType is defined
      in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
      In any case it is searched by 
      @code
      NameIndexType::iterator itr = index.find(molecule_name);
      if(itr!=index.end())
      unsigned pos_in_datafile = itr->second;
      @endcode
      pos_in_datafile is used as a parameter in seekg() to read from the datafile

      If no index is found, it is constructed from the datafile by reading all of
      it using the format pInFormat, and written to the directory containing the datafile.
      This means that this function can be used without worrying whether there is an index.
      It will be slow to execute the first time, but subsequent uses get the speed benefit
      of indexed access to the datafile. 

      The serialization and de-serialization of the NameIndexType is entirely in
      this routine and could possibly be improved. Currently re-hashing is done 
      every time the index is read.

      Index file layout: a header (file name and number of entries) followed,
      for each entry, by a one byte name length, the name, and the position as
      an unsigned. Because the length is a single byte, names longer than 255
      characters are truncated in the file.

      @param index        receives the entries; existing content is kept
      @param datafilename name of the datafile to be indexed
      @param pInFormat    format used to read the datafile when an index has to be built
      @return false if neither the index nor the datafile could be opened, if a
              newly built index could not be saved (index has been filled
              nevertheless), or if an existing index file is truncated or
              unreadable (index may then be partly filled); otherwise true.
  **/
  bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
                                       const string& datafilename, OBFormat* pInFormat)
  {
    struct headertype
    {
      char filename[256];
      unsigned size;
    } header;

    NameIndexType::iterator itr;

    ifstream indexstream;
    OpenDatafile(indexstream, datafilename + ".obindx");
    if(!indexstream)
      {
        //Need to prepare the index
        ifstream datastream;
        string datafilepath = OpenDatafile(datastream, datafilename);
        if(!datastream)
          {
            obErrorLog.ThrowError(__FUNCTION__, 
                                  datafilepath + " was not found or could not be opened",  obError);
            return false;
          }

        OBConversion Conv(&datastream,NULL);
        Conv.SetInFormat(pInFormat);
        OBMol mol;
        streampos pos;
        while(Conv.Read(&mol))
          {
            //pos is where the molecule just read started: it was recorded
            //at the end of the previous pass through this loop
            string name = mol.GetTitle();
            if(!name.empty())
              index.insert(make_pair(name, pos));
            mol.Clear();
            pos = datastream.tellg();
          }
        obErrorLog.ThrowError(__FUNCTION__, 
                              "Prepared an index for " + datafilepath, obAuditMsg);
        //Save index to file
        ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out|ios_base::binary);
        if(!dofs) return false;

        strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
        header.filename[sizeof(header.filename) - 1] = '\0';
        header.size = index.size();
        dofs.write((const char*)&header, sizeof(headertype));
        
        for(itr=index.begin();itr!=index.end();++itr)
          {
            //#chars; chars;  ofset(4bytes).
            //The length is stored in one byte. Go through unsigned char and
            //cap at 255 so that a long name can never give a negative count.
            const string::size_type maxchars = 255;
            const string::size_type nchars =
              itr->first.size() < maxchars ? itr->first.size() : maxchars;
            dofs.put(static_cast<char>(static_cast<unsigned char>(nchars)));
            dofs.write(itr->first.c_str(), static_cast<streamsize>(nchars));
            dofs.write((const char*)&itr->second,sizeof(unsigned));
          }                     
      }
    else
      {
        //Read index data from file and put into hash_map
        indexstream.read((char*)&header,sizeof(headertype));
        if(!indexstream)
          {
            obErrorLog.ThrowError(__FUNCTION__,
                                  "The index for " + datafilename + " could not be read", obError);
            return false;
          }
        for(unsigned int i=0;i<header.size;++i)
          {
            char lenbyte = 0;
            indexstream.get(lenbyte);
            //Interpret the byte as unsigned to match the way it was written
            const string::size_type len = static_cast<unsigned char>(lenbyte);
            string title(len, 0);
            unsigned pos = 0;
            if(len!=0)
              indexstream.read(&title[0], static_cast<streamsize>(len));
            indexstream.read((char*)&pos,sizeof(unsigned));
            if(!indexstream)
              {
                //Truncated or corrupt file: stop rather than insert garbage
                //for the remainder of header.size iterations
                obErrorLog.ThrowError(__FUNCTION__,
                                      "The index for " + datafilename + " is incomplete", obError);
                return false;
              }
            //No position hint: an iterator kept from before an insertion is
            //not safe to reuse if NameIndexType is a hash map that rehashes
            index.insert(make_pair(title,pos));
          }
      }
    return true;
  }


} //namespace OpenBabel
Revision:	1210
Committed:	Wed Jan 23 03:45:33 2008 UTC (17 years, 10 months ago) by gezelter
File size:	17487 byte(s)
Log Message:	Removed older version of openbabel from our code. We now have a configure check to see if openbabel is installed and then we link to the stuff we need. Conversion to OOPSE's md format is handled by only one application (atom2md), so most of the work went on there. ElementsTable still needs some work to function in parallel.
#	Content
1	/**********************************************************************
2	obmolecformat.cpp - Implementation of subclass of OBFormat for conversion of OBMol.
3
4	Copyright (C) 2005 Chris Morley
5
6	This file is part of the Open Babel project.
7	For more information, see <http://openbabel.sourceforge.net/>
8
9	This program is free software; you can redistribute it and/or modify
10	it under the terms of the GNU General Public License as published by
11	the Free Software Foundation version 2 of the License.
12
13	This program is distributed in the hope that it will be useful,
14	but WITHOUT ANY WARRANTY; without even the implied warranty of
15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16	GNU General Public License for more details.
17	***********************************************************************/
18	#include <openbabel/babelconfig.h>
19	#include <openbabel/obmolecformat.h>
20	#include <openbabel/obiter.h>
21
22	#ifdef _DEBUG
23	#undef THIS_FILE
24	static char THIS_FILE[]=__FILE__;
25	#define new DEBUG_NEW
26	#endif
27
28	using namespace std;
29	namespace OpenBabel
30	{
31
32	std::map<std::string, OBMol*> OBMoleculeFormat::IMols;
33	OBMol* OBMoleculeFormat::_jmol;
34	std::vector<OBMol> OBMoleculeFormat::MolArray;
35	bool OBMoleculeFormat::StoredMolsReady=false;
36
37	bool OBMoleculeFormat::ReadChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
38	{
39	std::istream &ifs = *pConv->GetInStream();
40	if (!ifs.good()) //Possible to omit? ifs.peek() == EOF \|\|
41	return false;
42
43	OBMol* pmol = new OBMol;
44
45	std::string auditMsg = "OpenBabel::Read molecule ";
46	std::string description(pFormat->Description());
47	auditMsg += description.substr(0,description.find('\n'));
48	obErrorLog.ThrowError(__FUNCTION__,
49	auditMsg,
50	obAuditMsg);
51
52	if(pConv->IsOption("C",OBConversion::GENOPTIONS))
53	return DeferMolOutput(pmol, pConv, pFormat);
54
55	bool ret;
56	if(pConv->IsOption("separate",OBConversion::GENOPTIONS))
57	{
58	//On first call, separate molecule and put fragments in MolArray.
59	//On subsequent calls, remove a fragment from MolArray and send it for writing
60	//Done this way so that each fragment can be written to its own file (with -m option)
61	if(!StoredMolsReady)
62	{
63	ret = pFormat->ReadMolecule(pmol,pConv);
64	if(ret && (pmol->NumAtoms() > 0 \|\| (pFormat->Flags()&ZEROATOMSOK)))
65	MolArray = pmol->Separate(); //use un-transformed molecule
66	//Add an appropriate title to each fragment
67	for(unsigned int i=0;i<MolArray.size();++i)
68	{
69	stringstream ss;
70	ss << pmol->GetTitle() << '#' << i+1;
71	string title = ss.str();
72	MolArray[i].SetTitle(title);
73	}
74	reverse(MolArray.begin(),MolArray.end());
75	StoredMolsReady = true;
76	}
77
78	if(MolArray.empty()) //normal end of fragments
79	ret =false;
80	else
81	{
82	// Copying is needed because the OBMol passed to AddChemObject will be deleted.
83	// The OBMol in the vector is deleted here.
84	OBMol* pMolCopy = new OBMol( MolArray.back());
85	MolArray.pop_back();
86	ret = pConv->AddChemObject(
87	pMolCopy->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
88	}
89	if(!ret)
90	StoredMolsReady = false;
91
92	delete pmol;
93	return ret;
94	}
95
96	ret=pFormat->ReadMolecule(pmol,pConv);
97
98	OBMol* ptmol = NULL;
99	//Molecule is valid if it has some atoms
100	//or the format allows zero-atom molecules and it has a title
101	if(ret && (pmol->NumAtoms() > 0 \|\| (pFormat->Flags()&ZEROATOMSOK && *pmol->GetTitle())))
102	{
103	ptmol = static_cast<OBMol*>(pmol->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)));
104	if(ptmol && (pConv->IsOption("j",OBConversion::GENOPTIONS)
105	\|\| pConv->IsOption("join",OBConversion::GENOPTIONS)))
106	{
107	//With j option, accumulate all mols in one stored in this class
108	if(pConv->IsFirstInput())
109	_jmol = new OBMol;
110	pConv->AddChemObject(_jmol);
111	//will be discarded in WriteChemObjectImpl until the last input mol. This complication
112	//is needed to allow joined molecules to be from different files. pOb1 in AddChem Object
113	//is zeroed at the end of a file and _jmol is in danger of not being output.
114	_jmol += ptmol;
115	delete ptmol;
116	return true;
117	}
118	}
119	else
120	delete pmol;
121
122	// Normal operation - send molecule to be written
123	ret = ret && pConv->AddChemObject(ptmol); //success of both writing and reading
124	return ret;
125	}
126
127	bool OBMoleculeFormat::WriteChemObjectImpl(OBConversion* pConv, OBFormat* pFormat)
128	{
129	if(pConv->IsOption("C",OBConversion::GENOPTIONS))
130	return OutputDeferredMols(pConv);
131	if(pConv->IsOption("j",OBConversion::GENOPTIONS)
132	\|\| pConv->IsOption("join",OBConversion::GENOPTIONS))
133	{
134	//arrives here at the end of a file
135	if(!pConv->IsLast())
136	return true;
137	bool ret=pFormat->WriteMolecule(_jmol,pConv);
138	pConv->SetOutputIndex(1);
139	delete _jmol;
140	return ret;
141	}
142
143
144	//Retrieve the target OBMol
145	OBBase* pOb = pConv->GetChemObject();
146	OBMol* pmol = dynamic_cast<OBMol*> (pOb);
147	bool ret=false;
148	if(pmol)
149	{
150	if(pmol->NumAtoms()==0)
151	{
152	std::string auditMsg = "OpenBabel::Molecule ";
153	auditMsg += pmol->GetTitle();
154	auditMsg += " has 0 atoms";
155	obErrorLog.ThrowError(__FUNCTION__,
156	auditMsg,
157	obInfo);
158	}
159	ret=true;
160
161	std::string auditMsg = "OpenBabel::Write molecule ";
162	std::string description(pFormat->Description());
163	auditMsg += description.substr(0,description.find('\n'));
164	obErrorLog.ThrowError(__FUNCTION__,
165	auditMsg,
166	obAuditMsg);
167
168	ret=pFormat->WriteMolecule(pmol,pConv);
169	}
170	delete pOb; //move so that non-OBMol objects are deleted 9March2006
171	return ret;
172	}
173
174	/*! Instead of sending molecules for output via AddChemObject(), they are
175	saved in here in OBMoleculeFormat or discarded. By default they are
176	saved only if they are in the first input file. Parts of subsequent
177	molecules, such as chemical structure, coordinates and OBGenericData
178	can replace the parts in molecules with the same title that have already
179	been stored, subject to a set of rules. After all input files have been
180	read, the stored molecules (possibly now having augmented properties) are
181	sent to the output format.
182
183	Is a static function with <this> as parameter so that it can be called from other
184	format classes like XMLMoleculeFormat which are not derived from OBMoleculeFormat.
185	*/
186	bool OBMoleculeFormat::DeferMolOutput(OBMol* pmol, OBConversion* pConv, OBFormat* pF )
187	{
188	static bool IsFirstFile;
189	bool OnlyMolsInFirstFile=true;
190
191	if(pConv->IsFirstInput())
192	{
193	IsFirstFile=true;
194	IMols.clear();
195	}
196	else
197	{
198	if((std::streamoff)pConv->GetInStream()->tellg()<=0)
199	IsFirstFile=false;//File has changed
200	}
201
202	if (!pF->ReadMolecule(pmol,pConv))
203	{
204	delete pmol;
205	return false;
206	}
207	const char* ptitle = pmol->GetTitle();
208	if(*ptitle==0)
209	obErrorLog.ThrowError(__FUNCTION__, "Molecule with no title ignored", obWarning);
210	else
211	{
212	string title(ptitle);
213	string::size_type pos = title.find_first_of("\t\r\n"); //some title have other data appended
214	if(pos!=string::npos)
215	title.erase(pos);
216
217	map<std::string, OBMol*>::iterator itr;
218	itr = IMols.find(title);
219	if(itr!=IMols.end())
220	{
221	//Molecule with the same title has been input previously: update it
222	OBMol* pNewMol = MakeCombinedMolecule(itr->second, pmol);
223	if(pNewMol)
224	{
225	delete itr->second;
226	IMols[title] = pNewMol;
227	}
228	else
229	{
230	//error: cleanup and return false
231	delete pmol;
232	return DeleteDeferredMols();
233	}
234	}
235	else
236	{
237	//Molecule not already saved in IMols: save it if in first file
238	if(!OnlyMolsInFirstFile \|\| IsFirstFile)
239	{
240	IMols[title] = pmol;
241	return true; //don't delete pmol
242	}
243	}
244	}
245	delete pmol;
246	return true;
247	}
248
249	/*! Makes a new OBMol on the heap by combining two molecules according to the rule below.
250	If both have OBGenericData of the same type, or OBPairData with the
251	same attribute, the version from pFirst is used.
252	Returns a pointer to a new OBMol which will need deleting by the calling program
253	(probably by being sent to an output format).
254	If the molecules cannot be regarded as being the same structure a NULL
255	pointer is returned and an error message logged.
256
257	pFirst and pSecond and the objects they point to are not changed. (const
258	modifiers difficult because class OBMol not designed appropriately)
259
260	Combining molecules: rules for each of the three parts
261	Title:
262	Use the title of pFirst unless it has none, when use that of pSecond.
263	Warning if neither molecule has a title.
264
265	Structure
266	- a structure with atoms replaces one with no atoms
267	- a structure with bonds replaces one with no bonds,
268	provided the formula is the same, else an error.
269	- structures with atoms and bonds are compared by InChI; error if not the same.
270	- a structure with 3D coordinates replaces one with 2D coordinates
271	- a structure with 2D coordinates replace one with 0D coordinates
272
273	OBGenericData
274	OBPairData
275	*/
276	OBMol* OBMoleculeFormat::MakeCombinedMolecule(OBMol* pFirst, OBMol* pSecond)
277	{
278	string title("No title");
279	if(*pFirst->GetTitle()!=0)
280	title = pFirst->GetTitle();
281	else
282	{
283	if(*pSecond->GetTitle()!=0)
284	title = pSecond->GetTitle();
285	else
286	obErrorLog.ThrowError(__FUNCTION__,"Combined molecule has no title", obWarning);
287	}
288
289	bool swap=false;
290	if(pFirst->NumAtoms()==0 && pSecond->NumAtoms()!=0)
291	swap=true;
292	else
293	{
294	if(pFirst->GetSpacedFormula()!=pSecond->GetSpacedFormula())
295	{
296	obErrorLog.ThrowError(__FUNCTION__,
297	"Molecules with name = " + title + " have different formula",obError);
298	return NULL;
299	}
300	else
301	{
302	if(pSecond->NumBonds()!=0 && pFirst->NumBonds()==0)
303	swap=true;
304	else
305	{
306	//Compare by inchi; error if different NOT YET IMPLEMENTED
307	//Use the one with the higher dimension
308	if(pSecond->GetDimension() > pFirst->GetDimension())
309	swap=true;
310	}
311	}
312	}
313
314	OBMol* pNewMol = new OBMol;
315	pNewMol->SetTitle(title);
316
317	OBMol* pMain = swap ? pSecond : pFirst;
318	OBMol* pOther = swap ? pFirst : pSecond;
319
320	pNewMol = pMain; //Now copies all data
321
322	//Copy some OBGenericData from the OBMol which did not provide the structure
323	vector<OBGenericData*>::iterator igd;
324	for(igd=pOther->BeginData();igd!=pOther->EndData();++igd)
325	{
326	//copy only if not already data of the same type from molecule already copied to pNewMol
327	unsigned datatype = (*igd)->GetDataType();
328	OBGenericData* pData = pNewMol->GetData(datatype);
329	if(datatype==OBGenericDataType::PairData)
330	{
331	if(pData->GetAttribute() == (*igd)->GetAttribute())
332	continue;
333	}
334	else if(pNewMol->GetData(datatype)!=NULL)
335	continue;
336
337	OBGenericData* pCopiedData = (*igd)->Clone(pNewMol);
338	pNewMol->SetData(pCopiedData);
339	}
340	return pNewMol;
341	}
342
343	bool OBMoleculeFormat::OutputDeferredMols(OBConversion* pConv)
344	{
345	std::map<std::string, OBMol*>::iterator itr, lastitr;
346	bool ret=false;
347	int i=1;
348	lastitr = IMols.end();
349	--lastitr;
350	pConv->SetOneObjectOnly(false);
351	for(itr=IMols.begin();itr!=IMols.end();++itr,++i)
352	{
353	if(!(itr->second)->DoTransformations(pConv->GetOptions(OBConversion::GENOPTIONS)))
354	continue;
355	pConv->SetOutputIndex(i);
356	if(itr==lastitr)
357	pConv->SetOneObjectOnly(); //to set IsLast
358
359	std::string auditMsg = "OpenBabel::Write molecule ";
360	std::string description((pConv->GetOutFormat())->Description());
361	auditMsg += description.substr(0,description.find('\n'));
362	obErrorLog.ThrowError(__FUNCTION__, auditMsg, obAuditMsg);
363
364	ret = pConv->GetOutFormat()->WriteMolecule(itr->second, pConv);
365
366	delete itr->second; //always delete OBMol object
367	itr->second = NULL; // so can be deleted in DeleteDeferredMols()
368	if (!ret) break;
369	}
370	DeleteDeferredMols();//cleans up in case there have been errors
371	return ret;
372	}
373
374	bool OBMoleculeFormat::DeleteDeferredMols()
375	{
376	//Empties IMols, deteting the OBMol objects whose pointers are stored there
377	std::map<std::string, OBMol*>::iterator itr;
378	for(itr=IMols.begin();itr!=IMols.end();++itr)
379	{
380	delete itr->second; //usually NULL
381	}
382	IMols.clear();
383	return false;
384	}
385
386	//////////////////////////////////////////////////////////////////
387	/** Attempts to read the index file datafilename.obindx successively
388	from the following directories:
389	- the current directory
390	- that in the environment variable BABEL_DATADIR or in the macro BABEL_DATADIR
391	if the environment variable is not set
392	- in a subdirectory of the BABEL_DATADIR directory with the version of OpenBabel as its name
393	An index of type NameIndexType is then constructed. NameIndexType is defined
394	in obmolecformat.h and may be a std::tr1::unordered_map (a hash_map) or std::map.
395	In any case it is searched by
396	@code
397	NameIndexType::iterator itr = index.find(molecule_name);
398	if(itr!=index.end())
399	unsigned pos_in_datafile = itr->second;
400	@endcode
401	pos_in_datafile is used as a paramter in seekg() to read from the datafile
402
403	If no index is found, it is constructed from the datafile by reading all of
404	it using the format pInFormat, and written to the directory containing the datafile.
405	This means that this function can be used without worrying whether there is an index.
406	It will be slow to execute the first time, but subsequent uses get the speed benefit
407	of indexed access to the datafile.
408
409	The serialization and de-serialization of the NameIndexType is entirely in
410	this routine and could possibly be improved. Currently re-hashing is done
411	every time the index is read.
412	**/
413	bool OBMoleculeFormat::ReadNameIndex(NameIndexType& index,
414	const string& datafilename, OBFormat* pInFormat)
415	{
416	struct headertype
417	{
418	char filename[256];
419	unsigned size;
420	} header;
421
422	NameIndexType::iterator itr;
423
424	ifstream indexstream;
425	OpenDatafile(indexstream, datafilename + ".obindx");
426	if(!indexstream)
427	{
428	//Need to prepare the index
429	ifstream datastream;
430	string datafilepath = OpenDatafile(datastream, datafilename);
431	if(!datastream)
432	{
433	obErrorLog.ThrowError(__FUNCTION__,
434	datafilepath + " was not found or could not be opened", obError);
435	return false;
436	}
437
438	OBConversion Conv(&datastream,NULL);
439	Conv.SetInFormat(pInFormat);
440	OBMol mol;
441	streampos pos;
442	while(Conv.Read(&mol))
443	{
444	string name = mol.GetTitle();
445	if(!name.empty())
446	index.insert(make_pair(name, pos));
447	mol.Clear();
448	pos = datastream.tellg();
449	}
450	obErrorLog.ThrowError(__FUNCTION__,
451	"Prepared an index for " + datafilepath, obAuditMsg);
452	//Save index to file
453	ofstream dofs((datafilepath + ".obindx").c_str(), ios_base::out\|ios_base::binary);
454	if(!dofs) return false;
455
456	strncpy(header.filename,datafilename.c_str(), sizeof(header.filename));
457	header.filename[sizeof(header.filename) - 1] = '\0';
458	header.size = index.size();
459	dofs.write((const char*)&header, sizeof(headertype));
460
461	for(itr=index.begin();itr!=index.end();++itr)
462	{
463	//#chars; chars; ofset(4bytes).
464	const char n = itr->first.size();
465	dofs.put(n);
466	dofs.write(itr->first.c_str(),n);
467	dofs.write((const char*)&itr->second,sizeof(unsigned));
468	}
469	}
470	else
471	{
472	//Read index data from file and put into hash_map
473	indexstream.read((char*)&header,sizeof(headertype));
474	itr=index.begin(); // for hint
475	for(unsigned int i=0;i<header.size;++i)
476	{
477	char len;
478	indexstream.get(len);
479	string title(len, 0);
480	unsigned pos;
481	indexstream.read(&title[0],len);
482	indexstream.read((char*)&pos,sizeof(unsigned));
483	index.insert(itr, make_pair(title,pos));
484	}
485	}
486	return true;
487	}
488
489
490	} //namespace OpenBabel