ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/branches/development/src/applications/atom2md/openmdformat.cpp
Revision: 1399
Committed: Thu Dec 24 05:45:30 2009 UTC (15 years, 4 months ago) by gezelter
Original Path: trunk/src/applications/atom2md/openmdformat.cpp
File size: 15297 byte(s)
Log Message:
fixing atom2md and bringing it up to speed with OpenBabel 2.2.3

File Contents

# User Rev Content
1 gezelter 1390 /**********************************************************************
2     Copyright (C) 2000 by OpenEye Scientific Software, Inc.
3 gezelter 1399 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
4 gezelter 1390 Some portions Copyright (C) 2004 by Chris Morley
5 gezelter 1399 Some portions Copyright (C) 2008-2009 by J. Daniel Gezelter
6 gezelter 1390
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation version 2 of the License.
10    
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14     GNU General Public License for more details.
15     ***********************************************************************/
16    
17     #include <openbabel/babelconfig.h>
18     #include <openbabel/obmolecformat.h>
19     #include <openbabel/obiter.h>
20     #include <openbabel/mol.h>
21     #include <openbabel/chains.h>
22     #include <openbabel/data.h>
23     #include <fstream>
24    
25     #include "utils/StringUtils.hpp"
26    
27     using namespace std;
28     namespace OpenBabel
29     {
30    
31     class OpenMDFormat : public OBMoleculeFormat
32     {
33     public:
34     //Register this format type ID
35     OpenMDFormat()
36     {
37     OBConversion::RegisterFormat("md",this);
38     }
39    
40     virtual const char* Description() //required
41     {
42     return
43 gezelter 1399 "OpenMD combined meta-data / cartesian coordinates format\n\
44     No comments yet\n";
45 gezelter 1390 };
46    
47     virtual const char* SpecificationURL()
48     {return "http://www.openmd.net";}; //optional
49    
50     virtual const char* GetMIMEType()
51     {return "chemical/x-md"; };
52    
53     virtual unsigned int Flags()
54     {
55     return NOTREADABLE | WRITEONEONLY;
56     }
57    
58     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
59    
60     private:
61     bool AreSameFragments(OBMol& mol, vector<int>& frag1, vector<int>& frag2);
62     OBMol* createMolFromFragment(OBMol& mol, vector<int>& fragment);
63     void WriteMDFile(vector<OBMol*> mols, vector<int> numMols, ostream& os,
64     OBMol& mol, vector<int>& indices);
65     void CalcBoundingBox(OBMol &mol,
66     double &min_x, double &max_x,
67     double &min_y, double &max_y,
68     double &min_z, double &max_z);
69    
70     };
71    
72     //Make an instance of the format class
73     OpenMDFormat theOpenMDFormat;
74    
75     bool OpenMDFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) {
76     OBMol* pmol = dynamic_cast<OBMol*>(pOb);
77     if(pmol==NULL)
78     return false;
79    
80     vector<vector<int> > fragmentLists;
81     pmol->ContigFragList(fragmentLists);
82     OBBitVec unused;
83     vector<bool> used(fragmentLists.size(), 0);
84     vector<vector<int> > molecules;
85     vector<int> indices;
86     for(int i =0; i < used.size(); ++i) {
87     if (used[i])
88     continue;
89    
90     used[i] = true;
91     vector<int> sameMolTypes;
92     sameMolTypes.push_back(i);
93     indices.insert(indices.end(), fragmentLists[i].begin(),
94     fragmentLists[i].end());
95     for (int j = i + 1;j < used.size(); ++j) {
96     if (used[j])
97     continue;
98    
99     if (AreSameFragments(*pmol, fragmentLists[i], fragmentLists[j])) {
100     sameMolTypes.push_back(j);
101     indices.insert(indices.end(), fragmentLists[j].begin(),
102     fragmentLists[j].end());
103     used[j]=true;
104     }
105     }
106     molecules.push_back(sameMolTypes);
107     }
108    
109     vector<OBMol*> mdMols;
110     vector<int> numMols;
111     for(vector<vector<int> >::iterator i = molecules.begin();
112     i != molecules.end(); ++i) {
113    
114     mdMols.push_back(createMolFromFragment(*pmol,
115     fragmentLists[i->front()]));
116     numMols.push_back((*i).size());
117     }
118    
119     string OutputFileName = pConv->GetInFilename();
120     size_t pos = OutputFileName.rfind(".");
121     if(pos!=string::npos)
122     OutputFileName = OutputFileName.substr(0, pos) + ".md";
123     else
124     OutputFileName += ".md";
125    
126     ofstream ofs(OutputFileName.c_str());
127     if(!ofs) {
128     cerr << "Cannot write to " << OutputFileName <<endl;
129     return false;
130     }
131    
132     WriteMDFile(mdMols, numMols, ofs, *pmol, indices);
133    
134     for(vector<OBMol*>::iterator i = mdMols.begin(); i != mdMols.end(); ++i) {
135     delete *i;
136     }
137    
138     return(true);
139     }
140    
141     bool OpenMDFormat::AreSameFragments(OBMol& mol, vector<int>& frag1,
142     vector<int>& frag2) {
143     if (frag1.size() != frag2.size())
144     return false;
145    
146     // Exact graph matching is an NP complete problem.
147     // This just matches all of the atom atomic numbers and may falsely
148     // detect identical fragments which aren't really identical.
149     // @todo using sparse matrix to store the connectivities
150    
151     for (unsigned int i =0 ; i < frag1.size(); ++i) {
152     OBAtom* atom1 = mol.GetAtom(frag1[i]);
153     OBAtom* atom2 = mol.GetAtom(frag2[i]);
154    
155     if (atom1->GetAtomicNum() != atom2->GetAtomicNum())
156     return false;
157    
158     }
159     return true;
160     }
161    
162     struct SameAngle {
163     bool operator()(const triple<OBAtom*,OBAtom*,OBAtom*> t1,
164     const triple<OBAtom*,OBAtom*,OBAtom*> t2) const {
165     return (t1.second == t2.second) && ( (t1.first == t2.first && t1.third == t2.third) || (t1.first == t2.third && t1.third == t2.first));
166     }
167     };
168    
169    
170     OBMol* OpenMDFormat::createMolFromFragment(OBMol& mol,
171     vector<int>& fragment) {
172    
173     OBMol* newMol = new OBMol();
174     newMol->ReserveAtoms(fragment.size());
175     newMol->BeginModify();
176     for(vector<int>::iterator i = fragment.begin(); i != fragment.end(); ++i) {
177     OBAtom* newAtom = newMol->NewAtom();
178     *newAtom = *mol.GetAtom(*i);
179     }
180    
181     newMol->EndModify();
182     newMol->ConnectTheDots();
183     newMol->PerceiveBondOrders();
184    
185     return newMol;
186     }
187    
188     void OpenMDFormat::WriteMDFile(vector<OBMol*> mols, vector<int> numMols,
189     ostream& os, OBMol& mol,
190     vector<int>& indices) {
191    
192     std::string molPrefix("MolName");
193     std::string resName;
194     unsigned int i;
195     const int BUFFLEN = 1024;
196     char buffer[BUFFLEN];
197     string str, str1, str2, str3;
198     OBAtom *a, *b, *c, *d;
199     bool molIsWater = false;
200     OBResidue *r;
201     int resKey, myserial;
202     char type_name[10];
203     char *element_name;
204     int res_num;
205     OBChainsParser* chainParser = new OBChainsParser();
206     double min_x, max_x, min_y, max_y, min_z, max_z; /* Edges of bounding box */
207    
208     os << "<OpenMD version=1>" << endl;
209     os << " <MetaData>" << endl << endl;
210    
211     for(i = 0; i < mols.size(); ++i) {
212     OBMol* pmol = mols[i];
213     map<OBAtom*, int> atomMap;
214    
215 gezelter 1398 //chainParser->PerceiveChains(*pmol, false);
216 gezelter 1390 molIsWater = false;
217     FOR_RESIDUES_OF_MOL(residue, *pmol) {
218     std::cerr << "residue = " << residue->GetName() << "\n";
219     if (residue->GetName().compare("HOH") == 0) {
220     molIsWater = true;
221     }
222     }
223    
224     if (molIsWater) {
225     // water include files define all of the known water types
226     os << "#include \"water.md\";\n";
227     pmol->SetTitle("HOH");
228     } else {
229    
230     os << "molecule {\n";
231     sprintf(buffer, "%d", i);
232     os << " name = \"" << molPrefix << buffer << "\";\n";
233    
234     int ai = 0;
235     FOR_ATOMS_OF_MOL(atom, *pmol ) {
236     str = atom->GetType();
237     r = atom->GetResidue();
238    
239     if (r == NULL)
240     resName = "NULL";
241     else
242     resName = r->GetName();
243    
244     if (resName.compare("NULL") ==0 ||
245     resName.compare("LIG") == 0 ||
246     resName.compare("UNK") == 0) {
247     // Either couldn't find a residue at all or couldn't find a
248     // reasonable residue name to use. We'll punt and use
249     // OpenBabel's internal atom typing:
250     ttab.SetFromType("INT");
251     ttab.SetToType("INT");
252     ttab.Translate(str1, str);
253     } else {
254    
255     // If we know what residue we've got, the specific atom name can
256     // be used to help specify partial charges.
257    
258     //resdat.SetResName(resName);
259    
260     // atom type from residue:
261     str = r->GetAtomID(&*atom);
262    
263     // arginine has separate indices for chemically-identical
264     // nitrogen atoms:
265     if (resName.compare("ARG") == 0) {
266     if (str.compare("NH1") == 0 || str.compare("NH2") == 0) {
267     str = "NH";
268     }
269     }
270     if (resName.compare("VAL") == 0) {
271     if (str.compare("CG1") == 0 || str.compare("CG2") == 0) {
272     str = "CG";
273     }
274     }
275     if (resName.compare("LEU") == 0) {
276     if (str.compare("CD1") == 0 || str.compare("CD2") == 0) {
277     str = "CD";
278     }
279     }
280     if (resName.compare("ASP") == 0) {
281     if (str.compare("OD1") == 0 || str.compare("OD2") == 0) {
282     str = "OD";
283     }
284     }
285     if (resName.compare("GLU") == 0) {
286     if (str.compare("OE1") == 0 || str.compare("OE2") == 0) {
287     str = "OE";
288     }
289     }
290     if (resName.compare("TYR") == 0) {
291     if (str.compare("CD1") == 0 || str.compare("CD2") == 0) {
292     str = "CD";
293     }
294     if (str.compare("CE1") == 0 || str.compare("CE2") == 0) {
295     str = "CE";
296     }
297     }
298    
299    
300     if ((&*atom)->IsHydrogen()) {
301     FOR_NBORS_OF_ATOM(nbr, *atom) {
302     str2 = r->GetAtomID(&*nbr);
303     size_t startpos = str2.find_first_not_of(" ");
304     size_t endpos = str2.find_last_not_of(" ");
305     if ((endpos - startpos) < 1) {
306     // if the bonded atom type has only one character (i.e. N)
307     // then the hydrogen will be labeled "HN" to show what
308     // kind of proton it is:
309     str3 = str2;
310     } else {
311     if (str2.compare("OH") == 0) {
312     str3 = "O";
313     } else {
314     // When the bonded atom type is more specific, we drop
315     // the first character: i.e. H bonded to OG1 is HG1 type:
316     str3 = str2.substr(startpos+1, endpos-startpos);
317     }
318     }
319     str = "H" + str3;
320     }
321     // same problem with arginine NH atoms, but now for connected hydrogens
322     if (resName.compare("ARG") == 0) {
323     if (str.compare("HH1") == 0 || str.compare("HH2") == 0) {
324     str = "HH";
325     }
326     }
327     if (resName.compare("VAL") == 0) {
328     if (str.compare("HG1") == 0 || str.compare("HG2") == 0) {
329     str = "HG";
330     }
331     }
332     if (resName.compare("LEU") == 0) {
333     if (str.compare("HD1") == 0 || str.compare("HD2") == 0) {
334     str = "HD";
335     }
336     }
337     if (resName.compare("TYR") == 0) {
338     if (str.compare("HD1") == 0 || str.compare("HD2") == 0) {
339     str = "HD";
340     }
341     if (str.compare("HE1") == 0 || str.compare("HE2") == 0) {
342     str = "HE";
343     }
344     }
345    
346     }
347    
348     // atom type from residue table:
349     //resdat.LookupType(str, str2, hyb);
350     size_t startpos = str.find_first_not_of(" ");
351     size_t endpos = str.find_last_not_of(" ");
352     str = str.substr( startpos, endpos-startpos+1 );
353     str1 = resName + "-" + str;
354     }
355     os << " atom[" << ai << "] { ";
356     os << "type = " << "\"" << str1 << "\"" << "; ";
357 gezelter 1397 os << "position( " << (&*atom)->GetX() << ", " << (&*atom)->GetY() << ", " << (&*atom)->GetZ() << ");";
358 gezelter 1390 os << "}\n";
359     atomMap[&(*atom)] = ai++;
360     }
361     os << "\n";
362    
363     //bond
364    
365     int b1, b2;
366     FOR_BONDS_OF_MOL(bond, *pmol ) {
367     b1 = atomMap[bond->GetBeginAtom()];
368     b2 = atomMap[bond->GetEndAtom()];
369    
370     os << " bond { ";
371    
372     if (b1 < b2)
373     os << "members(" << b1 << ", " << b2 << "); ";
374     else
375     os << "members(" << b2 << ", " << b1 << "); ";
376    
377     os << "}" << endl;
378     }
379    
380     os << endl;
381    
382     os << "}" << endl;
383     os << endl;
384     }
385     }
386    
387     os << endl;
388    
389     for(i=0; i < mols.size(); ++i) {
390     OBMol* pmol = mols[i];
391     os << "component{" << endl;
392     if (std::string(pmol->GetTitle()).compare("HOH") == 0) {
393     os << " type = " << "HOH" << ";" << endl;
394     } else {
395     sprintf(buffer, "%d", i);
396     os << " type = " << molPrefix << buffer << ";" << endl;
397     }
398     os << " nMol = " << numMols[i]<< ";" << endl;
399     os << "}" << endl;
400     }
401    
402     os << " </MetaData>" << endl;
403     os << " <Snapshot>" << endl;
404     os << " <FrameData>" << endl;
405    
406     sprintf(buffer, " Time: %.10g", 0.0);
407    
408     os << buffer << endl;
409    
410     CalcBoundingBox(mol, min_x, max_x, min_y, max_y, min_z, max_z);
411    
412     // still to do: should compute a bounding box here
413     sprintf(buffer, " Hmat: {{ %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }}",
414     max_x - min_x, 0.0, 0.0, 0.0, max_y - min_y, 0.0, 0.0, 0.0, max_z - min_z);
415    
416     os << buffer << endl;
417     os << " </FrameData>" << endl;
418     os << " <StuntDoubles>" << endl;
419    
420     OBAtom *atom;
421    
422     // still to do: intercept waters and recompute pvqj lines
423    
424     for(vector<int>::iterator i = indices.begin();i != indices.end(); ++i) {
425     atom = mol.GetAtom(*i);
426     sprintf(buffer, "%10d %7s %18.10g %18.10g %18.10g %13e %13e %13e", *i - 1,
427     "pv", atom->GetX(), atom->GetY(), atom->GetZ(), 0.0, 0.0, 0.0);
428     os << buffer << endl;
429     }
430     os << " </StuntDoubles>" << endl;
431     os << " </Snapshot>" << endl;
432     os << "</OpenMD>" << endl;
433     }
434    
435     void OpenMDFormat::CalcBoundingBox(OBMol &mol,
436     double &min_x, double &max_x,
437     double &min_y, double &max_y,
438     double &min_z, double &max_z
439     )
440     {
441     /* ---- Init bounding-box variables ---- */
442     min_x = (double) 0.0;
443     max_x = (double) 0.0;
444     min_y = (double) 0.0;
445     max_y = (double) 0.0;
446     min_z = (double) 0.0;
447     max_z = (double) 0.0;
448    
449     /* ---- Check all atoms ---- */
450     for(unsigned int i = 1; i <= mol.NumAtoms(); ++i)
451     {
452    
453     /* ---- Get a pointer to ith atom ---- */
454     OBAtom *atom = mol.GetAtom(i);
455    
456     /* ---- Check for minimal/maximal x-position ---- */
457     if (atom -> GetX() < min_x)
458     min_x = atom -> GetX();
459     if (atom -> GetX() > max_x)
460     max_x = atom -> GetX();
461    
462     /* ---- Check for minimal/maximal y-position ---- */
463     if (atom -> GetY() < min_y)
464     min_y = atom -> GetY();
465     if (atom -> GetY() > max_y)
466     max_y = atom -> GetY();
467    
468     /* ---- Check for minimal/maximal z-position ---- */
469     if (atom -> GetZ() < min_z)
470     min_z = atom -> GetZ();
471     if (atom -> GetZ() > max_z)
472     max_z = atom -> GetZ();
473    
474     }
475     }
476     } //namespace OpenBabel
477