1 |
/********************************************************************** |
2 |
Copyright (C) 2000 by OpenEye Scientific Software, Inc. |
3 |
Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison |
4 |
Some portions Copyright (C) 2004 by Chris Morley |
5 |
Some portions Copyright (C) 2008-2009 by J. Daniel Gezelter |
6 |
|
7 |
This program is free software; you can redistribute it and/or modify |
8 |
it under the terms of the GNU General Public License as published by |
9 |
the Free Software Foundation version 2 of the License. |
10 |
|
11 |
This program is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
GNU General Public License for more details. |
15 |
***********************************************************************/ |
16 |
|
17 |
#include <openbabel/babelconfig.h> |
18 |
#include <openbabel/obmolecformat.h> |
19 |
#include <openbabel/obiter.h> |
20 |
#include <openbabel/mol.h> |
21 |
#include <openbabel/chains.h> |
22 |
#include <openbabel/data.h> |
23 |
#include <fstream> |
24 |
|
25 |
#include "utils/StringUtils.hpp" |
26 |
|
27 |
using namespace std; |
28 |
namespace OpenBabel |
29 |
{ |
30 |
|
31 |
class OpenMDFormat : public OBMoleculeFormat |
32 |
{ |
33 |
public: |
34 |
//Register this format type ID |
35 |
OpenMDFormat() |
36 |
{ |
37 |
OBConversion::RegisterFormat("md",this); |
38 |
} |
39 |
|
40 |
virtual const char* Description() //required |
41 |
{ |
42 |
return |
43 |
"OpenMD combined meta-data / cartesian coordinates format\n\ |
44 |
No comments yet\n"; |
45 |
}; |
46 |
|
47 |
virtual const char* SpecificationURL() |
48 |
{return "http://www.openmd.org";}; //optional |
49 |
|
50 |
virtual const char* GetMIMEType() |
51 |
{return "chemical/x-md"; }; |
52 |
|
53 |
virtual unsigned int Flags() |
54 |
{ |
55 |
return NOTREADABLE | WRITEONEONLY; |
56 |
} |
57 |
|
58 |
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); |
59 |
|
60 |
private: |
61 |
bool AreSameFragments(OBMol& mol, vector<int>& frag1, vector<int>& frag2); |
62 |
OBMol* createMolFromFragment(OBMol& mol, vector<int>& fragment); |
63 |
void WriteMDFile(vector<OBMol*> mols, vector<int> numMols, ostream& os, |
64 |
OBMol& mol, vector<int>& indices); |
65 |
void CalcBoundingBox(OBMol &mol, |
66 |
double &min_x, double &max_x, |
67 |
double &min_y, double &max_y, |
68 |
double &min_z, double &max_z); |
69 |
|
70 |
}; |
71 |
|
72 |
//Make an instance of the format class |
73 |
OpenMDFormat theOpenMDFormat; |
74 |
|
75 |
/**
 * Writes the molecule in pOb as an OpenMD file.
 *
 * The output does not go to the conversion's output stream: the file name is
 * derived from the conversion's *input* file name by replacing its extension
 * (or appending one when there is none) with ".md".
 *
 * The molecule is split into contiguous fragments. Fragments that
 * AreSameFragments() considers alike are grouped into one molecule type; one
 * template molecule is built per type and handed to WriteMDFile() together
 * with the number of copies and the atom indices in grouped order.
 *
 * @param pOb   object to write; must be an OBMol.
 * @param pConv conversion providing the input file name.
 * @return false if pOb is not an OBMol or the output file cannot be opened,
 *         true otherwise.
 */
bool OpenMDFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) {
  OBMol* pmol = dynamic_cast<OBMol*>(pOb);
  if (pmol == NULL)
    return false;

  // Work out the output file name. A '.' only counts as the start of an
  // extension when it lies in the last path component; otherwise a dotted
  // directory name ("my.dir/water") would be truncated to "my.md".
  string OutputFileName = pConv->GetInFilename();
  const size_t dotPos = OutputFileName.rfind('.');
  const size_t sepPos = OutputFileName.find_last_of("/\\");
  const bool hasExtension =
    dotPos != string::npos && (sepPos == string::npos || dotPos > sepPos);
  if (hasExtension)
    OutputFileName = OutputFileName.substr(0, dotPos) + ".md";
  else
    OutputFileName += ".md";

  // Open the file before allocating anything, so that the failure path has
  // nothing to clean up.
  ofstream ofs(OutputFileName.c_str());
  if (!ofs) {
    cerr << "Cannot write to " << OutputFileName << endl;
    return false;
  }

  vector<vector<int> > fragmentLists;
  pmol->ContigFragList(fragmentLists);

  // Group fragments into molecule types. "indices" collects the atom indices
  // in grouped order (all fragments of type 0, then type 1, ...).
  const size_t nFragments = fragmentLists.size();
  vector<bool> used(nFragments, false);
  vector<vector<int> > molecules;
  vector<int> indices;
  for (size_t i = 0; i < nFragments; ++i) {
    if (used[i])
      continue;

    used[i] = true;
    vector<int> sameMolTypes;
    sameMolTypes.push_back(static_cast<int>(i));
    indices.insert(indices.end(), fragmentLists[i].begin(),
                   fragmentLists[i].end());

    for (size_t j = i + 1; j < nFragments; ++j) {
      if (used[j])
        continue;

      if (AreSameFragments(*pmol, fragmentLists[i], fragmentLists[j])) {
        sameMolTypes.push_back(static_cast<int>(j));
        indices.insert(indices.end(), fragmentLists[j].begin(),
                       fragmentLists[j].end());
        used[j] = true;
      }
    }
    molecules.push_back(sameMolTypes);
  }

  // One template molecule (built from the first fragment of the group) and
  // one copy count per molecule type.
  vector<OBMol*> mdMols;
  vector<int> numMols;
  for (vector<vector<int> >::iterator group = molecules.begin();
       group != molecules.end(); ++group) {
    mdMols.push_back(createMolFromFragment(*pmol,
                                           fragmentLists[group->front()]));
    numMols.push_back(static_cast<int>(group->size()));
  }

  WriteMDFile(mdMols, numMols, ofs, *pmol, indices);

  // The template molecules were allocated by createMolFromFragment().
  for (vector<OBMol*>::iterator m = mdMols.begin(); m != mdMols.end(); ++m) {
    delete *m;
  }

  return true;
}
142 |
|
143 |
/**
 * Heuristic test for whether two fragments of mol are the same kind of
 * molecule.
 *
 * A real answer would need graph matching, which is expensive. Instead the
 * two index lists are walked in parallel and only the atomic numbers at
 * matching positions are compared, so fragments that differ in connectivity
 * (or merely in atom order) can be misclassified.
 *
 * @todo using sparse matrix to store the connectivities
 *
 * @param mol   molecule that owns the atoms.
 * @param frag1 atom indices of the first fragment.
 * @param frag2 atom indices of the second fragment.
 * @return true when both lists have the same length and the same sequence of
 *         atomic numbers.
 */
bool OpenMDFormat::AreSameFragments(OBMol& mol, vector<int>& frag1,
                                    vector<int>& frag2) {
  // Fragments of different size can never be the same molecule.
  if (frag1.size() != frag2.size())
    return false;

  vector<int>::const_iterator lhs = frag1.begin();
  vector<int>::const_iterator rhs = frag2.begin();
  for (; lhs != frag1.end(); ++lhs, ++rhs) {
    OBAtom* first  = mol.GetAtom(*lhs);
    OBAtom* second = mol.GetAtom(*rhs);

    if (first->GetAtomicNum() != second->GetAtomicNum())
      return false;
  }

  return true;
}
163 |
|
164 |
struct SameAngle { |
165 |
bool operator()(const triple<OBAtom*,OBAtom*,OBAtom*> t1, |
166 |
const triple<OBAtom*,OBAtom*,OBAtom*> t2) const { |
167 |
return (t1.second == t2.second) && ( (t1.first == t2.first && t1.third == t2.third) || (t1.first == t2.third && t1.third == t2.first)); |
168 |
} |
169 |
}; |
170 |
|
171 |
|
172 |
/**
 * Builds a stand-alone molecule from a subset of the atoms of mol.
 *
 * Each listed atom is copied into a new OBMol; bonds are not copied but
 * re-derived on the new molecule with ConnectTheDots() and
 * PerceiveBondOrders().
 *
 * @param mol      molecule to copy atoms from.
 * @param fragment indices (as accepted by OBMol::GetAtom) of the atoms to copy.
 * @return a molecule allocated with new; the caller is responsible for
 *         deleting it.
 */
OBMol* OpenMDFormat::createMolFromFragment(OBMol& mol,
                                           vector<int>& fragment) {
  OBMol* fragmentMol = new OBMol();
  fragmentMol->ReserveAtoms(fragment.size());

  // Copy the atoms one by one, in the order given by the index list.
  fragmentMol->BeginModify();
  for (size_t k = 0; k < fragment.size(); ++k) {
    OBAtom* copy = fragmentMol->NewAtom();
    *copy = *mol.GetAtom(fragment[k]);
  }
  fragmentMol->EndModify();

  // Rebuild the bonding of the copy.
  fragmentMol->ConnectTheDots();
  fragmentMol->PerceiveBondOrders();

  return fragmentMol;
}
190 |
|
191 |
namespace {

  /// One renaming rule: in residue <residue>, atoms called <site>1 or
  /// <site>2 are written with the shared name <site>.
  struct MDEquivalentSite {
    const char* residue;
    const char* site;
  };

  /// Heavy-atom sites that carry separate indices although they are treated
  /// as chemically identical (e.g. the two NH nitrogens of arginine).
  const MDEquivalentSite mdHeavyAtomSites[] = {
    { "ARG", "NH" },
    { "VAL", "CG" },
    { "LEU", "CD" },
    { "ASP", "OD" },
    { "GLU", "OE" },
    { "TYR", "CD" },
    { "TYR", "CE" }
  };

  /// The same problem for the hydrogens attached to such sites.
  const MDEquivalentSite mdHydrogenSites[] = {
    { "ARG", "HH" },
    { "VAL", "HG" },
    { "LEU", "HD" },
    { "TYR", "HD" },
    { "TYR", "HE" }
  };

  /// Applies every rule of the table that belongs to resName to atomName.
  void mdMergeEquivalentSites(const MDEquivalentSite* rules, size_t nRules,
                              const std::string& resName,
                              std::string& atomName) {
    for (size_t k = 0; k < nRules; ++k) {
      if (resName.compare(rules[k].residue) != 0)
        continue;
      const std::string site(rules[k].site);
      if (atomName == site + "1" || atomName == site + "2")
        atomName = site;
    }
  }

  /// Returns text without leading/trailing blanks; an empty or all-blank
  /// input yields an empty string instead of an out-of-range substr() call.
  std::string mdStripBlanks(const std::string& text) {
    const std::string::size_type first = text.find_first_not_of(" ");
    if (first == std::string::npos)
      return std::string();
    const std::string::size_type last = text.find_last_not_of(" ");
    return text.substr(first, last - first + 1);
  }

  /// Derives the part of a hydrogen's name that follows the leading "H" from
  /// the name of the atom it is bonded to.
  std::string mdHydrogenSuffix(const std::string& bonded) {
    const std::string::size_type first = bonded.find_first_not_of(" ");
    const std::string::size_type last  = bonded.find_last_not_of(" ");

    // Zero or one significant character (i.e. N): the hydrogen is labeled
    // with the whole bonded name, giving e.g. "HN".
    if (last == first)
      return bonded;

    if (bonded.compare("OH") == 0)
      return "O";

    // A more specific bonded name loses its first character:
    // i.e. H bonded to OG1 is of type HG1.
    return bonded.substr(first + 1, last - first);
  }

} // anonymous namespace

/**
 * Writes a complete OpenMD document to os.
 *
 * The <MetaData> section holds one "molecule" definition per entry of mols
 * (or an include of "water.md" when the entry contains a residue named
 * "HOH", in which case the entry's title is also set to "HOH"), followed by
 * one "component" block per entry giving its type and copy count. The
 * <Snapshot> section holds a frame header (time 0 and a diagonal Hmat taken
 * from the bounding box of mol) and one StuntDoubles line per entry of
 * indices with the atom position and zero velocity.
 *
 * Atom type names: atoms without a residue, or in residues "LIG"/"UNK", get
 * Open Babel's internal atom type; all other atoms are named
 * "<residue>-<atom id>", with chemically equivalent sites merged and
 * hydrogens named after the atom they are bonded to.
 *
 * @param mols    one template molecule per molecule type.
 * @param numMols number of copies of each type; parallel to mols.
 * @param os      stream the document is written to.
 * @param mol     the full system; source of coordinates and bounding box.
 * @param indices atom indices of mol (as accepted by OBMol::GetAtom) in the
 *                order in which the StuntDoubles lines are written.
 */
void OpenMDFormat::WriteMDFile(vector<OBMol*> mols, vector<int> numMols,
                               ostream& os, OBMol& mol,
                               vector<int>& indices) {

  const std::string molPrefix("MolName");
  const int BUFFLEN = 1024;
  char buffer[BUFFLEN];   // comfortably larger than any line formatted below
  std::string atomType;   // type name written for the current atom
  unsigned int i;

  const size_t nHeavyRules = sizeof(mdHeavyAtomSites) / sizeof(mdHeavyAtomSites[0]);
  const size_t nHydrogenRules = sizeof(mdHydrogenSites) / sizeof(mdHydrogenSites[0]);

  os << "<OpenMD version=2>" << "\n";
  os << " <MetaData>" << "\n" << "\n";

  // ---- molecule definitions ----
  for (i = 0; i < mols.size(); ++i) {
    OBMol* pmol = mols[i];

    bool molIsWater = false;
    FOR_RESIDUES_OF_MOL(residue, *pmol) {
      if (residue->GetName().compare("HOH") == 0) {
        molIsWater = true;
      }
    }

    if (molIsWater) {
      // water include files define all of the known water types
      os << "#include \"water.md\";\n";
      // The title marks this entry as water for the component section below.
      pmol->SetTitle("HOH");
      continue;
    }

    os << "molecule {\n";
    sprintf(buffer, "%u", i);
    os << " name = \"" << molPrefix << buffer << "\";\n";

    // Maps each atom to its index inside this molecule definition so that
    // the bonds below can refer to it.
    map<OBAtom*, int> atomMap;
    int ai = 0;
    FOR_ATOMS_OF_MOL(atom, *pmol) {
      std::string name = atom->GetType();
      OBResidue* r = atom->GetResidue();

      std::string resName;
      if (r == NULL)
        resName = "NULL";
      else
        resName = r->GetName();

      const bool noUsableResidue = resName.compare("NULL") == 0 ||
                                   resName.compare("LIG") == 0 ||
                                   resName.compare("UNK") == 0;

      if (noUsableResidue) {
        // Either couldn't find a residue at all or couldn't find a
        // reasonable residue name to use. We'll punt and use
        // OpenBabel's internal atom typing:
        ttab.SetFromType("INT");
        ttab.SetToType("INT");
        ttab.Translate(atomType, name);
      } else {
        // If we know what residue we've got, the specific atom name can
        // be used to help specify partial charges.
        name = r->GetAtomID(&*atom);
        mdMergeEquivalentSites(mdHeavyAtomSites, nHeavyRules, resName, name);

        if ((&*atom)->IsHydrogen()) {
          // Name the hydrogen after the atom it is bonded to (with more
          // than one neighbour the last one visited wins).
          FOR_NBORS_OF_ATOM(nbr, *atom) {
            name = "H" + mdHydrogenSuffix(r->GetAtomID(&*nbr));
          }
          mdMergeEquivalentSites(mdHydrogenSites, nHydrogenRules,
                                 resName, name);
        }

        atomType = resName + "-" + mdStripBlanks(name);
      }

      os << " atom[" << ai << "] { ";
      os << "type = " << "\"" << atomType << "\"" << "; ";
      os << "position( " << (&*atom)->GetX() << ", " << (&*atom)->GetY()
         << ", " << (&*atom)->GetZ() << ");";
      os << "}\n";
      atomMap[&(*atom)] = ai++;
    }
    os << "\n";

    // bonds, always written with the lower atom index first
    FOR_BONDS_OF_MOL(bond, *pmol) {
      const int b1 = atomMap[bond->GetBeginAtom()];
      const int b2 = atomMap[bond->GetEndAtom()];

      os << " bond { ";
      if (b1 < b2)
        os << "members(" << b1 << ", " << b2 << "); ";
      else
        os << "members(" << b2 << ", " << b1 << "); ";
      os << "}" << "\n";
    }

    os << "\n";
    os << "}" << "\n";
    os << "\n";
  }

  os << "\n";

  // ---- components: how many copies of each molecule type ----
  for (i = 0; i < mols.size(); ++i) {
    OBMol* pmol = mols[i];
    os << "component{" << "\n";
    if (std::string(pmol->GetTitle()).compare("HOH") == 0) {
      os << " type = " << "\"HOH\"" << "; // change to appropriate water model" << "\n";
    } else {
      sprintf(buffer, "%u", i);
      os << " type = " << molPrefix << buffer << ";" << "\n";
    }
    os << " nMol = " << numMols[i] << ";" << "\n";
    os << "}" << "\n";
  }

  os << " </MetaData>" << "\n";
  os << " <Snapshot>" << "\n";
  os << " <FrameData>" << "\n";

  sprintf(buffer, " Time: %.10g", 0.0);
  os << buffer << "\n";

  // The box matrix is diagonal, with the bounding-box extents of the whole
  // system on the diagonal.
  double min_x, max_x, min_y, max_y, min_z, max_z; /* Edges of bounding box */
  CalcBoundingBox(mol, min_x, max_x, min_y, max_y, min_z, max_z);

  sprintf(buffer, " Hmat: {{ %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }}",
          max_x - min_x, 0.0, 0.0, 0.0, max_y - min_y, 0.0, 0.0, 0.0, max_z - min_z);

  os << buffer << "\n";
  os << " </FrameData>" << "\n";
  os << " <StuntDoubles>" << "\n";

  // One "pv" (position + velocity) line per atom; velocities are zero.
  for (vector<int>::iterator idx = indices.begin(); idx != indices.end(); ++idx) {
    OBAtom* atom = mol.GetAtom(*idx);
    sprintf(buffer, "%10d %7s %18.10g %18.10g %18.10g %13e %13e %13e", *idx - 1,
            "pv", atom->GetX(), atom->GetY(), atom->GetZ(), 0.0, 0.0, 0.0);
    os << buffer << "\n";
  }

  os << " </StuntDoubles>" << "\n";
  os << " </Snapshot>" << "\n";
  os << "</OpenMD>" << "\n";

  // Lines are terminated with "\n" rather than endl to avoid one flush per
  // line; flush once so the stream is up to date when this function returns.
  os.flush();
}
428 |
|
429 |
/**
 * Computes the axis-aligned bounding box of all atoms of mol.
 *
 * The extents are seeded from the first atom rather than from 0.0, so the
 * box encloses the atoms only; seeding with zero would silently stretch the
 * box to the origin for systems that do not straddle it.
 *
 * @param mol   molecule whose atoms are examined.
 * @param min_x [out] smallest x coordinate; max_x [out] largest x coordinate.
 * @param min_y [out] smallest y coordinate; max_y [out] largest y coordinate.
 * @param min_z [out] smallest z coordinate; max_z [out] largest z coordinate.
 *
 * All six outputs are 0.0 when the molecule has no atoms.
 */
void OpenMDFormat::CalcBoundingBox(OBMol &mol,
                                   double &min_x, double &max_x,
                                   double &min_y, double &max_y,
                                   double &min_z, double &max_z
                                   )
{
  /* ---- Init bounding-box variables (result for an empty molecule) ---- */
  min_x = max_x = 0.0;
  min_y = max_y = 0.0;
  min_z = max_z = 0.0;

  bool haveAtom = false;

  /* ---- Check all atoms (atom indices start at 1) ---- */
  for (unsigned int i = 1; i <= mol.NumAtoms(); ++i)
    {
      OBAtom *atom = mol.GetAtom(i);
      if (atom == NULL)
        continue;

      const double x = atom->GetX();
      const double y = atom->GetY();
      const double z = atom->GetZ();

      /* ---- The first atom defines the initial (degenerate) box ---- */
      if (!haveAtom)
        {
          min_x = max_x = x;
          min_y = max_y = y;
          min_z = max_z = z;
          haveAtom = true;
          continue;
        }

      /* ---- Grow the box along x ---- */
      if (x < min_x)
        min_x = x;
      if (x > max_x)
        max_x = x;

      /* ---- Grow the box along y ---- */
      if (y < min_y)
        min_y = y;
      if (y > max_y)
        max_y = y;

      /* ---- Grow the box along z ---- */
      if (z < min_z)
        min_z = z;
      if (z > max_z)
        max_z = z;
    }
}
470 |
} //namespace OpenBabel |
471 |
|