1 |
/********************************************************************** |
2 |
Copyright (C) 2000 by OpenEye Scientific Software, Inc. |
3 |
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison |
4 |
Some portions Copyright (C) 2004 by Chris Morley |
5 |
Some portions Copyright (C) 2008 by J. Daniel Gezelter |
6 |
|
7 |
This program is free software; you can redistribute it and/or modify |
8 |
it under the terms of the GNU General Public License as published by |
9 |
the Free Software Foundation version 2 of the License. |
10 |
|
11 |
This program is distributed in the hope that it will be useful, |
12 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 |
GNU General Public License for more details. |
15 |
***********************************************************************/ |
16 |
|
17 |
#include <openbabel/babelconfig.h> |
18 |
#include <openbabel/obmolecformat.h> |
19 |
#include <openbabel/obiter.h> |
20 |
#include <openbabel/mol.h> |
21 |
#include <openbabel/chains.h> |
22 |
#include <fstream> |
23 |
|
24 |
#include "utils/StringUtils.hpp" |
25 |
|
26 |
using namespace std; |
27 |
namespace OpenBabel |
28 |
{ |
29 |
|
30 |
class OOPSEFormat : public OBMoleculeFormat |
31 |
{ |
32 |
public: |
33 |
//Register this format type ID |
34 |
OOPSEFormat() |
35 |
{ |
36 |
OBConversion::RegisterFormat("md",this); |
37 |
} |
38 |
|
39 |
virtual const char* Description() //required |
40 |
{ |
41 |
return |
42 |
"OOPSE combined meta-data / cartesian coordinates format\nNo comments yet\n"; |
43 |
}; |
44 |
|
45 |
virtual const char* SpecificationURL() |
46 |
{return "http://www.oopse.org";}; //optional |
47 |
|
48 |
virtual const char* GetMIMEType() |
49 |
{return "chemical/x-md"; }; |
50 |
|
51 |
virtual unsigned int Flags() |
52 |
{ |
53 |
return NOTREADABLE | WRITEONEONLY; |
54 |
} |
55 |
|
56 |
virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv); |
57 |
|
58 |
private: |
59 |
bool AreSameFragments(OBMol& mol, vector<int>& frag1, vector<int>& frag2); |
60 |
OBMol* createMolFromFragment(OBMol& mol, vector<int>& fragment); |
61 |
void WriteMDFile(vector<OBMol*> mols, vector<int> numMols, ostream& os, |
62 |
OBMol& mol, vector<int>& indices); |
63 |
}; |
64 |
|
65 |
//Make an instance of the format class |
66 |
OOPSEFormat theOOPSEFormat; |
67 |
|
68 |
bool OOPSEFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv) { |
69 |
OBMol* pmol = dynamic_cast<OBMol*>(pOb); |
70 |
if(pmol==NULL) |
71 |
return false; |
72 |
|
73 |
vector<vector<int> > fragmentLists; |
74 |
pmol->ContigFragList(fragmentLists); |
75 |
OBBitVec unused; |
76 |
vector<bool> used(fragmentLists.size(), 0); |
77 |
vector<vector<int> > molecules; |
78 |
vector<int> indices; |
79 |
for(int i =0; i < used.size(); ++i) { |
80 |
if (used[i]) |
81 |
continue; |
82 |
|
83 |
used[i] = true; |
84 |
vector<int> sameMolTypes; |
85 |
sameMolTypes.push_back(i); |
86 |
indices.insert(indices.end(), fragmentLists[i].begin(), |
87 |
fragmentLists[i].end()); |
88 |
for (int j = i + 1;j < used.size(); ++j) { |
89 |
if (used[j]) |
90 |
continue; |
91 |
|
92 |
if (AreSameFragments(*pmol, fragmentLists[i], fragmentLists[j])) { |
93 |
sameMolTypes.push_back(j); |
94 |
indices.insert(indices.end(), fragmentLists[j].begin(), |
95 |
fragmentLists[j].end()); |
96 |
used[j]=true; |
97 |
} |
98 |
} |
99 |
molecules.push_back(sameMolTypes); |
100 |
} |
101 |
|
102 |
vector<OBMol*> mdMols; |
103 |
vector<int> numMols; |
104 |
for(vector<vector<int> >::iterator i = molecules.begin(); |
105 |
i != molecules.end(); ++i) { |
106 |
|
107 |
mdMols.push_back(createMolFromFragment(*pmol, |
108 |
fragmentLists[i->front()])); |
109 |
numMols.push_back((*i).size()); |
110 |
} |
111 |
|
112 |
string OutputFileName = pConv->GetInFilename(); |
113 |
size_t pos = OutputFileName.rfind("."); |
114 |
if(pos!=string::npos) |
115 |
OutputFileName = OutputFileName.substr(0, pos) + ".md"; |
116 |
else |
117 |
OutputFileName += ".md"; |
118 |
|
119 |
ofstream ofs(OutputFileName.c_str()); |
120 |
if(!ofs) { |
121 |
cerr << "Cannot write to " << OutputFileName <<endl; |
122 |
return false; |
123 |
} |
124 |
|
125 |
WriteMDFile(mdMols, numMols, ofs, *pmol, indices); |
126 |
|
127 |
for(vector<OBMol*>::iterator i = mdMols.begin(); i != mdMols.end(); ++i) { |
128 |
delete *i; |
129 |
} |
130 |
|
131 |
return(true); |
132 |
} |
133 |
|
134 |
bool OOPSEFormat::AreSameFragments(OBMol& mol, vector<int>& frag1, |
135 |
vector<int>& frag2) { |
136 |
if (frag1.size() != frag2.size()) |
137 |
return false; |
138 |
|
139 |
// Exact graph matching is an NP complete problem. |
140 |
// This just matches all of the atom atomic numbers and may falsely |
141 |
// detect identical fragments which aren't really identical. |
142 |
// @todo using sparse matrix to store the connectivities |
143 |
|
144 |
for (unsigned int i =0 ; i < frag1.size(); ++i) { |
145 |
OBAtom* atom1 = mol.GetAtom(frag1[i]); |
146 |
OBAtom* atom2 = mol.GetAtom(frag2[i]); |
147 |
|
148 |
if (atom1->GetAtomicNum() != atom2->GetAtomicNum()) |
149 |
return false; |
150 |
|
151 |
} |
152 |
return true; |
153 |
} |
154 |
|
155 |
struct SameAngle { |
156 |
bool operator()(const triple<OBAtom*,OBAtom*,OBAtom*> t1, |
157 |
const triple<OBAtom*,OBAtom*,OBAtom*> t2) const { |
158 |
return (t1.second == t2.second) && ( (t1.first == t2.first && t1.third == t2.third) || (t1.first == t2.third && t1.third == t2.first)); |
159 |
} |
160 |
}; |
161 |
|
162 |
|
163 |
OBMol* OOPSEFormat::createMolFromFragment(OBMol& mol, |
164 |
vector<int>& fragment) { |
165 |
|
166 |
OBMol* newMol = new OBMol(); |
167 |
newMol->ReserveAtoms(fragment.size()); |
168 |
newMol->BeginModify(); |
169 |
for(vector<int>::iterator i = fragment.begin(); i != fragment.end(); ++i) { |
170 |
OBAtom* newAtom = newMol->NewAtom(); |
171 |
*newAtom = *mol.GetAtom(*i); |
172 |
} |
173 |
|
174 |
newMol->EndModify(); |
175 |
newMol->ConnectTheDots(); |
176 |
newMol->PerceiveBondOrders(); |
177 |
|
178 |
return newMol; |
179 |
} |
180 |
|
181 |
void OOPSEFormat::WriteMDFile(vector<OBMol*> mols, vector<int> numMols, |
182 |
ostream& os, OBMol& mol, |
183 |
vector<int>& indices) { |
184 |
|
185 |
std::string molPrefix("MolName"); |
186 |
std::string resName; |
187 |
unsigned int i; |
188 |
const int BUFFLEN = 1024; |
189 |
char buffer[BUFFLEN]; |
190 |
string str, str1; |
191 |
OBAtom *a, *b, *c, *d; |
192 |
bool molIsWater = false; |
193 |
OBResidue *r; |
194 |
int resKey; |
195 |
char type_name[10]; |
196 |
char *element_name; |
197 |
int res_num; |
198 |
OBChainsParser* chainParser = new OBChainsParser(); |
199 |
|
200 |
|
201 |
os << "<OOPSE version=4>" << endl; |
202 |
os << " <MetaData>" << endl << endl; |
203 |
|
204 |
for(i = 0; i < mols.size(); ++i) { |
205 |
OBMol* pmol = mols[i]; |
206 |
map<OBAtom*, int> atomMap; |
207 |
|
208 |
chainParser->PerceiveChains(*pmol, false); |
209 |
molIsWater = false; |
210 |
FOR_RESIDUES_OF_MOL(residue, *pmol) { |
211 |
std::cerr << "residue = " << residue->GetName() << "\n"; |
212 |
if (residue->GetName().compare("HOH") == 0) { |
213 |
molIsWater = true; |
214 |
} |
215 |
} |
216 |
|
217 |
if (molIsWater) { |
218 |
// water include files define all of the known water types |
219 |
os << "#include \"water.md\";\n"; |
220 |
pmol->SetTitle("HOH"); |
221 |
} else { |
222 |
|
223 |
os << "molecule {\n"; |
224 |
sprintf(buffer, "%d", i); |
225 |
os << " name = \"" << molPrefix << buffer << "\";\n"; |
226 |
|
227 |
int ai = 0; |
228 |
FOR_ATOMS_OF_MOL(atom, *pmol ) { |
229 |
str = atom->GetType(); |
230 |
r = atom->GetResidue(); |
231 |
|
232 |
if (r == NULL) |
233 |
resName = "NULL"; |
234 |
else |
235 |
resName = r->GetName(); |
236 |
|
237 |
if (resName.compare("NULL") ==0 || |
238 |
resName.compare("LIG") == 0 || |
239 |
resName.compare("UNK") == 0) { |
240 |
// Either couldn't find a residue at all or couldn't find a |
241 |
// reasonable residue name to use. We'll punt and use |
242 |
// OpenBabel's internal atom typing: |
243 |
ttab.SetFromType("INT"); |
244 |
ttab.SetToType("INT"); |
245 |
ttab.Translate(str1, str); |
246 |
} else { |
247 |
|
248 |
// If we know what residue we've got, the specific atom name can |
249 |
// be used to help specify partial charges. |
250 |
|
251 |
str = r->GetAtomID(&*atom); |
252 |
size_t startpos = str.find_first_not_of(" "); |
253 |
size_t endpos = str.find_last_not_of(" "); |
254 |
str = str.substr( startpos, endpos-startpos+1 ); |
255 |
str1 = resName + "-" + str; |
256 |
} |
257 |
os << " atom[" << ai << "] { "; |
258 |
os << "type = " << "\"" << str1 << "\"" << "; "; |
259 |
os << "}\n"; |
260 |
atomMap[&(*atom)] = ai++; |
261 |
} |
262 |
os << "\n"; |
263 |
|
264 |
//bond |
265 |
|
266 |
int b1, b2; |
267 |
FOR_BONDS_OF_MOL(bond, *pmol ) { |
268 |
b1 = atomMap[bond->GetBeginAtom()]; |
269 |
b2 = atomMap[bond->GetEndAtom()]; |
270 |
|
271 |
os << " bond { "; |
272 |
|
273 |
if (b1 < b2) |
274 |
os << "members(" << b1 << ", " << b2 << "); "; |
275 |
else |
276 |
os << "members(" << b2 << ", " << b1 << "); "; |
277 |
|
278 |
os << "}\n"; |
279 |
} |
280 |
|
281 |
os << "\n"; |
282 |
|
283 |
std::vector<int> possibleInversion; |
284 |
FOR_ATOMS_OF_MOL(atom, *pmol) { |
285 |
possibleInversion.clear(); |
286 |
|
287 |
FOR_NBORS_OF_ATOM(nbor, &*atom) { |
288 |
possibleInversion.push_back(atomMap[&(*nbor)]); |
289 |
} |
290 |
|
291 |
if (possibleInversion.size() == 3) { |
292 |
|
293 |
os << " inversion { "; |
294 |
os << "center(" << atomMap[&(*atom)] << "); "; |
295 |
os << "}\n"; |
296 |
} |
297 |
|
298 |
} |
299 |
os << "}" << endl; |
300 |
os << endl; |
301 |
} |
302 |
} |
303 |
|
304 |
os << endl; |
305 |
|
306 |
|
307 |
for(i=0; i < mols.size(); ++i) { |
308 |
OBMol* pmol = mols[i]; |
309 |
os << "component{" << endl; |
310 |
if (std::string(pmol->GetTitle()).compare("HOH") == 0) { |
311 |
os << " type = " << "HOH" << ";" << endl; |
312 |
} else { |
313 |
sprintf(buffer, "%d", i); |
314 |
os << " type = " << molPrefix << buffer << ";" << endl; |
315 |
} |
316 |
os << " nMol = " << numMols[i]<< ";" << endl; |
317 |
os << "}" << endl; |
318 |
} |
319 |
|
320 |
os << " </MetaData>" << endl; |
321 |
os << " <Snapshot>" << endl; |
322 |
os << " <FrameData>" << endl; |
323 |
|
324 |
sprintf(buffer, " Time: %.10g", 0.0); |
325 |
|
326 |
os << buffer << endl; |
327 |
|
328 |
|
329 |
// should really compute a bounding box here: |
330 |
sprintf(buffer, " Hmat: {{ %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }, { %.10g, %.10g, %.10g }}", 100.0, 0.0, 0.0, 0.0, 100.0, 0.0, 0.0, 0.0, 100.0); |
331 |
|
332 |
os << buffer << endl; |
333 |
os << " </FrameData>" << endl; |
334 |
os << " <StuntDoubles>" << endl; |
335 |
|
336 |
OBAtom *atom; |
337 |
|
338 |
// still to do: intercept waters and recompute pvqj lines |
339 |
|
340 |
for(vector<int>::iterator i = indices.begin();i != indices.end(); ++i) { |
341 |
atom = mol.GetAtom(*i); |
342 |
sprintf(buffer, "%10d %7s %18.10g %18.10g %18.10g %13e %13e %13e", *i - 1, "pv", atom->GetX(), atom->GetY(), atom->GetZ(), 0.0, 0.0, 0.0); |
343 |
os << buffer << endl; |
344 |
} |
345 |
os << " </StuntDoubles>" << endl; |
346 |
os << " </Snapshot>" << endl; |
347 |
os << "</OOPSE>" << endl; |
348 |
} |
349 |
|
350 |
} //namespace OpenBabel |
351 |
|