1 |
/********************************************************************** |
2 |
patty.cpp - Programmable atom typer. |
3 |
|
4 |
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 |
Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison |
6 |
|
7 |
This file is part of the Open Babel project. |
8 |
For more information, see <http://openbabel.sourceforge.net/> |
9 |
|
10 |
This program is free software; you can redistribute it and/or modify |
11 |
it under the terms of the GNU General Public License as published by |
12 |
the Free Software Foundation version 2 of the License. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, |
15 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
GNU General Public License for more details. |
18 |
***********************************************************************/ |
19 |
|
20 |
#include "mol.hpp" |
21 |
#include "obutil.hpp" |
22 |
#include "parsmart.hpp" |
23 |
#include "patty.hpp" |
24 |
|
25 |
// Simple programmable atom typer |
26 |
// WPW - 070199 |
27 |
// Usage is in sample main below |
28 |
|
29 |
using namespace std; |
30 |
|
31 |
namespace OpenBabel |
32 |
{ |
33 |
|
34 |
/*! \class patty
    \brief Programmable Atom Typer

    Patty stands for programmable atom typer. The patty class was kindly
    donated by W. Patrick Walters. The patty class provides a more
    flexible means for atom typing than the OBAtomTyper. The behavior of
    patty is similar to the OBAtomTyper in that rules apply only to the
    first atom in the SMARTS pattern. The patty class can read any free
    format ASCII file which contains SMARTS patterns associated with
    user-defined atom types. The following is an example of a valid patty rule
    \code
    O=C hbacceptor
    \endcode
    The following is a code sample that demonstrates the use of the patty
    class:
    \code
    OBMol mol;

    string rulefile = "rules.txt";
    patty p;
    p.read_rules(rulefile);
    vector<string> type;
    p.assign_types(mol,type);
    for (int i = 1;i <= mol.NumAtoms();i++)
        cout << "atom number " << i << " was given a type " << type[i] << endl;
    \endcode
    The array indices in the vector<string> into which the result values
    are placed match the corresponding atom numbers. Since atoms are
    numbered beginning from one, the first element in the vector<string>
    is empty, and the values are placed in [1...mol.NumAtoms()].
*/
65 |
void patty::read_rules(const string &infile) |
66 |
{ |
67 |
ifstream ifs, ifs1, *ifsP; |
68 |
vector<string> vs; |
69 |
char buffer[BUFF_SIZE]; |
70 |
char tmp_str[BUFF_SIZE]; |
71 |
string patty_dir; |
72 |
OBSmartsPattern *sp; |
73 |
|
74 |
ifs.open(infile.c_str()); |
75 |
ifsP= &ifs; |
76 |
if (!ifs) |
77 |
{ |
78 |
if (getenv("BABEL_DATADIR") == NULL) |
79 |
{ |
80 |
#ifdef HAVE_SSTREAM |
81 |
stringstream errorMsg; |
82 |
#else |
83 |
strstream errorMsg; |
84 |
#endif |
85 |
errorMsg << "The BABEL_DATADIR environment variable is not defined" << endl; |
86 |
errorMsg << "Please define it so the program can find " << infile << endl; |
87 |
obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning); |
88 |
// exit(0); |
89 |
} |
90 |
else |
91 |
patty_dir = getenv("BABEL_DATADIR"); |
92 |
patty_dir += FILE_SEP_CHAR; |
93 |
patty_dir += infile; |
94 |
ifs1.open(patty_dir.c_str()); |
95 |
ifsP= &ifs1; |
96 |
// if (!ifs1) |
97 |
// { |
98 |
// cerr << "Could not open " << patty_dir << endl; |
99 |
// exit(0); |
100 |
// } |
101 |
} |
102 |
|
103 |
if (!ifsP) |
104 |
{ |
105 |
#ifdef HAVE_SSTREAM |
106 |
stringstream errorMsg; |
107 |
#else |
108 |
strstream errorMsg; |
109 |
#endif |
110 |
errorMsg << "Could not open " << patty_dir << endl; |
111 |
obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning); |
112 |
// exit(0); |
113 |
} |
114 |
while (ifsP->getline(buffer,BUFF_SIZE)) |
115 |
{ |
116 |
if (buffer[0] != '#') |
117 |
{ |
118 |
tokenize(vs,buffer," \t\n"); |
119 |
if (vs.size() >= 2) |
120 |
{ |
121 |
strncpy(tmp_str,vs[0].c_str(), sizeof(tmp_str) - 1); |
122 |
tmp_str[sizeof(tmp_str) - 1] = '\0'; |
123 |
sp = new OBSmartsPattern; |
124 |
sp->Init(tmp_str); |
125 |
_sp.push_back(sp); |
126 |
smarts.push_back(vs[0]); |
127 |
typ.push_back(vs[1]); |
128 |
} |
129 |
} |
130 |
} |
131 |
} |
132 |
|
133 |
void patty::assign_rules(std::vector<std::string> &rules) |
134 |
{ |
135 |
vector<string> vs; |
136 |
char buffer[BUFF_SIZE]; |
137 |
char tmp_str[BUFF_SIZE]; |
138 |
unsigned int i; |
139 |
OBSmartsPattern *sp; |
140 |
|
141 |
for ( i = 0 ; i < rules.size() ; i++ ) |
142 |
{ |
143 |
strncpy(buffer, rules[i].c_str(), BUFF_SIZE); |
144 |
if (buffer[0] != '#') |
145 |
{ |
146 |
tokenize(vs,buffer," \t\n"); |
147 |
if (vs.size() >= 2) |
148 |
{ |
149 |
strncpy(tmp_str,vs[0].c_str(), sizeof(tmp_str) - 1); |
150 |
tmp_str[sizeof(tmp_str) - 1] = '\0'; |
151 |
sp = new OBSmartsPattern; |
152 |
sp->Init(tmp_str); |
153 |
_sp.push_back(sp); |
154 |
smarts.push_back(vs[0]); |
155 |
typ.push_back(vs[1]); |
156 |
} |
157 |
} |
158 |
} |
159 |
} |
160 |
|
161 |
|
162 |
//! \brief Assign a type name to every atom matched by the loaded rules.
//!
//! \param mol     Molecule to type.
//! \param atm_typ Receives the type names. It is resized to NumAtoms()+1 and
//!                indexed by the atom index reported by the SMARTS match, so
//!                element 0 is not written. Entries of atoms that no rule
//!                matches keep whatever value they had after the resize.
//!
//! Rules are applied in the order they were loaded, so when several rules
//! match the same atom the last one wins. Only the first atom of each SMARTS
//! match receives the type.
void patty::assign_types(OBMol &mol, std::vector<std::string> &atm_typ)
{
    atm_typ.resize(mol.NumAtoms()+1);

    obErrorLog.ThrowError(__func__,
                          "Ran OpenBabel::PATTY::AssignTypes", obAuditMsg);

    for (unsigned int i = 0; i < _sp.size(); i++)
    {
        _sp[i]->Match(mol);
        // Bind by const reference instead of copying the whole map list for
        // every pattern; this is valid whether GetMapList() returns a
        // reference or a temporary.
        const vector<vector<int> > &match = _sp[i]->GetMapList();
        if (match.empty())
            continue;

        if (debug)
        {
#ifdef HAVE_SSTREAM
            stringstream errorMsg;
#else
            strstream errorMsg;
#endif
            errorMsg << typ[i] << " " << smarts[i] << " matched ";
            obErrorLog.ThrowError(__func__, errorMsg.str(), obDebug);
        }

        for (unsigned int j = 0; j < match.size(); j++)
        {
            if (debug)
            {
#ifdef HAVE_SSTREAM
                stringstream errorMsg;
#else
                strstream errorMsg;
#endif
                errorMsg << match[j][0] << " ";
                obErrorLog.ThrowError(__func__, errorMsg.str(), obDebug);
            }
            // The rule types the first atom of the pattern only.
            atm_typ[match[j][0]] = typ[i];
        }
    }
}
204 |
|
205 |
//! \brief Assign an integer type code to every atom matched by the loaded rules.
//!
//! \param mol     Molecule to type.
//! \param atm_typ Receives the codes produced by type_to_int() for the type
//!                name of the matching rule. It is resized to NumAtoms()+1
//!                and indexed by the atom index reported by the SMARTS match,
//!                so element 0 is not written. Entries of atoms that no rule
//!                matches keep whatever value they had after the resize.
//!
//! Rules are applied in the order they were loaded, so when several rules
//! match the same atom the last one wins. Only the first atom of each SMARTS
//! match receives the type.
void patty::assign_types(OBMol &mol,vector<int> &atm_typ)
{
    atm_typ.resize(mol.NumAtoms()+1);

    obErrorLog.ThrowError(__func__,
                          "Ran OpenBabel::PATTY::AssignTypes", obAuditMsg);

    for (unsigned int i = 0; i < _sp.size(); i++)
    {
        _sp[i]->Match(mol);
        // Bind by const reference instead of copying the whole map list for
        // every pattern; this is valid whether GetMapList() returns a
        // reference or a temporary.
        const vector<vector<int> > &match = _sp[i]->GetMapList();
        if (match.empty())
            continue;

        if (debug)
        {
#ifdef HAVE_SSTREAM
            stringstream errorMsg;
#else
            strstream errorMsg;
#endif
            errorMsg << typ[i] << " " << smarts[i] << " matched " ;
            obErrorLog.ThrowError(__func__, errorMsg.str(), obDebug);
        }

        for (unsigned int j = 0; j < match.size(); j++)
        {
            if (debug)
            {
#ifdef HAVE_SSTREAM
                stringstream errorMsg;
#else
                strstream errorMsg;
#endif
                errorMsg << match[j][0] << " ";
                obErrorLog.ThrowError(__func__, errorMsg.str(), obDebug);
            }
            // The rule types the first atom of the pattern only.
            atm_typ[match[j][0]] = type_to_int(typ[i]);
        }
    }
}
247 |
|
248 |
|
249 |
int patty::type_to_int(const string &type, bool failOnUndefined) |
250 |
{ |
251 |
int result; |
252 |
|
253 |
switch(toupper(type.c_str()[0])) |
254 |
{ |
255 |
case 'C' : // CAT - CATION |
256 |
result = PT_CATION; |
257 |
break; |
258 |
case 'A' : |
259 |
if (toupper(type.c_str()[1]) == 'N') // ANI - ANION |
260 |
result = PT_ANION; |
261 |
else |
262 |
result = PT_ACCEPTOR; |
263 |
break; |
264 |
case 'P' : // POL - POLAR |
265 |
result = PT_POLAR; |
266 |
break; |
267 |
case 'D' : // DON - DONOR |
268 |
result = PT_DONOR; |
269 |
break; |
270 |
case 'H' : // HYD - HYDROPHOBIC |
271 |
result = PT_HYDROPHOBIC; |
272 |
break; |
273 |
case 'M' : // Metal |
274 |
result = PT_METAL; |
275 |
break; |
276 |
case 'O' : // OTH - OTHER |
277 |
result = PT_OTHER; |
278 |
break; |
279 |
default : |
280 |
// This was added by Brian, |
281 |
// Behavior will fail if type is undefined |
282 |
if (failOnUndefined) |
283 |
{ |
284 |
#ifdef HAVE_SSTREAM |
285 |
stringstream errorMsg; |
286 |
#else |
287 |
strstream errorMsg; |
288 |
#endif |
289 |
errorMsg << "Unable to find type of feature passed in " << endl; |
290 |
errorMsg << "Feature passed in is " << type << endl; |
291 |
obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo); |
292 |
// exit(-1); |
293 |
} |
294 |
else |
295 |
{ |
296 |
result = 7; |
297 |
} |
298 |
} |
299 |
return(result); |
300 |
} |
301 |
|
302 |
//! return null if the type does not exist, the type position otherwise |
303 |
//! the first position start at 1 |
304 |
int patty::Istype(const std::string &type) |
305 |
{ |
306 |
for(unsigned int pos=0; pos < typ.size(); pos++) |
307 |
{ |
308 |
if(typ[pos] == type) |
309 |
return (pos + 1); |
310 |
} |
311 |
|
312 |
return (0); |
313 |
} |
314 |
|
315 |
} |
316 |
|
317 |
#ifdef COMPILE_PATTY_MAIN
// This was written for OB-1.x and would need updating for OB-2.0 if needed
// Consider it deprecated.

// Sample driver: reads molecules from the file named by the first argument,
// types each one with the rules in "simple.txt", and prints the types that
// are left in the result vector after the last molecule.
// Returns 0 on success and 1 when the input file is missing or unreadable.
int main(int argc, char *argv[])
{
    // argv[1] must not be touched unless it was actually supplied.
    if (argc < 2)
    {
        cerr << "Usage: " << (argc > 0 ? argv[0] : "patty")
             << " <molecule file>" << endl;
        return 1;
    }

    OBMol mol(SDF,SDF);
    vector<string> types;

    ifstream ifs(argv[1]);
    if (!ifs)
    {
        cerr << "Could not open supplied file " << endl;
        return 1; // nothing to type; report the failure to the caller
    }

    patty p("simple.txt");
    for (;;)
    {
        ifs >> mol;
        if (!mol.NumAtoms())
            break;
        p.assign_types(mol,types);
        mol.Clear();
    }

    // Index 0 is unused because atoms are numbered from one.
    for (vector<string>::size_type i = 1; i < types.size(); i++)
    {
        cout << i << " " << types[i] << endl;
    }

    return 0;
}

#endif
350 |
|
351 |
//! \file patty.cpp |
352 |
//! \brief Programmable atom typer. |