1 |
/********************************************************************** |
2 |
patty.cpp - Programmable atom typer. |
3 |
|
4 |
Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. |
5 |
Some portions Copyright (C) 2001-2005 by Geoffrey R. Hutchison |
6 |
|
7 |
This file is part of the Open Babel project. |
8 |
For more information, see <http://openbabel.sourceforge.net/> |
9 |
|
10 |
This program is free software; you can redistribute it and/or modify |
11 |
it under the terms of the GNU General Public License as published by |
12 |
the Free Software Foundation version 2 of the License. |
13 |
|
14 |
This program is distributed in the hope that it will be useful, |
15 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
GNU General Public License for more details. |
18 |
***********************************************************************/ |
19 |
|
20 |
#include "mol.hpp" |
21 |
#include "obutil.hpp" |
22 |
#include "parsmart.hpp" |
23 |
#include "patty.hpp" |
24 |
|
25 |
// Simple programmable atom typer |
26 |
// WPW - 070199 |
27 |
// Usage is in sample main below |
28 |
|
29 |
using namespace std; |
30 |
|
31 |
namespace OpenBabel |
32 |
{ |
33 |
|
34 |
/*! \class patty |
35 |
\brief Programmable Atom Typer |
36 |
|
37 |
Patty stands for programmable atom typer. The patty class was kindly |
38 |
donated by W. Patrick Walters. The patty class provides a more |
39 |
flexible means for atom typing than the OBAtomTyper. The behavior of |
40 |
patty is similar to the OBAtomTyper in that rules apply only to the |
41 |
first atom in the SMARTS pattern. The patty class can read any free |
42 |
format ASCII file which contains SMARTS patterns associated with user |
43 |
defined atom type. The following is an example of a valid patty rule |
44 |
\code |
45 |
O=C hbacceptor |
46 |
\endcode |
47 |
The following is a code sample that demonstrates the use of patty |
48 |
class: |
49 |
\code |
50 |
OBMol mol; |
51 |
|
52 |
string rulefile = "rules.txt"; |
53 |
patty p; |
54 |
p.read_rules(rulefile); |
55 |
vector<string> type; |
56 |
p.assign_types(mol,type); |
57 |
for (int i = 1;i <= mol.NumAtoms();i++) |
58 |
cout << "atom number " << i << " was given a type " << type[i] << endl; |
59 |
\endcode |
60 |
The array indices in the vector<string> into which the result values |
61 |
are placed match the corresponding atom numbers. Since atoms are |
62 |
numbered beginning from one, the first element in the vector<string> |
63 |
is empty, and the values are placed in [1...mol.NumAtoms()]. |
64 |
*/ |
65 |
void patty::read_rules(const string &infile) |
66 |
{ |
67 |
ifstream ifs, ifs1, *ifsP; |
68 |
vector<string> vs; |
69 |
char buffer[BUFF_SIZE]; |
70 |
char tmp_str[BUFF_SIZE]; |
71 |
char patty_dir[BUFF_SIZE]; |
72 |
OBSmartsPattern *sp; |
73 |
|
74 |
ifs.open(infile.c_str()); |
75 |
ifsP= &ifs; |
76 |
if (!ifs) |
77 |
{ |
78 |
if (getenv("BABEL_DATADIR") == NULL) |
79 |
{ |
80 |
#ifdef HAVE_SSTREAM |
81 |
stringstream errorMsg; |
82 |
#else |
83 |
strstream errorMsg; |
84 |
#endif |
85 |
errorMsg << "The BABEL_DATADIR environment variable is not defined" << endl; |
86 |
errorMsg << "Please define it so the program can find " << infile << endl; |
87 |
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); |
88 |
// exit(0); |
89 |
} |
90 |
else |
91 |
strcpy(patty_dir,getenv("BABEL_DATADIR")); |
92 |
strcat(patty_dir,FILE_SEP_CHAR); |
93 |
strcat(patty_dir,infile.c_str()); |
94 |
ifs1.open(patty_dir); |
95 |
ifsP= &ifs1; |
96 |
// if (!ifs1) |
97 |
// { |
98 |
// cerr << "Could not open " << patty_dir << endl; |
99 |
// exit(0); |
100 |
// } |
101 |
} |
102 |
|
103 |
if (!ifsP) |
104 |
{ |
105 |
#ifdef HAVE_SSTREAM |
106 |
stringstream errorMsg; |
107 |
#else |
108 |
strstream errorMsg; |
109 |
#endif |
110 |
errorMsg << "Could not open " << patty_dir << endl; |
111 |
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obWarning); |
112 |
// exit(0); |
113 |
} |
114 |
while (ifsP->getline(buffer,BUFF_SIZE)) |
115 |
{ |
116 |
if (buffer[0] != '#') |
117 |
{ |
118 |
tokenize(vs,buffer," \t\n"); |
119 |
if (vs.size() >= 2) |
120 |
{ |
121 |
strcpy(tmp_str,vs[0].c_str()); |
122 |
sp = new OBSmartsPattern; |
123 |
sp->Init(tmp_str); |
124 |
_sp.push_back(sp); |
125 |
smarts.push_back(vs[0]); |
126 |
typ.push_back(vs[1]); |
127 |
} |
128 |
} |
129 |
} |
130 |
} |
131 |
|
132 |
void patty::assign_rules(std::vector<std::string> &rules) |
133 |
{ |
134 |
vector<string> vs; |
135 |
char buffer[BUFF_SIZE]; |
136 |
char tmp_str[BUFF_SIZE]; |
137 |
unsigned int i; |
138 |
OBSmartsPattern *sp; |
139 |
|
140 |
for ( i = 0 ; i < rules.size() ; i++ ) |
141 |
{ |
142 |
strncpy(buffer, rules[i].c_str(), BUFF_SIZE); |
143 |
if (buffer[0] != '#') |
144 |
{ |
145 |
tokenize(vs,buffer," \t\n"); |
146 |
if (vs.size() >= 2) |
147 |
{ |
148 |
strcpy(tmp_str,vs[0].c_str()); |
149 |
sp = new OBSmartsPattern; |
150 |
sp->Init(tmp_str); |
151 |
_sp.push_back(sp); |
152 |
smarts.push_back(vs[0]); |
153 |
typ.push_back(vs[1]); |
154 |
} |
155 |
} |
156 |
} |
157 |
} |
158 |
|
159 |
|
160 |
//! Assign a type name to every atom of \p mol that is hit by a loaded rule.
//!
//! Rules are applied in the order they were loaded. For each match of a
//! rule's SMARTS pattern, the entry of \p atm_typ indexed by the first
//! matched atom is set to the rule's type name, so a later rule overwrites
//! an earlier one on the same atom.
//!
//! \param mol     Molecule to type.
//! \param atm_typ Output vector, resized to mol.NumAtoms()+1 and indexed by
//!                the atom number reported by the match. Entries of atoms
//!                matched by no rule keep whatever value they had after the
//!                resize.
void patty::assign_types(OBMol &mol, std::vector<std::string> &atm_typ)
{
  atm_typ.resize(mol.NumAtoms()+1);

  obErrorLog.ThrowError(__FUNCTION__,
                        "Ran OpenBabel::PATTY::AssignTypes", obAuditMsg);

  for (unsigned int i = 0; i < _sp.size(); i++)
    {
      _sp[i]->Match(mol);
      // Bind by const reference: this avoids deep-copying the whole match
      // list for every pattern and is valid whether GetMapList() returns a
      // reference or a temporary.
      const vector<vector<int> > &match = _sp[i]->GetMapList();
      if (match.empty())
        continue;

      if (debug)
        {
#ifdef HAVE_SSTREAM
          stringstream errorMsg;
#else
          strstream errorMsg;
#endif
          errorMsg << typ[i] << " " << smarts[i] << " matched ";
          obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obDebug);
        }

      for (unsigned int j = 0; j < match.size(); j++)
        {
          // Only the first atom of the pattern receives the type.
          const int atomIdx = match[j][0];
          if (debug)
            {
#ifdef HAVE_SSTREAM
              stringstream errorMsg;
#else
              strstream errorMsg;
#endif
              errorMsg << atomIdx << " ";
              obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obDebug);
            }
          atm_typ[atomIdx] = typ[i];
        }
    }
}
202 |
|
203 |
//! Assign an integer type code to every atom of \p mol hit by a loaded rule.
//!
//! Works like the string overload, but stores type_to_int() of the rule's
//! type name instead of the name itself. Rules are applied in the order they
//! were loaded, so a later rule overwrites an earlier one on the same atom.
//!
//! \param mol     Molecule to type.
//! \param atm_typ Output vector, resized to mol.NumAtoms()+1 and indexed by
//!                the atom number reported by the match. Entries of atoms
//!                matched by no rule keep whatever value they had after the
//!                resize.
void patty::assign_types(OBMol &mol,vector<int> &atm_typ)
{
  atm_typ.resize(mol.NumAtoms()+1);

  obErrorLog.ThrowError(__FUNCTION__,
                        "Ran OpenBabel::PATTY::AssignTypes", obAuditMsg);

  for (unsigned int i = 0; i < _sp.size(); i++)
    {
      _sp[i]->Match(mol);
      // Bind by const reference: this avoids deep-copying the whole match
      // list for every pattern and is valid whether GetMapList() returns a
      // reference or a temporary.
      const vector<vector<int> > &match = _sp[i]->GetMapList();
      if (match.empty())
        continue;

      if (debug)
        {
#ifdef HAVE_SSTREAM
          stringstream errorMsg;
#else
          strstream errorMsg;
#endif
          errorMsg << typ[i] << " " << smarts[i] << " matched " ;
          obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obDebug);
        }

      for (unsigned int j = 0; j < match.size(); j++)
        {
          // Only the first atom of the pattern receives the type.
          const int atomIdx = match[j][0];
          if (debug)
            {
#ifdef HAVE_SSTREAM
              stringstream errorMsg;
#else
              strstream errorMsg;
#endif
              errorMsg << atomIdx << " ";
              obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obDebug);
            }
          atm_typ[atomIdx] = type_to_int(typ[i]);
        }
    }
}
245 |
|
246 |
|
247 |
int patty::type_to_int(const string &type, bool failOnUndefined) |
248 |
{ |
249 |
int result; |
250 |
|
251 |
switch(toupper(type.c_str()[0])) |
252 |
{ |
253 |
case 'C' : // CAT - CATION |
254 |
result = PT_CATION; |
255 |
break; |
256 |
case 'A' : |
257 |
if (toupper(type.c_str()[1]) == 'N') // ANI - ANION |
258 |
result = PT_ANION; |
259 |
else |
260 |
result = PT_ACCEPTOR; |
261 |
break; |
262 |
case 'P' : // POL - POLAR |
263 |
result = PT_POLAR; |
264 |
break; |
265 |
case 'D' : // DON - DONOR |
266 |
result = PT_DONOR; |
267 |
break; |
268 |
case 'H' : // HYD - HYDROPHOBIC |
269 |
result = PT_HYDROPHOBIC; |
270 |
break; |
271 |
case 'M' : // Metal |
272 |
result = PT_METAL; |
273 |
break; |
274 |
case 'O' : // OTH - OTHER |
275 |
result = PT_OTHER; |
276 |
break; |
277 |
default : |
278 |
// This was added by Brian, |
279 |
// Behavior will fail if type is undefined |
280 |
if (failOnUndefined) |
281 |
{ |
282 |
#ifdef HAVE_SSTREAM |
283 |
stringstream errorMsg; |
284 |
#else |
285 |
strstream errorMsg; |
286 |
#endif |
287 |
errorMsg << "Unable to find type of feature passed in " << endl; |
288 |
errorMsg << "Feature passed in is " << type << endl; |
289 |
obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obInfo); |
290 |
// exit(-1); |
291 |
} |
292 |
else |
293 |
{ |
294 |
result = 7; |
295 |
} |
296 |
} |
297 |
return(result); |
298 |
} |
299 |
|
300 |
//! return null if the type does not exist, the type position otherwise |
301 |
//! the first position start at 1 |
302 |
int patty::Istype(const std::string &type) |
303 |
{ |
304 |
for(unsigned int pos=0; pos < typ.size(); pos++) |
305 |
{ |
306 |
if(typ[pos] == type) |
307 |
return (pos + 1); |
308 |
} |
309 |
|
310 |
return (0); |
311 |
} |
312 |
|
313 |
} |
314 |
|
315 |
#ifdef COMPILE_PATTY_MAIN |
316 |
// This was written for OB-1.x and would need updating for OB-2.0 if needed |
317 |
// Consider it deprecated. |
318 |
|
319 |
int main(int argc, char *argv[]) |
320 |
{ |
321 |
OBMol mol(SDF,SDF); |
322 |
vector<string> types; |
323 |
|
324 |
ifstream ifs(argv[1]); |
325 |
if (!ifs) |
326 |
{ |
327 |
cerr << "Could not open supplied file " << endl; |
328 |
// exit(0); |
329 |
} |
330 |
|
331 |
patty p("simple.txt"); |
332 |
for (;;) |
333 |
{ |
334 |
ifs >> mol; |
335 |
if (!mol.NumAtoms()) |
336 |
break; |
337 |
p.assign_types(mol,types); |
338 |
mol.Clear(); |
339 |
} |
340 |
|
341 |
for (int i = 1; i < types.size(); i++) |
342 |
{ |
343 |
cout << i << " " << types[i] << endl; |
344 |
} |
345 |
} |
346 |
|
347 |
#endif |
348 |
|
349 |
//! \file patty.cpp |
350 |
//! \brief Programmable atom typer. |