ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/openbabel/kekulize.cpp
Revision: 1081
Committed: Thu Oct 19 20:49:05 2006 UTC (18 years, 6 months ago) by gezelter
File size: 17953 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# Content
1 /**********************************************************************
2 kekulize.cpp - Alternate algorithm to kekulize a molecule.
3
4 Copyright (C) 2004-2005 by Fabien Fontaine
5 Some portions Copyright (C) 2005 by Geoffrey R. Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19
20 #include "mol.hpp"
21 #include "oberror.hpp"
22
23 #ifdef HAVE_SSTREAM
24 #include <sstream>
25 #else
26 #include <strstream>
27 #endif
28
29 #define SINGLE 1
30 #define DOUBLE 2
31
32 using namespace std;
33
34 namespace OpenBabel {
35
36 ///////////////////////////////////////////////////////////////////////////////
37 //! \brief Kekulize aromatic rings without using implicit valence
38
39 //! This new perceive kekule bonds function has been especifically designed to
40 //! handle molecule files without explicit hydrogens such as pdb or xyz.
41 //! The function does not rely on GetImplicitValence function
42 //! The function looks for groups of aromatic cycle
43 //! For each group it tries to guess the number of electrons given by each atom
44 //! in order to satisfy the huckel (4n+2) rule
45 //! If the huckel rule cannot be satisfied the algorithm try with its best alternative guess
46 //! Then it recursively walk on the atoms of the cycle and assign single and double bonds
47 void OBMol::NewPerceiveKekuleBonds()
48 {
49
50 if (HasKekulePerceived()) return;
51 SetKekulePerceived();
52
53 OBAtom *atom;
54 int n, de, minde;
55 std::vector<OBAtom*> cycle;
56 OBBitVec avisit,cvisit;
57 avisit.Resize(NumAtoms()+1);
58 cvisit.Resize(NumAtoms()+1);
59 OBBond *bond;
60 std::vector<OBEdgeBase*>::iterator bi;
61 std::vector<int> electron;
62 int sume, orden, bestatom;
63 int bestorden = 99;
64 // Init the kekulized bonds
65 unsigned i;
66 FOR_BONDS_OF_MOL(bond, *this)
67 {
68 switch (bond->GetBO())
69 {
70 case 1: bond->SetKSingle(); break;
71 case 2: bond->SetKDouble(); break;
72 case 3: bond->SetKTriple(); break;
73 }
74 }
75
76 // Find all the groups of aromatic cycle
77 for(i=1; i<= NumAtoms(); i++ ) {
78 atom = GetAtom(i);
79 if (atom->HasAromaticBond() && !cvisit[i]) { // is new aromatic atom of an aromatic cycle ?
80
81 avisit.Clear();
82 electron.clear();
83 cycle.clear();
84
85 avisit.SetBitOn(i);
86 expandcycle (atom, avisit);
87 //store the atoms of the cycle(s)
88 unsigned int j;
89 for(j=1; j<= NumAtoms(); j++) {
90 if ( avisit[j] ) {
91 atom = GetAtom(j);
92 cycle.push_back(atom);
93 }
94 }
95 // At the begining each atom give one electron to the cycle
96 for(j=0; j< cycle.size(); j++) {
97 electron.push_back(1);
98 }
99
100 // remove one electron if the atom make a double bond out of the cycle
101 sume =0;
102 for(j=0; j< cycle.size(); j++) {
103 atom = cycle[j];
104 for(bond = atom->BeginBond(bi); bond; bond = atom->NextBond(bi)) {
105 if ( bond->IsDouble() ) {
106 OBAtom *atom2 = bond->GetNbrAtom(atom);
107 int fcharge = atom->GetFormalCharge();
108 int fcharge2 = atom2->GetFormalCharge();
109 if(atom->IsNitrogen() && atom2->IsOxygen()
110 && fcharge == 0 && fcharge2 == 0) { //n=O to [n+][O-]
111 atom->SetFormalCharge(1);
112 atom2->SetFormalCharge(-1);
113 bond->SetKSingle();
114 bond->SetBO(1);
115 }
116 else {
117 electron[j] = 0;
118 }
119 }
120 }
121 // count the number of electrons
122 sume += electron[j];
123 }
124
125
126 // Save the electron state in case huckel rule is not satisfied
127 vector<int> previousElectron = electron;
128
129 // find the ideal number of electrons according to the huckel 4n+2 rule
130 minde=99;
131 for (i=1; 1; i++) {
132 n = 4 *i +2;
133 de = n - sume;
134 if ( de < minde )
135 minde=de;
136 else if ( minde < 0 )
137 minde=de;
138 else
139 break;
140 }
141
142 #ifdef HAVE_SSTREAM
143 stringstream errorMsg;
144 #else
145 strstream errorMsg;
146 #endif
147
148 //cout << "minde before:" << minde << endl;
149 // if huckel rule not satisfied some atoms must give more electrons
150 //cout << "minde " << minde << endl;
151 while ( minde != 0 ) {
152 bestorden=99;
153 for(j=0; j< cycle.size(); j++) {
154 if (electron[j] == 1) {
155 orden = getorden(cycle[j]);
156 if (orden < bestorden) {
157 bestorden = orden;
158 bestatom = j;
159 }
160 }
161 }
162 if (bestorden==99) { // no electron giving atom found
163 errorMsg << "Kekulize: Huckel rule not satisfied for molecule " << GetTitle() << endl;
164 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
165 break; // Huckel rule cannot be satisfied
166 } // try to kekulize anyway
167 else {
168 electron[bestatom] += 1;
169 minde--;
170 }
171 }
172
173 if (bestorden == 99) { // Huckel rule not satisfied, just try to get an even number of electron before kekulizing
174
175 electron = previousElectron; // restore electon's state
176
177 int odd = sume % 2;
178 //cout << "odd:" << odd << endl;
179 if(odd) { // odd number of electrons try to add an electron to the best possible atom
180 for(j=0; j< cycle.size(); j++) {
181 if (electron[j] == 1) {
182 orden = getorden(cycle[j]);
183 if (orden < bestorden) {
184 bestorden = orden;
185 bestatom = j;
186 }
187 }
188 }
189 if (bestorden==99) { // no electron giving atom found
190 errorMsg << "Kekulize: Cannot get an even number of electron for molecule " << GetTitle() << "\n";
191 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
192 break; // impossible to choose an atom to obtain an even number of electron
193 } // try to kekulize anyway
194 else {
195 electron[bestatom] += 1;
196 }
197 }
198 }
199
200 //cout << "minde after:" << minde <<endl;
201 //for(j=0; j < cycle.size(); j++) {
202 //OBAtom *cycleAtom = cycle[j];
203 //cout << "\t" << cycleAtom->GetIdx();
204 //}
205 //cout << endl;
206
207 //for(j=0; j < electron.size(); j++) {
208 //cout << "\t" << electron[j];
209 //}
210 //cout << endl;
211
212 // kekulize the cycle(s)
213 start_kekulize(cycle,electron);
214
215 // Set the kekulized cycle(s) as visited
216 for(j=1; j<= NumAtoms(); j++) {
217 if (avisit[j])
218 cvisit.SetBitOn(j);
219 }
220
221 }
222 }
223 // Double bond have been assigned, set the remaining aromatic bonds to single
224 //std::cout << "Set not assigned single bonds\n";
225 for(i=0;i <NumBonds(); i++) {
226 bond = GetBond(i);
227 //std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
228 if (bond->GetBO()==5 ) {
229 bond->SetKSingle();
230 bond->SetBO(1);
231 //std::cout << "single\n";
232 }
233 //else
234 // std::cout << "double\n";
235 }
236
237 return;
238 }
239
240 ///////////////////////////////////////////////////////////////////////////////////////
241 //! \brief Start kekulizing one or a fused set of aromatic ring(s)
242
243 //! The initial electronic state indicates if an atoms must make a double bond or not
244 //! Kekulizing is attempted recursively for all the atoms bonded to the first atom
245 //! of the cycle.
246 void OBMol::start_kekulize( std::vector <OBAtom*> &cycle, std::vector<int> &electron) {
247
248 std::vector<int> initState;
249 std::vector<int> currentState;
250 std::vector<int> binitState;
251 std::vector<int> bcurrentState;
252 std::vector<bool> mark;
253 unsigned int Idx;
254 OBAtom *atom, *atom2;
255 OBBond *bond;
256
257 //init the atom arrays
258 unsigned i;
259 for(i=0;i <NumAtoms()+1; i++) {
260 initState.push_back(-1);
261 currentState.push_back(-1);
262 mark.push_back(false);
263 }
264
265 //init the bond arrays with single bonds
266 for(i=0;i <NumBonds(); i++) {
267 binitState.push_back(SINGLE);
268 bcurrentState.push_back(SINGLE);
269 }
270
271 //set the electron number
272 for(i=0; i< cycle.size(); i++) {
273 atom = cycle[i];
274 Idx = atom->GetIdx();
275 if ( electron[i] == 1)
276 initState[Idx] = 1; // make 1 double bond
277 else
278 initState[Idx] = 2; // make 2 single bonds
279
280 currentState[Idx] = initState[Idx];
281 }
282
283 std::vector<OBEdgeBase*>::iterator b;
284 OBAtom *nbr;
285
286 bool second_pass=false;
287 // for( i=1; i<= NumAtoms(); i++) {
288 // if(currentState[i] == 1) { // the atom can make a double bond
289 // atom = GetAtom(i);
290 // //find a neighbour that can make a double bond
291 // // and start kekulize
292 // for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
293 // if(currentState[nbr->GetIdx()]==1){
294 // if(!expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark)) {
295 // second_pass=true;
296 // }
297
298 // }
299 // }
300 // }
301 // }
302 bool expand_successful;
303 atom = cycle[0];
304 for (nbr = atom->BeginNbrAtom(b);nbr;nbr = atom->NextNbrAtom(b)) {
305 if(initState[nbr->GetIdx()] == -1) //neighbor atom not in the cycle, try next one
306 continue;
307 //std::cout << "Expand kekulize\n";
308 expand_kekulize(atom,nbr,currentState,initState, bcurrentState,binitState, mark) ;
309 //Control that all the electron have been given to the cycle(s)
310 expand_successful = true;
311 for(unsigned i=0; i< cycle.size(); i++) {
312 atom2 = cycle[i];
313 Idx = atom2->GetIdx();
314 //cout << "\t" << currentState[Idx];
315 if (currentState[Idx] == 1)
316 expand_successful=false;
317 }
318 //cout << endl;
319 if (expand_successful)
320 break;
321 else {
322 unsigned i;
323 for(i=0;i <NumAtoms()+1; i++) {
324 currentState[i]=initState[i];
325 mark[i]=false;
326 }
327 for(i=0;i <NumBonds(); i++) {
328 bcurrentState[i]=binitState[i];
329 }
330 }
331 }
332 if (!expand_successful)
333 {
334 #ifdef HAVE_SSTREAM
335 stringstream errorMsg;
336 #else
337 strstream errorMsg;
338 #endif
339 errorMsg << "Kekulize Error for molecule " << GetTitle() << endl;
340 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
341 }
342
343 // Set the double bonds
344 // std::cout << "Set double bonds\n";
345 for(i=0;i <NumBonds(); i++) {
346 bond = GetBond(i);
347 // std::cout << "bond " << bond->GetBeginAtomIdx() << " " << bond->GetEndAtomIdx() << " ";
348 if (bond->GetBO()==5 && bcurrentState[i] == DOUBLE) {
349 if ( (bond->GetBeginAtom())->IsSulfur()
350 && bond->GetEndAtom()->IsSulfur() ) {
351 // no double bonds between aromatic sulfur atoms -- PR#1504089
352 continue;
353 }
354
355 bond->SetKDouble();
356 bond->SetBO(2);
357 //std::cout << "double\n";
358 }
359 //else
360 //std::cout << "single\n";
361 //else if (bond->IsAromatic() && bond->GetBO() != 2)
362 // bond->SetBO(1);
363 }
364
365 return;
366 }
367
368
369 /////////////////////////////////////////////////////////////////////////////////////////
370 //! \brief recursively assign single and double bonds according to the electronical state
371 //! of the atoms of the current bond
372 int OBMol::expand_kekulize(OBAtom *atom1, OBAtom *atom2, std::vector<int> &currentState, std::vector<int> &initState, std::vector<int> &bcurrentState, std::vector<int> &binitState, std::vector<bool> &mark)
373 {
374 int done;
375 int Idx1=atom1->GetIdx(), Idx2=atom2->GetIdx();
376 OBBond *bond;
377 std::vector<OBEdgeBase*>::iterator i;
378 OBAtom *nbr;
379 int natom;
380
381 mark[Idx1]= true;
382 bond = atom1->GetBond(atom2);
383 int bIdx = bond->GetIdx();
384
385 //cout << "assign bond state for atoms " << Idx1 << " and " << Idx2 << endl;
386 if (currentState[Idx1] == 1 && currentState[Idx2] == 1) {
387 currentState[Idx1]=0;
388 currentState[Idx2]=0;
389 // set bond to double
390 //std::cout << "bond " << Idx1 << " " << Idx2 << " double\n";
391 bcurrentState[bIdx]=DOUBLE;
392 }
393 else if (currentState[Idx1] == 0 && currentState[Idx2] == 1 ||
394 currentState[Idx1] == 2 && currentState[Idx2] == 1 ||
395 currentState[Idx1] == 2 && currentState[Idx2] == 2) {
396 //std::cout << "bond " << Idx1 << " " << Idx2 << " single\n";
397 // leave bond to single
398 }
399 else if (currentState[Idx1] == 1 && currentState[Idx2] == 0 ||
400 currentState[Idx1] == 1 && currentState[Idx2] == 2) {
401 mark[Idx1]=false;
402 //std::cout << "bond " << Idx1 << " " << Idx2 << " error\n";
403 return (0); // error
404 }
405 else if (currentState[Idx1] == 0 && currentState[Idx2] == 0
406 || currentState[Idx1] == 2 && currentState[Idx2] == 0) {
407 //std::cout << "bond " << Idx1 << " " << Idx2 << " done\n";
408 mark[Idx2]=true;
409 return (1); //done
410 }
411 else if (currentState[Idx1] == 0 && currentState[Idx2] == 2) {
412 currentState[Idx2]=0;
413 //std::cout << "bond " << Idx1 << " " << Idx2 << " leave single\n";
414 // leave bond to single
415 }
416 else {
417
418 #ifdef HAVE_SSTREAM
419 stringstream errorMsg;
420 #else
421 strstream errorMsg;
422 #endif
423
424 errorMsg << "unexpected state:" << "atom " << Idx1 << " " << currentState[Idx1]
425 << " atom " << Idx2 << " " << currentState[Idx2] << endl;
426 obErrorLog.ThrowError(__func__, errorMsg.str(), obDebug);
427 return(false);
428 }
429
430 //int c;
431 //for(c=1; c < currentState.size(); c++) {
432 //cout << c << "\t";
433 //}
434 //cout << endl;
435 //for(c=1; c < currentState.size(); c++) {
436 //cout << currentState[c] << "\t";
437 //}
438 //cout << endl;
439
440 vector<int> previousState = currentState; // Backup the atom
441 vector<int> bpreviousState = bcurrentState; // and the bond states before expanding again
442
443 bool return_false=false;
444 // for each neighbor of atom 2 not already kekulized
445 for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
446 {
447 natom = nbr->GetIdx();
448 if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
449 continue;
450 if ( !mark[natom] ) {
451 done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
452 if ( !done ) // kekulize failed
453 return_false =true;
454 else
455 return_false =false;
456 }
457
458 }
459 if (return_false) { // no good solution found
460 //cout << "return_false:no good solution\n" << endl;
461 //cout << "reset state of " << Idx1 << " and " << Idx2 << " from " << currentState[Idx1]
462 //<< " " << currentState[Idx2] << " to ";
463
464 // retrieve the states that might have been changed during kekulize expansion
465 currentState = previousState;
466
467
468 bcurrentState = bpreviousState;
469
470
471 // reset the bond and the atom states
472 if (bcurrentState[bIdx] == DOUBLE)
473 currentState[Idx1]=initState[Idx1];
474
475 currentState[Idx2]=initState[Idx2];
476 bcurrentState[bIdx]=binitState[bIdx]; // should be always single
477 mark[Idx2]=false;
478
479 //cout << currentState[Idx1] << " " << currentState[Idx2] << endl;
480 return (false);
481 }
482 // atom 2 cannot make any bond, should not have 1 electron
483 if (currentState[Idx2] == 1) {
484 // currentState[Idx1]=initState[Idx1];
485 //cout << "return true but " << Idx2 << " state = 1\n";
486 mark[Idx1]=false;
487 return (false);
488 }
489 else {
490 // if we found a good solution, then the state of Idx2 may have shifted from 1 to 0 during the kekulization
491 // If it is the case, we should check if there is a remaining unmarked neighbor because it is possible
492 // that kekulizing from this neigbor failed just because Idx2 was equal to 1
493
494 if(previousState[Idx2] == 1) {
495 // Since now Idx2 is equal to 0 because it kekulized well the kekulizing of the failed neigbor could be successfull
496 // If there is still an unmarked neigbor try to kekulize it again
497 //mark[Idx2]=true;
498 return_false=false;
499 //cout << "retry kekulizing from " << Idx2 << endl;
500
501 for (nbr = atom2->BeginNbrAtom(i);nbr;nbr = atom2->NextNbrAtom(i))
502 {
503 natom = nbr->GetIdx();
504 if(initState[natom] == -1) //neighbor atom not in the cycle, try next one
505 continue;
506 if ( !mark[natom] ) {
507 //cout << "atom " << natom << " not marked, expand again" << endl;
508 done = expand_kekulize(atom2, nbr, currentState, initState, bcurrentState, binitState, mark);
509 if ( !done ) // kekulize failed
510 return_false =true;
511 else
512 return_false =false;
513 }
514
515 }
516
517 // if we cannot kekulize the remaining neigbor again then we have to return false
518 // we do not have to reset the states because the kekulize will fail anyway
519 if(return_false) {
520 //cout << "rekekulize failed" << endl;
521 return(false);
522 }
523 else {
524 //cout << "rekekulized successfull" << endl;
525 return (true);
526 }
527
528 }
529
530
531 //cout << "return_true: good solution" << endl;
532 return (true);
533 }
534 }
535
536 //! Give the priority to give two electrons instead of 1
537 int OBMol::getorden( OBAtom *atom)
538 {
539 if ( atom->IsSulfur() ) return 1;
540 if ( atom->IsOxygen() ) return 2;
541 if ( atom->GetAtomicNum() == 34 ) return 3;
542 if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->GetValence() == 3) return 5;
543 if ( atom->IsAmideNitrogen() ) return 4;
544 if ( atom->IsNitrogen() && atom->GetFormalCharge() == -1) return 6;
545 if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 && atom->IsInRingSize(5) ) return 7;
546 if ( atom->IsNitrogen() && atom->GetFormalCharge() == 0 ) return 8;
547 if ( atom->IsCarbon() && atom->GetFormalCharge() == -1) return 9;
548 //if ( atom->IsCarbon() ) return 9;
549
550 return (100); //no atom found
551 }
552
553 //! Recursively find the aromatic atoms with an aromatic bond to the current atom
554 void OBMol::expandcycle (OBAtom *atom, OBBitVec &avisit)
555 {
556 OBAtom *nbr;
557 // OBBond *bond;
558 std::vector<OBEdgeBase*>::iterator i;
559 int natom;
560 //for each neighbour atom test if it is in the aromatic ring
561 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
562 {
563 natom = nbr->GetIdx();
564 // if (!avisit[natom] && nbr->IsAromatic() && ((OBBond*) *i)->IsAromatic()) {
565 if (!avisit[natom] && ((OBBond*) *i)->GetBO()==5) {
566 avisit.SetBitOn(natom);
567 expandcycle(nbr, avisit);
568 }
569 }
570 }
571
572 } // end namespace OpenBabel
573
574 //! \file kekulize.cpp
575 //! \brief Alternate algorithm to kekulize a molecule (OBMol::NewPerceiveKekuleBonds()).