ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/openbabel/parsmart.cpp
Revision: 1081
Committed: Thu Oct 19 20:49:05 2006 UTC (18 years, 6 months ago) by gezelter
File size: 97203 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# Content
1 /**********************************************************************
2 parsmart.cpp - SMARTS parser.
3
4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
5 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
6
7 This file is part of the Open Babel project.
8 For more information, see <http://openbabel.sourceforge.net/>
9
10 This program is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation version 2 of the License.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18 ***********************************************************************/
19
20 #include <ctype.h>
21 #include <iostream>
22 #include <iomanip>
23
24 #include "mol.hpp"
25 #include "bitvec.hpp"
26 #include "parsmart.hpp"
27
28 #ifndef True
29 #define True 1
30 #define False 0
31 #endif
32
33 /* Strict syntax checking! */
34 // Currently causes problems with aromatic flags in atomtyp.txt
35 // #define STRICT
36 #define VERBOSE
37
38 #ifdef __sgi
39 #define UnusedArgument(x) ((x)=(x))
40 #else
41 #define UnusedArgument(x)
42 #endif
43
44 using namespace std;
45
46 namespace OpenBabel
47 {
48
49 /*! \class OBSmartsPattern
50
51 Substructure search is an incredibly useful tool in the context of a
52 small molecule programming library. Having an efficient substructure
53 search engine reduces the amount of hard code needed for molecule
54 perception, as well as increases the flexibility of certain
55 operations. For instance, atom typing can be easily performed based on
56 hard coded rules of element type and bond orders (or
57 hybridization). Alternatively, atom typing can also be done by
58 matching a set of substructure rules read at run time. In the latter
59 case customization based on application (such as changing the pH)
60 becomes a facile operation. Fortunately for Open Babel and its users,
61 Roger Sayle donated a SMARTS parser which became the basis for SMARTS
62 matching in Open Babel.
63
64 The SMARTS matcher, or OBSmartsPattern, is a separate object which can
65 match patterns in the OBMol class. The following code demonstrates how
66 to use the OBSmartsPattern class:
67 \code
68 OBMol mol;
69 ...
70 OBSmartsPattern sp;
71 sp.Init("CC");
72 sp.Match(mol);
73 vector<vector<int> > maplist;
74 maplist = sp.GetMapList();
75 //or maplist = sp.GetUMapList();
76 //print out the results
77 vector<vector<int> >::iterator i;
78 vector<int>::iterator j;
79 for (i = maplist.begin();i != maplist.end();i++)
80 {
81 for (j = i->begin();j != i->end();j++)
82 cout << j << ' `;
83 cout << endl;
84 }
85 \endcode
86 The preceding code reads in a molecule, initializes a smarts pattern
87 of two single-bonded carbons, and locates all instances of the
88 pattern in the molecule. Note that calling the Match() function
89 does not return the results of the substructure match. The results
90 from a match are stored in the OBSmartsPattern, and a call to
91 GetMapList() or GetUMapList() must be made to extract the
92 results. The function GetMapList() returns all matches of a
93 particular pattern while GetUMapList() returns only the unique
94 matches. For instance, the pattern [OD1]~C~[OD1] describes a
95 carboxylate group. This pattern will match both atom number
96 permutations of the carboxylate, and if GetMapList() is called, both
97 matches will be returned. If GetUMapList() is called only unique
98 matches of the pattern will be returned. A unique match is defined as
99 one which does not cover the identical atoms that a previous match
100 has covered.
101 */
102
103 /*============================*/
104 /* Period Table of Elements */
105 /*============================*/
106
107 std::vector<std::pair<Pattern*,std::vector<bool> > > RSCACHE; //recursive smarts cache
108
109 typedef struct
110 {
111 char *symbol;
112 int organic;
113 int aromflag;
114 double weight;
115 }
116 Element;
117
118 #define ELEMMAX 104
119
120 typedef struct
121 {
122 BondExpr *closord[100];
123 int closure[100];
124 int closindex;
125 }
126 ParseState;
127
128 /*
129 #define ATOMEXPRPOOL 16
130 #define BONDEXPRPOOL 16
131 #define ATOMPOOL 16
132 #define BONDPOOL 16
133 */
134
135 #define ATOMEXPRPOOL 1
136 #define BONDEXPRPOOL 1
137 #define ATOMPOOL 1
138 #define BONDPOOL 1
139
140 /*=====================*/
141 /* BondExpr Bit Sets */
142 /*=====================*/
143
144 #define BF_NONRINGUNSPEC 0x0001
145 #define BF_NONRINGDOWN 0x0002
146 #define BF_NONRINGUP 0x0004
147 #define BF_NONRINGDOUBLE 0x0008
148 #define BF_NONRINGTRIPLE 0x0010
149 #define BF_RINGUNSPEC 0x0020
150 #define BF_RINGDOWN 0x0040
151 #define BF_RINGUP 0x0080
152 #define BF_RINGAROM 0x0100
153 #define BF_RINGDOUBLE 0x0200
154 #define BF_RINGTRIPLE 0x0400
155
156 #define BS_ALL 0x07FF
157 #define BS_SINGLE 0x00E7
158 #define BS_DOUBLE 0x0208
159 #define BS_TRIPLE 0x0410
160 #define BS_AROM 0x0100
161 #define BS_UP 0x0084
162 #define BS_DOWN 0x0042
163 #define BS_UPUNSPEC 0x00A5
164 #define BS_DOWNUNSPEC 0x0063
165 #define BS_RING 0x07E0
166 #define BS_DEFAULT 0x01E7
167
168 static char *MainPtr;
169 static char *LexPtr;
170
171 #define BUFMAX 1024
172 static char Buffer[BUFMAX];
173 static char Descr[BUFMAX];
174
175 static bool match(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist,bool single=false);
176 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom);
177 static bool EvalBondExpr(BondExpr *expr,OBBond *bond);
178 static int GetVectorBinding();
179 static int CreateAtom(Pattern*,AtomExpr*,int,int vb=0);
180
181 /*=============================*/
182 /* Standard Utility Routines */
183 /*=============================*/
184
185 static void FatalAllocationError( char *ptr )
186 {
187 printf("Error: Unable to allocate %s!\n",ptr);
188 // exit(1);
189 }
190
191 /*================================*/
192 /* Atom Expression Manipulation */
193 /*================================*/
194
195 static void FreePattern( Pattern* );
196 static Pattern *CopyPattern( Pattern* );
197
198 static AtomExpr *AllocAtomExpr( void )
199 {
200 register AtomExpr *result;
201
202 result = (AtomExpr*)malloc(sizeof(AtomExpr));
203 return result;
204 }
205
206 static AtomExpr *CopyAtomExpr( AtomExpr *expr )
207 {
208 register AtomExpr *result;
209
210 result = AllocAtomExpr();
211 result->type = expr->type;
212 switch( expr->type )
213 {
214 case(AE_ANDHI):
215 case(AE_ANDLO):
216 case(AE_OR): result->bin.lft = CopyAtomExpr(expr->bin.lft);
217 result->bin.rgt = CopyAtomExpr(expr->bin.rgt);
218 break;
219
220 case(AE_NOT): result->mon.arg = CopyAtomExpr(expr->mon.arg);
221 break;
222
223 case(AE_RECUR): result->recur.recur = CopyPattern(
224 (Pattern*)expr->recur.recur );
225 break;
226
227 case(AE_LEAF): result->leaf.prop = expr->leaf.prop;
228 result->leaf.value = expr->leaf.value;
229 break;
230 }
231 return result;
232 }
233
234 static void FreeAtomExpr( AtomExpr *expr )
235 {
236 if( expr )
237 {
238 switch( expr->type )
239 {
240 case(AE_ANDHI):
241 case(AE_ANDLO):
242 case(AE_OR): FreeAtomExpr(expr->bin.lft);
243 FreeAtomExpr(expr->bin.rgt);
244 break;
245
246 case(AE_NOT): FreeAtomExpr(expr->mon.arg);
247 break;
248
249 case(AE_RECUR): FreePattern( (Pattern*)expr->recur.recur );
250 break;
251 }
252 if (expr)
253 {
254 free(expr);
255 expr = (AtomExpr*)NULL;
256 }
257 }
258 }
259
260 static AtomExpr *BuildAtomLeaf( int prop, int val )
261 {
262 register AtomExpr *result;
263
264 result = AllocAtomExpr();
265 result->leaf.type = AE_LEAF;
266 result->leaf.prop = prop;
267 result->leaf.value = val;
268 return result;
269 }
270
271 static AtomExpr *BuildAtomNot( AtomExpr *expr )
272 {
273 register AtomExpr *result;
274
275 result = AllocAtomExpr();
276 result->mon.type = AE_NOT;
277 result->mon.arg = expr;
278 return result;
279 }
280
281 static AtomExpr *BuildAtomBin( int op, AtomExpr *lft, AtomExpr *rgt )
282 {
283 register AtomExpr *result;
284
285 result = AllocAtomExpr();
286 result->bin.type = op;
287 result->bin.lft = lft;
288 result->bin.rgt = rgt;
289 return result;
290 }
291
292 static AtomExpr *BuildAtomRecurs( Pattern *pat )
293 {
294 register AtomExpr *result;
295
296 result = AllocAtomExpr();
297 result->recur.type = AE_RECUR;
298 result->recur.recur = (void*)pat;
299 return result;
300 }
301
302 static AtomExpr *GenerateElement( int elem )
303 {
304 return BuildAtomLeaf(AL_ELEM,elem);
305 }
306
307 static AtomExpr *GenerateAromElem( int elem, int flag )
308 {
309 AtomExpr *expr1;
310 AtomExpr *expr2;
311
312 expr1 = BuildAtomLeaf(AL_AROM,flag);
313 expr2 = BuildAtomLeaf(AL_ELEM,elem);
314 return BuildAtomBin(AE_ANDHI,expr1,expr2);
315 }
316
317 static int IsInvalidAtom( AtomExpr *expr )
318 {
319 if( !expr )
320 return True;
321 return( (expr->type==AE_LEAF) &&
322 (expr->leaf.prop==AL_CONST)
323 && !expr->leaf.value );
324 }
325
326 /*================================*/
327 /* Bond Expression Manipulation */
328 /*================================*/
329
330 static BondExpr *AllocBondExpr( void )
331 {
332 register BondExpr *result;
333
334 result = (BondExpr*)malloc(sizeof(BondExpr));
335 return result;
336 }
337
338 static BondExpr *CopyBondExpr( BondExpr *expr )
339 {
340 register BondExpr *result;
341
342 result = AllocBondExpr();
343 result->type = expr->type;
344 switch( expr->type )
345 {
346 case(AE_ANDHI):
347 case(AE_ANDLO):
348 case(AE_OR): result->bin.lft = CopyBondExpr(expr->bin.lft);
349 result->bin.rgt = CopyBondExpr(expr->bin.rgt);
350 break;
351
352 case(AE_NOT): result->mon.arg = CopyBondExpr(expr->mon.arg);
353 break;
354
355 case(AE_LEAF): result->leaf.prop = expr->leaf.prop;
356 result->leaf.value = expr->leaf.value;
357 break;
358 }
359 return result;
360 }
361
362 static bool EquivalentBondExpr( BondExpr *expr1, BondExpr *expr2 )
363 {
364 if (expr1 == NULL && expr2 == NULL)
365 return true;
366 else if (expr1 == NULL && expr2 != NULL)
367 return false;
368 else if (expr1 != NULL && expr2 == NULL)
369 return false;
370
371 if (expr1->type != expr2->type)
372 return false;
373
374 bool result = false;
375 switch( expr1->type )
376 {
377 case(AE_ANDHI):
378 case(AE_ANDLO):
379 case(AE_OR):
380 result = (EquivalentBondExpr(expr1->bin.lft, expr2->bin.lft)) &&
381 (EquivalentBondExpr(expr1->bin.rgt, expr2->bin.rgt));
382 break;
383
384 case(AE_NOT):
385 result = EquivalentBondExpr(expr1->mon.arg, expr2->mon.arg);
386 break;
387
388 case(AE_LEAF):
389 result = (expr1->leaf.prop == expr2->leaf.prop) &&
390 (expr1->leaf.value == expr2->leaf.value);
391 break;
392 }
393 return result;
394 }
395
396 static void FreeBondExpr( BondExpr *expr )
397 {
398 if( expr )
399 {
400 switch( expr->type )
401 {
402 case(BE_ANDHI):
403 case(BE_ANDLO):
404 case(BE_OR): FreeBondExpr(expr->bin.lft);
405 FreeBondExpr(expr->bin.rgt);
406 break;
407
408 case(BE_NOT): FreeBondExpr(expr->mon.arg);
409 break;
410 }
411
412 if (expr)
413 {
414 free(expr);
415 expr = (BondExpr*)NULL;
416 }
417 }
418 }
419
420 static BondExpr *BuildBondLeaf( int prop, int val )
421 {
422 register BondExpr *result;
423
424 result = AllocBondExpr();
425 result->leaf.type = BE_LEAF;
426 result->leaf.prop = prop;
427 result->leaf.value = val;
428 return result;
429 }
430
431 static BondExpr *BuildBondNot( BondExpr *expr )
432 {
433 register BondExpr *result;
434
435 result = AllocBondExpr();
436 result->mon.type = BE_NOT;
437 result->mon.arg = expr;
438 return result;
439 }
440
441 static BondExpr *BuildBondBin( int op, BondExpr *lft, BondExpr *rgt )
442 {
443 register BondExpr *result;
444
445 result = AllocBondExpr();
446 result->bin.type = op;
447 result->bin.lft = lft;
448 result->bin.rgt = rgt;
449 return result;
450 }
451
452 static BondExpr *GenerateDefaultBond( void )
453 {
454 register BondExpr *expr1;
455 register BondExpr *expr2;
456
457 expr1 = BuildBondLeaf(BL_TYPE,BT_SINGLE);
458 expr2 = BuildBondLeaf(BL_TYPE,BT_AROM);
459 return(BuildBondBin(BE_OR,expr1,expr2));
460 }
461
462 /*===============================*/
463 /* SMARTS Pattern Manipulation */
464 /*===============================*/
465
466 static Pattern *AllocPattern( void )
467 {
468 Pattern *ptr;
469
470 ptr = (Pattern*)malloc(sizeof(Pattern));
471 if( !ptr )
472 FatalAllocationError("pattern");
473
474 ptr->atom = (AtomSpec*)0;
475 ptr->aalloc = 0;
476 ptr->acount = 0;
477
478 ptr->bond = (BondSpec*)0;
479 ptr->balloc = 0;
480 ptr->bcount = 0;
481
482 ptr->parts = 1;
483 return ptr;
484 }
485
486 static int CreateAtom( Pattern *pat, AtomExpr *expr, int part,int vb)
487 {
488 int index,size;
489
490 if( pat->acount == pat->aalloc )
491 {
492 pat->aalloc += ATOMPOOL;
493 size = (int)(pat->aalloc*sizeof(AtomSpec));
494 if( pat->atom )
495 {
496 pat->atom = (AtomSpec*)realloc(pat->atom,size);
497 }
498 else
499 pat->atom = (AtomSpec*)malloc(size);
500 if( !pat->atom )
501 FatalAllocationError("atom pool");
502 }
503
504 index = pat->acount++;
505 pat->atom[index].part = part;
506 pat->atom[index].expr = expr;
507 pat->atom[index].vb = vb; //std::vector binding
508
509 return index;
510 }
511
512 static int CreateBond( Pattern *pat, BondExpr *expr, int src, int dst )
513 {
514 int index,size;
515
516 if( pat->bcount == pat->balloc )
517 {
518 pat->balloc += BONDPOOL;
519 size = (int)(pat->balloc*sizeof(BondSpec));
520 if( pat->bond )
521 {
522 pat->bond = (BondSpec*)realloc(pat->bond,size);
523 }
524 else
525 pat->bond = (BondSpec*)malloc(size);
526 if( !pat->bond )
527 FatalAllocationError("bond pool");
528 }
529
530 index = pat->bcount++;
531 pat->bond[index].expr = expr;
532 pat->bond[index].src = src;
533 pat->bond[index].dst = dst;
534 return(index);
535 }
536
537 static Pattern *CopyPattern( Pattern *pat )
538 {
539 Pattern *result;
540 AtomExpr *aexpr;
541 BondExpr *bexpr;
542 int i;
543
544 result = AllocPattern();
545 result->parts = pat->parts;
546 for( i=0; i<pat->acount; i++ )
547 {
548 aexpr = CopyAtomExpr(pat->atom[i].expr);
549 CreateAtom(result,aexpr,pat->atom[i].part);
550 }
551
552 for( i=0; i<pat->bcount; i++ )
553 {
554 bexpr = CopyBondExpr(pat->bond[i].expr);
555 CreateBond(result,bexpr,pat->bond[i].src,pat->bond[i].dst);
556 }
557
558 return result;
559 }
560
561 static void FreePattern( Pattern *pat )
562 {
563 int i;
564
565 if( pat )
566 {
567 if( pat->aalloc )
568 {
569 for( i=0; i<pat->acount; i++ )
570 FreeAtomExpr(pat->atom[i].expr);
571 free(pat->atom);
572 }
573
574 if( pat->balloc )
575 {
576 for( i=0; i<pat->bcount; i++ )
577 FreeBondExpr(pat->bond[i].expr);
578 free(pat->bond);
579 }
580 free(pat);
581 }
582 }
583
584 /*=========================*/
585 /* SMARTS Syntax Parsing */
586 /*=========================*/
587
588 static Pattern *ParseSMARTSPattern( void );
589 static Pattern *ParseSMARTSPart( Pattern*, int );
590
591 static Pattern *SMARTSError( Pattern *pat )
592 {
593 /* char *ptr;
594
595 fprintf(stderr,"SMARTS Error: %s\n",MainPtr);
596
597 fputs(" ",stdout);
598 for( ptr=MainPtr; ptr<LexPtr; ptr++ )
599 fputc(' ',stdout);
600 fputs("^\n",stdout);
601 */
602
603 #ifdef HAVE_SSTREAM
604 stringstream errorMsg;
605 #else
606 strstream errorMsg;
607 #endif
608 errorMsg << "SMARTS Error:\n" << MainPtr << endl;
609 errorMsg << setw(LexPtr-MainPtr+1) << '^' << endl;
610 obErrorLog.ThrowError(__func__, errorMsg.str(), obError);
611
612 FreePattern(pat);
613 return (Pattern*)0;
614 }
615
616 static AtomExpr *ParseSimpleAtomPrimitive( void )
617 {
618 switch( *LexPtr++ )
619 {
620 case '*':
621 return BuildAtomLeaf(AL_CONST,True);
622 #ifndef STRICT
623 case 'A':
624 return BuildAtomLeaf(AL_AROM,False);
625 #endif
626
627 case 'B':
628 if( *LexPtr == 'r' )
629 {
630 LexPtr++;
631 return GenerateElement(35);
632 }
633 return GenerateElement(5);
634
635 case 'C':
636 if( *LexPtr == 'l' )
637 {
638 LexPtr++;
639 return GenerateElement(17);
640 }
641 return GenerateAromElem(6,False);
642
643 case 'F':
644 return GenerateElement( 9);
645 case 'I':
646 return GenerateElement(53);
647 case 'N':
648 return GenerateAromElem( 7,False);
649 case 'O':
650 return GenerateAromElem( 8,False);
651 case 'P':
652 return GenerateElement(15);
653 case 'S':
654 return GenerateAromElem(16,False);
655
656 #ifndef STRICT
657 case 'a':
658 if( *LexPtr == 's' )
659 {
660 LexPtr++;
661 return GenerateAromElem(33, true);
662 }
663 return BuildAtomLeaf(AL_AROM,True);
664 #endif
665
666 case 'c':
667 return GenerateAromElem( 6,True);
668 case 'n':
669 return GenerateAromElem( 7,True);
670 case 'o':
671 return GenerateAromElem( 8,True);
672 case 'p':
673 return GenerateAromElem(15,True);
674 case 's':
675 if( *LexPtr == 'e' )
676 {
677 LexPtr++;
678 return GenerateAromElem(34, true);
679 }
680 return GenerateAromElem(16,True);
681 }
682 LexPtr--;
683 return (AtomExpr*)0;
684 }
685
686 static AtomExpr *ParseComplexAtomPrimitive( void )
687 {
688 register Pattern *pat;
689 register int index;
690
691 switch( *LexPtr++ )
692 {
693 case('#'):
694 if( !isdigit(*LexPtr) )
695 return( (AtomExpr*)0 );
696
697 index = 0;
698 while( isdigit(*LexPtr) )
699 index = index*10 + ((*LexPtr++)-'0');
700 if( index > ELEMMAX )
701 {
702 LexPtr--;
703 return( (AtomExpr*)0 );
704 }
705 else if( !index )
706 return( (AtomExpr*)0 );
707 return( GenerateElement(index) );
708
709 case('$'):
710 if( *LexPtr != '(' )
711 return( (AtomExpr*)0 );
712 LexPtr++;
713 #ifdef STRICT
714 pat = ParseSMARTSPart(AllocPattern(),0);
715 #else
716 pat = ParseSMARTSPattern();
717 #endif
718
719 if( !pat )
720 return( (AtomExpr*)0 );
721 if( *LexPtr != ')' )
722 {
723 FreePattern(pat);
724 return( (AtomExpr*)0 );
725 }
726 LexPtr++;
727 return( BuildAtomRecurs(pat) );
728
729 case('*'):
730 return( BuildAtomLeaf(AL_CONST,True) );
731
732 case('+'):
733 if( isdigit(*LexPtr) )
734 {
735 index = 0;
736 while( isdigit(*LexPtr) )
737 index = index*10 + ((*LexPtr++)-'0');
738 }
739 else
740 {
741 index = 1;
742 while( *LexPtr == '+' )
743 {
744 LexPtr++;
745 index++;
746 }
747 }
748 return( BuildAtomLeaf(AL_POSITIVE,index) );
749
750 case('-'):
751 if( isdigit(*LexPtr) )
752 {
753 index = 0;
754 while( isdigit(*LexPtr) )
755 index = index*10 + ((*LexPtr++)-'0');
756 }
757 else
758 {
759 index = 1;
760 while( *LexPtr == '-' )
761 {
762 LexPtr++;
763 index++;
764 }
765 }
766 return BuildAtomLeaf(AL_NEGATIVE,index);
767
768 case '@':
769 if (*LexPtr == '?')
770 {
771 LexPtr++;
772 return(BuildAtomLeaf(AL_CHIRAL, 0)); // unspecified
773 }
774 else if (*LexPtr != '@')
775 return(BuildAtomLeaf(AL_CHIRAL,AL_ANTICLOCKWISE));
776 else
777 {
778 LexPtr++;
779 return(BuildAtomLeaf(AL_CHIRAL,AL_CLOCKWISE));
780 }
781
782 case '^':
783 if (isdigit(*LexPtr))
784 {
785 index = 0;
786 while( isdigit(*LexPtr) )
787 index = index*10 + ((*LexPtr++)-'0');
788 return(BuildAtomLeaf(AL_HYB,index));
789 }
790 else
791 return(BuildAtomLeaf(AL_HYB,1));
792
793 case('0'): case('1'): case('2'): case('3'): case('4'):
794 case('5'): case('6'): case('7'): case('8'): case('9'):
795 index = LexPtr[-1]-'0';
796 while( isdigit(*LexPtr) )
797 index = index*10 + ((*LexPtr++)-'0');
798 return BuildAtomLeaf(AL_MASS,index);
799
800 case('A'):
801 switch( *LexPtr++ )
802 {
803 case('c'): return GenerateElement(89);
804 case('g'): return GenerateElement(47);
805 case('l'): return GenerateElement(13);
806 case('m'): return GenerateElement(95);
807 case('r'): return GenerateElement(18);
808 case('s'): return GenerateElement(33);
809 case('t'): return GenerateElement(85);
810 case('u'): return GenerateElement(79);
811 }
812 LexPtr--;
813 return BuildAtomLeaf(AL_AROM,False);
814
815 case('B'):
816 switch( *LexPtr++ )
817 {
818 case('a'): return GenerateElement(56);
819 case('e'): return GenerateElement( 4);
820 case('i'): return GenerateElement(83);
821 case('k'): return GenerateElement(97);
822 case('r'): return GenerateElement(35);
823 }
824 LexPtr--;
825 return GenerateElement(5);
826
827 case('C'):
828 switch( *LexPtr++ )
829 {
830 case('a'): return GenerateElement(20);
831 case('d'): return GenerateElement(48);
832 case('e'): return GenerateElement(58);
833 case('f'): return GenerateElement(98);
834 case('l'): return GenerateElement(17);
835 case('m'): return GenerateElement(96);
836 case('o'): return GenerateElement(27);
837 case('r'): return GenerateElement(24);
838 case('s'): return GenerateElement(55);
839 case('u'): return GenerateElement(29);
840 }
841 LexPtr--;
842 return GenerateAromElem(6,False);
843
844 case('D'):
845 if( *LexPtr == 'y' )
846 {
847 LexPtr++;
848 return GenerateElement(66);
849 }
850 else if( isdigit(*LexPtr) )
851 {
852 index = 0;
853 while( isdigit(*LexPtr) )
854 index = index*10 + ((*LexPtr++)-'0');
855 return BuildAtomLeaf(AL_DEGREE,index);
856 }
857 return BuildAtomLeaf(AL_DEGREE, 1);
858 break;
859
860 case('E'):
861 if( *LexPtr == 'r' )
862 {
863 LexPtr++;
864 return GenerateElement(68);
865 }
866 else if( *LexPtr == 's' )
867 {
868 LexPtr++;
869 return GenerateElement(99);
870 }
871 else if( *LexPtr == 'u' )
872 {
873 LexPtr++;
874 return GenerateElement(63);
875 }
876 break;
877
878 case('F'):
879 if( *LexPtr == 'e' )
880 {
881 LexPtr++;
882 return GenerateElement(26);
883 }
884 else if( *LexPtr == 'm' )
885 {
886 LexPtr++;
887 return GenerateElement(100);
888 }
889 else if( *LexPtr == 'r' )
890 {
891 LexPtr++;
892 return GenerateElement(87);
893 }
894 return GenerateElement(9);
895
896 case('G'):
897 if( *LexPtr == 'a' )
898 {
899 LexPtr++;
900 return( GenerateElement(31) );
901 }
902 else if( *LexPtr == 'd' )
903 {
904 LexPtr++;
905 return( GenerateElement(64) );
906 }
907 else if( *LexPtr == 'e' )
908 {
909 LexPtr++;
910 return( GenerateElement(32) );
911 }
912 break;
913
914 case('H'):
915 if( *LexPtr == 'e' )
916 {
917 LexPtr++;
918 return( GenerateElement( 2) );
919 }
920 else if( *LexPtr == 'f' )
921 {
922 LexPtr++;
923 return( GenerateElement(72) );
924 }
925 else if( *LexPtr == 'g' )
926 {
927 LexPtr++;
928 return( GenerateElement(80) );
929 }
930 else if( *LexPtr == 'o' )
931 {
932 LexPtr++;
933 return( GenerateElement(67) );
934 }
935 else if( isdigit(*LexPtr) )
936 {
937 index = 0;
938 while( isdigit(*LexPtr) )
939 index = index*10 + ((*LexPtr++)-'0');
940 return( BuildAtomLeaf(AL_HCOUNT,index) );
941 }
942 return( BuildAtomLeaf(AL_HCOUNT,1) );
943
944 case('I'):
945 if( *LexPtr == 'n' )
946 {
947 LexPtr++;
948 return( GenerateElement(49) );
949 }
950 else if( *LexPtr == 'r' )
951 {
952 LexPtr++;
953 return( GenerateElement(77) );
954 }
955 return( GenerateElement(53) );
956
957 case('K'):
958 if( *LexPtr == 'r' )
959 {
960 LexPtr++;
961 return( GenerateElement(36) );
962 }
963 return( GenerateElement(19) );
964
965 case('L'):
966 if( *LexPtr == 'a' )
967 {
968 LexPtr++;
969 return( GenerateElement( 57) );
970 }
971 else if( *LexPtr == 'i' )
972 {
973 LexPtr++;
974 return( GenerateElement( 3) );
975 }
976 else if( *LexPtr == 'r' )
977 {
978 LexPtr++;
979 return( GenerateElement(103) );
980 }
981 else if( *LexPtr == 'u' )
982 {
983 LexPtr++;
984 return( GenerateElement( 71) );
985 }
986 break;
987
988 case('M'):
989 if( *LexPtr == 'd' )
990 {
991 LexPtr++;
992 return( GenerateElement(101) );
993 }
994 else if( *LexPtr == 'g' )
995 {
996 LexPtr++;
997 return( GenerateElement( 12) );
998 }
999 else if( *LexPtr == 'n' )
1000 {
1001 LexPtr++;
1002 return( GenerateElement( 25) );
1003 }
1004 else if( *LexPtr == 'o' )
1005 {
1006 LexPtr++;
1007 return( GenerateElement( 42) );
1008 }
1009 break;
1010
1011 case('N'):
1012 switch( *LexPtr++ )
1013 {
1014 case('a'): return( GenerateElement( 11) );
1015 case('b'): return( GenerateElement( 41) );
1016 case('d'): return( GenerateElement( 60) );
1017 case('e'): return( GenerateElement( 10) );
1018 case('i'): return( GenerateElement( 28) );
1019 case('o'): return( GenerateElement(102) );
1020 case('p'): return( GenerateElement( 93) );
1021 }
1022 LexPtr--;
1023 return( GenerateAromElem(7,False) );
1024
1025 case('O'):
1026 if( *LexPtr == 's' )
1027 {
1028 LexPtr++;
1029 return( GenerateElement(76) );
1030 }
1031 return( GenerateAromElem(8,False) );
1032
1033 case('P'):
1034 switch( *LexPtr++ )
1035 {
1036 case('a'): return( GenerateElement(91) );
1037 case('b'): return( GenerateElement(82) );
1038 case('d'): return( GenerateElement(46) );
1039 case('m'): return( GenerateElement(61) );
1040 case('o'): return( GenerateElement(84) );
1041 case('r'): return( GenerateElement(59) );
1042 case('t'): return( GenerateElement(78) );
1043 case('u'): return( GenerateElement(94) );
1044 }
1045 LexPtr--;
1046 return( GenerateElement(15) );
1047
1048 case('R'):
1049 switch( *LexPtr++ )
1050 {
1051 case('a'): return( GenerateElement(88) );
1052 case('b'): return( GenerateElement(37) );
1053 case('e'): return( GenerateElement(75) );
1054 case('h'): return( GenerateElement(45) );
1055 case('n'): return( GenerateElement(86) );
1056 case('u'): return( GenerateElement(44) );
1057 }
1058 LexPtr--;
1059 if( isdigit(*LexPtr) )
1060 {
1061 index = 0;
1062 while( isdigit(*LexPtr) )
1063 index = index*10 + ((*LexPtr++)-'0');
1064 }
1065 else
1066 index = -1;
1067 return( BuildAtomLeaf(AL_RINGS,index) );
1068
1069 case('S'):
1070 switch( *LexPtr++ )
1071 {
1072 case('b'): return( GenerateElement(51) );
1073 case('c'): return( GenerateElement(21) );
1074 case('e'): return( GenerateElement(34) );
1075 case('i'): return( GenerateElement(14) );
1076 case('m'): return( GenerateElement(62) );
1077 case('n'): return( GenerateElement(50) );
1078 case('r'): return( GenerateElement(38) );
1079 }
1080 LexPtr--;
1081 return( GenerateAromElem(16,False) );
1082
1083 case('T'):
1084 switch( *LexPtr++ )
1085 {
1086 case('a'): return( GenerateElement(73) );
1087 case('b'): return( GenerateElement(65) );
1088 case('c'): return( GenerateElement(43) );
1089 case('e'): return( GenerateElement(52) );
1090 case('h'): return( GenerateElement(90) );
1091 case('i'): return( GenerateElement(22) );
1092 case('l'): return( GenerateElement(81) );
1093 case('m'): return( GenerateElement(69) );
1094 }
1095 LexPtr--;
1096 break;
1097
1098 case('U'): return( GenerateElement(92) );
1099 case('V'): return( GenerateElement(23) );
1100 case('W'): return( GenerateElement(74) );
1101
1102 case('X'):
1103 if( *LexPtr == 'e' )
1104 {
1105 LexPtr++;
1106 return( GenerateElement(54) );
1107 }
1108 else if( isdigit(*LexPtr) )
1109 {
1110 index = 0;
1111 while( isdigit(*LexPtr) )
1112 index = index*10 + ((*LexPtr++)-'0');
1113 return( BuildAtomLeaf(AL_CONNECT,index) );
1114 }
1115 return( BuildAtomLeaf(AL_CONNECT, 1) );
1116 break;
1117
1118 case('Y'):
1119 if( *LexPtr == 'b' )
1120 {
1121 LexPtr++;
1122 return( GenerateElement(70) );
1123 }
1124 return( GenerateElement(39) );
1125
1126 case('Z'):
1127 if( *LexPtr == 'n' )
1128 {
1129 LexPtr++;
1130 return GenerateElement(30);
1131 }
1132 else if( *LexPtr == 'r' )
1133 {
1134 LexPtr++;
1135 return GenerateElement(40);
1136 }
1137 break;
1138
1139 case('a'):
1140 if( *LexPtr == 's' )
1141 {
1142 LexPtr++;
1143 return GenerateAromElem(33,True);
1144 }
1145 return BuildAtomLeaf(AL_AROM,True);
1146
1147 case('c'): return GenerateAromElem(6,True);
1148
1149 case('h'):
1150 if( isdigit(*LexPtr) )
1151 {
1152 index = 0;
1153 while( isdigit(*LexPtr) )
1154 index = index*10 + ((*LexPtr++)-'0');
1155 }
1156 else
1157 index = 1;
1158 return BuildAtomLeaf(AL_IMPLICIT,index);
1159
1160 case('n'): return GenerateAromElem(7,True);
1161 case('o'): return GenerateAromElem(8,True);
1162 case('p'): return GenerateAromElem(15,True);
1163
1164 case('r'):
1165 if( isdigit(*LexPtr) )
1166 {
1167 index = 0;
1168 while( isdigit(*LexPtr) )
1169 index = index*10 + ((*LexPtr++)-'0');
1170 if( index == 0 )
1171 return BuildAtomLeaf(AL_RINGS,0);
1172 return BuildAtomLeaf(AL_SIZE,index);
1173 }
1174 return BuildAtomLeaf(AL_RINGS,-1);
1175
1176 case('s'):
1177 if( *LexPtr == 'e' )
1178 {
1179 LexPtr++;
1180 return GenerateAromElem(34,True);
1181 }
1182 return GenerateAromElem(16,True);
1183
1184 case('v'):
1185 if( isdigit(*LexPtr) )
1186 {
1187 index = 0;
1188 while( isdigit(*LexPtr) )
1189 index = index*10 + ((*LexPtr++)-'0');
1190 return BuildAtomLeaf(AL_VALENCE,index);
1191 }
1192 return BuildAtomLeaf(AL_VALENCE, 1);
1193 break;
1194 }
1195 LexPtr--;
1196 return (AtomExpr*)0;
1197 }
1198
1199 static AtomExpr *ParseAtomExpr( int level )
1200 {
1201 register AtomExpr *expr1;
1202 register AtomExpr *expr2;
1203 register char *prev;
1204
1205 switch( level )
1206 {
1207 case(0): /* Low Precedence Conjunction */
1208 if( !(expr1=ParseAtomExpr(1)) )
1209 return (AtomExpr*)0;
1210
1211 while( *LexPtr == ';' )
1212 {
1213 LexPtr++;
1214 if( !(expr2=ParseAtomExpr(1)) )
1215 {
1216 FreeAtomExpr(expr1);
1217 return (AtomExpr*)0;
1218 }
1219 expr1 = BuildAtomBin(AE_ANDLO,expr1,expr2);
1220 }
1221 return expr1;
1222
1223 case(1): /* Disjunction */
1224 if( !(expr1=ParseAtomExpr(2)) )
1225 return (AtomExpr*)0;
1226
1227 while( *LexPtr == ',' )
1228 {
1229 LexPtr++;
1230 if( !(expr2=ParseAtomExpr(2)) )
1231 {
1232 FreeAtomExpr(expr1);
1233 return( (AtomExpr*)0 );
1234 }
1235 expr1 = BuildAtomBin(AE_OR,expr1,expr2);
1236 }
1237 return( expr1 );
1238
1239 case(2): /* High Precedence Conjunction */
1240 if( !(expr1=ParseAtomExpr(3)) )
1241 return( (AtomExpr*)0 );
1242
1243 while( (*LexPtr!=']') && (*LexPtr!=';') &&
1244 (*LexPtr!=',') && *LexPtr )
1245 {
1246 if( *LexPtr=='&' )
1247 LexPtr++;
1248 prev = LexPtr;
1249 if( !(expr2=ParseAtomExpr(3)) )
1250 {
1251 if( prev != LexPtr )
1252 {
1253 FreeAtomExpr(expr1);
1254 return( (AtomExpr*)0 );
1255 }
1256 else
1257 return( expr1 );
1258 }
1259 expr1 = BuildAtomBin(AE_ANDHI,expr1,expr2);
1260 }
1261 return( expr1 );
1262
1263 case(3): /* Negation or Primitive */
1264 if( *LexPtr == '!' )
1265 {
1266 LexPtr++;
1267 if( !(expr1=ParseAtomExpr(3)) )
1268 return( (AtomExpr*)0 );
1269 return( BuildAtomNot(expr1) );
1270 }
1271 return( ParseComplexAtomPrimitive() );
1272 }
1273 return (AtomExpr*)0;
1274 }
1275
1276 static BondExpr *ParseBondPrimitive( void )
1277 {
1278 switch( *LexPtr++ )
1279 {
1280 case('-'): return BuildBondLeaf(BL_TYPE,BT_SINGLE);
1281 case('='): return BuildBondLeaf(BL_TYPE,BT_DOUBLE);
1282 case('#'): return BuildBondLeaf(BL_TYPE,BT_TRIPLE);
1283 case(':'): return BuildBondLeaf(BL_TYPE,BT_AROM);
1284 case('@'): return BuildBondLeaf(BL_TYPE,BT_RING);
1285 case('~'): return BuildBondLeaf(BL_CONST,True);
1286
1287 case('/'):
1288 if( *LexPtr == '?' )
1289 {
1290 LexPtr++;
1291 return BuildBondLeaf(BL_TYPE,BT_UPUNSPEC);
1292 }
1293 return BuildBondLeaf(BL_TYPE,BT_UP);
1294
1295 case('\\'):
1296 if( *LexPtr == '?' )
1297 {
1298 LexPtr++;
1299 return BuildBondLeaf(BL_TYPE,BT_DOWNUNSPEC);
1300 }
1301
1302 return BuildBondLeaf(BL_TYPE,BT_DOWN);
1303 }
1304 LexPtr--;
1305 return (BondExpr*)0;
1306 }
1307
1308 static BondExpr *ParseBondExpr( int level )
1309 {
1310 register BondExpr *expr1;
1311 register BondExpr *expr2;
1312 register char *prev;
1313
1314 switch( level )
1315 {
1316 case(0): /* Low Precedence Conjunction */
1317 if( !(expr1=ParseBondExpr(1)) )
1318 return (BondExpr*)0;
1319
1320 while( *LexPtr == ';' )
1321 {
1322 LexPtr++;
1323 if( !(expr2=ParseBondExpr(1)) )
1324 {
1325 FreeBondExpr(expr1);
1326 return (BondExpr*)0;
1327 }
1328 expr1 = BuildBondBin(BE_ANDLO,expr1,expr2);
1329 }
1330 return expr1;
1331
1332 case(1): /* Disjunction */
1333 if( !(expr1=ParseBondExpr(2)) )
1334 return (BondExpr*)0;
1335
1336 while( *LexPtr == ',' )
1337 {
1338 LexPtr++;
1339 if( !(expr2=ParseBondExpr(2)) )
1340 {
1341 FreeBondExpr(expr1);
1342 return (BondExpr*)0;
1343 }
1344 expr1 = BuildBondBin(BE_OR,expr1,expr2);
1345 }
1346 return expr1;
1347
1348 case(2): /* High Precedence Conjunction */
1349 if( !(expr1=ParseBondExpr(3)) )
1350 return (BondExpr*)0;
1351
1352 while( (*LexPtr!=']') && (*LexPtr!=';') &&
1353 (*LexPtr!=',') && *LexPtr )
1354 {
1355 if( *LexPtr == '&' )
1356 LexPtr++;
1357 prev = LexPtr;
1358 if( !(expr2=ParseBondExpr(3)) )
1359 {
1360 if( prev != LexPtr )
1361 {
1362 FreeBondExpr(expr1);
1363 return (BondExpr*)0;
1364 }
1365 else
1366 return expr1;
1367 }
1368 expr1 = BuildBondBin(BE_ANDHI,expr1,expr2);
1369 }
1370 return expr1;
1371
1372 case(3): /* Negation or Primitive */
1373 if( *LexPtr == '!' )
1374 {
1375 LexPtr++;
1376 if( !(expr1=ParseBondExpr(3)) )
1377 return (BondExpr*)0;
1378 return BuildBondNot(expr1);
1379 }
1380 return ParseBondPrimitive();
1381 }
1382 return (BondExpr*)0;
1383 }
1384
1385 static int GetVectorBinding()
1386 {
1387 int vb=0;
1388
1389 LexPtr++; //skip colon
1390 if(isdigit(*LexPtr))
1391 {
1392 vb = 0;
1393 while( isdigit(*LexPtr) )
1394 vb = vb*10 + ((*LexPtr++)-'0');
1395 }
1396
1397 return(vb);
1398 }
1399
1400 static Pattern *ParseSMARTSError( Pattern *pat, BondExpr *expr )
1401 {
1402 if( expr )
1403 FreeBondExpr(expr);
1404 return SMARTSError(pat);
1405 }
1406
1407 static Pattern *SMARTSParser( Pattern *pat, ParseState *stat,
1408 int prev, int part )
1409 {
1410 int vb = 0;
1411 register AtomExpr *aexpr;
1412 register BondExpr *bexpr;
1413 register int index;
1414
1415 bexpr = (BondExpr*)0;
1416
1417 while( *LexPtr )
1418 {
1419 switch( *LexPtr++ )
1420 {
1421 case('.'):
1422 // if( bexpr || (prev==-1) )
1423 return ParseSMARTSError(pat,bexpr);
1424 prev = -1;
1425 break;
1426
1427 case('-'): case('='): case('#'):
1428 case(':'): case('~'): case('@'):
1429 case('/'): case('\\'): case('!'):
1430 LexPtr--;
1431 if( (prev==-1) || bexpr )
1432 return ParseSMARTSError(pat,bexpr);
1433 if( !(bexpr=ParseBondExpr(0)) )
1434 return ParseSMARTSError(pat,bexpr);
1435 break;
1436
1437 case('('):
1438 #ifdef STRICT
1439 if( (prev==-1) || bexpr )
1440 {
1441 LexPtr--;
1442 return ParseSMARTSError(pat,bexpr);
1443 }
1444 pat = SMARTSParser(pat,stat,prev,part);
1445 if( !pat )
1446 return (Pattern*)0;
1447 #else /* STRICT */
1448
1449 if( bexpr )
1450 {
1451 LexPtr--;
1452 return ParseSMARTSError(pat,bexpr);
1453 }
1454 if( prev == -1 )
1455 {
1456 index = pat->acount;
1457 pat = SMARTSParser(pat,stat,-1,part);
1458 if( !pat )
1459 return( (Pattern*)0 );
1460 if( index == pat->acount )
1461 return ParseSMARTSError(pat,bexpr);
1462 prev = index;
1463 }
1464 else
1465 {
1466 pat = SMARTSParser(pat,stat,prev,part);
1467 if( !pat )
1468 return (Pattern*)0;
1469 }
1470 #endif /* STRICT */
1471
1472 if( *LexPtr != ')' )
1473 return ParseSMARTSError(pat,bexpr);
1474 LexPtr++;
1475 break;
1476
1477 case(')'): LexPtr--;
1478 if( (prev==-1) || bexpr )
1479 return ParseSMARTSError(pat,bexpr);
1480 return pat;
1481
1482 case('%'): if( prev == -1 )
1483 {
1484 LexPtr--;
1485 return ParseSMARTSError(pat,bexpr);
1486 }
1487
1488 if( isdigit(LexPtr[0]) && isdigit(LexPtr[1]) )
1489 {
1490 index = 10*(LexPtr[0]-'0') + (LexPtr[1]-'0');
1491 LexPtr += 2;
1492 }
1493 else
1494 return ParseSMARTSError(pat,bexpr);
1495
1496 if( stat->closure[index] == -1 )
1497 {
1498 stat->closord[index] = bexpr;
1499 stat->closure[index] = prev;
1500 }
1501 else if( stat->closure[index] != prev )
1502 {
1503 if( !bexpr ) {
1504 if (!stat->closord[index]) {
1505 bexpr = GenerateDefaultBond();
1506 FreeBondExpr(stat->closord[index]);
1507 } else
1508 bexpr = stat->closord[index];
1509 } else if (!EquivalentBondExpr(bexpr, stat->closord[index]))
1510 return ParseSMARTSError(pat,bexpr);
1511
1512 CreateBond(pat,bexpr,prev,stat->closure[index]);
1513 stat->closure[index] = -1;
1514 bexpr = (BondExpr*)0;
1515 }
1516 else
1517 return ParseSMARTSError(pat,bexpr);
1518 break;
1519
1520 case('0'): case('1'): case('2'):
1521 case('3'): case('4'): case('5'):
1522 case('6'): case('7'): case('8'):
1523 case('9'): LexPtr--;
1524 if( prev == -1 )
1525 return ParseSMARTSError(pat,bexpr);
1526 index = (*LexPtr++)-'0';
1527
1528 if( stat->closure[index] == -1 )
1529 {
1530 stat->closord[index] = bexpr;
1531 stat->closure[index] = prev;
1532 bexpr = (BondExpr*)0;
1533 }
1534 else if( stat->closure[index] != prev )
1535 {
1536 if( !bexpr ) {
1537 if (!stat->closord[index]) {
1538 bexpr = GenerateDefaultBond();
1539 FreeBondExpr(stat->closord[index]);
1540 } else
1541 bexpr = stat->closord[index];
1542 } else if (!EquivalentBondExpr(bexpr, stat->closord[index]))
1543 return ParseSMARTSError(pat,bexpr);
1544
1545 CreateBond(pat,bexpr,prev,stat->closure[index]);
1546 stat->closure[index] = -1;
1547 bexpr = (BondExpr*)0;
1548 }
1549 else
1550 return ParseSMARTSError(pat,bexpr);
1551 break;
1552
1553 case('['): aexpr = ParseAtomExpr(0);
1554 vb = (*LexPtr == ':') ? GetVectorBinding():0;
1555 if( !aexpr || (*LexPtr!=']') )
1556 return ParseSMARTSError(pat,bexpr);
1557 index = CreateAtom(pat,aexpr,part,vb);
1558 if( prev != -1 )
1559 {
1560 if( !bexpr )
1561 bexpr = GenerateDefaultBond();
1562 CreateBond(pat,bexpr,prev,index);
1563 bexpr = (BondExpr*)0;
1564 }
1565 prev = index;
1566 LexPtr++;
1567 break;
1568
1569 default:
1570 LexPtr--;
1571 aexpr = ParseSimpleAtomPrimitive();
1572 if( !aexpr )
1573 return ParseSMARTSError(pat,bexpr);
1574 index = CreateAtom(pat,aexpr,part);
1575 if( prev != -1 )
1576 {
1577 if( !bexpr )
1578 bexpr = GenerateDefaultBond();
1579 CreateBond(pat,bexpr,prev,index);
1580 bexpr = (BondExpr*)0;
1581 }
1582 prev = index;
1583 }
1584 }
1585
1586 if( (prev==-1) || bexpr )
1587 return ParseSMARTSError(pat,bexpr);
1588
1589 return pat;
1590 }
1591
1592 static void MarkGrowBonds(Pattern *pat)
1593 {
1594 int i;
1595 OBBitVec bv;
1596
1597 for (i = 0;i < pat->bcount;i++)
1598 {
1599 pat->bond[i].grow = (bv[pat->bond[i].src] && bv[pat->bond[i].dst])?
1600 false:true;
1601
1602 bv.SetBitOn(pat->bond[i].src);
1603 bv.SetBitOn(pat->bond[i].dst);
1604 }
1605 }
1606
1607 static int GetChiralFlag(AtomExpr *expr)
1608 {
1609 int size=0;
1610 #define OB_EVAL_STACKSIZE 40
1611
1612 AtomExpr *stack[OB_EVAL_STACKSIZE];
1613 memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
1614 #undef OB_EVAL_STACKSIZE
1615
1616 bool lftest=true;
1617
1618 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
1619 {
1620 switch (expr->type)
1621 {
1622 case AE_LEAF:
1623 if (expr->leaf.prop == AL_CHIRAL)
1624 return(expr->leaf.value);
1625 size--;
1626 break;
1627
1628 case AE_ANDHI:
1629 case AE_ANDLO:
1630
1631 if (stack[size+1] == expr->bin.rgt)
1632 size--;
1633 else if (stack[size+1] == expr->bin.lft)
1634 {
1635 if (lftest)
1636 {
1637 size++;
1638 stack[size] = expr->bin.rgt;
1639 }
1640 else
1641 size--;
1642 }
1643 else
1644 {
1645 size++;
1646 stack[size] = expr->bin.lft;
1647 }
1648 break;
1649
1650 case AE_OR:
1651
1652 if (stack[size+1] == expr->bin.rgt)
1653 size--;
1654 else if (stack[size+1] == expr->bin.lft)
1655 {
1656 if (!lftest)
1657 {
1658 size++;
1659 stack[size] = expr->bin.rgt;
1660 }
1661 else
1662 size--;
1663 }
1664 else
1665 {
1666 size++;
1667 stack[size] = expr->bin.lft;
1668 }
1669 break;
1670
1671 case AE_NOT:
1672 if (stack[size+1] != expr->mon.arg)
1673 {
1674 size++;
1675 stack[size] = expr->mon.arg;
1676 }
1677 else
1678 {
1679 lftest = !lftest;
1680 size--;
1681 }
1682 break;
1683
1684 case AE_RECUR:
1685 size--;
1686 break;
1687 }
1688 }
1689
1690 return((int)false);
1691 }
1692
1693 static Pattern *ParseSMARTSPart( Pattern *result, int part )
1694 {
1695 auto ParseState stat;
1696 int i,flag;
1697
1698 for( i=0; i<100; i++ )
1699 stat.closure[i] = -1;
1700
1701 result = SMARTSParser(result,&stat,-1,part);
1702
1703 flag = False;
1704 for( i=0; i<100; i++ )
1705 if( stat.closure[i] != -1 )
1706 {
1707 FreeBondExpr(stat.closord[i]);
1708 flag = True;
1709 }
1710
1711 if( result )
1712 {
1713 if( flag )
1714 return(SMARTSError(result));
1715 else
1716 {
1717 MarkGrowBonds(result);
1718 result->ischiral = false;
1719 for (i = 0;i < result->acount;i++)
1720 {
1721 result->atom[i].chiral_flag = GetChiralFlag(result->atom[i].expr);
1722 if (result->atom[i].chiral_flag)
1723 result->ischiral = true;
1724 }
1725 return(result);
1726 }
1727 }
1728 else
1729 return (Pattern*)0;
1730 }
1731
1732
1733 static Pattern *ParseSMARTSPattern( void )
1734 {
1735 Pattern *result;
1736 result = AllocPattern();
1737
1738 while( *LexPtr == '(' )
1739 {
1740 LexPtr++;
1741 result = ParseSMARTSPart(result,result->parts);
1742 if( !result )
1743 return (Pattern*)0;
1744 result->parts++;
1745
1746 if( *LexPtr != ')' )
1747 return SMARTSError(result);
1748 LexPtr++;
1749
1750 if( !*LexPtr || (*LexPtr==')') )
1751 return result;
1752
1753 if( *LexPtr != '.' )
1754 return SMARTSError(result);
1755 LexPtr++;
1756 }
1757
1758 return ParseSMARTSPart(result,0);
1759 }
1760
1761 static Pattern *ParseSMARTSString( char *ptr )
1762 {
1763 register Pattern *result;
1764
1765 if( !ptr || !*ptr )
1766 return (Pattern*)0;
1767
1768 LexPtr = MainPtr = ptr;
1769 result = ParseSMARTSPattern();
1770 if( result && *LexPtr )
1771 return SMARTSError(result);
1772 return result;
1773 }
1774
1775 Pattern *ParseSMARTSRecord( char *ptr )
1776 {
1777 register char *src,*dst;
1778
1779 src = ptr;
1780 while( *src && !isspace(*src) )
1781 src++;
1782
1783 if( isspace(*src) )
1784 {
1785 *src++ = '\0';
1786 while( isspace(*src) )
1787 src++;
1788 }
1789
1790 dst = Descr;
1791 while( *src && (dst<Descr+78) )
1792 {
1793 if( isspace(*src) )
1794 {
1795 *dst++ = ' ';
1796 while( isspace(*src) )
1797 src++;
1798 }
1799 else
1800 *dst++ = *src++;
1801 }
1802 *dst = '\0';
1803
1804 return ParseSMARTSString(Buffer);
1805 }
1806
1807 /*==============================*/
1808 /* SMARTS Component Traversal */
1809 /*==============================*/
1810
1811 static void TraverseSMARTS( Pattern *pat, int i )
1812 {
1813 register int j,k;
1814
1815 pat->atom[i].visit = True;
1816 for( j=0; j<pat->bcount; j++ )
1817 if( pat->bond[j].visit == -1 )
1818 {
1819 if( pat->bond[j].src == i )
1820 {
1821 pat->bond[j].visit = i;
1822 k = pat->bond[j].dst;
1823 if( !pat->atom[k].visit )
1824 TraverseSMARTS(pat,k);
1825 }
1826 else if( pat->bond[j].dst == i )
1827 {
1828 pat->bond[j].visit = i;
1829 k = pat->bond[j].src;
1830 if( !pat->atom[k].visit )
1831 TraverseSMARTS(pat,k);
1832 }
1833 }
1834 }
1835
1836 /*============================*/
1837 /* Canonical SMARTS Pattern */
1838 /*============================*/
1839
1840 static AtomExpr *NotAtomExpr( AtomExpr* );
1841 static AtomExpr *AndAtomExpr( AtomExpr*, AtomExpr* );
1842 static AtomExpr *OrAtomExpr( AtomExpr*, AtomExpr* );
1843 //static AtomExpr *TransformAtomExpr( AtomExpr* );
1844 //static Pattern *CanonicaliseSMARTS( Pattern* );
1845
1846 static int IsBooleanAtomLeaf( AtomExpr *expr )
1847 {
1848 return (expr->leaf.prop==AL_AROM) ||
1849 (expr->leaf.prop==AL_CONST);
1850 }
1851
1852 static int IsNegatingAtomLeaf( AtomExpr *expr )
1853 {
1854 return (expr->leaf.prop==AL_RINGS);
1855 }
1856
1857 static int EqualAtomExpr( AtomExpr *lft, AtomExpr *rgt )
1858 {
1859 if( lft->type != rgt->type )
1860 return False;
1861
1862 if( lft->type == AE_LEAF )
1863 {
1864 return( (lft->leaf.prop==rgt->leaf.prop) &&
1865 (lft->leaf.value==rgt->leaf.value) );
1866 }
1867 else if( lft->type == AE_NOT )
1868 {
1869 return EqualAtomExpr(lft->mon.arg,rgt->mon.arg);
1870 }
1871 else if( lft->type == AE_RECUR )
1872 return False;
1873
1874 return EqualAtomExpr(lft->bin.lft,rgt->bin.lft) &&
1875 EqualAtomExpr(lft->bin.rgt,rgt->bin.rgt);
1876 }
1877
1878 static int OrderAtomExpr( AtomExpr *lft, AtomExpr *rgt )
1879 {
1880 register AtomExpr *larg;
1881 register AtomExpr *rarg;
1882 register int stat;
1883
1884 if( lft->type == AE_NOT )
1885 { /* larg->type == AE_LEAF */
1886 larg = lft->mon.arg;
1887 }
1888 else
1889 larg = lft;
1890
1891 if( rgt->type == AE_NOT )
1892 { /* rarg->type == AE_LEAF */
1893 rarg = rgt->mon.arg;
1894 }
1895 else
1896 rarg = rgt;
1897
1898 if( larg->type > rarg->type )
1899 {
1900 return 1;
1901 }
1902 else if( larg->type < rarg->type )
1903 return -1;
1904
1905 if( larg->type == AE_LEAF )
1906 {
1907 if( larg->leaf.prop > rarg->leaf.prop )
1908 return 1;
1909 if( larg->leaf.prop < rarg->leaf.prop )
1910 return -1;
1911 return( larg->leaf.value - rarg->leaf.value );
1912 }
1913
1914 stat = OrderAtomExpr(lft->bin.lft,rgt->bin.lft);
1915 if( stat != 0 )
1916 return stat;
1917 return OrderAtomExpr(lft->bin.rgt,rgt->bin.rgt);
1918 }
1919
1920 static int AtomLeafConflict( AtomExpr *lft, AtomExpr *rgt )
1921 {
1922 register AtomExpr *tmp;
1923
1924 if( (lft->type==AE_LEAF) && (rgt->type==AE_LEAF) )
1925 {
1926 if( lft->leaf.prop == rgt->leaf.prop )
1927 {
1928 if( IsNegatingAtomLeaf(lft) )
1929 {
1930 if( lft->leaf.value == 0 )
1931 {
1932 return rgt->leaf.value != 0;
1933 }
1934 else if( lft->leaf.value == -1 )
1935 return rgt->leaf.value == 0;
1936
1937 if( rgt->leaf.value == 0 )
1938 {
1939 return lft->leaf.value != 0;
1940 }
1941 else if( rgt->leaf.value == -1 )
1942 return lft->leaf.value == 0;
1943 }
1944 return lft->leaf.value != rgt->leaf.value;
1945 }
1946
1947 if( lft->leaf.prop > rgt->leaf.prop )
1948 {
1949 tmp = lft;
1950 lft = rgt;
1951 rgt = tmp;
1952 }
1953
1954 /* Aromaticity -> Ring */
1955 if( (lft->leaf.prop==AL_AROM) && (rgt->leaf.prop==AL_RINGS) )
1956 return( lft->leaf.value && !rgt->leaf.value );
1957
1958 /* Positive charge ~ Negative charge */
1959 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1960 return( (lft->leaf.value!=0) || (rgt->leaf.value!=0) );
1961
1962 /* Total hcount >= Implicit hcount */
1963 if( (lft->leaf.prop==AL_HCOUNT) && (rgt->leaf.prop==AL_IMPLICIT) )
1964 return( lft->leaf.value < rgt->leaf.value );
1965 }
1966
1967 if( (lft->type==AE_LEAF) && (rgt->type==AE_NOT) )
1968 {
1969 rgt = rgt->mon.arg;
1970 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1971 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1972 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1973 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1974 return False;
1975 }
1976
1977 if( (lft->type==AE_NOT) && (rgt->type==AE_LEAF) )
1978 {
1979 lft = lft->mon.arg;
1980 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
1981 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1982 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
1983 return( (lft->leaf.value==0) && (rgt->leaf.value==0) );
1984 return False;
1985 }
1986
1987 return False;
1988 }
1989
1990 static int AtomExprConflict( AtomExpr *lft, AtomExpr *rgt )
1991 {
1992 while( rgt->type == AE_ANDHI )
1993 {
1994 if( AtomLeafConflict(lft,rgt->bin.lft) )
1995 return True;
1996 rgt = rgt->bin.rgt;
1997 }
1998 return AtomLeafConflict(lft,rgt);
1999 }
2000
2001 /* return LEAF(lft) => LEAF(rgt); */
2002 static int AtomLeafImplies( AtomExpr *lft, AtomExpr *rgt )
2003 {
2004 if( (lft->type==AE_LEAF) && (rgt->type==AE_LEAF) )
2005 { /* Implied Ring Membership */
2006 if( (rgt->leaf.prop==AL_RINGS) && (rgt->leaf.value==-1) )
2007 {
2008 if( lft->leaf.prop == AL_AROM )
2009 return lft->leaf.value;
2010
2011 if( lft->leaf.prop == AL_RINGS )
2012 return lft->leaf.value > 0;
2013
2014 if( lft->leaf.prop == AL_SIZE )
2015 return lft->leaf.value > 0;
2016 }
2017
2018 /* Positive charge ~ Negative charge */
2019 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
2020 return (lft->leaf.value==0) && (rgt->leaf.value==0);
2021 return False;
2022 }
2023
2024 if( (lft->type==AE_LEAF) && (rgt->type==AE_NOT) )
2025 {
2026 rgt = rgt->mon.arg;
2027 if( lft->leaf.prop == rgt->leaf.prop )
2028 return lft->leaf.value != rgt->leaf.value;
2029
2030 if( (lft->leaf.prop==AL_POSITIVE) && (rgt->leaf.prop==AL_NEGATIVE) )
2031 return True;
2032 if( (lft->leaf.prop==AL_NEGATIVE) && (rgt->leaf.prop==AL_POSITIVE) )
2033 return True;
2034 return False;
2035 }
2036
2037 return False;
2038 }
2039
2040 /* return EXPR(rgt) => LEAF(lft); */
2041 static int AtomExprImplied( AtomExpr *lft, AtomExpr *rgt )
2042 {
2043 while( rgt->type == AE_ANDHI )
2044 {
2045 if( AtomLeafImplies(rgt->bin.lft,lft) )
2046 return True;
2047 rgt = rgt->bin.rgt;
2048 }
2049 return AtomLeafImplies(rgt,lft);
2050 }
2051
2052 /* remove implied nodes from EXPR(rgt) */
2053 static AtomExpr *AtomExprImplies( AtomExpr *lft, AtomExpr *rgt )
2054 {
2055 register AtomExpr *tmp;
2056
2057 if( rgt->type != AE_ANDHI )
2058 {
2059 if( AtomLeafImplies(lft,rgt) )
2060 {
2061 FreeAtomExpr(rgt);
2062 return (AtomExpr*)0;
2063 }
2064 return rgt;
2065 }
2066
2067 tmp = AtomExprImplies(lft,rgt->bin.rgt);
2068
2069 if( tmp )
2070 {
2071 if( AtomLeafImplies(lft,rgt->bin.lft) )
2072 {
2073 rgt->bin.rgt = (AtomExpr*)0;
2074 FreeAtomExpr(rgt);
2075 return tmp;
2076 }
2077 rgt->bin.rgt = tmp;
2078 return rgt;
2079 }
2080 else
2081 {
2082 rgt->bin.rgt = (AtomExpr*)0;
2083 if( AtomLeafImplies(lft,rgt->bin.lft) )
2084 {
2085 FreeAtomExpr(rgt);
2086 return (AtomExpr*)0;
2087 }
2088 tmp = rgt->bin.lft;
2089 rgt->bin.lft = (AtomExpr*)0;
2090 FreeAtomExpr(rgt);
2091 return tmp;
2092 }
2093 }
2094
2095 static AtomExpr *AndAtomExprLeaf( AtomExpr *lft, AtomExpr *rgt )
2096 {
2097 if( AtomExprConflict(lft,rgt) )
2098 {
2099 FreeAtomExpr(lft);
2100 FreeAtomExpr(rgt);
2101 return BuildAtomLeaf(AL_CONST,False);
2102 }
2103
2104 if( AtomExprImplied(lft,rgt) )
2105 {
2106 FreeAtomExpr(lft);
2107 return rgt;
2108 }
2109
2110 rgt = AtomExprImplies(lft,rgt);
2111 if( !rgt )
2112 return lft;
2113
2114 return BuildAtomBin(AE_ANDHI,lft,rgt);
2115 }
2116
2117 static AtomExpr *ConstrainRecursion( AtomExpr *recur, AtomExpr *expr )
2118 {
2119 register AtomExpr *head;
2120 register Pattern *pat;
2121
2122 pat = (Pattern*)recur->recur.recur;
2123 head = AndAtomExpr(pat->atom[0].expr,expr);
2124 pat->atom[0].expr = head;
2125
2126 if( IsInvalidAtom(head) )
2127 {
2128 FreePattern(pat);
2129 return BuildAtomLeaf(AL_CONST,False);
2130 }
2131 return recur;
2132 }
2133
2134 static AtomExpr *AndAtomExpr( AtomExpr *lft, AtomExpr *rgt )
2135 {
2136 register AtomExpr *expr;
2137 register int order;
2138
2139 /* Identities */
2140 if( EqualAtomExpr(lft,rgt) )
2141 {
2142 FreeAtomExpr(rgt);
2143 return lft;
2144 }
2145
2146 if( (lft->type==AE_LEAF) && (lft->leaf.prop==AL_CONST) )
2147 {
2148 if( lft->leaf.value )
2149 {
2150 FreeAtomExpr(lft);
2151 return rgt;
2152 }
2153 else
2154 {
2155 FreeAtomExpr(rgt);
2156 return lft;
2157 }
2158 }
2159
2160 if( (rgt->type==AE_LEAF) && (rgt->leaf.prop==AL_CONST) )
2161 {
2162 if( rgt->leaf.value )
2163 {
2164 FreeAtomExpr(rgt);
2165 return lft;
2166 }
2167 else
2168 {
2169 FreeAtomExpr(lft);
2170 return rgt;
2171 }
2172 }
2173
2174 /* Distributivity */
2175 if( lft->type == AE_OR )
2176 {
2177 expr = CopyAtomExpr(rgt);
2178 expr = OrAtomExpr(AndAtomExpr(expr,lft->bin.lft),
2179 AndAtomExpr(rgt, lft->bin.rgt));
2180 lft->bin.lft = (AtomExpr*)0;
2181 lft->bin.rgt = (AtomExpr*)0;
2182 FreeAtomExpr(lft);
2183 return( expr );
2184 }
2185
2186 if( rgt->type == AE_OR )
2187 {
2188 expr = CopyAtomExpr(lft);
2189 expr = OrAtomExpr(AndAtomExpr(expr,rgt->bin.lft),
2190 AndAtomExpr(lft, rgt->bin.rgt));
2191 rgt->bin.lft = (AtomExpr*)0;
2192 rgt->bin.rgt = (AtomExpr*)0;
2193 FreeAtomExpr(rgt);
2194 return( expr );
2195 }
2196
2197 /* Recursion */
2198 if( (rgt->type==AE_RECUR) && (lft->type!=AE_RECUR) )
2199 return ConstrainRecursion(rgt,lft);
2200
2201 if( (rgt->type!=AE_RECUR) && (lft->type==AE_RECUR) )
2202 return ConstrainRecursion(lft,rgt);
2203
2204 order = OrderAtomExpr(lft,rgt);
2205 if( order > 0 )
2206 {
2207 expr = lft;
2208 lft = rgt;
2209 rgt = expr;
2210 }
2211
2212 if( lft->type == AE_ANDHI )
2213 {
2214 expr = AndAtomExpr(lft->bin.rgt,rgt);
2215 expr = AndAtomExpr(lft->bin.lft,expr);
2216 lft->bin.lft = (AtomExpr*)0;
2217 lft->bin.rgt = (AtomExpr*)0;
2218 FreeAtomExpr(lft);
2219 return expr;
2220 }
2221
2222 if( rgt->type == AE_ANDHI )
2223 {
2224 if( OrderAtomExpr(lft,rgt->bin.lft) > 0 )
2225 {
2226 expr = AndAtomExpr(lft,rgt->bin.rgt);
2227 expr = AndAtomExpr(rgt->bin.lft,expr);
2228 rgt->bin.lft = (AtomExpr*)0;
2229 rgt->bin.rgt = (AtomExpr*)0;
2230 FreeAtomExpr(rgt);
2231 return expr;
2232 }
2233
2234 if( EqualAtomExpr(lft,rgt->bin.lft) )
2235 {
2236 FreeAtomExpr(lft);
2237 return rgt;
2238 }
2239 }
2240
2241 return AndAtomExprLeaf(lft,rgt);
2242 }
2243
2244 static AtomExpr *OrAtomExprLeaf( AtomExpr *lft, AtomExpr *rgt )
2245 {
2246 return BuildAtomBin(AE_OR,lft,rgt);
2247 }
2248
2249 static AtomExpr *OrAtomExpr( AtomExpr *lft, AtomExpr *rgt )
2250 {
2251 register AtomExpr *expr;
2252 register int order;
2253
2254 /* Identities */
2255 if( EqualAtomExpr(lft,rgt) )
2256 {
2257 FreeAtomExpr(rgt);
2258 return lft;
2259 }
2260
2261 if( (lft->type==AE_LEAF) && (lft->leaf.prop==AL_CONST) )
2262 {
2263 if( lft->leaf.value )
2264 {
2265 FreeAtomExpr(rgt);
2266 return lft;
2267 }
2268 else
2269 {
2270 FreeAtomExpr(lft);
2271 return rgt;
2272 }
2273 }
2274
2275 if( (rgt->type==AE_LEAF) && (rgt->leaf.prop==AL_CONST) )
2276 {
2277 if( rgt->leaf.value )
2278 {
2279 FreeAtomExpr(lft);
2280 return rgt;
2281 }
2282 else
2283 {
2284 FreeAtomExpr(rgt);
2285 return lft;
2286 }
2287 }
2288
2289 order = OrderAtomExpr(lft,rgt);
2290 if( order > 0 )
2291 {
2292 expr = lft;
2293 lft = rgt;
2294 rgt = expr;
2295 }
2296
2297 if( lft->type == AE_OR )
2298 {
2299 expr = OrAtomExpr(lft->bin.rgt,rgt);
2300 expr = OrAtomExpr(lft->bin.lft,expr);
2301 lft->bin.lft = (AtomExpr*)0;
2302 lft->bin.rgt = (AtomExpr*)0;
2303 FreeAtomExpr(lft);
2304 return expr;
2305 }
2306
2307 if( rgt->type == AE_OR )
2308 {
2309 if( OrderAtomExpr(lft,rgt->bin.lft) > 0 )
2310 {
2311 expr = OrAtomExpr(lft,rgt->bin.rgt);
2312 expr = OrAtomExpr(rgt->bin.lft,expr);
2313 rgt->bin.lft = (AtomExpr*)0;
2314 rgt->bin.rgt = (AtomExpr*)0;
2315 FreeAtomExpr(rgt);
2316 return expr;
2317 }
2318
2319 if( EqualAtomExpr(lft,rgt->bin.lft) )
2320 {
2321 FreeAtomExpr(lft);
2322 return rgt;
2323 }
2324 }
2325
2326 return OrAtomExprLeaf(lft,rgt);
2327 }
2328
2329 static AtomExpr *NotAtomExpr( AtomExpr *expr )
2330 {
2331 register AtomExpr *result;
2332 register AtomExpr *lft;
2333 register AtomExpr *rgt;
2334
2335 if( expr->type == AE_LEAF )
2336 {
2337 if( IsBooleanAtomLeaf(expr) )
2338 {
2339 expr->leaf.value = !expr->leaf.value;
2340 return expr;
2341 }
2342 else if( IsNegatingAtomLeaf(expr) )
2343 {
2344 if( expr->leaf.value == -1 )
2345 {
2346 expr->leaf.value = 0;
2347 return expr;
2348 }
2349 else if( expr->leaf.value == 0 )
2350 {
2351 expr->leaf.value = -1;
2352 return expr;
2353 }
2354 }
2355 }
2356 else if( expr->type == AE_NOT )
2357 {
2358 result = expr->mon.arg;
2359 expr->mon.arg = (AtomExpr*)0;
2360 FreeAtomExpr(expr);
2361 return result;
2362 }
2363 else if( (expr->type==AE_ANDHI) ||
2364 (expr->type==AE_ANDLO) )
2365 {
2366 lft = NotAtomExpr(expr->bin.lft);
2367 rgt = NotAtomExpr(expr->bin.rgt);
2368 expr->bin.lft = (AtomExpr*)0;
2369 expr->bin.rgt = (AtomExpr*)0;
2370 FreeAtomExpr(expr);
2371 return OrAtomExpr(lft,rgt);
2372 }
2373 else if( expr->type == AE_OR )
2374 {
2375 lft = NotAtomExpr(expr->bin.lft);
2376 rgt = NotAtomExpr(expr->bin.rgt);
2377 expr->bin.lft = (AtomExpr*)0;
2378 expr->bin.rgt = (AtomExpr*)0;
2379 FreeAtomExpr(expr);
2380 return AndAtomExpr(lft,rgt);
2381 }
2382 return BuildAtomNot(expr);
2383 }
2384
2385 /*==============================*/
2386 /* Canonical Bond Expressions */
2387 /*==============================*/
2388
2389 static int GetBondLeafIndex( BondExpr *expr )
2390 {
2391 if( expr->leaf.prop == BL_CONST )
2392 {
2393 if( expr->leaf.value )
2394 {
2395 return( BS_ALL );
2396 }
2397 else
2398 return( 0 );
2399 }
2400 else /* expr->leaf.prop == BL_TYPE */
2401 switch( expr->leaf.value )
2402 {
2403 case(BT_SINGLE): return( BS_SINGLE );
2404 case(BT_DOUBLE): return( BS_DOUBLE );
2405 case(BT_TRIPLE): return( BS_TRIPLE );
2406 case(BT_AROM): return( BS_AROM );
2407 case(BT_UP): return( BS_UP );
2408 case(BT_DOWN): return( BS_DOWN );
2409 case(BT_UPUNSPEC): return( BS_UPUNSPEC );
2410 case(BT_DOWNUNSPEC): return( BS_DOWNUNSPEC );
2411 case(BT_RING): return( BS_RING );
2412 }
2413 return 0;
2414 }
2415
2416 static int GetBondExprIndex( BondExpr *expr )
2417 {
2418 register int lft,rgt;
2419 register int arg;
2420
2421 switch( expr->type )
2422 {
2423 case(BE_LEAF): return GetBondLeafIndex(expr);
2424
2425 case(BE_NOT): arg = GetBondExprIndex(expr->mon.arg);
2426 return( arg ^ BS_ALL );
2427
2428 case(BE_ANDHI):
2429 case(BE_ANDLO): lft = GetBondExprIndex(expr->bin.lft);
2430 rgt = GetBondExprIndex(expr->bin.rgt);
2431 return( lft & rgt );
2432
2433 case(BE_OR): lft = GetBondExprIndex(expr->bin.lft);
2434 rgt = GetBondExprIndex(expr->bin.rgt);
2435 return( lft | rgt );
2436 }
2437 /* Avoid Compiler Warning */
2438 return 0;
2439 }
2440
2441 static BondExpr *NotBondExpr( BondExpr *expr )
2442 {
2443 register BondExpr *result;
2444
2445 if( expr->type == BE_LEAF )
2446 {
2447 if( expr->leaf.prop == BL_CONST )
2448 {
2449 expr->leaf.value = !expr->leaf.value;
2450 return expr;
2451 }
2452 }
2453 else if( expr->type == BE_NOT )
2454 {
2455 result = expr->mon.arg;
2456 expr->mon.arg = (BondExpr*)0;
2457 FreeBondExpr(expr);
2458 return result;
2459 }
2460 return BuildBondNot(expr);
2461 }
2462
2463 static BondExpr *TransformBondExpr( BondExpr *expr )
2464 {
2465 register BondExpr *lft,*rgt;
2466 register BondExpr *arg;
2467
2468 if( expr->type == BE_LEAF )
2469 {
2470 return expr;
2471 }
2472 else if( expr->type == BE_NOT )
2473 {
2474 arg = expr->mon.arg;
2475 arg = TransformBondExpr(arg);
2476 expr->mon.arg = (BondExpr*)0;
2477 FreeBondExpr(expr);
2478 return NotBondExpr(arg);
2479 }
2480 else if( expr->type == BE_ANDHI )
2481 {
2482 lft = expr->bin.lft;
2483 rgt = expr->bin.rgt;
2484 lft = TransformBondExpr(lft);
2485 rgt = TransformBondExpr(rgt);
2486 expr->bin.lft = lft;
2487 expr->bin.rgt = rgt;
2488 return expr;
2489 }
2490 else if( expr->type == BE_ANDLO )
2491 {
2492 lft = expr->bin.lft;
2493 rgt = expr->bin.rgt;
2494 lft = TransformBondExpr(lft);
2495 rgt = TransformBondExpr(rgt);
2496 expr->bin.lft = lft;
2497 expr->bin.rgt = rgt;
2498 return expr;
2499 }
2500 else if( expr->type == BE_OR )
2501 {
2502 lft = expr->bin.lft;
2503 rgt = expr->bin.rgt;
2504 lft = TransformBondExpr(lft);
2505 rgt = TransformBondExpr(rgt);
2506 expr->bin.lft = lft;
2507 expr->bin.rgt = rgt;
2508 return expr;
2509 }
2510 return expr;
2511 }
2512
2513 #ifdef FOO
2514 static BondExpr *CanonicaliseBond( BondExpr *expr )
2515 {
2516 #ifndef ORIG
2517 register int index;
2518
2519 index = GetBondExprIndex(expr);
2520 FreeBondExpr(expr);
2521
2522 LexPtr = CanBondExpr[index];
2523 if( *LexPtr )
2524 {
2525 expr = ParseBondExpr(0);
2526 }
2527 else
2528 expr = GenerateDefaultBond();
2529 #endif
2530
2531 return TransformBondExpr(expr);
2532 }
2533 #endif
2534
2535
2536 //**********************************
2537 //********Pattern Matching**********
2538 //**********************************
2539
2540 bool OBSmartsPattern::Init(const char *buffer)
2541 {
2542 strncpy(Buffer,buffer, sizeof(Buffer) - 1);
2543 Buffer[sizeof(Buffer) - 1] = '\0';
2544
2545 _pat = ParseSMARTSRecord(Buffer);
2546 _str = buffer;
2547
2548 return(_pat != (Pattern*)NULL);
2549 }
2550
2551 bool OBSmartsPattern::Init(const std::string &s)
2552 {
2553 strncpy(Buffer, s.c_str(), sizeof(Buffer) - 1);
2554 Buffer[sizeof(Buffer) - 1] = '\0';
2555
2556 _pat = ParseSMARTSRecord(Buffer);
2557 _str = s;
2558
2559 return(_pat != (Pattern*)NULL);
2560 }
2561
2562 OBSmartsPattern::~OBSmartsPattern()
2563 {
2564 if (_pat)
2565 FreePattern(_pat);
2566 }
2567
2568 bool OBSmartsPattern::Match(OBMol &mol,bool single)
2569 {
2570 RSCACHE.clear();
2571 return(match(mol,_pat,_mlist,single));
2572 }
2573
2574 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,std::vector<std::pair<int,int> > &pr,bool single)
2575 {
2576 bool ok;
2577 std::vector<std::vector<int> > mlist;
2578 std::vector<std::vector<int> >::iterator i;
2579 std::vector<std::pair<int,int> >::iterator j;
2580
2581 RSCACHE.clear();
2582 match(mol,_pat,mlist);
2583 _mlist.clear();
2584 if (mlist.empty())
2585 return(false);
2586
2587 for (i = mlist.begin();i != mlist.end();i++)
2588 {
2589 ok = true;
2590 for (j = pr.begin();j != pr.end() && ok;j++)
2591 if ((*i)[j->first] != j->second)
2592 ok = false;
2593
2594 if (ok)
2595 _mlist.push_back(*i);
2596 if (single && !_mlist.empty())
2597 return(true);
2598 }
2599
2600 return((_mlist.empty()) ? false:true);
2601 }
2602
2603 bool OBSmartsPattern::RestrictedMatch(OBMol &mol,OBBitVec &vres,bool single)
2604 {
2605 bool ok;
2606 std::vector<int>::iterator j;
2607 std::vector<std::vector<int> > mlist;
2608 std::vector<std::vector<int> >::iterator i;
2609
2610 RSCACHE.clear();
2611 match(mol,_pat,mlist);
2612
2613 _mlist.clear();
2614 if (mlist.empty())
2615 return(false);
2616
2617 for (i = mlist.begin();i != mlist.end();i++)
2618 {
2619 ok = true;
2620 for (j = i->begin();j != i->end();j++)
2621 if (!vres[*j])
2622 {
2623 ok = false;
2624 break;
2625 }
2626 if (!ok)
2627 continue;
2628
2629 _mlist.push_back(*i);
2630 if (single && !_mlist.empty())
2631 return(true);
2632 }
2633
2634 return((_mlist.empty()) ? false:true);
2635 }
2636
2637 void SetupAtomMatchTable(std::vector<std::vector<bool> > &ttab,Pattern *pat,OBMol &mol)
2638 {
2639 int i;
2640
2641 ttab.resize(pat->acount);
2642 for (i = 0;i < pat->acount;i++)
2643 ttab[i].resize(mol.NumAtoms()+1);
2644
2645 OBAtom *atom;
2646 std::vector<OBNodeBase*>::iterator j;
2647 for (i = 0;i < pat->acount;i++)
2648 for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
2649 if (EvalAtomExpr(pat->atom[0].expr,atom))
2650 ttab[i][atom->GetIdx()] = true;
2651 }
2652
2653 static void FastSingleMatch(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist)
2654 {
2655 OBAtom *atom,*a1,*nbr;
2656 std::vector<OBNodeBase*>::iterator i;
2657
2658 OBBitVec bv(mol.NumAtoms()+1);
2659 std::vector<int> map;
2660 map.resize(pat->acount);
2661 std::vector<std::vector<OBEdgeBase*>::iterator> vi;
2662 std::vector<bool> vif;
2663
2664 if (pat->bcount)
2665 {
2666 vif.resize(pat->bcount);
2667 vi.resize(pat->bcount);
2668 }
2669
2670 int bcount;
2671 for (atom = mol.BeginAtom(i);atom;atom=mol.NextAtom(i))
2672 if (EvalAtomExpr(pat->atom[0].expr,atom))
2673 {
2674 map[0] = atom->GetIdx();
2675 if (pat->bcount)
2676 vif[0] = false;
2677 bv.Clear();
2678 bv.SetBitOn(atom->GetIdx());
2679
2680 for (bcount=0;bcount >=0;)
2681 {
2682 //***entire pattern matched***
2683 if (bcount == pat->bcount) //save full match here
2684 {
2685 mlist.push_back(map);
2686 bcount--;
2687 return; //found a single match
2688 }
2689
2690 //***match the next bond***
2691 if (!pat->bond[bcount].grow) //just check bond here
2692 {
2693 if ( !vif[bcount] )
2694 {
2695 OBBond *bond = mol.GetBond(map[pat->bond[bcount].src],
2696 map[pat->bond[bcount].dst]);
2697 if (bond && EvalBondExpr(pat->bond[bcount].expr,bond))
2698 {
2699 vif[bcount++] = true;
2700 if (bcount < pat->bcount)
2701 vif[bcount] = false;
2702 }
2703 else
2704 bcount--;
2705 }
2706 else //bond must have already been visited - backtrack
2707 bcount--;
2708 }
2709 else //need to map atom and check bond
2710 {
2711 a1 = mol.GetAtom(map[pat->bond[bcount].src]);
2712
2713 if (!vif[bcount]) //figure out which nbr atom we are mapping
2714 {
2715 nbr = a1->BeginNbrAtom(vi[bcount]);
2716 }
2717 else
2718 {
2719 bv.SetBitOff(map[pat->bond[bcount].dst]);
2720 nbr = a1->NextNbrAtom(vi[bcount]);
2721 }
2722
2723 for (;nbr;nbr=a1->NextNbrAtom(vi[bcount]))
2724 if (!bv[nbr->GetIdx()])
2725 if (EvalAtomExpr(pat->atom[pat->bond[bcount].dst].expr,nbr)
2726 && EvalBondExpr(pat->bond[bcount].expr,(OBBond *)*(vi[bcount])))
2727 {
2728 bv.SetBitOn(nbr->GetIdx());
2729 map[pat->bond[bcount].dst] = nbr->GetIdx();
2730 vif[bcount] = true;
2731 bcount++;
2732 if (bcount < pat->bcount)
2733 vif[bcount] = false;
2734 break;
2735 }
2736
2737 if (!nbr)//no match - time to backtrack
2738 bcount--;
2739 }
2740 }
2741 }
2742 }
2743
2744
2745 static bool match(OBMol &mol,Pattern *pat,std::vector<std::vector<int> > &mlist,bool single)
2746 {
2747 mlist.clear();
2748 if (!pat || pat->acount == 0)
2749 return(false);//shouldn't ever happen
2750
2751 if (single && !pat->ischiral)
2752 FastSingleMatch(mol,pat,mlist);
2753 else
2754 {
2755 OBSSMatch ssm(mol,pat);
2756 ssm.Match(mlist);
2757 }
2758
2759 if (pat->ischiral && mol.Has3D())
2760 {
2761 int j,k,r1,r2,r3,r4;
2762 std::vector<std::vector<int> >::iterator m;
2763 OBAtom *ra1,*ra2,*ra3,*ra4;
2764 std::vector<std::vector<int> > tmpmlist;
2765
2766 for (j = 0;j < pat->acount;j++)
2767 if (pat->atom[j].chiral_flag)
2768 {
2769 r1 = r2 = r3 = r4 = -1;
2770 r2 = j;
2771 for (k = 0;k < pat->bcount;k++)
2772 if (pat->bond[k].dst == r2)
2773 if (r1 == -1)
2774 r1 = pat->bond[k].src;
2775 else if (r3 == -1)
2776 r3 = pat->bond[k].src;
2777 else if (r4 == -1)
2778 r4 = pat->bond[k].src;
2779
2780 for (k = 0;k < pat->bcount;k++)
2781 if (pat->bond[k].src == r2)
2782 if (r1 == -1)
2783 r1 = pat->bond[k].dst;
2784 else if (r3 == -1)
2785 r3 = pat->bond[k].dst;
2786 else if (r4 == -1)
2787 r4 = pat->bond[k].dst;
2788
2789 if (r1 == -1 || r2 == -1 || r3 == -1 || r4 == -1)
2790 continue;
2791
2792 tmpmlist.clear();
2793 for (m = mlist.begin();m != mlist.end();m++)
2794 {
2795 ra1 = mol.GetAtom((*m)[r1]);
2796 ra2 = mol.GetAtom((*m)[r2]);
2797 ra3 = mol.GetAtom((*m)[r3]);
2798 ra4 = mol.GetAtom((*m)[r4]);
2799 double sign = CalcTorsionAngle(ra1->GetVector(),
2800 ra2->GetVector(),
2801 ra3->GetVector(),
2802 ra4->GetVector());
2803 if (sign > 0.0 && pat->atom[j].chiral_flag == AL_ANTICLOCKWISE)
2804 continue;
2805 if (sign < 0.0 && pat->atom[j].chiral_flag == AL_CLOCKWISE)
2806 continue;
2807
2808 //ok - go ahead and save it
2809 tmpmlist.push_back(*m);
2810 }
2811 mlist = tmpmlist;
2812 }
2813 }
2814
2815 return(!mlist.empty());
2816 }
2817
2818 #define RECURSIVE
2819
2820 #ifdef RECURSIVE
2821 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2822 {
2823 for (;;)
2824 switch (expr->type)
2825 {
2826 case AE_LEAF:
2827 switch( expr->leaf.prop )
2828 {
2829 case AL_ELEM:
2830 return(expr->leaf.value == (int)atom->GetAtomicNum());
2831 case AL_AROM:
2832 if( !expr->leaf.value )
2833 return !atom->IsAromatic();
2834 return atom->IsAromatic();
2835 case AL_HCOUNT:
2836 return (expr->leaf.value==(signed int)atom->ExplicitHydrogenCount() + (signed int)atom->ImplicitHydrogenCount());
2837 case AL_DEGREE:
2838 return(expr->leaf.value == (int)atom->GetValence());
2839 case AL_VALENCE:
2840 return(expr->leaf.value == (int)atom->KBOSum());
2841 case AL_CONNECT:
2842 return(expr->leaf.value == (int)atom->GetImplicitValence());
2843 case AL_NEGATIVE:
2844 return(expr->leaf.value == -(atom->GetFormalCharge()));
2845 case AL_POSITIVE:
2846 return(expr->leaf.value == atom->GetFormalCharge());
2847 case AL_HYB:
2848 return(expr->leaf.value == (int)atom->GetHyb());
2849
2850 case AL_RINGS:
2851 if( expr->leaf.value == -1 )
2852 return atom->IsInRing();
2853 else if( expr->leaf.value == 0 )
2854 return !atom->IsInRing();
2855 else
2856 return expr->leaf.value == (int)atom->MemberOfRingCount();
2857
2858 case AL_SIZE:
2859 if( expr->leaf.value == -1 )
2860 return atom->IsInRing();
2861 if (!expr->leaf.value)
2862 return !atom->IsInRing();
2863 else
2864 return atom->IsInRingSize(expr->leaf.value);
2865
2866 case AL_IMPLICIT:
2867 return expr->leaf.value == (signed int)atom->ImplicitHydrogenCount();
2868
2869 case AL_CONST:
2870 if( !expr->leaf.value )
2871 return false;
2872 return(true);
2873
2874 case AL_CHIRAL:
2875 if( expr->leaf.value == AL_CLOCKWISE)
2876 return atom->IsClockwise();
2877 else if ( expr->leaf.value == AL_ANTICLOCKWISE)
2878 return atom->IsAntiClockwise();
2879 else if ( expr->leaf.value == 0) // unspecified
2880 return (atom->IsChiral() && !atom->HasChiralitySpecified());
2881
2882 default:
2883 return false;
2884 }
2885
2886 case AE_NOT:
2887 return(!EvalAtomExpr(expr->mon.arg,atom));
2888 case AE_ANDHI: /* Same as AE_ANDLO */
2889 case AE_ANDLO:
2890 if( !EvalAtomExpr(expr->bin.lft,atom))
2891 return(false);
2892 expr = expr->bin.rgt;
2893 break;
2894 case AE_OR:
2895 if(EvalAtomExpr(expr->bin.lft,atom))
2896 return(true);
2897 expr = expr->bin.rgt;
2898 break;
2899
2900 case AE_RECUR:
2901 {
2902 //see if pattern has been matched
2903 std::vector<std::pair<Pattern*,std::vector<bool> > >::iterator i;
2904 for (i = RSCACHE.begin();i != RSCACHE.end();i++)
2905 if (i->first == (Pattern*)expr->recur.recur)
2906 return(i->second[atom->GetIdx()]);
2907
2908 //perceive and match pattern
2909 std::vector<std::vector<int> >::iterator j;
2910 std::vector<bool> vb(((OBMol*) atom->GetParent())->NumAtoms()+1);
2911 std::vector<std::vector<int> > mlist;
2912 if (match( *((OBMol *) atom->GetParent()),
2913 (Pattern*)expr->recur.recur,mlist))
2914 for (j = mlist.begin();j != mlist.end();j++)
2915 vb[(*j)[0]] = true;
2916
2917 RSCACHE.push_back(std::pair<Pattern*,std::vector<bool> > ((Pattern*)expr->recur.recur,vb));
2918
2919 return(vb[atom->GetIdx()]);
2920 }
2921
2922 default:
2923 return(false);
2924 }
2925 }
2926
2927 #else
2928
2929 static bool EvalAtomExpr(AtomExpr *expr,OBAtom *atom)
2930 {
2931 int size=0;
2932 #define OB_EVAL_STACKSIZE 40
2933
2934 AtomExpr *stack[OB_EVAL_STACKSIZE];
2935 memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
2936 #undef OB_EVAL_STACKSIZE
2937
2938 bool lftest=true;
2939
2940 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
2941 {
2942 switch (expr->type)
2943 {
2944 case AE_LEAF:
2945 switch( expr->leaf.prop )
2946 {
2947 //expr->leaf.value
2948 case AL_ELEM:
2949 lftest = (expr->leaf.value == atom->GetAtomicNum());
2950 break;
2951 case AL_AROM:
2952 lftest = (expr->leaf.value == (int)atom->IsAromatic());
2953 break;
2954 case AL_HCOUNT:
2955 if (atom->ExplicitHydrogenCount() > atom->ImplicitHydrogenCount())
2956 lftest=(expr->leaf.value==atom->ExplicitHydrogenCount());
2957 else
2958 lftest=(expr->leaf.value==atom->ImplicitHydrogenCount());
2959 break;
2960 case AL_DEGREE:
2961 lftest = (expr->leaf.value == atom->GetHvyValence());
2962 break;
2963 case AL_VALENCE:
2964 lftest = (expr->leaf.value == atom->BOSum());
2965 break;
2966 case AL_CONNECT: //X
2967 lftest = (expr->leaf.value == atom->GetImplicitValence());
2968 break;
2969 case AL_NEGATIVE:
2970 lftest=(expr->leaf.value == -1*(atom->GetFormalCharge()));
2971 break;
2972 case AL_POSITIVE:
2973 lftest=(expr->leaf.value == atom->GetFormalCharge());
2974 break;
2975 case AL_HYB:
2976 lftest=(expr->leaf.value == atom->GetHyb());
2977 break;
2978 case AL_RINGS:
2979 if (expr->leaf.value == -1)
2980 lftest = (atom->IsInRing());
2981 else
2982 if (expr->leaf.value == 0)
2983 lftest = !(atom->IsInRing());
2984 else
2985 lftest=(atom->MemberOfRingCount()==expr->leaf.value);
2986 break;
2987 case AL_SIZE:
2988 if (!expr->leaf.value)
2989 lftest = !atom->IsInRing();
2990 else
2991 lftest = atom->IsInRingSize(expr->leaf.value);
2992 break;
2993
2994 case AL_IMPLICIT:
2995 lftest=(expr->leaf.value==atom->ImplicitHydrogenCount());
2996 break;
2997 case AL_CONST:
2998 lftest= true; // not limited to non-hydrogens
2999 break;
3000 case AL_MASS:
3001 break;
3002 default:
3003 break;
3004 }
3005 size--;
3006 break;
3007
3008 case AE_ANDHI:
3009
3010 if (stack[size+1] == expr->bin.rgt)
3011 size--;
3012 else if (stack[size+1] == expr->bin.lft)
3013 {
3014 if (lftest)
3015 {
3016 size++;
3017 stack[size] = expr->bin.rgt;
3018 }
3019 else
3020 size--;
3021 }
3022 else
3023 {
3024 size++;
3025 stack[size] = expr->bin.lft;
3026 }
3027 break;
3028
3029 case AE_OR:
3030
3031 if (stack[size+1] == expr->bin.rgt)
3032 size--;
3033 else if (stack[size+1] == expr->bin.lft)
3034 {
3035 if (!lftest)
3036 {
3037 size++;
3038 stack[size] = expr->bin.rgt;
3039 }
3040 else
3041 size--;
3042 }
3043 else
3044 {
3045 size++;
3046 stack[size] = expr->bin.lft;
3047 }
3048 break;
3049
3050 case AE_ANDLO:
3051
3052 if (stack[size+1] == expr->bin.rgt)
3053 size--;
3054 else if (stack[size+1] == expr->bin.lft)
3055 {
3056 if (lftest)
3057 {
3058 size++;
3059 stack[size] = expr->bin.rgt;
3060 }
3061 else
3062 size--;
3063 }
3064 else
3065 {
3066 size++;
3067 stack[size] = expr->bin.lft;
3068 }
3069 break;
3070
3071 case AE_NOT:
3072 if (stack[size+1] != expr->mon.arg)
3073 {
3074 size++;
3075 stack[size] = expr->mon.arg;
3076 }
3077 else
3078 {
3079 lftest = !lftest;
3080 size--;
3081 }
3082 break;
3083
3084 case AE_RECUR:
3085 //see if pattern has been matched
3086 bool matched=false;
3087
3088 std::vector<std::pair<Pattern*,std::vector<bool> > >::iterator i;
3089 for (i = RSCACHE.begin();i != RSCACHE.end();i++)
3090 if (i->first == (Pattern*)expr->recur.recur)
3091 {
3092 lftest = i->second[atom->GetIdx()];
3093 matched = true;
3094 break;
3095 }
3096
3097 if (!matched)
3098 {
3099 std::vector<bool> vb(atom->GetParent()->NumAtoms()+1);
3100 std::vector<std::vector<int> > mlist;
3101 lftest = false;
3102 if (match((*atom->GetParent()),(Pattern*)expr->recur.recur,mlist))
3103 {
3104 std::vector<std::vector<int> >::iterator i;
3105 for (i = mlist.begin();i != mlist.end();i++)
3106 {
3107 if ((*i)[0] == atom->GetIdx())
3108 lftest = true;
3109 vb[(*i)[0]] = true;
3110 }
3111 }
3112 RSCACHE.push_back(std::pair<Pattern*,std::vector<bool> > ((Pattern*)expr->recur.recur,vb));
3113 }
3114
3115 size--;
3116 break;
3117 }
3118 }
3119
3120 return(lftest);
3121 }
3122 #endif
3123
3124 #ifdef RECURSIVE
3125
3126 static bool EvalBondExpr(BondExpr *expr,OBBond *bond)
3127 {
3128 for (;;)
3129 switch( expr->type )
3130 {
3131 case BE_LEAF:
3132
3133 if( expr->leaf.prop == BL_CONST )
3134 return((expr->leaf.value != 0) ? true : false);
3135 else
3136 switch( expr->leaf.value )
3137 {
3138 case BT_SINGLE:
3139 return(bond->GetBO() == 1 && !bond->IsAromatic());
3140 case BT_AROM:
3141 return(bond->IsAromatic());
3142 case BT_DOUBLE:
3143 return(bond->GetBO()==2 && !bond->IsAromatic());
3144 case BT_TRIPLE:
3145 return(bond->GetBO()==3);
3146 case BT_RING:
3147 return(bond->IsInRing());
3148 case BT_UP:
3149 return(bond->IsUp());
3150 case BT_DOWN:
3151 return(bond->IsDown());
3152 case BT_UPUNSPEC: // up or unspecified (i.e., not down)
3153 return(!bond->IsDown());
3154 case BT_DOWNUNSPEC: // down or unspecified (i.e., not up)
3155 return(!bond->IsUp());
3156 default:
3157 return(false);
3158 }
3159
3160
3161 case BE_NOT:
3162 return(!EvalBondExpr(expr->mon.arg,bond));
3163 case BE_ANDHI:
3164 case BE_ANDLO:
3165 if (!EvalBondExpr(expr->bin.lft,bond))
3166 return(false);
3167 expr = expr->bin.rgt;
3168 break;
3169
3170 case BE_OR:
3171 if (EvalBondExpr(expr->bin.lft,bond))
3172 return(true);
3173 expr = expr->bin.rgt;
3174 break;
3175 default:
3176 return false;
3177 }
3178 }
3179
3180 #else
3181
3182 static bool EvalBondExpr(BondExpr *expr,OBBond *bond)
3183 {
3184 int size=0;
3185 #define OB_EVAL_STACKSIZE 40
3186
3187 BondExpr *stack[OB_EVAL_STACKSIZE];
3188 memset(stack,'\0',sizeof(AtomExpr*)*OB_EVAL_STACKSIZE);
3189 #undef OB_EVAL_STACKSIZE
3190
3191 bool lftest=true;
3192 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3193 switch( expr->type )
3194 {
3195 case(BE_LEAF):
3196
3197 if( expr->leaf.prop == BL_CONST )
3198 lftest = (expr->leaf.value)?true:false;
3199 else /* expr->leaf.prop == BL_TYPE */
3200 switch( expr->leaf.value )
3201 {
3202 case(BT_SINGLE):
3203 lftest = (bond->GetBO() == 1 && !bond->IsAromatic());
3204 break;
3205 case(BT_DOUBLE):
3206 lftest = (bond->GetBO()==2 && !bond->IsAromatic());
3207 break;
3208 case(BT_TRIPLE):
3209 lftest = (bond->GetBO()==3);
3210 break;
3211 case(BT_AROM): lftest=bond->IsAromatic();
3212 break;
3213 case(BT_RING): lftest=bond->IsInRing();
3214 break;
3215 case(BT_UP): lftest= (bond->IsUp() && bond->GetBO()==1 && !bond->IsAromatic());
3216 break;
3217 case(BT_DOWN): lftest= (bond->IsDown() && bond->GetBO()==1 && !bond->IsAromatic());
3218 break;
3219 case(BT_UPUNSPEC): lftest= !bond->IsDown();
3220 break;
3221 case(BT_DOWNUNSPEC): lftest= !bond->IsUp();
3222 break;
3223 }
3224 size--;
3225 break;
3226
3227 case(BE_NOT):
3228 if (stack[size+1] != expr->mon.arg)
3229 {
3230 size++;
3231 stack[size] = expr->mon.arg;
3232 }
3233 else
3234 {
3235 lftest = !lftest;
3236 size--;
3237 }
3238 break;
3239
3240 case(BE_ANDHI):
3241 if (stack[size+1] == expr->bin.rgt)
3242 size--;
3243 else if (stack[size+1] == expr->bin.lft)
3244 {
3245 if (lftest)
3246 {
3247 size++;
3248 stack[size] = expr->bin.rgt;
3249 }
3250 else
3251 size--;
3252 }
3253 else
3254 {
3255 size++;
3256 stack[size] = expr->bin.lft;
3257 }
3258 break;
3259
3260 case(BE_ANDLO):
3261 if (stack[size+1] == expr->bin.rgt)
3262 size--;
3263 else if (stack[size+1] == expr->bin.lft)
3264 {
3265 if (lftest)
3266 {
3267 size++;
3268 stack[size] = expr->bin.rgt;
3269 }
3270 else
3271 size--;
3272 }
3273 else
3274 {
3275 size++;
3276 stack[size] = expr->bin.lft;
3277 }
3278 break;
3279
3280 case(BE_OR):
3281 if (stack[size+1] == expr->bin.rgt)
3282 size--;
3283 else if (stack[size+1] == expr->bin.lft)
3284 {
3285 if (!lftest)
3286 {
3287 size++;
3288 stack[size] = expr->bin.rgt;
3289 }
3290 else
3291 size--;
3292 }
3293 else
3294 {
3295 size++;
3296 stack[size] = expr->bin.lft;
3297 }
3298 break;
3299 }
3300 return(lftest);
3301 }
3302 #endif
3303
3304 std::vector<std::vector<int> > &OBSmartsPattern::GetUMapList()
3305 {
3306 if (_mlist.empty() || _mlist.size() == 1)
3307 return(_mlist);
3308
3309 bool ok;
3310 OBBitVec bv;
3311 std::vector<OBBitVec> vbv;
3312 std::vector<std::vector<int> > mlist;
3313 std::vector<std::vector<int> >::iterator i;
3314 std::vector<OBBitVec>::iterator j;
3315
3316 for (i = _mlist.begin();i != _mlist.end();i++)
3317 {
3318 ok = true;
3319 bv.Clear();
3320 bv.FromVecInt(*i);
3321 for (j = vbv.begin();j != vbv.end() && ok;j++)
3322 if ((*j) == bv)
3323 ok = false;
3324
3325 if (ok)
3326 {
3327 mlist.push_back(*i);
3328 vbv.push_back(bv);
3329 }
3330 }
3331
3332 _mlist = mlist;
3333 return(_mlist);
3334 }
3335
3336 void OBSmartsPattern::WriteMapList(ostream &ofs)
3337 {
3338 std::vector<std::vector<int> >::iterator i;
3339 std::vector<int>::iterator j;
3340
3341 for ( i = _mlist.begin() ; i != _mlist.end() ; i++ )
3342 {
3343 for (j = (*i).begin();j != (*i).end();j++)
3344 ofs << *j << ' ' << ends;
3345 ofs << endl;
3346 }
3347 }
3348
3349 //*******************************************************************
3350 // The OBSSMatch class performs exhaustive matching using recursion
3351 // Explicit stack handling is used to find just a single match in
3352 // match()
3353 //*******************************************************************
3354
3355 OBSSMatch::OBSSMatch(OBMol &mol,Pattern *pat)
3356 {
3357 _mol = &mol;
3358 _pat = pat;
3359 _map.resize(pat->acount);
3360
3361 if (!mol.Empty())
3362 {
3363 _uatoms = new bool [mol.NumAtoms()+1];
3364 memset((char*)_uatoms,'\0',sizeof(bool)*(mol.NumAtoms()+1));
3365 }
3366 else
3367 _uatoms = (bool*)NULL;
3368 }
3369
3370 OBSSMatch::~OBSSMatch()
3371 {
3372 if (_uatoms)
3373 delete [] _uatoms;
3374 }
3375
3376 void OBSSMatch::Match(std::vector<std::vector<int> > &mlist,int bidx)
3377 {
3378 if (bidx == -1)
3379 {
3380 OBAtom *atom;
3381 std::vector<OBNodeBase*>::iterator i;
3382 for (atom = _mol->BeginAtom(i);atom;atom = _mol->NextAtom(i))
3383 if (EvalAtomExpr(_pat->atom[0].expr,atom))
3384 {
3385 _map[0] = atom->GetIdx();
3386 _uatoms[atom->GetIdx()] = true;
3387 Match(mlist,0);
3388 _map[0] = 0;
3389 _uatoms[atom->GetIdx()] = false;
3390 }
3391 return;
3392 }
3393
3394 if (bidx == _pat->bcount) //save full match here
3395 {
3396 mlist.push_back(_map);
3397 return;
3398 }
3399
3400 if (_pat->bond[bidx].grow) //match the next bond
3401 {
3402 int src,dst;
3403 src = _pat->bond[bidx].src;
3404 dst = _pat->bond[bidx].dst;
3405
3406 if (_map[src] <= 0 || _map[src] > _mol->NumAtoms())
3407 return;
3408
3409 AtomExpr *aexpr = _pat->atom[dst].expr;
3410 BondExpr *bexpr = _pat->bond[bidx].expr;
3411 OBAtom *atom,*nbr;
3412 std::vector<OBEdgeBase*>::iterator i;
3413
3414 atom = _mol->GetAtom(_map[src]);
3415 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
3416 if (!_uatoms[nbr->GetIdx()] && EvalAtomExpr(aexpr,nbr) &&
3417 EvalBondExpr(bexpr,((OBBond*) *i)))
3418 {
3419 _map[dst] = nbr->GetIdx();
3420 _uatoms[nbr->GetIdx()] = true;
3421 Match(mlist,bidx+1);
3422 _uatoms[nbr->GetIdx()] = false;
3423 _map[dst] = 0;
3424 }
3425 }
3426 else //just check bond here
3427 {
3428 OBBond *bond = _mol->GetBond(_map[_pat->bond[bidx].src],
3429 _map[_pat->bond[bidx].dst]);
3430 if (bond && EvalBondExpr(_pat->bond[bidx].expr,bond))
3431 Match(mlist,bidx+1);
3432 }
3433 }
3434
3435 static int GetExprOrder(BondExpr *expr)
3436 {
3437 int size=0;
3438 BondExpr *stack[15];
3439 memset(stack,'\0',sizeof(AtomExpr*)*15);
3440 bool lftest=true;
3441
3442 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3443 switch( expr->type )
3444 {
3445 case(BE_LEAF):
3446
3447 if( expr->leaf.prop == BL_CONST )
3448 lftest = true;
3449 else /* expr->leaf.prop == BL_TYPE */
3450 switch( expr->leaf.value )
3451 {
3452 case(BT_SINGLE): return(1);
3453 case(BT_DOUBLE): return(2);
3454 case(BT_TRIPLE): return(3);
3455 case(BT_AROM): return(5);
3456 default:
3457 lftest = true;
3458 }
3459 size--;
3460 break;
3461
3462 case(BE_NOT): return(0);
3463 case(BE_ANDHI):
3464 case(BE_ANDLO):
3465 case(BE_OR):
3466 if (stack[size+1] == expr->bin.rgt)
3467 size--;
3468 else if (stack[size+1] == expr->bin.lft)
3469 {
3470 if (lftest)
3471 {
3472 size++;
3473 stack[size] = expr->bin.rgt;
3474 }
3475 else
3476 size--;
3477 }
3478 else
3479 {
3480 size++;
3481 stack[size] = expr->bin.lft;
3482 }
3483 break;
3484 }
3485
3486 return(0);
3487 }
3488
3489 int OBSmartsPattern::GetCharge(int idx)
3490 {
3491 AtomExpr *expr = _pat->atom[idx].expr;
3492
3493 int size=0;
3494 AtomExpr *stack[15];
3495 memset(stack,'\0',sizeof(AtomExpr*)*15);
3496 bool lftest=true;
3497
3498 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3499 {
3500 switch (expr->type)
3501 {
3502 case AE_LEAF:
3503 switch( expr->leaf.prop )
3504 {
3505 case AL_NEGATIVE:
3506 return(-1*(int)expr->leaf.value);
3507 case AL_POSITIVE:
3508 return((int)expr->leaf.value);
3509 default:
3510 lftest=true;
3511 }
3512 size--;
3513 break;
3514
3515 case AE_OR:
3516 case AE_ANDHI:
3517 case AE_ANDLO:
3518
3519 if (stack[size+1] == expr->bin.rgt)
3520 size--;
3521 else if (stack[size+1] == expr->bin.lft)
3522 {
3523 if (lftest)
3524 {
3525 size++;
3526 stack[size] = expr->bin.rgt;
3527 }
3528 else
3529 size--;
3530 }
3531 else
3532 {
3533 size++;
3534 stack[size] = expr->bin.lft;
3535 }
3536 break;
3537
3538 case AE_NOT:
3539 return(0);
3540 case AE_RECUR:
3541 return(0);
3542 }
3543 }
3544
3545 return(0);
3546 }
3547
3548 int OBSmartsPattern::GetAtomicNum(int idx)
3549 {
3550 AtomExpr *expr = _pat->atom[idx].expr;
3551
3552 int size=0;
3553 AtomExpr *stack[15];
3554 memset(stack,'\0',sizeof(AtomExpr*)*15);
3555 bool lftest=true;
3556
3557 for (size=0,stack[size] = expr;size >= 0;expr=stack[size])
3558 {
3559 switch (expr->type)
3560 {
3561 case AE_LEAF:
3562 if ( expr->leaf.prop == AL_ELEM)
3563 return(expr->leaf.value);
3564 lftest = true;
3565 size--;
3566 break;
3567
3568 case AE_OR:
3569 case AE_ANDHI:
3570 case AE_ANDLO:
3571
3572 if (stack[size+1] == expr->bin.rgt)
3573 size--;
3574 else if (stack[size+1] == expr->bin.lft)
3575 {
3576 if (lftest)
3577 {
3578 size++;
3579 stack[size] = expr->bin.rgt;
3580 }
3581 else
3582 size--;
3583 }
3584 else
3585 {
3586 size++;
3587 stack[size] = expr->bin.lft;
3588 }
3589 break;
3590
3591 case AE_NOT:
3592 return(0);
3593 case AE_RECUR:
3594 return(0);
3595 }
3596 }
3597
3598 return(0);
3599 }
3600
3601 void OBSmartsPattern::GetBond(int &src,int &dst,int &ord,int idx)
3602 {
3603 src = _pat->bond[idx].src;
3604 dst = _pat->bond[idx].dst;
3605 ord = GetExprOrder(_pat->bond[idx].expr);
3606 }
3607
3608 void SmartsLexReplace(std::string &s,std::vector<std::pair<std::string,std::string> > &vlex)
3609 {
3610 size_t j,pos;
3611 std::string token,repstr;
3612 std::vector<std::pair<std::string,std::string> >::iterator i;
3613
3614 for (pos = 0,pos = s.find("$",pos);pos < s.size();pos = s.find("$",pos))
3615 //for (pos = 0,pos = s.find("$",pos);pos != std::string::npos;pos = s.find("$",pos))
3616 {
3617 pos++;
3618 for (j = pos;j < s.size();j++)
3619 if (!isalpha(s[j]) && !isdigit(s[j]) && s[j] != '_')
3620 break;
3621 if (pos == j)
3622 continue;
3623
3624 token = s.substr(pos,j-pos);
3625 for (i = vlex.begin();i != vlex.end();i++)
3626 if (token == i->first)
3627 {
3628 repstr = "(" + i->second + ")";
3629 s.replace(pos,j-pos,repstr);
3630 j = 0;
3631 }
3632 pos = j;
3633 }
3634 }
3635
3636 } // end namespace OpenBabel
3637
3638 //! \file parsmart.cpp
3639 //! \brief Implementation of Daylight SMARTS parser.