ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/openbabel/smilesformat.cpp
Revision: 1081
Committed: Thu Oct 19 20:49:05 2006 UTC (18 years, 6 months ago) by gezelter
File size: 71511 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# User Rev Content
1 tim 746 /**********************************************************************
2     Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
3 gezelter 1081 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
4 tim 746 Some portions Copyright (C) 2004 by Chris Morley
5    
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation version 2 of the License.
9    
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13     GNU General Public License for more details.
14     ***********************************************************************/
15     //Contains SMIFormat and FIXFormat classes
16 gezelter 1081 // TODO: Rewrite. Use std::string in place of char * to avoid buffer overflow
17     // use std::string::reserve (or different allocator) to avoid resize slowdown
18 tim 746
19 gezelter 1081 #include "mol.hpp"
20     #include "obconversion.hpp"
21     #include "obmolecformat.hpp"
22 tim 746
23     using namespace std;
24    
25     namespace OpenBabel
26     {
27 gezelter 1081 class SMIFormat : public OBMoleculeFormat
28     {
29     public:
30     //Register this format type ID
31     SMIFormat()
32     {
33     OBConversion::RegisterFormat("smi",this, "chemical/x-daylight-smiles");
34     OBConversion::RegisterOptionParam("n", this);
35     OBConversion::RegisterOptionParam("t", this);
36     }
37    
38     virtual const char* GetMIMEType()
39     { return "chemical/x-daylight-smiles"; };
40    
41     ////////////////////////////////////////////////////
42     /// The "API" interface functions
43     virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
44     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
45    
46     ///////////////////////////////////////////////////////
47    
48     virtual const char* Description()
49     {
50     return
51     "SMILES format\n \
52     A linear text format which can describe the connectivity\n \
53     and chirality of a molecule\n \
54     Write Options e.g. -xt\n \
55     -n no molecule name\n \
56     -t molecule name only\n \
57     -r radicals lower case eg ethyl is Cc\n\n";
58     };
59    
60     virtual unsigned int Flags() { return DEFAULTFORMAT;};
61     virtual const char* TargetClassDescription(){return OBMol::ClassDescription();};
62    
63     virtual const char* SpecificationURL()
64     {return "http://www.daylight.com/smiles/f_smiles.html";};
65    
66     virtual int SkipObjects(int n, OBConversion* pConv)
67     {
68     if(n==0) return 1; //already points after current line
69     string temp;
70     istream& ifs = *pConv->GetInStream();
71     int i;
72     for(i=0;i<n && ifs.good();i++)
73     getline(ifs, temp);
74     return ifs.good() ? 1 : -1;
75     };
76     };
77    
78     //Make an instance of the format class
79     SMIFormat theSMIFormat;
80    
81     //////////////////////////////////////////////////////////////////
82     class OBSmiNode
83     {
84 tim 746 OBAtom *_atom,*_parent;
85     std::vector<OBSmiNode*> _nextnode;
86     std::vector<OBBond*> _nextbond;
87 gezelter 1081 public:
88 tim 746 OBSmiNode(OBAtom *atom);
89     ~OBSmiNode();
90     int Size()
91     {
92 gezelter 1081 return((_nextnode.empty())?0:_nextnode.size());
93 tim 746 }
94     void SetParent(OBAtom *a)
95     {
96 gezelter 1081 _parent = a;
97 tim 746 }
98     void SetNextNode(OBSmiNode*,OBBond*);
99     OBAtom *GetAtom()
100     {
101 gezelter 1081 return(_atom);
102 tim 746 }
103     OBAtom *GetParent()
104     {
105 gezelter 1081 return(_parent);
106 tim 746 }
107     OBAtom *GetNextAtom(int i)
108     {
109 gezelter 1081 return(_nextnode[i]->GetAtom());
110 tim 746 }
111     OBBond *GetNextBond(int i)
112     {
113 gezelter 1081 return(_nextbond[i]);
114 tim 746 }
115     OBSmiNode *GetNextNode(int i)
116     {
117 gezelter 1081 return(_nextnode[i]);
118 tim 746 }
119 gezelter 1081 };
120 tim 746
121 gezelter 1081 class OBMol2Smi
122     {
123 tim 746 std::vector<int> _atmorder;
124     std::vector<int> _storder;
125     std::vector<bool> _aromNH;
126     OBBitVec _uatoms,_ubonds;
127     std::vector<OBEdgeBase*> _vclose;
128     std::vector<std::pair<OBAtom*,std::pair<int,int> > > _vopen;
129 gezelter 1081 OBConversion* _pconv;
130     public:
131 tim 746 OBMol2Smi()
132     {
133 gezelter 1081 _vclose.clear();
134 tim 746 }
135     ~OBMol2Smi()
136     {}
137     int GetUnusedIndex();
138     void Init(OBConversion* pconv=NULL);
139     void CreateSmiString(OBMol&,char*);
140     void GetClosureAtoms(OBAtom*,std::vector<OBNodeBase*>&);
141     void FindClosureBonds(OBMol&);
142     void ToSmilesString(OBSmiNode *node,char *buffer);
143     void RemoveUsedClosures();
144     void AssignCisTrans(OBSmiNode*);
145     bool BuildTree(OBSmiNode*);
146     bool GetSmilesElement(OBSmiNode*,char*);
147     bool GetChiralStereo(OBSmiNode*,char*);
148     void CorrectAromaticAmineCharge(OBMol&);
149     std::vector<std::pair<int,OBBond*> > GetClosureDigits(OBAtom*);
150     std::vector<int> &GetOutputOrder()
151     {
152 gezelter 1081 return(_atmorder);
153 tim 746 }
154 gezelter 1081 };
155 tim 746
156 gezelter 1081 bool WriteTheSmiles(OBMol & mol,char *out);
157 tim 746
158 gezelter 1081 /////////////////////////////////////////////////////////////////
159     class OBSmilesParser
160     {
161 tim 746 int _bondflags;
162     int _order;
163     int _prev;
164     char *_ptr;
165     vector<int> _vprev;
166     vector<vector<int> > _rclose;
167     vector<vector<int> > _extbond;
168     vector<int> _path;
169     vector<bool> _avisit;
170     vector<bool> _bvisit;
171     char _buffer[BUFF_SIZE];
172     bool chiralWatch; // set when a chiral atom is read
173     map<OBAtom*,OBChiralData*> _mapcd; // map of ChiralAtoms and their data
174 gezelter 1081 public:
175 tim 746
176 gezelter 1081 OBSmilesParser() { }
177     ~OBSmilesParser() { }
178 tim 746
179     bool SmiToMol(OBMol&,string&);
180     bool ParseSmiles(OBMol&);
181     bool ParseSimple(OBMol&);
182     bool ParseComplex(OBMol&);
183     bool ParseRingBond(OBMol&);
184     bool ParseExternalBond(OBMol&);
185     bool CapExternalBonds(OBMol &mol);
186     void FindAromaticBonds(OBMol &mol,OBAtom*,int);
187     void FindAromaticBonds(OBMol&);
188     void FindOrphanAromaticAtoms(OBMol &mol); //CM 18 Sept 2003
189 gezelter 1081 };
190 tim 746
191 gezelter 1081 /////////////////////////////////////////////////////////////////
192     bool SMIFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
193     {
194 tim 746 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
195    
196     //Define some references so we can use the old parameter names
197     istream &ifs = *pConv->GetInStream();
198     OBMol &mol = *pmol;
199     const char* title = pConv->GetTitle();
200    
201     //Taken unchanged from ReadSmiles
202     char buffer[BUFF_SIZE];
203    
204     if (!ifs.getline(buffer,BUFF_SIZE))
205 gezelter 1081 return(false);
206 tim 746 vector<string> vs;
207     tokenize(vs,buffer);
208    
209     // RWT 10/3/2000
210     //
211     // added the following to allow spaces in compound names (titles).
212     // Essentially everything after the first space on a SMILES file line
213     // is treated as the name.
214     // Also had to change the condition a few lines below from:
215     // if (vs.size() == 2) ... to
216     // if (vs.size() >= 2)
217    
218     if (vs.size() > 2)
219 gezelter 1081 {
220 tim 746 for (unsigned int i=2;i<vs.size(); i++)
221 gezelter 1081 {
222 tim 746 vs[1]=vs[1]+" "+vs[i];
223 gezelter 1081 }
224     }
225 tim 746
226     if (vs.empty())
227     return false;
228     mol.SetDimension(0);
229    
230     if (vs.size() >= 2)
231     mol.SetTitle(vs[1].c_str());
232 gezelter 1081 else
233     mol.SetTitle(title);
234 tim 746
235     OBSmilesParser sp;
236     return sp.SmiToMol(mol,vs[0]);
237 gezelter 1081 }
238 tim 746
239 gezelter 1081 //////////////////////////////////////////////////
240     bool SMIFormat::WriteMolecule(OBBase* pOb,OBConversion* pConv)
241     {
242 tim 746 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
243    
244     //Define some references so we can use the old parameter names
245     ostream &ofs = *pConv->GetOutStream();
246     OBMol &mol = *pmol;
247    
248     if(pConv->IsOption("t")) //Title only option
249 gezelter 1081 {
250 tim 746 ofs << mol.GetTitle() <<endl;
251     return true;
252 gezelter 1081 }
253 tim 746 char buffer[BUFF_SIZE];
254 gezelter 1081 *buffer='\0'; //empty buffer
255 tim 746
256     // This is a hack to prevent recursion problems.
257     // we still need to fix the underlying problem (mainly chiral centers) -GRH
258     if (mol.NumAtoms() > 1000)
259     {
260     #ifdef HAVE_SSTREAM
261 gezelter 1081 stringstream errorMsg;
262 tim 746 #else
263 gezelter 1081 strstream errorMsg;
264 tim 746 #endif
265 gezelter 1081 errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
266     errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl;
267     obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
268     return(false);
269 tim 746 }
270    
271     if(mol.NumAtoms()!=0)
272 gezelter 1081 {
273     OBMol2Smi m2s;
274     m2s.Init(pConv);
275     m2s.CorrectAromaticAmineCharge(mol);
276     m2s.CreateSmiString(mol,buffer);
277     }
278 tim 746
279     ofs << buffer ;
280     if(!pConv->IsOption("n"))
281 gezelter 1081 ofs << '\t' << mol.GetTitle();
282 tim 746 ofs << endl;
283    
284     return true;
285 gezelter 1081 }
286 tim 746
287 gezelter 1081 //////////////////////////////////////////////
288 tim 746
289 gezelter 1081 //CM not needed extern OBAromaticTyper aromtyper;
290     //OBAtomTyper atomtyperx; //CM
291 tim 746
292 gezelter 1081 bool OBSmilesParser::SmiToMol(OBMol &mol,string &s)
293     {
294     strncpy(_buffer,s.c_str(), BUFF_SIZE);
295     _buffer[BUFF_SIZE - 1] = '\0';
296 tim 746
297     _vprev.clear();
298     _rclose.clear();
299     _prev=0;
300 gezelter 1081 chiralWatch=false;
301 tim 746
302     if (!ParseSmiles(mol))
303 gezelter 1081 {
304 tim 746 mol.Clear();
305     return(false);
306 gezelter 1081 }
307 tim 746
308 gezelter 1081 // SMILES always sets formal charges -- don't throw them away
309     mol.SetAutomaticFormalCharge(false);
310 tim 746
311     return(true);
312 gezelter 1081 }
313 tim 746
314 gezelter 1081 bool OBSmilesParser::ParseSmiles(OBMol &mol)
315     {
316 tim 746 mol.BeginModify();
317    
318     for (_ptr=_buffer;*_ptr;_ptr++)
319 gezelter 1081 {
320 tim 746 if (isspace(*_ptr))
321 gezelter 1081 continue;
322 tim 746 else if (isdigit(*_ptr) || *_ptr == '%') //ring open/close
323 gezelter 1081 {
324     if (!ParseRingBond(mol))
325     return false;
326 tim 746 continue;
327 gezelter 1081 }
328 tim 746 else if(*_ptr == '&') //external bond
329 gezelter 1081 {
330 tim 746 ParseExternalBond(mol);
331     continue;
332 gezelter 1081 }
333 tim 746 else
334 gezelter 1081 switch(*_ptr)
335 tim 746 {
336     case '.':
337 gezelter 1081 _prev=0;
338     break;
339 tim 746 case '(':
340 gezelter 1081 _vprev.push_back(_prev);
341     break;
342 tim 746 case ')':
343 gezelter 1081 if(_vprev.empty()) //CM
344     return false;
345     _prev = _vprev.back();
346     _vprev.pop_back();
347     break;
348 tim 746 case '[':
349 gezelter 1081 if (!ParseComplex(mol))
350 tim 746 {
351 gezelter 1081 mol.EndModify();
352     mol.Clear();
353     return(false);
354 tim 746 }
355 gezelter 1081 break;
356 tim 746 case '-':
357 gezelter 1081 _order = 1;
358     break;
359 tim 746 case '=':
360 gezelter 1081 _order = 2;
361     break;
362 tim 746 case '#':
363 gezelter 1081 _order = 3;
364     break;
365 tim 746 case ':':
366 gezelter 1081 _order = 5;
367     break;
368 tim 746 case '/':
369 gezelter 1081 _bondflags |= OB_TORUP_BOND;
370     break;
371 tim 746 case '\\':
372 gezelter 1081 _bondflags |= OB_TORDOWN_BOND;
373     break;
374 tim 746 default:
375 gezelter 1081 if (!ParseSimple(mol))
376 tim 746 {
377 gezelter 1081 mol.EndModify();
378     mol.Clear();
379     return(false);
380 tim 746 }
381     } // end switch
382 gezelter 1081 } // end for _ptr
383 tim 746
384     // place dummy atoms for each unfilled external bond
385     if(!_extbond.empty())
386 gezelter 1081 CapExternalBonds(mol);
387 tim 746
388     //set aromatic bond orders
389     mol.SetAromaticPerceived();
390     FindAromaticBonds(mol);
391     FindOrphanAromaticAtoms(mol);// CM 18 Sept 2003
392     mol.AssignSpinMultiplicity();
393     mol.UnsetAromaticPerceived();
394    
395     mol.EndModify();
396    
397     //NE add the OBChiralData stored inside the _mapcd to the atoms now after end
398     // modify so they don't get lost.
399     if(_mapcd.size()>0)
400 gezelter 1081 {
401     OBAtom* atom;
402     OBChiralData* cd;
403     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
404     for(ChiralSearch=_mapcd.begin();ChiralSearch!=_mapcd.end();ChiralSearch++)
405     {
406     atom=ChiralSearch->first;
407     cd=ChiralSearch->second;
408     atom->SetData(cd);
409     }
410     }
411 tim 746
412     return(true);
413 gezelter 1081 }
414 tim 746
415 gezelter 1081 // CM 18 Sept 2003
416     void OBSmilesParser::FindOrphanAromaticAtoms(OBMol &mol)
417     {
418 tim 746 //Facilitates the use lower case shorthand for radical entry
419     //Atoms which are marked as aromatic but have no aromatic bonds
420     //are taken to be radical centres
421     OBAtom *atom;
422     vector<OBNodeBase*>::iterator j;
423    
424     for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
425 gezelter 1081 if(atom->IsAromatic())
426     {
427     if(atom->CountBondsOfOrder(5)<2) //bonds order 5 set in FindAromaticBonds()
428     //not proper aromatic atoms - could be conjugated chain or radical centre
429     atom->UnsetAromatic();
430     else
431     {
432     //recognized as aromatic, so are not radicals
433     atom->SetSpinMultiplicity(0);
434     }
435     }
436     }
437 tim 746
438 gezelter 1081 void OBSmilesParser::FindAromaticBonds(OBMol &mol)
439     {
440 tim 746 _path.clear();
441     _avisit.clear();
442     _bvisit.clear();
443     _avisit.resize(mol.NumAtoms()+1);
444     _bvisit.resize(mol.NumBonds());
445     _path.resize(mol.NumAtoms()+1);
446    
447     OBBond *bond;
448     vector<OBEdgeBase*>::iterator i;
449     for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
450 gezelter 1081 if (!bond->GetBeginAtom()->IsAromatic() ||
451     !bond->GetEndAtom()->IsAromatic())
452     _bvisit[bond->GetIdx()] = true;
453 tim 746
454     OBAtom *atom;
455     vector<OBNodeBase*>::iterator j;
456    
457     for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
458 gezelter 1081 if(!_avisit[atom->GetIdx()] && atom->IsAromatic())
459     FindAromaticBonds(mol,atom,0);
460     }
461 tim 746
462 gezelter 1081 void OBSmilesParser::FindAromaticBonds(OBMol &mol,OBAtom *atom,int depth )
463     {
464 tim 746 OBBond *bond;
465     vector<OBEdgeBase*>::iterator k;
466    
467     if (_avisit[atom->GetIdx()])
468 gezelter 1081 {
469 tim 746 int j = depth-1;
470     bond=mol.GetBond(_path[j--]);
471     bond->SetBO(5);
472     while( j >= 0 )
473 gezelter 1081 {
474 tim 746 bond=mol.GetBond(_path[j--]);
475     bond->SetBO(5);
476     if(bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
477 gezelter 1081 break;
478     }
479     }
480 tim 746 else
481 gezelter 1081 {
482 tim 746 _avisit[atom->GetIdx()] = true;
483     for(bond = atom->BeginBond(k);bond;bond = atom->NextBond(k))
484 gezelter 1081 if( !_bvisit[bond->GetIdx()])
485 tim 746 {
486 gezelter 1081 _path[depth] = bond->GetIdx();
487     _bvisit[bond->GetIdx()] = true;
488     FindAromaticBonds(mol,bond->GetNbrAtom(atom),depth+1);
489 tim 746 }
490 gezelter 1081 }
491     }
492 tim 746
493    
494 gezelter 1081 bool OBSmilesParser::ParseSimple(OBMol &mol)
495     {
496 tim 746 char symbol[3];
497     int element;
498     bool arom=false;
499     memset(symbol,'\0',sizeof(char)*3);
500    
501     if (isupper(*_ptr))
502 gezelter 1081 switch(*_ptr)
503 tim 746 {
504     case 'C':
505 gezelter 1081 _ptr++;
506     if (*_ptr == 'l')
507 tim 746 {
508 gezelter 1081 strcpy(symbol,"Cl");
509     element = 17;
510 tim 746 }
511 gezelter 1081 else
512 tim 746 {
513 gezelter 1081 symbol[0] = 'C';
514     element = 6;
515     _ptr--;
516 tim 746 }
517 gezelter 1081 break;
518 tim 746
519     case 'N':
520 gezelter 1081 element = 7;
521     symbol[0] = 'N';
522     break;
523 tim 746 case 'O':
524 gezelter 1081 element = 8;
525     symbol[0] = 'O';
526     break;
527 tim 746 case 'S':
528 gezelter 1081 element = 16;
529     symbol[0] = 'S';
530     break;
531 tim 746 case 'P':
532 gezelter 1081 element = 15;
533     symbol[0] = 'P';
534     break;
535 tim 746 case 'F':
536 gezelter 1081 element = 9;
537     symbol[0] = 'F';
538     break;
539 tim 746 case 'I':
540 gezelter 1081 element = 53;
541     symbol[0] = 'I';
542     break;
543 tim 746
544     case 'B':
545 gezelter 1081 _ptr++;
546     if (*_ptr == 'r')
547 tim 746 {
548 gezelter 1081 element = 35;
549     strcpy(symbol,"Br");
550 tim 746 }
551 gezelter 1081 else
552 tim 746 {
553 gezelter 1081 element = 5;
554     symbol[0] = 'B';
555     _ptr--;
556 tim 746 }
557 gezelter 1081 break;
558 tim 746 default:
559 gezelter 1081 return(false);
560 tim 746 }
561     else
562 gezelter 1081 {
563 tim 746 arom = true;
564     switch(*_ptr)
565 gezelter 1081 {
566     case 'c':
567 tim 746 element = 6;
568     symbol[0] = 'C';
569     break;
570 gezelter 1081 case 'n':
571 tim 746 element = 7;
572     symbol[0] = 'N';
573     break;
574 gezelter 1081 case 'o':
575 tim 746 element = 8;
576     symbol[0] = 'O';
577     break;
578 gezelter 1081 case 'p':
579 tim 746 element = 15;
580     symbol[0] = 'P';
581     break;
582 gezelter 1081 case 's':
583 tim 746 element = 16;
584     symbol[0] = 'S';
585     break;
586 gezelter 1081 case '*':
587 tim 746 element = 0;
588     strcpy(symbol,"Du");
589     break;
590 gezelter 1081 default:
591 tim 746 return(false);
592 gezelter 1081 }
593     }
594 tim 746
595     OBAtom *atom = mol.NewAtom();
596     atom->SetAtomicNum(element);
597     atom->SetType(symbol);
598     if (arom)
599 gezelter 1081 {
600 tim 746 atom->SetAromatic();
601     atom->SetSpinMultiplicity(2); // CM 18 Sept 2003
602 gezelter 1081 }
603 tim 746
604     if (_prev) //need to add bond
605 gezelter 1081 {
606 tim 746 /* CM 18 Sept 2003
607 gezelter 1081 An extension to the SMILES format has been added so that lower case c,n,o can
608     represent a radical centre: CcC is isopropyl radical;
609     and cccc... a carbon chain bonded by conjugated double bonds.
610     Fails sometimes when using c as both aromatic and as the extened form.
611     For benzyl radical C6H5CH2. c1ccccc1c is ok; c1cc(c)ccc1 fails.
612     Radical centres should not be involved in ring closure:
613     for cyclohexyl radical C1cCCCC1 is ok, c1CCCCC1 is not.
614 tim 746
615 gezelter 1081 Implementation
616     Atoms c,n,o, etc initially added as a radical centre
617     unless _prev is a radical centre when both are made a normal atoms
618     connected by a double bond.
619     Since they are still marked as aromatic, FindAromaticBonds() will
620     replace the bonds by aromatic bonds if they are in a ring.
621     FindOrphanAromand removes the aromatic tag from the atoms not found in this way
622     and removes stray radical centres in .
623 tim 746
624 gezelter 1081 To avoid difficulties in complex aromatics with 5 membered rings containing N and O,
625     the above scheme modified to prevent recognition of aromatic structures is not confused.
626     - the double bond is made single if it would exceed valence of atom (not all aromatics have conjugated bonds)
627     - the radical centre is removed on both atoms when forming a ring (in ParseRingBond())
628 tim 746 and on the new atom if the valence of the prev atom is being exceeded.
629 gezelter 1081 Note that the bond orders made here are reassigned in aromatic structures in FindAromaticBonds()
630 tim 746 */
631     if(arom)
632 gezelter 1081 {
633 tim 746 OBAtom* prevatom = mol.GetAtom(_prev);
634    
635     //Calculate available valency on prevatom
636     //This is far more difficult than it should be!
637     //Data not always updated during molecule constuction.
638     int val=0;
639     if(prevatom->IsCarbon())
640 gezelter 1081 val=4;
641 tim 746 else if(prevatom->IsNitrogen())
642 gezelter 1081 val=3;
643 tim 746 else if(prevatom->IsPhosphorus())
644 gezelter 1081 val=3;
645 tim 746 else if(prevatom->IsOxygen())
646 gezelter 1081 val=2;
647 tim 746 else if(prevatom->IsSulfur())
648 gezelter 1081 val=2;
649 tim 746
650     /* int sumBO=0;
651 gezelter 1081 vector<OBEdgeBase*>::iterator itr;
652     OBBond* bond = prevatom->BeginBond(itr);
653     while(bond)
654     {
655     sumBO +=bond->GetBO();
656     bond=prevatom->NextBond(itr);
657     }
658     if(prevatom->BOSum() != sumBO)
659     cerr << "BOSum != sumBO" << endl;
660 tim 746 */
661     int AvailableValence = val + prevatom->GetFormalCharge() - prevatom->BOSum();//sumBO;
662    
663     if (prevatom->GetSpinMultiplicity())
664 gezelter 1081 {
665 tim 746 prevatom->SetSpinMultiplicity(0);
666     atom->SetSpinMultiplicity(0);
667    
668 gezelter 1081 //Make the new bond potentially aromatic, unless a double
669     // bond might be impossible given the valence
670     _order = AvailableValence>=2 ? 5 : 1 ;
671     }
672 tim 746 else
673 gezelter 1081 if(AvailableValence<1) //Must be complex aromatic with O, N
674     atom->SetSpinMultiplicity(0); //radical centres not appropriate in complex aromatics
675     }
676 tim 746 // CM end
677     mol.AddBond(_prev,mol.NumAtoms(),_order,_bondflags);
678    
679     //NE iterate through and see if atom is bonded to chiral atom
680     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
681     ChiralSearch=_mapcd.find(mol.GetAtom(_prev));
682     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
683 gezelter 1081 {
684     (ChiralSearch->second)->AddAtomRef(mol.NumAtoms(), input);
685     // cout << "Line 650: Adding "<<mol.NumAtoms()<<" to "<<ChiralSearch->second<<endl;
686     }
687     }
688 tim 746 //set values
689     _prev = mol.NumAtoms();
690     _order = 1;
691     _bondflags = 0;
692    
693     return(true);
694 gezelter 1081 }
695 tim 746
696 gezelter 1081 bool OBSmilesParser::ParseComplex(OBMol &mol)
697     {
698 tim 746 char symbol[7];
699     int element=0;
700     int isotope=0;
701     int isoPtr=0;
702     bool arom=false;
703     memset(symbol,'\0',sizeof(char)*7);
704    
705     _ptr++;
706    
707     //grab isotope information
708 gezelter 1081 for (;*_ptr && isdigit(*_ptr) && (isoPtr <= 6); _ptr++)
709     {
710 tim 746 symbol[isoPtr] = *_ptr;
711     isoPtr++;
712 gezelter 1081 }
713     if (isoPtr >= 6)
714     return false;
715 tim 746 isotope = atoi(symbol);
716    
717     //parse element data
718     if (isupper(*_ptr))
719 gezelter 1081 switch(*_ptr)
720 tim 746 {
721     case 'C':
722 gezelter 1081 _ptr++;
723     switch(*_ptr)
724 tim 746 {
725     case 'a':
726 gezelter 1081 element = 20;
727     strcpy(symbol,"Ca");
728     break;
729 tim 746 case 'd':
730 gezelter 1081 element = 48;
731     strcpy(symbol,"Cd");
732     break;
733 tim 746 case 'e':
734 gezelter 1081 element = 58;
735     strcpy(symbol,"Ce");
736     break;
737 tim 746 case 'f':
738 gezelter 1081 element = 98;
739     strcpy(symbol,"Cf");
740     break;
741 tim 746 case 'l':
742 gezelter 1081 element = 17;
743     strcpy(symbol,"Cl");
744     break;
745 tim 746 case 'm':
746 gezelter 1081 element = 96;
747     strcpy(symbol,"Cm");
748     break;
749 tim 746 case 'o':
750 gezelter 1081 element = 27;
751     strcpy(symbol,"Co");
752     break;
753 tim 746 case 'r':
754 gezelter 1081 element = 24;
755     strcpy(symbol,"Cr");
756     break;
757 tim 746 case 's':
758 gezelter 1081 element = 55;
759     strcpy(symbol,"Cs");
760     break;
761 tim 746 case 'u':
762 gezelter 1081 element = 29;
763     strcpy(symbol,"Cu");
764     break;
765 tim 746 default:
766 gezelter 1081 element = 6;
767     symbol[0] = 'C';
768     _ptr--;
769 tim 746 }
770 gezelter 1081 break;
771 tim 746
772     case 'N':
773 gezelter 1081 _ptr++;
774     switch(*_ptr)
775 tim 746 {
776     case 'a':
777 gezelter 1081 element = 11;
778     strcpy(symbol,"Na");
779     break;
780 tim 746 case 'b':
781 gezelter 1081 element = 41;
782     strcpy(symbol,"Nb");
783     break;
784 tim 746 case 'd':
785 gezelter 1081 element = 60;
786     strcpy(symbol,"Nd");
787     break;
788 tim 746 case 'e':
789 gezelter 1081 element = 10;
790     strcpy(symbol,"Ne");
791     break;
792 tim 746 case 'i':
793 gezelter 1081 element = 28;
794     strcpy(symbol,"Ni");
795     break;
796 tim 746 case 'o':
797 gezelter 1081 element = 102;
798     strcpy(symbol,"No");
799     break;
800 tim 746 case 'p':
801 gezelter 1081 element = 93;
802     strcpy(symbol,"Np");
803     break;
804 tim 746 default:
805 gezelter 1081 element = 7;
806     symbol[0] = 'N';
807     _ptr--;
808 tim 746 }
809 gezelter 1081 break;
810 tim 746
811     case('O'):
812 gezelter 1081 _ptr++;
813     if(*_ptr == 's')
814     {
815     element = 76;
816     strcpy(symbol,"Os");
817 tim 746 }
818 gezelter 1081 else
819 tim 746 {
820 gezelter 1081 element = 8;
821     symbol[0] = 'O';
822     _ptr--;
823 tim 746 }
824 gezelter 1081 break;
825 tim 746
826     case 'P':
827 gezelter 1081 _ptr++;
828     switch(*_ptr)
829 tim 746 {
830     case 'a':
831 gezelter 1081 element = 91;
832     strcpy(symbol,"Pa");
833     break;
834 tim 746 case 'b':
835 gezelter 1081 element = 82;
836     strcpy(symbol,"Pb");
837     break;
838 tim 746 case 'd':
839 gezelter 1081 element = 46;
840     strcpy(symbol,"Pd");
841     break;
842 tim 746 case 'm':
843 gezelter 1081 element = 61;
844     strcpy(symbol,"Pm");
845     break;
846 tim 746 case 'o':
847 gezelter 1081 element = 84;
848     strcpy(symbol,"Po");
849     break;
850 tim 746 case 'r':
851 gezelter 1081 element = 59;
852     strcpy(symbol,"Pr");
853     break;
854 tim 746 case 't':
855 gezelter 1081 element = 78;
856     strcpy(symbol,"Pt");
857     break;
858 tim 746 case 'u':
859 gezelter 1081 element = 94;
860     strcpy(symbol,"Pu");
861     break;
862 tim 746 default:
863 gezelter 1081 element = 15;
864     symbol[0] = 'P';
865     _ptr--;
866 tim 746 }
867 gezelter 1081 break;
868 tim 746
869     case('S'):
870 gezelter 1081 _ptr++;
871     switch(*_ptr)
872     {
873 tim 746 case 'b':
874 gezelter 1081 element = 51;
875     strcpy(symbol,"Sb");
876     break;
877 tim 746 case 'c':
878 gezelter 1081 element = 21;
879     strcpy(symbol,"Sc");
880     break;
881 tim 746 case 'e':
882 gezelter 1081 element = 34;
883     strcpy(symbol,"Se");
884     break;
885 tim 746 case 'i':
886 gezelter 1081 element = 14;
887     strcpy(symbol,"Si");
888     break;
889 tim 746 case 'm':
890 gezelter 1081 element = 62;
891     strcpy(symbol,"Sm");
892     break;
893 tim 746 case 'n':
894 gezelter 1081 element = 50;
895     strcpy(symbol,"Sn");
896     break;
897 tim 746 case 'r':
898 gezelter 1081 element = 38;
899     strcpy(symbol,"Sr");
900     break;
901 tim 746 default:
902 gezelter 1081 element = 16;
903     symbol[0] = 'S';
904     _ptr--;
905 tim 746 }
906 gezelter 1081 break;
907 tim 746
908     case 'B':
909 gezelter 1081 _ptr++;
910     switch(*_ptr)
911 tim 746 {
912     case 'a':
913 gezelter 1081 element = 56;
914     strcpy(symbol,"Ba");
915     break;
916 tim 746 case 'e':
917 gezelter 1081 element = 4;
918     strcpy(symbol,"Be");
919     break;
920 tim 746 case 'i':
921 gezelter 1081 element = 83;
922     strcpy(symbol,"Bi");
923     break;
924 tim 746 case 'k':
925 gezelter 1081 element = 97;
926     strcpy(symbol,"Bk");
927     break;
928 tim 746 case 'r':
929 gezelter 1081 element = 35;
930     strcpy(symbol,"Br");
931     break;
932 tim 746 default:
933 gezelter 1081 element = 5;
934     symbol[0] = 'B';
935     _ptr--;
936 tim 746 }
937 gezelter 1081 break;
938 tim 746
939     case 'F':
940 gezelter 1081 _ptr++;
941     switch(*_ptr)
942 tim 746 {
943     case 'e':
944 gezelter 1081 element = 26;
945     strcpy(symbol,"Fe");
946     break;
947 tim 746 case 'm':
948 gezelter 1081 element = 100;
949     strcpy(symbol,"Fm");
950     break;
951 tim 746 case 'r':
952 gezelter 1081 element = 87;
953     strcpy(symbol,"Fr");
954     break;
955 tim 746 default:
956 gezelter 1081 element = 9;
957     symbol[0] = 'F';
958     _ptr--;
959 tim 746 }
960 gezelter 1081 break;
961 tim 746
962     case 'I':
963 gezelter 1081 _ptr++;
964     switch(*_ptr)
965 tim 746 {
966     case 'n':
967 gezelter 1081 element = 49;
968     strcpy(symbol,"In");
969     break;
970 tim 746 case 'r':
971 gezelter 1081 element = 77;
972     strcpy(symbol,"Ir");
973     break;
974 tim 746 default:
975 gezelter 1081 element = 53;
976     symbol[0] = 'I';
977     _ptr--;
978 tim 746 }
979 gezelter 1081 break;
980 tim 746
981     case 'A':
982 gezelter 1081 _ptr++;
983     switch(*_ptr)
984 tim 746 {
985     case 'c':
986 gezelter 1081 element = 89;
987     strcpy(symbol,"Ac");
988     break;
989 tim 746 case 'g':
990 gezelter 1081 element = 47;
991     strcpy(symbol,"Ag");
992     break;
993 tim 746 case 'l':
994 gezelter 1081 element = 13;
995     strcpy(symbol,"Al");
996     break;
997 tim 746 case 'm':
998 gezelter 1081 element = 95;
999     strcpy(symbol,"Am");
1000     break;
1001 tim 746 case 'r':
1002 gezelter 1081 element = 18;
1003     strcpy(symbol,"Ar");
1004     break;
1005 tim 746 case 's':
1006 gezelter 1081 element = 33;
1007     strcpy(symbol,"As");
1008     break;
1009 tim 746 case 't':
1010 gezelter 1081 element = 85;
1011     strcpy(symbol,"At");
1012     break;
1013 tim 746 case 'u':
1014 gezelter 1081 element = 79;
1015     strcpy(symbol,"Au");
1016     break;
1017 tim 746 default:
1018 gezelter 1081 _ptr--;
1019     return(false);
1020 tim 746 }
1021 gezelter 1081 break;
1022 tim 746
1023     case 'D':
1024 gezelter 1081 _ptr++;
1025     if (*_ptr == 'y')
1026 tim 746 {
1027 gezelter 1081 element = 66;
1028     strcpy(symbol,"Dy");
1029 tim 746 }
1030 gezelter 1081 else
1031 tim 746 {
1032 gezelter 1081 _ptr--;
1033     return(false);
1034 tim 746 }
1035 gezelter 1081 break;
1036 tim 746
1037     case 'E':
1038 gezelter 1081 _ptr++;
1039     switch(*_ptr)
1040 tim 746 {
1041     case 'r':
1042 gezelter 1081 element = 68;
1043     strcpy(symbol,"Er");
1044     break;
1045 tim 746 case 's':
1046 gezelter 1081 element = 99;
1047     strcpy(symbol,"Es");
1048     break;
1049 tim 746 case 'u':
1050 gezelter 1081 element = 63;
1051     strcpy(symbol,"Eu");
1052     break;
1053 tim 746 default:
1054 gezelter 1081 _ptr--;
1055     return(false);
1056 tim 746 }
1057 gezelter 1081 break;
1058 tim 746
1059     case 'G':
1060 gezelter 1081 _ptr++;
1061     switch (*_ptr)
1062 tim 746 {
1063     case 'a':
1064 gezelter 1081 element = 31;
1065     strcpy(symbol,"Ga");
1066     break;
1067 tim 746 case 'd':
1068 gezelter 1081 element = 64;
1069     strcpy(symbol,"Gd");
1070     break;
1071 tim 746 case 'e':
1072 gezelter 1081 element = 32;
1073     strcpy(symbol,"Ge");
1074     break;
1075 tim 746 default:
1076 gezelter 1081 _ptr--;
1077     return(false);
1078 tim 746 }
1079 gezelter 1081 break;
1080 tim 746
1081     case 'H':
1082 gezelter 1081 _ptr++;
1083     switch (*_ptr)
1084 tim 746 {
1085     case 'e':
1086 gezelter 1081 element = 2;
1087     strcpy(symbol,"He");
1088     break;
1089 tim 746 case 'f':
1090 gezelter 1081 element = 72;
1091     strcpy(symbol,"Hf");
1092     break;
1093 tim 746 case 'g':
1094 gezelter 1081 element = 80;
1095     strcpy(symbol,"Hg");
1096     break;
1097 tim 746 case 'o':
1098 gezelter 1081 element = 67;
1099     strcpy(symbol,"Ho");
1100     break;
1101 tim 746 default:
1102 gezelter 1081 element = 1;
1103     symbol[0] = 'H';
1104     _ptr--;
1105 tim 746 }
1106 gezelter 1081 break;
1107 tim 746
1108     case 'K':
1109 gezelter 1081 _ptr++;
1110     if(*_ptr == 'r')
1111 tim 746 {
1112 gezelter 1081 element = 36;
1113     strcpy(symbol,"Kr");
1114 tim 746 }
1115 gezelter 1081 else
1116 tim 746 {
1117 gezelter 1081 element = 19;
1118     symbol[0] = 'K';
1119     _ptr--;
1120 tim 746 }
1121 gezelter 1081 break;
1122 tim 746
1123     case 'L':
1124 gezelter 1081 _ptr++;
1125     switch(*_ptr)
1126 tim 746 {
1127     case 'a':
1128 gezelter 1081 element = 57;
1129     strcpy(symbol,"La");
1130     break;
1131 tim 746 case 'i':
1132 gezelter 1081 element = 3;
1133     strcpy(symbol,"Li");
1134     break;
1135 tim 746 case 'r':
1136 gezelter 1081 element = 103;
1137     strcpy(symbol,"Lr");
1138     break;
1139 tim 746 case 'u':
1140 gezelter 1081 element = 71;
1141     strcpy(symbol,"Lu");
1142     break;
1143 tim 746 default:
1144 gezelter 1081 _ptr--;
1145     return(false);
1146 tim 746 }
1147 gezelter 1081 break;
1148 tim 746
1149     case 'M':
1150 gezelter 1081 _ptr++;
1151     switch(*_ptr)
1152 tim 746 {
1153     case 'd':
1154 gezelter 1081 element = 101;
1155     strcpy(symbol,"Md");
1156     break;
1157 tim 746 case 'g':
1158 gezelter 1081 element = 12;
1159     strcpy(symbol,"Mg");
1160     break;
1161 tim 746 case 'n':
1162 gezelter 1081 element = 25;
1163     strcpy(symbol,"Mn");
1164     break;
1165 tim 746 case 'o':
1166 gezelter 1081 element = 42;
1167     strcpy(symbol,"Mo");
1168     break;
1169 tim 746 default:
1170 gezelter 1081 _ptr--;
1171     return(false);
1172 tim 746 }
1173 gezelter 1081 break;
1174 tim 746
1175     case 'R':
1176 gezelter 1081 _ptr++;
1177     switch(*_ptr)
1178 tim 746 {
1179     case 'a':
1180 gezelter 1081 element = 88;
1181     strcpy(symbol,"Ra");
1182     break;
1183 tim 746 case 'b':
1184 gezelter 1081 element = 37;
1185     strcpy(symbol,"Rb");
1186     break;
1187 tim 746 case 'e':
1188 gezelter 1081 element = 75;
1189     strcpy(symbol,"Re");
1190     break;
1191 tim 746 case 'h':
1192 gezelter 1081 element = 45;
1193     strcpy(symbol,"Rh");
1194     break;
1195 tim 746 case 'n':
1196 gezelter 1081 element = 86;
1197     strcpy(symbol,"Rn");
1198     break;
1199 tim 746 case 'u':
1200 gezelter 1081 element = 44;
1201     strcpy(symbol,"Ru");
1202     break;
1203 tim 746 default:
1204 gezelter 1081 _ptr--;
1205     return(false);
1206 tim 746 }
1207 gezelter 1081 break;
1208 tim 746
1209     case 'T':
1210 gezelter 1081 _ptr++;
1211     switch(*_ptr)
1212 tim 746 {
1213     case 'a':
1214 gezelter 1081 element = 73;
1215     strcpy(symbol,"Ta");
1216     break;
1217 tim 746 case 'b':
1218 gezelter 1081 element = 65;
1219     strcpy(symbol,"Tb");
1220     break;
1221 tim 746 case 'c':
1222 gezelter 1081 element = 43;
1223     strcpy(symbol,"Tc");
1224     break;
1225 tim 746 case 'e':
1226 gezelter 1081 element = 52;
1227     strcpy(symbol,"Te");
1228     break;
1229 tim 746 case 'h':
1230 gezelter 1081 element = 90;
1231     strcpy(symbol,"Th");
1232     break;
1233 tim 746 case 'i':
1234 gezelter 1081 element = 22;
1235     strcpy(symbol,"Ti");
1236     break;
1237 tim 746 case 'l':
1238 gezelter 1081 element = 81;
1239     strcpy(symbol,"Tl");
1240     break;
1241 tim 746 case 'm':
1242 gezelter 1081 element = 69;
1243     strcpy(symbol,"Tm");
1244     break;
1245 tim 746 default:
1246 gezelter 1081 _ptr--;
1247     return(false);
1248 tim 746 }
1249 gezelter 1081 break;
1250 tim 746
1251     case('U'): element = 92;
1252 gezelter 1081 symbol[0] = 'U';
1253     break;
1254 tim 746 case('V'): element = 23;
1255 gezelter 1081 symbol[0] = 'V';
1256     break;
1257 tim 746 case('W'): element = 74;
1258 gezelter 1081 symbol[0] = 'W';
1259     break;
1260 tim 746
1261     case('X'):
1262 gezelter 1081 _ptr++;
1263     if (*_ptr == 'e')
1264     {
1265     element = 54;
1266     strcpy(symbol,"Xe");
1267 tim 746 }
1268 gezelter 1081 else
1269 tim 746 {
1270 gezelter 1081 _ptr--;
1271     return(false);
1272 tim 746 }
1273 gezelter 1081 break;
1274 tim 746
1275     case('Y'):
1276 gezelter 1081 _ptr++;
1277     if (*_ptr == 'b')
1278     {
1279     element = 70;
1280     strcpy(symbol,"Yb");
1281 tim 746 }
1282 gezelter 1081 else
1283 tim 746 {
1284 gezelter 1081 element = 39;
1285     symbol[0] = 'Y';
1286     _ptr--;
1287 tim 746 }
1288 gezelter 1081 break;
1289 tim 746
1290     case('Z'):
1291 gezelter 1081 _ptr++;
1292     switch(*_ptr)
1293     {
1294 tim 746 case 'n':
1295 gezelter 1081 element = 30;
1296     strcpy(symbol,"Zn");
1297     break;
1298 tim 746 case 'r':
1299 gezelter 1081 element = 40;
1300     strcpy(symbol,"Zr");
1301     break;
1302 tim 746 default:
1303 gezelter 1081 _ptr--;
1304     return(false);
1305 tim 746 }
1306 gezelter 1081 break;
1307 tim 746 }
1308     else
1309 gezelter 1081 {
1310 tim 746 arom = true;
1311     switch(*_ptr)
1312 gezelter 1081 {
1313     case 'c':
1314 tim 746 element = 6;
1315     symbol[0] = 'C';
1316     break;
1317 gezelter 1081 case 'n':
1318 tim 746 element = 7;
1319     symbol[0] = 'N';
1320     break;
1321 gezelter 1081 case 'o':
1322 tim 746 element = 8;
1323     symbol[0] = 'O';
1324     break;
1325 gezelter 1081 case 'p':
1326 tim 746 element = 15;
1327     symbol[0] = 'P';
1328     break;
1329 gezelter 1081 case 's':
1330 tim 746 _ptr++;
1331     if (*_ptr == 'e')
1332 gezelter 1081 {
1333 tim 746 element = 34;
1334     strcpy(symbol,"Se");
1335 gezelter 1081 }
1336 tim 746 else
1337 gezelter 1081 {
1338 tim 746 element = 16;
1339     symbol[0] = 'S';
1340     _ptr--;
1341 gezelter 1081 }
1342 tim 746 break;
1343 gezelter 1081 case 'a':
1344 tim 746 _ptr++;
1345     if (*_ptr == 's')
1346 gezelter 1081 {
1347 tim 746 element = 33;
1348     strcpy(symbol,"As");
1349 gezelter 1081 }
1350 tim 746 else
1351 gezelter 1081 return(false);
1352 tim 746 break;
1353 gezelter 1081 default:
1354 tim 746 return(false);
1355 gezelter 1081 }
1356     }
1357 tim 746
1358     //handle hydrogen count, stereochemistry, and charge
1359    
1360     OBAtom *atom = mol.NewAtom();
1361     int hcount = 0;
1362     int charge=0;
1363 gezelter 1081 int rad=0;
1364 tim 746 char tmpc[2];
1365     tmpc[1] = '\0';
1366     for (_ptr++;*_ptr && *_ptr != ']';_ptr++)
1367 gezelter 1081 {
1368 tim 746 switch(*_ptr)
1369 gezelter 1081 {
1370     case '@':
1371 tim 746 _ptr++;
1372     chiralWatch=true;
1373     _mapcd[atom]= new OBChiralData;
1374     if (*_ptr == '@')
1375 gezelter 1081 atom->SetClockwiseStereo();
1376 tim 746 else
1377 gezelter 1081 {
1378 tim 746 atom->SetAntiClockwiseStereo();
1379     _ptr--;
1380 gezelter 1081 }
1381 tim 746 break;
1382 gezelter 1081 case '-':
1383 tim 746 _ptr++;
1384     if (isdigit(*_ptr))
1385 gezelter 1081 {
1386 tim 746 tmpc[0] = *_ptr;
1387     charge = -atoi(tmpc);
1388 gezelter 1081 }
1389 tim 746 else
1390 gezelter 1081 {
1391 tim 746 charge--;
1392     _ptr--;
1393 gezelter 1081 }
1394 tim 746 break;
1395 gezelter 1081 case '+':
1396 tim 746 _ptr++;
1397     if (isdigit(*_ptr))
1398 gezelter 1081 {
1399 tim 746 tmpc[0] = *_ptr;
1400     charge = atoi(tmpc);
1401 gezelter 1081 }
1402 tim 746 else
1403 gezelter 1081 {
1404 tim 746 charge++;
1405     _ptr--;
1406 gezelter 1081 }
1407 tim 746 break;
1408    
1409 gezelter 1081 case 'H':
1410 tim 746 _ptr++;
1411     if (isdigit(*_ptr))
1412 gezelter 1081 {
1413 tim 746 tmpc[0] = *_ptr;
1414     hcount = atoi(tmpc);
1415 gezelter 1081 }
1416 tim 746 else
1417 gezelter 1081 {
1418 tim 746 hcount = 1;
1419     _ptr--;
1420 gezelter 1081 }
1421 tim 746 break;
1422 gezelter 1081 case '.': //CM Feb05
1423     rad=2;
1424     if(*(++_ptr)=='.')
1425     rad=3;
1426     else
1427     _ptr--;
1428     break;
1429 tim 746
1430 gezelter 1081 default:
1431 tim 746 return(false);
1432 gezelter 1081 }
1433     }
1434 tim 746
1435     if (charge)
1436 gezelter 1081 atom->SetFormalCharge(charge);
1437     if (rad)
1438     atom->SetSpinMultiplicity(rad);
1439 tim 746 atom->SetAtomicNum(element);
1440     atom->SetIsotope(isotope);
1441     atom->SetType(symbol);
1442     if (arom)
1443 gezelter 1081 atom->SetAromatic();
1444 tim 746
1445     if (_prev) //need to add bond
1446 gezelter 1081 {
1447     mol.AddBond(_prev,mol.NumAtoms(),_order,_bondflags);
1448 tim 746 if(chiralWatch) // if chiral atom need to add its previous into atom4ref
1449 gezelter 1081 {
1450     if (_mapcd[atom] == NULL)
1451     _mapcd[atom]= new OBChiralData;
1452 tim 746
1453 gezelter 1081 (_mapcd[atom])->AddAtomRef((unsigned int)_prev,input);
1454     // cout <<"line 1405: Added atom ref "<<_prev<<" to "<<_mapcd[atom]<<endl;
1455     }
1456 tim 746 map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
1457     ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1458     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1459 gezelter 1081 {
1460     (ChiralSearch->second)->AddAtomRef(mol.NumAtoms(), input);
1461     // cout <<"line 1431: Added atom ref "<<mol.NumAtoms()<<" to "<<ChiralSearch->second<<endl;
1462     }
1463     }
1464 tim 746
1465     //set values
1466     _prev = mol.NumAtoms();
1467     _order = 1;
1468     _bondflags = 0;
1469    
1470     //now add hydrogens
1471    
1472     for (int i = 0;i < hcount;i++)
1473 gezelter 1081 {
1474 tim 746 atom = mol.NewAtom();
1475     atom->SetAtomicNum(1);
1476     atom->SetType("H");
1477     mol.AddBond(_prev,mol.NumAtoms(),1);
1478     if(chiralWatch)
1479 gezelter 1081 {
1480     if (_mapcd[mol.GetAtom(_prev)] != NULL)
1481     (_mapcd[mol.GetAtom(_prev)])->AddAtomRef(mol.NumAtoms(),input);
1482     // cout << "line 1434: Added atom ref "<<mol.NumAtoms()<<" to "<<_mapcd[mol.GetAtom(_prev)]<<endl;
1483 tim 746
1484 gezelter 1081 }
1485     }
1486 tim 746 chiralWatch=false;
1487     return(true);
1488 gezelter 1081 }
1489 tim 746
1490 gezelter 1081 bool OBSmilesParser::CapExternalBonds(OBMol &mol)
1491     {
1492 tim 746
1493     if(_extbond.empty())
1494 gezelter 1081 return(true);
1495 tim 746
1496     OBAtom *atom;
1497     vector<vector<int> >::iterator bond;
1498    
1499     for(bond = _extbond.begin();bond != _extbond.end();bond++)
1500 gezelter 1081 {
1501 tim 746 // create new dummy atom
1502     atom = mol.NewAtom();
1503     atom->SetAtomicNum(0);
1504     atom->SetType("*");
1505    
1506     // bond dummy atom to mol via external bond
1507     mol.AddBond((*bond)[1],atom->GetIdx(),(*bond)[2],(*bond)[3]);
1508     OBBond *refbond = atom->GetBond(mol.GetAtom((*bond)[1]));
1509    
1510     //record external bond information
1511     OBExternalBondData *xbd;
1512     if(mol.HasData(OBGenericDataType::ExternalBondData))
1513 gezelter 1081 xbd = (OBExternalBondData*)mol.GetData(OBGenericDataType::ExternalBondData);
1514 tim 746 else
1515 gezelter 1081 {
1516 tim 746 xbd = new OBExternalBondData;
1517     mol.SetData(xbd);
1518 gezelter 1081 }
1519 tim 746 xbd->SetData(atom,refbond,(*bond)[0]);
1520    
1521     /* old code written by AGS -- mts
1522 gezelter 1081 {
1523     externalbonds = (vector<pair<int,pair<OBAtom *,OBBond *> > > *)mol.GetData("extBonds");
1524     }
1525     else
1526     {
1527     externalbonds = new vector<pair<int,pair<OBAtom *,OBBond *> > >;
1528     }
1529 tim 746
1530 gezelter 1081 //save data <external bond count, bond index>
1531     externalbonds->push_back(pair<int,pair<OBAtom *,OBBond *> > ((*bond)[0], pair<OBAtom *,OBBond *> (atom,mol.GetBond((*bond)[1],atom->GetIdx()))));
1532     mol.SetData("extBonds",externalbonds);
1533 tim 746 */
1534    
1535     //this data gets cleaned up in mol.Clear.
1536 gezelter 1081 }
1537 tim 746
1538     return(true);
1539 gezelter 1081 }
1540 tim 746
1541 gezelter 1081 bool OBSmilesParser::ParseExternalBond(OBMol &mol)
1542     {
1543 tim 746 int digit;
1544     char str[10];
1545    
1546     //*_ptr should == '&'
1547     _ptr++;
1548    
1549     switch (*_ptr) // check for bond order indicators CC&=1.C&1
1550 gezelter 1081 {
1551     case '-':
1552 tim 746 _order = 1;
1553     _ptr++;
1554     break;
1555 gezelter 1081 case '=':
1556 tim 746 _order = 2;
1557     _ptr++;
1558     break;
1559 gezelter 1081 case '#':
1560 tim 746 _order = 3;
1561     _ptr++;
1562     break;
1563 gezelter 1081 case ';':
1564 tim 746 _order = 5;
1565     _ptr++;
1566     break;
1567 gezelter 1081 case '/': //chiral, but _order still == 1
1568     _bondflags |= OB_TORUP_BOND;
1569 tim 746 _ptr++;
1570     break;
1571     _ptr++;
1572 gezelter 1081 case '\\': // chiral, but _order still == 1
1573     _bondflags |= OB_TORDOWN_BOND;
1574 tim 746 _ptr++;
1575     break;
1576 gezelter 1081 default: // no bond indicator just leave order = 1
1577 tim 746 break;
1578 gezelter 1081 }
1579 tim 746
1580     if (*_ptr == '%') // external bond indicator > 10
1581 gezelter 1081 {
1582 tim 746 _ptr++;
1583     str[0] = *_ptr;
1584     _ptr++;
1585     str[1] = *_ptr;
1586     str[2] = '\0';
1587 gezelter 1081 }
1588 tim 746 else // simple single digit external bond indicator
1589 gezelter 1081 {
1590 tim 746 str[0] = *_ptr;
1591     str[1] = '\0';
1592 gezelter 1081 }
1593 tim 746 digit = atoi(str); // convert indicator to digit
1594    
1595     //check for dot disconnect closures
1596     vector<vector<int> >::iterator j;
1597     int bondFlags,bondOrder;
1598     for(j = _extbond.begin();j != _extbond.end();j++)
1599 gezelter 1081 {
1600 tim 746 if((*j)[0] == digit)
1601 gezelter 1081 {
1602 tim 746 bondFlags = (_bondflags > (*j)[3]) ? _bondflags : (*j)[3];
1603     bondOrder = (_order > (*j)[2]) ? _order : (*j)[2];
1604     mol.AddBond((*j)[1],_prev,bondOrder,bondFlags);
1605    
1606 gezelter 1081 // after adding a bond to atom "_prev"
1607     // search to see if atom is bonded to a chiral atom
1608     map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
1609     ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1610     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1611 tim 746 {
1612 gezelter 1081 (ChiralSearch->second)->AddAtomRef((*j)[1], input);
1613 tim 746 // cout << "Added external "<<(*j)[1]<<" to "<<ChiralSearch->second<<endl;
1614     }
1615    
1616     _extbond.erase(j);
1617     _bondflags = 0;
1618     _order = 0;
1619     return(true);
1620 gezelter 1081 }
1621     }
1622 tim 746
1623     //since no closures save another ext bond
1624     vector<int> vtmp(4);
1625     vtmp[0] = digit;
1626     vtmp[1] = _prev;
1627     vtmp[2] = _order;
1628     vtmp[3] = _bondflags;
1629    
1630     _extbond.push_back(vtmp);
1631     _order = 1;
1632     _bondflags = 0;
1633    
1634     return(true);
1635    
1636 gezelter 1081 }
1637 tim 746
1638 gezelter 1081 bool OBSmilesParser::ParseRingBond(OBMol &mol)
1639     {
1640 tim 746 int digit;
1641     char str[10];
1642    
1643     if (*_ptr == '%')
1644 gezelter 1081 {
1645 tim 746 _ptr++;
1646     str[0] = *_ptr;
1647     _ptr++;
1648     str[1] = *_ptr;
1649     str[2] = '\0';
1650 gezelter 1081 }
1651 tim 746 else
1652 gezelter 1081 {
1653 tim 746 str[0] = *_ptr;
1654     str[1] = '\0';
1655 gezelter 1081 }
1656 tim 746 digit = atoi(str);
1657    
1658     int bf,ord;
1659     vector<vector<int> >::iterator j;
1660     for (j = _rclose.begin();j != _rclose.end();j++)
1661 gezelter 1081 if ((*j)[0] == digit)
1662 tim 746 {
1663 gezelter 1081 bf = (_bondflags > (*j)[3]) ? _bondflags : (*j)[3];
1664     ord = (_order > (*j)[2]) ? _order : (*j)[2];
1665     mol.AddBond((*j)[1],_prev,ord,bf,(*j)[4]);
1666 tim 746
1667 gezelter 1081 // after adding a bond to atom "_prev"
1668     // search to see if atom is bonded to a chiral atom
1669     // need to check both _prev and (*j)[1] as closure is direction independant
1670     map<OBAtom*,OBChiralData*>::iterator ChiralSearch,cs2;
1671     ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1672     cs2=_mapcd.find(mol.GetAtom((*j)[1]));
1673     if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1674     {
1675     (ChiralSearch->second)->AddAtomRef((*j)[1], input);
1676     //cout << "Added ring closure "<<(*j)[1]<<" to "<<ChiralSearch->second<<endl;
1677     }
1678     if (cs2!=_mapcd.end() && cs2->second != NULL)
1679     {
1680     (cs2->second)->AddAtomRef(_prev,input);
1681     // cout <<"Added ring opening "<<_prev<<" to "<<cs2->second<<endl;
1682     }
1683 tim 746
1684 gezelter 1081 //CM ensure neither atoms in ring closure is a radical centre
1685     OBAtom* patom = mol.GetAtom(_prev);
1686     patom->SetSpinMultiplicity(0);
1687     patom = mol.GetAtom((*j)[1]);
1688     patom->SetSpinMultiplicity(0);
1689     //CM end
1690     _rclose.erase(j);
1691     _bondflags = 0;
1692     _order = 1;
1693     return(true);
1694 tim 746 }
1695    
1696     vector<int> vtmp(5);
1697     vtmp[0] = digit;
1698     vtmp[1] = _prev;
1699     vtmp[2] = _order;
1700     vtmp[3] = _bondflags;
1701 gezelter 1081 OBAtom* atom = mol.GetAtom(_prev);
1702     if(!atom)
1703     {
1704     obErrorLog.ThrowError(__func__,"Number not parsed correctly as a ring bond", obWarning);
1705     return false;
1706     }
1707    
1708     vtmp[4] = atom->GetValence(); //store position to insert closure bond
1709 tim 746 for (j = _rclose.begin();j != _rclose.end();j++) //correct for multiple closure bonds to a single atom
1710 gezelter 1081 if ((*j)[1] == _prev)
1711     vtmp[4]++;
1712 tim 746
1713     _rclose.push_back(vtmp);
1714     _order = 1;
1715     _bondflags = 0;
1716    
1717     return(true);
1718 gezelter 1081 }
1719 tim 746
1720 gezelter 1081 static bool IsCisOrTransH(OBAtom *atom)
1721     {
1722     if (!atom->IsHydrogen())
1723     return false;
1724     else
1725     FOR_BONDS_OF_ATOM(bond, atom)
1726     {
1727     if (bond->IsUp() || bond->IsDown())
1728     return true;
1729     }
1730     return false;
1731     }
1732 tim 746
1733 gezelter 1081 void OBMol2Smi::CreateSmiString(OBMol &mol,char *buffer)
1734     {
1735 tim 746 OBAtom *atom;
1736     OBSmiNode *root =NULL;
1737     buffer[0] = '\0';
1738     vector<OBNodeBase*>::iterator i;
1739    
1740     for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
1741 gezelter 1081 // don't use a hydrogen as the root node unless it's not bonded
1742     // or it's involved in a cis/trans '/' or '\' specification
1743     if ((!atom->IsHydrogen() || atom->GetValence() == 0 || IsCisOrTransH(atom))
1744     && !_uatoms[atom->GetIdx()])
1745     if (!atom->IsChiral() || !mol.HasNonZeroCoords())
1746     // don't use chiral root atoms unless this is from a SMILES
1747     {
1748     //clear out closures in case structure is dot disconnected
1749     _vclose.clear();
1750     _atmorder.clear();
1751     _storder.clear();
1752     _vopen.clear();
1753     //dot disconnected structure
1754     if (strlen(buffer) > 0)
1755     strcat(buffer,".");
1756     root = new OBSmiNode (atom);
1757     BuildTree(root);
1758     FindClosureBonds(mol);
1759     if (mol.Has2D())
1760     AssignCisTrans(root);
1761     ToSmilesString(root,buffer);
1762     delete root;
1763     }
1764 tim 746
1765     //If no starting node found e.g. [H][H] CM 21Mar05
1766     if(root==NULL)
1767     {
1768 gezelter 1081 root = new OBSmiNode(mol.GetFirstAtom());
1769     BuildTree(root);
1770     ToSmilesString(root,buffer);
1771     delete root;
1772 tim 746 }
1773 gezelter 1081 }
1774 tim 746
1775 gezelter 1081 bool OBMol2Smi::BuildTree(OBSmiNode *node)
1776     {
1777 tim 746 vector<OBEdgeBase*>::iterator i;
1778     OBAtom *nbr,*atom = node->GetAtom();
1779    
1780     _uatoms.SetBitOn(atom->GetIdx()); //mark the atom as visited
1781     _atmorder.push_back(atom->GetIdx()); //store the atom ordering
1782     _storder.push_back(atom->GetIdx()); //store the atom ordering for stereo
1783    
1784     for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
1785 gezelter 1081 {
1786     // Normally skip hydrogens
1787     // but D,T is explicit and so is H2
1788     // or an H atom involved in a cis/trans '/' or '\' bond spec
1789     if ( (!nbr->IsHydrogen() || nbr->GetIsotope() || atom->IsHydrogen() ||
1790     (((OBBond*)*i)->IsUp() || ((OBBond*)*i)->IsDown()) )
1791     && !_uatoms[nbr->GetIdx()])
1792     {
1793 tim 746 _ubonds.SetBitOn((*i)->GetIdx());
1794     OBSmiNode *next = new OBSmiNode (nbr);
1795     next->SetParent(atom);
1796     node->SetNextNode(next,(OBBond*)*i);
1797     BuildTree(next);
1798 gezelter 1081 }
1799     }
1800 tim 746
1801     return(true);
1802 gezelter 1081 }
1803 tim 746
1804 gezelter 1081 void OBMol2Smi::ToSmilesString(OBSmiNode *node,char *buffer)
1805     {
1806     char tmpbuf[16];
1807 tim 746 OBAtom *atom = node->GetAtom();
1808    
1809     //write the current atom to the string
1810     GetSmilesElement(node,tmpbuf);
1811     strcat(buffer,tmpbuf);
1812    
1813     //handle ring closures here
1814     vector<pair<int,OBBond*> > vc = GetClosureDigits(atom);
1815     if (!vc.empty())
1816 gezelter 1081 {
1817 tim 746 vector<pair<int,OBBond*> >::iterator i;
1818     for (i = vc.begin();i != vc.end();i++)
1819 gezelter 1081 {
1820 tim 746 if (i->second)
1821 gezelter 1081 {
1822 tim 746 if (i->second->IsUp())
1823 gezelter 1081 {
1824     if ( (i->second->GetBeginAtom())->HasDoubleBond() ||
1825     (i->second->GetEndAtom())->HasDoubleBond() )
1826     strcat(buffer,"/");
1827     }
1828 tim 746 if (i->second->IsDown())
1829 gezelter 1081 {
1830     if ( (i->second->GetBeginAtom())->HasDoubleBond() ||
1831     (i->second->GetEndAtom())->HasDoubleBond() )
1832     strcat(buffer,"\\");
1833     }
1834 tim 746 #ifndef KEKULE
1835    
1836     if (i->second->GetBO() == 2 && !i->second->IsAromatic())
1837 gezelter 1081 strcat(buffer,"=");
1838 tim 746 #else
1839    
1840     if (i->second->GetBO() == 2)
1841 gezelter 1081 strcat(buffer,"=");
1842 tim 746 #endif
1843    
1844     if (i->second->GetBO() == 3)
1845 gezelter 1081 strcat(buffer,"#");
1846     }
1847 tim 746
1848     if (i->first > 9)
1849 gezelter 1081 strcat(buffer,"%");
1850     snprintf(tmpbuf,sizeof(tmpbuf), "%d",i->first);
1851 tim 746 strcat(buffer,tmpbuf);
1852 gezelter 1081 }
1853     }
1854 tim 746
1855     //follow path to child atoms
1856     OBBond *bond;
1857     for (int i = 0;i < node->Size();i++)
1858 gezelter 1081 {
1859 tim 746 bond = node->GetNextBond(i);
1860     if (i+1 < node->Size())
1861 gezelter 1081 strcat(buffer,"(");
1862 tim 746 if (bond->IsUp())
1863 gezelter 1081 {
1864     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
1865     (bond->GetEndAtom())->HasDoubleBond() )
1866     strcat(buffer,"/");
1867     }
1868 tim 746 if (bond->IsDown())
1869 gezelter 1081 {
1870     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
1871     (bond->GetEndAtom())->HasDoubleBond() )
1872     strcat(buffer,"\\");
1873     }
1874 tim 746 #ifndef KEKULE
1875    
1876     if (bond->GetBO() == 2 && !bond->IsAromatic())
1877 gezelter 1081 strcat(buffer,"=");
1878 tim 746 #else
1879    
1880     if (bond->GetBO() == 2)
1881 gezelter 1081 strcat(buffer,"=");
1882 tim 746 #endif
1883    
1884     if (bond->GetBO() == 3)
1885 gezelter 1081 strcat(buffer,"#");
1886 tim 746
1887     ToSmilesString(node->GetNextNode(i),buffer);
1888     if (i+1 < node->Size())
1889 gezelter 1081 strcat(buffer,")");
1890     }
1891     }
1892 tim 746
1893 gezelter 1081 void OBMol2Smi::GetClosureAtoms(OBAtom *atom,vector<OBNodeBase*> &va)
1894     {
1895 tim 746
1896     //look through closure list for start atom
1897     vector<OBEdgeBase*>::iterator i;
1898     for (i = _vclose.begin();i != _vclose.end();i++)
1899 gezelter 1081 if (*i)
1900 tim 746 {
1901 gezelter 1081 if (((OBBond*)*i)->GetBeginAtom() == atom)
1902     va.push_back(((OBBond*)*i)->GetEndAtom());
1903     if (((OBBond*)*i)->GetEndAtom() == atom)
1904     va.push_back(((OBBond*)*i)->GetBeginAtom());
1905 tim 746 }
1906    
1907     OBAtom *nbr;
1908     vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1909     for (j = _vopen.begin();j != _vopen.end();j++)
1910 gezelter 1081 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
1911     if (nbr == j->first)
1912     va.push_back(nbr);
1913     }
1914 tim 746
1915 gezelter 1081 vector<pair<int,OBBond*> > OBMol2Smi::GetClosureDigits(OBAtom *atom)
1916     {
1917 tim 746 vector<pair<int,OBBond*> > vc;
1918     vc.clear();
1919    
1920     //look through closure list for start atom
1921     int idx,bo;
1922     OBBond *bond;
1923     vector<OBEdgeBase*>::iterator i;
1924     for (i = _vclose.begin();i != _vclose.end();i++)
1925 gezelter 1081 if ((bond=(OBBond*)*i))
1926     if (bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
1927     {
1928     idx = GetUnusedIndex();
1929     vc.push_back(pair<int,OBBond*> (idx,bond));
1930     bo = (bond->IsAromatic())? 1 : bond->GetBO();
1931     _vopen.push_back(pair<OBAtom*,pair<int,int> >
1932     (bond->GetNbrAtom(atom),pair<int,int>(idx,bo)));
1933     *i = NULL;//remove bond from closure list
1934     }
1935 tim 746
1936     //try to complete closures
1937     if (!_vopen.empty())
1938 gezelter 1081 {
1939 tim 746 vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1940     for (j = _vopen.begin();j != _vopen.end();)
1941 gezelter 1081 if (j->first == atom)
1942 tim 746 {
1943 gezelter 1081 vc.push_back(pair<int,OBBond*> (j->second.first,(OBBond*)NULL));
1944     _vopen.erase(j);
1945     j = _vopen.begin();
1946 tim 746 }
1947 gezelter 1081 else
1948     j++;
1949     }
1950 tim 746
1951     return(vc);
1952 gezelter 1081 }
1953 tim 746
1954 gezelter 1081 void OBMol2Smi::FindClosureBonds(OBMol &mol)
1955     {
1956 tim 746 //find closure bonds
1957     OBAtom *a1,*a2;
1958     OBBond *bond;
1959     vector<OBEdgeBase*>::iterator i;
1960     OBBitVec bv;
1961     bv.FromVecInt(_storder);
1962    
1963     for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
1964 gezelter 1081 if (!_ubonds[bond->GetIdx()] && bv[bond->GetBeginAtomIdx()])
1965 tim 746 {
1966 gezelter 1081 a1 = bond->GetBeginAtom();
1967     a2 = bond->GetEndAtom();
1968     if (!a1->IsHydrogen() && !a2->IsHydrogen())
1969     _vclose.push_back(bond);
1970 tim 746 }
1971    
1972     vector<OBEdgeBase*>::reverse_iterator j;
1973     vector<int>::iterator k;
1974    
1975     //modify _order to reflect ring closures
1976     for (j = _vclose.rbegin();j != _vclose.rend();j++)
1977 gezelter 1081 {
1978 tim 746 bond = (OBBond*)*j;
1979     a1 = a2 = NULL;
1980    
1981     for (k = _storder.begin();k != _storder.end();k++)
1982 gezelter 1081 if (bond->GetBeginAtomIdx() == static_cast<unsigned int>(*k) ||
1983     bond->
1984     GetEndAtomIdx() == static_cast<unsigned int>(*k))
1985     if (!a1) a1 = mol.GetAtom(*k);
1986     else if (!a2)
1987     {
1988     a2 = mol.GetAtom(*k)
1989     ;
1990     _storder.erase(k);
1991     break;
1992     }
1993 tim 746
1994     for (k = _storder.begin()
1995 gezelter 1081 ;
1996     k != _storder.end();
1997     k++)
1998     if (a1->GetIdx()
1999     == static_cast<unsigned int>(*k))
2000 tim 746 {
2001 gezelter 1081 k++;
2002     if (k != _storder.end())
2003     _storder.insert(k,a2->GetIdx());
2004     else
2005     _storder.push_back(a2->GetIdx());
2006     break;
2007 tim 746 }
2008 gezelter 1081 }
2009     }
2010 tim 746
2011 gezelter 1081 int OBMol2Smi::GetUnusedIndex()
2012     {
2013 tim 746 int idx=1;
2014    
2015     vector<pair<OBAtom*,pair<int,int> > >::iterator j;
2016     for (j = _vopen.begin();j != _vopen.end();)
2017 gezelter 1081 if (j->second.first == idx)
2018 tim 746 {
2019 gezelter 1081 idx++; //increment idx and start over if digit is already used
2020     j = _vopen.begin();
2021 tim 746 }
2022 gezelter 1081 else
2023     j++;
2024 tim 746
2025     return(idx);
2026 gezelter 1081 }
2027 tim 746
2028 gezelter 1081 void OBMol2Smi::CorrectAromaticAmineCharge(OBMol &mol)
2029     {
2030 tim 746 OBAtom *atom;
2031     vector<OBNodeBase*>::iterator i;
2032    
2033     _aromNH.clear();
2034     _aromNH.resize(mol.NumAtoms()+1);
2035    
2036     for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
2037 gezelter 1081 if (atom->IsNitrogen() && atom->IsAromatic())
2038     if (atom->GetHvyValence() == 2)
2039     {
2040     if (atom->GetValence() == 3 || atom->GetImplicitValence() == 3)
2041     _aromNH[atom->GetIdx()] = true;
2042     }
2043     }
2044 tim 746
2045 gezelter 1081 void OBMol2Smi::AssignCisTrans(OBSmiNode *node)
2046     {
2047 tim 746 //traverse the tree searching for acyclic olefins - if it
2048     //has at least one heavy atom attachment on each end assign stereochem
2049    
2050     OBBond *bond;
2051     for (int i = 0;i < node->Size();i++)
2052 gezelter 1081 {
2053 tim 746 bond = node->GetNextBond(i);
2054     if (bond->GetBO() == 2 && !bond->IsInRing())
2055 gezelter 1081 {
2056 tim 746 OBAtom *b = node->GetAtom();
2057     OBAtom *c = bond->GetNbrAtom(b);
2058    
2059     //skip allenes
2060     if (b->GetHyb() == 1 || c->GetHyb() == 1)
2061 gezelter 1081 continue;
2062 tim 746
2063     if (b->GetHvyValence() > 1 && c->GetHvyValence() > 1)
2064 gezelter 1081 {
2065 tim 746 OBAtom *a,*d;
2066     vector<OBEdgeBase*>::iterator j,k;
2067    
2068     //look for bond with assigned stereo as in poly-ene
2069     for (a = b->BeginNbrAtom(j);a;a = b->NextNbrAtom(j))
2070 gezelter 1081 if (((OBBond*)*j)->IsUp() ||((OBBond*)*j)->IsDown())
2071     break;
2072 tim 746
2073     if (!a)
2074 gezelter 1081 for (a = b->BeginNbrAtom(j);a;a = b->NextNbrAtom(j))
2075     if (a != c && !a->IsHydrogen())
2076     break;
2077 tim 746 for (d = c->BeginNbrAtom(k);d;d = c->NextNbrAtom(k))
2078 gezelter 1081 if (d != b && !d->IsHydrogen())
2079     break;
2080     // obAssert(a);
2081     // obAssert(d);
2082 tim 746
2083     if (((OBBond*)*j)->IsUp() || ((OBBond*)*j)->IsDown()) //stereo already assigned
2084 gezelter 1081 {
2085 tim 746 if (fabs(CalcTorsionAngle(a->GetVector(),b->GetVector(),
2086     c->GetVector(),d->GetVector())) > 10.0)
2087 gezelter 1081 if (((OBBond*)*j)->IsUp())
2088     ((OBBond*)*k)->SetUp();
2089     else
2090     ((OBBond*)*k)->SetDown();
2091 tim 746 else
2092 gezelter 1081 if (((OBBond*)*j)->IsUp())
2093     ((OBBond*)*k)->SetDown();
2094     else
2095     ((OBBond*)*k)->SetUp();
2096     }
2097 tim 746 else //assign stereo to both ends
2098 gezelter 1081 {
2099 tim 746 ((OBBond*)*j)->SetUp();
2100     if (fabs(CalcTorsionAngle(a->GetVector(),b->GetVector(),
2101     c->GetVector(),d->GetVector())) > 10.0)
2102 gezelter 1081 ((OBBond*)*k)->SetUp();
2103 tim 746 else
2104 gezelter 1081 ((OBBond*)*k)->SetDown();
2105     }
2106     }
2107     }
2108 tim 746 AssignCisTrans(node->GetNextNode(i));
2109 gezelter 1081 }
2110     }
2111 tim 746
2112 gezelter 1081 void OBMol2Smi::Init(OBConversion* pconv)
2113     {
2114 tim 746 _vclose.clear();
2115     _atmorder.clear();
2116     _storder.clear();
2117     _aromNH.clear();
2118     _uatoms.Clear();
2119     _ubonds.Clear();
2120     _vopen.clear();
2121 gezelter 1081 _pconv = pconv;
2122     }
2123 tim 746
2124 gezelter 1081 bool OBMol2Smi::GetSmilesElement(OBSmiNode *node,char *element)
2125     {
2126 tim 746 //***handle reference atom stuff here and return***
2127 gezelter 1081 char symbol[16];
2128 tim 746 bool bracketElement = false;
2129     bool normalValence = true;
2130    
2131     OBAtom *atom = node->GetAtom();
2132    
2133     int bosum = atom->KBOSum();
2134     atom->BOSum(); //CM temp
2135     switch (atom->GetAtomicNum())
2136 gezelter 1081 {
2137     case 0:
2138 tim 746 break;
2139 gezelter 1081 case 5: /*bracketElement = !(normalValence = (bosum == 3)); break;*/
2140 tim 746 break;
2141 gezelter 1081 case 6:
2142 tim 746 break;
2143 gezelter 1081 case 7:
2144 tim 746 if (atom->IsAromatic() && atom->GetHvyValence() == 2 && atom->GetImplicitValence() == 3)
2145 gezelter 1081 {
2146     bracketElement = !(normalValence = false);
2147 tim 746 break;
2148 gezelter 1081 }
2149 tim 746 else
2150 gezelter 1081 bracketElement = !(normalValence = (bosum == 3 || bosum == 5));
2151 tim 746 break;
2152 gezelter 1081 case 8:
2153 tim 746 break;
2154 gezelter 1081 case 9:
2155 tim 746 break;
2156 gezelter 1081 case 15:
2157 tim 746 break;
2158 gezelter 1081 case 16:
2159 tim 746 bracketElement = !(normalValence = (bosum == 2 || bosum == 4 || bosum == 6));
2160     break;
2161 gezelter 1081 case 17:
2162 tim 746 break;
2163 gezelter 1081 case 35:
2164 tim 746 break;
2165 gezelter 1081 case 53:
2166 tim 746 break;
2167    
2168 gezelter 1081 default:
2169 tim 746 bracketElement = true;
2170 gezelter 1081 }
2171 tim 746
2172     if (atom->GetHvyValence() > 2 && atom->IsChiral())
2173 gezelter 1081 if (((OBMol*)atom->GetParent())->HasNonZeroCoords() || atom->HasChiralitySpecified())
2174     bracketElement = true;
2175 tim 746
2176     if (atom->GetFormalCharge() != 0) //bracket charged elements
2177 gezelter 1081 bracketElement = true;
2178 tim 746
2179     if(atom->GetIsotope()) //CM 19Mar05
2180     bracketElement = true;
2181    
2182     //CM begin 18 Sept 2003
2183    
2184     //This outputs form [CH3][CH3] rather than CC if -h option has been specified
2185     if (((OBMol*)atom->GetParent())->HasHydrogensAdded())
2186 gezelter 1081 bracketElement = true;
2187     else
2188     {
2189     if (atom->GetSpinMultiplicity())
2190     {
2191     //For radicals output bracket form anyway unless r option specified
2192     if(!(_pconv && _pconv->IsOption ("r")))
2193     bracketElement = true;
2194     }
2195     }
2196 tim 746 //CM end
2197    
2198     if (!bracketElement)
2199 gezelter 1081 {
2200 tim 746 if (!atom->GetAtomicNum())
2201 gezelter 1081 {
2202 tim 746 bool external = false;
2203     vector<pair<int,pair<OBAtom *,OBBond *> > > *externalBonds =
2204 gezelter 1081 (vector<pair<int,pair<OBAtom *,OBBond *> > > *)((OBMol*)atom->GetParent())->GetData("extBonds");
2205 tim 746 vector<pair<int,pair<OBAtom *,OBBond *> > >::iterator externalBond;
2206    
2207     if (externalBonds)
2208 gezelter 1081 for(externalBond = externalBonds->begin();externalBond != externalBonds->end();externalBond++)
2209 tim 746 {
2210 gezelter 1081 if (externalBond->second.first == atom)
2211 tim 746 {
2212 gezelter 1081 external = true;
2213     strcpy(symbol,"&");
2214     OBBond *bond = externalBond->second.second;
2215     if (bond->IsUp())
2216     {
2217     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
2218     (bond->GetEndAtom())->HasDoubleBond() )
2219     strcat(symbol,"/");
2220     }
2221     if (bond->IsDown())
2222     {
2223     if ( (bond->GetBeginAtom())->HasDoubleBond() ||
2224     (bond->GetEndAtom())->HasDoubleBond() )
2225     strcat(symbol,"\\");
2226     }
2227 tim 746 #ifndef KEKULE
2228    
2229 gezelter 1081 if (bond->GetBO() == 2 && !bond->IsAromatic())
2230     strcat(symbol,"=");
2231     if (bond->GetBO() == 2 && bond->IsAromatic())
2232     strcat(symbol,";");
2233 tim 746 #else
2234    
2235 gezelter 1081 if (bond->GetBO() == 2)
2236     strcat(symbol,"=");
2237 tim 746 #endif
2238    
2239 gezelter 1081 if (bond->GetBO() == 3)
2240     strcat(symbol,"#");
2241     sprintf(symbol,"%s%d",symbol,externalBond->first);
2242     break;
2243 tim 746 }
2244     }
2245    
2246     if(!external)
2247 gezelter 1081 strcpy(symbol,"*");
2248     }
2249 tim 746 else
2250 gezelter 1081 {
2251 tim 746 strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
2252     #ifndef KEKULE
2253     if (atom->IsAromatic())
2254 gezelter 1081 symbol[0] = tolower(symbol[0]);
2255 tim 746 #endif
2256    
2257 gezelter 1081 //Radical centres lc if r option set
2258     if(atom->GetSpinMultiplicity() && _pconv && _pconv->IsOption ("r"))
2259     symbol[0] = tolower(symbol[0]);
2260     }
2261 tim 746 strcpy(element,symbol);
2262    
2263     return(true);
2264 gezelter 1081 }
2265 tim 746
2266     strcpy(element,"[");
2267 gezelter 1081 if(atom->GetIsotope()) //CM 19Mar05
2268     {
2269     char iso[4];
2270     sprintf(iso,"%d",atom->GetIsotope());
2271     strcat(element,iso);
2272     }
2273 tim 746 if (!atom->GetAtomicNum())
2274 gezelter 1081 strcpy(symbol,"*");
2275 tim 746 else
2276 gezelter 1081 {
2277 tim 746 strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
2278     #ifndef KEKULE
2279     if (atom->IsAromatic())
2280 gezelter 1081 symbol[0] = tolower(symbol[0]);
2281 tim 746 #endif
2282    
2283 gezelter 1081 }
2284 tim 746 strcat(element,symbol);
2285    
2286     //if (atom->IsCarbon() && atom->GetHvyValence() > 2 && atom->IsChiral())
2287     if (atom->GetHvyValence() > 2 && atom->IsChiral())
2288 gezelter 1081 {
2289 tim 746 char stereo[5];
2290     if (GetChiralStereo(node,stereo))
2291 gezelter 1081 strcat(element,stereo);
2292     }
2293 tim 746
2294     //add extra hydrogens
2295 gezelter 1081 if (!atom->IsHydrogen())
2296     {
2297     int hcount = atom->ImplicitHydrogenCount() + atom->ExplicitHydrogenCount();
2298     if (hcount != 0)
2299     {
2300     strcat(element,"H");
2301     if (hcount > 1)
2302     {
2303     char tcount[10];
2304     sprintf(tcount,"%d", hcount);
2305     strcat(element,tcount);
2306     }
2307     }
2308     }
2309 tim 746
2310     //cat charge on the end
2311     if (atom->GetFormalCharge() != 0)
2312 gezelter 1081 {
2313 tim 746
2314     /*
2315 gezelter 1081 if (atom->ImplicitHydrogenCount())
2316     {
2317     cerr << "imp = " << atom->GetAtomicNum() << ' ' << atom->GetImplicitValence() << endl;
2318     strcat(element,"H");
2319     if (atom->ImplicitHydrogenCount() > 1)
2320     {
2321     char tcount[10];
2322     sprintf(tcount,"%d",atom->ImplicitHydrogenCount());
2323     strcat(element,tcount);
2324     }
2325     }
2326     */
2327 tim 746 if (atom->GetFormalCharge() > 0)
2328 gezelter 1081 strcat(element,"+");
2329 tim 746 else
2330 gezelter 1081 strcat(element,"-");
2331 tim 746
2332     if (abs(atom->GetFormalCharge()) > 1)
2333 gezelter 1081 {
2334 tim 746 char tcharge[10];
2335     sprintf(tcharge,"%d",abs(atom->GetFormalCharge()));
2336     strcat(element,tcharge);
2337 gezelter 1081 }
2338     }
2339 tim 746
2340     strcat(element,"]");
2341    
2342     return(true);
2343 gezelter 1081 }
2344 tim 746
2345 gezelter 1081 bool OBMol2Smi::GetChiralStereo(OBSmiNode *node,char *stereo)
2346     {
2347 tim 746 bool is2D=false;
2348     double torsion;
2349     OBAtom *a,*b,*c,*d,hydrogen;
2350    
2351     b = node->GetAtom();
2352     OBMol *mol = (OBMol*)b->GetParent();
2353    
2354     if (!mol->HasNonZeroCoords()) //must have come in from smiles string
2355 gezelter 1081 {
2356 tim 746 if (!b->HasChiralitySpecified())
2357 gezelter 1081 return(false);
2358 tim 746 if (b->IsClockwise())
2359 gezelter 1081 strcpy(stereo,"@@");
2360 tim 746 else if (b->IsAntiClockwise())
2361 gezelter 1081 strcpy(stereo,"@");
2362 tim 746 else
2363 gezelter 1081 return(false);
2364 tim 746 //if (b->GetHvyValence() == 3) strcat(stereo,"H");
2365     return(true);
2366 gezelter 1081 }
2367 tim 746
2368     //give peudo Z coords if mol is 2D
2369     if (!mol->Has3D())
2370 gezelter 1081 {
2371 tim 746 vector3 v,vz(0.0,0.0,1.0);
2372     is2D = true;
2373     OBAtom *nbr;
2374     OBBond *bond;
2375     vector<OBEdgeBase*>::iterator i;
2376     for (bond = b->BeginBond(i);bond;bond = b->NextBond(i))
2377 gezelter 1081 {
2378 tim 746 nbr = bond->GetEndAtom();
2379     if (nbr != b)
2380 gezelter 1081 {
2381 tim 746 v = nbr->GetVector();
2382     if (bond->IsWedge())
2383 gezelter 1081 v += vz;
2384     else if (bond->IsHash())
2385     v -= vz;
2386 tim 746
2387     nbr->SetVector(v);
2388 gezelter 1081 }
2389 tim 746 else
2390 gezelter 1081 {
2391 tim 746 nbr = bond->GetBeginAtom();
2392     v = nbr->GetVector();
2393     if (bond->IsWedge())
2394 gezelter 1081 v -= vz;
2395     else if (bond->IsHash())
2396     v += vz;
2397 tim 746
2398     nbr->SetVector(v);
2399 gezelter 1081 }
2400     }
2401     }
2402 tim 746
2403     c = d = NULL;
2404     a = node->GetParent();
2405     // obAssert(a); //chiral atom can't be used as root node - must have parent
2406    
2407     if (b->GetHvyValence() == 3) //must have attached hydrogen
2408 gezelter 1081 {
2409     if (b->GetValence() == 4) //has explicit hydrogen
2410     {
2411 tim 746 vector<OBEdgeBase*>::iterator i;
2412     for (c = b->BeginNbrAtom(i);c;c = b->NextNbrAtom(i))
2413 gezelter 1081 if (c->IsHydrogen())
2414     break;
2415     // obAssert(c);
2416     }
2417 tim 746 else //implicit hydrogen
2418 gezelter 1081 {
2419 tim 746 vector3 v;
2420     b->GetNewBondVector(v,1.0);
2421     hydrogen.SetVector(v);
2422     c = &hydrogen;
2423 gezelter 1081 }
2424     }
2425 tim 746
2426     //get connected atoms in order
2427     OBAtom *nbr;
2428     vector<int>::iterator j;
2429    
2430     //try to get neighbors that are closure atoms in the order they appear in the string
2431     vector<OBNodeBase*> va;
2432     GetClosureAtoms(b,va);
2433     if (!va.empty())
2434 gezelter 1081 {
2435 tim 746 vector<OBNodeBase*>::iterator k;
2436     for (k = va.begin();k != va.end();k++)
2437 gezelter 1081 if (*k != a)
2438 tim 746 {
2439 gezelter 1081 if (!c)
2440     c = (OBAtom*)*k;
2441     else if (!d)
2442     d = (OBAtom*)*k;
2443 tim 746 }
2444 gezelter 1081 }
2445 tim 746
2446     for (j = _storder.begin();j != _storder.end();j++)
2447 gezelter 1081 {
2448 tim 746 nbr = mol->GetAtom(*j);
2449     if (!b->IsConnected(nbr))
2450 gezelter 1081 continue;
2451 tim 746 if (nbr == a || nbr == b || nbr == c)
2452 gezelter 1081 continue;
2453 tim 746 if (!c)
2454 gezelter 1081 c = nbr;
2455 tim 746 else if (!d)
2456 gezelter 1081 d = nbr;
2457     }
2458 tim 746
2459     torsion = CalcTorsionAngle(a->GetVector(),b->GetVector(),
2460     c->GetVector(),d->GetVector());
2461    
2462 gezelter 1081 // if we have a 2D structure with marked unspecified chirality
2463     // we should leave it blank and return false
2464     bool success = false;
2465     if ( !(is2D && b->IsChiral() && !b->HasChiralitySpecified()) )
2466     {
2467     strcpy(stereo,(torsion<0.0)?"@":"@@");
2468     success = true;
2469     }
2470 tim 746 //if (b->GetHvyValence() == 3) strcat(stereo,"H");
2471    
2472     //re-zero psuedo-coords
2473     if (is2D)
2474 gezelter 1081 {
2475 tim 746 vector3 v;
2476     OBAtom *atom;
2477     vector<OBNodeBase*>::iterator k;
2478     for (atom = mol->BeginAtom(k);atom;atom = mol->NextAtom(k))
2479 gezelter 1081 {
2480 tim 746 v = atom->GetVector();
2481     v.SetZ(0.0);
2482     atom->SetVector(v);
2483 gezelter 1081 }
2484     }
2485 tim 746
2486 gezelter 1081 return(success);
2487     }
2488     //********************************************************
2489     class FIXFormat : public OBFormat
2490     {
2491     public:
2492 tim 746 //Register this format type ID
2493     FIXFormat()
2494     {
2495 gezelter 1081 OBConversion::RegisterFormat("fix",this);
2496 tim 746 }
2497    
2498     virtual const char* Description() //required
2499     {
2500 gezelter 1081 return
2501     "SMILES FIX format\n \
2502 tim 746 No comments yet\n \
2503     ";
2504     };
2505    
2506 gezelter 1081 virtual const char* SpecificationURL(){return "";}; //optional
2507 tim 746
2508     //Flags() can return be any the following combined by | or be omitted if none apply
2509     // NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY
2510     virtual unsigned int Flags()
2511     {
2512 gezelter 1081 return NOTREADABLE;
2513 tim 746 };
2514    
2515     ////////////////////////////////////////////////////
2516     /// The "API" interface functions
2517     virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
2518    
2519     ////////////////////////////////////////////////////
2520     /// The "Convert" interface functions
2521     virtual bool WriteChemObject(OBConversion* pConv)
2522     {
2523 gezelter 1081 //Retrieve the target OBMol
2524     OBBase* pOb = pConv->GetChemObject();
2525     OBMol* pmol = dynamic_cast<OBMol*> (pOb);
2526     bool ret=false;
2527     if(pmol)
2528     ret=WriteMolecule(pmol,pConv);
2529     delete pOb;
2530     return ret;
2531 tim 746 };
2532 gezelter 1081 };
2533 tim 746
2534 gezelter 1081 //Make an instance of the format class
2535     FIXFormat theFIXFormat;
2536 tim 746
2537 gezelter 1081 /////////////////////////////////////////////////////////////////
2538 tim 746
2539 gezelter 1081 bool FIXFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
2540     {
2541 tim 746 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
2542     if(pmol==NULL)
2543 gezelter 1081 return false;
2544 tim 746
2545     //Define some references so we can use the old parameter names
2546     ostream &ofs = *pConv->GetOutStream();
2547     OBMol &mol = *pmol;
2548    
2549     char buffer[BUFF_SIZE];
2550     OBMol2Smi m2s;
2551    
2552     // This is a hack to prevent recursion problems.
2553     // we still need to fix the underlying problem -GRH
2554     if (mol.NumAtoms() > 1000)
2555 gezelter 1081 {
2556 tim 746 #ifdef HAVE_SSTREAM
2557 gezelter 1081 stringstream errorMsg;
2558 tim 746 #else
2559 gezelter 1081 strstream errorMsg;
2560 tim 746 #endif
2561 gezelter 1081 errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
2562     errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl;
2563     obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
2564     return(false);
2565     }
2566 tim 746
2567     m2s.Init();
2568     //m2s.AssignCisTrans(mol);
2569     m2s.CorrectAromaticAmineCharge(mol);
2570     m2s.CreateSmiString(mol,buffer);
2571    
2572     OBAtom *atom;
2573     vector<int>::iterator i;
2574     vector<int> order = m2s.GetOutputOrder();
2575     ofs << buffer << endl;
2576    
2577     int j;
2578     for (j = 0;j < mol.NumConformers();j++)
2579 gezelter 1081 {
2580 tim 746 mol.SetConformer(j);
2581     for (i = order.begin();i != order.end();i++)
2582 gezelter 1081 {
2583 tim 746 atom = mol.GetAtom(*i);
2584     sprintf(buffer,"%9.3f %9.3f %9.3f",atom->GetX(),atom->GetY(),atom->GetZ());
2585     ofs << buffer<< endl;
2586 gezelter 1081 }
2587     }
2588 tim 746 return(true);
2589 gezelter 1081 }
2590 tim 746
2591 gezelter 1081 OBSmiNode::OBSmiNode(OBAtom *atom)
2592     {
2593 tim 746 _atom = atom;
2594     _parent = NULL;
2595     _nextnode.clear();
2596     _nextbond.clear();
2597 gezelter 1081 }
2598 tim 746
2599 gezelter 1081 void OBSmiNode::SetNextNode(OBSmiNode *node,OBBond *bond)
2600     {
2601 tim 746 _nextnode.push_back(node);
2602     _nextbond.push_back(bond);
2603 gezelter 1081 }
2604 tim 746
2605 gezelter 1081 OBSmiNode::~OBSmiNode()
2606     {
2607 tim 746 vector<OBSmiNode*>::iterator i;
2608     for (i = _nextnode.begin();i != _nextnode.end();i++)
2609 gezelter 1081 delete (*i);
2610     }
2611 tim 746
2612    
2613 gezelter 1081 bool WriteTheSmiles(OBMol & mol,char *out)
2614     {
2615 tim 746 char buffer[2*BUFF_SIZE];
2616    
2617     OBMol2Smi m2s;
2618    
2619     m2s.Init();
2620     m2s.CorrectAromaticAmineCharge(mol);
2621     m2s.CreateSmiString(mol,buffer);
2622    
2623     strcpy(out,buffer);
2624     return(true);
2625    
2626 gezelter 1081 }
2627 tim 746 }