ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/openbabel/smilesformat.cpp
Revision: 1081
Committed: Thu Oct 19 20:49:05 2006 UTC (18 years, 6 months ago) by gezelter
File size: 71511 byte(s)
Log Message:
updated OpenBabel to version 2.0.2

File Contents

# Content
1 /**********************************************************************
2 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc.
3 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison
4 Some portions Copyright (C) 2004 by Chris Morley
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation version 2 of the License.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14 ***********************************************************************/
15 //Contains SMIFormat and FIXFormat classes
16 // TODO: Rewrite. Use std::string in place of char * to avoid buffer overflow
17 // use std::string::reserve (or different allocator) to avoid resize slowdown
18
19 #include "mol.hpp"
20 #include "obconversion.hpp"
21 #include "obmolecformat.hpp"
22
23 using namespace std;
24
25 namespace OpenBabel
26 {
27 class SMIFormat : public OBMoleculeFormat
28 {
29 public:
30 //Register this format type ID
31 SMIFormat()
32 {
33 OBConversion::RegisterFormat("smi",this, "chemical/x-daylight-smiles");
34 OBConversion::RegisterOptionParam("n", this);
35 OBConversion::RegisterOptionParam("t", this);
36 }
37
38 virtual const char* GetMIMEType()
39 { return "chemical/x-daylight-smiles"; };
40
41 ////////////////////////////////////////////////////
42 /// The "API" interface functions
43 virtual bool ReadMolecule(OBBase* pOb, OBConversion* pConv);
44 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
45
46 ///////////////////////////////////////////////////////
47
48 virtual const char* Description()
49 {
50 return
51 "SMILES format\n \
52 A linear text format which can describe the connectivity\n \
53 and chirality of a molecule\n \
54 Write Options e.g. -xt\n \
55 -n no molecule name\n \
56 -t molecule name only\n \
57 -r radicals lower case eg ethyl is Cc\n\n";
58 };
59
60 virtual unsigned int Flags() { return DEFAULTFORMAT;};
61 virtual const char* TargetClassDescription(){return OBMol::ClassDescription();};
62
63 virtual const char* SpecificationURL()
64 {return "http://www.daylight.com/smiles/f_smiles.html";};
65
66 virtual int SkipObjects(int n, OBConversion* pConv)
67 {
68 if(n==0) return 1; //already points after current line
69 string temp;
70 istream& ifs = *pConv->GetInStream();
71 int i;
72 for(i=0;i<n && ifs.good();i++)
73 getline(ifs, temp);
74 return ifs.good() ? 1 : -1;
75 };
76 };
77
78 //Make an instance of the format class
79 SMIFormat theSMIFormat;
80
81 //////////////////////////////////////////////////////////////////
82 class OBSmiNode
83 {
84 OBAtom *_atom,*_parent;
85 std::vector<OBSmiNode*> _nextnode;
86 std::vector<OBBond*> _nextbond;
87 public:
88 OBSmiNode(OBAtom *atom);
89 ~OBSmiNode();
90 int Size()
91 {
92 return((_nextnode.empty())?0:_nextnode.size());
93 }
94 void SetParent(OBAtom *a)
95 {
96 _parent = a;
97 }
98 void SetNextNode(OBSmiNode*,OBBond*);
99 OBAtom *GetAtom()
100 {
101 return(_atom);
102 }
103 OBAtom *GetParent()
104 {
105 return(_parent);
106 }
107 OBAtom *GetNextAtom(int i)
108 {
109 return(_nextnode[i]->GetAtom());
110 }
111 OBBond *GetNextBond(int i)
112 {
113 return(_nextbond[i]);
114 }
115 OBSmiNode *GetNextNode(int i)
116 {
117 return(_nextnode[i]);
118 }
119 };
120
121 class OBMol2Smi
122 {
123 std::vector<int> _atmorder;
124 std::vector<int> _storder;
125 std::vector<bool> _aromNH;
126 OBBitVec _uatoms,_ubonds;
127 std::vector<OBEdgeBase*> _vclose;
128 std::vector<std::pair<OBAtom*,std::pair<int,int> > > _vopen;
129 OBConversion* _pconv;
130 public:
131 OBMol2Smi()
132 {
133 _vclose.clear();
134 }
135 ~OBMol2Smi()
136 {}
137 int GetUnusedIndex();
138 void Init(OBConversion* pconv=NULL);
139 void CreateSmiString(OBMol&,char*);
140 void GetClosureAtoms(OBAtom*,std::vector<OBNodeBase*>&);
141 void FindClosureBonds(OBMol&);
142 void ToSmilesString(OBSmiNode *node,char *buffer);
143 void RemoveUsedClosures();
144 void AssignCisTrans(OBSmiNode*);
145 bool BuildTree(OBSmiNode*);
146 bool GetSmilesElement(OBSmiNode*,char*);
147 bool GetChiralStereo(OBSmiNode*,char*);
148 void CorrectAromaticAmineCharge(OBMol&);
149 std::vector<std::pair<int,OBBond*> > GetClosureDigits(OBAtom*);
150 std::vector<int> &GetOutputOrder()
151 {
152 return(_atmorder);
153 }
154 };
155
156 bool WriteTheSmiles(OBMol & mol,char *out);
157
158 /////////////////////////////////////////////////////////////////
159 class OBSmilesParser
160 {
161 int _bondflags;
162 int _order;
163 int _prev;
164 char *_ptr;
165 vector<int> _vprev;
166 vector<vector<int> > _rclose;
167 vector<vector<int> > _extbond;
168 vector<int> _path;
169 vector<bool> _avisit;
170 vector<bool> _bvisit;
171 char _buffer[BUFF_SIZE];
172 bool chiralWatch; // set when a chiral atom is read
173 map<OBAtom*,OBChiralData*> _mapcd; // map of ChiralAtoms and their data
174 public:
175
176 OBSmilesParser() { }
177 ~OBSmilesParser() { }
178
179 bool SmiToMol(OBMol&,string&);
180 bool ParseSmiles(OBMol&);
181 bool ParseSimple(OBMol&);
182 bool ParseComplex(OBMol&);
183 bool ParseRingBond(OBMol&);
184 bool ParseExternalBond(OBMol&);
185 bool CapExternalBonds(OBMol &mol);
186 void FindAromaticBonds(OBMol &mol,OBAtom*,int);
187 void FindAromaticBonds(OBMol&);
188 void FindOrphanAromaticAtoms(OBMol &mol); //CM 18 Sept 2003
189 };
190
191 /////////////////////////////////////////////////////////////////
192 bool SMIFormat::ReadMolecule(OBBase* pOb, OBConversion* pConv)
193 {
194 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
195
196 //Define some references so we can use the old parameter names
197 istream &ifs = *pConv->GetInStream();
198 OBMol &mol = *pmol;
199 const char* title = pConv->GetTitle();
200
201 //Taken unchanged from ReadSmiles
202 char buffer[BUFF_SIZE];
203
204 if (!ifs.getline(buffer,BUFF_SIZE))
205 return(false);
206 vector<string> vs;
207 tokenize(vs,buffer);
208
209 // RWT 10/3/2000
210 //
211 // added the following to allow spaces in compound names (titles).
212 // Essentially everything after the first space on a SMILES file line
213 // is treated as the name.
214 // Also had to change the condition a few lines below from:
215 // if (vs.size() == 2) ... to
216 // if (vs.size() >= 2)
217
218 if (vs.size() > 2)
219 {
220 for (unsigned int i=2;i<vs.size(); i++)
221 {
222 vs[1]=vs[1]+" "+vs[i];
223 }
224 }
225
226 if (vs.empty())
227 return false;
228 mol.SetDimension(0);
229
230 if (vs.size() >= 2)
231 mol.SetTitle(vs[1].c_str());
232 else
233 mol.SetTitle(title);
234
235 OBSmilesParser sp;
236 return sp.SmiToMol(mol,vs[0]);
237 }
238
239 //////////////////////////////////////////////////
240 bool SMIFormat::WriteMolecule(OBBase* pOb,OBConversion* pConv)
241 {
242 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
243
244 //Define some references so we can use the old parameter names
245 ostream &ofs = *pConv->GetOutStream();
246 OBMol &mol = *pmol;
247
248 if(pConv->IsOption("t")) //Title only option
249 {
250 ofs << mol.GetTitle() <<endl;
251 return true;
252 }
253 char buffer[BUFF_SIZE];
254 *buffer='\0'; //empty buffer
255
256 // This is a hack to prevent recursion problems.
257 // we still need to fix the underlying problem (mainly chiral centers) -GRH
258 if (mol.NumAtoms() > 1000)
259 {
260 #ifdef HAVE_SSTREAM
261 stringstream errorMsg;
262 #else
263 strstream errorMsg;
264 #endif
265 errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
266 errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl;
267 obErrorLog.ThrowError(__func__, errorMsg.str(), obWarning);
268 return(false);
269 }
270
271 if(mol.NumAtoms()!=0)
272 {
273 OBMol2Smi m2s;
274 m2s.Init(pConv);
275 m2s.CorrectAromaticAmineCharge(mol);
276 m2s.CreateSmiString(mol,buffer);
277 }
278
279 ofs << buffer ;
280 if(!pConv->IsOption("n"))
281 ofs << '\t' << mol.GetTitle();
282 ofs << endl;
283
284 return true;
285 }
286
287 //////////////////////////////////////////////
288
289 //CM not needed extern OBAromaticTyper aromtyper;
290 //OBAtomTyper atomtyperx; //CM
291
292 bool OBSmilesParser::SmiToMol(OBMol &mol,string &s)
293 {
294 strncpy(_buffer,s.c_str(), BUFF_SIZE);
295 _buffer[BUFF_SIZE - 1] = '\0';
296
297 _vprev.clear();
298 _rclose.clear();
299 _prev=0;
300 chiralWatch=false;
301
302 if (!ParseSmiles(mol))
303 {
304 mol.Clear();
305 return(false);
306 }
307
308 // SMILES always sets formal charges -- don't throw them away
309 mol.SetAutomaticFormalCharge(false);
310
311 return(true);
312 }
313
314 bool OBSmilesParser::ParseSmiles(OBMol &mol)
315 {
316 mol.BeginModify();
317
318 for (_ptr=_buffer;*_ptr;_ptr++)
319 {
320 if (isspace(*_ptr))
321 continue;
322 else if (isdigit(*_ptr) || *_ptr == '%') //ring open/close
323 {
324 if (!ParseRingBond(mol))
325 return false;
326 continue;
327 }
328 else if(*_ptr == '&') //external bond
329 {
330 ParseExternalBond(mol);
331 continue;
332 }
333 else
334 switch(*_ptr)
335 {
336 case '.':
337 _prev=0;
338 break;
339 case '(':
340 _vprev.push_back(_prev);
341 break;
342 case ')':
343 if(_vprev.empty()) //CM
344 return false;
345 _prev = _vprev.back();
346 _vprev.pop_back();
347 break;
348 case '[':
349 if (!ParseComplex(mol))
350 {
351 mol.EndModify();
352 mol.Clear();
353 return(false);
354 }
355 break;
356 case '-':
357 _order = 1;
358 break;
359 case '=':
360 _order = 2;
361 break;
362 case '#':
363 _order = 3;
364 break;
365 case ':':
366 _order = 5;
367 break;
368 case '/':
369 _bondflags |= OB_TORUP_BOND;
370 break;
371 case '\\':
372 _bondflags |= OB_TORDOWN_BOND;
373 break;
374 default:
375 if (!ParseSimple(mol))
376 {
377 mol.EndModify();
378 mol.Clear();
379 return(false);
380 }
381 } // end switch
382 } // end for _ptr
383
384 // place dummy atoms for each unfilled external bond
385 if(!_extbond.empty())
386 CapExternalBonds(mol);
387
388 //set aromatic bond orders
389 mol.SetAromaticPerceived();
390 FindAromaticBonds(mol);
391 FindOrphanAromaticAtoms(mol);// CM 18 Sept 2003
392 mol.AssignSpinMultiplicity();
393 mol.UnsetAromaticPerceived();
394
395 mol.EndModify();
396
397 //NE add the OBChiralData stored inside the _mapcd to the atoms now after end
398 // modify so they don't get lost.
399 if(_mapcd.size()>0)
400 {
401 OBAtom* atom;
402 OBChiralData* cd;
403 map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
404 for(ChiralSearch=_mapcd.begin();ChiralSearch!=_mapcd.end();ChiralSearch++)
405 {
406 atom=ChiralSearch->first;
407 cd=ChiralSearch->second;
408 atom->SetData(cd);
409 }
410 }
411
412 return(true);
413 }
414
415 // CM 18 Sept 2003
416 void OBSmilesParser::FindOrphanAromaticAtoms(OBMol &mol)
417 {
418 //Facilitates the use lower case shorthand for radical entry
419 //Atoms which are marked as aromatic but have no aromatic bonds
420 //are taken to be radical centres
421 OBAtom *atom;
422 vector<OBNodeBase*>::iterator j;
423
424 for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
425 if(atom->IsAromatic())
426 {
427 if(atom->CountBondsOfOrder(5)<2) //bonds order 5 set in FindAromaticBonds()
428 //not proper aromatic atoms - could be conjugated chain or radical centre
429 atom->UnsetAromatic();
430 else
431 {
432 //recognized as aromatic, so are not radicals
433 atom->SetSpinMultiplicity(0);
434 }
435 }
436 }
437
438 void OBSmilesParser::FindAromaticBonds(OBMol &mol)
439 {
440 _path.clear();
441 _avisit.clear();
442 _bvisit.clear();
443 _avisit.resize(mol.NumAtoms()+1);
444 _bvisit.resize(mol.NumBonds());
445 _path.resize(mol.NumAtoms()+1);
446
447 OBBond *bond;
448 vector<OBEdgeBase*>::iterator i;
449 for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
450 if (!bond->GetBeginAtom()->IsAromatic() ||
451 !bond->GetEndAtom()->IsAromatic())
452 _bvisit[bond->GetIdx()] = true;
453
454 OBAtom *atom;
455 vector<OBNodeBase*>::iterator j;
456
457 for (atom = mol.BeginAtom(j);atom;atom = mol.NextAtom(j))
458 if(!_avisit[atom->GetIdx()] && atom->IsAromatic())
459 FindAromaticBonds(mol,atom,0);
460 }
461
462 void OBSmilesParser::FindAromaticBonds(OBMol &mol,OBAtom *atom,int depth )
463 {
464 OBBond *bond;
465 vector<OBEdgeBase*>::iterator k;
466
467 if (_avisit[atom->GetIdx()])
468 {
469 int j = depth-1;
470 bond=mol.GetBond(_path[j--]);
471 bond->SetBO(5);
472 while( j >= 0 )
473 {
474 bond=mol.GetBond(_path[j--]);
475 bond->SetBO(5);
476 if(bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
477 break;
478 }
479 }
480 else
481 {
482 _avisit[atom->GetIdx()] = true;
483 for(bond = atom->BeginBond(k);bond;bond = atom->NextBond(k))
484 if( !_bvisit[bond->GetIdx()])
485 {
486 _path[depth] = bond->GetIdx();
487 _bvisit[bond->GetIdx()] = true;
488 FindAromaticBonds(mol,bond->GetNbrAtom(atom),depth+1);
489 }
490 }
491 }
492
493
494 bool OBSmilesParser::ParseSimple(OBMol &mol)
495 {
496 char symbol[3];
497 int element;
498 bool arom=false;
499 memset(symbol,'\0',sizeof(char)*3);
500
501 if (isupper(*_ptr))
502 switch(*_ptr)
503 {
504 case 'C':
505 _ptr++;
506 if (*_ptr == 'l')
507 {
508 strcpy(symbol,"Cl");
509 element = 17;
510 }
511 else
512 {
513 symbol[0] = 'C';
514 element = 6;
515 _ptr--;
516 }
517 break;
518
519 case 'N':
520 element = 7;
521 symbol[0] = 'N';
522 break;
523 case 'O':
524 element = 8;
525 symbol[0] = 'O';
526 break;
527 case 'S':
528 element = 16;
529 symbol[0] = 'S';
530 break;
531 case 'P':
532 element = 15;
533 symbol[0] = 'P';
534 break;
535 case 'F':
536 element = 9;
537 symbol[0] = 'F';
538 break;
539 case 'I':
540 element = 53;
541 symbol[0] = 'I';
542 break;
543
544 case 'B':
545 _ptr++;
546 if (*_ptr == 'r')
547 {
548 element = 35;
549 strcpy(symbol,"Br");
550 }
551 else
552 {
553 element = 5;
554 symbol[0] = 'B';
555 _ptr--;
556 }
557 break;
558 default:
559 return(false);
560 }
561 else
562 {
563 arom = true;
564 switch(*_ptr)
565 {
566 case 'c':
567 element = 6;
568 symbol[0] = 'C';
569 break;
570 case 'n':
571 element = 7;
572 symbol[0] = 'N';
573 break;
574 case 'o':
575 element = 8;
576 symbol[0] = 'O';
577 break;
578 case 'p':
579 element = 15;
580 symbol[0] = 'P';
581 break;
582 case 's':
583 element = 16;
584 symbol[0] = 'S';
585 break;
586 case '*':
587 element = 0;
588 strcpy(symbol,"Du");
589 break;
590 default:
591 return(false);
592 }
593 }
594
595 OBAtom *atom = mol.NewAtom();
596 atom->SetAtomicNum(element);
597 atom->SetType(symbol);
598 if (arom)
599 {
600 atom->SetAromatic();
601 atom->SetSpinMultiplicity(2); // CM 18 Sept 2003
602 }
603
604 if (_prev) //need to add bond
605 {
606 /* CM 18 Sept 2003
607 An extension to the SMILES format has been added so that lower case c,n,o can
608 represent a radical centre: CcC is isopropyl radical;
609 and cccc... a carbon chain bonded by conjugated double bonds.
610 Fails sometimes when using c as both aromatic and as the extened form.
611 For benzyl radical C6H5CH2. c1ccccc1c is ok; c1cc(c)ccc1 fails.
612 Radical centres should not be involved in ring closure:
613 for cyclohexyl radical C1cCCCC1 is ok, c1CCCCC1 is not.
614
615 Implementation
616 Atoms c,n,o, etc initially added as a radical centre
617 unless _prev is a radical centre when both are made a normal atoms
618 connected by a double bond.
619 Since they are still marked as aromatic, FindAromaticBonds() will
620 replace the bonds by aromatic bonds if they are in a ring.
621 FindOrphanAromand removes the aromatic tag from the atoms not found in this way
622 and removes stray radical centres in .
623
624 To avoid difficulties in complex aromatics with 5 membered rings containing N and O,
625 the above scheme modified to prevent recognition of aromatic structures is not confused.
626 - the double bond is made single if it would exceed valence of atom (not all aromatics have conjugated bonds)
627 - the radical centre is removed on both atoms when forming a ring (in ParseRingBond())
628 and on the new atom if the valence of the prev atom is being exceeded.
629 Note that the bond orders made here are reassigned in aromatic structures in FindAromaticBonds()
630 */
631 if(arom)
632 {
633 OBAtom* prevatom = mol.GetAtom(_prev);
634
635 //Calculate available valency on prevatom
636 //This is far more difficult than it should be!
637 //Data not always updated during molecule constuction.
638 int val=0;
639 if(prevatom->IsCarbon())
640 val=4;
641 else if(prevatom->IsNitrogen())
642 val=3;
643 else if(prevatom->IsPhosphorus())
644 val=3;
645 else if(prevatom->IsOxygen())
646 val=2;
647 else if(prevatom->IsSulfur())
648 val=2;
649
650 /* int sumBO=0;
651 vector<OBEdgeBase*>::iterator itr;
652 OBBond* bond = prevatom->BeginBond(itr);
653 while(bond)
654 {
655 sumBO +=bond->GetBO();
656 bond=prevatom->NextBond(itr);
657 }
658 if(prevatom->BOSum() != sumBO)
659 cerr << "BOSum != sumBO" << endl;
660 */
661 int AvailableValence = val + prevatom->GetFormalCharge() - prevatom->BOSum();//sumBO;
662
663 if (prevatom->GetSpinMultiplicity())
664 {
665 prevatom->SetSpinMultiplicity(0);
666 atom->SetSpinMultiplicity(0);
667
668 //Make the new bond potentially aromatic, unless a double
669 // bond might be impossible given the valence
670 _order = AvailableValence>=2 ? 5 : 1 ;
671 }
672 else
673 if(AvailableValence<1) //Must be complex aromatic with O, N
674 atom->SetSpinMultiplicity(0); //radical centres not appropriate in complex aromatics
675 }
676 // CM end
677 mol.AddBond(_prev,mol.NumAtoms(),_order,_bondflags);
678
679 //NE iterate through and see if atom is bonded to chiral atom
680 map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
681 ChiralSearch=_mapcd.find(mol.GetAtom(_prev));
682 if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
683 {
684 (ChiralSearch->second)->AddAtomRef(mol.NumAtoms(), input);
685 // cout << "Line 650: Adding "<<mol.NumAtoms()<<" to "<<ChiralSearch->second<<endl;
686 }
687 }
688 //set values
689 _prev = mol.NumAtoms();
690 _order = 1;
691 _bondflags = 0;
692
693 return(true);
694 }
695
696 bool OBSmilesParser::ParseComplex(OBMol &mol)
697 {
698 char symbol[7];
699 int element=0;
700 int isotope=0;
701 int isoPtr=0;
702 bool arom=false;
703 memset(symbol,'\0',sizeof(char)*7);
704
705 _ptr++;
706
707 //grab isotope information
708 for (;*_ptr && isdigit(*_ptr) && (isoPtr <= 6); _ptr++)
709 {
710 symbol[isoPtr] = *_ptr;
711 isoPtr++;
712 }
713 if (isoPtr >= 6)
714 return false;
715 isotope = atoi(symbol);
716
717 //parse element data
718 if (isupper(*_ptr))
719 switch(*_ptr)
720 {
721 case 'C':
722 _ptr++;
723 switch(*_ptr)
724 {
725 case 'a':
726 element = 20;
727 strcpy(symbol,"Ca");
728 break;
729 case 'd':
730 element = 48;
731 strcpy(symbol,"Cd");
732 break;
733 case 'e':
734 element = 58;
735 strcpy(symbol,"Ce");
736 break;
737 case 'f':
738 element = 98;
739 strcpy(symbol,"Cf");
740 break;
741 case 'l':
742 element = 17;
743 strcpy(symbol,"Cl");
744 break;
745 case 'm':
746 element = 96;
747 strcpy(symbol,"Cm");
748 break;
749 case 'o':
750 element = 27;
751 strcpy(symbol,"Co");
752 break;
753 case 'r':
754 element = 24;
755 strcpy(symbol,"Cr");
756 break;
757 case 's':
758 element = 55;
759 strcpy(symbol,"Cs");
760 break;
761 case 'u':
762 element = 29;
763 strcpy(symbol,"Cu");
764 break;
765 default:
766 element = 6;
767 symbol[0] = 'C';
768 _ptr--;
769 }
770 break;
771
772 case 'N':
773 _ptr++;
774 switch(*_ptr)
775 {
776 case 'a':
777 element = 11;
778 strcpy(symbol,"Na");
779 break;
780 case 'b':
781 element = 41;
782 strcpy(symbol,"Nb");
783 break;
784 case 'd':
785 element = 60;
786 strcpy(symbol,"Nd");
787 break;
788 case 'e':
789 element = 10;
790 strcpy(symbol,"Ne");
791 break;
792 case 'i':
793 element = 28;
794 strcpy(symbol,"Ni");
795 break;
796 case 'o':
797 element = 102;
798 strcpy(symbol,"No");
799 break;
800 case 'p':
801 element = 93;
802 strcpy(symbol,"Np");
803 break;
804 default:
805 element = 7;
806 symbol[0] = 'N';
807 _ptr--;
808 }
809 break;
810
811 case('O'):
812 _ptr++;
813 if(*_ptr == 's')
814 {
815 element = 76;
816 strcpy(symbol,"Os");
817 }
818 else
819 {
820 element = 8;
821 symbol[0] = 'O';
822 _ptr--;
823 }
824 break;
825
826 case 'P':
827 _ptr++;
828 switch(*_ptr)
829 {
830 case 'a':
831 element = 91;
832 strcpy(symbol,"Pa");
833 break;
834 case 'b':
835 element = 82;
836 strcpy(symbol,"Pb");
837 break;
838 case 'd':
839 element = 46;
840 strcpy(symbol,"Pd");
841 break;
842 case 'm':
843 element = 61;
844 strcpy(symbol,"Pm");
845 break;
846 case 'o':
847 element = 84;
848 strcpy(symbol,"Po");
849 break;
850 case 'r':
851 element = 59;
852 strcpy(symbol,"Pr");
853 break;
854 case 't':
855 element = 78;
856 strcpy(symbol,"Pt");
857 break;
858 case 'u':
859 element = 94;
860 strcpy(symbol,"Pu");
861 break;
862 default:
863 element = 15;
864 symbol[0] = 'P';
865 _ptr--;
866 }
867 break;
868
869 case('S'):
870 _ptr++;
871 switch(*_ptr)
872 {
873 case 'b':
874 element = 51;
875 strcpy(symbol,"Sb");
876 break;
877 case 'c':
878 element = 21;
879 strcpy(symbol,"Sc");
880 break;
881 case 'e':
882 element = 34;
883 strcpy(symbol,"Se");
884 break;
885 case 'i':
886 element = 14;
887 strcpy(symbol,"Si");
888 break;
889 case 'm':
890 element = 62;
891 strcpy(symbol,"Sm");
892 break;
893 case 'n':
894 element = 50;
895 strcpy(symbol,"Sn");
896 break;
897 case 'r':
898 element = 38;
899 strcpy(symbol,"Sr");
900 break;
901 default:
902 element = 16;
903 symbol[0] = 'S';
904 _ptr--;
905 }
906 break;
907
908 case 'B':
909 _ptr++;
910 switch(*_ptr)
911 {
912 case 'a':
913 element = 56;
914 strcpy(symbol,"Ba");
915 break;
916 case 'e':
917 element = 4;
918 strcpy(symbol,"Be");
919 break;
920 case 'i':
921 element = 83;
922 strcpy(symbol,"Bi");
923 break;
924 case 'k':
925 element = 97;
926 strcpy(symbol,"Bk");
927 break;
928 case 'r':
929 element = 35;
930 strcpy(symbol,"Br");
931 break;
932 default:
933 element = 5;
934 symbol[0] = 'B';
935 _ptr--;
936 }
937 break;
938
939 case 'F':
940 _ptr++;
941 switch(*_ptr)
942 {
943 case 'e':
944 element = 26;
945 strcpy(symbol,"Fe");
946 break;
947 case 'm':
948 element = 100;
949 strcpy(symbol,"Fm");
950 break;
951 case 'r':
952 element = 87;
953 strcpy(symbol,"Fr");
954 break;
955 default:
956 element = 9;
957 symbol[0] = 'F';
958 _ptr--;
959 }
960 break;
961
962 case 'I':
963 _ptr++;
964 switch(*_ptr)
965 {
966 case 'n':
967 element = 49;
968 strcpy(symbol,"In");
969 break;
970 case 'r':
971 element = 77;
972 strcpy(symbol,"Ir");
973 break;
974 default:
975 element = 53;
976 symbol[0] = 'I';
977 _ptr--;
978 }
979 break;
980
981 case 'A':
982 _ptr++;
983 switch(*_ptr)
984 {
985 case 'c':
986 element = 89;
987 strcpy(symbol,"Ac");
988 break;
989 case 'g':
990 element = 47;
991 strcpy(symbol,"Ag");
992 break;
993 case 'l':
994 element = 13;
995 strcpy(symbol,"Al");
996 break;
997 case 'm':
998 element = 95;
999 strcpy(symbol,"Am");
1000 break;
1001 case 'r':
1002 element = 18;
1003 strcpy(symbol,"Ar");
1004 break;
1005 case 's':
1006 element = 33;
1007 strcpy(symbol,"As");
1008 break;
1009 case 't':
1010 element = 85;
1011 strcpy(symbol,"At");
1012 break;
1013 case 'u':
1014 element = 79;
1015 strcpy(symbol,"Au");
1016 break;
1017 default:
1018 _ptr--;
1019 return(false);
1020 }
1021 break;
1022
1023 case 'D':
1024 _ptr++;
1025 if (*_ptr == 'y')
1026 {
1027 element = 66;
1028 strcpy(symbol,"Dy");
1029 }
1030 else
1031 {
1032 _ptr--;
1033 return(false);
1034 }
1035 break;
1036
1037 case 'E':
1038 _ptr++;
1039 switch(*_ptr)
1040 {
1041 case 'r':
1042 element = 68;
1043 strcpy(symbol,"Er");
1044 break;
1045 case 's':
1046 element = 99;
1047 strcpy(symbol,"Es");
1048 break;
1049 case 'u':
1050 element = 63;
1051 strcpy(symbol,"Eu");
1052 break;
1053 default:
1054 _ptr--;
1055 return(false);
1056 }
1057 break;
1058
1059 case 'G':
1060 _ptr++;
1061 switch (*_ptr)
1062 {
1063 case 'a':
1064 element = 31;
1065 strcpy(symbol,"Ga");
1066 break;
1067 case 'd':
1068 element = 64;
1069 strcpy(symbol,"Gd");
1070 break;
1071 case 'e':
1072 element = 32;
1073 strcpy(symbol,"Ge");
1074 break;
1075 default:
1076 _ptr--;
1077 return(false);
1078 }
1079 break;
1080
1081 case 'H':
1082 _ptr++;
1083 switch (*_ptr)
1084 {
1085 case 'e':
1086 element = 2;
1087 strcpy(symbol,"He");
1088 break;
1089 case 'f':
1090 element = 72;
1091 strcpy(symbol,"Hf");
1092 break;
1093 case 'g':
1094 element = 80;
1095 strcpy(symbol,"Hg");
1096 break;
1097 case 'o':
1098 element = 67;
1099 strcpy(symbol,"Ho");
1100 break;
1101 default:
1102 element = 1;
1103 symbol[0] = 'H';
1104 _ptr--;
1105 }
1106 break;
1107
1108 case 'K':
1109 _ptr++;
1110 if(*_ptr == 'r')
1111 {
1112 element = 36;
1113 strcpy(symbol,"Kr");
1114 }
1115 else
1116 {
1117 element = 19;
1118 symbol[0] = 'K';
1119 _ptr--;
1120 }
1121 break;
1122
1123 case 'L':
1124 _ptr++;
1125 switch(*_ptr)
1126 {
1127 case 'a':
1128 element = 57;
1129 strcpy(symbol,"La");
1130 break;
1131 case 'i':
1132 element = 3;
1133 strcpy(symbol,"Li");
1134 break;
1135 case 'r':
1136 element = 103;
1137 strcpy(symbol,"Lr");
1138 break;
1139 case 'u':
1140 element = 71;
1141 strcpy(symbol,"Lu");
1142 break;
1143 default:
1144 _ptr--;
1145 return(false);
1146 }
1147 break;
1148
1149 case 'M':
1150 _ptr++;
1151 switch(*_ptr)
1152 {
1153 case 'd':
1154 element = 101;
1155 strcpy(symbol,"Md");
1156 break;
1157 case 'g':
1158 element = 12;
1159 strcpy(symbol,"Mg");
1160 break;
1161 case 'n':
1162 element = 25;
1163 strcpy(symbol,"Mn");
1164 break;
1165 case 'o':
1166 element = 42;
1167 strcpy(symbol,"Mo");
1168 break;
1169 default:
1170 _ptr--;
1171 return(false);
1172 }
1173 break;
1174
1175 case 'R':
1176 _ptr++;
1177 switch(*_ptr)
1178 {
1179 case 'a':
1180 element = 88;
1181 strcpy(symbol,"Ra");
1182 break;
1183 case 'b':
1184 element = 37;
1185 strcpy(symbol,"Rb");
1186 break;
1187 case 'e':
1188 element = 75;
1189 strcpy(symbol,"Re");
1190 break;
1191 case 'h':
1192 element = 45;
1193 strcpy(symbol,"Rh");
1194 break;
1195 case 'n':
1196 element = 86;
1197 strcpy(symbol,"Rn");
1198 break;
1199 case 'u':
1200 element = 44;
1201 strcpy(symbol,"Ru");
1202 break;
1203 default:
1204 _ptr--;
1205 return(false);
1206 }
1207 break;
1208
1209 case 'T':
1210 _ptr++;
1211 switch(*_ptr)
1212 {
1213 case 'a':
1214 element = 73;
1215 strcpy(symbol,"Ta");
1216 break;
1217 case 'b':
1218 element = 65;
1219 strcpy(symbol,"Tb");
1220 break;
1221 case 'c':
1222 element = 43;
1223 strcpy(symbol,"Tc");
1224 break;
1225 case 'e':
1226 element = 52;
1227 strcpy(symbol,"Te");
1228 break;
1229 case 'h':
1230 element = 90;
1231 strcpy(symbol,"Th");
1232 break;
1233 case 'i':
1234 element = 22;
1235 strcpy(symbol,"Ti");
1236 break;
1237 case 'l':
1238 element = 81;
1239 strcpy(symbol,"Tl");
1240 break;
1241 case 'm':
1242 element = 69;
1243 strcpy(symbol,"Tm");
1244 break;
1245 default:
1246 _ptr--;
1247 return(false);
1248 }
1249 break;
1250
1251 case('U'): element = 92;
1252 symbol[0] = 'U';
1253 break;
1254 case('V'): element = 23;
1255 symbol[0] = 'V';
1256 break;
1257 case('W'): element = 74;
1258 symbol[0] = 'W';
1259 break;
1260
1261 case('X'):
1262 _ptr++;
1263 if (*_ptr == 'e')
1264 {
1265 element = 54;
1266 strcpy(symbol,"Xe");
1267 }
1268 else
1269 {
1270 _ptr--;
1271 return(false);
1272 }
1273 break;
1274
1275 case('Y'):
1276 _ptr++;
1277 if (*_ptr == 'b')
1278 {
1279 element = 70;
1280 strcpy(symbol,"Yb");
1281 }
1282 else
1283 {
1284 element = 39;
1285 symbol[0] = 'Y';
1286 _ptr--;
1287 }
1288 break;
1289
1290 case('Z'):
1291 _ptr++;
1292 switch(*_ptr)
1293 {
1294 case 'n':
1295 element = 30;
1296 strcpy(symbol,"Zn");
1297 break;
1298 case 'r':
1299 element = 40;
1300 strcpy(symbol,"Zr");
1301 break;
1302 default:
1303 _ptr--;
1304 return(false);
1305 }
1306 break;
1307 }
1308 else
1309 {
1310 arom = true;
1311 switch(*_ptr)
1312 {
1313 case 'c':
1314 element = 6;
1315 symbol[0] = 'C';
1316 break;
1317 case 'n':
1318 element = 7;
1319 symbol[0] = 'N';
1320 break;
1321 case 'o':
1322 element = 8;
1323 symbol[0] = 'O';
1324 break;
1325 case 'p':
1326 element = 15;
1327 symbol[0] = 'P';
1328 break;
1329 case 's':
1330 _ptr++;
1331 if (*_ptr == 'e')
1332 {
1333 element = 34;
1334 strcpy(symbol,"Se");
1335 }
1336 else
1337 {
1338 element = 16;
1339 symbol[0] = 'S';
1340 _ptr--;
1341 }
1342 break;
1343 case 'a':
1344 _ptr++;
1345 if (*_ptr == 's')
1346 {
1347 element = 33;
1348 strcpy(symbol,"As");
1349 }
1350 else
1351 return(false);
1352 break;
1353 default:
1354 return(false);
1355 }
1356 }
1357
1358 //handle hydrogen count, stereochemistry, and charge
1359
1360 OBAtom *atom = mol.NewAtom();
1361 int hcount = 0;
1362 int charge=0;
1363 int rad=0;
1364 char tmpc[2];
1365 tmpc[1] = '\0';
1366 for (_ptr++;*_ptr && *_ptr != ']';_ptr++)
1367 {
1368 switch(*_ptr)
1369 {
1370 case '@':
1371 _ptr++;
1372 chiralWatch=true;
1373 _mapcd[atom]= new OBChiralData;
1374 if (*_ptr == '@')
1375 atom->SetClockwiseStereo();
1376 else
1377 {
1378 atom->SetAntiClockwiseStereo();
1379 _ptr--;
1380 }
1381 break;
1382 case '-':
1383 _ptr++;
1384 if (isdigit(*_ptr))
1385 {
1386 tmpc[0] = *_ptr;
1387 charge = -atoi(tmpc);
1388 }
1389 else
1390 {
1391 charge--;
1392 _ptr--;
1393 }
1394 break;
1395 case '+':
1396 _ptr++;
1397 if (isdigit(*_ptr))
1398 {
1399 tmpc[0] = *_ptr;
1400 charge = atoi(tmpc);
1401 }
1402 else
1403 {
1404 charge++;
1405 _ptr--;
1406 }
1407 break;
1408
1409 case 'H':
1410 _ptr++;
1411 if (isdigit(*_ptr))
1412 {
1413 tmpc[0] = *_ptr;
1414 hcount = atoi(tmpc);
1415 }
1416 else
1417 {
1418 hcount = 1;
1419 _ptr--;
1420 }
1421 break;
1422 case '.': //CM Feb05
1423 rad=2;
1424 if(*(++_ptr)=='.')
1425 rad=3;
1426 else
1427 _ptr--;
1428 break;
1429
1430 default:
1431 return(false);
1432 }
1433 }
1434
1435 if (charge)
1436 atom->SetFormalCharge(charge);
1437 if (rad)
1438 atom->SetSpinMultiplicity(rad);
1439 atom->SetAtomicNum(element);
1440 atom->SetIsotope(isotope);
1441 atom->SetType(symbol);
1442 if (arom)
1443 atom->SetAromatic();
1444
1445 if (_prev) //need to add bond
1446 {
1447 mol.AddBond(_prev,mol.NumAtoms(),_order,_bondflags);
1448 if(chiralWatch) // if chiral atom need to add its previous into atom4ref
1449 {
1450 if (_mapcd[atom] == NULL)
1451 _mapcd[atom]= new OBChiralData;
1452
1453 (_mapcd[atom])->AddAtomRef((unsigned int)_prev,input);
1454 // cout <<"line 1405: Added atom ref "<<_prev<<" to "<<_mapcd[atom]<<endl;
1455 }
1456 map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
1457 ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1458 if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1459 {
1460 (ChiralSearch->second)->AddAtomRef(mol.NumAtoms(), input);
1461 // cout <<"line 1431: Added atom ref "<<mol.NumAtoms()<<" to "<<ChiralSearch->second<<endl;
1462 }
1463 }
1464
1465 //set values
1466 _prev = mol.NumAtoms();
1467 _order = 1;
1468 _bondflags = 0;
1469
1470 //now add hydrogens
1471
1472 for (int i = 0;i < hcount;i++)
1473 {
1474 atom = mol.NewAtom();
1475 atom->SetAtomicNum(1);
1476 atom->SetType("H");
1477 mol.AddBond(_prev,mol.NumAtoms(),1);
1478 if(chiralWatch)
1479 {
1480 if (_mapcd[mol.GetAtom(_prev)] != NULL)
1481 (_mapcd[mol.GetAtom(_prev)])->AddAtomRef(mol.NumAtoms(),input);
1482 // cout << "line 1434: Added atom ref "<<mol.NumAtoms()<<" to "<<_mapcd[mol.GetAtom(_prev)]<<endl;
1483
1484 }
1485 }
1486 chiralWatch=false;
1487 return(true);
1488 }
1489
1490 bool OBSmilesParser::CapExternalBonds(OBMol &mol)
1491 {
1492
1493 if(_extbond.empty())
1494 return(true);
1495
1496 OBAtom *atom;
1497 vector<vector<int> >::iterator bond;
1498
1499 for(bond = _extbond.begin();bond != _extbond.end();bond++)
1500 {
1501 // create new dummy atom
1502 atom = mol.NewAtom();
1503 atom->SetAtomicNum(0);
1504 atom->SetType("*");
1505
1506 // bond dummy atom to mol via external bond
1507 mol.AddBond((*bond)[1],atom->GetIdx(),(*bond)[2],(*bond)[3]);
1508 OBBond *refbond = atom->GetBond(mol.GetAtom((*bond)[1]));
1509
1510 //record external bond information
1511 OBExternalBondData *xbd;
1512 if(mol.HasData(OBGenericDataType::ExternalBondData))
1513 xbd = (OBExternalBondData*)mol.GetData(OBGenericDataType::ExternalBondData);
1514 else
1515 {
1516 xbd = new OBExternalBondData;
1517 mol.SetData(xbd);
1518 }
1519 xbd->SetData(atom,refbond,(*bond)[0]);
1520
1521 /* old code written by AGS -- mts
1522 {
1523 externalbonds = (vector<pair<int,pair<OBAtom *,OBBond *> > > *)mol.GetData("extBonds");
1524 }
1525 else
1526 {
1527 externalbonds = new vector<pair<int,pair<OBAtom *,OBBond *> > >;
1528 }
1529
1530 //save data <external bond count, bond index>
1531 externalbonds->push_back(pair<int,pair<OBAtom *,OBBond *> > ((*bond)[0], pair<OBAtom *,OBBond *> (atom,mol.GetBond((*bond)[1],atom->GetIdx()))));
1532 mol.SetData("extBonds",externalbonds);
1533 */
1534
1535 //this data gets cleaned up in mol.Clear.
1536 }
1537
1538 return(true);
1539 }
1540
1541 bool OBSmilesParser::ParseExternalBond(OBMol &mol)
1542 {
1543 int digit;
1544 char str[10];
1545
1546 //*_ptr should == '&'
1547 _ptr++;
1548
1549 switch (*_ptr) // check for bond order indicators CC&=1.C&1
1550 {
1551 case '-':
1552 _order = 1;
1553 _ptr++;
1554 break;
1555 case '=':
1556 _order = 2;
1557 _ptr++;
1558 break;
1559 case '#':
1560 _order = 3;
1561 _ptr++;
1562 break;
1563 case ';':
1564 _order = 5;
1565 _ptr++;
1566 break;
1567 case '/': //chiral, but _order still == 1
1568 _bondflags |= OB_TORUP_BOND;
1569 _ptr++;
1570 break;
1571 _ptr++;
1572 case '\\': // chiral, but _order still == 1
1573 _bondflags |= OB_TORDOWN_BOND;
1574 _ptr++;
1575 break;
1576 default: // no bond indicator just leave order = 1
1577 break;
1578 }
1579
1580 if (*_ptr == '%') // external bond indicator > 10
1581 {
1582 _ptr++;
1583 str[0] = *_ptr;
1584 _ptr++;
1585 str[1] = *_ptr;
1586 str[2] = '\0';
1587 }
1588 else // simple single digit external bond indicator
1589 {
1590 str[0] = *_ptr;
1591 str[1] = '\0';
1592 }
1593 digit = atoi(str); // convert indicator to digit
1594
1595 //check for dot disconnect closures
1596 vector<vector<int> >::iterator j;
1597 int bondFlags,bondOrder;
1598 for(j = _extbond.begin();j != _extbond.end();j++)
1599 {
1600 if((*j)[0] == digit)
1601 {
1602 bondFlags = (_bondflags > (*j)[3]) ? _bondflags : (*j)[3];
1603 bondOrder = (_order > (*j)[2]) ? _order : (*j)[2];
1604 mol.AddBond((*j)[1],_prev,bondOrder,bondFlags);
1605
1606 // after adding a bond to atom "_prev"
1607 // search to see if atom is bonded to a chiral atom
1608 map<OBAtom*,OBChiralData*>::iterator ChiralSearch;
1609 ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1610 if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1611 {
1612 (ChiralSearch->second)->AddAtomRef((*j)[1], input);
1613 // cout << "Added external "<<(*j)[1]<<" to "<<ChiralSearch->second<<endl;
1614 }
1615
1616 _extbond.erase(j);
1617 _bondflags = 0;
1618 _order = 0;
1619 return(true);
1620 }
1621 }
1622
1623 //since no closures save another ext bond
1624 vector<int> vtmp(4);
1625 vtmp[0] = digit;
1626 vtmp[1] = _prev;
1627 vtmp[2] = _order;
1628 vtmp[3] = _bondflags;
1629
1630 _extbond.push_back(vtmp);
1631 _order = 1;
1632 _bondflags = 0;
1633
1634 return(true);
1635
1636 }
1637
1638 bool OBSmilesParser::ParseRingBond(OBMol &mol)
1639 {
1640 int digit;
1641 char str[10];
1642
1643 if (*_ptr == '%')
1644 {
1645 _ptr++;
1646 str[0] = *_ptr;
1647 _ptr++;
1648 str[1] = *_ptr;
1649 str[2] = '\0';
1650 }
1651 else
1652 {
1653 str[0] = *_ptr;
1654 str[1] = '\0';
1655 }
1656 digit = atoi(str);
1657
1658 int bf,ord;
1659 vector<vector<int> >::iterator j;
1660 for (j = _rclose.begin();j != _rclose.end();j++)
1661 if ((*j)[0] == digit)
1662 {
1663 bf = (_bondflags > (*j)[3]) ? _bondflags : (*j)[3];
1664 ord = (_order > (*j)[2]) ? _order : (*j)[2];
1665 mol.AddBond((*j)[1],_prev,ord,bf,(*j)[4]);
1666
1667 // after adding a bond to atom "_prev"
1668 // search to see if atom is bonded to a chiral atom
1669 // need to check both _prev and (*j)[1] as closure is direction independant
1670 map<OBAtom*,OBChiralData*>::iterator ChiralSearch,cs2;
1671 ChiralSearch = _mapcd.find(mol.GetAtom(_prev));
1672 cs2=_mapcd.find(mol.GetAtom((*j)[1]));
1673 if (ChiralSearch!=_mapcd.end() && ChiralSearch->second != NULL)
1674 {
1675 (ChiralSearch->second)->AddAtomRef((*j)[1], input);
1676 //cout << "Added ring closure "<<(*j)[1]<<" to "<<ChiralSearch->second<<endl;
1677 }
1678 if (cs2!=_mapcd.end() && cs2->second != NULL)
1679 {
1680 (cs2->second)->AddAtomRef(_prev,input);
1681 // cout <<"Added ring opening "<<_prev<<" to "<<cs2->second<<endl;
1682 }
1683
1684 //CM ensure neither atoms in ring closure is a radical centre
1685 OBAtom* patom = mol.GetAtom(_prev);
1686 patom->SetSpinMultiplicity(0);
1687 patom = mol.GetAtom((*j)[1]);
1688 patom->SetSpinMultiplicity(0);
1689 //CM end
1690 _rclose.erase(j);
1691 _bondflags = 0;
1692 _order = 1;
1693 return(true);
1694 }
1695
1696 vector<int> vtmp(5);
1697 vtmp[0] = digit;
1698 vtmp[1] = _prev;
1699 vtmp[2] = _order;
1700 vtmp[3] = _bondflags;
1701 OBAtom* atom = mol.GetAtom(_prev);
1702 if(!atom)
1703 {
1704 obErrorLog.ThrowError(__func__,"Number not parsed correctly as a ring bond", obWarning);
1705 return false;
1706 }
1707
1708 vtmp[4] = atom->GetValence(); //store position to insert closure bond
1709 for (j = _rclose.begin();j != _rclose.end();j++) //correct for multiple closure bonds to a single atom
1710 if ((*j)[1] == _prev)
1711 vtmp[4]++;
1712
1713 _rclose.push_back(vtmp);
1714 _order = 1;
1715 _bondflags = 0;
1716
1717 return(true);
1718 }
1719
1720 static bool IsCisOrTransH(OBAtom *atom)
1721 {
1722 if (!atom->IsHydrogen())
1723 return false;
1724 else
1725 FOR_BONDS_OF_ATOM(bond, atom)
1726 {
1727 if (bond->IsUp() || bond->IsDown())
1728 return true;
1729 }
1730 return false;
1731 }
1732
1733 void OBMol2Smi::CreateSmiString(OBMol &mol,char *buffer)
1734 {
1735 OBAtom *atom;
1736 OBSmiNode *root =NULL;
1737 buffer[0] = '\0';
1738 vector<OBNodeBase*>::iterator i;
1739
1740 for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
1741 // don't use a hydrogen as the root node unless it's not bonded
1742 // or it's involved in a cis/trans '/' or '\' specification
1743 if ((!atom->IsHydrogen() || atom->GetValence() == 0 || IsCisOrTransH(atom))
1744 && !_uatoms[atom->GetIdx()])
1745 if (!atom->IsChiral() || !mol.HasNonZeroCoords())
1746 // don't use chiral root atoms unless this is from a SMILES
1747 {
1748 //clear out closures in case structure is dot disconnected
1749 _vclose.clear();
1750 _atmorder.clear();
1751 _storder.clear();
1752 _vopen.clear();
1753 //dot disconnected structure
1754 if (strlen(buffer) > 0)
1755 strcat(buffer,".");
1756 root = new OBSmiNode (atom);
1757 BuildTree(root);
1758 FindClosureBonds(mol);
1759 if (mol.Has2D())
1760 AssignCisTrans(root);
1761 ToSmilesString(root,buffer);
1762 delete root;
1763 }
1764
1765 //If no starting node found e.g. [H][H] CM 21Mar05
1766 if(root==NULL)
1767 {
1768 root = new OBSmiNode(mol.GetFirstAtom());
1769 BuildTree(root);
1770 ToSmilesString(root,buffer);
1771 delete root;
1772 }
1773 }
1774
1775 bool OBMol2Smi::BuildTree(OBSmiNode *node)
1776 {
1777 vector<OBEdgeBase*>::iterator i;
1778 OBAtom *nbr,*atom = node->GetAtom();
1779
1780 _uatoms.SetBitOn(atom->GetIdx()); //mark the atom as visited
1781 _atmorder.push_back(atom->GetIdx()); //store the atom ordering
1782 _storder.push_back(atom->GetIdx()); //store the atom ordering for stereo
1783
1784 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
1785 {
1786 // Normally skip hydrogens
1787 // but D,T is explicit and so is H2
1788 // or an H atom involved in a cis/trans '/' or '\' bond spec
1789 if ( (!nbr->IsHydrogen() || nbr->GetIsotope() || atom->IsHydrogen() ||
1790 (((OBBond*)*i)->IsUp() || ((OBBond*)*i)->IsDown()) )
1791 && !_uatoms[nbr->GetIdx()])
1792 {
1793 _ubonds.SetBitOn((*i)->GetIdx());
1794 OBSmiNode *next = new OBSmiNode (nbr);
1795 next->SetParent(atom);
1796 node->SetNextNode(next,(OBBond*)*i);
1797 BuildTree(next);
1798 }
1799 }
1800
1801 return(true);
1802 }
1803
1804 void OBMol2Smi::ToSmilesString(OBSmiNode *node,char *buffer)
1805 {
1806 char tmpbuf[16];
1807 OBAtom *atom = node->GetAtom();
1808
1809 //write the current atom to the string
1810 GetSmilesElement(node,tmpbuf);
1811 strcat(buffer,tmpbuf);
1812
1813 //handle ring closures here
1814 vector<pair<int,OBBond*> > vc = GetClosureDigits(atom);
1815 if (!vc.empty())
1816 {
1817 vector<pair<int,OBBond*> >::iterator i;
1818 for (i = vc.begin();i != vc.end();i++)
1819 {
1820 if (i->second)
1821 {
1822 if (i->second->IsUp())
1823 {
1824 if ( (i->second->GetBeginAtom())->HasDoubleBond() ||
1825 (i->second->GetEndAtom())->HasDoubleBond() )
1826 strcat(buffer,"/");
1827 }
1828 if (i->second->IsDown())
1829 {
1830 if ( (i->second->GetBeginAtom())->HasDoubleBond() ||
1831 (i->second->GetEndAtom())->HasDoubleBond() )
1832 strcat(buffer,"\\");
1833 }
1834 #ifndef KEKULE
1835
1836 if (i->second->GetBO() == 2 && !i->second->IsAromatic())
1837 strcat(buffer,"=");
1838 #else
1839
1840 if (i->second->GetBO() == 2)
1841 strcat(buffer,"=");
1842 #endif
1843
1844 if (i->second->GetBO() == 3)
1845 strcat(buffer,"#");
1846 }
1847
1848 if (i->first > 9)
1849 strcat(buffer,"%");
1850 snprintf(tmpbuf,sizeof(tmpbuf), "%d",i->first);
1851 strcat(buffer,tmpbuf);
1852 }
1853 }
1854
1855 //follow path to child atoms
1856 OBBond *bond;
1857 for (int i = 0;i < node->Size();i++)
1858 {
1859 bond = node->GetNextBond(i);
1860 if (i+1 < node->Size())
1861 strcat(buffer,"(");
1862 if (bond->IsUp())
1863 {
1864 if ( (bond->GetBeginAtom())->HasDoubleBond() ||
1865 (bond->GetEndAtom())->HasDoubleBond() )
1866 strcat(buffer,"/");
1867 }
1868 if (bond->IsDown())
1869 {
1870 if ( (bond->GetBeginAtom())->HasDoubleBond() ||
1871 (bond->GetEndAtom())->HasDoubleBond() )
1872 strcat(buffer,"\\");
1873 }
1874 #ifndef KEKULE
1875
1876 if (bond->GetBO() == 2 && !bond->IsAromatic())
1877 strcat(buffer,"=");
1878 #else
1879
1880 if (bond->GetBO() == 2)
1881 strcat(buffer,"=");
1882 #endif
1883
1884 if (bond->GetBO() == 3)
1885 strcat(buffer,"#");
1886
1887 ToSmilesString(node->GetNextNode(i),buffer);
1888 if (i+1 < node->Size())
1889 strcat(buffer,")");
1890 }
1891 }
1892
1893 void OBMol2Smi::GetClosureAtoms(OBAtom *atom,vector<OBNodeBase*> &va)
1894 {
1895
1896 //look through closure list for start atom
1897 vector<OBEdgeBase*>::iterator i;
1898 for (i = _vclose.begin();i != _vclose.end();i++)
1899 if (*i)
1900 {
1901 if (((OBBond*)*i)->GetBeginAtom() == atom)
1902 va.push_back(((OBBond*)*i)->GetEndAtom());
1903 if (((OBBond*)*i)->GetEndAtom() == atom)
1904 va.push_back(((OBBond*)*i)->GetBeginAtom());
1905 }
1906
1907 OBAtom *nbr;
1908 vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1909 for (j = _vopen.begin();j != _vopen.end();j++)
1910 for (nbr = atom->BeginNbrAtom(i);nbr;nbr = atom->NextNbrAtom(i))
1911 if (nbr == j->first)
1912 va.push_back(nbr);
1913 }
1914
1915 vector<pair<int,OBBond*> > OBMol2Smi::GetClosureDigits(OBAtom *atom)
1916 {
1917 vector<pair<int,OBBond*> > vc;
1918 vc.clear();
1919
1920 //look through closure list for start atom
1921 int idx,bo;
1922 OBBond *bond;
1923 vector<OBEdgeBase*>::iterator i;
1924 for (i = _vclose.begin();i != _vclose.end();i++)
1925 if ((bond=(OBBond*)*i))
1926 if (bond->GetBeginAtom() == atom || bond->GetEndAtom() == atom)
1927 {
1928 idx = GetUnusedIndex();
1929 vc.push_back(pair<int,OBBond*> (idx,bond));
1930 bo = (bond->IsAromatic())? 1 : bond->GetBO();
1931 _vopen.push_back(pair<OBAtom*,pair<int,int> >
1932 (bond->GetNbrAtom(atom),pair<int,int>(idx,bo)));
1933 *i = NULL;//remove bond from closure list
1934 }
1935
1936 //try to complete closures
1937 if (!_vopen.empty())
1938 {
1939 vector<pair<OBAtom*,pair<int,int> > >::iterator j;
1940 for (j = _vopen.begin();j != _vopen.end();)
1941 if (j->first == atom)
1942 {
1943 vc.push_back(pair<int,OBBond*> (j->second.first,(OBBond*)NULL));
1944 _vopen.erase(j);
1945 j = _vopen.begin();
1946 }
1947 else
1948 j++;
1949 }
1950
1951 return(vc);
1952 }
1953
1954 void OBMol2Smi::FindClosureBonds(OBMol &mol)
1955 {
1956 //find closure bonds
1957 OBAtom *a1,*a2;
1958 OBBond *bond;
1959 vector<OBEdgeBase*>::iterator i;
1960 OBBitVec bv;
1961 bv.FromVecInt(_storder);
1962
1963 for (bond = mol.BeginBond(i);bond;bond = mol.NextBond(i))
1964 if (!_ubonds[bond->GetIdx()] && bv[bond->GetBeginAtomIdx()])
1965 {
1966 a1 = bond->GetBeginAtom();
1967 a2 = bond->GetEndAtom();
1968 if (!a1->IsHydrogen() && !a2->IsHydrogen())
1969 _vclose.push_back(bond);
1970 }
1971
1972 vector<OBEdgeBase*>::reverse_iterator j;
1973 vector<int>::iterator k;
1974
1975 //modify _order to reflect ring closures
1976 for (j = _vclose.rbegin();j != _vclose.rend();j++)
1977 {
1978 bond = (OBBond*)*j;
1979 a1 = a2 = NULL;
1980
1981 for (k = _storder.begin();k != _storder.end();k++)
1982 if (bond->GetBeginAtomIdx() == static_cast<unsigned int>(*k) ||
1983 bond->
1984 GetEndAtomIdx() == static_cast<unsigned int>(*k))
1985 if (!a1) a1 = mol.GetAtom(*k);
1986 else if (!a2)
1987 {
1988 a2 = mol.GetAtom(*k)
1989 ;
1990 _storder.erase(k);
1991 break;
1992 }
1993
1994 for (k = _storder.begin()
1995 ;
1996 k != _storder.end();
1997 k++)
1998 if (a1->GetIdx()
1999 == static_cast<unsigned int>(*k))
2000 {
2001 k++;
2002 if (k != _storder.end())
2003 _storder.insert(k,a2->GetIdx());
2004 else
2005 _storder.push_back(a2->GetIdx());
2006 break;
2007 }
2008 }
2009 }
2010
2011 int OBMol2Smi::GetUnusedIndex()
2012 {
2013 int idx=1;
2014
2015 vector<pair<OBAtom*,pair<int,int> > >::iterator j;
2016 for (j = _vopen.begin();j != _vopen.end();)
2017 if (j->second.first == idx)
2018 {
2019 idx++; //increment idx and start over if digit is already used
2020 j = _vopen.begin();
2021 }
2022 else
2023 j++;
2024
2025 return(idx);
2026 }
2027
2028 void OBMol2Smi::CorrectAromaticAmineCharge(OBMol &mol)
2029 {
2030 OBAtom *atom;
2031 vector<OBNodeBase*>::iterator i;
2032
2033 _aromNH.clear();
2034 _aromNH.resize(mol.NumAtoms()+1);
2035
2036 for (atom = mol.BeginAtom(i);atom;atom = mol.NextAtom(i))
2037 if (atom->IsNitrogen() && atom->IsAromatic())
2038 if (atom->GetHvyValence() == 2)
2039 {
2040 if (atom->GetValence() == 3 || atom->GetImplicitValence() == 3)
2041 _aromNH[atom->GetIdx()] = true;
2042 }
2043 }
2044
2045 void OBMol2Smi::AssignCisTrans(OBSmiNode *node)
2046 {
2047 //traverse the tree searching for acyclic olefins - if it
2048 //has at least one heavy atom attachment on each end assign stereochem
2049
2050 OBBond *bond;
2051 for (int i = 0;i < node->Size();i++)
2052 {
2053 bond = node->GetNextBond(i);
2054 if (bond->GetBO() == 2 && !bond->IsInRing())
2055 {
2056 OBAtom *b = node->GetAtom();
2057 OBAtom *c = bond->GetNbrAtom(b);
2058
2059 //skip allenes
2060 if (b->GetHyb() == 1 || c->GetHyb() == 1)
2061 continue;
2062
2063 if (b->GetHvyValence() > 1 && c->GetHvyValence() > 1)
2064 {
2065 OBAtom *a,*d;
2066 vector<OBEdgeBase*>::iterator j,k;
2067
2068 //look for bond with assigned stereo as in poly-ene
2069 for (a = b->BeginNbrAtom(j);a;a = b->NextNbrAtom(j))
2070 if (((OBBond*)*j)->IsUp() ||((OBBond*)*j)->IsDown())
2071 break;
2072
2073 if (!a)
2074 for (a = b->BeginNbrAtom(j);a;a = b->NextNbrAtom(j))
2075 if (a != c && !a->IsHydrogen())
2076 break;
2077 for (d = c->BeginNbrAtom(k);d;d = c->NextNbrAtom(k))
2078 if (d != b && !d->IsHydrogen())
2079 break;
2080 // obAssert(a);
2081 // obAssert(d);
2082
2083 if (((OBBond*)*j)->IsUp() || ((OBBond*)*j)->IsDown()) //stereo already assigned
2084 {
2085 if (fabs(CalcTorsionAngle(a->GetVector(),b->GetVector(),
2086 c->GetVector(),d->GetVector())) > 10.0)
2087 if (((OBBond*)*j)->IsUp())
2088 ((OBBond*)*k)->SetUp();
2089 else
2090 ((OBBond*)*k)->SetDown();
2091 else
2092 if (((OBBond*)*j)->IsUp())
2093 ((OBBond*)*k)->SetDown();
2094 else
2095 ((OBBond*)*k)->SetUp();
2096 }
2097 else //assign stereo to both ends
2098 {
2099 ((OBBond*)*j)->SetUp();
2100 if (fabs(CalcTorsionAngle(a->GetVector(),b->GetVector(),
2101 c->GetVector(),d->GetVector())) > 10.0)
2102 ((OBBond*)*k)->SetUp();
2103 else
2104 ((OBBond*)*k)->SetDown();
2105 }
2106 }
2107 }
2108 AssignCisTrans(node->GetNextNode(i));
2109 }
2110 }
2111
2112 void OBMol2Smi::Init(OBConversion* pconv)
2113 {
2114 _vclose.clear();
2115 _atmorder.clear();
2116 _storder.clear();
2117 _aromNH.clear();
2118 _uatoms.Clear();
2119 _ubonds.Clear();
2120 _vopen.clear();
2121 _pconv = pconv;
2122 }
2123
2124 bool OBMol2Smi::GetSmilesElement(OBSmiNode *node,char *element)
2125 {
2126 //***handle reference atom stuff here and return***
2127 char symbol[16];
2128 bool bracketElement = false;
2129 bool normalValence = true;
2130
2131 OBAtom *atom = node->GetAtom();
2132
2133 int bosum = atom->KBOSum();
2134 atom->BOSum(); //CM temp
2135 switch (atom->GetAtomicNum())
2136 {
2137 case 0:
2138 break;
2139 case 5: /*bracketElement = !(normalValence = (bosum == 3)); break;*/
2140 break;
2141 case 6:
2142 break;
2143 case 7:
2144 if (atom->IsAromatic() && atom->GetHvyValence() == 2 && atom->GetImplicitValence() == 3)
2145 {
2146 bracketElement = !(normalValence = false);
2147 break;
2148 }
2149 else
2150 bracketElement = !(normalValence = (bosum == 3 || bosum == 5));
2151 break;
2152 case 8:
2153 break;
2154 case 9:
2155 break;
2156 case 15:
2157 break;
2158 case 16:
2159 bracketElement = !(normalValence = (bosum == 2 || bosum == 4 || bosum == 6));
2160 break;
2161 case 17:
2162 break;
2163 case 35:
2164 break;
2165 case 53:
2166 break;
2167
2168 default:
2169 bracketElement = true;
2170 }
2171
2172 if (atom->GetHvyValence() > 2 && atom->IsChiral())
2173 if (((OBMol*)atom->GetParent())->HasNonZeroCoords() || atom->HasChiralitySpecified())
2174 bracketElement = true;
2175
2176 if (atom->GetFormalCharge() != 0) //bracket charged elements
2177 bracketElement = true;
2178
2179 if(atom->GetIsotope()) //CM 19Mar05
2180 bracketElement = true;
2181
2182 //CM begin 18 Sept 2003
2183
2184 //This outputs form [CH3][CH3] rather than CC if -h option has been specified
2185 if (((OBMol*)atom->GetParent())->HasHydrogensAdded())
2186 bracketElement = true;
2187 else
2188 {
2189 if (atom->GetSpinMultiplicity())
2190 {
2191 //For radicals output bracket form anyway unless r option specified
2192 if(!(_pconv && _pconv->IsOption ("r")))
2193 bracketElement = true;
2194 }
2195 }
2196 //CM end
2197
2198 if (!bracketElement)
2199 {
2200 if (!atom->GetAtomicNum())
2201 {
2202 bool external = false;
2203 vector<pair<int,pair<OBAtom *,OBBond *> > > *externalBonds =
2204 (vector<pair<int,pair<OBAtom *,OBBond *> > > *)((OBMol*)atom->GetParent())->GetData("extBonds");
2205 vector<pair<int,pair<OBAtom *,OBBond *> > >::iterator externalBond;
2206
2207 if (externalBonds)
2208 for(externalBond = externalBonds->begin();externalBond != externalBonds->end();externalBond++)
2209 {
2210 if (externalBond->second.first == atom)
2211 {
2212 external = true;
2213 strcpy(symbol,"&");
2214 OBBond *bond = externalBond->second.second;
2215 if (bond->IsUp())
2216 {
2217 if ( (bond->GetBeginAtom())->HasDoubleBond() ||
2218 (bond->GetEndAtom())->HasDoubleBond() )
2219 strcat(symbol,"/");
2220 }
2221 if (bond->IsDown())
2222 {
2223 if ( (bond->GetBeginAtom())->HasDoubleBond() ||
2224 (bond->GetEndAtom())->HasDoubleBond() )
2225 strcat(symbol,"\\");
2226 }
2227 #ifndef KEKULE
2228
2229 if (bond->GetBO() == 2 && !bond->IsAromatic())
2230 strcat(symbol,"=");
2231 if (bond->GetBO() == 2 && bond->IsAromatic())
2232 strcat(symbol,";");
2233 #else
2234
2235 if (bond->GetBO() == 2)
2236 strcat(symbol,"=");
2237 #endif
2238
2239 if (bond->GetBO() == 3)
2240 strcat(symbol,"#");
2241 sprintf(symbol,"%s%d",symbol,externalBond->first);
2242 break;
2243 }
2244 }
2245
2246 if(!external)
2247 strcpy(symbol,"*");
2248 }
2249 else
2250 {
2251 strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
2252 #ifndef KEKULE
2253 if (atom->IsAromatic())
2254 symbol[0] = tolower(symbol[0]);
2255 #endif
2256
2257 //Radical centres lc if r option set
2258 if(atom->GetSpinMultiplicity() && _pconv && _pconv->IsOption ("r"))
2259 symbol[0] = tolower(symbol[0]);
2260 }
2261 strcpy(element,symbol);
2262
2263 return(true);
2264 }
2265
2266 strcpy(element,"[");
2267 if(atom->GetIsotope()) //CM 19Mar05
2268 {
2269 char iso[4];
2270 sprintf(iso,"%d",atom->GetIsotope());
2271 strcat(element,iso);
2272 }
2273 if (!atom->GetAtomicNum())
2274 strcpy(symbol,"*");
2275 else
2276 {
2277 strcpy(symbol,etab.GetSymbol(atom->GetAtomicNum()));
2278 #ifndef KEKULE
2279 if (atom->IsAromatic())
2280 symbol[0] = tolower(symbol[0]);
2281 #endif
2282
2283 }
2284 strcat(element,symbol);
2285
2286 //if (atom->IsCarbon() && atom->GetHvyValence() > 2 && atom->IsChiral())
2287 if (atom->GetHvyValence() > 2 && atom->IsChiral())
2288 {
2289 char stereo[5];
2290 if (GetChiralStereo(node,stereo))
2291 strcat(element,stereo);
2292 }
2293
2294 //add extra hydrogens
2295 if (!atom->IsHydrogen())
2296 {
2297 int hcount = atom->ImplicitHydrogenCount() + atom->ExplicitHydrogenCount();
2298 if (hcount != 0)
2299 {
2300 strcat(element,"H");
2301 if (hcount > 1)
2302 {
2303 char tcount[10];
2304 sprintf(tcount,"%d", hcount);
2305 strcat(element,tcount);
2306 }
2307 }
2308 }
2309
2310 //cat charge on the end
2311 if (atom->GetFormalCharge() != 0)
2312 {
2313
2314 /*
2315 if (atom->ImplicitHydrogenCount())
2316 {
2317 cerr << "imp = " << atom->GetAtomicNum() << ' ' << atom->GetImplicitValence() << endl;
2318 strcat(element,"H");
2319 if (atom->ImplicitHydrogenCount() > 1)
2320 {
2321 char tcount[10];
2322 sprintf(tcount,"%d",atom->ImplicitHydrogenCount());
2323 strcat(element,tcount);
2324 }
2325 }
2326 */
2327 if (atom->GetFormalCharge() > 0)
2328 strcat(element,"+");
2329 else
2330 strcat(element,"-");
2331
2332 if (abs(atom->GetFormalCharge()) > 1)
2333 {
2334 char tcharge[10];
2335 sprintf(tcharge,"%d",abs(atom->GetFormalCharge()));
2336 strcat(element,tcharge);
2337 }
2338 }
2339
2340 strcat(element,"]");
2341
2342 return(true);
2343 }
2344
2345 bool OBMol2Smi::GetChiralStereo(OBSmiNode *node,char *stereo)
2346 {
2347 bool is2D=false;
2348 double torsion;
2349 OBAtom *a,*b,*c,*d,hydrogen;
2350
2351 b = node->GetAtom();
2352 OBMol *mol = (OBMol*)b->GetParent();
2353
2354 if (!mol->HasNonZeroCoords()) //must have come in from smiles string
2355 {
2356 if (!b->HasChiralitySpecified())
2357 return(false);
2358 if (b->IsClockwise())
2359 strcpy(stereo,"@@");
2360 else if (b->IsAntiClockwise())
2361 strcpy(stereo,"@");
2362 else
2363 return(false);
2364 //if (b->GetHvyValence() == 3) strcat(stereo,"H");
2365 return(true);
2366 }
2367
2368 //give peudo Z coords if mol is 2D
2369 if (!mol->Has3D())
2370 {
2371 vector3 v,vz(0.0,0.0,1.0);
2372 is2D = true;
2373 OBAtom *nbr;
2374 OBBond *bond;
2375 vector<OBEdgeBase*>::iterator i;
2376 for (bond = b->BeginBond(i);bond;bond = b->NextBond(i))
2377 {
2378 nbr = bond->GetEndAtom();
2379 if (nbr != b)
2380 {
2381 v = nbr->GetVector();
2382 if (bond->IsWedge())
2383 v += vz;
2384 else if (bond->IsHash())
2385 v -= vz;
2386
2387 nbr->SetVector(v);
2388 }
2389 else
2390 {
2391 nbr = bond->GetBeginAtom();
2392 v = nbr->GetVector();
2393 if (bond->IsWedge())
2394 v -= vz;
2395 else if (bond->IsHash())
2396 v += vz;
2397
2398 nbr->SetVector(v);
2399 }
2400 }
2401 }
2402
2403 c = d = NULL;
2404 a = node->GetParent();
2405 // obAssert(a); //chiral atom can't be used as root node - must have parent
2406
2407 if (b->GetHvyValence() == 3) //must have attached hydrogen
2408 {
2409 if (b->GetValence() == 4) //has explicit hydrogen
2410 {
2411 vector<OBEdgeBase*>::iterator i;
2412 for (c = b->BeginNbrAtom(i);c;c = b->NextNbrAtom(i))
2413 if (c->IsHydrogen())
2414 break;
2415 // obAssert(c);
2416 }
2417 else //implicit hydrogen
2418 {
2419 vector3 v;
2420 b->GetNewBondVector(v,1.0);
2421 hydrogen.SetVector(v);
2422 c = &hydrogen;
2423 }
2424 }
2425
2426 //get connected atoms in order
2427 OBAtom *nbr;
2428 vector<int>::iterator j;
2429
2430 //try to get neighbors that are closure atoms in the order they appear in the string
2431 vector<OBNodeBase*> va;
2432 GetClosureAtoms(b,va);
2433 if (!va.empty())
2434 {
2435 vector<OBNodeBase*>::iterator k;
2436 for (k = va.begin();k != va.end();k++)
2437 if (*k != a)
2438 {
2439 if (!c)
2440 c = (OBAtom*)*k;
2441 else if (!d)
2442 d = (OBAtom*)*k;
2443 }
2444 }
2445
2446 for (j = _storder.begin();j != _storder.end();j++)
2447 {
2448 nbr = mol->GetAtom(*j);
2449 if (!b->IsConnected(nbr))
2450 continue;
2451 if (nbr == a || nbr == b || nbr == c)
2452 continue;
2453 if (!c)
2454 c = nbr;
2455 else if (!d)
2456 d = nbr;
2457 }
2458
2459 torsion = CalcTorsionAngle(a->GetVector(),b->GetVector(),
2460 c->GetVector(),d->GetVector());
2461
2462 // if we have a 2D structure with marked unspecified chirality
2463 // we should leave it blank and return false
2464 bool success = false;
2465 if ( !(is2D && b->IsChiral() && !b->HasChiralitySpecified()) )
2466 {
2467 strcpy(stereo,(torsion<0.0)?"@":"@@");
2468 success = true;
2469 }
2470 //if (b->GetHvyValence() == 3) strcat(stereo,"H");
2471
2472 //re-zero psuedo-coords
2473 if (is2D)
2474 {
2475 vector3 v;
2476 OBAtom *atom;
2477 vector<OBNodeBase*>::iterator k;
2478 for (atom = mol->BeginAtom(k);atom;atom = mol->NextAtom(k))
2479 {
2480 v = atom->GetVector();
2481 v.SetZ(0.0);
2482 atom->SetVector(v);
2483 }
2484 }
2485
2486 return(success);
2487 }
2488 //********************************************************
2489 class FIXFormat : public OBFormat
2490 {
2491 public:
2492 //Register this format type ID
2493 FIXFormat()
2494 {
2495 OBConversion::RegisterFormat("fix",this);
2496 }
2497
2498 virtual const char* Description() //required
2499 {
2500 return
2501 "SMILES FIX format\n \
2502 No comments yet\n \
2503 ";
2504 };
2505
2506 virtual const char* SpecificationURL(){return "";}; //optional
2507
2508 //Flags() can return be any the following combined by | or be omitted if none apply
2509 // NOTREADABLE READONEONLY NOTWRITABLE WRITEONEONLY
2510 virtual unsigned int Flags()
2511 {
2512 return NOTREADABLE;
2513 };
2514
2515 ////////////////////////////////////////////////////
2516 /// The "API" interface functions
2517 virtual bool WriteMolecule(OBBase* pOb, OBConversion* pConv);
2518
2519 ////////////////////////////////////////////////////
2520 /// The "Convert" interface functions
2521 virtual bool WriteChemObject(OBConversion* pConv)
2522 {
2523 //Retrieve the target OBMol
2524 OBBase* pOb = pConv->GetChemObject();
2525 OBMol* pmol = dynamic_cast<OBMol*> (pOb);
2526 bool ret=false;
2527 if(pmol)
2528 ret=WriteMolecule(pmol,pConv);
2529 delete pOb;
2530 return ret;
2531 };
2532 };
2533
2534 //Make an instance of the format class
2535 FIXFormat theFIXFormat;
2536
2537 /////////////////////////////////////////////////////////////////
2538
2539 bool FIXFormat::WriteMolecule(OBBase* pOb, OBConversion* pConv)
2540 {
2541 OBMol* pmol = dynamic_cast<OBMol*>(pOb);
2542 if(pmol==NULL)
2543 return false;
2544
2545 //Define some references so we can use the old parameter names
2546 ostream &ofs = *pConv->GetOutStream();
2547 OBMol &mol = *pmol;
2548
2549 char buffer[BUFF_SIZE];
2550 OBMol2Smi m2s;
2551
2552 // This is a hack to prevent recursion problems.
2553 // we still need to fix the underlying problem -GRH
2554 if (mol.NumAtoms() > 1000)
2555 {
2556 #ifdef HAVE_SSTREAM
2557 stringstream errorMsg;
2558 #else
2559 strstream errorMsg;
2560 #endif
2561 errorMsg << "SMILES Conversion failed: Molecule is too large to convert. Open Babel is currently limited to 1000 atoms." << endl;
2562 errorMsg << " Molecule size: " << mol.NumAtoms() << " atoms " << endl;
2563 obErrorLog.ThrowError(__func__, errorMsg.str(), obInfo);
2564 return(false);
2565 }
2566
2567 m2s.Init();
2568 //m2s.AssignCisTrans(mol);
2569 m2s.CorrectAromaticAmineCharge(mol);
2570 m2s.CreateSmiString(mol,buffer);
2571
2572 OBAtom *atom;
2573 vector<int>::iterator i;
2574 vector<int> order = m2s.GetOutputOrder();
2575 ofs << buffer << endl;
2576
2577 int j;
2578 for (j = 0;j < mol.NumConformers();j++)
2579 {
2580 mol.SetConformer(j);
2581 for (i = order.begin();i != order.end();i++)
2582 {
2583 atom = mol.GetAtom(*i);
2584 sprintf(buffer,"%9.3f %9.3f %9.3f",atom->GetX(),atom->GetY(),atom->GetZ());
2585 ofs << buffer<< endl;
2586 }
2587 }
2588 return(true);
2589 }
2590
2591 OBSmiNode::OBSmiNode(OBAtom *atom)
2592 {
2593 _atom = atom;
2594 _parent = NULL;
2595 _nextnode.clear();
2596 _nextbond.clear();
2597 }
2598
2599 void OBSmiNode::SetNextNode(OBSmiNode *node,OBBond *bond)
2600 {
2601 _nextnode.push_back(node);
2602 _nextbond.push_back(bond);
2603 }
2604
2605 OBSmiNode::~OBSmiNode()
2606 {
2607 vector<OBSmiNode*>::iterator i;
2608 for (i = _nextnode.begin();i != _nextnode.end();i++)
2609 delete (*i);
2610 }
2611
2612
2613 bool WriteTheSmiles(OBMol & mol,char *out)
2614 {
2615 char buffer[2*BUFF_SIZE];
2616
2617 OBMol2Smi m2s;
2618
2619 m2s.Init();
2620 m2s.CorrectAromaticAmineCharge(mol);
2621 m2s.CreateSmiString(mol,buffer);
2622
2623 strcpy(out,buffer);
2624 return(true);
2625
2626 }
2627 }