1 |
gezelter |
1081 |
# |
2 |
|
|
# SMARTS Patterns for Functional Group Classification |
3 |
|
|
# |
4 |
|
|
# written by Christian Laggner |
5 |
|
|
# Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH |
6 |
|
|
# |
7 |
|
|
# Released under the Lesser General Public License (LGPL license) |
8 |
|
|
# see http://www.gnu.org/copyleft/lesser.html |
9 |
|
|
# Version 221105 |
10 |
|
|
##################################################################################################### |
11 |
|
|
|
12 |
|
|
# General Stuff: |
13 |
|
|
# These patterns were written in an attempt to represent the classification of organic compounds |
14 |
|
|
# from the viewpoint of an organic chemist. |
15 |
|
|
# They are often very restrictive. This may be generally a good thing, but it also takes some time |
16 |
|
|
# for filtering/indexing large compound sets. |
17 |
|
|
# For filtering undesired groups (in druglike compounds) one will want to have more general patterns |
18 |
|
|
# (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...). |
19 |
|
|
# |
20 |
|
|
|
21 |
|
|
# Part I: Carbon |
22 |
|
|
# ============== |
23 |
|
|
|
24 |
|
|
|
25 |
|
|
# I.1: Carbon-Carbon Bonds |
26 |
|
|
# ------------------------ |
27 |
|
|
|
28 |
|
|
# I.1.1 Alkanes: |
29 |
|
|
|
30 |
|
|
Primary_carbon: [CX4H3][#6] |
31 |
|
|
|
32 |
|
|
Secondary_carbon: [CX4H2]([#6])[#6] |
33 |
|
|
|
34 |
|
|
Tertiary_carbon: [CX4H1]([#6])([#6])[#6] |
35 |
|
|
|
36 |
|
|
Quartary_carbon: [CX4]([#6])([#6])([#6])[#6] |
37 |
|
|
|
38 |
|
|
|
39 |
|
|
# I.1.2 C-C Double and Triple Bonds |
40 |
|
|
|
41 |
|
|
Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])] |
42 |
|
|
# sp2 C may be substituted only by C or H - |
43 |
|
|
# does not hit ketenes and allenes, nor enamines, enols and the like |
44 |
|
|
|
45 |
|
|
Alkyne: [CX2]#[CX2] |
46 |
|
|
# non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination |
47 |
|
|
|
48 |
|
|
Allene: [CX3]=[CX2]=[CX3] |
49 |
|
|
|
50 |
|
|
|
51 |
|
|
# I.2: One Carbon-Hetero Bond |
52 |
|
|
# --------------------------- |
53 |
|
|
|
54 |
|
|
|
55 |
|
|
# I.2.1 Alkyl Halogenides |
56 |
|
|
|
57 |
|
|
Alkylchloride: [ClX1][CX4] |
58 |
|
|
# will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats |
59 |
|
|
# a more restrictive version can be obtained by modifying the Alcohol string. |
60 |
|
|
|
61 |
|
|
Alkylfluoride: [FX1][CX4] |
62 |
|
|
|
63 |
|
|
Alkylbromide: [BrX1][CX4] |
64 |
|
|
|
65 |
|
|
Alkyliodide: [IX1][CX4] |
66 |
|
|
|
67 |
|
|
|
68 |
|
|
# I.2.2 Alcohols and Ethers |
69 |
|
|
|
70 |
|
|
Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])] |
71 |
|
|
# nonspecific definition, no acetals, aminals, and the like |
72 |
|
|
|
73 |
|
|
Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])] |
74 |
|
|
|
75 |
|
|
Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])] |
76 |
|
|
|
77 |
|
|
Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])] |
78 |
|
|
|
79 |
|
|
Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])] |
80 |
|
|
# no acetals and the like; no enolethers |
81 |
|
|
|
82 |
|
|
Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])] |
# each alkyl carbon is checked from the thioether S: it must not carry a further heteroatom (no thioacetals and the like) |
83 |
|
|
# no acetals and the like; no enolethers |
84 |
|
|
|
85 |
|
|
Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])] |
86 |
|
|
# no acetals and the like; no enolethers |
87 |
|
|
|
88 |
|
|
Diarylether: [c][OX2][c] |
89 |
|
|
|
90 |
|
|
Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])] |
# alkyl carbon must not carry a second heteroatom or halogen besides the S (no thioacetals and the like) |
91 |
|
|
|
92 |
|
|
Diarylthioether: [c][SX2][c] |
93 |
|
|
|
94 |
|
|
Oxonium: [O+;!$([O]~[!#6]);!$([O]*~[#7,#8,#15,#16])] |
# only C (or H) substituents on O+, and no substituent may itself be bound to N, O, P or S |
95 |
|
|
# can't be aromatic, thus O and not #8 |
96 |
|
|
|
97 |
|
|
# I.2.3 Amines |
98 |
|
|
|
99 |
|
|
Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])] |
100 |
|
|
# hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ... |
101 |
|
|
|
102 |
|
|
# the following amines include also the protonated forms |
103 |
|
|
|
104 |
|
|
Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] |
105 |
|
|
|
106 |
|
|
Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] |
107 |
|
|
|
108 |
|
|
Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] |
109 |
|
|
|
110 |
|
|
Quartary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])] |
111 |
|
|
|
112 |
|
|
Primary_arom_amine: [NX3H2+0,NX4H3+]c |
113 |
|
|
|
114 |
|
|
Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] |
115 |
|
|
|
116 |
|
|
Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] |
117 |
|
|
|
118 |
|
|
Quartary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])] |
119 |
|
|
|
120 |
|
|
Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])] |
121 |
|
|
|
122 |
|
|
Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])] |
123 |
|
|
|
124 |
|
|
Quartary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])] |
125 |
|
|
|
126 |
|
|
Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])] |
127 |
|
|
# only C and H substituents allowed. Quartary or protonated amines |
128 |
|
|
# NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present |
129 |
|
|
|
130 |
|
|
|
131 |
|
|
# I.2.4 Others |
132 |
|
|
|
133 |
|
|
Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])] |
134 |
|
|
|
135 |
|
|
Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])] |
136 |
|
|
|
137 |
|
|
Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])] |
138 |
|
|
|
139 |
|
|
Disulfide: [SX2D2][SX2D2] |
140 |
|
|
|
141 |
|
|
1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])] |
142 |
|
|
# does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc. |
143 |
|
|
|
144 |
|
|
1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H] |
145 |
|
|
# does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc. |
146 |
|
|
|
147 |
|
|
1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H] |
148 |
|
|
|
149 |
|
|
Hydroperoxide: [OX2H][OX2] |
150 |
|
|
# does not necessarily have to be connected to a carbon atom; also includes hydrotrioxides |
151 |
|
|
|
152 |
|
|
Peroxo: [OX2D2][OX2D2] |
153 |
|
|
|
154 |
|
|
Organolithium_compounds: [LiX1][#6,#14] |
155 |
|
|
|
156 |
|
|
Organomagnesium_compounds: [MgX2][#6,#14] |
157 |
|
|
# not restricted to Grignard compounds, also dialkyl Mg |
158 |
|
|
|
159 |
|
|
Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-] |
160 |
|
|
# very general, includes all metals covalently bound to carbon |
161 |
|
|
|
162 |
|
|
|
163 |
|
|
# I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives) |
164 |
|
|
# ---------------------------- |
165 |
|
|
|
166 |
|
|
# I.3.1 Double Bond to Hetero |
167 |
|
|
|
168 |
|
|
Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1] |
169 |
|
|
# hits aldehydes including formaldehyde |
170 |
|
|
|
171 |
|
|
Ketone: [#6][CX3](=[OX1])[#6] |
172 |
|
|
# does not include oxo-groups connected to a (hetero-) aromatic ring |
173 |
|
|
|
174 |
|
|
Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1] |
175 |
|
|
|
176 |
|
|
Thioketone: [#6][CX3](=[SX1])[#6] |
177 |
|
|
# does not include thioxo-groups connected to a (hetero-) aromatic ring |
178 |
|
|
|
179 |
|
|
Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])] |
180 |
|
|
# nitrogen is not part of an amide-like structure, nor of an aromatic ring, but can be part of an aminal or similar |
181 |
|
|
|
182 |
|
|
Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])] |
183 |
|
|
|
184 |
|
|
Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H] |
185 |
|
|
|
186 |
|
|
Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])] |
187 |
|
|
# ether, not ester or amide; does not hit isoxazole |
188 |
|
|
|
189 |
|
|
|
190 |
|
|
# I.3.2. Two Single Bonds to Hetero |
191 |
|
|
|
192 |
|
|
Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] |
193 |
|
|
# does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc. |
194 |
|
|
|
195 |
|
|
Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])] |
196 |
|
|
|
197 |
|
|
Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6] |
198 |
|
|
# Ns are not part of an amide or similar. v3 is used to exclude nitro and similar groups |
199 |
|
|
|
200 |
|
|
Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H] |
201 |
|
|
|
202 |
|
|
Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])] |
203 |
|
|
|
204 |
|
|
Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H] |
205 |
|
|
|
206 |
|
|
Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] |
207 |
|
|
# hits chloromethylenethers and other reactive alkylating agents |
208 |
|
|
|
209 |
|
|
Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] |
210 |
|
|
# includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar |
211 |
|
|
|
212 |
|
|
Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1] |
213 |
|
|
# also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed |
214 |
|
|
|
215 |
|
|
NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] |
216 |
|
|
# Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side |
217 |
|
|
|
218 |
|
|
Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])] |
219 |
|
|
# Combination of the last two patterns |
220 |
|
|
|
221 |
|
|
Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H] |
222 |
|
|
|
223 |
|
|
|
224 |
|
|
# I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar) |
225 |
|
|
|
226 |
|
|
Chloroalkene: [ClX1][CX3]=[CX3] |
227 |
|
|
|
228 |
|
|
Fluoroalkene: [FX1][CX3]=[CX3] |
229 |
|
|
|
230 |
|
|
Bromoalkene: [BrX1][CX3]=[CX3] |
231 |
|
|
|
232 |
|
|
Iodoalkene: [IX1][CX3]=[CX3] |
233 |
|
|
|
234 |
|
|
Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3] |
235 |
|
|
# no phenols |
236 |
|
|
|
237 |
|
|
Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H] |
238 |
|
|
# no 1,2-diphenols, ketenacetals, ... |
239 |
|
|
|
240 |
|
|
Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3] |
241 |
|
|
# also finds enediol diethers, but not enol esters; no aromatics |
242 |
|
|
|
243 |
|
|
Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])] |
244 |
|
|
|
245 |
|
|
|
246 |
|
|
Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3] |
247 |
|
|
# does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic |
248 |
|
|
|
249 |
|
|
Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3] |
250 |
|
|
|
251 |
|
|
Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3] |
252 |
|
|
|
253 |
|
|
|
254 |
|
|
# I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives) |
255 |
|
|
# ------------------------------ |
256 |
|
|
|
257 |
|
|
Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1] |
258 |
|
|
|
259 |
|
|
Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1] |
260 |
|
|
|
261 |
|
|
Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1] |
262 |
|
|
|
263 |
|
|
Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1] |
264 |
|
|
|
265 |
|
|
Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1] |
266 |
|
|
# all of the above |
267 |
|
|
|
268 |
|
|
|
269 |
|
|
# The following contains all simple carboxylic combinations of O, N, S, & Hal - |
270 |
|
|
# - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...) |
271 |
|
|
# Cyclic structures (including aromats) like lactones, lactames, ... got their own |
272 |
|
|
# definitions. Structures where both heteroatoms are part of an aromatic ring |
273 |
|
|
# (oxazoles, imidazoles, ...) were excluded. |
274 |
|
|
|
275 |
|
|
Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])] |
276 |
|
|
# includes carboxylate anions |
277 |
|
|
|
278 |
|
|
Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])] |
279 |
|
|
# does not hit anhydrides or lactones |
280 |
|
|
|
281 |
|
|
Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])] |
282 |
|
|
# may also be aromatic |
283 |
|
|
|
284 |
|
|
Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1]) |
285 |
|
|
# anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic |
286 |
|
|
|
287 |
|
|
Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6] |
288 |
|
|
# includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole |
289 |
|
|
|
290 |
|
|
Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])] |
291 |
|
|
# hits both tautomeric forms, as well as anions |
292 |
|
|
|
293 |
|
|
Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])] |
294 |
|
|
|
295 |
|
|
Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])] |
296 |
|
|
# may also be aromatic |
297 |
|
|
|
298 |
|
|
Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])] |
299 |
|
|
|
300 |
|
|
Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])] |
301 |
|
|
|
302 |
|
|
Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1] |
303 |
|
|
|
304 |
|
|
Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])] |
305 |
|
|
|
306 |
|
|
Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])] |
307 |
|
|
|
308 |
|
|
Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])] |
309 |
|
|
|
310 |
|
|
|
311 |
|
|
Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
312 |
|
|
# does not hit lactames |
313 |
|
|
|
314 |
|
|
Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2] |
315 |
|
|
|
316 |
|
|
Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])] |
317 |
|
|
|
318 |
|
|
Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])] |
319 |
|
|
|
320 |
|
|
Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
321 |
|
|
# cyclic amides, may also be aromatic |
322 |
|
|
|
323 |
|
|
Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) |
324 |
|
|
# may be part of a ring, even aromatic. only C allowed at central N. May also be triacyl amide |
325 |
|
|
|
326 |
|
|
N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1]) |
327 |
|
|
# everything else than H or C at central N |
328 |
|
|
|
329 |
|
|
Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1]) |
330 |
|
|
# can be deprotonated |
331 |
|
|
|
332 |
|
|
Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
333 |
|
|
# does not hit thiolactames |
334 |
|
|
|
335 |
|
|
Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
336 |
|
|
# cyclic thioamides, may also be aromatic |
337 |
|
|
|
338 |
|
|
|
339 |
|
|
Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])] |
340 |
|
|
# may also be part of a ring / aromatic |
341 |
|
|
|
342 |
|
|
Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])] |
343 |
|
|
# only basic amidines, not as part of aromatic ring (e.g. imidazole) |
344 |
|
|
|
345 |
|
|
Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])] |
346 |
|
|
|
347 |
|
|
Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])] |
348 |
|
|
# does not hit anhydrides of carboxylic acids with hydroxamic acids |
349 |
|
|
|
350 |
|
|
|
351 |
|
|
Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] |
352 |
|
|
# not cyclic |
353 |
|
|
|
354 |
|
|
Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] |
355 |
|
|
# the enamide-form of lactames. may be aromatic like 2-hydroxypyridine |
356 |
|
|
|
357 |
|
|
Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] |
358 |
|
|
# esters of the above structures. no anhydrides. |
359 |
|
|
|
360 |
|
|
Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])] |
361 |
|
|
# no oxazoles and similar |
362 |
|
|
|
363 |
|
|
Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] |
364 |
|
|
# not cyclic |
365 |
|
|
|
366 |
|
|
Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])] |
367 |
|
|
# the enamide-form of thiolactames. may be aromatic like 2-thiopyridine |
368 |
|
|
|
369 |
|
|
Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] |
370 |
|
|
# thioesters of the above structures. no anhydrides. |
371 |
|
|
|
372 |
|
|
Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])] |
373 |
|
|
# no thioxazoles and similar |
374 |
|
|
|
375 |
|
|
Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])] |
376 |
|
|
# only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring |
377 |
|
|
|
378 |
|
|
Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])] |
379 |
|
|
# one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole |
380 |
|
|
|
381 |
|
|
Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] |
382 |
|
|
# not cyclic |
383 |
|
|
|
384 |
|
|
Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1] |
385 |
|
|
# may also be aromatic |
386 |
|
|
|
387 |
|
|
# may be ring, aromatic, substituted with carbonyls, hetero, ... |
388 |
|
|
# (everything else would get too complicated) |
389 |
|
|
|
390 |
|
|
Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])] |
391 |
|
|
# hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ... |
392 |
|
|
|
393 |
|
|
|
394 |
|
|
Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-] |
395 |
|
|
# N may be alkylated, but not part of an amide (as in peptides), ionic forms are included |
396 |
|
|
# includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C |
397 |
|
|
# N may not be aromatic as in 1H-pyrrole-2-carboxylic acid |
398 |
|
|
|
399 |
|
|
Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-] |
400 |
|
|
|
401 |
|
|
Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] |
402 |
|
|
# finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides |
403 |
|
|
|
404 |
|
|
Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-] |
405 |
|
|
# finds C-terminal amino acids |
406 |
|
|
|
407 |
|
|
Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])] |
408 |
|
|
# finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond. |
409 |
|
|
|
410 |
|
|
|
411 |
|
|
Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6] |
412 |
|
|
# also hits anhydride-like structures (e.g. HC(OMe)2-OC=O residues) |
413 |
|
|
|
414 |
|
|
Ketene: [CX3]=[CX2]=[OX1] |
415 |
|
|
|
416 |
|
|
Ketenacetal: [#7X3,#8X2,#16X2;$(*[#6,#14])][#6X3]([#7X3,#8X2,#16X2;$(*[#6,#14])])=[#6X3] |
# heteroatoms are single-bonded: N with three connections, O or S with two |
417 |
|
|
# includes aminals, silylacetals, ketenesters, etc. C=C DB is not aromatic, everything else may be |
418 |
|
|
|
419 |
|
|
Nitrile: [NX1]#[CX2] |
420 |
|
|
# includes cyanhydrines |
421 |
|
|
|
422 |
|
|
Isonitrile: [CX1-]#[NX2+] |
423 |
|
|
|
424 |
|
|
|
425 |
|
|
Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I] |
426 |
|
|
# may be part of a ring, even aromatic |
427 |
|
|
|
428 |
|
|
Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])] |
429 |
|
|
|
430 |
|
|
Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#8X2][#6;!$(C=[O,N,S])] |
# beta-alkoxy/aryloxy enone; the carbon on the ester oxygen must not itself be C=O, C=N or C=S |
431 |
|
|
|
432 |
|
|
Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
433 |
|
|
|
434 |
|
|
Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1] |
435 |
|
|
|
436 |
|
|
|
437 |
|
|
|
438 |
|
|
# I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives) |
439 |
|
|
# ----------------------------- |
440 |
|
|
|
441 |
|
|
Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])] |
442 |
|
|
# may be part of a ring, even aromatic |
443 |
|
|
|
444 |
|
|
Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[FX1,ClX1,BrX1,IX1] |
# halogen sits directly on the carbonyl carbon (e.g. chloroformates) |
445 |
|
|
|
446 |
|
|
Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])] |
447 |
|
|
# unstable |
448 |
|
|
|
449 |
|
|
Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6] |
450 |
|
|
|
451 |
|
|
|
452 |
|
|
Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])] |
453 |
|
|
# may be part of a ring, even aromatic |
454 |
|
|
|
455 |
|
|
Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[FX1,ClX1,BrX1,IX1] |
# halogen sits directly on the thiocarbonyl carbon (e.g. chlorothionoformates) |
456 |
|
|
|
457 |
|
|
Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])] |
458 |
|
|
|
459 |
|
|
|
460 |
|
|
Urea: [#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])] |
461 |
|
|
# no check whether part of imide, biuret, etc. Aromatic structures are only hit if |
462 |
|
|
# both N share no double bonds, like in the dioxo-form of uracil |
463 |
|
|
|
464 |
|
|
Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])] |
465 |
|
|
|
466 |
|
|
Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])] |
467 |
|
|
# O may be substituted. no check whether further amide-like bonds are present. Aromatic |
468 |
|
|
# structures are only hit if single bonded N shares no additional double bond, like in |
469 |
|
|
# the 1-hydroxy-3-oxo form of uracil |
470 |
|
|
|
471 |
|
|
Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])] |
472 |
|
|
|
473 |
|
|
Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+] |
474 |
|
|
# also hits guanidinium salts. v3 and v4 to avoid nitroamidines |
475 |
|
|
|
476 |
|
|
Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-] |
477 |
|
|
# quite unstable, unlikely to be found. Also hits salts |
478 |
|
|
|
479 |
|
|
Urethan: [#7X3][#6](=[OX1])[#8X2][#6] |
480 |
|
|
# also hits when part of a ring, no check whether the last C is part of carbonyl |
481 |
|
|
|
482 |
|
|
Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3] |
483 |
|
|
|
484 |
|
|
Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] |
485 |
|
|
|
486 |
|
|
Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1] |
487 |
|
|
|
488 |
|
|
Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1] |
489 |
|
|
|
490 |
|
|
Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1] |
491 |
|
|
|
492 |
|
|
Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] |
493 |
|
|
|
494 |
|
|
Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1] |
495 |
|
|
|
496 |
|
|
Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1] |
497 |
|
|
|
498 |
|
|
Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1] |
499 |
|
|
|
500 |
|
|
|
501 |
|
|
Isocyanate: [NX2]=[CX2]=[OX1] |
502 |
|
|
|
503 |
|
|
Cyanate: [OX2][CX2]#[NX1] |
504 |
|
|
|
505 |
|
|
Isothiocyanate: [NX2]=[CX2]=[SX1] |
506 |
|
|
|
507 |
|
|
Thiocyanate: [SX2][CX2]#[NX1] |
508 |
|
|
|
509 |
|
|
Carbodiimide: [NX2]=[CX2]=[NX2] |
510 |
|
|
|
511 |
|
|
Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I] |
512 |
|
|
# halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more |
513 |
|
|
# stable as for example C(OCH3)4) |
514 |
|
|
|
515 |
|
|
|
516 |
|
|
# I.6 Aromatics |
517 |
|
|
# ------------- |
518 |
|
|
|
519 |
|
|
# I know that this classification is not very logical, arylamines are found under I.2 ... |
520 |
|
|
|
521 |
|
|
Phenol: [OX2H][c] |
522 |
|
|
|
523 |
|
|
1,2-Diphenol: [OX2H][c][c][OX2H] |
524 |
|
|
|
525 |
|
|
Arylchloride: [Cl][c] |
526 |
|
|
|
527 |
|
|
Arylfluoride: [F][c] |
528 |
|
|
|
529 |
|
|
Arylbromide: [Br][c] |
530 |
|
|
|
531 |
|
|
Aryliodide: [I][c] |
532 |
|
|
|
533 |
|
|
Arylthiol: [SX2H][c] |
534 |
|
|
|
535 |
|
|
Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])] |
536 |
|
|
# N may be substituted with H or C, but not carbonyl or similar |
537 |
|
|
# aromatic atom is always C, not S or P (these are not planar when substituted) |
538 |
|
|
|
539 |
|
|
Oxoarene: [c]=[OX1] |
540 |
|
|
|
541 |
|
|
Thioarene: [c]=[SX1] |
542 |
|
|
|
543 |
|
|
Hetero_N_basic_H: [nX3H1+0] |
544 |
|
|
# as in pyrrole. uncharged to exclude pyridinium ions |
545 |
|
|
|
546 |
|
|
Hetero_N_basic_no_H: [nX3H0+0] |
547 |
|
|
# as in N-methylpyrrole. uncharged to exclude pyridinium ions |
548 |
|
|
|
549 |
|
|
Hetero_N_nonbasic: [nX2,nX3+] |
550 |
|
|
# as in pyridine, pyridinium |
551 |
|
|
|
552 |
|
|
Hetero_O: [o] |
553 |
|
|
|
554 |
|
|
Hetero_S: [sX2] |
555 |
|
|
# X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic |
556 |
|
|
# (is not planar because of lonepair at S) |
557 |
|
|
|
558 |
|
|
Heteroaromatic: [a;!c] |
559 |
|
|
|
560 |
|
|
|
561 |
|
|
# Part II: N, S, P, Si, B |
562 |
|
|
# ======================= |
563 |
|
|
|
564 |
|
|
|
565 |
|
|
# II.1 Nitrogen |
566 |
|
|
# ------------- |
567 |
|
|
|
568 |
|
|
Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])] |
569 |
|
|
# hits nitrous acid, its anion, esters, and other O-substituted derivatives |
570 |
|
|
|
571 |
|
|
Thionitrite: [SX2][NX2]=[OX1] |
572 |
|
|
|
573 |
|
|
Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])] |
574 |
|
|
# hits nitric acid, its anion, esters, and other O-substituted derivatives |
575 |
|
|
|
576 |
|
|
Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8] |
577 |
|
|
# hits nitro groups attached to C,N, ... but not nitrates |
578 |
|
|
|
579 |
|
|
Nitroso: [NX2](=[OX1])[!#7;!#8] |
580 |
|
|
# no nitrites, no nitrosamines |
581 |
|
|
|
582 |
|
|
Azide: [NX1]~[NX2]~[NX2,NX1] |
583 |
|
|
# hits both mesomeric forms, also anion |
584 |
|
|
|
585 |
|
|
Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1] |
586 |
|
|
|
587 |
|
|
Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])] |
588 |
|
|
|
589 |
|
|
Diazonium: [#6][NX2+]#[NX1] |
590 |
|
|
|
591 |
|
|
Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1] |
592 |
|
|
|
593 |
|
|
Nitrosamide: [NX2](=[OX1])N-*=O |
594 |
|
|
# includes nitrososulfonamides |
595 |
|
|
|
596 |
|
|
N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])] |
597 |
|
|
# Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate. |
598 |
|
|
|
599 |
|
|
|
600 |
|
|
Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])] |
601 |
|
|
# no hydrazides |
602 |
|
|
|
603 |
|
|
Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6] |
604 |
|
|
|
605 |
|
|
Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])] |
606 |
|
|
# no discrimination between O-, N-, and O,N-substitution |
607 |
|
|
|
608 |
|
|
|
609 |
|
|
# II.2 Sulfur |
610 |
|
|
# ----------- |
611 |
|
|
|
612 |
|
|
Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])] |
613 |
|
|
# can't be aromatic, thus S and not #16 |
614 |
|
|
|
615 |
|
|
Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])] |
616 |
|
|
|
617 |
|
|
Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])] |
618 |
|
|
# can't be aromatic, thus S and not #16 |
619 |
|
|
|
620 |
|
|
Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] |
621 |
|
|
# includes anions |
622 |
|
|
|
623 |
|
|
Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] |
624 |
|
|
|
625 |
|
|
Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] |
626 |
|
|
|
627 |
|
|
Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])] |
628 |
|
|
|
629 |
|
|
Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
630 |
|
|
|
631 |
|
|
Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] |
632 |
|
|
|
633 |
|
|
Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6] |
634 |
|
|
# everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative |
635 |
|
|
# than sulfur, but this should be very very rare, anyway) |
636 |
|
|
|
637 |
|
|
|
638 |
|
|
|
639 |
|
|
#### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
640 |
|
|
|
641 |
|
|
|
642 |
|
|
|
643 |
|
|
|
644 |
|
|
Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])] |
645 |
|
|
|
646 |
|
|
Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
647 |
|
|
|
648 |
|
|
Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])] |
649 |
|
|
|
650 |
|
|
Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1] |
651 |
|
|
|
652 |
|
|
Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6] |
653 |
|
|
# includes all of the above and many more |
654 |
|
|
# for comparison: this is what "all sulfonic derivatives but not the ones above" would look like: |
655 |
|
|
# [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])] |
656 |
|
|
|
657 |
|
|
|
658 |
|
|
Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])] |
659 |
|
|
|
660 |
|
|
Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
661 |
|
|
|
662 |
|
|
Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] |
663 |
|
|
|
664 |
|
|
Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1] |
665 |
|
|
|
666 |
|
|
Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6] |
667 |
|
|
|
668 |
|
|
Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])] |
669 |
|
|
|
670 |
|
|
Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
671 |
|
|
|
672 |
|
|
Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])] |
673 |
|
|
|
674 |
|
|
Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1] |
675 |
|
|
|
676 |
|
|
Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6] |
677 |
|
|
|
678 |
|
|
|
679 |
|
|
# II.3 Phosphorus |
680 |
|
|
# --------------- |
681 |
|
|
|
682 |
|
|
Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])] |
683 |
|
|
# similar to amine, but less restrictive: includes also amide- and aminal-analogues |
684 |
|
|
|
685 |
|
|
Phosphine_oxide: [PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])] |
686 |
|
|
|
687 |
|
|
Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])] |
688 |
|
|
# similar to Ammonium |
689 |
|
|
|
690 |
|
|
Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])] |
691 |
|
|
|
692 |
|
|
|
693 |
|
|
# conventions for the following acids and derivatives: |
694 |
|
|
# acids find protonated and deprotonated acids |
695 |
|
|
# esters do not find mixed anhydrides ( ...P-O-C(=O)) |
696 |
|
|
# derivatives: substituents which go in place of the OH and =O are not H or C (may also be O, |
697 |
|
|
# thus including acids and esters) |
698 |
|
|
|
699 |
|
|
Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] |
700 |
|
|
# includes anions |
701 |
|
|
|
702 |
|
|
Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] |
703 |
|
|
|
704 |
|
|
Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] |
705 |
|
|
|
706 |
|
|
Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
707 |
|
|
|
708 |
|
|
Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
709 |
|
|
|
710 |
|
|
Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
711 |
|
|
|
712 |
|
|
Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6] |
713 |
|
|
# all of the above and much more |
714 |
|
|
|
715 |
|
|
|
716 |
|
|
Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] |
717 |
|
|
# includes anions |
718 |
|
|
|
719 |
|
|
Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] |
720 |
|
|
|
721 |
|
|
Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] |
722 |
|
|
|
723 |
|
|
Phosphoric_triester: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] |
724 |
|
|
|
725 |
|
|
Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
726 |
|
|
|
727 |
|
|
Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
728 |
|
|
|
729 |
|
|
Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
730 |
|
|
|
731 |
|
|
Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
732 |
|
|
|
733 |
|
|
Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
734 |
|
|
|
735 |
|
|
Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
736 |
|
|
|
737 |
|
|
Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6] |
738 |
|
|
|
739 |
|
|
|
740 |
|
|
Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])] |
741 |
|
|
|
742 |
|
|
Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])] |
743 |
|
|
|
744 |
|
|
Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
745 |
|
|
|
746 |
|
|
Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6] |
747 |
|
|
|
748 |
|
|
|
749 |
|
|
Phosphonous_acid: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])] |
750 |
|
|
|
751 |
|
|
Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])] |
752 |
|
|
|
753 |
|
|
Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])] |
754 |
|
|
|
755 |
|
|
Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
756 |
|
|
|
757 |
|
|
Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
758 |
|
|
|
759 |
|
|
Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
760 |
|
|
|
761 |
|
|
Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6] |
762 |
|
|
|
763 |
|
|
|
764 |
|
|
Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])] |
765 |
|
|
|
766 |
|
|
Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])] |
767 |
|
|
|
768 |
|
|
Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])] |
769 |
|
|
|
770 |
|
|
Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6] |
771 |
|
|
|
772 |
|
|
|
773 |
|
|
# II.4 Silicon |
774 |
|
|
# ------------ |
775 |
|
|
|
776 |
|
|
Quart_silane: [SiX4]([#6])([#6])([#6])[#6] |
777 |
|
|
# four C-substituents. non-reactive, non-toxic, in experimental phase for drug development |
778 |
|
|
|
779 |
|
|
Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])] |
780 |
|
|
# has 1-4 hydride(s), reactive. Daylight's depictmatch does not add hydrogens automatically to |
781 |
|
|
# the free positions at Si, thus the H counts had to be stated explicitly in the pattern |
782 |
|
|
|
783 |
|
|
Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6] |
784 |
|
|
# reagents for inserting protection groups |
785 |
|
|
|
786 |
|
|
Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6] |
787 |
|
|
# mostly acid-labile protection groups such as trimethylsilyl-ethers |
788 |
|
|
|
789 |
|
|
Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6] |
790 |
|
|
|
791 |
|
|
Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6] |
792 |
|
|
|
793 |
|
|
Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6] |
794 |
|
|
# four substituents which are neither C nor H |
795 |
|
|
|
796 |
|
|
|
797 |
|
|
# II.5 Boron |
798 |
|
|
# ---------- |
799 |
|
|
|
800 |
|
|
Trialkylborane: [BX3]([#6])([#6])[#6] |
801 |
|
|
# also carbonyls allowed |
802 |
|
|
|
803 |
|
|
Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6] |
804 |
|
|
# includes acids, esters, amides, ... H-substituent at B is very rare. |
805 |
|
|
|
806 |
|
|
Boronic_acid_derivative: [BX3]([#6])([!#6])[!#6] |
807 |
|
|
# exactly one C-substituent and two non-C substituents at B; includes acids, esters, amides, ... |
808 |
|
|
|
809 |
|
|
Borohydride: [BH1,BH2,BH3,BH4] |
810 |
|
|
# at least one H attached to B |
811 |
|
|
|
812 |
|
|
Quartary_boron: [BX4] |
813 |
|
|
# mostly borates (negative charge), in complex with Lewis-base |
814 |
|
|
|
815 |
|
|
|
816 |
|
|
|
817 |
|
|
# Part III: Some Special Patterns |
818 |
|
|
# =============================== |
819 |
|
|
|
820 |
|
|
|
821 |
|
|
# III.1 Chains |
822 |
|
|
# ------------ |
823 |
|
|
|
824 |
|
|
# some simple chains |
825 |
|
|
|
826 |
|
|
|
827 |
|
|
|
828 |
|
|
# III.2 Rings |
829 |
|
|
# ----------- |
830 |
|
|
|
831 |
|
|
Aromatic: a |
832 |
|
|
|
833 |
|
|
Heterocyclic: [!#6;!R0] |
834 |
|
|
# may be aromatic or not |
835 |
|
|
|
836 |
|
|
Epoxide: [OX2r3]1[#6r3][#6r3]1 |
837 |
|
|
# toxic/reactive. may be annelated to aromat, but must not be aromatic itself (oxirane-2,3-dione) |
838 |
|
|
|
839 |
|
|
NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1 |
840 |
|
|
# toxic/reactive according to Maybridge's garbage filter |
841 |
|
|
|
842 |
|
|
Spiro: [D4R;$(*(@*)(@*)(@*)@*)] |
843 |
|
|
# at least two different rings can be found which are sharing just one atom. |
844 |
|
|
# these two rings can be connected by a third ring, so it matches also some |
845 |
|
|
# bridged systems, like morphine |
846 |
|
|
|
847 |
|
|
Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])] |
848 |
|
|
# two different rings sharing exactly two atoms |
849 |
|
|
|
850 |
|
|
Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])] |
851 |
|
|
# part of two or more rings, not spiro, not annelated -> finds bridgehead atoms, |
852 |
|
|
# but only if they are not annelated at the same time - otherwise impossible (?) |
853 |
|
|
# to distinguish from non-bridgehead annelated atoms |
854 |
|
|
|
855 |
|
|
# some basic ring-patterns (just size, no other information): |
856 |
|
|
|
857 |
|
|
|
858 |
|
|
|
859 |
|
|
|
860 |
|
|
|
861 |
|
|
# III.3 Sugars and Nucleosides/Nucleotides, Steroids |
862 |
|
|
# -------------------------------------------------- |
863 |
|
|
|
864 |
|
|
# because of the large variety of sugar derivatives, different patterns can be applied. |
865 |
|
|
# The choice of patterns and their combinations will depend on the contents of the database |
866 |
|
|
# e.g. natural products, nucleoside analogues with modified sugars, ... as well as on the |
867 |
|
|
# desired restriction |
868 |
|
|
|
869 |
|
|
|
870 |
|
|
Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)] |
871 |
|
|
# 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents. |
872 |
|
|
|
873 |
|
|
Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] |
874 |
|
|
# 5 or 6-membered ring containing one O and an acetal-like bond at position 2. |
875 |
|
|
|
876 |
|
|
Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)] |
877 |
|
|
# combination of the two above |
878 |
|
|
|
879 |
|
|
Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)] |
880 |
|
|
# 5 or 6-membered cyclic hemi-acetal |
881 |
|
|
|
882 |
|
|
Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] |
883 |
|
|
# 5 or 6-membered ring with an acetal-like bond at position 2, chirality at that carbon specified as @@ |
884 |
|
|
|
885 |
|
|
Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)] |
886 |
|
|
# 5 or 6-membered ring with an acetal-like bond at position 2, chirality at that carbon specified as @ |
887 |
|
|
|
888 |
|
|
Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]) |
889 |
|
|
# pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!) |
890 |
|
|
|
891 |
|
|
Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]) |
892 |
|
|
# pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!) |
893 |
|
|
|
894 |
|
|
|
895 |
|
|
# III.4 Everything else... |
896 |
|
|
# ------------------------ |
897 |
|
|
|
898 |
|
|
Conjugated_double_bond: *=*[*]=,#,:[*] |
899 |
|
|
|
900 |
|
|
Conjugated_tripple_bond: *#*[*]=,#,:[*] |
901 |
|
|
|
902 |
|
|
Cis_double_bond: */[D2]=[D2]\* |
903 |
|
|
# only one single-bonded substituent on each DB-atom. no aromats. |
904 |
|
|
# only found when character of DB is explicitly stated. |
905 |
|
|
|
906 |
|
|
Trans_double_bond: */[D2]=[D2]/* |
907 |
|
|
# analogous to Cis_double_bond |
908 |
|
|
|
909 |
|
|
Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))] |
910 |
|
|
# should hit all combinations of two acids |
911 |
|
|
|
912 |
|
|
Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6] |
913 |
|
|
|
914 |
|
|
Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])] |
915 |
|
|
# Halogen which is not mono-substituted nor an anion, e.g. chlorate. |
916 |
|
|
# Most of these cases should be also filtered by Halogen_on_hetero. |
917 |
|
|
|
918 |
|
|
Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([C]([F])([F])([F])[F])]([FX1])([FX1]) |
919 |
|
|
# C with three F attached, connected to anything which is not another halogen (CF4 is excluded) |
920 |
|
|
|
921 |
|
|
C_ONS_bond: [#6]~[#7,#8,#16] |
922 |
|
|
# probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter |
923 |
|
|
|
924 |
|
|
# Mixture: (*).(*) |
925 |
|
|
# two or more separate parts, may also be salt |
926 |
|
|
# component-level grouping is not yet supported in Open Babel Version 2.0 |
927 |
|
|
|
928 |
|
|
|
929 |
|
|
Charged: [!+0] |
930 |
|
|
|
931 |
|
|
Anion: [-1,-2,-3,-4,-5,-6,-7] |
932 |
|
|
|
933 |
|
|
Kation: [+1,+2,+3,+4,+5,+6,+7] |
934 |
|
|
|
935 |
|
|
Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7]) |
936 |
|
|
# two or more separate components with opposite charges |
937 |
|
|
|
938 |
|
|
Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7]) |
939 |
|
|
# both negative and positive charges somewhere within the same molecule. |
940 |
|
|
|
941 |
|
|
1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)] |
942 |
|
|
# 1,3 migration of H allowed. Includes keto/enol and amide/enamide. |
943 |
|
|
# Aromatic rings must stay aromatic - no keto form of phenol |
944 |
|
|
|
945 |
|
|
1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)] |
946 |
|
|
|
947 |
|
|
Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1] |
948 |
|
|
# taken from http://www.daylight.com/support/contrib/smarts/content.html |
949 |
|
|
|
950 |
|
|
Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])] |
951 |
|
|
# the classical case: C=C near carbonyl, nitrile, nitro, or similar |
952 |
|
|
# Oxo-heteroaromats and similar are not included. |
953 |
|
|
|
954 |
|
|
Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1]) |
955 |
|
|
# Michael-like acceptor, see Mitsunobu reaction |
956 |
|
|
|
957 |
|
|
# H-Bond_donor: |
958 |
|
|
|
959 |
|
|
# H-Bond_acceptor: |
960 |
|
|
|
961 |
|
|
# Pos_ionizable: |
962 |
|
|
|
963 |
|
|
# Neg_ionizable: |
964 |
|
|
|
965 |
|
|
# Unlikely_ions: |
966 |
|
|
# O+,N-,C+,C-, ... |
967 |
|
|
|
968 |
|
|
CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)] |
969 |
|
|
# C-H alpha to carbonyl, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed. |
970 |
|
|
# pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded. |
971 |
|
|
# hits also CH-acidic_strong |
972 |
|
|
|
973 |
|
|
CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])] |
974 |
|
|
# same as above (without pentadiene), but carbonyl or similar on two or three sides |
975 |
|
|
|
976 |
|
|
Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)] |
977 |
|
|
# Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string |
978 |
|
|
# depictmatch does not find oxonium, sulfonium, or sulfoxides! |
979 |
|
|
|
980 |
|
|
# Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)] |
981 |
|
|
# Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string |
982 |
|
|
# "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0 |
983 |
|
|
|