43 |
|
#include "utils/StringUtils.hpp" |
44 |
|
namespace oopse { |
45 |
|
|
46 |
< |
bool SelectionCompiler::compile(const std::string& filename, const std::string& script) { |
46 |
> |
bool SelectionCompiler::compile(const std::string& filename, const std::string& script) { |
47 |
|
|
48 |
|
this->filename = filename; |
49 |
|
this->script = script; |
52 |
|
aatokenCompiled.clear(); |
53 |
|
|
54 |
|
if (internalCompile()) { |
55 |
< |
return true; |
55 |
> |
return true; |
56 |
|
} |
57 |
|
|
58 |
|
int icharEnd; |
59 |
|
if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos && |
60 |
|
(icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) { |
61 |
< |
icharEnd = script.size(); |
61 |
> |
icharEnd = script.size(); |
62 |
|
} |
63 |
|
errorLine = script.substr(ichCurrentCommand, icharEnd); |
64 |
|
return false; |
65 |
< |
} |
65 |
> |
} |
66 |
|
|
67 |
< |
bool SelectionCompiler::internalCompile(){ |
67 |
> |
bool SelectionCompiler::internalCompile(){ |
68 |
|
|
69 |
|
cchScript = script.size(); |
70 |
|
ichToken = 0; |
80 |
|
int tokCommand = Token::nada; |
81 |
|
|
82 |
|
for ( ; true; ichToken += cchToken) { |
83 |
< |
if (lookingAtLeadingWhitespace()) |
84 |
< |
continue; |
85 |
< |
if (lookingAtComment()) |
86 |
< |
continue; |
87 |
< |
bool endOfLine = lookingAtEndOfLine(); |
88 |
< |
if (endOfLine || lookingAtEndOfStatement()) { |
89 |
< |
if (tokCommand != Token::nada) { |
90 |
< |
if (! compileCommand(ltoken)) { |
91 |
< |
return false; |
92 |
< |
} |
93 |
< |
aatokenCompiled.push_back(atokenCommand); |
94 |
< |
lineNumbers.push_back(lineCurrent); |
95 |
< |
lineIndices.push_back(ichCurrentCommand); |
96 |
< |
ltoken.clear(); |
97 |
< |
tokCommand = Token::nada; |
98 |
< |
} |
83 |
> |
if (lookingAtLeadingWhitespace()) |
84 |
> |
continue; |
85 |
> |
//if (lookingAtComment()) |
86 |
> |
// continue; |
87 |
> |
bool endOfLine = lookingAtEndOfLine(); |
88 |
> |
if (endOfLine || lookingAtEndOfStatement()) { |
89 |
> |
if (tokCommand != Token::nada) { |
90 |
> |
if (! compileCommand(ltoken)) { |
91 |
> |
return false; |
92 |
> |
} |
93 |
> |
aatokenCompiled.push_back(atokenCommand); |
94 |
> |
lineNumbers.push_back(lineCurrent); |
95 |
> |
lineIndices.push_back(ichCurrentCommand); |
96 |
> |
ltoken.clear(); |
97 |
> |
tokCommand = Token::nada; |
98 |
> |
} |
99 |
|
|
100 |
< |
if (ichToken < cchScript) { |
101 |
< |
if (endOfLine) |
102 |
< |
++lineCurrent; |
103 |
< |
continue; |
104 |
< |
} |
105 |
< |
break; |
106 |
< |
} |
100 |
> |
if (ichToken < cchScript) { |
101 |
> |
if (endOfLine) |
102 |
> |
++lineCurrent; |
103 |
> |
continue; |
104 |
> |
} |
105 |
> |
break; |
106 |
> |
} |
107 |
|
|
108 |
< |
if (tokCommand != Token::nada) { |
109 |
< |
if (lookingAtString()) { |
110 |
< |
std::string str = getUnescapedStringLiteral(); |
111 |
< |
ltoken.push_back(Token(Token::string, str)); |
112 |
< |
continue; |
113 |
< |
} |
114 |
< |
//if ((tokCommand & Token::specialstring) != 0 && |
115 |
< |
// lookingAtSpecialString()) { |
116 |
< |
// std::string str = script.substr(ichToken, ichToken + cchToken); |
117 |
< |
// ltoken.push_back(Token(Token::string, str)); |
118 |
< |
// continue; |
119 |
< |
//} |
120 |
< |
if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) { |
121 |
< |
float value = lexi_cast<float>(script.substr(ichToken, ichToken + cchToken)); |
122 |
< |
ltoken.push_back(Token(Token::decimal, value));/**@todo*/ |
123 |
< |
continue; |
124 |
< |
} |
125 |
< |
if (lookingAtInteger((tokCommand & Token::negnums) != 0)) { |
126 |
< |
std::string intString = script.substr(ichToken, ichToken + cchToken); |
127 |
< |
int val = lexi_cast<int>(intString); |
128 |
< |
ltoken.push_back(Token(Token::integer, val, intString));/**@todo*/ |
129 |
< |
continue; |
130 |
< |
} |
131 |
< |
} |
132 |
< |
|
133 |
< |
if (lookingAtLookupToken()) { |
134 |
< |
std::string ident = script.substr(ichToken, ichToken + cchToken); |
108 |
> |
if (tokCommand != Token::nada) { |
109 |
> |
if (lookingAtString()) { |
110 |
> |
std::string str = getUnescapedStringLiteral(); |
111 |
> |
ltoken.push_back(Token(Token::string, str)); |
112 |
> |
continue; |
113 |
> |
} |
114 |
> |
//if ((tokCommand & Token::specialstring) != 0 && |
115 |
> |
// lookingAtSpecialString()) { |
116 |
> |
// std::string str = script.substr(ichToken, ichToken + cchToken); |
117 |
> |
// ltoken.push_back(Token(Token::string, str)); |
118 |
> |
// continue; |
119 |
> |
//} |
120 |
> |
if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) { |
121 |
> |
float value = lexi_cast<float>(script.substr(ichToken, cchToken)); |
122 |
> |
ltoken.push_back(Token(Token::decimal, boost::any(value))); |
123 |
> |
continue; |
124 |
> |
} |
125 |
> |
if (lookingAtInteger((tokCommand & Token::negnums) != 0)) { |
126 |
|
|
127 |
< |
Token token; |
128 |
< |
Token* pToken = TokenMap::getInstance()->getToken(ident); |
129 |
< |
if (pToken != NULL) { |
130 |
< |
token = *pToken; |
131 |
< |
} else { |
132 |
< |
token = Token(Token::identifier, ident); |
133 |
< |
} |
134 |
< |
|
135 |
< |
int tok = token.tok; |
127 |
> |
int val = lexi_cast<int>(script.substr(ichToken, cchToken)); |
128 |
> |
ltoken.push_back(Token(Token::integer, boost::any(val))); |
129 |
> |
continue; |
130 |
> |
} |
131 |
> |
} |
132 |
> |
|
133 |
> |
if (lookingAtLookupToken()) { |
134 |
> |
std::string ident = script.substr(ichToken, cchToken); |
135 |
> |
Token token; |
136 |
> |
Token* pToken = TokenMap::getInstance()->getToken(ident); |
137 |
> |
if (pToken != NULL) { |
138 |
> |
token = *pToken; |
139 |
> |
} else { |
140 |
> |
token = Token(Token::identifier, ident); |
141 |
> |
} |
142 |
|
|
143 |
< |
switch (tokCommand) { |
144 |
< |
case Token::nada: |
145 |
< |
ichCurrentCommand = ichToken; |
146 |
< |
//tokenCommand = token; |
147 |
< |
tokCommand = tok; |
148 |
< |
if ((tokCommand & Token::command) == 0) |
149 |
< |
return commandExpected(); |
150 |
< |
break; |
143 |
> |
int tok = token.tok; |
144 |
> |
|
145 |
> |
switch (tokCommand) { |
146 |
> |
case Token::nada: |
147 |
> |
ichCurrentCommand = ichToken; |
148 |
> |
//tokenCommand = token; |
149 |
> |
tokCommand = tok; |
150 |
> |
if ((tokCommand & Token::command) == 0) |
151 |
> |
return commandExpected(); |
152 |
> |
break; |
153 |
|
|
154 |
< |
case Token::define: |
155 |
< |
if (ltoken.size() == 1) { |
156 |
< |
// we are looking at the variable name |
157 |
< |
if (tok != Token::identifier && |
158 |
< |
(tok & Token::predefinedset) != Token::predefinedset) |
159 |
< |
return invalidExpressionToken(ident); |
160 |
< |
} else { |
161 |
< |
// we are looking at the expression |
162 |
< |
if (tok != Token::identifier && |
163 |
< |
(tok & (Token::expression | Token::predefinedset)) == 0) |
164 |
< |
return invalidExpressionToken(ident); |
165 |
< |
} |
154 |
> |
case Token::define: |
155 |
> |
if (ltoken.size() == 1) { |
156 |
> |
// we are looking at the variable name |
157 |
> |
if (tok != Token::identifier && |
158 |
> |
(tok & Token::predefinedset) != Token::predefinedset) |
159 |
> |
return invalidExpressionToken(ident); |
160 |
> |
} else { |
161 |
> |
// we are looking at the expression |
162 |
> |
if (tok != Token::identifier && |
163 |
> |
(tok & (Token::expression | Token::predefinedset)) == 0) |
164 |
> |
return invalidExpressionToken(ident); |
165 |
> |
} |
166 |
|
|
167 |
< |
break; |
167 |
> |
break; |
168 |
|
|
169 |
< |
case Token::select: |
170 |
< |
if (tok != Token::identifier && (tok & Token::expression) == 0) |
171 |
< |
return invalidExpressionToken(ident); |
172 |
< |
break; |
173 |
< |
} |
174 |
< |
ltoken.push_back(token); |
175 |
< |
continue; |
176 |
< |
} |
169 |
> |
case Token::select: |
170 |
> |
if (tok != Token::identifier && (tok & Token::expression) == 0) |
171 |
> |
return invalidExpressionToken(ident); |
172 |
> |
break; |
173 |
> |
} |
174 |
> |
ltoken.push_back(token); |
175 |
> |
continue; |
176 |
> |
} |
177 |
|
|
178 |
< |
if (ltoken.size() == 0) { |
179 |
< |
return commandExpected(); |
180 |
< |
} |
178 |
> |
if (ltoken.size() == 0) { |
179 |
> |
return commandExpected(); |
180 |
> |
} |
181 |
|
|
182 |
< |
return unrecognizedToken(); |
182 |
> |
return unrecognizedToken(); |
183 |
|
} |
184 |
|
|
185 |
|
return true; |
204 |
|
if (ch == '\r') { |
205 |
|
++ichT; |
206 |
|
if (ichT < cchScript && script[ichT] == '\n') |
207 |
< |
++ichT; |
207 |
> |
++ichT; |
208 |
|
} else if (ch == '\n') { |
209 |
|
++ichT; |
210 |
|
} else { |
242 |
|
previousCharBackslash = ch == '\\' ? !previousCharBackslash : false; |
243 |
|
} |
244 |
|
cchToken = ichT - ichToken; |
245 |
+ |
|
246 |
|
return true; |
247 |
|
} |
248 |
|
|
249 |
|
|
250 |
< |
std::string SelectionCompiler::getUnescapedStringLiteral() { |
250 |
> |
std::string SelectionCompiler::getUnescapedStringLiteral() { |
251 |
|
/** @todo */ |
252 |
|
std::string sb(cchToken - 2, ' '); |
253 |
|
|
255 |
|
int ich = ichToken + 1; |
256 |
|
|
257 |
|
while (ich < ichMax) { |
258 |
< |
char ch = script[ich++]; |
259 |
< |
if (ch == '\\' && ich < ichMax) { |
260 |
< |
ch = script[ich++]; |
261 |
< |
switch (ch) { |
262 |
< |
case 'b': |
263 |
< |
ch = '\b'; |
264 |
< |
break; |
265 |
< |
case 'n': |
266 |
< |
ch = '\n'; |
267 |
< |
break; |
268 |
< |
case 't': |
269 |
< |
ch = '\t'; |
270 |
< |
break; |
271 |
< |
case 'r': |
272 |
< |
ch = '\r'; |
273 |
< |
// fall into |
274 |
< |
case '"': |
275 |
< |
case '\\': |
276 |
< |
case '\'': |
277 |
< |
break; |
278 |
< |
case 'x': |
279 |
< |
case 'u': |
280 |
< |
int digitCount = ch == 'x' ? 2 : 4; |
281 |
< |
if (ich < ichMax) { |
282 |
< |
int unicode = 0; |
283 |
< |
for (int k = digitCount; --k >= 0 && ich < ichMax; ) { |
284 |
< |
char chT = script[ich]; |
285 |
< |
int hexit = getHexitValue(chT); |
286 |
< |
if (hexit < 0) |
287 |
< |
break; |
288 |
< |
unicode <<= 4; |
289 |
< |
unicode += hexit; |
290 |
< |
++ich; |
291 |
< |
} |
292 |
< |
ch = (char)unicode; |
293 |
< |
} |
294 |
< |
} |
295 |
< |
} |
296 |
< |
sb.append(1, ch); |
258 |
> |
char ch = script[ich++]; |
259 |
> |
if (ch == '\\' && ich < ichMax) { |
260 |
> |
ch = script[ich++]; |
261 |
> |
switch (ch) { |
262 |
> |
case 'b': |
263 |
> |
ch = '\b'; |
264 |
> |
break; |
265 |
> |
case 'n': |
266 |
> |
ch = '\n'; |
267 |
> |
break; |
268 |
> |
case 't': |
269 |
> |
ch = '\t'; |
270 |
> |
break; |
271 |
> |
case 'r': |
272 |
> |
ch = '\r'; |
273 |
> |
// fall into |
274 |
> |
case '"': |
275 |
> |
case '\\': |
276 |
> |
case '\'': |
277 |
> |
break; |
278 |
> |
case 'x': |
279 |
> |
case 'u': |
280 |
> |
int digitCount = ch == 'x' ? 2 : 4; |
281 |
> |
if (ich < ichMax) { |
282 |
> |
int unicode = 0; |
283 |
> |
for (int k = digitCount; --k >= 0 && ich < ichMax; ) { |
284 |
> |
char chT = script[ich]; |
285 |
> |
int hexit = getHexitValue(chT); |
286 |
> |
if (hexit < 0) |
287 |
> |
break; |
288 |
> |
unicode <<= 4; |
289 |
> |
unicode += hexit; |
290 |
> |
++ich; |
291 |
> |
} |
292 |
> |
ch = (char)unicode; |
293 |
> |
} |
294 |
> |
} |
295 |
> |
} |
296 |
> |
sb.append(1, ch); |
297 |
|
} |
298 |
|
|
299 |
|
return sb; |
300 |
< |
} |
300 |
> |
} |
301 |
|
|
302 |
< |
/**
 * Converts one hexadecimal digit character to its numeric value.
 *
 * @param ch character to interpret
 * @return 0-9 for '0'-'9', 10-15 for 'a'-'f' or 'A'-'F', and -1 for any
 *         other character
 */
int SelectionCompiler::getHexitValue(char ch) {
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return 10 + ch - 'a';
    }
    if ('A' <= ch && ch <= 'F') {
        return 10 + ch - 'A';
    }
    // not a hexadecimal digit
    return -1;
}
312 |
|
|
313 |
< |
bool SelectionCompiler::lookingAtSpecialString() { |
313 |
> |
bool SelectionCompiler::lookingAtSpecialString() { |
314 |
|
int ichT = ichToken; |
315 |
|
char ch = script[ichT]; |
316 |
|
while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') { |
317 |
< |
++ichT; |
317 |
> |
++ichT; |
318 |
|
} |
319 |
|
cchToken = ichT - ichToken; |
320 |
|
return cchToken > 0; |
321 |
< |
} |
321 |
> |
} |
322 |
|
|
323 |
< |
bool SelectionCompiler::lookingAtDecimal(bool allowNegative) { |
323 |
> |
bool SelectionCompiler::lookingAtDecimal(bool allowNegative) { |
324 |
|
if (ichToken == cchScript) { |
325 |
< |
return false; |
325 |
> |
return false; |
326 |
|
} |
327 |
|
|
328 |
|
int ichT = ichToken; |
329 |
|
if (script[ichT] == '-') { |
330 |
< |
++ichT; |
330 |
> |
++ichT; |
331 |
|
} |
332 |
|
bool digitSeen = false; |
333 |
|
char ch = 'X'; |
334 |
|
while (ichT < cchScript && std::isdigit(ch = script[ichT])) { |
335 |
< |
++ichT; |
336 |
< |
digitSeen = true; |
335 |
> |
++ichT; |
336 |
> |
digitSeen = true; |
337 |
|
} |
338 |
|
|
339 |
|
if (ichT == cchScript || ch != '.') { |
340 |
< |
return false; |
340 |
> |
return false; |
341 |
|
} |
342 |
|
|
343 |
< |
// to support 1.ca, let's check the character after the dot |
344 |
< |
// to determine if it is an alpha |
345 |
< |
if (ch == '.' && (ichT + 1 < cchScript) && std::isalpha(script[ichT + 1])) { |
346 |
< |
return false; |
343 |
> |
// to support DMPC.1, let's check the character before the dot |
344 |
> |
if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) { |
345 |
> |
return false; |
346 |
|
} |
347 |
|
|
348 |
|
++ichT; |
349 |
|
while (ichT < cchScript && std::isdigit(script[ichT])) { |
350 |
< |
++ichT; |
351 |
< |
digitSeen = true; |
350 |
> |
++ichT; |
351 |
> |
digitSeen = true; |
352 |
|
} |
353 |
|
cchToken = ichT - ichToken; |
354 |
|
return digitSeen; |
355 |
< |
} |
355 |
> |
} |
356 |
|
|
357 |
< |
bool SelectionCompiler::lookingAtInteger(bool allowNegative) { |
357 |
> |
bool SelectionCompiler::lookingAtInteger(bool allowNegative) { |
358 |
|
if (ichToken == cchScript) { |
359 |
< |
return false; |
359 |
> |
return false; |
360 |
|
} |
361 |
|
int ichT = ichToken; |
362 |
|
if (allowNegative && script[ichToken] == '-') { |
363 |
< |
++ichT; |
363 |
> |
++ichT; |
364 |
|
} |
365 |
|
int ichBeginDigits = ichT; |
366 |
|
while (ichT < cchScript && std::isdigit(script[ichT])) { |
367 |
< |
++ichT; |
367 |
> |
++ichT; |
368 |
|
} |
369 |
|
if (ichBeginDigits == ichT) { |
370 |
< |
return false; |
370 |
> |
return false; |
371 |
|
} |
372 |
|
cchToken = ichT - ichToken; |
373 |
|
return true; |
374 |
< |
} |
374 |
> |
} |
375 |
|
|
376 |
< |
bool SelectionCompiler::lookingAtLookupToken() { |
376 |
> |
bool SelectionCompiler::lookingAtLookupToken() { |
377 |
|
if (ichToken == cchScript) { |
378 |
< |
return false; |
378 |
> |
return false; |
379 |
|
} |
380 |
|
|
381 |
|
int ichT = ichToken; |
382 |
|
char ch; |
383 |
|
switch (ch = script[ichT++]) { |
384 |
< |
case '(': |
385 |
< |
case ')': |
386 |
< |
case ',': |
387 |
< |
case '*': |
388 |
< |
case '-': |
389 |
< |
case '[': |
390 |
< |
case ']': |
391 |
< |
case '+': |
392 |
< |
case ':': |
393 |
< |
case '@': |
394 |
< |
case '.': |
395 |
< |
case '%': |
396 |
< |
break; |
397 |
< |
case '&': |
398 |
< |
case '|': |
399 |
< |
if (ichT < cchScript && script[ichT] == ch) { |
400 |
< |
++ichT; |
401 |
< |
} |
402 |
< |
break; |
403 |
< |
case '<': |
404 |
< |
case '=': |
405 |
< |
case '>': |
406 |
< |
if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) { |
407 |
< |
++ichT; |
408 |
< |
} |
409 |
< |
break; |
410 |
< |
case '/': |
411 |
< |
case '!': |
412 |
< |
if (ichT < cchScript && script[ichT] == '=') { |
413 |
< |
++ichT; |
414 |
< |
} |
415 |
< |
break; |
416 |
< |
default: |
417 |
< |
if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') { |
418 |
< |
return false; |
419 |
< |
} |
420 |
< |
case '?': // include question marks in identifier for atom expressions |
422 |
< |
while (ichT < cchScript && (std::isalpha(ch = script[ichT]) ||std::isdigit(ch) || |
423 |
< |
ch == '_' || ch == '?') ||(ch == '^' && ichT > ichToken && std::isdigit(script[ichT - 1]))) { |
424 |
< |
// hack for insertion codes embedded in an atom expression :-( |
425 |
< |
// select c3^a |
426 |
< |
++ichT; |
427 |
< |
} |
428 |
< |
break; |
384 |
> |
case '(': |
385 |
> |
case ')': |
386 |
> |
case ',': |
387 |
> |
case '[': |
388 |
> |
case ']': |
389 |
> |
break; |
390 |
> |
case '&': |
391 |
> |
case '|': |
392 |
> |
if (ichT < cchScript && script[ichT] == ch) { |
393 |
> |
++ichT; |
394 |
> |
} |
395 |
> |
break; |
396 |
> |
case '<': |
397 |
> |
case '=': |
398 |
> |
case '>': |
399 |
> |
if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) { |
400 |
> |
++ichT; |
401 |
> |
} |
402 |
> |
break; |
403 |
> |
case '/': |
404 |
> |
case '!': |
405 |
> |
if (ichT < cchScript && script[ichT] == '=') { |
406 |
> |
++ichT; |
407 |
> |
} |
408 |
> |
break; |
409 |
> |
default: |
410 |
> |
if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') { |
411 |
> |
return false; |
412 |
> |
} |
413 |
> |
case '*': |
414 |
> |
case '?': // include question marks in identifier for atom expressions |
415 |
> |
while (ichT < cchScript && !std::isspace(ch = script[ichT]) && |
416 |
> |
(std::isalpha(ch) ||std::isdigit(ch) || ch == '_' || ch == '.' || ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' || ch == ']') ){ |
417 |
> |
|
418 |
> |
++ichT; |
419 |
> |
} |
420 |
> |
break; |
421 |
|
} |
422 |
+ |
|
423 |
|
cchToken = ichT - ichToken; |
424 |
< |
return true; |
425 |
< |
} |
424 |
> |
|
425 |
> |
return true; |
426 |
> |
} |
427 |
|
|
428 |
< |
bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) { |
428 |
> |
bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) { |
429 |
|
const Token& tokenCommand = ltoken[0]; |
430 |
|
int tokCommand = tokenCommand.tok; |
431 |
|
|
432 |
|
atokenCommand = ltoken; |
433 |
|
if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) { |
434 |
< |
return false; |
434 |
> |
return false; |
435 |
|
} |
436 |
|
|
437 |
|
return true; |
438 |
< |
} |
438 |
> |
} |
439 |
|
|
440 |
< |
bool SelectionCompiler::compileExpression() { |
440 |
> |
bool SelectionCompiler::compileExpression() { |
441 |
|
/** todo */ |
442 |
|
int i = 1; |
443 |
|
int tokCommand = atokenCommand[0].tok; |
444 |
|
if (tokCommand == Token::define) { |
445 |
< |
i = 2; |
445 |
> |
i = 2; |
446 |
|
} else if ((tokCommand & Token::embeddedExpression) != 0) { |
447 |
< |
// look for the open parenthesis |
448 |
< |
while (i < atokenCommand.size() && |
449 |
< |
atokenCommand[i].tok != Token::leftparen) |
447 |
> |
// look for the open parenthesis |
448 |
> |
while (i < atokenCommand.size() && |
449 |
> |
atokenCommand[i].tok != Token::leftparen) |
450 |
|
++i; |
451 |
|
} |
452 |
|
|
453 |
|
if (i >= atokenCommand.size()) { |
454 |
< |
return true; |
454 |
> |
return true; |
455 |
|
} |
456 |
|
return compileExpression(i); |
457 |
|
} |
458 |
|
|
459 |
|
|
460 |
< |
bool SelectionCompiler::addTokenToPostfix(const Token& token) { |
460 |
> |
bool SelectionCompiler::addTokenToPostfix(const Token& token) { |
461 |
|
ltokenPostfix.push_back(token); |
462 |
|
return true; |
463 |
< |
} |
463 |
> |
} |
464 |
|
|
465 |
< |
bool SelectionCompiler::compileExpression(int itoken) { |
465 |
> |
bool SelectionCompiler::compileExpression(int itoken) { |
466 |
|
ltokenPostfix.clear(); |
467 |
|
for (int i = 0; i < itoken; ++i) { |
468 |
< |
addTokenToPostfix(atokenCommand[i]); |
468 |
> |
addTokenToPostfix(atokenCommand[i]); |
469 |
|
} |
470 |
|
|
471 |
|
atokenInfix = atokenCommand; |
473 |
|
|
474 |
|
addTokenToPostfix(Token::tokenExpressionBegin); |
475 |
|
if (!clauseOr()) { |
476 |
< |
return false; |
476 |
> |
return false; |
477 |
|
} |
478 |
|
|
479 |
|
addTokenToPostfix(Token::tokenExpressionEnd); |
480 |
|
if (itokenInfix != atokenInfix.size()) { |
481 |
< |
return endOfExpressionExpected(); |
481 |
> |
return endOfExpressionExpected(); |
482 |
|
} |
483 |
|
|
484 |
|
atokenCommand = ltokenPostfix; |
485 |
|
return true; |
486 |
< |
} |
486 |
> |
} |
487 |
|
|
488 |
< |
/**
 * Returns the infix token under the cursor and advances the cursor
 * (itokenInfix) by one.
 *
 * @return the next token, or a default-constructed Token when the cursor
 *         equals atokenInfix.size() (the cursor is then left unchanged)
 */
Token SelectionCompiler::tokenNext() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix++];
    }
    return Token();
}
494 |
|
|
495 |
< |
/**
 * Returns the value of the infix token under the cursor without advancing.
 *
 * @return a copy of that token's value, or an empty boost::any when the
 *         cursor equals atokenInfix.size()
 */
boost::any SelectionCompiler::valuePeek() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix].value;
    }
    return boost::any();
}
502 |
|
|
503 |
< |
int SelectionCompiler::tokPeek() { |
503 |
> |
int SelectionCompiler::tokPeek() { |
504 |
|
if (itokenInfix == atokenInfix.size()) { |
505 |
< |
return 0; |
505 |
> |
return 0; |
506 |
|
}else { |
507 |
< |
return atokenInfix[itokenInfix].tok; |
507 |
> |
return atokenInfix[itokenInfix].tok; |
508 |
|
} |
509 |
< |
} |
509 |
> |
} |
510 |
|
|
511 |
< |
bool SelectionCompiler::clauseOr() { |
511 |
> |
bool SelectionCompiler::clauseOr() { |
512 |
|
if (!clauseAnd()) { |
513 |
< |
return false; |
513 |
> |
return false; |
514 |
|
} |
515 |
|
|
516 |
|
while (tokPeek() == Token::opOr) { |
517 |
< |
Token tokenOr = tokenNext(); |
518 |
< |
if (!clauseAnd()) { |
519 |
< |
return false; |
520 |
< |
} |
521 |
< |
addTokenToPostfix(tokenOr); |
517 |
> |
Token tokenOr = tokenNext(); |
518 |
> |
if (!clauseAnd()) { |
519 |
> |
return false; |
520 |
> |
} |
521 |
> |
addTokenToPostfix(tokenOr); |
522 |
|
} |
523 |
|
return true; |
524 |
< |
} |
524 |
> |
} |
525 |
|
|
526 |
< |
bool SelectionCompiler::clauseAnd() { |
526 |
> |
bool SelectionCompiler::clauseAnd() { |
527 |
|
if (!clauseNot()) { |
528 |
< |
return false; |
528 |
> |
return false; |
529 |
|
} |
530 |
|
|
531 |
|
while (tokPeek() == Token::opAnd) { |
532 |
< |
Token tokenAnd = tokenNext(); |
533 |
< |
if (!clauseNot()) { |
534 |
< |
return false; |
535 |
< |
} |
536 |
< |
addTokenToPostfix(tokenAnd); |
532 |
> |
Token tokenAnd = tokenNext(); |
533 |
> |
if (!clauseNot()) { |
534 |
> |
return false; |
535 |
> |
} |
536 |
> |
addTokenToPostfix(tokenAnd); |
537 |
|
} |
538 |
|
return true; |
539 |
< |
} |
539 |
> |
} |
540 |
|
|
541 |
< |
bool SelectionCompiler::clauseNot() { |
541 |
> |
bool SelectionCompiler::clauseNot() { |
542 |
|
if (tokPeek() == Token::opNot) { |
543 |
< |
Token tokenNot = tokenNext(); |
544 |
< |
if (!clauseNot()) { |
545 |
< |
return false; |
546 |
< |
} |
547 |
< |
return addTokenToPostfix(tokenNot); |
543 |
> |
Token tokenNot = tokenNext(); |
544 |
> |
if (!clauseNot()) { |
545 |
> |
return false; |
546 |
> |
} |
547 |
> |
return addTokenToPostfix(tokenNot); |
548 |
|
} |
549 |
|
return clausePrimitive(); |
550 |
< |
} |
550 |
> |
} |
551 |
|
|
552 |
< |
bool SelectionCompiler::clausePrimitive() { |
552 |
> |
bool SelectionCompiler::clausePrimitive() { |
553 |
|
int tok = tokPeek(); |
554 |
|
switch (tok) { |
555 |
< |
case Token::within: |
556 |
< |
return clauseWithin(); |
555 |
> |
case Token::within: |
556 |
> |
return clauseWithin(); |
557 |
|
|
558 |
< |
case Token::asterisk: |
559 |
< |
case Token::identifier: |
560 |
< |
return clauseChemObjName(); |
561 |
< |
|
562 |
< |
default: |
563 |
< |
if ((tok & Token::atomproperty) == Token::atomproperty) { |
564 |
< |
return clauseComparator(); |
565 |
< |
} |
566 |
< |
if ((tok & Token::predefinedset) != Token::predefinedset) { |
567 |
< |
break; |
568 |
< |
} |
569 |
< |
// fall into the code and below and just add the token |
570 |
< |
case Token::all: |
571 |
< |
case Token::none: |
572 |
< |
return addTokenToPostfix(tokenNext()); |
573 |
< |
case Token::leftparen: |
574 |
< |
tokenNext(); |
575 |
< |
if (!clauseOr()) { |
576 |
< |
return false; |
577 |
< |
} |
578 |
< |
if (tokenNext().tok != Token::rightparen) { |
579 |
< |
return rightParenthesisExpected(); |
580 |
< |
} |
581 |
< |
return true; |
558 |
> |
case Token::asterisk: |
559 |
> |
case Token::identifier: |
560 |
> |
return clauseChemObjName(); |
561 |
> |
|
562 |
> |
case Token::integer : |
563 |
> |
return clauseIndex(); |
564 |
> |
default: |
565 |
> |
if ((tok & Token::atomproperty) == Token::atomproperty) { |
566 |
> |
return clauseComparator(); |
567 |
> |
} |
568 |
> |
if ((tok & Token::predefinedset) != Token::predefinedset) { |
569 |
> |
break; |
570 |
> |
} |
571 |
> |
// fall into the code and below and just add the token |
572 |
> |
case Token::all: |
573 |
> |
case Token::none: |
574 |
> |
return addTokenToPostfix(tokenNext()); |
575 |
> |
case Token::leftparen: |
576 |
> |
tokenNext(); |
577 |
> |
if (!clauseOr()) { |
578 |
> |
return false; |
579 |
> |
} |
580 |
> |
if (tokenNext().tok != Token::rightparen) { |
581 |
> |
return rightParenthesisExpected(); |
582 |
> |
} |
583 |
> |
return true; |
584 |
|
} |
585 |
|
return unrecognizedExpressionToken(); |
586 |
< |
} |
586 |
> |
} |
587 |
|
|
588 |
< |
bool SelectionCompiler::clauseComparator() { |
588 |
> |
bool SelectionCompiler::clauseComparator() { |
589 |
|
Token tokenAtomProperty = tokenNext(); |
590 |
|
Token tokenComparator = tokenNext(); |
591 |
|
if ((tokenComparator.tok & Token::comparator) == 0) { |
592 |
< |
return comparisonOperatorExpected(); |
592 |
> |
return comparisonOperatorExpected(); |
593 |
|
} |
594 |
|
|
595 |
|
Token tokenValue = tokenNext(); |
596 |
< |
if (tokenValue.tok != Token::integer) { |
597 |
< |
return integerExpected(); |
596 |
> |
if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) { |
597 |
> |
return numberExpected(); |
598 |
|
} |
599 |
< |
int val = tokenValue.intValue; |
600 |
< |
// note that a comparator instruction is a complicated instruction |
601 |
< |
// int intValue is the tok of the property you are comparing |
602 |
< |
// the value against which you are comparing is stored as an Integer |
603 |
< |
// in the object value |
599 |
> |
|
600 |
> |
float val; |
601 |
> |
if (tokenValue.value.type() == typeid(int)) { |
602 |
> |
val = boost::any_cast<int>(tokenValue.value); |
603 |
> |
} else if (tokenValue.value.type() == typeid(float)) { |
604 |
> |
val = boost::any_cast<float>(tokenValue.value); |
605 |
> |
} else { |
606 |
> |
return false; |
607 |
> |
} |
608 |
> |
|
609 |
> |
boost::any floatVal; |
610 |
> |
floatVal = val; |
611 |
|
return addTokenToPostfix(Token(tokenComparator.tok, |
612 |
< |
tokenAtomProperty.tok, boost::any(val))); |
613 |
< |
} |
612 |
> |
tokenAtomProperty.tok, floatVal)); |
613 |
> |
} |
614 |
|
|
615 |
< |
bool SelectionCompiler::clauseWithin() { |
615 |
> |
bool SelectionCompiler::clauseWithin() { |
616 |
|
tokenNext(); // WITHIN |
617 |
|
if (tokenNext().tok != Token::leftparen) { // ( |
618 |
< |
return leftParenthesisExpected(); |
618 |
> |
return leftParenthesisExpected(); |
619 |
|
} |
620 |
|
|
621 |
|
boost::any distance; |
622 |
|
Token tokenDistance = tokenNext(); // distance |
623 |
|
switch(tokenDistance.tok) { |
624 |
< |
case Token::integer: |
625 |
< |
distance = float(tokenDistance.intValue); |
626 |
< |
break; |
627 |
< |
case Token::decimal: |
628 |
< |
distance = tokenDistance.value; |
629 |
< |
break; |
627 |
< |
default: |
628 |
< |
return numberOrKeywordExpected(); |
624 |
> |
case Token::integer: |
625 |
> |
case Token::decimal: |
626 |
> |
distance = tokenDistance.value; |
627 |
> |
break; |
628 |
> |
default: |
629 |
> |
return numberOrKeywordExpected(); |
630 |
|
} |
631 |
|
|
632 |
|
if (tokenNext().tok != Token::opOr) { // , |
633 |
< |
return commaExpected(); |
633 |
> |
return commaExpected(); |
634 |
|
} |
635 |
|
|
636 |
|
if (! clauseOr()) { // *expression* |
637 |
< |
return false; |
637 |
> |
return false; |
638 |
|
} |
639 |
|
|
640 |
|
if (tokenNext().tok != Token::rightparen) { // )T |
641 |
< |
return rightParenthesisExpected(); |
641 |
> |
return rightParenthesisExpected(); |
642 |
|
} |
643 |
|
|
644 |
|
return addTokenToPostfix(Token(Token::within, distance)); |
645 |
< |
} |
645 |
> |
} |
646 |
|
|
647 |
< |
bool SelectionCompiler::clauseChemObjName() { |
648 |
< |
std::string chemObjName; |
649 |
< |
int tok = tokPeek(); |
649 |
< |
if (!clauseName(chemObjName)){ |
650 |
< |
return false; |
651 |
< |
} |
647 |
> |
bool SelectionCompiler::clauseChemObjName() { |
648 |
> |
Token token = tokenNext(); |
649 |
> |
if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) { |
650 |
|
|
651 |
+ |
std::string name = boost::any_cast<std::string>(token.value); |
652 |
+ |
if (isNameValid(name)) { |
653 |
+ |
return addTokenToPostfix(Token(Token::name, name)); |
654 |
+ |
} else { |
655 |
+ |
return compileError("invalid name: " + name); |
656 |
+ |
} |
657 |
+ |
} |
658 |
|
|
659 |
< |
tok = tokPeek(); |
660 |
< |
//allow two dot at most |
661 |
< |
if (tok == Token::dot) { |
657 |
< |
if (!clauseName(chemObjName)) { |
658 |
< |
return false; |
659 |
< |
} |
660 |
< |
tok = tokPeek(); |
661 |
< |
if (tok == Token::dot) { |
662 |
< |
if (!clauseName(chemObjName)) { |
663 |
< |
return false; |
664 |
< |
} |
665 |
< |
} |
666 |
< |
} |
659 |
> |
return false; |
660 |
> |
|
661 |
> |
} |
662 |
|
|
663 |
< |
return addTokenToPostfix(Token(Token::name, chemObjName)); |
664 |
< |
} |
663 |
> |
/**
 * Checks the shape of a name: square brackets must be balanced and properly
 * ordered, and the name may contain at most three dots.
 *
 * A ']' that appears before its matching '[' is now rejected; previously
 * only the net bracket count was checked, so a name such as "]x[" passed.
 *
 * @param name name to validate
 * @return true if the name satisfies both rules
 */
bool SelectionCompiler::isNameValid(const std::string& name) {
    int nbracket = 0;
    int ndot = 0;
    for (std::string::size_type i = 0; i < name.size(); ++i) {
        switch (name[i]) {
        case '[':
            ++nbracket;
            break;
        case ']':
            // closing bracket with nothing open
            if (--nbracket < 0) {
                return false;
            }
            break;
        case '.':
            ++ndot;
            break;
        }
    }

    // only allow 3 dots at most
    return ndot <= 3 && nbracket == 0;
}
684 |
|
|
685 |
< |
if (tok == Token::asterisk || tok == Token::identifier) { |
686 |
< |
name += boost::any_cast<std::string>(tokenNext().value); |
687 |
< |
|
688 |
< |
while(true){ |
689 |
< |
tok = tokPeek(); |
690 |
< |
switch (tok) { |
691 |
< |
case Token::asterisk : |
692 |
< |
name += "*"; |
693 |
< |
tokenNext(); |
694 |
< |
break; |
695 |
< |
case Token::identifier : |
696 |
< |
name += boost::any_cast<std::string>(tokenNext().value); |
697 |
< |
break; |
698 |
< |
case Token::integer : |
699 |
< |
name += toString(boost::any_cast<int>(tokenNext().value)); |
700 |
< |
break; |
701 |
< |
case Token::dot : |
702 |
< |
return true; |
703 |
< |
default : |
704 |
< |
return true; |
705 |
< |
} |
706 |
< |
} |
707 |
< |
|
708 |
< |
}else { |
709 |
< |
return false; |
685 |
> |
bool SelectionCompiler::clauseIndex(){ |
686 |
> |
Token token = tokenNext(); |
687 |
> |
if (token.tok == Token::integer) { |
688 |
> |
int index = boost::any_cast<int>(token.value); |
689 |
> |
int tok = tokPeek(); |
690 |
> |
std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl; |
691 |
> |
if (tok == Token::to) { |
692 |
> |
tokenNext(); |
693 |
> |
tok = tokPeek(); |
694 |
> |
if (tok != Token::integer) { |
695 |
> |
return numberExpected(); |
696 |
> |
} |
697 |
> |
|
698 |
> |
boost::any intVal = tokenNext().value; |
699 |
> |
int first = index; |
700 |
> |
if (intVal.type() != typeid(int)){ |
701 |
> |
return false; |
702 |
> |
} |
703 |
> |
int second = boost::any_cast<int>(intVal); |
704 |
> |
|
705 |
> |
return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second)))); |
706 |
> |
|
707 |
> |
}else { |
708 |
> |
return addTokenToPostfix(Token(Token::index, boost::any(index))); |
709 |
> |
} |
710 |
> |
} else { |
711 |
> |
return numberExpected(); |
712 |
|
} |
713 |
+ |
} |
714 |
|
|
715 |
|
} |
703 |
– |
|
704 |
– |
|
705 |
– |
} |