ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/selection/SelectionCompiler.cpp
Revision: 1782
Committed: Wed Aug 22 02:28:28 2012 UTC (12 years, 8 months ago) by gezelter
File size: 18456 byte(s)
Log Message:
MERGE OpenMD development branch 1465:1781 into trunk

File Contents

# Content
1 /*
2 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3 *
4 * The University of Notre Dame grants you ("Licensee") a
5 * non-exclusive, royalty free, license to use, modify and
6 * redistribute this software in source and binary code form, provided
7 * that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the
15 * distribution.
16 *
17 * This software is provided "AS IS," without a warranty of any
18 * kind. All express or implied conditions, representations and
19 * warranties, including any implied warranty of merchantability,
20 * fitness for a particular purpose or non-infringement, are hereby
21 * excluded. The University of Notre Dame and its licensors shall not
22 * be liable for any damages suffered by licensee as a result of
23 * using, modifying or distributing the software or its
24 * derivatives. In no event will the University of Notre Dame or its
25 * licensors be liable for any lost revenue, profit or data, or for
26 * direct, indirect, special, consequential, incidental or punitive
27 * damages, however caused and regardless of the theory of liability,
28 * arising out of the use of or inability to use software, even if the
29 * University of Notre Dame has been advised of the possibility of
30 * such damages.
31 *
32 * SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
33 * research, please cite the appropriate papers when you publish your
34 * work. Good starting points are:
35 *
36 * [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
37 * [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
38 * [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 24107 (2008).
39 * [4] Kuang & Gezelter, J. Chem. Phys. 133, 164101 (2010).
40 * [5] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
41 */
42
43 #include "selection/SelectionCompiler.hpp"
44 #include "utils/StringUtils.hpp"
45 namespace OpenMD {
46
47 bool SelectionCompiler::compile(const std::string& filename,
48 const std::string& script) {
49
50 this->filename = filename;
51 this->script = script;
52 lineNumbers.clear();
53 lineIndices.clear();
54 aatokenCompiled.clear();
55
56 if (internalCompile()) {
57 return true;
58 }
59
60 int icharEnd;
61 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
62 (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
63 icharEnd = script.size();
64 }
65 errorLine = script.substr(ichCurrentCommand, icharEnd);
66 return false;
67 }
68
69 bool SelectionCompiler::internalCompile(){
70
71 cchScript = script.size();
72 ichToken = 0;
73 lineCurrent = 1;
74
75 error = false;
76
77 //std::vector<Token> lltoken;
78 aatokenCompiled.clear();
79 std::vector<Token> ltoken;
80
81 Token tokenCommand;
82 int tokCommand = Token::nada;
83
84 for ( ; true; ichToken += cchToken) {
85 if (lookingAtLeadingWhitespace())
86 continue;
87 //if (lookingAtComment())
88 // continue;
89 bool endOfLine = lookingAtEndOfLine();
90 if (endOfLine || lookingAtEndOfStatement()) {
91 if (tokCommand != Token::nada) {
92 if (! compileCommand(ltoken)) {
93 return false;
94 }
95 aatokenCompiled.push_back(atokenCommand);
96 lineNumbers.push_back(lineCurrent);
97 lineIndices.push_back(ichCurrentCommand);
98 ltoken.clear();
99 tokCommand = Token::nada;
100 }
101
102 if (ichToken < cchScript) {
103 if (endOfLine)
104 ++lineCurrent;
105 continue;
106 }
107 break;
108 }
109
110 if (tokCommand != Token::nada) {
111 if (lookingAtString()) {
112 std::string str = getUnescapedStringLiteral();
113 ltoken.push_back(Token(Token::string, str));
114 continue;
115 }
116 //if ((tokCommand & Token::specialstring) != 0 &&
117 // lookingAtSpecialString()) {
118 // std::string str = script.substr(ichToken, ichToken + cchToken);
119 // ltoken.push_back(Token(Token::string, str));
120 // continue;
121 //}
122 if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
123 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
124 ltoken.push_back(Token(Token::decimal, boost::any(value)));
125 continue;
126 }
127 if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
128
129 int val = lexi_cast<int>(script.substr(ichToken, cchToken));
130 ltoken.push_back(Token(Token::integer, boost::any(val)));
131 continue;
132 }
133 }
134
135 if (lookingAtLookupToken()) {
136 std::string ident = script.substr(ichToken, cchToken);
137 Token token;
138 Token* pToken = TokenMap::getInstance()->getToken(ident);
139 if (pToken != NULL) {
140 token = *pToken;
141 } else {
142 token = Token(Token::identifier, ident);
143 }
144
145 int tok = token.tok;
146
147 switch (tokCommand) {
148 case Token::nada:
149 ichCurrentCommand = ichToken;
150 //tokenCommand = token;
151 tokCommand = tok;
152 if ((tokCommand & Token::command) == 0)
153 return commandExpected();
154 break;
155
156 case Token::define:
157 if (ltoken.size() == 1) {
158 // we are looking at the variable name
159 if (tok != Token::identifier &&
160 (tok & Token::predefinedset) != Token::predefinedset)
161 return invalidExpressionToken(ident);
162 } else {
163 // we are looking at the expression
164 if (tok != Token::identifier &&
165 (tok & (Token::expression | Token::predefinedset)) == 0)
166 return invalidExpressionToken(ident);
167 }
168
169 break;
170
171 case Token::select:
172 if (tok != Token::identifier && (tok & Token::expression) == 0)
173 return invalidExpressionToken(ident);
174 break;
175 }
176 ltoken.push_back(token);
177 continue;
178 }
179
180 if (ltoken.size() == 0) {
181 return commandExpected();
182 }
183
184 return unrecognizedToken();
185 }
186
187 return true;
188 }
189
190
191 bool SelectionCompiler::lookingAtLeadingWhitespace() {
192
193 int ichT = ichToken;
194 while (ichT < cchScript && std::isspace(script[ichT])) {
195 ++ichT;
196 }
197 cchToken = ichT - ichToken;
198 return cchToken > 0;
199 }
200
201 bool SelectionCompiler::lookingAtEndOfLine() {
202 if (ichToken == cchScript)
203 return true;
204 int ichT = ichToken;
205 char ch = script[ichT];
206 if (ch == '\r') {
207 ++ichT;
208 if (ichT < cchScript && script[ichT] == '\n')
209 ++ichT;
210 } else if (ch == '\n') {
211 ++ichT;
212 } else {
213 return false;
214 }
215 cchToken = ichT - ichToken;
216 return true;
217 }
218
219 bool SelectionCompiler::lookingAtEndOfStatement() {
220 if (ichToken == cchScript || script[ichToken] != ';')
221 return false;
222 cchToken = 1;
223 return true;
224 }
225
226 bool SelectionCompiler::lookingAtString() {
227 if (ichToken == cchScript)
228 return false;
229 if (script[ichToken] != '"')
230 return false;
231 // remove support for single quote
232 // in order to use it in atom expressions
233 // char chFirst = script.charAt(ichToken);
234 // if (chFirst != '"' && chFirst != '\'')
235 // return false;
236 int ichT = ichToken + 1;
237 // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
238 char ch;
239 bool previousCharBackslash = false;
240 while (ichT < cchScript) {
241 ch = script[ichT++];
242 if (ch == '"' && !previousCharBackslash)
243 break;
244 previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
245 }
246 cchToken = ichT - ichToken;
247
248 return true;
249 }
250
251
252 std::string SelectionCompiler::getUnescapedStringLiteral() {
253 /** @todo */
254 std::string sb(cchToken - 2, ' ');
255
256 int ichMax = ichToken + cchToken - 1;
257 int ich = ichToken + 1;
258
259 while (ich < ichMax) {
260 char ch = script[ich++];
261 if (ch == '\\' && ich < ichMax) {
262 ch = script[ich++];
263 switch (ch) {
264 case 'b':
265 ch = '\b';
266 break;
267 case 'n':
268 ch = '\n';
269 break;
270 case 't':
271 ch = '\t';
272 break;
273 case 'r':
274 ch = '\r';
275 // fall into
276 case '"':
277 case '\\':
278 case '\'':
279 break;
280 case 'x':
281 case 'u':
282 int digitCount = ch == 'x' ? 2 : 4;
283 if (ich < ichMax) {
284 int unicode = 0;
285 for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
286 char chT = script[ich];
287 int hexit = getHexitValue(chT);
288 if (hexit < 0)
289 break;
290 unicode <<= 4;
291 unicode += hexit;
292 ++ich;
293 }
294 ch = (char)unicode;
295 }
296 }
297 }
298 sb.append(1, ch);
299 }
300
301 return sb;
302 }
303
304 int SelectionCompiler::getHexitValue(char ch) {
305 if (ch >= '0' && ch <= '9')
306 return ch - '0';
307 else if (ch >= 'a' && ch <= 'f')
308 return 10 + ch - 'a';
309 else if (ch >= 'A' && ch <= 'F')
310 return 10 + ch - 'A';
311 else
312 return -1;
313 }
314
315 bool SelectionCompiler::lookingAtSpecialString() {
316 int ichT = ichToken;
317 char ch = script[ichT];
318 while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
319 ++ichT;
320 }
321 cchToken = ichT - ichToken;
322 return cchToken > 0;
323 }
324
325 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
326 if (ichToken == cchScript) {
327 return false;
328 }
329
330 int ichT = ichToken;
331 if (script[ichT] == '-') {
332 ++ichT;
333 }
334 bool digitSeen = false;
335 char ch = 'X';
336 while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
337 ++ichT;
338 digitSeen = true;
339 }
340
341 if (ichT == cchScript || ch != '.') {
342 return false;
343 }
344
345 // to support DMPC.1, let's check the character before the dot
346 if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
347 return false;
348 }
349
350 ++ichT;
351 while (ichT < cchScript && std::isdigit(script[ichT])) {
352 ++ichT;
353 digitSeen = true;
354 }
355 cchToken = ichT - ichToken;
356 return digitSeen;
357 }
358
359 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
360 if (ichToken == cchScript) {
361 return false;
362 }
363 int ichT = ichToken;
364 if (allowNegative && script[ichToken] == '-') {
365 ++ichT;
366 }
367 int ichBeginDigits = ichT;
368 while (ichT < cchScript && std::isdigit(script[ichT])) {
369 ++ichT;
370 }
371 if (ichBeginDigits == ichT) {
372 return false;
373 }
374 cchToken = ichT - ichToken;
375 return true;
376 }
377
378 bool SelectionCompiler::lookingAtLookupToken() {
379 if (ichToken == cchScript) {
380 return false;
381 }
382
383 int ichT = ichToken;
384 char ch;
385 switch (ch = script[ichT++]) {
386 case '(':
387 case ')':
388 case ',':
389 case '[':
390 case ']':
391 break;
392 case '&':
393 case '|':
394 if (ichT < cchScript && script[ichT] == ch) {
395 ++ichT;
396 }
397 break;
398 case '<':
399 case '=':
400 case '>':
401 if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
402 ++ichT;
403 }
404 break;
405 case '/':
406 case '!':
407 if (ichT < cchScript && script[ichT] == '=') {
408 ++ichT;
409 }
410 break;
411 default:
412 if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
413 return false;
414 }
415 case '*':
416 case '?': // include question marks in identifier for atom expressions
417 while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
418 (std::isalpha(ch) ||std::isdigit(ch) || ch == '_' || ch == '.' || ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' || ch == ']') ){
419
420 ++ichT;
421 }
422 break;
423 }
424
425 cchToken = ichT - ichToken;
426
427 return true;
428 }
429
430 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
431 const Token& tokenCommand = ltoken[0];
432 int tokCommand = tokenCommand.tok;
433
434 atokenCommand = ltoken;
435 if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
436 return false;
437 }
438
439 return true;
440 }
441
442 bool SelectionCompiler::compileExpression() {
443 /** todo */
444 unsigned int i = 1;
445 int tokCommand = atokenCommand[0].tok;
446 if (tokCommand == Token::define) {
447 i = 2;
448 } else if ((tokCommand & Token::embeddedExpression) != 0) {
449 // look for the open parenthesis
450 while (i < atokenCommand.size() &&
451 atokenCommand[i].tok != Token::leftparen)
452 ++i;
453 }
454
455 if (i >= atokenCommand.size()) {
456 return true;
457 }
458 return compileExpression(i);
459 }
460
461
462 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
463 ltokenPostfix.push_back(token);
464 return true;
465 }
466
467 bool SelectionCompiler::compileExpression(int itoken) {
468 ltokenPostfix.clear();
469 for (int i = 0; i < itoken; ++i) {
470 addTokenToPostfix(atokenCommand[i]);
471 }
472
473 atokenInfix = atokenCommand;
474 itokenInfix = itoken;
475
476 addTokenToPostfix(Token::tokenExpressionBegin);
477 if (!clauseOr()) {
478 return false;
479 }
480
481 addTokenToPostfix(Token::tokenExpressionEnd);
482 if (itokenInfix != atokenInfix.size()) {
483 return endOfExpressionExpected();
484 }
485
486 atokenCommand = ltokenPostfix;
487 return true;
488 }
489
490 Token SelectionCompiler::tokenNext() {
491 if (itokenInfix == atokenInfix.size()) {
492 return Token();
493 }
494 return atokenInfix[itokenInfix++];
495 }
496
497 boost::any SelectionCompiler::valuePeek() {
498 if (itokenInfix == atokenInfix.size()) {
499 return boost::any();
500 } else {
501 return atokenInfix[itokenInfix].value;
502 }
503 }
504
505 int SelectionCompiler::tokPeek() {
506 if (itokenInfix == atokenInfix.size()) {
507 return 0;
508 }else {
509 return atokenInfix[itokenInfix].tok;
510 }
511 }
512
513 bool SelectionCompiler::clauseOr() {
514 if (!clauseAnd()) {
515 return false;
516 }
517
518 while (tokPeek() == Token::opOr) {
519 Token tokenOr = tokenNext();
520 if (!clauseAnd()) {
521 return false;
522 }
523 addTokenToPostfix(tokenOr);
524 }
525 return true;
526 }
527
528 bool SelectionCompiler::clauseAnd() {
529 if (!clauseNot()) {
530 return false;
531 }
532
533 while (tokPeek() == Token::opAnd) {
534 Token tokenAnd = tokenNext();
535 if (!clauseNot()) {
536 return false;
537 }
538 addTokenToPostfix(tokenAnd);
539 }
540 return true;
541 }
542
543 bool SelectionCompiler::clauseNot() {
544 if (tokPeek() == Token::opNot) {
545 Token tokenNot = tokenNext();
546 if (!clauseNot()) {
547 return false;
548 }
549 return addTokenToPostfix(tokenNot);
550 }
551 return clausePrimitive();
552 }
553
554 bool SelectionCompiler::clausePrimitive() {
555 int tok = tokPeek();
556 switch (tok) {
557 case Token::within:
558 return clauseWithin();
559
560 case Token::asterisk:
561 case Token::identifier:
562 return clauseChemObjName();
563
564 case Token::integer :
565 return clauseIndex();
566 default:
567 if ((tok & Token::atomproperty) == Token::atomproperty) {
568 return clauseComparator();
569 }
570 if ((tok & Token::predefinedset) != Token::predefinedset) {
571 break;
572 }
573 // fall into the code and below and just add the token
574 case Token::all:
575 case Token::none:
576 case Token::hull:
577 return addTokenToPostfix(tokenNext());
578 case Token::leftparen:
579 tokenNext();
580 if (!clauseOr()) {
581 return false;
582 }
583 if (tokenNext().tok != Token::rightparen) {
584 return rightParenthesisExpected();
585 }
586 return true;
587 }
588 return unrecognizedExpressionToken();
589 }
590
591 bool SelectionCompiler::clauseComparator() {
592 Token tokenAtomProperty = tokenNext();
593 Token tokenComparator = tokenNext();
594 if ((tokenComparator.tok & Token::comparator) == 0) {
595 return comparisonOperatorExpected();
596 }
597
598 Token tokenValue = tokenNext();
599 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
600 return numberExpected();
601 }
602
603 float val;
604 if (tokenValue.value.type() == typeid(int)) {
605 val = boost::any_cast<int>(tokenValue.value);
606 } else if (tokenValue.value.type() == typeid(float)) {
607 val = boost::any_cast<float>(tokenValue.value);
608 } else {
609 return false;
610 }
611
612 boost::any floatVal;
613 floatVal = val;
614 return addTokenToPostfix(Token(tokenComparator.tok,
615 tokenAtomProperty.tok, floatVal));
616 }
617
618 bool SelectionCompiler::clauseWithin() {
619 tokenNext(); // WITHIN
620 if (tokenNext().tok != Token::leftparen) { // (
621 return leftParenthesisExpected();
622 }
623
624 boost::any distance;
625 Token tokenDistance = tokenNext(); // distance
626 switch(tokenDistance.tok) {
627 case Token::integer:
628 case Token::decimal:
629 distance = tokenDistance.value;
630 break;
631 default:
632 return numberOrKeywordExpected();
633 }
634
635 if (tokenNext().tok != Token::opOr) { // ,
636 return commaExpected();
637 }
638
639 if (! clauseOr()) { // *expression*
640 return false;
641 }
642
643 if (tokenNext().tok != Token::rightparen) { // )T
644 return rightParenthesisExpected();
645 }
646
647 return addTokenToPostfix(Token(Token::within, distance));
648 }
649
650 bool SelectionCompiler::clauseChemObjName() {
651 Token token = tokenNext();
652 if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {
653
654 std::string name = boost::any_cast<std::string>(token.value);
655 if (isNameValid(name)) {
656 return addTokenToPostfix(Token(Token::name, name));
657 } else {
658 return compileError("invalid name: " + name);
659 }
660 }
661
662 return false;
663
664 }
665
666 bool SelectionCompiler::isNameValid(const std::string& name) {
667 int nbracket = 0;
668 int ndot = 0;
669 for (unsigned int i = 0 ; i < name.size(); ++i) {
670 switch(name[i]) {
671
672 case '[' :
673 ++nbracket;
674 break;
675 case ']' :
676 --nbracket;
677 break;
678 case '.' :
679 ++ndot;
680 break;
681 }
682 }
683
684 //only allow 3 dots at most
685 return (ndot <=3 && nbracket == 0) ? true : false;
686 }
687
688 bool SelectionCompiler::clauseIndex(){
689 Token token = tokenNext();
690 if (token.tok == Token::integer) {
691 int index = boost::any_cast<int>(token.value);
692 int tok = tokPeek();
693 std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
694 if (tok == Token::to) {
695 tokenNext();
696 tok = tokPeek();
697 if (tok != Token::integer) {
698 return numberExpected();
699 }
700
701 boost::any intVal = tokenNext().value;
702 int first = index;
703 if (intVal.type() != typeid(int)){
704 return false;
705 }
706 int second = boost::any_cast<int>(intVal);
707
708 return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
709
710 }else {
711 return addTokenToPostfix(Token(Token::index, boost::any(index)));
712 }
713 } else {
714 return numberExpected();
715 }
716 }
717
718 }

Properties

Name Value
svn:executable *
svn:keywords Author Id Revision Date