ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/selection/SelectionCompiler.cpp
Revision: 1931
Committed: Mon Aug 19 19:20:32 2013 UTC (11 years, 8 months ago) by gezelter
File size: 18559 byte(s)
Log Message:
fixed a sameRegion initialization issue

File Contents

# Content
1 /*
2 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3 *
4 * The University of Notre Dame grants you ("Licensee") a
5 * non-exclusive, royalty free, license to use, modify and
6 * redistribute this software in source and binary code form, provided
7 * that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the
15 * distribution.
16 *
17 * This software is provided "AS IS," without a warranty of any
18 * kind. All express or implied conditions, representations and
19 * warranties, including any implied warranty of merchantability,
20 * fitness for a particular purpose or non-infringement, are hereby
21 * excluded. The University of Notre Dame and its licensors shall not
22 * be liable for any damages suffered by licensee as a result of
23 * using, modifying or distributing the software or its
24 * derivatives. In no event will the University of Notre Dame or its
25 * licensors be liable for any lost revenue, profit or data, or for
26 * direct, indirect, special, consequential, incidental or punitive
27 * damages, however caused and regardless of the theory of liability,
28 * arising out of the use of or inability to use software, even if the
29 * University of Notre Dame has been advised of the possibility of
30 * such damages.
31 *
32 * SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
33 * research, please cite the appropriate papers when you publish your
34 * work. Good starting points are:
35 *
36 * [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
37 * [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
38 * [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).
39 * [4] Kuang & Gezelter, J. Chem. Phys. 133, 164101 (2010).
40 * [5] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
41 */
42
43 #include "selection/SelectionCompiler.hpp"
44 #include "utils/StringUtils.hpp"
45 namespace OpenMD {
46
47 bool SelectionCompiler::compile(const std::string& filename,
48 const std::string& script) {
49
50 this->filename = filename;
51 this->script = script;
52 lineNumbers.clear();
53 lineIndices.clear();
54 aatokenCompiled.clear();
55
56 if (internalCompile()) {
57 return true;
58 }
59
60 int icharEnd;
61 if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
62 (icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
63 icharEnd = script.size();
64 }
65 errorLine = script.substr(ichCurrentCommand, icharEnd);
66 return false;
67 }
68
69 bool SelectionCompiler::internalCompile(){
70
71 cchScript = script.size();
72 ichToken = 0;
73 lineCurrent = 1;
74
75 error = false;
76
77 //std::vector<Token> lltoken;
78 aatokenCompiled.clear();
79 std::vector<Token> ltoken;
80
81 Token tokenCommand;
82 int tokCommand = Token::nada;
83
84 for ( ; true; ichToken += cchToken) {
85 if (lookingAtLeadingWhitespace())
86 continue;
87 //if (lookingAtComment())
88 // continue;
89 bool endOfLine = lookingAtEndOfLine();
90 if (endOfLine || lookingAtEndOfStatement()) {
91 if (tokCommand != Token::nada) {
92 if (! compileCommand(ltoken)) {
93 return false;
94 }
95 aatokenCompiled.push_back(atokenCommand);
96 lineNumbers.push_back(lineCurrent);
97 lineIndices.push_back(ichCurrentCommand);
98 ltoken.clear();
99 tokCommand = Token::nada;
100 }
101
102 if (ichToken < cchScript) {
103 if (endOfLine)
104 ++lineCurrent;
105 continue;
106 }
107 break;
108 }
109
110 if (tokCommand != Token::nada) {
111 if (lookingAtString()) {
112 std::string str = getUnescapedStringLiteral();
113 ltoken.push_back(Token(Token::string, str));
114 continue;
115 }
116 //if ((tokCommand & Token::specialstring) != 0 &&
117 // lookingAtSpecialString()) {
118 // std::string str = script.substr(ichToken, ichToken + cchToken);
119 // ltoken.push_back(Token(Token::string, str));
120 // continue;
121 //}
122 //if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
123 if (lookingAtDecimal((tokCommand) != 0)) {
124 float value = lexi_cast<float>(script.substr(ichToken, cchToken));
125 ltoken.push_back(Token(Token::decimal, boost::any(value)));
126 continue;
127 }
128 //if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
129 if (lookingAtInteger((tokCommand) != 0)) {
130
131 int val = lexi_cast<int>(script.substr(ichToken, cchToken));
132 ltoken.push_back(Token(Token::integer, boost::any(val)));
133 continue;
134 }
135 }
136
137 if (lookingAtLookupToken()) {
138 std::string ident = script.substr(ichToken, cchToken);
139 Token token;
140 Token* pToken = TokenMap::getInstance()->getToken(ident);
141 if (pToken != NULL) {
142 token = *pToken;
143 } else {
144 token = Token(Token::identifier, ident);
145 }
146
147 int tok = token.tok;
148
149 switch (tokCommand) {
150 case Token::nada:
151 ichCurrentCommand = ichToken;
152 //tokenCommand = token;
153 tokCommand = tok;
154 if ((tokCommand & Token::command) == 0)
155 return commandExpected();
156 break;
157
158 case Token::define:
159 if (ltoken.size() == 1) {
160 // we are looking at the variable name
161 if (tok != Token::identifier &&
162 (tok & Token::predefinedset) != Token::predefinedset)
163 return invalidExpressionToken(ident);
164 } else {
165 // we are looking at the expression
166 if (tok != Token::identifier &&
167 (tok & (Token::expression | Token::predefinedset)) == 0)
168 return invalidExpressionToken(ident);
169 }
170
171 break;
172
173 case Token::select:
174 if (tok != Token::identifier && (tok & Token::expression) == 0)
175 return invalidExpressionToken(ident);
176 break;
177 }
178 ltoken.push_back(token);
179 continue;
180 }
181
182 if (ltoken.empty()) {
183 return commandExpected();
184 }
185
186 return unrecognizedToken();
187 }
188
189 return true;
190 }
191
192
193 bool SelectionCompiler::lookingAtLeadingWhitespace() {
194
195 int ichT = ichToken;
196 while (ichT < cchScript && std::isspace(script[ichT])) {
197 ++ichT;
198 }
199 cchToken = ichT - ichToken;
200 return cchToken > 0;
201 }
202
203 bool SelectionCompiler::lookingAtEndOfLine() {
204 if (ichToken == cchScript)
205 return true;
206 int ichT = ichToken;
207 char ch = script[ichT];
208 if (ch == '\r') {
209 ++ichT;
210 if (ichT < cchScript && script[ichT] == '\n')
211 ++ichT;
212 } else if (ch == '\n') {
213 ++ichT;
214 } else {
215 return false;
216 }
217 cchToken = ichT - ichToken;
218 return true;
219 }
220
221 bool SelectionCompiler::lookingAtEndOfStatement() {
222 if (ichToken == cchScript || script[ichToken] != ';')
223 return false;
224 cchToken = 1;
225 return true;
226 }
227
228 bool SelectionCompiler::lookingAtString() {
229 if (ichToken == cchScript)
230 return false;
231 if (script[ichToken] != '"')
232 return false;
233 // remove support for single quote
234 // in order to use it in atom expressions
235 // char chFirst = script.charAt(ichToken);
236 // if (chFirst != '"' && chFirst != '\'')
237 // return false;
238 int ichT = ichToken + 1;
239 // while (ichT < cchScript && script.charAt(ichT++) != chFirst)
240 char ch;
241 bool previousCharBackslash = false;
242 while (ichT < cchScript) {
243 ch = script[ichT++];
244 if (ch == '"' && !previousCharBackslash)
245 break;
246 previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
247 }
248 cchToken = ichT - ichToken;
249
250 return true;
251 }
252
253
254 std::string SelectionCompiler::getUnescapedStringLiteral() {
255 /** @todo */
256 std::string sb(cchToken - 2, ' ');
257
258 int ichMax = ichToken + cchToken - 1;
259 int ich = ichToken + 1;
260
261 while (ich < ichMax) {
262 char ch = script[ich++];
263 if (ch == '\\' && ich < ichMax) {
264 ch = script[ich++];
265 switch (ch) {
266 case 'b':
267 ch = '\b';
268 break;
269 case 'n':
270 ch = '\n';
271 break;
272 case 't':
273 ch = '\t';
274 break;
275 case 'r':
276 ch = '\r';
277 // fall into
278 case '"':
279 case '\\':
280 case '\'':
281 break;
282 case 'x':
283 case 'u':
284 int digitCount = ch == 'x' ? 2 : 4;
285 if (ich < ichMax) {
286 int unicode = 0;
287 for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
288 char chT = script[ich];
289 int hexit = getHexitValue(chT);
290 if (hexit < 0)
291 break;
292 unicode <<= 4;
293 unicode += hexit;
294 ++ich;
295 }
296 ch = (char)unicode;
297 }
298 }
299 }
300 sb.append(1, ch);
301 }
302
303 return sb;
304 }
305
306 int SelectionCompiler::getHexitValue(char ch) {
307 if (ch >= '0' && ch <= '9')
308 return ch - '0';
309 else if (ch >= 'a' && ch <= 'f')
310 return 10 + ch - 'a';
311 else if (ch >= 'A' && ch <= 'F')
312 return 10 + ch - 'A';
313 else
314 return -1;
315 }
316
317 bool SelectionCompiler::lookingAtSpecialString() {
318 int ichT = ichToken;
319 char ch = script[ichT];
320 while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
321 ++ichT;
322 }
323 cchToken = ichT - ichToken;
324 return cchToken > 0;
325 }
326
327 bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
328 if (ichToken == cchScript) {
329 return false;
330 }
331
332 int ichT = ichToken;
333 if (script[ichT] == '-') {
334 ++ichT;
335 }
336 bool digitSeen = false;
337 char ch = 'X';
338 while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
339 ++ichT;
340 digitSeen = true;
341 }
342
343 if (ichT == cchScript || ch != '.') {
344 return false;
345 }
346
347 // to support DMPC.1, let's check the character before the dot
348 if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
349 return false;
350 }
351
352 ++ichT;
353 while (ichT < cchScript && std::isdigit(script[ichT])) {
354 ++ichT;
355 digitSeen = true;
356 }
357 cchToken = ichT - ichToken;
358 return digitSeen;
359 }
360
361 bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
362 if (ichToken == cchScript) {
363 return false;
364 }
365 int ichT = ichToken;
366 if (allowNegative && script[ichToken] == '-') {
367 ++ichT;
368 }
369 int ichBeginDigits = ichT;
370 while (ichT < cchScript && std::isdigit(script[ichT])) {
371 ++ichT;
372 }
373 if (ichBeginDigits == ichT) {
374 return false;
375 }
376 cchToken = ichT - ichToken;
377 return true;
378 }
379
380 bool SelectionCompiler::lookingAtLookupToken() {
381 if (ichToken == cchScript) {
382 return false;
383 }
384
385 int ichT = ichToken;
386 char ch;
387 switch (ch = script[ichT++]) {
388 case '(':
389 case ')':
390 case ',':
391 case '[':
392 case ']':
393 break;
394 case '&':
395 case '|':
396 if (ichT < cchScript && script[ichT] == ch) {
397 ++ichT;
398 }
399 break;
400 case '<':
401 case '=':
402 case '>':
403 if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
404 ++ichT;
405 }
406 break;
407 case '/':
408 case '!':
409 if (ichT < cchScript && script[ichT] == '=') {
410 ++ichT;
411 }
412 break;
413 default:
414 if ((ch < 'a' || ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
415 return false;
416 }
417 case '*':
418 case '?': // include question marks in identifier for atom expressions
419 while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
420 (std::isalpha(ch) ||std::isdigit(ch) || ch == '_' || ch == '.' || ch == '*' || ch == '?' || ch == '+' || ch == '-' || ch == '[' || ch == ']') ){
421
422 ++ichT;
423 }
424 break;
425 }
426
427 cchToken = ichT - ichToken;
428
429 return true;
430 }
431
432 bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
433 const Token& tokenCommand = ltoken[0];
434 int tokCommand = tokenCommand.tok;
435
436 atokenCommand = ltoken;
437 if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
438 return false;
439 }
440
441 return true;
442 }
443
444 bool SelectionCompiler::compileExpression() {
445 /** todo */
446 unsigned int i = 1;
447 int tokCommand = atokenCommand[0].tok;
448 if (tokCommand == Token::define) {
449 i = 2;
450 } else if ((tokCommand & Token::embeddedExpression) != 0) {
451 // look for the open parenthesis
452 while (i < atokenCommand.size() &&
453 atokenCommand[i].tok != Token::leftparen)
454 ++i;
455 }
456
457 if (i >= atokenCommand.size()) {
458 return true;
459 }
460 return compileExpression(i);
461 }
462
463
464 bool SelectionCompiler::addTokenToPostfix(const Token& token) {
465 ltokenPostfix.push_back(token);
466 return true;
467 }
468
469 bool SelectionCompiler::compileExpression(int itoken) {
470 ltokenPostfix.clear();
471 for (int i = 0; i < itoken; ++i) {
472 addTokenToPostfix(atokenCommand[i]);
473 }
474
475 atokenInfix = atokenCommand;
476 itokenInfix = itoken;
477
478 addTokenToPostfix(Token::tokenExpressionBegin);
479 if (!clauseOr()) {
480 return false;
481 }
482
483 addTokenToPostfix(Token::tokenExpressionEnd);
484 if (itokenInfix != atokenInfix.size()) {
485 return endOfExpressionExpected();
486 }
487
488 atokenCommand = ltokenPostfix;
489 return true;
490 }
491
492 Token SelectionCompiler::tokenNext() {
493 if (itokenInfix == atokenInfix.size()) {
494 return Token();
495 }
496 return atokenInfix[itokenInfix++];
497 }
498
499 boost::any SelectionCompiler::valuePeek() {
500 if (itokenInfix == atokenInfix.size()) {
501 return boost::any();
502 } else {
503 return atokenInfix[itokenInfix].value;
504 }
505 }
506
507 int SelectionCompiler::tokPeek() {
508 if (itokenInfix == atokenInfix.size()) {
509 return 0;
510 }else {
511 return atokenInfix[itokenInfix].tok;
512 }
513 }
514
515 bool SelectionCompiler::clauseOr() {
516 if (!clauseAnd()) {
517 return false;
518 }
519
520 while (tokPeek() == Token::opOr) {
521 Token tokenOr = tokenNext();
522 if (!clauseAnd()) {
523 return false;
524 }
525 addTokenToPostfix(tokenOr);
526 }
527 return true;
528 }
529
530 bool SelectionCompiler::clauseAnd() {
531 if (!clauseNot()) {
532 return false;
533 }
534
535 while (tokPeek() == Token::opAnd) {
536 Token tokenAnd = tokenNext();
537 if (!clauseNot()) {
538 return false;
539 }
540 addTokenToPostfix(tokenAnd);
541 }
542 return true;
543 }
544
545 bool SelectionCompiler::clauseNot() {
546 if (tokPeek() == Token::opNot) {
547 Token tokenNot = tokenNext();
548 if (!clauseNot()) {
549 return false;
550 }
551 return addTokenToPostfix(tokenNot);
552 }
553 return clausePrimitive();
554 }
555
556 bool SelectionCompiler::clausePrimitive() {
557 int tok = tokPeek();
558 switch (tok) {
559 case Token::within:
560 return clauseWithin();
561
562 case Token::asterisk:
563 case Token::identifier:
564 return clauseChemObjName();
565
566 case Token::integer :
567 return clauseIndex();
568 default:
569 if ((tok & Token::atomproperty) == Token::atomproperty) {
570 return clauseComparator();
571 }
572 if ((tok & Token::predefinedset) != Token::predefinedset) {
573 break;
574 }
575 // fall into the code and below and just add the token
576 case Token::all:
577 case Token::none:
578 case Token::hull:
579 return addTokenToPostfix(tokenNext());
580 case Token::leftparen:
581 tokenNext();
582 if (!clauseOr()) {
583 return false;
584 }
585 if (tokenNext().tok != Token::rightparen) {
586 return rightParenthesisExpected();
587 }
588 return true;
589 }
590 return unrecognizedExpressionToken();
591 }
592
593 bool SelectionCompiler::clauseComparator() {
594 Token tokenAtomProperty = tokenNext();
595 Token tokenComparator = tokenNext();
596 if ((tokenComparator.tok & Token::comparator) == 0) {
597 return comparisonOperatorExpected();
598 }
599
600 Token tokenValue = tokenNext();
601 if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
602 return numberExpected();
603 }
604
605 float val;
606 if (tokenValue.value.type() == typeid(int)) {
607 val = boost::any_cast<int>(tokenValue.value);
608 } else if (tokenValue.value.type() == typeid(float)) {
609 val = boost::any_cast<float>(tokenValue.value);
610 } else {
611 return false;
612 }
613
614 boost::any floatVal;
615 floatVal = val;
616 return addTokenToPostfix(Token(tokenComparator.tok,
617 tokenAtomProperty.tok, floatVal));
618 }
619
620 bool SelectionCompiler::clauseWithin() {
621 tokenNext(); // WITHIN
622 if (tokenNext().tok != Token::leftparen) { // (
623 return leftParenthesisExpected();
624 }
625
626 boost::any distance;
627 Token tokenDistance = tokenNext(); // distance
628 switch(tokenDistance.tok) {
629 case Token::integer:
630 case Token::decimal:
631 distance = tokenDistance.value;
632 break;
633 default:
634 return numberOrKeywordExpected();
635 }
636
637 if (tokenNext().tok != Token::opOr) { // ,
638 return commaExpected();
639 }
640
641 if (! clauseOr()) { // *expression*
642 return false;
643 }
644
645 if (tokenNext().tok != Token::rightparen) { // )T
646 return rightParenthesisExpected();
647 }
648
649 return addTokenToPostfix(Token(Token::within, distance));
650 }
651
652 bool SelectionCompiler::clauseChemObjName() {
653 Token token = tokenNext();
654 if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {
655
656 std::string name = boost::any_cast<std::string>(token.value);
657 if (isNameValid(name)) {
658 return addTokenToPostfix(Token(Token::name, name));
659 } else {
660 return compileError("invalid name: " + name);
661 }
662 }
663
664 return false;
665
666 }
667
668 bool SelectionCompiler::isNameValid(const std::string& name) {
669 int nbracket = 0;
670 int ndot = 0;
671 for (unsigned int i = 0 ; i < name.size(); ++i) {
672 switch(name[i]) {
673
674 case '[' :
675 ++nbracket;
676 break;
677 case ']' :
678 --nbracket;
679 break;
680 case '.' :
681 ++ndot;
682 break;
683 }
684 }
685
686 //only allow 3 dots at most
687 return (ndot <=3 && nbracket == 0) ? true : false;
688 }
689
690 bool SelectionCompiler::clauseIndex(){
691 Token token = tokenNext();
692 if (token.tok == Token::integer) {
693 int index = boost::any_cast<int>(token.value);
694 int tok = tokPeek();
695 std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
696 if (tok == Token::to) {
697 tokenNext();
698 tok = tokPeek();
699 if (tok != Token::integer) {
700 return numberExpected();
701 }
702
703 boost::any intVal = tokenNext().value;
704 int first = index;
705 if (intVal.type() != typeid(int)){
706 return false;
707 }
708 int second = boost::any_cast<int>(intVal);
709
710 return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
711
712 }else {
713 return addTokenToPostfix(Token(Token::index, boost::any(index)));
714 }
715 } else {
716 return numberExpected();
717 }
718 }
719
720 }

Properties

Name Value
svn:executable *
svn:keywords Author Id Revision Date