src/selection/SelectionCompiler.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 *
 * SUPPORT OPEN SCIENCE!  If you use OpenMD or its source code in your
 * research, please cite the appropriate papers when you publish your
 * work.  Good starting points are:
 *                                                                      
 * [1]  Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).             
 * [2]  Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).          
 * [3]  Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).          
 * [4]  Kuang & Gezelter,  J. Chem. Phys. 133, 164101 (2010).
 * [5]  Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
 */

#include "selection/SelectionCompiler.hpp"
#include "utils/StringUtils.hpp"
namespace OpenMD {

  /**
   * Compiles a selection script.
   *
   * Stores the file name and the script text, clears the bookkeeping of
   * any earlier compilation (line numbers, line indices and compiled
   * token lists) and then runs internalCompile().
   *
   * @param filename name to record for the script
   * @param script text of the selection script
   * @return true if internalCompile() succeeded. Otherwise false, and
   * errorLine is set to the script text that starts at the current
   * command index and stops before the first line terminator after it.
   */
  bool SelectionCompiler::compile(const std::string& filename, 
                                  const std::string& script) {

    this->filename = filename;
    this->script = script;
    lineNumbers.clear();
    lineIndices.clear();
    aatokenCompiled.clear();

    if (internalCompile()) {
      return true;
    }

    // Clamp the start index so that an out-of-range command index can
    // never make substr() throw std::out_of_range.
    const std::string::size_type scriptLength = script.size();
    std::string::size_type lineBegin =
      static_cast<std::string::size_type>(ichCurrentCommand);
    if (lineBegin > scriptLength) {
      lineBegin = scriptLength;
    }

    // The offending line ends at whichever of '\r' or '\n' comes first.
    std::string::size_type lineEnd = script.find_first_of("\r\n", lineBegin);
    if (lineEnd == std::string::npos) {
      lineEnd = scriptLength;
    }

    // substr() expects (position, count), not (begin, end).
    errorLine = script.substr(lineBegin, lineEnd - lineBegin);
    return false;
  }

  /**
   * Tokenizes the stored script and compiles it command by command.
   *
   * Each pass of the main loop recognizes one lexical element that starts
   * at ichToken; the lookingAt*() helpers leave its length in cchToken and
   * the loop increment skips over it. Tokens are collected in a local list
   * until an end of line or a ';' is found. The pending command is then
   * passed to compileCommand() and atokenCommand is appended to
   * aatokenCompiled together with the current line number and the index
   * at which the command started.
   *
   * @return true if the whole script was processed; false if
   * compileCommand() failed; otherwise the value returned by the error
   * helper that was invoked (commandExpected(), invalidExpressionToken()
   * or unrecognizedToken())
   */
  bool SelectionCompiler::internalCompile(){

    cchScript = script.size();
    ichToken = 0;
    // Start from a known command index so that the error reporting in
    // compile() never works with a value left over from an earlier run.
    ichCurrentCommand = 0;
    lineCurrent = 1;

    error = false;

    aatokenCompiled.clear();
    // tokens collected so far for the command that is being assembled
    std::vector<Token> ltoken;

    // token id of the command being assembled, Token::nada while none is open
    int tokCommand = Token::nada;

    for ( ; true; ichToken += cchToken) {
      if (lookingAtLeadingWhitespace())
        continue;
      //if (lookingAtComment())
      //    continue;
      bool endOfLine = lookingAtEndOfLine();
      if (endOfLine || lookingAtEndOfStatement()) {
        // a line end or a ';' closes the pending command, if there is one
        if (tokCommand != Token::nada) {
          if (! compileCommand(ltoken)) {
            return false;
          }
          aatokenCompiled.push_back(atokenCommand);
          lineNumbers.push_back(lineCurrent);
          lineIndices.push_back(ichCurrentCommand);
          ltoken.clear();
          tokCommand = Token::nada;
        }
            
        if (ichToken < cchScript) {
          if (endOfLine)
            ++lineCurrent;
          continue;
        }
        // the end of the script has been reached
        break;
      }

      // string and number literals are only looked for inside a command
      if (tokCommand != Token::nada) {
        if (lookingAtString()) {
          std::string str = getUnescapedStringLiteral();
          ltoken.push_back(Token(Token::string, str));
          continue;
        }
        //if ((tokCommand & Token::specialstring) != 0 &&
        //    lookingAtSpecialString()) {
        //    std::string str = script.substr(ichToken, ichToken + cchToken);
        //    ltoken.push_back(Token(Token::string, str));
        //    continue;
        //}
        //if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
        if (lookingAtDecimal((tokCommand) != 0)) {
          float value = lexi_cast<float>(script.substr(ichToken, cchToken));        
          ltoken.push_back(Token(Token::decimal, boost::any(value)));
          continue;
        }
        //if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
        if (lookingAtInteger((tokCommand) != 0)) {

          int val = lexi_cast<int>(script.substr(ichToken, cchToken));
          ltoken.push_back(Token(Token::integer,   boost::any(val)));
          continue;
        }
      }
      
      if (lookingAtLookupToken()) {
        std::string ident = script.substr(ichToken, cchToken);
        Token token;            
        // text without an entry in the token map becomes a plain identifier
        Token* pToken = TokenMap::getInstance()->getToken(ident);
        if (pToken != NULL) {
          token = *pToken;
        } else {
          token = Token(Token::identifier, ident);
        }
            
        int tok = token.tok;
            
        switch (tokCommand) {
        case Token::nada:
          // the first token of a statement has to be a command
          ichCurrentCommand = ichToken;
          tokCommand = tok;
          if ((tokCommand & Token::command) == 0)
            return commandExpected();
          break;

        case Token::define:
          if (ltoken.size() == 1) {
            // we are looking at the variable name
            if (tok != Token::identifier &&
                (tok & Token::predefinedset) != Token::predefinedset)
              return invalidExpressionToken(ident);
          } else {
            // we are looking at the expression
            if (tok != Token::identifier && 
                (tok & (Token::expression | Token::predefinedset)) == 0)
              return invalidExpressionToken(ident);
          }
                    
          break;

        case Token::select:
          if (tok != Token::identifier && (tok & Token::expression) == 0)
            return invalidExpressionToken(ident);
          break;
        }
        ltoken.push_back(token);
        continue;
      }

      if (ltoken.empty()) {
        // Nothing recognizable where a command should start: record the
        // position, as the Token::nada case above does, before reporting.
        ichCurrentCommand = ichToken;
        return commandExpected();
      }
        
      return unrecognizedToken();
    }

    return true;
  }


  /**
   * Measures the run of whitespace that starts at ichToken.
   *
   * The length of the run is stored in cchToken.
   *
   * NOTE(review): std::isspace() also accepts '\r' and '\n', so line
   * terminators found here are counted as whitespace as well.
   *
   * @return true if at least one whitespace character was found
   */
  bool SelectionCompiler::lookingAtLeadingWhitespace() {

    int ichT = ichToken;
    // <cctype> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    while (ichT < cchScript &&
           std::isspace(static_cast<unsigned char>(script[ichT]))) {
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Tests whether ichToken is at a line terminator or at the end of the
   * script.
   *
   * "\r\n", a lone '\r' and a lone '\n' each count as one terminator whose
   * length is stored in cchToken. At the very end of the script true is
   * returned and cchToken is left untouched.
   *
   * @return true at a line terminator or at the end of the script
   */
  bool SelectionCompiler::lookingAtEndOfLine() {
    if (ichToken == cchScript) {
      return true;
    }

    int next = ichToken;
    switch (script[next]) {
    case '\r':
      ++next;
      // treat a following '\n' as part of the same terminator
      if (next < cchScript && script[next] == '\n') {
        ++next;
      }
      break;
    case '\n':
      ++next;
      break;
    default:
      return false;
    }

    cchToken = next - ichToken;
    return true;
  }

  /**
   * Tests whether the character at ichToken is the statement separator
   * ';'. On success cchToken is set to 1.
   *
   * @return true if a ';' is found at ichToken
   */
  bool SelectionCompiler::lookingAtEndOfStatement() {
    const bool atSeparator =
      (ichToken != cchScript) && (script[ichToken] == ';');
    if (atSeparator) {
      cchToken = 1;
    }
    return atSeparator;
  }

  /**
   * Tests whether a double-quoted string literal starts at ichToken.
   *
   * The literal runs up to and including the next '"' that is not
   * preceded by an unpaired backslash. If no such quote exists, the
   * literal extends to the end of the script. Its length, quotes
   * included, is stored in cchToken. Single quotes do not start a string,
   * so that they remain usable in atom expressions.
   *
   * @return true if the character at ichToken is '"'
   */
  bool SelectionCompiler::lookingAtString() {
    if (ichToken == cchScript || script[ichToken] != '"') {
      return false;
    }

    bool escaped = false;   // true right after an unpaired backslash
    int pos = ichToken + 1;
    while (pos < cchScript) {
      const char current = script[pos];
      ++pos;
      if (escaped) {
        // whatever follows a backslash is taken literally
        escaped = false;
      } else if (current == '"') {
        break;
      } else {
        escaped = (current == '\\');
      }
    }

    cchToken = pos - ichToken;
    return true;
  }

  
  /**
   * Returns the contents of the string literal that occupies the cchToken
   * characters starting at ichToken, with escape sequences resolved.
   *
   * The first and the last character of the token are treated as the
   * quotes and are dropped. Recognized escapes are \b, \n, \t, \r, \",
   * \\ and \', plus \x (up to two hex digits) and \u (up to four hex
   * digits). The numeric value is narrowed to a single char, and a \x or
   * \u followed by no hex digit produces a NUL character. A backslash
   * before any other character is dropped and that character is kept.
   *
   * @return the unescaped text; empty if the token is shorter than two
   * characters
   */
  std::string SelectionCompiler::getUnescapedStringLiteral() {
    // Only reserve capacity: constructing the string with a length would
    // put that many blanks in front of the appended text.
    std::string sb;
    if (cchToken > 2) {
      sb.reserve(static_cast<std::string::size_type>(cchToken - 2));
    }
    
    const int ichMax = ichToken + cchToken - 1;
    int ich = ichToken + 1;

    while (ich < ichMax) {
      char ch = script[ich++];
      if (ch == '\\' && ich < ichMax) {
        ch = script[ich++];
        switch (ch) {
        case 'b':
          ch = '\b';
          break;
        case 'n':
          ch = '\n';
          break;
        case 't':
          ch = '\t';
          break;
        case 'r':
          ch = '\r';
          break;
        case '"':
        case '\\':
        case '\'':
          // the escaped character stands for itself
          break;
        case 'x':
        case 'u': {
          const int digitCount = (ch == 'x') ? 2 : 4;
          if (ich < ichMax) {
            int unicode = 0;
            for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
              const int hexit = getHexitValue(script[ich]);
              if (hexit < 0)
                break;
              unicode <<= 4;
              unicode += hexit;
              ++ich;
            }
            ch = static_cast<char>(unicode);
          }
          break;
        }
        default:
          break;
        }
      }
      sb.append(1, ch);
    }

    return sb;
  }

  /**
   * Converts one hexadecimal digit to its numeric value.
   *
   * @param ch character to convert; both letter cases are accepted
   * @return a value from 0 to 15, or -1 if ch is not a hexadecimal digit
   */
  int SelectionCompiler::getHexitValue(char ch) {
    if ('0' <= ch && ch <= '9') {
      return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
      return 10 + ch - 'a';
    }
    if ('A' <= ch && ch <= 'F') {
      return 10 + ch - 'A';
    }
    return -1;
  }

  /**
   * Measures the text from ichToken up to, but not including, the next
   * ';', '\r' or '\n', or up to the end of the script if none follows.
   * The length is stored in cchToken.
   *
   * @return true if at least one character was found
   */
  bool SelectionCompiler::lookingAtSpecialString() {
    int ichT = ichToken;
    while (ichT < cchScript) {
      // look at the character at the current position on every step
      const char ch = script[ichT];
      if (ch == ';' || ch == '\r' || ch == '\n') {
        break;
      }
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Tests whether a decimal number starts at ichToken.
   *
   * A decimal is an optional '-', optional digits, a '.', and optional
   * digits, with at least one digit overall. A '.' that directly follows
   * a letter is rejected so that names such as DMPC.1 are not split.
   * cchToken is written once the '.' has been accepted, which includes
   * the case where false is returned because no digit was seen.
   *
   * NOTE(review): allowNegative is not consulted; a leading '-' is always
   * accepted here, unlike in lookingAtInteger().
   *
   * @param allowNegative currently unused
   * @return true if a decimal number was recognized
   */
  bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
    if (ichToken == cchScript) {
      return false;
    }
    
    int ichT = ichToken;
    if (script[ichT] == '-') {
      ++ichT;
    }

    // <cctype> functions need a value representable as unsigned char
    bool digitSeen = false;
    char ch = 'X';
    while (ichT < cchScript &&
           std::isdigit(static_cast<unsigned char>(ch = script[ichT]))) {
      ++ichT;
      digitSeen = true;
    }

    // the integer part has to be followed by a '.'
    if (ichT == cchScript || ch != '.') {
      return false;
    }

    // to support DMPC.1, let's check the character before the dot
    if (ichT > 0 &&
        std::isalpha(static_cast<unsigned char>(script[ichT - 1]))) {
      return false;
    }

    ++ichT;
    while (ichT < cchScript &&
           std::isdigit(static_cast<unsigned char>(script[ichT]))) {
      ++ichT;
      digitSeen = true;
    }
    cchToken = ichT - ichToken;
    return digitSeen;
  }

  /**
   * Tests whether an integer starts at ichToken.
   *
   * An integer is one or more digits, preceded by a '-' only when
   * allowNegative is true. On success its length is stored in cchToken.
   *
   * @param allowNegative whether a leading '-' is accepted
   * @return true if at least one digit was found
   */
  bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
    if (ichToken == cchScript) {
      return false;
    }
    int ichT = ichToken;
    if (allowNegative && script[ichToken] == '-') {
      ++ichT;
    }
    const int ichBeginDigits = ichT;
    // <cctype> functions need a value representable as unsigned char
    while (ichT < cchScript &&
           std::isdigit(static_cast<unsigned char>(script[ichT]))) {
      ++ichT;
    }
    if (ichBeginDigits == ichT) {
      return false;
    }
    cchToken = ichT - ichToken;
    return true;
  }

  /**
   * Tests whether an operator, a piece of punctuation or an
   * identifier-like word starts at ichToken, and stores its length in
   * cchToken.
   *
   * Recognized forms:
   *  - the single characters ( ) , [ ]
   *  - & and |, optionally doubled
   *  - < = >, optionally followed by one more of < = >
   *  - / and !, optionally followed by =
   *  - a word that starts with a letter, '_', '*' or '?' and continues
   *    with letters, digits and any of _ . * ? + - [ ]
   *
   * @return true if one of these forms was recognized; false at the end
   * of the script or if the first character cannot start any of them
   */
  bool SelectionCompiler::lookingAtLookupToken() {
    if (ichToken == cchScript) {
      return false;
    }

    int ichT = ichToken;
    char ch;
    switch (ch = script[ichT++]) {
    case '(':
    case ')':
    case ',':
    case '[':
    case ']':
      break;
    case '&':
    case '|':
      if (ichT < cchScript && script[ichT] == ch) {
        ++ichT;
      }
      break;
    case '<':
    case '=':
    case '>':
      if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
        ++ichT;
      }
      break;
    case '/':
    case '!':
      if (ichT < cchScript && script[ichT] == '=') {
        ++ichT;
      }
      break;
    default:
      // A word has to start with a letter or an underscore. Both range
      // tests must use ||; with && the upper-case test can never be true
      // and nothing would ever be rejected here.
      if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && ch != '_') {
        return false;
      }
      // fall through
    case '*':
    case '?': // include question marks in identifier for atom expressions
      while (ichT < cchScript) {
        ch = script[ichT];
        // <cctype> functions need a value representable as unsigned char
        const unsigned char uch = static_cast<unsigned char>(ch);
        const bool wordChar =
          std::isalpha(uch) || std::isdigit(uch) ||
          ch == '_' || ch == '.' || ch == '*' || ch == '?' ||
          ch == '+' || ch == '-' || ch == '[' || ch == ']';
        if (!wordChar) {
          break;
        }
        ++ichT;
      }
      break;
    }

    cchToken = ichT - ichToken;

    return true;
  }

  bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
    const Token& tokenCommand = ltoken[0];
    int tokCommand = tokenCommand.tok;

    atokenCommand = ltoken;
    if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
      return false;
    }
    
    return true;
  }

  bool SelectionCompiler::compileExpression() {
    /** todo */
    unsigned int i = 1;
    int tokCommand = atokenCommand[0].tok;
    if (tokCommand == Token::define) {
      i = 2;
    } else if ((tokCommand & Token::embeddedExpression) != 0) {
      // look for the open parenthesis
      while (i < atokenCommand.size() &&
             atokenCommand[i].tok != Token::leftparen)
        ++i;
    }

    if (i >= atokenCommand.size()) {
      return true;
    }
    return compileExpression(i);
  }

                  
  /**
   * Appends a token to the end of the postfix token list.
   *
   * @param token token to append
   * @return always true
   */
  bool SelectionCompiler::addTokenToPostfix(const Token& token) {
    ltokenPostfix.insert(ltokenPostfix.end(), token);
    return true;
  }

  /**
   * Converts the expression of the current command to postfix order.
   *
   * The tokens in front of the expression are copied unchanged. The
   * expression is then parsed with clauseOr() between a
   * Token::tokenExpressionBegin and a Token::tokenExpressionEnd marker.
   * On success atokenCommand is replaced by the postfix list.
   *
   * @param itoken index in atokenCommand of the first expression token
   * @return false if parsing failed; the result of
   * endOfExpressionExpected() if tokens are left over; true otherwise
   */
  bool SelectionCompiler::compileExpression(int itoken) {
    ltokenPostfix.clear();
    int copied = 0;
    while (copied < itoken) {
      addTokenToPostfix(atokenCommand[copied]);
      ++copied;
    }

    // the parser reads from a copy of the command, starting at itoken
    atokenInfix = atokenCommand;
    itokenInfix = itoken;

    addTokenToPostfix(Token::tokenExpressionBegin);
    const bool parsed = clauseOr();
    if (!parsed) {
      return false;
    }

    addTokenToPostfix(Token::tokenExpressionEnd);
    const bool consumedAll = (itokenInfix == atokenInfix.size());
    if (!consumedAll) {
      return endOfExpressionExpected();
    }

    atokenCommand = ltokenPostfix;
    return true;
  }

  /**
   * Returns the next infix token and advances past it.
   *
   * @return the token at itokenInfix, or a default-constructed Token when
   * all infix tokens have been consumed
   */
  Token SelectionCompiler::tokenNext() {
    const bool exhausted = (itokenInfix == atokenInfix.size());
    if (exhausted) {
      return Token();
    }
    const Token& current = atokenInfix[itokenInfix];
    ++itokenInfix;
    return current;
  }

  /**
   * Returns the value of the next infix token without consuming it.
   *
   * @return the value of the token at itokenInfix, or an empty boost::any
   * when all infix tokens have been consumed
   */
  boost::any SelectionCompiler::valuePeek() {
    if (itokenInfix != atokenInfix.size()) {
      return atokenInfix[itokenInfix].value;
    }
    return boost::any();
  }

  /**
   * Returns the token id of the next infix token without consuming it.
   *
   * @return the tok field of the token at itokenInfix, or 0 when all infix
   * tokens have been consumed
   */
  int SelectionCompiler::tokPeek() {
    const bool exhausted = (itokenInfix == atokenInfix.size());
    return exhausted ? 0 : atokenInfix[itokenInfix].tok;
  }

  bool SelectionCompiler::clauseOr() {
    if (!clauseAnd()) {
      return false;
    }
    
    while (tokPeek() == Token::opOr) {
      Token tokenOr = tokenNext();
      if (!clauseAnd()) {
        return false;
      }
      addTokenToPostfix(tokenOr);
    }
    return true;
  }

  bool SelectionCompiler::clauseAnd() {
    if (!clauseNot()) {
      return false;
    }

    while (tokPeek() == Token::opAnd) {
      Token tokenAnd = tokenNext();
      if (!clauseNot()) {
        return false;
      }
      addTokenToPostfix(tokenAnd);
    }
    return true;
  }

  bool SelectionCompiler::clauseNot() {
    if (tokPeek() == Token::opNot) {
      Token tokenNot = tokenNext();
      if (!clauseNot()) {
        return false;
      }
      return addTokenToPostfix(tokenNot);
    }
    return clausePrimitive();
  }

  /**
   * Parses one primitive clause, chosen by the id of the next token:
   *  - Token::within -> clauseWithin()
   *  - Token::asterisk, Token::identifier -> clauseChemObjName()
   *  - Token::integer -> clauseIndex()
   *  - Token::leftparen -> a parenthesized OR clause
   *  - Token::all, Token::none, Token::hull -> the token itself is
   *    appended to the postfix list
   *  - ids carrying all Token::atomproperty bits -> clauseComparator()
   *  - ids carrying all Token::predefinedset bits -> the token itself is
   *    appended to the postfix list
   * Anything else is reported through unrecognizedExpressionToken().
   *
   * @return the result of the selected sub-parser or error helper
   */
  bool SelectionCompiler::clausePrimitive() {
    const int tok = tokPeek();

    if (tok == Token::within) {
      return clauseWithin();
    }
    if (tok == Token::asterisk || tok == Token::identifier) {
      return clauseChemObjName();
    }
    if (tok == Token::integer) {
      return clauseIndex();
    }
    if (tok == Token::leftparen) {
      tokenNext();
      if (!clauseOr()) {
        return false;
      }
      if (tokenNext().tok != Token::rightparen) {
        return rightParenthesisExpected();
      }
      return true;
    }

    const bool isPlainSet =
      tok == Token::all || tok == Token::none || tok == Token::hull;
    if (!isPlainSet) {
      if ((tok & Token::atomproperty) == Token::atomproperty) {
        return clauseComparator();
      }
      if ((tok & Token::predefinedset) != Token::predefinedset) {
        return unrecognizedExpressionToken();
      }
    }

    // plain and predefined sets are passed through unchanged
    return addTokenToPostfix(tokenNext());
  }

  bool SelectionCompiler::clauseComparator() {
    Token tokenAtomProperty = tokenNext();
    Token tokenComparator = tokenNext();
    if ((tokenComparator.tok & Token::comparator) == 0) {
      return comparisonOperatorExpected();
    }

    Token tokenValue = tokenNext();
    if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
      return numberExpected();
    }
    
    float val;
    if (tokenValue.value.type() == typeid(int)) {
      val = boost::any_cast<int>(tokenValue.value);
    } else if (tokenValue.value.type() == typeid(float)) {
      val = boost::any_cast<float>(tokenValue.value);
    } else {
      return false;
    }

    boost::any floatVal;
    floatVal = val;
    return addTokenToPostfix(Token(tokenComparator.tok,
                                   tokenAtomProperty.tok, floatVal));
  }

  bool SelectionCompiler::clauseWithin() {
    tokenNext();                             // WITHIN
    if (tokenNext().tok != Token::leftparen) {  // (
      return leftParenthesisExpected();
    }
    
    boost::any distance;
    Token tokenDistance = tokenNext();       // distance
    switch(tokenDistance.tok) {
    case Token::integer:
    case Token::decimal:
      distance = tokenDistance.value;
      break;
    default:
      return numberOrKeywordExpected();
    }

    if (tokenNext().tok != Token::opOr) {       // ,
      return commaExpected();
    }
    
    if (! clauseOr()) {                        // *expression*
      return false;
    }
    
    if (tokenNext().tok != Token::rightparen) { // )T
      return rightParenthesisExpected();
    }
    
    return addTokenToPostfix(Token(Token::within, distance));
  }

  bool SelectionCompiler::clauseChemObjName() {
    Token token = tokenNext();
    if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {

      std::string name = boost::any_cast<std::string>(token.value);
      if (isNameValid(name)) {
        return addTokenToPostfix(Token(Token::name, name));
      } else {
        return compileError("invalid name: " + name);
      }
    } 

    return false;
        
  }

  /**
   * Checks the shape of an object name.
   *
   * A name is accepted if it contains at most three dots and its square
   * brackets are balanced, with every ']' closing an earlier '['.
   *
   * @param name name to check
   * @return true if the name is acceptable
   */
  bool SelectionCompiler::isNameValid(const std::string& name) {
    int nbracket = 0;   // currently open '[' brackets
    int ndot = 0;
    for (std::string::size_type i = 0; i < name.size(); ++i) {
      switch (name[i]) {
      case '[':
        ++nbracket;
        break;
      case ']':
        // a closing bracket without a matching opening one is invalid;
        // comparing only the totals would accept names such as "]x["
        if (--nbracket < 0) {
          return false;
        }
        break;
      case '.':
        ++ndot;
        break;
      default:
        break;
      }
    }

    // only allow 3 dots at most
    return ndot <= 3 && nbracket == 0;
  }

  /**
   * Parses a single index or an index range and appends a Token::index
   * token to the postfix list.
   *
   * A lone integer yields a token that holds an int. An integer followed
   * by a Token::to token and a second integer yields a token that holds a
   * std::pair<int, int> of the two values.
   *
   * @return the result of numberExpected() if an integer is missing, false
   * if the second value does not hold an int, otherwise the result of
   * addTokenToPostfix()
   */
  bool SelectionCompiler::clauseIndex(){
    Token token = tokenNext();
    if (token.tok != Token::integer) {
      return numberExpected();
    }

    int index = boost::any_cast<int>(token.value);
    if (tokPeek() != Token::to) {
      // a single index
      return addTokenToPostfix(Token(Token::index, boost::any(index)));
    }

    tokenNext();   // consume the Token::to token
    if (tokPeek() != Token::integer) {
      return numberExpected();
    }

    boost::any intVal = tokenNext().value;
    if (intVal.type() != typeid(int)) {
      return false;
    }
    int second = boost::any_cast<int>(intVal);

    return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(index, second))));
  }

}
Revision:	1931
Committed:	Mon Aug 19 19:20:32 2013 UTC (11 years, 8 months ago) by gezelter
File size:	18559 byte(s)
Log Message:	fixed a sameRegion initialization issue
#	User	Rev	Content
1	tim	279	/*
2			* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3			*
4			* The University of Notre Dame grants you ("Licensee") a
5			* non-exclusive, royalty free, license to use, modify and
6			* redistribute this software in source and binary code form, provided
7			* that the following conditions are met:
8			*
9	gezelter	1390	* 1. Redistributions of source code must retain the above copyright
10	tim	279	* notice, this list of conditions and the following disclaimer.
11			*
12	gezelter	1390	* 2. Redistributions in binary form must reproduce the above copyright
13	tim	279	* notice, this list of conditions and the following disclaimer in the
14			* documentation and/or other materials provided with the
15			* distribution.
16			*
17			* This software is provided "AS IS," without a warranty of any
18			* kind. All express or implied conditions, representations and
19			* warranties, including any implied warranty of merchantability,
20			* fitness for a particular purpose or non-infringement, are hereby
21			* excluded. The University of Notre Dame and its licensors shall not
22			* be liable for any damages suffered by licensee as a result of
23			* using, modifying or distributing the software or its
24			* derivatives. In no event will the University of Notre Dame or its
25			* licensors be liable for any lost revenue, profit or data, or for
26			* direct, indirect, special, consequential, incidental or punitive
27			* damages, however caused and regardless of the theory of liability,
28			* arising out of the use of or inability to use software, even if the
29			* University of Notre Dame has been advised of the possibility of
30			* such damages.
31	gezelter	1390	*
32			* SUPPORT OPEN SCIENCE! If you use OpenMD or its source code in your
33			* research, please cite the appropriate papers when you publish your
34			* work. Good starting points are:
35			*
36			* [1] Meineke, et al., J. Comp. Chem. 26, 252-271 (2005).
37			* [2] Fennell & Gezelter, J. Chem. Phys. 124, 234104 (2006).
38	gezelter	1879	* [3] Sun, Lin & Gezelter, J. Chem. Phys. 128, 234107 (2008).
39	gezelter	1782	* [4] Kuang & Gezelter, J. Chem. Phys. 133, 164101 (2010).
40			* [5] Vardeman, Stocker & Gezelter, J. Chem. Theory Comput. 7, 834 (2011).
41	tim	279	*/
42
43			#include "selection/SelectionCompiler.hpp"
44	tim	281	#include "utils/StringUtils.hpp"
45	gezelter	1390	namespace OpenMD {
46	tim	279
47	cli2	1364	bool SelectionCompiler::compile(const std::string& filename,
48			const std::string& script) {
49	tim	279
50			this->filename = filename;
51			this->script = script;
52			lineNumbers.clear();
53			lineIndices.clear();
54			aatokenCompiled.clear();
55
56	tim	281	if (internalCompile()) {
57	gezelter	507	return true;
58	tim	279	}
59
60			int icharEnd;
61	tim	281	if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
62			(icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
63	gezelter	507	icharEnd = script.size();
64	tim	279	}
65			errorLine = script.substr(ichCurrentCommand, icharEnd);
66			return false;
67	gezelter	507	}
68	tim	279
69	gezelter	507	bool SelectionCompiler::internalCompile(){
70	tim	279
71			cchScript = script.size();
72			ichToken = 0;
73			lineCurrent = 1;
74
75			error = false;
76
77	tim	281	//std::vector<Token> lltoken;
78			aatokenCompiled.clear();
79	tim	279	std::vector<Token> ltoken;
80
81	tim	281	Token tokenCommand;
82			int tokCommand = Token::nada;
83	tim	279
84			for ( ; true; ichToken += cchToken) {
85	gezelter	507	if (lookingAtLeadingWhitespace())
86			continue;
87			//if (lookingAtComment())
88			// continue;
89			bool endOfLine = lookingAtEndOfLine();
90			if (endOfLine \|\| lookingAtEndOfStatement()) {
91			if (tokCommand != Token::nada) {
92			if (! compileCommand(ltoken)) {
93			return false;
94			}
95			aatokenCompiled.push_back(atokenCommand);
96			lineNumbers.push_back(lineCurrent);
97			lineIndices.push_back(ichCurrentCommand);
98			ltoken.clear();
99			tokCommand = Token::nada;
100			}
101	tim	279
102	gezelter	507	if (ichToken < cchScript) {
103			if (endOfLine)
104			++lineCurrent;
105			continue;
106			}
107			break;
108			}
109	tim	279
110	gezelter	507	if (tokCommand != Token::nada) {
111			if (lookingAtString()) {
112			std::string str = getUnescapedStringLiteral();
113			ltoken.push_back(Token(Token::string, str));
114			continue;
115			}
116			//if ((tokCommand & Token::specialstring) != 0 &&
117			// lookingAtSpecialString()) {
118			// std::string str = script.substr(ichToken, ichToken + cchToken);
119			// ltoken.push_back(Token(Token::string, str));
120			// continue;
121			//}
122	gezelter	1931	//if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
123			if (lookingAtDecimal((tokCommand) != 0)) {
124	gezelter	507	float value = lexi_cast<float>(script.substr(ichToken, cchToken));
125			ltoken.push_back(Token(Token::decimal, boost::any(value)));
126			continue;
127			}
128	gezelter	1931	//if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
129			if (lookingAtInteger((tokCommand) != 0)) {
130	tim	295
131	gezelter	507	int val = lexi_cast<int>(script.substr(ichToken, cchToken));
132			ltoken.push_back(Token(Token::integer, boost::any(val)));
133			continue;
134			}
135			}
136	tim	279
137	gezelter	507	if (lookingAtLookupToken()) {
138			std::string ident = script.substr(ichToken, cchToken);
139			Token token;
140			Token* pToken = TokenMap::getInstance()->getToken(ident);
141			if (pToken != NULL) {
142			token = *pToken;
143			} else {
144			token = Token(Token::identifier, ident);
145			}
146	tim	279
147	gezelter	507	int tok = token.tok;
148	tim	279
149	gezelter	507	switch (tokCommand) {
150			case Token::nada:
151			ichCurrentCommand = ichToken;
152			//tokenCommand = token;
153			tokCommand = tok;
154			if ((tokCommand & Token::command) == 0)
155			return commandExpected();
156			break;
157	tim	279
158	gezelter	507	case Token::define:
159			if (ltoken.size() == 1) {
160			// we are looking at the variable name
161			if (tok != Token::identifier &&
162			(tok & Token::predefinedset) != Token::predefinedset)
163			return invalidExpressionToken(ident);
164			} else {
165			// we are looking at the expression
166			if (tok != Token::identifier &&
167			(tok & (Token::expression \| Token::predefinedset)) == 0)
168			return invalidExpressionToken(ident);
169			}
170	tim	279
171	gezelter	507	break;
172	tim	279
173	gezelter	507	case Token::select:
174			if (tok != Token::identifier && (tok & Token::expression) == 0)
175			return invalidExpressionToken(ident);
176			break;
177			}
178			ltoken.push_back(token);
179			continue;
180			}
181	tim	279
182	gezelter	1879	if (ltoken.empty()) {
183	gezelter	507	return commandExpected();
184			}
185	tim	279
186	gezelter	507	return unrecognizedToken();
187	tim	279	}
188
189			return true;
190			}
191
192
193			bool SelectionCompiler::lookingAtLeadingWhitespace() {
194
195			int ichT = ichToken;
196			while (ichT < cchScript && std::isspace(script[ichT])) {
197			++ichT;
198			}
199			cchToken = ichT - ichToken;
200			return cchToken > 0;
201			}
202
203			bool SelectionCompiler::lookingAtEndOfLine() {
204			if (ichToken == cchScript)
205			return true;
206			int ichT = ichToken;
207			char ch = script[ichT];
208			if (ch == '\r') {
209			++ichT;
210			if (ichT < cchScript && script[ichT] == '\n')
211	gezelter	507	++ichT;
212	tim	279	} else if (ch == '\n') {
213			++ichT;
214			} else {
215			return false;
216			}
217			cchToken = ichT - ichToken;
218			return true;
219			}
220
221			bool SelectionCompiler::lookingAtEndOfStatement() {
222			if (ichToken == cchScript \|\| script[ichToken] != ';')
223			return false;
224			cchToken = 1;
225			return true;
226			}
227
228			bool SelectionCompiler::lookingAtString() {
229			if (ichToken == cchScript)
230			return false;
231			if (script[ichToken] != '"')
232			return false;
233			// remove support for single quote
234			// in order to use it in atom expressions
235			// char chFirst = script.charAt(ichToken);
236			// if (chFirst != '"' && chFirst != '\'')
237			// return false;
238			int ichT = ichToken + 1;
239			// while (ichT < cchScript && script.charAt(ichT++) != chFirst)
240			char ch;
241	tim	281	bool previousCharBackslash = false;
242	tim	279	while (ichT < cchScript) {
243	tim	281	ch = script[ichT++];
244	tim	279	if (ch == '"' && !previousCharBackslash)
245			break;
246			previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
247			}
248			cchToken = ichT - ichToken;
249	tim	295
250	tim	279	return true;
251			}
252
253
254	gezelter	507	std::string SelectionCompiler::getUnescapedStringLiteral() {
255	tim	281	/** @todo */
256			std::string sb(cchToken - 2, ' ');
257
258	tim	279	int ichMax = ichToken + cchToken - 1;
259			int ich = ichToken + 1;
260
261			while (ich < ichMax) {
262	gezelter	507	char ch = script[ich++];
263			if (ch == '\\' && ich < ichMax) {
264			ch = script[ich++];
265			switch (ch) {
266			case 'b':
267			ch = '\b';
268			break;
269			case 'n':
270			ch = '\n';
271			break;
272			case 't':
273			ch = '\t';
274			break;
275			case 'r':
276			ch = '\r';
277			// fall into
278			case '"':
279			case '\\':
280			case '\'':
281			break;
282			case 'x':
283			case 'u':
284			int digitCount = ch == 'x' ? 2 : 4;
285			if (ich < ichMax) {
286			int unicode = 0;
287			for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
288			char chT = script[ich];
289			int hexit = getHexitValue(chT);
290			if (hexit < 0)
291			break;
292			unicode <<= 4;
293			unicode += hexit;
294			++ich;
295			}
296			ch = (char)unicode;
297			}
298			}
299			}
300			sb.append(1, ch);
301	tim	279	}
302
303	tim	281	return sb;
304	gezelter	507	}
305	tim	279
306	gezelter	507	int SelectionCompiler::getHexitValue(char ch) {
307	tim	279	if (ch >= '0' && ch <= '9')
308	gezelter	507	return ch - '0';
309	tim	279	else if (ch >= 'a' && ch <= 'f')
310	gezelter	507	return 10 + ch - 'a';
311	tim	279	else if (ch >= 'A' && ch <= 'F')
312	gezelter	507	return 10 + ch - 'A';
313	tim	279	else
314	gezelter	507	return -1;
315			}
316	tim	279
317	gezelter	507	bool SelectionCompiler::lookingAtSpecialString() {
318	tim	279	int ichT = ichToken;
319			char ch = script[ichT];
320			while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
321	gezelter	507	++ichT;
322	tim	279	}
323			cchToken = ichT - ichToken;
324			return cchToken > 0;
325	gezelter	507	}
326	tim	279
327	gezelter	507	bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
328	tim	279	if (ichToken == cchScript) {
329	gezelter	507	return false;
330	tim	279	}
331
332			int ichT = ichToken;
333			if (script[ichT] == '-') {
334	gezelter	507	++ichT;
335	tim	279	}
336	tim	281	bool digitSeen = false;
337	tim	279	char ch = 'X';
338			while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
339	gezelter	507	++ichT;
340			digitSeen = true;
341	tim	279	}
342
343			if (ichT == cchScript \|\| ch != '.') {
344	gezelter	507	return false;
345	tim	279	}
346
347	tim	303	// to support DMPC.1, let's check the character before the dot
348			if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
349	gezelter	507	return false;
350	tim	279	}
351
352			++ichT;
353			while (ichT < cchScript && std::isdigit(script[ichT])) {
354	gezelter	507	++ichT;
355			digitSeen = true;
356	tim	279	}
357			cchToken = ichT - ichToken;
358			return digitSeen;
359	gezelter	507	}
360	tim	279
361	gezelter	507	bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
362	tim	279	if (ichToken == cchScript) {
363	gezelter	507	return false;
364	tim	279	}
365			int ichT = ichToken;
366			if (allowNegative && script[ichToken] == '-') {
367	gezelter	507	++ichT;
368	tim	279	}
369			int ichBeginDigits = ichT;
370			while (ichT < cchScript && std::isdigit(script[ichT])) {
371	gezelter	507	++ichT;
372	tim	279	}
373			if (ichBeginDigits == ichT) {
374	gezelter	507	return false;
375	tim	279	}
376			cchToken = ichT - ichToken;
377			return true;
378	gezelter	507	}
379	tim	279
380	gezelter	507	bool SelectionCompiler::lookingAtLookupToken() {
381	tim	279	if (ichToken == cchScript) {
382	gezelter	507	return false;
383	tim	279	}
384
385			int ichT = ichToken;
386			char ch;
387			switch (ch = script[ichT++]) {
388	gezelter	507	case '(':
389			case ')':
390			case ',':
391			case '[':
392			case ']':
393			break;
394			case '&':
395			case '\|':
396			if (ichT < cchScript && script[ichT] == ch) {
397			++ichT;
398			}
399			break;
400			case '<':
401			case '=':
402			case '>':
403			if (ichT < cchScript && ((ch = script[ichT]) == '<' \|\| ch == '=' \|\| ch == '>')) {
404			++ichT;
405			}
406			break;
407			case '/':
408			case '!':
409			if (ichT < cchScript && script[ichT] == '=') {
410			++ichT;
411			}
412			break;
413			default:
414			if ((ch < 'a' \|\| ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
415			return false;
416			}
417			case '*':
418			case '?': // include question marks in identifier for atom expressions
419			while (ichT < cchScript && !std::isspace(ch = script[ichT]) &&
420			(std::isalpha(ch) \|\|std::isdigit(ch) \|\| ch == '_' \|\| ch == '.' \|\| ch == '*' \|\| ch == '?' \|\| ch == '+' \|\| ch == '-' \|\| ch == '[' \|\| ch == ']') ){
421	tim	288
422	gezelter	507	++ichT;
423			}
424			break;
425	tim	279	}
426	tim	295
427	tim	279	cchToken = ichT - ichToken;
428	tim	295
429	tim	279	return true;
430	gezelter	507	}
431	tim	279
432	gezelter	507	bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
433	tim	281	const Token& tokenCommand = ltoken[0];
434	tim	279	int tokCommand = tokenCommand.tok;
435	tim	281
436			atokenCommand = ltoken;
437			if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
438	gezelter	507	return false;
439	tim	279	}
440	tim	281
441	tim	279	return true;
442	gezelter	507	}
443	tim	279
444	gezelter	507	bool SelectionCompiler::compileExpression() {
445	tim	279	/** todo */
446	gezelter	1782	unsigned int i = 1;
447	tim	279	int tokCommand = atokenCommand[0].tok;
448	tim	281	if (tokCommand == Token::define) {
449	gezelter	507	i = 2;
450	tim	281	} else if ((tokCommand & Token::embeddedExpression) != 0) {
451	gezelter	507	// look for the open parenthesis
452			while (i < atokenCommand.size() &&
453			atokenCommand[i].tok != Token::leftparen)
454	tim	279	++i;
455			}
456	tim	281
457			if (i >= atokenCommand.size()) {
458	gezelter	507	return true;
459	tim	281	}
460	tim	279	return compileExpression(i);
461			}
462
463
464	gezelter	507	bool SelectionCompiler::addTokenToPostfix(const Token& token) {
465	tim	279	ltokenPostfix.push_back(token);
466			return true;
467	gezelter	507	}
468	tim	279
469	gezelter	507	bool SelectionCompiler::compileExpression(int itoken) {
470	tim	281	ltokenPostfix.clear();
471			for (int i = 0; i < itoken; ++i) {
472	gezelter	507	addTokenToPostfix(atokenCommand[i]);
473	tim	281	}
474
475	tim	279	atokenInfix = atokenCommand;
476			itokenInfix = itoken;
477
478	tim	281	addTokenToPostfix(Token::tokenExpressionBegin);
479	tim	279	if (!clauseOr()) {
480	gezelter	507	return false;
481	tim	279	}
482
483	tim	281	addTokenToPostfix(Token::tokenExpressionEnd);
484			if (itokenInfix != atokenInfix.size()) {
485	gezelter	507	return endOfExpressionExpected();
486	tim	279	}
487
488			atokenCommand = ltokenPostfix;
489			return true;
490	gezelter	507	}
491	tim	279
492	gezelter	507	Token SelectionCompiler::tokenNext() {
493	tim	281	if (itokenInfix == atokenInfix.size()) {
494	gezelter	507	return Token();
495	tim	281	}
496			return atokenInfix[itokenInfix++];
497	gezelter	507	}
498	tim	279
499	gezelter	507	boost::any SelectionCompiler::valuePeek() {
500	tim	281	if (itokenInfix == atokenInfix.size()) {
501	gezelter	507	return boost::any();
502	tim	279	} else {
503	gezelter	507	return atokenInfix[itokenInfix].value;
504	tim	279	}
505	gezelter	507	}
506	tim	279
507	gezelter	507	int SelectionCompiler::tokPeek() {
508	tim	281	if (itokenInfix == atokenInfix.size()) {
509	gezelter	507	return 0;
510	tim	279	}else {
511	gezelter	507	return atokenInfix[itokenInfix].tok;
512	tim	279	}
513	gezelter	507	}
514	tim	279
515	gezelter	507	bool SelectionCompiler::clauseOr() {
516	tim	279	if (!clauseAnd()) {
517	gezelter	507	return false;
518	tim	279	}
519
520	tim	281	while (tokPeek() == Token::opOr) {
521	gezelter	507	Token tokenOr = tokenNext();
522			if (!clauseAnd()) {
523			return false;
524			}
525			addTokenToPostfix(tokenOr);
526	tim	279	}
527			return true;
528	gezelter	507	}
529	tim	279
530	gezelter	507	bool SelectionCompiler::clauseAnd() {
531	tim	279	if (!clauseNot()) {
532	gezelter	507	return false;
533	tim	279	}
534
535	tim	281	while (tokPeek() == Token::opAnd) {
536	gezelter	507	Token tokenAnd = tokenNext();
537			if (!clauseNot()) {
538			return false;
539			}
540			addTokenToPostfix(tokenAnd);
541	tim	279	}
542			return true;
543	gezelter	507	}
544	tim	279
545	gezelter	507	bool SelectionCompiler::clauseNot() {
546	tim	281	if (tokPeek() == Token::opNot) {
547	gezelter	507	Token tokenNot = tokenNext();
548			if (!clauseNot()) {
549			return false;
550			}
551			return addTokenToPostfix(tokenNot);
552	tim	279	}
553			return clausePrimitive();
554	gezelter	507	}
555	tim	279
556	gezelter	507	bool SelectionCompiler::clausePrimitive() {
557	tim	279	int tok = tokPeek();
558			switch (tok) {
559	gezelter	507	case Token::within:
560			return clauseWithin();
561	tim	283
562	gezelter	507	case Token::asterisk:
563			case Token::identifier:
564			return clauseChemObjName();
565	tim	295
566	gezelter	507	case Token::integer :
567			return clauseIndex();
568			default:
569			if ((tok & Token::atomproperty) == Token::atomproperty) {
570			return clauseComparator();
571			}
572			if ((tok & Token::predefinedset) != Token::predefinedset) {
573			break;
574			}
575			// fall into the code and below and just add the token
576			case Token::all:
577			case Token::none:
578	kstocke1	1523	case Token::hull:
579	gezelter	507	return addTokenToPostfix(tokenNext());
580			case Token::leftparen:
581			tokenNext();
582			if (!clauseOr()) {
583			return false;
584			}
585			if (tokenNext().tok != Token::rightparen) {
586			return rightParenthesisExpected();
587			}
588			return true;
589	tim	279	}
590			return unrecognizedExpressionToken();
591	gezelter	507	}
592	tim	279
593	gezelter	507	bool SelectionCompiler::clauseComparator() {
594	tim	279	Token tokenAtomProperty = tokenNext();
595			Token tokenComparator = tokenNext();
596	tim	281	if ((tokenComparator.tok & Token::comparator) == 0) {
597	gezelter	507	return comparisonOperatorExpected();
598	tim	279	}
599
600			Token tokenValue = tokenNext();
601	tim	288	if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
602	gezelter	507	return numberExpected();
603	tim	279	}
604	tim	288
605			float val;
606			if (tokenValue.value.type() == typeid(int)) {
607	gezelter	507	val = boost::any_cast<int>(tokenValue.value);
608	tim	288	} else if (tokenValue.value.type() == typeid(float)) {
609	gezelter	507	val = boost::any_cast<float>(tokenValue.value);
610	tim	288	} else {
611	gezelter	507	return false;
612	tim	288	}
613
614	tim	295	boost::any floatVal;
615			floatVal = val;
616	tim	281	return addTokenToPostfix(Token(tokenComparator.tok,
617	gezelter	507	tokenAtomProperty.tok, floatVal));
618			}
619	tim	279
620	gezelter	507	bool SelectionCompiler::clauseWithin() {
621	tim	279	tokenNext(); // WITHIN
622	tim	281	if (tokenNext().tok != Token::leftparen) { // (
623	gezelter	507	return leftParenthesisExpected();
624	tim	279	}
625
626	tim	281	boost::any distance;
627	tim	279	Token tokenDistance = tokenNext(); // distance
628			switch(tokenDistance.tok) {
629	gezelter	507	case Token::integer:
630			case Token::decimal:
631			distance = tokenDistance.value;
632			break;
633			default:
634			return numberOrKeywordExpected();
635	tim	279	}
636
637	tim	281	if (tokenNext().tok != Token::opOr) { // ,
638	gezelter	507	return commaExpected();
639	tim	279	}
640
641			if (! clauseOr()) { // expression
642	gezelter	507	return false;
643	tim	279	}
644
645	tim	281	if (tokenNext().tok != Token::rightparen) { // )T
646	gezelter	507	return rightParenthesisExpected();
647	tim	279	}
648
649	tim	281	return addTokenToPostfix(Token(Token::within, distance));
650	gezelter	507	}
651	tim	279
652	gezelter	507	bool SelectionCompiler::clauseChemObjName() {
653	tim	452	Token token = tokenNext();
654			if (token.tok == Token::identifier && token.value.type() == typeid(std::string)) {
655	tim	279
656	gezelter	507	std::string name = boost::any_cast<std::string>(token.value);
657			if (isNameValid(name)) {
658			return addTokenToPostfix(Token(Token::name, name));
659			} else {
660			return compileError("invalid name: " + name);
661			}
662	tim	452	}
663	tim	288
664	tim	452	return false;
665
666	gezelter	507	}
667	tim	279
668	gezelter	507	bool SelectionCompiler::isNameValid(const std::string& name) {
669			int nbracket = 0;
670	tim	452	int ndot = 0;
671	gezelter	1782	for (unsigned int i = 0 ; i < name.size(); ++i) {
672	gezelter	507	switch(name[i]) {
673	tim	283
674	gezelter	507	case '[' :
675			++nbracket;
676			break;
677			case ']' :
678			--nbracket;
679			break;
680			case '.' :
681			++ndot;
682			break;
683			}
684	tim	283	}
685
686	tim	452	//only allow 3 dots at most
687			return (ndot <=3 && nbracket == 0) ? true : false;
688	gezelter	507	}
689	tim	279
690	gezelter	507	bool SelectionCompiler::clauseIndex(){
691	tim	295	Token token = tokenNext();
692			if (token.tok == Token::integer) {
693	gezelter	507	int index = boost::any_cast<int>(token.value);
694			int tok = tokPeek();
695			std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
696			if (tok == Token::to) {
697			tokenNext();
698			tok = tokPeek();
699			if (tok != Token::integer) {
700			return numberExpected();
701			}
702	tim	295
703	gezelter	507	boost::any intVal = tokenNext().value;
704			int first = index;
705			if (intVal.type() != typeid(int)){
706			return false;
707			}
708			int second = boost::any_cast<int>(intVal);
709	tim	283
710	gezelter	507	return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
711	tim	295
712	gezelter	507	}else {
713			return addTokenToPostfix(Token(Token::index, boost::any(index)));
714			}
715	tim	295	} else {
716	gezelter	507	return numberExpected();
717	tim	295	}
718	gezelter	507	}
719	tim	295
720			}