src/selection/SelectionCompiler.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Acknowledgement of the program authors must be made in any
 *    publication of scientific results based in part on use of the
 *    program.  An acceptable form of acknowledgement is citation of
 *    the article in which the program was described (Matthew
 *    A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
 *    J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
 *    Parallel Simulation Engine for Molecular Dynamics,"
 *    J. Comput. Chem. 26, pp. 252-271 (2005))
 *
 * 2. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 3. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 */

#include "selection/SelectionCompiler.hpp"
#include "utils/StringUtils.hpp"
namespace oopse {

/**
 * Compiles a selection script.
 *
 * Stores the file name and script text, clears the results of any previous
 * compilation (lineNumbers, lineIndices, aatokenCompiled) and runs
 * internalCompile().
 *
 * @param filename name associated with the script; stored in this->filename
 * @param script   text of the script; stored in this->script
 * @return true if internalCompile() succeeded. On failure returns false and
 *         sets errorLine to the text that starts at ichCurrentCommand and
 *         runs up to (not including) the next line terminator.
 */
bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {

    this->filename = filename;
    this->script = script;
    lineNumbers.clear();
    lineIndices.clear();
    aatokenCompiled.clear();

    if (internalCompile()) {
        return true;
    }

    // Clamp the start index so that substr() below can never throw
    // std::out_of_range if ichCurrentCommand does not point into this script.
    std::string::size_type lineStart =
        static_cast<std::string::size_type>(ichCurrentCommand);
    if (lineStart > script.size()) {
        lineStart = script.size();
    }

    // The offending line ends at the nearest '\r' or '\n', or at the end of
    // the script when there is no terminator.
    std::string::size_type lineEnd = script.find_first_of("\r\n", lineStart);
    if (lineEnd == std::string::npos) {
        lineEnd = script.size();
    }

    // std::string::substr takes (position, count), not (begin, end).
    errorLine = script.substr(lineStart, lineEnd - lineStart);
    return false;
}

/**
 * Tokenizes the member string `script` and compiles it one command at a time.
 *
 * The scan position is ichToken; each pass of the main loop advances it by
 * cchToken, the length that the lookingAt* helpers record for whatever they
 * matched. Tokens of the current command are collected in a local list and
 * handed to compileCommand() when an end of line, end of statement (';') or
 * the end of the script is reached. For every compiled command the function
 * appends atokenCommand to aatokenCompiled, the current line number to
 * lineNumbers and the command's start index to lineIndices.
 *
 * @return true when the whole script was consumed; otherwise the value
 *         returned by compileCommand() or by one of the error helpers
 *         (commandExpected(), invalidExpressionToken(), unrecognizedToken()),
 *         which are defined outside this block -- presumably they record the
 *         error and return false.
 */
bool SelectionCompiler::internalCompile(){

    // Reset the scanner state for a fresh pass over the script.
    cchScript = script.size();
    ichToken = 0;
    lineCurrent = 1;

    error = false;

    //std::vector<Token> lltoken;
    aatokenCompiled.clear();
    // Tokens of the command currently being read.
    std::vector<Token> ltoken;

    // NOTE(review): tokenCommand is never used below; its only assignment is
    // commented out.
    Token tokenCommand;
    // Token::nada means no command token has been seen yet for the current
    // statement.
    int tokCommand = Token::nada;

    for ( ; true; ichToken += cchToken) {
        if (lookingAtLeadingWhitespace())
            continue;
        //if (lookingAtComment())
        //    continue;
        bool endOfLine = lookingAtEndOfLine();
        if (endOfLine || lookingAtEndOfStatement()) {
            // A statement just ended: compile what was collected, if anything.
            if (tokCommand != Token::nada) {
                if (! compileCommand(ltoken)) {
                    return false;
                }
                aatokenCompiled.push_back(atokenCommand);
                lineNumbers.push_back(lineCurrent);
                lineIndices.push_back(ichCurrentCommand);
                ltoken.clear();
                tokCommand = Token::nada;
            }
            
            // More input left: keep scanning; otherwise the script is done.
            if (ichToken < cchScript) {
                if (endOfLine)
                    ++lineCurrent;
              continue;
            }
            break;
        }

        // String and numeric literals are only looked for once a command
        // token has been read.
        if (tokCommand != Token::nada) {
            if (lookingAtString()) {
                std::string str = getUnescapedStringLiteral();
                ltoken.push_back(Token(Token::string, str));
                continue;
            }
            //if ((tokCommand & Token::specialstring) != 0 &&
            //    lookingAtSpecialString()) {
            //    std::string str = script.substr(ichToken, ichToken + cchToken);
            //    ltoken.push_back(Token(Token::string, str));
            //    continue;
            //}
            // Decimal is tried before integer so that "1.5" is not split at
            // the dot.
            if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
                float value = lexi_cast<float>(script.substr(ichToken, cchToken));        
                ltoken.push_back(Token(Token::decimal, boost::any(value)));
                continue;
            }
            if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {

                int val = lexi_cast<int>(script.substr(ichToken, cchToken));
                ltoken.push_back(Token(Token::integer,   boost::any(val)));
                continue;
            }
        }
      
        if (lookingAtLookupToken()) {
            std::string ident = script.substr(ichToken, cchToken);
            Token token;            
            // Text found in the TokenMap yields that token; any other text
            // becomes a plain identifier.
            Token* pToken = TokenMap::getInstance()->getToken(ident);
            if (pToken != NULL) {
                token = *pToken;
            } else {
                token = Token(Token::identifier, ident);
            }
            
            int tok = token.tok;
            
            // Validate the token against the command it belongs to.
            switch (tokCommand) {
                case Token::nada:
                    // First token of a statement: it must be a command.
                    // Remember where the command starts.
                    ichCurrentCommand = ichToken;
                    //tokenCommand = token;
                    tokCommand = tok;
                    if ((tokCommand & Token::command) == 0)
                    return commandExpected();
                    break;

                case Token::define:
                    if (ltoken.size() == 1) {
                        // we are looking at the variable name
                        if (tok != Token::identifier &&
                        (tok & Token::predefinedset) != Token::predefinedset)
                        return invalidExpressionToken(ident);
                    } else {
                    // we are looking at the expression
                    if (tok != Token::identifier && 
                        (tok & (Token::expression | Token::predefinedset)) == 0)
                        return invalidExpressionToken(ident);
                    }
                    
                    break;

                case Token::select:
                    if (tok != Token::identifier && (tok & Token::expression) == 0)
                        return invalidExpressionToken(ident);
                break;
            }
            ltoken.push_back(token);
            continue;
        }

        // Nothing matched at the current position.
        if (ltoken.size() == 0) {
            return commandExpected();
        }
        
        return unrecognizedToken();
    }

    return true;
  }


  /**
   * Measures the run of blank characters that starts at ichToken.
   *
   * Line terminators ('\r' and '\n') are deliberately NOT treated as
   * whitespace here: they must remain visible to lookingAtEndOfLine() so that
   * a newline terminates the current command and the line counter advances.
   * (Skipping everything std::isspace accepts would swallow them.)
   *
   * Sets cchToken to the number of blank characters found.
   *
   * @return true if at least one blank character was found
   */
  bool SelectionCompiler::lookingAtLeadingWhitespace() {

    int ichT = ichToken;
    while (ichT < cchScript) {
      const char ch = script[ichT];
      if (ch == '\r' || ch == '\n') {
        break;
      }
      // Cast first: std::isspace has undefined behaviour for negative values.
      if (!std::isspace(static_cast<unsigned char>(ch))) {
        break;
      }
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Tests whether the scan position is at the end of a line.
   *
   * The end of the script counts as an end of line; in that case cchToken is
   * left unchanged. Otherwise a terminator is "\r", "\n" or the pair "\r\n",
   * and cchToken is set to its length.
   *
   * @return true at the end of the script or at a line terminator
   */
  bool SelectionCompiler::lookingAtEndOfLine() {
    if (ichToken == cchScript) {
      return true;
    }

    const char first = script[ichToken];
    if (first != '\r' && first != '\n') {
      return false;
    }

    int terminatorEnd = ichToken;
    ++terminatorEnd;
    // A carriage return directly followed by a line feed is one terminator.
    if (first == '\r' && terminatorEnd < cchScript && script[terminatorEnd] == '\n') {
      ++terminatorEnd;
    }

    cchToken = terminatorEnd - ichToken;
    return true;
  }

  /**
   * Tests whether the character at the scan position is the statement
   * separator ';'. When it is, cchToken is set to 1.
   *
   * @return true if a ';' is found at ichToken; false otherwise, including at
   *         the end of the script
   */
  bool SelectionCompiler::lookingAtEndOfStatement() {
    const bool atSeparator = (ichToken != cchScript) && (script[ichToken] == ';');
    if (atSeparator) {
      cchToken = 1;
    }
    return atSeparator;
  }

  /**
   * Tests whether a double-quoted string literal starts at the scan position.
   *
   * Only '"' opens a string; single quotes are not string delimiters so that
   * they stay available for atom expressions. The literal ends at the first
   * '"' that is not preceded by an unpaired backslash. If no closing quote is
   * found, the literal runs to the end of the script.
   *
   * Sets cchToken to the length of the literal, including the quote(s).
   *
   * @return true if the character at ichToken is '"'
   */
  bool SelectionCompiler::lookingAtString() {
    if (ichToken == cchScript || script[ichToken] != '"') {
      return false;
    }

    int pos = ichToken + 1;
    bool escaped = false;  // true when the previous char was an unpaired backslash
    for (bool closed = false; !closed && pos < cchScript; ++pos) {
      const char current = script[pos];
      if (current == '"' && !escaped) {
        // The loop increment still steps past the closing quote.
        closed = true;
      } else {
        escaped = (current == '\\') && !escaped;
      }
    }

    cchToken = pos - ichToken;
    return true;
  }

  
/**
 * Returns the contents of the string literal that lookingAtString() just
 * matched (script[ichToken] .. script[ichToken + cchToken - 1]), without the
 * surrounding quotes and with escape sequences decoded.
 *
 * Recognised escapes: \b \n \t \r, \" \\ \' (the character itself), and
 * \xHH / \uHHHH (up to 2 / 4 hex digits, narrowed to a single char). Any other
 * escaped character is copied as-is without the backslash.
 *
 * The last character of the token is always dropped as the closing quote, so
 * an unterminated literal loses its final character.
 *
 * @return the unescaped text
 */
std::string SelectionCompiler::getUnescapedStringLiteral() {
    // Start from an EMPTY string and only reserve capacity. Constructing the
    // string with (count, ' ') would pre-fill it with spaces that end up in
    // front of the decoded text. The guard also avoids a negative length when
    // the token is a lone quote.
    std::string sb;
    if (cchToken > 2) {
        sb.reserve(cchToken - 2);
    }

    const int ichMax = ichToken + cchToken - 1;  // index of the closing quote
    int ich = ichToken + 1;                      // first char after the opening quote

    while (ich < ichMax) {
        char ch = script[ich++];
        if (ch == '\\' && ich < ichMax) {
            ch = script[ich++];
            switch (ch) {
                case 'b':
                    ch = '\b';
                    break;
                case 'n':
                    ch = '\n';
                    break;
                case 't':
                    ch = '\t';
                    break;
                case 'r':
                    ch = '\r';
                    break;
                case '"':
                case '\\':
                case '\'':
                    // the escaped character stands for itself
                    break;
                case 'x':
                case 'u': {
                    const int digitCount = (ch == 'x') ? 2 : 4;
                    // With nothing left after the escape letter, the letter
                    // itself is kept.
                    if (ich < ichMax) {
                        int unicode = 0;
                        for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
                            const int hexit = getHexitValue(script[ich]);
                            if (hexit < 0)
                                break;
                            unicode <<= 4;
                            unicode += hexit;
                            ++ich;
                        }
                        ch = static_cast<char>(unicode);
                    }
                    break;
                }
                default:
                    // unknown escape: keep the character after the backslash
                    break;
            }
        }
        sb.append(1, ch);
    }

    return sb;
}

/**
 * Converts one hexadecimal digit character to its numeric value.
 *
 * @param ch character to convert
 * @return 0-15 for '0'-'9', 'a'-'f' or 'A'-'F'; -1 for any other character
 */
int SelectionCompiler::getHexitValue(char ch) {
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return (ch - 'a') + 10;
    }
    if ('A' <= ch && ch <= 'F') {
        return (ch - 'A') + 10;
    }
    return -1;
}

/**
 * Measures the run of characters from the scan position up to (not including)
 * the next ';', '\r' or '\n', or up to the end of the script.
 *
 * Sets cchToken to the length of that run.
 *
 * @return true if the run is not empty
 */
bool SelectionCompiler::lookingAtSpecialString() {
    int ichT = ichToken;
    while (ichT < cchScript) {
        // Re-read the character on every step; testing only the first
        // character would let the scan run past later terminators.
        const char ch = script[ichT];
        if (ch == ';' || ch == '\r' || ch == '\n') {
            break;
        }
        ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
}

/**
 * Tests whether a decimal literal (optional '-', digits, '.', digits) starts
 * at the scan position. A dot is required and at least one digit must appear
 * on either side of it. On success cchToken is set to the literal's length.
 *
 * @param allowNegative NOTE(review): not consulted by this function -- a
 *        leading '-' is accepted unconditionally, unlike lookingAtInteger().
 * @return true if a decimal literal was recognised
 */
bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
    if (ichToken == cchScript) {
        return false;
    }

    int pos = ichToken;
    if (script[pos] == '-') {
        ++pos;
    }

    // integer part
    bool sawDigit = false;
    for (; pos < cchScript && std::isdigit(script[pos]); ++pos) {
        sawDigit = true;
    }

    // Without a dot right here this is not a decimal literal.
    if (pos == cchScript || script[pos] != '.') {
        return false;
    }

    // To support names such as DMPC.1: a dot that directly follows a letter
    // belongs to a name, not to a number.
    if (pos > 0 && std::isalpha(script[pos - 1])) {
        return false;
    }

    // step over the dot, then the fractional part
    ++pos;
    for (; pos < cchScript && std::isdigit(script[pos]); ++pos) {
        sawDigit = true;
    }

    cchToken = pos - ichToken;
    return sawDigit;
}

/**
 * Tests whether an integer literal starts at the scan position. On success
 * cchToken is set to the literal's length (sign included).
 *
 * @param allowNegative when true, a leading '-' is accepted as part of the
 *        literal
 * @return true if at least one digit was found
 */
bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
    if (ichToken == cchScript) {
        return false;
    }

    int firstDigit = ichToken;
    if (allowNegative && script[ichToken] == '-') {
        ++firstDigit;
    }

    int pos = firstDigit;
    for (; pos < cchScript && std::isdigit(script[pos]); ++pos) {
        // just advance over the digits
    }

    const bool foundDigits = (pos != firstDigit);
    if (foundDigits) {
        cchToken = pos - ichToken;
    }
    return foundDigits;
}

/**
 * Tests whether an operator, punctuation mark or identifier starts at the scan
 * position and, if so, sets cchToken to its length.
 *
 * Recognised forms:
 *  - single characters: ( ) , * - [ ] + : @ . %
 *  - '&' or '|', optionally doubled ("&&", "||")
 *  - '<', '=' or '>', optionally followed by one more of '<', '=', '>'
 *  - '/' or '!', optionally followed by '='
 *  - identifiers: a letter, '_' or '?' followed by letters, digits, '_', '?'
 *
 * @return true if a token was recognised; false at the end of the script or
 *         when the character cannot start any of the forms above
 */
bool SelectionCompiler::lookingAtLookupToken() {
    if (ichToken == cchScript) {
        return false;
    }

    int ichT = ichToken;
    const char ch = script[ichT++];
    switch (ch) {
        case '(':
        case ')':
        case ',':
        case '*':
        case '-':
        case '[':
        case ']':
        case '+':
        case ':':
        case '@':
        case '.':
        case '%':
            break;
        case '&':
        case '|':
            if (ichT < cchScript && script[ichT] == ch) {
                ++ichT;
            }
            break;
        case '<':
        case '=':
        case '>':
            if (ichT < cchScript) {
                const char next = script[ichT];
                if (next == '<' || next == '=' || next == '>') {
                    ++ichT;
                }
            }
            break;
        case '/':
        case '!':
            if (ichT < cchScript && script[ichT] == '=') {
                ++ichT;
            }
            break;
        default:
            // An identifier must start with a letter or an underscore. The
            // upper-case range test has to be (ch < 'A' || ch > 'Z'); written
            // with && it can never be true and every character is accepted.
            if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && ch != '_') {
                return false;
            }
            // fall through: scan the rest of the identifier
        case '?': // include question marks in identifier for atom expressions
            while (ichT < cchScript) {
                // Cast first: the <cctype> functions have undefined behaviour
                // for negative values.
                const unsigned char next = static_cast<unsigned char>(script[ichT]);
                const bool identChar = !std::isspace(next) &&
                    (std::isalpha(next) || std::isdigit(next) ||
                     next == '_' || next == '?');
                if (!identChar) {
                    break;
                }
                ++ichT;
            }
            break;
    }

    cchToken = ichT - ichToken;

    return true;
}

bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
    const Token& tokenCommand = ltoken[0];
    int tokCommand = tokenCommand.tok;

    atokenCommand = ltoken;
    if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
        return false;
    }
    
    return true;
}

bool SelectionCompiler::compileExpression() {
    /** todo */
    int i = 1;
    int tokCommand = atokenCommand[0].tok;
    if (tokCommand == Token::define) {
        i = 2;
    } else if ((tokCommand & Token::embeddedExpression) != 0) {
        // look for the open parenthesis
        while (i < atokenCommand.size() &&
         atokenCommand[i].tok != Token::leftparen)
        ++i;
    }

    if (i >= atokenCommand.size()) {
        return true;
    }
    return compileExpression(i);
  }

                  
/**
 * Appends a token to the end of the postfix token list (ltokenPostfix).
 *
 * @param token token to append (copied)
 * @return always true, so that callers can write
 *         "return addTokenToPostfix(...)"
 */
bool SelectionCompiler::addTokenToPostfix(const Token& token) {
    ltokenPostfix.insert(ltokenPostfix.end(), token);
    return true;
}

/**
 * Converts the expression that starts at index itoken of atokenCommand into
 * postfix form.
 *
 * The tokens before itoken are copied unchanged, then the expression is
 * parsed with clauseOr() between the tokenExpressionBegin and
 * tokenExpressionEnd markers. On success atokenCommand is replaced by the
 * postfix list.
 *
 * @param itoken index of the first token of the expression
 * @return false if parsing failed; the result of endOfExpressionExpected()
 *         if tokens remain after the expression; true otherwise
 */
bool SelectionCompiler::compileExpression(int itoken) {
    ltokenPostfix.clear();

    // the command prefix is carried over as-is
    int copied = 0;
    while (copied < itoken) {
        addTokenToPostfix(atokenCommand[copied]);
        ++copied;
    }

    // set up the infix cursor that tokenNext()/tokPeek() read from
    atokenInfix = atokenCommand;
    itokenInfix = itoken;

    addTokenToPostfix(Token::tokenExpressionBegin);
    const bool parsed = clauseOr();
    if (!parsed) {
        return false;
    }
    addTokenToPostfix(Token::tokenExpressionEnd);

    // every infix token must have been consumed
    if (itokenInfix != atokenInfix.size()) {
        return endOfExpressionExpected();
    }

    atokenCommand = ltokenPostfix;
    return true;
}

/**
 * Returns the infix token at the cursor and advances the cursor.
 *
 * @return a copy of the current token, or a default-constructed Token when
 *         all infix tokens have been consumed (the cursor is not moved then)
 */
Token SelectionCompiler::tokenNext() {
    const bool exhausted = (itokenInfix == atokenInfix.size());
    if (exhausted) {
        return Token();
    }

    const Token& current = atokenInfix[itokenInfix];
    ++itokenInfix;
    return current;
}

/**
 * Returns the value of the infix token at the cursor without consuming it.
 *
 * @return the token's value, or an empty boost::any when all infix tokens
 *         have been consumed
 */
boost::any SelectionCompiler::valuePeek() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix].value;
    }
    return boost::any();
}

/**
 * Returns the token code of the infix token at the cursor without consuming
 * it.
 *
 * @return the token's tok field, or 0 when all infix tokens have been consumed
 */
int SelectionCompiler::tokPeek() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix].tok;
    }
    return 0;
}

bool SelectionCompiler::clauseOr() {
    if (!clauseAnd()) {
        return false;
    }
    
    while (tokPeek() == Token::opOr) {
        Token tokenOr = tokenNext();
        if (!clauseAnd()) {
            return false;
        }
        addTokenToPostfix(tokenOr);
    }
    return true;
}

bool SelectionCompiler::clauseAnd() {
    if (!clauseNot()) {
        return false;
    }

    while (tokPeek() == Token::opAnd) {
        Token tokenAnd = tokenNext();
        if (!clauseNot()) {
            return false;
        }
        addTokenToPostfix(tokenAnd);
    }
    return true;
}

bool SelectionCompiler::clauseNot() {
    if (tokPeek() == Token::opNot) {
        Token tokenNot = tokenNext();
        if (!clauseNot()) {
            return false;
        }
        return addTokenToPostfix(tokenNot);
    }
    return clausePrimitive();
}

/**
 * Parses one primitive of a selection expression, chosen by the token at the
 * cursor:
 *  - within            -> clauseWithin()
 *  - '*' or identifier -> clauseChemObjName()
 *  - integer           -> clauseIndex()
 *  - '('               -> parenthesised sub-expression
 *  - all / none        -> the token itself is emitted
 *  - atom property     -> clauseComparator()
 *  - predefined set    -> the token itself is emitted
 *
 * Exact token matches are tested before the flag-based tests.
 *
 * @return the result of the selected sub-parser, or the result of
 *         unrecognizedExpressionToken() if nothing matches
 */
bool SelectionCompiler::clausePrimitive() {
    const int tok = tokPeek();

    if (tok == Token::within) {
        return clauseWithin();
    }
    if (tok == Token::asterisk || tok == Token::identifier) {
        return clauseChemObjName();
    }
    if (tok == Token::integer) {
        return clauseIndex();
    }
    if (tok == Token::leftparen) {
        tokenNext();  // consume '('
        if (!clauseOr()) {
            return false;
        }
        if (tokenNext().tok != Token::rightparen) {
            return rightParenthesisExpected();
        }
        return true;
    }
    if (tok == Token::all || tok == Token::none) {
        return addTokenToPostfix(tokenNext());
    }

    // No exact match: classify the token by its flag bits.
    if ((tok & Token::atomproperty) == Token::atomproperty) {
        return clauseComparator();
    }
    if ((tok & Token::predefinedset) == Token::predefinedset) {
        // predefined sets are emitted as they are
        return addTokenToPostfix(tokenNext());
    }

    return unrecognizedExpressionToken();
}

bool SelectionCompiler::clauseComparator() {
    Token tokenAtomProperty = tokenNext();
    Token tokenComparator = tokenNext();
    if ((tokenComparator.tok & Token::comparator) == 0) {
        return comparisonOperatorExpected();
    }

    Token tokenValue = tokenNext();
    if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
        return numberExpected();
    }
    
    float val;
    if (tokenValue.value.type() == typeid(int)) {
        val = boost::any_cast<int>(tokenValue.value);
    } else if (tokenValue.value.type() == typeid(float)) {
        val = boost::any_cast<float>(tokenValue.value);
    } else {
        return false;
    }

    boost::any floatVal;
    floatVal = val;
    return addTokenToPostfix(Token(tokenComparator.tok,
                       tokenAtomProperty.tok, floatVal));
}

bool SelectionCompiler::clauseWithin() {
    tokenNext();                             // WITHIN
    if (tokenNext().tok != Token::leftparen) {  // (
        return leftParenthesisExpected();
    }
    
    boost::any distance;
    Token tokenDistance = tokenNext();       // distance
    switch(tokenDistance.tok) {
        case Token::integer:
        case Token::decimal:
            distance = tokenDistance.value;
            break;
        default:
            return numberOrKeywordExpected();
    }

    if (tokenNext().tok != Token::opOr) {       // ,
        return commaExpected();
    }
    
    if (! clauseOr()) {                        // *expression*
        return false;
    }
    
    if (tokenNext().tok != Token::rightparen) { // )T
        return rightParenthesisExpected();
    }
    
    return addTokenToPostfix(Token(Token::within, distance));
}

/**
 * Parses a dotted object name of the form  name [ . name [ . name ] ]  and
 * emits it as a single Token::name token whose value is the joined string.
 * At most two dots are consumed.
 *
 * @return false if any name component fails to parse, otherwise the result of
 *         addTokenToPostfix()
 */
bool SelectionCompiler::clauseChemObjName() {
    std::string chemObjName;
    if (!clauseName(chemObjName)) {
        return false;
    }

    // allow two dots at most
    for (int dots = 0; dots < 2 && tokPeek() == Token::dot; ++dots) {
        tokenNext();  // consume the dot
        chemObjName += ".";
        if (!clauseName(chemObjName)) {
            return false;
        }
    }

    return addTokenToPostfix(Token(Token::name, chemObjName));
}

/**
 * Parses one component of an object name and appends its text to `name`.
 *
 * A component is a run of consecutive asterisk, identifier and integer
 * tokens; their texts are concatenated without separators. The run ends at
 * the first token of any other kind (including a dot), which is left
 * unconsumed.
 *
 * @param name string the component text is appended to
 * @return false if the token at the cursor cannot start a name (nothing is
 *         consumed in that case), true otherwise
 */
bool SelectionCompiler::clauseName(std::string& name) {

    int tok = tokPeek();
    if (tok != Token::asterisk && tok != Token::identifier && tok != Token::integer) {
        return false;
    }

    // First token: append according to the type its value actually holds.
    Token token = tokenNext();
    if (token.value.type() == typeid(std::string)) {
        name += boost::any_cast<std::string>(token.value);
    } else if (token.value.type() == typeid(int)) {
        // Same integer formatting as the loop below; replaces the former
        // sprintf into a fixed-size buffer.
        name += toString(boost::any_cast<int>(token.value));
    }

    // Following tokens: keep appending while they can be part of a name.
    while (true) {
        tok = tokPeek();
        if (tok == Token::asterisk) {
            name += "*";
            tokenNext();
        } else if (tok == Token::identifier) {
            name += boost::any_cast<std::string>(tokenNext().value);
        } else if (tok == Token::integer) {
            name += toString(boost::any_cast<int>(tokenNext().value));
        } else {
            // a dot or any other token ends this component
            return true;
        }
    }
}

/**
 * Parses an index selection: either a single integer or a range written as
 * <integer> to <integer>.
 *
 * Emits a Token::index token whose value is an int for a single index, or a
 * std::pair<int, int> (first, second) for a range.
 *
 * @return the result of numberExpected() if an integer is missing; false if
 *         the second bound does not hold an int; otherwise the result of
 *         addTokenToPostfix()
 */
bool SelectionCompiler::clauseIndex(){
    Token token = tokenNext();
    if (token.tok != Token::integer) {
        return numberExpected();
    }

    int index = boost::any_cast<int>(token.value);

    // No "to" keyword: this is a single index. (The debug print of the token
    // codes that used to sit here has been removed.)
    if (tokPeek() != Token::to) {
        return addTokenToPostfix(Token(Token::index, boost::any(index)));
    }

    tokenNext();  // consume "to"
    if (tokPeek() != Token::integer) {
        return numberExpected();
    }

    boost::any intVal = tokenNext().value;
    int first = index;
    if (intVal.type() != typeid(int)){
        return false;
    }
    int second = boost::any_cast<int>(intVal);

    return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
}

}
Revision:	415
Committed:	Wed Mar 9 18:46:16 2005 UTC (20 years, 3 months ago) by tim
File size:	21787 byte(s)
Log Message:	fix compilation issue
#	User	Rev	Content
1	tim	279	/*
2			* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3			*
4			* The University of Notre Dame grants you ("Licensee") a
5			* non-exclusive, royalty free, license to use, modify and
6			* redistribute this software in source and binary code form, provided
7			* that the following conditions are met:
8			*
9			* 1. Acknowledgement of the program authors must be made in any
10			* publication of scientific results based in part on use of the
11			* program. An acceptable form of acknowledgement is citation of
12			* the article in which the program was described (Matthew
13			* A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14			* J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15			* Parallel Simulation Engine for Molecular Dynamics,"
16			* J. Comput. Chem. 26, pp. 252-271 (2005))
17			*
18			* 2. Redistributions of source code must retain the above copyright
19			* notice, this list of conditions and the following disclaimer.
20			*
21			* 3. Redistributions in binary form must reproduce the above copyright
22			* notice, this list of conditions and the following disclaimer in the
23			* documentation and/or other materials provided with the
24			* distribution.
25			*
26			* This software is provided "AS IS," without a warranty of any
27			* kind. All express or implied conditions, representations and
28			* warranties, including any implied warranty of merchantability,
29			* fitness for a particular purpose or non-infringement, are hereby
30			* excluded. The University of Notre Dame and its licensors shall not
31			* be liable for any damages suffered by licensee as a result of
32			* using, modifying or distributing the software or its
33			* derivatives. In no event will the University of Notre Dame or its
34			* licensors be liable for any lost revenue, profit or data, or for
35			* direct, indirect, special, consequential, incidental or punitive
36			* damages, however caused and regardless of the theory of liability,
37			* arising out of the use of or inability to use software, even if the
38			* University of Notre Dame has been advised of the possibility of
39			* such damages.
40			*/
41
42			#include "selection/SelectionCompiler.hpp"
43	tim	281	#include "utils/StringUtils.hpp"
44	tim	279	namespace oopse {
45
46			bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47
48			this->filename = filename;
49			this->script = script;
50			lineNumbers.clear();
51			lineIndices.clear();
52			aatokenCompiled.clear();
53
54	tim	281	if (internalCompile()) {
55	tim	279	return true;
56			}
57
58			int icharEnd;
59	tim	281	if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60			(icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61	tim	279	icharEnd = script.size();
62			}
63			errorLine = script.substr(ichCurrentCommand, icharEnd);
64			return false;
65			}
66
67			bool SelectionCompiler::internalCompile(){
68
69			cchScript = script.size();
70			ichToken = 0;
71			lineCurrent = 1;
72
73			error = false;
74
75	tim	281	//std::vector<Token> lltoken;
76			aatokenCompiled.clear();
77	tim	279	std::vector<Token> ltoken;
78
79	tim	281	Token tokenCommand;
80			int tokCommand = Token::nada;
81	tim	279
82			for ( ; true; ichToken += cchToken) {
83			if (lookingAtLeadingWhitespace())
84			continue;
85	tim	288	//if (lookingAtComment())
86			// continue;
87	tim	281	bool endOfLine = lookingAtEndOfLine();
88	tim	279	if (endOfLine \|\| lookingAtEndOfStatement()) {
89	tim	281	if (tokCommand != Token::nada) {
90	tim	279	if (! compileCommand(ltoken)) {
91			return false;
92			}
93	tim	281	aatokenCompiled.push_back(atokenCommand);
94			lineNumbers.push_back(lineCurrent);
95			lineIndices.push_back(ichCurrentCommand);
96			ltoken.clear();
97			tokCommand = Token::nada;
98	tim	279	}
99
100			if (ichToken < cchScript) {
101			if (endOfLine)
102			++lineCurrent;
103			continue;
104			}
105			break;
106			}
107
108	tim	281	if (tokCommand != Token::nada) {
109	tim	279	if (lookingAtString()) {
110			std::string str = getUnescapedStringLiteral();
111	tim	281	ltoken.push_back(Token(Token::string, str));
112	tim	279	continue;
113			}
114	tim	281	//if ((tokCommand & Token::specialstring) != 0 &&
115			// lookingAtSpecialString()) {
116			// std::string str = script.substr(ichToken, ichToken + cchToken);
117			// ltoken.push_back(Token(Token::string, str));
118			// continue;
119			//}
120			if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121	tim	295	float value = lexi_cast<float>(script.substr(ichToken, cchToken));
122			ltoken.push_back(Token(Token::decimal, boost::any(value)));
123	tim	279	continue;
124			}
125	tim	281	if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
126	tim	295
127			int val = lexi_cast<int>(script.substr(ichToken, cchToken));
128			ltoken.push_back(Token(Token::integer, boost::any(val)));
129	tim	279	continue;
130			}
131			}
132
133			if (lookingAtLookupToken()) {
134	tim	288	std::string ident = script.substr(ichToken, cchToken);
135	tim	281	Token token;
136			Token* pToken = TokenMap::getInstance()->getToken(ident);
137			if (pToken != NULL) {
138			token = *pToken;
139			} else {
140			token = Token(Token::identifier, ident);
141	tim	279	}
142
143			int tok = token.tok;
144
145			switch (tokCommand) {
146	tim	281	case Token::nada:
147	tim	279	ichCurrentCommand = ichToken;
148			//tokenCommand = token;
149			tokCommand = tok;
150	tim	281	if ((tokCommand & Token::command) == 0)
151	tim	279	return commandExpected();
152			break;
153
154	tim	281	case Token::define:
155	tim	279	if (ltoken.size() == 1) {
156			// we are looking at the variable name
157	tim	281	if (tok != Token::identifier &&
158			(tok & Token::predefinedset) != Token::predefinedset)
159	tim	279	return invalidExpressionToken(ident);
160			} else {
161			// we are looking at the expression
162	tim	281	if (tok != Token::identifier &&
163			(tok & (Token::expression \| Token::predefinedset)) == 0)
164	tim	279	return invalidExpressionToken(ident);
165			}
166
167			break;
168
169	tim	281	case Token::select:
170			if (tok != Token::identifier && (tok & Token::expression) == 0)
171	tim	279	return invalidExpressionToken(ident);
172			break;
173			}
174			ltoken.push_back(token);
175			continue;
176			}
177
178			if (ltoken.size() == 0) {
179			return commandExpected();
180			}
181
182			return unrecognizedToken();
183			}
184
185			return true;
186			}
187
188
189			bool SelectionCompiler::lookingAtLeadingWhitespace() {
190
191			int ichT = ichToken;
192			while (ichT < cchScript && std::isspace(script[ichT])) {
193			++ichT;
194			}
195			cchToken = ichT - ichToken;
196			return cchToken > 0;
197			}
198
199			bool SelectionCompiler::lookingAtEndOfLine() {
200			if (ichToken == cchScript)
201			return true;
202			int ichT = ichToken;
203			char ch = script[ichT];
204			if (ch == '\r') {
205			++ichT;
206			if (ichT < cchScript && script[ichT] == '\n')
207			++ichT;
208			} else if (ch == '\n') {
209			++ichT;
210			} else {
211			return false;
212			}
213			cchToken = ichT - ichToken;
214			return true;
215			}
216
217			bool SelectionCompiler::lookingAtEndOfStatement() {
218			if (ichToken == cchScript \|\| script[ichToken] != ';')
219			return false;
220			cchToken = 1;
221			return true;
222			}
223
224			bool SelectionCompiler::lookingAtString() {
225			if (ichToken == cchScript)
226			return false;
227			if (script[ichToken] != '"')
228			return false;
229			// remove support for single quote
230			// in order to use it in atom expressions
231			// char chFirst = script.charAt(ichToken);
232			// if (chFirst != '"' && chFirst != '\'')
233			// return false;
234			int ichT = ichToken + 1;
235			// while (ichT < cchScript && script.charAt(ichT++) != chFirst)
236			char ch;
237	tim	281	bool previousCharBackslash = false;
238	tim	279	while (ichT < cchScript) {
239	tim	281	ch = script[ichT++];
240	tim	279	if (ch == '"' && !previousCharBackslash)
241			break;
242			previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
243			}
244			cchToken = ichT - ichToken;
245	tim	295
246	tim	279	return true;
247			}
248
249
250			std::string SelectionCompiler::getUnescapedStringLiteral() {
251	tim	281	/** @todo */
252			std::string sb(cchToken - 2, ' ');
253
254	tim	279	int ichMax = ichToken + cchToken - 1;
255			int ich = ichToken + 1;
256
257			while (ich < ichMax) {
258			char ch = script[ich++];
259			if (ch == '\\' && ich < ichMax) {
260			ch = script[ich++];
261			switch (ch) {
262			case 'b':
263			ch = '\b';
264			break;
265			case 'n':
266			ch = '\n';
267			break;
268			case 't':
269			ch = '\t';
270			break;
271			case 'r':
272			ch = '\r';
273			// fall into
274			case '"':
275			case '\\':
276			case '\'':
277			break;
278			case 'x':
279			case 'u':
280			int digitCount = ch == 'x' ? 2 : 4;
281			if (ich < ichMax) {
282			int unicode = 0;
283			for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
284			char chT = script[ich];
285			int hexit = getHexitValue(chT);
286			if (hexit < 0)
287			break;
288			unicode <<= 4;
289			unicode += hexit;
290			++ich;
291			}
292			ch = (char)unicode;
293			}
294			}
295			}
296	tim	281	sb.append(1, ch);
297	tim	279	}
298
299	tim	281	return sb;
300	tim	279	}
301
302	tim	281	int SelectionCompiler::getHexitValue(char ch) {
303	tim	279	if (ch >= '0' && ch <= '9')
304			return ch - '0';
305			else if (ch >= 'a' && ch <= 'f')
306			return 10 + ch - 'a';
307			else if (ch >= 'A' && ch <= 'F')
308			return 10 + ch - 'A';
309			else
310			return -1;
311			}
312
313			bool SelectionCompiler::lookingAtSpecialString() {
314			int ichT = ichToken;
315			char ch = script[ichT];
316			while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
317			++ichT;
318			}
319			cchToken = ichT - ichToken;
320			return cchToken > 0;
321			}
322
323	tim	281	bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
324	tim	279	if (ichToken == cchScript) {
325			return false;
326			}
327
328			int ichT = ichToken;
329			if (script[ichT] == '-') {
330			++ichT;
331			}
332	tim	281	bool digitSeen = false;
333	tim	279	char ch = 'X';
334			while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
335			++ichT;
336			digitSeen = true;
337			}
338
339			if (ichT == cchScript \|\| ch != '.') {
340			return false;
341			}
342
343	tim	303	// to support DMPC.1, let's check the character before the dot
344			if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
345	tim	279	return false;
346			}
347
348			++ichT;
349			while (ichT < cchScript && std::isdigit(script[ichT])) {
350			++ichT;
351			digitSeen = true;
352			}
353			cchToken = ichT - ichToken;
354			return digitSeen;
355			}
356
357	tim	281	bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
358	tim	279	if (ichToken == cchScript) {
359			return false;
360			}
361			int ichT = ichToken;
362			if (allowNegative && script[ichToken] == '-') {
363			++ichT;
364			}
365			int ichBeginDigits = ichT;
366			while (ichT < cchScript && std::isdigit(script[ichT])) {
367			++ichT;
368			}
369			if (ichBeginDigits == ichT) {
370			return false;
371			}
372			cchToken = ichT - ichToken;
373			return true;
374			}
375
376			bool SelectionCompiler::lookingAtLookupToken() {
377			if (ichToken == cchScript) {
378			return false;
379			}
380
381			int ichT = ichToken;
382			char ch;
383			switch (ch = script[ichT++]) {
384			case '(':
385			case ')':
386			case ',':
387			case '*':
388			case '-':
389			case '[':
390			case ']':
391			case '+':
392			case ':':
393			case '@':
394			case '.':
395			case '%':
396			break;
397			case '&':
398			case '\|':
399			if (ichT < cchScript && script[ichT] == ch) {
400			++ichT;
401			}
402			break;
403			case '<':
404			case '=':
405			case '>':
406			if (ichT < cchScript && ((ch = script[ichT]) == '<' \|\| ch == '=' \|\| ch == '>')) {
407			++ichT;
408			}
409			break;
410			case '/':
411			case '!':
412			if (ichT < cchScript && script[ichT] == '=') {
413			++ichT;
414			}
415			break;
416			default:
417			if ((ch < 'a' \|\| ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
418			return false;
419			}
420			case '?': // include question marks in identifier for atom expressions
421	tim	288	while (ichT < cchScript && !std::isspace(ch = script[ichT]) && (std::isalpha(ch) \|\|std::isdigit(ch) \|\|
422			ch == '_' \|\| ch == '?') ) {
423
424	tim	279	++ichT;
425			}
426			break;
427			}
428	tim	295
429	tim	279	cchToken = ichT - ichToken;
430	tim	295
431	tim	279	return true;
432			}
433
434	tim	281	bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
435			const Token& tokenCommand = ltoken[0];
436	tim	279	int tokCommand = tokenCommand.tok;
437	tim	281
438			atokenCommand = ltoken;
439			if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
440	tim	279	return false;
441			}
442	tim	281
443	tim	279	return true;
444			}
445
446			bool SelectionCompiler::compileExpression() {
447			/** todo */
448			int i = 1;
449			int tokCommand = atokenCommand[0].tok;
450	tim	281	if (tokCommand == Token::define) {
451			i = 2;
452			} else if ((tokCommand & Token::embeddedExpression) != 0) {
453			// look for the open parenthesis
454			while (i < atokenCommand.size() &&
455			atokenCommand[i].tok != Token::leftparen)
456	tim	279	++i;
457			}
458	tim	281
459			if (i >= atokenCommand.size()) {
460			return true;
461			}
462	tim	279	return compileExpression(i);
463			}
464
465
466	tim	281	bool SelectionCompiler::addTokenToPostfix(const Token& token) {
467	tim	279	ltokenPostfix.push_back(token);
468			return true;
469			}
470
471			bool SelectionCompiler::compileExpression(int itoken) {
472	tim	281	ltokenPostfix.clear();
473			for (int i = 0; i < itoken; ++i) {
474	tim	279	addTokenToPostfix(atokenCommand[i]);
475	tim	281	}
476
477	tim	279	atokenInfix = atokenCommand;
478			itokenInfix = itoken;
479
480	tim	281	addTokenToPostfix(Token::tokenExpressionBegin);
481	tim	279	if (!clauseOr()) {
482			return false;
483			}
484
485	tim	281	addTokenToPostfix(Token::tokenExpressionEnd);
486			if (itokenInfix != atokenInfix.size()) {
487	tim	279	return endOfExpressionExpected();
488			}
489
490			atokenCommand = ltokenPostfix;
491			return true;
492			}
493
494			Token SelectionCompiler::tokenNext() {
495	tim	281	if (itokenInfix == atokenInfix.size()) {
496			return Token();
497			}
498			return atokenInfix[itokenInfix++];
499	tim	279	}
500
501	tim	281	boost::any SelectionCompiler::valuePeek() {
502			if (itokenInfix == atokenInfix.size()) {
503			return boost::any();
504	tim	279	} else {
505			return atokenInfix[itokenInfix].value;
506			}
507			}
508
509			int SelectionCompiler::tokPeek() {
510	tim	281	if (itokenInfix == atokenInfix.size()) {
511	tim	279	return 0;
512			}else {
513			return atokenInfix[itokenInfix].tok;
514			}
515			}
516
517			bool SelectionCompiler::clauseOr() {
518			if (!clauseAnd()) {
519			return false;
520			}
521
522	tim	281	while (tokPeek() == Token::opOr) {
523	tim	279	Token tokenOr = tokenNext();
524			if (!clauseAnd()) {
525			return false;
526			}
527			addTokenToPostfix(tokenOr);
528			}
529			return true;
530			}
531
532			bool SelectionCompiler::clauseAnd() {
533			if (!clauseNot()) {
534			return false;
535			}
536
537	tim	281	while (tokPeek() == Token::opAnd) {
538	tim	279	Token tokenAnd = tokenNext();
539			if (!clauseNot()) {
540			return false;
541			}
542			addTokenToPostfix(tokenAnd);
543			}
544			return true;
545			}
546
547			bool SelectionCompiler::clauseNot() {
548	tim	281	if (tokPeek() == Token::opNot) {
549	tim	279	Token tokenNot = tokenNext();
550			if (!clauseNot()) {
551			return false;
552			}
553			return addTokenToPostfix(tokenNot);
554			}
555			return clausePrimitive();
556			}
557
558			bool SelectionCompiler::clausePrimitive() {
559			int tok = tokPeek();
560			switch (tok) {
561	tim	281	case Token::within:
562	tim	279	return clauseWithin();
563	tim	283
564			case Token::asterisk:
565			case Token::identifier:
566			return clauseChemObjName();
567	tim	295
568			case Token::integer :
569			return clauseIndex();
570	tim	279	default:
571	tim	281	if ((tok & Token::atomproperty) == Token::atomproperty) {
572	tim	279	return clauseComparator();
573			}
574	tim	281	if ((tok & Token::predefinedset) != Token::predefinedset) {
575	tim	279	break;
576			}
577			// fall into the code and below and just add the token
578	tim	281	case Token::all:
579			case Token::none:
580	tim	279	return addTokenToPostfix(tokenNext());
581	tim	281	case Token::leftparen:
582	tim	279	tokenNext();
583			if (!clauseOr()) {
584			return false;
585			}
586	tim	281	if (tokenNext().tok != Token::rightparen) {
587	tim	279	return rightParenthesisExpected();
588			}
589			return true;
590			}
591			return unrecognizedExpressionToken();
592			}
593
594			bool SelectionCompiler::clauseComparator() {
595			Token tokenAtomProperty = tokenNext();
596			Token tokenComparator = tokenNext();
597	tim	281	if ((tokenComparator.tok & Token::comparator) == 0) {
598	tim	279	return comparisonOperatorExpected();
599			}
600
601			Token tokenValue = tokenNext();
602	tim	288	if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
603			return numberExpected();
604	tim	279	}
605	tim	288
606			float val;
607			if (tokenValue.value.type() == typeid(int)) {
608			val = boost::any_cast<int>(tokenValue.value);
609			} else if (tokenValue.value.type() == typeid(float)) {
610			val = boost::any_cast<float>(tokenValue.value);
611			} else {
612			return false;
613			}
614
615	tim	295	boost::any floatVal;
616			floatVal = val;
617	tim	281	return addTokenToPostfix(Token(tokenComparator.tok,
618	tim	295	tokenAtomProperty.tok, floatVal));
619	tim	279	}
620
621			bool SelectionCompiler::clauseWithin() {
622			tokenNext(); // WITHIN
623	tim	281	if (tokenNext().tok != Token::leftparen) { // (
624	tim	279	return leftParenthesisExpected();
625			}
626
627	tim	281	boost::any distance;
628	tim	279	Token tokenDistance = tokenNext(); // distance
629			switch(tokenDistance.tok) {
630	tim	281	case Token::integer:
631			case Token::decimal:
632	tim	279	distance = tokenDistance.value;
633			break;
634			default:
635			return numberOrKeywordExpected();
636			}
637
638	tim	281	if (tokenNext().tok != Token::opOr) { // ,
639	tim	279	return commaExpected();
640			}
641
642			if (! clauseOr()) { // expression
643			return false;
644			}
645
646	tim	281	if (tokenNext().tok != Token::rightparen) { // )T
647	tim	279	return rightParenthesisExpected();
648			}
649
650	tim	281	return addTokenToPostfix(Token(Token::within, distance));
651	tim	279	}
652
653	tim	283	bool SelectionCompiler::clauseChemObjName() {
654			std::string chemObjName;
655			int tok = tokPeek();
656			if (!clauseName(chemObjName)){
657			return false;
658			}
659	tim	279
660
661	tim	283	tok = tokPeek();
662			//allow two dot at most
663			if (tok == Token::dot) {
664	tim	288	tokenNext();
665			chemObjName += ".";
666	tim	283	if (!clauseName(chemObjName)) {
667			return false;
668			}
669			tok = tokPeek();
670			if (tok == Token::dot) {
671	tim	288	tokenNext();
672			chemObjName += ".";
673
674	tim	283	if (!clauseName(chemObjName)) {
675			return false;
676			}
677			}
678			}
679
680			return addTokenToPostfix(Token(Token::name, chemObjName));
681	tim	279	}
682
683	tim	283	bool SelectionCompiler:: clauseName(std::string& name) {
684
685			int tok = tokPeek();
686
687	tim	303	if (tok == Token::asterisk \|\| tok == Token::identifier \|\| tok == Token::integer) {
688
689			Token token = tokenNext();
690			if (token.value.type() == typeid(std::string)) {
691			name += boost::any_cast<std::string>(token.value);
692			} else if (token.value.type() == typeid(int)){
693			int intVal = boost::any_cast<int>(token.value);
694			char buffer[255];
695			sprintf(buffer,"%d", intVal);
696			name += buffer; /** @todo */
697			//name += toString<int>(intVal);
698			}
699	tim	283	while(true){
700			tok = tokPeek();
701			switch (tok) {
702			case Token::asterisk :
703			name += "*";
704			tokenNext();
705			break;
706			case Token::identifier :
707			name += boost::any_cast<std::string>(tokenNext().value);
708			break;
709			case Token::integer :
710			name += toString(boost::any_cast<int>(tokenNext().value));
711			break;
712			case Token::dot :
713			return true;
714			default :
715			return true;
716			}
717			}
718
719			}else {
720			return false;
721			}
722
723	tim	279	}
724
725	tim	295	bool SelectionCompiler::clauseIndex(){
726			Token token = tokenNext();
727			if (token.tok == Token::integer) {
728			int index = boost::any_cast<int>(token.value);
729			int tok = tokPeek();
730			std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
731			if (tok == Token::to) {
732			tokenNext();
733			tok = tokPeek();
734			if (tok != Token::integer) {
735			return numberExpected();
736			}
737
738			boost::any intVal = tokenNext().value;
739			int first = index;
740			if (intVal.type() != typeid(int)){
741			return false;
742			}
743			int second = boost::any_cast<int>(intVal);
744	tim	283
745	tim	295	return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
746
747			}else {
748			return addTokenToPostfix(Token(Token::index, boost::any(index)));
749			}
750			} else {
751			return numberExpected();
752			}
753	tim	279	}
754	tim	295
755			}