src/selection/SelectionCompiler.cpp

/*
 * Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
 *
 * The University of Notre Dame grants you ("Licensee") a
 * non-exclusive, royalty free, license to use, modify and
 * redistribute this software in source and binary code form, provided
 * that the following conditions are met:
 *
 * 1. Acknowledgement of the program authors must be made in any
 *    publication of scientific results based in part on use of the
 *    program.  An acceptable form of acknowledgement is citation of
 *    the article in which the program was described (Matthew
 *    A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
 *    J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
 *    Parallel Simulation Engine for Molecular Dynamics,"
 *    J. Comput. Chem. 26, pp. 252-271 (2005))
 *
 * 2. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 3. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 *
 * This software is provided "AS IS," without a warranty of any
 * kind. All express or implied conditions, representations and
 * warranties, including any implied warranty of merchantability,
 * fitness for a particular purpose or non-infringement, are hereby
 * excluded.  The University of Notre Dame and its licensors shall not
 * be liable for any damages suffered by licensee as a result of
 * using, modifying or distributing the software or its
 * derivatives. In no event will the University of Notre Dame or its
 * licensors be liable for any lost revenue, profit or data, or for
 * direct, indirect, special, consequential, incidental or punitive
 * damages, however caused and regardless of the theory of liability,
 * arising out of the use of or inability to use software, even if the
 * University of Notre Dame has been advised of the possibility of
 * such damages.
 */

#include "selection/SelectionCompiler.hpp"
#include "utils/StringUtils.hpp"
namespace oopse {

/**
 * Compiles a selection script and, on failure, records the offending line.
 *
 * Stores the file name and script text in the compiler, resets the
 * per-compilation output tables (lineNumbers, lineIndices, aatokenCompiled)
 * and runs internalCompile().
 *
 * @param filename name stored in this->filename
 * @param script   text of the selection script to compile
 * @return true if internalCompile() succeeded; false otherwise, in which case
 *         errorLine holds the script text from the start of the command being
 *         compiled up to (not including) the next line terminator.
 */
bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {

    this->filename = filename;
    this->script = script;
    lineNumbers.clear();
    lineIndices.clear();
    aatokenCompiled.clear();

    if (internalCompile()) {
        return true;
    }

    // Clamp the start offset so that a stale or never-assigned command index
    // cannot make substr() throw std::out_of_range.
    std::string::size_type lineBegin = static_cast<std::string::size_type>(ichCurrentCommand);
    if (lineBegin > script.size()) {
        lineBegin = script.size();
    }

    // The line ends at the first '\r' or '\n', whichever comes first.
    std::string::size_type lineEnd = script.find_first_of("\r\n", lineBegin);
    if (lineEnd == std::string::npos) {
        lineEnd = script.size();
    }

    // substr() expects a length, not an end index.
    errorLine = script.substr(lineBegin, lineEnd - lineBegin);
    return false;
}

/**
 * Tokenizes the stored script and compiles it one command at a time.
 *
 * Scans script from offset 0. On each pass of the loop one of the
 * lookingAt*() probes is tried at ichToken; the probes store the length of
 * what they matched in cchToken, and the loop increment uses that length to
 * step past the match. Tokens of the command being scanned are collected in
 * ltoken. At an end of line, a ';' or the end of the script the collected
 * command is handed to compileCommand() and the resulting atokenCommand is
 * appended to aatokenCompiled together with its line number and start offset.
 *
 * @return true when the whole script has been consumed; false if
 *         compileCommand() fails; otherwise whatever the error helper that
 *         was hit returns (commandExpected(), invalidExpressionToken(),
 *         unrecognizedToken() -- presumably false, confirm in their
 *         definitions).
 */
bool SelectionCompiler::internalCompile(){

    cchScript = script.size();
    ichToken = 0;
    lineCurrent = 1;

    error = false;

    //std::vector<Token> lltoken;
    aatokenCompiled.clear();
    // tokens collected so far for the command currently being scanned
    std::vector<Token> ltoken;

    // NOTE(review): tokenCommand is never read; its only assignment is
    // commented out further down.
    Token tokenCommand;
    // token id of the current command word; Token::nada while no command is open
    int tokCommand = Token::nada;

    for ( ; true; ichToken += cchToken) {
        if (lookingAtLeadingWhitespace())
            continue;
        //if (lookingAtComment())
        //    continue;
        bool endOfLine = lookingAtEndOfLine();
        if (endOfLine || lookingAtEndOfStatement()) {
            // a command is open: compile it and record where it came from
            if (tokCommand != Token::nada) {
                if (! compileCommand(ltoken)) {
                    return false;
                }
                aatokenCompiled.push_back(atokenCommand);
                lineNumbers.push_back(lineCurrent);
                lineIndices.push_back(ichCurrentCommand);
                ltoken.clear();
                tokCommand = Token::nada;
            }
            
            // more input follows: count the line (not for ';') and keep scanning
            if (ichToken < cchScript) {
                if (endOfLine)
                    ++lineCurrent;
              continue;
            }
            // end of script reached
            break;
        }

        // literals are only recognized after a command word has been seen
        if (tokCommand != Token::nada) {
            if (lookingAtString()) {
                std::string str = getUnescapedStringLiteral();
                ltoken.push_back(Token(Token::string, str));
                continue;
            }
            //if ((tokCommand & Token::specialstring) != 0 &&
            //    lookingAtSpecialString()) {
            //    std::string str = script.substr(ichToken, ichToken + cchToken);
            //    ltoken.push_back(Token(Token::string, str));
            //    continue;
            //}
            // decimals are probed before integers so that "1.5" is not split at the dot
            if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
                float value = lexi_cast<float>(script.substr(ichToken, cchToken));        
                std::cout << "encount an decimal: " << value << std::endl;
                ltoken.push_back(Token(Token::decimal, boost::any(value)));
                continue;
            }
            if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {

                int val = lexi_cast<int>(script.substr(ichToken, cchToken));
                std::cout << "encount an integer: " << val << std::endl;
                ltoken.push_back(Token(Token::integer,   boost::any(val)));
                continue;
            }
        }
      
        if (lookingAtLookupToken()) {
            std::string ident = script.substr(ichToken, cchToken);
            Token token;            
            // use the token registered in TokenMap for this text, otherwise
            // treat the text as a plain identifier
            Token* pToken = TokenMap::getInstance()->getToken(ident);
            if (pToken != NULL) {
                token = *pToken;
            } else {
                token = Token(Token::identifier, ident);
            }
            
            int tok = token.tok;
            
            // validate the token against the command it appears in
            switch (tokCommand) {
                case Token::nada:
                    // first token of a command: remember where the command
                    // starts and require the token to carry the command flag
                    ichCurrentCommand = ichToken;
                    //tokenCommand = token;
                    tokCommand = tok;
                    if ((tokCommand & Token::command) == 0)
                    return commandExpected();
                    break;

                case Token::define:
                    if (ltoken.size() == 1) {
                        // we are looking at the variable name
                        if (tok != Token::identifier &&
                        (tok & Token::predefinedset) != Token::predefinedset)
                        return invalidExpressionToken(ident);
                    } else {
                    // we are looking at the expression
                    if (tok != Token::identifier && 
                        (tok & (Token::expression | Token::predefinedset)) == 0)
                        return invalidExpressionToken(ident);
                    }
                    
                    break;

                case Token::select:
                    if (tok != Token::identifier && (tok & Token::expression) == 0)
                        return invalidExpressionToken(ident);
                break;
            }
            ltoken.push_back(token);
            continue;
        }

        // nothing matched at ichToken: report the error that fits the position
        if (ltoken.size() == 0) {
            return commandExpected();
        }
        
        return unrecognizedToken();
    }

    return true;
  }


  /**
   * Measures the run of blank characters that starts at ichToken.
   *
   * Line terminators ('\r' and '\n') are deliberately not counted as blanks:
   * they must be left in place for lookingAtEndOfLine(), otherwise a newline
   * would never terminate a command and the line counter would never advance.
   *
   * Sets cchToken to the length of the run (possibly 0).
   *
   * @return true if at least one blank character was found.
   */
  bool SelectionCompiler::lookingAtLeadingWhitespace() {

    int ichT = ichToken;
    while (ichT < cchScript) {
      const char ch = script[ichT];
      // <cctype> functions need a value representable as unsigned char
      if (ch == '\r' || ch == '\n' || !std::isspace(static_cast<unsigned char>(ch))) {
        break;
      }
      ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
  }

  /**
   * Tests whether ichToken sits on a line terminator or at the end of script.
   *
   * Recognizes "\r", "\n" and "\r\n". On a terminator cchToken is set to its
   * length (1 or 2); at the end of the script cchToken is left untouched.
   *
   * @return true at the end of the script or on a line terminator.
   */
  bool SelectionCompiler::lookingAtEndOfLine() {
    if (ichToken == cchScript)
      return true;

    const char first = script[ichToken];
    if (first != '\r' && first != '\n')
      return false;

    int length = 1;
    // a carriage return may be followed by a line feed that belongs to it
    if (first == '\r' && ichToken + 1 < cchScript && script[ichToken + 1] == '\n')
      length = 2;

    cchToken = length;
    return true;
  }

  /**
   * Tests whether the character at ichToken is the statement separator ';'.
   *
   * Sets cchToken to 1 on a match and leaves it untouched otherwise.
   *
   * @return true if a ';' is present at ichToken.
   */
  bool SelectionCompiler::lookingAtEndOfStatement() {
    const bool atSemicolon = ichToken != cchScript && script[ichToken] == ';';
    if (atSemicolon)
      cchToken = 1;
    return atSemicolon;
  }

  /**
   * Tests whether a double-quoted string literal starts at ichToken.
   *
   * Single quotes do not open a string, so they stay available for atom
   * expressions. The literal runs up to and including the first '"' that is
   * not preceded by an unpaired backslash, or to the end of the script when
   * no such quote exists. cchToken receives the length of the literal,
   * quotes included, and the matched text is written to std::cout.
   *
   * @return true if the character at ichToken is '"'.
   */
  bool SelectionCompiler::lookingAtString() {
    if (ichToken == cchScript || script[ichToken] != '"')
      return false;

    // true while the previous character was a backslash that is not itself escaped
    bool escaped = false;
    int pos = ichToken + 1;
    for (bool closed = false; !closed && pos < cchScript; ++pos) {
      const char current = script[pos];
      if (current == '"' && !escaped) {
        closed = true;
      } else if (current == '\\') {
        escaped = !escaped;
      } else {
        escaped = false;
      }
    }
    // pos is now one past the closing quote (or at the end of the script)
    cchToken = pos - ichToken;

    std::cout << "lookingAtString: encount " << script.substr(ichToken, cchToken) << std::endl; 
    return true;
  }

  
/**
 * Returns the contents of the string literal at [ichToken, ichToken + cchToken)
 * with the surrounding quotes removed and backslash escapes decoded.
 *
 * Handled escapes: \b, \n, \t, \r, \xHH (up to 2 hex digits) and \uHHHH (up
 * to 4 hex digits). Any other escaped character (including '"', '\\' and
 * '\'') stands for itself. The value of a \x / \u escape is narrowed to a
 * single char, so code points above the char range are truncated.
 *
 * The first and last characters of the token are assumed to be the quotes and
 * are skipped.
 *
 * @return the decoded text; empty if the token holds fewer than 3 characters.
 */
std::string SelectionCompiler::getUnescapedStringLiteral() {
    // Start from an empty string and only reserve room: constructing the
    // string with a length would prefix the result with filler characters.
    std::string sb;
    if (cchToken > 2) {
        sb.reserve(cchToken - 2);
    }

    const int ichMax = ichToken + cchToken - 1;
    int ich = ichToken + 1;

    while (ich < ichMax) {
        char ch = script[ich++];
        if (ch == '\\' && ich < ichMax) {
            ch = script[ich++];
            switch (ch) {
                case 'b':
                    ch = '\b';
                    break;
                case 'n':
                    ch = '\n';
                    break;
                case 't':
                    ch = '\t';
                    break;
                case 'r':
                    ch = '\r';
                    break;
                case 'x':
                case 'u': {
                    const int digitCount = (ch == 'x') ? 2 : 4;
                    // with nothing left after the escape letter, the letter itself is kept
                    if (ich < ichMax) {
                        int unicode = 0;
                        for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
                            const int hexit = getHexitValue(script[ich]);
                            if (hexit < 0)
                                break;
                            unicode = (unicode << 4) + hexit;
                            ++ich;
                        }
                        ch = static_cast<char>(unicode);
                    }
                    break;
                }
                default:
                    // '"', '\\', '\'' and unknown escapes stand for themselves
                    break;
            }
        }
        sb.append(1, ch);
    }

    return sb;
}

/**
 * Converts one hexadecimal digit character to its numeric value.
 *
 * @param ch character to convert; '0'-'9', 'a'-'f' and 'A'-'F' are accepted
 * @return the value 0..15, or -1 if ch is not a hexadecimal digit.
 */
int SelectionCompiler::getHexitValue(char ch) {
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    }
    if ('a' <= ch && ch <= 'f') {
        return ch - 'a' + 10;
    }
    if ('A' <= ch && ch <= 'F') {
        return ch - 'A' + 10;
    }
    return -1;
}

/**
 * Measures the raw text from ichToken up to the next ';', '\r' or '\n', or to
 * the end of the script if none of them occurs.
 *
 * Sets cchToken to the length of that text (possibly 0).
 *
 * @return true if the text is not empty.
 */
bool SelectionCompiler::lookingAtSpecialString() {
    int ichT = ichToken;
    while (ichT < cchScript) {
        // the character has to be re-read on every step, otherwise the
        // terminator test only ever looks at the first character
        const char ch = script[ichT];
        if (ch == ';' || ch == '\r' || ch == '\n') {
            break;
        }
        ++ichT;
    }
    cchToken = ichT - ichToken;
    return cchToken > 0;
}

/**
 * Tests whether a decimal number (digits containing a '.') starts at ichToken.
 *
 * Accepted shapes are an optional '-', optional digits, a '.', and optional
 * digits, with at least one digit overall. A '.' that directly follows a
 * letter is rejected so that names such as DMPC.1 are not read as ".1".
 * On success cchToken is set to the length of the number.
 *
 * @param allowNegative NOTE(review): not consulted here -- a leading '-' is
 *        always accepted, unlike in lookingAtInteger(); confirm whether that
 *        is intended before changing it.
 * @return true if a decimal number was recognized.
 */
bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
    if (ichToken == cchScript) {
        return false;
    }

    int ichT = ichToken;
    if (script[ichT] == '-') {
        ++ichT;
    }

    // integer part; <cctype> functions need a value representable as unsigned char
    bool digitSeen = false;
    char ch = 'X';
    while (ichT < cchScript && std::isdigit(static_cast<unsigned char>(ch = script[ichT]))) {
        ++ichT;
        digitSeen = true;
    }

    // without a dot this is not a decimal (it may still be an integer)
    if (ichT == cchScript || ch != '.') {
        return false;
    }

    // to support DMPC.1, let's check the character before the dot
    if (ichT > 0 && std::isalpha(static_cast<unsigned char>(script[ichT - 1]))) {
        return false;
    }

    // fractional part
    ++ichT;
    while (ichT < cchScript && std::isdigit(static_cast<unsigned char>(script[ichT]))) {
        ++ichT;
        digitSeen = true;
    }
    cchToken = ichT - ichToken;
    return digitSeen;
}

/**
 * Tests whether an integer literal starts at ichToken.
 *
 * On success cchToken is set to the length of the literal, sign included.
 *
 * @param allowNegative when true, a single leading '-' is accepted as part of
 *        the literal
 * @return true if at least one digit was found.
 */
bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
    if (ichToken == cchScript) {
        return false;
    }
    int ichT = ichToken;
    if (allowNegative && script[ichToken] == '-') {
        ++ichT;
    }
    const int ichBeginDigits = ichT;
    // <cctype> functions need a value representable as unsigned char
    while (ichT < cchScript && std::isdigit(static_cast<unsigned char>(script[ichT]))) {
        ++ichT;
    }
    if (ichBeginDigits == ichT) {
        return false;
    }
    cchToken = ichT - ichToken;
    return true;
}

/**
 * Tests whether an operator, punctuation mark or identifier starts at ichToken.
 *
 * Recognized forms:
 *  - the single characters ( ) , * - [ ] + : @ . %
 *  - & and |, optionally doubled (&&, ||)
 *  - <, = and >, optionally followed by one more of < = >
 *  - / and !, optionally followed by =
 *  - identifiers: a letter, '_' or '?' followed by letters, digits, '_' or '?'
 *
 * On success cchToken is set to the length of the token and the matched text
 * is written to std::cout.
 *
 * @return true if a token was recognized; false at the end of the script or
 *         when the character at ichToken cannot start any of the forms above.
 */
bool SelectionCompiler::lookingAtLookupToken() {
    if (ichToken == cchScript) {
        return false;
    }

    int ichT = ichToken;
    char ch;
    switch (ch = script[ichT++]) {
        case '(':
        case ')':
        case ',':
        case '*':
        case '-':
        case '[':
        case ']':
        case '+':
        case ':':
        case '@':
        case '.':
        case '%':
            break;
        case '&':
        case '|':
            if (ichT < cchScript && script[ichT] == ch) {
                ++ichT;
            }
            break;
        case '<':
        case '=':
        case '>':
            if (ichT < cchScript && ((ch = script[ichT]) == '<' || ch == '=' || ch == '>')) {
                ++ichT;
            }
            break;
        case '/':
        case '!':
            if (ichT < cchScript && script[ichT] == '=') {
                ++ichT;
            }
            break;
        default:
            // only a letter or '_' may start an identifier; each range test
            // must be an "outside the range" test (<  ||  >), otherwise the
            // condition can never be true and nothing is ever rejected
            if ((ch < 'a' || ch > 'z') && (ch < 'A' || ch > 'Z') && ch != '_') {
                return false;
            }
            // fall through
        case '?': // include question marks in identifier for atom expressions
            while (ichT < cchScript) {
                ch = script[ichT];
                // <cctype> functions need a value representable as unsigned char
                const unsigned char uch = static_cast<unsigned char>(ch);
                if (!std::isalpha(uch) && !std::isdigit(uch) && ch != '_' && ch != '?') {
                    break;
                }
                ++ichT;
            }
            break;
    }

    cchToken = ichT - ichToken;

    std::cout << "lookingAtLookupToken: encount " << script.substr(ichToken, cchToken) << std::endl; 
    return true;
}

bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
    const Token& tokenCommand = ltoken[0];
    int tokCommand = tokenCommand.tok;

    atokenCommand = ltoken;
    if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
        return false;
    }
    
    return true;
}

bool SelectionCompiler::compileExpression() {
    /** todo */
    int i = 1;
    int tokCommand = atokenCommand[0].tok;
    if (tokCommand == Token::define) {
        i = 2;
    } else if ((tokCommand & Token::embeddedExpression) != 0) {
        // look for the open parenthesis
        while (i < atokenCommand.size() &&
         atokenCommand[i].tok != Token::leftparen)
        ++i;
    }

    if (i >= atokenCommand.size()) {
        return true;
    }
    return compileExpression(i);
  }

                  
/**
 * Appends a token to the end of the postfix token list (ltokenPostfix).
 *
 * @param token token to append
 * @return always true, so parser clauses can return the call directly.
 */
bool SelectionCompiler::addTokenToPostfix(const Token& token) {
    ltokenPostfix.insert(ltokenPostfix.end(), token);
    return true;
}

/**
 * Rewrites the expression part of atokenCommand into postfix order.
 *
 * The tokens in front of the expression are copied unchanged, then the
 * expression is parsed with clauseOr() between Token::tokenExpressionBegin
 * and Token::tokenExpressionEnd markers. On success atokenCommand is replaced
 * by the postfix list.
 *
 * @param itoken index in atokenCommand of the first token of the expression
 * @return false if parsing fails; the result of endOfExpressionExpected() if
 *         tokens remain after the expression; true otherwise.
 */
bool SelectionCompiler::compileExpression(int itoken) {
    ltokenPostfix.clear();

    // command prefix goes to the output as is
    int copied = 0;
    while (copied < itoken) {
        addTokenToPostfix(atokenCommand[copied]);
        ++copied;
    }

    // the parser reads from atokenInfix, starting at the expression
    atokenInfix = atokenCommand;
    itokenInfix = itoken;

    addTokenToPostfix(Token::tokenExpressionBegin);
    const bool parsed = clauseOr();
    if (!parsed) {
        return false;
    }
    addTokenToPostfix(Token::tokenExpressionEnd);

    // every infix token must have been consumed
    if (itokenInfix != atokenInfix.size()) {
        return endOfExpressionExpected();
    }

    atokenCommand = ltokenPostfix;
    return true;
}

/**
 * Consumes and returns the next infix token.
 *
 * @return a copy of the token at itokenInfix (which is then advanced), or a
 *         default-constructed Token when the infix list is exhausted.
 */
Token SelectionCompiler::tokenNext() {
    if (itokenInfix != atokenInfix.size()) {
        const Token& current = atokenInfix[itokenInfix];
        ++itokenInfix;
        return current;
    }
    return Token();
}

/**
 * Returns the value of the next infix token without consuming it.
 *
 * @return the token's value, or an empty boost::any when the infix list is
 *         exhausted.
 */
boost::any SelectionCompiler::valuePeek() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix].value;
    }
    return boost::any();
}

/**
 * Returns the token id of the next infix token without consuming it.
 *
 * @return the token id, or 0 when the infix list is exhausted.
 */
int SelectionCompiler::tokPeek() {
    if (itokenInfix != atokenInfix.size()) {
        return atokenInfix[itokenInfix].tok;
    }
    return 0;
}

bool SelectionCompiler::clauseOr() {
    if (!clauseAnd()) {
        return false;
    }
    
    while (tokPeek() == Token::opOr) {
        Token tokenOr = tokenNext();
        if (!clauseAnd()) {
            return false;
        }
        addTokenToPostfix(tokenOr);
    }
    return true;
}

bool SelectionCompiler::clauseAnd() {
    if (!clauseNot()) {
        return false;
    }

    while (tokPeek() == Token::opAnd) {
        Token tokenAnd = tokenNext();
        if (!clauseNot()) {
            return false;
        }
        addTokenToPostfix(tokenAnd);
    }
    return true;
}

bool SelectionCompiler::clauseNot() {
    if (tokPeek() == Token::opNot) {
        Token tokenNot = tokenNext();
        if (!clauseNot()) {
            return false;
        }
        return addTokenToPostfix(tokenNot);
    }
    return clausePrimitive();
}

/**
 * Parses one primitive of a selection expression, chosen by the next token:
 *  - within                    -> clauseWithin()
 *  - asterisk or identifier    -> clauseChemObjName()
 *  - integer                   -> clauseIndex()
 *  - left parenthesis          -> parenthesized or-clause
 *  - all / none                -> the token itself is emitted
 *  - atom property tokens      -> clauseComparator()
 *  - predefined set tokens     -> the token itself is emitted
 *
 * @return the result of the selected clause, or of
 *         unrecognizedExpressionToken() if nothing above applies.
 */
bool SelectionCompiler::clausePrimitive() {
    const int tok = tokPeek();

    if (tok == Token::within) {
        return clauseWithin();
    }
    if (tok == Token::asterisk || tok == Token::identifier) {
        return clauseChemObjName();
    }
    if (tok == Token::integer) {
        return clauseIndex();
    }
    if (tok == Token::leftparen) {
        tokenNext();
        if (!clauseOr()) {
            return false;
        }
        if (tokenNext().tok != Token::rightparen) {
            return rightParenthesisExpected();
        }
        return true;
    }
    if (tok == Token::all || tok == Token::none) {
        return addTokenToPostfix(tokenNext());
    }

    // flag-based categories are only consulted once no exact token matched
    if ((tok & Token::atomproperty) == Token::atomproperty) {
        return clauseComparator();
    }
    if ((tok & Token::predefinedset) == Token::predefinedset) {
        return addTokenToPostfix(tokenNext());
    }

    return unrecognizedExpressionToken();
}

bool SelectionCompiler::clauseComparator() {
    Token tokenAtomProperty = tokenNext();
    Token tokenComparator = tokenNext();
    if ((tokenComparator.tok & Token::comparator) == 0) {
        return comparisonOperatorExpected();
    }

    Token tokenValue = tokenNext();
    if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
        return numberExpected();
    }
    
    float val;
    if (tokenValue.value.type() == typeid(int)) {
        val = boost::any_cast<int>(tokenValue.value);
    } else if (tokenValue.value.type() == typeid(float)) {
        val = boost::any_cast<float>(tokenValue.value);
    } else {
        return false;
    }

    boost::any floatVal;
    floatVal = val;
    return addTokenToPostfix(Token(tokenComparator.tok,
                       tokenAtomProperty.tok, floatVal));
}

bool SelectionCompiler::clauseWithin() {
    tokenNext();                             // WITHIN
    if (tokenNext().tok != Token::leftparen) {  // (
        return leftParenthesisExpected();
    }
    
    boost::any distance;
    Token tokenDistance = tokenNext();       // distance
    switch(tokenDistance.tok) {
        case Token::integer:
        case Token::decimal:
            distance = tokenDistance.value;
            break;
        default:
            return numberOrKeywordExpected();
    }

    if (tokenNext().tok != Token::opOr) {       // ,
        return commaExpected();
    }
    
    if (! clauseOr()) {                        // *expression*
        return false;
    }
    
    if (tokenNext().tok != Token::rightparen) { // )T
        return rightParenthesisExpected();
    }
    
    return addTokenToPostfix(Token(Token::within, distance));
}

/**
 * Parses a dotted name made of one to three name parts (for example a.b.c)
 * and emits it as a single Token::name token.
 *
 * Each part is read with clauseName(); at most two dots are consumed.
 *
 * @return false if any part fails to parse, otherwise the result of
 *         addTokenToPostfix().
 */
bool SelectionCompiler::clauseChemObjName() {
    std::string chemObjName;
    if (!clauseName(chemObjName)) {
        return false;
    }

    //allow two dot at most
    for (int dots = 0; dots < 2 && tokPeek() == Token::dot; ++dots) {
        tokenNext();
        chemObjName += ".";
        if (!clauseName(chemObjName)) {
            return false;
        }
    }

    return addTokenToPostfix(Token(Token::name, chemObjName));
}

/**
 * Reads one part of a name and appends its text to name.
 *
 * A part is a run of adjacent asterisk, identifier and integer tokens; their
 * texts are concatenated without separators. Reading stops, without
 * consuming it, at the first token of any other kind (including a dot).
 *
 * @param name string the text of the part is appended to
 * @return false if the next token is not an asterisk, identifier or integer
 *         (nothing is consumed in that case); true otherwise.
 */
bool SelectionCompiler::clauseName(std::string& name) {

    int tok = tokPeek();
    if (tok != Token::asterisk && tok != Token::identifier && tok != Token::integer) {
        return false;
    }

    // first token: append whatever text or number its value carries
    Token token = tokenNext();
    if (token.value.type() == typeid(std::string)) {
        name += boost::any_cast<std::string>(token.value);
    } else if (token.value.type() == typeid(int)) {
        // same conversion as for the integer tokens handled in the loop below
        name += toString(boost::any_cast<int>(token.value));
    }

    // remaining tokens of the part
    for (;;) {
        tok = tokPeek();
        switch (tok) {
            case Token::asterisk :
                name += "*";
                tokenNext();
                break;
            case Token::identifier :
                name += boost::any_cast<std::string>(tokenNext().value);
                break;
            case Token::integer :
                name += toString(boost::any_cast<int>(tokenNext().value));
                break;
            default :
                // a dot or any other token ends this part and is left for the caller
                return true;
        }
    }
}

bool SelectionCompiler::clauseIndex(){
    Token token = tokenNext();
    if (token.tok == Token::integer) {
        int index = boost::any_cast<int>(token.value);
        int tok = tokPeek();
        std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
        if (tok == Token::to) {
            tokenNext();
            tok = tokPeek();
            if (tok != Token::integer) {
                return numberExpected();
            }
            
            boost::any intVal = tokenNext().value;
            int first = index;
            if (intVal.type() != typeid(int)){
                return false;
            }
            int second = boost::any_cast<int>(intVal);

            return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
            
        }else {
            return addTokenToPostfix(Token(Token::index, boost::any(index)));
        }
    } else {
        return numberExpected();
    }
}

}
Revision:	303
Committed:	Mon Feb 7 22:36:32 2005 UTC (20 years, 2 months ago) by tim
File size:	22135 byte(s)
Log Message:	support select internal index selection, for example, select DMPC.3
#	User	Rev	Content
1	tim	279	/*
2			* Copyright (c) 2005 The University of Notre Dame. All Rights Reserved.
3			*
4			* The University of Notre Dame grants you ("Licensee") a
5			* non-exclusive, royalty free, license to use, modify and
6			* redistribute this software in source and binary code form, provided
7			* that the following conditions are met:
8			*
9			* 1. Acknowledgement of the program authors must be made in any
10			* publication of scientific results based in part on use of the
11			* program. An acceptable form of acknowledgement is citation of
12			* the article in which the program was described (Matthew
13			* A. Meineke, Charles F. Vardeman II, Teng Lin, Christopher
14			* J. Fennell and J. Daniel Gezelter, "OOPSE: An Object-Oriented
15			* Parallel Simulation Engine for Molecular Dynamics,"
16			* J. Comput. Chem. 26, pp. 252-271 (2005))
17			*
18			* 2. Redistributions of source code must retain the above copyright
19			* notice, this list of conditions and the following disclaimer.
20			*
21			* 3. Redistributions in binary form must reproduce the above copyright
22			* notice, this list of conditions and the following disclaimer in the
23			* documentation and/or other materials provided with the
24			* distribution.
25			*
26			* This software is provided "AS IS," without a warranty of any
27			* kind. All express or implied conditions, representations and
28			* warranties, including any implied warranty of merchantability,
29			* fitness for a particular purpose or non-infringement, are hereby
30			* excluded. The University of Notre Dame and its licensors shall not
31			* be liable for any damages suffered by licensee as a result of
32			* using, modifying or distributing the software or its
33			* derivatives. In no event will the University of Notre Dame or its
34			* licensors be liable for any lost revenue, profit or data, or for
35			* direct, indirect, special, consequential, incidental or punitive
36			* damages, however caused and regardless of the theory of liability,
37			* arising out of the use of or inability to use software, even if the
38			* University of Notre Dame has been advised of the possibility of
39			* such damages.
40			*/
41
42			#include "selection/SelectionCompiler.hpp"
43	tim	281	#include "utils/StringUtils.hpp"
44	tim	279	namespace oopse {
45
46			bool SelectionCompiler::compile(const std::string& filename, const std::string& script) {
47
48			this->filename = filename;
49			this->script = script;
50			lineNumbers.clear();
51			lineIndices.clear();
52			aatokenCompiled.clear();
53
54	tim	281	if (internalCompile()) {
55	tim	279	return true;
56			}
57
58			int icharEnd;
59	tim	281	if ((icharEnd = script.find('\r', ichCurrentCommand)) == std::string::npos &&
60			(icharEnd = script.find('\n', ichCurrentCommand)) == std::string::npos) {
61	tim	279	icharEnd = script.size();
62			}
63			errorLine = script.substr(ichCurrentCommand, icharEnd);
64			return false;
65			}
66
67			bool SelectionCompiler::internalCompile(){
68
69			cchScript = script.size();
70			ichToken = 0;
71			lineCurrent = 1;
72
73			error = false;
74
75	tim	281	//std::vector<Token> lltoken;
76			aatokenCompiled.clear();
77	tim	279	std::vector<Token> ltoken;
78
79	tim	281	Token tokenCommand;
80			int tokCommand = Token::nada;
81	tim	279
82			for ( ; true; ichToken += cchToken) {
83			if (lookingAtLeadingWhitespace())
84			continue;
85	tim	288	//if (lookingAtComment())
86			// continue;
87	tim	281	bool endOfLine = lookingAtEndOfLine();
88	tim	279	if (endOfLine \|\| lookingAtEndOfStatement()) {
89	tim	281	if (tokCommand != Token::nada) {
90	tim	279	if (! compileCommand(ltoken)) {
91			return false;
92			}
93	tim	281	aatokenCompiled.push_back(atokenCommand);
94			lineNumbers.push_back(lineCurrent);
95			lineIndices.push_back(ichCurrentCommand);
96			ltoken.clear();
97			tokCommand = Token::nada;
98	tim	279	}
99
100			if (ichToken < cchScript) {
101			if (endOfLine)
102			++lineCurrent;
103			continue;
104			}
105			break;
106			}
107
108	tim	281	if (tokCommand != Token::nada) {
109	tim	279	if (lookingAtString()) {
110			std::string str = getUnescapedStringLiteral();
111	tim	281	ltoken.push_back(Token(Token::string, str));
112	tim	279	continue;
113			}
114	tim	281	//if ((tokCommand & Token::specialstring) != 0 &&
115			// lookingAtSpecialString()) {
116			// std::string str = script.substr(ichToken, ichToken + cchToken);
117			// ltoken.push_back(Token(Token::string, str));
118			// continue;
119			//}
120			if (lookingAtDecimal((tokCommand & Token::negnums) != 0)) {
121	tim	295	float value = lexi_cast<float>(script.substr(ichToken, cchToken));
122			std::cout << "encount an decimal: " << value << std::endl;
123			ltoken.push_back(Token(Token::decimal, boost::any(value)));
124	tim	279	continue;
125			}
126	tim	281	if (lookingAtInteger((tokCommand & Token::negnums) != 0)) {
127	tim	295
128			int val = lexi_cast<int>(script.substr(ichToken, cchToken));
129			std::cout << "encount an integer: " << val << std::endl;
130			ltoken.push_back(Token(Token::integer, boost::any(val)));
131	tim	279	continue;
132			}
133			}
134
135			if (lookingAtLookupToken()) {
136	tim	288	std::string ident = script.substr(ichToken, cchToken);
137	tim	281	Token token;
138			Token* pToken = TokenMap::getInstance()->getToken(ident);
139			if (pToken != NULL) {
140			token = *pToken;
141			} else {
142			token = Token(Token::identifier, ident);
143	tim	279	}
144
145			int tok = token.tok;
146
147			switch (tokCommand) {
148	tim	281	case Token::nada:
149	tim	279	ichCurrentCommand = ichToken;
150			//tokenCommand = token;
151			tokCommand = tok;
152	tim	281	if ((tokCommand & Token::command) == 0)
153	tim	279	return commandExpected();
154			break;
155
156	tim	281	case Token::define:
157	tim	279	if (ltoken.size() == 1) {
158			// we are looking at the variable name
159	tim	281	if (tok != Token::identifier &&
160			(tok & Token::predefinedset) != Token::predefinedset)
161	tim	279	return invalidExpressionToken(ident);
162			} else {
163			// we are looking at the expression
164	tim	281	if (tok != Token::identifier &&
165			(tok & (Token::expression \| Token::predefinedset)) == 0)
166	tim	279	return invalidExpressionToken(ident);
167			}
168
169			break;
170
171	tim	281	case Token::select:
172			if (tok != Token::identifier && (tok & Token::expression) == 0)
173	tim	279	return invalidExpressionToken(ident);
174			break;
175			}
176			ltoken.push_back(token);
177			continue;
178			}
179
180			if (ltoken.size() == 0) {
181			return commandExpected();
182			}
183
184			return unrecognizedToken();
185			}
186
187			return true;
188			}
189
190
191			bool SelectionCompiler::lookingAtLeadingWhitespace() {
192
193			int ichT = ichToken;
194			while (ichT < cchScript && std::isspace(script[ichT])) {
195			++ichT;
196			}
197			cchToken = ichT - ichToken;
198			return cchToken > 0;
199			}
200
201			bool SelectionCompiler::lookingAtEndOfLine() {
202			if (ichToken == cchScript)
203			return true;
204			int ichT = ichToken;
205			char ch = script[ichT];
206			if (ch == '\r') {
207			++ichT;
208			if (ichT < cchScript && script[ichT] == '\n')
209			++ichT;
210			} else if (ch == '\n') {
211			++ichT;
212			} else {
213			return false;
214			}
215			cchToken = ichT - ichToken;
216			return true;
217			}
218
219			bool SelectionCompiler::lookingAtEndOfStatement() {
220			if (ichToken == cchScript \|\| script[ichToken] != ';')
221			return false;
222			cchToken = 1;
223			return true;
224			}
225
226			bool SelectionCompiler::lookingAtString() {
227			if (ichToken == cchScript)
228			return false;
229			if (script[ichToken] != '"')
230			return false;
231			// remove support for single quote
232			// in order to use it in atom expressions
233			// char chFirst = script.charAt(ichToken);
234			// if (chFirst != '"' && chFirst != '\'')
235			// return false;
236			int ichT = ichToken + 1;
237			// while (ichT < cchScript && script.charAt(ichT++) != chFirst)
238			char ch;
239	tim	281	bool previousCharBackslash = false;
240	tim	279	while (ichT < cchScript) {
241	tim	281	ch = script[ichT++];
242	tim	279	if (ch == '"' && !previousCharBackslash)
243			break;
244			previousCharBackslash = ch == '\\' ? !previousCharBackslash : false;
245			}
246			cchToken = ichT - ichToken;
247	tim	295
248
249			std::cout << "lookingAtString: encount " << script.substr(ichToken, cchToken) << std::endl;
250	tim	279	return true;
251			}
252
253
254			std::string SelectionCompiler::getUnescapedStringLiteral() {
255	tim	281	/** @todo */
256			std::string sb(cchToken - 2, ' ');
257
258	tim	279	int ichMax = ichToken + cchToken - 1;
259			int ich = ichToken + 1;
260
261			while (ich < ichMax) {
262			char ch = script[ich++];
263			if (ch == '\\' && ich < ichMax) {
264			ch = script[ich++];
265			switch (ch) {
266			case 'b':
267			ch = '\b';
268			break;
269			case 'n':
270			ch = '\n';
271			break;
272			case 't':
273			ch = '\t';
274			break;
275			case 'r':
276			ch = '\r';
277			// fall into
278			case '"':
279			case '\\':
280			case '\'':
281			break;
282			case 'x':
283			case 'u':
284			int digitCount = ch == 'x' ? 2 : 4;
285			if (ich < ichMax) {
286			int unicode = 0;
287			for (int k = digitCount; --k >= 0 && ich < ichMax; ) {
288			char chT = script[ich];
289			int hexit = getHexitValue(chT);
290			if (hexit < 0)
291			break;
292			unicode <<= 4;
293			unicode += hexit;
294			++ich;
295			}
296			ch = (char)unicode;
297			}
298			}
299			}
300	tim	281	sb.append(1, ch);
301	tim	279	}
302
303	tim	281	return sb;
304	tim	279	}
305
306	tim	281	int SelectionCompiler::getHexitValue(char ch) {
307	tim	279	if (ch >= '0' && ch <= '9')
308			return ch - '0';
309			else if (ch >= 'a' && ch <= 'f')
310			return 10 + ch - 'a';
311			else if (ch >= 'A' && ch <= 'F')
312			return 10 + ch - 'A';
313			else
314			return -1;
315			}
316
317			bool SelectionCompiler::lookingAtSpecialString() {
318			int ichT = ichToken;
319			char ch = script[ichT];
320			while (ichT < cchScript && ch != ';' && ch != '\r' && ch != '\n') {
321			++ichT;
322			}
323			cchToken = ichT - ichToken;
324			return cchToken > 0;
325			}
326
327	tim	281	bool SelectionCompiler::lookingAtDecimal(bool allowNegative) {
328	tim	279	if (ichToken == cchScript) {
329			return false;
330			}
331
332			int ichT = ichToken;
333			if (script[ichT] == '-') {
334			++ichT;
335			}
336	tim	281	bool digitSeen = false;
337	tim	279	char ch = 'X';
338			while (ichT < cchScript && std::isdigit(ch = script[ichT])) {
339			++ichT;
340			digitSeen = true;
341			}
342
343			if (ichT == cchScript \|\| ch != '.') {
344			return false;
345			}
346
347	tim	303	// to support DMPC.1, let's check the character before the dot
348			if (ch == '.' && (ichT > 0) && std::isalpha(script[ichT - 1])) {
349	tim	279	return false;
350			}
351
352			++ichT;
353			while (ichT < cchScript && std::isdigit(script[ichT])) {
354			++ichT;
355			digitSeen = true;
356			}
357			cchToken = ichT - ichToken;
358			return digitSeen;
359			}
360
361	tim	281	bool SelectionCompiler::lookingAtInteger(bool allowNegative) {
362	tim	279	if (ichToken == cchScript) {
363			return false;
364			}
365			int ichT = ichToken;
366			if (allowNegative && script[ichToken] == '-') {
367			++ichT;
368			}
369			int ichBeginDigits = ichT;
370			while (ichT < cchScript && std::isdigit(script[ichT])) {
371			++ichT;
372			}
373			if (ichBeginDigits == ichT) {
374			return false;
375			}
376			cchToken = ichT - ichToken;
377			return true;
378			}
379
380			bool SelectionCompiler::lookingAtLookupToken() {
381			if (ichToken == cchScript) {
382			return false;
383			}
384
385			int ichT = ichToken;
386			char ch;
387			switch (ch = script[ichT++]) {
388			case '(':
389			case ')':
390			case ',':
391			case '*':
392			case '-':
393			case '[':
394			case ']':
395			case '+':
396			case ':':
397			case '@':
398			case '.':
399			case '%':
400			break;
401			case '&':
402			case '\|':
403			if (ichT < cchScript && script[ichT] == ch) {
404			++ichT;
405			}
406			break;
407			case '<':
408			case '=':
409			case '>':
410			if (ichT < cchScript && ((ch = script[ichT]) == '<' \|\| ch == '=' \|\| ch == '>')) {
411			++ichT;
412			}
413			break;
414			case '/':
415			case '!':
416			if (ichT < cchScript && script[ichT] == '=') {
417			++ichT;
418			}
419			break;
420			default:
421			if ((ch < 'a' \|\| ch > 'z') && (ch < 'A' && ch > 'Z') && ch != '_') {
422			return false;
423			}
424			case '?': // include question marks in identifier for atom expressions
425	tim	288	while (ichT < cchScript && !std::isspace(ch = script[ichT]) && (std::isalpha(ch) \|\|std::isdigit(ch) \|\|
426			ch == '_' \|\| ch == '?') ) {
427
428	tim	279	++ichT;
429			}
430			break;
431			}
432	tim	295
433	tim	279	cchToken = ichT - ichToken;
434	tim	295
435			std::cout << "lookingAtLookupToken: encount " << script.substr(ichToken, cchToken) << std::endl;
436	tim	279	return true;
437			}
438
439	tim	281	bool SelectionCompiler::compileCommand(const std::vector<Token>& ltoken) {
440			const Token& tokenCommand = ltoken[0];
441	tim	279	int tokCommand = tokenCommand.tok;
442	tim	281
443			atokenCommand = ltoken;
444			if ((tokCommand & Token::expressionCommand) != 0 && !compileExpression()) {
445	tim	279	return false;
446			}
447	tim	281
448	tim	279	return true;
449			}
450
451			bool SelectionCompiler::compileExpression() {
452			/** todo */
453			int i = 1;
454			int tokCommand = atokenCommand[0].tok;
455	tim	281	if (tokCommand == Token::define) {
456			i = 2;
457			} else if ((tokCommand & Token::embeddedExpression) != 0) {
458			// look for the open parenthesis
459			while (i < atokenCommand.size() &&
460			atokenCommand[i].tok != Token::leftparen)
461	tim	279	++i;
462			}
463	tim	281
464			if (i >= atokenCommand.size()) {
465			return true;
466			}
467	tim	279	return compileExpression(i);
468			}
469
470
471	tim	281	bool SelectionCompiler::addTokenToPostfix(const Token& token) {
472	tim	279	ltokenPostfix.push_back(token);
473			return true;
474			}
475
476			bool SelectionCompiler::compileExpression(int itoken) {
477	tim	281	ltokenPostfix.clear();
478			for (int i = 0; i < itoken; ++i) {
479	tim	279	addTokenToPostfix(atokenCommand[i]);
480	tim	281	}
481
482	tim	279	atokenInfix = atokenCommand;
483			itokenInfix = itoken;
484
485	tim	281	addTokenToPostfix(Token::tokenExpressionBegin);
486	tim	279	if (!clauseOr()) {
487			return false;
488			}
489
490	tim	281	addTokenToPostfix(Token::tokenExpressionEnd);
491			if (itokenInfix != atokenInfix.size()) {
492	tim	279	return endOfExpressionExpected();
493			}
494
495			atokenCommand = ltokenPostfix;
496			return true;
497			}
498
499			Token SelectionCompiler::tokenNext() {
500	tim	281	if (itokenInfix == atokenInfix.size()) {
501			return Token();
502			}
503			return atokenInfix[itokenInfix++];
504	tim	279	}
505
506	tim	281	boost::any SelectionCompiler::valuePeek() {
507			if (itokenInfix == atokenInfix.size()) {
508			return boost::any();
509	tim	279	} else {
510			return atokenInfix[itokenInfix].value;
511			}
512			}
513
514			int SelectionCompiler::tokPeek() {
515	tim	281	if (itokenInfix == atokenInfix.size()) {
516	tim	279	return 0;
517			}else {
518			return atokenInfix[itokenInfix].tok;
519			}
520			}
521
522			bool SelectionCompiler::clauseOr() {
523			if (!clauseAnd()) {
524			return false;
525			}
526
527	tim	281	while (tokPeek() == Token::opOr) {
528	tim	279	Token tokenOr = tokenNext();
529			if (!clauseAnd()) {
530			return false;
531			}
532			addTokenToPostfix(tokenOr);
533			}
534			return true;
535			}
536
537			bool SelectionCompiler::clauseAnd() {
538			if (!clauseNot()) {
539			return false;
540			}
541
542	tim	281	while (tokPeek() == Token::opAnd) {
543	tim	279	Token tokenAnd = tokenNext();
544			if (!clauseNot()) {
545			return false;
546			}
547			addTokenToPostfix(tokenAnd);
548			}
549			return true;
550			}
551
552			bool SelectionCompiler::clauseNot() {
553	tim	281	if (tokPeek() == Token::opNot) {
554	tim	279	Token tokenNot = tokenNext();
555			if (!clauseNot()) {
556			return false;
557			}
558			return addTokenToPostfix(tokenNot);
559			}
560			return clausePrimitive();
561			}
562
563			bool SelectionCompiler::clausePrimitive() {
564			int tok = tokPeek();
565			switch (tok) {
566	tim	281	case Token::within:
567	tim	279	return clauseWithin();
568	tim	283
569			case Token::asterisk:
570			case Token::identifier:
571			return clauseChemObjName();
572	tim	295
573			case Token::integer :
574			return clauseIndex();
575	tim	279	default:
576	tim	281	if ((tok & Token::atomproperty) == Token::atomproperty) {
577	tim	279	return clauseComparator();
578			}
579	tim	281	if ((tok & Token::predefinedset) != Token::predefinedset) {
580	tim	279	break;
581			}
582			// fall into the code and below and just add the token
583	tim	281	case Token::all:
584			case Token::none:
585	tim	279	return addTokenToPostfix(tokenNext());
586	tim	281	case Token::leftparen:
587	tim	279	tokenNext();
588			if (!clauseOr()) {
589			return false;
590			}
591	tim	281	if (tokenNext().tok != Token::rightparen) {
592	tim	279	return rightParenthesisExpected();
593			}
594			return true;
595			}
596			return unrecognizedExpressionToken();
597			}
598
599			bool SelectionCompiler::clauseComparator() {
600			Token tokenAtomProperty = tokenNext();
601			Token tokenComparator = tokenNext();
602	tim	281	if ((tokenComparator.tok & Token::comparator) == 0) {
603	tim	279	return comparisonOperatorExpected();
604			}
605
606			Token tokenValue = tokenNext();
607	tim	288	if (tokenValue.tok != Token::integer && tokenValue.tok != Token::decimal) {
608			return numberExpected();
609	tim	279	}
610	tim	288
611			float val;
612			if (tokenValue.value.type() == typeid(int)) {
613			val = boost::any_cast<int>(tokenValue.value);
614			} else if (tokenValue.value.type() == typeid(float)) {
615			val = boost::any_cast<float>(tokenValue.value);
616			} else {
617			return false;
618			}
619
620	tim	295	boost::any floatVal;
621			floatVal = val;
622	tim	281	return addTokenToPostfix(Token(tokenComparator.tok,
623	tim	295	tokenAtomProperty.tok, floatVal));
624	tim	279	}
625
626			bool SelectionCompiler::clauseWithin() {
627			tokenNext(); // WITHIN
628	tim	281	if (tokenNext().tok != Token::leftparen) { // (
629	tim	279	return leftParenthesisExpected();
630			}
631
632	tim	281	boost::any distance;
633	tim	279	Token tokenDistance = tokenNext(); // distance
634			switch(tokenDistance.tok) {
635	tim	281	case Token::integer:
636			case Token::decimal:
637	tim	279	distance = tokenDistance.value;
638			break;
639			default:
640			return numberOrKeywordExpected();
641			}
642
643	tim	281	if (tokenNext().tok != Token::opOr) { // ,
644	tim	279	return commaExpected();
645			}
646
647			if (! clauseOr()) { // expression
648			return false;
649			}
650
651	tim	281	if (tokenNext().tok != Token::rightparen) { // )T
652	tim	279	return rightParenthesisExpected();
653			}
654
655	tim	281	return addTokenToPostfix(Token(Token::within, distance));
656	tim	279	}
657
658	tim	283	bool SelectionCompiler::clauseChemObjName() {
659			std::string chemObjName;
660			int tok = tokPeek();
661			if (!clauseName(chemObjName)){
662			return false;
663			}
664	tim	279
665
666	tim	283	tok = tokPeek();
667			//allow two dot at most
668			if (tok == Token::dot) {
669	tim	288	tokenNext();
670			chemObjName += ".";
671	tim	283	if (!clauseName(chemObjName)) {
672			return false;
673			}
674			tok = tokPeek();
675			if (tok == Token::dot) {
676	tim	288	tokenNext();
677			chemObjName += ".";
678
679	tim	283	if (!clauseName(chemObjName)) {
680			return false;
681			}
682			}
683			}
684
685			return addTokenToPostfix(Token(Token::name, chemObjName));
686	tim	279	}
687
688	tim	283	bool SelectionCompiler:: clauseName(std::string& name) {
689
690			int tok = tokPeek();
691
692	tim	303	if (tok == Token::asterisk \|\| tok == Token::identifier \|\| tok == Token::integer) {
693
694			Token token = tokenNext();
695			if (token.value.type() == typeid(std::string)) {
696			name += boost::any_cast<std::string>(token.value);
697			} else if (token.value.type() == typeid(int)){
698			int intVal = boost::any_cast<int>(token.value);
699			char buffer[255];
700			sprintf(buffer,"%d", intVal);
701			name += buffer; /** @todo */
702			//name += toString<int>(intVal);
703			}
704	tim	283	while(true){
705			tok = tokPeek();
706			switch (tok) {
707			case Token::asterisk :
708			name += "*";
709			tokenNext();
710			break;
711			case Token::identifier :
712			name += boost::any_cast<std::string>(tokenNext().value);
713			break;
714			case Token::integer :
715			name += toString(boost::any_cast<int>(tokenNext().value));
716			break;
717			case Token::dot :
718			return true;
719			default :
720			return true;
721			}
722			}
723
724			}else {
725			return false;
726			}
727
728	tim	279	}
729
730	tim	295	bool SelectionCompiler::clauseIndex(){
731			Token token = tokenNext();
732			if (token.tok == Token::integer) {
733			int index = boost::any_cast<int>(token.value);
734			int tok = tokPeek();
735			std::cout << "Token::to is " << Token::to << ", tok = " << tok << std::endl;
736			if (tok == Token::to) {
737			tokenNext();
738			tok = tokPeek();
739			if (tok != Token::integer) {
740			return numberExpected();
741			}
742
743			boost::any intVal = tokenNext().value;
744			int first = index;
745			if (intVal.type() != typeid(int)){
746			return false;
747			}
748			int second = boost::any_cast<int>(intVal);
749	tim	283
750	tim	295	return addTokenToPostfix(Token(Token::index, boost::any(std::make_pair(first, second))));
751
752			}else {
753			return addTokenToPostfix(Token(Token::index, boost::any(index)));
754			}
755			} else {
756			return numberExpected();
757			}
758	tim	279	}
759	tim	295
760			}