src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id$
 */

#include <antlr/config.hpp>

#include <map>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 *
 * Both strings are walked in lock step; every character is lower-cased
 * with tolower() before it is compared.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after s2 and 0 if both
 *         strings are equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        for (;;)
        {
                // tolower() is only defined for values representable as
                // unsigned char (and EOF). Going through unsigned char keeps
                // characters >= 0x80 from being passed as negative values and
                // makes them order after ASCII instead of before it.
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));
                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                if (c1 == 0) return 0;  // both strings ended at the same position
        }
}
#else
#ifdef NO_STRCASECMP
ANTLR_C_USING(stricmp)
#else
ANTLR_C_USING(strcasecmp)
#endif
#endif

/** Comparison functor for the literals map.
 *
 * Holds a pointer to a CharScanner and asks it, on every comparison,
 * whether literals are case sensitive (see the inline definition of
 * operator() further down in this file).
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        /// Scanner queried for getCaseSensitiveLiterals(); never deleted by this class
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        // not really used, definition to appease MSVC. The pointer is nulled so
        // that it never holds an indeterminate value.
        CharScannerLiteralsLess() : scanner(0) {}
#endif
        /** Create a comparator bound to a scanner.
         * @param theScanner scanner consulted by operator() for case sensitivity
         */
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        /** Return true if x orders before y.
         * @param x left hand literal
         * @param y right hand literal
         */
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers.
 *
 * Provides the primitives used by lexer code: lookahead (LA), consume and
 * match operations, accumulation of the current token text, line / column /
 * tab bookkeeping, lookup in the literals table and error reporting hooks.
 * All input is read through the shared lexer input state.
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Signature of the function makeToken() calls to create a token
        typedef RefToken (*factory_type)();
public:
        CharScanner(InputBuffer& cb, bool case_sensitive );
        CharScanner(InputBuffer* cb, bool case_sensitive );
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        virtual ~CharScanner()
        {
        }

        /** Return the i-th lookahead character; it is passed through
         * toLower() when the scanner is not case sensitive.
         */
        virtual int LA(unsigned int i);

        /// Append one character to the token text (only if saveConsumedInput is set)
        virtual void append(char c)
        {
                if (saveConsumedInput)
                {
                        size_t l = text.length();

                        // grow the buffer in steps of 256 characters
                        if ((l%256) == 0)
                                text.reserve(l+256);

                        text.append(1,c);
                }
        }

        /// Append a string to the token text (only if saveConsumedInput is set)
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( saveConsumedInput )
                        text += s;
        }

        /// Forward a commit() to the input buffer
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** called by the generated lexer to do error recovery, override to
         * customize the behaviour.
         * The default consumes one character and then skips input until a
         * member of tokenSet (or EOF) is the next character; ex is not used.
         */
        virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
        {
                consume();
                consumeUntil(tokenSet);
        }

        /** Consume the current lookahead character.
         * When the lexer is not guessing, the character is appended to the
         * token text and the column is updated (tab() for '\t', +1 for
         * anything else). The input buffer is advanced in either case.
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int c = LA(1);
                        if (caseSensitive)
                        {
                                // narrowing to char is intended, append() stores chars
                                append(static_cast<char>(c));
                        }
                        else
                        {
                                // use input.LA(), not LA(), to get original case
                                // CharScanner.LA() would toLower it.
                                append(static_cast<char>(inputState->getInput().LA(1)));
                        }

                        // RK: in a sense I don't like this automatic handling.
                        if (c == '\t')
                                tab();
                        else
                                inputState->column++;
                }
                inputState->getInput().consume();
        }

        /** Consume chars until one matches the given char (or EOF is reached) */
        virtual void consumeUntil(int c)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || la_1 == c )
                                break;
                        consume();
                }
        }

        /** Consume chars until one matches the given set (or EOF is reached) */
        virtual void consumeUntil(const BitSet& set)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || set.member(la_1) )
                                break;
                        consume();
                }
        }

        /// Mark the current position and return a id for it
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the scanner to a previously marked position
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /// See if input contains character 'c' throw MismatchedCharException if not
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                        throw MismatchedCharException(la_1, c, false, this);
                consume();
        }

        /** See if input contains element from bitset b
         * throw MismatchedCharException if not
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);

                if ( !b.member(la_1) )
                        throw MismatchedCharException( la_1, b, false, this );
                consume();
        }

        /** See if input contains string 's' throw MismatchedCharException if not.
         * Characters that matched before the mismatch have already been consumed.
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                while( *s != '\0' )
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (*s++ & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input contains string 's' throw MismatchedCharException if not.
         * Characters that matched before the mismatch have already been consumed.
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                size_t len = s.length();

                for (size_t i = 0; i < len; i++)
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (s[i] & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input does not contain character 'c'
         * throw MismatchedCharException if it does
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);

                if ( la_1 == c )
                        throw MismatchedCharException(la_1, c, true, this);

                consume();
        }
        /** See if input contains character in range c1-c2 (both inclusive)
         * throw MismatchedCharException if not
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);

                if ( la_1 < c1 || la_1 > c2 )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Is the scanner case sensitive (i.e. does LA() return the raw character)
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Switch case sensitivity of the scanner on or off
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Are literals compared case sensitively; must be provided by the subclass
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1)
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// set the line number
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (starts at 1)
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// set the column number
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// get the filename for the file currently used
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages)
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Get the commitToPath flag
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** return a reference to the current text buffer (not a copy; it
         * reflects later changes made through append/setText/resetText)
         */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the current token text
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Clear the token text and record the current line/column as the
         * start position of the next token.
         */
        virtual void resetText()
        {
                text = "";
                inputState->tokenStartColumn = inputState->column;
                inputState->tokenStartLine = inputState->line;
        }

        /// Return the token stored in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         * Increments the line and resets the column to 1.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column number to the next tab stop according
         * to the tabsize. consume() calls this for every '\t' it consumes
         * while not guessing.
         */
        virtual void tab()
        {
                int c = getColumn();
                // setTabsize() accepts any value: a tabsize of 0 would divide by
                // zero and a negative one would move the column backwards, so a
                // non-positive tabsize is treated as 1 (tab advances one column).
                int ts = (tabsize > 0) ? tabsize : 1;
                int nc = ( ((c-1)/ts) + 1) * ts + 1;      // calculate tab stop
                setColumn( nc );
        }
        /// set the tabsize. Returns the old tabsize
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tabsize used by the scanner
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() */
        virtual void reportError(const RecognitionException& e);

        /** Error-reporting function, can be overridden in subclass */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Warning-reporting function, can be overridden in subclass */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Return the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Return the input state used by this lexer
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** set the input state for the lexer.
         * @note state is a reference counted object, hence no reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory for created tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table
         * Override this method to perform a different literals test
         * @param ttype token type to return when the text is not a literal
         * @return the type stored in the literals table for the current
         *         text, or ttype if there is no entry
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         * @param txt   text to look up
         * @param ttype token type to return when txt is not a literal
         * @return the type stored in the literals table for txt, or ttype
         *         if there is no entry
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /// Override this method to get more specific case handling
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
        }

        /** This method is called by YourLexer::nextToken() when the lexer has
         *  hit EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Heh, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         *
         *  The default implementation does nothing.
         */
        virtual void uponEOF()
        {
        }

        /// Methods used to change tracing behavior
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

        // Value LA() yields at end of input (the C library's EOF)
#ifndef NO_STATIC_CONSTS
        static const int EOF_CHAR = EOF;
#else
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether append() (and thus consume()) saves characters
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if acceptPath=true;
         * NOTE(review): "acceptPath" presumably refers to this flag
         * (commitToPath) - confirm against the generated lexer code.
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /** Create a new RefToken of type t.
         * The token is obtained from tokenFactory and positioned at the
         * token start line/column recorded in the input state.
         */
        virtual RefToken makeToken(int t)
        {
                RefToken tok = tokenFactory();
                tok->setType(t);
                tok->setColumn(inputState->tokenStartColumn);
                tok->setLine(inputState->tokenStartLine);
                return tok;
        }

        /** Tracer class, used when -traceLexer is passed to antlr.
         * Calls traceIn() on the scanner when constructed and traceOut()
         * when destroyed, so a local Tracer brackets the enclosing scope.
         */
        class Tracer {
        private:
                CharScanner* parser;    ///< scanner whose trace methods are called
                const char* text;       ///< name handed to traceIn()/traceOut()

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        int traceDepth;
private:
        // copying is disabled: declared but never defined
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

/** Fetch the i-th lookahead character from the input buffer.
 * A case sensitive scanner gets the character unchanged, otherwise it is
 * run through toLower() first.
 */
inline int CharScanner::LA(unsigned int i)
{
        const int ch = inputState->getInput().LA(i);

        // VC 6 tolower bug caught in toLower.
        return caseSensitive ? ch : toLower(ch);
}

/** Order two literals for the literals map.
 * If the scanner reports case insensitive literals the strings are compared
 * with stricmp/strcasecmp (whichever the configuration selects), otherwise
 * with std::less<std::string>.
 */
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
                const char* lhs = x.c_str();
                const char* rhs = y.c_str();
#ifdef NO_STRCASECMP
                return stricmp(lhs,rhs) < 0;
#else
                return strcasecmp(lhs,rhs) < 0;
#endif
        }

        ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string> caseSensitiveLess;
        return caseSensitiveLess(x,y);
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	1633
Committed:	Thu Sep 15 13:39:36 2011 UTC (13 years, 7 months ago) by gezelter
File size:	13661 byte(s)
Log Message:	Updated antlr, fixed a clang compilation bug, removed a warning message
#	User	Rev	Content
1	tim	770	#ifndef INC_CharScanner_hpp__
2			#define INC_CharScanner_hpp__
3
4			/* ANTLR Translator Generator
5			* Project led by Terence Parr at http://www.jGuru.com
6			* Software rights: http://www.antlr.org/license.html
7			*
8	gezelter	1442	* $Id$
9	tim	770	*/
10
11			#include <antlr/config.hpp>
12
13			#include <map>
14
15			#ifdef HAS_NOT_CCTYPE_H
16			#include <ctype.h>
17			#else
18			#include <cctype>
19			#endif
20
21			#if ( _MSC_VER == 1200 )
22			// VC6 seems to need this
23			// note that this is not a standard C++ include file.
24			# include <stdio.h>
25			#endif
26
27			#include <antlr/TokenStream.hpp>
28			#include <antlr/RecognitionException.hpp>
29			#include <antlr/SemanticException.hpp>
30			#include <antlr/MismatchedCharException.hpp>
31			#include <antlr/InputBuffer.hpp>
32			#include <antlr/BitSet.hpp>
33			#include <antlr/LexerSharedInputState.hpp>
34
35			#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36			namespace antlr {
37			#endif
38
39			class ANTLR_API CharScanner;
40
41			ANTLR_C_USING(tolower)
42
43	gezelter	1633	#ifdef ANTLR_REALLY_NO_STRCASECMP
44			// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
45			// on the mac has neither...
46	tim	770	inline int strcasecmp(const char *s1, const char *s2)
47			{
48			while (true)
49			{
50			char c1 = tolower(*s1++),
51			c2 = tolower(*s2++);
52			if (c1 < c2) return -1;
53			if (c1 > c2) return 1;
54			if (c1 == 0) return 0;
55			}
56			}
57	gezelter	1633	#else
58			#ifdef NO_STRCASECMP
59			ANTLR_C_USING(stricmp)
60			#else
61			ANTLR_C_USING(strcasecmp)
62	tim	770	#endif
63	gezelter	1633	#endif
64	tim	770
65			/** Functor for the literals map
66			*/
67			class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68			private:
69			const CharScanner* scanner;
70			public:
71			#ifdef NO_TEMPLATE_PARTS
72			CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73			#endif
74			CharScannerLiteralsLess(const CharScanner* theScanner)
75			: scanner(theScanner)
76			{
77			}
78			bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79			// defaults are good enough..
80			// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81			// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82			};
83
84			/** Superclass of generated lexers
85			*/
86			class ANTLR_API CharScanner : public TokenStream {
87			protected:
88			typedef RefToken (*factory_type)();
89			public:
90			CharScanner(InputBuffer& cb, bool case_sensitive );
91			CharScanner(InputBuffer* cb, bool case_sensitive );
92			CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93
94			virtual ~CharScanner()
95			{
96			}
97
98			virtual int LA(unsigned int i);
99
100			virtual void append(char c)
101			{
102			if (saveConsumedInput)
103			{
104			size_t l = text.length();
105
106			if ((l%256) == 0)
107			text.reserve(l+256);
108
109			text.replace(l,0,&c,1);
110			}
111			}
112
113			virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114			{
115			if( saveConsumedInput )
116			text += s;
117			}
118
119			virtual void commit()
120			{
121			inputState->getInput().commit();
122			}
123
124	gezelter	1633	/** called by the generated lexer to do error recovery, override to
125			* customize the behaviour.
126			*/
127			virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
128			{
129			consume();
130			consumeUntil(tokenSet);
131			}
132
133	tim	770	virtual void consume()
134			{
135			if (inputState->guessing == 0)
136			{
137			int c = LA(1);
138			if (caseSensitive)
139			{
140			append(c);
141			}
142			else
143			{
144			// use input.LA(), not LA(), to get original case
145			// CharScanner.LA() would toLower it.
146			append(inputState->getInput().LA(1));
147			}
148
149			// RK: in a sense I don't like this automatic handling.
150			if (c == '\t')
151			tab();
152			else
153			inputState->column++;
154			}
155			inputState->getInput().consume();
156			}
157
158			/** Consume chars until one matches the given char */
159			virtual void consumeUntil(int c)
160			{
161			for(;;)
162			{
163			int la_1 = LA(1);
164			if( la_1 == EOF_CHAR || la_1 == c )
165			break;
166			consume();
167			}
168			}
169
170			/** Consume chars until one matches the given set */
171			virtual void consumeUntil(const BitSet& set)
172			{
173			for(;;)
174			{
175			int la_1 = LA(1);
176			if( la_1 == EOF_CHAR || set.member(la_1) )
177			break;
178			consume();
179			}
180			}
181
182			/// Mark the current position and return a id for it
183			virtual unsigned int mark()
184			{
185			return inputState->getInput().mark();
186			}
187			/// Rewind the scanner to a previously marked position
188			virtual void rewind(unsigned int pos)
189			{
190			inputState->getInput().rewind(pos);
191			}
192
193			/// See if input contains character 'c' throw MismatchedCharException if not
194			virtual void match(int c)
195			{
196			int la_1 = LA(1);
197			if ( la_1 != c )
198			throw MismatchedCharException(la_1, c, false, this);
199			consume();
200			}
201
202			/** See if input contains element from bitset b
203			* throw MismatchedCharException if not
204			*/
205			virtual void match(const BitSet& b)
206			{
207			int la_1 = LA(1);
208
209			if ( !b.member(la_1) )
210			throw MismatchedCharException( la_1, b, false, this );
211			consume();
212			}
213
214			/** See if input contains string 's' throw MismatchedCharException if not
215			* @note the string cannot match EOF
216			*/
217			virtual void match( const char* s )
218			{
219			while( *s != '\0' )
220			{
221			// the & 0xFF is here to prevent sign extension lateron
222			int la_1 = LA(1), c = (*s++ & 0xFF);
223
224			if ( la_1 != c )
225			throw MismatchedCharException(la_1, c, false, this);
226
227			consume();
228			}
229			}
230			/** See if input contains string 's' throw MismatchedCharException if not
231			* @note the string cannot match EOF
232			*/
233			virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
234			{
235			size_t len = s.length();
236
237			for (size_t i = 0; i < len; i++)
238			{
239			// the & 0xFF is here to prevent sign extension lateron
240			int la_1 = LA(1), c = (s[i] & 0xFF);
241
242			if ( la_1 != c )
243			throw MismatchedCharException(la_1, c, false, this);
244
245			consume();
246			}
247			}
248			/** See if input does not contain character 'c'
249			* throw MismatchedCharException if not
250			*/
251			virtual void matchNot(int c)
252			{
253			int la_1 = LA(1);
254
255			if ( la_1 == c )
256			throw MismatchedCharException(la_1, c, true, this);
257
258			consume();
259			}
260			/** See if input contains character in range c1-c2
261			* throw MismatchedCharException if not
262			*/
263			virtual void matchRange(int c1, int c2)
264			{
265			int la_1 = LA(1);
266
267			if ( la_1 < c1 || la_1 > c2 )
268			throw MismatchedCharException(la_1, c1, c2, false, this);
269
270			consume();
271			}
272
273			virtual bool getCaseSensitive() const
274			{
275			return caseSensitive;
276			}
277
278			virtual void setCaseSensitive(bool t)
279			{
280			caseSensitive = t;
281			}
282
283			virtual bool getCaseSensitiveLiterals() const=0;
284
285			/// Get the line the scanner currently is in (starts at 1)
286			virtual int getLine() const
287			{
288			return inputState->line;
289			}
290
291			/// set the line number
292			virtual void setLine(int l)
293			{
294			inputState->line = l;
295			}
296
297			/// Get the column the scanner currently is in (starts at 1)
298			virtual int getColumn() const
299			{
300			return inputState->column;
301			}
302			/// set the column number
303			virtual void setColumn(int c)
304			{
305			inputState->column = c;
306			}
307
308			/// get the filename for the file currently used
309			virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
310			{
311			return inputState->filename;
312			}
313			/// Set the filename the scanner is using (used in error messages)
314			virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
315			{
316			inputState->filename = f;
317			}
318
319			virtual bool getCommitToPath() const
320			{
321			return commitToPath;
322			}
323
324			virtual void setCommitToPath(bool commit)
325			{
326			commitToPath = commit;
327			}
328
329			/** return a copy of the current text buffer */
330			virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
331			{
332			return text;
333			}
334
335			virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
336			{
337			text = s;
338			}
339
340			virtual void resetText()
341			{
342			text = "";
343			inputState->tokenStartColumn = inputState->column;
344			inputState->tokenStartLine = inputState->line;
345			}
346
347			virtual RefToken getTokenObject() const
348			{
349			return _returnToken;
350			}
351
352			/** Used to keep track of line breaks, needs to be called from
353			* within generated lexers when a \n \r is encountered.
354			*/
355			virtual void newline()
356			{
357			++inputState->line;
358			inputState->column = 1;
359			}
360
361			/** Advance the current column number by an appropriate amount according
362			* to the tabsize. This method needs to be explicitly called from the
363			* lexer rules encountering tabs.
364			*/
365			virtual void tab()
366			{
367			int c = getColumn();
368			int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
369			setColumn( nc );
370			}
371			/// set the tabsize. Returns the old tabsize
372			int setTabsize( int size )
373			{
374			int oldsize = tabsize;
375			tabsize = size;
376			return oldsize;
377			}
378			/// Return the tabsize used by the scanner
379			int getTabSize() const
380			{
381			return tabsize;
382			}
383
384			/** Report exception errors caught in nextToken() */
385			virtual void reportError(const RecognitionException& e);
386
387			/** Parser error-reporting function can be overridden in subclass */
388			virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
389
390			/** Parser warning-reporting function can be overridden in subclass */
391			virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
392
393			virtual InputBuffer& getInputBuffer()
394			{
395			return inputState->getInput();
396			}
397
398			virtual LexerSharedInputState getInputState()
399			{
400			return inputState;
401			}
402
403			/** set the input state for the lexer.
404			* @note state is a reference counted object, hence no reference */
405			virtual void setInputState(LexerSharedInputState state)
406			{
407			inputState = state;
408			}
409
410			/// Set the factory for created tokens
411			virtual void setTokenObjectFactory(factory_type factory)
412			{
413			tokenFactory = factory;
414			}
415
416			/** Test the token text against the literals table
417			* Override this method to perform a different literals test
418			*/
419			virtual int testLiteralsTable(int ttype) const
420			{
421			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
422			if (i != literals.end())
423			ttype = (*i).second;
424			return ttype;
425			}
426
427			/** Test the text passed in against the literals table
428			* Override this method to perform a different literals test
429			* This is used primarily when you want to test a portion of
430			* a token
431			*/
432			virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
433			{
434			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
435			if (i != literals.end())
436			ttype = (*i).second;
437			return ttype;
438			}
439
440			/// Override this method to get more specific case handling
441			virtual int toLower(int c) const
442			{
443			// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
444			// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
445			// this one is more structural. Maybe make this configurable.
446			return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
447			}
448
449			/** This method is called by YourLexer::nextToken() when the lexer has
450			* hit EOF condition. EOF is NOT a character.
451			* This method is not called if EOF is reached during
452			* syntactic predicate evaluation or during evaluation
453			* of normal lexical rules, which presumably would be
454			* an IOException. This traps the "normal" EOF condition.
455			*
456			* uponEOF() is called after the complete evaluation of
457			* the previous token and only if your parser asks
458			* for another token beyond that last non-EOF token.
459			*
460			* You might want to throw token or char stream exceptions
461			* like: "Heh, premature eof" or a retry stream exception
462			* ("I found the end of this file, go back to referencing file").
463			*/
464			virtual void uponEOF()
465			{
466			}
467
468			/// Methods used to change tracing behavior
469			virtual void traceIndent();
470			virtual void traceIn(const char* rname);
471			virtual void traceOut(const char* rname);
472
473			#ifndef NO_STATIC_CONSTS
474			static const int EOF_CHAR = EOF;
475			#else
476			enum {
477			EOF_CHAR = EOF
478			};
479			#endif
480			protected:
481			ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
482			/// flag indicating wether consume saves characters
483			bool saveConsumedInput;
484			factory_type tokenFactory; ///< Factory for tokens
485			bool caseSensitive; ///< Is this lexer case sensitive
486			ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
487
488			RefToken _returnToken; ///< used to return tokens w/o using return val
489
490			/// Input state, gives access to input stream, shared among different lexers
491			LexerSharedInputState inputState;
492
493			/** Used during filter mode to indicate that path is desired.
494			* A subsequent scan error will report an error as usual
495			* if acceptPath=true;
496			*/
497			bool commitToPath;
498
499			int tabsize; ///< tab size the scanner uses.
500
501			/// Create a new RefToken of type t
502			virtual RefToken makeToken(int t)
503			{
504			RefToken tok = tokenFactory();
505			tok->setType(t);
506			tok->setColumn(inputState->tokenStartColumn);
507			tok->setLine(inputState->tokenStartLine);
508			return tok;
509			}
510
511			/** Tracer class, used when -traceLexer is passed to antlr
512			*/
513			class Tracer {
514			private:
515			CharScanner* parser;
516			const char* text;
517
518			Tracer(const Tracer& other); // undefined
519			Tracer& operator=(const Tracer& other); // undefined
520			public:
521			Tracer( CharScanner* p,const char* t )
522			: parser(p), text(t)
523			{
524			parser->traceIn(text);
525			}
526			~Tracer()
527			{
528			parser->traceOut(text);
529			}
530			};
531
532			int traceDepth;
533			private:
534			CharScanner( const CharScanner& other ); // undefined
535			CharScanner& operator=( const CharScanner& other ); // undefined
536
537			#ifndef NO_STATIC_CONSTS
538			static const int NO_CHAR = 0;
539			#else
540			enum {
541			NO_CHAR = 0
542			};
543			#endif
544			};
545
546			inline int CharScanner::LA(unsigned int i)
547			{
548			int c = inputState->getInput().LA(i);
549
550			if ( caseSensitive )
551			return c;
552			else
553			return toLower(c); // VC 6 tolower bug caught in toLower.
554			}
555
556			inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
557			{
558			if (scanner->getCaseSensitiveLiterals())
559			return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
560			else
561			{
562			#ifdef NO_STRCASECMP
563			return (stricmp(x.c_str(),y.c_str())<0);
564			#else
565			return (strcasecmp(x.c_str(),y.c_str())<0);
566			#endif
567			}
568			}
569
570			#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
571			}
572			#endif
573
574			#endif //INC_CharScanner_hpp__