src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id$
 */

#include <antlr/config.hpp>
#include <cstdio>
#include <map>
#include <cstring>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#ifdef ANTLR_REALLY_NO_STRCASECMP
/** Portable fallback for strcasecmp().
 *
 * Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
 * on the mac has neither, so this version is compiled in when
 * ANTLR_REALLY_NO_STRCASECMP is defined.
 *
 * Compares two NUL-terminated strings after mapping every character
 * through tolower().
 *
 * @param s1 first string (must not be null)
 * @param s2 second string (must not be null)
 * @return -1 if s1 sorts before s2, 1 if it sorts after, 0 if both strings
 *         are equal ignoring case.
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        for (;;)
        {
                // Convert through unsigned char first: tolower() is only defined
                // for values representable as unsigned char (or EOF), and plain
                // char may be signed. Keeping the results as int also makes
                // bytes >= 0x80 order after ASCII regardless of char signedness.
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));

                if (c1 != c2)
                        return (c1 < c2) ? -1 : 1;
                if (c1 == 0)    // both strings ended at the same position
                        return 0;
        }
}
#else
#ifdef NO_STRCASECMP
ANTLR_C_USING(stricmp)
#else
ANTLR_C_USING(strcasecmp)
#endif
#endif

/** Functor for the literals map
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
#endif
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers.
 *
 * Provides the primitives generated lexer rules are built from: lookahead
 * (LA), consume/match, line and column bookkeeping on the input state, the
 * text buffer of the token being scanned and the literals table lookup.
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Type of the function makeToken() calls to create a token.
        typedef RefToken (*factory_type)();
public:
        // Constructors are defined out of line (not in this header).
        CharScanner(InputBuffer& cb, bool case_sensitive );
        CharScanner(InputBuffer* cb, bool case_sensitive );
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        virtual ~CharScanner()
        {
        }

        /** Return lookahead character i; lower-cased through toLower() when
         * the scanner is not case sensitive (see the inline definition after
         * the class).
         */
        virtual int LA(unsigned int i);

        /** Append a single character to the token text.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(char c)
        {
                if (saveConsumedInput)
                {
                        size_t l = text.length();

                        // grow the buffer in steps of 256 characters
                        if ((l%256) == 0)
                                text.reserve(l+256);

                        // insert the character at the current end of the text
                        text.replace(l,0,&c,1);
                }
        }

        /** Append a string to the token text.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( saveConsumedInput )
                        text += s;
        }

        /// Forward commit() to the underlying input buffer
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** called by the generated lexer to do error recovery, override to
         * customize the behaviour.
         * This implementation consumes one character and then skips input
         * until a member of tokenSet (or EOF) is the next character; ex is
         * not inspected.
         */
        virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
        {
                consume();
                consumeUntil(tokenSet);
        }

        /** Consume the current input character.
         * When not guessing (inputState->guessing == 0) the character is
         * also appended to the token text and the column is updated; the
         * underlying input buffer is advanced in either case.
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int c = LA(1);
                        if (caseSensitive)
                        {
                                // same overload as before (append(char)); the
                                // narrowing from int is just made explicit
                                append(static_cast<char>(c));
                        }
                        else
                        {
                                // use input.LA(), not LA(), to get original case
                                // CharScanner.LA() would toLower it.
                                append(static_cast<char>(inputState->getInput().LA(1)));
                        }

                        // RK: in a sense I don't like this automatic handling.
                        if (c == '\t')
                                tab();
                        else
                                inputState->column++;
                }
                inputState->getInput().consume();
        }

        /** Consume chars until one matches the given char (or EOF is seen).
         * The matching character itself is not consumed.
         */
        virtual void consumeUntil(int c)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || la_1 == c )
                                break;
                        consume();
                }
        }

        /** Consume chars until one matches the given set (or EOF is seen).
         * The matching character itself is not consumed.
         */
        virtual void consumeUntil(const BitSet& set)
        {
                for(;;)
                {
                        int la_1 = LA(1);
                        if( la_1 == EOF_CHAR || set.member(la_1) )
                                break;
                        consume();
                }
        }

        /// Mark the current position and return a id for it
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the scanner to a previously marked position
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /// See if input contains character 'c' throw MismatchedCharException if not
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                        throw MismatchedCharException(la_1, c, false, this);
                consume();
        }

        /** See if input contains element from bitset b
         * throw MismatchedCharException if not
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);

                if ( !b.member(la_1) )
                        throw MismatchedCharException( la_1, b, false, this );
                consume();
        }

        /** See if input contains string 's' throw MismatchedCharException if not.
         * Characters matched before a mismatch have already been consumed
         * when the exception is thrown.
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                while( *s != '\0' )
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (*s++ & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input contains string 's' throw MismatchedCharException if not.
         * Characters matched before a mismatch have already been consumed
         * when the exception is thrown.
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                size_t len = s.length();

                for (size_t i = 0; i < len; i++)
                {
                        // the & 0xFF is here to prevent sign extension later on
                        int la_1 = LA(1), c = (s[i] & 0xFF);

                        if ( la_1 != c )
                                throw MismatchedCharException(la_1, c, false, this);

                        consume();
                }
        }
        /** See if input does not contain character 'c'
         * throw MismatchedCharException if it does
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);

                if ( la_1 == c )
                        throw MismatchedCharException(la_1, c, true, this);

                consume();
        }
        /** See if input contains character in range c1-c2 (both inclusive)
         * throw MismatchedCharException if not
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);

                if ( la_1 < c1 || la_1 > c2 )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Return whether LA() hands out characters unchanged (true) or lower-cased (false)
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Set the case sensitivity flag consulted by LA() and consume()
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Queried by CharScannerLiteralsLess to choose its comparison; supplied by the subclass
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1)
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// set the line number
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (starts at 1)
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// set the column number
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// get the filename for the file currently used
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages)
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Return the commitToPath flag
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** Return the current text buffer.
         * @note this is a reference to the scanner's own buffer, not a copy;
         *       take a copy if the value has to survive later changes to the
         *       buffer (append(), setText(), resetText()).
         */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the current text buffer with s
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Clear the text buffer and record the current line/column as the
         * start position of the next token.
         */
        virtual void resetText()
        {
                text = "";
                inputState->tokenStartColumn = inputState->column;
                inputState->tokenStartLine = inputState->line;
        }

        /// Return the token stored in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column number by an appropriate amount according
         * to the tabsize. consume() calls this for every tab it consumes
         * while not guessing.
         *
         * A non-positive tabsize (setTabsize() does not validate its
         * argument) would make the tab stop computation divide by zero or
         * run backwards; in that case the tab advances the column by one,
         * like any other character.
         */
        virtual void tab()
        {
                const int c = getColumn();

                if (tabsize <= 0)
                {
                        setColumn( c + 1 );
                        return;
                }

                const int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
                setColumn( nc );
        }
        /// set the tabsize. Returns the old tabsize
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tabsize used by the scanner
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() */
        virtual void reportError(const RecognitionException& e);

        /** Error-reporting function, can be overridden in subclass */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Warning-reporting function, can be overridden in subclass */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Return the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Return the input state used by this lexer
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** set the input state for the lexer.
         * @note state is a reference counted object, hence no reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory for created tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table
         * Override this method to perform a different literals test
         * @param ttype token type to return when the text is not a literal
         * @return the type stored in the literals table for the current
         *         text, or ttype when there is no entry
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         * @param txt   text to look up
         * @param ttype token type to return when txt is not a literal
         * @return the type stored in the literals table for txt, or ttype
         *         when there is no entry
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /// Override this method to get more specific case handling
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
        }

        /** This method is called by YourLexer::nextToken() when the lexer has
         *  hit EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Heh, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         *
         *  The default implementation does nothing.
         */
        virtual void uponEOF()
        {
        }

        /// Methods used to change tracing behavior
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

        // Value LA() returns at end of input; same value as the C library's EOF.
#ifndef NO_STATIC_CONSTS
        static const int EOF_CHAR = EOF;
#else
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether append() (and thereby consume()) saves characters
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if acceptPath=true;
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /** Create a new RefToken of type t.
         * The token is obtained from tokenFactory and positioned at the
         * token start line/column recorded by resetText().
         */
        virtual RefToken makeToken(int t)
        {
                RefToken tok = tokenFactory();
                tok->setType(t);
                tok->setColumn(inputState->tokenStartColumn);
                tok->setLine(inputState->tokenStartLine);
                return tok;
        }

        /** Tracer class, used when -traceLexer is passed to antlr.
         * Calls traceIn() on construction and traceOut() on destruction, so
         * a local Tracer brackets the scope it lives in. Not copyable.
         */
        class Tracer {
        private:
                CharScanner* parser;    ///< scanner whose traceIn/traceOut are called
                const char* text;       ///< rule name handed to traceIn/traceOut

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        int traceDepth;
private:
        // Scanners are not copyable: declared but intentionally left undefined.
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

/** Look ahead i characters in the input buffer.
 * @return the character as delivered by the input buffer when the scanner
 *         is case sensitive, otherwise the result of passing it through
 *         toLower().
 */
inline int CharScanner::LA(unsigned int i)
{
        const int ch = inputState->getInput().LA(i);

        // toLower() deals with EOF_CHAR itself (VC 6 tolower bug).
        return caseSensitive ? ch : toLower(ch);
}

/** Ordering used by the literals map.
 * Asks the scanner whether literals are case sensitive: if not, the strings
 * are compared with stricmp/strcasecmp (whichever the platform configuration
 * selected), otherwise with std::less<std::string>.
 * @return true if x is ordered before y
 */
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
                // case-insensitive literals
#ifdef NO_STRCASECMP
                return (stricmp(x.c_str(),y.c_str())<0);
#else
                return (strcasecmp(x.c_str(),y.c_str())<0);
#endif
        }

        const ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string> ordering = ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>();
        return ordering(x,y);
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	1653
Committed:	Wed Sep 28 19:37:06 2011 UTC (14 years ago) by gezelter
File size:	13697 byte(s)
Log Message:	fixing compilation bugs on gcc-4.4
#	Content
1	#ifndef INC_CharScanner_hpp__
2	#define INC_CharScanner_hpp__
3
4	/* ANTLR Translator Generator
5	* Project led by Terence Parr at http://www.jGuru.com
6	* Software rights: http://www.antlr.org/license.html
7	*
8	* $Id$
9	*/
10
11	#include <antlr/config.hpp>
12	#include <cstdio>
13	#include <map>
14	#include <cstring>
15
16	#ifdef HAS_NOT_CCTYPE_H
17	#include <ctype.h>
18	#else
19	#include <cctype>
20	#endif
21
22	#if ( _MSC_VER == 1200 )
23	// VC6 seems to need this
24	// note that this is not a standard C++ include file.
25	# include <stdio.h>
26	#endif
27
28	#include <antlr/TokenStream.hpp>
29	#include <antlr/RecognitionException.hpp>
30	#include <antlr/SemanticException.hpp>
31	#include <antlr/MismatchedCharException.hpp>
32	#include <antlr/InputBuffer.hpp>
33	#include <antlr/BitSet.hpp>
34	#include <antlr/LexerSharedInputState.hpp>
35
36	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37	namespace antlr {
38	#endif
39
40	class ANTLR_API CharScanner;
41
42	ANTLR_C_USING(tolower)
43
44	#ifdef ANTLR_REALLY_NO_STRCASECMP
45	// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
46	// on the mac has neither...
47	inline int strcasecmp(const char *s1, const char *s2)
48	{
49	while (true)
50	{
51	char c1 = tolower(*s1++),
52	c2 = tolower(*s2++);
53	if (c1 < c2) return -1;
54	if (c1 > c2) return 1;
55	if (c1 == 0) return 0;
56	}
57	}
58	#else
59	#ifdef NO_STRCASECMP
60	ANTLR_C_USING(stricmp)
61	#else
62	ANTLR_C_USING(strcasecmp)
63	#endif
64	#endif
65
66	/** Functor for the literals map
67	*/
68	class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69	private:
70	const CharScanner* scanner;
71	public:
72	#ifdef NO_TEMPLATE_PARTS
73	CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74	#endif
75	CharScannerLiteralsLess(const CharScanner* theScanner)
76	: scanner(theScanner)
77	{
78	}
79	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80	// defaults are good enough..
81	// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82	// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83	};
84
85	/** Superclass of generated lexers
86	*/
87	class ANTLR_API CharScanner : public TokenStream {
88	protected:
89	typedef RefToken (*factory_type)();
90	public:
91	CharScanner(InputBuffer& cb, bool case_sensitive );
92	CharScanner(InputBuffer* cb, bool case_sensitive );
93	CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94
95	virtual ~CharScanner()
96	{
97	}
98
99	virtual int LA(unsigned int i);
100
101	virtual void append(char c)
102	{
103	if (saveConsumedInput)
104	{
105	size_t l = text.length();
106
107	if ((l%256) == 0)
108	text.reserve(l+256);
109
110	text.replace(l,0,&c,1);
111	}
112	}
113
114	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115	{
116	if( saveConsumedInput )
117	text += s;
118	}
119
120	virtual void commit()
121	{
122	inputState->getInput().commit();
123	}
124
125	/** called by the generated lexer to do error recovery, override to
126	* customize the behaviour.
127	*/
128	virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
129	{
130	consume();
131	consumeUntil(tokenSet);
132	}
133
134	virtual void consume()
135	{
136	if (inputState->guessing == 0)
137	{
138	int c = LA(1);
139	if (caseSensitive)
140	{
141	append(c);
142	}
143	else
144	{
145	// use input.LA(), not LA(), to get original case
146	// CharScanner.LA() would toLower it.
147	append(inputState->getInput().LA(1));
148	}
149
150	// RK: in a sense I don't like this automatic handling.
151	if (c == '\t')
152	tab();
153	else
154	inputState->column++;
155	}
156	inputState->getInput().consume();
157	}
158
159	/** Consume chars until one matches the given char */
160	virtual void consumeUntil(int c)
161	{
162	for(;;)
163	{
164	int la_1 = LA(1);
165	if( la_1 == EOF_CHAR || la_1 == c )
166	break;
167	consume();
168	}
169	}
170
171	/** Consume chars until one matches the given set */
172	virtual void consumeUntil(const BitSet& set)
173	{
174	for(;;)
175	{
176	int la_1 = LA(1);
177	if( la_1 == EOF_CHAR || set.member(la_1) )
178	break;
179	consume();
180	}
181	}
182
183	/// Mark the current position and return a id for it
184	virtual unsigned int mark()
185	{
186	return inputState->getInput().mark();
187	}
188	/// Rewind the scanner to a previously marked position
189	virtual void rewind(unsigned int pos)
190	{
191	inputState->getInput().rewind(pos);
192	}
193
194	/// See if input contains character 'c' throw MismatchedCharException if not
195	virtual void match(int c)
196	{
197	int la_1 = LA(1);
198	if ( la_1 != c )
199	throw MismatchedCharException(la_1, c, false, this);
200	consume();
201	}
202
203	/** See if input contains element from bitset b
204	* throw MismatchedCharException if not
205	*/
206	virtual void match(const BitSet& b)
207	{
208	int la_1 = LA(1);
209
210	if ( !b.member(la_1) )
211	throw MismatchedCharException( la_1, b, false, this );
212	consume();
213	}
214
215	/** See if input contains string 's' throw MismatchedCharException if not
216	* @note the string cannot match EOF
217	*/
218	virtual void match( const char* s )
219	{
220	while( *s != '\0' )
221	{
222	// the & 0xFF is here to prevent sign extension lateron
223	int la_1 = LA(1), c = (*s++ & 0xFF);
224
225	if ( la_1 != c )
226	throw MismatchedCharException(la_1, c, false, this);
227
228	consume();
229	}
230	}
231	/** See if input contains string 's' throw MismatchedCharException if not
232	* @note the string cannot match EOF
233	*/
234	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
235	{
236	size_t len = s.length();
237
238	for (size_t i = 0; i < len; i++)
239	{
240	// the & 0xFF is here to prevent sign extension lateron
241	int la_1 = LA(1), c = (s[i] & 0xFF);
242
243	if ( la_1 != c )
244	throw MismatchedCharException(la_1, c, false, this);
245
246	consume();
247	}
248	}
249	/** See if input does not contain character 'c'
250	* throw MismatchedCharException if not
251	*/
252	virtual void matchNot(int c)
253	{
254	int la_1 = LA(1);
255
256	if ( la_1 == c )
257	throw MismatchedCharException(la_1, c, true, this);
258
259	consume();
260	}
261	/** See if input contains character in range c1-c2
262	* throw MismatchedCharException if not
263	*/
264	virtual void matchRange(int c1, int c2)
265	{
266	int la_1 = LA(1);
267
268	if ( la_1 < c1 || la_1 > c2 )
269	throw MismatchedCharException(la_1, c1, c2, false, this);
270
271	consume();
272	}
273
274	virtual bool getCaseSensitive() const
275	{
276	return caseSensitive;
277	}
278
279	virtual void setCaseSensitive(bool t)
280	{
281	caseSensitive = t;
282	}
283
284	virtual bool getCaseSensitiveLiterals() const=0;
285
286	/// Get the line the scanner currently is in (starts at 1)
287	virtual int getLine() const
288	{
289	return inputState->line;
290	}
291
292	/// set the line number
293	virtual void setLine(int l)
294	{
295	inputState->line = l;
296	}
297
298	/// Get the column the scanner currently is in (starts at 1)
299	virtual int getColumn() const
300	{
301	return inputState->column;
302	}
303	/// set the column number
304	virtual void setColumn(int c)
305	{
306	inputState->column = c;
307	}
308
309	/// get the filename for the file currently used
310	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
311	{
312	return inputState->filename;
313	}
314	/// Set the filename the scanner is using (used in error messages)
315	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
316	{
317	inputState->filename = f;
318	}
319
320	virtual bool getCommitToPath() const
321	{
322	return commitToPath;
323	}
324
325	virtual void setCommitToPath(bool commit)
326	{
327	commitToPath = commit;
328	}
329
330	/** return a copy of the current text buffer */
331	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
332	{
333	return text;
334	}
335
336	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
337	{
338	text = s;
339	}
340
341	virtual void resetText()
342	{
343	text = "";
344	inputState->tokenStartColumn = inputState->column;
345	inputState->tokenStartLine = inputState->line;
346	}
347
348	virtual RefToken getTokenObject() const
349	{
350	return _returnToken;
351	}
352
353	/** Used to keep track of line breaks, needs to be called from
354	* within generated lexers when a \n \r is encountered.
355	*/
356	virtual void newline()
357	{
358	++inputState->line;
359	inputState->column = 1;
360	}
361
362	/** Advance the current column number by an appropriate amount according
363	* to the tabsize. This method needs to be explicitly called from the
364	* lexer rules encountering tabs.
365	*/
366	virtual void tab()
367	{
368	int c = getColumn();
369	int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
370	setColumn( nc );
371	}
372	/// set the tabsize. Returns the old tabsize
373	int setTabsize( int size )
374	{
375	int oldsize = tabsize;
376	tabsize = size;
377	return oldsize;
378	}
379	/// Return the tabsize used by the scanner
380	int getTabSize() const
381	{
382	return tabsize;
383	}
384
385	/** Report exception errors caught in nextToken() */
386	virtual void reportError(const RecognitionException& e);
387
388	/** Parser error-reporting function can be overridden in subclass */
389	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
390
391	/** Parser warning-reporting function can be overridden in subclass */
392	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
393
394	virtual InputBuffer& getInputBuffer()
395	{
396	return inputState->getInput();
397	}
398
399	virtual LexerSharedInputState getInputState()
400	{
401	return inputState;
402	}
403
404	/** set the input state for the lexer.
405	* @note state is a reference counted object, hence no reference */
406	virtual void setInputState(LexerSharedInputState state)
407	{
408	inputState = state;
409	}
410
411	/// Set the factory for created tokens
412	virtual void setTokenObjectFactory(factory_type factory)
413	{
414	tokenFactory = factory;
415	}
416
417	/** Test the token text against the literals table
418	* Override this method to perform a different literals test
419	*/
420	virtual int testLiteralsTable(int ttype) const
421	{
422	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
423	if (i != literals.end())
424	ttype = (*i).second;
425	return ttype;
426	}
427
428	/** Test the text passed in against the literals table
429	* Override this method to perform a different literals test
430	* This is used primarily when you want to test a portion of
431	* a token
432	*/
433	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
434	{
435	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
436	if (i != literals.end())
437	ttype = (*i).second;
438	return ttype;
439	}
440
441	/// Override this method to get more specific case handling
442	virtual int toLower(int c) const
443	{
444	// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
445	// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
446	// this one is more structural. Maybe make this configurable.
447	return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
448	}
449
450	/** This method is called by YourLexer::nextToken() when the lexer has
451	* hit EOF condition. EOF is NOT a character.
452	* This method is not called if EOF is reached during
453	* syntactic predicate evaluation or during evaluation
454	* of normal lexical rules, which presumably would be
455	* an IOException. This traps the "normal" EOF condition.
456	*
457	* uponEOF() is called after the complete evaluation of
458	* the previous token and only if your parser asks
459	* for another token beyond that last non-EOF token.
460	*
461	* You might want to throw token or char stream exceptions
462	* like: "Heh, premature eof" or a retry stream exception
463	* ("I found the end of this file, go back to referencing file").
464	*/
465	virtual void uponEOF()
466	{
467	}
468
469	/// Methods used to change tracing behavior
470	virtual void traceIndent();
471	virtual void traceIn(const char* rname);
472	virtual void traceOut(const char* rname);
473
474	#ifndef NO_STATIC_CONSTS
475	static const int EOF_CHAR = EOF;
476	#else
477	enum {
478	EOF_CHAR = EOF
479	};
480	#endif
481	protected:
482	ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
483	/// flag indicating wether consume saves characters
484	bool saveConsumedInput;
485	factory_type tokenFactory; ///< Factory for tokens
486	bool caseSensitive; ///< Is this lexer case sensitive
487	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
488
489	RefToken _returnToken; ///< used to return tokens w/o using return val
490
491	/// Input state, gives access to input stream, shared among different lexers
492	LexerSharedInputState inputState;
493
494	/** Used during filter mode to indicate that path is desired.
495	* A subsequent scan error will report an error as usual
496	* if acceptPath=true;
497	*/
498	bool commitToPath;
499
500	int tabsize; ///< tab size the scanner uses.
501
502	/// Create a new RefToken of type t
503	virtual RefToken makeToken(int t)
504	{
505	RefToken tok = tokenFactory();
506	tok->setType(t);
507	tok->setColumn(inputState->tokenStartColumn);
508	tok->setLine(inputState->tokenStartLine);
509	return tok;
510	}
511
512	/** Tracer class, used when -traceLexer is passed to antlr
513	*/
514	class Tracer {
515	private:
516	CharScanner* parser;
517	const char* text;
518
519	Tracer(const Tracer& other); // undefined
520	Tracer& operator=(const Tracer& other); // undefined
521	public:
522	Tracer( CharScanner* p,const char* t )
523	: parser(p), text(t)
524	{
525	parser->traceIn(text);
526	}
527	~Tracer()
528	{
529	parser->traceOut(text);
530	}
531	};
532
533	int traceDepth;
534	private:
535	CharScanner( const CharScanner& other ); // undefined
536	CharScanner& operator=( const CharScanner& other ); // undefined
537
538	#ifndef NO_STATIC_CONSTS
539	static const int NO_CHAR = 0;
540	#else
541	enum {
542	NO_CHAR = 0
543	};
544	#endif
545	};
546
547	inline int CharScanner::LA(unsigned int i)
548	{
549	int c = inputState->getInput().LA(i);
550
551	if ( caseSensitive )
552	return c;
553	else
554	return toLower(c); // VC 6 tolower bug caught in toLower.
555	}
556
557	inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
558	{
559	if (scanner->getCaseSensitiveLiterals())
560	return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
561	else
562	{
563	#ifdef NO_STRCASECMP
564	return (stricmp(x.c_str(),y.c_str())<0);
565	#else
566	return (strcasecmp(x.c_str(),y.c_str())<0);
567	#endif
568	}
569	}
570
571	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
572	}
573	#endif
574
575	#endif //INC_CharScanner_hpp__