src/antlr/CharScanner.hpp

#ifndef INC_CharScanner_hpp__
#define INC_CharScanner_hpp__

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/license.html
 *
 * $Id$
 */

#include <antlr/config.hpp>

#include <map>
#include <cstdio>

#ifdef HAS_NOT_CCTYPE_H
#include <ctype.h>
#else
#include <cctype>
#endif

#if ( _MSC_VER == 1200 )
// VC6 seems to need this
// note that this is not a standard C++ include file.
# include <stdio.h>
#endif

#include <antlr/TokenStream.hpp>
#include <antlr/RecognitionException.hpp>
#include <antlr/SemanticException.hpp>
#include <antlr/MismatchedCharException.hpp>
#include <antlr/InputBuffer.hpp>
#include <antlr/BitSet.hpp>
#include <antlr/LexerSharedInputState.hpp>

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
namespace antlr {
#endif

class ANTLR_API CharScanner;

ANTLR_C_USING(tolower)

#if !defined(HAVE_STRCASECMP) && defined(HAVE_STRICMP) && !defined(stricmp)
#define strcasecmp stricmp
#endif
#if !defined(HAVE_STRNCASECMP) && defined(HAVE_STRNICMP) && !defined(strnicmp)
#define strncasecmp strnicmp
#endif


#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
/** Fallback case-insensitive comparison of two NUL-terminated strings,
 * compiled only when neither HAVE_STRCASECMP nor HAVE_STRICMP is defined.
 *
 * Characters are lower-cased with tolower() and compared by their
 * unsigned char value, like the POSIX function of the same name.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
        for (;;)
        {
                // Go through unsigned char: handing a negative char value to
                // tolower() is undefined, and comparing plain (possibly signed)
                // chars would order characters above 0x7F before ASCII.
                const int c1 = tolower(static_cast<unsigned char>(*s1++));
                const int c2 = tolower(static_cast<unsigned char>(*s2++));

                if (c1 < c2) return -1;
                if (c1 > c2) return 1;
                if (c1 == 0) return 0;  // both strings ended at the same position
        }
}
#endif

/** Functor for the literals map
 */
class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
private:
        const CharScanner* scanner;
public:
#ifdef NO_TEMPLATE_PARTS
        CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
#endif
        CharScannerLiteralsLess(const CharScanner* theScanner)
        : scanner(theScanner)
        {
        }
        bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
// defaults are good enough..
        //      CharScannerLiteralsLess(const CharScannerLiteralsLess&);
        //      CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
};

/** Superclass of generated lexers
 */
class ANTLR_API CharScanner : public TokenStream {
protected:
        /// Pointer to a parameterless function returning a RefToken; makeToken() calls it to create tokens
        typedef RefToken (*factory_type)();
public:
        /// Construct from an InputBuffer reference and a case-sensitivity flag (defined out of line)
        CharScanner(InputBuffer& cb, bool case_sensitive );
        /// Construct from an InputBuffer pointer and a case-sensitivity flag (defined out of line)
        CharScanner(InputBuffer* cb, bool case_sensitive );
        /// Construct from an existing shared input state and a case-sensitivity flag (defined out of line)
        CharScanner(const LexerSharedInputState& state, bool case_sensitive );

        /// Virtual destructor with an empty body
        virtual ~CharScanner()
        {
        }

        /// Return lookahead character i of the input, passed through toLower() when the
        /// scanner is not case sensitive (inline definition follows the class)
        virtual int LA(unsigned int i);

        /** Append one character to the text of the current token.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(char c)
        {
                if (!saveConsumedInput)
                        return;

                const size_t used = text.length();

                // reserve room for another 256 characters each time the
                // length reaches a multiple of 256
                if (used % 256 == 0)
                        text.reserve(used + 256);

                text += c;
        }

        /** Append a whole string to the text of the current token.
         * Does nothing unless saveConsumedInput is set.
         */
        virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                if( !saveConsumedInput )
                        return;

                text.append(s);
        }

        /// Forward commit() to the input buffer of the current input state
        virtual void commit()
        {
                inputState->getInput().commit();
        }

        /** Consume the current lookahead character.
         * When the lexer is not guessing, the character is first handed to
         * append() and the column is advanced: tab() for a tab character,
         * one column for anything else. The input buffer is advanced in
         * either case.
         */
        virtual void consume()
        {
                if (inputState->guessing == 0)
                {
                        int la_1 = LA(1);

                        // CharScanner.LA() lower-cases when the scanner is not case
                        // sensitive, so in that case read the character from the
                        // input buffer directly to keep its original case.
                        if (!caseSensitive)
                                append(inputState->getInput().LA(1));
                        else
                                append(la_1);

                        // RK: in a sense I don't like this automatic handling.
                        if (la_1 != '\t')
                                inputState->column++;
                        else
                                tab();
                }
                inputState->getInput().consume();
        }

        /** Consume characters until the lookahead equals c or is EOF_CHAR.
         * The character that stops the loop is not consumed.
         */
        virtual void consumeUntil(int c)
        {
                int ahead = LA(1);
                while( ahead != EOF_CHAR && ahead != c )
                {
                        consume();
                        ahead = LA(1);
                }
        }

        /** Consume characters until the lookahead is a member of set or is EOF_CHAR.
         * The character that stops the loop is not consumed.
         */
        virtual void consumeUntil(const BitSet& set)
        {
                int ahead = LA(1);
                while( ahead != EOF_CHAR && !set.member(ahead) )
                {
                        consume();
                        ahead = LA(1);
                }
        }

        /// Mark the current position of the input buffer and return an id for it (see rewind())
        virtual unsigned int mark()
        {
                return inputState->getInput().mark();
        }
        /// Rewind the input buffer to a position previously returned by mark()
        virtual void rewind(unsigned int pos)
        {
                inputState->getInput().rewind(pos);
        }

        /** Match a single character.
         * Consumes the lookahead character when it equals c.
         * @throws MismatchedCharException when the lookahead differs from c
         */
        virtual void match(int c)
        {
                int la_1 = LA(1);
                if ( la_1 == c )
                {
                        consume();
                        return;
                }
                throw MismatchedCharException(la_1, c, false, this);
        }

        /** Match one character out of a set.
         * Consumes the lookahead character when it is a member of b.
         * @throws MismatchedCharException when the lookahead is not in b
         */
        virtual void match(const BitSet& b)
        {
                int la_1 = LA(1);
                if ( b.member(la_1) )
                {
                        consume();
                        return;
                }
                throw MismatchedCharException( la_1, b, false, this );
        }

        /** Match a NUL-terminated string character by character.
         * Each matching character is consumed before the next one is tested.
         * @throws MismatchedCharException at the first character that differs
         * @note the string cannot match EOF
         */
        virtual void match( const char* s )
        {
                for( ; *s != '\0'; ++s )
                {
                        int la_1 = LA(1);
                        // mask to 0..255 so that a negative char is not sign extended
                        int expected = (*s & 0xFF);

                        if ( la_1 != expected )
                                throw MismatchedCharException(la_1, expected, false, this);

                        consume();
                }
        }
        /** Match a std::string character by character.
         * Each matching character is consumed before the next one is tested.
         * @throws MismatchedCharException at the first character that differs
         * @note the string cannot match EOF
         */
        virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                // the length is read once, before anything is consumed
                const size_t count = s.length();
                size_t pos = 0;

                while (pos < count)
                {
                        int la_1 = LA(1);
                        // mask to 0..255 so that a negative char is not sign extended
                        int expected = (s[pos] & 0xFF);

                        if ( la_1 != expected )
                                throw MismatchedCharException(la_1, expected, false, this);

                        consume();
                        ++pos;
                }
        }
        /** Match any character except c.
         * Consumes the lookahead character when it differs from c.
         * @throws MismatchedCharException when the lookahead equals c
         */
        virtual void matchNot(int c)
        {
                int la_1 = LA(1);
                if ( la_1 != c )
                {
                        consume();
                        return;
                }
                throw MismatchedCharException(la_1, c, true, this);
        }
        /** Match a character in the inclusive range c1..c2.
         * Consumes the lookahead character when it lies inside the range.
         * @throws MismatchedCharException when the lookahead is outside the range
         */
        virtual void matchRange(int c1, int c2)
        {
                int la_1 = LA(1);
                const bool inRange = ( c1 <= la_1 && la_1 <= c2 );

                if ( !inRange )
                        throw MismatchedCharException(la_1, c1, c2, false, this);

                consume();
        }

        /// Return the caseSensitive flag; when it is false LA() lower-cases the lookahead
        virtual bool getCaseSensitive() const
        {
                return caseSensitive;
        }

        /// Set the caseSensitive flag used by LA() and consume()
        virtual void setCaseSensitive(bool t)
        {
                caseSensitive = t;
        }

        /// Pure virtual: tells CharScannerLiteralsLess whether the literals table is compared case sensitively
        virtual bool getCaseSensitiveLiterals() const=0;

        /// Get the line the scanner currently is in (starts at 1); read from the shared input state
        virtual int getLine() const
        {
                return inputState->line;
        }

        /// Set the current line number stored in the shared input state
        virtual void setLine(int l)
        {
                inputState->line = l;
        }

        /// Get the column the scanner currently is in (newline() resets it to 1); read from the shared input state
        virtual int getColumn() const
        {
                return inputState->column;
        }
        /// Set the current column number stored in the shared input state
        virtual void setColumn(int c)
        {
                inputState->column = c;
        }

        /// Get the filename stored in the shared input state (returned by reference, not copied)
        virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
        {
                return inputState->filename;
        }
        /// Set the filename the scanner is using (used in error messages); stored in the shared input state
        virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
        {
                inputState->filename = f;
        }

        /// Return the commitToPath flag (see the member's description)
        virtual bool getCommitToPath() const
        {
                return commitToPath;
        }

        /// Set the commitToPath flag (see the member's description)
        virtual void setCommitToPath(bool commit)
        {
                commitToPath = commit;
        }

        /** Return the current text buffer.
         * The result is a const reference to the member, not a copy, so it
         * reflects later changes made through append(), setText() or resetText().
         */
        virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
        {
                return text;
        }

        /// Replace the current text buffer with a copy of s
        virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
        {
                text = s;
        }

        /** Start a new token: empty the text buffer and record the current
         * line and column as the token start position.
         */
        virtual void resetText()
        {
                inputState->tokenStartLine = inputState->line;
                inputState->tokenStartColumn = inputState->column;
                text.erase();
        }

        /// Return the token held in _returnToken
        virtual RefToken getTokenObject() const
        {
                return _returnToken;
        }

        /** Used to keep track of line breaks, needs to be called from
         * within generated lexers when a \n \r is encountered.
         * Increments the line number and resets the column to 1.
         */
        virtual void newline()
        {
                ++inputState->line;
                inputState->column = 1;
        }

        /** Advance the current column to the next tab stop.
         * Tab stops lie at columns 1, tabsize+1, 2*tabsize+1, ... consume()
         * calls this method itself when it consumes a tab character outside
         * of guessing mode.
         *
         * setTabsize() stores its argument unchecked, so tabsize may be zero
         * or negative. The tab-stop formula would then divide by zero or
         * produce a meaningless column; in that case the tab is counted as
         * a single column instead.
         */
        virtual void tab()
        {
                int c = getColumn();

                if (tabsize <= 0)
                {
                        setColumn( c + 1 );
                        return;
                }

                int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1;      // calculate tab stop
                setColumn( nc );
        }
        /// set the tabsize. Returns the old tabsize
        int setTabsize( int size )
        {
                int oldsize = tabsize;
                tabsize = size;
                return oldsize;
        }
        /// Return the tab size used by tab() to compute tab stops
        int getTabSize() const
        {
                return tabsize;
        }

        /** Report exception errors caught in nextToken() (defined out of line) */
        virtual void reportError(const RecognitionException& e);

        /** Error-reporting function taking a message string; can be overridden in subclass (defined out of line) */
        virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);

        /** Warning-reporting function taking a message string; can be overridden in subclass (defined out of line) */
        virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);

        /// Return a reference to the input buffer of the current input state
        virtual InputBuffer& getInputBuffer()
        {
                return inputState->getInput();
        }

        /// Return the shared input state handle (by value)
        virtual LexerSharedInputState getInputState()
        {
                return inputState;
        }

        /** Set the input state for the lexer, replacing the current one.
         * @note state is a reference counted object, hence it is passed by value
         *       rather than by reference */
        virtual void setInputState(LexerSharedInputState state)
        {
                inputState = state;
        }

        /// Set the factory function that makeToken() calls to create tokens
        virtual void setTokenObjectFactory(factory_type factory)
        {
                tokenFactory = factory;
        }

        /** Test the token text against the literals table
         * Override this method to perform a different literals test
         */
        virtual int testLiteralsTable(int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Test the text passed in against the literals table
         * Override this method to perform a different literals test
         * This is used primarily when you want to test a portion of
         * a token
         */
        virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
        {
                ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
                if (i != literals.end())
                        ttype = (*i).second;
                return ttype;
        }

        /** Lower-case a character with tolower().
         * Override this method to get more specific case handling.
         * @return EOF_CHAR unchanged, tolower(c) for any other value
         */
        virtual int toLower(int c) const
        {
                // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
                // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
                // this one is more structural. Maybe make this configurable.
                if (c == EOF_CHAR)
                        return EOF_CHAR;

                return tolower(c);
        }

        /** Hook called by YourLexer::nextToken() when the lexer has hit the
         *  EOF condition.  EOF is NOT a character.
         *  This method is not called if EOF is reached during
         *  syntactic predicate evaluation or during evaluation
         *  of normal lexical rules, which presumably would be
         *  an IOException.  This traps the "normal" EOF condition.
         *
         *  uponEOF() is called after the complete evaluation of
         *  the previous token and only if your parser asks
         *  for another token beyond that last non-EOF token.
         *
         *  You might want to throw token or char stream exceptions
         *  like: "Hey, premature eof" or a retry stream exception
         *  ("I found the end of this file, go back to referencing file").
         *
         *  The implementation in this class is empty.
         */
        virtual void uponEOF()
        {
        }

        /// Tracing hooks, virtual so a subclass can change the tracing behavior (defined out of line).
        /// traceIn()/traceOut() are called by the Tracer helper with the text it was constructed with.
        virtual void traceIndent();
        virtual void traceIn(const char* rname);
        virtual void traceOut(const char* rname);

#ifndef NO_STATIC_CONSTS
        /// End-of-input marker, equal to EOF from <cstdio>; the lookahead is compared against it
        static const int EOF_CHAR = EOF;
#else
        // same constant as an enumerator, used when NO_STATIC_CONSTS is defined
        enum {
                EOF_CHAR = EOF
        };
#endif
protected:
        ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
        /// flag indicating whether append() stores consumed characters in text
        bool saveConsumedInput;
        factory_type tokenFactory;                              ///< Factory for tokens, called by makeToken()
        bool caseSensitive;                                             ///< Is this lexer case sensitive
        /// Literals table: maps literal text to token type, ordered by CharScannerLiteralsLess
        ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass

        RefToken _returnToken;          ///< used to return tokens w/o using return val

        /// Input state, gives access to input stream, shared among different lexers
        LexerSharedInputState inputState;

        /** Used during filter mode to indicate that path is desired.
         * A subsequent scan error will report an error as usual
         * if commitToPath=true;
         * NOTE(review): this comment used to name the flag "acceptPath";
         * presumably commitToPath is meant - confirm.
         */
        bool commitToPath;

        int tabsize;    ///< tab size the scanner uses.

        /** Create a new token of type t through the token factory.
         * The token's column and line are set to the token start position
         * recorded in the input state.
         */
        virtual RefToken makeToken(int t)
        {
                RefToken created = tokenFactory();

                created->setType(t);
                created->setColumn(inputState->tokenStartColumn);
                created->setLine(inputState->tokenStartLine);

                return created;
        }

        /** Tracer class, used when -traceLexer is passed to antlr
         */
        class Tracer {
        private:
                CharScanner* parser;
                const char* text;

                Tracer(const Tracer& other);                                    // undefined
                Tracer& operator=(const Tracer& other);         // undefined
        public:
                Tracer( CharScanner* p,const char* t )
                : parser(p), text(t)
                {
                        parser->traceIn(text);
                }
                ~Tracer()
                {
                        parser->traceOut(text);
                }
        };

        /// Nesting depth for tracing; presumably maintained by traceIn()/traceOut(), which are defined elsewhere - confirm
        int traceDepth;
private:
        // Copying a scanner is disabled: both members are private and left undefined
        CharScanner( const CharScanner& other );                                        // undefined
        CharScanner& operator=( const CharScanner& other );     // undefined

#ifndef NO_STATIC_CONSTS
        // NOTE(review): NO_CHAR is not referenced anywhere in this header
        static const int NO_CHAR = 0;
#else
        enum {
                NO_CHAR = 0
        };
#endif
};

/** Return lookahead character i from the input buffer.
 * When the scanner is not case sensitive the character is passed through
 * toLower() first.
 */
inline int CharScanner::LA(unsigned int i)
{
        const int c = inputState->getInput().LA(i);

        // VC 6 tolower bug caught in toLower.
        return caseSensitive ? c : toLower(c);
}

/** Return true when x orders before y.
 * Uses the normal std::string ordering when the scanner reports case
 * sensitive literals, a case-insensitive C string comparison otherwise.
 */
inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
{
        if (!scanner->getCaseSensitiveLiterals())
        {
#ifdef NO_STRCASECMP
                return (stricmp(x.c_str(),y.c_str())<0);
#else
                return (strcasecmp(x.c_str(),y.c_str())<0);
#endif
        }

        return (x < y);
}

#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
}
#endif

#endif //INC_CharScanner_hpp__
Revision:	1465
Committed:	Fri Jul 9 23:08:25 2010 UTC (15 years ago) by chuckv
File size:	13508 byte(s)
Log Message:	Creating busticated version of OpenMD
#	Content
1	#ifndef INC_CharScanner_hpp__
2	#define INC_CharScanner_hpp__
3
4	/* ANTLR Translator Generator
5	* Project led by Terence Parr at http://www.jGuru.com
6	* Software rights: http://www.antlr.org/license.html
7	*
8	* $Id$
9	*/
10
11	#include <antlr/config.hpp>
12
13	#include <map>
14	#include <cstdio>
15
16	#ifdef HAS_NOT_CCTYPE_H
17	#include <ctype.h>
18	#else
19	#include <cctype>
20	#endif
21
22	#if ( _MSC_VER == 1200 )
23	// VC6 seems to need this
24	// note that this is not a standard C++ include file.
25	# include <stdio.h>
26	#endif
27
28	#include <antlr/TokenStream.hpp>
29	#include <antlr/RecognitionException.hpp>
30	#include <antlr/SemanticException.hpp>
31	#include <antlr/MismatchedCharException.hpp>
32	#include <antlr/InputBuffer.hpp>
33	#include <antlr/BitSet.hpp>
34	#include <antlr/LexerSharedInputState.hpp>
35
36	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37	namespace antlr {
38	#endif
39
40	class ANTLR_API CharScanner;
41
42	ANTLR_C_USING(tolower)
43
44	#if !defined(HAVE_STRCASECMP) && defined(HAVE_STRICMP) && !defined(stricmp)
45	#define strcasecmp stricmp
46	#endif
47	#if !defined(HAVE_STRNCASECMP) && defined(HAVE_STRNICMP) && !defined(strnicmp)
48	#define strncasecmp strnicmp
49	#endif
50
51
52	#if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
53	inline int strcasecmp(const char *s1, const char *s2)
54	{
55	while (true)
56	{
57	char c1 = tolower(*s1++),
58	c2 = tolower(*s2++);
59	if (c1 < c2) return -1;
60	if (c1 > c2) return 1;
61	if (c1 == 0) return 0;
62	}
63	}
64	#endif
65
66	/** Functor for the literals map
67	*/
68	class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69	private:
70	const CharScanner* scanner;
71	public:
72	#ifdef NO_TEMPLATE_PARTS
73	CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74	#endif
75	CharScannerLiteralsLess(const CharScanner* theScanner)
76	: scanner(theScanner)
77	{
78	}
79	bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80	// defaults are good enough..
81	// CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82	// CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83	};
84
85	/** Superclass of generated lexers
86	*/
87	class ANTLR_API CharScanner : public TokenStream {
88	protected:
89	typedef RefToken (*factory_type)();
90	public:
91	CharScanner(InputBuffer& cb, bool case_sensitive );
92	CharScanner(InputBuffer* cb, bool case_sensitive );
93	CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94
95	virtual ~CharScanner()
96	{
97	}
98
99	virtual int LA(unsigned int i);
100
101	virtual void append(char c)
102	{
103	if (saveConsumedInput)
104	{
105	size_t l = text.length();
106
107	if ((l%256) == 0)
108	text.reserve(l+256);
109
110	text.replace(l,0,&c,1);
111	}
112	}
113
114	virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115	{
116	if( saveConsumedInput )
117	text += s;
118	}
119
120	virtual void commit()
121	{
122	inputState->getInput().commit();
123	}
124
125	virtual void consume()
126	{
127	if (inputState->guessing == 0)
128	{
129	int c = LA(1);
130	if (caseSensitive)
131	{
132	append(c);
133	}
134	else
135	{
136	// use input.LA(), not LA(), to get original case
137	// CharScanner.LA() would toLower it.
138	append(inputState->getInput().LA(1));
139	}
140
141	// RK: in a sense I don't like this automatic handling.
142	if (c == '\t')
143	tab();
144	else
145	inputState->column++;
146	}
147	inputState->getInput().consume();
148	}
149
150	/** Consume chars until one matches the given char */
151	virtual void consumeUntil(int c)
152	{
153	for(;;)
154	{
155	int la_1 = LA(1);
156	if( la_1 == EOF_CHAR || la_1 == c )
157	break;
158	consume();
159	}
160	}
161
162	/** Consume chars until one matches the given set */
163	virtual void consumeUntil(const BitSet& set)
164	{
165	for(;;)
166	{
167	int la_1 = LA(1);
168	if( la_1 == EOF_CHAR || set.member(la_1) )
169	break;
170	consume();
171	}
172	}
173
174	/// Mark the current position and return a id for it
175	virtual unsigned int mark()
176	{
177	return inputState->getInput().mark();
178	}
179	/// Rewind the scanner to a previously marked position
180	virtual void rewind(unsigned int pos)
181	{
182	inputState->getInput().rewind(pos);
183	}
184
185	/// See if input contains character 'c' throw MismatchedCharException if not
186	virtual void match(int c)
187	{
188	int la_1 = LA(1);
189	if ( la_1 != c )
190	throw MismatchedCharException(la_1, c, false, this);
191	consume();
192	}
193
194	/** See if input contains element from bitset b
195	* throw MismatchedCharException if not
196	*/
197	virtual void match(const BitSet& b)
198	{
199	int la_1 = LA(1);
200
201	if ( !b.member(la_1) )
202	throw MismatchedCharException( la_1, b, false, this );
203	consume();
204	}
205
206	/** See if input contains string 's' throw MismatchedCharException if not
207	* @note the string cannot match EOF
208	*/
209	virtual void match( const char* s )
210	{
211	while( *s != '\0' )
212	{
213	// the & 0xFF is here to prevent sign extension lateron
214	int la_1 = LA(1), c = (*s++ & 0xFF);
215
216	if ( la_1 != c )
217	throw MismatchedCharException(la_1, c, false, this);
218
219	consume();
220	}
221	}
222	/** See if input contains string 's' throw MismatchedCharException if not
223	* @note the string cannot match EOF
224	*/
225	virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
226	{
227	size_t len = s.length();
228
229	for (size_t i = 0; i < len; i++)
230	{
231	// the & 0xFF is here to prevent sign extension lateron
232	int la_1 = LA(1), c = (s[i] & 0xFF);
233
234	if ( la_1 != c )
235	throw MismatchedCharException(la_1, c, false, this);
236
237	consume();
238	}
239	}
240	/** See if input does not contain character 'c'
241	* throw MismatchedCharException if not
242	*/
243	virtual void matchNot(int c)
244	{
245	int la_1 = LA(1);
246
247	if ( la_1 == c )
248	throw MismatchedCharException(la_1, c, true, this);
249
250	consume();
251	}
252	/** See if input contains character in range c1-c2
253	* throw MismatchedCharException if not
254	*/
255	virtual void matchRange(int c1, int c2)
256	{
257	int la_1 = LA(1);
258
259	if ( la_1 < c1 || la_1 > c2 )
260	throw MismatchedCharException(la_1, c1, c2, false, this);
261
262	consume();
263	}
264
265	virtual bool getCaseSensitive() const
266	{
267	return caseSensitive;
268	}
269
270	virtual void setCaseSensitive(bool t)
271	{
272	caseSensitive = t;
273	}
274
275	virtual bool getCaseSensitiveLiterals() const=0;
276
277	/// Get the line the scanner currently is in (starts at 1)
278	virtual int getLine() const
279	{
280	return inputState->line;
281	}
282
283	/// set the line number
284	virtual void setLine(int l)
285	{
286	inputState->line = l;
287	}
288
289	/// Get the column the scanner currently is in (starts at 1)
290	virtual int getColumn() const
291	{
292	return inputState->column;
293	}
294	/// set the column number
295	virtual void setColumn(int c)
296	{
297	inputState->column = c;
298	}
299
300	/// get the filename for the file currently used
301	virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
302	{
303	return inputState->filename;
304	}
305	/// Set the filename the scanner is using (used in error messages)
306	virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
307	{
308	inputState->filename = f;
309	}
310
311	virtual bool getCommitToPath() const
312	{
313	return commitToPath;
314	}
315
316	virtual void setCommitToPath(bool commit)
317	{
318	commitToPath = commit;
319	}
320
321	/** return a copy of the current text buffer */
322	virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
323	{
324	return text;
325	}
326
327	virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
328	{
329	text = s;
330	}
331
332	virtual void resetText()
333	{
334	text = "";
335	inputState->tokenStartColumn = inputState->column;
336	inputState->tokenStartLine = inputState->line;
337	}
338
339	virtual RefToken getTokenObject() const
340	{
341	return _returnToken;
342	}
343
344	/** Used to keep track of line breaks, needs to be called from
345	* within generated lexers when a \n \r is encountered.
346	*/
347	virtual void newline()
348	{
349	++inputState->line;
350	inputState->column = 1;
351	}
352
353	/** Advance the current column number by an appropriate amount according
354	* to the tabsize. This method needs to be explicitly called from the
355	* lexer rules encountering tabs.
356	*/
357	virtual void tab()
358	{
359	int c = getColumn();
360	int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
361	setColumn( nc );
362	}
363	/// set the tabsize. Returns the old tabsize
364	int setTabsize( int size )
365	{
366	int oldsize = tabsize;
367	tabsize = size;
368	return oldsize;
369	}
370	/// Return the tabsize used by the scanner
371	int getTabSize() const
372	{
373	return tabsize;
374	}
375
376	/** Report exception errors caught in nextToken() */
377	virtual void reportError(const RecognitionException& e);
378
379	/** Parser error-reporting function can be overridden in subclass */
380	virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
381
382	/** Parser warning-reporting function can be overridden in subclass */
383	virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
384
385	virtual InputBuffer& getInputBuffer()
386	{
387	return inputState->getInput();
388	}
389
390	virtual LexerSharedInputState getInputState()
391	{
392	return inputState;
393	}
394
395	/** set the input state for the lexer.
396	* @note state is a reference counted object, hence no reference */
397	virtual void setInputState(LexerSharedInputState state)
398	{
399	inputState = state;
400	}
401
402	/// Set the factory for created tokens
403	virtual void setTokenObjectFactory(factory_type factory)
404	{
405	tokenFactory = factory;
406	}
407
408	/** Test the token text against the literals table
409	* Override this method to perform a different literals test
410	*/
411	virtual int testLiteralsTable(int ttype) const
412	{
413	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
414	if (i != literals.end())
415	ttype = (*i).second;
416	return ttype;
417	}
418
419	/** Test the text passed in against the literals table
420	* Override this method to perform a different literals test
421	* This is used primarily when you want to test a portion of
422	* a token
423	*/
424	virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
425	{
426	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
427	if (i != literals.end())
428	ttype = (*i).second;
429	return ttype;
430	}
431
432	/// Override this method to get more specific case handling
433	virtual int toLower(int c) const
434	{
435	// test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
436	// also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
437	// this one is more structural. Maybe make this configurable.
438	return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
439	}
440
441	/** This method is called by YourLexer::nextToken() when the lexer has
442	* hit EOF condition. EOF is NOT a character.
443	* This method is not called if EOF is reached during
444	* syntactic predicate evaluation or during evaluation
445	* of normal lexical rules, which presumably would be
446	* an IOException. This traps the "normal" EOF condition.
447	*
448	* uponEOF() is called after the complete evaluation of
449	* the previous token and only if your parser asks
450	* for another token beyond that last non-EOF token.
451	*
452	* You might want to throw token or char stream exceptions
453	* like: "Heh, premature eof" or a retry stream exception
454	* ("I found the end of this file, go back to referencing file").
455	*/
456	virtual void uponEOF()
457	{
458	}
459
460	/// Methods used to change tracing behavior
461	virtual void traceIndent();
462	virtual void traceIn(const char* rname);
463	virtual void traceOut(const char* rname);
464
465	#ifndef NO_STATIC_CONSTS
466	static const int EOF_CHAR = EOF;
467	#else
468	enum {
469	EOF_CHAR = EOF
470	};
471	#endif
472	protected:
473	ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
474	/// flag indicating wether consume saves characters
475	bool saveConsumedInput;
476	factory_type tokenFactory; ///< Factory for tokens
477	bool caseSensitive; ///< Is this lexer case sensitive
478	ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
479
480	RefToken _returnToken; ///< used to return tokens w/o using return val
481
482	/// Input state, gives access to input stream, shared among different lexers
483	LexerSharedInputState inputState;
484
485	/** Used during filter mode to indicate that path is desired.
486	* A subsequent scan error will report an error as usual
487	* if acceptPath=true;
488	*/
489	bool commitToPath;
490
491	int tabsize; ///< tab size the scanner uses.
492
493	/// Create a new RefToken of type t
494	virtual RefToken makeToken(int t)
495	{
496	RefToken tok = tokenFactory();
497	tok->setType(t);
498	tok->setColumn(inputState->tokenStartColumn);
499	tok->setLine(inputState->tokenStartLine);
500	return tok;
501	}
502
503	/** Tracer class, used when -traceLexer is passed to antlr
504	*/
505	class Tracer {
506	private:
507	CharScanner* parser;
508	const char* text;
509
510	Tracer(const Tracer& other); // undefined
511	Tracer& operator=(const Tracer& other); // undefined
512	public:
513	Tracer( CharScanner* p,const char* t )
514	: parser(p), text(t)
515	{
516	parser->traceIn(text);
517	}
518	~Tracer()
519	{
520	parser->traceOut(text);
521	}
522	};
523
524	int traceDepth;
525	private:
526	CharScanner( const CharScanner& other ); // undefined
527	CharScanner& operator=( const CharScanner& other ); // undefined
528
529	#ifndef NO_STATIC_CONSTS
530	static const int NO_CHAR = 0;
531	#else
532	enum {
533	NO_CHAR = 0
534	};
535	#endif
536	};
537
538	inline int CharScanner::LA(unsigned int i)
539	{
540	int c = inputState->getInput().LA(i);
541
542	if ( caseSensitive )
543	return c;
544	else
545	return toLower(c); // VC 6 tolower bug caught in toLower.
546	}
547
548	inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
549	{
550	if (scanner->getCaseSensitiveLiterals())
551	return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
552	else
553	{
554	#ifdef NO_STRCASECMP
555	return (stricmp(x.c_str(),y.c_str())<0);
556	#else
557	return (strcasecmp(x.c_str(),y.c_str())<0);
558	#endif
559	}
560	}
561
562	#ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
563	}
564	#endif
565
566	#endif //INC_CharScanner_hpp__