ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/branches/development/src/antlr/CharScanner.hpp
Revision: 1633
Committed: Thu Sep 15 13:39:36 2011 UTC (13 years, 7 months ago) by gezelter
File size: 13661 byte(s)
Log Message:
Updated antlr, fixed a clang compilation bug, removed a warning message

File Contents

# User Rev Content
1 tim 770 #ifndef INC_CharScanner_hpp__
2     #define INC_CharScanner_hpp__
3    
4     /* ANTLR Translator Generator
5     * Project led by Terence Parr at http://www.jGuru.com
6     * Software rights: http://www.antlr.org/license.html
7     *
8 gezelter 1442 * $Id$
9 tim 770 */
10    
11     #include <antlr/config.hpp>
12    
13     #include <map>
14    
15     #ifdef HAS_NOT_CCTYPE_H
16     #include <ctype.h>
17     #else
18     #include <cctype>
19     #endif
20    
21     #if ( _MSC_VER == 1200 )
22     // VC6 seems to need this
23     // note that this is not a standard C++ include file.
24     # include <stdio.h>
25     #endif
26    
27     #include <antlr/TokenStream.hpp>
28     #include <antlr/RecognitionException.hpp>
29     #include <antlr/SemanticException.hpp>
30     #include <antlr/MismatchedCharException.hpp>
31     #include <antlr/InputBuffer.hpp>
32     #include <antlr/BitSet.hpp>
33     #include <antlr/LexerSharedInputState.hpp>
34    
35     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
36     namespace antlr {
37     #endif
38    
39     class ANTLR_API CharScanner;
40    
41     ANTLR_C_USING(tolower)
42    
43 gezelter 1633 #ifdef ANTLR_REALLY_NO_STRCASECMP
44     // Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
45     // on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 *
 * Both strings are compared character by character after folding each
 * character with tolower().
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		// tolower() is only defined for values representable as unsigned
		// char (or EOF); going through unsigned char avoids passing a
		// negative value for bytes >= 0x80 and orders them after ASCII.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
		if (c1 == 0)	// both strings ended at the same position
			return 0;
	}
}
57 gezelter 1633 #else
58     #ifdef NO_STRCASECMP
59     ANTLR_C_USING(stricmp)
60     #else
61     ANTLR_C_USING(strcasecmp)
62 tim 770 #endif
63 gezelter 1633 #endif
64 tim 770
65     /** Functor for the literals map
66     */
67     class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
68     private:
69     const CharScanner* scanner;
70     public:
71     #ifdef NO_TEMPLATE_PARTS
72     CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
73     #endif
74     CharScannerLiteralsLess(const CharScanner* theScanner)
75     : scanner(theScanner)
76     {
77     }
78     bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
79     // defaults are good enough..
80     // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
81     // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
82     };
83    
84     /** Superclass of generated lexers
85     */
86     class ANTLR_API CharScanner : public TokenStream {
87     protected:
88     typedef RefToken (*factory_type)();
89     public:
90     CharScanner(InputBuffer& cb, bool case_sensitive );
91     CharScanner(InputBuffer* cb, bool case_sensitive );
92     CharScanner(const LexerSharedInputState& state, bool case_sensitive );
93    
94     virtual ~CharScanner()
95     {
96     }
97    
98     virtual int LA(unsigned int i);
99    
100     virtual void append(char c)
101     {
102     if (saveConsumedInput)
103     {
104     size_t l = text.length();
105    
106     if ((l%256) == 0)
107     text.reserve(l+256);
108    
109     text.replace(l,0,&c,1);
110     }
111     }
112    
113     virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
114     {
115     if( saveConsumedInput )
116     text += s;
117     }
118    
119     virtual void commit()
120     {
121     inputState->getInput().commit();
122     }
123    
124 gezelter 1633 /** called by the generated lexer to do error recovery, override to
125     * customize the behaviour.
126     */
127     virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
128     {
129     consume();
130     consumeUntil(tokenSet);
131     }
132    
133 tim 770 virtual void consume()
134     {
135     if (inputState->guessing == 0)
136     {
137     int c = LA(1);
138     if (caseSensitive)
139     {
140     append(c);
141     }
142     else
143     {
144     // use input.LA(), not LA(), to get original case
145     // CharScanner.LA() would toLower it.
146     append(inputState->getInput().LA(1));
147     }
148    
149     // RK: in a sense I don't like this automatic handling.
150     if (c == '\t')
151     tab();
152     else
153     inputState->column++;
154     }
155     inputState->getInput().consume();
156     }
157    
158     /** Consume chars until one matches the given char */
159     virtual void consumeUntil(int c)
160     {
161     for(;;)
162     {
163     int la_1 = LA(1);
164     if( la_1 == EOF_CHAR || la_1 == c )
165     break;
166     consume();
167     }
168     }
169    
170     /** Consume chars until one matches the given set */
171     virtual void consumeUntil(const BitSet& set)
172     {
173     for(;;)
174     {
175     int la_1 = LA(1);
176     if( la_1 == EOF_CHAR || set.member(la_1) )
177     break;
178     consume();
179     }
180     }
181    
182     /// Mark the current position and return a id for it
183     virtual unsigned int mark()
184     {
185     return inputState->getInput().mark();
186     }
187     /// Rewind the scanner to a previously marked position
188     virtual void rewind(unsigned int pos)
189     {
190     inputState->getInput().rewind(pos);
191     }
192    
193     /// See if input contains character 'c' throw MismatchedCharException if not
194     virtual void match(int c)
195     {
196     int la_1 = LA(1);
197     if ( la_1 != c )
198     throw MismatchedCharException(la_1, c, false, this);
199     consume();
200     }
201    
202     /** See if input contains element from bitset b
203     * throw MismatchedCharException if not
204     */
205     virtual void match(const BitSet& b)
206     {
207     int la_1 = LA(1);
208    
209     if ( !b.member(la_1) )
210     throw MismatchedCharException( la_1, b, false, this );
211     consume();
212     }
213    
214     /** See if input contains string 's' throw MismatchedCharException if not
215     * @note the string cannot match EOF
216     */
217     virtual void match( const char* s )
218     {
219     while( *s != '\0' )
220     {
221     // the & 0xFF is here to prevent sign extension lateron
222     int la_1 = LA(1), c = (*s++ & 0xFF);
223    
224     if ( la_1 != c )
225     throw MismatchedCharException(la_1, c, false, this);
226    
227     consume();
228     }
229     }
230     /** See if input contains string 's' throw MismatchedCharException if not
231     * @note the string cannot match EOF
232     */
233     virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
234     {
235     size_t len = s.length();
236    
237     for (size_t i = 0; i < len; i++)
238     {
239     // the & 0xFF is here to prevent sign extension lateron
240     int la_1 = LA(1), c = (s[i] & 0xFF);
241    
242     if ( la_1 != c )
243     throw MismatchedCharException(la_1, c, false, this);
244    
245     consume();
246     }
247     }
248     /** See if input does not contain character 'c'
249     * throw MismatchedCharException if not
250     */
251     virtual void matchNot(int c)
252     {
253     int la_1 = LA(1);
254    
255     if ( la_1 == c )
256     throw MismatchedCharException(la_1, c, true, this);
257    
258     consume();
259     }
260     /** See if input contains character in range c1-c2
261     * throw MismatchedCharException if not
262     */
263     virtual void matchRange(int c1, int c2)
264     {
265     int la_1 = LA(1);
266    
267     if ( la_1 < c1 || la_1 > c2 )
268     throw MismatchedCharException(la_1, c1, c2, false, this);
269    
270     consume();
271     }
272    
273     virtual bool getCaseSensitive() const
274     {
275     return caseSensitive;
276     }
277    
278     virtual void setCaseSensitive(bool t)
279     {
280     caseSensitive = t;
281     }
282    
283     virtual bool getCaseSensitiveLiterals() const=0;
284    
285     /// Get the line the scanner currently is in (starts at 1)
286     virtual int getLine() const
287     {
288     return inputState->line;
289     }
290    
291     /// set the line number
292     virtual void setLine(int l)
293     {
294     inputState->line = l;
295     }
296    
297     /// Get the column the scanner currently is in (starts at 1)
298     virtual int getColumn() const
299     {
300     return inputState->column;
301     }
302     /// set the column number
303     virtual void setColumn(int c)
304     {
305     inputState->column = c;
306     }
307    
308     /// get the filename for the file currently used
309     virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
310     {
311     return inputState->filename;
312     }
313     /// Set the filename the scanner is using (used in error messages)
314     virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
315     {
316     inputState->filename = f;
317     }
318    
319     virtual bool getCommitToPath() const
320     {
321     return commitToPath;
322     }
323    
324     virtual void setCommitToPath(bool commit)
325     {
326     commitToPath = commit;
327     }
328    
329     /** return a copy of the current text buffer */
330     virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
331     {
332     return text;
333     }
334    
335     virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
336     {
337     text = s;
338     }
339    
340     virtual void resetText()
341     {
342     text = "";
343     inputState->tokenStartColumn = inputState->column;
344     inputState->tokenStartLine = inputState->line;
345     }
346    
347     virtual RefToken getTokenObject() const
348     {
349     return _returnToken;
350     }
351    
352     /** Used to keep track of line breaks, needs to be called from
353     * within generated lexers when a \n \r is encountered.
354     */
355     virtual void newline()
356     {
357     ++inputState->line;
358     inputState->column = 1;
359     }
360    
361     /** Advance the current column number by an appropriate amount according
362     * to the tabsize. This method needs to be explicitly called from the
363     * lexer rules encountering tabs.
364     */
365     virtual void tab()
366     {
367     int c = getColumn();
368     int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
369     setColumn( nc );
370     }
371     /// set the tabsize. Returns the old tabsize
372     int setTabsize( int size )
373     {
374     int oldsize = tabsize;
375     tabsize = size;
376     return oldsize;
377     }
378     /// Return the tabsize used by the scanner
379     int getTabSize() const
380     {
381     return tabsize;
382     }
383    
384     /** Report exception errors caught in nextToken() */
385     virtual void reportError(const RecognitionException& e);
386    
387     /** Parser error-reporting function can be overridden in subclass */
388     virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
389    
390     /** Parser warning-reporting function can be overridden in subclass */
391     virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
392    
393     virtual InputBuffer& getInputBuffer()
394     {
395     return inputState->getInput();
396     }
397    
398     virtual LexerSharedInputState getInputState()
399     {
400     return inputState;
401     }
402    
403     /** set the input state for the lexer.
404     * @note state is a reference counted object, hence no reference */
405     virtual void setInputState(LexerSharedInputState state)
406     {
407     inputState = state;
408     }
409    
410     /// Set the factory for created tokens
411     virtual void setTokenObjectFactory(factory_type factory)
412     {
413     tokenFactory = factory;
414     }
415    
416     /** Test the token text against the literals table
417     * Override this method to perform a different literals test
418     */
419     virtual int testLiteralsTable(int ttype) const
420     {
421     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
422     if (i != literals.end())
423     ttype = (*i).second;
424     return ttype;
425     }
426    
427     /** Test the text passed in against the literals table
428     * Override this method to perform a different literals test
429     * This is used primarily when you want to test a portion of
430     * a token
431     */
432     virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
433     {
434     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
435     if (i != literals.end())
436     ttype = (*i).second;
437     return ttype;
438     }
439    
440     /// Override this method to get more specific case handling
441     virtual int toLower(int c) const
442     {
443     // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
444     // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
445     // this one is more structural. Maybe make this configurable.
446     return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
447     }
448    
449     /** This method is called by YourLexer::nextToken() when the lexer has
450     * hit EOF condition. EOF is NOT a character.
451     * This method is not called if EOF is reached during
452     * syntactic predicate evaluation or during evaluation
453     * of normal lexical rules, which presumably would be
454     * an IOException. This traps the "normal" EOF condition.
455     *
456     * uponEOF() is called after the complete evaluation of
457     * the previous token and only if your parser asks
458     * for another token beyond that last non-EOF token.
459     *
460     * You might want to throw token or char stream exceptions
461     * like: "Heh, premature eof" or a retry stream exception
462     * ("I found the end of this file, go back to referencing file").
463     */
464     virtual void uponEOF()
465     {
466     }
467    
468     /// Methods used to change tracing behavior
469     virtual void traceIndent();
470     virtual void traceIn(const char* rname);
471     virtual void traceOut(const char* rname);
472    
473     #ifndef NO_STATIC_CONSTS
474     static const int EOF_CHAR = EOF;
475     #else
476     enum {
477     EOF_CHAR = EOF
478     };
479     #endif
480     protected:
481     ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
482     /// flag indicating wether consume saves characters
483     bool saveConsumedInput;
484     factory_type tokenFactory; ///< Factory for tokens
485     bool caseSensitive; ///< Is this lexer case sensitive
486     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
487    
488     RefToken _returnToken; ///< used to return tokens w/o using return val
489    
490     /// Input state, gives access to input stream, shared among different lexers
491     LexerSharedInputState inputState;
492    
493     /** Used during filter mode to indicate that path is desired.
494     * A subsequent scan error will report an error as usual
495     * if acceptPath=true;
496     */
497     bool commitToPath;
498    
499     int tabsize; ///< tab size the scanner uses.
500    
501     /// Create a new RefToken of type t
502     virtual RefToken makeToken(int t)
503     {
504     RefToken tok = tokenFactory();
505     tok->setType(t);
506     tok->setColumn(inputState->tokenStartColumn);
507     tok->setLine(inputState->tokenStartLine);
508     return tok;
509     }
510    
511     /** Tracer class, used when -traceLexer is passed to antlr
512     */
513     class Tracer {
514     private:
515     CharScanner* parser;
516     const char* text;
517    
518     Tracer(const Tracer& other); // undefined
519     Tracer& operator=(const Tracer& other); // undefined
520     public:
521     Tracer( CharScanner* p,const char* t )
522     : parser(p), text(t)
523     {
524     parser->traceIn(text);
525     }
526     ~Tracer()
527     {
528     parser->traceOut(text);
529     }
530     };
531    
532     int traceDepth;
533     private:
534     CharScanner( const CharScanner& other ); // undefined
535     CharScanner& operator=( const CharScanner& other ); // undefined
536    
537     #ifndef NO_STATIC_CONSTS
538     static const int NO_CHAR = 0;
539     #else
540     enum {
541     NO_CHAR = 0
542     };
543     #endif
544     };
545    
546     inline int CharScanner::LA(unsigned int i)
547     {
548     int c = inputState->getInput().LA(i);
549    
550     if ( caseSensitive )
551     return c;
552     else
553     return toLower(c); // VC 6 tolower bug caught in toLower.
554     }
555    
556     inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
557     {
558     if (scanner->getCaseSensitiveLiterals())
559     return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
560     else
561     {
562     #ifdef NO_STRCASECMP
563     return (stricmp(x.c_str(),y.c_str())<0);
564     #else
565     return (strcasecmp(x.c_str(),y.c_str())<0);
566     #endif
567     }
568     }
569    
570     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
571     }
572     #endif
573    
574     #endif //INC_CharScanner_hpp__

Properties

Name Value
svn:keywords Author Id Revision Date