ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/antlr/CharScanner.hpp
Revision: 1782
Committed: Wed Aug 22 02:28:28 2012 UTC (12 years, 8 months ago) by gezelter
File size: 13697 byte(s)
Log Message:
MERGE OpenMD development branch 1465:1781 into trunk

File Contents

# User Rev Content
1 tim 770 #ifndef INC_CharScanner_hpp__
2     #define INC_CharScanner_hpp__
3    
4     /* ANTLR Translator Generator
5     * Project led by Terence Parr at http://www.jGuru.com
6     * Software rights: http://www.antlr.org/license.html
7     *
8 gezelter 1442 * $Id$
9 tim 770 */
10    
11     #include <antlr/config.hpp>
12 gezelter 1782 #include <cstdio>
13 tim 770 #include <map>
14 gezelter 1782 #include <cstring>
15 tim 770
16     #ifdef HAS_NOT_CCTYPE_H
17     #include <ctype.h>
18     #else
19     #include <cctype>
20     #endif
21    
22     #if ( _MSC_VER == 1200 )
23     // VC6 seems to need this
24     // note that this is not a standard C++ include file.
25     # include <stdio.h>
26     #endif
27    
28     #include <antlr/TokenStream.hpp>
29     #include <antlr/RecognitionException.hpp>
30     #include <antlr/SemanticException.hpp>
31     #include <antlr/MismatchedCharException.hpp>
32     #include <antlr/InputBuffer.hpp>
33     #include <antlr/BitSet.hpp>
34     #include <antlr/LexerSharedInputState.hpp>
35    
36     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37     namespace antlr {
38     #endif
39    
40     class ANTLR_API CharScanner;
41    
42     ANTLR_C_USING(tolower)
43    
44 gezelter 1558 #ifdef ANTLR_REALLY_NO_STRCASECMP
45     // Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
46     // on the mac has neither...
/** Portable stand-in for the non-standard strcasecmp().
 * Walks both NUL-terminated strings in step, folding every byte through
 * tolower() before comparing.
 * @param s1 first string (must not be null)
 * @param s2 second string (must not be null)
 * @return -1 if s1 sorts before s2, 1 if it sorts after, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	while (true)
	{
		// Convert through unsigned char first: handing tolower() a negative
		// value is undefined, and comparing plain (possibly signed) chars
		// would sort bytes >= 0x80 in front of ASCII.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));
		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		if (c1 == 0) return 0; // both strings ended at the same position
	}
}
58 gezelter 1558 #else
59     #ifdef NO_STRCASECMP
60     ANTLR_C_USING(stricmp)
61     #else
62     ANTLR_C_USING(strcasecmp)
63 tim 770 #endif
64 gezelter 1558 #endif
65 tim 770
66     /** Functor for the literals map
67     */
68     class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69     private:
70     const CharScanner* scanner;
71     public:
72     #ifdef NO_TEMPLATE_PARTS
73     CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74     #endif
75     CharScannerLiteralsLess(const CharScanner* theScanner)
76     : scanner(theScanner)
77     {
78     }
79     bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80     // defaults are good enough..
81     // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82     // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83     };
84    
85     /** Superclass of generated lexers
86     */
87     class ANTLR_API CharScanner : public TokenStream {
88     protected:
89     typedef RefToken (*factory_type)();
90     public:
91     CharScanner(InputBuffer& cb, bool case_sensitive );
92     CharScanner(InputBuffer* cb, bool case_sensitive );
93     CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94    
95     virtual ~CharScanner()
96     {
97     }
98    
99     virtual int LA(unsigned int i);
100    
101     virtual void append(char c)
102     {
103     if (saveConsumedInput)
104     {
105     size_t l = text.length();
106    
107     if ((l%256) == 0)
108     text.reserve(l+256);
109    
110     text.replace(l,0,&c,1);
111     }
112     }
113    
114     virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115     {
116     if( saveConsumedInput )
117     text += s;
118     }
119    
120     virtual void commit()
121     {
122     inputState->getInput().commit();
123     }
124    
125 gezelter 1558 /** called by the generated lexer to do error recovery, override to
126     * customize the behaviour.
127     */
128     virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
129     {
130     consume();
131     consumeUntil(tokenSet);
132     }
133    
134 tim 770 virtual void consume()
135     {
136     if (inputState->guessing == 0)
137     {
138     int c = LA(1);
139     if (caseSensitive)
140     {
141     append(c);
142     }
143     else
144     {
145     // use input.LA(), not LA(), to get original case
146     // CharScanner.LA() would toLower it.
147     append(inputState->getInput().LA(1));
148     }
149    
150     // RK: in a sense I don't like this automatic handling.
151     if (c == '\t')
152     tab();
153     else
154     inputState->column++;
155     }
156     inputState->getInput().consume();
157     }
158    
159     /** Consume chars until one matches the given char */
160     virtual void consumeUntil(int c)
161     {
162     for(;;)
163     {
164     int la_1 = LA(1);
165     if( la_1 == EOF_CHAR || la_1 == c )
166     break;
167     consume();
168     }
169     }
170    
171     /** Consume chars until one matches the given set */
172     virtual void consumeUntil(const BitSet& set)
173     {
174     for(;;)
175     {
176     int la_1 = LA(1);
177     if( la_1 == EOF_CHAR || set.member(la_1) )
178     break;
179     consume();
180     }
181     }
182    
183     /// Mark the current position and return a id for it
184     virtual unsigned int mark()
185     {
186     return inputState->getInput().mark();
187     }
188     /// Rewind the scanner to a previously marked position
189     virtual void rewind(unsigned int pos)
190     {
191     inputState->getInput().rewind(pos);
192     }
193    
194     /// See if input contains character 'c' throw MismatchedCharException if not
195     virtual void match(int c)
196     {
197     int la_1 = LA(1);
198     if ( la_1 != c )
199     throw MismatchedCharException(la_1, c, false, this);
200     consume();
201     }
202    
203     /** See if input contains element from bitset b
204     * throw MismatchedCharException if not
205     */
206     virtual void match(const BitSet& b)
207     {
208     int la_1 = LA(1);
209    
210     if ( !b.member(la_1) )
211     throw MismatchedCharException( la_1, b, false, this );
212     consume();
213     }
214    
215     /** See if input contains string 's' throw MismatchedCharException if not
216     * @note the string cannot match EOF
217     */
218     virtual void match( const char* s )
219     {
220     while( *s != '\0' )
221     {
222     // the & 0xFF is here to prevent sign extension lateron
223     int la_1 = LA(1), c = (*s++ & 0xFF);
224    
225     if ( la_1 != c )
226     throw MismatchedCharException(la_1, c, false, this);
227    
228     consume();
229     }
230     }
231     /** See if input contains string 's' throw MismatchedCharException if not
232     * @note the string cannot match EOF
233     */
234     virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
235     {
236     size_t len = s.length();
237    
238     for (size_t i = 0; i < len; i++)
239     {
240     // the & 0xFF is here to prevent sign extension lateron
241     int la_1 = LA(1), c = (s[i] & 0xFF);
242    
243     if ( la_1 != c )
244     throw MismatchedCharException(la_1, c, false, this);
245    
246     consume();
247     }
248     }
249     /** See if input does not contain character 'c'
250     * throw MismatchedCharException if not
251     */
252     virtual void matchNot(int c)
253     {
254     int la_1 = LA(1);
255    
256     if ( la_1 == c )
257     throw MismatchedCharException(la_1, c, true, this);
258    
259     consume();
260     }
261     /** See if input contains character in range c1-c2
262     * throw MismatchedCharException if not
263     */
264     virtual void matchRange(int c1, int c2)
265     {
266     int la_1 = LA(1);
267    
268     if ( la_1 < c1 || la_1 > c2 )
269     throw MismatchedCharException(la_1, c1, c2, false, this);
270    
271     consume();
272     }
273    
274     virtual bool getCaseSensitive() const
275     {
276     return caseSensitive;
277     }
278    
279     virtual void setCaseSensitive(bool t)
280     {
281     caseSensitive = t;
282     }
283    
284     virtual bool getCaseSensitiveLiterals() const=0;
285    
286     /// Get the line the scanner currently is in (starts at 1)
287     virtual int getLine() const
288     {
289     return inputState->line;
290     }
291    
292     /// set the line number
293     virtual void setLine(int l)
294     {
295     inputState->line = l;
296     }
297    
298     /// Get the column the scanner currently is in (starts at 1)
299     virtual int getColumn() const
300     {
301     return inputState->column;
302     }
303     /// set the column number
304     virtual void setColumn(int c)
305     {
306     inputState->column = c;
307     }
308    
309     /// get the filename for the file currently used
310     virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
311     {
312     return inputState->filename;
313     }
314     /// Set the filename the scanner is using (used in error messages)
315     virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
316     {
317     inputState->filename = f;
318     }
319    
320     virtual bool getCommitToPath() const
321     {
322     return commitToPath;
323     }
324    
325     virtual void setCommitToPath(bool commit)
326     {
327     commitToPath = commit;
328     }
329    
330     /** return a copy of the current text buffer */
331     virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
332     {
333     return text;
334     }
335    
336     virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
337     {
338     text = s;
339     }
340    
341     virtual void resetText()
342     {
343     text = "";
344     inputState->tokenStartColumn = inputState->column;
345     inputState->tokenStartLine = inputState->line;
346     }
347    
348     virtual RefToken getTokenObject() const
349     {
350     return _returnToken;
351     }
352    
353     /** Used to keep track of line breaks, needs to be called from
354     * within generated lexers when a \n \r is encountered.
355     */
356     virtual void newline()
357     {
358     ++inputState->line;
359     inputState->column = 1;
360     }
361    
362     /** Advance the current column number by an appropriate amount according
363     * to the tabsize. This method needs to be explicitly called from the
364     * lexer rules encountering tabs.
365     */
366     virtual void tab()
367     {
368     int c = getColumn();
369     int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
370     setColumn( nc );
371     }
372     /// set the tabsize. Returns the old tabsize
373     int setTabsize( int size )
374     {
375     int oldsize = tabsize;
376     tabsize = size;
377     return oldsize;
378     }
379     /// Return the tabsize used by the scanner
380     int getTabSize() const
381     {
382     return tabsize;
383     }
384    
385     /** Report exception errors caught in nextToken() */
386     virtual void reportError(const RecognitionException& e);
387    
388     /** Parser error-reporting function can be overridden in subclass */
389     virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
390    
391     /** Parser warning-reporting function can be overridden in subclass */
392     virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
393    
394     virtual InputBuffer& getInputBuffer()
395     {
396     return inputState->getInput();
397     }
398    
399     virtual LexerSharedInputState getInputState()
400     {
401     return inputState;
402     }
403    
404     /** set the input state for the lexer.
405     * @note state is a reference counted object, hence no reference */
406     virtual void setInputState(LexerSharedInputState state)
407     {
408     inputState = state;
409     }
410    
411     /// Set the factory for created tokens
412     virtual void setTokenObjectFactory(factory_type factory)
413     {
414     tokenFactory = factory;
415     }
416    
417     /** Test the token text against the literals table
418     * Override this method to perform a different literals test
419     */
420     virtual int testLiteralsTable(int ttype) const
421     {
422     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
423     if (i != literals.end())
424     ttype = (*i).second;
425     return ttype;
426     }
427    
428     /** Test the text passed in against the literals table
429     * Override this method to perform a different literals test
430     * This is used primarily when you want to test a portion of
431     * a token
432     */
433     virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
434     {
435     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
436     if (i != literals.end())
437     ttype = (*i).second;
438     return ttype;
439     }
440    
441     /// Override this method to get more specific case handling
442     virtual int toLower(int c) const
443     {
444     // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
445     // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
446     // this one is more structural. Maybe make this configurable.
447     return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
448     }
449    
450     /** This method is called by YourLexer::nextToken() when the lexer has
451     * hit EOF condition. EOF is NOT a character.
452     * This method is not called if EOF is reached during
453     * syntactic predicate evaluation or during evaluation
454     * of normal lexical rules, which presumably would be
455     * an IOException. This traps the "normal" EOF condition.
456     *
457     * uponEOF() is called after the complete evaluation of
458     * the previous token and only if your parser asks
459     * for another token beyond that last non-EOF token.
460     *
461     * You might want to throw token or char stream exceptions
462     * like: "Heh, premature eof" or a retry stream exception
463     * ("I found the end of this file, go back to referencing file").
464     */
465     virtual void uponEOF()
466     {
467     }
468    
469     /// Methods used to change tracing behavior
470     virtual void traceIndent();
471     virtual void traceIn(const char* rname);
472     virtual void traceOut(const char* rname);
473    
474     #ifndef NO_STATIC_CONSTS
475     static const int EOF_CHAR = EOF;
476     #else
477     enum {
478     EOF_CHAR = EOF
479     };
480     #endif
481     protected:
482     ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
483     /// flag indicating wether consume saves characters
484     bool saveConsumedInput;
485     factory_type tokenFactory; ///< Factory for tokens
486     bool caseSensitive; ///< Is this lexer case sensitive
487     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
488    
489     RefToken _returnToken; ///< used to return tokens w/o using return val
490    
491     /// Input state, gives access to input stream, shared among different lexers
492     LexerSharedInputState inputState;
493    
494     /** Used during filter mode to indicate that path is desired.
495     * A subsequent scan error will report an error as usual
496     * if acceptPath=true;
497     */
498     bool commitToPath;
499    
500     int tabsize; ///< tab size the scanner uses.
501    
502     /// Create a new RefToken of type t
503     virtual RefToken makeToken(int t)
504     {
505     RefToken tok = tokenFactory();
506     tok->setType(t);
507     tok->setColumn(inputState->tokenStartColumn);
508     tok->setLine(inputState->tokenStartLine);
509     return tok;
510     }
511    
512     /** Tracer class, used when -traceLexer is passed to antlr
513     */
514     class Tracer {
515     private:
516     CharScanner* parser;
517     const char* text;
518    
519     Tracer(const Tracer& other); // undefined
520     Tracer& operator=(const Tracer& other); // undefined
521     public:
522     Tracer( CharScanner* p,const char* t )
523     : parser(p), text(t)
524     {
525     parser->traceIn(text);
526     }
527     ~Tracer()
528     {
529     parser->traceOut(text);
530     }
531     };
532    
533     int traceDepth;
534     private:
535     CharScanner( const CharScanner& other ); // undefined
536     CharScanner& operator=( const CharScanner& other ); // undefined
537    
538     #ifndef NO_STATIC_CONSTS
539     static const int NO_CHAR = 0;
540     #else
541     enum {
542     NO_CHAR = 0
543     };
544     #endif
545     };
546    
547     inline int CharScanner::LA(unsigned int i)
548     {
549     int c = inputState->getInput().LA(i);
550    
551     if ( caseSensitive )
552     return c;
553     else
554     return toLower(c); // VC 6 tolower bug caught in toLower.
555     }
556    
557     inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
558     {
559     if (scanner->getCaseSensitiveLiterals())
560     return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
561     else
562     {
563     #ifdef NO_STRCASECMP
564     return (stricmp(x.c_str(),y.c_str())<0);
565     #else
566     return (strcasecmp(x.c_str(),y.c_str())<0);
567     #endif
568     }
569     }
570    
571     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
572     }
573     #endif
574    
575     #endif //INC_CharScanner_hpp__

Properties

Name Value
svn:keywords Author Id Revision Date