ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/antlr/CharScanner.hpp
Revision: 1442
Committed: Mon May 10 17:28:26 2010 UTC (14 years, 11 months ago) by gezelter
File size: 13508 byte(s)
Log Message:
Adding property set to svn entries

File Contents

# User Rev Content
1 tim 770 #ifndef INC_CharScanner_hpp__
2     #define INC_CharScanner_hpp__
3    
4     /* ANTLR Translator Generator
5     * Project led by Terence Parr at http://www.jGuru.com
6     * Software rights: http://www.antlr.org/license.html
7     *
8 gezelter 1442 * $Id$
9 tim 770 */
10    
11     #include <antlr/config.hpp>
12    
13     #include <map>
14 jmarr 1401 #include <cstdio>
15 tim 770
16     #ifdef HAS_NOT_CCTYPE_H
17     #include <ctype.h>
18     #else
19     #include <cctype>
20     #endif
21    
22     #if ( _MSC_VER == 1200 )
23     // VC6 seems to need this
24     // note that this is not a standard C++ include file.
25     # include <stdio.h>
26     #endif
27    
28     #include <antlr/TokenStream.hpp>
29     #include <antlr/RecognitionException.hpp>
30     #include <antlr/SemanticException.hpp>
31     #include <antlr/MismatchedCharException.hpp>
32     #include <antlr/InputBuffer.hpp>
33     #include <antlr/BitSet.hpp>
34     #include <antlr/LexerSharedInputState.hpp>
35    
36     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37     namespace antlr {
38     #endif
39    
40     class ANTLR_API CharScanner;
41    
42     ANTLR_C_USING(tolower)
43    
44 gezelter 812 #if !defined(HAVE_STRCASECMP) && defined(HAVE_STRICMP) && !defined(stricmp)
45     #define strcasecmp stricmp
46     #endif
47     #if !defined(HAVE_STRNCASECMP) && defined(HAVE_STRNICMP) && !defined(strnicmp)
48     #define strncasecmp strnicmp
49     #endif
50    
51    
52     #if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
/** Portable fallback for strcasecmp(), compiled only when the platform
 * offers neither strcasecmp() nor stricmp().
 *
 * Compares two NUL-terminated strings byte by byte after folding each
 * byte with tolower().
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both
 *         strings are equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		// Go through unsigned char: handing a negative value (other than
		// EOF) to tolower() is undefined, and bytes >= 0x80 have to order
		// after ASCII instead of before it on signed-char platforms.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 != c2)
			return (c1 < c2) ? -1 : 1;
		if (c1 == 0)	// both strings ended at the same position
			return 0;
	}
}
64     #endif
65    
66     /** Functor for the literals map
67     */
68     class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69     private:
70     const CharScanner* scanner;
71     public:
72     #ifdef NO_TEMPLATE_PARTS
73     CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74     #endif
75     CharScannerLiteralsLess(const CharScanner* theScanner)
76     : scanner(theScanner)
77     {
78     }
79     bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80     // defaults are good enough..
81     // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82     // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83     };
84    
85     /** Superclass of generated lexers
86     */
87     class ANTLR_API CharScanner : public TokenStream {
88     protected:
89     typedef RefToken (*factory_type)();
90     public:
91     CharScanner(InputBuffer& cb, bool case_sensitive );
92     CharScanner(InputBuffer* cb, bool case_sensitive );
93     CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94    
95     virtual ~CharScanner()
96     {
97     }
98    
99     virtual int LA(unsigned int i);
100    
101     virtual void append(char c)
102     {
103     if (saveConsumedInput)
104     {
105     size_t l = text.length();
106    
107     if ((l%256) == 0)
108     text.reserve(l+256);
109    
110     text.replace(l,0,&c,1);
111     }
112     }
113    
114     virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115     {
116     if( saveConsumedInput )
117     text += s;
118     }
119    
120     virtual void commit()
121     {
122     inputState->getInput().commit();
123     }
124    
125     virtual void consume()
126     {
127     if (inputState->guessing == 0)
128     {
129     int c = LA(1);
130     if (caseSensitive)
131     {
132     append(c);
133     }
134     else
135     {
136     // use input.LA(), not LA(), to get original case
137     // CharScanner.LA() would toLower it.
138     append(inputState->getInput().LA(1));
139     }
140    
141     // RK: in a sense I don't like this automatic handling.
142     if (c == '\t')
143     tab();
144     else
145     inputState->column++;
146     }
147     inputState->getInput().consume();
148     }
149    
150     /** Consume chars until one matches the given char */
151     virtual void consumeUntil(int c)
152     {
153     for(;;)
154     {
155     int la_1 = LA(1);
156     if( la_1 == EOF_CHAR || la_1 == c )
157     break;
158     consume();
159     }
160     }
161    
162     /** Consume chars until one matches the given set */
163     virtual void consumeUntil(const BitSet& set)
164     {
165     for(;;)
166     {
167     int la_1 = LA(1);
168     if( la_1 == EOF_CHAR || set.member(la_1) )
169     break;
170     consume();
171     }
172     }
173    
174     /// Mark the current position and return a id for it
175     virtual unsigned int mark()
176     {
177     return inputState->getInput().mark();
178     }
179     /// Rewind the scanner to a previously marked position
180     virtual void rewind(unsigned int pos)
181     {
182     inputState->getInput().rewind(pos);
183     }
184    
185     /// See if input contains character 'c' throw MismatchedCharException if not
186     virtual void match(int c)
187     {
188     int la_1 = LA(1);
189     if ( la_1 != c )
190     throw MismatchedCharException(la_1, c, false, this);
191     consume();
192     }
193    
194     /** See if input contains element from bitset b
195     * throw MismatchedCharException if not
196     */
197     virtual void match(const BitSet& b)
198     {
199     int la_1 = LA(1);
200    
201     if ( !b.member(la_1) )
202     throw MismatchedCharException( la_1, b, false, this );
203     consume();
204     }
205    
206     /** See if input contains string 's' throw MismatchedCharException if not
207     * @note the string cannot match EOF
208     */
209     virtual void match( const char* s )
210     {
211     while( *s != '\0' )
212     {
213     // the & 0xFF is here to prevent sign extension lateron
214     int la_1 = LA(1), c = (*s++ & 0xFF);
215    
216     if ( la_1 != c )
217     throw MismatchedCharException(la_1, c, false, this);
218    
219     consume();
220     }
221     }
222     /** See if input contains string 's' throw MismatchedCharException if not
223     * @note the string cannot match EOF
224     */
225     virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
226     {
227     size_t len = s.length();
228    
229     for (size_t i = 0; i < len; i++)
230     {
231     // the & 0xFF is here to prevent sign extension lateron
232     int la_1 = LA(1), c = (s[i] & 0xFF);
233    
234     if ( la_1 != c )
235     throw MismatchedCharException(la_1, c, false, this);
236    
237     consume();
238     }
239     }
240     /** See if input does not contain character 'c'
241     * throw MismatchedCharException if not
242     */
243     virtual void matchNot(int c)
244     {
245     int la_1 = LA(1);
246    
247     if ( la_1 == c )
248     throw MismatchedCharException(la_1, c, true, this);
249    
250     consume();
251     }
252     /** See if input contains character in range c1-c2
253     * throw MismatchedCharException if not
254     */
255     virtual void matchRange(int c1, int c2)
256     {
257     int la_1 = LA(1);
258    
259     if ( la_1 < c1 || la_1 > c2 )
260     throw MismatchedCharException(la_1, c1, c2, false, this);
261    
262     consume();
263     }
264    
265     virtual bool getCaseSensitive() const
266     {
267     return caseSensitive;
268     }
269    
270     virtual void setCaseSensitive(bool t)
271     {
272     caseSensitive = t;
273     }
274    
275     virtual bool getCaseSensitiveLiterals() const=0;
276    
277     /// Get the line the scanner currently is in (starts at 1)
278     virtual int getLine() const
279     {
280     return inputState->line;
281     }
282    
283     /// set the line number
284     virtual void setLine(int l)
285     {
286     inputState->line = l;
287     }
288    
289     /// Get the column the scanner currently is in (starts at 1)
290     virtual int getColumn() const
291     {
292     return inputState->column;
293     }
294     /// set the column number
295     virtual void setColumn(int c)
296     {
297     inputState->column = c;
298     }
299    
300     /// get the filename for the file currently used
301     virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
302     {
303     return inputState->filename;
304     }
305     /// Set the filename the scanner is using (used in error messages)
306     virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
307     {
308     inputState->filename = f;
309     }
310    
311     virtual bool getCommitToPath() const
312     {
313     return commitToPath;
314     }
315    
316     virtual void setCommitToPath(bool commit)
317     {
318     commitToPath = commit;
319     }
320    
321     /** return a copy of the current text buffer */
322     virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
323     {
324     return text;
325     }
326    
327     virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
328     {
329     text = s;
330     }
331    
332     virtual void resetText()
333     {
334     text = "";
335     inputState->tokenStartColumn = inputState->column;
336     inputState->tokenStartLine = inputState->line;
337     }
338    
339     virtual RefToken getTokenObject() const
340     {
341     return _returnToken;
342     }
343    
344     /** Used to keep track of line breaks, needs to be called from
345     * within generated lexers when a \n \r is encountered.
346     */
347     virtual void newline()
348     {
349     ++inputState->line;
350     inputState->column = 1;
351     }
352    
353     /** Advance the current column number by an appropriate amount according
354     * to the tabsize. This method needs to be explicitly called from the
355     * lexer rules encountering tabs.
356     */
357     virtual void tab()
358     {
359     int c = getColumn();
360     int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
361     setColumn( nc );
362     }
363     /// set the tabsize. Returns the old tabsize
364     int setTabsize( int size )
365     {
366     int oldsize = tabsize;
367     tabsize = size;
368     return oldsize;
369     }
370     /// Return the tabsize used by the scanner
371     int getTabSize() const
372     {
373     return tabsize;
374     }
375    
376     /** Report exception errors caught in nextToken() */
377     virtual void reportError(const RecognitionException& e);
378    
379     /** Parser error-reporting function can be overridden in subclass */
380     virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
381    
382     /** Parser warning-reporting function can be overridden in subclass */
383     virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
384    
385     virtual InputBuffer& getInputBuffer()
386     {
387     return inputState->getInput();
388     }
389    
390     virtual LexerSharedInputState getInputState()
391     {
392     return inputState;
393     }
394    
395     /** set the input state for the lexer.
396     * @note state is a reference counted object, hence no reference */
397     virtual void setInputState(LexerSharedInputState state)
398     {
399     inputState = state;
400     }
401    
402     /// Set the factory for created tokens
403     virtual void setTokenObjectFactory(factory_type factory)
404     {
405     tokenFactory = factory;
406     }
407    
408     /** Test the token text against the literals table
409     * Override this method to perform a different literals test
410     */
411     virtual int testLiteralsTable(int ttype) const
412     {
413     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
414     if (i != literals.end())
415     ttype = (*i).second;
416     return ttype;
417     }
418    
419     /** Test the text passed in against the literals table
420     * Override this method to perform a different literals test
421     * This is used primarily when you want to test a portion of
422     * a token
423     */
424     virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
425     {
426     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
427     if (i != literals.end())
428     ttype = (*i).second;
429     return ttype;
430     }
431    
432     /// Override this method to get more specific case handling
433     virtual int toLower(int c) const
434     {
435     // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
436     // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
437     // this one is more structural. Maybe make this configurable.
438     return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
439     }
440    
441     /** This method is called by YourLexer::nextToken() when the lexer has
442     * hit EOF condition. EOF is NOT a character.
443     * This method is not called if EOF is reached during
444     * syntactic predicate evaluation or during evaluation
445     * of normal lexical rules, which presumably would be
446     * an IOException. This traps the "normal" EOF condition.
447     *
448     * uponEOF() is called after the complete evaluation of
449     * the previous token and only if your parser asks
450     * for another token beyond that last non-EOF token.
451     *
452     * You might want to throw token or char stream exceptions
453     * like: "Heh, premature eof" or a retry stream exception
454     * ("I found the end of this file, go back to referencing file").
455     */
456     virtual void uponEOF()
457     {
458     }
459    
460     /// Methods used to change tracing behavior
461     virtual void traceIndent();
462     virtual void traceIn(const char* rname);
463     virtual void traceOut(const char* rname);
464    
465     #ifndef NO_STATIC_CONSTS
466     static const int EOF_CHAR = EOF;
467     #else
468     enum {
469     EOF_CHAR = EOF
470     };
471     #endif
472     protected:
473     ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
474     /// flag indicating wether consume saves characters
475     bool saveConsumedInput;
476     factory_type tokenFactory; ///< Factory for tokens
477     bool caseSensitive; ///< Is this lexer case sensitive
478     ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
479    
480     RefToken _returnToken; ///< used to return tokens w/o using return val
481    
482     /// Input state, gives access to input stream, shared among different lexers
483     LexerSharedInputState inputState;
484    
485     /** Used during filter mode to indicate that path is desired.
486     * A subsequent scan error will report an error as usual
487     * if acceptPath=true;
488     */
489     bool commitToPath;
490    
491     int tabsize; ///< tab size the scanner uses.
492    
493     /// Create a new RefToken of type t
494     virtual RefToken makeToken(int t)
495     {
496     RefToken tok = tokenFactory();
497     tok->setType(t);
498     tok->setColumn(inputState->tokenStartColumn);
499     tok->setLine(inputState->tokenStartLine);
500     return tok;
501     }
502    
503     /** Tracer class, used when -traceLexer is passed to antlr
504     */
505     class Tracer {
506     private:
507     CharScanner* parser;
508     const char* text;
509    
510     Tracer(const Tracer& other); // undefined
511     Tracer& operator=(const Tracer& other); // undefined
512     public:
513     Tracer( CharScanner* p,const char* t )
514     : parser(p), text(t)
515     {
516     parser->traceIn(text);
517     }
518     ~Tracer()
519     {
520     parser->traceOut(text);
521     }
522     };
523    
524     int traceDepth;
525     private:
526     CharScanner( const CharScanner& other ); // undefined
527     CharScanner& operator=( const CharScanner& other ); // undefined
528    
529     #ifndef NO_STATIC_CONSTS
530     static const int NO_CHAR = 0;
531     #else
532     enum {
533     NO_CHAR = 0
534     };
535     #endif
536     };
537    
538     inline int CharScanner::LA(unsigned int i)
539     {
540     int c = inputState->getInput().LA(i);
541    
542     if ( caseSensitive )
543     return c;
544     else
545     return toLower(c); // VC 6 tolower bug caught in toLower.
546     }
547    
548     inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
549     {
550     if (scanner->getCaseSensitiveLiterals())
551     return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
552     else
553     {
554     #ifdef NO_STRCASECMP
555     return (stricmp(x.c_str(),y.c_str())<0);
556     #else
557     return (strcasecmp(x.c_str(),y.c_str())<0);
558     #endif
559     }
560     }
561    
562     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
563     }
564     #endif
565    
566     #endif //INC_CharScanner_hpp__

Properties

Name Value
svn:keywords Author Id Revision Date