ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/branches/development/src/antlr/CharScanner.hpp
Revision: 1653
Committed: Wed Sep 28 19:37:06 2011 UTC (13 years, 7 months ago) by gezelter
File size: 13697 byte(s)
Log Message:
fixing compilation bugs on gcc-4.4

File Contents

# Content
1 #ifndef INC_CharScanner_hpp__
2 #define INC_CharScanner_hpp__
3
4 /* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 *
8 * $Id$
9 */
10
11 #include <antlr/config.hpp>
12 #include <cstdio>
13 #include <map>
14 #include <cstring>
15
16 #ifdef HAS_NOT_CCTYPE_H
17 #include <ctype.h>
18 #else
19 #include <cctype>
20 #endif
21
22 #if ( _MSC_VER == 1200 )
23 // VC6 seems to need this
24 // note that this is not a standard C++ include file.
25 # include <stdio.h>
26 #endif
27
28 #include <antlr/TokenStream.hpp>
29 #include <antlr/RecognitionException.hpp>
30 #include <antlr/SemanticException.hpp>
31 #include <antlr/MismatchedCharException.hpp>
32 #include <antlr/InputBuffer.hpp>
33 #include <antlr/BitSet.hpp>
34 #include <antlr/LexerSharedInputState.hpp>
35
36 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37 namespace antlr {
38 #endif
39
40 class ANTLR_API CharScanner;
41
42 ANTLR_C_USING(tolower)
43
44 #ifdef ANTLR_REALLY_NO_STRCASECMP
// Apparently, neither strcasecmp nor stricmp is standard, and Codewarrior
// on the mac has neither...
/** Fallback case-insensitive comparison of two NUL-terminated strings.
 *
 * Both strings are walked in lock step and each byte is folded with
 * tolower() before being compared.
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 orders before s2, 1 if it orders after, 0 if both are
 *         equal when case is ignored
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	while (true)
	{
		// Go through unsigned char: handing tolower() a negative value is
		// undefined behaviour, and bytes >= 0x80 have to order above ASCII
		// (as POSIX strcasecmp does) whether or not plain char is signed.
		const unsigned char u1 = static_cast<unsigned char>(*s1++);
		const unsigned char u2 = static_cast<unsigned char>(*s2++);
		const int c1 = tolower(u1);
		const int c2 = tolower(u2);

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// c1 == c2 here, so a zero means both strings ended together
		if (c1 == 0) return 0;
	}
}
58 #else
59 #ifdef NO_STRCASECMP
60 ANTLR_C_USING(stricmp)
61 #else
62 ANTLR_C_USING(strcasecmp)
63 #endif
64 #endif
65
66 /** Functor for the literals map
67 */
68 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69 private:
70 const CharScanner* scanner;
71 public:
72 #ifdef NO_TEMPLATE_PARTS
73 CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74 #endif
75 CharScannerLiteralsLess(const CharScanner* theScanner)
76 : scanner(theScanner)
77 {
78 }
79 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80 // defaults are good enough..
81 // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82 // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83 };
84
85 /** Superclass of generated lexers
86 */
87 class ANTLR_API CharScanner : public TokenStream {
88 protected:
89 typedef RefToken (*factory_type)();
90 public:
91 CharScanner(InputBuffer& cb, bool case_sensitive );
92 CharScanner(InputBuffer* cb, bool case_sensitive );
93 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94
95 virtual ~CharScanner()
96 {
97 }
98
99 virtual int LA(unsigned int i);
100
101 virtual void append(char c)
102 {
103 if (saveConsumedInput)
104 {
105 size_t l = text.length();
106
107 if ((l%256) == 0)
108 text.reserve(l+256);
109
110 text.replace(l,0,&c,1);
111 }
112 }
113
114 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115 {
116 if( saveConsumedInput )
117 text += s;
118 }
119
120 virtual void commit()
121 {
122 inputState->getInput().commit();
123 }
124
125 /** called by the generated lexer to do error recovery, override to
126 * customize the behaviour.
127 */
128 virtual void recover(const RecognitionException& ex, const BitSet& tokenSet)
129 {
130 consume();
131 consumeUntil(tokenSet);
132 }
133
134 virtual void consume()
135 {
136 if (inputState->guessing == 0)
137 {
138 int c = LA(1);
139 if (caseSensitive)
140 {
141 append(c);
142 }
143 else
144 {
145 // use input.LA(), not LA(), to get original case
146 // CharScanner.LA() would toLower it.
147 append(inputState->getInput().LA(1));
148 }
149
150 // RK: in a sense I don't like this automatic handling.
151 if (c == '\t')
152 tab();
153 else
154 inputState->column++;
155 }
156 inputState->getInput().consume();
157 }
158
159 /** Consume chars until one matches the given char */
160 virtual void consumeUntil(int c)
161 {
162 for(;;)
163 {
164 int la_1 = LA(1);
165 if( la_1 == EOF_CHAR || la_1 == c )
166 break;
167 consume();
168 }
169 }
170
171 /** Consume chars until one matches the given set */
172 virtual void consumeUntil(const BitSet& set)
173 {
174 for(;;)
175 {
176 int la_1 = LA(1);
177 if( la_1 == EOF_CHAR || set.member(la_1) )
178 break;
179 consume();
180 }
181 }
182
183 /// Mark the current position and return a id for it
184 virtual unsigned int mark()
185 {
186 return inputState->getInput().mark();
187 }
188 /// Rewind the scanner to a previously marked position
189 virtual void rewind(unsigned int pos)
190 {
191 inputState->getInput().rewind(pos);
192 }
193
194 /// See if input contains character 'c' throw MismatchedCharException if not
195 virtual void match(int c)
196 {
197 int la_1 = LA(1);
198 if ( la_1 != c )
199 throw MismatchedCharException(la_1, c, false, this);
200 consume();
201 }
202
203 /** See if input contains element from bitset b
204 * throw MismatchedCharException if not
205 */
206 virtual void match(const BitSet& b)
207 {
208 int la_1 = LA(1);
209
210 if ( !b.member(la_1) )
211 throw MismatchedCharException( la_1, b, false, this );
212 consume();
213 }
214
215 /** See if input contains string 's' throw MismatchedCharException if not
216 * @note the string cannot match EOF
217 */
218 virtual void match( const char* s )
219 {
220 while( *s != '\0' )
221 {
222 // the & 0xFF is here to prevent sign extension lateron
223 int la_1 = LA(1), c = (*s++ & 0xFF);
224
225 if ( la_1 != c )
226 throw MismatchedCharException(la_1, c, false, this);
227
228 consume();
229 }
230 }
231 /** See if input contains string 's' throw MismatchedCharException if not
232 * @note the string cannot match EOF
233 */
234 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
235 {
236 size_t len = s.length();
237
238 for (size_t i = 0; i < len; i++)
239 {
240 // the & 0xFF is here to prevent sign extension lateron
241 int la_1 = LA(1), c = (s[i] & 0xFF);
242
243 if ( la_1 != c )
244 throw MismatchedCharException(la_1, c, false, this);
245
246 consume();
247 }
248 }
249 /** See if input does not contain character 'c'
250 * throw MismatchedCharException if not
251 */
252 virtual void matchNot(int c)
253 {
254 int la_1 = LA(1);
255
256 if ( la_1 == c )
257 throw MismatchedCharException(la_1, c, true, this);
258
259 consume();
260 }
261 /** See if input contains character in range c1-c2
262 * throw MismatchedCharException if not
263 */
264 virtual void matchRange(int c1, int c2)
265 {
266 int la_1 = LA(1);
267
268 if ( la_1 < c1 || la_1 > c2 )
269 throw MismatchedCharException(la_1, c1, c2, false, this);
270
271 consume();
272 }
273
274 virtual bool getCaseSensitive() const
275 {
276 return caseSensitive;
277 }
278
279 virtual void setCaseSensitive(bool t)
280 {
281 caseSensitive = t;
282 }
283
284 virtual bool getCaseSensitiveLiterals() const=0;
285
286 /// Get the line the scanner currently is in (starts at 1)
287 virtual int getLine() const
288 {
289 return inputState->line;
290 }
291
292 /// set the line number
293 virtual void setLine(int l)
294 {
295 inputState->line = l;
296 }
297
298 /// Get the column the scanner currently is in (starts at 1)
299 virtual int getColumn() const
300 {
301 return inputState->column;
302 }
303 /// set the column number
304 virtual void setColumn(int c)
305 {
306 inputState->column = c;
307 }
308
309 /// get the filename for the file currently used
310 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
311 {
312 return inputState->filename;
313 }
314 /// Set the filename the scanner is using (used in error messages)
315 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
316 {
317 inputState->filename = f;
318 }
319
320 virtual bool getCommitToPath() const
321 {
322 return commitToPath;
323 }
324
325 virtual void setCommitToPath(bool commit)
326 {
327 commitToPath = commit;
328 }
329
330 /** return a copy of the current text buffer */
331 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
332 {
333 return text;
334 }
335
336 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
337 {
338 text = s;
339 }
340
341 virtual void resetText()
342 {
343 text = "";
344 inputState->tokenStartColumn = inputState->column;
345 inputState->tokenStartLine = inputState->line;
346 }
347
348 virtual RefToken getTokenObject() const
349 {
350 return _returnToken;
351 }
352
353 /** Used to keep track of line breaks, needs to be called from
354 * within generated lexers when a \n \r is encountered.
355 */
356 virtual void newline()
357 {
358 ++inputState->line;
359 inputState->column = 1;
360 }
361
362 /** Advance the current column number by an appropriate amount according
363 * to the tabsize. This method needs to be explicitly called from the
364 * lexer rules encountering tabs.
365 */
366 virtual void tab()
367 {
368 int c = getColumn();
369 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
370 setColumn( nc );
371 }
372 /// set the tabsize. Returns the old tabsize
373 int setTabsize( int size )
374 {
375 int oldsize = tabsize;
376 tabsize = size;
377 return oldsize;
378 }
379 /// Return the tabsize used by the scanner
380 int getTabSize() const
381 {
382 return tabsize;
383 }
384
385 /** Report exception errors caught in nextToken() */
386 virtual void reportError(const RecognitionException& e);
387
388 /** Parser error-reporting function can be overridden in subclass */
389 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
390
391 /** Parser warning-reporting function can be overridden in subclass */
392 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
393
394 virtual InputBuffer& getInputBuffer()
395 {
396 return inputState->getInput();
397 }
398
399 virtual LexerSharedInputState getInputState()
400 {
401 return inputState;
402 }
403
404 /** set the input state for the lexer.
405 * @note state is a reference counted object, hence no reference */
406 virtual void setInputState(LexerSharedInputState state)
407 {
408 inputState = state;
409 }
410
411 /// Set the factory for created tokens
412 virtual void setTokenObjectFactory(factory_type factory)
413 {
414 tokenFactory = factory;
415 }
416
417 /** Test the token text against the literals table
418 * Override this method to perform a different literals test
419 */
420 virtual int testLiteralsTable(int ttype) const
421 {
422 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
423 if (i != literals.end())
424 ttype = (*i).second;
425 return ttype;
426 }
427
428 /** Test the text passed in against the literals table
429 * Override this method to perform a different literals test
430 * This is used primarily when you want to test a portion of
431 * a token
432 */
433 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
434 {
435 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
436 if (i != literals.end())
437 ttype = (*i).second;
438 return ttype;
439 }
440
441 /// Override this method to get more specific case handling
442 virtual int toLower(int c) const
443 {
444 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
445 // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
446 // this one is more structural. Maybe make this configurable.
447 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
448 }
449
450 /** This method is called by YourLexer::nextToken() when the lexer has
451 * hit EOF condition. EOF is NOT a character.
452 * This method is not called if EOF is reached during
453 * syntactic predicate evaluation or during evaluation
454 * of normal lexical rules, which presumably would be
455 * an IOException. This traps the "normal" EOF condition.
456 *
457 * uponEOF() is called after the complete evaluation of
458 * the previous token and only if your parser asks
459 * for another token beyond that last non-EOF token.
460 *
461 * You might want to throw token or char stream exceptions
462 * like: "Heh, premature eof" or a retry stream exception
463 * ("I found the end of this file, go back to referencing file").
464 */
465 virtual void uponEOF()
466 {
467 }
468
469 /// Methods used to change tracing behavior
470 virtual void traceIndent();
471 virtual void traceIn(const char* rname);
472 virtual void traceOut(const char* rname);
473
474 #ifndef NO_STATIC_CONSTS
475 static const int EOF_CHAR = EOF;
476 #else
477 enum {
478 EOF_CHAR = EOF
479 };
480 #endif
481 protected:
482 ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
483 /// flag indicating wether consume saves characters
484 bool saveConsumedInput;
485 factory_type tokenFactory; ///< Factory for tokens
486 bool caseSensitive; ///< Is this lexer case sensitive
487 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
488
489 RefToken _returnToken; ///< used to return tokens w/o using return val
490
491 /// Input state, gives access to input stream, shared among different lexers
492 LexerSharedInputState inputState;
493
494 /** Used during filter mode to indicate that path is desired.
495 * A subsequent scan error will report an error as usual
496 * if acceptPath=true;
497 */
498 bool commitToPath;
499
500 int tabsize; ///< tab size the scanner uses.
501
502 /// Create a new RefToken of type t
503 virtual RefToken makeToken(int t)
504 {
505 RefToken tok = tokenFactory();
506 tok->setType(t);
507 tok->setColumn(inputState->tokenStartColumn);
508 tok->setLine(inputState->tokenStartLine);
509 return tok;
510 }
511
512 /** Tracer class, used when -traceLexer is passed to antlr
513 */
514 class Tracer {
515 private:
516 CharScanner* parser;
517 const char* text;
518
519 Tracer(const Tracer& other); // undefined
520 Tracer& operator=(const Tracer& other); // undefined
521 public:
522 Tracer( CharScanner* p,const char* t )
523 : parser(p), text(t)
524 {
525 parser->traceIn(text);
526 }
527 ~Tracer()
528 {
529 parser->traceOut(text);
530 }
531 };
532
533 int traceDepth;
534 private:
535 CharScanner( const CharScanner& other ); // undefined
536 CharScanner& operator=( const CharScanner& other ); // undefined
537
538 #ifndef NO_STATIC_CONSTS
539 static const int NO_CHAR = 0;
540 #else
541 enum {
542 NO_CHAR = 0
543 };
544 #endif
545 };
546
547 inline int CharScanner::LA(unsigned int i)
548 {
549 int c = inputState->getInput().LA(i);
550
551 if ( caseSensitive )
552 return c;
553 else
554 return toLower(c); // VC 6 tolower bug caught in toLower.
555 }
556
557 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
558 {
559 if (scanner->getCaseSensitiveLiterals())
560 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
561 else
562 {
563 #ifdef NO_STRCASECMP
564 return (stricmp(x.c_str(),y.c_str())<0);
565 #else
566 return (strcasecmp(x.c_str(),y.c_str())<0);
567 #endif
568 }
569 }
570
571 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
572 }
573 #endif
574
575 #endif //INC_CharScanner_hpp__

Properties

Name Value
svn:keywords Author Id Revision Date