ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/branches/development/src/antlr/CharScanner.hpp
Revision: 1465
Committed: Fri Jul 9 23:08:25 2010 UTC (14 years, 9 months ago) by chuckv
File size: 13508 byte(s)
Log Message:
Creating busticated version of OpenMD

File Contents

# Content
1 #ifndef INC_CharScanner_hpp__
2 #define INC_CharScanner_hpp__
3
4 /* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
7 *
8 * $Id$
9 */
10
11 #include <antlr/config.hpp>
12
13 #include <map>
14 #include <cstdio>
15
16 #ifdef HAS_NOT_CCTYPE_H
17 #include <ctype.h>
18 #else
19 #include <cctype>
20 #endif
21
22 #if ( _MSC_VER == 1200 )
23 // VC6 seems to need this
24 // note that this is not a standard C++ include file.
25 # include <stdio.h>
26 #endif
27
28 #include <antlr/TokenStream.hpp>
29 #include <antlr/RecognitionException.hpp>
30 #include <antlr/SemanticException.hpp>
31 #include <antlr/MismatchedCharException.hpp>
32 #include <antlr/InputBuffer.hpp>
33 #include <antlr/BitSet.hpp>
34 #include <antlr/LexerSharedInputState.hpp>
35
36 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
37 namespace antlr {
38 #endif
39
40 class ANTLR_API CharScanner;
41
42 ANTLR_C_USING(tolower)
43
44 #if !defined(HAVE_STRCASECMP) && defined(HAVE_STRICMP) && !defined(stricmp)
45 #define strcasecmp stricmp
46 #endif
47 #if !defined(HAVE_STRNCASECMP) && defined(HAVE_STRNICMP) && !defined(strnicmp)
48 #define strncasecmp strnicmp
49 #endif
50
51
52 #if !defined(HAVE_STRCASECMP) && !defined(HAVE_STRICMP)
/** Fallback case-insensitive C string comparison for platforms that
 * provide neither strcasecmp() nor stricmp().
 *
 * Both strings are compared character by character after folding each
 * character with tolower().
 *
 * @param s1 first NUL-terminated string
 * @param s2 second NUL-terminated string
 * @return -1 if s1 sorts before s2, 1 if it sorts after, 0 if both are
 *         equal ignoring case
 */
inline int strcasecmp(const char *s1, const char *s2)
{
	for (;;)
	{
		// Convert through unsigned char first: handing a negative char to
		// tolower() is undefined, and comparing as unsigned values keeps
		// bytes >= 0x80 ordered after ASCII even where char is signed.
		const int c1 = tolower(static_cast<unsigned char>(*s1++));
		const int c2 = tolower(static_cast<unsigned char>(*s2++));

		if (c1 < c2) return -1;
		if (c1 > c2) return 1;
		// c1 == c2 here, so reaching the terminator means both strings ended.
		if (c1 == 0) return 0;
	}
}
64 #endif
65
66 /** Functor for the literals map
67 */
68 class ANTLR_API CharScannerLiteralsLess : public ANTLR_USE_NAMESPACE(std)binary_function<ANTLR_USE_NAMESPACE(std)string,ANTLR_USE_NAMESPACE(std)string,bool> {
69 private:
70 const CharScanner* scanner;
71 public:
72 #ifdef NO_TEMPLATE_PARTS
73 CharScannerLiteralsLess() {} // not really used, definition to appease MSVC
74 #endif
75 CharScannerLiteralsLess(const CharScanner* theScanner)
76 : scanner(theScanner)
77 {
78 }
79 bool operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const;
80 // defaults are good enough..
81 // CharScannerLiteralsLess(const CharScannerLiteralsLess&);
82 // CharScannerLiteralsLess& operator=(const CharScannerLiteralsLess&);
83 };
84
85 /** Superclass of generated lexers
86 */
87 class ANTLR_API CharScanner : public TokenStream {
88 protected:
89 typedef RefToken (*factory_type)();
90 public:
91 CharScanner(InputBuffer& cb, bool case_sensitive );
92 CharScanner(InputBuffer* cb, bool case_sensitive );
93 CharScanner(const LexerSharedInputState& state, bool case_sensitive );
94
95 virtual ~CharScanner()
96 {
97 }
98
99 virtual int LA(unsigned int i);
100
101 virtual void append(char c)
102 {
103 if (saveConsumedInput)
104 {
105 size_t l = text.length();
106
107 if ((l%256) == 0)
108 text.reserve(l+256);
109
110 text.replace(l,0,&c,1);
111 }
112 }
113
114 virtual void append(const ANTLR_USE_NAMESPACE(std)string& s)
115 {
116 if( saveConsumedInput )
117 text += s;
118 }
119
120 virtual void commit()
121 {
122 inputState->getInput().commit();
123 }
124
125 virtual void consume()
126 {
127 if (inputState->guessing == 0)
128 {
129 int c = LA(1);
130 if (caseSensitive)
131 {
132 append(c);
133 }
134 else
135 {
136 // use input.LA(), not LA(), to get original case
137 // CharScanner.LA() would toLower it.
138 append(inputState->getInput().LA(1));
139 }
140
141 // RK: in a sense I don't like this automatic handling.
142 if (c == '\t')
143 tab();
144 else
145 inputState->column++;
146 }
147 inputState->getInput().consume();
148 }
149
150 /** Consume chars until one matches the given char */
151 virtual void consumeUntil(int c)
152 {
153 for(;;)
154 {
155 int la_1 = LA(1);
156 if( la_1 == EOF_CHAR || la_1 == c )
157 break;
158 consume();
159 }
160 }
161
162 /** Consume chars until one matches the given set */
163 virtual void consumeUntil(const BitSet& set)
164 {
165 for(;;)
166 {
167 int la_1 = LA(1);
168 if( la_1 == EOF_CHAR || set.member(la_1) )
169 break;
170 consume();
171 }
172 }
173
174 /// Mark the current position and return a id for it
175 virtual unsigned int mark()
176 {
177 return inputState->getInput().mark();
178 }
179 /// Rewind the scanner to a previously marked position
180 virtual void rewind(unsigned int pos)
181 {
182 inputState->getInput().rewind(pos);
183 }
184
185 /// See if input contains character 'c' throw MismatchedCharException if not
186 virtual void match(int c)
187 {
188 int la_1 = LA(1);
189 if ( la_1 != c )
190 throw MismatchedCharException(la_1, c, false, this);
191 consume();
192 }
193
194 /** See if input contains element from bitset b
195 * throw MismatchedCharException if not
196 */
197 virtual void match(const BitSet& b)
198 {
199 int la_1 = LA(1);
200
201 if ( !b.member(la_1) )
202 throw MismatchedCharException( la_1, b, false, this );
203 consume();
204 }
205
206 /** See if input contains string 's' throw MismatchedCharException if not
207 * @note the string cannot match EOF
208 */
209 virtual void match( const char* s )
210 {
211 while( *s != '\0' )
212 {
213 // the & 0xFF is here to prevent sign extension lateron
214 int la_1 = LA(1), c = (*s++ & 0xFF);
215
216 if ( la_1 != c )
217 throw MismatchedCharException(la_1, c, false, this);
218
219 consume();
220 }
221 }
222 /** See if input contains string 's' throw MismatchedCharException if not
223 * @note the string cannot match EOF
224 */
225 virtual void match(const ANTLR_USE_NAMESPACE(std)string& s)
226 {
227 size_t len = s.length();
228
229 for (size_t i = 0; i < len; i++)
230 {
231 // the & 0xFF is here to prevent sign extension lateron
232 int la_1 = LA(1), c = (s[i] & 0xFF);
233
234 if ( la_1 != c )
235 throw MismatchedCharException(la_1, c, false, this);
236
237 consume();
238 }
239 }
240 /** See if input does not contain character 'c'
241 * throw MismatchedCharException if not
242 */
243 virtual void matchNot(int c)
244 {
245 int la_1 = LA(1);
246
247 if ( la_1 == c )
248 throw MismatchedCharException(la_1, c, true, this);
249
250 consume();
251 }
252 /** See if input contains character in range c1-c2
253 * throw MismatchedCharException if not
254 */
255 virtual void matchRange(int c1, int c2)
256 {
257 int la_1 = LA(1);
258
259 if ( la_1 < c1 || la_1 > c2 )
260 throw MismatchedCharException(la_1, c1, c2, false, this);
261
262 consume();
263 }
264
265 virtual bool getCaseSensitive() const
266 {
267 return caseSensitive;
268 }
269
270 virtual void setCaseSensitive(bool t)
271 {
272 caseSensitive = t;
273 }
274
275 virtual bool getCaseSensitiveLiterals() const=0;
276
277 /// Get the line the scanner currently is in (starts at 1)
278 virtual int getLine() const
279 {
280 return inputState->line;
281 }
282
283 /// set the line number
284 virtual void setLine(int l)
285 {
286 inputState->line = l;
287 }
288
289 /// Get the column the scanner currently is in (starts at 1)
290 virtual int getColumn() const
291 {
292 return inputState->column;
293 }
294 /// set the column number
295 virtual void setColumn(int c)
296 {
297 inputState->column = c;
298 }
299
300 /// get the filename for the file currently used
301 virtual const ANTLR_USE_NAMESPACE(std)string& getFilename() const
302 {
303 return inputState->filename;
304 }
305 /// Set the filename the scanner is using (used in error messages)
306 virtual void setFilename(const ANTLR_USE_NAMESPACE(std)string& f)
307 {
308 inputState->filename = f;
309 }
310
311 virtual bool getCommitToPath() const
312 {
313 return commitToPath;
314 }
315
316 virtual void setCommitToPath(bool commit)
317 {
318 commitToPath = commit;
319 }
320
321 /** return a copy of the current text buffer */
322 virtual const ANTLR_USE_NAMESPACE(std)string& getText() const
323 {
324 return text;
325 }
326
327 virtual void setText(const ANTLR_USE_NAMESPACE(std)string& s)
328 {
329 text = s;
330 }
331
332 virtual void resetText()
333 {
334 text = "";
335 inputState->tokenStartColumn = inputState->column;
336 inputState->tokenStartLine = inputState->line;
337 }
338
339 virtual RefToken getTokenObject() const
340 {
341 return _returnToken;
342 }
343
344 /** Used to keep track of line breaks, needs to be called from
345 * within generated lexers when a \n \r is encountered.
346 */
347 virtual void newline()
348 {
349 ++inputState->line;
350 inputState->column = 1;
351 }
352
353 /** Advance the current column number by an appropriate amount according
354 * to the tabsize. This method needs to be explicitly called from the
355 * lexer rules encountering tabs.
356 */
357 virtual void tab()
358 {
359 int c = getColumn();
360 int nc = ( ((c-1)/tabsize) + 1) * tabsize + 1; // calculate tab stop
361 setColumn( nc );
362 }
363 /// set the tabsize. Returns the old tabsize
364 int setTabsize( int size )
365 {
366 int oldsize = tabsize;
367 tabsize = size;
368 return oldsize;
369 }
370 /// Return the tabsize used by the scanner
371 int getTabSize() const
372 {
373 return tabsize;
374 }
375
376 /** Report exception errors caught in nextToken() */
377 virtual void reportError(const RecognitionException& e);
378
379 /** Parser error-reporting function can be overridden in subclass */
380 virtual void reportError(const ANTLR_USE_NAMESPACE(std)string& s);
381
382 /** Parser warning-reporting function can be overridden in subclass */
383 virtual void reportWarning(const ANTLR_USE_NAMESPACE(std)string& s);
384
385 virtual InputBuffer& getInputBuffer()
386 {
387 return inputState->getInput();
388 }
389
390 virtual LexerSharedInputState getInputState()
391 {
392 return inputState;
393 }
394
395 /** set the input state for the lexer.
396 * @note state is a reference counted object, hence no reference */
397 virtual void setInputState(LexerSharedInputState state)
398 {
399 inputState = state;
400 }
401
402 /// Set the factory for created tokens
403 virtual void setTokenObjectFactory(factory_type factory)
404 {
405 tokenFactory = factory;
406 }
407
408 /** Test the token text against the literals table
409 * Override this method to perform a different literals test
410 */
411 virtual int testLiteralsTable(int ttype) const
412 {
413 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(text);
414 if (i != literals.end())
415 ttype = (*i).second;
416 return ttype;
417 }
418
419 /** Test the text passed in against the literals table
420 * Override this method to perform a different literals test
421 * This is used primarily when you want to test a portion of
422 * a token
423 */
424 virtual int testLiteralsTable(const ANTLR_USE_NAMESPACE(std)string& txt,int ttype) const
425 {
426 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess>::const_iterator i = literals.find(txt);
427 if (i != literals.end())
428 ttype = (*i).second;
429 return ttype;
430 }
431
432 /// Override this method to get more specific case handling
433 virtual int toLower(int c) const
434 {
435 // test on EOF_CHAR for buggy (?) STLPort tolower (or HPUX tolower?)
436 // also VC++ 6.0 does this. (see fix 422 (is reverted by this fix)
437 // this one is more structural. Maybe make this configurable.
438 return (c == EOF_CHAR ? EOF_CHAR : tolower(c));
439 }
440
441 /** This method is called by YourLexer::nextToken() when the lexer has
442 * hit EOF condition. EOF is NOT a character.
443 * This method is not called if EOF is reached during
444 * syntactic predicate evaluation or during evaluation
445 * of normal lexical rules, which presumably would be
446 * an IOException. This traps the "normal" EOF condition.
447 *
448 * uponEOF() is called after the complete evaluation of
449 * the previous token and only if your parser asks
450 * for another token beyond that last non-EOF token.
451 *
452 * You might want to throw token or char stream exceptions
453 * like: "Heh, premature eof" or a retry stream exception
454 * ("I found the end of this file, go back to referencing file").
455 */
456 virtual void uponEOF()
457 {
458 }
459
460 /// Methods used to change tracing behavior
461 virtual void traceIndent();
462 virtual void traceIn(const char* rname);
463 virtual void traceOut(const char* rname);
464
465 #ifndef NO_STATIC_CONSTS
466 static const int EOF_CHAR = EOF;
467 #else
468 enum {
469 EOF_CHAR = EOF
470 };
471 #endif
472 protected:
473 ANTLR_USE_NAMESPACE(std)string text; ///< Text of current token
474 /// flag indicating wether consume saves characters
475 bool saveConsumedInput;
476 factory_type tokenFactory; ///< Factory for tokens
477 bool caseSensitive; ///< Is this lexer case sensitive
478 ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,int,CharScannerLiteralsLess> literals; // set by subclass
479
480 RefToken _returnToken; ///< used to return tokens w/o using return val
481
482 /// Input state, gives access to input stream, shared among different lexers
483 LexerSharedInputState inputState;
484
485 /** Used during filter mode to indicate that path is desired.
486 * A subsequent scan error will report an error as usual
487 * if acceptPath=true;
488 */
489 bool commitToPath;
490
491 int tabsize; ///< tab size the scanner uses.
492
493 /// Create a new RefToken of type t
494 virtual RefToken makeToken(int t)
495 {
496 RefToken tok = tokenFactory();
497 tok->setType(t);
498 tok->setColumn(inputState->tokenStartColumn);
499 tok->setLine(inputState->tokenStartLine);
500 return tok;
501 }
502
503 /** Tracer class, used when -traceLexer is passed to antlr
504 */
505 class Tracer {
506 private:
507 CharScanner* parser;
508 const char* text;
509
510 Tracer(const Tracer& other); // undefined
511 Tracer& operator=(const Tracer& other); // undefined
512 public:
513 Tracer( CharScanner* p,const char* t )
514 : parser(p), text(t)
515 {
516 parser->traceIn(text);
517 }
518 ~Tracer()
519 {
520 parser->traceOut(text);
521 }
522 };
523
524 int traceDepth;
525 private:
526 CharScanner( const CharScanner& other ); // undefined
527 CharScanner& operator=( const CharScanner& other ); // undefined
528
529 #ifndef NO_STATIC_CONSTS
530 static const int NO_CHAR = 0;
531 #else
532 enum {
533 NO_CHAR = 0
534 };
535 #endif
536 };
537
538 inline int CharScanner::LA(unsigned int i)
539 {
540 int c = inputState->getInput().LA(i);
541
542 if ( caseSensitive )
543 return c;
544 else
545 return toLower(c); // VC 6 tolower bug caught in toLower.
546 }
547
548 inline bool CharScannerLiteralsLess::operator() (const ANTLR_USE_NAMESPACE(std)string& x,const ANTLR_USE_NAMESPACE(std)string& y) const
549 {
550 if (scanner->getCaseSensitiveLiterals())
551 return ANTLR_USE_NAMESPACE(std)less<ANTLR_USE_NAMESPACE(std)string>()(x,y);
552 else
553 {
554 #ifdef NO_STRCASECMP
555 return (stricmp(x.c_str(),y.c_str())<0);
556 #else
557 return (strcasecmp(x.c_str(),y.c_str())<0);
558 #endif
559 }
560 }
561
562 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
563 }
564 #endif
565
566 #endif //INC_CharScanner_hpp__

Properties

Name Value
svn:keywords Author Id Revision Date