27 |
|
#endif |
28 |
|
|
29 |
|
/** This token stream tracks the *entire* token stream coming from |
30 |
< |
* a lexer, but does not pass on the whitespace (or whatever else |
31 |
< |
* you want to discard) to the parser. |
30 |
> |
* a lexer, but does not pass on the whitespace (or whatever else |
31 |
> |
* you want to discard) to the parser. |
32 |
|
* |
33 |
< |
* This class can then be asked for the ith token in the input stream. |
34 |
< |
* Useful for dumping out the input stream exactly after doing some |
35 |
< |
* augmentation or other manipulations. Tokens are index from 0..n-1 |
33 |
> |
* This class can then be asked for the ith token in the input stream. |
34 |
> |
* Useful for dumping out the input stream exactly after doing some |
35 |
> |
* augmentation or other manipulations. Tokens are indexed from 0..n-1 |
36 |
|
* |
37 |
< |
* You can insert stuff, replace, and delete chunks. Note that the |
38 |
< |
* operations are done lazily--only if you convert the buffer to a |
39 |
< |
* String. This is very efficient because you are not moving data around |
40 |
< |
* all the time. As the buffer of tokens is converted to strings, the |
41 |
< |
* toString() method(s) check to see if there is an operation at the |
42 |
< |
* current index. If so, the operation is done and then normal String |
43 |
< |
* rendering continues on the buffer. This is like having multiple Turing |
44 |
< |
* machine instruction streams (programs) operating on a single input tape. :) |
37 |
> |
* You can insert stuff, replace, and delete chunks. Note that the |
38 |
> |
* operations are done lazily--only if you convert the buffer to a |
39 |
> |
* String. This is very efficient because you are not moving data around |
40 |
> |
* all the time. As the buffer of tokens is converted to strings, the |
41 |
> |
* toString() method(s) check to see if there is an operation at the |
42 |
> |
* current index. If so, the operation is done and then normal String |
43 |
> |
* rendering continues on the buffer. This is like having multiple Turing |
44 |
> |
* machine instruction streams (programs) operating on a single input tape. :) |
45 |
|
* |
46 |
< |
* Since the operations are done lazily at toString-time, operations do not |
47 |
< |
* screw up the token index values. That is, an insert operation at token |
48 |
< |
* index i does not change the index values for tokens i+1..n-1. |
46 |
> |
* Since the operations are done lazily at toString-time, operations do not |
47 |
> |
* screw up the token index values. That is, an insert operation at token |
48 |
> |
* index i does not change the index values for tokens i+1..n-1. |
49 |
|
* |
50 |
< |
* Because operations never actually alter the buffer, you may always get |
51 |
< |
* the original token stream back without undoing anything. Since |
52 |
< |
* the instructions are queued up, you can easily simulate transactions and |
53 |
< |
* roll back any changes if there is an error just by removing instructions. |
54 |
< |
* For example, |
50 |
> |
* Because operations never actually alter the buffer, you may always get |
51 |
> |
* the original token stream back without undoing anything. Since |
52 |
> |
* the instructions are queued up, you can easily simulate transactions and |
53 |
> |
* roll back any changes if there is an error just by removing instructions. |
54 |
> |
* For example, |
55 |
|
* |
56 |
< |
* TokenStreamRewriteEngine rewriteEngine = |
57 |
< |
* new TokenStreamRewriteEngine(lexer); |
58 |
< |
* JavaRecognizer parser = new JavaRecognizer(rewriteEngine); |
59 |
< |
* ... |
60 |
< |
* rewriteEngine.insertAfter("pass1", t, "foobar");} |
61 |
< |
* rewriteEngine.insertAfter("pass2", u, "start");} |
62 |
< |
* System.out.println(rewriteEngine.toString("pass1")); |
63 |
< |
* System.out.println(rewriteEngine.toString("pass2")); |
64 |
< |
* |
65 |
< |
* You can also have multiple "instruction streams" and get multiple |
66 |
< |
* rewrites from a single pass over the input. Just name the instruction |
67 |
< |
* streams and use that name again when printing the buffer. This could be |
68 |
< |
* useful for generating a C file and also its header file--all from the |
69 |
< |
* same buffer. |
56 |
> |
* TokenStreamRewriteEngine rewriteEngine = |
57 |
> |
* new TokenStreamRewriteEngine(lexer); |
58 |
> |
* JavaRecognizer parser = new JavaRecognizer(rewriteEngine); |
59 |
> |
* ... |
60 |
> |
* rewriteEngine.insertAfter("pass1", t, "foobar"); |
61 |
> |
* rewriteEngine.insertAfter("pass2", u, "start"); |
62 |
> |
* System.out.println(rewriteEngine.toString("pass1")); |
63 |
> |
* System.out.println(rewriteEngine.toString("pass2")); |
64 |
|
* |
65 |
< |
* If you don't use named rewrite streams, a "default" stream is used. |
65 |
> |
* You can also have multiple "instruction streams" and get multiple |
66 |
> |
* rewrites from a single pass over the input. Just name the instruction |
67 |
> |
* streams and use that name again when printing the buffer. This could be |
68 |
> |
* useful for generating a C file and also its header file--all from the |
69 |
> |
* same buffer. |
70 |
|
* |
71 |
< |
* Terence Parr, parrt@cs.usfca.edu |
72 |
< |
* University of San Francisco |
73 |
< |
* February 2004 |
71 |
> |
* If you don't use named rewrite streams, a "default" stream is used. |
72 |
> |
* |
73 |
> |
* Terence Parr, parrt@cs.usfca.edu |
74 |
> |
* University of San Francisco |
75 |
> |
* February 2004 |
76 |
|
*/ |
77 |
|
class TokenStreamRewriteEngine : public TokenStream |
78 |
|
{ |
79 |
|
public: |
80 |
|
typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list; |
81 |
– |
|
82 |
– |
static const size_t MIN_TOKEN_INDEX = 0; |
81 |
|
static const char* DEFAULT_PROGRAM_NAME; |
82 |
< |
static const int PROGRAM_INIT_SIZE = 100; |
82 |
> |
#ifndef NO_STATIC_CONSTS |
83 |
> |
static const size_t MIN_TOKEN_INDEX; |
84 |
> |
static const int PROGRAM_INIT_SIZE; |
85 |
> |
#else |
86 |
> |
enum { |
87 |
> |
MIN_TOKEN_INDEX = 0, |
88 |
> |
PROGRAM_INIT_SIZE = 100 |
89 |
> |
}; |
90 |
> |
#endif |
91 |
|
|
92 |
|
struct tokenToStream { |
93 |
|
tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {} |
108 |
|
{ |
109 |
|
} |
110 |
|
/** Execute the rewrite operation by possibly adding to the buffer. |
111 |
< |
* Return the index of the next token to operate on. |
111 |
> |
* Return the index of the next token to operate on. |
112 |
|
*/ |
113 |
|
virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) { |
114 |
|
return index; |
197 |
|
} |
198 |
|
|
199 |
|
/** Rollback the instruction stream for a program so that |
200 |
< |
* the indicated instruction (via instructionIndex) is no |
201 |
< |
* longer in the stream. UNTESTED! |
200 |
> |
* the indicated instruction (via instructionIndex) is no |
201 |
> |
* longer in the stream. UNTESTED! |
202 |
|
*/ |
203 |
|
void rollback(const ANTLR_USE_NAMESPACE(std)string& programName, |
204 |
|
size_t instructionIndex ); |
230 |
|
} |
231 |
|
|
232 |
|
void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName, |
233 |
< |
size_t index, |
233 |
> |
size_t index, |
234 |
|
const ANTLR_USE_NAMESPACE(std)string& text ) |
235 |
|
{ |
236 |
|
// to insert after, just insert before next index (even if past end) |
353 |
|
size_t start, size_t end ) const; |
354 |
|
|
355 |
|
void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { |
356 |
< |
return toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); |
356 |
> |
toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); |
357 |
|
} |
358 |
|
|
359 |
|
void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, |
360 |
|
const ANTLR_USE_NAMESPACE(std)string& programName ) const |
361 |
|
{ |
362 |
< |
return toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize()); |
362 |
> |
toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize()); |
363 |
|
} |
364 |
|
|
365 |
|
void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, |
366 |
|
size_t start, size_t end ) const |
367 |
|
{ |
368 |
< |
return toStream(out, DEFAULT_PROGRAM_NAME, start, end); |
368 |
> |
toStream(out, DEFAULT_PROGRAM_NAME, start, end); |
369 |
|
} |
370 |
|
|
371 |
|
void toStream( ANTLR_USE_NAMESPACE(std)ostream& out, |
373 |
|
size_t firstToken, size_t lastToken ) const; |
374 |
|
|
375 |
|
void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const { |
376 |
< |
return toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); |
376 |
> |
toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize()); |
377 |
|
} |
378 |
|
|
379 |
|
void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out, |
405 |
|
|
406 |
|
protected: |
407 |
|
/** If op.index > lastRewriteTokenIndexes, just add to the end. |
408 |
< |
* Otherwise, do linear */ |
408 |
> |
* Otherwise, do linear */ |
409 |
|
void addToSortedRewriteList(RewriteOperation* op) { |
410 |
|
addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op); |
411 |
|
} |