ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/OpenMD/trunk/src/antlr/TokenStreamRewriteEngine.hpp
Revision: 1558
Committed: Wed May 11 16:32:48 2011 UTC (13 years, 11 months ago) by gezelter
File size: 12588 byte(s)
Log Message:
Updated antlr, some minor formatting changes

File Contents

# User Rev Content
1 tim 770 #ifndef INC_TokenStreamRewriteEngine_hpp__
2     #define INC_TokenStreamRewriteEngine_hpp__
3    
4     /* ANTLR Translator Generator
5     * Project led by Terence Parr at http://www.jGuru.com
6     * Software rights: http://www.antlr.org/license.html
7     */
8    
9     #include <string>
10     #include <list>
11     #include <vector>
12     #include <map>
13     #include <utility>
14     #include <iostream>
15     #include <iterator>
16     #include <cassert>
17     #include <algorithm>
18    
19     #include <antlr/config.hpp>
20    
21     #include <antlr/TokenStream.hpp>
22     #include <antlr/TokenWithIndex.hpp>
23     #include <antlr/BitSet.hpp>
24    
25     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
26     namespace antlr {
27     #endif
28    
29     /** This token stream tracks the *entire* token stream coming from
30 gezelter 1558 * a lexer, but does not pass on the whitespace (or whatever else
31     * you want to discard) to the parser.
32 tim 770 *
33 gezelter 1558 * This class can then be asked for the ith token in the input stream.
34     * Useful for dumping out the input stream exactly after doing some
35     * augmentation or other manipulations. Tokens are indexed from 0..n-1
36 tim 770 *
37 gezelter 1558 * You can insert stuff, replace, and delete chunks. Note that the
38     * operations are done lazily--only if you convert the buffer to a
39     * String. This is very efficient because you are not moving data around
40     * all the time. As the buffer of tokens is converted to strings, the
41     * toString() method(s) check to see if there is an operation at the
42     * current index. If so, the operation is done and then normal String
43     * rendering continues on the buffer. This is like having multiple Turing
44     * machine instruction streams (programs) operating on a single input tape. :)
45 tim 770 *
46 gezelter 1558 * Since the operations are done lazily at toString-time, operations do not
47     * screw up the token index values. That is, an insert operation at token
48     * index i does not change the index values for tokens i+1..n-1.
49 tim 770 *
50 gezelter 1558 * Because operations never actually alter the buffer, you may always get
51     * the original token stream back without undoing anything. Since
52     * the instructions are queued up, you can easily simulate transactions and
53     * roll back any changes if there is an error just by removing instructions.
54     * For example,
55 tim 770 *
56 gezelter 1558 * TokenStreamRewriteEngine rewriteEngine =
57     * new TokenStreamRewriteEngine(lexer);
58     * JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
59     * ...
60     * rewriteEngine.insertAfter("pass1", t, "foobar");
61     * rewriteEngine.insertAfter("pass2", u, "start");
62     * System.out.println(rewriteEngine.toString("pass1"));
63     * System.out.println(rewriteEngine.toString("pass2"));
64 tim 770 *
65 gezelter 1558 * You can also have multiple "instruction streams" and get multiple
66     * rewrites from a single pass over the input. Just name the instruction
67     * streams and use that name again when printing the buffer. This could be
68     * useful for generating a C file and also its header file--all from the
69     * same buffer.
70 tim 770 *
71 gezelter 1558 * If you don't use named rewrite streams, a "default" stream is used.
72 tim 770 *
73 gezelter 1558 * Terence Parr, parrt@cs.usfca.edu
74     * University of San Francisco
75     * February 2004
76 tim 770 */
77     class TokenStreamRewriteEngine : public TokenStream
78     {
79     public:
80     typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
81     static const char* DEFAULT_PROGRAM_NAME;
82 gezelter 1558 #ifndef NO_STATIC_CONSTS
83     static const size_t MIN_TOKEN_INDEX;
84     static const int PROGRAM_INIT_SIZE;
85     #else
86     enum {
87     MIN_TOKEN_INDEX = 0,
88     PROGRAM_INIT_SIZE = 100
89     };
90     #endif
91 tim 770
92     struct tokenToStream {
93     tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
94     template <typename T> void operator() ( const T& t ) {
95     out << t->getText();
96     }
97     ANTLR_USE_NAMESPACE(std)ostream& out;
98     };
99    
100     class RewriteOperation {
101     protected:
102     RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
103     : index(idx), text(txt)
104     {
105     }
106     public:
107     virtual ~RewriteOperation()
108     {
109     }
110     /** Execute the rewrite operation by possibly adding to the buffer.
111 gezelter 1558 * Return the index of the next token to operate on.
112 tim 770 */
113     virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
114     return index;
115     }
116     virtual size_t getIndex() const {
117     return index;
118     }
119     virtual const char* type() const {
120     return "RewriteOperation";
121     }
122     protected:
123     size_t index;
124     ANTLR_USE_NAMESPACE(std)string text;
125     };
126    
127     struct executeOperation {
128     ANTLR_USE_NAMESPACE(std)ostream& out;
129     executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
130     void operator () ( RewriteOperation* t ) {
131     t->execute(out);
132     }
133     };
134    
135     /// list of rewrite operations
136     typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
137     /// map program name to <program counter,program> tuple
138     typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
139    
140     class InsertBeforeOp : public RewriteOperation
141     {
142     public:
143     InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
144     : RewriteOperation(index, text)
145     {
146     }
147     virtual ~InsertBeforeOp() {}
148     virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
149     {
150     out << text;
151     return index;
152     }
153     virtual const char* type() const {
154     return "InsertBeforeOp";
155     }
156     };
157    
158     class ReplaceOp : public RewriteOperation
159     {
160     public:
161     ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text)
162     : RewriteOperation(from,text)
163     , lastIndex(to)
164     {
165     }
166     virtual ~ReplaceOp() {}
167     virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
168     out << text;
169     return lastIndex+1;
170     }
171     virtual const char* type() const {
172     return "ReplaceOp";
173     }
174     protected:
175     size_t lastIndex;
176     };
177    
178     class DeleteOp : public ReplaceOp {
179     public:
180     DeleteOp(size_t from, size_t to)
181     : ReplaceOp(from,to,"")
182     {
183     }
184     virtual const char* type() const {
185     return "DeleteOp";
186     }
187     };
188    
189     TokenStreamRewriteEngine(TokenStream& upstream);
190    
191     TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
192    
193     RefToken nextToken( void );
194    
195     void rollback(size_t instructionIndex) {
196     rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
197     }
198    
199     /** Rollback the instruction stream for a program so that
200 gezelter 1558 * the indicated instruction (via instructionIndex) is no
201     * longer in the stream. UNTESTED!
202 tim 770 */
203     void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
204     size_t instructionIndex );
205    
206     void deleteProgram() {
207     deleteProgram(DEFAULT_PROGRAM_NAME);
208     }
209    
210     /** Reset the program so that no instructions exist */
211     void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
212     rollback(programName, MIN_TOKEN_INDEX);
213     }
214    
215     void insertAfter( RefTokenWithIndex t,
216     const ANTLR_USE_NAMESPACE(std)string& text )
217     {
218     insertAfter(DEFAULT_PROGRAM_NAME, t, text);
219     }
220    
221     void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
222     insertAfter(DEFAULT_PROGRAM_NAME, index, text);
223     }
224    
225     void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
226     RefTokenWithIndex t,
227     const ANTLR_USE_NAMESPACE(std)string& text )
228     {
229     insertAfter(programName, t->getIndex(), text);
230     }
231    
232     void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
233 gezelter 1558 size_t index,
234 tim 770 const ANTLR_USE_NAMESPACE(std)string& text )
235     {
236     // to insert after, just insert before next index (even if past end)
237     insertBefore(programName,index+1, text);
238     }
239    
240     void insertBefore( RefTokenWithIndex t,
241     const ANTLR_USE_NAMESPACE(std)string& text )
242     {
243     // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
244     insertBefore(DEFAULT_PROGRAM_NAME, t, text);
245     }
246    
247     void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
248     insertBefore(DEFAULT_PROGRAM_NAME, index, text);
249     }
250    
251     void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
252     RefTokenWithIndex t,
253     const ANTLR_USE_NAMESPACE(std)string& text )
254     {
255     insertBefore(programName, t->getIndex(), text);
256     }
257    
258     void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
259     size_t index,
260     const ANTLR_USE_NAMESPACE(std)string& text )
261     {
262     addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
263     }
264    
265     void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
266     {
267     replace(DEFAULT_PROGRAM_NAME, index, index, text);
268     }
269    
270     void replace( size_t from, size_t to,
271     const ANTLR_USE_NAMESPACE(std)string& text)
272     {
273     replace(DEFAULT_PROGRAM_NAME, from, to, text);
274     }
275    
276     void replace( RefTokenWithIndex indexT,
277     const ANTLR_USE_NAMESPACE(std)string& text )
278     {
279     replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
280     }
281    
282     void replace( RefTokenWithIndex from,
283     RefTokenWithIndex to,
284     const ANTLR_USE_NAMESPACE(std)string& text )
285     {
286     replace(DEFAULT_PROGRAM_NAME, from, to, text);
287     }
288    
289     void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
290     size_t from, size_t to,
291     const ANTLR_USE_NAMESPACE(std)string& text )
292     {
293     addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
294     }
295    
296     void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
297     RefTokenWithIndex from,
298     RefTokenWithIndex to,
299     const ANTLR_USE_NAMESPACE(std)string& text )
300     {
301     replace(programName,
302     from->getIndex(),
303     to->getIndex(),
304     text);
305     }
306    
307     void remove(size_t index) {
308     remove(DEFAULT_PROGRAM_NAME, index, index);
309     }
310    
311     void remove(size_t from, size_t to) {
312     remove(DEFAULT_PROGRAM_NAME, from, to);
313     }
314    
315     void remove(RefTokenWithIndex indexT) {
316     remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
317     }
318    
319     void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
320     remove(DEFAULT_PROGRAM_NAME, from, to);
321     }
322    
323     void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324     size_t from, size_t to)
325     {
326     replace(programName,from,to,"");
327     }
328    
329     void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
330     RefTokenWithIndex from, RefTokenWithIndex to )
331     {
332     replace(programName,from,to,"");
333     }
334    
335     void discard(int ttype) {
336     discardMask.add(ttype);
337     }
338    
339     RefToken getToken( size_t i )
340     {
341     return RefToken(tokens.at(i));
342     }
343    
344     size_t getTokenStreamSize() const {
345     return tokens.size();
346     }
347    
348     void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
349     ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
350     }
351    
352     void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
353     size_t start, size_t end ) const;
354    
355     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
356 gezelter 1558 toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
357 tim 770 }
358    
359     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360     const ANTLR_USE_NAMESPACE(std)string& programName ) const
361     {
362 gezelter 1558 toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
363 tim 770 }
364    
365     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366     size_t start, size_t end ) const
367     {
368 gezelter 1558 toStream(out, DEFAULT_PROGRAM_NAME, start, end);
369 tim 770 }
370    
371     void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
372     const ANTLR_USE_NAMESPACE(std)string& programName,
373     size_t firstToken, size_t lastToken ) const;
374    
375     void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
376 gezelter 1558 toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
377 tim 770 }
378    
379     void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
380     size_t start, size_t end ) const;
381    
382     size_t getLastRewriteTokenIndex() const {
383     return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
384     }
385    
386     /** Return the last index for the program named programName
387     * return 0 if the program does not exist or the program is empty.
388     * (Note this is different from the java implementation that returns -1)
389     */
390     size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
391     program_map::const_iterator rewrites = programs.find(programName);
392    
393     if( rewrites == programs.end() )
394     return 0;
395    
396     const operation_list& prog = rewrites->second;
397     if( !prog.empty() )
398     {
399     operation_list::const_iterator last = prog.end();
400     --last;
401     return (*last)->getIndex();
402     }
403     return 0;
404     }
405    
406     protected:
407     /** If op.index > lastRewriteTokenIndexes, just add to the end.
408 gezelter 1558 * Otherwise, do linear */
409 tim 770 void addToSortedRewriteList(RewriteOperation* op) {
410     addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
411     }
412    
413     void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
414     RewriteOperation* op );
415    
416     protected:
417     /** Who do we suck tokens from? */
418     TokenStream& stream;
419     /** track index of tokens */
420     size_t index;
421    
422     /** Track the incoming list of tokens */
423     token_list tokens;
424    
425     /** You may have multiple, named streams of rewrite operations.
426     * I'm calling these things "programs."
427     * Maps String (name) -> rewrite (List)
428     */
429     program_map programs;
430    
431     /** Which (whitespace) token(s) to throw out */
432     BitSet discardMask;
433     };
434    
435     #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
436     }
437     #endif
438    
439     #endif

Properties

Name Value
svn:keywords Author Id Revision Date