// KickJava: Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > antlr > CodeGenerator


package antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.jGuru.com
 * Software rights: http://www.antlr.org/RIGHTS.html
 *
 * $Id: //depot/code/org.antlr/main/main/antlr/CodeGenerator.java#10 $
 */

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;

import antlr.collections.impl.BitSet;
import antlr.collections.impl.Vector;

17 /**A generic ANTLR code generator. All code generators
18  * Derive from this class.
19  *
20  * <p>
21  * A CodeGenerator knows about a Grammar data structure and
22  * a grammar analyzer. The Grammar is walked to generate the
23  * appropriate code for both a parser and lexer (if present).
24  * This interface may change slightly so that the lexer is
25  * itself living inside of a Grammar object (in which case,
26  * this class generates only one recognizer). The main method
27  * to call is <tt>gen()</tt>, which initiates all code gen.
28  *
29  * <p>
30  * The interaction of the code generator with the analyzer is
31  * simple: each subrule block calls deterministic() before generating
32  * code for the block. Method deterministic() sets lookahead caches
33  * in each Alternative object. Technically, a code generator
34  * doesn't need the grammar analyzer if all lookahead analysis
35  * is done at runtime, but this would result in a slower parser.
36  *
37  * <p>
38  * This class provides a set of support utilities to handle argument
39  * list parsing and so on.
40  *
41  * @author Terence Parr, John Lilley
42  * @version 2.00a
43  * @see antlr.JavaCodeGenerator
44  * @see antlr.DiagnosticCodeGenerator
45  * @see antlr.LLkAnalyzer
46  * @see antlr.Grammar
47  * @see antlr.AlternativeElement
48  * @see antlr.Lookahead
49  */

50 public abstract class CodeGenerator {
51     protected antlr.Tool antlrTool;
52
53     /** Current tab indentation for code output */
54     protected int tabs = 0;
55
56     /** Current output Stream */
57     transient protected PrintWriter JavaDoc currentOutput; // SAS: for proper text i/o
58

59     /** The grammar for which we generate code */
60     protected Grammar grammar = null;
61
62     /** List of all bitsets that must be dumped. These are Vectors of BitSet. */
63     protected Vector bitsetsUsed;
64
65     /** The grammar behavior */
66     protected DefineGrammarSymbols behavior;
67
68     /** The LLk analyzer */
69     protected LLkGrammarAnalyzer analyzer;
70
71     /** Object used to format characters in the target language.
72      * subclass must initialize this to the language-specific formatter
73      */

74     protected CharFormatter charFormatter;
75
76     /** Use option "codeGenDebug" to generate debugging output */
77     protected boolean DEBUG_CODE_GENERATOR = false;
78
79     /** Default values for code-generation thresholds */
80     protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
81     protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;
82
83     /** If there are more than 8 long words to init in a bitset,
84      * try to optimize it; e.g., detect runs of -1L and 0L.
85      */

86     protected static final int BITSET_OPTIMIZE_INIT_THRESHOLD = 8;
87
88     /** This is a hint for the language-specific code generator.
89      * A switch() or language-specific equivalent will be generated instead
90      * of a series of if/else statements for blocks with number of alternates
91      * greater than or equal to this number of non-predicated LL(1) alternates.
92      * This is modified by the grammar option "codeGenMakeSwitchThreshold"
93      */

94     protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
95
96     /** This is a hint for the language-specific code generator.
97      * A bitset membership test will be generated instead of an
98      * ORed series of LA(k) comparisions for lookahead sets with
99      * degree greater than or equal to this value.
100      * This is modified by the grammar option "codeGenBitsetTestThreshold"
101      */

102     protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
103
104     private static boolean OLD_ACTION_TRANSLATOR = true;
105
106     public static String JavaDoc TokenTypesFileSuffix = "TokenTypes";
107     public static String JavaDoc TokenTypesFileExt = ".txt";
108
109     /** Construct code generator base class */
110     public CodeGenerator() {
111     }
112
113     /** Output a String to the currentOutput stream.
114      * Ignored if string is null.
115      * @param s The string to output
116      */

117     protected void _print(String JavaDoc s) {
118         if (s != null) {
119             currentOutput.print(s);
120         }
121     }
122
123     /** Print an action without leading tabs, attempting to
124      * preserve the current indentation level for multi-line actions
125      * Ignored if string is null.
126      * @param s The action string to output
127      */

128     protected void _printAction(String JavaDoc s) {
129         if (s == null) {
130             return;
131         }
132
133         // Skip leading newlines, tabs and spaces
134
int start = 0;
135         while (start < s.length() && Character.isSpaceChar(s.charAt(start))) {
136             start++;
137         }
138
139         // Skip leading newlines, tabs and spaces
140
int end = s.length() - 1;
141         while (end > start && Character.isSpaceChar(s.charAt(end))) {
142             end--;
143         }
144
145         char c = 0;
146         for (int i = start; i <= end;) {
147             c = s.charAt(i);
148             i++;
149             boolean newline = false;
150             switch (c) {
151                 case '\n':
152                     newline = true;
153                     break;
154                 case '\r':
155                     if (i <= end && s.charAt(i) == '\n') {
156                         i++;
157                     }
158                     newline = true;
159                     break;
160                 default:
161                     currentOutput.print(c);
162                     break;
163             }
164             if (newline) {
165                 currentOutput.println();
166                 printTabs();
167                 // Absorb leading whitespace
168
while (i <= end && Character.isSpaceChar(s.charAt(i))) {
169                     i++;
170                 }
171                 newline = false;
172             }
173         }
174         currentOutput.println();
175     }
176
177     /** Output a String followed by newline, to the currentOutput stream.
178      * Ignored if string is null.
179      * @param s The string to output
180      */

181     protected void _println(String JavaDoc s) {
182         if (s != null) {
183             currentOutput.println(s);
184         }
185     }
186
187     /** Test if a set element array represents a contiguous range.
188      * @param elems The array of elements representing the set, usually from BitSet.toArray().
189      * @return true if the elements are a contiguous range (with two or more).
190      */

191     public static boolean elementsAreRange(int[] elems) {
192         if (elems.length == 0) {
193             return false;
194         }
195         int begin = elems[0];
196         int end = elems[elems.length - 1];
197         if (elems.length <= 2) {
198             // Not enough elements for a range expression
199
return false;
200         }
201         if (end - begin + 1 > elems.length) {
202             // The set does not represent a contiguous range
203
return false;
204         }
205         int v = begin + 1;
206         for (int i = 1; i < elems.length - 1; i++) {
207             if (v != elems[i]) {
208                 // The set does not represent a contiguous range
209
return false;
210             }
211             v++;
212         }
213         return true;
214     }
215
216     /** Get the identifier portion of an argument-action token.
217      * The ID of an action is assumed to be a trailing identifier.
218      * Specific code-generators may want to override this
219      * if the language has unusual declaration syntax.
220      * @param t The action token
221      * @return A string containing the text of the identifier
222      */

223     protected String JavaDoc extractIdOfAction(Token t) {
224         return extractIdOfAction(t.getText(), t.getLine(), t.getColumn());
225     }
226
227     /** Get the identifier portion of an argument-action.
228      * The ID of an action is assumed to be a trailing identifier.
229      * Specific code-generators may want to override this
230      * if the language has unusual declaration syntax.
231      * @param s The action text
232      * @param line Line used for error reporting.
233      * @param column Line used for error reporting.
234      * @return A string containing the text of the identifier
235      */

236     protected String JavaDoc extractIdOfAction(String JavaDoc s, int line, int column) {
237         s = removeAssignmentFromDeclaration(s);
238         // Search back from the end for a non alphanumeric. That marks the
239
// beginning of the identifier
240
for (int i = s.length() - 2; i >= 0; i--) {
241             // TODO: make this work for language-independent identifiers?
242
if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_') {
243                 // Found end of type part
244
return s.substring(i + 1);
245             }
246         }
247         // Something is bogus, but we cannot parse the language-specific
248
// actions any better. The compiler will have to catch the problem.
249
antlrTool.warning("Ill-formed action", grammar.getFilename(), line, column);
250         return "";
251     }
252
253     /** Get the type string out of an argument-action token.
254      * The type of an action is assumed to precede a trailing identifier
255      * Specific code-generators may want to override this
256      * if the language has unusual declaration syntax.
257      * @param t The action token
258      * @return A string containing the text of the type
259      */

260     protected String JavaDoc extractTypeOfAction(Token t) {
261         return extractTypeOfAction(t.getText(), t.getLine(), t.getColumn());
262     }
263
264     /** Get the type portion of an argument-action.
265      * The type of an action is assumed to precede a trailing identifier
266      * Specific code-generators may want to override this
267      * if the language has unusual declaration syntax.
268      * @param s The action text
269      * @param line Line used for error reporting.
270      * @return A string containing the text of the type
271      */

272     protected String JavaDoc extractTypeOfAction(String JavaDoc s, int line, int column) {
273         s = removeAssignmentFromDeclaration(s);
274         // Search back from the end for a non alphanumeric. That marks the
275
// beginning of the identifier
276
for (int i = s.length() - 2; i >= 0; i--) {
277             // TODO: make this work for language-independent identifiers?
278
if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_') {
279                 // Found end of type part
280
return s.substring(0, i + 1);
281             }
282         }
283         // Something is bogus, but we cannot parse the language-specific
284
// actions any better. The compiler will have to catch the problem.
285
antlrTool.warning("Ill-formed action", grammar.getFilename(), line, column);
286         return "";
287     }
288
289     /** Generate the code for all grammars
290      */

291     public abstract void gen();
292
293     /** Generate code for the given grammar element.
294      * @param action The {...} action to generate
295      */

296     public abstract void gen(ActionElement action);
297
298     /** Generate code for the given grammar element.
299      * @param blk The "x|y|z|..." block to generate
300      */

301     public abstract void gen(AlternativeBlock blk);
302
303     /** Generate code for the given grammar element.
304      * @param end The block-end element to generate. Block-end
305      * elements are synthesized by the grammar parser to represent
306      * the end of a block.
307      */

308     public abstract void gen(BlockEndElement end);
309
310     /** Generate code for the given grammar element.
311      * @param atom The character literal reference to generate
312      */

313     public abstract void gen(CharLiteralElement atom);
314
315     /** Generate code for the given grammar element.
316      * @param r The character-range reference to generate
317      */

318     public abstract void gen(CharRangeElement r);
319
320     /** Generate the code for a parser */
321     public abstract void gen(LexerGrammar g) throws IOException JavaDoc;
322
323     /** Generate code for the given grammar element.
324      * @param blk The (...)+ block to generate
325      */

326     public abstract void gen(OneOrMoreBlock blk);
327
328     /** Generate the code for a parser */
329     public abstract void gen(ParserGrammar g) throws IOException JavaDoc;
330
331     /** Generate code for the given grammar element.
332      * @param rr The rule-reference to generate
333      */

334     public abstract void gen(RuleRefElement rr);
335
336     /** Generate code for the given grammar element.
337      * @param atom The string-literal reference to generate
338      */

339     public abstract void gen(StringLiteralElement atom);
340
341     /** Generate code for the given grammar element.
342      * @param r The token-range reference to generate
343      */

344     public abstract void gen(TokenRangeElement r);
345
346     /** Generate code for the given grammar element.
347      * @param atom The token-reference to generate
348      */

349     public abstract void gen(TokenRefElement atom);
350
351     /** Generate code for the given grammar element.
352      * @param blk The tree to generate code for.
353      */

354     public abstract void gen(TreeElement t);
355
356     /** Generate the code for a parser */
357     public abstract void gen(TreeWalkerGrammar g) throws IOException JavaDoc;
358
359     /** Generate code for the given grammar element.
360      * @param wc The wildcard element to generate
361      */

362     public abstract void gen(WildcardElement wc);
363
364     /** Generate code for the given grammar element.
365      * @param blk The (...)* block to generate
366      */

367     public abstract void gen(ZeroOrMoreBlock blk);
368
369     /** Generate the token types as a text file for persistence across shared lexer/parser */
370     protected void genTokenInterchange(TokenManager tm) throws IOException JavaDoc {
371         // Open the token output Java file and set the currentOutput stream
372
String JavaDoc fName = tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt;
373         currentOutput = antlrTool.openOutputFile(fName);
374
375         println("// $ANTLR " + antlrTool.version + ": " +
376                 antlrTool.fileMinusPath(antlrTool.grammarFile) +
377                 " -> " +
378                 fName +
379                 "$");
380
381         tabs = 0;
382
383         // Header
384
println(tm.getName() + " // output token vocab name");
385
386         // Generate a definition for each token type
387
Vector v = tm.getVocabulary();
388         for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
389             String JavaDoc s = (String JavaDoc)v.elementAt(i);
390             if (DEBUG_CODE_GENERATOR) {
391                 System.out.println("gen persistence file entry for: " + s);
392             }
393             if (s != null && !s.startsWith("<")) {
394                 // if literal, find label
395
if (s.startsWith("\"")) {
396                     StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
397                     if (sl != null && sl.label != null) {
398                         print(sl.label + "=");
399                     }
400                     println(s + "=" + i);
401                 }
402                 else {
403                     print(s);
404                     // check for a paraphrase
405
TokenSymbol ts = (TokenSymbol)tm.getTokenSymbol(s);
406                     if (ts == null) {
407                         antlrTool.warning("undefined token symbol: " + s);
408                     }
409                     else {
410                         if (ts.getParaphrase() != null) {
411                             print("(" + ts.getParaphrase() + ")");
412                         }
413                     }
414                     println("=" + i);
415                 }
416             }
417         }
418
419         // Close the tokens output file
420
currentOutput.close();
421         currentOutput = null;
422     }
423
424     /** Process a string for an simple expression for use in xx/action.g
425      * it is used to cast simple tokens/references to the right type for
426      * the generated language.
427      * @param str A String.
428      */

429     public String JavaDoc processStringForASTConstructor(String JavaDoc str) {
430         return str;
431     }
432
433     /** Get a string for an expression to generate creation of an AST subtree.
434      * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
435      */

436     public abstract String JavaDoc getASTCreateString(Vector v);
437
438     /** Get a string for an expression to generate creating of an AST node
439      * @param str The text of the arguments to the AST construction
440      */

441     public abstract String JavaDoc getASTCreateString(GrammarAtom atom, String JavaDoc str);
442
443     /** Given the index of a bitset in the bitset list, generate a unique name.
444      * Specific code-generators may want to override this
445      * if the language does not allow '_' or numerals in identifiers.
446      * @param index The index of the bitset in the bitset list.
447      */

448     protected String JavaDoc getBitsetName(int index) {
449         return "_tokenSet_" + index;
450     }
451
452     public static String JavaDoc encodeLexerRuleName(String JavaDoc id) {
453         return "m" + id;
454     }
455
456     public static String JavaDoc decodeLexerRuleName(String JavaDoc id) {
457         if ( id==null ) {
458             return null;
459         }
460         return id.substring(1,id.length());
461     }
462
463     /** Map an identifier to it's corresponding tree-node variable.
464      * This is context-sensitive, depending on the rule and alternative
465      * being generated
466      * @param id The identifier name to map
467      * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned.
468      * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
469      */

470     public abstract String JavaDoc mapTreeId(String JavaDoc id, ActionTransInfo tInfo);
471
472     /** Add a bitset to the list of bitsets to be generated.
473      * if the bitset is already in the list, ignore the request.
474      * Always adds the bitset to the end of the list, so the
475      * caller can rely on the position of bitsets in the list.
476      * The returned position can be used to format the bitset
477      * name, since it is invariant.
478      * @param p Bit set to mark for code generation
479      * @param forParser true if the bitset is used for the parser, false for the lexer
480      * @return The position of the bitset in the list.
481      */

482     protected int markBitsetForGen(BitSet p) {
483         // Is the bitset (or an identical one) already marked for gen?
484
for (int i = 0; i < bitsetsUsed.size(); i++) {
485             BitSet set = (BitSet)bitsetsUsed.elementAt(i);
486             if (p.equals(set)) {
487                 // Use the identical one already stored
488
return i;
489             }
490         }
491
492         // Add the new bitset
493
bitsetsUsed.appendElement(p.clone());
494         return bitsetsUsed.size() - 1;
495     }
496
497     /** Output tab indent followed by a String, to the currentOutput stream.
498      * Ignored if string is null.
499      * @param s The string to output.
500      */

501     protected void print(String JavaDoc s) {
502         if (s != null) {
503             printTabs();
504             currentOutput.print(s);
505         }
506     }
507
508     /** Print an action with leading tabs, attempting to
509      * preserve the current indentation level for multi-line actions
510      * Ignored if string is null.
511      * @param s The action string to output
512      */

513     protected void printAction(String JavaDoc s) {
514         if (s != null) {
515             printTabs();
516             _printAction(s);
517         }
518     }
519
520     /** Output tab indent followed by a String followed by newline,
521      * to the currentOutput stream. Ignored if string is null.
522      * @param s The string to output
523      */

524     protected void println(String JavaDoc s) {
525         if (s != null) {
526             printTabs();
527             currentOutput.println(s);
528         }
529     }
530
531     /** Output the current tab indentation. This outputs the number of tabs
532      * indicated by the "tabs" variable to the currentOutput stream.
533      */

534     protected void printTabs() {
535         for (int i = 1; i <= tabs; i++) {
536             currentOutput.print("\t");
537         }
538     }
539
540     /** Lexically process tree-specifiers in the action.
541      * This will replace #id and #(...) with the appropriate
542      * function calls and/or variables.
543      */

544     protected abstract String JavaDoc processActionForTreeSpecifiers(String JavaDoc actionStr, int line, RuleBlock currentRule, ActionTransInfo tInfo);
545
546     /**
547      * Remove the assignment portion of a declaration, if any.
548      * @param d the declaration
549      * @return the declaration without any assignment portion
550      */

551     protected String JavaDoc removeAssignmentFromDeclaration(String JavaDoc d) {
552         // If d contains an equal sign, then it's a declaration
553
// with an initialization. Strip off the initialization part.
554
if (d.indexOf('=') >= 0) d = d.substring(0, d.indexOf('=')).trim();
555         return d;
556     }
557
558     /** Set all fields back like one just created */
559     private void reset() {
560         tabs = 0;
561         // Allocate list of bitsets tagged for code generation
562
bitsetsUsed = new Vector();
563         currentOutput = null;
564         grammar = null;
565         DEBUG_CODE_GENERATOR = false;
566         makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
567         bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
568     }
569
570     public static String JavaDoc reverseLexerRuleName(String JavaDoc id) {
571         return id.substring(1, id.length());
572     }
573
574     public void setAnalyzer(LLkGrammarAnalyzer analyzer_) {
575         analyzer = analyzer_;
576     }
577
578     public void setBehavior(DefineGrammarSymbols behavior_) {
579         behavior = behavior_;
580     }
581
582     /** Set a grammar for the code generator to use */
583     protected void setGrammar(Grammar g) {
584         reset();
585         grammar = g;
586         // Lookup make-switch threshold in the grammar generic options
587
if (grammar.hasOption("codeGenMakeSwitchThreshold")) {
588             try {
589                 makeSwitchThreshold = grammar.getIntegerOption("codeGenMakeSwitchThreshold");
590                 //System.out.println("setting codeGenMakeSwitchThreshold to " + makeSwitchThreshold);
591
}
592             catch (NumberFormatException JavaDoc e) {
593                 Token tok = grammar.getOption("codeGenMakeSwitchThreshold");
594                 antlrTool.error(
595                     "option 'codeGenMakeSwitchThreshold' must be an integer",
596                     grammar.getClassName(),
597                     tok.getLine(), tok.getColumn()
598                 );
599             }
600         }
601
602         // Lookup bitset-test threshold in the grammar generic options
603
if (grammar.hasOption("codeGenBitsetTestThreshold")) {
604             try {
605                 bitsetTestThreshold = grammar.getIntegerOption("codeGenBitsetTestThreshold");
606                 //System.out.println("setting codeGenBitsetTestThreshold to " + bitsetTestThreshold);
607
}
608             catch (NumberFormatException JavaDoc e) {
609                 Token tok = grammar.getOption("codeGenBitsetTestThreshold");
610                 antlrTool.error(
611                     "option 'codeGenBitsetTestThreshold' must be an integer",
612                     grammar.getClassName(),
613                     tok.getLine(), tok.getColumn()
614                 );
615             }
616         }
617
618         // Lookup debug code-gen in the grammar generic options
619
if (grammar.hasOption("codeGenDebug")) {
620             Token t = grammar.getOption("codeGenDebug");
621             if (t.getText().equals("true")) {
622                 //System.out.println("setting code-generation debug ON");
623
DEBUG_CODE_GENERATOR = true;
624             }
625             else if (t.getText().equals("false")) {
626                 //System.out.println("setting code-generation debug OFF");
627
DEBUG_CODE_GENERATOR = false;
628             }
629             else {
630                 antlrTool.error("option 'codeGenDebug' must be true or false", grammar.getClassName(), t.getLine(), t.getColumn());
631             }
632         }
633     }
634
635     public void setTool(Tool tool) {
636         antlrTool = tool;
637     }
638 }
639
// Popular Tags