CodeGenerator


1   package antlr;
2   
3   /* ANTLR Translator Generator
4    * Project led by Terence Parr at http://www.jGuru.com
5    * Software rights: http://www.antlr.org/RIGHTS.html
6    *
7    * $Id: //depot/code/org.antlr/main/main/antlr/CodeGenerator.java#10 $
8    */
9   
10  import java.io.PrintWriter  ;
11  import java.io.IOException  ;
12  import java.io.FileWriter  ;
13  
14  import antlr.collections.impl.Vector;
15  import antlr.collections.impl.BitSet;
16  
17  /**A generic ANTLR code generator.  All code generators
18   * Derive from this class.
19   *
20   * <p>
21   * A CodeGenerator knows about a Grammar data structure and
22   * a grammar analyzer.  The Grammar is walked to generate the
23   * appropriate code for both a parser and lexer (if present).
24   * This interface may change slightly so that the lexer is
25   * itself living inside of a Grammar object (in which case,
26   * this class generates only one recognizer).  The main method
27   * to call is <tt>gen()</tt>, which initiates all code gen.
28   *
29   * <p>
30   * The interaction of the code generator with the analyzer is
31   * simple: each subrule block calls deterministic() before generating
32   * code for the block.  Method deterministic() sets lookahead caches
33   * in each Alternative object.  Technically, a code generator
34   * doesn't need the grammar analyzer if all lookahead analysis
35   * is done at runtime, but this would result in a slower parser.
36   *
37   * <p>
38   * This class provides a set of support utilities to handle argument
39   * list parsing and so on.
40   *
41   * @author  Terence Parr, John Lilley
42   * @version 2.00a
43   * @see     antlr.JavaCodeGenerator
44   * @see     antlr.DiagnosticCodeGenerator
45   * @see     antlr.LLkAnalyzer
46   * @see     antlr.Grammar
47   * @see     antlr.AlternativeElement
48   * @see     antlr.Lookahead
49   */
50  public abstract class CodeGenerator {
51      protected antlr.Tool antlrTool;
52  
53      /** Current tab indentation for code output */
54      protected int tabs = 0;
55  
56      /** Current output Stream */
57      transient protected PrintWriter   currentOutput; // SAS: for proper text i/o
58  
59      /** The grammar for which we generate code */
60      protected Grammar grammar = null;
61  
62      /** List of all bitsets that must be dumped.  These are Vectors of BitSet. */
63      protected Vector bitsetsUsed;
64  
65      /** The grammar behavior */
66      protected DefineGrammarSymbols behavior;
67  
68      /** The LLk analyzer */
69      protected LLkGrammarAnalyzer analyzer;
70  
71      /** Object used to format characters in the target language.
72       * subclass must initialize this to the language-specific formatter
73       */
74      protected CharFormatter charFormatter;
75  
76      /** Use option "codeGenDebug" to generate debugging output */
77      protected boolean DEBUG_CODE_GENERATOR = false;
78  
79      /** Default values for code-generation thresholds */
80      protected static final int DEFAULT_MAKE_SWITCH_THRESHOLD = 2;
81      protected static final int DEFAULT_BITSET_TEST_THRESHOLD = 4;
82  
83      /** If there are more than 8 long words to init in a bitset,
84       *  try to optimize it; e.g., detect runs of -1L and 0L.
85       */
86      protected static final int BITSET_OPTIMIZE_INIT_THRESHOLD = 8;
87  
88      /** This is a hint for the language-specific code generator.
89       * A switch() or language-specific equivalent will be generated instead
90       * of a series of if/else statements for blocks with number of alternates
91       * greater than or equal to this number of non-predicated LL(1) alternates.
92       * This is modified by the grammar option "codeGenMakeSwitchThreshold"
93       */
94      protected int makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
95  
96      /** This is a hint for the language-specific code generator.
97       * A bitset membership test will be generated instead of an
98       * ORed series of LA(k) comparisions for lookahead sets with
99       * degree greater than or equal to this value.
100      * This is modified by the grammar option "codeGenBitsetTestThreshold"
101      */
102     protected int bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
103 
104     private static boolean OLD_ACTION_TRANSLATOR = true;
105 
106     public static String   TokenTypesFileSuffix = "TokenTypes";
107     public static String   TokenTypesFileExt = ".txt";
108 
109     /** Construct code generator base class */
110     public CodeGenerator() {
111     }
112 
113     /** Output a String to the currentOutput stream.
114      * Ignored if string is null.
115      * @param s The string to output
116      */
117     protected void _print(String   s) {
118         if (s != null) {
119             currentOutput.print(s);
120         }
121     }
122 
123     /** Print an action without leading tabs, attempting to
124      * preserve the current indentation level for multi-line actions
125      * Ignored if string is null.
126      * @param s The action string to output
127      */
128     protected void _printAction(String   s) {
129         if (s == null) {
130             return;
131         }
132 
133         // Skip leading newlines, tabs and spaces
134         int start = 0;
135         while (start < s.length() && Character.isSpaceChar(s.charAt(start))) {
136             start++;
137         }
138 
139         // Skip leading newlines, tabs and spaces
140         int end = s.length() - 1;
141         while (end > start && Character.isSpaceChar(s.charAt(end))) {
142             end--;
143         }
144 
145         char c = 0;
146         for (int i = start; i <= end;) {
147             c = s.charAt(i);
148             i++;
149             boolean newline = false;
150             switch (c) {
151                 case '\n':
152                     newline = true;
153                     break;
154                 case '\r':
155                     if (i <= end && s.charAt(i) == '\n') {
156                         i++;
157                     }
158                     newline = true;
159                     break;
160                 default:
161                     currentOutput.print(c);
162                     break;
163             }
164             if (newline) {
165                 currentOutput.println();
166                 printTabs();
167                 // Absorb leading whitespace
168                 while (i <= end && Character.isSpaceChar(s.charAt(i))) {
169                     i++;
170                 }
171                 newline = false;
172             }
173         }
174         currentOutput.println();
175     }
176 
177     /** Output a String followed by newline, to the currentOutput stream.
178      * Ignored if string is null.
179      * @param s The string to output
180      */
181     protected void _println(String   s) {
182         if (s != null) {
183             currentOutput.println(s);
184         }
185     }
186 
187     /** Test if a set element array represents a contiguous range.
188      * @param elems The array of elements representing the set, usually from BitSet.toArray().
189      * @return true if the elements are a contiguous range (with two or more).
190      */
191     public static boolean elementsAreRange(int[] elems) {
192         if (elems.length == 0) {
193             return false;
194         }
195         int begin = elems[0];
196         int end = elems[elems.length - 1];
197         if (elems.length <= 2) {
198             // Not enough elements for a range expression
199             return false;
200         }
201         if (end - begin + 1 > elems.length) {
202             // The set does not represent a contiguous range
203             return false;
204         }
205         int v = begin + 1;
206         for (int i = 1; i < elems.length - 1; i++) {
207             if (v != elems[i]) {
208                 // The set does not represent a contiguous range
209                 return false;
210             }
211             v++;
212         }
213         return true;
214     }
215 
216     /** Get the identifier portion of an argument-action token.
217      * The ID of an action is assumed to be a trailing identifier.
218      * Specific code-generators may want to override this
219      * if the language has unusual declaration syntax.
220      * @param t The action token
221      * @return A string containing the text of the identifier
222      */
223     protected String   extractIdOfAction(Token t) {
224         return extractIdOfAction(t.getText(), t.getLine(), t.getColumn());
225     }
226 
227     /** Get the identifier portion of an argument-action.
228      * The ID of an action is assumed to be a trailing identifier.
229      * Specific code-generators may want to override this
230      * if the language has unusual declaration syntax.
231      * @param s The action text
232      * @param line Line used for error reporting.
233      * @param column Line used for error reporting.
234      * @return A string containing the text of the identifier
235      */
236     protected String   extractIdOfAction(String   s, int line, int column) {
237         s = removeAssignmentFromDeclaration(s);
238         // Search back from the end for a non alphanumeric.  That marks the
239         // beginning of the identifier
240         for (int i = s.length() - 2; i >= 0; i--) {
241             // TODO: make this work for language-independent identifiers?
242             if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_') {
243                 // Found end of type part
244                 return s.substring(i + 1);
245             }
246         }
247         // Something is bogus, but we cannot parse the language-specific
248         // actions any better.  The compiler will have to catch the problem.
249         antlrTool.warning("Ill-formed action", grammar.getFilename(), line, column);
250         return "";
251     }
252 
253     /** Get the type string out of an argument-action token.
254      * The type of an action is assumed to precede a trailing identifier
255      * Specific code-generators may want to override this
256      * if the language has unusual declaration syntax.
257      * @param t The action token
258      * @return A string containing the text of the type
259      */
260     protected String   extractTypeOfAction(Token t) {
261         return extractTypeOfAction(t.getText(), t.getLine(), t.getColumn());
262     }
263 
264     /** Get the type portion of an argument-action.
265      * The type of an action is assumed to precede a trailing identifier
266      * Specific code-generators may want to override this
267      * if the language has unusual declaration syntax.
268      * @param s The action text
269      * @param line Line used for error reporting.
270      * @return A string containing the text of the type
271      */
272     protected String   extractTypeOfAction(String   s, int line, int column) {
273         s = removeAssignmentFromDeclaration(s);
274         // Search back from the end for a non alphanumeric.  That marks the
275         // beginning of the identifier
276         for (int i = s.length() - 2; i >= 0; i--) {
277             // TODO: make this work for language-independent identifiers?
278             if (!Character.isLetterOrDigit(s.charAt(i)) && s.charAt(i) != '_') {
279                 // Found end of type part
280                 return s.substring(0, i + 1);
281             }
282         }
283         // Something is bogus, but we cannot parse the language-specific
284         // actions any better.  The compiler will have to catch the problem.
285         antlrTool.warning("Ill-formed action", grammar.getFilename(), line, column);
286         return "";
287     }
288 
289     /** Generate the code for all grammars
290      */
291     public abstract void gen();
292 
293     /** Generate code for the given grammar element.
294      * @param action The {...} action to generate
295      */
296     public abstract void gen(ActionElement action);
297 
298     /** Generate code for the given grammar element.
299      * @param blk The "x|y|z|..." block to generate
300      */
301     public abstract void gen(AlternativeBlock blk);
302 
303     /** Generate code for the given grammar element.
304      * @param end The block-end element to generate.  Block-end
305      * elements are synthesized by the grammar parser to represent
306      * the end of a block.
307      */
308     public abstract void gen(BlockEndElement end);
309 
310     /** Generate code for the given grammar element.
311      * @param atom The character literal reference to generate
312      */
313     public abstract void gen(CharLiteralElement atom);
314 
315     /** Generate code for the given grammar element.
316      * @param r The character-range reference to generate
317      */
318     public abstract void gen(CharRangeElement r);
319 
320     /** Generate the code for a parser */
321     public abstract void gen(LexerGrammar g) throws IOException  ;
322 
323     /** Generate code for the given grammar element.
324      * @param blk The (...)+ block to generate
325      */
326     public abstract void gen(OneOrMoreBlock blk);
327 
328     /** Generate the code for a parser */
329     public abstract void gen(ParserGrammar g) throws IOException  ;
330 
331     /** Generate code for the given grammar element.
332      * @param rr The rule-reference to generate
333      */
334     public abstract void gen(RuleRefElement rr);
335 
336     /** Generate code for the given grammar element.
337      * @param atom The string-literal reference to generate
338      */
339     public abstract void gen(StringLiteralElement atom);
340 
341     /** Generate code for the given grammar element.
342      * @param r The token-range reference to generate
343      */
344     public abstract void gen(TokenRangeElement r);
345 
346     /** Generate code for the given grammar element.
347      * @param atom The token-reference to generate
348      */
349     public abstract void gen(TokenRefElement atom);
350 
351     /** Generate code for the given grammar element.
352      * @param blk The tree to generate code for.
353      */
354     public abstract void gen(TreeElement t);
355 
356     /** Generate the code for a parser */
357     public abstract void gen(TreeWalkerGrammar g) throws IOException  ;
358 
359     /** Generate code for the given grammar element.
360      * @param wc The wildcard element to generate
361      */
362     public abstract void gen(WildcardElement wc);
363 
364     /** Generate code for the given grammar element.
365      * @param blk The (...)* block to generate
366      */
367     public abstract void gen(ZeroOrMoreBlock blk);
368 
369     /** Generate the token types as a text file for persistence across shared lexer/parser */
370     protected void genTokenInterchange(TokenManager tm) throws IOException   {
371         // Open the token output Java file and set the currentOutput stream
372         String   fName = tm.getName() + TokenTypesFileSuffix + TokenTypesFileExt;
373         currentOutput = antlrTool.openOutputFile(fName);
374 
375         println("// $ANTLR " + antlrTool.version + ": " +
376                 antlrTool.fileMinusPath(antlrTool.grammarFile) +
377                 " -> " +
378                 fName +
379                 "$");
380 
381         tabs = 0;
382 
383         // Header
384         println(tm.getName() + "    // output token vocab name");
385 
386         // Generate a definition for each token type
387         Vector v = tm.getVocabulary();
388         for (int i = Token.MIN_USER_TYPE; i < v.size(); i++) {
389             String   s = (String  )v.elementAt(i);
390             if (DEBUG_CODE_GENERATOR) {
391                 System.out.println("gen persistence file entry for: " + s);
392             }
393             if (s != null && !s.startsWith("<")) {
394                 // if literal, find label
395                 if (s.startsWith("\"")) {
396                     StringLiteralSymbol sl = (StringLiteralSymbol)tm.getTokenSymbol(s);
397                     if (sl != null && sl.label != null) {
398                         print(sl.label + "=");
399                     }
400                     println(s + "=" + i);
401                 }
402                 else {
403                     print(s);
404                     // check for a paraphrase
405                     TokenSymbol ts = (TokenSymbol)tm.getTokenSymbol(s);
406                     if (ts == null) {
407                         antlrTool.warning("undefined token symbol: " + s);
408                     }
409                     else {
410                         if (ts.getParaphrase() != null) {
411                             print("(" + ts.getParaphrase() + ")");
412                         }
413                     }
414                     println("=" + i);
415                 }
416             }
417         }
418 
419         // Close the tokens output file
420         currentOutput.close();
421         currentOutput = null;
422     }
423 
424     /** Process a string for an simple expression for use in xx/action.g
425      * it is used to cast simple tokens/references to the right type for
426      * the generated language.
427      * @param str A String.
428      */
429     public String   processStringForASTConstructor(String   str) {
430         return str;
431     }
432 
433     /** Get a string for an expression to generate creation of an AST subtree.
434      * @param v A Vector of String, where each element is an expression in the target language yielding an AST node.
435      */
436     public abstract String   getASTCreateString(Vector v);
437 
438     /** Get a string for an expression to generate creating of an AST node
439      * @param str The text of the arguments to the AST construction
440      */
441     public abstract String   getASTCreateString(GrammarAtom atom, String   str);
442 
443     /** Given the index of a bitset in the bitset list, generate a unique name.
444      * Specific code-generators may want to override this
445      * if the language does not allow '_' or numerals in identifiers.
446      * @param index  The index of the bitset in the bitset list.
447      */
448     protected String   getBitsetName(int index) {
449         return "_tokenSet_" + index;
450     }
451 
452     public static String   encodeLexerRuleName(String   id) {
453         return "m" + id;
454     }
455 
456     public static String   decodeLexerRuleName(String   id) {
457         if ( id==null ) {
458             return null;
459         }
460         return id.substring(1,id.length());
461     }
462 
463     /** Map an identifier to it's corresponding tree-node variable.
464      * This is context-sensitive, depending on the rule and alternative
465      * being generated
466      * @param id The identifier name to map
467      * @param forInput true if the input tree node variable is to be returned, otherwise the output variable is returned.
468      * @return The mapped id (which may be the same as the input), or null if the mapping is invalid due to duplicates
469      */
470     public abstract String   mapTreeId(String   id, ActionTransInfo tInfo);
471 
472     /** Add a bitset to the list of bitsets to be generated.
473      * if the bitset is already in the list, ignore the request.
474      * Always adds the bitset to the end of the list, so the
475      * caller can rely on the position of bitsets in the list.
476      * The returned position can be used to format the bitset
477      * name, since it is invariant.
478      * @param p Bit set to mark for code generation
479      * @param forParser true if the bitset is used for the parser, false for the lexer
480      * @return The position of the bitset in the list.
481      */
482     protected int markBitsetForGen(BitSet p) {
483         // Is the bitset (or an identical one) already marked for gen?
484         for (int i = 0; i < bitsetsUsed.size(); i++) {
485             BitSet set = (BitSet)bitsetsUsed.elementAt(i);
486             if (p.equals(set)) {
487                 // Use the identical one already stored
488                 return i;
489             }
490         }
491 
492         // Add the new bitset
493         bitsetsUsed.appendElement(p.clone());
494         return bitsetsUsed.size() - 1;
495     }
496 
497     /** Output tab indent followed by a String, to the currentOutput stream.
498      * Ignored if string is null.
499      * @param s The string to output.
500      */
501     protected void print(String   s) {
502         if (s != null) {
503             printTabs();
504             currentOutput.print(s);
505         }
506     }
507 
508     /** Print an action with leading tabs, attempting to
509      * preserve the current indentation level for multi-line actions
510      * Ignored if string is null.
511      * @param s The action string to output
512      */
513     protected void printAction(String   s) {
514         if (s != null) {
515             printTabs();
516             _printAction(s);
517         }
518     }
519 
520     /** Output tab indent followed by a String followed by newline,
521      * to the currentOutput stream.  Ignored if string is null.
522      * @param s The string to output
523      */
524     protected void println(String   s) {
525         if (s != null) {
526             printTabs();
527             currentOutput.println(s);
528         }
529     }
530 
531     /** Output the current tab indentation.  This outputs the number of tabs
532      * indicated by the "tabs" variable to the currentOutput stream.
533      */
534     protected void printTabs() {
535         for (int i = 1; i <= tabs; i++) {
536             currentOutput.print("\t");
537         }
538     }
539 
540     /** Lexically process tree-specifiers in the action.
541      *  This will replace #id and #(...) with the appropriate
542      *  function calls and/or variables.
543      */
544     protected abstract String   processActionForTreeSpecifiers(String   actionStr, int line, RuleBlock currentRule, ActionTransInfo tInfo);
545 
546     /**
547      * Remove the assignment portion of a declaration, if any.
548      * @param d the declaration
549      * @return the declaration without any assignment portion
550      */
551     protected String   removeAssignmentFromDeclaration(String   d) {
552         // If d contains an equal sign, then it's a declaration
553         // with an initialization.  Strip off the initialization part.
554         if (d.indexOf('=') >= 0) d = d.substring(0, d.indexOf('=')).trim();
555         return d;
556     }
557 
558     /** Set all fields back like one just created */
559     private void reset() {
560         tabs = 0;
561         // Allocate list of bitsets tagged for code generation
562         bitsetsUsed = new Vector();
563         currentOutput = null;
564         grammar = null;
565         DEBUG_CODE_GENERATOR = false;
566         makeSwitchThreshold = DEFAULT_MAKE_SWITCH_THRESHOLD;
567         bitsetTestThreshold = DEFAULT_BITSET_TEST_THRESHOLD;
568     }
569 
570     public static String   reverseLexerRuleName(String   id) {
571         return id.substring(1, id.length());
572     }
573 
574     public void setAnalyzer(LLkGrammarAnalyzer analyzer_) {
575         analyzer = analyzer_;
576     }
577 
578     public void setBehavior(DefineGrammarSymbols behavior_) {
579         behavior = behavior_;
580     }
581 
582     /** Set a grammar for the code generator to use */
583     protected void setGrammar(Grammar g) {
584         reset();
585         grammar = g;
586         // Lookup make-switch threshold in the grammar generic options
587         if (grammar.hasOption("codeGenMakeSwitchThreshold")) {
588             try {
589                 makeSwitchThreshold = grammar.getIntegerOption("codeGenMakeSwitchThreshold");
590                 //System.out.println("setting codeGenMakeSwitchThreshold to " + makeSwitchThreshold);
591             }
592             catch (NumberFormatException   e) {
593                 Token tok = grammar.getOption("codeGenMakeSwitchThreshold");
594                 antlrTool.error(
595                     "option 'codeGenMakeSwitchThreshold' must be an integer",
596                     grammar.getClassName(),
597                     tok.getLine(), tok.getColumn()
598                 );
599             }
600         }
601 
602         // Lookup bitset-test threshold in the grammar generic options
603         if (grammar.hasOption("codeGenBitsetTestThreshold")) {
604             try {
605                 bitsetTestThreshold = grammar.getIntegerOption("codeGenBitsetTestThreshold");
606                 //System.out.println("setting codeGenBitsetTestThreshold to " + bitsetTestThreshold);
607             }
608             catch (NumberFormatException   e) {
609                 Token tok = grammar.getOption("codeGenBitsetTestThreshold");
610                 antlrTool.error(
611                     "option 'codeGenBitsetTestThreshold' must be an integer",
612                     grammar.getClassName(),
613                     tok.getLine(), tok.getColumn()
614                 );
615             }
616         }
617 
618         // Lookup debug code-gen in the grammar generic options
619         if (grammar.hasOption("codeGenDebug")) {
620             Token t = grammar.getOption("codeGenDebug");
621             if (t.getText().equals("true")) {
622                 //System.out.println("setting code-generation debug ON");
623                 DEBUG_CODE_GENERATOR = true;
624             }
625             else if (t.getText().equals("false")) {
626                 //System.out.println("setting code-generation debug OFF");
627                 DEBUG_CODE_GENERATOR = false;
628             }
629             else {
630                 antlrTool.error("option 'codeGenDebug' must be true or false", grammar.getClassName(), t.getLine(), t.getColumn());
631             }
632         }
633     }
634 
635     public void setTool(Tool tool) {
636         antlrTool = tool;
637     }
638 }
639
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags