KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > antlr > LLkAnalyzer


1 package antlr;
2
3 /* ANTLR Translator Generator
4  * Project led by Terence Parr at http://www.jGuru.com
5  * Software rights: http://www.antlr.org/RIGHTS.html
6  *
7  * $Id: //depot/code/org.antlr/main/main/antlr/LLkAnalyzer.java#7 $
8  */

9
10 import antlr.collections.impl.BitSet;
11 import antlr.collections.impl.Vector;
12
13 /**A linear-approximate LL(k) grammar analzyer.
14  *
15  * All lookahead elements are sets of token types.
16  *
17  * @author Terence Parr, John Lilley
18  * @see antlr.Grammar
19  * @see antlr.Lookahead
20  */

21 public class LLkAnalyzer implements LLkGrammarAnalyzer {
22     // Set "analyzerDebug" to true
23
public boolean DEBUG_ANALYZER = false;
24     private AlternativeBlock currentBlock;
25     protected Tool tool = null;
26     protected Grammar grammar = null;
27     // True if analyzing a lexical grammar
28
protected boolean lexicalAnalysis = false;
29     // Used for formatting bit sets in default (Java) format
30
CharFormatter charFormatter = new JavaCharFormatter();
31
32     /** Create an LLk analyzer */
33     public LLkAnalyzer(Tool tool_) {
34         tool = tool_;
35     }
36
37     /** Return true if someone used the '.' wildcard default idiom.
38      * Either #(. children) or '.' as an alt by itself.
39      */

40     protected boolean altUsesWildcardDefault(Alternative alt) {
41         AlternativeElement head = alt.head;
42         // if element is #(. blah) then check to see if el is root
43
if (head instanceof TreeElement &&
44             ((TreeElement)head).root instanceof WildcardElement) {
45             return true;
46         }
47         if (head instanceof WildcardElement && head.next instanceof BlockEndElement) {
48             return true;
49         }
50         return false;
51     }
52
53     /**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
54      * @return true if the block is deterministic
55      */

56     public boolean deterministic(AlternativeBlock blk) {
57         /** The lookahead depth for this decision */
58         int k = 1; // start at k=1
59
if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")");
60         boolean det = true;
61         int nalts = blk.alternatives.size();
62         AlternativeBlock saveCurrentBlock = currentBlock;
63         Alternative wildcardAlt = null;
64         currentBlock = blk;
65
66         /* don't allow nongreedy (...) blocks */
67         if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
68             tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
69         }
70
71         // SPECIAL CASE: only one alternative. We don't need to check the
72
// determinism, but other code expects the lookahead cache to be
73
// set for the single alt.
74
if (nalts == 1) {
75             AlternativeElement e = blk.getAlternativeAt(0).head;
76             currentBlock.alti = 0;
77             blk.getAlternativeAt(0).cache[1] = e.look(1);
78             blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
79
currentBlock = saveCurrentBlock;
80             return true; // always deterministic for one alt
81
}
82
83         outer:
84             for (int i = 0; i < nalts - 1; i++) {
85                 currentBlock.alti = i;
86                 currentBlock.analysisAlt = i; // which alt are we analyzing?
87
currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
88
// but we need the alt number.
89
inner:
90                     // compare against other alternatives with lookahead depth k
91
for (int j = i + 1; j < nalts; j++) {
92                         currentBlock.altj = j;
93                         if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j);
94                         currentBlock.analysisAlt = j; // which alt are we analyzing?
95
k = 1; // always attempt minimum lookahead possible.
96

97                         // check to see if there is a lookahead depth that distinguishes
98
// between alternatives i and j.
99
Lookahead[] r = new Lookahead[grammar.maxk + 1];
100                         boolean haveAmbiguity;
101                         do {
102                             haveAmbiguity = false;
103                             if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
104                             Lookahead p,q;
105                             p = getAltLookahead(blk, i, k);
106                             q = getAltLookahead(blk, j, k);
107
108                             // compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
109
// Lookahead must be disjoint.
110
if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
111                             if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar));
112                             // r[i] = p.fset.and(q.fset);
113
r[k] = p.intersection(q);
114                             if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString());
115                             if (!r[k].nil()) {
116                                 haveAmbiguity = true;
117                                 k++;
118                             }
119                             // go until no more lookahead to use or no intersection
120
} while (haveAmbiguity && k <= grammar.maxk);
121
122                         Alternative ai = blk.getAlternativeAt(i);
123                         Alternative aj = blk.getAlternativeAt(j);
124                         if (haveAmbiguity) {
125                             det = false;
126                             ai.lookaheadDepth = NONDETERMINISTIC;
127                             aj.lookaheadDepth = NONDETERMINISTIC;
128
129                             /* if ith alt starts with a syntactic predicate, computing the
130                              * lookahead is still done for code generation, but messages
131                              * should not be generated when comparing against alt j.
132                              * Alternatives with syn preds that are unnecessary do
133                              * not result in syn pred try-blocks.
134                              */

135                             if (ai.synPred != null) {
136                                 if (DEBUG_ANALYZER) {
137                                     System.out.println("alt " + i + " has a syn pred");
138                                 }
139                                 // The alt with the (...)=> block is nondeterministic for sure.
140
// If the (...)=> conflicts with alt j, j is nondeterministic.
141
// This prevents alt j from being in any switch statements.
142
// move on to next alternative=>no possible ambiguity!
143
// continue inner;
144
}
145
146                             /* if ith alt starts with a semantic predicate, computing the
147                              * lookahead is still done for code generation, but messages
148                              * should not be generated when comparing against alt j.
149                              */

150                             else if (ai.semPred != null) {
151                                 if (DEBUG_ANALYZER) {
152                                     System.out.println("alt " + i + " has a sem pred");
153                                 }
154                             }
155
156                             /* if jth alt is exactly the wildcard or wildcard root of tree,
157                              * then remove elements from alt i lookahead from alt j's lookahead.
158                              * Don't do an ambiguity warning.
159                              */

160                             else if (altUsesWildcardDefault(aj)) {
161                                 // System.out.println("removing pred sets");
162
// removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
163
wildcardAlt = aj;
164                             }
165
166                             /* If the user specified warnWhenFollowAmbig=false, then we
167                              * can turn off this warning IFF one of the alts is empty;
168                              * that is, it points immediately at the end block.
169                              */

170                             else if (!blk.warnWhenFollowAmbig &&
171                                 (ai.head instanceof BlockEndElement ||
172                                 aj.head instanceof BlockEndElement)) {
173                                 // System.out.println("ai.head pts to "+ai.head.getClass());
174
// System.out.println("aj.head pts to "+aj.head.getClass());
175
}
176
177                             /* If they have the generateAmbigWarnings option off for the block
178                              * then don't generate a warning.
179                              */

180                             else if (!blk.generateAmbigWarnings) {
181                             }
182
183                             /* If greedy=true and *one* empty alt shut off warning. */
184                             else if (blk.greedySet && blk.greedy &&
185                                 ((ai.head instanceof BlockEndElement &&
186                                 !(aj.head instanceof BlockEndElement)) ||
187                                 (aj.head instanceof BlockEndElement &&
188                                 !(ai.head instanceof BlockEndElement)))) {
189                                 // System.out.println("greedy set to true; one alt empty");
190
}
191
192
193                             /* We have no choice, but to report a nondetermism */
194                             else {
195                                 tool.errorHandler.warnAltAmbiguity(
196                                     grammar,
197                                     blk, // the block
198
lexicalAnalysis, // true if lexical
199
grammar.maxk, // depth of ambiguity
200
r, // set of linear ambiguities
201
i, // first ambiguous alternative
202
j // second ambiguous alternative
203
);
204                             }
205                         }
206                         else {
207                             // a lookahead depth, k, was found where i and j do not conflict
208
ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
209                             aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
210                         }
211                     }
212             }
213
214         // finished with block.
215

216         // If had wildcard default clause idiom, remove competing lookahead
217
/*
218           if ( wildcardAlt!=null ) {
219           removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
220           }
221         */

222
223         currentBlock = saveCurrentBlock;
224         return det;
225     }
226
227     /**Is (...)+ block LL(1)? Fill in alternative cache for this block.
228      * @return true if the block is deterministic
229      */

230     public boolean deterministic(OneOrMoreBlock blk) {
231         if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")");
232         AlternativeBlock saveCurrentBlock = currentBlock;
233         currentBlock = blk;
234         boolean blkOk = deterministic((AlternativeBlock)blk);
235         // block has been checked, now check that what follows does not conflict
236
// with the lookahead of the (...)+ block.
237
boolean det = deterministicImpliedPath(blk);
238         currentBlock = saveCurrentBlock;
239         return det && blkOk;
240     }
241
242     /**Is (...)* block LL(1)? Fill in alternative cache for this block.
243      * @return true if the block is deterministic
244      */

245     public boolean deterministic(ZeroOrMoreBlock blk) {
246         if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")");
247         AlternativeBlock saveCurrentBlock = currentBlock;
248         currentBlock = blk;
249         boolean blkOk = deterministic((AlternativeBlock)blk);
250         // block has been checked, now check that what follows does not conflict
251
// with the lookahead of the (...)* block.
252
boolean det = deterministicImpliedPath(blk);
253         currentBlock = saveCurrentBlock;
254         return det && blkOk;
255     }
256
257     /**Is this (...)* or (...)+ block LL(k)?
258      * @return true if the block is deterministic
259      */

260     public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
261         /** The lookahead depth for this decision considering implied exit path */
262         int k;
263         boolean det = true;
264         Vector alts = blk.getAlternatives();
265         int nalts = alts.size();
266         currentBlock.altj = -1; // comparing against implicit optional/exit alt
267

268         if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath");
269         for (int i = 0; i < nalts; i++) { // check follow against all alts
270
Alternative alt = blk.getAlternativeAt(i);
271
272             if (alt.head instanceof BlockEndElement) {
273                 tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
274             }
275
276             k = 1; // assume eac alt is LL(1) with exit branch
277
// check to see if there is a lookahead depth that distinguishes
278
// between alternative i and the exit branch.
279
Lookahead[] r = new Lookahead[grammar.maxk + 1];
280             boolean haveAmbiguity;
281             do {
282                 haveAmbiguity = false;
283                 if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
284                 Lookahead p;
285                 Lookahead follow = blk.next.look(k);
286                 blk.exitCache[k] = follow;
287                 currentBlock.alti = i;
288                 p = getAltLookahead(blk, i, k);
289
290                 if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
291                 if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
292                 //r[k] = follow.fset.and(p.fset);
293
r[k] = follow.intersection(p);
294                 if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]);
295                 if (!r[k].nil()) {
296                     haveAmbiguity = true;
297                     k++;
298                 }
299                 // go until no more lookahead to use or no intersection
300
} while (haveAmbiguity && k <= grammar.maxk);
301
302             if (haveAmbiguity) {
303                 det = false;
304                 alt.lookaheadDepth = NONDETERMINISTIC;
305                 blk.exitLookaheadDepth = NONDETERMINISTIC;
306                 Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);
307
308                 /* If the user specified warnWhenFollowAmbig=false, then we
309                  * can turn off this warning.
310                  */

311                 if (!blk.warnWhenFollowAmbig) {
312                 }
313
314                 /* If they have the generateAmbigWarnings option off for the block
315                  * then don't generate a warning.
316                  */

317                 else if (!blk.generateAmbigWarnings) {
318                 }
319
320                 /* If greedy=true and alt not empty, shut off warning */
321                 else if (blk.greedy == true && blk.greedySet &&
322                     !(ambigAlt.head instanceof BlockEndElement)) {
323                     if (DEBUG_ANALYZER) System.out.println("greedy loop");
324                 }
325
326                 /* If greedy=false then shut off warning...will have
327                  * to add "if FOLLOW break"
328                  * block during code gen to compensate for removal of warning.
329                  */

330                 else if (blk.greedy == false &&
331                     !(ambigAlt.head instanceof BlockEndElement)) {
332                     if (DEBUG_ANALYZER) System.out.println("nongreedy loop");
333                     // if FOLLOW not single k-string (|set[k]| can
334
// be > 1 actually) then must warn them that
335
// loop may terminate incorrectly.
336
// For example, ('a'..'d')+ ("ad"|"cb")
337
if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
338                         tool.warning(new String JavaDoc[]{
339                             "nongreedy block may exit incorrectly due",
340                             "\tto limitations of linear approximate lookahead (first k-1 sets",
341                             "\tin lookahead not singleton)."},
342                                      grammar.getFilename(), blk.getLine(), blk.getColumn());
343                     }
344                 }
345
346                 // no choice but to generate a warning
347
else {
348                     tool.errorHandler.warnAltExitAmbiguity(
349                         grammar,
350                         blk, // the block
351
lexicalAnalysis, // true if lexical
352
grammar.maxk, // depth of ambiguity
353
r, // set of linear ambiguities
354
i // ambiguous alternative
355
);
356                 }
357             }
358             else {
359                 alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
360                 blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
361             }
362         }
363         return det;
364     }
365
366     /**Compute the lookahead set of whatever follows references to
367      * the rule associated witht the FOLLOW block.
368      */

369     public Lookahead FOLLOW(int k, RuleEndElement end) {
370         // what rule are we trying to compute FOLLOW of?
371
RuleBlock rb = (RuleBlock)end.block;
372         // rule name is different in lexer
373
String JavaDoc rule;
374         if (lexicalAnalysis) {
375             rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
376         }
377         else {
378             rule = rb.getRuleName();
379         }
380
381         if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")");
382
383         // are we in the midst of computing this FOLLOW already?
384
if (end.lock[k]) {
385             if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule);
386             return new Lookahead(rule);
387         }
388
389         // Check to see if there is cached value
390
if (end.cache[k] != null) {
391             if (DEBUG_ANALYZER) {
392                 System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
393             }
394             // if the cache is a complete computation then simply return entry
395
if (end.cache[k].cycle == null) {
396                 return (Lookahead)end.cache[k].clone();
397             }
398             // A cache entry exists, but it is a reference to a cyclic computation.
399
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
400             RuleEndElement re = rs.getBlock().endNode;
401             // The other entry may not exist because it is still being
402
// computed when this cycle cache entry was found here.
403
if (re.cache[k] == null) {
404                 // return the cycle...that's all we can do at the moment.
405
return (Lookahead)end.cache[k].clone();
406             }
407             else {
408                 // replace this cache entry with the entry from the referenced computation.
409
// Eventually, this percolates a complete (no cycle reference) cache entry
410
// to this node (or at least gets it closer and closer). This is not
411
// crucial, but makes cache lookup faster as we might have to look up
412
// lots of cycle references before finding a complete reference.
413
end.cache[k] = re.cache[k];
414                 // Return the cache entry associated with the cycle reference.
415
return (Lookahead)re.cache[k].clone();
416             }
417         }
418
419         end.lock[k] = true; // prevent FOLLOW computation cycles
420

421         Lookahead p = new Lookahead();
422
423         RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
424
425         // Walk list of references to this rule to compute FOLLOW
426
for (int i = 0; i < rs.numReferences(); i++) {
427             RuleRefElement rr = rs.getReference(i);
428             if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString());
429             Lookahead q = rr.next.look(k);
430             if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
431             /* If there is a cycle then if the cycle is to the rule for
432              * this end block, you have a cycle to yourself. Remove the
433              * cycle indication--the lookahead is complete.
434              */

435             if (q.cycle != null && q.cycle.equals(rule)) {
436                 q.cycle = null; // don't want cycle to yourself!
437
}
438             // add the lookahead into the current FOLLOW computation set
439
p.combineWith(q);
440             if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
441         }
442
443         end.lock[k] = false; // we're not doing FOLLOW anymore
444

445         // if no rules follow this, it can be a start symbol or called by a start sym.
446
// set the follow to be end of file.
447
if (p.fset.nil() && p.cycle == null) {
448             if (grammar instanceof TreeWalkerGrammar) {
449                 // Tree grammars don't see EOF, they see end of sibling list or
450
// "NULL TREE LOOKAHEAD".
451
p.fset.add(Token.NULL_TREE_LOOKAHEAD);
452             }
453             else if (grammar instanceof LexerGrammar) {
454                 // Lexical grammars use Epsilon to indicate that the end of rule has been hit
455
// EOF would be misleading; any character can follow a token rule not just EOF
456
// as in a grammar (where a start symbol is followed by EOF). There is no
457
// sequence info in a lexer between tokens to indicate what is the last token
458
// to be seen.
459
// p.fset.add(EPSILON_TYPE);
460
p.setEpsilon();
461             }
462             else {
463                 p.fset.add(Token.EOF_TYPE);
464             }
465         }
466
467         // Cache the result of the FOLLOW computation
468
if (DEBUG_ANALYZER) {
469             System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
470         }
471         end.cache[k] = (Lookahead)p.clone();
472
473         return p;
474     }
475
476     private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
477         Lookahead p;
478         Alternative a = blk.getAlternativeAt(alt);
479         AlternativeElement e = a.head;
480         //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
481
if (a.cache[k] == null) {
482             p = e.look(k);
483             a.cache[k] = p;
484         }
485         else {
486             p = a.cache[k];
487         }
488         return p;
489     }
490
491     /**Actions are ignored */
492     public Lookahead look(int k, ActionElement action) {
493         if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")");
494         return action.next.look(k);
495     }
496
497     /**Combine the lookahead computed for each alternative */
498     public Lookahead look(int k, AlternativeBlock blk) {
499         if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")");
500         AlternativeBlock saveCurrentBlock = currentBlock;
501         currentBlock = blk;
502         Lookahead p = new Lookahead();
503         for (int i = 0; i < blk.alternatives.size(); i++) {
504             if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk);
505             // must set analysis alt
506
currentBlock.analysisAlt = i;
507             Alternative alt = blk.getAlternativeAt(i);
508             AlternativeElement elem = alt.head;
509             if (DEBUG_ANALYZER) {
510                 if (alt.head == alt.tail) {
511                     System.out.println("alt " + i + " is empty");
512                 }
513             }
514             Lookahead q = elem.look(k);
515             p.combineWith(q);
516         }
517         if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
518             // Invert the lookahead set
519
if (lexicalAnalysis) {
520                 BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
521                 int[] elems = p.fset.toArray();
522                 for (int j = 0; j < elems.length; j++) {
523                     b.remove(elems[j]);
524                 }
525                 p.fset = b;
526             }
527             else {
528                 p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
529             }
530         }
531         currentBlock = saveCurrentBlock;
532         return p;
533     }
534
535     /**Compute what follows this place-holder node and possibly
536      * what begins the associated loop unless the
537      * node is locked.
538      * <p>
539      * if we hit the end of a loop, we have to include
540      * what tokens can begin the loop as well. If the start
541      * node is locked, then we simply found an empty path
542      * through this subrule while analyzing it. If the
543      * start node is not locked, then this node was hit
544      * during a FOLLOW operation and the FIRST of this
545      * block must be included in that lookahead computation.
546      */

547     public Lookahead look(int k, BlockEndElement end) {
548         if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
549         if (end.lock[k]) {
550             // computation in progress => the tokens we would have
551
// computed (had we not been locked) will be included
552
// in the set by that computation with the lock on this
553
// node.
554
return new Lookahead();
555         }
556
557         Lookahead p;
558
559         /* Hitting the end of a loop means you can see what begins the loop */
560         if (end.block instanceof ZeroOrMoreBlock ||
561             end.block instanceof OneOrMoreBlock) {
562             // compute what can start the block,
563
// but lock end node so we don't do it twice in same
564
// computation.
565
end.lock[k] = true;
566             p = look(k, end.block);
567             end.lock[k] = false;
568         }
569         else {
570             p = new Lookahead();
571         }
572
573         /* Tree blocks do not have any follow because they are children
574          * of what surrounds them. For example, A #(B C) D results in
575          * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
576          * indicates that nothing can follow the last node of tree #(B C)
577          */

578         if (end.block instanceof TreeElement) {
579             p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
580         }
581
582         /* Syntactic predicates such as ( (A)? )=> have no follow per se.
583          * We cannot accurately say what would be matched following a
584          * syntactic predicate (you MIGHT be ok if you said it was whatever
585          * followed the alternative predicted by the predicate). Hence,
586          * (like end-of-token) we return Epsilon to indicate "unknown
587          * lookahead."
588          */

589         else if (end.block instanceof SynPredBlock) {
590             p.setEpsilon();
591         }
592
593         // compute what can follow the block
594
else {
595             Lookahead q = end.block.next.look(k);
596             p.combineWith(q);
597         }
598
599         return p;
600     }
601
602     /**Return this char as the lookahead if k=1.
603      * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
604      * <p>
605      * If the atom has the <tt>not</tt> flag on, then
606      * create the set complement of the tokenType
607      * which is the set of all characters referenced
608      * in the grammar with this char turned off.
609      * Also remove characters from the set that
610      * are currently allocated for predicting
611      * previous alternatives. This avoids ambiguity
612      * messages and is more properly what is meant.
613      * ( 'a' | ~'a' ) implies that the ~'a' is the
614      * "else" clause.
615      * <p>
616      * NOTE: we do <b>NOT</b> include exit path in
617      * the exclusion set. E.g.,
618      * ( 'a' | ~'a' )* 'b'
619      * should exit upon seeing a 'b' during the loop.
620      */

621     public Lookahead look(int k, CharLiteralElement atom) {
622         if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")");
623         // Skip until analysis hits k==1
624
if (k > 1) {
625             return atom.next.look(k - 1);
626         }
627         if (lexicalAnalysis) {
628             if (atom.not) {
629                 BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
630                 if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString());
631                 // remove stuff predicted by preceding alts and follow of block
632
removeCompetingPredictionSets(b, atom);
633                 if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString());
634                 // now remove element that is stated not to be in the set
635
b.clear(atom.getType());
636                 return new Lookahead(b);
637             }
638             else {
639                 return Lookahead.of(atom.getType());
640             }
641         }
642         else {
643             // Should have been avoided by MakeGrammar
644
tool.panic("Character literal reference found in parser");
645             // ... so we make the compiler happy
646
return Lookahead.of(atom.getType());
647         }
648     }
649
650     public Lookahead look(int k, CharRangeElement r) {
651         if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")");
652         // Skip until analysis hits k==1
653
if (k > 1) {
654             return r.next.look(k - 1);
655         }
656         BitSet p = BitSet.of(r.begin);
657         for (int i = r.begin + 1; i <= r.end; i++) {
658             p.add(i);
659         }
660         return new Lookahead(p);
661     }
662
663     public Lookahead look(int k, GrammarAtom atom) {
664         if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])");
665
666         if (lexicalAnalysis) {
667             // MakeGrammar should have created a rule reference instead
668
tool.panic("token reference found in lexer");
669         }
670         // Skip until analysis hits k==1
671
if (k > 1) {
672             return atom.next.look(k - 1);
673         }
674         Lookahead l = Lookahead.of(atom.getType());
675         if (atom.not) {
676             // Invert the lookahead set against the token vocabulary
677
int maxToken = grammar.tokenManager.maxTokenType();
678             l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
679             // remove stuff predicted by preceding alts and follow of block
680
removeCompetingPredictionSets(l.fset, atom);
681         }
682         return l;
683     }
684
685     /**The lookahead of a (...)+ block is the combined lookahead of
686      * all alternatives and, if an empty path is found, the lookahead
687      * of what follows the block.
688      */

689     public Lookahead look(int k, OneOrMoreBlock blk) {
690         if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")");
691         Lookahead p = look(k, (AlternativeBlock)blk);
692         return p;
693     }
694
695     /**Combine the lookahead computed for each alternative.
696      * Lock the node so that no other computation may come back
697      * on itself--infinite loop. This also implies infinite left-recursion
698      * in the grammar (or an error in this algorithm ;)).
699      */

700     public Lookahead look(int k, RuleBlock blk) {
701         if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")");
702         Lookahead p = look(k, (AlternativeBlock)blk);
703         return p;
704     }
705
706     /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
707      * <p>
708      * TJP says 8/12/99: not true anymore:
709      * Lexical rules never compute follow. They set epsilon and
710      * the code generator gens code to check for any character.
711      * The code generator must remove the tokens used to predict
712      * any previous alts in the same block.
713      * <p>
714      * When the last node of a rule is reached and noFOLLOW,
715      * it implies that a "local" FOLLOW will be computed
716      * after this call. I.e.,
717      * <pre>
718      * a : b A;
719      * b : B | ;
720      * c : b C;
721      * </pre>
722      * Here, when computing the look of rule b from rule a,
723      * we want only {B,EPSILON_TYPE} so that look(b A) will
724      * be {B,A} not {B,A,C}.
725      * <p>
726      * if the end block is not locked and the FOLLOW is
727      * wanted, the algorithm must compute the lookahead
728      * of what follows references to this rule. If
729      * end block is locked, FOLLOW will return an empty set
730      * with a cycle to the rule associated with this end block.
731      */

732     public Lookahead look(int k, RuleEndElement end) {
733         if (DEBUG_ANALYZER)
734             System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" +
735                                end.noFOLLOW + "; lock is " + end.lock[k]);
736         if (/*lexicalAnalysis ||*/ end.noFOLLOW) {
737             Lookahead p = new Lookahead();
738             p.setEpsilon();
739             p.epsilonDepth = BitSet.of(k);
740             return p;
741         }
742         Lookahead p = FOLLOW(k, end);
743         return p;
744     }
745
746     /**Compute the lookahead contributed by a rule reference.
747      *
748      * <p>
749      * When computing ruleref lookahead, we don't want the FOLLOW
750      * computation done if an empty path exists for the rule.
751      * The FOLLOW is too loose of a set...we want only to
752      * include the "local" FOLLOW or what can follow this
753      * particular ref to the node. In other words, we use
754      * context information to reduce the complexity of the
755      * analysis and strengthen the parser.
756      *
757      * The noFOLLOW flag is used as a means of restricting
758      * the FOLLOW to a "local" FOLLOW. This variable is
759      * orthogonal to the <tt>lock</tt> variable that prevents
760      * infinite recursion. noFOLLOW does not care about what k is.
761      */

762     public Lookahead look(int k, RuleRefElement rr) {
763         if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")");
764         RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
765         if (rs == null || !rs.defined) {
766             tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
767             return new Lookahead();
768         }
769         RuleBlock rb = rs.getBlock();
770         RuleEndElement end = rb.endNode;
771         boolean saveEnd = end.noFOLLOW;
772         end.noFOLLOW = true;
773         // go off to the rule and get the lookahead (w/o FOLLOW)
774
Lookahead p = look(k, rr.targetRule);
775         if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule);
776         // restore state of end block
777
end.noFOLLOW = saveEnd;
778
779         // check for infinite recursion. If a cycle is returned: trouble!
780
if (p.cycle != null) {
781             tool.error("infinite recursion to rule " + p.cycle + " from rule " +
782                        rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
783         }
784
785         // is the local FOLLOW required?
786
if (p.containsEpsilon()) {
787             if (DEBUG_ANALYZER)
788                 System.out.println("rule ref to " +
789                                    rr.targetRule + " has eps, depth: " + p.epsilonDepth);
790
791             // remove epsilon
792
p.resetEpsilon();
793             // fset.clear(EPSILON_TYPE);
794

795             // for each lookahead depth that saw epsilon
796
int[] depths = p.epsilonDepth.toArray();
797             p.epsilonDepth = null; // clear all epsilon stuff
798
for (int i = 0; i < depths.length; i++) {
799                 int rk = k - (k - depths[i]);
800                 Lookahead q = rr.next.look(rk); // see comments in Lookahead
801
p.combineWith(q);
802             }
803             // note: any of these look() computations for local follow can
804
// set EPSILON in the set again if the end of this rule is found.
805
}
806
807         return p;
808     }
809
810     public Lookahead look(int k, StringLiteralElement atom) {
811         if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")");
812         if (lexicalAnalysis) {
813             // need more lookahead than string can provide?
814
if (k > atom.processedAtomText.length()) {
815                 return atom.next.look(k - atom.processedAtomText.length());
816             }
817             else {
818                 // get char at lookahead depth k, from the processed literal text
819
return Lookahead.of(atom.processedAtomText.charAt(k - 1));
820             }
821         }
822         else {
823             // Skip until analysis hits k==1
824
if (k > 1) {
825                 return atom.next.look(k - 1);
826             }
827             Lookahead l = Lookahead.of(atom.getType());
828             if (atom.not) {
829                 // Invert the lookahead set against the token vocabulary
830
int maxToken = grammar.tokenManager.maxTokenType();
831                 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
832             }
833             return l;
834         }
835     }
836
837     /**The lookahead of a (...)=> block is the lookahead of
838      * what follows the block. By definition, the syntactic
839      * predicate block defies static analysis (you want to try it
840      * out at run-time). The LOOK of (a)=>A B is A for LL(1)
841      * ### is this even called?
842      */

843     public Lookahead look(int k, SynPredBlock blk) {
844         if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")");
845         return blk.next.look(k);
846     }
847
848     public Lookahead look(int k, TokenRangeElement r) {
849         if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")");
850         // Skip until analysis hits k==1
851
if (k > 1) {
852             return r.next.look(k - 1);
853         }
854         BitSet p = BitSet.of(r.begin);
855         for (int i = r.begin + 1; i <= r.end; i++) {
856             p.add(i);
857         }
858         return new Lookahead(p);
859     }
860
861     public Lookahead look(int k, TreeElement t) {
862         if (DEBUG_ANALYZER)
863             System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
864         if (k > 1) {
865             return t.next.look(k - 1);
866         }
867         Lookahead l = null;
868         if (t.root instanceof WildcardElement) {
869             l = t.root.look(1); // compute FIRST set minus previous rows
870
}
871         else {
872             l = Lookahead.of(t.root.getType());
873             if (t.root.not) {
874                 // Invert the lookahead set against the token vocabulary
875
int maxToken = grammar.tokenManager.maxTokenType();
876                 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
877             }
878         }
879         return l;
880     }
881
882     public Lookahead look(int k, WildcardElement wc) {
883         if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")");
884
885         // Skip until analysis hits k==1
886
if (k > 1) {
887             return wc.next.look(k - 1);
888         }
889
890         BitSet b;
891         if (lexicalAnalysis) {
892             // Copy the character vocabulary
893
b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
894         }
895         else {
896             b = new BitSet(1);
897             // Invert the lookahead set against the token vocabulary
898
int maxToken = grammar.tokenManager.maxTokenType();
899             b.notInPlace(Token.MIN_USER_TYPE, maxToken);
900             if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b);
901         }
902
903         // Remove prediction sets from competing alternatives
904
// removeCompetingPredictionSets(b, wc);
905

906         return new Lookahead(b);
907     }
908
909     /** The (...)* element is the combined lookahead of the alternatives and what can
910      * follow the loop.
911      */

912     public Lookahead look(int k, ZeroOrMoreBlock blk) {
913         if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")");
914         Lookahead p = look(k, (AlternativeBlock)blk);
915         Lookahead q = blk.next.look(k);
916         p.combineWith(q);
917         return p;
918     }
919
920     /**Compute the combined lookahead for all productions of a rule.
921      * If the lookahead returns with epsilon, at least one epsilon
922      * path exists (one that consumes no tokens). The noFOLLOW
923      * flag being set for this endruleblk, indicates that the
924      * a rule ref invoked this rule.
925      *
926      * Currently only look(RuleRef) calls this. There is no need
927      * for the code generator to call this.
928      */

929     public Lookahead look(int k, String JavaDoc rule) {
930         if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")");
931         RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
932         RuleBlock rb = rs.getBlock();
933
934         if (rb.lock[k]) {
935             if (DEBUG_ANALYZER)
936                 System.out.println("infinite recursion to rule " + rb.getRuleName());
937             return new Lookahead(rule);
938         }
939
940         // have we computed it before?
941
if (rb.cache[k] != null) {
942             if (DEBUG_ANALYZER) {
943                 System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " +
944                                    rb.cache[k].toString(",", charFormatter, grammar));
945             }
946             return (Lookahead)rb.cache[k].clone();
947         }
948
949         rb.lock[k] = true;
950         Lookahead p = look(k, (RuleBlock)rb);
951         rb.lock[k] = false;
952
953         // cache results
954
rb.cache[k] = (Lookahead)p.clone();
955         if (DEBUG_ANALYZER) {
956             System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " +
957                                rb.cache[k].toString(",", charFormatter, grammar));
958         }
959         return p;
960     }
961
962     /** If the first k-1 sets are singleton sets, the appoximate
963      * lookahead analysis is equivalent to full lookahead analysis.
964      */

965     public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
966         // first k-1 sets degree 1?
967
for (int i = 1; i <= k - 1; i++) {
968             BitSet look = bset[i].fset;
969             if (look.degree() > 1) {
970                 return false;
971             }
972         }
973         return true;
974     }
975
976     /** Remove the prediction sets from preceding alternatives
977      * and follow set, but *only* if this element is the first element
978      * of the alternative. The class members currenBlock and
979      * currentBlock.analysisAlt must be set correctly.
980      * @param b The prediction bitset to be modified
981      * @el The element of interest
982      */

983     private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
984         // Only do this if the element is the first element of the alt,
985
// because we are making an implicit assumption that k==1.
986
GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
987         // if element is #(. blah) then check to see if el is root
988
if (head instanceof TreeElement) {
989             if (((TreeElement)head).root != el) {
990                 return;
991             }
992         }
993         else if (el != head) {
994             return;
995         }
996         for (int i = 0; i < currentBlock.analysisAlt; i++) {
997             AlternativeElement e = currentBlock.getAlternativeAt(i).head;
998             b.subtractInPlace(e.look(1).fset);
999         }
1000    }
1001
1002    /** Remove the prediction sets from preceding alternatives
1003     * The class members currenBlock must be set correctly.
1004     * Remove prediction sets from 1..k.
1005     * @param look The prediction lookahead to be modified
1006     * @el The element of interest
1007     * @k How deep into lookahead to modify
1008     */

1009    private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) {
1010        for (int d = 1; d <= k; d++) {
1011            for (int i = 0; i < currentBlock.analysisAlt; i++) {
1012                AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1013                look[d].fset.subtractInPlace(e.look(d).fset);
1014            }
1015        }
1016    }
1017
1018    /** reset the analyzer so it looks like a new one */
1019    private void reset() {
1020        grammar = null;
1021        DEBUG_ANALYZER = false;
1022        currentBlock = null;
1023        lexicalAnalysis = false;
1024    }
1025
1026    /** Set the grammar for the analyzer */
1027    public void setGrammar(Grammar g) {
1028        if (grammar != null) {
1029            reset();
1030        }
1031        grammar = g;
1032
1033        // Is this lexical?
1034
lexicalAnalysis = (grammar instanceof LexerGrammar);
1035        DEBUG_ANALYZER = grammar.analyzerDebug;
1036    }
1037
1038    public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer) {
1039        if (
1040            blk instanceof ZeroOrMoreBlock ||
1041            blk instanceof OneOrMoreBlock ||
1042            blk instanceof SynPredBlock
1043        ) {
1044            return false;
1045        }
1046        // Cannot invert an empty subrule
1047
if (blk.alternatives.size() == 0) {
1048            return false;
1049        }
1050        // The block must only contain alternatives with a single element,
1051
// where each element is a char, token, char range, or token range.
1052
for (int i = 0; i < blk.alternatives.size(); i++) {
1053            Alternative alt = blk.getAlternativeAt(i);
1054            // Cannot have anything interesting in the alternative ...
1055
if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
1056                return false;
1057            }
1058            // ... and there must be one simple element
1059
AlternativeElement elt = alt.head;
1060            if (
1061                !(
1062                elt instanceof CharLiteralElement ||
1063                elt instanceof TokenRefElement ||
1064                elt instanceof CharRangeElement ||
1065                elt instanceof TokenRangeElement ||
1066                (elt instanceof StringLiteralElement && !forLexer)
1067                ) ||
1068                !(elt.next instanceof BlockEndElement) ||
1069                elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
1070            ) {
1071                return false;
1072            }
1073        }
1074        return true;
1075    }
1076}
1077
Popular Tags