KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > persistence > antlr > LLkAnalyzer


1 package persistence.antlr;
2
3 /* ANTLR Translator Generator
4  * Project led by Terence Parr at http://www.jGuru.com
5  * Software rights: http://www.antlr.org/license.html
6  *
7  */

8
9 import persistence.antlr.collections.impl.BitSet;
10 import persistence.antlr.collections.impl.Vector;
11
12 /**A linear-approximate LL(k) grammar analzyer.
13  *
14  * All lookahead elements are sets of token types.
15  *
16  * @author Terence Parr, John Lilley
17  * @see persistence.antlr.Grammar
18  * @see persistence.antlr.Lookahead
19  */

20 public class LLkAnalyzer implements LLkGrammarAnalyzer {
21     // Set "analyzerDebug" to true
22
public boolean DEBUG_ANALYZER = false;
23     private AlternativeBlock currentBlock;
24     protected Tool tool = null;
25     protected Grammar grammar = null;
26     // True if analyzing a lexical grammar
27
protected boolean lexicalAnalysis = false;
28     // Used for formatting bit sets in default (Java) format
29
CharFormatter charFormatter = new JavaCharFormatter();
30
31     /** Create an LLk analyzer */
32     public LLkAnalyzer(Tool tool_) {
33         tool = tool_;
34     }
35
36     /** Return true if someone used the '.' wildcard default idiom.
37      * Either #(. children) or '.' as an alt by itself.
38      */

39     protected boolean altUsesWildcardDefault(Alternative alt) {
40         AlternativeElement head = alt.head;
41         // if element is #(. blah) then check to see if el is root
42
if (head instanceof TreeElement &&
43             ((TreeElement)head).root instanceof WildcardElement) {
44             return true;
45         }
46         if (head instanceof WildcardElement && head.next instanceof BlockEndElement) {
47             return true;
48         }
49         return false;
50     }
51
52     /**Is this block of alternatives LL(k)? Fill in alternative cache for this block.
53      * @return true if the block is deterministic
54      */

55     public boolean deterministic(AlternativeBlock blk) {
56         /** The lookahead depth for this decision */
57         int k = 1; // start at k=1
58
if (DEBUG_ANALYZER) System.out.println("deterministic(" + blk + ")");
59         boolean det = true;
60         int nalts = blk.alternatives.size();
61         AlternativeBlock saveCurrentBlock = currentBlock;
62         Alternative wildcardAlt = null;
63         currentBlock = blk;
64
65         /* don't allow nongreedy (...) blocks */
66         if (blk.greedy == false && !(blk instanceof OneOrMoreBlock) && !(blk instanceof ZeroOrMoreBlock)) {
67             tool.warning("Being nongreedy only makes sense for (...)+ and (...)*", grammar.getFilename(), blk.getLine(), blk.getColumn());
68         }
69
70         // SPECIAL CASE: only one alternative. We don't need to check the
71
// determinism, but other code expects the lookahead cache to be
72
// set for the single alt.
73
if (nalts == 1) {
74             AlternativeElement e = blk.getAlternativeAt(0).head;
75             currentBlock.alti = 0;
76             blk.getAlternativeAt(0).cache[1] = e.look(1);
77             blk.getAlternativeAt(0).lookaheadDepth = 1; // set lookahead to LL(1)
78
currentBlock = saveCurrentBlock;
79             return true; // always deterministic for one alt
80
}
81
82         outer:
83             for (int i = 0; i < nalts - 1; i++) {
84                 currentBlock.alti = i;
85                 currentBlock.analysisAlt = i; // which alt are we analyzing?
86
currentBlock.altj = i + 1; // reset this alt. Haven't computed yet,
87
// but we need the alt number.
88
inner:
89                     // compare against other alternatives with lookahead depth k
90
for (int j = i + 1; j < nalts; j++) {
91                         currentBlock.altj = j;
92                         if (DEBUG_ANALYZER) System.out.println("comparing " + i + " against alt " + j);
93                         currentBlock.analysisAlt = j; // which alt are we analyzing?
94
k = 1; // always attempt minimum lookahead possible.
95

96                         // check to see if there is a lookahead depth that distinguishes
97
// between alternatives i and j.
98
Lookahead[] r = new Lookahead[grammar.maxk + 1];
99                         boolean haveAmbiguity;
100                         do {
101                             haveAmbiguity = false;
102                             if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
103                             Lookahead p,q;
104                             p = getAltLookahead(blk, i, k);
105                             q = getAltLookahead(blk, j, k);
106
107                             // compare LOOK(alt i) with LOOK(alt j). Is there an intersection?
108
// Lookahead must be disjoint.
109
if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
110                             if (DEBUG_ANALYZER) System.out.println("q is " + q.toString(",", charFormatter, grammar));
111                             // r[i] = p.fset.and(q.fset);
112
r[k] = p.intersection(q);
113                             if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k].toString());
114                             if (!r[k].nil()) {
115                                 haveAmbiguity = true;
116                                 k++;
117                             }
118                             // go until no more lookahead to use or no intersection
119
} while (haveAmbiguity && k <= grammar.maxk);
120
121                         Alternative ai = blk.getAlternativeAt(i);
122                         Alternative aj = blk.getAlternativeAt(j);
123                         if (haveAmbiguity) {
124                             det = false;
125                             ai.lookaheadDepth = NONDETERMINISTIC;
126                             aj.lookaheadDepth = NONDETERMINISTIC;
127
128                             /* if ith alt starts with a syntactic predicate, computing the
129                              * lookahead is still done for code generation, but messages
130                              * should not be generated when comparing against alt j.
131                              * Alternatives with syn preds that are unnecessary do
132                              * not result in syn pred try-blocks.
133                              */

134                             if (ai.synPred != null) {
135                                 if (DEBUG_ANALYZER) {
136                                     System.out.println("alt " + i + " has a syn pred");
137                                 }
138                                 // The alt with the (...)=> block is nondeterministic for sure.
139
// If the (...)=> conflicts with alt j, j is nondeterministic.
140
// This prevents alt j from being in any switch statements.
141
// move on to next alternative=>no possible ambiguity!
142
// continue inner;
143
}
144
145                             /* if ith alt starts with a semantic predicate, computing the
146                              * lookahead is still done for code generation, but messages
147                              * should not be generated when comparing against alt j.
148                              */

149                             else if (ai.semPred != null) {
150                                 if (DEBUG_ANALYZER) {
151                                     System.out.println("alt " + i + " has a sem pred");
152                                 }
153                             }
154
155                             /* if jth alt is exactly the wildcard or wildcard root of tree,
156                              * then remove elements from alt i lookahead from alt j's lookahead.
157                              * Don't do an ambiguity warning.
158                              */

159                             else if (altUsesWildcardDefault(aj)) {
160                                 // System.out.println("removing pred sets");
161
// removeCompetingPredictionSetsFromWildcard(aj.cache, aj.head, grammar.maxk);
162
wildcardAlt = aj;
163                             }
164
165                             /* If the user specified warnWhenFollowAmbig=false, then we
166                              * can turn off this warning IFF one of the alts is empty;
167                              * that is, it points immediately at the end block.
168                              */

169                             else if (!blk.warnWhenFollowAmbig &&
170                                 (ai.head instanceof BlockEndElement ||
171                                 aj.head instanceof BlockEndElement)) {
172                                 // System.out.println("ai.head pts to "+ai.head.getClass());
173
// System.out.println("aj.head pts to "+aj.head.getClass());
174
}
175
176                             /* If they have the generateAmbigWarnings option off for the block
177                              * then don't generate a warning.
178                              */

179                             else if (!blk.generateAmbigWarnings) {
180                             }
181
182                             /* If greedy=true and *one* empty alt shut off warning. */
183                             else if (blk.greedySet && blk.greedy &&
184                                 ((ai.head instanceof BlockEndElement &&
185                                 !(aj.head instanceof BlockEndElement)) ||
186                                 (aj.head instanceof BlockEndElement &&
187                                 !(ai.head instanceof BlockEndElement)))) {
188                                 // System.out.println("greedy set to true; one alt empty");
189
}
190
191
192                             /* We have no choice, but to report a nondetermism */
193                             else {
194                                 tool.errorHandler.warnAltAmbiguity(
195                                     grammar,
196                                     blk, // the block
197
lexicalAnalysis, // true if lexical
198
grammar.maxk, // depth of ambiguity
199
r, // set of linear ambiguities
200
i, // first ambiguous alternative
201
j // second ambiguous alternative
202
);
203                             }
204                         }
205                         else {
206                             // a lookahead depth, k, was found where i and j do not conflict
207
ai.lookaheadDepth = Math.max(ai.lookaheadDepth, k);
208                             aj.lookaheadDepth = Math.max(aj.lookaheadDepth, k);
209                         }
210                     }
211             }
212
213         // finished with block.
214

215         // If had wildcard default clause idiom, remove competing lookahead
216
/*
217           if ( wildcardAlt!=null ) {
218           removeCompetingPredictionSetsFromWildcard(wildcardAlt.cache, wildcardAlt.head, grammar.maxk);
219           }
220         */

221
222         currentBlock = saveCurrentBlock;
223         return det;
224     }
225
226     /**Is (...)+ block LL(1)? Fill in alternative cache for this block.
227      * @return true if the block is deterministic
228      */

229     public boolean deterministic(OneOrMoreBlock blk) {
230         if (DEBUG_ANALYZER) System.out.println("deterministic(...)+(" + blk + ")");
231         AlternativeBlock saveCurrentBlock = currentBlock;
232         currentBlock = blk;
233         boolean blkOk = deterministic((AlternativeBlock)blk);
234         // block has been checked, now check that what follows does not conflict
235
// with the lookahead of the (...)+ block.
236
boolean det = deterministicImpliedPath(blk);
237         currentBlock = saveCurrentBlock;
238         return det && blkOk;
239     }
240
241     /**Is (...)* block LL(1)? Fill in alternative cache for this block.
242      * @return true if the block is deterministic
243      */

244     public boolean deterministic(ZeroOrMoreBlock blk) {
245         if (DEBUG_ANALYZER) System.out.println("deterministic(...)*(" + blk + ")");
246         AlternativeBlock saveCurrentBlock = currentBlock;
247         currentBlock = blk;
248         boolean blkOk = deterministic((AlternativeBlock)blk);
249         // block has been checked, now check that what follows does not conflict
250
// with the lookahead of the (...)* block.
251
boolean det = deterministicImpliedPath(blk);
252         currentBlock = saveCurrentBlock;
253         return det && blkOk;
254     }
255
256     /**Is this (...)* or (...)+ block LL(k)?
257      * @return true if the block is deterministic
258      */

259     public boolean deterministicImpliedPath(BlockWithImpliedExitPath blk) {
260         /** The lookahead depth for this decision considering implied exit path */
261         int k;
262         boolean det = true;
263         Vector alts = blk.getAlternatives();
264         int nalts = alts.size();
265         currentBlock.altj = -1; // comparing against implicit optional/exit alt
266

267         if (DEBUG_ANALYZER) System.out.println("deterministicImpliedPath");
268         for (int i = 0; i < nalts; i++) { // check follow against all alts
269
Alternative alt = blk.getAlternativeAt(i);
270
271             if (alt.head instanceof BlockEndElement) {
272                 tool.warning("empty alternative makes no sense in (...)* or (...)+", grammar.getFilename(), blk.getLine(), blk.getColumn());
273             }
274
275             k = 1; // assume eac alt is LL(1) with exit branch
276
// check to see if there is a lookahead depth that distinguishes
277
// between alternative i and the exit branch.
278
Lookahead[] r = new Lookahead[grammar.maxk + 1];
279             boolean haveAmbiguity;
280             do {
281                 haveAmbiguity = false;
282                 if (DEBUG_ANALYZER) System.out.println("checking depth " + k + "<=" + grammar.maxk);
283                 Lookahead p;
284                 Lookahead follow = blk.next.look(k);
285                 blk.exitCache[k] = follow;
286                 currentBlock.alti = i;
287                 p = getAltLookahead(blk, i, k);
288
289                 if (DEBUG_ANALYZER) System.out.println("follow is " + follow.toString(",", charFormatter, grammar));
290                 if (DEBUG_ANALYZER) System.out.println("p is " + p.toString(",", charFormatter, grammar));
291                 //r[k] = follow.fset.and(p.fset);
292
r[k] = follow.intersection(p);
293                 if (DEBUG_ANALYZER) System.out.println("intersection at depth " + k + " is " + r[k]);
294                 if (!r[k].nil()) {
295                     haveAmbiguity = true;
296                     k++;
297                 }
298                 // go until no more lookahead to use or no intersection
299
} while (haveAmbiguity && k <= grammar.maxk);
300
301             if (haveAmbiguity) {
302                 det = false;
303                 alt.lookaheadDepth = NONDETERMINISTIC;
304                 blk.exitLookaheadDepth = NONDETERMINISTIC;
305                 Alternative ambigAlt = blk.getAlternativeAt(currentBlock.alti);
306
307                 /* If the user specified warnWhenFollowAmbig=false, then we
308                  * can turn off this warning.
309                  */

310                 if (!blk.warnWhenFollowAmbig) {
311                 }
312
313                 /* If they have the generateAmbigWarnings option off for the block
314                  * then don't generate a warning.
315                  */

316                 else if (!blk.generateAmbigWarnings) {
317                 }
318
319                 /* If greedy=true and alt not empty, shut off warning */
320                 else if (blk.greedy == true && blk.greedySet &&
321                     !(ambigAlt.head instanceof BlockEndElement)) {
322                     if (DEBUG_ANALYZER) System.out.println("greedy loop");
323                 }
324
325                 /* If greedy=false then shut off warning...will have
326                  * to add "if FOLLOW break"
327                  * block during code gen to compensate for removal of warning.
328                  */

329                 else if (blk.greedy == false &&
330                     !(ambigAlt.head instanceof BlockEndElement)) {
331                     if (DEBUG_ANALYZER) System.out.println("nongreedy loop");
332                     // if FOLLOW not single k-string (|set[k]| can
333
// be > 1 actually) then must warn them that
334
// loop may terminate incorrectly.
335
// For example, ('a'..'d')+ ("ad"|"cb")
336
if (!lookaheadEquivForApproxAndFullAnalysis(blk.exitCache, grammar.maxk)) {
337                         tool.warning(new String JavaDoc[]{
338                             "nongreedy block may exit incorrectly due",
339                             "\tto limitations of linear approximate lookahead (first k-1 sets",
340                             "\tin lookahead not singleton)."},
341                                      grammar.getFilename(), blk.getLine(), blk.getColumn());
342                     }
343                 }
344
345                 // no choice but to generate a warning
346
else {
347                     tool.errorHandler.warnAltExitAmbiguity(
348                         grammar,
349                         blk, // the block
350
lexicalAnalysis, // true if lexical
351
grammar.maxk, // depth of ambiguity
352
r, // set of linear ambiguities
353
i // ambiguous alternative
354
);
355                 }
356             }
357             else {
358                 alt.lookaheadDepth = Math.max(alt.lookaheadDepth, k);
359                 blk.exitLookaheadDepth = Math.max(blk.exitLookaheadDepth, k);
360             }
361         }
362         return det;
363     }
364
365     /**Compute the lookahead set of whatever follows references to
366      * the rule associated witht the FOLLOW block.
367      */

368     public Lookahead FOLLOW(int k, RuleEndElement end) {
369         // what rule are we trying to compute FOLLOW of?
370
RuleBlock rb = (RuleBlock)end.block;
371         // rule name is different in lexer
372
String JavaDoc rule;
373         if (lexicalAnalysis) {
374             rule = CodeGenerator.encodeLexerRuleName(rb.getRuleName());
375         }
376         else {
377             rule = rb.getRuleName();
378         }
379
380         if (DEBUG_ANALYZER) System.out.println("FOLLOW(" + k + "," + rule + ")");
381
382         // are we in the midst of computing this FOLLOW already?
383
if (end.lock[k]) {
384             if (DEBUG_ANALYZER) System.out.println("FOLLOW cycle to " + rule);
385             return new Lookahead(rule);
386         }
387
388         // Check to see if there is cached value
389
if (end.cache[k] != null) {
390             if (DEBUG_ANALYZER) {
391                 System.out.println("cache entry FOLLOW(" + k + ") for " + rule + ": " + end.cache[k].toString(",", charFormatter, grammar));
392             }
393             // if the cache is a complete computation then simply return entry
394
if (end.cache[k].cycle == null) {
395                 return (Lookahead)end.cache[k].clone();
396             }
397             // A cache entry exists, but it is a reference to a cyclic computation.
398
RuleSymbol rs = (RuleSymbol)grammar.getSymbol(end.cache[k].cycle);
399             RuleEndElement re = rs.getBlock().endNode;
400             // The other entry may not exist because it is still being
401
// computed when this cycle cache entry was found here.
402
if (re.cache[k] == null) {
403                 // return the cycle...that's all we can do at the moment.
404
return (Lookahead)end.cache[k].clone();
405             }
406             else {
407                 if (DEBUG_ANALYZER) {
408                     System.out.println("combining FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar) + " with FOLLOW for "+((RuleBlock)re.block).getRuleName()+": "+re.cache[k].toString(",", charFormatter, grammar));
409                 }
410                 // combine results from other rule's FOLLOW
411
if ( re.cache[k].cycle==null ) {
412                     // current rule depends on another rule's FOLLOW and
413
// it is complete with no cycle; just kill our cycle and
414
// combine full result from other rule's FOLLOW
415
end.cache[k].combineWith(re.cache[k]);
416                     end.cache[k].cycle = null; // kill cycle as we're complete
417
}
418                 else {
419                     // the FOLLOW cache for other rule has a cycle also.
420
// Here is where we bubble up a cycle. We better recursively
421
// wipe out cycles (partial computations). I'm a little nervous
422
// that we might leave a cycle here, however.
423
Lookahead refFOLLOW = FOLLOW(k, re);
424                     end.cache[k].combineWith( refFOLLOW );
425                     // all cycles should be gone, but if not, record ref to cycle
426
end.cache[k].cycle = refFOLLOW.cycle;
427                 }
428                 if (DEBUG_ANALYZER) {
429                     System.out.println("saving FOLLOW(" + k + ") for " + rule + ": from "+end.cache[k].toString(",", charFormatter, grammar));
430                 }
431                 // Return the updated cache entry associated
432
// with the cycle reference.
433
return (Lookahead)end.cache[k].clone();
434             }
435         }
436
437         end.lock[k] = true; // prevent FOLLOW computation cycles
438

439         Lookahead p = new Lookahead();
440
441         RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
442
443         // Walk list of references to this rule to compute FOLLOW
444
for (int i = 0; i < rs.numReferences(); i++) {
445             RuleRefElement rr = rs.getReference(i);
446             if (DEBUG_ANALYZER) System.out.println("next[" + rule + "] is " + rr.next.toString());
447             Lookahead q = rr.next.look(k);
448             if (DEBUG_ANALYZER) System.out.println("FIRST of next[" + rule + "] ptr is " + q.toString());
449             /* If there is a cycle then if the cycle is to the rule for
450              * this end block, you have a cycle to yourself. Remove the
451              * cycle indication--the lookahead is complete.
452              */

453             if (q.cycle != null && q.cycle.equals(rule)) {
454                 q.cycle = null; // don't want cycle to yourself!
455
}
456             // add the lookahead into the current FOLLOW computation set
457
p.combineWith(q);
458             if (DEBUG_ANALYZER) System.out.println("combined FOLLOW[" + rule + "] is " + p.toString());
459         }
460
461         end.lock[k] = false; // we're not doing FOLLOW anymore
462

463         // if no rules follow this, it can be a start symbol or called by a start sym.
464
// set the follow to be end of file.
465
if (p.fset.nil() && p.cycle == null) {
466             if (grammar instanceof TreeWalkerGrammar) {
467                 // Tree grammars don't see EOF, they see end of sibling list or
468
// "NULL TREE LOOKAHEAD".
469
p.fset.add(Token.NULL_TREE_LOOKAHEAD);
470             }
471             else if (grammar instanceof LexerGrammar) {
472                 // Lexical grammars use Epsilon to indicate that the end of rule has been hit
473
// EOF would be misleading; any character can follow a token rule not just EOF
474
// as in a grammar (where a start symbol is followed by EOF). There is no
475
// sequence info in a lexer between tokens to indicate what is the last token
476
// to be seen.
477
// p.fset.add(EPSILON_TYPE);
478
p.setEpsilon();
479             }
480             else {
481                 p.fset.add(Token.EOF_TYPE);
482             }
483         }
484
485         // Cache the result of the FOLLOW computation
486
if (DEBUG_ANALYZER) {
487             System.out.println("saving FOLLOW(" + k + ") for " + rule + ": " + p.toString(",", charFormatter, grammar));
488         }
489         end.cache[k] = (Lookahead)p.clone();
490
491         return p;
492     }
493
494     private Lookahead getAltLookahead(AlternativeBlock blk, int alt, int k) {
495         Lookahead p;
496         Alternative a = blk.getAlternativeAt(alt);
497         AlternativeElement e = a.head;
498         //System.out.println("getAltLookahead("+k+","+e+"), cache size is "+a.cache.length);
499
if (a.cache[k] == null) {
500             p = e.look(k);
501             a.cache[k] = p;
502         }
503         else {
504             p = a.cache[k];
505         }
506         return p;
507     }
508
509     /**Actions are ignored */
510     public Lookahead look(int k, ActionElement action) {
511         if (DEBUG_ANALYZER) System.out.println("lookAction(" + k + "," + action + ")");
512         return action.next.look(k);
513     }
514
515     /**Combine the lookahead computed for each alternative */
516     public Lookahead look(int k, AlternativeBlock blk) {
517         if (DEBUG_ANALYZER) System.out.println("lookAltBlk(" + k + "," + blk + ")");
518         AlternativeBlock saveCurrentBlock = currentBlock;
519         currentBlock = blk;
520         Lookahead p = new Lookahead();
521         for (int i = 0; i < blk.alternatives.size(); i++) {
522             if (DEBUG_ANALYZER) System.out.println("alt " + i + " of " + blk);
523             // must set analysis alt
524
currentBlock.analysisAlt = i;
525             Alternative alt = blk.getAlternativeAt(i);
526             AlternativeElement elem = alt.head;
527             if (DEBUG_ANALYZER) {
528                 if (alt.head == alt.tail) {
529                     System.out.println("alt " + i + " is empty");
530                 }
531             }
532             Lookahead q = elem.look(k);
533             p.combineWith(q);
534         }
535         if (k == 1 && blk.not && subruleCanBeInverted(blk, lexicalAnalysis)) {
536             // Invert the lookahead set
537
if (lexicalAnalysis) {
538                 BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
539                 int[] elems = p.fset.toArray();
540                 for (int j = 0; j < elems.length; j++) {
541                     b.remove(elems[j]);
542                 }
543                 p.fset = b;
544             }
545             else {
546                 p.fset.notInPlace(Token.MIN_USER_TYPE, grammar.tokenManager.maxTokenType());
547             }
548         }
549         currentBlock = saveCurrentBlock;
550         return p;
551     }
552
553     /**Compute what follows this place-holder node and possibly
554      * what begins the associated loop unless the
555      * node is locked.
556      * <p>
557      * if we hit the end of a loop, we have to include
558      * what tokens can begin the loop as well. If the start
559      * node is locked, then we simply found an empty path
560      * through this subrule while analyzing it. If the
561      * start node is not locked, then this node was hit
562      * during a FOLLOW operation and the FIRST of this
563      * block must be included in that lookahead computation.
564      */

565     public Lookahead look(int k, BlockEndElement end) {
566         if (DEBUG_ANALYZER) System.out.println("lookBlockEnd(" + k + ", " + end.block + "); lock is " + end.lock[k]);
567         if (end.lock[k]) {
568             // computation in progress => the tokens we would have
569
// computed (had we not been locked) will be included
570
// in the set by that computation with the lock on this
571
// node.
572
return new Lookahead();
573         }
574
575         Lookahead p;
576
577         /* Hitting the end of a loop means you can see what begins the loop */
578         if (end.block instanceof ZeroOrMoreBlock ||
579             end.block instanceof OneOrMoreBlock) {
580             // compute what can start the block,
581
// but lock end node so we don't do it twice in same
582
// computation.
583
end.lock[k] = true;
584             p = look(k, end.block);
585             end.lock[k] = false;
586         }
587         else {
588             p = new Lookahead();
589         }
590
591         /* Tree blocks do not have any follow because they are children
592          * of what surrounds them. For example, A #(B C) D results in
593          * a look() for the TreeElement end of NULL_TREE_LOOKAHEAD, which
594          * indicates that nothing can follow the last node of tree #(B C)
595          */

596         if (end.block instanceof TreeElement) {
597             p.combineWith(Lookahead.of(Token.NULL_TREE_LOOKAHEAD));
598         }
599
600         /* Syntactic predicates such as ( (A)? )=> have no follow per se.
601          * We cannot accurately say what would be matched following a
602          * syntactic predicate (you MIGHT be ok if you said it was whatever
603          * followed the alternative predicted by the predicate). Hence,
604          * (like end-of-token) we return Epsilon to indicate "unknown
605          * lookahead."
606          */

607         else if (end.block instanceof SynPredBlock) {
608             p.setEpsilon();
609         }
610
611         // compute what can follow the block
612
else {
613             Lookahead q = end.block.next.look(k);
614             p.combineWith(q);
615         }
616
617         return p;
618     }
619
620     /**Return this char as the lookahead if k=1.
621      * <p>### Doesn't work for ( 'a' 'b' | 'a' ~'b' ) yet!!!
622      * <p>
623      * If the atom has the <tt>not</tt> flag on, then
624      * create the set complement of the tokenType
625      * which is the set of all characters referenced
626      * in the grammar with this char turned off.
627      * Also remove characters from the set that
628      * are currently allocated for predicting
629      * previous alternatives. This avoids ambiguity
630      * messages and is more properly what is meant.
631      * ( 'a' | ~'a' ) implies that the ~'a' is the
632      * "else" clause.
633      * <p>
634      * NOTE: we do <b>NOT</b> include exit path in
635      * the exclusion set. E.g.,
636      * ( 'a' | ~'a' )* 'b'
637      * should exit upon seeing a 'b' during the loop.
638      */

639     public Lookahead look(int k, CharLiteralElement atom) {
640         if (DEBUG_ANALYZER) System.out.println("lookCharLiteral(" + k + "," + atom + ")");
641         // Skip until analysis hits k==1
642
if (k > 1) {
643             return atom.next.look(k - 1);
644         }
645         if (lexicalAnalysis) {
646             if (atom.not) {
647                 BitSet b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
648                 if (DEBUG_ANALYZER) System.out.println("charVocab is " + b.toString());
649                 // remove stuff predicted by preceding alts and follow of block
650
removeCompetingPredictionSets(b, atom);
651                 if (DEBUG_ANALYZER) System.out.println("charVocab after removal of prior alt lookahead " + b.toString());
652                 // now remove element that is stated not to be in the set
653
b.clear(atom.getType());
654                 return new Lookahead(b);
655             }
656             else {
657                 return Lookahead.of(atom.getType());
658             }
659         }
660         else {
661             // Should have been avoided by MakeGrammar
662
tool.panic("Character literal reference found in parser");
663             // ... so we make the compiler happy
664
return Lookahead.of(atom.getType());
665         }
666     }
667
668     public Lookahead look(int k, CharRangeElement r) {
669         if (DEBUG_ANALYZER) System.out.println("lookCharRange(" + k + "," + r + ")");
670         // Skip until analysis hits k==1
671
if (k > 1) {
672             return r.next.look(k - 1);
673         }
674         BitSet p = BitSet.of(r.begin);
675         for (int i = r.begin + 1; i <= r.end; i++) {
676             p.add(i);
677         }
678         return new Lookahead(p);
679     }
680
681     public Lookahead look(int k, GrammarAtom atom) {
682         if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + atom + "[" + atom.getType() + "])");
683
684         if (lexicalAnalysis) {
685             // MakeGrammar should have created a rule reference instead
686
tool.panic("token reference found in lexer");
687         }
688         // Skip until analysis hits k==1
689
if (k > 1) {
690             return atom.next.look(k - 1);
691         }
692         Lookahead l = Lookahead.of(atom.getType());
693         if (atom.not) {
694             // Invert the lookahead set against the token vocabulary
695
int maxToken = grammar.tokenManager.maxTokenType();
696             l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
697             // remove stuff predicted by preceding alts and follow of block
698
removeCompetingPredictionSets(l.fset, atom);
699         }
700         return l;
701     }
702
703     /**The lookahead of a (...)+ block is the combined lookahead of
704      * all alternatives and, if an empty path is found, the lookahead
705      * of what follows the block.
706      */

707     public Lookahead look(int k, OneOrMoreBlock blk) {
708         if (DEBUG_ANALYZER) System.out.println("look+" + k + "," + blk + ")");
709         Lookahead p = look(k, (AlternativeBlock)blk);
710         return p;
711     }
712
713     /**Combine the lookahead computed for each alternative.
714      * Lock the node so that no other computation may come back
715      * on itself--infinite loop. This also implies infinite left-recursion
716      * in the grammar (or an error in this algorithm ;)).
717      */

718     public Lookahead look(int k, RuleBlock blk) {
719         if (DEBUG_ANALYZER) System.out.println("lookRuleBlk(" + k + "," + blk + ")");
720         Lookahead p = look(k, (AlternativeBlock)blk);
721         return p;
722     }
723
724     /**If not locked or noFOLLOW set, compute FOLLOW of a rule.
725      * <p>
726      * TJP says 8/12/99: not true anymore:
727      * Lexical rules never compute follow. They set epsilon and
728      * the code generator gens code to check for any character.
729      * The code generator must remove the tokens used to predict
730      * any previous alts in the same block.
731      * <p>
732      * When the last node of a rule is reached and noFOLLOW,
733      * it implies that a "local" FOLLOW will be computed
734      * after this call. I.e.,
735      * <pre>
736      * a : b A;
737      * b : B | ;
738      * c : b C;
739      * </pre>
740      * Here, when computing the look of rule b from rule a,
741      * we want only {B,EPSILON_TYPE} so that look(b A) will
742      * be {B,A} not {B,A,C}.
743      * <p>
744      * if the end block is not locked and the FOLLOW is
745      * wanted, the algorithm must compute the lookahead
746      * of what follows references to this rule. If
747      * end block is locked, FOLLOW will return an empty set
748      * with a cycle to the rule associated with this end block.
749      */

750     public Lookahead look(int k, RuleEndElement end) {
751         if (DEBUG_ANALYZER)
752             System.out.println("lookRuleBlockEnd(" + k + "); noFOLLOW=" +
753                                end.noFOLLOW + "; lock is " + end.lock[k]);
754         if (/*lexicalAnalysis ||*/ end.noFOLLOW) {
755             Lookahead p = new Lookahead();
756             p.setEpsilon();
757             p.epsilonDepth = BitSet.of(k);
758             return p;
759         }
760         Lookahead p = FOLLOW(k, end);
761         return p;
762     }
763
764     /**Compute the lookahead contributed by a rule reference.
765      *
766      * <p>
767      * When computing ruleref lookahead, we don't want the FOLLOW
768      * computation done if an empty path exists for the rule.
769      * The FOLLOW is too loose of a set...we want only to
770      * include the "local" FOLLOW or what can follow this
771      * particular ref to the node. In other words, we use
772      * context information to reduce the complexity of the
773      * analysis and strengthen the parser.
774      *
775      * The noFOLLOW flag is used as a means of restricting
776      * the FOLLOW to a "local" FOLLOW. This variable is
777      * orthogonal to the <tt>lock</tt> variable that prevents
778      * infinite recursion. noFOLLOW does not care about what k is.
779      */

780     public Lookahead look(int k, RuleRefElement rr) {
781         if (DEBUG_ANALYZER) System.out.println("lookRuleRef(" + k + "," + rr + ")");
782         RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rr.targetRule);
783         if (rs == null || !rs.defined) {
784             tool.error("no definition of rule " + rr.targetRule, grammar.getFilename(), rr.getLine(), rr.getColumn());
785             return new Lookahead();
786         }
787         RuleBlock rb = rs.getBlock();
788         RuleEndElement end = rb.endNode;
789         boolean saveEnd = end.noFOLLOW;
790         end.noFOLLOW = true;
791         // go off to the rule and get the lookahead (w/o FOLLOW)
792
Lookahead p = look(k, rr.targetRule);
793         if (DEBUG_ANALYZER) System.out.println("back from rule ref to " + rr.targetRule);
794         // restore state of end block
795
end.noFOLLOW = saveEnd;
796
797         // check for infinite recursion. If a cycle is returned: trouble!
798
if (p.cycle != null) {
799             tool.error("infinite recursion to rule " + p.cycle + " from rule " +
800                        rr.enclosingRuleName, grammar.getFilename(), rr.getLine(), rr.getColumn());
801         }
802
803         // is the local FOLLOW required?
804
if (p.containsEpsilon()) {
805             if (DEBUG_ANALYZER)
806                 System.out.println("rule ref to " +
807                                    rr.targetRule + " has eps, depth: " + p.epsilonDepth);
808
809             // remove epsilon
810
p.resetEpsilon();
811             // fset.clear(EPSILON_TYPE);
812

813             // for each lookahead depth that saw epsilon
814
int[] depths = p.epsilonDepth.toArray();
815             p.epsilonDepth = null; // clear all epsilon stuff
816
for (int i = 0; i < depths.length; i++) {
817                 int rk = k - (k - depths[i]);
818                 Lookahead q = rr.next.look(rk); // see comments in Lookahead
819
p.combineWith(q);
820             }
821             // note: any of these look() computations for local follow can
822
// set EPSILON in the set again if the end of this rule is found.
823
}
824
825         return p;
826     }
827
828     public Lookahead look(int k, StringLiteralElement atom) {
829         if (DEBUG_ANALYZER) System.out.println("lookStringLiteral(" + k + "," + atom + ")");
830         if (lexicalAnalysis) {
831             // need more lookahead than string can provide?
832
if (k > atom.processedAtomText.length()) {
833                 return atom.next.look(k - atom.processedAtomText.length());
834             }
835             else {
836                 // get char at lookahead depth k, from the processed literal text
837
return Lookahead.of(atom.processedAtomText.charAt(k - 1));
838             }
839         }
840         else {
841             // Skip until analysis hits k==1
842
if (k > 1) {
843                 return atom.next.look(k - 1);
844             }
845             Lookahead l = Lookahead.of(atom.getType());
846             if (atom.not) {
847                 // Invert the lookahead set against the token vocabulary
848
int maxToken = grammar.tokenManager.maxTokenType();
849                 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
850             }
851             return l;
852         }
853     }
854
855     /**The lookahead of a (...)=> block is the lookahead of
856      * what follows the block. By definition, the syntactic
857      * predicate block defies static analysis (you want to try it
858      * out at run-time). The LOOK of (a)=>A B is A for LL(1)
859      * ### is this even called?
860      */

861     public Lookahead look(int k, SynPredBlock blk) {
862         if (DEBUG_ANALYZER) System.out.println("look=>(" + k + "," + blk + ")");
863         return blk.next.look(k);
864     }
865
866     public Lookahead look(int k, TokenRangeElement r) {
867         if (DEBUG_ANALYZER) System.out.println("lookTokenRange(" + k + "," + r + ")");
868         // Skip until analysis hits k==1
869
if (k > 1) {
870             return r.next.look(k - 1);
871         }
872         BitSet p = BitSet.of(r.begin);
873         for (int i = r.begin + 1; i <= r.end; i++) {
874             p.add(i);
875         }
876         return new Lookahead(p);
877     }
878
879     public Lookahead look(int k, TreeElement t) {
880         if (DEBUG_ANALYZER)
881             System.out.println("look(" + k + "," + t.root + "[" + t.root.getType() + "])");
882         if (k > 1) {
883             return t.next.look(k - 1);
884         }
885         Lookahead l = null;
886         if (t.root instanceof WildcardElement) {
887             l = t.root.look(1); // compute FIRST set minus previous rows
888
}
889         else {
890             l = Lookahead.of(t.root.getType());
891             if (t.root.not) {
892                 // Invert the lookahead set against the token vocabulary
893
int maxToken = grammar.tokenManager.maxTokenType();
894                 l.fset.notInPlace(Token.MIN_USER_TYPE, maxToken);
895             }
896         }
897         return l;
898     }
899
900     public Lookahead look(int k, WildcardElement wc) {
901         if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ")");
902
903         // Skip until analysis hits k==1
904
if (k > 1) {
905             return wc.next.look(k - 1);
906         }
907
908         BitSet b;
909         if (lexicalAnalysis) {
910             // Copy the character vocabulary
911
b = (BitSet)((LexerGrammar)grammar).charVocabulary.clone();
912         }
913         else {
914             b = new BitSet(1);
915             // Invert the lookahead set against the token vocabulary
916
int maxToken = grammar.tokenManager.maxTokenType();
917             b.notInPlace(Token.MIN_USER_TYPE, maxToken);
918             if (DEBUG_ANALYZER) System.out.println("look(" + k + "," + wc + ") after not: " + b);
919         }
920
921         // Remove prediction sets from competing alternatives
922
// removeCompetingPredictionSets(b, wc);
923

924         return new Lookahead(b);
925     }
926
927     /** The (...)* element is the combined lookahead of the alternatives and what can
928      * follow the loop.
929      */

930     public Lookahead look(int k, ZeroOrMoreBlock blk) {
931         if (DEBUG_ANALYZER) System.out.println("look*(" + k + "," + blk + ")");
932         Lookahead p = look(k, (AlternativeBlock)blk);
933         Lookahead q = blk.next.look(k);
934         p.combineWith(q);
935         return p;
936     }
937
938     /**Compute the combined lookahead for all productions of a rule.
939      * If the lookahead returns with epsilon, at least one epsilon
940      * path exists (one that consumes no tokens). The noFOLLOW
941      * flag being set for this endruleblk, indicates that the
942      * a rule ref invoked this rule.
943      *
944      * Currently only look(RuleRef) calls this. There is no need
945      * for the code generator to call this.
946      */

947     public Lookahead look(int k, String JavaDoc rule) {
948         if (DEBUG_ANALYZER) System.out.println("lookRuleName(" + k + "," + rule + ")");
949         RuleSymbol rs = (RuleSymbol)grammar.getSymbol(rule);
950         RuleBlock rb = rs.getBlock();
951
952         if (rb.lock[k]) {
953             if (DEBUG_ANALYZER)
954                 System.out.println("infinite recursion to rule " + rb.getRuleName());
955             return new Lookahead(rule);
956         }
957
958         // have we computed it before?
959
if (rb.cache[k] != null) {
960             if (DEBUG_ANALYZER) {
961                 System.out.println("found depth " + k + " result in FIRST " + rule + " cache: " +
962                                    rb.cache[k].toString(",", charFormatter, grammar));
963             }
964             return (Lookahead)rb.cache[k].clone();
965         }
966
967         rb.lock[k] = true;
968         Lookahead p = look(k, (RuleBlock)rb);
969         rb.lock[k] = false;
970
971         // cache results
972
rb.cache[k] = (Lookahead)p.clone();
973         if (DEBUG_ANALYZER) {
974             System.out.println("saving depth " + k + " result in FIRST " + rule + " cache: " +
975                                rb.cache[k].toString(",", charFormatter, grammar));
976         }
977         return p;
978     }
979
980     /** If the first k-1 sets are singleton sets, the appoximate
981      * lookahead analysis is equivalent to full lookahead analysis.
982      */

983     public static boolean lookaheadEquivForApproxAndFullAnalysis(Lookahead[] bset, int k) {
984         // first k-1 sets degree 1?
985
for (int i = 1; i <= k - 1; i++) {
986             BitSet look = bset[i].fset;
987             if (look.degree() > 1) {
988                 return false;
989             }
990         }
991         return true;
992     }
993
994     /** Remove the prediction sets from preceding alternatives
995      * and follow set, but *only* if this element is the first element
996      * of the alternative. The class members currenBlock and
997      * currentBlock.analysisAlt must be set correctly.
998      * @param b The prediction bitset to be modified
999      * @el The element of interest
1000     */

1001    private void removeCompetingPredictionSets(BitSet b, AlternativeElement el) {
1002        // Only do this if the element is the first element of the alt,
1003
// because we are making an implicit assumption that k==1.
1004
GrammarElement head = currentBlock.getAlternativeAt(currentBlock.analysisAlt).head;
1005        // if element is #(. blah) then check to see if el is root
1006
if (head instanceof TreeElement) {
1007            if (((TreeElement)head).root != el) {
1008                return;
1009            }
1010        }
1011        else if (el != head) {
1012            return;
1013        }
1014        for (int i = 0; i < currentBlock.analysisAlt; i++) {
1015            AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1016            b.subtractInPlace(e.look(1).fset);
1017        }
1018    }
1019
1020    /** Remove the prediction sets from preceding alternatives
1021     * The class members currenBlock must be set correctly.
1022     * Remove prediction sets from 1..k.
1023     * @param look The prediction lookahead to be modified
1024     * @el The element of interest
1025     * @k How deep into lookahead to modify
1026     */

1027    private void removeCompetingPredictionSetsFromWildcard(Lookahead[] look, AlternativeElement el, int k) {
1028        for (int d = 1; d <= k; d++) {
1029            for (int i = 0; i < currentBlock.analysisAlt; i++) {
1030                AlternativeElement e = currentBlock.getAlternativeAt(i).head;
1031                look[d].fset.subtractInPlace(e.look(d).fset);
1032            }
1033        }
1034    }
1035
1036    /** reset the analyzer so it looks like a new one */
1037    private void reset() {
1038        grammar = null;
1039        DEBUG_ANALYZER = false;
1040        currentBlock = null;
1041        lexicalAnalysis = false;
1042    }
1043
1044    /** Set the grammar for the analyzer */
1045    public void setGrammar(Grammar g) {
1046        if (grammar != null) {
1047            reset();
1048        }
1049        grammar = g;
1050
1051        // Is this lexical?
1052
lexicalAnalysis = (grammar instanceof LexerGrammar);
1053        DEBUG_ANALYZER = grammar.analyzerDebug;
1054    }
1055
1056    public boolean subruleCanBeInverted(AlternativeBlock blk, boolean forLexer) {
1057        if (
1058            blk instanceof ZeroOrMoreBlock ||
1059            blk instanceof OneOrMoreBlock ||
1060            blk instanceof SynPredBlock
1061        ) {
1062            return false;
1063        }
1064        // Cannot invert an empty subrule
1065
if (blk.alternatives.size() == 0) {
1066            return false;
1067        }
1068        // The block must only contain alternatives with a single element,
1069
// where each element is a char, token, char range, or token range.
1070
for (int i = 0; i < blk.alternatives.size(); i++) {
1071            Alternative alt = blk.getAlternativeAt(i);
1072            // Cannot have anything interesting in the alternative ...
1073
if (alt.synPred != null || alt.semPred != null || alt.exceptionSpec != null) {
1074                return false;
1075            }
1076            // ... and there must be one simple element
1077
AlternativeElement elt = alt.head;
1078            if (
1079                !(
1080                elt instanceof CharLiteralElement ||
1081                elt instanceof TokenRefElement ||
1082                elt instanceof CharRangeElement ||
1083                elt instanceof TokenRangeElement ||
1084                (elt instanceof StringLiteralElement && !forLexer)
1085                ) ||
1086                !(elt.next instanceof BlockEndElement) ||
1087                elt.getAutoGenType() != GrammarElement.AUTO_GEN_NONE
1088            ) {
1089                return false;
1090            }
1091        }
1092        return true;
1093    }
1094}
1095
Popular Tags