KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > lexer > Consumer


1 package fri.patterns.interpreter.parsergenerator.lexer;
2
3 import java.util.*;
4 import java.io.IOException JavaDoc;
5 import java.io.Serializable JavaDoc;
6 import fri.patterns.interpreter.parsergenerator.Token;
7 import fri.patterns.interpreter.parsergenerator.syntax.Rule;
8
9 /**
10     Consuming characters (or bytes) means reading from an input stream as long
11     as the read characters match the pattern the consumer was built for.
12     A consumer can be built with a fixed string, a character set, other
13     character consumers, or a mixed sequence of those. It can be a list of
14     alternating (parallel) consumers. It can be set repeatable and nullable.
15     A character consumer succeeds when he could apply all consumers or
16     patterns/sets stored in its sequence when reading input. It fails and
17     unreads all characters when one non-nullable consumer in its sequence fails.
18     
19     @author (c) 2002, Fritz Ritzberger
20 */

21
22 class Consumer implements
23     Comparable JavaDoc,
24     Serializable JavaDoc
25 {
26     private List sequence = new ArrayList();
27     private List constraints;
28     private boolean nullable = false;
29     private boolean repeatable = false;
30     protected Rule rule;
31     protected int fixedLength = -1, startLength = -1, variance = -1;
32     
33     
34     /** Empty consumer knowing its rule. This could become a Lexer toplevel consumer. */
35     Consumer(Rule rule) {
36         this.rule = rule;
37     }
38
39     /** Consumer with some start character or fixed string, without rule. */
40     Consumer(String JavaDoc charOrString) {
41         append(charOrString);
42     }
43
44     /** Internal constructor needed in LexerBuilder. */
45     protected Consumer() {
46     }
47
48
49     /** Append a String or character to sequence. */
50     public void append(String JavaDoc charOrString) {
51         Object JavaDoc o = sequence.size() > 0 ? sequence.get(sequence.size() - 1) : null; // check preceding
52
if (o instanceof String JavaDoc) { // has a string preceding
53
String JavaDoc s = (String JavaDoc)o;
54             s = s + charOrString; // append to it
55
sequence.set(sequence.size() - 1, s);
56         }
57         else {
58             sequence.add(charOrString);
59         }
60     }
61
62     /** Add a character consumer reference. */
63     public void append(Reference subConsumer) {
64         sequence.add(subConsumer);
65     }
66
67     /** Add a character consumer. */
68     public void append(Consumer subConsumer) {
69         sequence.add(subConsumer);
70     }
71
72     /** Add a character set by its high character. Low character is previously appended one. */
73     public void appendSet(String JavaDoc high)
74         throws LexerException
75     {
76         String JavaDoc low = (String JavaDoc) sequence.get(sequence.size() - 1); // throws ClassCastException if not String
77
if (low.length() > 1) { // low character was appended to previous string
78
int i = low.length() - 1;
79             sequence.set(sequence.size() - 1, low.substring(0, i)); // cut last character
80
sequence.add(new CharacterSet(""+low.charAt(i), high)); // append set
81
}
82         else {
83             sequence.set(sequence.size() - 1, new CharacterSet(low, high));
84         }
85     }
86
87     /** Passed Consumer will constrain every character of this consumer. */
88     public void subtract(Consumer constraint) {
89         if (constraints == null)
90             constraints = new ArrayList();
91         constraints.add(constraint);
92     }
93     
94     /** Passed reference will constrain every character of this consumer. */
95     public void subtract(Reference constraint) {
96         if (constraints == null)
97             constraints = new ArrayList();
98         constraints.add(constraint);
99     }
100     
101
102     /** Resolve all references to real consumers after build. */
103     public void resolveConsumerReferences(Map charConsumers, Map doneList)
104         throws LexerException
105     {
106         if (doneList.get(this) != null)
107             return;
108         doneList.put(this, this);
109         
110         List [] varr = new List[] { sequence, getAlternatives(), constraints };
111         for (int j = 0; j < varr.length; j++) {
112             List v = varr[j];
113             
114             if (v != null) {
115                 for (int i = 0; v != null && i < v.size(); i++) {
116                     Object JavaDoc o = v.get(i);
117                     
118                     if (o instanceof Reference) {
119                         //System.err.println("Found consumer reference "+o+" within "+rule);
120
Reference cr = (Reference)o;
121                         Object JavaDoc cc = charConsumers.get(cr.nonterminal);
122                         
123                         if (cc == null)
124                             throw new LexerException("Consumer-Reference not found: "+cr.nonterminal);
125                         
126                         v.set(i, cc);
127                     }
128                     else
129                     if (o instanceof Consumer) {
130                         Consumer cc = (Consumer)o;
131                         cc.resolveConsumerReferences(charConsumers, doneList);
132                     }
133                 }
134             }
135         }
136     }
137     
138     
139     public void setNullable() {
140         nullable = true;
141     }
142     
143     public boolean isNullable() {
144         return nullable;
145     }
146     
147     public void setRepeatable() {
148         repeatable = true;
149     }
150     
151     public boolean isRepeatable() {
152         return repeatable;
153     }
154
155     /** Always returns null as this consumer holds no alternatives. */
156     public List getAlternatives() {
157         return null;
158     }
159     
160     
161     /** Implements Comparable: Sort alternatives by precedence: - getStartVariance(), + getStartLength(), + getFixedLength(). */
162     public int compareTo(Object JavaDoc o) {
163         Consumer cc = (Consumer)o;
164         int i;
165         i = getStartVariance() - cc.getStartVariance(); // be as exact as possible
166
if (i != 0)
167             return i;
168         i = cc.getStartLength() - getStartLength(); // be as significant as possible
169
if (i != 0)
170             return i;
171         i = cc.getFixedLength() - getFixedLength(); // be long as possible
172
if (i != 0)
173             return i;
174         return 0;
175     }
176
177     /** Returns the first character of this consumer, or null if starting with a set. */
178     public Character JavaDoc getStartCharacter() {
179         Object JavaDoc o = sequence.get(0);
180         //System.err.println("getStartCharacter from sequence "+o);
181
if (o instanceof Consumer)
182             return ((Consumer)o).getStartCharacter();
183         else
184         if (o instanceof CharacterSet)
185             return null;
186         else
187             return new Character JavaDoc(((String JavaDoc)o).charAt(0));
188     }
189     
190
191     /** Returns the count of possible start characters. For fixed start character this is 1. */
192     public int getStartVariance() {
193         if (this.variance > 0)
194             return this.variance;
195             
196         if (getStartCharacter() != null)
197             return this.variance = 1;
198
199         int variance;
200         Object JavaDoc o = sequence.get(0);
201         if (o instanceof Consumer)
202             variance = ((Consumer)o).getStartVariance();
203         else
204         if (o instanceof CharacterSet)
205             variance = ((CharacterSet)o).getVariance();
206         else
207             throw new IllegalStateException JavaDoc("No fixed start character, no character set, where am i?");
208             
209         for (int i = 0; constraints != null && i < constraints.size(); i++)
210             variance -= ((Consumer) constraints.get(i)).getStartVariance();
211             
212         return this.variance = variance;
213     }
214     
215     /** The fixed length ends before the first found character set (like "0..9") or repeatable or nullable consumer. */
216     public int getFixedLength() {
217         if (fixedLength >= 0) // never call toString() before all sequences have arrived
218
return fixedLength;
219         fixedLength = getSomeLength(false, new ArrayList());
220         return fixedLength;
221     }
222     
223     /** The start length ends before the first found repeatable or nullable consumer (like "chars*"), but not before a character set. */
224     public int getStartLength() {
225         if (startLength >= 0) // never call toString() before all sequences have arrived
226
return startLength;
227         List reason = new ArrayList();
228         startLength = getSomeLength(true, reason);
229         //System.err.println("found start length "+startLength+" for rule "+rule+", sequence "+sequence+", reason "+reason);
230
return startLength;
231     }
232
233
234     protected int getSomeLength(boolean exploreStartLength, List breakIndicator) {
235         int len = 0;
236         
237         for (int i = 0; breakIndicator.size() <= 0 && i < sequence.size(); i++) {
238             Object JavaDoc o = sequence.get(i);
239             
240             if (o instanceof Consumer) {
241                 Consumer cc = (Consumer)o;
242
243                 if (cc.isNullable()) { // fixed start length ends here
244
breakIndicator.add("nullable"); // make parent terminate
245
return len;
246                 }
247                 else
248                 if (cc.isRepeatable()) { // fixed start length ends here
249
len += cc.getSomeLength(exploreStartLength, breakIndicator);
250                     breakIndicator.add("repeatable"); // make parent terminate
251
return len;
252                 }
253                 
254                 len += cc.getSomeLength(exploreStartLength, breakIndicator);
255             }
256             else
257             if (o instanceof CharacterSet) {
258                 if (exploreStartLength) {
259                     breakIndicator.add("set"); // make parent terminate
260
return len;
261                 }
262                 len += 1; // would match exactly one character
263
}
264             else { // must be String
265
len += ((String JavaDoc)o).length(); // matches a string of same length
266
}
267         }
268         
269         return len;
270     }
271
272
273
274     /** Return contained consumer when having only one and no constraints. This is called immediately after construction. */
275     Consumer optimize() {
276         if (constraints == null && sequence.size() == 1 && sequence.get(0) instanceof Consumer) {
277             // give up this formal-only consumer, take rule to sub-consumer
278
Consumer cc = (Consumer) sequence.get(0);
279             cc.rule = rule;
280             return cc;
281         }
282         return this;
283     }
284
285     /**
286         Returns true if the rule of this consumer matches the passed left recursive rule.
287         E.g. passing "nonterm ::= nonterm something" would match "nonterm ::= something".
288     */

289     boolean matchesRepeatableRule(Rule rule) {
290         if (rule.rightSize() != this.rule.rightSize() + 1)
291             return false;
292         for (int i = 0; i < this.rule.rightSize(); i++)
293             if (this.rule.getRightSymbol(i).equals(rule.getRightSymbol(i + 1)) == false)
294                 return false;
295         return true;
296     }
297
298
299
300     // consume methods
301

302     /** Passes the factory for Strategy objects to all contained ConsumerAlternatives. */
303     public void setStrategyFactoryMethod(StrategyFactoryMethod strategyFactoryMethod) {
304         for (int i = 0; i < sequence.size(); i++) {
305             Object JavaDoc c = sequence.get(i);
306             if (c instanceof Consumer) {
307                 ((Consumer) c).setStrategyFactoryMethod(strategyFactoryMethod);
308             }
309         }
310     }
311
312     /**
313         Reads from input. Returns null if input did not match, else a result tree containing the text.
314         Ensures that a nullable consumer never returns null and a repeatable consumer repeats.
315         @param input Input object where to read from.
316         @return null if no match, else scanned input as a result tree.
317     */

318     public ResultTree consume(InputText input)
319         throws IOException JavaDoc
320     {
321         // prepare scanning for results
322
ResultTree result = isRepeatable() ? ensureResultTree(null) : null; // prepare a list container when repeatable
323
ResultTree r;
324         Token.Address start = new Token.Address(input.getScanLine(), input.getScanColumn(), input.getScanOffset());
325
326         // consume input and optionally loop when having a repeatable rule
327
do {
328             r = consumeInternal(input);
329             if (r != null && r.hasText())
330                 result = (result == null) ? r : result.addChild(r);
331         }
332         while (r != null && isRepeatable());
333
334         // check the result tree element
335
if (result != null && isRepeatable() && result.getChildCount() <= 0)
336             result = null;
337             
338         if (result == null && isNullable()) // having read no input should not break loop of caller
339
result = ensureResultTree(null); // return empty result element
340

341         // add range when there was a result
342
if (result != null) {
343             Token.Address end = new Token.Address(input.getScanLine(), input.getScanColumn(), input.getScanOffset());
344             result.setRange(new Token.Range(start, end));
345         }
346         
347         return result;
348     }
349
350
351     /**
352         Consumes characters from Input and stores it into returned result tree.
353         Returns null if nothing has been consumed.
354     */

355     protected ResultTree consumeInternal(InputText input)
356         throws IOException JavaDoc
357     {
358         ResultTree result = null;
359         boolean ok = true;
360         int mark = input.getMark();
361         Token.Address start = new Token.Address(input.getScanLine(), input.getScanColumn(), input.getScanOffset());
362         
363         // all members of this sequence must match
364
for (int i = 0; ok && i < sequence.size(); i++) {
365         
366             // first check constraint consumers negatively
367
for (int j = 0; ok && constraints != null && j < constraints.size(); j++) {
368                 Consumer cc = (Consumer) constraints.get(j);
369
370                 int mark1 = input.getMark(); // set mark as constraint-consumer is no permitted consumer
371
ok = (cc.consumeInternal(input) == null); // ok when constraint failed to read
372
input.setMark(mark1); // reset to mark
373
}
374             
375             if (ok) { // no constraint succeeded, now check positively
376
Object JavaDoc o = sequence.get(i);
377                 
378                 if (o instanceof Consumer) { // match next consumer
379
Consumer cc = (Consumer)o;
380                     ResultTree r = cc.consume(input);
381                     
382                     if (r == null)
383                         ok = false;
384                     else
385                         result = ensureResultTree(result).addChild(r);
386                 }
387                 else
388                 if (o instanceof CharacterSet) { // match character set
389
CharacterSet charSet = (CharacterSet)o;
390                     int ic = input.read();
391                     char c = (char)ic;
392                     if (ic == Input.EOF || charSet.includes(c) == false)
393                         ok = false;
394                     else
395                         result = ensureResultTree(result).append(c);
396                 }
397                 else { // match a literal terminal String
398
String JavaDoc s = (String JavaDoc)o;
399                     int j = 0;
400                     
401                     do {
402                         int ic = input.read();
403                         ok = (ic != Input.EOF && ((char)ic) == s.charAt(j));
404                         j++;
405                     }
406                     while (ok && j < s.length());
407                     
408                     if (ok)
409                         result = ensureResultTree(result).append(s);
410                         
411                 } // end possible sequence objects
412

413             } // end if ok (not constrained)
414
} // end for all sequences
415

416
417         if (ok && result != null && result.hasText()) { // sequence did match and read non-null text
418
Token.Address end = new Token.Address(input.getScanLine(), input.getScanColumn(), input.getScanOffset());
419             result.setRange(new Token.Range(start, end));
420             //System.err.println("Consumer success, free memory: "+Runtime.getRuntime().freeMemory()+", "+rule);
421
return result; // null-text will be considered by consume() that knows about nullable rules
422
}
423
424         input.setMark(mark); // start again from initial mark
425

426         //System.err.println("Consumer failed: "+rule);
427
return null; // there was no match
428
}
429
430
431     private ResultTree ensureResultTree(ResultTree result) {
432         if (result == null)
433             result = new ResultTree(rule);
434         return result;
435     }
436     
437     
438
439     /** String representation of consumer: hashcode, sequence and constraints. */
440     public String JavaDoc toString() {
441         return hashCode()+
442             "("+getStartVariance()+","+getStartLength()+","+getFixedLength()+")> "+
443             toStringBase()+
444             (isNullable() && isRepeatable() ? " *" : isNullable() ? " ?" : isRepeatable() ? " +" : "");
445     }
446     
447     /** Returns the base string for toString() method. */
448     protected String JavaDoc toStringBase() {
449         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
450         listToString(sequence, sb, " ", false);
451         listToString(constraints, sb, " - ", true);
452         return sb.toString();
453     }
454
455     /** Converts a list into String, for toString() method. */
456     protected void listToString(List list, StringBuffer JavaDoc sb, String JavaDoc separator, boolean separatorAtFirst) {
457         for (int i = 0; list != null && i < list.size(); i++) {
458             Object JavaDoc o = list.get(i);
459             
460             if (separatorAtFirst || i > 0)
461                 sb.append(separator);
462             
463             if (o instanceof Consumer)
464                 sb.append("["+o.hashCode()+"]");
465             else // String or CharacterSet
466
sb.append(o.toString());
467         }
468     }
469         
470
471
472     /**
473         Returns true if the passed consumer could be concurrent with this.
474         This method does not consider constraints, as these could be complex.
475     */

476     public boolean overlaps(Consumer cc) {
477         Object JavaDoc o1 = sequence.get(0);
478         
479         if (o1 instanceof Consumer) { // drill down
480
return ((Consumer)o1).overlaps(cc);
481         }
482         else { // primitive found
483
if (cc.sequence.size() <= 0) // must be ConsumerAlternatives
484
return cc.overlaps(this);
485
486             Object JavaDoc o2 = cc.sequence.get(0);
487             
488             if (o2 instanceof Consumer) { // drill down
489
return ((Consumer)o2).overlaps(this);
490             }
491             else
492             if (o1 instanceof CharacterSet) {
493                 if (o2 instanceof CharacterSet)
494                     return ((CharacterSet)o1).overlaps((CharacterSet)o2);
495                 else
496                 if (o2 instanceof String JavaDoc)
497                     return ((CharacterSet)o1).includes(((String JavaDoc)o2).charAt(0));
498             }
499             else
500             if (o2 instanceof CharacterSet) {
501                 if (o1 instanceof String JavaDoc)
502                     return ((CharacterSet)o2).includes(((String JavaDoc)o1).charAt(0));
503             }
504
505             String JavaDoc seq1 = (String JavaDoc) o1;
506             String JavaDoc seq2 = (String JavaDoc) o2;
507             return seq1.charAt(0) == seq2.charAt(0);
508         }
509     }
510     
511     
512     
513     
514     // helper classes
515

516     
517     /** Marker class for references that must be resolved after building consumers. */
518     public static class Reference
519     {
520         String JavaDoc nonterminal;
521         
522         Reference(String JavaDoc nonterminal) {
523             this.nonterminal = nonterminal;
524         }
525         public String JavaDoc toString() {
526             return nonterminal;
527         }
528     }
529     
530     
531     
532     private static class CharacterSet implements Serializable JavaDoc
533     {
534         private String JavaDoc stringRepres;
535         private char firstChar, lastChar;
536         
537         CharacterSet(String JavaDoc first, String JavaDoc last)
538             throws LexerException
539         {
540             this.firstChar = first.charAt(0);
541             this.lastChar = last.charAt(0);
542
543             if (firstChar >= lastChar)
544                 throw new LexerException("First character is bigger equal last: "+toString());
545         }
546         
547         public char getFirstChar() {
548             return firstChar;
549         }
550         
551         public char getLastChar() {
552             return lastChar;
553         }
554         
555         /** Returns the number of contained characters. */
556         public int getVariance() {
557             return lastChar - firstChar;
558         }
559
560         /** Returns true if passed character is contained in this set. */
561         public boolean includes(char c) {
562             return c >= firstChar && c <= lastChar;
563         }
564
565         /** Returns true if passed character set contains characters contained in this set. */
566         public boolean overlaps(CharacterSet other) {
567             return other.includes(firstChar) || other.includes(lastChar) || includes(other.firstChar) || includes(other.lastChar);
568         }
569         
570         public String JavaDoc toString() {
571             if (stringRepres == null) {
572                 if (Character.isISOControl(firstChar) || Character.isISOControl(lastChar))
573                     this.stringRepres = Integer.toHexString(firstChar)+".."+Integer.toHexString(lastChar);
574                 else
575                     this.stringRepres = firstChar+".."+lastChar;
576             }
577             return stringRepres;
578         }
579     }
580
581 }
Popular Tags