KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > lexer > Strategy


1 package fri.patterns.interpreter.parsergenerator.lexer;
2
3 import java.io.*;
4 import java.util.*;
5 import fri.util.collections.AggregatingHashtable;
6 import fri.patterns.interpreter.parsergenerator.Token;
7
8 /**
9     Strategy is the way how alternative concurrent character consumers are applied to input.
10     It is used by <i>LexerImpl</i> and <i>ConsumerAlternatives</i>.
11     There are consumers that have a fixed start character and others that have not.
12     Consumers have different length of fixed start sequences.
13     Some consumers have a fixed length to scan, others (with repeatable rules) have not.
14     Consumers differ in their character sets, some having many possible characters,
15     others lesser, some sets might overlap those of other consumers.
16     Last but not least the Parser gives hints about currently expected tokens.
17     <p />
18     Following is the strategy implemented here:
19     <ul>
20         <li>Divide consumers in two groups: those with fixed start character and those without.</li>
21         <li>Sort both consumers groups by
22             <ol>
23                 <li>by the variance of their start character (which may be a set), the less the better</li>
24                 <li>their start sequence length (ends before first repeatable character), the longer the better</li>
25                 <li>by their fixed start length (ends before first character set), the longer the better.</li>
26             </ol>
27             Sorting is done by <i>Consumer implements Comparable</i>
28             </li>
29         <li>The lookahead character chooses consumers with fixed start character.</li>
30         <li>If one matches, overlapping consumers without fixed start character are tried, when one scans longer, it wins.</li>
31         <li>If none matches, consumers without fixed start character are tried, by sort order.</li>
32         <li>If one matches, overlapping consumers without fixed start character are tried, when one scans longer, it wins.</li>
33     </ul>
34     
35     @author Fritz Ritzberger
36 */

37
38 public class Strategy implements
39     Serializable
40 {
41     private boolean inited;
42     private AggregatingHashtable itemsWithStartChar = new AggregatingHashtable();
43     private List itemsWithoutStartChar = new ArrayList();
44     private AggregatingHashtable competitiveGroups = new AggregatingHashtable();
45     private boolean competeForLongestInput = true;
46
47
48     public void setCompeteForLongestInput(boolean competeForLongestInput) {
49         this.competeForLongestInput = competeForLongestInput;
50     }
51
52     /** Adds a Consumer to list of possible consumers. This consumer will read input to be ignored by the parser. */
53     public void addIgnoringConsumer(String JavaDoc symbol, Consumer cc) {
54         addConsumer(symbol, cc);
55     }
56     
57     /** Adds a Consumer to list of possible consumers producing valid tokens. */
58     public void addTokenConsumer(String JavaDoc symbol, Consumer cc) {
59         addConsumer(symbol, cc);
60     }
61
62     /** Returns true if the passed terminal is already in list. */
63     public boolean hasTerminal(String JavaDoc terminal) {
64         for (Enumeration e = new ItemEnumerator(); e.hasMoreElements(); )
65             if (((Item) e.nextElement()).getSymbol().equals(terminal))
66                 return true;
67         return false;
68     }
69
70     private void addConsumer(String JavaDoc symbol, Consumer cc) {
71         Item item = new Item(symbol, cc);
72         Character JavaDoc c = item.consumer.getStartCharacter();
73         if (c != null)
74             itemsWithStartChar.put(c, item);
75         else
76             itemsWithoutStartChar.add(item);
77     }
78
79     /** Liefert die Items, die dem uebergebenen Start-Zeichen entsprechen wuerden. */
80     private List getItemsWithStartCharacter(int c) {
81         init();
82         return (List) itemsWithStartChar.get(new Character JavaDoc((char)c));
83     }
84
85     /** Liefert die Items, die kein bestimmtes Start-Zeichen definieren. */
86     private List getItemsWithoutStartCharacter() {
87         init();
88         return itemsWithoutStartChar;
89     }
90
91
92     // separate consumers with or without fixed start character, build hashtable for start characters
93
private void init() {
94         if (inited == false) {
95             inited = true;
96             // sort items with start character
97
for (Iterator it = itemsWithStartChar.entrySet().iterator(); it.hasNext(); ) {
98                 Map.Entry entry = (Map.Entry) it.next();
99                 Collections.sort((List) entry.getValue());
100             }
101             // sort items without start character
102
Collections.sort(itemsWithoutStartChar);
103         }
104     }
105
106     private List initCompetitors(Item candidate) {
107         List competitors = (List) competitiveGroups.get(candidate);
108         if (competitors != null) // already inited
109
if (competitors.get(0) instanceof Item) // valid competitor list
110
return competitors;
111             else // was dummy object
112
return null;
113             
114         for (Enumeration e = new ItemEnumerator(); e.hasMoreElements(); ) { // search a competitor among all items
115
Item item = (Item) e.nextElement();
116             if (item != candidate && item.consumer.overlaps(candidate.consumer)) {
117                 //System.err.println("Found competitive consumers: candidate = "+candidate.consumer+", competitor = "+item.consumer);
118
competitiveGroups.put(candidate, item);
119             }
120         }
121         
122         competitors = (List) competitiveGroups.get(candidate);
123         if (competitors == null) // no competitors were found, mark candidate with dummy object
124
competitiveGroups.put(candidate, new Byte JavaDoc((byte)0));
125         
126         return competitors;
127     }
128
129
130
131     /**
132         Liefert null wenn kein consumer den input lesen kann, sonst den laengstmoeglichen gescannten Text.
133         @param input the input to read from
134         @param lookahead the first byte or character from input
135         @param expectedTokenSymbols expected token symbols (in key enumeration), can be null
136     */

137     public Item consume(InputText input, int lookahead, Map expectedTokenSymbols)
138         throws IOException
139     {
140         // try all scan-items by sort order, indexed ones first
141
List [] allItems = new List [] { getItemsWithStartCharacter(lookahead), getItemsWithoutStartCharacter() };
142         
143         for (int j = 0; j < allItems.length; j++) {
144             List items = allItems[j];
145             
146             if (items != null) {
147                 for (int i = 0; i < items.size(); i++) {
148                     Item item = (Item) items.get(i);
149                     
150                     if (expectedTokenSymbols == null || expectedTokenSymbols.get(item.getSymbol()) != null) {
151                         int startMark = input.getMark(); // save start position for concurrent consumers
152
ResultTree result = item.consume(input);
153                         
154                         if (result != null) { // consumer succeeded
155
if (competeForLongestInput) {
156                                 List competitors = initCompetitors(item);
157                                 if (competitors != null) {
158                                     int bestMark = input.getMark();
159                                     input.setMark(startMark);
160                                     item = checkCompetitiveGroups(result, item, input, competitors, bestMark, expectedTokenSymbols); // returns item that scans longest
161
}
162                             }
163                             return item;
164                         }
165                     }
166                 } // end for all items
167
} // end if item != null
168
} // end for both item groups
169

170         if (expectedTokenSymbols != null) // when this was a run with hints,
171
return consume(input, lookahead, null); // now try without hints
172

173         return null;
174     }
175
176
177     // loop competitive group of passed Item (if existent), return given Item or an Item that scans longer
178
private Item checkCompetitiveGroups(ResultTree result, Item item, InputText input, List competitors, int bestMark, Map expectedTokenSymbols)
179         throws IOException
180     {
181         int max = bestMark - input.getMark();
182         
183         for (int i = 0; i < competitors.size(); i++) {
184             Item competitor = (Item) competitors.get(i);
185             
186             // let take part only if no expected symbols or is in expected symbols
187
if (expectedTokenSymbols != null && expectedTokenSymbols.get(competitor.getSymbol()) == null)
188                 continue;
189
190             int mark = input.getMark(); // memorize current input mark
191

192             ResultTree r = competitor.consume(input); // consume
193
int len = input.getMark() - mark;
194             
195             if (r != null && len > max) { // scanned longer
196
bestMark = input.getMark();
197                 max = len;
198                 item = competitor;
199             }
200             
201             input.setMark(mark); // reset for next candidate
202
}
203
204         input.setMark(bestMark); // set mark forward to best scan result
205

206         return item;
207     }
208
209
210     /** Returns a human readable representation of the lists and maps within this strategy. */
211     public String JavaDoc toString() {
212         init();
213         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
214         sb.append(" Indexed list is: "+itemsWithStartChar.size()+"\n");
215         for (Iterator it = itemsWithStartChar.entrySet().iterator(); it.hasNext(); ) {
216             Map.Entry entry = (Map.Entry) it.next();
217             sb.append(" "+entry.getKey()+" -> "+entry.getValue()+"\n");
218         }
219         sb.append(" Sorted unindexed list is: "+itemsWithoutStartChar.size()+"\n");
220         for (int i = 0; i < itemsWithoutStartChar.size(); i++) {
221             sb.append(" "+itemsWithoutStartChar.get(i)+"\n");
222         }
223         return sb.toString();
224     }
225
226
227
228     /**
229         The List item wrapper for toplevel consumers. Items can be sorted by their relevance.
230         They encapsulate a consumer, its token symbol and the consumed lexer result.
231     */

232     public static class Item implements
233         Comparable JavaDoc,
234         Serializable
235     {
236         private String JavaDoc symbol;
237         private Consumer consumer;
238         private transient ResultTree result;
239         
240         public Item(String JavaDoc symbol, Consumer consumer) {
241             if (consumer == null)
242                 throw new IllegalArgumentException JavaDoc("Got no character consumer for symbol "+symbol);
243             if (symbol == null)
244                 throw new IllegalArgumentException JavaDoc("Got no symbol for consumer "+consumer);
245
246             this.symbol = symbol;
247             this.consumer = consumer;
248         }
249         
250         /** Consumes from input by delegating to contained consumer, stores the result. */
251         public ResultTree consume(InputText input)
252             throws IOException
253         {
254             return result = consumer.consume(input);
255         }
256         
257         /** Returns the lexer item symbol, enclosed in `backquotes` when not a literal. */
258         public String JavaDoc getSymbol() {
259             return symbol;
260         }
261         
262         /** Returns the token symbol, always enclosed in some quotes. */
263         public String JavaDoc getTokenIdentifier() {
264             return Token.isTerminal(symbol) ? symbol : Token.COMMAND_QUOTE + symbol + Token.COMMAND_QUOTE;
265         }
266     
267         public ResultTree getResultTree() { // this is for ConsumerAlternatives
268
return result;
269         }
270     
271         public String JavaDoc toString() {
272             return "{"+symbol+"} "+consumer.toString();
273         }
274     
275         /** Implements Comparable: delegates to character consumer. If they are equal, "terminals" are preferred. */
276         public int compareTo(Object JavaDoc o) {
277             Consumer cc1 = consumer;
278             Consumer cc2 = ((Item)o).consumer;
279             int i;
280             if ((i = cc1.compareTo(cc2)) != 0)
281                 return i;
282             return Token.isTerminal(symbol) ? -1 : 1;
283         }
284     
285         /** Compares the contained consumer with other via "==". */
286         public boolean equals(Object JavaDoc o) {
287             return ((Item) o).consumer == consumer;
288         }
289
290         /** Returns the consumers hashcode. */
291         public int hashCode() {
292             return consumer.hashCode();
293         }
294     }
295
296
297
298     // enumeration over all strategy items (toplevel consumers)
299
private class ItemEnumerator implements Enumeration
300     {
301         private Iterator it, it1, it2;
302         
303         public ItemEnumerator() {
304             it = itemsWithStartChar.entrySet().iterator();
305             it1 = it.hasNext() ? ((List) ((Map.Entry)it.next()).getValue()).iterator() : null;
306             it2 = itemsWithoutStartChar.iterator();
307         }
308         public boolean hasMoreElements() {
309             return it.hasNext() || it1 != null && it1.hasNext() || it2.hasNext();
310         }
311         public Object JavaDoc nextElement() {
312             if (it1 != null)
313                 if (it1.hasNext())
314                     return it1.next();
315                 else
316                 if (it.hasNext())
317                     return (it1 = ((List) ((Map.Entry)it.next()).getValue()).iterator()).next();
318
319             if (it2.hasNext())
320                 return it2.next();
321                 
322             throw new IllegalStateException JavaDoc("Do not call nextElement() when hasMoreElements() returned false!");
323         }
324     }
325
326 }
327
Popular Tags