KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > fri > patterns > interpreter > parsergenerator > Parser


1 package fri.patterns.interpreter.parsergenerator;
2
3 import java.util.*;
4 import java.io.*;
5 import fri.patterns.interpreter.parsergenerator.syntax.Rule;
6
7 /**
8     The universal bottom-up parser algorithm. Runs with a Lexer (containing the input),
9     ParserTables (containing the syntax), and a Semantic (optional).
10     <pre>
11     private static final String [][] syntax = {
12         { "Start", "\"Hello\"", "\"World\"" },
13         { Token.IGNORED, "`whitespaces`" },
14     };
15
16     SyntaxSeparation separation = new SyntaxSeparation(new Syntax(syntax));
17     LexerBuilder builder = new LexerBuilder(separation.getLexerSyntax(), separation.getIgnoredSymbols());
18     Lexer lexer = builder.getLexer();
19     lexer.setInput("\tHello \r\n\tWorld\n");
20     ParserTables parserTables = new SLRParserTables(separation.getParserSyntax());
21     Parser parser = new Parser(parserTables);
22     parser.parse(lexer, new PrintSemantic());
23     </pre>
24
25     TODO: implement error recovery: method recover()
26     @author (c) 2000, Fritz Ritzberger
27 */

28
29 public class Parser implements Serializable
30 {
31     private Lexer lexer;
32     private ParserTables tables;
33     private transient Semantic semantic;
34     protected Stack stateStack = new Stack();
35     protected Stack valueStack = new Stack();
36     protected Stack rangeStack = new Stack();
37     private transient Object JavaDoc result;
38     private transient List inputTokens;
39     private transient List rangeList;
40     private transient Token.Range range = new Token.Range(null, null);
41     private transient PrintStream out;
42     private boolean passExpectedToLexer = true;
43     // private boolean showConflicts;
44
private boolean DEBUG;
45
46     
47     /**
48         Create a generic bottom-up Parser with passed ParserTables (representing the current syntax to apply).
49         @param tables ParserTables representing the syntax.
50     */

51     public Parser(ParserTables tables) {
52         this.tables = tables;
53     }
54     
55
56     /** Returns the parsing result built from Semantic call return values. Retrievable after parsing. */
57     public Object JavaDoc getResult() {
58         return result;
59     }
60     
61 // /** Sets if SHIFT/REDUCE or REDUCE/REDUCE conflicts wil be shown on System.err. */
62
// public void setShowConflicts(boolean showConflicts) {
63
// this.showConflicts = showConflicts;
64
// }
65

66     /**
67         Sets the lexer to be used for parsing. The Lexer contains (or will contain) the input to parse.
68         The Parser calls <i>setTerminals()</i> on this call.
69     */

70     public void setLexer(Lexer lexer) {
71         boolean initLexer = (this.lexer != lexer); // look if passed lexer needs terminals
72
this.lexer = lexer;
73         clear(); // clear if reused
74
if (initLexer)
75             lexer.setTerminals(getParserTables().getTerminals()); // pass terminals to lexer
76
}
77     
78     /** Returns the lexer that was set to this parser, to call <i>setInput()</i> to the lexer. */
79     public Lexer getLexer() {
80         return lexer;
81     }
82     
83     /** Sets the input to contained lexer, or throws IllegalStateException if no lexer was set. */
84     public void setInput(Object JavaDoc input)
85         throws IOException
86     {
87         if (lexer == null)
88             throw new IllegalStateException JavaDoc("Can not set input when no lexer was defined!");
89         clear(); // clear if reused
90
lexer.setInput(input);
91     }
92
93     /** Sets the semantic to be applied to parsing results. */
94     public void setSemantic(Semantic semantic) {
95         this.semantic = semantic;
96     }
97
98     /** Returns the semantic that was set to this parser. */
99     public Semantic getSemantic() {
100         return semantic;
101     }
102
103     /** Returns current ParserTables. */
104     public ParserTables getParserTables() {
105         return tables;
106     }
107
108     /** Default is true. When true, the Parser will pass a Map of expected symbols to Lexer at every token request. */
109     public void setPassExpectedToLexer(boolean passExpectedToLexer) {
110         this.passExpectedToLexer = passExpectedToLexer;
111     }
112     
113
114
115     // bottom-up state machine methods
116

117     private Integer JavaDoc top() {
118         return (Integer JavaDoc) stateStack.peek();
119     }
120     
121     private void push(Integer JavaDoc state, Object JavaDoc result, Token.Range range) {
122         stateStack.push(state);
123         semanticPush(result, range);
124     }
125
126     private void pop(int pops) {
127         inputTokens = new ArrayList();
128         rangeList = new ArrayList();
129         
130         for (int i = 0; i < pops; i++) {
131             stateStack.pop();
132             semanticPop(i, pops);
133         }
134     }
135
136     private void semanticPush(Object JavaDoc result, Token.Range range) {
137         if (semantic != null) { // when a semantic is present
138
valueStack.push(result); // we need to know parse result
139
rangeStack.push(range); // and its start-end positions within input text
140
}
141     }
142
143     private void semanticPop(int popIndex, int countOfPops) {
144         if (semantic != null) {
145             // the value pop
146
inputTokens.add(0, valueStack.pop());
147             
148             // the range pop
149
Token.Range currentRange = (Token.Range) rangeStack.pop();
150             rangeList.add(0, currentRange);
151             
152             if (popIndex == 0) // first pop of right side holds last token value
153
this.range = new Token.Range(null, currentRange.end); // helper to remember end address
154

155             if (popIndex == countOfPops - 1) // if it is the last pop, make a valid range for next push()
156
this.range = new Token.Range(currentRange.start, this.range.end);
157         }
158     }
159
160     /**
161         Reduce a rule when input satisfied it. Pop the stack n times, n is the number of right symbols of the rule.
162         Semantic gets called with all input tokens corresponding to the rule, if not null.
163         A new state gets pushed, determined by the new state (after pops) and the nonterminal of the rule (left side).
164     */

165     protected void reduce(Integer JavaDoc ruleIndex) {
166         if (DEBUG)
167             dump("reduce "+ruleIndex);
168
169         Rule rule = getParserTables().getSyntax().getRule(ruleIndex.intValue());
170         pop(rule.rightSize()); // pop count of elements on right side
171

172         semanticReduce(rule);
173         
174         String JavaDoc nonterminal = rule.getNonterminal();
175         push(getParserTables().getGotoState(top(), nonterminal), result, range);
176         
177         dumpStack();
178     }
179
180     private void semanticReduce(Rule rule) {
181         if (semantic != null) {
182             result = semantic.doSemantic(rule, inputTokens, rangeList);
183         }
184     }
185     
186     /**
187         Push a new state upon state stack, determined by the GOTO table with current state
188         and the received token symbol. Then read a new token from Lexer, trying to evaluate a rule.
189     */

190     protected Token shift(Token token)
191         throws IOException
192     {
193         if (DEBUG)
194             dump("shift from token symbol >"+token.symbol+"<");
195
196         push(getParserTables().getGotoState(top(), token.symbol), token.text, token.range);
197         dumpStack();
198         
199         Token newToken = getNextToken();
200         if (DEBUG)
201             dump("next token "+newToken.symbol+" >"+newToken.text+"<");
202
203         return newToken;
204     }
205     
206     /** Delivers the next token from lexer to parser. Override to convert the Token value. */
207     protected Token getNextToken()
208         throws IOException
209     {
210         Map expected = passExpectedToLexer && top().intValue() >= 0 ? getParserTables().getExpected(top()) : null;
211         Token token = lexer.getNextToken(expected);
212         return token;
213     }
214     
215
216     // public parsing methods
217

218     /**
219         Parse the tokens returned from passed lexer. This call is for checking correctness without semantics.
220         @param lexer the Lexer, loaded with input to scan.
221         @return true when input was syntactically correct.
222     */

223     public boolean parse(Lexer lexer)
224         throws IOException
225     {
226         setLexer(lexer);
227         return parse();
228     }
229     
230     /**
231         Parse the tokens returned from passed lexer. This call is for processing input with semantics.
232         At least <i>setLexer()</i> must have been called before.
233         @param semantic the semantic to apply to parser results.
234         @return true when input was syntactically correct.
235     */

236     public boolean parse(Semantic semantic)
237         throws IOException
238     {
239         if (lexer == null)
240             throw new IllegalStateException JavaDoc("No lexer was defined to scan input!");
241         setSemantic(semantic);
242         return parse();
243     }
244     
245     /**
246         Parse the tokens returned from passed input.
247         At least <i>setLexer()</i> must have been called before.
248         @param input the input to parse, as File, InputStream, String, ....
249         @return true when input was syntactically correct.
250     */

251     public boolean parse(Object JavaDoc input)
252         throws IOException
253     {
254         setInput(input);
255         return parse();
256     }
257     
258     /**
259         Parse the tokens returned from passed lexer. This call is for integrating a semantic.
260         @param lexer Lexer containing the input to parse
261         @param semantic the semantic to apply to parser results.
262         @return true when input was syntactically correct.
263     */

264     public boolean parse(Lexer lexer, Semantic semantic)
265         throws IOException
266     {
267         setLexer(lexer);
268         setSemantic(semantic);
269         return parse();
270     }
271     
272     /**
273         Parse the tokens returned from passed lexer. This call is for integrating a semantic.
274         @param input the input to parse, as File, InputStream, String, ....
275         @param semantic the semantic to apply to parser results.
276         @return true when input was syntactically correct.
277     */

278     public boolean parse(Object JavaDoc input, Semantic semantic)
279         throws IOException
280     {
281         setInput(input);
282         setSemantic(semantic);
283         return parse();
284     }
285         
286     /**
287         Start parsing after setting Lexer and optionally Semantic. At least <i>setLexer()</i> must have been called before.
288         <p>
289         Init the parser, read first token, push state 0 and set action to SHIFT.
290         Loop while action is not ERROR or ACCEPT, and token symbol is not ERROR, and top of stack is not ERROR.
291         Within loop, get next action from PARSE-ACTION table using current state and token symbol.
292         When action greater than zero, call reduce(), else when action is SHIFT, call shift().
293         @return true when input was syntactically correct.
294     */

295     public boolean parse()
296         throws IOException
297     {
298         stateStack.push(new Integer JavaDoc(0)); // push first state on stack
299
Integer JavaDoc action = ParserTables.SHIFT; // some allowed initial value
300
Token token = getNextToken(); // start reading input
301
if (DEBUG)
302             dump("initial token symbol >"+token.symbol+"<, text >"+token.text+"<");
303         
304         while (token.symbol != null && // lexer error
305
action.equals(ParserTables.ACCEPT) == false && // input accepted
306
action.equals(ParserTables.ERROR) == false && // parse-action table error
307
top().equals(ParserTables.ERROR) == false) // goto table error
308
{
309             action = getParserTables().getParseAction(top(), token.symbol);
310             
311             if (action.intValue() > 0)
312                 reduce(action);
313             else
314             if (action.equals(ParserTables.SHIFT))
315                 token = shift(token);
316                 
317             action = recover(action, token); // recover if error
318
}
319         
320         return detectError(token, top(), action);
321     }
322     
323
324     /**
325         Recover from error. Not implemented.
326         @param action current action from PARSE-ACTION table.
327         @param token recently received Token.
328         @return action to proceed with. Token.symbol may not be null and current state may not be ERROR after this call.
329     */

330     protected Integer JavaDoc recover(Integer JavaDoc action, Token token) {
331         return action;
332     }
333     
334     
335     /**
336         Called after parse loop to determine if everything was OK.
337         @return true when action is ACCEPT, token.symbol is EPSILON, and state is not ERROR.
338     */

339     protected boolean detectError(Token token, Integer JavaDoc state, Integer JavaDoc action) {
340         boolean ret = true;
341         
342         if (token.symbol == null || action.equals(ParserTables.ERROR)) {
343             if (token.symbol == null)
344                 ensureOut().println("ERROR: Unknown symbol: >"+token.text+"<, state "+state);
345             else
346                 ensureOut().println("ERROR: Wrong symbol: "+(Token.isEpsilon(token) ? "EOF" : token.symbol+", text: >"+token.text+"<")+", state "+state);
347
348             lexer.dump(out);
349
350             Map h = getParserTables().getExpected(state);
351             if (h != null) {
352                 ensureOut().print("Expected was (one of): ");
353                 
354                 for (Iterator it = h.keySet().iterator(); it.hasNext(); ) {
355                     String JavaDoc s = (String JavaDoc) it.next();
356                     ensureOut().print((Token.isEpsilon(s) ? "EOF" : s)+(it.hasNext() ? ", " : ""));
357                 }
358                 ensureOut().println();
359             }
360
361             ret = false;
362         }
363         else
364         if (state.equals(ParserTables.ERROR)) { // ERROR lies on stack, from SHIFT
365
pop(1);
366             ensureOut().println("ERROR: found no possible follow state for "+top()+", text >"+token.text+"<");
367             lexer.dump(out);
368             ret = false;
369         }
370         else
371         if (Token.isEpsilon(token) == false) {
372             ensureOut().println("ERROR: Input is not finished.");
373             lexer.dump(out);
374             ret = false;
375         }
376         else
377         if (action.equals(ParserTables.ACCEPT) == false) {
378             ensureOut().println("ERROR: Could not achieve ACCEPT. Symbol: "+token.symbol);
379             lexer.dump(out);
380             ret = false;
381         }
382
383         if (ret == false)
384             result = null;
385         
386         return ret;
387     }
388
389
390     private void clear() {
391         stateStack.removeAllElements();
392         valueStack.removeAllElements();
393         rangeStack.removeAllElements();
394         range = new Token.Range(null, null);
395         inputTokens = null;
396         result = null;
397         if (lexer != null)
398             lexer.clear();
399     }
400
401
402
403     private void dumpStack() {
404         if (DEBUG) {
405             ensureOut().print("stack: ");
406             for (int i = 0; i < stateStack.size(); i++)
407                 ensureOut().print(stateStack.elementAt(i)+" ");
408             ensureOut().println();
409         }
410     }
411
412     private void dump(String JavaDoc s) {
413         ensureOut().println(s);
414     }
415     
416     private PrintStream ensureOut() {
417         if (out == null)
418             out = System.err;
419         return out;
420     }
421     
422     /** Debug output will go to passed stream. */
423     public void setPrintStream(PrintStream out) {
424         this.out = (out != null) ? out : System.err;
425     }
426     
427     /** Set the debug mode. */
428     public void setDebug(boolean debug) {
429         DEBUG = debug;
430     }
431     
432 }
433
Popular Tags