KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > percederberg > grammatica > parser > Parser


1 /*
2  * Parser.java
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 2.1
7  * of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free
16  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17  * MA 02111-1307, USA.
18  *
19  * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
20  */

21
22 package net.percederberg.grammatica.parser;
23
24 import java.util.ArrayList JavaDoc;
25 import java.util.Collection JavaDoc;
26 import java.util.HashMap JavaDoc;
27
28 /**
29  * A base parser class. This class provides the standard parser
30  * interface, as well as token handling.
31  *
32  * @author Per Cederberg, <per at percederberg dot net>
33  * @version 1.5
34  */

35 public abstract class Parser {
36
37     /**
38      * The parser initialization flag.
39      */

40     private boolean initialized = false;
41
42     /**
43      * The tokenizer to use.
44      */

45     private Tokenizer tokenizer;
46
47     /**
48      * The analyzer to use for callbacks.
49      */

50     private Analyzer analyzer;
51
52     /**
53      * The list of production patterns.
54      */

55     private ArrayList JavaDoc patterns = new ArrayList JavaDoc();
56
57     /**
58      * The map with production patterns and their id:s. This map
59      * contains the production patterns indexed by their id:s.
60      */

61     private HashMap JavaDoc patternIds = new HashMap JavaDoc();
62
63     /**
64      * The list of buffered tokens. This list will contain tokens that
65      * have been read from the tokenizer, but not yet consumed.
66      */

67     private ArrayList JavaDoc tokens = new ArrayList JavaDoc();
68
69     /**
70      * The error log. All parse errors will be added to this log as
71      * the parser attempts to recover from the error. If the error
72      * count is higher than zero (0), this log will be thrown as the
73      * result from the parse() method.
74      */

75     private ParserLogException errorLog = new ParserLogException();
76
77     /**
78      * The error recovery counter. This counter is initially set to a
79      * negative value to indicate that no error requiring recovery
80      * has been encountered. When a parse error is found, the counter
81      * is set to three (3), and is then decreased by one for each
82      * correctly read token until it reaches zero (0).
83      */

84     private int errorRecovery = -1;
85
86     /**
87      * Creates a new parser.
88      *
89      * @param tokenizer the tokenizer to use
90      */

91     Parser(Tokenizer tokenizer) {
92         this(tokenizer, null);
93     }
94
95     /**
96      * Creates a new parser.
97      *
98      * @param tokenizer the tokenizer to use
99      * @param analyzer the analyzer callback to use
100      */

101     Parser(Tokenizer tokenizer, Analyzer analyzer) {
102         this.tokenizer = tokenizer;
103         if (analyzer == null) {
104             this.analyzer = new Analyzer();
105         } else {
106             this.analyzer = analyzer;
107         }
108     }
109
110     /**
111      * Returns the tokenizer in use by this parser.
112      *
113      * @return the tokenizer in use by this parser
114      *
115      * @since 1.4
116      */

117     public Tokenizer getTokenizer() {
118         return tokenizer;
119     }
120
121     /**
122      * Returns the analyzer in use by this parser.
123      *
124      * @return the analyzer in use by this parser
125      *
126      * @since 1.4
127      */

128     public Analyzer getAnalyzer() {
129         return analyzer;
130     }
131
132     /**
133      * Sets the parser initialized flag. Normally this flag is set by
134      * the prepare() method, but this method allows further
135      * modifications to it.
136      *
137      * @param initialized the new initialized flag
138      */

139     void setInitialized(boolean initialized) {
140         this.initialized = initialized;
141     }
142
143     /**
144      * Adds a new production pattern to the parser. The first pattern
145      * added is assumed to be the starting point in the grammar. The
146      * patterns added may be validated to some extent.
147      *
148      * @param pattern the pattern to add
149      *
150      * @throws ParserCreationException if the pattern couldn't be
151      * added correctly to the parser
152      */

153     public void addPattern(ProductionPattern pattern)
154         throws ParserCreationException {
155
156         Integer JavaDoc id = new Integer JavaDoc(pattern.getId());
157
158         if (pattern.getAlternativeCount() <= 0) {
159             throw new ParserCreationException(
160                 ParserCreationException.INVALID_PRODUCTION_ERROR,
161                 pattern.getName(),
162                 "no production alternatives are present (must have at " +
163                 "least one)");
164         }
165         if (patternIds.containsKey(id)) {
166             throw new ParserCreationException(
167                 ParserCreationException.INVALID_PRODUCTION_ERROR,
168                 pattern.getName(),
169                 "another pattern with the same id (" + id +
170                 ") has already been added");
171         }
172         patterns.add(pattern);
173         patternIds.put(id, pattern);
174         setInitialized(false);
175     }
176
177     /**
178      * Initializes the parser. All the added production patterns will
179      * be analyzed for ambiguities and errors. This method also
180      * initializes internal data structures used during the parsing.
181      *
182      * @throws ParserCreationException if the parser couldn't be
183      * initialized correctly
184      */

185     public void prepare() throws ParserCreationException {
186         if (patterns.size() <= 0) {
187             throw new ParserCreationException(
188                 ParserCreationException.INVALID_PARSER_ERROR,
189                 "no production patterns have been added");
190         }
191         for (int i = 0; i < patterns.size(); i++) {
192             checkPattern((ProductionPattern) patterns.get(i));
193         }
194         setInitialized(true);
195     }
196
197     /**
198      * Checks a production pattern for completeness. If some rule in
199      * the pattern referenced an production pattern not added to this
200      * parser, a parser creation exception will be thrown.
201      *
202      * @param pattern the production pattern to check
203      *
204      * @throws ParserCreationException if the pattern referenced a
205      * pattern not added to this parser
206      */

207     private void checkPattern(ProductionPattern pattern)
208         throws ParserCreationException {
209
210         for (int i = 0; i < pattern.getAlternativeCount(); i++) {
211             checkAlternative(pattern.getName(), pattern.getAlternative(i));
212         }
213     }
214
215     /**
216      * Checks a production pattern alternative for completeness. If
217      * some element in the alternative referenced an production
218      * pattern not added to this parser, a parser creation exception
219      * will be thrown.
220      *
221      * @param name the name of the pattern being checked
222      * @param alt the production pattern alternative
223      *
224      * @throws ParserCreationException if the alternative referenced a
225      * pattern not added to this parser
226      */

227     private void checkAlternative(String JavaDoc name,
228                                   ProductionPatternAlternative alt)
229         throws ParserCreationException {
230
231         for (int i = 0; i < alt.getElementCount(); i++) {
232             checkElement(name, alt.getElement(i));
233         }
234     }
235
236     /**
237      * Checks a production pattern element for completeness. If the
238      * element references a production pattern not added to this
239      * parser, a parser creation exception will be thrown.
240      *
241      * @param name the name of the pattern being checked
242      * @param elem the production pattern element to check
243      *
244      * @throws ParserCreationException if the element referenced a
245      * pattern not added to this parser
246      */

247     private void checkElement(String JavaDoc name, ProductionPatternElement elem)
248         throws ParserCreationException {
249
250         if (elem.isProduction() && getPattern(elem.getId()) == null) {
251             throw new ParserCreationException(
252                 ParserCreationException.INVALID_PRODUCTION_ERROR,
253                 name,
254                 "an undefined production pattern id (" + elem.getId() +
255                 ") is referenced");
256         }
257     }
258
259     /**
260      * Resets this parser. This method will clear all the internal
261      * state and error log in the parser, but will not reset the
262      * tokenizer. In order to parse multiple input streams with the
263      * same parser, the Tokenizer.reset() method must also be called.
264      *
265      * @see Tokenizer#reset
266      *
267      * @since 1.5
268      */

269     public void reset() {
270         this.tokens.clear();
271         this.errorLog = new ParserLogException();
272         this.errorRecovery = -1;
273     }
274
275     /**
276      * Parses the token stream and returns a parse tree. This method
277      * will call prepare() if not previously called. It will also call
278      * the reset() method, to making sure that only the
279      * Tokenizer.reset() method must be explicitly called in order to
280      * reuse a parser for multiple input streams. In case of a parse
281      * error, the parser will attempt to recover and throw all the
282      * errors found in a parser log exception at the end of the
283      * parsing.
284      *
285      * @return the parse tree
286      *
287      * @throws ParserCreationException if the parser couldn't be
288      * initialized correctly
289      * @throws ParserLogException if the input couldn't be parsed
290      * correctly
291      *
292      * @see #prepare
293      * @see #reset
294      * @see Tokenizer#reset
295      */

296     public Node parse() throws ParserCreationException, ParserLogException {
297         Node root = null;
298
299         // Initialize parser
300
if (!initialized) {
301             prepare();
302         }
303         reset();
304
305         // Parse input
306
try {
307             root = parseStart();
308         } catch (ParseException e) {
309             addError(e, true);
310         }
311
312         // Check for errors
313
if (errorLog.getErrorCount() > 0) {
314             throw errorLog;
315         }
316
317         return root;
318     }
319
320     /**
321      * Parses the token stream and returns a parse tree.
322      *
323      * @return the parse tree
324      *
325      * @throws ParseException if the input couldn't be parsed
326      * correctly
327      */

328     protected abstract Node parseStart() throws ParseException;
329
330     /**
331      * Adds an error to the error log. If the parser is in error
332      * recovery mode, the error will not be added to the log. If the
333      * recovery flag is set, this method will set the error recovery
334      * counter thus enter error recovery mode. Only lexical or
335      * syntactical errors require recovery, so this flag shouldn't be
336      * set otherwise.
337      *
338      * @param e the error to add
339      * @param recovery the recover flag
340      */

341     void addError(ParseException e, boolean recovery) {
342         if (errorRecovery <= 0) {
343             errorLog.addError(e);
344         }
345         if (recovery) {
346             errorRecovery = 3;
347         }
348     }
349
350     /**
351      * Returns the production pattern with the specified id.
352      *
353      * @param id the production pattern id
354      *
355      * @return the production pattern found, or
356      * null if non-existent
357      */

358     ProductionPattern getPattern(int id) {
359         Integer JavaDoc value = new Integer JavaDoc(id);
360
361         return (ProductionPattern) patternIds.get(value);
362     }
363
364     /**
365      * Returns the production pattern for the starting production.
366      *
367      * @return the start production pattern, or
368      * null if no patterns have been added
369      */

370     ProductionPattern getStartPattern() {
371         if (patterns.size() <= 0) {
372             return null;
373         } else {
374             return (ProductionPattern) patterns.get(0);
375         }
376     }
377
378     /**
379      * Returns the ordered set of production patterns.
380      *
381      * @return the ordered set of production patterns
382      */

383     Collection JavaDoc getPatterns() {
384         return patterns;
385     }
386
387     /**
388      * Handles the parser entering a production. This method calls the
389      * appropriate analyzer callback if the node is not hidden. Note
390      * that this method will not call any callback if an error
391      * requiring recovery has ocurred.
392      *
393      * @param node the parse tree node
394      */

395     void enterNode(Node node) {
396         if (!node.isHidden() && errorRecovery < 0) {
397             try {
398                 analyzer.enter(node);
399             } catch (ParseException e) {
400                 addError(e, false);
401             }
402         }
403     }
404
405     /**
406      * Handles the parser leaving a production. This method calls the
407      * appropriate analyzer callback if the node is not hidden, and
408      * returns the result. Note that this method will not call any
409      * callback if an error requiring recovery has ocurred.
410      *
411      * @param node the parse tree node
412      *
413      * @return the parse tree node, or
414      * null if no parse tree should be created
415      */

416     Node exitNode(Node node) {
417         if (!node.isHidden() && errorRecovery < 0) {
418             try {
419                 return analyzer.exit(node);
420             } catch (ParseException e) {
421                 addError(e, false);
422             }
423         }
424         return node;
425     }
426
427     /**
428      * Handles the parser adding a child node to a production. This
429      * method calls the appropriate analyzer callback. Note that this
430      * method will not call any callback if an error requiring
431      * recovery has ocurred.
432      *
433      * @param node the parent parse tree node
434      * @param child the child parse tree node, or null
435      */

436     void addNode(Production node, Node child) {
437         if (errorRecovery >= 0) {
438             // Do nothing
439
} else if (node.isHidden()) {
440             node.addChild(child);
441         } else if (child != null && child.isHidden()) {
442             for (int i = 0; i < child.getChildCount(); i++) {
443                 addNode(node, child.getChildAt(i));
444             }
445         } else {
446             try {
447                 analyzer.child(node, child);
448             } catch (ParseException e) {
449                 addError(e, false);
450             }
451         }
452     }
453
454     /**
455      * Reads and consumes the next token in the queue. If no token was
456      * available for consumation, a parse error will be thrown.
457      *
458      * @return the token consumed
459      *
460      * @throws ParseException if the input stream couldn't be read or
461      * parsed correctly
462      */

463     Token nextToken() throws ParseException {
464         Token token = peekToken(0);
465
466         if (token != null) {
467             tokens.remove(0);
468             return token;
469         } else {
470             throw new ParseException(
471                 ParseException.UNEXPECTED_EOF_ERROR,
472                 null,
473                 tokenizer.getCurrentLine(),
474                 tokenizer.getCurrentColumn());
475         }
476     }
477
478     /**
479      * Reads and consumes the next token in the queue. If no token was
480      * available for consumation, a parse error will be thrown. A
481      * parse error will also be thrown if the token id didn't match
482      * the specified one.
483      *
484      * @param id the expected token id
485      *
486      * @return the token consumed
487      *
488      * @throws ParseException if the input stream couldn't be parsed
489      * correctly, or if the token wasn't expected
490      */

491     Token nextToken(int id) throws ParseException {
492         Token token = nextToken();
493         ArrayList JavaDoc list;
494
495         if (token.getId() == id) {
496             if (errorRecovery > 0) {
497                 errorRecovery--;
498             }
499             return token;
500         } else {
501             list = new ArrayList JavaDoc(1);
502             list.add(tokenizer.getPatternDescription(id));
503             throw new ParseException(
504                 ParseException.UNEXPECTED_TOKEN_ERROR,
505                 token.toShortString(),
506                 list,
507                 token.getStartLine(),
508                 token.getStartColumn());
509         }
510     }
511
512     /**
513      * Returns a token from the queue. This method is used to check
514      * coming tokens before they have been consumed. Any number of
515      * tokens forward can be checked.
516      *
517      * @param steps the token queue number, zero (0) for first
518      *
519      * @return the token in the queue, or
520      * null if no more tokens in the queue
521      */

522     Token peekToken(int steps) {
523         Token token;
524
525         while (steps >= tokens.size()) {
526             try {
527                 token = tokenizer.next();
528                 if (token == null) {
529                     return null;
530                 } else {
531                     tokens.add(token);
532                 }
533             } catch (ParseException e) {
534                 addError(e, true);
535             }
536         }
537         return (Token) tokens.get(steps);
538     }
539
540     /**
541      * Returns a string representation of this parser. The string will
542      * contain all the production definitions and various additional
543      * information.
544      *
545      * @return a detailed string representation of this parser
546      */

547     public String JavaDoc toString() {
548         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
549
550         for (int i = 0; i < patterns.size(); i++) {
551             buffer.append(toString((ProductionPattern) patterns.get(i)));
552             buffer.append("\n");
553         }
554         return buffer.toString();
555     }
556
557     /**
558      * Returns a string representation of a production pattern.
559      *
560      * @param prod the production pattern
561      *
562      * @return a detailed string representation of the pattern
563      */

564     private String JavaDoc toString(ProductionPattern prod) {
565         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
566         StringBuffer JavaDoc indent = new StringBuffer JavaDoc();
567         LookAheadSet set;
568         int i;
569
570         buffer.append(prod.getName());
571         buffer.append(" (");
572         buffer.append(prod.getId());
573         buffer.append(") ");
574         for (i = 0; i < buffer.length(); i++) {
575             indent.append(" ");
576         }
577         buffer.append("= ");
578         indent.append("| ");
579         for (i = 0; i < prod.getAlternativeCount(); i++) {
580             if (i > 0) {
581                 buffer.append(indent);
582             }
583             buffer.append(toString(prod.getAlternative(i)));
584             buffer.append("\n");
585         }
586         for (i = 0; i < prod.getAlternativeCount(); i++) {
587             set = prod.getAlternative(i).getLookAhead();
588             if (set.getMaxLength() > 1) {
589                 buffer.append("Using ");
590                 buffer.append(set.getMaxLength());
591                 buffer.append(" token look-ahead for alternative ");
592                 buffer.append(i + 1);
593                 buffer.append(": ");
594                 buffer.append(set.toString(tokenizer));
595                 buffer.append("\n");
596             }
597         }
598         return buffer.toString();
599     }
600
601     /**
602      * Returns a string representation of a production pattern
603      * alternative.
604      *
605      * @param alt the production pattern alternative
606      *
607      * @return a detailed string representation of the alternative
608      */

609     private String JavaDoc toString(ProductionPatternAlternative alt) {
610         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
611
612         for (int i = 0; i < alt.getElementCount(); i++) {
613             if (i > 0) {
614                 buffer.append(" ");
615             }
616             buffer.append(toString(alt.getElement(i)));
617         }
618         return buffer.toString();
619     }
620
621     /**
622      * Returns a string representation of a production pattern
623      * element.
624      *
625      * @param elem the production pattern element
626      *
627      * @return a detailed string representation of the element
628      */

629     private String JavaDoc toString(ProductionPatternElement elem) {
630         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
631         int min = elem.getMinCount();
632         int max = elem.getMaxCount();
633
634         if (min == 0 && max == 1) {
635             buffer.append("[");
636         }
637         if (elem.isToken()) {
638             buffer.append(getTokenDescription(elem.getId()));
639         } else {
640             buffer.append(getPattern(elem.getId()).getName());
641         }
642         if (min == 0 && max == 1) {
643             buffer.append("]");
644         } else if (min == 0 && max == Integer.MAX_VALUE) {
645             buffer.append("*");
646         } else if (min == 1 && max == Integer.MAX_VALUE) {
647             buffer.append("+");
648         } else if (min != 1 || max != 1) {
649             buffer.append("{");
650             buffer.append(min);
651             buffer.append(",");
652             buffer.append(max);
653             buffer.append("}");
654         }
655         return buffer.toString();
656     }
657
658     /**
659      * Returns a token description for a specified token.
660      *
661      * @param token the token to describe
662      *
663      * @return the token description
664      */

665     String JavaDoc getTokenDescription(int token) {
666         if (tokenizer == null) {
667             return "";
668         } else {
669             return tokenizer.getPatternDescription(token);
670         }
671     }
672 }
673
Popular Tags