Parser


1   /*
2    * Parser.java
3    *
4    * This library is free software; you can redistribute it and/or
5    * modify it under the terms of the GNU Lesser General Public License
6    * as published by the Free Software Foundation; either version 2.1
7    * of the License, or (at your option) any later version.
8    *
9    * This library is distributed in the hope that it will be useful,
10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   * Lesser General Public License for more details.
13   *
14   * You should have received a copy of the GNU Lesser General Public
15   * License along with this library; if not, write to the Free
16   * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17   * MA 02111-1307, USA.
18   *
19   * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
20   */
21  
22  package net.percederberg.grammatica.parser;
23  
24  import java.util.ArrayList  ;
25  import java.util.Collection  ;
26  import java.util.HashMap  ;
27  
28  /**
29   * A base parser class. This class provides the standard parser
30   * interface, as well as token handling.
31   *
32   * @author   Per Cederberg, <per at percederberg dot net>
33   * @version  1.5
34   */
35  public abstract class Parser {
36  
37      /**
38       * The parser initialization flag.
39       */
40      private boolean initialized = false;
41  
42      /**
43       * The tokenizer to use.
44       */
45      private Tokenizer tokenizer;
46  
47      /**
48       * The analyzer to use for callbacks.
49       */
50      private Analyzer analyzer;
51  
52      /**
53       * The list of production patterns.
54       */
55      private ArrayList   patterns = new ArrayList  ();
56  
57      /**
58       * The map with production patterns and their id:s. This map
59       * contains the production patterns indexed by their id:s.
60       */
61      private HashMap   patternIds = new HashMap  ();
62  
63      /**
64       * The list of buffered tokens. This list will contain tokens that
65       * have been read from the tokenizer, but not yet consumed.
66       */
67      private ArrayList   tokens = new ArrayList  ();
68  
69      /**
70       * The error log. All parse errors will be added to this log as
71       * the parser attempts to recover from the error. If the error
72       * count is higher than zero (0), this log will be thrown as the
73       * result from the parse() method.
74       */
75      private ParserLogException errorLog = new ParserLogException();
76  
77      /**
78       * The error recovery counter. This counter is initially set to a
79       * negative value to indicate that no error requiring recovery
80       * has been encountered. When a parse error is found, the counter
81       * is set to three (3), and is then decreased by one for each
82       * correctly read token until it reaches zero (0).
83       */
84      private int errorRecovery = -1;
85  
86      /**
87       * Creates a new parser.
88       *
89       * @param tokenizer      the tokenizer to use
90       */
91      Parser(Tokenizer tokenizer) {
92          this(tokenizer, null);
93      }
94  
95      /**
96       * Creates a new parser.
97       *
98       * @param tokenizer      the tokenizer to use
99       * @param analyzer       the analyzer callback to use
100      */
101     Parser(Tokenizer tokenizer, Analyzer analyzer) {
102         this.tokenizer = tokenizer;
103         if (analyzer == null) {
104             this.analyzer = new Analyzer();
105         } else {
106             this.analyzer = analyzer;
107         }
108     }
109 
110     /**
111      * Returns the tokenizer in use by this parser.
112      *
113      * @return the tokenizer in use by this parser
114      *
115      * @since 1.4
116      */
117     public Tokenizer getTokenizer() {
118         return tokenizer;
119     }
120 
121     /**
122      * Returns the analyzer in use by this parser.
123      *
124      * @return the analyzer in use by this parser
125      *
126      * @since 1.4
127      */
128     public Analyzer getAnalyzer() {
129         return analyzer;
130     }
131 
132     /**
133      * Sets the parser initialized flag. Normally this flag is set by
134      * the prepare() method, but this method allows further
135      * modifications to it.
136      *
137      * @param initialized    the new initialized flag
138      */
139     void setInitialized(boolean initialized) {
140         this.initialized = initialized;
141     }
142 
143     /**
144      * Adds a new production pattern to the parser. The first pattern
145      * added is assumed to be the starting point in the grammar. The
146      * patterns added may be validated to some extent.
147      *
148      * @param pattern        the pattern to add
149      *
150      * @throws ParserCreationException if the pattern couldn't be
151      *             added correctly to the parser
152      */
153     public void addPattern(ProductionPattern pattern)
154         throws ParserCreationException {
155 
156         Integer    id = new Integer  (pattern.getId());
157 
158         if (pattern.getAlternativeCount() <= 0) {
159             throw new ParserCreationException(
160                 ParserCreationException.INVALID_PRODUCTION_ERROR,
161                 pattern.getName(),
162                 "no production alternatives are present (must have at " +
163                 "least one)");
164         }
165         if (patternIds.containsKey(id)) {
166             throw new ParserCreationException(
167                 ParserCreationException.INVALID_PRODUCTION_ERROR,
168                 pattern.getName(),
169                 "another pattern with the same id (" + id +
170                 ") has already been added");
171         }
172         patterns.add(pattern);
173         patternIds.put(id, pattern);
174         setInitialized(false);
175     }
176 
177     /**
178      * Initializes the parser. All the added production patterns will
179      * be analyzed for ambiguities and errors. This method also
180      * initializes internal data structures used during the parsing.
181      *
182      * @throws ParserCreationException if the parser couldn't be
183      *             initialized correctly
184      */
185     public void prepare() throws ParserCreationException {
186         if (patterns.size() <= 0) {
187             throw new ParserCreationException(
188                 ParserCreationException.INVALID_PARSER_ERROR,
189                 "no production patterns have been added");
190         }
191         for (int i = 0; i < patterns.size(); i++) {
192             checkPattern((ProductionPattern) patterns.get(i));
193         }
194         setInitialized(true);
195     }
196 
197     /**
198      * Checks a production pattern for completeness. If some rule in
199      * the pattern referenced an production pattern not added to this
200      * parser, a parser creation exception will be thrown.
201      *
202      * @param pattern        the production pattern to check
203      *
204      * @throws ParserCreationException if the pattern referenced a
205      *             pattern not added to this parser
206      */
207     private void checkPattern(ProductionPattern pattern)
208         throws ParserCreationException {
209 
210         for (int i = 0; i < pattern.getAlternativeCount(); i++) {
211             checkAlternative(pattern.getName(), pattern.getAlternative(i));
212         }
213     }
214 
215     /**
216      * Checks a production pattern alternative for completeness. If
217      * some element in the alternative referenced an production
218      * pattern not added to this parser, a parser creation exception
219      * will be thrown.
220      *
221      * @param name           the name of the pattern being checked
222      * @param alt            the production pattern alternative
223      *
224      * @throws ParserCreationException if the alternative referenced a
225      *             pattern not added to this parser
226      */
227     private void checkAlternative(String   name,
228                                   ProductionPatternAlternative alt)
229         throws ParserCreationException {
230 
231         for (int i = 0; i < alt.getElementCount(); i++) {
232             checkElement(name, alt.getElement(i));
233         }
234     }
235 
236     /**
237      * Checks a production pattern element for completeness. If the
238      * element references a production pattern not added to this
239      * parser, a parser creation exception will be thrown.
240      *
241      * @param name           the name of the pattern being checked
242      * @param elem           the production pattern element to check
243      *
244      * @throws ParserCreationException if the element referenced a
245      *             pattern not added to this parser
246      */
247     private void checkElement(String   name, ProductionPatternElement elem)
248         throws ParserCreationException {
249 
250         if (elem.isProduction() && getPattern(elem.getId()) == null) {
251             throw new ParserCreationException(
252                 ParserCreationException.INVALID_PRODUCTION_ERROR,
253                 name,
254                 "an undefined production pattern id (" + elem.getId() +
255                 ") is referenced");
256         }
257     }
258 
259     /**
260      * Resets this parser. This method will clear all the internal
261      * state and error log in the parser, but will not reset the
262      * tokenizer. In order to parse multiple input streams with the
263      * same parser, the Tokenizer.reset() method must also be called.
264      *
265      * @see Tokenizer#reset
266      *
267      * @since 1.5
268      */
269     public void reset() {
270         this.tokens.clear();
271         this.errorLog = new ParserLogException();
272         this.errorRecovery = -1;
273     }
274 
275     /**
276      * Parses the token stream and returns a parse tree. This method
277      * will call prepare() if not previously called. It will also call
278      * the reset() method, to making sure that only the
279      * Tokenizer.reset() method must be explicitly called in order to
280      * reuse a parser for multiple input streams. In case of a parse
281      * error, the parser will attempt to recover and throw all the
282      * errors found in a parser log exception at the end of the
283      * parsing.
284      *
285      * @return the parse tree
286      *
287      * @throws ParserCreationException if the parser couldn't be
288      *             initialized correctly
289      * @throws ParserLogException if the input couldn't be parsed
290      *             correctly
291      *
292      * @see #prepare
293      * @see #reset
294      * @see Tokenizer#reset
295      */
296     public Node parse() throws ParserCreationException, ParserLogException {
297         Node  root = null;
298 
299         // Initialize parser
300         if (!initialized) {
301             prepare();
302         }
303         reset();
304 
305         // Parse input
306         try {
307             root = parseStart();
308         } catch (ParseException e) {
309             addError(e, true);
310         }
311 
312         // Check for errors
313         if (errorLog.getErrorCount() > 0) {
314             throw errorLog;
315         }
316 
317         return root;
318     }
319 
320     /**
321      * Parses the token stream and returns a parse tree.
322      *
323      * @return the parse tree
324      *
325      * @throws ParseException if the input couldn't be parsed
326      *             correctly
327      */
328     protected abstract Node parseStart() throws ParseException;
329 
330     /**
331      * Adds an error to the error log. If the parser is in error
332      * recovery mode, the error will not be added to the log. If the
333      * recovery flag is set, this method will set the error recovery
334      * counter thus enter error recovery mode. Only lexical or
335      * syntactical errors require recovery, so this flag shouldn't be
336      * set otherwise.
337      *
338      * @param e              the error to add
339      * @param recovery       the recover flag
340      */
341     void addError(ParseException e, boolean recovery) {
342         if (errorRecovery <= 0) {
343             errorLog.addError(e);
344         }
345         if (recovery) {
346             errorRecovery = 3;
347         }
348     }
349 
350     /**
351      * Returns the production pattern with the specified id.
352      *
353      * @param id             the production pattern id
354      *
355      * @return the production pattern found, or
356      *         null if non-existent
357      */
358     ProductionPattern getPattern(int id) {
359         Integer    value = new Integer  (id);
360 
361         return (ProductionPattern) patternIds.get(value);
362     }
363 
364     /**
365      * Returns the production pattern for the starting production.
366      *
367      * @return the start production pattern, or
368      *         null if no patterns have been added
369      */
370     ProductionPattern getStartPattern() {
371         if (patterns.size() <= 0) {
372             return null;
373         } else {
374             return (ProductionPattern) patterns.get(0);
375         }
376     }
377 
378     /**
379      * Returns the ordered set of production patterns.
380      *
381      * @return the ordered set of production patterns
382      */
383     Collection   getPatterns() {
384         return patterns;
385     }
386 
387     /**
388      * Handles the parser entering a production. This method calls the
389      * appropriate analyzer callback if the node is not hidden. Note
390      * that this method will not call any callback if an error
391      * requiring recovery has ocurred.
392      *
393      * @param node           the parse tree node
394      */
395     void enterNode(Node node) {
396         if (!node.isHidden() && errorRecovery < 0) {
397             try {
398                 analyzer.enter(node);
399             } catch (ParseException e) {
400                 addError(e, false);
401             }
402         }
403     }
404 
405     /**
406      * Handles the parser leaving a production. This method calls the
407      * appropriate analyzer callback if the node is not hidden, and
408      * returns the result. Note that this method will not call any
409      * callback if an error requiring recovery has ocurred.
410      *
411      * @param node           the parse tree node
412      *
413      * @return the parse tree node, or
414      *         null if no parse tree should be created
415      */
416     Node exitNode(Node node) {
417         if (!node.isHidden() && errorRecovery < 0) {
418             try {
419                 return analyzer.exit(node);
420             } catch (ParseException e) {
421                 addError(e, false);
422             }
423         }
424         return node;
425     }
426 
427     /**
428      * Handles the parser adding a child node to a production. This
429      * method calls the appropriate analyzer callback. Note that this
430      * method will not call any callback if an error requiring
431      * recovery has ocurred.
432      *
433      * @param node           the parent parse tree node
434      * @param child          the child parse tree node, or null
435      */
436     void addNode(Production node, Node child) {
437         if (errorRecovery >= 0) {
438             // Do nothing
439         } else if (node.isHidden()) {
440             node.addChild(child);
441         } else if (child != null && child.isHidden()) {
442             for (int i = 0; i < child.getChildCount(); i++) {
443                 addNode(node, child.getChildAt(i));
444             }
445         } else {
446             try {
447                 analyzer.child(node, child);
448             } catch (ParseException e) {
449                 addError(e, false);
450             }
451         }
452     }
453 
454     /**
455      * Reads and consumes the next token in the queue. If no token was
456      * available for consumation, a parse error will be thrown.
457      *
458      * @return the token consumed
459      *
460      * @throws ParseException if the input stream couldn't be read or
461      *             parsed correctly
462      */
463     Token nextToken() throws ParseException {
464         Token  token = peekToken(0);
465 
466         if (token != null) {
467             tokens.remove(0);
468             return token;
469         } else {
470             throw new ParseException(
471                 ParseException.UNEXPECTED_EOF_ERROR,
472                 null,
473                 tokenizer.getCurrentLine(),
474                 tokenizer.getCurrentColumn());
475         }
476     }
477 
478     /**
479      * Reads and consumes the next token in the queue. If no token was
480      * available for consumation, a parse error will be thrown. A
481      * parse error will also be thrown if the token id didn't match
482      * the specified one.
483      *
484      * @param id             the expected token id
485      *
486      * @return the token consumed
487      *
488      * @throws ParseException if the input stream couldn't be parsed
489      *             correctly, or if the token wasn't expected
490      */
491     Token nextToken(int id) throws ParseException {
492         Token      token = nextToken();
493         ArrayList    list;
494 
495         if (token.getId() == id) {
496             if (errorRecovery > 0) {
497                 errorRecovery--;
498             }
499             return token;
500         } else {
501             list = new ArrayList  (1);
502             list.add(tokenizer.getPatternDescription(id));
503             throw new ParseException(
504                 ParseException.UNEXPECTED_TOKEN_ERROR,
505                 token.toShortString(),
506                 list,
507                 token.getStartLine(),
508                 token.getStartColumn());
509         }
510     }
511 
512     /**
513      * Returns a token from the queue. This method is used to check
514      * coming tokens before they have been consumed. Any number of
515      * tokens forward can be checked.
516      *
517      * @param steps          the token queue number, zero (0) for first
518      *
519      * @return the token in the queue, or
520      *         null if no more tokens in the queue
521      */
522     Token peekToken(int steps) {
523         Token  token;
524 
525         while (steps >= tokens.size()) {
526             try {
527                 token = tokenizer.next();
528                 if (token == null) {
529                     return null;
530                 } else {
531                     tokens.add(token);
532                 }
533             } catch (ParseException e) {
534                 addError(e, true);
535             }
536         }
537         return (Token) tokens.get(steps);
538     }
539 
540     /**
541      * Returns a string representation of this parser. The string will
542      * contain all the production definitions and various additional
543      * information.
544      *
545      * @return a detailed string representation of this parser
546      */
547     public String   toString() {
548         StringBuffer    buffer = new StringBuffer  ();
549 
550         for (int i = 0; i < patterns.size(); i++) {
551             buffer.append(toString((ProductionPattern) patterns.get(i)));
552             buffer.append("\n");
553         }
554         return buffer.toString();
555     }
556 
557     /**
558      * Returns a string representation of a production pattern.
559      *
560      * @param prod           the production pattern
561      *
562      * @return a detailed string representation of the pattern
563      */
564     private String   toString(ProductionPattern prod) {
565         StringBuffer    buffer = new StringBuffer  ();
566         StringBuffer    indent = new StringBuffer  ();
567         LookAheadSet  set;
568         int           i;
569 
570         buffer.append(prod.getName());
571         buffer.append(" (");
572         buffer.append(prod.getId());
573         buffer.append(") ");
574         for (i = 0; i < buffer.length(); i++) {
575             indent.append(" ");
576         }
577         buffer.append("= ");
578         indent.append("| ");
579         for (i = 0; i < prod.getAlternativeCount(); i++) {
580             if (i > 0) {
581                 buffer.append(indent);
582             }
583             buffer.append(toString(prod.getAlternative(i)));
584             buffer.append("\n");
585         }
586         for (i = 0; i < prod.getAlternativeCount(); i++) {
587             set = prod.getAlternative(i).getLookAhead();
588             if (set.getMaxLength() > 1) {
589                 buffer.append("Using ");
590                 buffer.append(set.getMaxLength());
591                 buffer.append(" token look-ahead for alternative ");
592                 buffer.append(i + 1);
593                 buffer.append(": ");
594                 buffer.append(set.toString(tokenizer));
595                 buffer.append("\n");
596             }
597         }
598         return buffer.toString();
599     }
600 
601     /**
602      * Returns a string representation of a production pattern
603      * alternative.
604      *
605      * @param alt            the production pattern alternative
606      *
607      * @return a detailed string representation of the alternative
608      */
609     private String   toString(ProductionPatternAlternative alt) {
610         StringBuffer    buffer = new StringBuffer  ();
611 
612         for (int i = 0; i < alt.getElementCount(); i++) {
613             if (i > 0) {
614                 buffer.append(" ");
615             }
616             buffer.append(toString(alt.getElement(i)));
617         }
618         return buffer.toString();
619     }
620 
621     /**
622      * Returns a string representation of a production pattern
623      * element.
624      *
625      * @param elem           the production pattern element
626      *
627      * @return a detailed string representation of the element
628      */
629     private String   toString(ProductionPatternElement elem) {
630         StringBuffer    buffer = new StringBuffer  ();
631         int           min = elem.getMinCount();
632         int           max = elem.getMaxCount();
633 
634         if (min == 0 && max == 1) {
635             buffer.append("[");
636         }
637         if (elem.isToken()) {
638             buffer.append(getTokenDescription(elem.getId()));
639         } else {
640             buffer.append(getPattern(elem.getId()).getName());
641         }
642         if (min == 0 && max == 1) {
643             buffer.append("]");
644         } else if (min == 0 && max == Integer.MAX_VALUE) {
645             buffer.append("*");
646         } else if (min == 1 && max == Integer.MAX_VALUE) {
647             buffer.append("+");
648         } else if (min != 1 || max != 1) {
649             buffer.append("{");
650             buffer.append(min);
651             buffer.append(",");
652             buffer.append(max);
653             buffer.append("}");
654         }
655         return buffer.toString();
656     }
657 
658     /**
659      * Returns a token description for a specified token.
660      *
661      * @param token          the token to describe
662      *
663      * @return the token description
664      */
665     String   getTokenDescription(int token) {
666         if (tokenizer == null) {
667             return "";
668         } else {
669             return tokenizer.getPatternDescription(token);
670         }
671     }
672 }
673
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags