FirstPassAnalyzer


1   /*
2    * FirstPassAnalyzer.java
3    *
4    * This library is free software; you can redistribute it and/or
5    * modify it under the terms of the GNU Lesser General Public License
6    * as published by the Free Software Foundation; either version 2.1
7    * of the License, or (at your option) any later version.
8    *
9    * This library is distributed in the hope that it will be useful,
10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   * Lesser General Public License for more details.
13   *
14   * You should have received a copy of the GNU Lesser General Public
15   * License along with this library; if not, write to the Free
16   * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17   * MA 02111-1307, USA.
18   *
19   * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
20   */
21  
22  package net.percederberg.grammatica;
23  
24  import java.util.HashMap  ;
25  
26  import net.percederberg.grammatica.parser.Node;
27  import net.percederberg.grammatica.parser.ParseException;
28  import net.percederberg.grammatica.parser.Production;
29  import net.percederberg.grammatica.parser.ProductionPattern;
30  import net.percederberg.grammatica.parser.Token;
31  import net.percederberg.grammatica.parser.TokenPattern;
32  
33  /**
34   * A first pass grammar analyzer. This class processes the grammar
35   * parse tree and creates the token and production patterns. Both
36   * token and production patterns are added to the grammar, but the
37   * production patterns will all be empty. In order to analyze the
38   * production pattern rules, all the production pattern names and
39   * identifiers must be present in the grammar, so the pattern rules
40   * must be analyzed in a second pass. This analyzer also adds all
41   * header declarations to the grammar.
42   *
43   * @author   Per Cederberg, <per at percederberg dot net>
44   * @version  1.0
45   */
46  class FirstPassAnalyzer extends GrammarAnalyzer {
47  
48      /**
49       * The grammar where objects are added.
50       */
51      private Grammar grammar;
52  
53      /**
54       * The token id to use.
55       */
56      private int nextTokenId = 1001;
57  
58      /**
59       * The production id to use.
60       */
61      private int nextProductionId = 2001;
62  
63      /**
64       * A map with all token and production names. This map is indexed
65       * by the upper-case names (without '_' characters), and maps
66       * these to the declared case-sensitive name.
67       */
68      private HashMap   names = new HashMap  ();
69  
70      /**
71       * Creates a new grammar analyser.
72       *
73       * @param grammar        the grammar where objects are added
74       */
75      public FirstPassAnalyzer(Grammar grammar) {
76          this.grammar = grammar;
77      }
78  
79      /**
80       * Sets the node value to the ignore message. If no message is
81       * set, no node value will be added.
82       *
83       * @param node           the token node
84       *
85       * @return the token node
86       */
87      protected Node exitIgnore(Token node) {
88          String    str = node.getImage();
89  
90          str = str.substring(7, str.length() - 1).trim();
91          if (!str.equals("")) {
92              node.addValue(str);
93          }
94          return node;
95      }
96  
97      /**
98       * Sets the node value to the error message. If no message is set,
99       * no node value will be added.
100      *
101      * @param node           the token node
102      *
103      * @return the token node
104      */
105     protected Node exitError(Token node) {
106         String    str = node.getImage();
107 
108         str = str.substring(6, str.length() - 1).trim();
109         if (!str.equals("")) {
110             node.addValue(str);
111         }
112         return node;
113     }
114 
115     /**
116      * Sets the node value to the identifier string.
117      *
118      * @param node           the token node
119      *
120      * @return the token node
121      */
122     protected Node exitIdentifier(Token node) {
123         node.addValue(node.getImage());
124         return node;
125     }
126 
127     /**
128      * Sets the node value to the contents of the quoted string. The
129      * quotation marks will be removed, but any escaped character
130      * will be left intact.
131      *
132      * @param node           the token node
133      *
134      * @return the token node
135      */
136     protected Node exitQuotedString(Token node) {
137         String    str = node.getImage();
138 
139         node.addValue(str.substring(1, str.length() - 1));
140         return node;
141     }
142 
143     /**
144      * Sets the node value to the regular expression string. The
145      * quotation marks will be removed, and the "\<" and "\>" will be
146      * unescaped (replaced by the '<' and '>' characters). The rest of
147      * the expression is left intact.
148      *
149      * @param node           the token node
150      *
151      * @return the token node
152      */
153     protected Node exitRegexp(Token node) {
154         String          str = node.getImage();
155         StringBuffer    buf = new StringBuffer  ();
156 
157         str = str.substring(2, str.length() - 2);
158         for (int i = 0; i < str.length(); i++) {
159             if (str.startsWith("\\<", i)) {
160                 buf.append('<');
161                 i++;
162             } else if (str.startsWith("\\>", i)) {
163                 buf.append('>');
164                 i++;
165             } else {
166                 buf.append(str.charAt(i));
167             }
168         }
169         node.addValue(buf.toString());
170         return node;
171     }
172 
173     /**
174      * Removes the header part from the parse tree by returning null.
175      *
176      * @param node           the production node
177      *
178      * @return the new production node
179      */
180     protected Node exitHeaderPart(Production node) {
181         return null;
182     }
183 
184     /**
185      * Adds the header declaration to the grammar. This method will
186      * also remove the header declaration from the parse tree by
187      * returning null.
188      *
189      * @param node           the production node
190      *
191      * @return the new production node
192      *
193      * @throws ParseException if the node analysis discovered errors
194      */
195     protected Node exitHeaderDeclaration(Production node)
196         throws ParseException {
197 
198         String    name;
199         String    value;
200 
201         name = getStringValue(getChildAt(node, 0), 0);
202         value = getStringValue(getChildAt(node, 2), 0);
203         grammar.addDeclaration(name, value);
204         return null;
205     }
206 
207     /**
208      * Removes the token part from the parse tree by returning null.
209      *
210      * @param node           the production node
211      *
212      * @return the new production node
213      */
214     protected Node exitTokenPart(Production node) {
215         return null;
216     }
217 
218     /**
219      * Adds a token pattern to the grammar. This method will also
220      * remove the token declaration from the parse tree by reutrning
221      * null.
222      *
223      * @param node           the production node
224      *
225      * @return the new production node
226      *
227      * @throws ParseException if the node analysis discovered errors
228      */
229     protected Node exitTokenDeclaration(Production node)
230         throws ParseException {
231 
232         TokenPattern  pattern;
233         String          name;
234         int           type;
235         String          str;
236         Token         token;
237         Node          child;
238 
239         // Create token pattern
240         name = getIdentifier((Token) getChildAt(node, 0));
241         child = getChildAt(node, 2);
242         type = getIntValue(child, 0);
243         str = getStringValue(child, 1);
244         pattern = new TokenPattern(nextTokenId++, name, type, str);
245 
246         // Process optional ignore or error
247         if (node.getChildCount() == 4) {
248             child = getChildAt(node, 3);
249             token = (Token) getValue(child, 0);
250             str = null;
251             if (child.getValueCount() == 2) {
252                 str = getStringValue(child, 1);
253             }
254             switch (token.getId()) {
255             case GrammarConstants.IGNORE:
256                 if (str == null) {
257                     pattern.setIgnore();
258                 } else {
259                     pattern.setIgnore(str);
260                 }
261                 break;
262             case GrammarConstants.ERROR:
263                 if (str == null) {
264                     pattern.setError();
265                 } else {
266                     pattern.setError(str);
267                 }
268                 break;
269             }
270         }
271 
272         // Add token to grammar
273         grammar.addToken(pattern,
274                          node.getStartLine(),
275                          node.getEndLine());
276         return null;
277     }
278 
279     /**
280      * Sets the node values to the token pattern type and the token
281      * pattern string.
282      *
283      * @param node           the production node
284      *
285      * @return the new production node
286      *
287      * @throws ParseException if the node analysis discovered errors
288      */
289     protected Node exitTokenValue(Production node) throws ParseException {
290         switch (getChildAt(node, 0).getId()) {
291         case GrammarConstants.QUOTED_STRING:
292             node.addValue(new Integer  (TokenPattern.STRING_TYPE));
293             break;
294         case GrammarConstants.REGEXP:
295             node.addValue(new Integer  (TokenPattern.REGEXP_TYPE));
296             break;
297         }
298         node.addValue(getStringValue(getChildAt(node, 0), 0));
299         return node;
300     }
301 
302     /**
303      * Sets the node values to the error or ignore token. If present,
304      * the message string will also be added as a node value.
305      *
306      * @param node           the production node
307      *
308      * @return the new production node
309      *
310      * @throws ParseException if the node analysis discovered errors
311      */
312     protected Node exitTokenHandling(Production node)
313         throws ParseException {
314 
315         Node  child = getChildAt(node, 0);
316 
317         node.addValue(child);
318         if (child.getValueCount() > 0) {
319             node.addValue(getValue(child, 0));
320         }
321         return node;
322     }
323 
324     /**
325      * Adds an empty production pattern to the grammar. This metod
326      * will return the production node to make it available for the
327      * second pass analyzer.
328      *
329      * @param node           the production node
330      *
331      * @return the new production node
332      *
333      * @throws ParseException if the node analysis discovered errors
334      */
335     protected Node exitProductionDeclaration(Production node)
336         throws ParseException {
337 
338         ProductionPattern  production;
339         String               name;
340 
341         name = getIdentifier((Token) getChildAt(node, 0));
342         production = new ProductionPattern(nextProductionId++, name);
343         grammar.addProduction(production,
344                               node.getStartLine(),
345                               node.getEndLine());
346         return node;
347     }
348 
349     /**
350      * Returns a token identifier. This method should only be called
351      * with identifier tokens, otherwise an exception will be thrown.
352      * This method also checks that the identifier name found is
353      * globally unique in it's upper-case form, and throws an
354      * exception if it is not.
355      *
356      * @param token          the identifier token
357      *
358      * @return the identifier name
359      *
360      * @throws ParseException if the identifier wasn't unique
361      */
362     private String   getIdentifier(Token token) throws ParseException {
363         String          name = token.getImage();
364         StringBuffer    buf = new StringBuffer  (name.toUpperCase());
365         char          c;
366 
367         // Check for identifier token
368         if (token.getId() != GrammarConstants.IDENTIFIER) {
369             throw new ParseException(ParseException.INTERNAL_ERROR,
370                                      null,
371                                      token.getStartLine(),
372                                      token.getStartColumn());
373         }
374 
375         // Remove all non-identifier characters
376         for (int i = 0; i < buf.length(); i++) {
377             c = buf.charAt(i);
378             if (('A' <= c && c <= 'Z') || ('0' <= c && c <= '9')) {
379                 // Do nothing
380             } else {
381                 buf.deleteCharAt(i--);
382             }
383         }
384 
385         // Check for name collitions
386         if (names.containsKey(buf.toString())) {
387             throw new ParseException(
388                 ParseException.ANALYSIS_ERROR,
389                 "duplicate identifier '" + name + "' is similar or " +
390                 "equal to previously defined identifier '" +
391                 names.get(buf.toString()) + "'",
392                 token.getStartLine(),
393                 token.getStartColumn());
394         } else {
395             names.put(buf.toString(), name);
396         }
397 
398         // Return the identifier
399         return name;
400     }
401 }
402
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags