Grammar


1   /*
2    * Grammar.java
3    *
4    * This library is free software; you can redistribute it and/or
5    * modify it under the terms of the GNU Lesser General Public License
6    * as published by the Free Software Foundation; either version 2.1
7    * of the License, or (at your option) any later version.
8    *
9    * This library is distributed in the hope that it will be useful,
10   * but WITHOUT ANY WARRANTY; without even the implied warranty of
11   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   * Lesser General Public License for more details.
13   *
14   * You should have received a copy of the GNU Lesser General Public
15   * License along with this library; if not, write to the Free
16   * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17   * MA 02111-1307, USA.
18   *
19   * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
20   */
21  
22  package net.percederberg.grammatica;
23  
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
30  
31  import net.percederberg.grammatica.parser.Analyzer;
32  import net.percederberg.grammatica.parser.Parser;
33  import net.percederberg.grammatica.parser.ParserCreationException;
34  import net.percederberg.grammatica.parser.ParserLogException;
35  import net.percederberg.grammatica.parser.ProductionPattern;
36  import net.percederberg.grammatica.parser.RecursiveDescentParser;
37  import net.percederberg.grammatica.parser.TokenPattern;
38  import net.percederberg.grammatica.parser.Tokenizer;
39  
40  /**
41   * A grammar definition object. This object supports parsing a grammar
42   * file and create a lexical analyzer (tokenizer) for the grammar.
43   *
44   * @author   Per Cederberg, <per at percederberg dot net>
45   * @version  1.5
46   */
47  public class Grammar extends Object   {
48  
49      /**
50       * The author grammar declaration constant.
51       */
52      public static final String   AUTHOR_DECLARATION = "AUTHOR";
53  
54      /**
55       * The case-sensitive grammar declaration constant.
56       *
57       * @since 1.5
58       */
59      public static final String   CASE_SENSITIVE_DECLARATION = "CASESENSITIVE";
60  
61      /**
62       * The copyright grammar declaration constant.
63       */
64      public static final String   COPYRIGHT_DECLARATION = "COPYRIGHT";
65  
66      /**
67       * The date grammar declaration constant.
68       */
69      public static final String   DATE_DECLARATION = "DATE";
70  
71      /**
72       * The description grammar declaration constant.
73       */
74      public static final String   DESCRIPTION_DECLARATION = "DESCRIPTION";
75  
76      /**
77       * The grammar type grammar declaration constant.
78       */
79      public static final String   GRAMMAR_TYPE_DECLARATION = "GRAMMARTYPE";
80  
81      /**
82       * The license grammar declaration constant.
83       */
84      public static final String   LICENSE_DECLARATION = "LICENSE";
85  
86      /**
87       * The version grammar declaration constant.
88       */
89      public static final String   VERSION_DECLARATION = "VERSION";
90  
91      /**
92       * The grammar file name.
93       */
94      private String   fileName = "";
95  
96      /**
97       * The grammar declarations. This is a hash map with all the name
98       * value pairs in the header part of the grammar.
99       */
100     private HashMap   declarations = new HashMap  ();
101 
102     /**
103      * The tokens found in the processing.
104      */
105     private LinkedList   tokens = new LinkedList  ();
106 
107     /**
108      * The token id map. This is a map from the token pattern id to
109      * the token pattern.
110      */
111     private HashMap   tokenIds = new HashMap  ();
112 
113     /**
114      * The token name map. This is map from the token pattern name to
115      * the token pattern.
116      */
117     private HashMap   tokenNames = new HashMap  ();
118 
119     /**
120      * The token pattern map. This is map from the token pattern
121      * string to the token pattern object.
122      */
123     private HashMap   tokenPatterns = new HashMap  ();
124 
125     /**
126      * The productions found in the processing.
127      */
128     private LinkedList   productions = new LinkedList  ();
129 
130     /**
131      * The production id map. This is a map from the production
132      * pattern id to the production pattern.
133      */
134     private HashMap   productionIds = new HashMap  ();
135 
136     /**
137      * The production name map. This is map from the production
138      * pattern name to the production pattern.
139      */
140     private HashMap   productionNames = new HashMap  ();
141 
142     /**
143      * The map from token or production pattern name to a line range.
144      */
145     private HashMap   lines = new HashMap  ();
146 
147     /**
148      * Creates a new grammar from the specified file.
149      *
150      * @param file     the grammar file to read
151      *
152      * @throws FileNotFoundException if the grammar file could not be
153      *             found
154      * @throws ParserLogException if the grammar file couldn't be
155      *             parsed correctly
156      * @throws GrammarException if the grammar wasn't valid
157      */
158     public Grammar(File   file) throws FileNotFoundException  ,
159         ParserLogException, GrammarException {
160 
161         GrammarParser       parser;
162         FirstPassAnalyzer   first = new FirstPassAnalyzer(this);
163         SecondPassAnalyzer  second = new SecondPassAnalyzer(this);
164 
165         fileName = file.toString();
166         try {
167             parser = new GrammarParser(new FileReader  (file), first);
168             second.analyze(parser.parse());
169         } catch (ParserCreationException e) {
170             throw new UnsupportedOperationException  (
171                 "internal error in grammar parser: " + e.getMessage());
172         }
173         verify();
174     }
175 
176     /**
177      * Checks that the grammar is valid.
178      *
179      * @throws GrammarException if the grammar wasn't valid
180      */
181     private void verify() throws GrammarException {
182         String    type;
183 
184         // Check grammar type
185         type = (String  ) declarations.get(GRAMMAR_TYPE_DECLARATION);
186         if (type == null) {
187             throw new GrammarException(
188                 fileName,
189                 "grammar header missing " + GRAMMAR_TYPE_DECLARATION +
190                 " declaration");
191         } else if (!type.equals("LL")) {
192             throw new GrammarException(
193                 fileName,
194                 "unrecognized " + GRAMMAR_TYPE_DECLARATION + " value: '" +
195                 type + "', currently only 'LL' is supported");
196         }
197 
198         // Check tokens and productions
199         if (productions.size() > 0) {
200             createParser(createTokenizer(null));
201         }
202     }
203 
204     /**
205      * Creates a tokenizer from this grammar.
206      *
207      * @param in             the input stream to use
208      *
209      * @return the newly created tokenizer
210      *
211      * @throws GrammarException if the tokenizer couldn't be created
212      *             or initialized correctly
213      */
214     public Tokenizer createTokenizer(Reader   in)
215         throws GrammarException {
216 
217         Tokenizer  tokenizer;
218 
219         try {
220             tokenizer = new Tokenizer(in, !getCaseSensitive());
221             for (int i = 0; i < tokens.size(); i++) {
222                 tokenizer.addPattern((TokenPattern) tokens.get(i));
223             }
224         } catch (ParserCreationException e) {
225             if (e.getName() == null) {
226                 throw new GrammarException(fileName, e.getMessage());
227             } else {
228                 LineRange range = (LineRange) lines.get(e.getName());
229                 throw new GrammarException(fileName,
230                                            e.getMessage(),
231                                            range.getStart(),
232                                            range.getEnd());
233             }
234         }
235 
236         return tokenizer;
237     }
238 
239     /**
240      * Creates a parser from this grammar.
241      *
242      * @param tokenizer      the tokenizer to use
243      *
244      * @return the newly created parser
245      *
246      * @throws GrammarException if the parser couldn't be created or
247      *             initialized correctly
248      */
249     public Parser createParser(Tokenizer tokenizer)
250         throws GrammarException {
251 
252         return createParser(tokenizer, null);
253     }
254 
255     /**
256      * Creates a parser from this grammar.
257      *
258      * @param tokenizer      the tokenizer to use
259      * @param analyzer       the analyzer to use
260      *
261      * @return the newly created parser
262      *
263      * @throws GrammarException if the parser couldn't be created or
264      *             initialized correctly
265      */
266     public Parser createParser(Tokenizer tokenizer, Analyzer analyzer)
267         throws GrammarException {
268 
269         Parser  parser;
270 
271         try {
272             parser = new RecursiveDescentParser(tokenizer, analyzer);
273             for (int i = 0; i < productions.size(); i++) {
274                 parser.addPattern((ProductionPattern) productions.get(i));
275             }
276             parser.prepare();
277         } catch (ParserCreationException e) {
278             LineRange range = (LineRange) lines.get(e.getName());
279             if (range == null) {
280                 throw new GrammarException(fileName, e.getMessage());
281             } else {
282                 throw new GrammarException(fileName,
283                                            e.getMessage(),
284                                            range.getStart(),
285                                            range.getEnd());
286             }
287         }
288 
289         return parser;
290     }
291 
292     /**
293      * Returns the grammar file name and path.
294      *
295      * @return the grammar file name and path
296      */
297     public String   getFileName() {
298         return fileName;
299     }
300 
301     /**
302      * Returns the declaration value for the specified name.
303      *
304      * @param name           the declaration name
305      *
306      * @return the declaration value, or
307      *         null if not specified in the grammar header
308      */
309     public String   getDeclaration(String   name) {
310         return (String  ) declarations.get(name);
311     }
312 
313     /**
314      * Checks if the grammar tokenizer is case-sensitive. Unless an
315      * explicit case-sensitive declaration in the grammar says
316      * otherwise, a grammar is assumed to be case-sensitive.
317      *
318      * @return true if the grammar is case-sensitive (the default), or
319      *         false otherwise
320      *
321      * @since 1.5
322      */
323     public boolean getCaseSensitive() {
324         String    str = getDeclaration(CASE_SENSITIVE_DECLARATION);
325 
326         if (str == null) {
327             return true;
328         } else {
329             return !str.equalsIgnoreCase("no")
330                 && !str.equalsIgnoreCase("false");
331         }
332     }
333 
334     /**
335      * Returns the number of token patterns in the grammar.
336      *
337      * @return the number of token patterns
338      */
339     public int getTokenPatternCount() {
340         return tokens.size();
341     }
342 
343     /**
344      * Returns a specific token pattern.
345      *
346      * @param pos            the pattern position, 0 <= pos < count
347      *
348      * @return the token pattern
349      */
350     public TokenPattern getTokenPattern(int pos) {
351         return (TokenPattern) tokens.get(pos);
352     }
353 
354     /**
355      * Returns a token pattern identified by its id.
356      *
357      * @param id             the pattern id
358      *
359      * @return the token pattern, or null
360      */
361     public TokenPattern getTokenPatternById(int id) {
362         return (TokenPattern) tokenIds.get(new Integer  (id));
363     }
364 
365     /**
366      * Returns a token pattern identified by its name.
367      *
368      * @param name           the pattern name
369      *
370      * @return the token pattern, or null
371      */
372     public TokenPattern getTokenPatternByName(String   name) {
373         return (TokenPattern) tokenNames.get(name);
374     }
375 
376     /**
377      * Returns a token pattern identified by its pattern string. This
378      * method will only return matches for patterns of string type.
379      *
380      * @param image          the pattern string
381      *
382      * @return the token pattern, or null
383      */
384     TokenPattern getTokenPatternByImage(String   image) {
385         return (TokenPattern) tokenPatterns.get(image);
386     }
387 
388     /**
389      * Returns the number of production patterns in the grammar.
390      *
391      * @return the number of production patterns
392      */
393     public int getProductionPatternCount() {
394         return productions.size();
395     }
396 
397     /**
398      * Returns a specific production pattern.
399      *
400      * @param pos            the pattern position, 0 <= pos < count
401      *
402      * @return the production pattern
403      */
404     public ProductionPattern getProductionPattern(int pos) {
405         return (ProductionPattern) productions.get(pos);
406     }
407 
408     /**
409      * Returns a production pattern identified by its id.
410      *
411      * @param id             the pattern id
412      *
413      * @return the production pattern, or null
414      */
415     public ProductionPattern getProductionPatternById(int id) {
416         return (ProductionPattern) productionIds.get(new Integer  (id));
417     }
418 
419     /**
420      * Returns a production pattern identified by its name.
421      *
422      * @param name           the pattern name
423      *
424      * @return the production pattern, or null
425      */
426     public ProductionPattern getProductionPatternByName(String   name) {
427         return (ProductionPattern) productionNames.get(name);
428     }
429 
430     /**
431      * Adds a grammar declaration name-value pair.
432      *
433      * @param name           the name part
434      * @param value          the value part
435      */
436     void addDeclaration(String   name, String   value) {
437         declarations.put(name, value);
438     }
439 
440     /**
441      * Adds a token pattern to this grammar.
442      *
443      * @param token          the token pattern to add
444      * @param start          the starting line
445      * @param end            the ending line
446      */
447     void addToken(TokenPattern token, int start, int end) {
448         tokens.add(token);
449         tokenIds.put(new Integer  (token.getId()), token);
450         tokenNames.put(token.getName(), token);
451         if (token.getType() == TokenPattern.STRING_TYPE) {
452             tokenPatterns.put(token.getPattern(), token);
453         }
454         lines.put(token.getName(), new LineRange(start, end));
455     }
456 
457     /**
458      * Adds a production pattern to this grammar.
459      *
460      * @param production     the production pattern to add
461      * @param start          the starting line
462      * @param end            the ending line
463      */
464     void addProduction(ProductionPattern production, int start, int end) {
465         productions.add(production);
466         productionIds.put(new Integer  (production.getId()), production);
467         productionNames.put(production.getName(), production);
468         lines.put(production.getName(), new LineRange(start, end));
469     }
470 
471 
472     /**
473      * A line number range.
474      */
475     private class LineRange {
476 
477         /**
478          * The first line number.
479          */
480         private int start;
481 
482         /**
483          * The last line number.
484          */
485         private int end;
486 
487         /**
488          * Creates a new line number range.
489          *
490          * @param start      the first line number
491          * @param end        the last line number
492          */
493         public LineRange(int start, int end) {
494             this.start = start;
495             this.end = end;
496         }
497 
498         /**
499          * Returns the first line number.
500          *
501          * @return the first line number
502          */
503         public int getStart() {
504             return start;
505         }
506 
507         /**
508          * Returns the last line number.
509          *
510          * @return the last line number
511          */
512         public int getEnd() {
513             return end;
514         }
515     }
516 }
517
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags