KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > percederberg > grammatica > Grammar


/*
 * Grammar.java
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free
 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 * MA 02111-1307, USA.
 *
 * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
 */

package net.percederberg.grammatica;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;

import net.percederberg.grammatica.parser.Analyzer;
import net.percederberg.grammatica.parser.Parser;
import net.percederberg.grammatica.parser.ParserCreationException;
import net.percederberg.grammatica.parser.ParserLogException;
import net.percederberg.grammatica.parser.ProductionPattern;
import net.percederberg.grammatica.parser.RecursiveDescentParser;
import net.percederberg.grammatica.parser.TokenPattern;
import net.percederberg.grammatica.parser.Tokenizer;

40 /**
41  * A grammar definition object. This object supports parsing a grammar
42  * file and create a lexical analyzer (tokenizer) for the grammar.
43  *
44  * @author Per Cederberg, <per at percederberg dot net>
45  * @version 1.5
46  */

47 public class Grammar extends Object JavaDoc {
48
49     /**
50      * The author grammar declaration constant.
51      */

52     public static final String JavaDoc AUTHOR_DECLARATION = "AUTHOR";
53
54     /**
55      * The case-sensitive grammar declaration constant.
56      *
57      * @since 1.5
58      */

59     public static final String JavaDoc CASE_SENSITIVE_DECLARATION = "CASESENSITIVE";
60
61     /**
62      * The copyright grammar declaration constant.
63      */

64     public static final String JavaDoc COPYRIGHT_DECLARATION = "COPYRIGHT";
65
66     /**
67      * The date grammar declaration constant.
68      */

69     public static final String JavaDoc DATE_DECLARATION = "DATE";
70
71     /**
72      * The description grammar declaration constant.
73      */

74     public static final String JavaDoc DESCRIPTION_DECLARATION = "DESCRIPTION";
75
76     /**
77      * The grammar type grammar declaration constant.
78      */

79     public static final String JavaDoc GRAMMAR_TYPE_DECLARATION = "GRAMMARTYPE";
80
81     /**
82      * The license grammar declaration constant.
83      */

84     public static final String JavaDoc LICENSE_DECLARATION = "LICENSE";
85
86     /**
87      * The version grammar declaration constant.
88      */

89     public static final String JavaDoc VERSION_DECLARATION = "VERSION";
90
91     /**
92      * The grammar file name.
93      */

94     private String JavaDoc fileName = "";
95
96     /**
97      * The grammar declarations. This is a hash map with all the name
98      * value pairs in the header part of the grammar.
99      */

100     private HashMap JavaDoc declarations = new HashMap JavaDoc();
101
102     /**
103      * The tokens found in the processing.
104      */

105     private LinkedList JavaDoc tokens = new LinkedList JavaDoc();
106
107     /**
108      * The token id map. This is a map from the token pattern id to
109      * the token pattern.
110      */

111     private HashMap JavaDoc tokenIds = new HashMap JavaDoc();
112
113     /**
114      * The token name map. This is map from the token pattern name to
115      * the token pattern.
116      */

117     private HashMap JavaDoc tokenNames = new HashMap JavaDoc();
118
119     /**
120      * The token pattern map. This is map from the token pattern
121      * string to the token pattern object.
122      */

123     private HashMap JavaDoc tokenPatterns = new HashMap JavaDoc();
124
125     /**
126      * The productions found in the processing.
127      */

128     private LinkedList JavaDoc productions = new LinkedList JavaDoc();
129
130     /**
131      * The production id map. This is a map from the production
132      * pattern id to the production pattern.
133      */

134     private HashMap JavaDoc productionIds = new HashMap JavaDoc();
135
136     /**
137      * The production name map. This is map from the production
138      * pattern name to the production pattern.
139      */

140     private HashMap JavaDoc productionNames = new HashMap JavaDoc();
141
142     /**
143      * The map from token or production pattern name to a line range.
144      */

145     private HashMap JavaDoc lines = new HashMap JavaDoc();
146
147     /**
148      * Creates a new grammar from the specified file.
149      *
150      * @param file the grammar file to read
151      *
152      * @throws FileNotFoundException if the grammar file could not be
153      * found
154      * @throws ParserLogException if the grammar file couldn't be
155      * parsed correctly
156      * @throws GrammarException if the grammar wasn't valid
157      */

158     public Grammar(File JavaDoc file) throws FileNotFoundException JavaDoc,
159         ParserLogException, GrammarException {
160
161         GrammarParser parser;
162         FirstPassAnalyzer first = new FirstPassAnalyzer(this);
163         SecondPassAnalyzer second = new SecondPassAnalyzer(this);
164
165         fileName = file.toString();
166         try {
167             parser = new GrammarParser(new FileReader JavaDoc(file), first);
168             second.analyze(parser.parse());
169         } catch (ParserCreationException e) {
170             throw new UnsupportedOperationException JavaDoc(
171                 "internal error in grammar parser: " + e.getMessage());
172         }
173         verify();
174     }
175
176     /**
177      * Checks that the grammar is valid.
178      *
179      * @throws GrammarException if the grammar wasn't valid
180      */

181     private void verify() throws GrammarException {
182         String JavaDoc type;
183
184         // Check grammar type
185
type = (String JavaDoc) declarations.get(GRAMMAR_TYPE_DECLARATION);
186         if (type == null) {
187             throw new GrammarException(
188                 fileName,
189                 "grammar header missing " + GRAMMAR_TYPE_DECLARATION +
190                 " declaration");
191         } else if (!type.equals("LL")) {
192             throw new GrammarException(
193                 fileName,
194                 "unrecognized " + GRAMMAR_TYPE_DECLARATION + " value: '" +
195                 type + "', currently only 'LL' is supported");
196         }
197
198         // Check tokens and productions
199
if (productions.size() > 0) {
200             createParser(createTokenizer(null));
201         }
202     }
203
204     /**
205      * Creates a tokenizer from this grammar.
206      *
207      * @param in the input stream to use
208      *
209      * @return the newly created tokenizer
210      *
211      * @throws GrammarException if the tokenizer couldn't be created
212      * or initialized correctly
213      */

214     public Tokenizer createTokenizer(Reader JavaDoc in)
215         throws GrammarException {
216
217         Tokenizer tokenizer;
218
219         try {
220             tokenizer = new Tokenizer(in, !getCaseSensitive());
221             for (int i = 0; i < tokens.size(); i++) {
222                 tokenizer.addPattern((TokenPattern) tokens.get(i));
223             }
224         } catch (ParserCreationException e) {
225             if (e.getName() == null) {
226                 throw new GrammarException(fileName, e.getMessage());
227             } else {
228                 LineRange range = (LineRange) lines.get(e.getName());
229                 throw new GrammarException(fileName,
230                                            e.getMessage(),
231                                            range.getStart(),
232                                            range.getEnd());
233             }
234         }
235
236         return tokenizer;
237     }
238
239     /**
240      * Creates a parser from this grammar.
241      *
242      * @param tokenizer the tokenizer to use
243      *
244      * @return the newly created parser
245      *
246      * @throws GrammarException if the parser couldn't be created or
247      * initialized correctly
248      */

249     public Parser createParser(Tokenizer tokenizer)
250         throws GrammarException {
251
252         return createParser(tokenizer, null);
253     }
254
255     /**
256      * Creates a parser from this grammar.
257      *
258      * @param tokenizer the tokenizer to use
259      * @param analyzer the analyzer to use
260      *
261      * @return the newly created parser
262      *
263      * @throws GrammarException if the parser couldn't be created or
264      * initialized correctly
265      */

266     public Parser createParser(Tokenizer tokenizer, Analyzer analyzer)
267         throws GrammarException {
268
269         Parser parser;
270
271         try {
272             parser = new RecursiveDescentParser(tokenizer, analyzer);
273             for (int i = 0; i < productions.size(); i++) {
274                 parser.addPattern((ProductionPattern) productions.get(i));
275             }
276             parser.prepare();
277         } catch (ParserCreationException e) {
278             LineRange range = (LineRange) lines.get(e.getName());
279             if (range == null) {
280                 throw new GrammarException(fileName, e.getMessage());
281             } else {
282                 throw new GrammarException(fileName,
283                                            e.getMessage(),
284                                            range.getStart(),
285                                            range.getEnd());
286             }
287         }
288
289         return parser;
290     }
291
292     /**
293      * Returns the grammar file name and path.
294      *
295      * @return the grammar file name and path
296      */

297     public String JavaDoc getFileName() {
298         return fileName;
299     }
300
301     /**
302      * Returns the declaration value for the specified name.
303      *
304      * @param name the declaration name
305      *
306      * @return the declaration value, or
307      * null if not specified in the grammar header
308      */

309     public String JavaDoc getDeclaration(String JavaDoc name) {
310         return (String JavaDoc) declarations.get(name);
311     }
312
313     /**
314      * Checks if the grammar tokenizer is case-sensitive. Unless an
315      * explicit case-sensitive declaration in the grammar says
316      * otherwise, a grammar is assumed to be case-sensitive.
317      *
318      * @return true if the grammar is case-sensitive (the default), or
319      * false otherwise
320      *
321      * @since 1.5
322      */

323     public boolean getCaseSensitive() {
324         String JavaDoc str = getDeclaration(CASE_SENSITIVE_DECLARATION);
325
326         if (str == null) {
327             return true;
328         } else {
329             return !str.equalsIgnoreCase("no")
330                 && !str.equalsIgnoreCase("false");
331         }
332     }
333
334     /**
335      * Returns the number of token patterns in the grammar.
336      *
337      * @return the number of token patterns
338      */

339     public int getTokenPatternCount() {
340         return tokens.size();
341     }
342
343     /**
344      * Returns a specific token pattern.
345      *
346      * @param pos the pattern position, 0 <= pos < count
347      *
348      * @return the token pattern
349      */

350     public TokenPattern getTokenPattern(int pos) {
351         return (TokenPattern) tokens.get(pos);
352     }
353
354     /**
355      * Returns a token pattern identified by its id.
356      *
357      * @param id the pattern id
358      *
359      * @return the token pattern, or null
360      */

361     public TokenPattern getTokenPatternById(int id) {
362         return (TokenPattern) tokenIds.get(new Integer JavaDoc(id));
363     }
364
365     /**
366      * Returns a token pattern identified by its name.
367      *
368      * @param name the pattern name
369      *
370      * @return the token pattern, or null
371      */

372     public TokenPattern getTokenPatternByName(String JavaDoc name) {
373         return (TokenPattern) tokenNames.get(name);
374     }
375
376     /**
377      * Returns a token pattern identified by its pattern string. This
378      * method will only return matches for patterns of string type.
379      *
380      * @param image the pattern string
381      *
382      * @return the token pattern, or null
383      */

384     TokenPattern getTokenPatternByImage(String JavaDoc image) {
385         return (TokenPattern) tokenPatterns.get(image);
386     }
387
388     /**
389      * Returns the number of production patterns in the grammar.
390      *
391      * @return the number of production patterns
392      */

393     public int getProductionPatternCount() {
394         return productions.size();
395     }
396
397     /**
398      * Returns a specific production pattern.
399      *
400      * @param pos the pattern position, 0 <= pos < count
401      *
402      * @return the production pattern
403      */

404     public ProductionPattern getProductionPattern(int pos) {
405         return (ProductionPattern) productions.get(pos);
406     }
407
408     /**
409      * Returns a production pattern identified by its id.
410      *
411      * @param id the pattern id
412      *
413      * @return the production pattern, or null
414      */

415     public ProductionPattern getProductionPatternById(int id) {
416         return (ProductionPattern) productionIds.get(new Integer JavaDoc(id));
417     }
418
419     /**
420      * Returns a production pattern identified by its name.
421      *
422      * @param name the pattern name
423      *
424      * @return the production pattern, or null
425      */

426     public ProductionPattern getProductionPatternByName(String JavaDoc name) {
427         return (ProductionPattern) productionNames.get(name);
428     }
429
430     /**
431      * Adds a grammar declaration name-value pair.
432      *
433      * @param name the name part
434      * @param value the value part
435      */

436     void addDeclaration(String JavaDoc name, String JavaDoc value) {
437         declarations.put(name, value);
438     }
439
440     /**
441      * Adds a token pattern to this grammar.
442      *
443      * @param token the token pattern to add
444      * @param start the starting line
445      * @param end the ending line
446      */

447     void addToken(TokenPattern token, int start, int end) {
448         tokens.add(token);
449         tokenIds.put(new Integer JavaDoc(token.getId()), token);
450         tokenNames.put(token.getName(), token);
451         if (token.getType() == TokenPattern.STRING_TYPE) {
452             tokenPatterns.put(token.getPattern(), token);
453         }
454         lines.put(token.getName(), new LineRange(start, end));
455     }
456
457     /**
458      * Adds a production pattern to this grammar.
459      *
460      * @param production the production pattern to add
461      * @param start the starting line
462      * @param end the ending line
463      */

464     void addProduction(ProductionPattern production, int start, int end) {
465         productions.add(production);
466         productionIds.put(new Integer JavaDoc(production.getId()), production);
467         productionNames.put(production.getName(), production);
468         lines.put(production.getName(), new LineRange(start, end));
469     }
470
471
472     /**
473      * A line number range.
474      */

475     private class LineRange {
476
477         /**
478          * The first line number.
479          */

480         private int start;
481
482         /**
483          * The last line number.
484          */

485         private int end;
486
487         /**
488          * Creates a new line number range.
489          *
490          * @param start the first line number
491          * @param end the last line number
492          */

493         public LineRange(int start, int end) {
494             this.start = start;
495             this.end = end;
496         }
497
498         /**
499          * Returns the first line number.
500          *
501          * @return the first line number
502          */

503         public int getStart() {
504             return start;
505         }
506
507         /**
508          * Returns the last line number.
509          *
510          * @return the last line number
511          */

512         public int getEnd() {
513             return end;
514         }
515     }
516 }
517
Popular Tags