KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sourceforge > chaperon > model > lexicon > LexiconFactory


/*
 * Copyright (C) Chaperon. All rights reserved.
 * -------------------------------------------------------------------------
 * This software is published under the terms of the Apache Software License
 * version 1.1, a copy of which has been included with this distribution in
 * the LICENSE file.
 */

8
9 package net.sourceforge.chaperon.model.lexicon;
10
11 import net.sourceforge.chaperon.model.pattern.*;
12 import net.sourceforge.chaperon.model.symbol.Terminal;
13
14 import org.xml.sax.*;
15 import org.xml.sax.helpers.*;
16
17 import java.util.Stack JavaDoc;
18
19 /**
20  * This class should generate a lexicon from a SAX stream
21  *
22  * @author <a HREF="mailto:stephan@apache.org">Stephan Michels </a>
23  * @version CVS $Id: LexiconFactory.java,v 1.3 2003/12/09 19:55:52 benedikta Exp $
24  */

25 public class LexiconFactory extends DefaultHandler
26 {
27   /** The namspace of the lexicon configuration */
28   public static final String JavaDoc NS = "http://chaperon.sourceforge.net/schema/lexicon/1.0";
29
30   /** Element name */
31   public static final String JavaDoc LEXEME_ELEMENT = "lexeme";
32
33   /** Attribute name of the symbol property */
34   public static final String JavaDoc SYMBOL_ATTRIBUTE = "symbol";
35
36   /** Element name */
37   public static final String JavaDoc LEXICON_ELEMENT = "lexicon";
38
39   /** Element name */
40   public static final String JavaDoc ALTERNATION_ELEMENT = "alt";
41
42   /** Element name */
43   public static final String JavaDoc BEGINOFLINE_ELEMENT = "bol";
44
45   /** Element name */
46   public static final String JavaDoc CHARACTERCLASS_ELEMENT = "cclass";
47
48   /** Attribute name of the exclusive property */
49   public static final String JavaDoc EXCLUSIVE_ATTRIBUTE = "exclusive";
50
51   /** Element name */
52   public static final String JavaDoc CHARACTERINTERVAL_ELEMENT = "cinterval";
53
54   /** Attribute name of the min property */
55   public static final String JavaDoc CHARACTERINTERVAL_MIN_ATTRIBUTE = "min";
56
57   /** Attribute name of the max property */
58   public static final String JavaDoc CHARACTERINTERVAL_MAX_ATTRIBUTE = "max";
59
60   /** Element name */
61   public static final String JavaDoc CHARACTERSTRING_ELEMENT = "cstring";
62
63   /** Attribute name of the sequence property */
64   public static final String JavaDoc CHARACTERSTRING_SEQUENCE_ATTRIBUTE = "content";
65
66   /** Element name */
67   public static final String JavaDoc CHARACTERSET_ELEMENT = "cset";
68
69   /** Attribute name of the characters property */
70   public static final String JavaDoc CHARACTERSET_CHARACTERS_ATTRIBUTE = "content";
71
72 /* public final static String CHARACTERGENERIC_ELEMENT = "cgeneric";
73
74   public final static String CHARACTERGENERIC_CODE_ATTRIBUTE = "code";*/

75   public static final String JavaDoc CODE_ATTRIBUTE = "code";
76
77   /** Element name */
78   public static final String JavaDoc CONCATENATION_ELEMENT = "concat";
79
80   /** Element name */
81   public static final String JavaDoc GROUP_ELEMENT = "group";
82
83   /** Element name */
84   public static final String JavaDoc UNIVERSALCHARACTER_ELEMENT = "cuniversal";
85
86   /** Element name */
87   public static final String JavaDoc ENDOFLINE_ELEMENT = "eol";
88
89   /** Attribute name of the minOccurs property */
90   public static final String JavaDoc MINOCCURS_ATTRIBUTE = "minOccurs";
91
92   /** Attribute name of the minOccurs property */
93   public static final String JavaDoc MAXOCCURS_ATTRIBUTE = "maxOccurs";
94   private static final int STATE_OUTER = 0;
95   private static final int STATE_LEXICON = 1;
96   private static final int STATE_LEXEME = 2;
97   private static final int STATE_CHARACTERCLASS = 3;
98   private static final int STATE_CHARACTERCLASSELEMENT = 4;
99   private int state = STATE_OUTER;
100   private Lexicon lexicon;
101   private Locator locator = null;
102   private Stack JavaDoc stack;
103
104   /**
105    * Returns the generated lexicon
106    *
107    * @return Lexicon
108    */

109   public Lexicon getLexicon()
110   {
111     return lexicon;
112   }
113
114   private String JavaDoc getLocation()
115   {
116     if (locator==null)
117       return "unknown";
118
119     return locator.getSystemId()+":"+locator.getLineNumber()+":"+locator.getColumnNumber();
120   }
121
122   /**
123    * Receive an object for locating the origin of SAX document events.
124    */

125   public void setDocumentLocator(Locator locator)
126   {
127     this.locator = locator;
128   }
129
130   /**
131    * Receive notification of the beginning of a document.
132    */

133   public void startDocument()
134   {
135     stack = new Stack JavaDoc();
136   }
137
138   /**
139    * Return the content of the minOccurs attribute
140    *
141    * @param atts Attributes of an element
142    *
143    * @return minOccurs attribute
144    */

145   private int getMinOccursFromAttributes(Attributes atts)
146   {
147     int minOccurs = 1;
148     String JavaDoc attribute = atts.getValue(MINOCCURS_ATTRIBUTE);
149
150     if ((attribute!=null) && (attribute.length()>0))
151     {
152       try
153       {
154         minOccurs = Integer.parseInt(attribute);
155       }
156       catch (NumberFormatException JavaDoc e)
157       {
158         // System.err.println("error: "+attribute+" ist not an integer number");
159
minOccurs = 1;
160       }
161
162       if (minOccurs<0)
163         minOccurs = 0;
164     }
165
166     return minOccurs;
167   }
168
169   /**
170    * Return the content of the maxOccurs attribute
171    *
172    * @param atts Attributes of an element
173    *
174    * @return maxOccurs attribute
175    */

176   private int getMaxOccursFromAttributes(Attributes atts)
177   {
178     int maxOccurs = 1;
179     String JavaDoc attribute = atts.getValue(MAXOCCURS_ATTRIBUTE);
180
181     if ((attribute!=null) && (attribute.length()>0))
182     {
183       if (attribute.equals("*"))
184         maxOccurs = Integer.MAX_VALUE;
185       else
186       {
187         try
188         {
189           maxOccurs = Integer.parseInt(attribute);
190         }
191         catch (NumberFormatException JavaDoc e)
192         {
193           // System.err.println("error: "+attribute+" ist not an integer number");
194
maxOccurs = 1;
195         }
196
197         if (maxOccurs<1)
198           maxOccurs = 1;
199       }
200     }
201
202     return maxOccurs;
203   }
204
205   /**
206    * @param atts
207    *
208    * @return
209    */

210   private boolean getExclusiveFromAttributes(Attributes atts)
211   {
212     String JavaDoc attribute = atts.getValue(EXCLUSIVE_ATTRIBUTE);
213
214     if ((attribute!=null) && (attribute.length()>0))
215     {
216       boolean value = false;
217
218       try
219       {
220         value = Boolean.valueOf(attribute).booleanValue();
221         return value;
222       }
223       catch (Exception JavaDoc e)
224       {
225         return false;
226       }
227     }
228
229     return false;
230   }
231
232   /**
233    * Receive notification of the beginning of an element.
234    *
235    * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
236    * or if Namespace processing is not being performed.
237    * @param localName The local name (without prefix), or the empty string if Namespace processing
238    * is not being performed.
239    * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
240    * available.
241    * @param atts The attributes attached to the element. If there are no attributes, it shall be an
242    * empty Attributes object.
243    */

244   public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes atts)
245     throws SAXException
246   {
247     if (namespaceURI.equals(NS))
248     {
249       if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_OUTER))
250       {
251         Lexicon lexicon = new Lexicon();
252         lexicon.setLocation(getLocation());
253         stack.push(lexicon);
254
255         state = STATE_LEXICON;
256       }
257       else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXICON))
258       {
259         Lexeme lexeme = new Lexeme();
260         lexeme.setLocation(getLocation());
261         if (atts.getValue(SYMBOL_ATTRIBUTE)!=null)
262           lexeme.setSymbol(new Terminal(atts.getValue(SYMBOL_ATTRIBUTE)));
263
264         stack.push(lexeme);
265
266         state = STATE_LEXEME;
267       }
268       else if ((localName.equals(ALTERNATION_ELEMENT)) && (state==STATE_LEXEME))
269       {
270         Alternation alternation = new Alternation();
271         alternation.setLocation(getLocation());
272
273         alternation.setMinOccurs(getMinOccursFromAttributes(atts));
274         alternation.setMaxOccurs(getMaxOccursFromAttributes(atts));
275         stack.push(alternation);
276       }
277       else if ((localName.equals(CONCATENATION_ELEMENT)) && (state==STATE_LEXEME))
278       {
279         Concatenation concatenation = new Concatenation();
280         concatenation.setLocation(getLocation());
281
282         concatenation.setMinOccurs(getMinOccursFromAttributes(atts));
283         concatenation.setMaxOccurs(getMaxOccursFromAttributes(atts));
284         stack.push(concatenation);
285       }
286       else if ((localName.equals(CHARACTERSTRING_ELEMENT)) && (state==STATE_LEXEME))
287       {
288         CharacterString characterstring = new CharacterString();
289         characterstring.setLocation(getLocation());
290
291         characterstring.setMinOccurs(getMinOccursFromAttributes(atts));
292         characterstring.setMaxOccurs(getMaxOccursFromAttributes(atts));
293
294         if (atts.getValue(CODE_ATTRIBUTE)!=null)
295         {
296           char character = (char)Integer.parseInt(atts.getValue(CODE_ATTRIBUTE));
297           characterstring.setString(String.valueOf(character));
298         }
299         else
300           characterstring.setString(atts.getValue(CHARACTERSTRING_SEQUENCE_ATTRIBUTE));
301
302         stack.push(characterstring);
303       }
304       else if ((localName.equals(GROUP_ELEMENT)) && (state==STATE_LEXEME))
305       {
306         PatternGroup group = new PatternGroup();
307         group.setLocation(getLocation());
308
309         group.setMinOccurs(getMinOccursFromAttributes(atts));
310         group.setMaxOccurs(getMaxOccursFromAttributes(atts));
311         stack.push(group);
312       }
313       else if ((localName.equals(UNIVERSALCHARACTER_ELEMENT)) && (state==STATE_LEXEME))
314       {
315         UniversalCharacter uni = new UniversalCharacter();
316         uni.setLocation(getLocation());
317
318         uni.setMinOccurs(getMinOccursFromAttributes(atts));
319         uni.setMaxOccurs(getMaxOccursFromAttributes(atts));
320
321         stack.push(uni);
322       }
323       else if ((localName.equals(BEGINOFLINE_ELEMENT)) && (state==STATE_LEXEME))
324       {
325         BeginOfLine bol = new BeginOfLine();
326         bol.setLocation(getLocation());
327
328         stack.push(bol);
329       }
330       else if ((localName.equals(ENDOFLINE_ELEMENT)) && (state==STATE_LEXEME))
331       {
332         EndOfLine eol = new EndOfLine();
333
334         stack.push(eol);
335       }
336       else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_LEXEME))
337       {
338         CharacterClass characterclass = new CharacterClass();
339         characterclass.setLocation(getLocation());
340
341         characterclass.setExclusive(getExclusiveFromAttributes(atts));
342         characterclass.setMinOccurs(getMinOccursFromAttributes(atts));
343         characterclass.setMaxOccurs(getMaxOccursFromAttributes(atts));
344         stack.push(characterclass);
345
346         state = STATE_CHARACTERCLASS;
347       }
348       else if ((localName.equals(CHARACTERSET_ELEMENT)) && (state==STATE_CHARACTERCLASS))
349       {
350         CharacterSet characterset = new CharacterSet();
351         characterset.setLocation(getLocation());
352
353         if (atts.getValue(CODE_ATTRIBUTE)!=null)
354         {
355           char character = (char)Integer.decode(atts.getValue(CODE_ATTRIBUTE)).intValue();
356           characterset.setCharacters(String.valueOf(character));
357         }
358         else
359           characterset.setCharacters(atts.getValue(CHARACTERSET_CHARACTERS_ATTRIBUTE));
360
361         stack.push(characterset);
362
363         state = STATE_CHARACTERCLASSELEMENT;
364       }
365       else if ((localName.equals(CHARACTERINTERVAL_ELEMENT)) && (state==STATE_CHARACTERCLASS))
366       {
367         CharacterInterval characterinterval = new CharacterInterval();
368         characterinterval.setLocation(getLocation());
369
370         characterinterval.setMinimum(atts.getValue(CHARACTERINTERVAL_MIN_ATTRIBUTE).charAt(0));
371         characterinterval.setMaximum(atts.getValue(CHARACTERINTERVAL_MAX_ATTRIBUTE).charAt(0));
372         stack.push(characterinterval);
373
374         state = STATE_CHARACTERCLASSELEMENT;
375       }
376       else
377         throw new SAXException("Unexpected element "+qName+" at "+getLocation());
378     }
379     else
380       throw new SAXException("Unexpected element "+qName+" at "+getLocation());
381   }
382
383   /**
384    * Receive notification of the end of an element.
385    *
386    * @param namespaceURI The Namespace URI, or the empty string if the element has no Namespace URI
387    * or if Namespace processing is not being performed.
388    * @param localName The local name (without prefix), or the empty string if Namespace processing
389    * is not being performed.
390    * @param qName The raw XML 1.0 name (with prefix), or the empty string if raw names are not
391    * available.
392    *
393    * @throws SAXException
394    */

395   public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName)
396     throws SAXException
397   {
398     if (namespaceURI.equals(NS))
399     {
400       if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_LEXICON))
401       {
402         lexicon = (Lexicon)stack.pop();
403         state = STATE_OUTER;
404       }
405       else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXEME))
406       {
407         Lexeme lexeme = (Lexeme)stack.pop();
408         Lexicon lexicon = (Lexicon)stack.peek();
409
410         lexicon.addLexeme(lexeme);
411         state = STATE_LEXICON;
412       }
413       else if (((localName.equals(ALTERNATION_ELEMENT)) ||
414                (localName.equals(CONCATENATION_ELEMENT)) ||
415                (localName.equals(CHARACTERSTRING_ELEMENT)) || (localName.equals(GROUP_ELEMENT)) ||
416                (localName.equals(UNIVERSALCHARACTER_ELEMENT)) ||
417                (localName.equals(BEGINOFLINE_ELEMENT)) || (localName.equals(ENDOFLINE_ELEMENT))) &&
418                (state==STATE_LEXEME))
419       {
420         Pattern patternelement = (Pattern)stack.pop();
421
422         if (stack.peek() instanceof Alternation)
423         {
424           Alternation alternation = (Alternation)stack.peek();
425
426           alternation.addPattern(patternelement);
427         }
428         else if (stack.peek() instanceof Concatenation)
429         {
430           Concatenation concatenation = (Concatenation)stack.peek();
431
432           concatenation.addPattern(patternelement);
433         }
434         else if (stack.peek() instanceof PatternGroup)
435         {
436           PatternGroup group = (PatternGroup)stack.peek();
437
438           group.addPattern(patternelement);
439         }
440         else if (stack.peek() instanceof Lexeme)
441         {
442           Lexeme lexeme = (Lexeme)stack.peek();
443
444           lexeme.setDefinition(patternelement);
445         }
446       }
447       else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_CHARACTERCLASS))
448       {
449         Pattern patternelement = (Pattern)stack.pop();
450
451         if (stack.peek() instanceof Alternation)
452         {
453           Alternation alternation = (Alternation)stack.peek();
454
455           alternation.addPattern(patternelement);
456         }
457         else if (stack.peek() instanceof Concatenation)
458         {
459           Concatenation concatenation = (Concatenation)stack.peek();
460
461           concatenation.addPattern(patternelement);
462         }
463         else if (stack.peek() instanceof PatternGroup)
464         {
465           PatternGroup group = (PatternGroup)stack.peek();
466
467           group.addPattern(patternelement);
468         }
469         else if (stack.peek() instanceof Lexeme)
470         {
471           Lexeme lexeme = (Lexeme)stack.peek();
472
473           lexeme.setDefinition(patternelement);
474         }
475
476         state = STATE_LEXEME;
477       }
478       else if (((localName.equals(CHARACTERSET_ELEMENT)) ||
479                (localName.equals(CHARACTERINTERVAL_ELEMENT))) &&
480                (state==STATE_CHARACTERCLASSELEMENT))
481       {
482         CharacterClassElement characterclasselement = (CharacterClassElement)stack.pop();
483         CharacterClass characterclass = (CharacterClass)stack.peek();
484
485         characterclass.addCharacterClassElement(characterclasselement);
486
487         state = STATE_CHARACTERCLASS;
488       }
489       else
490         throw new SAXException("Unexpected element "+qName+" at "+getLocation());
491     }
492     else
493       throw new SAXException("Unexpected element "+qName+" at "+getLocation());
494   }
495 }
496
Popular Tags