1 8 9 package net.sourceforge.chaperon.model.lexicon; 10 11 import net.sourceforge.chaperon.model.pattern.*; 12 import net.sourceforge.chaperon.model.symbol.Terminal; 13 14 import org.xml.sax.*; 15 import org.xml.sax.helpers.*; 16 17 import java.util.Stack ; 18 19 25 public class LexiconFactory extends DefaultHandler 26 { 27 28 public static final String NS = "http://chaperon.sourceforge.net/schema/lexicon/1.0"; 29 30 31 public static final String LEXEME_ELEMENT = "lexeme"; 32 33 34 public static final String SYMBOL_ATTRIBUTE = "symbol"; 35 36 37 public static final String LEXICON_ELEMENT = "lexicon"; 38 39 40 public static final String ALTERNATION_ELEMENT = "alt"; 41 42 43 public static final String BEGINOFLINE_ELEMENT = "bol"; 44 45 46 public static final String CHARACTERCLASS_ELEMENT = "cclass"; 47 48 49 public static final String EXCLUSIVE_ATTRIBUTE = "exclusive"; 50 51 52 public static final String CHARACTERINTERVAL_ELEMENT = "cinterval"; 53 54 55 public static final String CHARACTERINTERVAL_MIN_ATTRIBUTE = "min"; 56 57 58 public static final String CHARACTERINTERVAL_MAX_ATTRIBUTE = "max"; 59 60 61 public static final String CHARACTERSTRING_ELEMENT = "cstring"; 62 63 64 public static final String CHARACTERSTRING_SEQUENCE_ATTRIBUTE = "content"; 65 66 67 public static final String CHARACTERSET_ELEMENT = "cset"; 68 69 70 public static final String CHARACTERSET_CHARACTERS_ATTRIBUTE = "content"; 71 72 75 public static final String CODE_ATTRIBUTE = "code"; 76 77 78 public static final String CONCATENATION_ELEMENT = "concat"; 79 80 81 public static final String GROUP_ELEMENT = "group"; 82 83 84 public static final String UNIVERSALCHARACTER_ELEMENT = "cuniversal"; 85 86 87 public static final String ENDOFLINE_ELEMENT = "eol"; 88 89 90 public static final String MINOCCURS_ATTRIBUTE = "minOccurs"; 91 92 93 public static final String MAXOCCURS_ATTRIBUTE = "maxOccurs"; 94 private static final int STATE_OUTER = 0; 95 private static final int STATE_LEXICON = 1; 96 private static final int STATE_LEXEME = 2; 97 private static final int STATE_CHARACTERCLASS = 3; 98 private static final int STATE_CHARACTERCLASSELEMENT = 4; 99 private int state = STATE_OUTER; 100 private Lexicon lexicon; 101 private Locator locator = null; 102 private Stack stack; 103 104 109 public Lexicon getLexicon() 110 { 111 return lexicon; 112 } 113 114 private String getLocation() 115 { 116 if (locator==null) 117 return "unknown"; 118 119 return locator.getSystemId()+":"+locator.getLineNumber()+":"+locator.getColumnNumber(); 120 } 121 122 125 public void setDocumentLocator(Locator locator) 126 { 127 this.locator = locator; 128 } 129 130 133 public void startDocument() 134 { 135 stack = new Stack (); 136 } 137 138 145 private int getMinOccursFromAttributes(Attributes atts) 146 { 147 int minOccurs = 1; 148 String attribute = atts.getValue(MINOCCURS_ATTRIBUTE); 149 150 if ((attribute!=null) && (attribute.length()>0)) 151 { 152 try 153 { 154 minOccurs = Integer.parseInt(attribute); 155 } 156 catch (NumberFormatException e) 157 { 158 minOccurs = 1; 160 } 161 162 if (minOccurs<0) 163 minOccurs = 0; 164 } 165 166 return minOccurs; 167 } 168 169 176 private int getMaxOccursFromAttributes(Attributes atts) 177 { 178 int maxOccurs = 1; 179 String attribute = atts.getValue(MAXOCCURS_ATTRIBUTE); 180 181 if ((attribute!=null) && (attribute.length()>0)) 182 { 183 if (attribute.equals("*")) 184 maxOccurs = Integer.MAX_VALUE; 185 else 186 { 187 try 188 { 189 maxOccurs = Integer.parseInt(attribute); 190 } 191 catch (NumberFormatException e) 192 { 193 maxOccurs = 1; 195 } 196 197 if (maxOccurs<1) 198 maxOccurs = 1; 199 } 200 } 201 202 return maxOccurs; 203 } 204 205 210 private boolean getExclusiveFromAttributes(Attributes atts) 211 { 212 String attribute = atts.getValue(EXCLUSIVE_ATTRIBUTE); 213 214 if ((attribute!=null) && (attribute.length()>0)) 215 { 216 boolean value = false; 217 218 try 219 { 220 value = Boolean.valueOf(attribute).booleanValue(); 221 return value; 222 } 223 catch (Exception e) 224 { 225 return false; 226 } 227 } 228 229 return false; 230 } 231 232 244 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 245 throws SAXException 246 { 247 if (namespaceURI.equals(NS)) 248 { 249 if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_OUTER)) 250 { 251 Lexicon lexicon = new Lexicon(); 252 lexicon.setLocation(getLocation()); 253 stack.push(lexicon); 254 255 state = STATE_LEXICON; 256 } 257 else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXICON)) 258 { 259 Lexeme lexeme = new Lexeme(); 260 lexeme.setLocation(getLocation()); 261 if (atts.getValue(SYMBOL_ATTRIBUTE)!=null) 262 lexeme.setSymbol(new Terminal(atts.getValue(SYMBOL_ATTRIBUTE))); 263 264 stack.push(lexeme); 265 266 state = STATE_LEXEME; 267 } 268 else if ((localName.equals(ALTERNATION_ELEMENT)) && (state==STATE_LEXEME)) 269 { 270 Alternation alternation = new Alternation(); 271 alternation.setLocation(getLocation()); 272 273 alternation.setMinOccurs(getMinOccursFromAttributes(atts)); 274 alternation.setMaxOccurs(getMaxOccursFromAttributes(atts)); 275 stack.push(alternation); 276 } 277 else if ((localName.equals(CONCATENATION_ELEMENT)) && (state==STATE_LEXEME)) 278 { 279 Concatenation concatenation = new Concatenation(); 280 concatenation.setLocation(getLocation()); 281 282 concatenation.setMinOccurs(getMinOccursFromAttributes(atts)); 283 concatenation.setMaxOccurs(getMaxOccursFromAttributes(atts)); 284 stack.push(concatenation); 285 } 286 else if ((localName.equals(CHARACTERSTRING_ELEMENT)) && (state==STATE_LEXEME)) 287 { 288 CharacterString characterstring = new CharacterString(); 289 characterstring.setLocation(getLocation()); 290 291 characterstring.setMinOccurs(getMinOccursFromAttributes(atts)); 292 characterstring.setMaxOccurs(getMaxOccursFromAttributes(atts)); 293 294 if (atts.getValue(CODE_ATTRIBUTE)!=null) 295 { 296 char character = (char)Integer.parseInt(atts.getValue(CODE_ATTRIBUTE)); 297 characterstring.setString(String.valueOf(character)); 298 } 299 else 300 characterstring.setString(atts.getValue(CHARACTERSTRING_SEQUENCE_ATTRIBUTE)); 301 302 stack.push(characterstring); 303 } 304 else if ((localName.equals(GROUP_ELEMENT)) && (state==STATE_LEXEME)) 305 { 306 PatternGroup group = new PatternGroup(); 307 group.setLocation(getLocation()); 308 309 group.setMinOccurs(getMinOccursFromAttributes(atts)); 310 group.setMaxOccurs(getMaxOccursFromAttributes(atts)); 311 stack.push(group); 312 } 313 else if ((localName.equals(UNIVERSALCHARACTER_ELEMENT)) && (state==STATE_LEXEME)) 314 { 315 UniversalCharacter uni = new UniversalCharacter(); 316 uni.setLocation(getLocation()); 317 318 uni.setMinOccurs(getMinOccursFromAttributes(atts)); 319 uni.setMaxOccurs(getMaxOccursFromAttributes(atts)); 320 321 stack.push(uni); 322 } 323 else if ((localName.equals(BEGINOFLINE_ELEMENT)) && (state==STATE_LEXEME)) 324 { 325 BeginOfLine bol = new BeginOfLine(); 326 bol.setLocation(getLocation()); 327 328 stack.push(bol); 329 } 330 else if ((localName.equals(ENDOFLINE_ELEMENT)) && (state==STATE_LEXEME)) 331 { 332 EndOfLine eol = new EndOfLine(); 333 334 stack.push(eol); 335 } 336 else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_LEXEME)) 337 { 338 CharacterClass characterclass = new CharacterClass(); 339 characterclass.setLocation(getLocation()); 340 341 characterclass.setExclusive(getExclusiveFromAttributes(atts)); 342 characterclass.setMinOccurs(getMinOccursFromAttributes(atts)); 343 characterclass.setMaxOccurs(getMaxOccursFromAttributes(atts)); 344 stack.push(characterclass); 345 346 state = STATE_CHARACTERCLASS; 347 } 348 else if ((localName.equals(CHARACTERSET_ELEMENT)) && (state==STATE_CHARACTERCLASS)) 349 { 350 CharacterSet characterset = new CharacterSet(); 351 characterset.setLocation(getLocation()); 352 353 if (atts.getValue(CODE_ATTRIBUTE)!=null) 354 { 355 char character = (char)Integer.decode(atts.getValue(CODE_ATTRIBUTE)).intValue(); 356 characterset.setCharacters(String.valueOf(character)); 357 } 358 else 359 characterset.setCharacters(atts.getValue(CHARACTERSET_CHARACTERS_ATTRIBUTE)); 360 361 stack.push(characterset); 362 363 state = STATE_CHARACTERCLASSELEMENT; 364 } 365 else if ((localName.equals(CHARACTERINTERVAL_ELEMENT)) && (state==STATE_CHARACTERCLASS)) 366 { 367 CharacterInterval characterinterval = new CharacterInterval(); 368 characterinterval.setLocation(getLocation()); 369 370 characterinterval.setMinimum(atts.getValue(CHARACTERINTERVAL_MIN_ATTRIBUTE).charAt(0)); 371 characterinterval.setMaximum(atts.getValue(CHARACTERINTERVAL_MAX_ATTRIBUTE).charAt(0)); 372 stack.push(characterinterval); 373 374 state = STATE_CHARACTERCLASSELEMENT; 375 } 376 else 377 throw new SAXException("Unexpected element "+qName+" at "+getLocation()); 378 } 379 else 380 throw new SAXException("Unexpected element "+qName+" at "+getLocation()); 381 } 382 383 395 public void endElement(String namespaceURI, String localName, String qName) 396 throws SAXException 397 { 398 if (namespaceURI.equals(NS)) 399 { 400 if ((localName.equals(LEXICON_ELEMENT)) && (state==STATE_LEXICON)) 401 { 402 lexicon = (Lexicon)stack.pop(); 403 state = STATE_OUTER; 404 } 405 else if ((localName.equals(LEXEME_ELEMENT)) && (state==STATE_LEXEME)) 406 { 407 Lexeme lexeme = (Lexeme)stack.pop(); 408 Lexicon lexicon = (Lexicon)stack.peek(); 409 410 lexicon.addLexeme(lexeme); 411 state = STATE_LEXICON; 412 } 413 else if (((localName.equals(ALTERNATION_ELEMENT)) || 414 (localName.equals(CONCATENATION_ELEMENT)) || 415 (localName.equals(CHARACTERSTRING_ELEMENT)) || (localName.equals(GROUP_ELEMENT)) || 416 (localName.equals(UNIVERSALCHARACTER_ELEMENT)) || 417 (localName.equals(BEGINOFLINE_ELEMENT)) || (localName.equals(ENDOFLINE_ELEMENT))) && 418 (state==STATE_LEXEME)) 419 { 420 Pattern patternelement = (Pattern)stack.pop(); 421 422 if (stack.peek() instanceof Alternation) 423 { 424 Alternation alternation = (Alternation)stack.peek(); 425 426 alternation.addPattern(patternelement); 427 } 428 else if (stack.peek() instanceof Concatenation) 429 { 430 Concatenation concatenation = (Concatenation)stack.peek(); 431 432 concatenation.addPattern(patternelement); 433 } 434 else if (stack.peek() instanceof PatternGroup) 435 { 436 PatternGroup group = (PatternGroup)stack.peek(); 437 438 group.addPattern(patternelement); 439 } 440 else if (stack.peek() instanceof Lexeme) 441 { 442 Lexeme lexeme = (Lexeme)stack.peek(); 443 444 lexeme.setDefinition(patternelement); 445 } 446 } 447 else if ((localName.equals(CHARACTERCLASS_ELEMENT)) && (state==STATE_CHARACTERCLASS)) 448 { 449 Pattern patternelement = (Pattern)stack.pop(); 450 451 if (stack.peek() instanceof Alternation) 452 { 453 Alternation alternation = (Alternation)stack.peek(); 454 455 alternation.addPattern(patternelement); 456 } 457 else if (stack.peek() instanceof Concatenation) 458 { 459 Concatenation concatenation = (Concatenation)stack.peek(); 460 461 concatenation.addPattern(patternelement); 462 } 463 else if (stack.peek() instanceof PatternGroup) 464 { 465 PatternGroup group = (PatternGroup)stack.peek(); 466 467 group.addPattern(patternelement); 468 } 469 else if (stack.peek() instanceof Lexeme) 470 { 471 Lexeme lexeme = (Lexeme)stack.peek(); 472 473 lexeme.setDefinition(patternelement); 474 } 475 476 state = STATE_LEXEME; 477 } 478 else if (((localName.equals(CHARACTERSET_ELEMENT)) || 479 (localName.equals(CHARACTERINTERVAL_ELEMENT))) && 480 (state==STATE_CHARACTERCLASSELEMENT)) 481 { 482 CharacterClassElement characterclasselement = (CharacterClassElement)stack.pop(); 483 CharacterClass characterclass = (CharacterClass)stack.peek(); 484 485 characterclass.addCharacterClassElement(characterclasselement); 486 487 state = STATE_CHARACTERCLASS; 488 } 489 else 490 throw new SAXException("Unexpected element "+qName+" at "+getLocation()); 491 } 492 else 493 throw new SAXException("Unexpected element "+qName+" at "+getLocation()); 494 } 495 } 496 | Popular Tags |