1 8 9 package net.sourceforge.chaperon.process; 10 11 import net.sourceforge.chaperon.common.Decoder; 12 13 import org.apache.commons.logging.Log; 14 15 import org.xml.sax.*; 16 import org.xml.sax.ext.LexicalHandler ; 17 import org.xml.sax.helpers.AttributesImpl ; 18 import org.xml.sax.helpers.LocatorImpl ; 19 20 26 public class LexicalProcessor implements ContentHandler, LexicalHandler 27 { 28 public static final String NS = "http://chaperon.sourceforge.net/schema/text/1.0"; 29 public static final String TEXT = "text"; 30 public static final String NS_OUTPUT = "http://chaperon.sourceforge.net/schema/lexer/2.0"; 31 public static final String OUTPUT = "output"; 32 public static final String LEXEME = "lexeme"; 33 public static final String GROUP = "group"; 34 public static final String ERROR = "error"; 35 private ContentHandler contentHandler = null; 36 private LexicalHandler lexicalHandler = null; 37 private static final int STATE_OUTSIDE = 0; 38 private static final int STATE_TEXT = 1; 39 private int state = STATE_OUTSIDE; 40 private Locator locator = null; 41 private LocatorImpl locatorImpl = null; 42 private LexicalAutomaton automaton = null; 43 private Log log = null; 44 private boolean grouping = false; 45 private boolean localizable = false; 46 private String source; 47 private int lineNumber; 48 private int columnNumber; 49 private StringBuffer buffer = null; 50 private char[] text = null; 51 52 55 public LexicalProcessor() {} 56 57 63 public LexicalProcessor(LexicalAutomaton automaton) 64 { 65 this.automaton = automaton; 66 } 67 68 73 public void setLexicalAutomaton(LexicalAutomaton automaton) 74 { 75 this.automaton = automaton; 76 } 77 78 81 public void setContentHandler(ContentHandler handler) 82 { 83 this.contentHandler = handler; 84 } 85 86 89 public void setLexicalHandler(LexicalHandler handler) 90 { 91 this.lexicalHandler = handler; 92 } 93 94 99 public void setLog(Log log) 100 { 101 this.log = log; 102 } 103 104 public void setGrouping(boolean grouping) 105 { 106 this.grouping = grouping; 107 } 108 109 public void setLocalizable(boolean localizable) 110 { 111 this.localizable = localizable; 112 } 113 114 117 public void setDocumentLocator(Locator locator) 118 { 119 this.locator = locator; 120 this.locatorImpl = null; 121 if (locator!=null) 122 { 123 this.locatorImpl = new LocatorImpl (locator); 124 contentHandler.setDocumentLocator(locatorImpl); 125 } 126 } 127 128 131 public void startDocument() throws SAXException 132 { 133 if (locatorImpl!=null) 134 { 135 locatorImpl.setLineNumber(locator.getLineNumber()); 136 locatorImpl.setColumnNumber(locator.getColumnNumber()); 137 } 138 139 contentHandler.startDocument(); 140 state = STATE_OUTSIDE; 141 142 buffer = new StringBuffer (); 143 } 144 145 148 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) 149 throws SAXException 150 { 151 if (state==STATE_OUTSIDE) 152 { 153 if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(TEXT))) 154 { 155 state = STATE_TEXT; 156 buffer = new StringBuffer (); 157 158 if (atts.getValue("source")!=null) 159 source = atts.getValue("source"); 160 else if (locator!=null) 161 source = locator.getSystemId(); 162 else 163 source = "unknown"; 164 165 if (atts.getValue("column")!=null) 166 columnNumber = Integer.parseInt(atts.getValue("column")); 167 else if (locator!=null) 168 columnNumber = locator.getColumnNumber(); 169 else 170 columnNumber = 1; 171 172 if (atts.getValue("line")!=null) 173 lineNumber = Integer.parseInt(atts.getValue("line")); 174 else if (locator!=null) 175 lineNumber = locator.getLineNumber(); 176 else 177 lineNumber = 1; 178 } 179 else 180 contentHandler.startElement(namespaceURI, localName, qName, atts); 181 } 182 else if (state==STATE_TEXT) 183 throw new SAXException("Unexpected start element '"+qName+"'."); 184 } 185 186 189 public void characters(char[] ch, int start, int length) 190 throws SAXException 191 { 192 if (state==STATE_OUTSIDE) 193 contentHandler.characters(ch, start, length); 194 else if (state==STATE_TEXT) 195 buffer.append(ch, start, length); 196 } 197 198 201 public void ignorableWhitespace(char[] ch, int start, int length) 202 throws SAXException 203 { 204 if (state==STATE_OUTSIDE) 205 contentHandler.characters(ch, start, length); 206 else if (state==STATE_TEXT) 207 buffer.append(ch, start, length); 208 } 209 210 213 public void endElement(String namespaceURI, String localName, String qName) 214 throws SAXException 215 { 216 if (state==STATE_OUTSIDE) 217 contentHandler.endElement(namespaceURI, localName, qName); 218 else if (state==STATE_TEXT) 219 { 220 if ((namespaceURI!=null) && (namespaceURI.equals(NS)) && (localName.equals(TEXT))) 221 { 222 state = STATE_OUTSIDE; 223 224 handleEndDocument(); 225 } 226 else 227 throw new SAXException("Unexpected end element '"+qName+"'."); 228 } 229 } 230 231 234 public void startPrefixMapping(String prefix, String uri) 235 throws SAXException 236 { 237 if (locatorImpl!=null) 238 { 239 locatorImpl.setLineNumber(locator.getLineNumber()); 240 locatorImpl.setColumnNumber(locator.getColumnNumber()); 241 } 242 243 contentHandler.startPrefixMapping(prefix, uri); 244 } 245 246 249 public void endPrefixMapping(String prefix) throws SAXException 250 { 251 if (locatorImpl!=null) 252 { 253 locatorImpl.setLineNumber(locator.getLineNumber()); 254 locatorImpl.setColumnNumber(locator.getColumnNumber()); 255 } 256 257 contentHandler.endPrefixMapping(prefix); 258 } 259 260 263 public void processingInstruction(String target, String data) 264 throws SAXException 265 { 266 if (locatorImpl!=null) 267 { 268 locatorImpl.setLineNumber(locator.getLineNumber()); 269 locatorImpl.setColumnNumber(locator.getColumnNumber()); 270 } 271 272 if (state==STATE_OUTSIDE) 273 contentHandler.processingInstruction(target, data); 274 } 275 276 279 public void skippedEntity(String name) throws SAXException 280 { 281 if (locatorImpl!=null) 282 { 283 locatorImpl.setLineNumber(locator.getLineNumber()); 284 locatorImpl.setColumnNumber(locator.getColumnNumber()); 285 } 286 287 if (state==STATE_OUTSIDE) 288 contentHandler.skippedEntity(name); 289 } 290 291 294 public void endDocument() throws SAXException 295 { 296 if (locatorImpl!=null) 297 { 298 locatorImpl.setLineNumber(locator.getLineNumber()); 299 locatorImpl.setColumnNumber(locator.getColumnNumber()); 300 } 301 302 if (state==STATE_OUTSIDE) 303 contentHandler.endDocument(); 304 } 305 306 309 public void startDTD(String name, String publicId, String systemId) 310 throws SAXException 311 { 312 if (lexicalHandler!=null) 313 lexicalHandler.startDTD(name, publicId, systemId); 314 } 315 316 319 public void endDTD() throws SAXException 320 { 321 if (lexicalHandler!=null) 322 lexicalHandler.endDTD(); 323 } 324 325 328 public void startEntity(String name) throws SAXException 329 { 330 if (lexicalHandler!=null) 331 lexicalHandler.startEntity(name); 332 } 333 334 337 public void endEntity(String name) throws SAXException 338 { 339 if (lexicalHandler!=null) 340 lexicalHandler.endEntity(name); 341 } 342 343 346 public void startCDATA() throws SAXException 347 { 348 if (lexicalHandler!=null) 349 lexicalHandler.startCDATA(); 350 } 351 352 355 public void endCDATA() throws SAXException 356 { 357 if (lexicalHandler!=null) 358 lexicalHandler.endCDATA(); 359 } 360 361 364 public void comment(char[] ch, int start, int len) throws SAXException 365 { 366 if (lexicalHandler!=null) 367 lexicalHandler.comment(ch, start, len); 368 } 369 370 373 public void handleEndDocument() throws SAXException 374 { 375 PatternProcessor processor = new PatternProcessor(); 376 text = buffer.toString().toCharArray(); 377 378 int position = 0; 379 380 if (locatorImpl!=null) 381 { 382 locatorImpl.setSystemId(source); 383 locatorImpl.setLineNumber(lineNumber); 384 locatorImpl.setColumnNumber(columnNumber); 385 } 386 387 contentHandler.startPrefixMapping("", NS_OUTPUT); 388 389 AttributesImpl atts = new AttributesImpl (); 390 if (localizable) 391 atts.addAttribute("", "source", "source", "CDATA", source); 392 393 contentHandler.startElement(NS_OUTPUT, OUTPUT, OUTPUT, new AttributesImpl ()); 394 395 StringBuffer unrecognized = new StringBuffer (); 396 while (position<text.length) 397 { 398 String tokensymbol = null; 399 String tokentext = null; 400 401 for (int lexemeindex = automaton.getLexemeCount()-1; lexemeindex>=0; lexemeindex--) 402 { 403 processor.setPatternAutomaton(automaton.getLexemeDefinition(lexemeindex)); 404 405 if ((processor.match(text, position)) && 406 ((tokentext==null) || (processor.getGroup().length()>=tokentext.length()))) 407 { 408 tokensymbol = automaton.getLexemeSymbol(lexemeindex); 409 tokentext = processor.getGroup(); 410 } 411 } 412 413 if ((tokentext!=null) && (tokentext.length()==0)) 414 log.warn("Lexical processor recognized empty lexeme '"+tokensymbol+"'"); 415 416 if ((tokentext!=null) && (tokentext.length()>0)) 417 { 418 if (unrecognized.length()>0) 419 { 420 if (log!=null) 421 log.debug("Text was not recognized "+Decoder.toString(unrecognized.toString())); 422 423 atts = new AttributesImpl (); 424 atts.addAttribute("", "text", "text", "CDATA", unrecognized.toString()); 425 if (localizable) 426 { 427 atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber)); 428 atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber)); 429 } 430 431 contentHandler.startElement(NS_OUTPUT, ERROR, ERROR, atts); 432 contentHandler.endElement(NS_OUTPUT, ERROR, ERROR); 433 434 increasePosition(position-unrecognized.length(), unrecognized.length()); 435 436 unrecognized = new StringBuffer (); 437 } 438 439 if (tokensymbol!=null) 440 { 441 if (log!=null) 442 log.debug("Recognize token "+tokensymbol+" with "+Decoder.toString(tokentext)); 443 444 if (locatorImpl!=null) 445 { 446 locatorImpl.setLineNumber(locator.getLineNumber()); 447 locatorImpl.setColumnNumber(locator.getColumnNumber()); 448 } 449 450 atts = new AttributesImpl (); 451 452 atts.addAttribute("", "symbol", "symbol", "CDATA", tokensymbol); 453 atts.addAttribute("", "text", "text", "CDATA", tokentext); 454 if (localizable) 455 { 456 atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber)); 457 atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber)); 458 } 459 460 contentHandler.startElement(NS_OUTPUT, LEXEME, LEXEME, atts); 461 462 if (grouping) 463 for (int i = 1; i<processor.getGroupCount(); i++) 464 { 465 AttributesImpl groupatts = new AttributesImpl (); 466 groupatts.addAttribute("", "text", "text", "CDATA", processor.getGroup(i)); 467 contentHandler.startElement(NS_OUTPUT, GROUP, GROUP, groupatts); 468 contentHandler.endElement(NS_OUTPUT, GROUP, GROUP); 469 } 470 471 contentHandler.endElement(NS_OUTPUT, LEXEME, LEXEME); 472 } 473 else if (log!=null) 474 log.debug("Ignore lexeme with "+Decoder.toString(tokentext)); 475 476 if (locatorImpl!=null) 477 { 478 locatorImpl.setColumnNumber(columnNumber); 479 locatorImpl.setLineNumber(lineNumber); 480 } 481 482 position += tokentext.length(); 483 484 increasePosition(position-tokentext.length(), tokentext.length()); 485 } 486 else 487 { 488 if (locatorImpl!=null) 489 { 490 locatorImpl.setColumnNumber(columnNumber); 491 locatorImpl.setLineNumber(lineNumber); 492 } 493 494 unrecognized.append(text[position]); 495 position++; 496 } 497 } 498 499 if (unrecognized.length()>0) 500 { 501 if (log!=null) 502 log.debug("Text was not recognized "+Decoder.toString(unrecognized.toString())); 503 504 atts = new AttributesImpl (); 505 atts.addAttribute("", "text", "text", "CDATA", unrecognized.toString()); 506 if (localizable) 507 { 508 atts.addAttribute("", "line", "line", "CDATA", String.valueOf(lineNumber)); 509 atts.addAttribute("", "column", "column", "CDATA", String.valueOf(columnNumber)); 510 } 511 512 contentHandler.startElement(NS_OUTPUT, ERROR, ERROR, atts); 513 contentHandler.endElement(NS_OUTPUT, ERROR, ERROR); 514 515 System.out.println("push \""+unrecognized.toString()+"\""); 516 increasePosition(position-unrecognized.length(), unrecognized.length()); 517 } 518 519 if (locatorImpl!=null) 520 { 521 locatorImpl.setLineNumber(locator.getLineNumber()); 522 locatorImpl.setColumnNumber(locator.getColumnNumber()); 523 } 524 525 contentHandler.endElement(NS_OUTPUT, OUTPUT, OUTPUT); 526 contentHandler.endPrefixMapping(""); 527 } 528 529 private void increasePosition(int position, int length) 530 { 531 for (int i = position; i<(position+length); i++) 532 { 533 if (text[i]=='\n') 534 { 535 columnNumber = 1; 536 lineNumber++; 537 } 538 else if ((text[i]=='\r') && ((i==(text.length-1)) || (text[i+1]!='\n'))) 539 { 540 columnNumber = 1; 541 lineNumber++; 542 } 543 else 544 columnNumber++; 545 } 546 } 547 } 548 | Popular Tags |