1 6 package uk.co.wilson.xml; 7 8 45 46 import java.io.InputStream ; 47 import java.io.Reader ; 48 import java.io.Writer ; 49 import java.io.InputStreamReader ; 50 import java.io.IOException ; 51 52 import java.net.URL ; 53 54 import java.util.Locale ; 55 import java.util.Vector ; 56 import java.util.Stack ; 57 import java.util.EmptyStackException ; 58 59 import uk.org.xml.sax.Parser; 60 import uk.org.xml.sax.DocumentHandler; 61 62 import org.xml.sax.EntityResolver ; 63 import org.xml.sax.DTDHandler ; 64 import org.xml.sax.ErrorHandler ; 65 import org.xml.sax.Locator ; 66 import org.xml.sax.InputSource ; 67 import org.xml.sax.AttributeList ; 68 import org.xml.sax.SAXException ; 69 import org.xml.sax.SAXParseException ; 70 71 public class MinML implements Parser, Locator , DocumentHandler, ErrorHandler { 72 public static final int endStartName = 0; 73 public static final int emitStartElement = 1; 74 public static final int emitEndElement = 2; 75 public static final int possiblyEmitCharacters = 3; 76 public static final int emitCharacters = 4; 77 public static final int emitCharactersSave = 5; 78 public static final int saveAttributeName = 6; 79 public static final int saveAttributeValue = 7; 80 public static final int startComment = 8; 81 public static final int endComment = 9; 82 public static final int incLevel = 10; 83 public static final int decLevel = 11; 84 public static final int startCDATA = 12; 85 public static final int endCDATA = 13; 86 public static final int processCharRef = 14; 87 public static final int writeCdata = 15; 88 public static final int exitParser = 16; 89 public static final int parseError = 17; 90 public static final int discardAndChange = 18; 91 public static final int discardSaveAndChange = 19; 92 public static final int saveAndChange = 20; 93 public static final int change = 21; 94 95 public static final int inSkipping = 0; 96 public static final int inSTag = 1; 97 public static final int inPossiblyAttribute = 2; 98 public static final int inNextAttribute = 3; 99 public static final int inAttribute = 4; 100 public static final int inAttribute1 = 5; 101 public static final int inAttributeValue = 6; 102 public static final int inAttributeQuoteValue = 7; 103 public static final int inAttributeQuotesValue = 8; 104 public static final int inETag = 9; 105 public static final int inETag1 = 10; 106 public static final int inMTTag = 11; 107 public static final int inTag = 12; 108 public static final int inTag1 = 13; 109 public static final int inPI = 14; 110 public static final int inPI1 = 15; 111 public static final int inPossiblySkipping = 16; 112 public static final int inCharData = 17; 113 public static final int inCDATA = 18; 114 public static final int inCDATA1 = 19; 115 public static final int inComment =20; 116 public static final int inDTD = 21; 117 118 public MinML(final int initialBufferSize, final int bufferIncrement) { 119 this.initialBufferSize = initialBufferSize; 120 this.bufferIncrement = bufferIncrement; 121 } 122 123 public MinML() { 124 this(256, 128); 125 } 126 127 public void parse(final Reader in) throws SAXException , IOException { 128 final Vector attributeNames = new Vector (); 129 final Vector attributeValues = new Vector (); 130 131 final AttributeList attrs = new AttributeList () { 132 public int getLength() { 133 return attributeNames.size(); 134 } 135 136 public String getName(final int i) { 137 return (String )attributeNames.elementAt(i); 138 } 139 140 public String getType(final int i) { 141 return "CDATA"; 142 } 143 144 public String getValue(final int i) { 145 return (String )attributeValues.elementAt(i); 146 } 147 148 public String getType(final String name) { 149 return "CDATA"; 150 } 151 152 public String getValue(final String name) { 153 final int index = attributeNames.indexOf(name); 154 155 return (index == -1) ? null : (String )attributeValues.elementAt(index); 156 } 157 }; 158 159 final MinMLBuffer buffer = new MinMLBuffer(in); 160 int currentChar = 0, charCount = 0; 161 int level = 0; 162 int mixedContentLevel = -1; 163 String elementName = null; 164 String state = operands[inSkipping]; 165 166 this.lineNumber = 1; 167 this.columnNumber = 0; 168 169 try { 170 while(true) { 171 charCount++; 172 173 currentChar = (buffer.nextIn == buffer.lastIn) ? buffer.read() : buffer.chars[buffer.nextIn++]; 178 179 final int transition; 180 181 if (currentChar > ']') { 182 transition = state.charAt(14); 183 } else { 184 final int charClass = charClasses[currentChar + 1]; 185 186 if (charClass == -1) fatalError("Document contains illegal control character with value " + currentChar, this.lineNumber, this.columnNumber); 187 188 if (charClass == 12) { 189 if (currentChar == '\r') { 190 currentChar = '\n'; 191 charCount = -1; 192 } 193 194 if (currentChar == '\n') { 195 if (charCount == 0) continue; 197 if (charCount != -1) charCount = 0; 198 199 this.lineNumber++; 200 this.columnNumber = 0; 201 } 202 } 203 204 transition = state.charAt(charClass); 205 } 206 207 this.columnNumber++; 208 209 final String operand = operands[transition >>> 8]; 210 211 switch (transition & 0XFF) { 212 case endStartName: 213 elementName = buffer.getString(); 215 if (currentChar != '>' && currentChar != '/') break; 218 case emitStartElement: 219 221 final Writer newWriter = this.extDocumentHandler.startElement(elementName, attrs, 222 (this.tags.empty()) ? 223 this.extDocumentHandler.startDocument(buffer) 224 : 225 buffer.getWriter()); 226 227 buffer.pushWriter(newWriter); 228 this.tags.push(elementName); 229 230 attributeValues.removeAllElements(); 231 attributeNames.removeAllElements(); 232 233 if (mixedContentLevel != -1) mixedContentLevel++; 234 235 if (currentChar != '/') break; 237 239 case emitEndElement: 240 242 try { 243 final String begin = (String )this.tags.pop(); 244 245 buffer.popWriter(); 246 elementName = buffer.getString(); 247 248 if (currentChar != '/' && !elementName.equals(begin)) { 249 fatalError("end tag </" + elementName + "> does not match begin tag <" + begin + ">", 250 this.lineNumber, this.columnNumber); 251 } else { 252 this.documentHandler.endElement(begin); 253 254 if (this.tags.empty()) { 255 this.documentHandler.endDocument(); 256 return; 257 } 258 } 259 } 260 catch (final EmptyStackException e) { 261 fatalError("end tag at begining of document", this.lineNumber, this.columnNumber); 262 } 263 264 if (mixedContentLevel != -1) --mixedContentLevel; 265 266 break; 268 case emitCharacters: 269 271 buffer.flush(); 272 break; 274 case emitCharactersSave: 275 277 if (mixedContentLevel == -1) mixedContentLevel = 0; 278 279 buffer.flush(); 280 281 buffer.saveChar((char)currentChar); 282 283 break; 285 case possiblyEmitCharacters: 286 288 if (mixedContentLevel != -1) buffer.flush(); 289 break; 291 case saveAttributeName: 292 294 attributeNames.addElement(buffer.getString()); 295 break; 297 case saveAttributeValue: 298 300 attributeValues.addElement(buffer.getString()); 301 break; 303 case startComment: 304 306 if (buffer.read() != '-') continue; 308 break; 310 case endComment: 311 313 if ((currentChar = buffer.read()) == '-') { 314 while ((currentChar = buffer.read()) == '-'); 316 317 if (currentChar == '>') break; } 319 320 continue; 322 case incLevel: 323 324 level++; 325 326 break; 327 328 case decLevel: 329 330 if (level == 0) break; 332 level--; 333 334 continue; 336 case startCDATA: 337 339 if (buffer.read() != 'C') continue; if (buffer.read() != 'D') continue; if (buffer.read() != 'A') continue; if (buffer.read() != 'T') continue; if (buffer.read() != 'A') continue; if (buffer.read() != '[') continue; break; 347 case endCDATA: 348 350 if ((currentChar = buffer.read()) == ']') { 351 while ((currentChar = buffer.read()) == ']') buffer.write(']'); 353 354 if (currentChar == '>') break; 356 buffer.write(']'); 357 } 358 359 buffer.write(']'); 360 buffer.write(currentChar); 361 continue; 363 case processCharRef: 364 366 int crefState = 0; 367 368 currentChar = buffer.read(); 369 370 while (true) { 371 if ("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState) == currentChar) { 372 crefState++; 373 374 if (currentChar == ';') { 375 buffer.write("#amp;&pos;'quot;\"gt;>lt;<".charAt(crefState)); 376 break; 377 378 } else if (currentChar == '#') { 379 final int radix; 380 381 currentChar = buffer.read(); 382 383 if (currentChar == 'x') { 384 radix = 16; 385 currentChar = buffer.read(); 386 } else { 387 radix = 10; 388 } 389 390 int charRef = Character.digit((char)currentChar, radix); 391 392 while (true) { 393 currentChar = buffer.read(); 394 395 final int digit = Character.digit((char)currentChar, radix); 396 397 if (digit == -1) break; 398 399 charRef = (char)((charRef * radix) + digit); 400 } 401 402 if (currentChar == ';' && charRef != -1) { 403 buffer.write(charRef); 404 break; 405 } 406 407 fatalError("invalid Character Entitiy", this.lineNumber, this.columnNumber); 408 } else { 409 currentChar = buffer.read(); 410 } 411 } else { 412 crefState = ("\u0001\u000b\u0006\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff\u00ff" + 413 "\u0011\u00ff\u00ff\u00ff\u00ff\u00ff\u0015\u00ff\u00ff\u00ff" + 416 "\u00ff\u00ff\u00ff").charAt(crefState); 419 422 if (crefState == 255) fatalError("invalid Character Entitiy", this.lineNumber, this.columnNumber); 423 } 424 } 425 426 break; 427 428 case parseError: 429 431 fatalError(operand, this.lineNumber, this.columnNumber); 432 434 case exitParser: 435 437 return; 438 439 case writeCdata: 440 443 buffer.write(currentChar); 444 break; 446 case discardAndChange: 447 449 buffer.reset(); 450 break; 452 case discardSaveAndChange: 453 455 buffer.reset(); 456 458 case saveAndChange: 459 461 buffer.saveChar((char)currentChar); 462 break; 464 case change: 465 467 break; } 469 470 state = operand; 471 } 472 } 473 catch (final IOException e) { 474 this.errorHandler.fatalError(new SAXParseException (e.toString(), null, null, this.lineNumber, this.columnNumber, e)); 475 } 476 finally { 477 this.errorHandler = this; 478 this.documentHandler = this.extDocumentHandler = this; 479 this.tags.removeAllElements(); 480 } 481 } 482 483 public void parse(final InputSource source) throws SAXException , IOException { 484 if (source.getCharacterStream() != null) 485 parse(source.getCharacterStream()); 486 else if (source.getByteStream() != null) 487 parse(new InputStreamReader (source.getByteStream())); 488 else 489 parse(new InputStreamReader (new URL (source.getSystemId()).openStream())); 490 } 491 492 public void parse(final String systemId) throws SAXException , IOException { 493 parse(new InputSource (systemId)); 494 } 495 496 public void setLocale(final Locale locale) throws SAXException { 497 throw new SAXException ("Not supported"); 498 } 499 500 public void setEntityResolver(final EntityResolver resolver) { 501 } 503 504 public void setDTDHandler(final DTDHandler handler) { 505 } 507 508 public void setDocumentHandler(final org.xml.sax.DocumentHandler handler) { 509 this.documentHandler = (handler == null) ? this : handler; 510 this.extDocumentHandler = this; 511 } 512 513 public void setDocumentHandler(final DocumentHandler handler) { 514 this.documentHandler = this.extDocumentHandler = (handler == null) ? this : handler; 515 this.documentHandler.setDocumentLocator(this); 516 } 517 518 public void setErrorHandler(final ErrorHandler handler) { 519 this.errorHandler = (handler == null) ? this : handler; 520 } 521 522 public void setDocumentLocator(final Locator locator) { 523 } 524 525 public void startDocument() throws SAXException { 526 } 527 528 public Writer startDocument(final Writer writer) throws SAXException { 529 this.documentHandler.startDocument(); 530 return writer; 531 } 532 533 public void endDocument() throws SAXException { 534 } 535 536 public void startElement(final String name, final AttributeList attributes) throws SAXException { 537 } 538 539 public Writer startElement(final String name, final AttributeList attributes, final Writer writer) 540 throws SAXException 541 { 542 this.documentHandler.startElement(name, attributes); 543 return writer; 544 } 545 546 public void endElement(final String name) throws SAXException { 547 } 548 549 public void characters(final char ch[], final int start, final int length) throws SAXException { 550 } 551 552 public void ignorableWhitespace(final char ch[], final int start, final int length) throws SAXException { 553 } 554 555 public void processingInstruction(final String target, final String data) throws SAXException { 556 } 557 558 public void warning(final SAXParseException e) throws SAXException { 559 } 560 561 public void error(final SAXParseException e) throws SAXException { 562 } 563 564 public void fatalError(final SAXParseException e) throws SAXException { 565 throw e; 566 } 567 568 public String getPublicId() { 569 return ""; 570 } 571 572 573 public String getSystemId() { 574 return ""; 575 } 576 577 public int getLineNumber () { 578 return this.lineNumber; 579 } 580 581 public int getColumnNumber () { 582 return this.columnNumber; 583 } 584 585 private void fatalError(final String msg, final int lineNumber, final int columnNumber) throws SAXException { 586 this.errorHandler.fatalError(new SAXParseException (msg, null, null, lineNumber, columnNumber)); 587 } 588 589 private class MinMLBuffer extends Writer { 590 public MinMLBuffer(final Reader in) { 591 this.in = in; 592 } 593 594 public void close() throws IOException { 595 flush(); 596 } 597 598 public void flush() throws IOException { 599 try { 600 _flush(); 601 if (writer != this) writer.flush(); 602 } 603 finally { 604 flushed = true; 605 } 606 } 607 608 public void write(final int c) throws IOException { 609 written = true; 610 chars[count++] = (char)c; 611 } 612 613 public void write(final char[] cbuf, final int off, final int len) throws IOException { 614 written = true; 615 System.arraycopy(cbuf, off, chars, count, len); 616 count += len; 617 } 618 619 public void saveChar(final char c) { 620 written = false; 621 chars[count++] = c; 622 } 623 624 public void pushWriter(final Writer writer) { 625 MinML.this.tags.push(this.writer); 626 627 this.writer = (writer == null) ? this : writer; 628 629 flushed = written = false; 630 } 631 632 public Writer getWriter() { 633 return writer; 634 } 635 636 public void popWriter() throws IOException { 637 try { 638 if (!flushed && writer != this) writer.flush(); 639 } 640 finally { 641 writer = (Writer )MinML.this.tags.pop(); 642 flushed = written = false; 643 } 644 } 645 646 public String getString() { 647 final String result = new String (chars, 0, count); 648 649 count = 0; 650 return result; 651 } 652 653 public void reset() { 654 count = 0; 655 } 656 657 public int read() throws IOException { 658 if (nextIn == lastIn) { 659 if (count != 0) { 660 if (written) { 661 _flush(); 662 } else if (count >= (chars.length - MinML.this.bufferIncrement)) { 663 final char[] newChars = new char[chars.length + MinML.this.bufferIncrement]; 664 665 System.arraycopy(chars, 0, newChars, 0, count); 666 chars = newChars; 667 } 668 } 669 670 final int numRead = in.read(chars, count, chars.length - count); 671 672 if (numRead == -1) return -1; 673 674 nextIn = count; 675 lastIn = count + numRead; 676 } 677 678 return chars[nextIn++]; 679 } 680 681 private void _flush() throws IOException { 682 if (count != 0) { 683 try { 684 if (writer == this) { 685 try { 686 MinML.this.documentHandler.characters(chars, 0, count); 687 } 688 catch (final SAXException e) { 689 throw new IOException (e.toString()); 690 } 691 } else { 692 writer.write(chars, 0, count); 693 } 694 } 695 finally { 696 count = 0; 697 } 698 } 699 } 700 701 private int nextIn = 0, lastIn = 0; 702 private char[] chars = new char[MinML.this.initialBufferSize]; 703 private final Reader in; 704 private int count = 0; 705 private Writer writer = this; 706 private boolean flushed = false; 707 private boolean written = false; 708 } 709 710 private DocumentHandler extDocumentHandler = this; 711 private org.xml.sax.DocumentHandler documentHandler = this; 712 private ErrorHandler errorHandler = this; 713 private final Stack tags = new Stack (); 714 private int lineNumber = 1; 715 private int columnNumber = 0; 716 private final int initialBufferSize; 717 private final int bufferIncrement; 718 719 private static final byte[] charClasses = { 720 13, 722 -1, -1, -1, -1, -1, -1, -1, -1, -1, 12, 12, -1, -1, 12, -1, -1, 724 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 726 12, 8, 7, 14, 14, 14, 3, 6, 14, 14, 14, 14, 14, 11, 14, 2, 728 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 0, 5, 1, 4, 730 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 732 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 9, 14, 10 734 }; 735 736 private static final String [] operands = { 737 "\u0d15\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u1611\u0015\u0010\u1611", 738 "\u1711\u1000\u0b00\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u0114\u0200\u1811\u0114", 739 "\u1711\u1001\u0b01\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u0215\u1811\u0414", 740 "\u1711\u1001\u0b01\u1711\u1911\u1911\u1911\u1911\u1911\u1911\u1911\u1911\u0315\u1811\u0414", 741 "\u1911\u1911\u1911\u1911\u1911\u0606\u1911\u1911\u1911\u1911\u1911\u0414\u0515\u1811\u0414", 742 "\u1911\u1911\u1911\u1911\u1911\u0606\u1911\u1911\u1911\u1911\u1911\u1911\u0515\u1811\u1911", 743 "\u1a11\u1a11\u1a11\u1a11\u1a11\u1a11\u0715\u0815\u1a11\u1a11\u1a11\u1a11\u0615\u1811\u1a11", 744 "\u0714\u0714\u0714\u070e\u0714\u0714\u0307\u0714\u0714\u0714\u0714\u0714\u0714\u1811\u0714", 745 "\u0814\u0814\u0814\u080e\u0814\u0814\u0814\u0307\u0814\u0814\u0814\u0814\u0814\u1811\u0814", 746 "\u1711\u1002\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u0914\u0915\u1811\u0914", 747 "\u1b11\u1b11\u0904\u1b11\u1b11\u1b11\u1b11\u1b11\u1215\u1b11\u1b11\u1b11\u1b11\u1811\u0105", 748 "\u1711\u1012\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1711\u1811\u1711", 749 "\u1711\u1c11\u0912\u1711\u0e12\u1711\u1711\u1711\u1212\u1711\u1711\u1711\u1711\u1811\u0113", 750 "\u1711\u1c11\u0912\u1711\u0e12\u1711\u1711\u1711\u1212\u1711\u1711\u1711\u1711\u1811\u0113", 751 "\u0e15\u0e15\u0e15\u0e15\u0f15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u1811\u0e15", 752 "\u0e15\u0015\u0e15\u0e15\u0f15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u0e15\u1811\u0e15", 753 "\u0c03\u110f\u110f\u110e\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u1014\u1811\u110f", 754 "\u0a15\u110f\u110f\u110e\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u110f\u1811\u110f", 755 "\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u1d11\u130c\u1d11\u1408\u1d11\u1811\u1515", 756 "\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u130f\u110d\u130f\u130f\u1811\u130f", 757 "\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u1415\u0009\u1415\u1811\u1415", 758 "\u150a\u000b\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1515\u1811\u1515", 759 "expected Element", 760 "unexpected character in tag", 761 "unexpected end of file found", 762 "attribute name not followed by '='", 763 "invalid attribute value", 764 "expecting end tag", 765 "empty tag", 766 "unexpected character after <!" 767 }; 768 } 769 | Popular Tags |