| 1 57 58 package org.enhydra.apache.xerces.framework; 59 60 import org.enhydra.apache.xerces.readers.DefaultEntityHandler; 61 import org.enhydra.apache.xerces.readers.XMLEntityHandler; 62 import org.enhydra.apache.xerces.utils.ChunkyCharArray; 63 import org.enhydra.apache.xerces.utils.QName; 64 import org.enhydra.apache.xerces.utils.StringPool; 65 import org.enhydra.apache.xerces.utils.XMLCharacterProperties; 66 import org.enhydra.apache.xerces.utils.XMLMessages; 67 import org.enhydra.apache.xerces.validators.common.GrammarResolver; 68 69 83 public final class XMLDocumentScanner { 84 88 94 static final char[] cdata_string = { '[','C','D','A','T','A','[' }; 98 static final char[] xml_string = { 'x','m','l' }; 103 private static final char[] version_string = { 'v','e','r','s','i','o','n' }; 107 static final char[] doctype_string = { 'D','O','C','T','Y','P','E' }; 112 private static final char[] standalone_string = { 's','t','a','n','d','a','l','o','n','e' }; 117 private static final char[] encoding_string = { 'e','n','c','o','d','i','n','g' }; 121 122 125 public static final int 126 RESULT_SUCCESS = 0, 127 RESULT_FAILURE = -1, 128 RESULT_DUPLICATE_ATTR = -2; 129 130 131 static final int 132 SCANNER_STATE_XML_DECL = 0, 133 SCANNER_STATE_START_OF_MARKUP = 1, 134 SCANNER_STATE_COMMENT = 2, 135 SCANNER_STATE_PI = 3, 136 SCANNER_STATE_DOCTYPE = 4, 137 SCANNER_STATE_PROLOG = 5, 138 SCANNER_STATE_ROOT_ELEMENT = 6, 139 SCANNER_STATE_CONTENT = 7, 140 SCANNER_STATE_REFERENCE = 8, 141 SCANNER_STATE_ATTRIBUTE_LIST = 9, 142 SCANNER_STATE_ATTRIBUTE_NAME = 10, 143 SCANNER_STATE_ATTRIBUTE_VALUE = 11, 144 SCANNER_STATE_TRAILING_MISC = 12, 145 SCANNER_STATE_END_OF_INPUT = 13, 146 SCANNER_STATE_TERMINATED = 14; 147 148 152 private StringPool.CharArrayRange fCurrentElementCharArrayRange = null; 154 155 int fAttrListHandle = -1; 156 XMLAttrList fAttrList = null; 157 GrammarResolver fGrammarResolver = null; 158 XMLDTDScanner fDTDScanner = null; 159 boolean fNamespacesEnabled = false; 160 boolean fValidationEnabled = false; 161 boolean fLoadExternalDTD = true; 162 QName fElementQName = new QName(); 163 QName fAttributeQName = new QName(); 164 QName fCurrentElementQName = new QName(); 165 ScannerDispatcher fDispatcher = null; 166 EventHandler fEventHandler = null; 167 XMLDocumentHandler.DTDHandler fDTDHandler = null; 168 StringPool fStringPool = null; 169 XMLErrorReporter fErrorReporter = null; 170 XMLEntityHandler fEntityHandler = null; 171 XMLEntityHandler.EntityReader fEntityReader = null; 172 XMLEntityHandler.CharBuffer fLiteralData = null; 173 boolean fSeenRootElement = false; 174 boolean fSeenDoctypeDecl = false; 175 boolean fStandalone = false; 176 boolean fParseTextDecl = false; 177 boolean fScanningDTD = false; 178 int fScannerState = SCANNER_STATE_XML_DECL; 179 int fReaderId = -1; 180 int fAttValueReader = -1; 181 int fAttValueElementType = -1; 182 int fAttValueAttrName = -1; 183 int fAttValueOffset = -1; 184 int fAttValueMark = -1; 185 int fScannerMarkupDepth = 0; 186 187 191 196 public interface EventHandler { 197 202 public void callStandaloneIsYes() throws Exception ; 203 204 209 public void callStartDocument() throws Exception ; 210 215 public void callEndDocument() throws Exception ; 216 224 public void callXMLDecl(int version, int encoding, int standalone) throws Exception ; 225 232 public void callTextDecl(int version, int encoding) throws Exception ; 233 239 public void callStartElement(QName element) throws Exception ; 240 245 public void element(QName element) throws Exception ; 246 254 public boolean attribute(QName element, QName attrName, int attrValue) throws Exception ; 255 261 public void callEndElement(int readerId) throws Exception ; 262 266 public void callStartCDATA() throws Exception ; 267 271 public void callEndCDATA() throws Exception ; 272 278 public void callCharacters(int ch) throws Exception ; 279 286 public void callProcessingInstruction(int piTarget, int piData) throws Exception ; 287 293 public void callComment(int data) throws Exception ; 294 } 295 296 299 public XMLDocumentScanner(StringPool stringPool, 300 XMLErrorReporter errorReporter, 301 XMLEntityHandler entityHandler, 302 XMLEntityHandler.CharBuffer literalData) { 303 fStringPool = stringPool; 304 fErrorReporter = errorReporter; 305 fEntityHandler = entityHandler; 306 fLiteralData = literalData; 307 fDispatcher = new XMLDeclDispatcher(); 308 fAttrList = new XMLAttrList(fStringPool); 309 } 310 311 316 public void setEventHandler(XMLDocumentScanner.EventHandler eventHandler) { 317 fEventHandler = eventHandler; 318 } 319 320 321 public void setDTDHandler(XMLDocumentHandler.DTDHandler dtdHandler) { 322 fDTDHandler = dtdHandler; 323 } 324 325 326 public void setGrammarResolver(GrammarResolver resolver) { 327 fGrammarResolver = resolver; 328 } 329 330 335 public void reset(StringPool stringPool, XMLEntityHandler.CharBuffer literalData) { 336 fStringPool = stringPool; 337 fLiteralData = literalData; 338 fParseTextDecl = false; 339 fSeenRootElement = false; 340 fSeenDoctypeDecl = false; 341 fStandalone = false; 342 fScanningDTD = false; 343 fDispatcher = new XMLDeclDispatcher(); 344 fScannerState = SCANNER_STATE_XML_DECL; 345 fScannerMarkupDepth = 0; 346 fAttrList = new XMLAttrList(fStringPool); 347 } 348 349 375 public boolean parseSome(boolean doItAll) throws Exception  376 { 377 do { 378 if (!fDispatcher.dispatch(doItAll)) 379 return false; 380 } while (doItAll); 381 return true; 382 } 383 384 391 public void readerChange(XMLEntityHandler.EntityReader nextReader, int nextReaderId) throws Exception { 392 fEntityReader = nextReader; 393 fReaderId = nextReaderId; 394 if (fScannerState == SCANNER_STATE_ATTRIBUTE_VALUE) { 395 fAttValueOffset = fEntityReader.currentOffset(); 396 fAttValueMark = fAttValueOffset; 397 } 398 399 if (fDTDScanner != null && fScanningDTD) 401 fDTDScanner.readerChange(nextReader, nextReaderId); 402 } 403 404 411 public void endOfInput(int entityName, boolean moreToFollow) throws Exception { 412 if (fDTDScanner != null && fScanningDTD){ 413 fDTDScanner.endOfInput(entityName, moreToFollow); 414 } 415 fDispatcher.endOfInput(entityName, moreToFollow); 416 } 417 418 422 public boolean atEndOfInput() { 423 return fScannerState == SCANNER_STATE_END_OF_INPUT; 424 } 425 426 438 public int scanAttValue(QName element, QName attribute, boolean asSymbol) throws Exception { 439 boolean single; 440 if (!(single = fEntityReader.lookingAtChar('\'', true)) && !fEntityReader.lookingAtChar('\"', true)) { 441 reportFatalXMLError(XMLMessages.MSG_QUOTE_REQUIRED_IN_ATTVALUE, 442 XMLMessages.P10_QUOTE_REQUIRED, 443 element.rawname, 444 attribute.rawname); 445 return -1; 446 } 447 char qchar = single ? '\'' : '\"'; 448 fAttValueMark = fEntityReader.currentOffset(); 449 int attValue = fEntityReader.scanAttValue(qchar, asSymbol); 450 if (attValue >= 0) 451 return attValue; 452 int previousState = setScannerState(SCANNER_STATE_ATTRIBUTE_VALUE); 453 fAttValueReader = fReaderId; 454 fAttValueElementType = element.rawname; 456 fAttValueAttrName = attribute.rawname; 458 fAttValueOffset = fEntityReader.currentOffset(); 459 int dataOffset = fLiteralData.length(); 460 if (fAttValueOffset - fAttValueMark > 0) 461 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark); 462 fAttValueMark = fAttValueOffset; 463 boolean setMark = false; 464 boolean skippedCR; 465 while (true) { 466 if (fEntityReader.lookingAtChar(qchar, true)) { 467 if (fReaderId == fAttValueReader) 468 break; 469 } else if (fEntityReader.lookingAtChar(' ', true)) { 470 } else if ((skippedCR = fEntityReader.lookingAtChar((char)0x0D, true)) || fEntityReader.lookingAtSpace(true)) { 474 if (fAttValueOffset - fAttValueMark > 0) 475 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark); 476 setMark = true; 477 fLiteralData.append(' '); 478 if (skippedCR) { 479 } 485 } else if (fEntityReader.lookingAtChar('&', true)) { 486 if (fAttValueOffset - fAttValueMark > 0) 487 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark); 488 setMark = true; 489 if (fEntityReader.lookingAtChar('#', true)) { 493 int ch = scanCharRef(); 494 if (ch != -1) { 495 if (ch < 0x10000) 496 fLiteralData.append((char)ch); 497 else { 498 fLiteralData.append((char)(((ch-0x00010000)>>10)+0xd800)); 499 fLiteralData.append((char)(((ch-0x00010000)&0x3ff)+0xdc00)); 500 } 501 } 502 } else { 503 int nameOffset = fEntityReader.currentOffset(); 507 fEntityReader.skipPastName(';'); 508 int nameLength = fEntityReader.currentOffset() - nameOffset; 509 if (nameLength == 0) { 510 reportFatalXMLError(XMLMessages.MSG_NAME_REQUIRED_IN_REFERENCE, 511 XMLMessages.P68_NAME_REQUIRED); 512 } else if (!fEntityReader.lookingAtChar(';', true)) { 513 reportFatalXMLError(XMLMessages.MSG_SEMICOLON_REQUIRED_IN_REFERENCE, 514 XMLMessages.P68_SEMICOLON_REQUIRED, 515 fEntityReader.addString(nameOffset, nameLength)); 516 } else { 517 int entityName = fEntityReader.addSymbol(nameOffset, nameLength); 518 fEntityHandler.startReadingFromEntity(entityName, fScannerMarkupDepth, XMLEntityHandler.ENTITYREF_IN_ATTVALUE); 519 } 520 } 521 } else if (fEntityReader.lookingAtChar('<', true)) { 522 if (fAttValueOffset - fAttValueMark > 0) 523 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark); 524 setMark = true; 525 reportFatalXMLError(XMLMessages.MSG_LESSTHAN_IN_ATTVALUE, 526 XMLMessages.WFC_NO_LESSTHAN_IN_ATTVALUE, 527 element.rawname, 528 attribute.rawname); 529 } else if (!fEntityReader.lookingAtValidChar(true)) { 530 if (fAttValueOffset - fAttValueMark > 0) 531 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark); 532 setMark = true; 533 int invChar = fEntityReader.scanInvalidChar(); 534 if (fScannerState == SCANNER_STATE_END_OF_INPUT) 535 return -1; 536 if (invChar >= 0) { 537 reportFatalXMLError(XMLMessages.MSG_INVALID_CHAR_IN_ATTVALUE, 538 XMLMessages.P10_INVALID_CHARACTER, 539 fStringPool.toString(element.rawname), 540 fStringPool.toString(attribute.rawname), 541 Integer.toHexString(invChar)); 542 } 543 } 544 fAttValueOffset = fEntityReader.currentOffset(); 545 if (setMark) { 546 fAttValueMark = fAttValueOffset; 547 setMark = false; 548 } 549 } 550 restoreScannerState(previousState); 551 int dataLength = fLiteralData.length() - dataOffset; 552 if (dataLength == 0) { 553 return fEntityReader.addString(fAttValueMark, fAttValueOffset - fAttValueMark); 554 } 555 if (fAttValueOffset - fAttValueMark > 0) { 556 fEntityReader.append(fLiteralData, fAttValueMark, fAttValueOffset - fAttValueMark); 557 dataLength = fLiteralData.length() - dataOffset; 558 } 559 int value = fLiteralData.addString(dataOffset, dataLength); 560 return value; 561 } 562 563 void reportFatalXMLError(int majorCode, int minorCode) throws Exception { 567 fErrorReporter.reportError(fErrorReporter.getLocator(), 568 XMLMessages.XML_DOMAIN, 569 majorCode, 570 minorCode, 571 null, 572 XMLErrorReporter.ERRORTYPE_FATAL_ERROR); 573 } 574 void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1) throws Exception { 575 Object [] args = { fStringPool.toString(stringIndex1) }; 576 fErrorReporter.reportError(fErrorReporter.getLocator(), 577 XMLMessages.XML_DOMAIN, 578 majorCode, 579 minorCode, 580 args, 581 XMLErrorReporter.ERRORTYPE_FATAL_ERROR); 582 } 583 void reportFatalXMLError(int majorCode, int minorCode, String string1) throws Exception { 584 Object [] args = { string1 }; 585 fErrorReporter.reportError(fErrorReporter.getLocator(), 586 XMLMessages.XML_DOMAIN, 587 majorCode, 588 minorCode, 589 args, 590 XMLErrorReporter.ERRORTYPE_FATAL_ERROR); 591 } 592 void reportFatalXMLError(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception { 593 Object [] args = { fStringPool.toString(stringIndex1), 594 fStringPool.toString(stringIndex2) }; 595 fErrorReporter.reportError(fErrorReporter.getLocator(), 596 XMLMessages.XML_DOMAIN, 597 majorCode, 598 minorCode, 599 args, 600 XMLErrorReporter.ERRORTYPE_FATAL_ERROR); 601 } 602 void reportFatalXMLError(int majorCode, int minorCode, String string1, String string2) throws Exception { 603 Object [] args = { string1, string2 }; 604 fErrorReporter.reportError(fErrorReporter.getLocator(), 605 XMLMessages.XML_DOMAIN, 606 majorCode, 607 minorCode, 608 args, 609 XMLErrorReporter.ERRORTYPE_FATAL_ERROR); 610 } 611 void reportFatalXMLError(int majorCode, int minorCode, String string1, String string2, String string3) throws Exception { 612 Object [] args = { string1, string2, string3 }; 613 fErrorReporter.reportError(fErrorReporter.getLocator(), 614 XMLMessages.XML_DOMAIN, 615 majorCode, 616 minorCode, 617 args, 618 XMLErrorReporter.ERRORTYPE_FATAL_ERROR); 619 } 620 void abortMarkup(int majorCode, int minorCode) throws Exception { 621 reportFatalXMLError(majorCode, minorCode); 622 skipPastEndOfCurrentMarkup(); 623 } 624 void abortMarkup(int majorCode, int minorCode, int stringIndex1) throws Exception { 625 reportFatalXMLError(majorCode, minorCode, stringIndex1); 626 skipPastEndOfCurrentMarkup(); 627 } 628 void abortMarkup(int majorCode, int minorCode, String string1) throws Exception { 629 reportFatalXMLError(majorCode, minorCode, string1); 630 skipPastEndOfCurrentMarkup(); 631 } 632 void abortMarkup(int majorCode, int minorCode, int stringIndex1, int stringIndex2) throws Exception { 633 reportFatalXMLError(majorCode, minorCode, stringIndex1, stringIndex2); 634 skipPastEndOfCurrentMarkup(); 635 } 636 void skipPastEndOfCurrentMarkup() throws Exception { 637 fEntityReader.skipToChar('>'); 638 if (fEntityReader.lookingAtChar('>', true)) 639 fScannerMarkupDepth--; 640 } 641 int setScannerState(int state) { 645 int oldState = fScannerState; 646 fScannerState = state; 647 return oldState; 648 } 649 void restoreScannerState(int state) { 650 if (fScannerState != SCANNER_STATE_END_OF_INPUT) 651 fScannerState = state; 652 } 653 662 interface ScannerDispatcher { 663 670 boolean dispatch(boolean keepgoing) throws Exception ; 671 678 void endOfInput(int entityName, boolean moreToFollow) throws Exception ; 679 } 680 final class XMLDeclDispatcher implements ScannerDispatcher { 681 public boolean dispatch(boolean keepgoing) throws Exception { 682 fEventHandler.callStartDocument(); 683 if (fEntityReader.lookingAtChar('<', true)) { 684 fScannerMarkupDepth++; 685 setScannerState(SCANNER_STATE_START_OF_MARKUP); 686 if (fEntityReader.lookingAtChar('?', true)) { 687 int piTarget = fEntityReader.scanName(' '); 688 if (piTarget == -1) { 689 abortMarkup(XMLMessages.MSG_PITARGET_REQUIRED, 690 XMLMessages.P16_PITARGET_REQUIRED); 691 } else if ("xml".equals(fStringPool.toString(piTarget))) { 692 if (fEntityReader.lookingAtSpace(true)) { scanXMLDeclOrTextDecl(false); 694 } else { abortMarkup(XMLMessages.MSG_RESERVED_PITARGET, 696 XMLMessages.P17_RESERVED_PITARGET); 697 } 698 } else { scanPI(piTarget); 700 } 701 fDispatcher = new PrologDispatcher(); 702 restoreScannerState(SCANNER_STATE_PROLOG); 703 return true; 704 } 705 if (fEntityReader.lookingAtChar('!', true)) { 706 if (fEntityReader.lookingAtChar('-', true)) { if (fEntityReader.lookingAtChar('-', true)) { 708 scanComment(); } else { 710 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG, 711 XMLMessages.P22_NOT_RECOGNIZED); 712 } 713 } else { 714 if (fEntityReader.skippedString(doctype_string)) { 715 setScannerState(SCANNER_STATE_DOCTYPE); 716 fSeenDoctypeDecl = true; 717 scanDoctypeDecl(fStandalone); fScannerMarkupDepth--; 719 fDispatcher = new PrologDispatcher(); 720 restoreScannerState(SCANNER_STATE_PROLOG); 721 return true; 722 } else { 723 abortMarkup(XMLMessages.MSG_MARKUP_NOT_RECOGNIZED_IN_PROLOG, 724 XMLMessages.P22_NOT_RECOGNIZED); 725 } 726 } 727 } else { 728 fDispatcher = new ContentDispatcher(); 729 restoreScannerState(SCANNER_STATE_ROOT_ELEMENT); 730 return true; 731 } 732 } else { 733 if (fEntityReader.lookingAtSpace(true)) { 734 fEntityReader.skipPastSpaces(); 735 } else if (!fEntityReader.lookingAtValidChar(false)) { 736 int invChar = fEntityReader.scanInvalidChar(); 737 &nb
|