package org.enhydra.apache.xerces.readers;

import java.util.Vector;

import org.enhydra.apache.xerces.framework.XMLErrorReporter;
import org.enhydra.apache.xerces.utils.CharDataChunk;
import org.enhydra.apache.xerces.utils.ImplementationMessages;
import org.enhydra.apache.xerces.utils.QName;
import org.enhydra.apache.xerces.utils.StringHasher;
import org.enhydra.apache.xerces.utils.StringPool;
import org.enhydra.apache.xerces.utils.XMLCharacterProperties;

/**
 * Base class for entity readers that scan character data held in a linked
 * chain of {@link CharDataChunk} buffers.
 *
 * <p>The reader keeps a cursor consisting of the current chunk, the index
 * within that chunk, the absolute offset ({@code fCurrentOffset}, inherited)
 * and a cached copy of the character under the cursor
 * ({@code fMostRecentChar}). A cached value of {@code 0} is used throughout
 * as the signal to check for end of input via {@link #atEOF(int)}; when the
 * end is reached the scan is delegated to the reader returned by
 * {@link #changeReaders()}.
 *
 * <p>Subclasses supply {@link #fillCurrentChunk()} to load more data.
 */
abstract class AbstractCharReader extends XMLEntityReader {

    /**
     * Creates the reader and allocates its first (empty) chunk.
     *
     * @param entityHandler           passed through to the superclass
     * @param errorReporter           passed through to the superclass
     * @param sendCharDataAsCharArray passed through to the superclass
     * @param stringPool              pool used when creating data chunks
     */
    protected AbstractCharReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, StringPool stringPool) {
        super(entityHandler, errorReporter, sendCharDataAsCharArray);
        fStringPool = stringPool;
        fCurrentChunk = CharDataChunk.createChunk(fStringPool, null);
    }

    //
    // Cursor state shared with subclasses.
    //

    /** Chunk that currently contains the cursor. */
    protected CharDataChunk fCurrentChunk = null;
    /** Index of the cursor within {@link #fMostRecentData}. */
    protected int fCurrentIndex = 0;
    /** Character array of the current chunk. */
    protected char[] fMostRecentData = null;
    /** Cached character under the cursor (0 triggers an end-of-input check). */
    protected int fMostRecentChar = 0;
    /** Compared against offsets by {@link #atEOF(int)}; presumably the total number of characters loaded so far. */
    protected int fLength = 0;

    /**
     * Loads data into the current chunk.
     *
     * <p>NOTE(review): callers in this class use the return value as the new
     * current character, and rely on the implementation to update
     * {@code fMostRecentData}/{@code fMostRecentChar} - confirm against
     * the concrete subclasses.
     *
     * @return the character now under the cursor
     * @throws Exception if loading fails
     */
    protected abstract int fillCurrentChunk() throws Exception;

    /**
     * Records an error to be reported later by {@link #scanInvalidChar()}
     * when the cursor reaches the given offset.
     *
     * @param errorCode implementation message code
     * @param args      message arguments (may be {@code null})
     * @param offset    position at which the error should be reported
     */
    protected void deferException(int errorCode, Object[] args, int offset) {
        if (fDeferredErrors == null) {
            fDeferredErrors = new Vector();
        }
        DeferredError de = new DeferredError(errorCode, args, offset);
        fDeferredErrors.addElement(de);
    }

    /**
     * Switches to the next reader and releases this reader's current chunk.
     * After this call {@code fCurrentChunk} is {@code null}.
     *
     * @return the reader returned by the superclass
     * @throws Exception propagated from the superclass
     */
    protected XMLEntityHandler.EntityReader changeReaders() throws Exception {
        XMLEntityHandler.EntityReader nextReader = super.changeReaders();
        fCurrentChunk.releaseChunk();
        fCurrentChunk = null;
        return nextReader;
    }

    //
    // String pool access.
    //

    /**
     * Appends a range of this reader's data to the given buffer by
     * delegating to the current chunk.
     */
    public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
        fCurrentChunk.append(charBuffer, offset, length);
    }

    /**
     * Adds the given range as a string via the current chunk.
     *
     * @return the handle returned by the chunk, or 0 when {@code length} is 0
     */
    public int addString(int offset, int length) {
        if (length == 0) {
            return 0;
        }
        return fCurrentChunk.addString(offset, length);
    }

    /**
     * Adds the given range as a symbol via the current chunk (hash code 0 is
     * passed, i.e. no precomputed hash).
     *
     * @return the handle returned by the chunk, or 0 when {@code length} is 0
     */
    public int addSymbol(int offset, int length) {
        if (length == 0) {
            return 0;
        }
        return fCurrentChunk.addSymbol(offset, length, 0);
    }

    //
    // Look-ahead and skipping.
    //

    /**
     * Tests whether the current character equals {@code chr}.
     *
     * @param chr          character to look for
     * @param skipPastChar when {@code true} and the character matches, the
     *                     cursor is advanced past it
     * @return {@code true} if the current character is {@code chr}
     */
    public boolean lookingAtChar(char chr, boolean skipPastChar) throws Exception {
        int ch = fMostRecentChar;
        if (ch != chr) {
            if (ch == 0) {
                if (atEOF(fCurrentOffset + 1)) {
                    return changeReaders().lookingAtChar(chr, skipPastChar);
                }
            }
            return false;
        }
        if (skipPastChar) {
            fCharacterCounter++;
            loadNextChar();
        }
        return true;
    }

    /**
     * Tests whether the cursor is on a character accepted by this scanner:
     * tab, line feed, anything from 0x20 up to 0xD7FF, 0xE000-0xFFFD, or a
     * high surrogate immediately followed by a low surrogate.
     *
     * @param skipPastChar when {@code true} and the character is valid, the
     *                     cursor is advanced past it (both halves of a
     *                     surrogate pair)
     * @return {@code true} if the current character is valid
     */
    public boolean lookingAtValidChar(boolean skipPastChar) throws Exception {
        int ch = fMostRecentChar;
        if (ch < 0xD800) {
            if (ch >= 0x20 || ch == 0x09) {
                if (skipPastChar) {
                    fCharacterCounter++;
                    loadNextChar();
                }
                return true;
            }
            if (ch == 0x0A) {
                if (skipPastChar) {
                    fLinefeedCounter++;
                    fCharacterCounter = 1;
                    loadNextChar();
                }
                return true;
            }
            if (ch == 0) {
                if (atEOF(fCurrentOffset + 1)) {
                    return changeReaders().lookingAtValidChar(skipPastChar);
                }
            }
            return false;
        }
        if (ch > 0xFFFD) {
            return false;
        }
        if (ch < 0xDC00) {
            // High surrogate: peek at the next char, which must be a low surrogate.
            CharDataChunk savedChunk = fCurrentChunk;
            int savedIndex = fCurrentIndex;
            int savedOffset = fCurrentOffset;
            ch = loadNextChar();
            boolean valid = (ch >= 0xDC00 && ch < 0xE000);
            if (!valid || !skipPastChar) {
                restorePosition(savedChunk, savedIndex, savedOffset);
                return valid;
            }
        } else if (ch < 0xE000) {
            // Unpaired low surrogate.
            return false;
        }
        if (skipPastChar) {
            fCharacterCounter++;
            loadNextChar();
        }
        return true;
    }

    /**
     * Tests whether the current character is a space (0x20), tab (0x09) or
     * line feed (0x0A).
     *
     * @param skipPastChar when {@code true} and a space is found, the cursor
     *                     is advanced and the line/column counters updated
     * @return {@code true} if the current character is one of the above
     */
    public boolean lookingAtSpace(boolean skipPastChar) throws Exception {
        int ch = fMostRecentChar;
        if (ch > 0x20) {
            return false;
        }
        if (ch == 0x20 || ch == 0x09) {
            if (!skipPastChar) {
                return true;
            }
            fCharacterCounter++;
        } else if (ch == 0x0A) {
            if (!skipPastChar) {
                return true;
            }
            fLinefeedCounter++;
            fCharacterCounter = 1;
        } else {
            if (ch == 0) {
                if (atEOF(fCurrentOffset + 1)) {
                    return changeReaders().lookingAtSpace(skipPastChar);
                }
            }
            return false;
        }
        loadNextChar();
        return true;
    }

    /**
     * Advances the cursor until it is positioned on {@code chr}, updating
     * the line/column counters. At end of input the request is forwarded to
     * the next reader.
     */
    public void skipToChar(char chr) throws Exception {
        int ch = fMostRecentChar;
        while (true) {
            if (ch == chr) {
                return;
            }
            if (ch == 0) {
                if (atEOF(fCurrentOffset + 1)) {
                    changeReaders().skipToChar(chr);
                    return;
                }
                fCharacterCounter++;
            } else if (ch == 0x0A) {
                fLinefeedCounter++;
                fCharacterCounter = 1;
            } else if (ch >= 0xD800 && ch < 0xDC00) {
                fCharacterCounter++;
                ch = loadNextChar();
                // Not a low surrogate: re-examine this char from the top of the loop.
                if (ch < 0xDC00 || ch >= 0xE000) {
                    continue;
                }
            } else {
                fCharacterCounter++;
            }
            ch = loadNextChar();
        }
    }

    /**
     * Advances the cursor past any run of space, tab and line-feed
     * characters. At end of input the request is forwarded to the next
     * reader.
     */
    public void skipPastSpaces() throws Exception {
        int ch = fMostRecentChar;
        while (true) {
            if (ch == 0x20 || ch == 0x09) {
                fCharacterCounter++;
            } else if (ch == 0x0A) {
                fLinefeedCounter++;
                fCharacterCounter = 1;
            } else {
                if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                    changeReaders().skipPastSpaces();
                }
                return;
            }
            ch = loadNextChar();
        }
    }

    /**
     * Skips a name: one initial name character followed by name characters.
     * Does nothing if the current character cannot start a name.
     *
     * @param fastcheck a character that terminates the name immediately,
     *                  checked before the character tables are consulted
     */
    public void skipPastName(char fastcheck) throws Exception {
        int ch = fMostRecentChar;
        if (!isInitialNameChar(ch)) {
            return;
        }
        while (true) {
            fCharacterCounter++;
            ch = loadNextChar();
            if (fastcheck == ch) {
                return;
            }
            if (!isNameChar(ch)) {
                return;
            }
        }
    }

    /**
     * Skips a run of name characters (no initial-character restriction).
     *
     * @param fastcheck a character that terminates the token immediately
     */
    public void skipPastNmtoken(char fastcheck) throws Exception {
        int ch = fMostRecentChar;
        while (true) {
            if (fastcheck == ch) {
                return;
            }
            if (!isNameChar(ch)) {
                return;
            }
            fCharacterCounter++;
            ch = loadNextChar();
        }
    }

    /**
     * If the input at the cursor matches {@code s}, advances past it.
     *
     * @param s the characters to match
     * @return {@code true} if the string was matched and skipped;
     *         {@code false} (cursor unchanged) otherwise
     */
    public boolean skippedString(char[] s) throws Exception {
        int length = s.length;
        char[] data = fMostRecentData;
        int index = fCurrentIndex;
        if (index + length <= CharDataChunk.CHUNK_SIZE) {
            // Fast path: the candidate lies entirely inside the current chunk.
            for (int i = 0; i < length; i++) {
                if (data[index++] != s[i]) {
                    return false;
                }
            }
            commitSkippedString(data, index, length);
            return true;
        }
        // Slow path: the candidate straddles a chunk boundary.
        CharDataChunk dataChunk = fCurrentChunk;
        int offset = fCurrentOffset;
        int savedIndex = index;
        int i = 0;
        while (index < CharDataChunk.CHUNK_SIZE) {
            if (data[index++] != s[i++]) {
                return false;
            }
        }
        slowLoadNextChar();
        data = fMostRecentData;
        index = 0;
        while (i < length) {
            if (data[index++] != s[i++]) {
                restorePosition(dataChunk, savedIndex, offset);
                return false;
            }
        }
        commitSkippedString(data, index, length);
        return true;
    }

    //
    // Scanning.
    //

    /**
     * Consumes the character under the cursor regardless of validity and
     * returns it. A valid surrogate pair is combined into a single code
     * point. If a deferred error is registered for the current position it
     * is reported as a fatal error and -1 is returned.
     *
     * @return the character (or combined code point) consumed, or -1 when a
     *         deferred error was reported
     */
    public int scanInvalidChar() throws Exception {
        int ch = fMostRecentChar;
        if (ch == 0x0A) {
            fLinefeedCounter++;
            fCharacterCounter = 1;
            loadNextChar();
        } else if (ch == 0) {
            if (atEOF(fCurrentOffset + 1)) {
                return changeReaders().scanInvalidChar();
            }
            if (fDeferredErrors != null) {
                for (int i = 0; i < fDeferredErrors.size(); i++) {
                    DeferredError de = (DeferredError) fDeferredErrors.elementAt(i);
                    if (de.offset == fCurrentIndex) {
                        fErrorReporter.reportError(fErrorReporter.getLocator(),
                                                   ImplementationMessages.XERCES_IMPLEMENTATION_DOMAIN,
                                                   de.errorCode,
                                                   0,
                                                   de.args,
                                                   XMLErrorReporter.ERRORTYPE_FATAL_ERROR);
                        fDeferredErrors.removeElementAt(i);
                        fCharacterCounter++;
                        loadNextChar();
                        return -1;
                    }
                }
            }
            fCharacterCounter++;
            loadNextChar();
        } else {
            fCharacterCounter++;
            if (ch >= 0xD800 && ch < 0xDC00) {
                int ch2 = loadNextChar();
                if (ch2 >= 0xDC00 && ch2 < 0xE000) {
                    ch = ((ch - 0xD800) << 10) + (ch2 - 0xDC00) + 0x10000;
                    loadNextChar();
                }
            } else {
                loadNextChar();
            }
        }
        return ch;
    }

    /**
     * Scans the digits and terminating {@code ';'} of a character reference.
     *
     * @param hex {@code true} to read hexadecimal digits, {@code false} for decimal
     * @return the referenced code point, or one of the
     *         {@code XMLEntityHandler.CHARREF_RESULT_*} codes
     */
    public int scanCharRef(boolean hex) throws Exception {
        int ch = fMostRecentChar;
        if (ch == 0) {
            if (atEOF(fCurrentOffset + 1)) {
                return changeReaders().scanCharRef(hex);
            }
            return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
        }
        int num = 0;
        if (hex) {
            if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) {
                return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
            }
            num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
        } else {
            if (ch < '0' || ch > '9') {
                return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR;
            }
            num = ch - '0';
        }
        fCharacterCounter++;
        loadNextChar();
        boolean toobig = false;
        while (true) {
            ch = fMostRecentChar;
            if (ch == 0) {
                break;
            }
            if (hex) {
                if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) {
                    break;
                }
            } else {
                if (ch < '0' || ch > '9') {
                    break;
                }
            }
            fCharacterCounter++;
            loadNextChar();
            if (hex) {
                int dig = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10);
                num = (num << 4) + dig;
            } else {
                int dig = ch - '0';
                num = (num * 10) + dig;
            }
            if (num > 0x10FFFF) {
                // Remember the overflow but keep consuming digits; resetting
                // num prevents int overflow on very long digit strings.
                toobig = true;
                num = 0;
            }
        }
        if (ch != ';') {
            return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED;
        }
        fCharacterCounter++;
        loadNextChar();
        if (toobig) {
            return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE;
        }
        return num;
    }

    /**
     * Scans a single- or double-quoted string literal.
     *
     * @return the string handle of the literal's content, or one of the
     *         {@code XMLEntityHandler.STRINGLIT_RESULT_*} codes
     */
    public int scanStringLiteral() throws Exception {
        boolean single;
        if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) {
            return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
        }
        int offset = fCurrentOffset;
        char qchar = single ? '\'' : '\"';
        while (!lookingAtChar(qchar, false)) {
            if (!lookingAtValidChar(true)) {
                return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR;
            }
        }
        int stringIndex = addString(offset, fCurrentOffset - offset);
        lookingAtChar(qchar, true); // consume the closing quote
        return stringIndex;
    }

    /**
     * Scans a "simple" attribute value up to the closing quote. The scan
     * gives up (without consuming the offending character) on tab/line feed,
     * {@code '&'}, {@code '<'} or an invalid character.
     *
     * @param qchar    the quote character that terminates the value
     * @param asSymbol {@code true} to add the value as a symbol, otherwise as a string
     * @return the handle of the value, or one of the
     *         {@code XMLEntityHandler.ATTVALUE_RESULT_*} codes
     */
    public int scanAttValue(char qchar, boolean asSymbol) throws Exception {
        int offset = fCurrentOffset;
        while (true) {
            if (lookingAtChar(qchar, false)) {
                break;
            }
            if (lookingAtChar(' ', true)) {
                continue;
            }
            if (lookingAtSpace(false)) {
                return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
            }
            if (lookingAtChar('&', false)) {
                return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX;
            }
            if (lookingAtChar('<', false)) {
                return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN;
            }
            if (!lookingAtValidChar(true)) {
                return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR;
            }
        }
        int result = asSymbol ? addSymbol(offset, fCurrentOffset - offset) : addString(offset, fCurrentOffset - offset);
        lookingAtChar(qchar, true); // consume the closing quote
        return result;
    }

    /**
     * Scans an entity value up to the closing quote, a reference
     * ({@code '&'}), a parameter-entity reference ({@code '%'}), an invalid
     * character or end of input.
     *
     * @param qchar        the terminating quote character, or -1 for none
     * @param createString when {@code true} and the closing quote is reached,
     *                     the scanned text is added as a string and the quote
     *                     is consumed; when {@code false} the method returns
     *                     {@code ENTITYVALUE_RESULT_FINISHED} with the cursor
     *                     still on the quote
     * @return a string handle, or one of the
     *         {@code XMLEntityHandler.ENTITYVALUE_RESULT_*} codes
     */
    public int scanEntityValue(int qchar, boolean createString) throws Exception {
        int offset = fCurrentOffset;
        while (true) {
            if (atEOF(fCurrentOffset + 1)) {
                changeReaders();
                return XMLEntityHandler.ENTITYVALUE_RESULT_END_OF_INPUT;
            }
            if (qchar != -1 && lookingAtChar((char) qchar, false)) {
                if (!createString) {
                    return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED;
                }
                break;
            }
            if (lookingAtChar('&', false)) {
                return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE;
            }
            if (lookingAtChar('%', false)) {
                return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF;
            }
            if (!lookingAtValidChar(true)) {
                return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR;
            }
        }
        int result = addString(offset, fCurrentOffset - offset);
        lookingAtChar((char) qchar, true); // consume the closing quote
        return result;
    }

    /**
     * Scans a name and adds it as a symbol, computing its hash while scanning.
     *
     * @param fastcheck a character that terminates the name immediately
     * @return the symbol handle, or -1 if the current character cannot start a name
     */
    public int scanName(char fastcheck) throws Exception {
        int ch = fMostRecentChar;
        if (!isInitialNameChar(ch)) {
            return -1;
        }
        int offset = fCurrentOffset;
        // The loop works on local copies of the cursor; fCurrentIndex and
        // fMostRecentChar are written back once at the end.
        int index = fCurrentIndex;
        char[] data = fMostRecentData;
        if (++index == CharDataChunk.CHUNK_SIZE) {
            slowLoadNextChar();
            index = 0;
            data = fMostRecentData;
        }
        fCharacterCounter++;
        fCurrentOffset++;
        int hashcode = 0;
        while (true) {
            hashcode = StringHasher.hashChar(hashcode, ch);
            ch = data[index] & 0xFFFF;
            if (fastcheck == ch) {
                break;
            }
            if (!isNameChar(ch)) {
                break;
            }
            if (++index == CharDataChunk.CHUNK_SIZE) {
                slowLoadNextChar();
                index = 0;
                data = fMostRecentData;
            }
            fCharacterCounter++;
            fCurrentOffset++;
        }
        fCurrentIndex = index;
        fMostRecentChar = ch;
        hashcode = StringHasher.finishHash(hashcode);
        int length = fCurrentOffset - offset;
        return fCurrentChunk.addSymbol(offset, length, hashcode);
    }

    /**
     * Checks that the input at the cursor is exactly {@code expectedName}
     * followed by a non-name character (or {@code fastcheck}). On success
     * the cursor is left just after the name; on failure the remaining name
     * characters are skipped.
     *
     * @param fastcheck    a character that terminates the name immediately
     * @param expectedName the characters expected
     * @return {@code true} if the expected name was found
     */
    public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception {
        char[] expected = expectedName.chars;
        int offset = expectedName.offset;
        int len = expectedName.length;
        int ch = fMostRecentChar;
        for (int i = 0; i < len; i++) {
            if (ch != expected[offset++]) {
                skipPastNmtoken(fastcheck);
                return false;
            }
            fCharacterCounter++;
            ch = loadNextChar();
        }
        if (ch == fastcheck) {
            return true;
        }
        if (!isNameChar(ch)) {
            return true;
        }
        // Input name is longer than the expected one.
        skipPastNmtoken(fastcheck);
        return false;
    }

    /**
     * Scans a qualified name and fills in {@code qname}'s rawname, prefix
     * and localpart symbol handles ({@code uri} is set to
     * {@code StringPool.EMPTY_STRING}). If the current character cannot
     * start a name, or is {@code ':'}, {@code qname} is cleared instead.
     *
     * <p>A colon is treated as the prefix separator only if it is the first
     * colon and the character after it can start a name; otherwise the name
     * ends before the colon.
     *
     * @param fastcheck a character that terminates the name immediately
     * @param qname     receives the result
     */
    public void scanQName(char fastcheck, QName qname) throws Exception {
        int ch = fMostRecentChar;
        if (!isInitialNameChar(ch) || ch == ':') {
            qname.clear();
            return;
        }
        int offset = fCurrentOffset;
        int index = fCurrentIndex;
        char[] data = fMostRecentData;
        if (++index == CharDataChunk.CHUNK_SIZE) {
            slowLoadNextChar();
            index = 0;
            data = fMostRecentData;
        }
        fCharacterCounter++;
        fCurrentOffset++;
        int hashcode = 0;
        int prefixend = -1;
        while (true) {
            hashcode = StringHasher.hashChar(hashcode, ch);
            ch = data[index] & 0xFFFF;
            if (fastcheck == ch) {
                break;
            }
            if (!isNameChar(ch)) {
                break;
            }
            if (ch == ':') {
                if (prefixend != -1) {
                    break; // second colon ends the name
                }
                prefixend = fCurrentOffset;
                // Peek at the character after the colon.
                if (index + 1 == CharDataChunk.CHUNK_SIZE) {
                    // The peek crosses a chunk boundary: load the next chunk,
                    // then step back to the saved chunk and offset.
                    CharDataChunk savedChunk = fCurrentChunk;
                    int savedOffset = fCurrentOffset;
                    ch = slowLoadNextChar();
                    fCurrentChunk = savedChunk;
                    fCurrentOffset = savedOffset;
                    fMostRecentData = savedChunk.toCharArray();
                } else {
                    ch = data[index + 1] & 0xFFFF;
                }
                boolean lpok = isInitialNameChar(ch) && ch != ':';
                ch = ':';
                if (!lpok) {
                    prefixend = -1;
                    break;
                }
            }
            if (++index == CharDataChunk.CHUNK_SIZE) {
                slowLoadNextChar();
                index = 0;
                data = fMostRecentData;
            }
            fCharacterCounter++;
            fCurrentOffset++;
        }
        fCurrentIndex = index;
        fMostRecentChar = ch;
        hashcode = StringHasher.finishHash(hashcode);
        int length = fCurrentOffset - offset;
        qname.rawname = fCurrentChunk.addSymbol(offset, length, hashcode);
        qname.prefix = prefixend == -1 ? -1 : addSymbol(offset, prefixend - offset);
        qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(prefixend + 1, fCurrentOffset - (prefixend + 1));
        qname.uri = StringPool.EMPTY_STRING;
    }

    /**
     * Scans element content starting at the cursor, delivering any character
     * data found to the character-data handler, and stops at the first
     * construct the caller must deal with.
     *
     * <p>ASCII characters are classified through the
     * {@code XMLCharacterProperties.fgAsciiWSCharData} (first character) and
     * {@code fgAsciiCharData} tables. As used here the class codes are:
     * 0 ordinary data, 1 start of markup, 2 start of a reference,
     * 3 possible {@code "]]>"}, 4 invalid / end of input / line feed,
     * 5 whitespace (first-character table only). NOTE(review): the mapping
     * of concrete characters to codes lives in the tables, which are not
     * visible here.
     *
     * @param element not used by this implementation
     * @return one of the {@code XMLEntityHandler.CONTENT_RESULT_*} codes
     */
    public int scanContent(QName element) throws Exception {
        if (fCallClearPreviousChunk && fCurrentChunk.clearPreviousChunk()) {
            fCallClearPreviousChunk = false;
        }
        int charDataOffset = fCurrentOffset;
        int ch = fMostRecentChar;
        if (ch < 0x80) {
            switch (XMLCharacterProperties.fgAsciiWSCharData[ch]) {
            case 0:
                fCharacterCounter++;
                ch = loadNextChar();
                break;
            case 1:
                fCharacterCounter++;
                ch = loadNextChar();
                if (!fInCDSect) {
                    return recognizeMarkup(ch);
                }
                break;
            case 2:
                fCharacterCounter++;
                ch = loadNextChar();
                if (!fInCDSect) {
                    return recognizeReference(ch);
                }
                break;
            case 3:
                fCharacterCounter++;
                ch = loadNextChar();
                if (ch != ']') {
                    break;
                }
                if (!advanceOntoCloseAngle()) {
                    break;
                }
                loadNextChar();
                fCharacterCounter += 2;
                return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
            case 4:
                if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                    changeReaders();
                }
                return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
            case 5:
                // Leading whitespace: consume the run, then look at what follows.
                do {
                    if (ch == 0x0A) {
                        fLinefeedCounter++;
                        fCharacterCounter = 1;
                    } else {
                        fCharacterCounter++;
                    }
                    ch = loadNextChar();
                } while (ch == 0x20 || ch == 0x09 || ch == 0x0A);
                if (ch < 0x80) {
                    switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
                    case 0:
                        fCharacterCounter++;
                        ch = loadNextChar();
                        break;
                    case 1:
                        if (!fInCDSect) {
                            callCharDataHandler(charDataOffset, fCurrentOffset, true);
                            fCharacterCounter++;
                            ch = loadNextChar();
                            return recognizeMarkup(ch);
                        }
                        fCharacterCounter++;
                        ch = loadNextChar();
                        break;
                    case 2:
                        if (!fInCDSect) {
                            callCharDataHandler(charDataOffset, fCurrentOffset, true);
                            fCharacterCounter++;
                            ch = loadNextChar();
                            return recognizeReference(ch);
                        }
                        fCharacterCounter++;
                        ch = loadNextChar();
                        break;
                    case 3: {
                        int endOffset = fCurrentOffset;
                        ch = loadNextChar();
                        if (ch != ']') {
                            fCharacterCounter++;
                            break;
                        }
                        if (!advanceOntoCloseAngle()) {
                            fCharacterCounter++;
                            break;
                        }
                        loadNextChar();
                        callCharDataHandler(charDataOffset, endOffset, true);
                        fCharacterCounter += 3;
                        return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
                    }
                    case 4:
                        callCharDataHandler(charDataOffset, fCurrentOffset, true);
                        if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                            changeReaders();
                        }
                        return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                    }
                } else if (!skipMultiByteCharData(ch)) {
                    callCharDataHandler(charDataOffset, fCurrentOffset, true);
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
                break;
            }
        } else if (!skipMultiByteCharData(ch)) {
            return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
        }
        ch = skipAsciiCharData();
        while (true) {
            if (ch < 0x80) {
                switch (XMLCharacterProperties.fgAsciiCharData[ch]) {
                case 0:
                    fCharacterCounter++;
                    ch = loadNextChar();
                    break;
                case 1:
                    if (!fInCDSect) {
                        callCharDataHandler(charDataOffset, fCurrentOffset, false);
                        fCharacterCounter++;
                        ch = loadNextChar();
                        return recognizeMarkup(ch);
                    }
                    fCharacterCounter++;
                    ch = loadNextChar();
                    break;
                case 2:
                    if (!fInCDSect) {
                        callCharDataHandler(charDataOffset, fCurrentOffset, false);
                        fCharacterCounter++;
                        ch = loadNextChar();
                        return recognizeReference(ch);
                    }
                    fCharacterCounter++;
                    ch = loadNextChar();
                    break;
                case 3: {
                    int endOffset = fCurrentOffset;
                    ch = loadNextChar();
                    if (ch != ']') {
                        fCharacterCounter++;
                        break;
                    }
                    if (!advanceOntoCloseAngle()) {
                        fCharacterCounter++;
                        break;
                    }
                    loadNextChar();
                    callCharDataHandler(charDataOffset, endOffset, false);
                    fCharacterCounter += 3;
                    return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT;
                }
                case 4:
                    if (ch == 0x0A) {
                        fLinefeedCounter++;
                        fCharacterCounter = 1;
                        ch = loadNextChar();
                        break;
                    }
                    callCharDataHandler(charDataOffset, fCurrentOffset, false);
                    if (ch == 0 && atEOF(fCurrentOffset + 1)) {
                        changeReaders();
                    }
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
            } else {
                if (!skipMultiByteCharData(ch)) {
                    callCharDataHandler(charDataOffset, fCurrentOffset, false);
                    return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR;
                }
                ch = fMostRecentChar;
            }
        }
    }

    //
    // Private data.
    //

    /** Characters expected after {@code "<!["} at the start of a CDATA section. */
    private static final char[] cdata_string = { 'C', 'D', 'A', 'T', 'A', '[' };
    private StringPool fStringPool = null;
    /** Whether {@code XMLCharacterProperties.initCharFlags()} has been called by this reader. */
    private boolean fCalledCharPropInit = false;
    /** Set whenever a chunk boundary is crossed; cleared once {@code clearPreviousChunk()} succeeds. */
    private boolean fCallClearPreviousChunk = true;
    /** Lazily created list of {@link DeferredError}; {@code null} until the first deferral. */
    private Vector fDeferredErrors = null;

    /** An error recorded by {@link #deferException} for later reporting. */
    private static final class DeferredError {
        final int errorCode;
        final Object[] args;
        final int offset;

        DeferredError(int ec, Object[] a, int o) {
            errorCode = ec;
            args = a;
            offset = o;
        }
    }

    //
    // Private helpers.
    //

    /**
     * Returns whether {@code ch} may start a name, using the ASCII table for
     * characters below 0x80 and the (lazily initialised) full character
     * flags otherwise.
     */
    private boolean isInitialNameChar(int ch) {
        if (ch < 0x80) {
            return XMLCharacterProperties.fgAsciiInitialNameChar[ch] != 0;
        }
        ensureCharFlags();
        return (XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) != 0;
    }

    /**
     * Returns whether {@code ch} may appear inside a name, using the ASCII
     * table for characters below 0x80 and the (lazily initialised) full
     * character flags otherwise.
     */
    private boolean isNameChar(int ch) {
        if (ch < 0x80) {
            return XMLCharacterProperties.fgAsciiNameChar[ch] != 0;
        }
        ensureCharFlags();
        return (XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) != 0;
    }

    /** Initialises the non-ASCII character flag table the first time it is needed. */
    private void ensureCharFlags() {
        if (!fCalledCharPropInit) {
            XMLCharacterProperties.initCharFlags();
            fCalledCharPropInit = true;
        }
    }

    /**
     * Moves the cursor back to a previously saved position and reloads the
     * cached data array and current character from it.
     */
    private void restorePosition(CharDataChunk chunk, int index, int offset) {
        fCurrentChunk = chunk;
        fCurrentIndex = index;
        fCurrentOffset = offset;
        fMostRecentData = chunk.toCharArray();
        fMostRecentChar = fMostRecentData[index] & 0xFFFF;
    }

    /**
     * Completes a successful {@link #skippedString} match: advances the
     * counters by {@code length} and positions the cursor at {@code index}
     * in {@code data}, loading the next chunk if that is the chunk end.
     */
    private void commitSkippedString(char[] data, int index, int length) throws Exception {
        fCharacterCounter += length;
        fCurrentOffset += length;
        fCurrentIndex = index;
        if (index == CharDataChunk.CHUNK_SIZE) {
            slowLoadNextChar();
        } else {
            fMostRecentChar = data[index] & 0xFFFF;
        }
    }

    /**
     * With the cursor on the second {@code ']'} of a possible {@code "]]>"},
     * checks whether the next character is {@code '>'}.
     *
     * <p>On success the cursor index/offset are moved onto the {@code '>'}
     * (the caller then calls {@link #loadNextChar()} to step past it). On
     * failure the cursor is left on the {@code ']'}.
     *
     * @return {@code true} if {@code '>'} follows
     */
    private boolean advanceOntoCloseAngle() throws Exception {
        if (fCurrentIndex + 1 == CharDataChunk.CHUNK_SIZE) {
            // The '>' (if any) is in the next chunk; load it, undoing the
            // move when it turns out not to be '>'.
            CharDataChunk dataChunk = fCurrentChunk;
            int index = fCurrentIndex;
            int offset = fCurrentOffset;
            if (loadNextChar() != '>') {
                fCurrentChunk = dataChunk;
                fCurrentIndex = index;
                fCurrentOffset = offset;
                fMostRecentData = dataChunk.toCharArray();
                fMostRecentChar = ']';
                return false;
            }
        } else {
            if (fMostRecentData[fCurrentIndex + 1] != '>') {
                return false;
            }
            fCurrentIndex++;
            fCurrentOffset++;
        }
        return true;
    }

    /**
     * Classifies the markup that starts after a {@code '<'}.
     *
     * @param ch the character following the {@code '<'}
     * @return one of the {@code XMLEntityHandler.CONTENT_RESULT_*} codes. When
     *         input runs out part-way through {@code "<!--"} or
     *         {@code "<![CDATA["}, the character counter and offset are wound
     *         back and {@code CONTENT_RESULT_MARKUP_END_OF_INPUT} is returned.
     */
    private int recognizeMarkup(int ch) throws Exception {
        switch (ch) {
        case 0:
            return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
        case '?':
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_PI;
        case '!':
            fCharacterCounter++;
            ch = loadNextChar();
            if (ch == 0) {
                fCharacterCounter--;
                fCurrentOffset--;
                return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
            }
            if (ch == '-') {
                fCharacterCounter++;
                ch = loadNextChar();
                if (ch == 0) {
                    fCharacterCounter -= 2;
                    fCurrentOffset -= 2;
                    return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
                }
                if (ch == '-') {
                    fCharacterCounter++;
                    loadNextChar();
                    return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT;
                }
                break;
            }
            if (ch == '[') {
                for (int i = 0; i < cdata_string.length; i++) {
                    fCharacterCounter++;
                    ch = loadNextChar();
                    if (ch == 0) {
                        fCharacterCounter -= (2 + i);
                        fCurrentOffset -= (2 + i);
                        return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT;
                    }
                    if (ch != cdata_string[i]) {
                        return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
                    }
                }
                fCharacterCounter++;
                loadNextChar();
                return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT;
            }
            break;
        case '/':
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG;
        default:
            return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT;
        }
        return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED;
    }

    /**
     * Classifies the reference that starts after an {@code '&'}.
     *
     * @param ch the character following the {@code '&'}
     * @return {@code CONTENT_RESULT_START_OF_CHARREF} (after consuming the
     *         {@code '#'}), {@code CONTENT_RESULT_START_OF_ENTITYREF}, or
     *         {@code CONTENT_RESULT_REFERENCE_END_OF_INPUT} when {@code ch} is 0
     */
    private int recognizeReference(int ch) throws Exception {
        if (ch == 0) {
            return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT;
        }
        if (ch == '#') {
            fCharacterCounter++;
            loadNextChar();
            return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF;
        } else {
            return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF;
        }
    }

    /**
     * Skips one non-ASCII character (or surrogate pair) of character data.
     * Note that the character counter is not updated here.
     *
     * @param ch the current character (expected to be &gt;= 0x80)
     * @return {@code false}, with the cursor unchanged, if the character is
     *         above 0xFFFD or is an unpaired surrogate; {@code true} otherwise
     */
    private boolean skipMultiByteCharData(int ch) throws Exception {
        if (ch < 0xD800) {
            loadNextChar();
            return true;
        }
        if (ch > 0xFFFD) {
            return false;
        }
        if (ch >= 0xDC00 && ch < 0xE000) {
            return false;
        }
        if (ch >= 0xD800 && ch < 0xDC00) {
            CharDataChunk savedChunk = fCurrentChunk;
            int savedIndex = fCurrentIndex;
            int savedOffset = fCurrentOffset;
            ch = loadNextChar();
            if (ch < 0xDC00 || ch >= 0xE000) {
                restorePosition(savedChunk, savedIndex, savedOffset);
                return false;
            }
        }
        loadNextChar();
        return true;
    }

    /**
     * Skips forward over ASCII characters classified as ordinary data
     * (class 0 in {@code fgAsciiCharData}) and line feeds, updating the
     * line/column counters, and stops on the first other character.
     *
     * @return the character the cursor stopped on
     */
    private int skipAsciiCharData() throws Exception {
        int index = fCurrentIndex;
        // Absolute offset of index 0 of the chunk being scanned.
        int offset = fCurrentOffset - index;
        while (true) {
            char[] data = fMostRecentData;
            while (index < CharDataChunk.CHUNK_SIZE) {
                int ch = data[index] & 0xFFFF;
                if (ch >= 0x80) {
                    fCurrentOffset = offset + index;
                    fCurrentIndex = index;
                    fMostRecentChar = ch;
                    return ch;
                }
                if (XMLCharacterProperties.fgAsciiCharData[ch] == 0) {
                    fCharacterCounter++;
                } else if (ch == 0x0A) {
                    fLinefeedCounter++;
                    fCharacterCounter = 1;
                } else {
                    fCurrentOffset = offset + index;
                    fCurrentIndex = index;
                    fMostRecentChar = ch;
                    return ch;
                }
                index++;
            }
            offset += index;
            slowLoadNextChar();
            index = 0;
        }
    }

    /**
     * Delivers the character data in {@code [offset, endOffset)} to the
     * character-data handler, either as a pooled string or - when
     * {@code fSendCharDataAsCharArray} is set - as one call per chunk the
     * range touches.
     *
     * @param offset       absolute offset of the first character
     * @param endOffset    absolute offset just past the last character
     * @param isWhitespace selects {@code processWhitespace} over
     *                     {@code processCharacters}
     */
    private void callCharDataHandler(int offset, int endOffset, boolean isWhitespace) throws Exception {
        int length = endOffset - offset;
        if (!fSendCharDataAsCharArray) {
            int stringIndex = addString(offset, length);
            if (isWhitespace) {
                fCharDataHandler.processWhitespace(stringIndex);
            } else {
                fCharDataHandler.processCharacters(stringIndex);
            }
            return;
        }

        CharDataChunk dataChunk = fCurrentChunk.chunkFor(offset);
        int index = offset & CharDataChunk.CHUNK_MASK;
        if (index + length <= CharDataChunk.CHUNK_SIZE) {
            // The whole range lies in a single chunk.
            if (length != 0) {
                dispatchCharData(dataChunk.toCharArray(), index, length, isWhitespace);
            }
            return;
        }

        // The range spans chunks: send the tail of the first chunk, then
        // each following chunk (or the needed part of the last one).
        int count = length;
        int nbytes = CharDataChunk.CHUNK_SIZE - index;
        dispatchCharData(dataChunk.toCharArray(), index, nbytes, isWhitespace);
        count -= nbytes;

        do {
            dataChunk = dataChunk.nextChunk();
            if (dataChunk == null) {
                throw new RuntimeException(new ImplementationMessages().createMessage(null, ImplementationMessages.INT_DCN, 0, null));
            }
            nbytes = count <= CharDataChunk.CHUNK_SIZE ? count : CharDataChunk.CHUNK_SIZE;
            dispatchCharData(dataChunk.toCharArray(), 0, nbytes, isWhitespace);
            count -= nbytes;
        } while (count > 0);
    }

    /** Sends one character array slice to the whitespace or characters callback. */
    private void dispatchCharData(char[] chars, int start, int count, boolean isWhitespace) throws Exception {
        if (isWhitespace) {
            fCharDataHandler.processWhitespace(chars, start, count);
        } else {
            fCharDataHandler.processCharacters(chars, start, count);
        }
    }

    /**
     * Moves the cursor to the start of the next chunk. If the next chunk
     * already exists it is reused; otherwise a new chunk is created and
     * filled through {@link #fillCurrentChunk()}. Does not change
     * {@code fCurrentOffset}.
     *
     * @return the character now under the cursor
     */
    private int slowLoadNextChar() throws Exception {
        fCallClearPreviousChunk = true;
        if (fCurrentChunk.nextChunk() != null) {
            fCurrentChunk = fCurrentChunk.nextChunk();
            fCurrentIndex = 0;
            fMostRecentData = fCurrentChunk.toCharArray();
            return (fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF);
        } else {
            fCurrentChunk = CharDataChunk.createChunk(fStringPool, fCurrentChunk);
            return fillCurrentChunk();
        }
    }

    /**
     * Advances the cursor by one character, crossing into the next chunk
     * when necessary. Does not touch the line/column counters.
     *
     * @return the character now under the cursor
     */
    private int loadNextChar() throws Exception {
        fCurrentOffset++;
        if (++fCurrentIndex == CharDataChunk.CHUNK_SIZE) {
            return slowLoadNextChar();
        }
        return (fMostRecentChar = fMostRecentData[fCurrentIndex] & 0xFFFF);
    }

    /**
     * Returns whether {@code offset} lies beyond the loaded data
     * ({@code offset > fLength}).
     */
    private boolean atEOF(int offset) {
        return (offset > fLength);
    }

}