1 57 58 package org.enhydra.apache.xerces.readers; 59 60 import java.io.IOException ; 61 62 import org.enhydra.apache.xerces.framework.XMLErrorReporter; 63 import org.enhydra.apache.xerces.utils.ChunkyByteArray; 64 import org.enhydra.apache.xerces.utils.ChunkyCharArray; 65 import org.enhydra.apache.xerces.utils.QName; 66 import org.enhydra.apache.xerces.utils.StringHasher; 67 import org.enhydra.apache.xerces.utils.StringPool; 68 import org.enhydra.apache.xerces.utils.XMLCharacterProperties; 69 70 81 final class UCSReader extends XMLEntityReader implements StringPool.StringProducer { 82 83 87 89 90 private static final boolean DEBUG_UTF16_BIG = false; 91 92 static final int 96 E_UCS4B = 0, E_UCS4L = 1, E_UCS2B = 2, E_UCS2L = 3, E_UCS2B_NOBOM = 4, E_UCS2L_NOBOM = 5; private ChunkyByteArray fData = null; 106 private int fEncoding = -1; 107 private StringPool fStringPool = null; 108 private int fBytesPerChar = -1; 109 private boolean fBigEndian = true; 110 private ChunkyCharArray fStringCharArray = null; 111 private boolean fCalledCharPropInit = false; 112 UCSReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, int encoding, StringPool stringPool) throws Exception { 116 super(entityHandler, errorReporter, sendCharDataAsCharArray); 117 fCurrentOffset = (encoding == E_UCS2B || encoding == E_UCS2L) ? 2 : 0; 118 fData = data; 119 fEncoding = encoding; 120 fStringPool = stringPool; 121 fBytesPerChar = (fEncoding == E_UCS4B || fEncoding == E_UCS4L) ? 4 : 2; 122 fBigEndian = fEncoding == E_UCS4B || fEncoding == E_UCS2B || fEncoding == E_UCS2B_NOBOM; 123 } 124 private int getChar(int offset) throws IOException { 128 int b0 = fData.byteAt(offset++) & 0xff; 129 if (b0 == 0xff && fData.atEOF(offset)) 130 return -1; 131 int b1 = fData.byteAt(offset++) & 0xff; 132 if (fBytesPerChar == 4) { 133 int b2 = fData.byteAt(offset++) & 0xff; 134 int b3 = fData.byteAt(offset++) & 0xff; 135 if (fBigEndian) 136 return (b0<<24)+(b1<<16)+(b2<<8)+b3; 137 else 138 return (b3<<24)+(b2<<16)+(b1<<8)+b0; 139 } else { 140 if (fBigEndian) 141 return (b0<<8)+b1; 142 else 143 return (b1<<8)+b0; 144 } 145 } 146 149 public int addString(int offset, int length) { 150 if (length == 0) 151 return 0; 152 return fStringPool.addString(this, offset, length); 153 } 154 157 public int addSymbol(int offset, int length) { 158 if (length == 0) 159 return 0; 160 return fStringPool.addSymbol(this, offset, length, getHashcode(offset, length)); 161 } 162 public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) { 166 int endOffset = offset + length; 167 while (offset < endOffset) { 168 int ch; 169 try { 170 ch = getChar(offset); 171 } 172 catch (IOException ex) { 173 ch = 0; } 175 charBuffer.append((char)ch); 176 offset += fBytesPerChar; 177 } 178 } 179 public void releaseString(int offset, int length) { 183 } 185 public String toString(int offset, int length) { 189 if (fStringCharArray == null) 193 fStringCharArray = new ChunkyCharArray(fStringPool); 194 int newOffset = fStringCharArray.length(); 195 append(fStringCharArray, offset, length); 196 int newLength = fStringCharArray.length() - newOffset; 197 int stringIndex = fStringCharArray.addString(newOffset, newLength); 198 return fStringPool.toString(stringIndex); 199 } 200 private int getHashcode(int offset, int length) { 204 int endOffset = offset + length; 205 int hashcode = 0; 206 while (offset < endOffset) { 207 int ch; 208 try { 209 ch = getChar(offset); 210 } 211 catch (IOException ex) { 212 ch = 0; } 214 hashcode = StringHasher.hashChar(hashcode, ch); 215 offset += fBytesPerChar; 216 } 217 return StringHasher.finishHash(hashcode); 218 } 219 public boolean equalsString(int offset, int length, char[] strChars, int strOffset, int strLength) { 221 int endOffset = offset + length; 222 int slen = strLength; 223 while (offset < endOffset) { 224 if (slen-- == 0) 225 return false; 226 int ch; 227 try { 228 ch = getChar(offset); 229 } 230 catch (IOException ex) { 231 ch = 0; } 233 if (ch != strChars[strOffset++]) 234 return false; 235 offset += fBytesPerChar; 236 } 237 return slen == 0; 238 } 239 private static char[] fCharacters = new char[256]; 243 private int fCharDataLength = 0; 244 private void appendCharData(int ch) { 245 if (fCharacters.length == fCharDataLength) { 246 char[] newchars = new char[fCharacters.length * 2]; 247 System.arraycopy(fCharacters, 0, newchars, 0, fCharacters.length); 248 fCharacters = newchars; 249 } 250 fCharacters[fCharDataLength++] = (char)ch; 251 } 252 public void callCharDataHandler(int offset, int length, boolean isWhitespace) throws Exception { 253 int endOffset = offset + length; 254 boolean skiplf = false; 255 while (offset < endOffset) { 256 int ch = getChar(offset); 257 if (skiplf) { 259 skiplf = false; 260 if (ch == 0x0A) { 261 offset += fBytesPerChar; 262 continue; 263 } 264 } 265 if (ch == 0x0D) { 266 skiplf = true; 267 ch = 0x0A; 268 } 269 appendCharData(ch); 270 offset += fBytesPerChar; 271 } 272 if (fSendCharDataAsCharArray) { 273 if (isWhitespace) 274 fCharDataHandler.processWhitespace(fCharacters, 0, fCharDataLength); 275 else 276 fCharDataHandler.processCharacters(fCharacters, 0, fCharDataLength); 277 } else { 278 int stringIndex = fStringPool.addString(new String (fCharacters, 0, fCharDataLength)); 279 if (isWhitespace) 280 fCharDataHandler.processWhitespace(stringIndex); 281 else 282 fCharDataHandler.processCharacters(stringIndex); 283 } 284 fCharDataLength = 0; 285 } 286 public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception { 290 int ch2 = getChar(fCurrentOffset); 291 if (ch2 == ch) { 292 if (skipPastChar) { 293 fCharacterCounter++; 294 fCurrentOffset += fBytesPerChar; 295 } 296 return true; 297 } 298 return false; 299 } 300 public boolean lookingAtValidChar(boolean skipPastChar) throws Exception { 304 int ch = getChar(fCurrentOffset); 305 if (ch < 0x20) { 306 if (ch == 0x09) { 307 if (!skipPastChar) 308 return true; 309 fCharacterCounter++; 310 } else if (ch == 0x0A) { 311 if (!skipPastChar) 312 return true; 313 fLinefeedCounter++; 314 fCharacterCounter = 1; 315 } else if (ch == 0x0D) { 316 if (!skipPastChar) 317 return true; 318 fCarriageReturnCounter++; 319 fCharacterCounter = 1; 320 } else { 321 if (ch == -1) { 322 return changeReaders().lookingAtValidChar(skipPastChar); 323 } 324 return false; 325 } 326 fCurrentOffset += fBytesPerChar; 327 return true; 328 } 329 if (ch <= 0xD7FF) { 330 if (skipPastChar) { 331 fCharacterCounter++; 332 fCurrentOffset += fBytesPerChar; 333 } 334 return true; 335 } 336 if (ch <= 0xDFFF) { 337 if (skipPastChar) { 339 fCharacterCounter++; 340 fCurrentOffset += fBytesPerChar; 341 } 342 return true; 343 } 344 if (ch <= 0xFFFD) { 345 if (skipPastChar) { 346 fCharacterCounter++; 347 fCurrentOffset += fBytesPerChar; 348 } 349 return true; 350 } 351 return false; 352 } 353 public boolean lookingAtSpace(boolean skipPastChar) throws Exception { 357 int ch = getChar(fCurrentOffset); 358 if (ch > 0x20) 359 return false; 360 if (ch == 0x20 || ch == 0x09) { 361 if (!skipPastChar) 362 return true; 363 fCharacterCounter++; 364 } else if (ch == 0x0A) { 365 if (!skipPastChar) 366 return true; 367 fLinefeedCounter++; 368 fCharacterCounter = 1; 369 } else if (ch == 0x0D) { 370 if (!skipPastChar) 371 return true; 372 fCarriageReturnCounter++; 373 fCharacterCounter = 1; 374 } else { 375 if (ch == -1) { return changeReaders().lookingAtSpace(skipPastChar); 377 } 378 return false; 379 } 380 fCurrentOffset += fBytesPerChar; 381 return true; 382 } 383 public void skipToChar(char chr) throws Exception { 387 while (true) { 388 int ch = getChar(fCurrentOffset); 389 if (ch == chr) 390 return; 391 if (ch == -1) { 392 changeReaders().skipToChar(chr); 393 return; 394 } 395 if (ch == 0x0A) { 396 fLinefeedCounter++; 397 fCharacterCounter = 1; 398 } else if (ch == 0x0D) { 399 fCarriageReturnCounter++; 400 fCharacterCounter = 1; 401 } else if (ch >= 0xD800 && ch < 0xDC00) { 402 fCharacterCounter++; 403 fCurrentOffset += fBytesPerChar; 404 ch = getChar(fCurrentOffset); 405 if (ch < 0xDC00 || ch >= 0xE000) 406 continue; 407 } else 408 fCharacterCounter++; 409 fCurrentOffset += fBytesPerChar; 410 } 411 } 412 public void skipPastSpaces() throws Exception { 416 while (true) { 417 int ch = getChar(fCurrentOffset); 418 if (ch > 0x20) 419 return; 420 if (ch == 0x20 || ch == 0x09) { 421 fCharacterCounter++; 422 } else if (ch == 0x0A) { 423 fLinefeedCounter++; 424 fCharacterCounter = 1; 425 } else if (ch == 0x0D) { 426 fCarriageReturnCounter++; 427 fCharacterCounter = 1; 428 } else { 429 if (ch == -1) 430 changeReaders().skipPastSpaces(); 431 return; 432 } 433 fCurrentOffset += fBytesPerChar; 434 } 435 } 436 public void skipPastName(char fastcheck) throws Exception { 440 int ch = getChar(fCurrentOffset); 441 if (!fCalledCharPropInit) { 442 XMLCharacterProperties.initCharFlags(); 443 fCalledCharPropInit = true; 444 } 445 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) 446 return; 447 while (true) { 448 fCurrentOffset += fBytesPerChar; 449 fCharacterCounter++; 450 ch = getChar(fCurrentOffset); 451 if (fastcheck == ch) 452 return; 453 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) 454 return; 455 } 456 } 457 public void skipPastNmtoken(char fastcheck) throws Exception { 461 int ch = getChar(fCurrentOffset); 462 if (!fCalledCharPropInit) { 463 XMLCharacterProperties.initCharFlags(); 464 fCalledCharPropInit = true; 465 } 466 while (true) { 467 if (fastcheck == ch) 468 return; 469 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) 470 return; 471 fCurrentOffset += fBytesPerChar; 472 fCharacterCounter++; 473 ch = getChar(fCurrentOffset); 474 } 475 } 476 public boolean skippedString(char[] s) throws Exception { 480 int offset = fCurrentOffset; 481 for (int i = 0; i < s.length; i++) { 482 if (getChar(offset) != s[i]) 483 return false; 484 offset += fBytesPerChar; 485 } 486 fCurrentOffset = offset; 487 fCharacterCounter += s.length; 488 return true; 489 } 490 public int scanInvalidChar() throws Exception { 494 int ch = getChar(fCurrentOffset); 495 if (ch == -1) { 496 return changeReaders().scanInvalidChar(); 497 } 498 fCurrentOffset += fBytesPerChar; 499 if (ch == 0x0A) { 500 fLinefeedCounter++; 501 fCharacterCounter = 1; 502 } else if (ch == 0x0D) { 503 fCarriageReturnCounter++; 504 fCharacterCounter = 1; 505 } else { 506 fCharacterCounter++; 507 if (ch >= 0xD800 && ch < 0xDC00) { 508 int ch2 = getChar(fCurrentOffset); 509 if (ch2 >= 0xDC00 && ch2 < 0xE000) { 510 ch = ((ch-0xD800)<<10)+(ch2-0xDC00)+0x10000; 511 fCurrentOffset += fBytesPerChar; 512 } 513 } 514 } 515 return ch; 516 } 517 public int scanCharRef(boolean hex) throws Exception { 521 int ch = getChar(fCurrentOffset); 522 if (ch == -1) { 523 return changeReaders().scanCharRef(hex); 524 } 525 int num = 0; 526 if (hex) { 527 if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) 528 return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; 529 num = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10); 530 } else { 531 if (ch < '0' || ch > '9') 532 return XMLEntityHandler.CHARREF_RESULT_INVALID_CHAR; 533 num = ch - '0'; 534 } 535 fCharacterCounter++; 536 fCurrentOffset += fBytesPerChar; 537 boolean toobig = false; 538 while (true) { 539 ch = getChar(fCurrentOffset); 540 if (ch == -1) 541 break; 542 if (hex) { 543 if (ch > 'f' || XMLCharacterProperties.fgAsciiXDigitChar[ch] == 0) 544 break; 545 } else { 546 if (ch < '0' || ch > '9') 547 break; 548 } 549 fCharacterCounter++; 550 fCurrentOffset += fBytesPerChar; 551 if (hex) { 552 int dig = ch - (ch < 'A' ? '0' : (ch < 'a' ? 'A' : 'a') - 10); 553 num = (num << 4) + dig; 554 } else { 555 int dig = ch - '0'; 556 num = (num * 10) + dig; 557 } 558 if (num > 0x10FFFF) { 559 toobig = true; 560 num = 0; 561 } 562 } 563 if (ch != ';') 564 return XMLEntityHandler.CHARREF_RESULT_SEMICOLON_REQUIRED; 565 fCharacterCounter++; 566 fCurrentOffset += fBytesPerChar; 567 if (toobig) 568 return XMLEntityHandler.CHARREF_RESULT_OUT_OF_RANGE; 569 return num; 570 } 571 public int scanStringLiteral() throws Exception { 575 boolean single; 576 if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) { 577 return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED; 578 } 579 int offset = fCurrentOffset; 580 char qchar = single ? '\'' : '\"'; 581 while (!lookingAtChar(qchar, false)) { 582 if (!lookingAtValidChar(true)) { 583 return XMLEntityHandler.STRINGLIT_RESULT_INVALID_CHAR; 584 } 585 } 586 int stringIndex = addString(offset, fCurrentOffset - offset); 587 lookingAtChar(qchar, true); return stringIndex; 589 } 590 public int scanAttValue(char qchar, boolean asSymbol) throws Exception 595 { 596 int offset = fCurrentOffset; 597 while (true) { 598 if (lookingAtChar(qchar, false)) { 599 break; 600 } 601 if (lookingAtChar(' ', true)) { 602 continue; 603 } 604 if (lookingAtSpace(false)) { 605 return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX; 606 } 607 if (lookingAtChar('&', false)) { 608 return XMLEntityHandler.ATTVALUE_RESULT_COMPLEX; 609 } 610 if (lookingAtChar('<', false)) { 611 return XMLEntityHandler.ATTVALUE_RESULT_LESSTHAN; 612 } 613 if (!lookingAtValidChar(true)) { 614 return XMLEntityHandler.ATTVALUE_RESULT_INVALID_CHAR; 615 } 616 } 617 int result = asSymbol ? addSymbol(offset, fCurrentOffset - offset) : addString(offset, fCurrentOffset - offset); 618 lookingAtChar(qchar, true); 619 return result; 620 } 621 public int scanEntityValue(int qchar, boolean createString) throws Exception 626 { 627 int offset = fCurrentOffset; 628 while (true) { 629 if (qchar != -1 && lookingAtChar((char)qchar, false)) { 630 if (!createString) 631 return XMLEntityHandler.ENTITYVALUE_RESULT_FINISHED; 632 break; 633 } 634 if (lookingAtChar('&', false)) { 635 return XMLEntityHandler.ENTITYVALUE_RESULT_REFERENCE; 636 } 637 if (lookingAtChar('%', false)) { 638 return XMLEntityHandler.ENTITYVALUE_RESULT_PEREF; 639 } 640 if (!lookingAtValidChar(true)) { 641 return XMLEntityHandler.ENTITYVALUE_RESULT_INVALID_CHAR; 642 } 643 } 644 int result = addString(offset, fCurrentOffset - offset); 645 lookingAtChar((char)qchar, true); 646 return result; 647 } 648 public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws Exception { 652 int nameOffset = fCurrentOffset; 653 skipPastName(fastcheck); 654 int nameLength = fCurrentOffset - nameOffset; 655 if (nameLength == 0) 656 return false; 657 int nameIndex = addSymbol(nameOffset, nameLength); 658 return true; 660 } 661 662 public void scanQName(char fastcheck, QName qname) throws Exception { 663 664 int nameOffset = fCurrentOffset; 666 int ch; 667 int prefixend=-1; 668 int offset=fCurrentOffset; 669 ch = getChar(fCurrentOffset); 670 if (ch < 0x80) { 671 if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0) { 672 qname.clear(); 673 return; 674 } 675 if (ch == ':') { 676 qname.clear(); 677 return; 678 } 679 } 680 else { 681 if (!fCalledCharPropInit) { 682 XMLCharacterProperties.initCharFlags(); 683 fCalledCharPropInit = true; 684 } 685 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) 686 return; 687 } 688 689 while (true) { 690 fCurrentOffset += fBytesPerChar; 691 fCharacterCounter++; 692 ch = getChar(fCurrentOffset); 693 if (fastcheck == ch) { 694 break; 695 } 696 if (ch < 0x80) { 697 if (XMLCharacterProperties.fgAsciiNameChar[ch] == 0) { 698 break; 699 } 700 if (ch == ':') { 701 if (prefixend != -1) { 702 break; 703 } 704 prefixend = fCurrentOffset; 705 713 ch = getChar(fCurrentOffset+fBytesPerChar); 714 boolean lpok = true; 715 if (ch < 0x80) { 716 if (XMLCharacterProperties.fgAsciiInitialNameChar[ch] == 0 || ch == ':') { 717 lpok = false; 718 } 719 } 720 else { 721 if (!fCalledCharPropInit) { 722 XMLCharacterProperties.initCharFlags(); 723 fCalledCharPropInit = true; 724 } 725 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_InitialNameCharFlag) == 0) { 726 lpok = false; 727 } 728 } 729 if (!lpok) { 730 prefixend = -1; 731 break; 732 } 733 } 734 } 735 else { 736 if (!fCalledCharPropInit) { 737 XMLCharacterProperties.initCharFlags(); 738 fCalledCharPropInit = true; 739 } 740 if ((XMLCharacterProperties.fgCharFlags[ch] & XMLCharacterProperties.E_NameCharFlag) == 0) { 741 break; 742 } 743 } 744 } int length = fCurrentOffset - offset; 746 qname.prefix = prefixend == -1 ? -1 : addSymbol(offset, prefixend - offset); 747 qname.rawname = addSymbol(offset, length); 748 qname.localpart = prefixend == -1 ? qname.rawname : addSymbol(prefixend + fBytesPerChar, fCurrentOffset - (prefixend + fBytesPerChar)); 749 qname.uri = StringPool.EMPTY_STRING; 750 751 752 753 } 755 public int scanName(char fastcheck) throws Exception { 756 int nameOffset = fCurrentOffset; 757 skipPastName(fastcheck); 758 int nameLength = fCurrentOffset - nameOffset; 759 if (nameLength == 0) 760 return -1; 761 int nameIndex = addSymbol(nameOffset, nameLength); 762 return nameIndex; 763 } 764 private static final char[] cdata_string = { 'C','D','A','T','A','[' }; 768 private int recognizeMarkup() throws Exception { 769 int ch = getChar(fCurrentOffset); 770 switch (ch) { 771 case -1: 772 return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; 773 case '?': 774 fCharacterCounter++; 775 fCurrentOffset += fBytesPerChar; 776 return XMLEntityHandler.CONTENT_RESULT_START_OF_PI; 777 case '!': 778 fCharacterCounter++; 779 fCurrentOffset += fBytesPerChar; 780 ch = getChar(fCurrentOffset); 781 if (ch == -1) { 782 fCharacterCounter--; 783 fCurrentOffset -= fBytesPerChar;; 784 return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; 785 } 786 if (ch == '-') { 787 fCharacterCounter++; 788 fCurrentOffset += fBytesPerChar; 789 ch = getChar(fCurrentOffset); 790 if (ch == -1) { 791 fCharacterCounter -= 2; 792 fCurrentOffset -= 2; 793 return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; 794 } 795 if (ch == '-') { 796 fCharacterCounter++; 797 fCurrentOffset += fBytesPerChar; 798 return XMLEntityHandler.CONTENT_RESULT_START_OF_COMMENT; 799 } 800 break; 801 } 802 if (ch == '[') { 803 fCharacterCounter++; 804 fCurrentOffset += fBytesPerChar; 805 for (int i = 0; i < 6; i++) { 806 ch = getChar(fCurrentOffset); 807 if (ch == -1) { 808 fCharacterCounter -= (2 + i); 809 fCurrentOffset -= ((2 + i) * fBytesPerChar); 810 return XMLEntityHandler.CONTENT_RESULT_MARKUP_END_OF_INPUT; 811 } 812 if (ch != cdata_string[i]) { 813 return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED; 814 } 815 fCharacterCounter++; 816 fCurrentOffset += fBytesPerChar; 817 } 818 return XMLEntityHandler.CONTENT_RESULT_START_OF_CDSECT; 819 } 820 break; 821 case '/': 822 fCharacterCounter++; 823 fCurrentOffset += fBytesPerChar; 824 return XMLEntityHandler.CONTENT_RESULT_START_OF_ETAG; 825 default: 826 return XMLEntityHandler.CONTENT_RESULT_START_OF_ELEMENT; 827 } 828 return XMLEntityHandler.CONTENT_RESULT_MARKUP_NOT_RECOGNIZED; 829 } 830 private int recognizeReference() throws Exception { 831 int ch = getChar(fCurrentOffset); 832 if (ch == -1) { 833 return XMLEntityHandler.CONTENT_RESULT_REFERENCE_END_OF_INPUT; 834 } 835 if (ch == '#') { 841 fCharacterCounter++; 842 fCurrentOffset += fBytesPerChar; 843 return XMLEntityHandler.CONTENT_RESULT_START_OF_CHARREF; 844 } else { 845 return XMLEntityHandler.CONTENT_RESULT_START_OF_ENTITYREF; 846 } 847 } 848 public int scanContent(QName element) throws Exception { 849 int offset = fCurrentOffset; 850 int ch = getChar(fCurrentOffset); 851 fCurrentOffset += fBytesPerChar; 852 byte prop; 853 if (!fCalledCharPropInit) { 854 XMLCharacterProperties.initCharFlags(); 855 fCalledCharPropInit = true; 856 } 857 if (ch < 0x80) { 858 if (ch == -1) { 859 fCurrentOffset -= fBytesPerChar; 860 return changeReaders().scanContent(element); } 862 prop = XMLCharacterProperties.fgCharFlags[ch]; 863 if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0 && ch != 0x0A && ch != 0x0D) { 864 if (ch == '<') { 865 fCharacterCounter++; 866 if (!fInCDSect) { 867 return recognizeMarkup(); 868 } 869 } else if (ch == '&') { 870 fCharacterCounter++; 871 if (!fInCDSect) { 872 return recognizeReference(); 873 } 874 } else if (ch == ']') { 875 if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') { 876 fCharacterCounter += 3; 877 fCurrentOffset += (2 * fBytesPerChar); 878 return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT; 879 } 880 } else { 881 fCurrentOffset -= fBytesPerChar; 882 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; 883 } 884 } else if (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D) { 885 do { 886 if (ch == 0x0A) { 887 fLinefeedCounter++; 888 fCharacterCounter = 1; 889 } else if (ch == 0x0D) { 890 fCarriageReturnCounter++; 891 fCharacterCounter = 1; 892 } else { 893 fCharacterCounter++; 894 } 895 ch = getChar(fCurrentOffset); 896 fCurrentOffset += fBytesPerChar; 897 } while (ch == 0x20 || ch == 0x09 || ch == 0x0A || ch == 0x0D); 898 if (ch < 0x80) { 899 if (ch == -1) { 900 fCurrentOffset -= fBytesPerChar; 901 callCharDataHandler(offset, fCurrentOffset - offset, true); 902 return changeReaders().scanContent(element); } 904 prop = XMLCharacterProperties.fgCharFlags[ch]; 905 if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) { 906 if (ch == '<') { 907 if (!fInCDSect) { 908 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true); 909 fCharacterCounter++; 910 return recognizeMarkup(); 911 } 912 fCharacterCounter++; 913 } else if (ch == '&') { 914 if (!fInCDSect) { 915 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true); 916 fCharacterCounter++; 917 return recognizeReference(); 918 } 919 fCharacterCounter++; 920 } else if (ch == ']') { 921 if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') { 922 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, true); 923 fCharacterCounter += 3; 924 fCurrentOffset += (2 * fBytesPerChar); 925 return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT; 926 } 927 } else { 928 fCurrentOffset -= fBytesPerChar; 929 callCharDataHandler(offset, fCurrentOffset - offset, true); 930 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; 931 } 932 } 933 } else { 934 if (ch >= 0xD800 && ch <= 0xDFFF) { 935 fCurrentOffset += fBytesPerChar; 936 } else if (ch == 0xFFFE || ch == 0xFFFF) { 937 fCurrentOffset -= fBytesPerChar; 938 callCharDataHandler(offset, fCurrentOffset - offset, true); 939 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; 940 } 941 } 942 } 943 } else { 944 if (ch >= 0xD800 && ch <= 0xDFFF) { 945 fCurrentOffset += fBytesPerChar; 946 } else if (ch == 0xFFFE || ch == 0xFFFF) { 947 fCurrentOffset -= fBytesPerChar; 948 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; 949 } 950 } 951 fCharacterCounter++; 952 while (true) { 953 ch = getChar(fCurrentOffset); 954 fCurrentOffset += fBytesPerChar; 955 if (ch >= 0x80 || ch < 0) 956 break; 957 prop = XMLCharacterProperties.fgCharFlags[ch]; 958 if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) { 959 if (ch == 0x0A) { 960 fLinefeedCounter++; 961 fCharacterCounter = 1; 962 } else if (ch == 0x0D) { 963 fCarriageReturnCounter++; 964 fCharacterCounter = 1; 965 } else 966 break; 967 } else 968 fCharacterCounter++; 969 } 970 while (true) { if (ch < 0x80) { 972 if (ch == -1) { 973 fCurrentOffset -= fBytesPerChar; 974 callCharDataHandler(offset, fCurrentOffset - offset, false); 975 return changeReaders().scanContent(element); } 977 prop = XMLCharacterProperties.fgCharFlags[ch]; 978 if ((prop & XMLCharacterProperties.E_CharDataFlag) == 0) { 979 if (ch == '<') { 980 if (!fInCDSect) { 981 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false); 982 fCharacterCounter++; 983 return recognizeMarkup(); 984 } 985 fCharacterCounter++; 986 } else if (ch == '&') { 987 if (!fInCDSect) { 988 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false); 989 fCharacterCounter++; 990 return recognizeReference(); 991 } 992 fCharacterCounter++; 993 } else if (ch == 0x0A) { 994 fLinefeedCounter++; 995 fCharacterCounter = 1; 996 } else if (ch == 0x0D) { 997 fCarriageReturnCounter++; 998 fCharacterCounter = 1; 999 } else if (ch == ']') { 1000 if (getChar(fCurrentOffset) == ']' && getChar(fCurrentOffset + fBytesPerChar) == '>') { 1001 callCharDataHandler(offset, (fCurrentOffset - fBytesPerChar) - offset, false); 1002 fCharacterCounter += 3; 1003 fCurrentOffset += (2 * fBytesPerChar); 1004 return XMLEntityHandler.CONTENT_RESULT_END_OF_CDSECT; 1005 } 1006 fCharacterCounter++; 1007 } else { 1008 fCurrentOffset -= fBytesPerChar; 1009 callCharDataHandler(offset, fCurrentOffset - offset, false); 1010 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; 1011 } 1012 } else { 1013 fCharacterCounter++; 1014 } 1015 } else { 1016 if (ch >= 0xD800 && ch <= 0xDFFF) { 1017 fCharacterCounter++; 1018 fCurrentOffset += fBytesPerChar; 1019 } else if (ch == 0xFFFE || ch == 0xFFFF) { 1020 fCurrentOffset -= fBytesPerChar; 1021 callCharDataHandler(offset, fCurrentOffset - offset, false); 1022 return XMLEntityHandler.CONTENT_RESULT_INVALID_CHAR; 1023 } 1024 fCharacterCounter++; 1025 } 1026 ch = getChar(fCurrentOffset); 1027 fCurrentOffset += fBytesPerChar; 1028 } 1029 } 1030} 1031 | Popular Tags |