| 1 57 58 package org.enhydra.apache.xerces.readers; 59 60 import java.io.InputStream ; 61 62 import org.enhydra.apache.xerces.framework.XMLErrorReporter; 63 import org.enhydra.apache.xerces.utils.QName; 64 import org.enhydra.apache.xerces.utils.StringPool; 65 import org.enhydra.apache.xerces.utils.SymbolCache; 66 import org.enhydra.apache.xerces.utils.UTF8DataChunk; 67 import org.enhydra.apache.xerces.utils.XMLCharacterProperties; 68 69 91 final class UTF8Reader extends XMLEntityReader { 92 private final static boolean USE_OUT_OF_LINE_LOAD_NEXT_BYTE = false; 96 private final static boolean USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE = true; 97 public UTF8Reader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, InputStream dataStream, StringPool stringPool) throws Exception { 101 super(entityHandler, errorReporter, sendCharDataAsCharArray); 102 fInputStream = dataStream; 103 fStringPool = stringPool; 104 fCharArrayRange = fStringPool.createCharArrayRange(); 105 fCurrentChunk = UTF8DataChunk.createChunk(fStringPool, null); 106 fillCurrentChunk(); 107 } 108 111 public int addString(int offset, int length) { 112 if (length == 0) 113 return 0; 114 return fCurrentChunk.addString(offset, length); 115 } 116 119 public int addSymbol(int offset, int length) { 120 if (length == 0) 121 return 0; 122 return fCurrentChunk.addSymbol(offset, length, 0); 123 } 124 127 private int addSymbol(int offset, int length, int hashcode) { 128 if (length == 0) 129 return 0; 130 return fCurrentChunk.addSymbol(offset, length, hashcode); 131 } 132 135 public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) { 136 fCurrentChunk.append(charBuffer, offset, length); 137 } 138 private int slowLoadNextByte() throws Exception { 142 fCallClearPreviousChunk = true; 143 if (fCurrentChunk.nextChunk() != null) { 144 fCurrentChunk = fCurrentChunk.nextChunk(); 145 fCurrentIndex = 0; 146 fMostRecentData = fCurrentChunk.toByteArray(); 147 return(fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 148 } else { 149 fCurrentChunk = UTF8DataChunk.createChunk(fStringPool, fCurrentChunk); 150 return fillCurrentChunk(); 151 } 152 } 153 private int loadNextByte() throws Exception { 154 fCurrentOffset++; 155 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 156 fCurrentIndex++; 157 try { 158 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 159 return fMostRecentByte; 160 } catch (ArrayIndexOutOfBoundsException ex) { 161 return slowLoadNextByte(); 162 } 163 } else { 164 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 165 return slowLoadNextByte(); 166 else 167 return(fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 168 } 169 } 170 private boolean atEOF(int offset) { 174 return(offset > fLength); 175 } 176 public XMLEntityHandler.EntityReader changeReaders() throws Exception { 180 XMLEntityHandler.EntityReader nextReader = super.changeReaders(); 181 fCurrentChunk.releaseChunk(); 182 fCurrentChunk = null; 183 fMostRecentData = null; 184 fMostRecentByte = 0; 185 return nextReader; 186 } 187 public boolean lookingAtChar(char ch, boolean skipPastChar) throws Exception { 191 int b0 = fMostRecentByte; 192 if (b0 != ch) { 193 if (b0 == 0) { 194 if (atEOF(fCurrentOffset + 1)) { 195 return changeReaders().lookingAtChar(ch, skipPastChar); 196 } 197 } 198 if (ch == 0x0A && b0 == 0x0D) { 199 if (skipPastChar) { 200 fCarriageReturnCounter++; 201 fCharacterCounter = 1; 202 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 203 b0 = loadNextByte(); 204 } else { 205 fCurrentOffset++; 206 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 207 fCurrentIndex++; 208 try { 209 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 210 b0 = fMostRecentByte; 211 } catch (ArrayIndexOutOfBoundsException ex) { 212 b0 = slowLoadNextByte(); 213 } 214 } else { 215 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 216 b0 = slowLoadNextByte(); 217 else 218 b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 219 } 220 } 221 if (b0 == 0x0A) { 222 fLinefeedCounter++; 223 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 224 loadNextByte(); 225 } else { 226 fCurrentOffset++; 227 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 228 fCurrentIndex++; 229 try { 230 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 231 } catch (ArrayIndexOutOfBoundsException ex) { 232 slowLoadNextByte(); 233 } 234 } else { 235 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 236 slowLoadNextByte(); 237 else 238 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 239 } 240 } 241 } 242 } 243 return true; 244 } 245 return false; 246 } 247 if (ch == 0x0D) 248 return false; 249 if (skipPastChar) { 250 fCharacterCounter++; 251 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 252 loadNextByte(); 253 } else { 254 fCurrentOffset++; 255 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 256 fCurrentIndex++; 257 try { 258 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 259 } catch (ArrayIndexOutOfBoundsException ex) { 260 slowLoadNextByte(); 261 } 262 } else { 263 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 264 slowLoadNextByte(); 265 else 266 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 267 } 268 } 269 } 270 return true; 271 } 272 public boolean lookingAtValidChar(boolean skipPastChar) throws Exception { 276 int b0 = fMostRecentByte; 277 if (b0 < 0x80) { if (b0 >= 0x20 || b0 == 0x09) { 279 if (skipPastChar) { 280 fCharacterCounter++; 281 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 282 loadNextByte(); 283 } else { 284 fCurrentOffset++; 285 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 286 fCurrentIndex++; 287 try { 288 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 289 } catch (ArrayIndexOutOfBoundsException ex) { 290 slowLoadNextByte(); 291 } 292 } else { 293 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 294 slowLoadNextByte(); 295 else 296 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 297 } 298 } 299 } 300 return true; 301 } 302 if (b0 == 0x0A) { 303 if (skipPastChar) { 304 fLinefeedCounter++; 305 fCharacterCounter = 1; 306 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 307 loadNextByte(); 308 } else { 309 fCurrentOffset++; 310 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 311 fCurrentIndex++; 312 try { 313 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 314 } catch (ArrayIndexOutOfBoundsException ex) { 315 slowLoadNextByte(); 316 } 317 } else { 318 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 319 slowLoadNextByte(); 320 else 321 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 322 } 323 } 324 } 325 return true; 326 } 327 if (b0 == 0x0D) { 328 if (skipPastChar) { 329 fCarriageReturnCounter++; 330 fCharacterCounter = 1; 331 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 332 b0 = loadNextByte(); 333 } else { 334 fCurrentOffset++; 335 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 336 fCurrentIndex++; 337 try { 338 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 339 b0 = fMostRecentByte; 340 } catch (ArrayIndexOutOfBoundsException ex) { 341 b0 = slowLoadNextByte(); 342 } 343 } else { 344 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 345 b0 = slowLoadNextByte(); 346 else 347 b0 = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 348 } 349 } 350 if (b0 == 0x0A) { 351 fLinefeedCounter++; 352 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 353 loadNextByte(); 354 } else { 355 fCurrentOffset++; 356 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 357 fCurrentIndex++; 358 try { 359 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 360 } catch (ArrayIndexOutOfBoundsException ex) { 361 slowLoadNextByte(); 362 } 363 } else { 364 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 365 slowLoadNextByte(); 366 else 367 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 368 } 369 } 370 } 371 } 372 return true; 373 } 374 if (b0 == 0) { 375 if (atEOF(fCurrentOffset + 1)) { 376 return changeReaders().lookingAtValidChar(skipPastChar); 377 } 378 } 379 return false; 380 } 381 UTF8DataChunk saveChunk = fCurrentChunk; 385 int saveIndex = fCurrentIndex; 386 int saveOffset = fCurrentOffset; 387 int b1 = loadNextByte(); 388 if ((0xe0 & b0) == 0xc0) { if (skipPastChar) { 390 fCharacterCounter++; 391 loadNextByte(); 392 } else { 393 fCurrentChunk = saveChunk; 394 fCurrentIndex = saveIndex; 395 fCurrentOffset = saveOffset; 396 fMostRecentData = saveChunk.toByteArray(); 397 fMostRecentByte = b0; 398 } 399 return true; } 401 int b2 = loadNextByte(); 402 if ((0xf0 & b0) == 0xe0) { boolean result = false; 407 if (!((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE))) { if (skipPastChar) { 409 fCharacterCounter++; 410 loadNextByte(); 411 return true; 412 } 413 result = true; 414 } 415 fCurrentChunk = saveChunk; 416 fCurrentIndex = saveIndex; 417 fCurrentOffset = saveOffset; 418 fMostRecentData = saveChunk.toByteArray(); 419 fMostRecentByte = b0; 420 return result; 421 } 422 int b3 = loadNextByte(); boolean result = false; 426 427 if ( ((b0&0xf8) == 0xf0) && ((b1&0xc0)==0x80) && 430 ((b2&0xc0) == 0x80) && ((b3&0xc0)==0x80)){ 431 if (!(b0 > 0xF4 || (b0 == 0xF4 && b1 >= 0x90))) { 433 if (skipPastChar) { 434 fCharacterCounter++; 435 loadNextByte(); 436 return true; 437 } 438 result = true; 439 } 440 fCurrentChunk = saveChunk; 441 fCurrentIndex = saveIndex; 442 fCurrentOffset = saveOffset; 443 fMostRecentData = saveChunk.toByteArray(); 444 fMostRecentByte = b0; 445 return result; 446 } else{ 447 fCurrentChunk = saveChunk; 448 fCurrentIndex = saveIndex; 449 fCurrentOffset = saveOffset; 450 fMostRecentData = saveChunk.toByteArray(); 451 fMostRecentByte = b0; 452 return result; 453 } 454 } 455 public boolean lookingAtSpace(boolean skipPastChar) throws Exception { 459 int ch = fMostRecentByte; 460 if (ch > 0x20) 461 return false; 462 if (ch == 0x20 || ch == 0x09) { 463 if (!skipPastChar) 464 return true; 465 fCharacterCounter++; 466 } else if (ch == 0x0A) { 467 if (!skipPastChar) 468 return true; 469 fLinefeedCounter++; 470 fCharacterCounter = 1; 471 } else if (ch == 0x0D) { 472 if (!skipPastChar) 473 return true; 474 fCarriageReturnCounter++; 475 fCharacterCounter = 1; 476 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 477 ch = loadNextByte(); 478 } else { 479 fCurrentOffset++; 480 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 481 fCurrentIndex++; 482 try { 483 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 484 ch = fMostRecentByte; 485 } catch (ArrayIndexOutOfBoundsException ex) { 486 ch = slowLoadNextByte(); 487 } 488 } else { 489 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 490 ch = slowLoadNextByte(); 491 else 492 ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 493 } 494 } 495 if (ch != 0x0A) 496 return true; 497 fLinefeedCounter++; 498 } else { 499 if (ch == 0) { if (atEOF(fCurrentOffset + 1)) { 501 return changeReaders().lookingAtSpace(skipPastChar); 502 } 503 } 504 return false; 505 } 506 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 507 loadNextByte(); 508 } else { 509 fCurrentOffset++; 510 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 511 fCurrentIndex++; 512 try { 513 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 514 } catch (ArrayIndexOutOfBoundsException ex) { 515 slowLoadNextByte(); 516 } 517 } else { 518 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 519 slowLoadNextByte(); 520 else 521 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 522 } 523 } 524 return true; 525 } 526 public void skipToChar(char ch) throws Exception { 530 int b0 = fMostRecentByte; 534 while (true) { 535 if (b0 == ch) return; 537 if (b0 == 0) { 538 if (atEOF(fCurrentOffset + 1)) { 539 changeReaders().skipToChar(ch); 540 return; 541 } 542 fCharacterCounter++; 543 } else if (b0 == 0x0A) { 544 fLinefeedCounter++; 545 fCharacterCounter = 1; 546 } else if (b0 == 0x0D) { 547 fCarriageReturnCounter++; 548 fCharacterCounter = 1; 549 b0 = loadNextByte(); 550 if (b0 != 0x0A) 551 continue; 552 fLinefeedCounter++; 553 } else if (b0 < 0x80) { fCharacterCounter++; 555 } else { 556 fCharacterCounter++; 557 if ((0xe0 & b0) == 0xc0) { loadNextByte(); 559 } else if ((0xf0 & b0) == 0xe0) { loadNextByte(); 561 loadNextByte(); 562 } else { loadNextByte(); 564 loadNextByte(); 565 loadNextByte(); 566 } 567 } 568 b0 = loadNextByte(); 569 } 570 } 571 public void skipPastSpaces() throws Exception { 575 int ch = fMostRecentByte; 576 while (true) { 577 if (ch == 0x20 || ch == 0x09) { 578 fCharacterCounter++; 579 } else if (ch == 0x0A) { 580 fLinefeedCounter++; 581 fCharacterCounter = 1; 582 } else if (ch == 0x0D) { 583 fCarriageReturnCounter++; 584 fCharacterCounter = 1; 585 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 586 ch = loadNextByte(); 587 } else { 588 fCurrentOffset++; 589 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 590 fCurrentIndex++; 591 try { 592 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 593 ch = fMostRecentByte; 594 } catch (ArrayIndexOutOfBoundsException ex) { 595 ch = slowLoadNextByte(); 596 } 597 } else { 598 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 599 ch = slowLoadNextByte(); 600 else 601 ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 602 } 603 } 604 if (ch != 0x0A) 605 continue; 606 fLinefeedCounter++; 607 } else { 608 if (ch == 0 && atEOF(fCurrentOffset + 1)) 609 changeReaders().skipPastSpaces(); 610 return; 611 } 612 if (USE_OUT_OF_LINE_LOAD_NEXT_BYTE) { 613 ch = loadNextByte(); 614 } else { 615 fCurrentOffset++; 616 if (USE_TRY_CATCH_FOR_LOAD_NEXT_BYTE) { 617 fCurrentIndex++; 618 try { 619 fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF; 620 ch = fMostRecentByte; 621 } catch (ArrayIndexOutOfBoundsException ex) { 622 ch = slowLoadNextByte(); 623 } 624 } else { 625 if (++fCurrentIndex == UTF8DataChunk.CHUNK_SIZE) 626 ch = slowLoadNextByte(); 627 else 628 ch = (fMostRecentByte = fMostRecentData[fCurrentIndex] & 0xFF); 629 } 630 } 631 } 632 } 633 protected boolean skippedMultiByteCharWithFlag(int b0, int flag) throws Exception { 637 UTF8DataChunk saveChunk = fCurrentChunk; 638 int saveOffset = fCurrentOffset; 639 int saveIndex = fCurrentIndex; 640 if (!fCalledCharPropInit) { 641 XMLCharacterProperties.initCharFlags(); 642 fCalledCharPropInit = true; 643 } 644 int b1 = loadNextByte(); 645 if ((0xe0 & b0) == 0xc0) { if ((XMLCharacterProperties.fgCharFlags[((0x1f & b0)<<6) + (0x3f & b1)] & flag) == 0) { fCurrentChunk = saveChunk; 648 fCurrentIndex = saveIndex; 649 fCurrentOffset = saveOffset; 650 fMostRecentData = saveChunk.toByteArray(); 651 fMostRecentByte = b0; 652 return false; 653 } 654 return true; 655 } 656 int b2 = loadNextByte(); 657 if ((0xf0 & b0) == 0xe0) { if ((b0 == 0xED && b1 >= 0xA0) || (b0 == 0xEF && b1 == 0xBF && b2 >= 0xBE)) { 660 fCurrentChunk = saveChunk; 661 fCurrentIndex = saveIndex; 662 fCurrentOffset = saveOffset; 663 fMostRecentData = saveChunk.toByteArray(); 664 fMostRecentByte = b0; 665 return false; 666 } 667 if ((XMLCharacterProperties.fgCharFlags[((0x0f & b0)<<12) + ((0x3f & b1)<<6) + (0x3f & b2)] & flag) == 0) { fCurrentChunk = saveChunk; 669 fCurrentIndex = saveIndex; 670 fCurrentOffset = saveOffset; 671 fMostRecentData = saveChunk.toByteArray(); 672 fMostRecentByte = b0; 673 return false; 674 } 675 return true; 676 } else { fCurrentChunk = saveChunk; 678 fCurrentIndex = saveIndex; 679 fCurrentOffset = saveOffset; 680 fMostRecentData = saveChunk.toByteArray(); 681 fMostRecentByte = b0; 682 return false; 683 } 684 } 685 public void skipPastName(char fastcheck) throws Exception { 686 int b0 = fMostRecentByte; 687 if (b0 < 0x80) { 688 if (XMLCharacterProperties.fgAsciiInitialNameChar[b0] == 0) 689 return; 690 } else { 691 if (!fCalledCharPropInit) { 692 XMLCharacterProperties.initCharFlags(); 693 fCalledCharPropInit = true; 694 } 695 if (!skippedMultiByteCharWithFlag(b0, XMLCharacterProperties.E_InitialNameCharFlag)) 696 return; 697 } 698 while (true) { 699 fCharacterCounter++; 700 b0 = loadNextByte(); 701 if (fastcheck == b0) 702 return; 703 if (b0 < 0x80) { 704 if (XMLCharacterProperties.fgAsciiNameChar[b0] == 0) 705 return; 706 &nbs
|