package org.enhydra.apache.xerces.readers;

import java.io.InputStream;

import org.enhydra.apache.xerces.framework.XMLErrorReporter;
import org.enhydra.apache.xerces.utils.CharDataChunk;
import org.enhydra.apache.xerces.utils.ImplementationMessages;
import org.enhydra.apache.xerces.utils.StringPool;

/**
 * Character reader that decodes a UTF-8 byte stream into char chunks.
 *
 * <p>Bytes are read from the input stream into an intermediate byte buffer
 * ({@code fOverflow}) and then decoded into the char array of the current
 * chunk by {@link #copyNormalize}. While decoding, a carriage return
 * ({@code 0x0D}), optionally followed by a line feed ({@code 0x0A}), is
 * replaced by a single line feed. Two-, three- and four-byte UTF-8 sequences
 * are decoded; four-byte sequences at or above U+10000 are written as a
 * surrogate pair. Sequences that are split across two byte buffers are
 * carried over in {@code fPartialMultiByteChar}.
 *
 * <p>The fields {@code fCurrentChunk}, {@code fMostRecentData},
 * {@code fMostRecentChar}, {@code fLength} and {@code fCurrentIndex}, as well
 * as {@code deferException}, are not declared in this file; they are
 * presumably inherited from {@code AbstractCharReader}.
 */
final class UTF8CharReader extends AbstractCharReader {

    /**
     * Creates the reader and immediately fills the first chunk.
     *
     * @param entityHandler           passed through to the superclass
     * @param errorReporter           passed through to the superclass
     * @param sendCharDataAsCharArray passed through to the superclass
     * @param dataStream              the UTF-8 encoded byte stream to read
     * @param stringPool              passed through to the superclass
     * @throws Exception if filling the first chunk fails
     */
    UTF8CharReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, InputStream dataStream, StringPool stringPool) throws Exception {
        super(entityHandler, errorReporter, sendCharDataAsCharArray, stringPool);
        fInputStream = dataStream;
        fillCurrentChunk();
    }

    /** Source of the encoded bytes; closed and set to null once end of input is seen. */
    private InputStream fInputStream = null;

    /** True when the next fill must first process what is left in {@code fOverflow}. */
    private boolean fCheckOverflow = false;

    /** Byte buffer the input stream is read into. */
    private byte[] fOverflow = null;

    /** Index of the next byte in {@code fOverflow} that has not been decoded yet. */
    private int fOverflowOffset = 0;

    /** End (exclusive) of the valid bytes in {@code fOverflow}. */
    private int fOverflowEnd = 0;

    /** Index of the next char to be written in the output char array. */
    private int fOutputOffset = 0;

    /** Set when a CR was the last byte of a buffer, so a leading LF in the next one is dropped. */
    private boolean fSkipLinefeed = false;

    /** Number of bytes of a split multi-byte sequence held in {@code fPartialMultiByteChar}. */
    private int fPartialMultiByteIn = 0;

    /** Leading bytes of a multi-byte sequence that was cut off by the end of the byte buffer. */
    private byte[] fPartialMultiByteChar = new byte[3];

    /** Low surrogate that did not fit into the output array after its high surrogate. */
    private int fPartialSurrogatePair = 0;

    /** Value {@link #copyNormalize} returns when one of the multi-byte handlers returns false. */
    private boolean fPartialMultiByteResult = false;

    /**
     * Decodes the next portion of the input into the current chunk.
     *
     * <p>At end of input a {@code 0} char is stored directly after the decoded
     * data.
     *
     * @return the first char of the newly filled data (also stored in {@code fMostRecentChar})
     * @throws Exception if closing the stream or decoding fails
     */
    protected int fillCurrentChunk() throws Exception {
        char[] recycledData = fCurrentChunk.toCharArray();
        fOutputOffset = 0;
        if (fCheckOverflow) {
            //
            // Bytes from the previous read are still waiting in fOverflow.
            //
            fMostRecentData = recycledData;
            if (fOverflowEnd < CharDataChunk.CHUNK_SIZE) {
                //
                // The byte buffer was not filled completely, which only happens
                // once end of input was reached: this is the last chunk.
                //
                recycledData = null;
                if (fOverflowEnd > 0) {
                    if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd - fOverflowOffset)
                        fMostRecentData = new char[1 + fOverflowEnd - fOverflowOffset];
                    copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                } else {
                    if (fMostRecentData == null)
                        fMostRecentData = new char[1];
                }
                fMostRecentData[fOutputOffset] = 0;
                fOverflow = null;
                fLength += fOutputOffset;
                fCurrentIndex = 0;
                fCurrentChunk.setCharArray(fMostRecentData);
                return (fMostRecentChar = fMostRecentData[0]);
            }
            if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
                fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
            else
                recycledData = null;
            copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
            fCheckOverflow = false;
        } else {
            if (fOverflow == null)
                fOverflow = new byte[CharDataChunk.CHUNK_SIZE];
            fMostRecentData = null;
        }
        while (true) {
            fOverflowOffset = 0;
            fOverflowEnd = 0;
            int capacity = CharDataChunk.CHUNK_SIZE;
            int result = 0;
            do {
                try {
                    result = fInputStream.read(fOverflow, fOverflowEnd, capacity);
                } catch (java.io.IOException ignored) {
                    // A failed read is handled exactly like end of input.
                    result = -1;
                }
                if (result == -1) {
                    //
                    // End of input: decode whatever was read so far.
                    //
                    fInputStream.close();
                    fInputStream = null;
                    if (fMostRecentData == null) {
                        fMostRecentData = recycledData;
                        if (fMostRecentData == null || fMostRecentData.length < 1 + fOverflowEnd)
                            fMostRecentData = new char[1 + fOverflowEnd];
                        else
                            recycledData = null;
                        copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                        fOverflow = null;
                        fMostRecentData[fOutputOffset] = 0;
                    } else {
                        boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                        if (alldone) {
                            if (fOverflowEnd == CharDataChunk.CHUNK_SIZE) {
                                // No bytes remain; the next fill only has to
                                // produce the terminating chunk.
                                fCheckOverflow = true;
                                fOverflowOffset = 0;
                                fOverflowEnd = 0;
                            } else {
                                fOverflow = null;
                                fMostRecentData[fOutputOffset] = 0;
                            }
                        } else {
                            // Output is full; the rest is decoded by the next fill.
                            fCheckOverflow = true;
                        }
                    }
                    break;
                }
                if (result > 0) {
                    fOverflowEnd += result;
                    capacity -= result;
                }
            } while (capacity > 0);
            if (result == -1)
                break;
            if (fMostRecentData != null) {
                boolean alldone = copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
                    if (!alldone) {
                        // Undecoded bytes remain for the next fill.
                        fCheckOverflow = true;
                    }
                    break;
                }
            } else {
                fMostRecentData = recycledData;
                if (fMostRecentData == null || fMostRecentData.length < CharDataChunk.CHUNK_SIZE)
                    fMostRecentData = new char[CharDataChunk.CHUNK_SIZE];
                else
                    recycledData = null;
                copyNormalize(fOverflow, fOverflowOffset, fMostRecentData, fOutputOffset);
                if (fOutputOffset == CharDataChunk.CHUNK_SIZE) {
                    break;
                }
            }
        }
        fLength += fOutputOffset;
        fCurrentIndex = 0;
        fCurrentChunk.setCharArray(fMostRecentData);
        return (fMostRecentChar = fMostRecentData[0]);
    }

    /**
     * Decodes bytes from {@code in} into {@code out}, replacing CR and CR LF
     * by a single LF.
     *
     * <p>Decoding stops when either the input (up to {@code fOverflowEnd}) is
     * used up or {@code out} is full. The positions reached are stored in
     * {@code fOverflowOffset} and {@code fOutputOffset}.
     *
     * @param in        byte buffer to decode
     * @param inOffset  index of the first byte to decode
     * @param out       char array receiving the decoded chars
     * @param outOffset index of the first char to write
     * @return true if all input bytes were consumed, false if bytes remain
     * @throws Exception if reporting a malformed sequence fails
     */
    private boolean copyNormalize(byte[] in, int inOffset, char[] out, int outOffset) throws Exception {
        int inEnd = fOverflowEnd;
        int outEnd = out.length;
        if (inOffset == inEnd)
            return true;
        byte b = in[inOffset];
        if (fSkipLinefeed) {
            // The previous buffer ended in CR; swallow the LF of a CR LF pair.
            fSkipLinefeed = false;
            if (b == 0x0A) {
                if (++inOffset == inEnd)
                    return exitNormalize(inOffset, outOffset, true);
                b = in[inOffset];
            }
        } else if (fPartialMultiByteIn > 0) {
            // The previous buffer ended in the middle of a multi-byte sequence.
            if (!handlePartialMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
                return fPartialMultiByteResult;
            inOffset = fOverflowOffset;
            outOffset = fOutputOffset;
            b = in[inOffset];
        }
        while (outOffset < outEnd) {
            // inCount is the number of chars that can be copied before either
            // the input or the output runs out.
            int inCount = inEnd - inOffset;
            int outCount = outEnd - outOffset;
            if (inCount > outCount)
                inCount = outCount;
            inOffset++;
            while (true) {
                // Special bytes: CR and everything outside of ASCII (negative bytes).
                while (b == 0x0D || b < 0) {
                    if (b == 0x0D) {
                        out[outOffset++] = 0x0A;
                        if (inOffset == inEnd) {
                            fSkipLinefeed = true;
                            return exitNormalize(inOffset, outOffset, true);
                        }
                        b = in[inOffset];
                        if (b == 0x0A) {
                            if (++inOffset == inEnd)
                                return exitNormalize(inOffset, outOffset, true);
                            b = in[inOffset];
                        }
                        if (outOffset == outEnd)
                            return exitNormalize(inOffset, outOffset, false);
                    } else {
                        if (!handleMultiByteChar(b, in, inOffset, inEnd, out, outOffset, outEnd))
                            return fPartialMultiByteResult;
                        inOffset = fOverflowOffset;
                        outOffset = fOutputOffset;
                        b = in[inOffset];
                    }
                    inCount = inEnd - inOffset;
                    outCount = outEnd - outOffset;
                    if (inCount > outCount)
                        inCount = outCount;
                    inOffset++;
                }
                // Fast path: plain ASCII bytes are copied one to one.
                while (true) {
                    out[outOffset++] = (char)b;
                    if (--inCount == 0)
                        break;
                    b = in[inOffset++];
                    if (b == 0x0D || b < 0)
                        break;
                }
                if (inCount == 0)
                    break;
            }
            if (inOffset == inEnd)
                break;
        }
        return exitNormalize(inOffset, outOffset, inOffset == inEnd);
    }

    /**
     * Stores the reached input and output positions and passes {@code result} through.
     *
     * @param inOffset  index of the next undecoded byte
     * @param outOffset index of the next char to write
     * @param result    value to return
     * @return {@code result}
     */
    private boolean exitNormalize(int inOffset, int outOffset, boolean result) {
        fOverflowOffset = inOffset;
        fOutputOffset = outOffset;
        return result;
    }

    /**
     * Remembers that {@code inCount} bytes of a split sequence have been seen
     * and stores the last three of them ({@code bx}, {@code by}, {@code bz} in
     * stream order).
     */
    private void savePartialMultiByte(int inCount, byte bz, byte by, byte bx) {
        fPartialMultiByteIn = inCount;
        fPartialMultiByteChar[--inCount] = bz;
        fPartialMultiByteChar[--inCount] = by;
        fPartialMultiByteChar[--inCount] = bx;
    }

    /**
     * Remembers that {@code inCount} bytes of a split sequence have been seen
     * and stores the last two of them ({@code by}, {@code bz} in stream order).
     */
    private void savePartialMultiByte(int inCount, byte bz, byte by) {
        fPartialMultiByteIn = inCount;
        fPartialMultiByteChar[--inCount] = bz;
        fPartialMultiByteChar[--inCount] = by;
    }

    /**
     * Remembers that {@code inCount} bytes of a split sequence have been seen
     * and stores the last one; earlier slots keep their previous content.
     */
    private void savePartialMultiByte(int inCount, byte bz) {
        fPartialMultiByteIn = inCount;
        fPartialMultiByteChar[--inCount] = bz;
    }

    /**
     * Finishes {@link #handleMultiByteChar} after a malformed sequence was
     * replaced by a {@code 0} char.
     *
     * <p>Applies the same boundary check as the well-formed paths: if the
     * input is used up or the output is full, the caller must not continue
     * with {@code in[inOffset]} / {@code out[outOffset]}.
     *
     * @return true if decoding can continue, false if {@link #copyNormalize}
     *         must return {@code fPartialMultiByteResult}
     */
    private boolean exitAfterMalformed(int inOffset, int inEnd, int outOffset, int outEnd) {
        if (inOffset == inEnd || outOffset == outEnd) {
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
            return false;
        }
        return exitNormalize(inOffset, outOffset, true);
    }

    /**
     * Decodes one multi-byte sequence whose lead byte {@code b} has already
     * been consumed.
     *
     * <p>If the sequence is cut off by the end of the input, the bytes seen so
     * far are saved for {@link #handlePartialMultiByteChar}. A malformed
     * sequence is passed to {@code deferException} together with the output
     * index, and a {@code 0} char is written in its place.
     *
     * @param b         lead byte of the sequence
     * @param in        byte buffer being decoded
     * @param inOffset  index of the byte following {@code b}
     * @param inEnd     end (exclusive) of the valid bytes in {@code in}
     * @param out       char array receiving the decoded chars
     * @param outOffset index of the next char to write
     * @param outEnd    end (exclusive) of {@code out}
     * @return true if decoding can continue at {@code fOverflowOffset} /
     *         {@code fOutputOffset}; false if {@link #copyNormalize} must
     *         return {@code fPartialMultiByteResult}
     * @throws Exception if {@code deferException} fails
     */
    private boolean handleMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
        if (inOffset == inEnd) {
            savePartialMultiByte(1, b);
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
            return false;
        }
        byte b1 = in[inOffset++];
        if ((b1 & 0xc0) != 0x80) {
            Object [] args = {
                Integer.toHexString(b & 0xff),
                Integer.toHexString(b1 & 0xff)
            };
            deferException(ImplementationMessages.ENC5, args, outOffset);
            out[outOffset++] = 0;
            return exitAfterMalformed(inOffset, inEnd, outOffset, outEnd);
        }
        if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
            int ch = ((0x1f & b)<<6) + (0x3f & b1);
            out[outOffset++] = (char)ch;
            if (inOffset == inEnd || outOffset == outEnd) {
                fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                return false;
            }
        } else {
            if (inOffset == inEnd) {
                savePartialMultiByte(2, b1, b);
                fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                return false;
            }
            byte b2 = in[inOffset++];
            if ((b2 & 0xc0) != 0x80) {
                Object [] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff),
                    Integer.toHexString(b2 & 0xff)
                };
                deferException(ImplementationMessages.ENC6, args, outOffset);
                out[outOffset++] = 0;
                return exitAfterMalformed(inOffset, inEnd, outOffset, outEnd);
            }
            if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
                int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
                out[outOffset++] = (char)ch;
                if (inOffset == inEnd || outOffset == outEnd) {
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                    return false;
                }
            } else {
                if ((b & 0xf8) != 0xf0) {
                    Object [] args = { Integer.toHexString(b & 0xff) };
                    deferException(ImplementationMessages.ENC4, args, outOffset);
                    out[outOffset++] = 0;
                    return exitAfterMalformed(inOffset, inEnd, outOffset, outEnd);
                }
                if (inOffset == inEnd) {
                    savePartialMultiByte(3, b2, b1, b);
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                    return false;
                }
                byte b3 = in[inOffset++];
                if ((b3 & 0xc0) != 0x80) {
                    Object [] args = {
                        Integer.toHexString(b & 0xff),
                        Integer.toHexString(b1 & 0xff),
                        Integer.toHexString(b2 & 0xff),
                        Integer.toHexString(b3 & 0xff)
                    };
                    deferException(ImplementationMessages.ENC7, args, outOffset);
                    out[outOffset++] = 0;
                    return exitAfterMalformed(inOffset, inEnd, outOffset, outEnd);
                }
                // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
                int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
                if (ch >= 0x10000) {
                    out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
                    ch = (((ch-0x00010000)&0x3ff)+0xdc00);
                    if (outOffset == outEnd) {
                        // NOTE(review): the low surrogate is stored here, but
                        // nothing in this class sets fPartialMultiByteIn to 4,
                        // which is the state handlePartialMultiByteChar checks
                        // before emitting it. As written the low surrogate is
                        // not delivered; fixing this also needs room for the
                        // extra char in fillCurrentChunk. Left unchanged.
                        fPartialSurrogatePair = ch;
                        fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                        return false;
                    }
                }
                out[outOffset++] = (char)ch;
                if (inOffset == inEnd || outOffset == outEnd) {
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                    return false;
                }
            }
        }
        return exitNormalize(inOffset, outOffset, true);
    }

    /**
     * Completes a multi-byte sequence whose first bytes were saved from the
     * previous byte buffer.
     *
     * <p>On entry {@code b} is {@code in[inOffset]} and has not been consumed
     * yet. When a character is completed, its last byte is consumed before the
     * boundary checks so that it is not decoded a second time. After a
     * malformed sequence the offending byte is left unconsumed and is decoded
     * again as the start of the next character.
     *
     * @param b         the byte at {@code in[inOffset]}
     * @param in        byte buffer being decoded
     * @param inOffset  index of {@code b}
     * @param inEnd     end (exclusive) of the valid bytes in {@code in}
     * @param out       char array receiving the decoded chars
     * @param outOffset index of the next char to write
     * @param outEnd    end (exclusive) of {@code out}
     * @return true if decoding can continue at {@code fOverflowOffset} /
     *         {@code fOutputOffset}; false if {@link #copyNormalize} must
     *         return {@code fPartialMultiByteResult}
     * @throws Exception if {@code deferException} fails
     */
    private boolean handlePartialMultiByteChar(byte b, byte[] in, int inOffset, int inEnd, char[] out, int outOffset, int outEnd) throws Exception {
        if (outOffset == outEnd) {
            fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
            return false;
        }
        if (fPartialMultiByteIn == 4) {
            // Pending low surrogate. NOTE(review): no code in this class puts
            // fPartialMultiByteIn into this state, and the state is not
            // cleared here either; see handleMultiByteChar.
            out[outOffset++] = (char)fPartialSurrogatePair;
            if (outOffset == outEnd) {
                fPartialMultiByteResult = exitNormalize(inOffset, outOffset, false);
                return false;
            }
            fOutputOffset = outOffset;
            return true;
        }
        int byteIn = fPartialMultiByteIn;
        fPartialMultiByteIn = 0;
        byte b1 = 0;
        byte b2 = 0;
        byte b3 = 0;
        // The current byte continues the sequence right after the saved bytes.
        switch (byteIn) {
        case 1: b1 = b; break;
        case 2: b2 = b; break;
        case 3: b3 = b; break;
        }
        // Restore the saved bytes; the cases fall through intentionally.
        int i = byteIn;
        switch (byteIn) {
        case 3:
            b2 = fPartialMultiByteChar[--i];
        case 2:
            b1 = fPartialMultiByteChar[--i];
        case 1:
            b = fPartialMultiByteChar[--i];
        }
        // Resume decoding at the point where the sequence was cut off; the
        // cases fall through intentionally.
        switch (byteIn) {
        case 1:
            if ((b1 & 0xc0) != 0x80) {
                Object [] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff)
                };
                deferException(ImplementationMessages.ENC5, args, outOffset);
                out[outOffset++] = 0;
                break;
            }
        case 2:
            if ((b & 0xe0) == 0xc0) { // 110yyyyy 10xxxxxx
                int ch = ((0x1f & b)<<6) + (0x3f & b1);
                out[outOffset++] = (char)ch;
                // Consume the continuation byte before checking the bounds.
                if (byteIn < 2)
                    ++inOffset;
                if (inOffset == inEnd || outOffset == outEnd) {
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                    return false;
                }
                break;
            }
            if (byteIn < 2) {
                if (++inOffset == inEnd) {
                    savePartialMultiByte(2, b1);
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                    return false;
                }
                b2 = in[inOffset];
            }
            if ((b2 & 0xc0) != 0x80) {
                Object [] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff),
                    Integer.toHexString(b2 & 0xff)
                };
                deferException(ImplementationMessages.ENC6, args, outOffset);
                out[outOffset++] = 0;
                break;
            }
        case 3:
            if ((b & 0xf0) == 0xe0) { // 1110zzzz 10yyyyyy 10xxxxxx
                int ch = ((0x0f & b)<<12) + ((0x3f & b1)<<6) + (0x3f & b2);
                out[outOffset++] = (char)ch;
                // Consume the continuation byte before checking the bounds.
                if (byteIn < 3)
                    ++inOffset;
                if (inOffset == inEnd || outOffset == outEnd) {
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                    return false;
                }
                break;
            }
            if (byteIn < 3) {
                if ((b & 0xf8) != 0xf0) {
                    Object [] args = { Integer.toHexString(b & 0xff) };
                    deferException(ImplementationMessages.ENC4, args, outOffset);
                    out[outOffset++] = 0;
                    break;
                }
                if (++inOffset == inEnd) {
                    savePartialMultiByte(3, b2, b1);
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, true);
                    return false;
                }
                b3 = in[inOffset];
            }
            if ((b3 & 0xc0) != 0x80) {
                Object [] args = {
                    Integer.toHexString(b & 0xff),
                    Integer.toHexString(b1 & 0xff),
                    Integer.toHexString(b2 & 0xff),
                    Integer.toHexString(b3 & 0xff)
                };
                deferException(ImplementationMessages.ENC7, args, outOffset);
                out[outOffset++] = 0;
                break;
            }
            // 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
            int ch = ((0x0f & b)<<18) + ((0x3f & b1)<<12) + ((0x3f & b2)<<6) + (0x3f & b3);
            // All four bytes are used now; consume the last one before any exit.
            ++inOffset;
            if (ch >= 0x10000) {
                out[outOffset++] = (char)(((ch-0x00010000)>>10)+0xd800);
                ch = (((ch-0x00010000)&0x3ff)+0xdc00);
                if (outOffset == outEnd) {
                    // NOTE(review): same limitation as in handleMultiByteChar;
                    // the saved low surrogate is not emitted later.
                    fPartialSurrogatePair = ch;
                    fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                    return false;
                }
            }
            out[outOffset++] = (char)ch;
            if (inOffset == inEnd || outOffset == outEnd) {
                fPartialMultiByteResult = exitNormalize(inOffset, outOffset, inOffset == inEnd);
                return false;
            }
            break;
        }
        return exitNormalize(inOffset, outOffset, true);
    }
}