1 17 18 package org.apache.jasper.xmlparser; 19 20 import java.io.InputStream ; 21 import java.io.IOException ; 22 import java.io.Reader ; 23 import java.io.UTFDataFormatException ; 24 import org.apache.jasper.compiler.Localizer; 25 26 31 public class UTF8Reader 32 extends Reader { 33 34 private org.apache.commons.logging.Log log= 35 org.apache.commons.logging.LogFactory.getLog( UTF8Reader.class ); 36 37 41 42 public static final int DEFAULT_BUFFER_SIZE = 2048; 43 44 46 47 private static final boolean DEBUG_READ = false; 48 49 53 54 protected InputStream fInputStream; 55 56 57 protected byte[] fBuffer; 58 59 60 protected int fOffset; 61 62 63 private int fSurrogate = -1; 64 65 69 76 public UTF8Reader(InputStream inputStream, int size) { 77 fInputStream = inputStream; 78 fBuffer = new byte[size]; 79 } 80 81 85 98 public int read() throws IOException { 99 100 int c = fSurrogate; 102 if (fSurrogate == -1) { 103 int index = 0; 106 107 int b0 = index == fOffset 109 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 110 if (b0 == -1) { 111 return -1; 112 } 113 114 if (b0 < 0x80) { 117 c = (char)b0; 118 } 119 120 else if ((b0 & 0xE0) == 0xC0) { 123 int b1 = index == fOffset 124 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 125 if (b1 == -1) { 126 expectedByte(2, 2); 127 } 128 if ((b1 & 0xC0) != 0x80) { 129 invalidByte(2, 2, b1); 130 } 131 c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F); 132 } 133 134 else if ((b0 & 0xF0) == 0xE0) { 137 int b1 = index == fOffset 138 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 139 if (b1 == -1) { 140 expectedByte(2, 3); 141 } 142 if ((b1 & 0xC0) != 0x80) { 143 invalidByte(2, 3, b1); 144 } 145 int b2 = index == fOffset 146 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 147 if (b2 == -1) { 148 expectedByte(3, 3); 149 } 150 if ((b2 & 0xC0) != 0x80) { 151 invalidByte(3, 3, b2); 152 } 153 c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) | 154 (b2 & 0x003F); 155 } 156 157 else if ((b0 & 0xF8) == 0xF0) { 162 int b1 = index == fOffset 163 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 164 if (b1 == -1) { 165 expectedByte(2, 4); 166 } 167 if ((b1 & 0xC0) != 0x80) { 168 invalidByte(2, 3, b1); 169 } 170 int b2 = index == fOffset 171 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 172 if (b2 == -1) { 173 expectedByte(3, 4); 174 } 175 if ((b2 & 0xC0) != 0x80) { 176 invalidByte(3, 3, b2); 177 } 178 int b3 = index == fOffset 179 ? fInputStream.read() : fBuffer[index++] & 0x00FF; 180 if (b3 == -1) { 181 expectedByte(4, 4); 182 } 183 if ((b3 & 0xC0) != 0x80) { 184 invalidByte(4, 4, b3); 185 } 186 int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003); 187 if (uuuuu > 0x10) { 188 invalidSurrogate(uuuuu); 189 } 190 int wwww = uuuuu - 1; 191 int hs = 0xD800 | 192 ((wwww << 6) & 0x03C0) | ((b1 << 2) & 0x003C) | 193 ((b2 >> 4) & 0x0003); 194 int ls = 0xDC00 | ((b2 << 6) & 0x03C0) | (b3 & 0x003F); 195 c = hs; 196 fSurrogate = ls; 197 } 198 199 else { 201 invalidByte(1, 1, b0); 202 } 203 } 204 205 else { 207 fSurrogate = -1; 208 } 209 210 if (DEBUG_READ) { 212 if (log.isDebugEnabled()) 213 log.debug("read(): 0x"+Integer.toHexString(c)); 214 } 215 return c; 216 217 } 219 233 public int read(char ch[], int offset, int length) throws IOException { 234 235 int out = offset; 237 if (fSurrogate != -1) { 238 ch[offset + 1] = (char)fSurrogate; 239 fSurrogate = -1; 240 length--; 241 out++; 242 } 243 244 int count = 0; 246 if (fOffset == 0) { 247 if (length > fBuffer.length) { 249 length = fBuffer.length; 250 } 251 252 count = fInputStream.read(fBuffer, 0, length); 254 if (count == -1) { 255 return -1; 256 } 257 count += out - offset; 258 } 259 260 else { 268 count = fOffset; 269 fOffset = 0; 270 } 271 272 final int total = count; 274 for (int in = 0; in < total; in++) { 275 int b0 = fBuffer[in] & 0x00FF; 276 277 if (b0 < 0x80) { 280 ch[out++] = (char)b0; 281 continue; 282 } 283 284 if ((b0 & 0xE0) == 0xC0) { 287 int b1 = -1; 288 if (++in < total) { 289 b1 = fBuffer[in] & 0x00FF; 290 } 291 else { 292 b1 = fInputStream.read(); 293 if (b1 == -1) { 294 if (out > offset) { 295 fBuffer[0] = (byte)b0; 296 fOffset = 1; 297 return out - offset; 298 } 299 expectedByte(2, 2); 300 } 301 count++; 302 } 303 if ((b1 & 0xC0) != 0x80) { 304 if (out > offset) { 305 fBuffer[0] = (byte)b0; 306 fBuffer[1] = (byte)b1; 307 fOffset = 2; 308 return out - offset; 309 } 310 invalidByte(2, 2, b1); 311 } 312 int c = ((b0 << 6) & 0x07C0) | (b1 & 0x003F); 313 ch[out++] = (char)c; 314 count -= 1; 315 continue; 316 } 317 318 if ((b0 & 0xF0) == 0xE0) { 321 int b1 = -1; 322 if (++in < total) { 323 b1 = fBuffer[in] & 0x00FF; 324 } 325 else { 326 b1 = fInputStream.read(); 327 if (b1 == -1) { 328 if (out > offset) { 329 fBuffer[0] = (byte)b0; 330 fOffset = 1; 331 return out - offset; 332 } 333 expectedByte(2, 3); 334 } 335 count++; 336 } 337 if ((b1 & 0xC0) != 0x80) { 338 if (out > offset) { 339 fBuffer[0] = (byte)b0; 340 fBuffer[1] = (byte)b1; 341 fOffset = 2; 342 return out - offset; 343 } 344 invalidByte(2, 3, b1); 345 } 346 int b2 = -1; 347 if (++in < total) { 348 b2 = fBuffer[in] & 0x00FF; 349 } 350 else { 351 b2 = fInputStream.read(); 352 if (b2 == -1) { 353 if (out > offset) { 354 fBuffer[0] = (byte)b0; 355 fBuffer[1] = (byte)b1; 356 fOffset = 2; 357 return out - offset; 358 } 359 expectedByte(3, 3); 360 } 361 count++; 362 } 363 if ((b2 & 0xC0) != 0x80) { 364 if (out > offset) { 365 fBuffer[0] = (byte)b0; 366 fBuffer[1] = (byte)b1; 367 fBuffer[2] = (byte)b2; 368 fOffset = 3; 369 return out - offset; 370 } 371 invalidByte(3, 3, b2); 372 } 373 int c = ((b0 << 12) & 0xF000) | ((b1 << 6) & 0x0FC0) | 374 (b2 & 0x003F); 375 ch[out++] = (char)c; 376 count -= 2; 377 continue; 378 } 379 380 if ((b0 & 0xF8) == 0xF0) { 385 int b1 = -1; 386 if (++in < total) { 387 b1 = fBuffer[in] & 0x00FF; 388 } 389 else { 390 b1 = fInputStream.read(); 391 if (b1 == -1) { 392 if (out > offset) { 393 fBuffer[0] = (byte)b0; 394 fOffset = 1; 395 return out - offset; 396 } 397 expectedByte(2, 4); 398 } 399 count++; 400 } 401 if ((b1 & 0xC0) != 0x80) { 402 if (out > offset) { 403 fBuffer[0] = (byte)b0; 404 fBuffer[1] = (byte)b1; 405 fOffset = 2; 406 return out - offset; 407 } 408 invalidByte(2, 4, b1); 409 } 410 int b2 = -1; 411 if (++in < total) { 412 b2 = fBuffer[in] & 0x00FF; 413 } 414 else { 415 b2 = fInputStream.read(); 416 if (b2 == -1) { 417 if (out > offset) { 418 fBuffer[0] = (byte)b0; 419 fBuffer[1] = (byte)b1; 420 fOffset = 2; 421 return out - offset; 422 } 423 expectedByte(3, 4); 424 } 425 count++; 426 } 427 if ((b2 & 0xC0) != 0x80) { 428 if (out > offset) { 429 fBuffer[0] = (byte)b0; 430 fBuffer[1] = (byte)b1; 431 fBuffer[2] = (byte)b2; 432 fOffset = 3; 433 return out - offset; 434 } 435 invalidByte(3, 4, b2); 436 } 437 int b3 = -1; 438 if (++in < total) { 439 b3 = fBuffer[in] & 0x00FF; 440 } 441 else { 442 b3 = fInputStream.read(); 443 if (b3 == -1) { 444 if (out > offset) { 445 fBuffer[0] = (byte)b0; 446 fBuffer[1] = (byte)b1; 447 fBuffer[2] = (byte)b2; 448 fOffset = 3; 449 return out - offset; 450 } 451 expectedByte(4, 4); 452 } 453 count++; 454 } 455 if ((b3 & 0xC0) != 0x80) { 456 if (out > offset) { 457 fBuffer[0] = (byte)b0; 458 fBuffer[1] = (byte)b1; 459 fBuffer[2] = (byte)b2; 460 fBuffer[3] = (byte)b3; 461 fOffset = 4; 462 return out - offset; 463 } 464 invalidByte(4, 4, b2); 465 } 466 467 int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003); 469 if (uuuuu > 0x10) { 470 invalidSurrogate(uuuuu); 471 } 472 int wwww = uuuuu - 1; 473 int zzzz = b1 & 0x000F; 474 int yyyyyy = b2 & 0x003F; 475 int xxxxxx = b3 & 0x003F; 476 int hs = 0xD800 | ((wwww << 6) & 0x03C0) | (zzzz << 2) | (yyyyyy >> 4); 477 int ls = 0xDC00 | ((yyyyyy << 6) & 0x03C0) | xxxxxx; 478 479 ch[out++] = (char)hs; 481 ch[out++] = (char)ls; 482 count -= 2; 483 continue; 484 } 485 486 if (out > offset) { 488 fBuffer[0] = (byte)b0; 489 fOffset = 1; 490 return out - offset; 491 } 492 invalidByte(1, 1, b0); 493 } 494 495 if (DEBUG_READ) { 497 if (log.isDebugEnabled()) 498 log.debug("read(char[],"+offset+','+length+"): count="+count); 499 } 500 return count; 501 502 } 504 514 public long skip(long n) throws IOException { 515 516 long remaining = n; 517 final char[] ch = new char[fBuffer.length]; 518 do { 519 int length = ch.length < remaining ? ch.length : (int)remaining; 520 int count = read(ch, 0, length); 521 if (count > 0) { 522 remaining -= count; 523 } 524 else { 525 break; 526 } 527 } while (remaining > 0); 528 529 long skipped = n - remaining; 530 return skipped; 531 532 } 534 543 public boolean ready() throws IOException { 544 return false; 545 } 547 550 public boolean markSupported() { 551 return false; 552 } 554 567 public void mark(int readAheadLimit) throws IOException { 568 throw new IOException ( 569 Localizer.getMessage("jsp.error.xml.operationNotSupported", 570 "mark()", "UTF-8")); 571 } 572 573 586 public void reset() throws IOException { 587 fOffset = 0; 588 fSurrogate = -1; 589 } 591 598 public void close() throws IOException { 599 fInputStream.close(); 600 } 602 606 607 private void expectedByte(int position, int count) 608 throws UTFDataFormatException { 609 610 throw new UTFDataFormatException ( 611 Localizer.getMessage("jsp.error.xml.expectedByte", 612 Integer.toString(position), 613 Integer.toString(count))); 614 615 } 617 618 private void invalidByte(int position, int count, int c) 619 throws UTFDataFormatException { 620 621 throw new UTFDataFormatException ( 622 Localizer.getMessage("jsp.error.xml.invalidByte", 623 Integer.toString(position), 624 Integer.toString(count))); 625 } 627 628 private void invalidSurrogate(int uuuuu) throws UTFDataFormatException { 629 630 throw new UTFDataFormatException ( 631 Localizer.getMessage("jsp.error.xml.invalidHighSurrogate", 632 Integer.toHexString(uuuuu))); 633 } 635 } | Popular Tags |