1 7 8 package com.ibm.icu.text; 9 10 72 public final class UnicodeDecompressor implements SCSU 73 { 74 78 79 private int fCurrentWindow = 0; 80 81 82 private int [] fOffsets = new int [ NUMWINDOWS ]; 83 84 85 private int fMode = SINGLEBYTEMODE; 86 87 88 private final static int BUFSIZE = 3; 89 90 91 private byte [] fBuffer = new byte [BUFSIZE]; 92 93 94 private int fBufferLength = 0; 95 96 97 103 public UnicodeDecompressor() 104 { 105 reset(); } 107 108 115 public static String decompress(byte [] buffer) 116 { 117 char [] buf = decompress(buffer, 0, buffer.length); 118 return new String (buf); 119 } 120 121 130 public static char [] decompress(byte [] buffer, 131 int start, 132 int limit) 133 { 134 UnicodeDecompressor comp = new UnicodeDecompressor(); 135 136 int len = Math.max(2, 2 * (limit - start)); 140 char [] temp = new char [len]; 141 142 int charCount = comp.decompress(buffer, start, limit, null, 143 temp, 0, len); 144 145 char [] result = new char [charCount]; 146 System.arraycopy(temp, 0, result, 0, charCount); 147 return result; 148 } 149 150 170 public int decompress(byte [] byteBuffer, 171 int byteBufferStart, 172 int byteBufferLimit, 173 int [] bytesRead, 174 char [] charBuffer, 175 int charBufferStart, 176 int charBufferLimit) 177 { 178 int bytePos = byteBufferStart; 180 181 int ucPos = charBufferStart; 183 184 int aByte = 0x00; 186 187 188 if(charBuffer.length < 2 || (charBufferLimit - charBufferStart) < 2) 190 throw new IllegalArgumentException ("charBuffer.length < 2"); 191 192 if(fBufferLength > 0) { 195 196 int newBytes = 0; 197 198 if(fBufferLength != BUFSIZE) { 200 newBytes = fBuffer.length - fBufferLength; 201 202 if(byteBufferLimit - byteBufferStart < newBytes) 204 newBytes = byteBufferLimit - byteBufferStart; 205 206 System.arraycopy(byteBuffer, byteBufferStart, 207 fBuffer, fBufferLength, newBytes); 208 } 209 210 fBufferLength = 0; 212 213 int count = decompress(fBuffer, 0, fBuffer.length, null, 215 charBuffer, charBufferStart, 216 charBufferLimit); 217 218 ucPos += count; 220 bytePos += newBytes; 221 } 222 223 mainLoop: 225 while(bytePos < byteBufferLimit && ucPos < charBufferLimit) { 226 switch(fMode) { 227 case SINGLEBYTEMODE: 228 singleByteModeLoop: 230 while(bytePos < byteBufferLimit && ucPos < charBufferLimit) { 231 aByte = byteBuffer[bytePos++] & 0xFF; 232 switch(aByte) { 233 case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: 237 case 0x85: case 0x86: case 0x87: case 0x88: case 0x89: 238 case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: 239 case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: 240 case 0x94: case 0x95: case 0x96: case 0x97: case 0x98: 241 case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: 242 case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: 243 case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: 244 case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: 245 case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1: 246 case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: 247 case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB: 248 case 0xBC: case 0xBD: case 0xBE: case 0xBF: case 0xC0: 249 case 0xC1: case 0xC2: case 0xC3: case 0xC4: case 0xC5: 250 case 0xC6: case 0xC7: case 0xC8: case 0xC9: case 0xCA: 251 case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: 252 case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4: 253 case 0xD5: case 0xD6: case 0xD7: case 0xD8: case 0xD9: 254 case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: 255 case 0xDF: case 0xE0: case 0xE1: case 0xE2: case 0xE3: 256 case 0xE4: case 0xE5: case 0xE6: case 0xE7: case 0xE8: 257 case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED: 258 case 0xEE: case 0xEF: case 0xF0: case 0xF1: case 0xF2: 259 case 0xF3: case 0xF4: case 0xF5: case 0xF6: case 0xF7: 260 case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: 261 case 0xFD: case 0xFE: case 0xFF: 262 if(fOffsets[ fCurrentWindow ] <= 0xFFFF) { 266 charBuffer[ucPos++] = (char) 267 (aByte + fOffsets[ fCurrentWindow ] 268 - COMPRESSIONOFFSET); 269 } 270 else { 275 if((ucPos + 1) >= charBufferLimit) { 279 --bytePos; 280 System.arraycopy(byteBuffer, bytePos, 281 fBuffer, 0, 282 byteBufferLimit - bytePos); 283 fBufferLength = byteBufferLimit - bytePos; 284 bytePos += fBufferLength; 285 break mainLoop; 286 } 287 288 int normalizedBase = fOffsets[ fCurrentWindow ] 289 - 0x10000; 290 charBuffer[ucPos++] = (char) 291 (0xD800 + (normalizedBase >> 10)); 292 charBuffer[ucPos++] = (char) 293 (0xDC00 + (normalizedBase & 0x3FF)+(aByte & 0x7F)); 294 } 295 break; 296 297 case 0x00: case 0x09: case 0x0A: case 0x0D: 303 case 0x20: case 0x21: case 0x22: case 0x23: case 0x24: 304 case 0x25: case 0x26: case 0x27: case 0x28: case 0x29: 305 case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: 306 case 0x2F: case 0x30: case 0x31: case 0x32: case 0x33: 307 case 0x34: case 0x35: case 0x36: case 0x37: case 0x38: 308 case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D: 309 case 0x3E: case 0x3F: case 0x40: case 0x41: case 0x42: 310 case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: 311 case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: 312 case 0x4D: case 0x4E: case 0x4F: case 0x50: case 0x51: 313 case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: 314 case 0x57: case 0x58: case 0x59: case 0x5A: case 0x5B: 315 case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60: 316 case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: 317 case 0x66: case 0x67: case 0x68: case 0x69: case 0x6A: 318 case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: 319 case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: 320 case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: 321 case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E: 322 case 0x7F: 323 charBuffer[ucPos++] = (char) aByte; 324 break; 325 326 case SQUOTEU: 328 if( (bytePos + 1) >= byteBufferLimit ) { 331 --bytePos; 332 System.arraycopy(byteBuffer, bytePos, 333 fBuffer, 0, 334 byteBufferLimit - bytePos); 335 fBufferLength = byteBufferLimit - bytePos; 336 bytePos += fBufferLength; 337 break mainLoop; 338 } 339 340 aByte = byteBuffer[bytePos++]; 341 charBuffer[ucPos++] = (char) 342 (aByte << 8 | (byteBuffer[bytePos++] & 0xFF)); 343 break; 344 345 case SCHANGEU: 347 fMode = UNICODEMODE; 348 break singleByteModeLoop; 349 351 case SQUOTE0: case SQUOTE1: case SQUOTE2: case SQUOTE3: 353 case SQUOTE4: case SQUOTE5: case SQUOTE6: case SQUOTE7: 354 if(bytePos >= byteBufferLimit) { 357 --bytePos; 358 System.arraycopy(byteBuffer, bytePos, 359 fBuffer, 0, 360 byteBufferLimit - bytePos); 361 fBufferLength = byteBufferLimit - bytePos; 362 bytePos += fBufferLength; 363 break mainLoop; 364 } 365 366 int dByte = byteBuffer[bytePos++] & 0xFF; 369 charBuffer[ucPos++] = (char) 370 (dByte+ (dByte >= 0x00 && dByte < 0x80 371 ? sOffsets[aByte - SQUOTE0] 372 : (fOffsets[aByte - SQUOTE0] 373 - COMPRESSIONOFFSET))); 374 break; 375 376 case SCHANGE0: case SCHANGE1: case SCHANGE2: case SCHANGE3: 378 case SCHANGE4: case SCHANGE5: case SCHANGE6: case SCHANGE7: 379 fCurrentWindow = aByte - SCHANGE0; 380 break; 381 382 case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3: 384 case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7: 385 if(bytePos >= byteBufferLimit) { 388 --bytePos; 389 System.arraycopy(byteBuffer, bytePos, 390 fBuffer, 0, 391 byteBufferLimit - bytePos); 392 fBufferLength = byteBufferLimit - bytePos; 393 bytePos += fBufferLength; 394 break mainLoop; 395 } 396 397 fCurrentWindow = aByte - SDEFINE0; 398 fOffsets[fCurrentWindow] = 399 sOffsetTable[byteBuffer[bytePos++] & 0xFF]; 400 break; 401 402 case SDEFINEX: 404 if((bytePos + 1) >= byteBufferLimit ) { 407 --bytePos; 408 System.arraycopy(byteBuffer, bytePos, 409 fBuffer, 0, 410 byteBufferLimit - bytePos); 411 fBufferLength = byteBufferLimit - bytePos; 412 bytePos += fBufferLength; 413 break mainLoop; 414 } 415 416 aByte = byteBuffer[bytePos++] & 0xFF; 417 fCurrentWindow = (aByte & 0xE0) >> 5; 418 fOffsets[fCurrentWindow] = 0x10000 + 419 (0x80 * (((aByte & 0x1F) << 8) 420 | (byteBuffer[bytePos++] & 0xFF))); 421 break; 422 423 case SRESERVED: 425 break; 426 427 } } break; 430 431 case UNICODEMODE: 432 unicodeModeLoop: 434 while(bytePos < byteBufferLimit && ucPos < charBufferLimit) { 435 aByte = byteBuffer[bytePos++] & 0xFF; 436 switch(aByte) { 437 case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3: 439 case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7: 440 if(bytePos >= byteBufferLimit ) { 443 --bytePos; 444 System.arraycopy(byteBuffer, bytePos, 445 fBuffer, 0, 446 byteBufferLimit - bytePos); 447 fBufferLength = byteBufferLimit - bytePos; 448 bytePos += fBufferLength; 449 break mainLoop; 450 } 451 452 fCurrentWindow = aByte - UDEFINE0; 453 fOffsets[fCurrentWindow] = 454 sOffsetTable[byteBuffer[bytePos++] & 0xFF]; 455 fMode = SINGLEBYTEMODE; 456 break unicodeModeLoop; 457 459 case UDEFINEX: 461 if((bytePos + 1) >= byteBufferLimit ) { 464 --bytePos; 465 System.arraycopy(byteBuffer, bytePos, 466 fBuffer, 0, 467 byteBufferLimit - bytePos); 468 fBufferLength = byteBufferLimit - bytePos; 469 bytePos += fBufferLength; 470 break mainLoop; 471 } 472 473 aByte = byteBuffer[bytePos++] & 0xFF; 474 fCurrentWindow = (aByte & 0xE0) >> 5; 475 fOffsets[fCurrentWindow] = 0x10000 + 476 (0x80 * (((aByte & 0x1F) << 8) 477 | (byteBuffer[bytePos++] & 0xFF))); 478 fMode = SINGLEBYTEMODE; 479 break unicodeModeLoop; 480 482 case UCHANGE0: case UCHANGE1: case UCHANGE2: case UCHANGE3: 484 case UCHANGE4: case UCHANGE5: case UCHANGE6: case UCHANGE7: 485 fCurrentWindow = aByte - UCHANGE0; 486 fMode = SINGLEBYTEMODE; 487 break unicodeModeLoop; 488 490 case UQUOTEU: 492 if(bytePos >= byteBufferLimit - 1) { 495 --bytePos; 496 System.arraycopy(byteBuffer, bytePos, 497 fBuffer, 0, 498 byteBufferLimit - bytePos); 499 fBufferLength = byteBufferLimit - bytePos; 500 bytePos += fBufferLength; 501 break mainLoop; 502 } 503 504 aByte = byteBuffer[bytePos++]; 505 charBuffer[ucPos++] = (char) 506 (aByte << 8 | (byteBuffer[bytePos++] & 0xFF)); 507 break; 508 509 default: 510 if(bytePos >= byteBufferLimit ) { 513 --bytePos; 514 System.arraycopy(byteBuffer, bytePos, 515 fBuffer, 0, 516 byteBufferLimit - bytePos); 517 fBufferLength = byteBufferLimit - bytePos; 518 bytePos += fBufferLength; 519 break mainLoop; 520 } 521 522 charBuffer[ucPos++] = (char) 523 (aByte << 8 | (byteBuffer[bytePos++] & 0xFF)); 524 break; 525 526 } } break; 529 530 } } 533 if(bytesRead != null) 535 bytesRead [0] = (bytePos - byteBufferStart); 536 537 return (ucPos - charBufferStart); 539 } 540 541 545 public void reset() 546 { 547 fOffsets[0] = 0x0080; fOffsets[1] = 0x00C0; fOffsets[2] = 0x0400; fOffsets[3] = 0x0600; fOffsets[4] = 0x0900; fOffsets[5] = 0x3040; fOffsets[6] = 0x30A0; fOffsets[7] = 0xFF00; 557 558 fCurrentWindow = 0; fMode = SINGLEBYTEMODE; fBufferLength = 0; } 562 }; 563 | Popular Tags |