1 7 package com.ibm.icu.text; 8 9 76 77 186 public final class UnicodeCompressor implements SCSU 187 { 188 192 193 private static boolean [] sSingleTagTable = { 194 false, true, true, true, true, true, true, true, true, false, 196 false, true, true, false, true, true, true, true, true, true, 197 true, true, true, true, true, true, true, true, true, true, 198 true, true, false, false, false, false, false, false,false, 199 false, false, false, false, false, false, false, false, false, 200 false, false, false, false, false, false, false, false, false, 201 false, false, false, false, false, false, false, false, false, 202 false, false, false, false, false, false, false, false, false, 203 false, false, false, false, false, false, false, false, false, 204 false, false, false, false, false, false, false, false, false, 205 false, false, false, false, false, false, false, false, false, 206 false, false, false, false, false, false, false, false, false, 207 false, false, false, false, false, false, false, false, false, 208 false, false, false, false, false, false, false, false, false, 209 false, false, false, false, false, false, false, false, false, 210 false, false, false, false, false, false, false, false, false, 211 false, false, false, false, false, false, false, false, false, 212 false, false, false, false, false, false, false, false, false, 213 false, false, false, false, false, false, false, false, false, 214 false, false, false, false, false, false, false, false, false, 215 false, false, false, false, false, false, false, false, false, 216 false, false, false, false, false, false, false, false, false, 217 false, false, false, false, false, false, false, false, false, 218 false, false, false, false, false, false, false, false, false, 219 false, false, false, false, false, false, false, false, false, 220 false, false, false, false, false, false, false, false, false, 221 false, false, false, false, false, false, false, false, false, 222 false, false, false, false, false, false, false, false, false, 223 false 224 }; 225 226 227 private static boolean [] sUnicodeTagTable = { 228 false, false, false, false, false, false, false, false, false, 230 false, false, false, false, false, false, false, false, false, 231 false, false, false, false, false, false, false, false, false, 232 false, false, false, false, false, false, false, false, false, 233 false, false, false, false, false, false, false, false, false, 234 false, false, false, false, false, false, false, false, false, 235 false, false, false, false, false, false, false, false, false, 236 false, false, false, false, false, false, false, false, false, 237 false, false, false, false, false, false, false, false, false, 238 false, false, false, false, false, false, false, false, false, 239 false, false, false, false, false, false, false, false, false, 240 false, false, false, false, false, false, false, false, false, 241 false, false, false, false, false, false, false, false, false, 242 false, false, false, false, false, false, false, false, false, 243 false, false, false, false, false, false, false, false, false, 244 false, false, false, false, false, false, false, false, false, 245 false, false, false, false, false, false, false, false, false, 246 false, false, false, false, false, false, false, false, false, 247 false, false, false, false, false, false, false, false, false, 248 false, false, false, false, false, false, false, false, false, 249 false, false, false, false, false, false, false, false, false, 250 false, false, false, false, false, false, false, false, false, 251 false, false, false, false, false, false, false, false, false, 252 false, false, false, false, false, false, false, false, false, 253 false, false, false, false, false, false, false, false, true, 254 true, true, true, true, true, true, true, true, true, true, 255 true, true, true, true, true, true, true, true, false, false, 256 false, false, false, false, false, false, false, false, false, 257 false, false 258 }; 259 260 264 265 private int fCurrentWindow = 0; 266 267 268 private int [] fOffsets = new int [ NUMWINDOWS ]; 269 270 271 private int fMode = SINGLEBYTEMODE; 272 273 274 private int [] fIndexCount = new int [ MAXINDEX + 1 ]; 275 276 277 private int [] fTimeStamps = new int [ NUMWINDOWS ]; 278 279 280 private int fTimeStamp = 0; 281 282 283 289 public UnicodeCompressor() 290 { 291 reset(); } 293 294 301 public static byte [] compress(String buffer) 302 { 303 return compress(buffer.toCharArray(), 0, buffer.length()); 304 } 305 306 315 public static byte [] compress(char [] buffer, 316 int start, 317 int limit) 318 { 319 UnicodeCompressor comp = new UnicodeCompressor(); 320 321 int len = Math.max(4, 3 * (limit - start) + 1); 327 byte [] temp = new byte [len]; 328 329 int byteCount = comp.compress(buffer, start, limit, null, 330 temp, 0, len); 331 332 byte [] result = new byte [byteCount]; 333 System.arraycopy(temp, 0, result, 0, byteCount); 334 return result; 335 } 336 337 356 public int compress(char [] charBuffer, 357 int charBufferStart, 358 int charBufferLimit, 359 int [] charsRead, 360 byte [] byteBuffer, 361 int byteBufferStart, 362 int byteBufferLimit) 363 { 364 int bytePos = byteBufferStart; 366 367 int ucPos = charBufferStart; 369 370 int curUC = INVALIDCHAR; 372 373 int curIndex = -1; 375 376 int nextUC = INVALIDCHAR; 378 int forwardUC = INVALIDCHAR; 379 380 int whichWindow = 0; 382 383 int hiByte = 0; 385 int loByte = 0; 386 387 388 if(byteBuffer.length < 4 || (byteBufferLimit - byteBufferStart) < 4) 390 throw new IllegalArgumentException ("byteBuffer.length < 4"); 391 392 mainLoop: 393 while(ucPos < charBufferLimit && bytePos < byteBufferLimit) { 394 switch(fMode) { 395 case SINGLEBYTEMODE: 397 singleByteModeLoop: 398 while(ucPos < charBufferLimit && bytePos < byteBufferLimit) { 399 curUC = charBuffer[ucPos++]; 401 402 if(ucPos < charBufferLimit) 404 nextUC = charBuffer[ucPos]; 405 else 406 nextUC = INVALIDCHAR; 407 408 if(curUC < 0x0080) { 411 loByte = curUC & 0xFF; 412 413 if(sSingleTagTable[loByte]) { 417 if( (bytePos + 1) >= byteBufferLimit) 421 { --ucPos; break mainLoop; } 422 423 byteBuffer[bytePos++] = (byte) SQUOTE0; 426 } 427 428 byteBuffer[bytePos++] = (byte) loByte; 429 } 430 431 else if(inDynamicWindow(curUC, fCurrentWindow) ) { 435 byteBuffer[bytePos++] = (byte) 436 (curUC - fOffsets[ fCurrentWindow ] 437 + COMPRESSIONOFFSET); 438 } 439 440 else if( ! isCompressible(curUC) ) { 443 if(nextUC != INVALIDCHAR && isCompressible(nextUC)) { 445 if( (bytePos + 2) >= byteBufferLimit) 450 { --ucPos; break mainLoop; } 451 452 byteBuffer[bytePos++] = (byte) SQUOTEU; 453 byteBuffer[bytePos++] = (byte) (curUC >>> 8); 454 byteBuffer[bytePos++] = (byte) (curUC & 0xFF); 455 } 456 else { 457 if((bytePos + 3) >= byteBufferLimit) 461 { --ucPos; break mainLoop; } 462 463 byteBuffer[bytePos++] = (byte) SCHANGEU; 464 465 hiByte = curUC >>> 8; 466 loByte = curUC & 0xFF; 467 468 if(sUnicodeTagTable[hiByte]) 469 byteBuffer[bytePos++] = (byte) UQUOTEU; 471 472 byteBuffer[bytePos++] = (byte) hiByte; 473 byteBuffer[bytePos++] = (byte) loByte; 474 475 fMode = UNICODEMODE; 476 break singleByteModeLoop; 477 } 478 } 479 480 else if((whichWindow = findDynamicWindow(curUC)) 484 != INVALIDWINDOW ) { 485 if( (ucPos + 1) < charBufferLimit ) 487 forwardUC = charBuffer[ucPos + 1]; 488 else 489 forwardUC = INVALIDCHAR; 490 491 if(inDynamicWindow(nextUC, whichWindow) 495 && inDynamicWindow(forwardUC, whichWindow)) { 496 if( (bytePos + 1) >= byteBufferLimit) 500 { --ucPos; break mainLoop; } 501 502 byteBuffer[bytePos++] = (byte)(SCHANGE0 + whichWindow); 503 byteBuffer[bytePos++] = (byte) 504 (curUC - fOffsets[whichWindow] 505 + COMPRESSIONOFFSET); 506 fTimeStamps [ whichWindow ] = ++fTimeStamp; 507 fCurrentWindow = whichWindow; 508 } 509 510 else { 513 if((bytePos + 1) >= byteBufferLimit) 517 { --ucPos; break mainLoop; } 518 519 byteBuffer[bytePos++] = (byte) (SQUOTE0 + whichWindow); 520 byteBuffer[bytePos++] = (byte) 521 (curUC - fOffsets[whichWindow] 522 + COMPRESSIONOFFSET); 523 } 524 } 525 526 else if((whichWindow = findStaticWindow(curUC)) 531 != INVALIDWINDOW 532 && ! inStaticWindow(nextUC, whichWindow) ) { 533 if((bytePos + 1) >= byteBufferLimit) 537 { --ucPos; break mainLoop; } 538 539 byteBuffer[bytePos++] = (byte) (SQUOTE0 + whichWindow); 540 byteBuffer[bytePos++] = (byte) 541 (curUC - sOffsets[whichWindow]); 542 } 543 544 else { 547 curIndex = makeIndex(curUC); 549 fIndexCount[curIndex]++; 550 551 if((ucPos + 1) < charBufferLimit) 553 forwardUC = charBuffer[ucPos + 1]; 554 else 555 forwardUC = INVALIDCHAR; 556 557 if((fIndexCount[curIndex] > 1) || 564 (curIndex == makeIndex(nextUC) 565 && curIndex == makeIndex(forwardUC))) { 566 if( (bytePos + 2) >= byteBufferLimit) 570 { --ucPos; break mainLoop; } 571 572 whichWindow = getLRDefinedWindow(); 574 575 byteBuffer[bytePos++] = (byte)(SDEFINE0 + whichWindow); 576 byteBuffer[bytePos++] = (byte) curIndex; 577 byteBuffer[bytePos++] = (byte) 578 (curUC - sOffsetTable[curIndex] 579 + COMPRESSIONOFFSET); 580 581 fOffsets[whichWindow] = sOffsetTable[curIndex]; 582 fCurrentWindow = whichWindow; 583 fTimeStamps [whichWindow] = ++fTimeStamp; 584 } 585 586 else { 593 if((bytePos + 3) >= byteBufferLimit) 597 { --ucPos; break mainLoop; } 598 599 byteBuffer[bytePos++] = (byte) SCHANGEU; 600 601 hiByte = curUC >>> 8; 602 loByte = curUC & 0xFF; 603 604 if(sUnicodeTagTable[hiByte]) 605 byteBuffer[bytePos++] = (byte) UQUOTEU; 607 608 byteBuffer[bytePos++] = (byte) hiByte; 609 byteBuffer[bytePos++] = (byte) loByte; 610 611 fMode = UNICODEMODE; 612 break singleByteModeLoop; 613 } 614 } 615 } 616 break; 617 618 case UNICODEMODE: 619 unicodeModeLoop: 621 while(ucPos < charBufferLimit && bytePos < byteBufferLimit) { 622 curUC = charBuffer[ucPos++]; 624 625 if( ucPos < charBufferLimit ) 627 nextUC = charBuffer[ucPos]; 628 else 629 nextUC = INVALIDCHAR; 630 631 if( ! isCompressible(curUC) 634 || (nextUC != INVALIDCHAR && ! isCompressible(nextUC))) { 635 if( (bytePos + 2) >= byteBufferLimit) 638 { --ucPos; break mainLoop; } 639 640 hiByte = curUC >>> 8; 641 loByte = curUC & 0xFF; 642 643 if(sUnicodeTagTable[ hiByte ]) 644 byteBuffer[bytePos++] = (byte) UQUOTEU; 646 647 byteBuffer[bytePos++] = (byte) hiByte; 648 byteBuffer[bytePos++] = (byte) loByte; 649 } 650 651 else if(curUC < 0x0080) { 654 loByte = curUC & 0xFF; 655 656 if(nextUC != INVALIDCHAR 660 && nextUC < 0x0080 && ! sSingleTagTable[ loByte ] ) { 661 if( (bytePos + 1) >= byteBufferLimit) 665 { --ucPos; break mainLoop; } 666 667 whichWindow = fCurrentWindow; 669 byteBuffer[bytePos++] = (byte)(UCHANGE0 + whichWindow); 670 byteBuffer[bytePos++] = (byte) loByte; 671 672 fTimeStamps [whichWindow] = ++fTimeStamp; 674 fMode = SINGLEBYTEMODE; 675 break unicodeModeLoop; 676 } 677 678 else { 682 if((bytePos + 1) >= byteBufferLimit) 686 { --ucPos; break mainLoop; } 687 688 byteBuffer[bytePos++] = (byte) 0x00; 692 byteBuffer[bytePos++] = (byte) loByte; 693 } 694 } 695 696 else if((whichWindow = findDynamicWindow(curUC)) 698 != INVALIDWINDOW ) { 699 if(inDynamicWindow(nextUC, whichWindow)) { 703 if((bytePos + 1) >= byteBufferLimit) 707 { --ucPos; break mainLoop; } 708 709 byteBuffer[bytePos++] = (byte)(UCHANGE0 + whichWindow); 710 byteBuffer[bytePos++] = (byte) 711 (curUC - fOffsets[whichWindow] 712 + COMPRESSIONOFFSET); 713 714 fTimeStamps [ whichWindow ] = ++fTimeStamp; 715 fCurrentWindow = whichWindow; 716 fMode = SINGLEBYTEMODE; 717 break unicodeModeLoop; 718 } 719 720 else { 722 if((bytePos + 2) >= byteBufferLimit) 727 { --ucPos; break mainLoop; } 728 729 hiByte = curUC >>> 8; 730 loByte = curUC & 0xFF; 731 732 if(sUnicodeTagTable[ hiByte ]) 733 byteBuffer[bytePos++] = (byte) UQUOTEU; 735 736 byteBuffer[bytePos++] = (byte) hiByte; 737 byteBuffer[bytePos++] = (byte) loByte; 738 } 739 } 740 741 else { 743 curIndex = makeIndex(curUC); 745 fIndexCount[curIndex]++; 746 747 if( (ucPos + 1) < charBufferLimit ) 749 forwardUC = charBuffer[ucPos + 1]; 750 else 751 forwardUC = INVALIDCHAR; 752 753 if((fIndexCount[curIndex] > 1) || 761 (curIndex == makeIndex(nextUC) 762 && curIndex == makeIndex(forwardUC))) { 763 if((bytePos + 2) >= byteBufferLimit) 768 { --ucPos; break mainLoop; } 769 770 whichWindow = getLRDefinedWindow(); 772 773 byteBuffer[bytePos++] = (byte)(UDEFINE0 + whichWindow); 774 byteBuffer[bytePos++] = (byte) curIndex; 775 byteBuffer[bytePos++] = (byte) 776 (curUC - sOffsetTable[curIndex] 777 + COMPRESSIONOFFSET); 778 779 fOffsets[whichWindow] = sOffsetTable[curIndex]; 780 fCurrentWindow = whichWindow; 781 fTimeStamps [whichWindow] = ++fTimeStamp; 782 fMode = SINGLEBYTEMODE; 783 break unicodeModeLoop; 784 } 785 786 else { 789 if((bytePos + 2) >= byteBufferLimit) 794 { --ucPos; break mainLoop; } 795 796 hiByte = curUC >>> 8; 797 loByte = curUC & 0xFF; 798 799 if(sUnicodeTagTable[ hiByte ]) 800 byteBuffer[bytePos++] = (byte) UQUOTEU; 802 803 byteBuffer[bytePos++] = (byte) hiByte; 804 byteBuffer[bytePos++] = (byte) loByte; 805 } 806 } 807 } 808 } } 810 811 if(charsRead != null) 813 charsRead [0] = (ucPos - charBufferStart); 814 815 return (bytePos - byteBufferStart); 817 } 818 819 823 public void reset() 824 { 825 int i; 826 827 fOffsets[0] = 0x0080; fOffsets[1] = 0x00C0; fOffsets[2] = 0x0400; fOffsets[3] = 0x0600; fOffsets[4] = 0x0900; fOffsets[5] = 0x3040; fOffsets[6] = 0x30A0; fOffsets[7] = 0xFF00; 837 838 for(i = 0; i < NUMWINDOWS; i++) { 840 fTimeStamps[i] = 0; 841 } 842 843 for(i = 0; i <= MAXINDEX; i++ ) { 845 fIndexCount[i] = 0; 846 } 847 848 fTimeStamp = 0; fCurrentWindow = 0; fMode = SINGLEBYTEMODE; } 852 853 857 864 private static int makeIndex(int c) 865 { 866 if(c >= 0x00C0 && c < 0x0140) 868 return LATININDEX; 869 else if(c >= 0x0250 && c < 0x02D0) 870 return IPAEXTENSIONINDEX; 871 else if(c >= 0x0370 && c < 0x03F0) 872 return GREEKINDEX; 873 else if(c >= 0x0530 && c < 0x0590) 874 return ARMENIANINDEX; 875 else if(c >= 0x3040 && c < 0x30A0) 876 return HIRAGANAINDEX; 877 else if(c >= 0x30A0 && c < 0x3120) 878 return KATAKANAINDEX; 879 else if(c >= 0xFF60 && c < 0xFF9F) 880 return HALFWIDTHKATAKANAINDEX; 881 882 else if(c >= 0x0080 && c < 0x3400) 884 return (c / 0x80) & 0xFF; 885 else if(c >= 0xE000 && c <= 0xFFFF) 886 return ((c - 0xAC00) / 0x80) & 0xFF; 887 888 else { 890 return RESERVEDINDEX; 891 } 892 } 893 894 898 905 private boolean inDynamicWindow(int c, 906 int whichWindow) 907 { 908 return (c >= fOffsets[whichWindow] 909 && c < (fOffsets[whichWindow] + 0x80)); 910 } 911 912 919 private static boolean inStaticWindow(int c, 920 int whichWindow) 921 { 922 return (c >= sOffsets[whichWindow] 923 && c < (sOffsets[whichWindow] + 0x80)); 924 } 925 926 930 935 private static boolean isCompressible(int c) 936 { 937 return (c < 0x3400 || c >= 0xE000); 938 } 939 940 944 950 private int findDynamicWindow(int c) 951 { 952 for(int i = NUMWINDOWS - 1; i >= 0; --i) { 955 if(inDynamicWindow(c, i)) { 956 ++fTimeStamps[i]; 957 return i; 958 } 959 } 960 961 return INVALIDWINDOW; 962 } 963 964 970 private static int findStaticWindow(int c) 971 { 972 for(int i = NUMSTATICWINDOWS - 1; i >= 0; --i) { 975 if(inStaticWindow(c, i)) { 976 return i; 977 } 978 } 979 980 return INVALIDWINDOW; 981 } 982 983 987 988 private int getLRDefinedWindow() 989 { 990 int leastRU = Integer.MAX_VALUE; 991 int whichWindow = INVALIDWINDOW; 992 993 for(int i = NUMWINDOWS - 1; i >= 0; --i ) { 997 if( fTimeStamps[i] < leastRU ) { 998 leastRU = fTimeStamps[i]; 999 whichWindow = i; 1000 } 1001 } 1002 1003 return whichWindow; 1004 } 1005 1006}; 1007 | Popular Tags |