1 7 8 package com.ibm.icu.impl; 9 import java.io.ByteArrayInputStream ; 10 import java.io.IOException ; 11 import java.io.BufferedInputStream ; 12 import java.io.InputStream ; 13 import java.util.MissingResourceException ; 14 15 import com.ibm.icu.text.Normalizer; 16 import com.ibm.icu.text.UTF16; 17 import com.ibm.icu.text.UnicodeSet; 18 import com.ibm.icu.text.UnicodeSetIterator; 19 import com.ibm.icu.util.RangeValueIterator; 20 import com.ibm.icu.util.VersionInfo; 21 import com.ibm.icu.lang.UCharacter; 22 23 27 public final class NormalizerImpl { 28 static final NormalizerImpl IMPL; 30 31 static 32 { 33 try 34 { 35 IMPL = new NormalizerImpl(); 36 } 37 catch (Exception e) 38 { 39 throw new MissingResourceException (e.getMessage(), "", ""); 40 } 41 } 42 43 static final int UNSIGNED_BYTE_MASK =0xFF; 44 static final long UNSIGNED_INT_MASK = 0xffffffffL; 45 50 private static final String DATA_FILE_NAME = ICUResourceBundle.ICU_BUNDLE+"/unorm.icu"; 51 52 54 public static final int QC_NFC=0x11; 56 public static final int QC_NFKC=0x22; 57 public static final int QC_NFD=4; 58 public static final int QC_NFKD=8; 59 60 public static final int QC_ANY_NO=0xf; 61 62 65 public static final int QC_MAYBE=0x10; 66 public static final int QC_ANY_MAYBE=0x30; 67 68 public static final int QC_MASK=0x3f; 69 70 private static final int COMBINES_FWD=0x40; 71 private static final int COMBINES_BACK=0x80; 72 public static final int COMBINES_ANY=0xc0; 73 private static final int CC_SHIFT=8; 75 public static final int CC_MASK=0xff00; 76 private static final int EXTRA_SHIFT=16; 78 private static final int EXTRA_INDEX_TOP=0xfc00; 80 81 private static final int EXTRA_SURROGATE_MASK=0x3ff; 82 private static final int EXTRA_SURROGATE_TOP=0x3f0; 83 84 private static final int EXTRA_HANGUL=EXTRA_SURROGATE_TOP; 85 private static final int EXTRA_JAMO_L=EXTRA_SURROGATE_TOP+1; 86 private static final int EXTRA_JAMO_V=EXTRA_SURROGATE_TOP+2; 87 private static final int EXTRA_JAMO_T=EXTRA_SURROGATE_TOP+3; 
88 89 90 private static final long MIN_SPECIAL = (long)(0xfc000000 & UNSIGNED_INT_MASK); 91 private static final long SURROGATES_TOP = (long)(0xfff00000 & UNSIGNED_INT_MASK); 92 private static final long MIN_HANGUL = (long)(0xfff00000 & UNSIGNED_INT_MASK); 93 private static final long MIN_JAMO_V = (long)(0xfff20000 & UNSIGNED_INT_MASK); 94 private static final long JAMO_V_TOP = (long)(0xfff30000 & UNSIGNED_INT_MASK); 95 96 97 98 99 static final int INDEX_TRIE_SIZE = 0; 100 101 static final int INDEX_CHAR_COUNT = 1; 102 103 static final int INDEX_COMBINE_DATA_COUNT = 2; 104 105 static final int INDEX_COMBINE_FWD_COUNT = 3; 106 107 static final int INDEX_COMBINE_BOTH_COUNT = 4; 108 109 static final int INDEX_COMBINE_BACK_COUNT = 5; 110 111 public static final int INDEX_MIN_NFC_NO_MAYBE = 6; 112 113 public static final int INDEX_MIN_NFKC_NO_MAYBE = 7; 114 115 public static final int INDEX_MIN_NFD_NO_MAYBE = 8; 116 117 public static final int INDEX_MIN_NFKD_NO_MAYBE = 9; 118 119 static final int INDEX_FCD_TRIE_SIZE = 10; 120 121 static final int INDEX_AUX_TRIE_SIZE = 11; 122 123 static final int INDEX_CANON_SET_COUNT = 12; 124 125 static final int INDEX_TOP = 32; 126 127 128 129 130 private static final int AUX_UNSAFE_SHIFT = 11; 131 private static final int AUX_COMP_EX_SHIFT = 10; 132 private static final int AUX_NFC_SKIPPABLE_F_SHIFT = 12; 133 134 private static final int AUX_MAX_FNC = ((int)1<<AUX_COMP_EX_SHIFT); 135 private static final int AUX_UNSAFE_MASK = (int)((1<<AUX_UNSAFE_SHIFT) & UNSIGNED_INT_MASK); 136 private static final int AUX_FNC_MASK = (int)((AUX_MAX_FNC-1) & UNSIGNED_INT_MASK); 137 private static final int AUX_COMP_EX_MASK = (int)((1<<AUX_COMP_EX_SHIFT) & UNSIGNED_INT_MASK); 138 private static final long AUX_NFC_SKIP_F_MASK = ((UNSIGNED_INT_MASK&1)<<AUX_NFC_SKIPPABLE_F_SHIFT); 139 140 141 142 static final int SET_INDEX_CANON_SETS_LENGTH = 0; 143 144 static final int SET_INDEX_CANON_BMP_TABLE_LENGTH = 1; 145 146 static final int 
SET_INDEX_CANON_SUPP_TABLE_LENGTH = 2; 147 148 static final int SET_INDEX_TOP = 32; 149 150 static final int CANON_SET_INDICIES_INDEX = 0; 151 static final int CANON_SET_START_SETS_INDEX = 1; 152 static final int CANON_SET_BMP_TABLE_INDEX = 2; 153 static final int CANON_SET_SUPP_TABLE_INDEX = 3; 154 155 static final int CANON_SET_MAX_CANON_SETS = 0x4000; 156 159 static final int CANON_SET_BMP_MASK = 0xc000; 160 static final int CANON_SET_BMP_IS_INDEX = 0x4000; 161 162 private static final int MAX_BUFFER_SIZE = 20; 163 164 169 public static final int COMPARE_EQUIV = 0x80000; 170 171 172 173 174 static final class NormTrieImpl implements Trie.DataManipulate{ 175 static IntTrie normTrie= null; 176 183 184 public int getFoldingOffset(int value){ 185 return BMP_INDEX_LENGTH+ 186 ((value>>(EXTRA_SHIFT-SURROGATE_BLOCK_BITS))& 187 (0x3ff<<SURROGATE_BLOCK_BITS)); 188 } 189 190 } 191 static final class FCDTrieImpl implements Trie.DataManipulate{ 192 static CharTrie fcdTrie=null; 193 200 201 public int getFoldingOffset(int value){ 202 return value; 203 } 204 } 205 206 static final class AuxTrieImpl implements Trie.DataManipulate{ 207 static CharTrie auxTrie = null; 208 215 216 public int getFoldingOffset(int value){ 217 return (int)(value &AUX_FNC_MASK)<<SURROGATE_BLOCK_BITS; 218 } 219 } 220 221 222 223 224 private static FCDTrieImpl fcdTrieImpl; 225 private static NormTrieImpl normTrieImpl; 226 private static AuxTrieImpl auxTrieImpl; 227 private static int[] indexes; 228 private static char[] combiningTable; 229 private static char[] extraData; 230 private static Object [] canonStartSets; 231 232 private static boolean isDataLoaded; 233 private static boolean isFormatVersion_2_1; 234 private static boolean isFormatVersion_2_2; 235 private static byte[] unicodeVersion; 236 237 240 private static final int DATA_BUFFER_SIZE = 25000; 241 242 246 public static final int MIN_WITH_LEAD_CC=0x300; 247 248 249 256 private static final int DECOMP_FLAG_LENGTH_HAS_CC=0x80; 257 260 
// ---- decomposition length word and trie geometry ----

    /** Mask for the length bits of a decomposition length word. */
    private static final int DECOMP_LENGTH_MASK = 0x7f;

    /** Number of stage-1 index entries covering U+0000..U+FFFF. */
    private static final int BMP_INDEX_LENGTH = 0x10000 >> Trie.INDEX_STAGE_1_SHIFT_;

    /** log2 of the number of stage-1 index entries per 1024 code points. */
    private static final int SURROGATE_BLOCK_BITS = 10 - Trie.INDEX_STAGE_1_SHIFT_;

    /**
     * Returns one slot of the indexes array read from the data file.
     *
     * @param index slot number, one of the INDEX_* constants
     * @return the stored value
     */
    public static int getFromIndexesArr(int index) {
        return indexes[index];
    }

    /**
     * Loads the normalization data into the static fields of this class,
     * unless that has already happened.
     *
     * @throws IOException if the data file cannot be read
     */
    private NormalizerImpl() throws IOException {
        if (!isDataLoaded) {
            InputStream i = ICUData.getRequiredStream(DATA_FILE_NAME);
            BufferedInputStream b = new BufferedInputStream(i, DATA_BUFFER_SIZE);
            try {
                NormalizerDataReader reader = new NormalizerDataReader(b);

                // The index array gives the size of every section that follows.
                indexes = reader.readIndexes(NormalizerImpl.INDEX_TOP);

                byte[] normBytes = new byte[indexes[NormalizerImpl.INDEX_TRIE_SIZE]];

                int combiningTableTop = indexes[NormalizerImpl.INDEX_COMBINE_DATA_COUNT];
                combiningTable = new char[combiningTableTop];

                int extraDataTop = indexes[NormalizerImpl.INDEX_CHAR_COUNT];
                extraData = new char[extraDataTop];

                byte[] fcdBytes = new byte[indexes[NormalizerImpl.INDEX_FCD_TRIE_SIZE]];
                byte[] auxBytes = new byte[indexes[NormalizerImpl.INDEX_AUX_TRIE_SIZE]];
                canonStartSets = new Object[NormalizerImpl.CANON_SET_MAX_CANON_SETS];

                fcdTrieImpl = new FCDTrieImpl();
                normTrieImpl = new NormTrieImpl();
                auxTrieImpl = new AuxTrieImpl();

                reader.read(normBytes, fcdBytes, auxBytes, extraData, combiningTable,
                            canonStartSets);

                NormTrieImpl.normTrie = new IntTrie(new ByteArrayInputStream(normBytes), normTrieImpl);
                FCDTrieImpl.fcdTrie = new CharTrie(new ByteArrayInputStream(fcdBytes), fcdTrieImpl);
                AuxTrieImpl.auxTrie = new CharTrie(new ByteArrayInputStream(auxBytes), auxTrieImpl);

                byte[] formatVersion = reader.getDataFormatVersion();

                isFormatVersion_2_1 = (formatVersion[0] > 2
                                       || (formatVersion[0] == 2 && formatVersion[1] >= 1));
                isFormatVersion_2_2 = (formatVersion[0] > 2
                                       || (formatVersion[0] == 2 && formatVersion[1] >= 2));
                unicodeVersion = reader.getUnicodeVersion();

                // Only mark the data as loaded once every field is populated.
                isDataLoaded = true;
            } finally {
                // Release the stream even when reading or trie construction fails.
                b.close();
            }
        }
    }

    // ---- Hangul syllable arithmetic ----

    public static final int JAMO_L_BASE = 0x1100;
    public static final int JAMO_V_BASE = 0x1161;
    public static final int JAMO_T_BASE = 0x11a7;

    public static final int HANGUL_BASE = 0xac00;

    public static final int JAMO_L_COUNT = 19;
    public static final int JAMO_V_COUNT = 21;
    public static final int JAMO_T_COUNT = 28;
    public static final int HANGUL_COUNT = JAMO_L_COUNT * JAMO_V_COUNT * JAMO_T_COUNT;

    /**
     * Tells whether c lies in [HANGUL_BASE, HANGUL_BASE+HANGUL_COUNT) at an
     * offset that is a multiple of JAMO_T_COUNT.
     */
    private static boolean isHangulWithoutJamoT(char c) {
        // char arithmetic wraps, so values below HANGUL_BASE become large
        // and fail the range test.
        c -= HANGUL_BASE;
        return c < HANGUL_COUNT && c % JAMO_T_COUNT == 0;
    }

    // ---- norm32 classification ----

    /** True if norm32 is below MIN_SPECIAL. */
    private static boolean isNorm32Regular(long norm32) {
        return norm32 < MIN_SPECIAL;
    }

    /** True if norm32 lies in [MIN_SPECIAL, SURROGATES_TOP). */
    private static boolean isNorm32LeadSurrogate(long norm32) {
        return MIN_SPECIAL <= norm32 && norm32 < SURROGATES_TOP;
    }

    /** True if norm32 is at or above MIN_HANGUL. */
    private static boolean isNorm32HangulOrJamo(long norm32) {
        return norm32 >= MIN_HANGUL;
    }

    /**
     * True if norm32 is below MIN_JAMO_V. Only an upper bound is tested;
     * the name suggests the caller has already established that norm32 is
     * a Hangul/Jamo value.
     */
    private static boolean isHangulJamoNorm32HangulOrJamoL(long norm32) {
        return norm32 < MIN_JAMO_V;
    }

    /**
     * True if norm32 is below JAMO_V_TOP. Only an upper bound is tested;
     * the name suggests the caller has already established that norm32
     * belongs to a Jamo V or T.
     */
    private static boolean isJamoVTNorm32JamoV(long norm32) {
        return norm32 < JAMO_V_TOP;
    }

    // ---- norm32 lookups ----

    /**
     * Looks up the trie value for one UTF-16 code unit.
     *
     * @return the value as an unsigned 32-bit quantity
     */
    public static long getNorm32(char c) {
        return ((UNSIGNED_INT_MASK) & (NormTrieImpl.normTrie.getLeadValue(c)));
    }

    /**
     * Completes a supplementary lookup.
     *
     * @param norm32 value returned by getNorm32(char) for the lead surrogate
     * @param c2     the trail surrogate
     * @return the value for the pair, as an unsigned 32-bit quantity
     */
    public static long getNorm32FromSurrogatePair(long norm32, char c2) {
        return ((UNSIGNED_INT_MASK) &
                NormTrieImpl.normTrie.getTrailValue((int) norm32, c2));
    }

    /** Looks up the trie value for a code point, as an unsigned 32-bit quantity. */
    private static long getNorm32(int c) {
        return (UNSIGNED_INT_MASK & (NormTrieImpl.normTrie.getCodePointValue(c)));
    }

    /**
     * Looks up a code point through its surrogates: the trail surrogate is
     * consulted only if the lead value has one of the mask bits set and is
     * a lead-surrogate value.
     */
    private static long getNorm32(int c, int mask) {
        long norm32 = getNorm32(UTF16.getLeadSurrogate(c));
        if (((norm32 & mask) > 0) && isNorm32LeadSurrogate(norm32)) {
            norm32 = getNorm32FromSurrogatePair(norm32, UTF16.getTrailSurrogate(c));
        }
        return norm32;
    }
private static long getNorm32(char[] p,int start, 428 int mask) { 429 long norm32= getNorm32(p[start]); 430 if(((norm32&mask)>0) && isNorm32LeadSurrogate(norm32)) { 431 432 norm32=getNorm32FromSurrogatePair(norm32, p[start+1]); 433 } 434 return norm32; 435 } 436 public static VersionInfo getUnicodeVersion(){ 437 return VersionInfo.getInstance(unicodeVersion[0], unicodeVersion[1], 438 unicodeVersion[2], unicodeVersion[3]); 439 } 440 public static char getFCD16(char c) { 441 return FCDTrieImpl.fcdTrie.getLeadValue(c); 442 } 443 444 public static char getFCD16FromSurrogatePair(char fcd16, char c2) { 445 448 return FCDTrieImpl.fcdTrie.getTrailValue(fcd16, c2); 449 } 450 public static int getFCD16(int c) { 451 return FCDTrieImpl.fcdTrie.getCodePointValue(c); 452 } 453 454 private static int getExtraDataIndex(long norm32) { 455 return (int)(norm32>>EXTRA_SHIFT); 456 } 457 458 private static final class DecomposeArgs{ 459 int cc; 460 int trailCC; 461 int length; 462 } 463 469 private static int decompose(long norm32, 470 int qcMask, 471 DecomposeArgs args) { 472 int p= getExtraDataIndex(norm32); 473 args.length=extraData[p++]; 474 475 if((norm32&qcMask&QC_NFKD)!=0 && args.length>=0x100) { 476 477 p+=((args.length>>7)&1)+(args.length&DECOMP_LENGTH_MASK); 478 args.length>>=8; 479 } 480 481 if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) { 482 483 char bothCCs=extraData[p++]; 484 args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8); 485 args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs; 486 } else { 487 488 args.cc=args.trailCC=0; 489 } 490 491 args.length&=DECOMP_LENGTH_MASK; 492 return p; 493 } 494 495 496 500 private static int decompose(long norm32, 501 DecomposeArgs args) { 502 503 int p= getExtraDataIndex(norm32); 504 args.length=extraData[p++]; 505 506 if((args.length&DECOMP_FLAG_LENGTH_HAS_CC)>0) { 507 508 char bothCCs=extraData[p++]; 509 args.cc=(UNSIGNED_BYTE_MASK) & (bothCCs>>8); 510 args.trailCC=(UNSIGNED_BYTE_MASK) & bothCCs; 511 } else { 512 513 args.cc=args.trailCC=0; 514 } 
515 516 args.length&=DECOMP_LENGTH_MASK; 517 return p; 518 } 519 520 521 private static final class NextCCArgs{ 522 char[] source; 523 int next; 524 int limit; 525 char c; 526 char c2; 527 } 528 529 534 private static int getNextCC(NextCCArgs args) { 535 long norm32; 536 537 args.c=args.source[args.next++]; 538 539 norm32= getNorm32(args.c); 540 if((norm32 & CC_MASK)==0) { 541 args.c2=0; 542 return 0; 543 } else { 544 if(!isNorm32LeadSurrogate(norm32)) { 545 args.c2=0; 546 } else { 547 548 if(args.next!=args.limit && 549 UTF16.isTrailSurrogate(args.c2=args.source[args.next])){ 550 ++args.next; 551 norm32=getNorm32FromSurrogatePair(norm32, args.c2); 552 } else { 553 args.c2=0; 554 return 0; 555 } 556 } 557 558 return (int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT)); 559 } 560 } 561 562 private static final class PrevArgs{ 563 char[] src; 564 int start; 565 int current; 566 char c; 567 char c2; 568 } 569 570 576 private static long getPrevNorm32(PrevArgs args, 577 int minC, 578 int mask) { 579 long norm32; 580 581 args.c=args.src[--args.current]; 582 args.c2=0; 583 584 587 if(args.c<minC) { 588 return 0; 589 } else if(!UTF16.isSurrogate(args.c)) { 590 return getNorm32(args.c); 591 } else if(UTF16.isLeadSurrogate(args.c)) { 592 593 return 0; 594 } else if(args.current!=args.start && 595 UTF16.isLeadSurrogate(args.c2=args.src[args.current-1])) { 596 --args.current; 597 norm32=getNorm32(args.c2); 598 599 if((norm32&mask)==0) { 600 603 return 0; 604 } else { 605 606 return getNorm32FromSurrogatePair(norm32, args.c); 607 } 608 } else { 609 610 args.c2=0; 611 return 0; 612 } 613 } 614 615 619 private static int getPrevCC(PrevArgs args) { 620 621 return (int)((UNSIGNED_BYTE_MASK)&(getPrevNorm32(args, MIN_WITH_LEAD_CC, 622 CC_MASK)>>CC_SHIFT)); 623 } 624 625 629 public static boolean isNFDSafe(long norm32, 630 intccOrQCMask, 631 int decompQCMask) { 632 if((norm32&ccOrQCMask)==0) { 633 return true; 634 } 635 636 637 if(isNorm32Regular(norm32) && (norm32&decompQCMask)!=0) { 
638 DecomposeArgs args=new DecomposeArgs(); 639 640 decompose(norm32, decompQCMask, args); 641 return args.cc==0; 642 } else { 643 644 return (norm32&CC_MASK)==0; 645 } 646 } 647 648 652 public static boolean isTrueStarter(long norm32, 653 int ccOrQCMask, 654 int decompQCMask) { 655 if((norm32&ccOrQCMask)==0) { 656 return true; 657 } 658 659 660 if((norm32&decompQCMask)!=0) { 661 int p; 662 DecomposeArgs args=new DecomposeArgs(); 663 664 p=decompose(norm32, decompQCMask, args); 665 666 if(args.cc==0) { 667 int qcMask=ccOrQCMask&QC_MASK; 668 669 670 if((getNorm32(extraData,p, qcMask)&qcMask)==0) { 671 672 return true; 673 } 674 } 675 } 676 return false; 677 } 678 679 680 681 696 private static int insertOrdered(char[] source, 697 int start, 698 int current, int p, 699 char c, char c2, 700 int cc) { 701 int back, preBack; 702 int r; 703 int prevCC, trailCC=cc; 704 705 if(start<current && cc!=0) { 706 preBack=back=current; 708 PrevArgs prevArgs = new PrevArgs(); 709 prevArgs.current = current; 710 prevArgs.start = start; 711 prevArgs.src = source; 712 prevCC=getPrevCC(prevArgs); 714 preBack = prevArgs.current; 715 716 if(cc<prevCC) { 717 trailCC=prevCC; 719 back=preBack; 720 while(start<preBack) { 721 prevCC=getPrevCC(prevArgs); 722 preBack=prevArgs.current; 723 if(cc>=prevCC) { 724 break; 725 } 726 back=preBack; 727 } 728 729 730 736 r=p; 738 do { 739 source[--r]=source[--current]; 740 } while(back!=current); 741 } 742 } 743 744 source[current]=c; 746 if(c2!=0) { 747 source[(current+1)]=c2; 748 } 749 750 return trailCC; 752 } 753 754 777 private static int mergeOrdered(char[] source, 778 int start, 779 int current, 780 char[] data, 781 int next, 782 int limit, 783 boolean isOrdered) { 784 int r; 785 int cc, trailCC=0; 786 boolean adjacent; 787 788 adjacent= current==next; 789 NextCCArgs ncArgs = new NextCCArgs(); 790 ncArgs.source = data; 791 ncArgs.next = next; 792 ncArgs.limit = limit; 793 794 if(start!=current || !isOrdered) { 795 796 
while(ncArgs.next<ncArgs.limit) { 797 cc=getNextCC(ncArgs); 798 if(cc==0) { 799 trailCC=0; 801 if(adjacent) { 802 current=ncArgs.next; 803 } else { 804 data[current++]=ncArgs.c; 805 if(ncArgs.c2!=0) { 806 data[current++]=ncArgs.c2; 807 } 808 } 809 if(isOrdered) { 810 break; 811 } else { 812 start=current; 813 } 814 } else { 815 r=current+(ncArgs.c2==0 ? 1 : 2); 816 trailCC=insertOrdered(source,start, current, r, 817 ncArgs.c, ncArgs.c2, cc); 818 current=r; 819 } 820 } 821 } 822 823 if(ncArgs.next==ncArgs.limit) { 824 return trailCC; 826 } else { 827 if(!adjacent) { 828 do { 830 source[current++]=data[ncArgs.next++]; 831 } while(ncArgs.next!=ncArgs.limit); 832 ncArgs.limit=current; 833 } 834 PrevArgs prevArgs = new PrevArgs(); 835 prevArgs.src = data; 836 prevArgs.start = start; 837 prevArgs.current = ncArgs.limit; 838 return getPrevCC(prevArgs); 839 } 840 841 } 842 private static int mergeOrdered(char[] source, 843 int start, 844 int current, 845 char[] data, 846 final int next, 847 final int limit) { 848 return mergeOrdered(source,start,current,data,next,limit,true); 849 } 850 851 852 853 public static boolean checkFCD(char[] src,int srcStart, int srcLimit, 854 UnicodeSet nx) { 855 856 char fcd16,c,c2; 857 int prevCC=0, cc; 858 int i =srcStart, length = srcLimit; 859 860 for(;;) { 861 for(;;) { 862 if(i==length) { 863 return true; 864 } else if((c=src[i++])<MIN_WITH_LEAD_CC) { 865 prevCC=(int)-c; 866 } else if((fcd16=getFCD16(c))==0) { 867 prevCC=0; 868 } else { 869 break; 870 } 871 } 872 873 if(UTF16.isLeadSurrogate(c)) { 875 if(i!=length && UTF16.isTrailSurrogate(c2=src[i])) { 877 ++i; 878 fcd16=getFCD16FromSurrogatePair(fcd16, c2); 879 } else { 880 c2=0; 881 fcd16=0; 882 } 883 }else{ 884 c2=0; 885 } 886 887 if(nx_contains(nx, c, c2)) { 888 prevCC=0; 889 continue; 890 } 891 892 899 cc=(int)(fcd16>>8); 901 if(cc!=0) { 902 if(prevCC<0) { 903 if(!nx_contains(nx, (int)-prevCC)) { 907 prevCC=(int)(FCDTrieImpl.fcdTrie.getBMPValue( 908 (char)-prevCC)&0xff 909 ); 910 } 
else { 911 prevCC=0; 912 } 913 914 } 915 916 if(cc<prevCC) { 917 return false; 918 } 919 } 920 prevCC=(int)(fcd16&0xff); 921 } 922 } 923 924 public static Normalizer.QuickCheckResult quickCheck(char[] src, 925 int srcStart, 926 int srcLimit, 927 int minNoMaybe, 928 int qcMask, 929 int options, 930 boolean allowMaybe, 931 UnicodeSet nx){ 932 933 int ccOrQCMask; 934 long norm32; 935 char c, c2; 936 char cc, prevCC; 937 long qcNorm32; 938 Normalizer.QuickCheckResult result; 939 ComposePartArgs args = new ComposePartArgs(); 940 char[] buffer ; 941 int start = srcStart; 942 943 if(!isDataLoaded) { 944 return Normalizer.MAYBE; 945 } 946 ccOrQCMask=CC_MASK|qcMask; 948 result=Normalizer.YES; 949 prevCC=0; 950 951 for(;;) { 952 for(;;) { 953 if(srcStart==srcLimit) { 954 return result; 955 } else if((c=src[srcStart++])>=minNoMaybe && 956 (( norm32=getNorm32(c)) & ccOrQCMask)!=0) { 957 break; 958 } 959 prevCC=0; 960 } 961 962 963 if(isNorm32LeadSurrogate(norm32)) { 965 if(srcStart!=srcLimit&& UTF16.isTrailSurrogate(c2=src[srcStart])) { 967 ++srcStart; 968 norm32=getNorm32FromSurrogatePair(norm32,c2); 969 } else { 970 norm32=0; 971 c2=0; 972 } 973 }else{ 974 c2=0; 975 } 976 if(nx_contains(nx, c, c2)) { 977 978 norm32=0; 979 } 980 981 cc=(char)((norm32>>CC_SHIFT)&0xFF); 983 if(cc!=0 && cc<prevCC) { 984 return Normalizer.NO; 985 } 986 prevCC=cc; 987 988 qcNorm32 = norm32 & qcMask; 990 if((qcNorm32& QC_ANY_NO)>=1) { 991 result= Normalizer.NO; 992 break; 993 } else if(qcNorm32!=0) { 994 if(allowMaybe){ 996 result=Normalizer.MAYBE; 997 }else{ 998 int prevStarter; 1001 int decompQCMask; 1002 1003 decompQCMask=(qcMask<<2)&0xf; 1005 1007 prevStarter=srcStart-1; 1009 if(UTF16.isTrailSurrogate(src[prevStarter])) { 1010 --prevStarter; 1013 } 1014 1015 prevStarter=findPreviousStarter(src, start, prevStarter, 1016 ccOrQCMask, decompQCMask, 1017 (char)minNoMaybe); 1018 1019 srcStart=findNextStarter(src,srcStart, srcLimit, qcMask, 1022 decompQCMask,(char) minNoMaybe); 1023 1024 args.prevCC = 
prevCC; 1026 1027 buffer = composePart(args,prevStarter,src,srcStart,srcLimit,options,nx); 1029 1030 if(0!=strCompare(buffer,0,args.length,src,prevStarter,srcStart, false)) { 1032 result=Normalizer.NO; break; 1034 } 1035 1036 } 1038 } 1039 } 1040 return result; 1041 } 1042 1043 1044 public static int getDecomposition(int c , 1048 boolean compat, 1049 char[] dest, 1050 int destStart, 1051 int destCapacity) { 1052 1053 if( (UNSIGNED_INT_MASK & c)<=0x10ffff) { 1054 long norm32; 1055 int qcMask; 1056 int minNoMaybe; 1057 int length; 1058 1059 if(!compat) { 1061 minNoMaybe=(int)indexes[INDEX_MIN_NFD_NO_MAYBE]; 1062 qcMask=QC_NFD; 1063 } else { 1064 minNoMaybe=(int)indexes[INDEX_MIN_NFKD_NO_MAYBE]; 1065 qcMask=QC_NFKD; 1066 } 1067 1068 if(c<minNoMaybe) { 1069 if(destCapacity>0) { 1071 dest[0]=(char)c; 1072 } 1073 return -1; 1074 } 1075 1076 1077 norm32=getNorm32(c); 1078 if((norm32&qcMask)==0) { 1079 1080 if(c<=0xffff) { 1081 if(destCapacity>0) { 1082 dest[0]=(char)c; 1083 } 1084 return -1; 1085 } else { 1086 if(destCapacity>=2) { 1087 dest[0]=UTF16.getLeadSurrogate(c); 1088 dest[1]=UTF16.getTrailSurrogate(c); 1089 } 1090 return -2; 1091 } 1092 } else if(isNorm32HangulOrJamo(norm32)) { 1093 1094 char c2; 1095 1096 c-=HANGUL_BASE; 1097 1098 c2=(char)(c%JAMO_T_COUNT); 1099 c/=JAMO_T_COUNT; 1100 if(c2>0) { 1101 if(destCapacity>=3) { 1102 dest[2]=(char)(JAMO_T_BASE+c2); 1103 } 1104 length=3; 1105 } else { 1106 length=2; 1107 } 1108 1109 if(destCapacity>=2) { 1110 dest[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT); 1111 dest[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT); 1112 } 1113 return length; 1114 } else { 1115 1118 int p, limit; 1119 DecomposeArgs args = new DecomposeArgs(); 1120 1121 p=decompose(norm32, qcMask, args); 1122 if(args.length<=destCapacity) { 1123 limit=p+args.length; 1124 do { 1125 dest[destStart++]=extraData[p++]; 1126 } while(p<limit); 1127 } 1128 return args.length; 1129 } 1130 } else { 1131 return 0; 1132 } 1133 } 1134 1135 1136 public static int decompose(char[] 
src,int srcStart,int srcLimit, 1137 char[] dest,int destStart,int destLimit, 1138 boolean compat,int[] outTrailCC, 1139 UnicodeSet nx) { 1140 1141 char[] buffer = new char[3]; 1142 int prevSrc; 1143 long norm32; 1144 int ccOrQCMask, qcMask; 1145 int reorderStartIndex, length; 1146 char c, c2, minNoMaybe; 1147 int cc, prevCC, trailCC; 1148 char[] p; 1149 int pStart; 1150 int destIndex = destStart; 1151 int srcIndex = srcStart; 1152 if(!compat) { 1153 minNoMaybe=(char)indexes[INDEX_MIN_NFD_NO_MAYBE]; 1154 qcMask=QC_NFD; 1155 } else { 1156 minNoMaybe=(char)indexes[INDEX_MIN_NFKD_NO_MAYBE]; 1157 qcMask=QC_NFKD; 1158 } 1159 1160 1161 ccOrQCMask=CC_MASK|qcMask; 1162 reorderStartIndex=0; 1163 prevCC=0; 1164 norm32=0; 1165 c=0; 1166 pStart=0; 1167 1168 cc=trailCC=-1; 1170 for(;;) { 1171 1174 prevSrc=srcIndex; 1175 1176 while(srcIndex!=srcLimit &&((c=src[srcIndex])<minNoMaybe || 1177 ((norm32=getNorm32(c))&ccOrQCMask)==0)){ 1178 prevCC=0; 1179 ++srcIndex; 1180 } 1181 1182 1183 if(srcIndex!=prevSrc) { 1184 length=(int)(srcIndex-prevSrc); 1185 if((destIndex+length)<=destLimit) { 1186 System.arraycopy(src,prevSrc,dest,destIndex,length); 1187 } 1188 1189 destIndex+=length; 1190 reorderStartIndex=destIndex; 1191 } 1192 1193 1194 if(srcIndex==srcLimit) { 1195 break; 1196 } 1197 1198 1199 ++srcIndex; 1200 1201 1202 1218 if(isNorm32HangulOrJamo(norm32)) { 1219 if(nx_contains(nx, c)) { 1220 c2=0; 1221 p=null; 1222 length=1; 1223 } else { 1224 p=buffer; 1226 pStart=0; 1227 cc=trailCC=0; 1228 1229 c-=HANGUL_BASE; 1230 1231 c2=(char)(c%JAMO_T_COUNT); 1232 c/=JAMO_T_COUNT; 1233 if(c2>0) { 1234 buffer[2]=(char)(JAMO_T_BASE+c2); 1235 length=3; 1236 } else { 1237 length=2; 1238 } 1239 1240 buffer[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT); 1241 buffer[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT); 1242 } 1243 } else { 1244 if(isNorm32Regular(norm32)) { 1245 c2=0; 1246 length=1; 1247 } else { 1248 if(srcIndex!=srcLimit && 1250 UTF16.isTrailSurrogate(c2=src[srcIndex])) { 1251 ++srcIndex; 1252 length=2; 
1253 norm32=getNorm32FromSurrogatePair(norm32, c2); 1254 } else { 1255 c2=0; 1256 length=1; 1257 norm32=0; 1258 } 1259 } 1260 1261 1262 if(nx_contains(nx, c, c2)) { 1263 1264 cc=trailCC=0; 1265 p=null; 1266 } else if((norm32&qcMask)==0) { 1267 1268 cc=trailCC=(int)((UNSIGNED_BYTE_MASK) & (norm32>>CC_SHIFT)); 1269 p=null; 1270 pStart=-1; 1271 } else { 1272 DecomposeArgs arg = new DecomposeArgs(); 1273 1276 pStart=decompose(norm32, qcMask, arg); 1277 p=extraData; 1278 length=arg.length; 1279 cc=arg.cc; 1280 trailCC=arg.trailCC; 1281 if(length==1) { 1282 1283 c=p[pStart]; 1284 c2=0; 1285 p=null; 1286 pStart=-1; 1287 } 1288 } 1289 } 1290 1291 1294 if((destIndex+length)<=destLimit) { 1295 int reorderSplit=destIndex; 1296 if(p==null) { 1297 1298 if(cc!=0 && cc<prevCC) { 1299 1302 destIndex+=length; 1303 trailCC=insertOrdered(dest,reorderStartIndex, 1304 reorderSplit, destIndex, c, c2, cc); 1305 } else { 1306 1307 dest[destIndex++]=c; 1308 if(c2!=0) { 1309 dest[destIndex++]=c2; 1310 } 1311 } 1312 } else { 1313 1316 if(cc!=0 && cc<prevCC) { 1317 1320 destIndex+=length; 1321 trailCC=mergeOrdered(dest,reorderStartIndex, 1322 reorderSplit,p, pStart,pStart+length); 1323 } else { 1324 1325 do { 1326 dest[destIndex++]=p[pStart++]; 1327 } while(--length>0); 1328 } 1329 } 1330 } else { 1331 1332 1333 destIndex+=length; 1334 } 1335 1336 prevCC=trailCC; 1337 if(prevCC==0) { 1338 reorderStartIndex=destIndex; 1339 } 1340 } 1341 1342 outTrailCC[0]=prevCC; 1343 1344 return destIndex - destStart; 1345 } 1346 1347 1348 private static final class NextCombiningArgs{ 1349 char[] source; 1350 int start; 1351 char c; 1353 char c2; 1354 int combiningIndex; 1355 char cc; 1356 } 1357 1358 1359 private static int getNextCombining(NextCombiningArgs args, 1360 int limit, 1361 UnicodeSet nx) { 1362 long norm32; 1363 int combineFlags; 1364 1365 args.c=args.source[args.start++]; 1366 norm32=getNorm32(args.c); 1367 1368 1369 args.c2=0; 1370 args.combiningIndex=0; 1371 args.cc=0; 1372 1373 
if((norm32&(CC_MASK|COMBINES_ANY))==0) { 1374 return 0; 1375 } else { 1376 if(isNorm32Regular(norm32)) { 1377 1378 } else if(isNorm32HangulOrJamo(norm32)) { 1379 1380 args.combiningIndex=(int)((UNSIGNED_INT_MASK)&(0xfff0| 1381 (norm32>>EXTRA_SHIFT))); 1382 return (int)(norm32&COMBINES_ANY); 1383 } else { 1384 1385 if(args.start!=limit && UTF16.isTrailSurrogate(args.c2= 1386 args.source[args.start])) { 1387 ++args.start; 1388 norm32=getNorm32FromSurrogatePair(norm32, args.c2); 1389 } else { 1390 args.c2=0; 1391 return 0; 1392 } 1393 } 1394 1395 if(nx_contains(nx, args.c, args.c2)) { 1396 return 0; 1397 } 1398 1399 args.cc= (char)((norm32>>CC_SHIFT)&0xff); 1400 1401 combineFlags=(int)(norm32&COMBINES_ANY); 1402 if(combineFlags!=0) { 1403 int index = getExtraDataIndex(norm32); 1404 args.combiningIndex=index>0 ? extraData[(index-1)] :0; 1405 } 1406 1407 return combineFlags; 1408 } 1409 } 1410 1411 1419 private static int getCombiningIndexFromStarter(char c,char c2){ 1420 long norm32; 1421 1422 norm32=getNorm32(c); 1423 if(c2!=0) { 1424 norm32=getNorm32FromSurrogatePair(norm32, c2); 1425 } 1426 return extraData[(getExtraDataIndex(norm32)-1)]; 1427 } 1428 1429 1446 private static int combine(char[]table,int tableStart, 1447 int combineBackIndex, 1448 int[] outValues) { 1449 int key; 1450 int value,value2; 1451 1452 if(outValues.length<2){ 1453 throw new IllegalArgumentException (); 1454 } 1455 1456 1457 for(;;) { 1458 key=table[tableStart++]; 1459 if(key>=combineBackIndex) { 1460 break; 1461 } 1462 tableStart+= ((table[tableStart]&0x8000) != 0)? 
2 : 1; 1463 } 1464 1465 1466 if((key&0x7fff)==combineBackIndex) { 1467 1468 value=table[tableStart]; 1469 1470 1471 key=(int)((UNSIGNED_INT_MASK)&((value&0x2000)+1)); 1472 1473 1476 if((value&0x8000) != 0) { 1477 if((value&0x4000) != 0) { 1478 1479 value=(int)((UNSIGNED_INT_MASK)&((value&0x3ff)|0xd800)); 1480 value2=table[tableStart+1]; 1481 } else { 1482 1483 value=table[tableStart+1]; 1484 value2=0; 1485 } 1486 } else { 1487 1488 value&=0x1fff; 1489 value2=0; 1490 } 1491 outValues[0]=value; 1492 outValues[1]=value2; 1493 return key; 1494 } else { 1495 1496 return 0; 1497 } 1498 } 1499 1500 1501 private static final class RecomposeArgs{ 1502 char[] source; 1503 int start; 1504 int limit; 1505 } 1506 1520 private static char recompose(RecomposeArgs args, int options, UnicodeSet nx) { 1521 int remove, q, r; 1522 int combineFlags; 1523 int combineFwdIndex, combineBackIndex; 1524 int result, value=0, value2=0; 1525 int prevCC; 1526 boolean starterIsSupplementary; 1527 int starter; 1528 int[] outValues = new int[2]; 1529 starter=-1; 1530 combineFwdIndex=0; 1531 starterIsSupplementary=false; 1532 prevCC=0; 1533 1534 NextCombiningArgs ncArg = new NextCombiningArgs(); 1535 ncArg.source = args.source; 1536 1537 ncArg.cc =0; 1538 ncArg.c2 =0; 1539 1540 for(;;) { 1541 ncArg.start = args.start; 1542 combineFlags=getNextCombining(ncArg,args.limit,nx); 1543 combineBackIndex=ncArg.combiningIndex; 1544 args.start = ncArg.start; 1545 1546 if(((combineFlags&COMBINES_BACK)!=0) && starter!=-1) { 1547 if((combineBackIndex&0x8000)!=0) { 1548 1551 1552 if((options&BEFORE_PRI_29)!=0 || prevCC==0) { 1553 remove=-1; 1554 combineFlags=0; 1555 ncArg.c2=args.source[starter]; 1556 if(combineBackIndex==0xfff2) { 1557 1560 ncArg.c2=(char)(ncArg.c2-JAMO_L_BASE); 1561 if(ncArg.c2<JAMO_L_COUNT) { 1562 remove=args.start-1; 1563 ncArg.c=(char)(HANGUL_BASE+(ncArg.c2*JAMO_V_COUNT+ 1564 (ncArg.c-JAMO_V_BASE))*JAMO_T_COUNT); 1565 if(args.start!=args.limit && 1566 (ncArg.c2=(char)(args.source[args.start] 
1567 -JAMO_T_BASE))<JAMO_T_COUNT) { 1568 ++args.start; 1569 ncArg.c+=ncArg.c2; 1570 } else { 1571 1572 combineFlags=COMBINES_FWD; 1573 } 1574 if(!nx_contains(nx, ncArg.c)) { 1575 args.source[starter]=ncArg.c; 1576 } else { 1577 1578 if(!isHangulWithoutJamoT(ncArg.c)) { 1579 --args.start; 1580 } 1581 1582 remove=args.start; 1583 } 1584 } 1585 1586 1595 } else { 1596 1597 if(isHangulWithoutJamoT(ncArg.c2)) { 1598 ncArg.c2+=ncArg.c-JAMO_T_BASE; 1599 if(!nx_contains(nx, ncArg.c2)) { 1600 remove=args.start-1; 1601 args.source[starter]=ncArg.c2; 1602 } 1603 } 1604 } 1605 1606 if(remove!=-1) { 1607 1608 q=remove; 1609 r=args.start; 1610 while(r<args.limit) { 1611 args.source[q++]=args.source[r++]; 1612 } 1613 args.start=remove; 1614 args.limit=q; 1615 } 1616 1617 ncArg.c2=0; 1618 1619 if(combineFlags!=0) { 1620 1624 1625 1626 if(args.start==args.limit) { 1627 return (char)prevCC; 1628 } 1629 1630 1631 combineFwdIndex=0xfff0; 1632 1633 1634 continue; 1635 } 1636 } 1637 1638 1646 1647 } else if( 1648 1649 !((combineFwdIndex&0x8000)!=0) && 1650 1651 ((options&BEFORE_PRI_29)!=0 ? 1652 (prevCC!=ncArg.cc || prevCC==0) : 1653 (prevCC<ncArg.cc || prevCC==0)) && 1654 1655 0!=(result=combine(combiningTable,combineFwdIndex, 1656 combineBackIndex, outValues)) && 1657 1658 !nx_contains(nx, (char)value, (char)value2) 1659 ) { 1660 value=outValues[0]; 1661 value2=outValues[1]; 1662 1665 remove= ncArg.c2==0 ? 
args.start-1 : args.start-2; 1666 1667 1668 args.source[starter]=(char)value; 1669 if(starterIsSupplementary) { 1670 if(value2!=0) { 1671 1672 args.source[starter+1]=(char)value2; 1673 } else { 1674 1676 starterIsSupplementary=false; 1677 q=starter+1; 1678 r=q+1; 1679 while(r<remove) { 1680 args.source[q++]=args.source[r++]; 1681 } 1682 --remove; 1683 } 1684 } else if(value2!=0) { 1685 1687 starterIsSupplementary=true; 1688 1689 ++starter; 1690 q=remove; 1691 r=++remove; 1692 while(starter<q) { 1693 args.source[--r]=args.source[--q]; 1694 } 1695 args.source[starter]=(char)value2; 1696 --starter; 1697 1698 } 1699 1700 1702 if(remove<args.start) { 1703 q=remove; 1704 r=args.start; 1705 while(r<args.limit) { 1706 args.source[q++]=args.source[r++]; 1707 } 1708 args.start=remove; 1709 args.limit=q; 1710 } 1711 1712 1713 1714 1715 if(args.start==args.limit) { 1716 return (char)prevCC; 1717 } 1718 1719 1720 if(result>1) { 1721 combineFwdIndex=getCombiningIndexFromStarter((char)value, 1722 (char)value2); 1723 } else { 1724 starter=-1; 1725 } 1726 1727 1728 continue; 1729 } 1730 } 1731 1732 1733 prevCC=ncArg.cc; 1734 if(args.start==args.limit) { 1735 return (char)prevCC; 1736 } 1737 1738 1739 if(ncArg.cc==0) { 1740 1741 if((combineFlags&COMBINES_FWD)!=0) { 1742 1743 if(ncArg.c2==0) { 1744 starterIsSupplementary=false; 1745 starter=args.start-1; 1746 } else { 1747 starterIsSupplementary=false; 1748 starter=args.start-2; 1749 } 1750 combineFwdIndex=combineBackIndex; 1751 } else { 1752 1753 starter=-1; 1754 } 1755 } else if((options&OPTIONS_COMPOSE_CONTIGUOUS)!=0) { 1756 1757 starter=-1; 1758 } 1759 } 1760 } 1761 1762 private static int findPreviousStarter(char[]src, int srcStart, int current, 1765 int ccOrQCMask, 1766 int decompQCMask, 1767 char minNoMaybe) { 1768 long norm32; 1769 PrevArgs args = new PrevArgs(); 1770 args.src = src; 1771 args.start = srcStart; 1772 args.current = current; 1773 1774 while(args.start<args.current) { 1775 norm32= getPrevNorm32(args, minNoMaybe, 
ccOrQCMask|decompQCMask); 1776 if(isTrueStarter(norm32, ccOrQCMask, decompQCMask)) { 1777 break; 1778 } 1779 } 1780 return args.current; 1781 } 1782 1783 1786 private static int findNextStarter(char[] src,int start,int limit, 1787 int qcMask, 1788 int decompQCMask, 1789 char minNoMaybe) { 1790 int p; 1791 long norm32; 1792 int ccOrQCMask; 1793 char c, c2; 1794 1795 ccOrQCMask=CC_MASK|qcMask; 1796 1797 DecomposeArgs decompArgs = new DecomposeArgs(); 1798 1799 for(;;) { 1800 if(start==limit) { 1801 break; 1802 } 1803 c=src[start]; 1804 if(c<minNoMaybe) { 1805 break; 1806 } 1807 1808 norm32=getNorm32(c); 1809 if((norm32&ccOrQCMask)==0) { 1810 break; 1811 } 1812 1813 if(isNorm32LeadSurrogate(norm32)) { 1814 1815 if((start+1)==limit || 1816 !UTF16.isTrailSurrogate(c2=(src[start+1]))){ 1817 1818 break; 1819 } 1820 norm32=getNorm32FromSurrogatePair(norm32, c2); 1821 1822 if((norm32&ccOrQCMask)==0) { 1823 break; 1824 } 1825 } else { 1826 c2=0; 1827 } 1828 1829 1830 if((norm32&decompQCMask)!=0) { 1831 1833 p=decompose(norm32, decompQCMask, decompArgs); 1834 1835 1837 if(decompArgs.cc==0 && (getNorm32(extraData,p, qcMask)&qcMask)==0) { 1838 break; 1839 } 1840 } 1841 1842 start+= c2==0 ? 
1 : 2; 1843 } 1844 1845 return start; 1846 } 1847 1848 1849 private static final class ComposePartArgs{ 1850 int prevCC; 1851 int length; 1852 } 1853 1854 1855 private static char[] composePart(ComposePartArgs args, 1856 int prevStarter, 1857 char[] src, int start, int limit, 1858 int options, 1859 UnicodeSet nx) { 1860 int recomposeLimit; 1861 boolean compat =((options&OPTIONS_COMPAT)!=0); 1862 1863 1864 int[] outTrailCC = new int[1]; 1865 char[] buffer = new char[(limit-prevStarter)*MAX_BUFFER_SIZE]; 1866 1867 for(;;){ 1868 args.length=decompose(src,prevStarter,(start), 1869 buffer,0,buffer.length, 1870 compat,outTrailCC,nx); 1871 if(args.length<=buffer.length){ 1872 break; 1873 }else{ 1874 buffer = new char[args.length]; 1875 } 1876 } 1877 1878 1879 recomposeLimit=args.length; 1880 1881 if(args.length>=2) { 1882 RecomposeArgs rcArgs = new RecomposeArgs(); 1883 rcArgs.source = buffer; 1884 rcArgs.start = 0; 1885 rcArgs.limit = recomposeLimit; 1886 args.prevCC=recompose(rcArgs, options, nx); 1887 recomposeLimit = rcArgs.limit; 1888 } 1889 1890 1891 args.length=recomposeLimit; 1892 return buffer; 1893 } 1894 1895 private static boolean composeHangul(char prev, char c, 1896 long norm32, 1897 char[] src,int[] srcIndex, int limit, 1898 boolean compat, 1899 char[] dest,int destIndex, 1900 UnicodeSet nx) { 1901 int start=srcIndex[0]; 1902 if(isJamoVTNorm32JamoV(norm32)) { 1903 1905 prev=(char)(prev-JAMO_L_BASE); 1906 if(prev<JAMO_L_COUNT) { 1907 c=(char)(HANGUL_BASE+(prev*JAMO_V_COUNT+ 1908 (c-JAMO_V_BASE))*JAMO_T_COUNT); 1909 1910 1912 if(start!=limit) { 1913 char next, t; 1914 1915 next=src[start]; 1916 if((t=(char)(next-JAMO_T_BASE))<JAMO_T_COUNT) { 1917 1918 ++start; 1919 c+=t; 1920 } else if(compat) { 1921 1923 norm32=getNorm32(next); 1924 if(isNorm32Regular(norm32) && ((norm32&QC_NFKD)!=0)) { 1925 int p ; 1926 DecomposeArgs dcArgs = new DecomposeArgs(); 1927 p=decompose(norm32, QC_NFKD, dcArgs); 1928 if(dcArgs.length==1 && 1929 (t=(char)(extraData[p]-JAMO_T_BASE)) 
1930 <JAMO_T_COUNT) { 1931 1932 ++start; 1933 c+=t; 1934 } 1935 } 1936 } 1937 } 1938 if(nx_contains(nx, c)) { 1939 if(!isHangulWithoutJamoT(c)) { 1940 --start; 1941 } 1942 return false; 1943 } 1944 dest[destIndex]=c; 1945 srcIndex[0]=start; 1946 return true; 1947 } 1948 } else if(isHangulWithoutJamoT(prev)) { 1949 1951 c=(char)(prev+(c-JAMO_T_BASE)); 1952 if(nx_contains(nx, c)) { 1953 return false; 1954 } 1955 dest[destIndex]=c; 1956 srcIndex[0]=start; 1957 return true; 1958 } 1959 return false; 1960 } 1961 1966 1967 public static int compose(char[] src, int srcStart, int srcLimit, 1968 char[] dest,int destStart,int destLimit, 1969 int options,UnicodeSet nx) { 1970 1971 int prevSrc, prevStarter; 1972 long norm32; 1973 int ccOrQCMask, qcMask; 1974 int reorderStartIndex, length; 1975 char c, c2, minNoMaybe; 1976 int cc, prevCC; 1977 int[] ioIndex = new int[1]; 1978 int destIndex = destStart; 1979 int srcIndex = srcStart; 1980 1981 if((options&OPTIONS_COMPAT)!=0) { 1982 minNoMaybe=(char)indexes[INDEX_MIN_NFKC_NO_MAYBE]; 1983 qcMask=QC_NFKC; 1984 } else { 1985 minNoMaybe=(char)indexes[INDEX_MIN_NFC_NO_MAYBE]; 1986 qcMask=QC_NFC; 1987 } 1988 1989 2012 prevStarter=srcIndex; 2013 2014 ccOrQCMask=CC_MASK|qcMask; 2015 reorderStartIndex=0; 2016 prevCC=0; 2017 2018 2019 norm32=0; 2020 c=0; 2021 2022 for(;;) { 2023 2025 prevSrc=srcIndex; 2026 2027 while(srcIndex!=srcLimit && ((c=src[srcIndex])<minNoMaybe || 2028 ((norm32=getNorm32(c))&ccOrQCMask)==0)) { 2029 prevCC=0; 2030 ++srcIndex; 2031 } 2032 2033 2034 2035 if(srcIndex!=prevSrc) { 2036 length=(int)(srcIndex-prevSrc); 2037 if((destIndex+length)<=destLimit) { 2038 System.arraycopy(src,prevSrc,dest,destIndex,length); 2039 } 2040 destIndex+=length; 2041 reorderStartIndex=destIndex; 2042 2043 2045 prevStarter=srcIndex-1; 2046 if(UTF16.isTrailSurrogate(src[prevStarter]) && 2047 prevSrc<prevStarter && 2048 UTF16.isLeadSurrogate(src[(prevStarter-1)])) { 2049 --prevStarter; 2050 } 2051 2052 prevSrc=srcIndex; 2053 } 2054 2055 2056 
if(srcIndex==srcLimit) { 2057 break; 2058 } 2059 2060 2061 ++srcIndex; 2062 2063 2084 2085 2086 2093 if(isNorm32HangulOrJamo(norm32)) { 2094 2100 prevCC=cc=0; 2101 reorderStartIndex=destIndex; 2102 ioIndex[0]=srcIndex; 2103 if( 2104 destIndex>0 && 2105 composeHangul(src[(prevSrc-1)], c, norm32,src, ioIndex, 2106 srcLimit, (options&OPTIONS_COMPAT)!=0, dest, 2107 destIndex<=destLimit ? destIndex-1: 0, 2108 nx) 2109 ) { 2110 srcIndex=ioIndex[0]; 2111 prevStarter=srcIndex; 2112 continue; 2113 } 2114 2115 srcIndex = ioIndex[0]; 2116 2117 2119 c2=0; 2120 length=1; 2121 prevStarter=prevSrc; 2122 } else { 2123 if(isNorm32Regular(norm32)) { 2124 c2=0; 2125 length=1; 2126 } else { 2127 2128 if(srcIndex!=srcLimit && 2129 UTF16.isTrailSurrogate(c2=src[srcIndex])) { 2130 ++srcIndex; 2131 length=2; 2132 norm32=getNorm32FromSurrogatePair(norm32, c2); 2133 } else { 2134 2135 c2=0; 2136 length=1; 2137 norm32=0; 2138 } 2139 } 2140 ComposePartArgs args =new ComposePartArgs(); 2141 2142 2143 if(nx_contains(nx, c, c2)) { 2144 2145 cc=0; 2146 } else if((norm32&qcMask)==0) { 2147 cc=(int)((UNSIGNED_BYTE_MASK)&(norm32>>CC_SHIFT)); 2148 } else { 2149 char[] p; 2150 2151 2164 int decompQCMask=(qcMask<<2)&0xf; 2165 2170 if(isTrueStarter(norm32, CC_MASK|qcMask, decompQCMask)) { 2171 prevStarter=prevSrc; 2172 } else { 2173 2174 destIndex-=prevSrc-prevStarter; 2175 } 2176 2177 2178 srcIndex=findNextStarter(src, srcIndex,srcLimit, qcMask, 2179 decompQCMask, minNoMaybe); 2180 args.prevCC = prevCC; 2182 args.length = length; 2184 p=composePart(args,prevStarter,src,srcIndex,srcLimit,options,nx); 2185 2186 if(p==null) { 2187 2188 break; 2189 } 2190 2191 prevCC = args.prevCC; 2192 length = args.length; 2193 2194 2196 if((destIndex+args.length)<=destLimit) { 2197 int i=0; 2198 while(i<args.length) { 2199 dest[destIndex++]=p[i++]; 2200 --length; 2201 } 2202 } else { 2203 2204 2205 destIndex+=length; 2206 } 2207 2208 prevStarter=srcIndex; 2209 continue; 2210 } 2211 } 2212 2213 2214 
if((destIndex+length)<=destLimit) { 2215 if(cc!=0 && cc<prevCC) { 2216 2218 int reorderSplit= destIndex; 2219 destIndex+=length; 2220 prevCC=insertOrdered(dest,reorderStartIndex, reorderSplit, 2221 destIndex, c, c2, cc); 2222 } else { 2223 2224 dest[destIndex++]=c; 2225 if(c2!=0) { 2226 dest[destIndex++]=c2; 2227 } 2228 prevCC=cc; 2229 } 2230 } else { 2231 2232 2233 destIndex+=length; 2234 prevCC=cc; 2235 } 2236 } 2237 2238 return destIndex - destStart; 2239 } 2240 2241 2242 private static int findSafeFCD(char[] src, int start, int limit, 2243 char fcd16) { 2244 char c, c2; 2245 2246 2257 for(;;) { 2258 2259 if((fcd16&0xff)==0) { 2260 break; 2261 } 2262 2263 2264 if(start==limit) { 2265 break; 2266 } 2267 c=src[start]; 2268 2269 2270 if(c<MIN_WITH_LEAD_CC || (fcd16=getFCD16(c))==0) { 2271 break; 2272 } 2273 2274 if(!UTF16.isLeadSurrogate(c)) { 2275 if(fcd16<=0xff) { 2276 break; 2277 } 2278 ++start; 2279 } else if(start+1!=limit && 2280 (UTF16.isTrailSurrogate(c2=src[start+1]))) { 2281 2282 fcd16=getFCD16FromSurrogatePair(fcd16, c2); 2283 if(fcd16<=0xff) { 2284 break; 2285 } 2286 start+=2; 2287 } else { 2288 2289 break; 2290 } 2291 } 2292 2293 return start; 2294 } 2295 2296 private static int decomposeFCD(char[] src, 2297 int start,int decompLimit, 2298 char[] dest, 2299 int[] destIndexArr, 2300 UnicodeSet nx) { 2301 char[] p=null; 2302 int pStart=-1; 2303 2304 long norm32; 2305 int reorderStartIndex; 2306 char c, c2; 2307 int prevCC; 2308 DecomposeArgs args = new DecomposeArgs(); 2309 int destIndex = destIndexArr[0]; 2310 2324 reorderStartIndex=destIndex; 2325 prevCC=0; 2326 2327 while(start<decompLimit) { 2328 c=src[start++]; 2329 norm32=getNorm32(c); 2330 if(isNorm32Regular(norm32)) { 2331 c2=0; 2332 args.length=1; 2333 } else { 2334 2339 2340 if(start!=decompLimit && UTF16.isTrailSurrogate(c2=src[start])){ 2341 ++start; 2342 args.length=2; 2343 norm32=getNorm32FromSurrogatePair(norm32, c2); 2344 } else { 2345 c2=0; 2346 args.length=1; 2347 norm32=0; 2348 } 2349 
} 2350 2351 2352 if(nx_contains(nx, c, c2)) { 2353 2354 args.cc=args.trailCC=0; 2355 p=null; 2356 } else if((norm32&QC_NFD)==0) { 2357 2358 args.cc=args.trailCC=(int)((UNSIGNED_BYTE_MASK)& 2359 (norm32>>CC_SHIFT)); 2360 p=null; 2361 } else { 2362 2364 pStart=decompose(norm32, args); 2365 p=extraData; 2366 if(args.length==1) { 2367 2368 c=p[pStart]; 2369 c2=0; 2370 p=null; 2371 } 2372 } 2373 2374 2376 if((destIndex+args.length)<=dest.length) { 2377 int reorderSplit=destIndex; 2378 if(p==null) { 2379 2380 if(args.cc!=0 && args.cc<prevCC) { 2381 2383 destIndex+=args.length; 2384 args.trailCC=insertOrdered(dest,reorderStartIndex, 2385 reorderSplit, destIndex, 2386 c, c2, args.cc); 2387 } else { 2388 2389 dest[destIndex++]=c; 2390 if(c2!=0) { 2391 dest[destIndex++]=c2; 2392 } 2393 } 2394 } else { 2395 2397 if(args.cc!=0 && args.cc<prevCC) { 2398 2400 destIndex+=args.length; 2401 args.trailCC=mergeOrdered(dest,reorderStartIndex, 2402 reorderSplit, p, pStart, 2403 pStart+args.length); 2404 } else { 2405 2406 do { 2407 dest[destIndex++]=p[pStart++]; 2408 } while(--args.length>0); 2409 } 2410 } 2411 } else { 2412 2413 2414 destIndex+=args.length; 2415 } 2416 2417 prevCC=args.trailCC; 2418 if(prevCC==0) { 2419 reorderStartIndex=destIndex; 2420 } 2421 } 2422 destIndexArr[0]=destIndex; 2423 return prevCC; 2424 } 2425 2426 public static int makeFCD(char[] src, int srcStart, int srcLimit, 2427 char[] dest, int destStart, int destLimit, 2428 UnicodeSet nx) { 2429 2430 int prevSrc, decompStart; 2431 int destIndex, length; 2432 char c, c2; 2433 int fcd16; 2434 int prevCC, cc; 2435 2436 2437 decompStart=srcStart; 2438 destIndex=destStart; 2439 prevCC=0; 2440 c=0; 2441 fcd16=0; 2442 int[] destIndexArr = new int[1]; 2443 destIndexArr[0]=destIndex; 2444 2445 for(;;) { 2446 2448 prevSrc=srcStart; 2449 2450 for(;;) { 2451 if(srcStart==srcLimit) { 2452 break; 2453 } else if((c=src[srcStart])<MIN_WITH_LEAD_CC) { 2454 prevCC=(int)-c; 2455 } else if((fcd16=getFCD16(c))==0) { 2456 prevCC=0; 
2457 } else { 2458 break; 2459 } 2460 ++srcStart; 2461 } 2462 2463 2464 2472 2473 2474 if(srcStart!=prevSrc) { 2475 length=(int)(srcStart-prevSrc); 2476 if((destIndex+length)<=destLimit) { 2477 System.arraycopy(src,prevSrc,dest,destIndex,length); 2478 } 2479 destIndex+=length; 2480 prevSrc=srcStart; 2481 2482 2484 if(prevCC<0) { 2485 2487 if(!nx_contains(nx, (int)-prevCC)) { 2488 prevCC=(int)(getFCD16((int)-prevCC)&0xff); 2489 } else { 2490 prevCC=0; 2491 } 2492 2497 decompStart=prevSrc-1; 2498 } 2499 } 2500 2506 2507 2508 if(srcStart==srcLimit) { 2509 break; 2510 } 2511 2512 2513 if(prevCC==0) { 2514 decompStart=prevSrc; 2515 } 2516 2517 2518 ++srcStart; 2519 2520 2521 if(UTF16.isLeadSurrogate(c)) { 2522 2523 if(srcStart!=srcLimit && 2524 UTF16.isTrailSurrogate(c2=src[srcStart])) { 2525 ++srcStart; 2526 fcd16=getFCD16FromSurrogatePair((char)fcd16, c2); 2527 } else { 2528 c2=0; 2529 fcd16=0; 2530 } 2531 } else { 2532 c2=0; 2533 } 2534 2535 2536 if(nx_contains(nx, c, c2)) { 2537 fcd16=0; 2538 } 2539 2540 cc=(int)(fcd16>>8); 2541 if(cc==0 || cc>=prevCC) { 2542 2543 if(cc==0) { 2544 decompStart=prevSrc; 2545 } 2546 prevCC=(int)(fcd16&0xff); 2547 2548 2549 length= c2==0 ? 
1 : 2; 2550 if((destIndex+length)<=destLimit) { 2551 dest[destIndex++]=c; 2552 if(c2!=0) { 2553 dest[destIndex++]=c2; 2554 } 2555 } else { 2556 destIndex+=length; 2557 } 2558 } else { 2559 2564 destIndex-=(int)(prevSrc-decompStart); 2565 2566 2571 srcStart=findSafeFCD(src,srcStart, srcLimit, (char)fcd16); 2572 2573 2577 destIndexArr[0] = destIndex; 2578 prevCC=decomposeFCD(src,decompStart, srcStart,dest, 2579 destIndexArr,nx); 2580 decompStart=srcStart; 2581 destIndex=destIndexArr[0]; 2582 } 2583 } 2584 2585 return destIndex - destStart; 2586 2587 } 2588 2589 public static int getCombiningClass(int c) { 2590 long norm32; 2591 norm32=getNorm32(c); 2592 return (char)((norm32>>CC_SHIFT)&0xFF); 2593 } 2594 2595 public static boolean isFullCompositionExclusion(int c) { 2596 if(isFormatVersion_2_1) { 2597 int aux =AuxTrieImpl.auxTrie.getCodePointValue(c); 2598 return (boolean)((aux & AUX_COMP_EX_MASK)!=0); 2599 } else { 2600 return false; 2601 } 2602 } 2603 2604 public static boolean isCanonSafeStart(int c) { 2605 if(isFormatVersion_2_1) { 2606 int aux = AuxTrieImpl.auxTrie.getCodePointValue(c); 2607 return (boolean)((aux & AUX_UNSAFE_MASK)==0); 2608 } else { 2609 return false; 2610 } 2611 } 2612 2613 public static boolean getCanonStartSet(int c, USerializedSet fillSet) { 2614 2615 if(fillSet!=null && canonStartSets!=null) { 2616 2623 char[] table; 2624 int i=0, start, limit; 2625 2626 int[] indexes = (int[]) canonStartSets[CANON_SET_INDICIES_INDEX]; 2627 char[] startSets = (char[]) canonStartSets[CANON_SET_START_SETS_INDEX]; 2628 2629 if(c<=0xffff) { 2630 table=(char[]) canonStartSets[CANON_SET_BMP_TABLE_INDEX]; 2631 start=0; 2632 limit=table.length; 2633 2634 2635 while(start<limit-2) { 2636 i=(char)(((start+limit)/4)*2); 2637 if(c<table[i]) { 2638 limit=i; 2639 } else { 2640 start=i; 2641 } 2642 } 2643 2645 if(c==table[start]) { 2646 i=table[start+1]; 2647 if((i & CANON_SET_BMP_MASK)==CANON_SET_BMP_IS_INDEX) { 2648 2650 i&=(CANON_SET_MAX_CANON_SETS-1); 2651 return 
fillSet.getSet(startSets,(i-indexes.length)); 2652 } else { 2653 2655 fillSet.setToOne(i); 2656 return true; 2657 } 2658 } 2659 } else { 2660 char high, low, h,j=0; 2661 2662 table=(char[]) canonStartSets[CANON_SET_SUPP_TABLE_INDEX]; 2663 start=0; 2664 limit=table.length; 2665 2666 high=(char)(c>>16); 2667 low=(char)c; 2668 2669 2670 while(start<limit-3) { 2671 2672 i=(char)(((start+limit)/6)*3); 2673 j=(char)(table[i]&0x1f); 2674 int tableVal = table[i+1]; 2675 int lowInt = low; 2676 if(high<j || ((tableVal>lowInt) && (high==j))) { 2677 limit=i; 2678 } else { 2679 start=i; 2680 } 2681 2682 2684 if(ICUDebug.enabled()){ 2687 System.err.println("\t\t j = " + Utility.hex(j,4) + 2688 "\t i = " + Utility.hex(i,4) + 2689 "\t high = "+ Utility.hex(high) + 2690 "\t low = " + Utility.hex(lowInt,4) + 2691 "\t table[i+1]: "+ Utility.hex(tableVal,4) 2692 ); 2693 } 2694 2695 } 2696 2697 2698 h=table[start]; 2699 2700 int tableVal1 = table[start+1]; 2702 int lowInt = low; 2703 2704 if(high==(h&0x1f) && lowInt==tableVal1) { 2705 int tableVal2 = table[start+2]; 2706 i=tableVal2; 2707 if((h&0x8000)==0) { 2708 2709 return fillSet.getSet(startSets,(i-indexes.length)); 2710 } else { 2711 2715 int temp = ((int)h & 0x1f00)<<8; 2717 i|=temp; 2718 fillSet.setToOne((int)i); 2719 return true; 2720 } 2721 } 2722 } 2723 } 2724 2725 return false; 2726 } 2727 2728 public static int getFC_NFKC_Closure(int c, char[] dest) { 2729 2730 int destCapacity; 2731 2732 if(dest==null ) { 2733 destCapacity=0; 2734 }else{ 2735 destCapacity = dest.length; 2736 } 2737 2738 int aux =AuxTrieImpl.auxTrie.getCodePointValue(c); 2739 2740 aux&= AUX_FNC_MASK; 2741 if(aux!=0) { 2742 int s; 2743 int index=aux; 2744 int length; 2745 2746 s =extraData[index]; 2747 if(s<0xff00) { 2748 2749 length=1; 2750 } else { 2751 length=s&0xff; 2752 ++index; 2753 } 2754 if(0<length && length<=destCapacity) { 2755 System.arraycopy(extraData,index,dest,0,length); 2756 } 2757 return length; 2758 } else { 2759 return 0; 2760 } 2761 } 
2762 2763 2764 2765 public static boolean isNFSkippable(int c, Normalizer.Mode mode, long mask) { 2766 long norm32; 2767 mask = mask & UNSIGNED_INT_MASK; 2768 char aux; 2769 2770 2771 norm32 = getNorm32(c); 2772 2773 if((norm32&mask)!=0) { 2774 return false; 2775 } 2776 2777 if(mode == Normalizer.NFD || mode == Normalizer.NFKD || mode == Normalizer.NONE){ 2778 return true; 2779 } 2780 2781 2782 2783 if((norm32& QC_NFD)==0) { 2784 return true; 2785 } 2786 2787 2788 if(isNorm32HangulOrJamo(norm32)) { 2789 2790 return !isHangulWithoutJamoT((char)c); 2791 } 2792 2793 2794 2795 if(!isFormatVersion_2_2) { 2796 return false; 2797 } 2798 2799 2800 aux = AuxTrieImpl.auxTrie.getCodePointValue(c); 2801 return (aux&AUX_NFC_SKIP_F_MASK)==0; 2802 2803 2804 } 2805 2806 2814 2815 public static UnicodeSet addPropertyStarts(UnicodeSet set) { 2816 int c; 2817 2818 2819 TrieIterator normIter = new TrieIterator(NormTrieImpl.normTrie); 2821 RangeValueIterator.Element normResult = new RangeValueIterator.Element(); 2822 2823 while(normIter.next(normResult)){ 2824 set.add(normResult.start); 2825 } 2826 2827 TrieIterator fcdIter = new TrieIterator(FCDTrieImpl.fcdTrie); 2829 RangeValueIterator.Element fcdResult = new RangeValueIterator.Element(); 2830 2831 while(fcdIter.next(fcdResult)){ 2832 set.add(fcdResult.start); 2833 } 2834 2835 if(isFormatVersion_2_1){ 2836 TrieIterator auxIter = new TrieIterator(AuxTrieImpl.auxTrie); 2838 RangeValueIterator.Element auxResult = new RangeValueIterator.Element(); 2839 while(auxIter.next(auxResult)){ 2840 set.add(auxResult.start); 2841 } 2842 } 2843 2844 for(c=HANGUL_BASE; c<HANGUL_BASE+HANGUL_COUNT; c+=JAMO_T_COUNT) { 2845 set.add(c); 2846 set.add(c+1); 2847 } 2848 set.add(HANGUL_BASE+HANGUL_COUNT); 2849 return set; } 2851 2852 2859 public static final int quickCheck(int c, int modeValue) { 2860 final int qcMask[]={ 2861 0, 0, QC_NFD, QC_NFKD, QC_NFC, QC_NFKC 2862 }; 2863 2864 int norm32=(int)getNorm32(c)&qcMask[modeValue]; 2865 2866 if(norm32==0) { 
2867 return 1; } else if((norm32&QC_ANY_NO)!=0) { 2869 return 0; } else { 2871 return 2; } 2873 } 2874 2875 2884 public CharTrie getFCDTrie(){ 2886 return FCDTrieImpl.fcdTrie; 2887 } 2888 2890 2891 2892 2893 2894 2984 2985 private static class CmpEquivLevel { 2987 char[] source; 2988 int start; 2989 int s; 2990 int limit; 2991 } 2992 2993 3001 private static int decompose(int c, char[] buffer) { 3002 3003 long norm32; 3004 int length=0; 3005 norm32 = (long) ((UNSIGNED_INT_MASK) & NormTrieImpl.normTrie.getCodePointValue(c)); 3006 if((norm32 & QC_NFD)!=0) { 3007 if(isNorm32HangulOrJamo(norm32)) { 3008 3009 char c2; 3010 3011 c-=HANGUL_BASE; 3012 3013 c2=(char)(c%JAMO_T_COUNT); 3014 c/=JAMO_T_COUNT; 3015 if(c2>0) { 3016 buffer[2]=(char)(JAMO_T_BASE+c2); 3017 length=3; 3018 } else { 3019 length=2; 3020 } 3021 buffer[1]=(char)(JAMO_V_BASE+c%JAMO_V_COUNT); 3022 buffer[0]=(char)(JAMO_L_BASE+c/JAMO_V_COUNT); 3023 return length; 3024 } else { 3025 3026 DecomposeArgs args = new DecomposeArgs(); 3027 int index = decompose(norm32, args); 3028 System.arraycopy(extraData,index,buffer,0,args.length); 3029 return args.length ; 3030 } 3031 } else { 3032 return 0; 3033 } 3034 } 3035 3036 private static int foldCase(int c, char[] dest, int destStart, int destLimit, 3037 int options){ 3038 String src = UTF16.valueOf(c); 3039 String foldedStr = UCharacter.foldCase(src,options); 3040 char[] foldedC = foldedStr.toCharArray(); 3041 for(int i=0;i<foldedC.length;i++){ 3042 if(destStart<destLimit){ 3043 dest[destStart]=foldedC[i]; 3044 } 3045 destStart++; 3048 } 3049 return (c==UTF16.charAt(foldedStr,0)) ? 
-destStart : destStart; 3050 } 3051 3052 3071 public static int cmpEquivFold(String s1, String s2,int options){ 3072 return cmpEquivFold(s1.toCharArray(),0,s1.length(), 3073 s2.toCharArray(),0,s2.length(), 3074 options); 3075 } 3076 3077 3078 public static int cmpEquivFold(char[] s1, int s1Start,int s1Limit, 3080 char[] s2, int s2Start,int s2Limit, 3081 int options) { 3082 int start1, start2, limit1, limit2; 3084 char[] cSource1, cSource2; 3085 3086 cSource1 = s1; 3087 cSource2 = s2; 3088 int length; 3090 3091 CmpEquivLevel[] stack1 = new CmpEquivLevel[]{ 3093 new CmpEquivLevel(), 3094 new CmpEquivLevel() 3095 }; 3096 CmpEquivLevel[] stack2 = new CmpEquivLevel[]{ 3097 new CmpEquivLevel(), 3098 new CmpEquivLevel() 3099 }; 3100 3101 char[] decomp1 = new char[8]; 3103 char[] decomp2 = new char[8]; 3104 3105 char[] fold1 = new char[32]; 3107 char[] fold2 = new char[32]; 3108 3109 int level1, level2; 3111 3112 int c1, c2; 3114 int cp1, cp2; 3115 3116 3118 3123 3124 start1=s1Start; 3126 limit1=s1Limit; 3127 3128 start2=s2Start; 3129 limit2=s2Limit; 3130 3131 level1=level2=0; 3132 c1=c2=-1; 3133 cp1=cp2=-1; 3134 for(;;) { 3136 3139 if(c1<0) { 3140 for(;;) { 3142 if(s1Start>=limit1) { 3143 if(level1==0) { 3144 c1=-1; 3145 break; 3146 } 3147 } else { 3148 c1=cSource1[s1Start]; 3149 ++s1Start; 3150 break; 3151 } 3152 3153 do { 3155 --level1; 3156 start1=stack1[level1].start; 3157 } while(start1==-1); s1Start=stack1[level1].s; 3159 limit1=stack1[level1].limit; 3160 cSource1=stack1[level1].source; 3161 } 3162 } 3163 3164 if(c2<0) { 3165 for(;;) { 3167 if(s2Start>=limit2) { 3168 if(level2==0) { 3169 c2=-1; 3170 break; 3171 } 3172 } else { 3173 c2=cSource2[s2Start]; 3174 ++s2Start; 3175 break; 3176 } 3177 3178 do { 3180 --level2; 3181 start2=stack2[level2].start; 3182 } while(start2==-1); 3183 s2Start=stack2[level2].s; 3184 limit2=stack2[level2].limit; 3185 cSource2=stack2[level2].source; 3186 } 3187 } 3188 3189 if(c1==c2) { 3193 if(c1<0) { 3194 return 0; } 3196 c1=c2=-1; 
continue; 3198 } else if(c1<0) { 3199 return -1; } else if(c2<0) { 3201 return 1; } 3203 3205 cp1=c1; 3208 if(UTF16.isSurrogate((char)c1)) { 3209 char c; 3210 3211 if(UTF16.isLeadSurrogate((char)c1)) { 3212 if( s1Start!=limit1 && 3213 UTF16.isTrailSurrogate(c=cSource1[s1Start]) 3214 ) { 3215 cp1=UCharacterProperty.getRawSupplementary((char)c1, c); 3217 } 3218 } else { 3219 if( start1<=(s1Start-2) && 3220 UTF16.isLeadSurrogate(c=cSource1[(s1Start-2)]) 3221 ) { 3222 cp1=UCharacterProperty.getRawSupplementary(c, (char)c1); 3223 } 3224 } 3225 } 3226 cp2=c2; 3227 if(UTF16.isSurrogate((char)c2)) { 3228 char c; 3229 3230 if(UTF16.isLeadSurrogate((char)c2)) { 3231 if( s2Start!=limit2 && 3232 UTF16.isTrailSurrogate(c=cSource2[s2Start]) 3233 ) { 3234 cp2=UCharacterProperty.getRawSupplementary((char)c2, c); 3236 } 3237 } else { 3238 if( start2<=(s2Start-2) && 3239 UTF16.isLeadSurrogate(c=cSource2[s2Start-2]) 3240 ) { 3241 cp2=UCharacterProperty.getRawSupplementary(c, (char)c2); 3242 } 3243 } 3244 } 3245 3246 if( level1<2 && ((options & Normalizer.COMPARE_IGNORE_CASE)!=0)&& 3249 (length=foldCase(cp1, fold1, 0,32,options))>=0 3250 ) { 3251 if(UTF16.isSurrogate((char)c1)) { 3253 if(UTF16.isLeadSurrogate((char)c1)) { 3254 ++s1Start; 3257 } else { 3258 --s2Start; 3264 c2=cSource2[(s2Start-1)]; 3265 } 3266 } 3267 3268 stack1[0].start=start1; 3270 stack1[0].s=s1Start; 3271 stack1[0].limit=limit1; 3272 stack1[0].source=cSource1; 3273 ++level1; 3274 3275 cSource1 = fold1; 3276 start1=s1Start=0; 3277 limit1=length; 3278 3279 c1=-1; 3281 continue; 3282 } 3283 3284 if( level2<2 && ((options& Normalizer.COMPARE_IGNORE_CASE)!=0) && 3285 (length=foldCase(cp2, fold2,0,32, options))>=0 3286 ) { 3287 if(UTF16.isSurrogate((char)c2)) { 3289 if(UTF16.isLeadSurrogate((char)c2)) { 3290 ++s2Start; 3293 } else { 3294 --s1Start; 3300 c1=cSource1[(s1Start-1)]; 3301 } 3302 } 3303 3304 stack2[0].start=start2; 3306 stack2[0].s=s2Start; 3307 stack2[0].limit=limit2; 3308 stack2[0].source=cSource2; 3309 
++level2; 3310 3311 cSource2 = fold2; 3312 start2=s2Start=0; 3313 limit2=length; 3314 3315 c2=-1; 3317 continue; 3318 } 3319 3320 if( level1<2 && ((options&COMPARE_EQUIV)!=0) && 3321 0!=(length=decompose(cp1,decomp1)) 3322 ) { 3323 if(UTF16.isSurrogate((char)c1)) { 3325 if(UTF16.isLeadSurrogate((char)c1)) { 3326 ++s1Start; 3329 } else { 3330 --s2Start; 3336 c2=cSource2[(s2Start-1)]; 3337 } 3338 } 3339 3340 stack1[level1].start=start1; 3342 stack1[level1].s=s1Start; 3343 stack1[level1].limit=limit1; 3344 stack1[level1].source=cSource1; 3345 ++level1; 3346 3347 cSource1 = decomp1; 3349 start1=s1Start=0; 3350 limit1=length; 3351 3352 if(level1<2) { 3354 stack1[level1++].start=-1; 3355 } 3356 c1=-1; 3358 continue; 3359 } 3360 3361 if( level2<2 && ((options&COMPARE_EQUIV)!=0) && 3362 0!=(length=decompose(cp2, decomp2)) 3363 ) { 3364 if(UTF16.isSurrogate((char)c2)) { 3366 if(UTF16.isLeadSurrogate((char)c2)) { 3367 ++s2Start; 3370 } else { 3371 --s1Start; 3377 c1=cSource1[(s1Start-1)]; 3378 } 3379 } 3380 3381 stack2[level2].start=start2; 3383 stack2[level2].s=s2Start; 3384 stack2[level2].limit=limit2; 3385 stack2[level2].source=cSource2; 3386 ++level2; 3387 3388 cSource2=decomp2; 3390 start2=s2Start=0; 3391 limit2=length; 3392 3393 if(level2<2) { 3395 stack2[level2++].start=-1; 3396 } 3397 3398 c2=-1; 3400 continue; 3401 } 3402 3403 3404 3407 3412 3419 if( c1>=0xd800 && c2>=0xd800 && 3420 ((options&Normalizer.COMPARE_CODE_POINT_ORDER)!=0) 3421 ) { 3422 3424 if( 3425 ( c1<=0xdbff && s1Start!=limit1 3426 && 3427 UTF16.isTrailSurrogate(cSource1[s1Start]) 3428 ) 3429 || 3430 ( UTF16.isTrailSurrogate((char)c1) && start1!=(s1Start-1) 3431 && 3432 UTF16.isLeadSurrogate(cSource1[(s1Start-2)]) 3433 ) 3434 ) { 3435 3436 } else { 3437 3439 c1-=0x2800; 3440 } 3441 3442 if( 3443 ( c2<=0xdbff && s2Start!=limit2 3444 && 3445 UTF16.isTrailSurrogate(cSource2[s2Start]) 3446 ) 3447 || 3448 ( UTF16.isTrailSurrogate((char)c2) && start2!=(s2Start-1) 3449 && 3450 
UTF16.isLeadSurrogate(cSource2[(s2Start-2)]) 3451 ) 3452 ) { 3453 3454 } else { 3455 3457 c2-=0x2800; 3458 } 3459 } 3460 3461 return c1-c2; 3462 } 3463 } 3464 private static int strCompare(char[] s1, int s1Start, int s1Limit, 3465 char[] s2, int s2Start, int s2Limit, 3466 boolean codePointOrder) { 3467 3468 int start1, start2, limit1, limit2; 3469 3470 char c1, c2; 3471 3472 3473 start1=s1Start; 3474 start2=s2Start; 3475 3476 int length1, length2; 3477 3478 length1 = s1Limit - s1Start; 3479 length2 = s2Limit - s2Start; 3480 3481 int lengthResult; 3482 3483 if(length1<length2) { 3484 lengthResult=-1; 3485 limit1=start1+length1; 3486 } else if(length1==length2) { 3487 lengthResult=0; 3488 limit1=start1+length1; 3489 } else { 3490 lengthResult=1; 3491 limit1=start1+length2; 3492 } 3493 3494 if(s1==s2) { 3495 return lengthResult; 3496 } 3497 3498 for(;;) { 3499 3500 if(s1Start==limit1) { 3501 return lengthResult; 3502 } 3503 3504 c1=s1[s1Start]; 3505 c2=s2[s2Start]; 3506 if(c1!=c2) { 3507 break; 3508 } 3509 ++s1Start; 3510 ++s2Start; 3511 } 3512 3513 3514 limit1=start1+length1; 3515 limit2=start2+length2; 3516 3517 3518 3519 if(c1>=0xd800 && c2>=0xd800 && codePointOrder) { 3520 3522 if( 3523 ( c1<=0xdbff && (s1Start+1)!=limit1 && 3524 UTF16.isTrailSurrogate(s1[(s1Start+1)]) 3525 ) || 3526 ( UTF16.isTrailSurrogate(c1) && start1!=s1Start && 3527 UTF16.isLeadSurrogate(s1[(s1Start-1)]) 3528 ) 3529 ) { 3530 3531 } else { 3532 3533 c1-=0x2800; 3534 } 3535 3536 if( 3537 ( c2<=0xdbff && (s2Start+1)!=limit2 && 3538 UTF16.isTrailSurrogate(s2[(s2Start+1)]) 3539 ) || 3540 ( UTF16.isTrailSurrogate(c2) && start2!=s2Start && 3541 UTF16.isLeadSurrogate(s2[(s2Start-1)]) 3542 ) 3543 ) { 3544 3545 } else { 3546 3547 c2-=0x2800; 3548 } 3549 } 3550 3551 3552 return (int)c1-(int)c2; 3553 } 3554 3555 3556 3589 3590 3596 private static final int OPTIONS_NX_MASK=0x1f; 3597 private static final int OPTIONS_UNICODE_MASK=0xe0; 3598 public static final int OPTIONS_SETS_MASK=0xff; 3599 private 
static final int OPTIONS_UNICODE_SHIFT=5; 3600 private static final UnicodeSet[] nxCache = new UnicodeSet[OPTIONS_SETS_MASK+1]; 3601 3602 3603 3604 3608 private static final int NX_HANGUL = 1; 3609 3613 private static final int NX_CJK_COMPAT=2; 3614 3626 public static final int BEFORE_PRI_29=0x100; 3627 3628 3634 3635 3636 public static final int OPTIONS_COMPAT=0x1000; 3637 3638 public static final int OPTIONS_COMPOSE_CONTIGUOUS=0x2000; 3639 3640 3641 3642 3649 private static final synchronized UnicodeSet internalGetNXHangul() { 3650 3651 3652 if(nxCache[NX_HANGUL]==null) { 3653 nxCache[NX_HANGUL]=new UnicodeSet(0xac00, 0xd7a3); 3654 } 3655 return nxCache[NX_HANGUL]; 3656 } 3657 3658 private static final synchronized UnicodeSet internalGetNXCJKCompat() { 3659 3660 3661 if(nxCache[NX_CJK_COMPAT]==null) { 3662 3663 3664 UnicodeSet set, hasDecomp; 3665 3666 set=new UnicodeSet("[:Ideographic:]"); 3667 3668 3669 hasDecomp=new UnicodeSet(); 3670 3671 3672 UnicodeSetIterator it = new UnicodeSetIterator(set); 3673 int start, end; 3674 long norm32; 3675 3676 while(it.nextRange() && (it.codepoint != UnicodeSetIterator.IS_STRING)) { 3677 start=it.codepoint; 3678 end=it.codepointEnd; 3679 while(start<=end) { 3680 norm32 = getNorm32(start); 3681 if((norm32 & QC_NFD)>0) { 3682 hasDecomp.add(start); 3683 } 3684 ++start; 3685 } 3686 } 3687 3688 3689 nxCache[NX_CJK_COMPAT]=hasDecomp; 3690 3691 } 3692 3693 return nxCache[NX_CJK_COMPAT]; 3694 } 3695 3696 private static final synchronized UnicodeSet internalGetNXUnicode(int options) { 3697 options &= OPTIONS_UNICODE_MASK; 3698 if(options==0) { 3699 return null; 3700 } 3701 3702 if(nxCache[options]==null) { 3703 3704 UnicodeSet set = new UnicodeSet(); 3705 3706 switch(options) { 3707 case Normalizer.UNICODE_3_2: 3708 set.applyPattern("[:^Age=3.2:]"); 3709 break; 3710 default: 3711 return null; 3712 } 3713 3714 nxCache[options]=set; 3715 } 3716 3717 return nxCache[options]; 3718 } 3719 3720 3721 private static final synchronized 
UnicodeSet internalGetNX(int options) { 3722 options&=OPTIONS_SETS_MASK; 3723 3724 if(nxCache[options]==null) { 3725 3726 if(options==NX_HANGUL) { 3727 return internalGetNXHangul(); 3728 } 3729 if(options==NX_CJK_COMPAT) { 3730 return internalGetNXCJKCompat(); 3731 } 3732 if((options & OPTIONS_UNICODE_MASK)!=0 && (options & OPTIONS_NX_MASK)==0) { 3733 return internalGetNXUnicode(options); 3734 } 3735 3736 3737 UnicodeSet set; 3738 UnicodeSet other; 3739 3740 set=new UnicodeSet(); 3741 3742 3743 if((options & NX_HANGUL)!=0 && null!=(other=internalGetNXHangul())) { 3744 set.addAll(other); 3745 } 3746 if((options&NX_CJK_COMPAT)!=0 && null!=(other=internalGetNXCJKCompat())) { 3747 set.addAll(other); 3748 } 3749 if((options&OPTIONS_UNICODE_MASK)!=0 && null!=(other=internalGetNXUnicode(options))) { 3750 set.addAll(other); 3751 } 3752 3753 nxCache[options]=set; 3754 } 3755 return nxCache[options]; 3756 } 3757 3758 public static final UnicodeSet getNX(int options) { 3759 if((options&=OPTIONS_SETS_MASK)==0) { 3760 3761 return null; 3762 } else { 3763 return internalGetNX(options); 3764 } 3765 } 3766 3767 private static final boolean nx_contains(UnicodeSet nx, int c) { 3768 return nx!=null && nx.contains(c); 3769 } 3770 3771 private static final boolean nx_contains(UnicodeSet nx, char c, char c2) { 3772 return nx!=null && nx.contains(c2==0 ? c : UCharacterProperty.getRawSupplementary(c, c2)); 3773 } 3774 3775 3776} 3777 | Popular Tags |