1 7 package com.ibm.icu.text; 8 9 import java.io.IOException ; 10 import java.text.ParseException ; 11 import java.util.Hashtable ; 12 import java.util.Vector ; 13 import java.util.Arrays ; 14 import java.util.Enumeration ; 15 16 import com.ibm.icu.impl.TrieBuilder; 17 import com.ibm.icu.impl.IntTrieBuilder; 18 import com.ibm.icu.impl.TrieIterator; 19 import com.ibm.icu.impl.Utility; 20 import com.ibm.icu.impl.UCharacterProperty; 21 import com.ibm.icu.lang.UCharacter; 22 import com.ibm.icu.lang.UCharacterCategory; 23 import com.ibm.icu.impl.NormalizerImpl; 24 import com.ibm.icu.util.RangeValueIterator; 25 import com.ibm.icu.util.VersionInfo; 26 27 34 final class CollationParsedRuleBuilder 35 { 36 38 44 CollationParsedRuleBuilder(String rules) throws ParseException 45 { 46 m_parser_ = new CollationRuleParser(rules); 47 m_parser_.assembleTokenList(); 48 m_utilColEIter_ = RuleBasedCollator.UCA_.getCollationElementIterator( 49 ""); 50 } 51 52 54 57 static class InverseUCA 58 { 59 61 InverseUCA() 62 { 63 } 64 65 67 70 int m_table_[]; 71 74 char m_continuations_[]; 75 76 79 VersionInfo m_UCA_version_; 80 81 83 92 final int getInversePrevCE(int ce, int contce, int strength, 93 int prevresult[]) 94 { 95 int result = findInverseCE(ce, contce); 96 97 if (result < 0) { 98 prevresult[0] = CollationElementIterator.NULLORDER; 99 return -1; 100 } 101 102 ce &= STRENGTH_MASK_[strength]; 103 contce &= STRENGTH_MASK_[strength]; 104 105 prevresult[0] = ce; 106 prevresult[1] = contce; 107 108 while ((prevresult[0] & STRENGTH_MASK_[strength]) == ce 109 && (prevresult[1] & STRENGTH_MASK_[strength])== contce 110 && result > 0) { 111 prevresult[0] = m_table_[3 * (-- result)]; 115 prevresult[1] = m_table_[3 * result + 1]; 116 } 117 return result; 118 } 119 120 final int getCEStrengthDifference(int CE, int contCE, 121 int prevCE, int prevContCE) { 122 int strength = Collator.TERTIARY; 123 while( 124 ((prevCE & STRENGTH_MASK_[strength]) != (CE & STRENGTH_MASK_[strength]) 125 || (prevContCE & STRENGTH_MASK_[strength]) != (contCE & STRENGTH_MASK_[strength])) 126 && (strength != 0)) { 127 strength--; 128 } 129 return strength; 130 } 131 132 private int compareCEs(int source0, int source1, int target0, int target1) { 133 int s1 = source0, s2, t1 = target0, t2; 134 if(RuleBasedCollator.isContinuation(source1)) { 135 s2 = source1; 136 } else { 137 s2 = 0; 138 } 139 if(RuleBasedCollator.isContinuation(target1)) { 140 t2 = target1; 141 } else { 142 t2 = 0; 143 } 144 145 int s = 0, t = 0; 146 if(s1 == t1 && s2 == t2) { 147 return 0; 148 } 149 s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16); 150 t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16); 151 if(s == t) { 152 s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8; 153 t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8; 154 if(s == t) { 155 s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF); 156 t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF); 157 return Utility.compareUnsigned(s, t); 158 } else { 159 return Utility.compareUnsigned(s, t); 160 } 161 } else { 162 return Utility.compareUnsigned(s, t); 163 } 164 } 165 166 172 int findInverseCE(int ce, int contce) 173 { 174 int bottom = 0; 175 int top = m_table_.length / 3; 176 int result = 0; 177 178 while (bottom < top - 1) { 179 result = (top + bottom) >> 1; 180 int first = m_table_[3 * result]; 181 int second = m_table_[3 * result + 1]; 182 int comparison = compareCEs(first, second, ce, contce); 183 if (comparison > 0) { 184 top = result; 185 } 186 else if (comparison < 0) { 187 bottom = result; 188 } 189 else { 190 break; 191 } 192 } 193 194 return result; 195 } 196 197 203 void getInverseGapPositions(CollationRuleParser.TokenListHeader 204 listheader) 205 throws Exception 206 { 207 CollationRuleParser.Token token = listheader.m_first_; 209 int tokenstrength = token.m_strength_; 210 211 for (int i = 0; i < 3; i ++) { 212 listheader.m_gapsHi_[3 * i] = 0; 213 listheader.m_gapsHi_[3 * i + 1] = 0; 214 listheader.m_gapsHi_[3 * i + 2] = 0; 215 listheader.m_gapsLo_[3 * i] = 0; 216 listheader.m_gapsLo_[3 * i + 1] = 0; 217 listheader.m_gapsLo_[3 * i + 2] = 0; 218 listheader.m_numStr_[i] = 0; 219 listheader.m_fStrToken_[i] = null; 220 listheader.m_lStrToken_[i] = null; 221 listheader.m_pos_[i] = -1; 222 } 223 224 if ((listheader.m_baseCE_ >>> 24) 225 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_ 226 && (listheader.m_baseCE_ >>> 24) 227 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_) 228 { 229 listheader.m_pos_[0] = 0; 231 int t1 = listheader.m_baseCE_; 232 int t2 = listheader.m_baseContCE_; 233 listheader.m_gapsLo_[0] = mergeCE(t1, t2, 234 Collator.PRIMARY); 235 listheader.m_gapsLo_[1] = mergeCE(t1, t2, 236 Collator.SECONDARY); 237 listheader.m_gapsLo_[2] = mergeCE(t1, t2, 238 Collator.TERTIARY); 239 int primaryCE = t1 & RuleBasedCollator.CE_PRIMARY_MASK_ | (t2 & RuleBasedCollator.CE_PRIMARY_MASK_) >>> 16; 240 primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(RuleBasedCollator.impCEGen_.getRawFromImplicit(primaryCE)+1); 241 242 t1 = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505; 243 t2 = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_; 244 245 listheader.m_gapsHi_[0] = mergeCE(t1, t2, 259 Collator.PRIMARY); 260 listheader.m_gapsHi_[1] = mergeCE(t1, t2, 261 Collator.SECONDARY); 262 listheader.m_gapsHi_[2] = mergeCE(t1, t2, 263 Collator.TERTIARY); 264 } 265 else if (listheader.m_indirect_ == true 266 && listheader.m_nextCE_ != 0) { 267 listheader.m_pos_[0] = 0; 268 int t1 = listheader.m_baseCE_; 269 int t2 = listheader.m_baseContCE_; 270 listheader.m_gapsLo_[0] = mergeCE(t1, t2, 271 Collator.PRIMARY); 272 listheader.m_gapsLo_[1] = mergeCE(t1, t2, 273 Collator.SECONDARY); 274 listheader.m_gapsLo_[2] = mergeCE(t1, t2, 275 Collator.TERTIARY); 276 t1 = listheader.m_nextCE_; 277 t2 = listheader.m_nextContCE_; 278 listheader.m_gapsHi_[0] = mergeCE(t1, t2, 279 Collator.PRIMARY); 280 listheader.m_gapsHi_[1] = mergeCE(t1, t2, 281 Collator.SECONDARY); 282 listheader.m_gapsHi_[2] = mergeCE(t1, t2, 283 Collator.TERTIARY); 284 } 285 else { 286 while (true) { 287 if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) { 288 listheader.m_pos_[tokenstrength] 289 = getInverseNext(listheader, 290 tokenstrength); 291 if (listheader.m_pos_[tokenstrength] >= 0) { 292 listheader.m_fStrToken_[tokenstrength] = token; 293 } 294 else { 295 throw new Exception ("Internal program error"); 299 } 300 } 301 302 while (token != null && token.m_strength_ >= tokenstrength) 303 { 304 if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_) { 305 listheader.m_lStrToken_[tokenstrength] = token; 306 } 307 token = token.m_next_; 308 } 309 if (tokenstrength < CE_BASIC_STRENGTH_LIMIT_ - 1) { 310 if (listheader.m_pos_[tokenstrength] 313 == listheader.m_pos_[tokenstrength + 1]) { 314 listheader.m_fStrToken_[tokenstrength] 315 = listheader.m_fStrToken_[tokenstrength 316 + 1]; 317 listheader.m_fStrToken_[tokenstrength + 1] = null; 318 listheader.m_lStrToken_[tokenstrength + 1] = null; 319 listheader.m_pos_[tokenstrength + 1] = -1; 320 } 321 } 322 if (token != null) { 323 tokenstrength = token.m_strength_; 324 } 325 else { 326 break; 327 } 328 } 329 for (int st = 0; st < 3; st ++) { 330 int pos = listheader.m_pos_[st]; 331 if (pos >= 0) { 332 int t1 = m_table_[3 * pos]; 333 int t2 = m_table_[3 * pos + 1]; 334 listheader.m_gapsHi_[3 * st] = mergeCE(t1, t2, 335 Collator.PRIMARY); 336 listheader.m_gapsHi_[3 * st + 1] = mergeCE(t1, t2, 337 Collator.SECONDARY); 338 listheader.m_gapsHi_[3 * st + 2] = (t1 & 0x3f) << 24 339 | (t2 & 0x3f) << 16; 340 t1 = listheader.m_baseCE_; 344 t2 = listheader.m_baseContCE_; 345 346 listheader.m_gapsLo_[3 * st] = mergeCE(t1, t2, 347 Collator.PRIMARY); 348 listheader.m_gapsLo_[3 * st + 1] = mergeCE(t1, t2, 349 Collator.SECONDARY); 350 listheader.m_gapsLo_[3 * st + 2] = (t1 & 0x3f) << 24 351 | (t2 & 0x3f) << 16; 352 } 353 } 354 } 355 } 356 357 363 private final int getInverseNext(CollationRuleParser.TokenListHeader 364 listheader, 365 int strength) 366 { 367 int ce = listheader.m_baseCE_; 368 int secondce = listheader.m_baseContCE_; 369 int result = findInverseCE(ce, secondce); 370 371 if (result < 0) { 372 return -1; 373 } 374 375 ce &= STRENGTH_MASK_[strength]; 376 secondce &= STRENGTH_MASK_[strength]; 377 378 int nextce = ce; 379 int nextcontce = secondce; 380 381 while((nextce & STRENGTH_MASK_[strength]) == ce 382 && (nextcontce & STRENGTH_MASK_[strength]) == secondce) { 383 nextce = m_table_[3 * (++ result)]; 384 nextcontce = m_table_[3 * result + 1]; 385 } 386 387 listheader.m_nextCE_ = nextce; 388 listheader.m_nextContCE_ = nextcontce; 389 390 return result; 391 } 392 } 393 394 396 399 static final InverseUCA INVERSE_UCA_; 400 401 404 private static final String INV_UCA_VERSION_MISMATCH_ = 405 "UCA versions of UCA and inverse UCA should match"; 406 407 410 private static final String UCA_NOT_INSTANTIATED_ = 411 "UCA is not instantiated!"; 412 413 416 static { 417 InverseUCA temp = null; 418 try { 419 temp = CollatorReader.getInverseUCA(); 420 } catch (IOException e) { 421 } 422 438 439 if(temp != null && RuleBasedCollator.UCA_ != null) { 440 if(!temp.m_UCA_version_.equals(RuleBasedCollator.UCA_.m_UCA_version_)) { 441 throw new RuntimeException (INV_UCA_VERSION_MISMATCH_); 442 } 443 } else { 444 throw new RuntimeException (UCA_NOT_INSTANTIATED_); 445 } 446 447 INVERSE_UCA_ = temp; 448 } 449 450 452 457 void setRules(RuleBasedCollator collator) throws Exception 458 { 459 if (m_parser_.m_resultLength_ > 0 || m_parser_.m_removeSet_ != null) { 460 assembleTailoringTable(collator); 462 } 463 else { collator.setWithUCATables(); 466 } 467 m_parser_.setDefaultOptionsInCollator(collator); 469 } 470 471 private void copyRangeFromUCA(BuildTable t, int start, int end) { 472 int u = 0; 473 for (u = start; u <= end; u ++) { 474 int CE = t.m_mapping_.getValue(u); 476 if (CE == CE_NOT_FOUND_ 477 || (isContractionTableElement(CE) 482 && getCE(t.m_contractions_, CE, 0) == CE_NOT_FOUND_)) { 483 m_utilElement_.m_uchars_ = UCharacter.toString(u); 485 m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; 486 m_utilElement_.m_prefix_ = 0; 487 m_utilElement_.m_CELength_ = 0; 488 m_utilColEIter_.setText(m_utilElement_.m_uchars_); 489 while (CE != CollationElementIterator.NULLORDER) { 490 CE = m_utilColEIter_.next(); 491 if (CE != CollationElementIterator.NULLORDER) { 492 m_utilElement_.m_CEs_[m_utilElement_.m_CELength_ ++] 493 = CE; 494 } 495 } 496 addAnElement(t, m_utilElement_); 497 } 498 } 499 } 500 501 539 void assembleTailoringTable(RuleBasedCollator collator) throws Exception 540 { 541 542 for (int i = 0; i < m_parser_.m_resultLength_; i ++) { 543 if (m_parser_.m_listHeader_[i].m_first_ != null) { 547 initBuffers(m_parser_.m_listHeader_[i]); 552 } 553 } 554 555 if (m_parser_.m_variableTop_ != null) { 556 m_parser_.m_options_.m_variableTopValue_ 558 = m_parser_.m_variableTop_.m_CE_[0] >>> 16; 559 if (m_parser_.m_variableTop_.m_listHeader_.m_first_ 561 == m_parser_.m_variableTop_) { m_parser_.m_variableTop_.m_listHeader_.m_first_ 563 = m_parser_.m_variableTop_.m_next_; 564 } 565 if (m_parser_.m_variableTop_.m_listHeader_.m_last_ 566 == m_parser_.m_variableTop_) { 567 m_parser_.m_variableTop_.m_listHeader_.m_last_ 569 = m_parser_.m_variableTop_.m_previous_; 570 } 571 if (m_parser_.m_variableTop_.m_next_ != null) { 572 m_parser_.m_variableTop_.m_next_.m_previous_ 573 = m_parser_.m_variableTop_.m_previous_; 574 } 575 if (m_parser_.m_variableTop_.m_previous_ != null) { 576 m_parser_.m_variableTop_.m_previous_.m_next_ 577 = m_parser_.m_variableTop_.m_next_; 578 } 579 } 580 581 BuildTable t = new BuildTable(m_parser_); 582 583 for (int i = 0; i < m_parser_.m_resultLength_; i ++) { 587 createElements(t, m_parser_.m_listHeader_[i]); 591 } 592 593 m_utilElement_.clear(); 594 StringBuffer str = new StringBuffer (); 595 596 copyRangeFromUCA(t, 0, 0xFF); 598 599 if(m_parser_.m_copySet_ != null) { 601 int i = 0; 602 for(i = 0; i < m_parser_.m_copySet_.getRangeCount(); i++) { 603 copyRangeFromUCA(t, m_parser_.m_copySet_.getRangeStart(i), 604 m_parser_.m_copySet_.getRangeEnd(i)); 605 } 606 } 607 608 char conts[] = RuleBasedCollator.UCA_CONTRACTIONS_; 610 int offset = 0; 611 while (conts[offset] != 0) { 612 int tailoredCE = t.m_mapping_.getValue(conts[offset]); 614 if (tailoredCE != CE_NOT_FOUND_) { 615 boolean needToAdd = true; 616 if (isContractionTableElement(tailoredCE)) { 617 if (isTailored(t.m_contractions_, tailoredCE, 618 conts, offset + 1) == true) { 619 needToAdd = false; 620 } 621 } 622 if(m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(conts[offset])) { 623 needToAdd = false; 624 } 625 626 627 if (needToAdd == true) { 628 m_utilElement_.m_prefix_ = 0; 630 m_utilElement_.m_prefixChars_ = null; 631 m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; 632 str.delete(0, str.length()); 633 str.append(conts[offset]); 634 str.append(conts[offset + 1]); 635 if (conts[offset + 2] != 0) { 636 str.append(conts[offset + 2]); 637 } 638 m_utilElement_.m_uchars_ = str.toString(); 639 m_utilElement_.m_CELength_ = 0; 640 m_utilColEIter_.setText(m_utilElement_.m_uchars_); 641 while (true) { 642 int CE = m_utilColEIter_.next(); 643 if (CE != CollationElementIterator.NULLORDER) { 644 m_utilElement_.m_CEs_[m_utilElement_.m_CELength_ 645 ++] = CE; 646 } 647 else { 648 break; 649 } 650 } 651 addAnElement(t, m_utilElement_); 652 } 653 } else if(m_parser_.m_removeSet_ != null && m_parser_.m_removeSet_.contains(conts[offset])) { 654 copyRangeFromUCA(t, conts[offset], conts[offset]); 655 } 656 657 offset += 3; 658 } 659 660 processUCACompleteIgnorables(t); 662 663 canonicalClosure(t); 665 666 assembleTable(t, collator); 668 } 669 670 672 private static class CEGenerator 673 { 674 676 WeightRange m_ranges_[]; 677 int m_rangesLength_; 678 int m_byteSize_; 679 int m_start_; 680 int m_limit_; 681 int m_maxCount_; 682 int m_count_; 683 int m_current_; 684 int m_fLow_; int m_fHigh_; 687 689 CEGenerator() 690 { 691 m_ranges_ = new WeightRange[7]; 692 for (int i = 6; i >= 0; i --) { 693 m_ranges_[i] = new WeightRange(); 694 } 695 } 696 } 697 698 private static class WeightRange implements Comparable 699 { 700 702 707 public int compareTo(Object target) 708 { 709 if (this == target) { 710 return 0; 711 } 712 int tstart = ((WeightRange)target).m_start_; 713 if (m_start_ == tstart) { 714 return 0; 715 } 716 if (m_start_ > tstart) { 717 return 1; 718 } 719 return -1; 720 } 721 722 725 public void clear() 726 { 727 m_start_ = 0; 728 m_end_ = 0; 729 m_length_ = 0; 730 m_count_ = 0; 731 m_length2_ = 0; 732 m_count2_ = 0; 733 } 734 735 737 int m_start_; 738 int m_end_; 739 int m_length_; 740 int m_count_; 741 int m_length2_; 742 int m_count2_; 743 744 746 WeightRange() 747 { 748 clear(); 749 } 750 751 756 WeightRange(WeightRange source) 757 { 758 m_start_ = source.m_start_; 759 m_end_ = source.m_end_; 760 m_length_ = source.m_length_; 761 m_count_ = source.m_count_; 762 m_length2_ = source.m_length2_; 763 m_count2_ = source.m_count2_; 764 } 765 } 766 767 private static class MaxJamoExpansionTable 768 { 769 771 Vector m_endExpansionCE_; 772 Vector m_isV_; 774 byte m_maxLSize_; 775 byte m_maxVSize_; 776 byte m_maxTSize_; 777 778 780 MaxJamoExpansionTable() 781 { 782 m_endExpansionCE_ = new Vector (); 783 m_isV_ = new Vector (); 784 m_endExpansionCE_.add(new Integer (0)); 785 m_isV_.add(new Boolean (false)); 786 m_maxLSize_ = 1; 787 m_maxVSize_ = 1; 788 m_maxTSize_ = 1; 789 } 790 791 MaxJamoExpansionTable(MaxJamoExpansionTable table) 792 { 793 m_endExpansionCE_ = (Vector )table.m_endExpansionCE_.clone(); 794 m_isV_ = (Vector )table.m_isV_.clone(); 795 m_maxLSize_ = table.m_maxLSize_; 796 m_maxVSize_ = table.m_maxVSize_; 797 m_maxTSize_ = table.m_maxTSize_; 798 } 799 } 800 801 private static class MaxExpansionTable 802 { 803 805 MaxExpansionTable() 806 { 807 m_endExpansionCE_ = new Vector (); 808 m_expansionCESize_ = new Vector (); 809 m_endExpansionCE_.add(new Integer (0)); 810 m_expansionCESize_.add(new Byte ((byte)0)); 811 } 812 813 MaxExpansionTable(MaxExpansionTable table) 814 { 815 m_endExpansionCE_ = (Vector )table.m_endExpansionCE_.clone(); 816 m_expansionCESize_ = (Vector )table.m_expansionCESize_.clone(); 817 } 818 819 821 Vector m_endExpansionCE_; 822 Vector m_expansionCESize_; 823 } 824 825 private static class BasicContractionTable 826 { 827 829 BasicContractionTable() 830 { 831 m_CEs_ = new Vector (); 832 m_codePoints_ = new StringBuffer (); 833 } 834 835 837 StringBuffer m_codePoints_; 838 Vector m_CEs_; 839 } 840 841 private static class ContractionTable 842 { 843 845 849 ContractionTable(IntTrieBuilder mapping) 850 { 851 m_mapping_ = mapping; 852 m_elements_ = new Vector (); 853 m_CEs_ = new Vector (); 854 m_codePoints_ = new StringBuffer (); 855 m_offsets_ = new Vector (); 856 m_currentTag_ = CE_NOT_FOUND_TAG_; 857 } 858 859 864 ContractionTable(ContractionTable table) 865 { 866 m_mapping_ = table.m_mapping_; 867 m_elements_ = (Vector )table.m_elements_.clone(); 868 m_codePoints_ = new StringBuffer (table.m_codePoints_.toString()); 869 m_CEs_ = (Vector )table.m_CEs_.clone(); 870 m_offsets_ = (Vector )table.m_offsets_.clone(); 871 m_currentTag_ = table.m_currentTag_; 872 } 873 874 876 879 Vector m_elements_; 880 IntTrieBuilder m_mapping_; 881 StringBuffer m_codePoints_; 882 Vector m_CEs_; 883 Vector m_offsets_; 884 int m_currentTag_; 885 } 886 887 private static final class BuildTable implements TrieBuilder.DataManipulate 888 { 889 891 899 public int getFoldedValue(int cp, int offset) 900 { 901 int limit = cp + 0x400; 902 while (cp < limit) { 903 int value = m_mapping_.getValue(cp); 904 boolean inBlockZero = m_mapping_.isInZeroBlock(cp); 905 int tag = getCETag(value); 906 if (inBlockZero == true) { 907 cp += TrieBuilder.DATA_BLOCK_LENGTH; 908 } 909 else if (!(isSpecial(value) && (tag == CE_IMPLICIT_TAG_ 910 || tag == CE_NOT_FOUND_TAG_))) { 911 return RuleBasedCollator.CE_SPECIAL_FLAG_ 916 | (CE_SURROGATE_TAG_ << 24) | offset; 917 } 918 else { 919 ++ cp; 920 } 921 } 922 return 0; 923 } 924 925 927 930 BuildTable(CollationRuleParser parser) 931 { 932 m_collator_ = new RuleBasedCollator(); 933 m_collator_.setWithUCAData(); 934 MaxExpansionTable maxet = new MaxExpansionTable(); 935 MaxJamoExpansionTable maxjet = new MaxJamoExpansionTable(); 936 m_options_ = parser.m_options_; 937 m_expansions_ = new Vector (); 938 int trieinitialvalue = RuleBasedCollator.CE_SPECIAL_FLAG_ 941 | (CE_NOT_FOUND_TAG_ << 24); 942 m_mapping_ = new IntTrieBuilder(null, 0x30000, trieinitialvalue, 944 trieinitialvalue, true); 945 m_prefixLookup_ = new Hashtable (); 946 m_contractions_ = new ContractionTable(m_mapping_); 948 m_maxExpansions_ = maxet; 950 for (int i = 0; 952 i < RuleBasedCollator.UCA_.m_expansionEndCE_.length; i ++) { 953 maxet.m_endExpansionCE_.add(new Integer ( 954 RuleBasedCollator.UCA_.m_expansionEndCE_[i])); 955 maxet.m_expansionCESize_.add(new Byte ( 956 RuleBasedCollator.UCA_.m_expansionEndCEMaxSize_[i])); 957 } 958 m_maxJamoExpansions_ = maxjet; 959 960 m_unsafeCP_ = new byte[UNSAFECP_TABLE_SIZE_]; 961 m_contrEndCP_ = new byte[UNSAFECP_TABLE_SIZE_]; 962 Arrays.fill(m_unsafeCP_, (byte)0); 963 Arrays.fill(m_contrEndCP_, (byte)0); 964 } 965 966 971 BuildTable(BuildTable table) 972 { 973 m_collator_ = table.m_collator_; 974 m_mapping_ = new IntTrieBuilder(table.m_mapping_); 975 m_expansions_ = (Vector )table.m_expansions_.clone(); 976 m_contractions_ = new ContractionTable(table.m_contractions_); 977 m_contractions_.m_mapping_ = m_mapping_; 978 m_options_ = table.m_options_; 979 m_maxExpansions_ = new MaxExpansionTable(table.m_maxExpansions_); 980 m_maxJamoExpansions_ 981 = new MaxJamoExpansionTable(table.m_maxJamoExpansions_); 982 m_unsafeCP_ = new byte[table.m_unsafeCP_.length]; 983 System.arraycopy(table.m_unsafeCP_, 0, m_unsafeCP_, 0, 984 m_unsafeCP_.length); 985 m_contrEndCP_ = new byte[table.m_contrEndCP_.length]; 986 System.arraycopy(table.m_contrEndCP_, 0, m_contrEndCP_, 0, 987 m_contrEndCP_.length); 988 } 989 990 992 RuleBasedCollator m_collator_; 993 IntTrieBuilder m_mapping_; 994 Vector m_expansions_; 995 ContractionTable m_contractions_; 996 CollationRuleParser.OptionSet m_options_; 998 MaxExpansionTable m_maxExpansions_; 999 MaxJamoExpansionTable m_maxJamoExpansions_; 1000 byte m_unsafeCP_[]; 1001 byte m_contrEndCP_[]; 1002 Hashtable m_prefixLookup_; 1003 } 1004 1005 private static class Elements 1006 { 1007 1009 String m_prefixChars_; 1010 int m_prefix_; 1011 String m_uchars_; 1012 1015 String m_cPoints_; 1016 1019 int m_cPointsOffset_; 1020 1024 int m_CEs_[]; 1025 int m_CELength_; 1026 1029 int m_mapCE_; 1030 int m_sizePrim_[]; 1031 int m_sizeSec_[]; 1032 int m_sizeTer_[]; 1033 boolean m_variableTop_; 1034 boolean m_caseBit_; 1035 1036 1038 1041 Elements() 1042 { 1043 m_sizePrim_ = new int[128]; 1044 m_sizeSec_ = new int[128]; 1045 m_sizeTer_ = new int[128]; 1046 m_CEs_ = new int[256]; 1047 m_CELength_ = 0; 1048 } 1049 1050 1053 Elements(Elements element) 1054 { 1055 m_prefixChars_ = element.m_prefixChars_; 1056 m_prefix_ = element.m_prefix_; 1057 m_uchars_ = element.m_uchars_; 1058 m_cPoints_ = element.m_cPoints_; 1059 m_cPointsOffset_ = element.m_cPointsOffset_; 1060 m_CEs_ = element.m_CEs_; 1061 m_CELength_ = element.m_CELength_; 1062 m_mapCE_ = element.m_mapCE_; 1063 m_sizePrim_ = element.m_sizePrim_; 1064 m_sizeSec_ = element.m_sizeSec_; 1065 m_sizeTer_ = element.m_sizeTer_; 1066 m_variableTop_ = element.m_variableTop_; 1067 m_caseBit_ = element.m_caseBit_; 1068 } 1069 1070 1072 1075 public void clear() 1076 { 1077 m_prefixChars_ = null; 1078 m_prefix_ = 0; 1079 m_uchars_ = null; 1080 m_cPoints_ = null; 1081 m_cPointsOffset_ = 0; 1082 m_CELength_ = 0; 1083 m_mapCE_ = 0; 1084 Arrays.fill(m_sizePrim_, 0); 1085 Arrays.fill(m_sizeSec_, 0); 1086 Arrays.fill(m_sizeTer_, 0); 1087 m_variableTop_ = false; 1088 m_caseBit_ = false; 1089 } 1090 1091 1092 1096 public int hashCode() 1097 { 1098 String str = m_cPoints_.substring(m_cPointsOffset_); 1099 return str.hashCode(); 1100 } 1101 1102 1107 public boolean equals(Object target) 1108 { 1109 if (target == this) { 1110 return true; 1111 } 1112 if (target instanceof Elements) { 1113 Elements t = (Elements)target; 1114 int size = m_cPoints_.length() - m_cPointsOffset_; 1115 if (size == t.m_cPoints_.length() - t.m_cPointsOffset_) { 1116 return t.m_cPoints_.regionMatches(t.m_cPointsOffset_, 1117 m_cPoints_, 1118 m_cPointsOffset_, size); 1119 } 1120 } 1121 return false; 1122 } 1123 } 1124 1125 1127 1130 private static final int CE_BASIC_STRENGTH_LIMIT_ = 3; 1131 1134 private static final int CE_STRENGTH_LIMIT_ = 16; 1135 1138 private static final int STRENGTH_MASK_[] = {0xFFFF0000, 0xFFFFFF00, 1139 0xFFFFFFFF}; 1140 1143 private static final int CE_NOT_FOUND_ = 0xF0000000; 1144 1147 private static final int CE_NOT_FOUND_TAG_ = 0; 1148 1151 private static final int CE_EXPANSION_TAG_ = 1; 1152 1155 private static final int CE_CONTRACTION_TAG_ = 2; 1156 1159 private static final int CE_THAI_TAG_ = 3; 1160 1163 private static final int CE_CHARSET_TAG_ = 4; 1164 1167 private static final int CE_SURROGATE_TAG_ = 5; 1168 1171 private static final int CE_HANGUL_SYLLABLE_TAG_ = 6; 1172 1175 private static final int CE_LEAD_SURROGATE_TAG_ = 7; 1176 1179 private static final int CE_TRAIL_SURROGATE_TAG_ = 8; 1180 1183 private static final int CE_CJK_IMPLICIT_TAG_ = 9; 1184 private static final int CE_IMPLICIT_TAG_ = 10; 1185 private static final int CE_SPEC_PROC_TAG_ = 11; 1186 1191 private static final int CE_LONG_PRIMARY_TAG_ = 12; 1192 1197 private static final int UNSAFECP_TABLE_SIZE_ = 1056; 1198 1202 private static final int UNSAFECP_TABLE_MASK_ = 0x1fff; 1203 1206 private static final int UPPER_CASE_ = 0x80; 1207 private static final int MIXED_CASE_ = 0x40; 1208 private static final int LOWER_CASE_ = 0x00; 1209 1212 private static final int INIT_TABLE_SIZE_ = 1028; 1213 1216 private static final int HEADER_SIZE_ = 0xC4; 1217 1220 private static final int CONTRACTION_TABLE_NEW_ELEMENT_ = 0xFFFFFF; 1221 1224 private CollationRuleParser m_parser_; 1225 1228 private CollationElementIterator m_utilColEIter_; 1229 1232 private CEGenerator m_utilGens_[] = {new CEGenerator(), new CEGenerator(), 1233 new CEGenerator()}; 1234 private int m_utilCEBuffer_[] = new int[CE_BASIC_STRENGTH_LIMIT_]; 1235 private int m_utilIntBuffer_[] = new int[CE_STRENGTH_LIMIT_]; 1236 private Elements m_utilElement_ = new Elements(); 1237 private Elements m_utilElement2_ = new Elements(); 1238 private CollationRuleParser.Token m_utilToken_ 1239 = new CollationRuleParser.Token(); 1240 private int m_utilCountBuffer_[] = new int[6]; 1241 private long m_utilLongBuffer_[] = new long[5]; 1242 private WeightRange m_utilLowerWeightRange_[] = 1243 {new WeightRange(), new WeightRange(), 1244 new WeightRange(), new WeightRange(), 1245 new WeightRange()}; 1246 private WeightRange m_utilUpperWeightRange_[] = 1247 {new WeightRange(), new WeightRange(), 1248 new WeightRange(), new WeightRange(), 1249 new WeightRange()}; 1250 private WeightRange m_utilWeightRange_ = new WeightRange(); 1251 private char m_utilCharBuffer_[] = new char[256]; 1252 private CanonicalIterator m_utilCanIter_ = new CanonicalIterator(""); 1253 private StringBuffer m_utilStringBuffer_ = new StringBuffer (""); 1254 1255 1257 1261 private void initBuffers(CollationRuleParser.TokenListHeader listheader) 1262 throws Exception 1263 { 1264 CollationRuleParser.Token token = listheader.m_last_; 1265 Arrays.fill(m_utilIntBuffer_, 0, CE_STRENGTH_LIMIT_, 0); 1266 1267 token.m_toInsert_ = 1; 1268 m_utilIntBuffer_[token.m_strength_] = 1; 1269 while (token.m_previous_ != null) { 1270 if (token.m_previous_.m_strength_ < token.m_strength_) { 1271 m_utilIntBuffer_[token.m_strength_] = 0; 1273 m_utilIntBuffer_[token.m_previous_.m_strength_] ++; 1274 } 1275 else if (token.m_previous_.m_strength_ > token.m_strength_) { 1276 m_utilIntBuffer_[token.m_previous_.m_strength_] = 1; 1278 } 1279 else { 1280 m_utilIntBuffer_[token.m_strength_] ++; 1281 } 1282 token = token.m_previous_; 1283 token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_]; 1284 } 1285 1286 token.m_toInsert_ = m_utilIntBuffer_[token.m_strength_]; 1287 INVERSE_UCA_.getInverseGapPositions(listheader); 1288 1289 token = listheader.m_first_; 1290 int fstrength = Collator.IDENTICAL; 1291 int initstrength = Collator.IDENTICAL; 1292 1293 m_utilCEBuffer_[Collator.PRIMARY] = mergeCE(listheader.m_baseCE_, 1294 listheader.m_baseContCE_, 1295 Collator.PRIMARY); 1296 m_utilCEBuffer_[Collator.SECONDARY] = mergeCE(listheader.m_baseCE_, 1297 listheader.m_baseContCE_, 1298 Collator.SECONDARY); 1299 m_utilCEBuffer_[Collator.TERTIARY] = mergeCE(listheader.m_baseCE_, 1300 listheader.m_baseContCE_, 1301 Collator.TERTIARY); 1302 while (token != null) { 1303 fstrength = token.m_strength_; 1304 if (fstrength < initstrength) { 1305 initstrength = fstrength; 1306 if (listheader.m_pos_[fstrength] == -1) { 1307 while (listheader.m_pos_[fstrength] == -1 && fstrength > 0) 1308 { 1309 fstrength--; 1310 } 1311 if (listheader.m_pos_[fstrength] == -1) { 1312 throw new Exception ("Internal program error"); 1313 } 1314 } 1315 if (initstrength == Collator.TERTIARY) { 1316 m_utilCEBuffer_[Collator.PRIMARY] 1318 = listheader.m_gapsLo_[fstrength * 3]; 1319 m_utilCEBuffer_[Collator.SECONDARY] 1320 = listheader.m_gapsLo_[fstrength * 3 + 1]; 1321 m_utilCEBuffer_[Collator.TERTIARY] = getCEGenerator( 1322 m_utilGens_[Collator.TERTIARY], 1323 listheader.m_gapsLo_, 1324 listheader.m_gapsHi_, 1325 token, fstrength); 1326 } 1327 else if (initstrength == Collator.SECONDARY) { 1328 m_utilCEBuffer_[Collator.PRIMARY] 1330 = listheader.m_gapsLo_[fstrength * 3]; 1331 m_utilCEBuffer_[Collator.SECONDARY] 1332 = getCEGenerator( 1333 m_utilGens_[Collator.SECONDARY], 1334 listheader.m_gapsLo_, 1335 listheader.m_gapsHi_, 1336 token, fstrength); 1337 m_utilCEBuffer_[Collator.TERTIARY] 1338 = getSimpleCEGenerator( 1339 m_utilGens_[Collator.TERTIARY], 1340 token, Collator.TERTIARY); 1341 } 1342 else { 1343 m_utilCEBuffer_[Collator.PRIMARY] 1345 = getCEGenerator( 1346 m_utilGens_[Collator.PRIMARY], 1347 listheader.m_gapsLo_, 1348 listheader.m_gapsHi_, 1349 token, fstrength); 1350 m_utilCEBuffer_[Collator.SECONDARY] 1351 = getSimpleCEGenerator( 1352 m_utilGens_[Collator.SECONDARY], 1353 token, Collator.SECONDARY); 1354 m_utilCEBuffer_[Collator.TERTIARY] 1355 = getSimpleCEGenerator( 1356 m_utilGens_[Collator.TERTIARY], 1357 token, Collator.TERTIARY); 1358 } 1359 } 1360 else { 1361 if (token.m_strength_ == Collator.TERTIARY) { 1362 m_utilCEBuffer_[Collator.TERTIARY] 1363 = getNextGenerated(m_utilGens_[Collator.TERTIARY]); 1364 } 1365 else if (token.m_strength_ == Collator.SECONDARY) { 1366 m_utilCEBuffer_[Collator.SECONDARY] 1367 = getNextGenerated(m_utilGens_[Collator.SECONDARY]); 1368 m_utilCEBuffer_[Collator.TERTIARY] 1369 = getSimpleCEGenerator( 1370 m_utilGens_[Collator.TERTIARY], 1371 token, Collator.TERTIARY); 1372 } 1373 else if (token.m_strength_ == Collator.PRIMARY) { 1374 m_utilCEBuffer_[Collator.PRIMARY] 1375 = getNextGenerated( 1376 m_utilGens_[Collator.PRIMARY]); 1377 m_utilCEBuffer_[Collator.SECONDARY] 1378 = getSimpleCEGenerator( 1379 m_utilGens_[Collator.SECONDARY], 1380 token, Collator.SECONDARY); 1381 m_utilCEBuffer_[Collator.TERTIARY] 1382 = getSimpleCEGenerator( 1383 m_utilGens_[Collator.TERTIARY], 1384 token, Collator.TERTIARY); 1385 } 1386 } 1387 doCE(m_utilCEBuffer_, token); 1388 token = token.m_next_; 1389 } 1390 } 1391 1392 1397 private int getNextGenerated(CEGenerator g) 1398 { 1399 g.m_current_ = nextWeight(g); 1400 return g.m_current_; 1401 } 1402 1403 1410 private int getSimpleCEGenerator(CEGenerator g, 1411 CollationRuleParser.Token token, 1412 int strength) throws Exception 1413 { 1414 int high, low, count = 1; 1415 int maxbyte = (strength == Collator.TERTIARY) ? 0x3F : 0xFF; 1416 1417 if (strength == Collator.SECONDARY) { 1418 low = RuleBasedCollator.COMMON_TOP_2_ << 24; 1419 high = 0xFFFFFFFF; 1420 count = 0xFF - RuleBasedCollator.COMMON_TOP_2_; 1421 } 1422 else { 1423 low = RuleBasedCollator.BYTE_COMMON_ << 24; high = 0x40000000; 1425 count = 0x40 - RuleBasedCollator.BYTE_COMMON_; 1426 } 1427 1428 if (token.m_next_ != null && token.m_next_.m_strength_ == strength) { 1429 count = token.m_next_.m_toInsert_; 1430 } 1431 1432 g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, 1433 g.m_ranges_); 1434 g.m_current_ = RuleBasedCollator.BYTE_COMMON_ << 24; 1435 1436 if (g.m_rangesLength_ == 0) { 1437 throw new Exception ("Internal program error"); 1438 } 1439 return g.m_current_; 1440 } 1441 1442 1449 private static int mergeCE(int ce1, int ce2, int strength) 1450 { 1451 int mask = RuleBasedCollator.CE_TERTIARY_MASK_; 1452 if (strength == Collator.SECONDARY) { 1453 mask = RuleBasedCollator.CE_SECONDARY_MASK_; 1454 } 1455 else if (strength == Collator.PRIMARY) { 1456 mask = RuleBasedCollator.CE_PRIMARY_MASK_; 1457 } 1458 ce1 &= mask; 1459 ce2 &= mask; 1460 switch (strength) 1461 { 1462 case Collator.PRIMARY: 1463 return ce1 | ce2 >>> 16; 1464 case Collator.SECONDARY: 1465 return ce1 << 16 | ce2 << 8; 1466 default: 1467 return ce1 << 24 | ce2 << 16; 1468 } 1469 } 1470 1471 1479 private int getCEGenerator(CEGenerator g, int lows[], int highs[], 1480 CollationRuleParser.Token token, int fstrength) 1481 throws Exception 1482 { 1483 int strength = token.m_strength_; 1484 int low = lows[fstrength * 3 + strength]; 1485 int high = highs[fstrength * 3 + strength]; 1486 int maxbyte = 0; 1487 if(strength == Collator.TERTIARY) { 1488 maxbyte = 0x3F; 1489 } else if(strength == Collator.PRIMARY) { 1490 maxbyte = 0xFE; 1491 } else { 1492 maxbyte = 0xFF; 1493 } 1494 1495 int count = token.m_toInsert_; 1496 1497 if (Utility.compareUnsigned(low, high) >= 0 1498 && strength > Collator.PRIMARY) { 1499 int s = strength; 1500 while (true) { 1501 s --; 1502 if (lows[fstrength * 3 + s] != highs[fstrength * 3 + s]) { 1503 if (strength == Collator.SECONDARY) { 1504 low = RuleBasedCollator.COMMON_TOP_2_ << 24; 1505 high = 0xFFFFFFFF; 1506 } 1507 else { 1508 high = 0x40000000; 1512 } 1513 break; 1514 } 1515 if (s < 0) { 1516 throw new Exception ("Internal program error"); 1517 } 1518 } 1519 } 1520 if (low == 0) { 1521 low = 0x01000000; 1522 } 1523 if (strength == Collator.SECONDARY) { if (Utility.compareUnsigned(low, 1525 RuleBasedCollator.COMMON_BOTTOM_2_ << 24) >= 0 1526 && Utility.compareUnsigned(low, 1527 RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) { 1528 low = RuleBasedCollator.COMMON_TOP_2_ << 24; 1529 } 1530 if (Utility.compareUnsigned(high, 1531 RuleBasedCollator.COMMON_BOTTOM_2_ << 24) > 0 1532 && Utility.compareUnsigned(high, 1533 RuleBasedCollator.COMMON_TOP_2_ << 24) < 0) { 1534 high = RuleBasedCollator.COMMON_TOP_2_ << 24; 1535 } 1536 if (Utility.compareUnsigned(low, 1537 RuleBasedCollator.COMMON_BOTTOM_2_ << 24) < 0) { 1538 g.m_rangesLength_ = allocateWeights( 1539 RuleBasedCollator.BYTE_UNSHIFTED_MIN_ << 24, 1540 high, count, maxbyte, g.m_ranges_); 1541 g.m_current_ = nextWeight(g); 1542 return g.m_current_; 1544 } 1545 } 1546 1547 g.m_rangesLength_ = allocateWeights(low, high, count, maxbyte, 1548 g.m_ranges_); 1549 if (g.m_rangesLength_ == 0) { 1550 throw new Exception ("Internal program error"); 1551 } 1552 g.m_current_ = nextWeight(g); 1553 return g.m_current_; 1554 } 1555 1556 1561 private void doCE(int ceparts[], CollationRuleParser.Token token) 1562 throws Exception 1563 { 1564 for (int i = 0; i < 3; i ++) { 1567 m_utilIntBuffer_[i] = countBytes(ceparts[i]); 1569 } 1570 1571 int cei = 0; 1573 int value = 0; 1574 1575 while ((cei << 1) < m_utilIntBuffer_[0] || cei < m_utilIntBuffer_[1] 1576 || cei < m_utilIntBuffer_[2]) { 1577 if (cei > 0) { 1578 value = RuleBasedCollator.CE_CONTINUATION_MARKER_; 1579 } else { 1580 value = 0; 1581 } 1582 1583 if ((cei << 1) < m_utilIntBuffer_[0]) { 1584 value |= ((ceparts[0] >> (32 - ((cei + 1) << 4))) & 0xFFFF) 1585 << 16; 1586 } 1587 if (cei < m_utilIntBuffer_[1]) { 1588 value |= ((ceparts[1] >> (32 - ((cei + 1) << 3))) & 0xFF) << 8; 1589 } 1590 1591 if (cei < m_utilIntBuffer_[2]) { 1592 value |= ((ceparts[2] >> (32 - ((cei+1) << 3))) & 0x3F); 1593 } 1594 token.m_CE_[cei] = value; 1595 cei ++; 1596 } 1597 if (cei == 0) { token.m_CELength_ = 1; 1599 token.m_CE_[0] = 0; 1600 } 1601 else { token.m_CELength_ = cei; 1603 } 1604 1605 if(token.m_CE_[0] != 0) { int startoftokenrule = token.m_source_ & 0xFF; 1608 if ((token.m_source_ >>> 24) > 1) { 1609 int length = token.m_source_ >>> 24; 1611 String tokenstr = token.m_rules_.substring(startoftokenrule, 1612 startoftokenrule + length); 1613 token.m_CE_[0] |= getCaseBits(tokenstr); 1614 } 1615 else { 1616 int caseCE 1618 = getFirstCE(token.m_rules_.charAt(startoftokenrule)); 1619 token.m_CE_[0] |= (caseCE & 0xC0); 1620 } 1621 } 1622 } 1623 1624 1629 private static final int countBytes(int ce) 1630 { 1631 int mask = 0xFFFFFFFF; 1632 int result = 0; 1633 while (mask != 0) { 1634 if ((ce & mask) != 0) { 1635 result ++; 1636 } 1637 mask >>>= 8; 1638 } 1639 return result; 1640 } 1641 1642 1647 private void createElements(BuildTable t, 1648 CollationRuleParser.TokenListHeader lh) 1649 { 1650 CollationRuleParser.Token tok = lh.m_first_; 1651 m_utilElement_.clear(); 1652 while (tok != null) { 1653 if (tok.m_expansion_ != 0) { 1658 int len = tok.m_expansion_ >>> 24; 1659 int currentSequenceLen = len; 1660 int expOffset = tok.m_expansion_ & 0x00FFFFFF; 1661 m_utilToken_.m_source_ = currentSequenceLen | expOffset; 1662 m_utilToken_.m_rules_ = m_parser_.m_source_; 1663 1664 while (len > 0) { 1665 currentSequenceLen = len; 1666 while (currentSequenceLen > 0) { 1667 m_utilToken_.m_source_ = (currentSequenceLen << 24) 1668 | expOffset; 1669 CollationRuleParser.Token expt = 1670 (CollationRuleParser.Token) 1671 m_parser_.m_hashTable_.get(m_utilToken_); 1672 if (expt != null 1673 && expt.m_strength_ 1674 != CollationRuleParser.TOKEN_RESET_) { 1675 int noOfCEsToCopy = expt.m_CELength_; 1677 for (int j = 0; j < noOfCEsToCopy; j ++) { 1678 tok.m_expCE_[tok.m_expCELength_ + j] 1679 = expt.m_CE_[j]; 1680 } 1681 tok.m_expCELength_ += noOfCEsToCopy; 1682 expOffset += currentSequenceLen; len -= currentSequenceLen; break; 1687 } 1688 else { 1689 currentSequenceLen --; 1690 } 1691 } 1692 if (currentSequenceLen == 0) { 1693 m_utilColEIter_.setText(m_parser_.m_source_.substring( 1698 expOffset, expOffset + 1)); 1699 while (true) { 1700 int order = m_utilColEIter_.next(); 1701 if (order == CollationElementIterator.NULLORDER) { 1702 break; 1703 } 1704 tok.m_expCE_[tok.m_expCELength_ ++] = order; 1705 } 1706 expOffset ++; 1707 len --; 1708 } 1709 } 1710 } 1711 else { 1712 tok.m_expCELength_ = 0; 1713 } 1714 1715 m_utilElement_.m_CELength_ = tok.m_CELength_ + tok.m_expCELength_; 1717 1718 System.arraycopy(tok.m_CE_, 0, m_utilElement_.m_CEs_, 0, 1720 tok.m_CELength_); 1721 System.arraycopy(tok.m_expCE_, 0, m_utilElement_.m_CEs_, 1722 tok.m_CELength_, tok.m_expCELength_); 1723 1724 m_utilElement_.m_prefix_ = 0; m_utilElement_.m_cPointsOffset_ = 0; if (tok.m_prefix_ != 0) { 1731 int size = tok.m_prefix_ >> 24; 1736 int offset = tok.m_prefix_ & 0x00FFFFFF; 1737 m_utilElement_.m_prefixChars_ 1738 = m_parser_.m_source_.substring(offset, offset + size); 1739 size = (tok.m_source_ >> 24) - (tok.m_prefix_ >> 24); 1740 offset = (tok.m_source_ & 0x00FFFFFF) + (tok.m_prefix_ >> 24); 1741 m_utilElement_.m_uchars_ 1742 = m_parser_.m_source_.substring(offset, offset + size); 1743 } 1744 else { 1745 m_utilElement_.m_prefixChars_ = null; 1746 int offset = tok.m_source_ & 0x00FFFFFF; 1747 int size = tok.m_source_ >>> 24; 1748 m_utilElement_.m_uchars_ = m_parser_.m_source_.substring(offset, 1749 offset + size); 1750 } 1751 m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; 1752 for (int i = 0; i < m_utilElement_.m_cPoints_.length() 1753 - m_utilElement_.m_cPointsOffset_; i ++) { 1754 if (isJamo(m_utilElement_.m_cPoints_.charAt(i))) { 1755 t.m_collator_.m_isJamoSpecial_ = true; 1756 break; 1757 } 1758 } 1759 1760 1778 addAnElement(t, m_utilElement_); 1780 tok = tok.m_next_; 1781 } 1782 } 1783 1784 1790 private final int getCaseBits(String src) throws Exception 1791 { 1792 int uCount = 0; 1793 int lCount = 0; 1794 src = Normalizer.decompose(src, true); 1795 m_utilColEIter_.setText(src); 1796 for (int i = 0; i < src.length(); i++) { 1797 m_utilColEIter_.setText(src.substring(i, i + 1)); 1798 int order = m_utilColEIter_.next(); 1799 if (RuleBasedCollator.isContinuation(order)) { 1800 throw new Exception ("Internal program error"); 1801 } 1802 if ((order & RuleBasedCollator.CE_CASE_BIT_MASK_) 1803 == UPPER_CASE_) { 1804 uCount ++; 1805 } 1806 else { 1807 char ch = src.charAt(i); 1808 if (UCharacter.isLowerCase(ch)) { 1809 lCount ++; 1810 } 1811 else { 1812 if (toSmallKana(ch) == ch && toLargeKana(ch) != ch) { 1813 lCount ++; 1814 } 1815 } 1816 } 1817 } 1818 1819 if (uCount != 0 && lCount != 0) { 1820 return MIXED_CASE_; 1821 } 1822 else if (uCount != 0) { 1823 return UPPER_CASE_; 1824 } 1825 else { 1826 return LOWER_CASE_; 1827 } 1828 } 1829 1830 1835 private static final char toLargeKana(char ch) 1836 { 1837 if (0x3042 < ch && ch < 0x30ef) { switch (ch - 0x3000) { 1839 case 0x41: 1840 case 0x43: 1841 case 0x45: 1842 case 0x47: 1843 case 0x49: 1844 case 0x63: 1845 case 0x83: 1846 case 0x85: 1847 case 0x8E: 1848 case 0xA1: 1849 case 0xA3: 1850 case 0xA5: 1851 case 0xA7: 1852 case 0xA9: 1853 case 0xC3: 1854 case 0xE3: 1855 case 0xE5: 1856 case 0xEE: 1857 ch ++; 1858 break; 1859 case 0xF5: 1860 ch = 0x30AB; 1861 break; 1862 case 0xF6: 1863 ch = 0x30B1; 1864 break; 1865 } 1866 } 1867 return ch; 1868 } 1869 1870 1875 private static final char toSmallKana(char ch) 1876 { 1877 if (0x3042 < ch && ch < 0x30ef) { switch (ch - 0x3000) { 1879 case 0x42: 1880 case 0x44: 1881 case 0x46: 1882 case 0x48: 1883 case 0x4A: 1884 case 0x64: 1885 case 0x84: 1886 case 0x86: 1887 case 0x8F: 1888 case 0xA2: 1889 case 0xA4: 1890 case 0xA6: 1891 case 0xA8: 1892 case 0xAA: 1893 case 0xC4: 1894 case 0xE4: 1895 case 0xE6: 1896 case 0xEF: 1897 ch --; 1898 break; 1899 case 0xAB: 1900 ch = 0x30F5; 1901 break; 1902 case 0xB1: 1903 ch = 0x30F6; 1904 break; 1905 } 1906 } 1907 return ch; 1908 } 1909 1910 1913 private int getFirstCE(char ch) 1914 { 1915 m_utilColEIter_.setText(UCharacter.toString(ch)); 1916 return m_utilColEIter_.next(); 1917 } 1918 1919 1925 private int addAnElement(BuildTable t, Elements element) 1926 { 1927 Vector expansions = t.m_expansions_; 1928 element.m_mapCE_ = 0; 1929 1930 if (element.m_CELength_ == 1) { 1931 element.m_mapCE_ = element.m_CEs_[0]; 1932 1933 } else { 1934 if (element.m_CELength_ == 2 && RuleBasedCollator.isContinuation(element.m_CEs_[1]) 1945 && (element.m_CEs_[1] 1946 & (~(0xFF << 24 | RuleBasedCollator.CE_CONTINUATION_MARKER_))) 1947 == 0 && (((element.m_CEs_[0] >> 8) & 0xFF) 1949 == RuleBasedCollator.BYTE_COMMON_) 1950 && ((element.m_CEs_[0] & 0xFF) 1952 == RuleBasedCollator.BYTE_COMMON_) ) { 1954 element.m_mapCE_ = RuleBasedCollator.CE_SPECIAL_FLAG_ 1955 | (CE_LONG_PRIMARY_TAG_ << 24) 1957 | ((element.m_CEs_[0] >> 8) & 0xFFFF00) 1959 | ((element.m_CEs_[1] >> 24) & 0xFF); 1961 } 1962 else { 1963 int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ 1966 | (CE_EXPANSION_TAG_ 1967 << RuleBasedCollator.CE_TAG_SHIFT_) 1968 | (addExpansion(expansions, element.m_CEs_[0]) 1969 << 4) & 0xFFFFF0; 1970 1971 for (int i = 1; i < element.m_CELength_; i ++) { 1972 addExpansion(expansions, element.m_CEs_[i]); 1973 } 1974 if (element.m_CELength_ <= 0xF) { 1975 expansion |= element.m_CELength_; 1976 } 1977 else { 1978 addExpansion(expansions, 0); 1979 } 1980 element.m_mapCE_ = expansion; 1981 setMaxExpansion(element.m_CEs_[element.m_CELength_ - 1], 1982 (byte)element.m_CELength_, 1983 t.m_maxExpansions_); 1984 if (isJamo(element.m_cPoints_.charAt(0))){ 1985 t.m_collator_.m_isJamoSpecial_ = true; 1986 setMaxJamoExpansion(element.m_cPoints_.charAt(0), 1987 element.m_CEs_[element.m_CELength_ 1988 - 1], 1989 (byte)element.m_CELength_, 1990 t.m_maxJamoExpansions_); 1991 } 1992 } 1993 } 1994 1995 int uniChar = 0; 1998 if ((element.m_uchars_.length() == 2) 1999 && UTF16.isLeadSurrogate(element.m_uchars_.charAt(0))) { 2000 uniChar = UCharacterProperty.getRawSupplementary( 2001 element.m_uchars_.charAt(0), 2002 element.m_uchars_.charAt(1)); 2003 } 2004 else if (element.m_uchars_.length() == 1) { 2005 uniChar = element.m_uchars_.charAt(0); 2006 } 2007 2008 if (uniChar != 0 && UCharacter.isDigit(uniChar)) { 2016 int expansion = RuleBasedCollator.CE_SPECIAL_FLAG_ 2018 | (CollationElementIterator.CE_DIGIT_TAG_ 2019 << RuleBasedCollator.CE_TAG_SHIFT_) | 1; 2020 if (element.m_mapCE_ != 0) { 2021 expansion |= (addExpansion(expansions, element.m_mapCE_) << 4); 2023 } 2024 else { 2025 expansion |= (addExpansion(expansions, element.m_CEs_[0]) << 4); 2026 } 2027 element.m_mapCE_ = expansion; 2028 } 2029 2030 2034 if (element.m_prefixChars_ != null && 2035 element.m_prefixChars_.length() - element.m_prefix_ > 0) { 2036 m_utilElement2_.m_caseBit_ = element.m_caseBit_; 2040 m_utilElement2_.m_CELength_ = element.m_CELength_; 2041 m_utilElement2_.m_CEs_ = element.m_CEs_; 2042 m_utilElement2_.m_mapCE_ = element.m_mapCE_; 2043 m_utilElement2_.m_sizePrim_ = element.m_sizePrim_; 2045 m_utilElement2_.m_sizeSec_ = element.m_sizeSec_; 2046 m_utilElement2_.m_sizeTer_ = element.m_sizeTer_; 2047 m_utilElement2_.m_variableTop_ = element.m_variableTop_; 2048 m_utilElement2_.m_prefix_ = element.m_prefix_; 2049 m_utilElement2_.m_prefixChars_ = Normalizer.compose(element.m_prefixChars_, false); 2050 m_utilElement2_.m_uchars_ = element.m_uchars_; 2051 m_utilElement2_.m_cPoints_ = element.m_cPoints_; 2052 m_utilElement2_.m_cPointsOffset_ = 0; 2053 2054 if (t.m_prefixLookup_ != null) { 2055 Elements uCE = (Elements)t.m_prefixLookup_.get(element); 2056 if (uCE != null) { 2057 element.m_mapCE_ = addPrefix(t, uCE.m_mapCE_, element); 2059 } 2060 else { element.m_mapCE_ = addPrefix(t, CE_NOT_FOUND_, element); 2062 uCE = new Elements(element); 2063 uCE.m_cPoints_ = uCE.m_uchars_; 2064 t.m_prefixLookup_.put(uCE, uCE); 2065 } 2066 if (m_utilElement2_.m_prefixChars_.length() 2067 != element.m_prefixChars_.length() - element.m_prefix_ 2068 || !m_utilElement2_.m_prefixChars_.regionMatches(0, 2069 element.m_prefixChars_, element.m_prefix_, 2070 m_utilElement2_.m_prefixChars_.length())) { 2071 m_utilElement2_.m_mapCE_ = addPrefix(t, element.m_mapCE_, 2073 m_utilElement2_); 2074 } 2075 } 2076 } 2077 2078 if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1 2082 && !(element.m_cPoints_.length() - element.m_cPointsOffset_ == 2 2083 && UTF16.isLeadSurrogate(element.m_cPoints_.charAt(0)) 2084 && UTF16.isTrailSurrogate(element.m_cPoints_.charAt(1)))) { 2085 m_utilCanIter_.setSource(element.m_cPoints_); 2088 String source = m_utilCanIter_.next(); 2089 while (source != null && source.length() > 0) { 2090 if (Normalizer.quickCheck(source, Normalizer.FCD,0) 2091 != Normalizer.NO) { 2092 element.m_uchars_ = source; 2093 element.m_cPoints_ = element.m_uchars_; 2094 finalizeAddition(t, element); 2095 } 2096 source = m_utilCanIter_.next(); 2097 } 2098 2099 return element.m_mapCE_; 2100 } 2101 else { 2102 return finalizeAddition(t, element); 2103 } 2104 } 2105 2106 2112 private static final int addExpansion(Vector expansions, int value) 2113 { 2114 expansions.add(new Integer (value)); 2115 return expansions.size() - 1; 2116 } 2117 2118 2127 private static int setMaxExpansion(int endexpansion, byte expansionsize, 2128 MaxExpansionTable maxexpansion) 2129 { 2130 int start = 0; 2131 int limit = maxexpansion.m_endExpansionCE_.size(); 2132 long unsigned = (long)endexpansion; 2133 unsigned &= 0xFFFFFFFFl; 2134 2135 int result = -1; 2138 while (start < limit - 1) { 2139 int mid = start + ((limit - start) >> 1); 2140 long unsignedce = ((Integer )maxexpansion.m_endExpansionCE_.get( 2141 mid)).intValue(); 2142 unsignedce &= 0xFFFFFFFFl; 2143 if (unsigned <= unsignedce) { 2144 limit = mid; 2145 } 2146 else { 2147 start = mid; 2148 } 2149 } 2150 2151 if (((Integer )maxexpansion.m_endExpansionCE_.get(start)).intValue() 2152 == endexpansion) { 2153 result = start; 2154 } 2155 else if (((Integer )maxexpansion.m_endExpansionCE_.get(limit)).intValue() 2156 == endexpansion) { 2157 result = limit; 2158 } 2159 if (result > -1) { 2160 Object currentsize = maxexpansion.m_expansionCESize_.get(result); 2163 if (((Byte )currentsize).byteValue() < expansionsize) { 2164 maxexpansion.m_expansionCESize_.set(result, 2165 new Byte (expansionsize)); 2166 } 2167 } 2168 else { 2169 maxexpansion.m_endExpansionCE_.insertElementAt( 2172 new Integer (endexpansion), 2173 start + 1); 2174 maxexpansion.m_expansionCESize_.insertElementAt( 2175 new Byte (expansionsize), 2176 start + 1); 2177 } 2178 return maxexpansion.m_endExpansionCE_.size(); 2179 } 2180 2181 2191 private static int setMaxJamoExpansion(char ch, int endexpansion, 2192 byte expansionsize, 2193 MaxJamoExpansionTable maxexpansion) 2194 { 2195 boolean isV = true; 2196 if (ch >= 0x1100 && ch <= 0x1112) { 2197 if (maxexpansion.m_maxLSize_ < expansionsize) { 2200 maxexpansion.m_maxLSize_ = expansionsize; 2201 } 2202 return maxexpansion.m_endExpansionCE_.size(); 2203 } 2204 2205 if (ch >= 0x1161 && ch <= 0x1175) { 2206 if (maxexpansion.m_maxVSize_ < expansionsize) { 2208 maxexpansion.m_maxVSize_ = expansionsize; 2209 } 2210 } 2211 2212 if (ch >= 0x11A8 && ch <= 0x11C2) { 2213 isV = false; 2214 if (maxexpansion.m_maxTSize_ < expansionsize) { 2216 maxexpansion.m_maxTSize_ = expansionsize; 2217 } 2218 } 2219 2220 int pos = maxexpansion.m_endExpansionCE_.size(); 2221 while (pos > 0) { 2222 pos --; 2223 if (((Integer )maxexpansion.m_endExpansionCE_.get(pos)).intValue() 2224 == endexpansion) { 2225 return maxexpansion.m_endExpansionCE_.size(); 2226 } 2227 } 2228 maxexpansion.m_endExpansionCE_.add(new Integer (endexpansion)); 2229 maxexpansion.m_isV_.add(new Boolean (isV)); 2230 2231 return maxexpansion.m_endExpansionCE_.size(); 2232 } 2233 2234 2241 private int addPrefix(BuildTable t, int CE, Elements element) 2242 { 2243 ContractionTable contractions = t.m_contractions_; 2248 String oldCP = element.m_cPoints_; 2249 int oldCPOffset = element.m_cPointsOffset_; 2250 2251 contractions.m_currentTag_ = CE_SPEC_PROC_TAG_; 2252 int size = element.m_prefixChars_.length() - element.m_prefix_; 2254 for (int j = 1; j < size; j ++) { 2255 char ch = element.m_prefixChars_.charAt(j + element.m_prefix_); 2259 if (!UTF16.isTrailSurrogate(ch)) { 2260 unsafeCPSet(t.m_unsafeCP_, ch); 2261 } 2262 } 2263 2264 m_utilStringBuffer_.delete(0, m_utilStringBuffer_.length()); 2266 for (int j = 0; j < size; j ++) { 2267 int offset = element.m_prefixChars_.length() - j - 1; 2270 m_utilStringBuffer_.append(element.m_prefixChars_.charAt(offset)); 2271 } 2272 element.m_prefixChars_ = m_utilStringBuffer_.toString(); 2273 element.m_prefix_ = 0; 2274 2275 if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt(0))) { 2278 unsafeCPSet(t.m_unsafeCP_, element.m_cPoints_.charAt(0)); 2279 } 2280 2281 element.m_cPoints_ = element.m_prefixChars_; 2282 element.m_cPointsOffset_ = element.m_prefix_; 2283 2284 if (!UTF16.isTrailSurrogate( 2288 element.m_cPoints_.charAt(element.m_cPoints_.length() - 1))) { 2289 ContrEndCPSet(t.m_contrEndCP_, element.m_cPoints_.charAt( 2290 element.m_cPoints_.length() - 1)); 2291 } 2292 2295 if (isJamo(element.m_prefixChars_.charAt(element.m_prefix_))) { 2298 t.m_collator_.m_isJamoSpecial_ = true; 2299 } 2300 if (!isPrefix(CE)) { 2303 int firstContractionOffset = addContraction(contractions, 2305 CONTRACTION_TABLE_NEW_ELEMENT_, 2306 (char)0, CE); 2307 int newCE = processContraction(contractions, element, 2308 CE_NOT_FOUND_); 2309 addContraction(contractions, firstContractionOffset, 2310 element.m_prefixChars_.charAt(element.m_prefix_), 2311 newCE); 2312 addContraction(contractions, firstContractionOffset, (char)0xFFFF, 2313 CE); 2314 CE = constructSpecialCE(CE_SPEC_PROC_TAG_, firstContractionOffset); 2315 } 2316 else { 2317 char ch = element.m_prefixChars_.charAt(element.m_prefix_); 2323 int position = findCP(contractions, CE, ch); 2324 if (position > 0) { 2325 int eCE = getCE(contractions, CE, position); 2327 int newCE = processContraction(contractions, element, eCE); 2328 setContraction(contractions, CE, position, ch, newCE); 2329 } 2330 else { 2331 processContraction(contractions, element, CE_NOT_FOUND_); 2333 insertContraction(contractions, CE, ch, element.m_mapCE_); 2334 } 2335 } 2336 2337 element.m_cPoints_ = oldCP; 2338 element.m_cPointsOffset_ = oldCPOffset; 2339 2340 return CE; 2341 } 2342 2343 2348 private static final boolean isContraction(int CE) 2349 { 2350 return isSpecial(CE) && (getCETag(CE) == CE_CONTRACTION_TAG_); 2351 } 2352 2353 2358 private static final boolean isPrefix(int CE) 2359 { 2360 return isSpecial(CE) && (getCETag(CE) == CE_SPEC_PROC_TAG_); 2361 } 2362 2363 2368 private static final boolean isSpecial(int CE) 2369 { 2370 return (CE & RuleBasedCollator.CE_SPECIAL_FLAG_) == 0xF0000000; 2371 } 2372 2373 2378 private static final int getCETag(int CE) 2379 { 2380 return (CE & RuleBasedCollator.CE_TAG_MASK_) >>> 2381 RuleBasedCollator.CE_TAG_SHIFT_; 2382 } 2383 2384 2390 private static final int getCE(ContractionTable table, int element, 2391 int position) 2392 { 2393 element &= 0xFFFFFF; 2394 BasicContractionTable tbl = getBasicContractionTable(table, element); 2395 2396 if (tbl == null) { 2397 return CE_NOT_FOUND_; 2398 } 2399 if (position > tbl.m_CEs_.size() || position == -1) { 2400 return CE_NOT_FOUND_; 2401 } 2402 else { 2403 return ((Integer )tbl.m_CEs_.get(position)).intValue(); 2404 } 2405 } 2406 2407 2412 private static final void unsafeCPSet(byte table[], char c) 2413 { 2414 int hash = c; 2415 if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) { 2416 if (hash >= 0xd800 && hash <= 0xf8ff) { 2417 return; 2420 } 2421 hash = (hash & UNSAFECP_TABLE_MASK_) + 256; 2422 } 2423 table[hash >> 3] |= (1 << (hash & 7)); 2424 } 2425 2426 2431 private static final void ContrEndCPSet(byte table[], char c) 2432 { 2433 int hash = c; 2434 if (hash >= (UNSAFECP_TABLE_SIZE_ << 3)) { 2435 hash = (hash & UNSAFECP_TABLE_MASK_) + 256; 2436 } 2437 table[hash >> 3] |= (1 << (hash & 7)); 2438 } 2439 2440 2449 private static int addContraction(ContractionTable table, int element, 2450 char codePoint, int value) 2451 { 2452 BasicContractionTable tbl = getBasicContractionTable(table, element); 2453 if (tbl == null) { 2454 tbl = addAContractionElement(table); 2455 element = table.m_elements_.size() - 1; 2456 } 2457 2458 tbl.m_CEs_.add(new Integer (value)); 2459 tbl.m_codePoints_.append(codePoint); 2460 return constructSpecialCE(table.m_currentTag_, element); 2461 } 2462 2463 2468 private static BasicContractionTable addAContractionElement( 2469 ContractionTable table) 2470 { 2471 BasicContractionTable result = new BasicContractionTable(); 2472 table.m_elements_.add(result); 2473 return result; 2474 } 2475 2476 2482 private static final int constructSpecialCE(int tag, int CE) 2483 { 2484 return RuleBasedCollator.CE_SPECIAL_FLAG_ 2485 | (tag << RuleBasedCollator.CE_TAG_SHIFT_) | (CE & 0xFFFFFF); 2486 } 2487 2488 2495 private static int processContraction(ContractionTable contractions, 2496 Elements element, 2497 int existingCE) 2498 { 2499 int firstContractionOffset = 0; 2500 if (element.m_cPoints_.length() - element.m_cPointsOffset_ == 1) { 2502 if (isContractionTableElement(existingCE) 2503 && getCETag(existingCE) == contractions.m_currentTag_) { 2504 changeContraction(contractions, existingCE, (char)0, 2505 element.m_mapCE_); 2506 changeContraction(contractions, existingCE, (char)0xFFFF, 2507 element.m_mapCE_); 2508 return existingCE; 2509 } 2510 else { 2511 return element.m_mapCE_; 2514 } 2515 } 2516 2517 element.m_cPointsOffset_ ++; 2523 if (!isContractionTableElement(existingCE)) { 2524 firstContractionOffset = addContraction(contractions, 2526 CONTRACTION_TABLE_NEW_ELEMENT_, 2527 (char)0, existingCE); 2528 int newCE = processContraction(contractions, element, 2529 CE_NOT_FOUND_); 2530 addContraction(contractions, firstContractionOffset, 2531 element.m_cPoints_.charAt(element.m_cPointsOffset_), 2532 newCE); 2533 addContraction(contractions, firstContractionOffset, 2534 (char)0xFFFF, existingCE); 2535 existingCE = constructSpecialCE(contractions.m_currentTag_, 2536 firstContractionOffset); 2537 } 2538 else { 2539 int position = findCP(contractions, existingCE, 2545 element.m_cPoints_.charAt(element.m_cPointsOffset_)); 2546 if (position > 0) { 2547 int eCE = getCE(contractions, existingCE, position); 2549 int newCE = processContraction(contractions, element, eCE); 2550 setContraction(contractions, existingCE, position, 2551 element.m_cPoints_.charAt(element.m_cPointsOffset_), 2552 newCE); 2553 } 2554 else { 2555 int newCE = processContraction(contractions, element, 2557 CE_NOT_FOUND_); 2558 insertContraction(contractions, existingCE, 2559 element.m_cPoints_.charAt(element.m_cPointsOffset_), 2560 newCE); 2561 } 2562 } 2563 element.m_cPointsOffset_ --; 2564 return existingCE; 2565 } 2566 2567 2572 private static final boolean isContractionTableElement(int CE) 2573 { 2574 return isSpecial(CE) 2575 && (getCETag(CE) == CE_CONTRACTION_TAG_ 2576 || getCETag(CE) == CE_SPEC_PROC_TAG_); 2577 } 2578 2579 2586 private static int findCP(ContractionTable table, int element, 2587 char codePoint) 2588 { 2589 BasicContractionTable tbl = getBasicContractionTable(table, element); 2590 if (tbl == null) { 2591 return -1; 2592 } 2593 2594 int position = 0; 2595 while (codePoint > tbl.m_codePoints_.charAt(position)) { 2596 position ++; 2597 if (position > tbl.m_codePoints_.length()) { 2598 return -1; 2599 } 2600 } 2601 if (codePoint == tbl.m_codePoints_.charAt(position)) { 2602 return position; 2603 } 2604 else { 2605 return -1; 2606 } 2607 } 2608 2609 2615 private static final BasicContractionTable getBasicContractionTable( 2616 ContractionTable table, 2617 int offset) 2618 { 2619 offset &= 0xFFFFFF; 2620 if (offset == 0xFFFFFF) { 2621 return null; 2622 } 2623 return (BasicContractionTable)table.m_elements_.get(offset); 2624 } 2625 2626 2634 private static final int changeContraction(ContractionTable table, 2635 int element, char codePoint, 2636 int newCE) 2637 { 2638 BasicContractionTable tbl = getBasicContractionTable(table, element); 2639 if (tbl == null) { 2640 return 0; 2641 } 2642 int position = 0; 2643 while (codePoint > tbl.m_codePoints_.charAt(position)) { 2644 position ++; 2645 if (position > tbl.m_codePoints_.length()) { 2646 return CE_NOT_FOUND_; 2647 } 2648 } 2649 if (codePoint == tbl.m_codePoints_.charAt(position)) { 2650 tbl.m_CEs_.set(position, new Integer (newCE)); 2651 return element & 0xFFFFFF; 2652 } 2653 else { 2654 return CE_NOT_FOUND_; 2655 } 2656 } 2657 2658 2668 private static final int setContraction(ContractionTable table, 2669 int element, int offset, 2670 char codePoint, int value) 2671 { 2672 element &= 0xFFFFFF; 2673 BasicContractionTable tbl = getBasicContractionTable(table, element); 2674 if (tbl == null) { 2675 tbl = addAContractionElement(table); 2676 element = table.m_elements_.size() - 1; 2677 } 2678 2679 tbl.m_CEs_.set(offset, new Integer (value)); 2680 tbl.m_codePoints_.setCharAt(offset, codePoint); 2681 return constructSpecialCE(table.m_currentTag_, element); 2682 } 2683 2684 2693 private static final int insertContraction(ContractionTable table, 2694 int element, char codePoint, 2695 int value) 2696 { 2697 element &= 0xFFFFFF; 2698 BasicContractionTable tbl = getBasicContractionTable(table, element); 2699 if (tbl == null) { 2700 tbl = addAContractionElement(table); 2701 element = table.m_elements_.size() - 1; 2702 } 2703 2704 int offset = 0; 2705 while (tbl.m_codePoints_.charAt(offset) < codePoint 2706 && offset < tbl.m_codePoints_.length()) { 2707 offset ++; 2708 } 2709 2710 tbl.m_CEs_.insertElementAt(new Integer (value), offset); 2711 tbl.m_codePoints_.insert(offset, codePoint); 2712 2713 return constructSpecialCE(table.m_currentTag_, element); 2714 } 2715 2716 2721 private final static int finalizeAddition(BuildTable t, Elements element) 2722 { 2723 int CE = CE_NOT_FOUND_; 2724 if (element.m_mapCE_ == 0) { 2728 for (int i = 0; i < element.m_cPoints_.length(); i ++) { 2729 char ch = element.m_cPoints_.charAt(i); 2730 if (!UTF16.isTrailSurrogate(ch)) { 2731 unsafeCPSet(t.m_unsafeCP_, ch); 2732 } 2733 } 2734 } 2735 2736 if (element.m_cPoints_.length() - element.m_cPointsOffset_ > 1) { 2737 int cp = UTF16.charAt(element.m_cPoints_, element.m_cPointsOffset_); 2739 CE = t.m_mapping_.getValue(cp); 2740 CE = addContraction(t, CE, element); 2741 } 2742 else { 2743 CE = t.m_mapping_.getValue(element.m_cPoints_.charAt( 2745 element.m_cPointsOffset_)); 2746 2747 if (CE != CE_NOT_FOUND_) { 2748 if(isContractionTableElement(CE)) { 2749 if (!isPrefix(element.m_mapCE_)) { 2752 setContraction(t.m_contractions_, CE, 0, (char)0, 2757 element.m_mapCE_); 2758 changeLastCE(t.m_contractions_, CE, element.m_mapCE_); 2761 } 2762 } 2763 else { 2764 t.m_mapping_.setValue(element.m_cPoints_.charAt( 2765 element.m_cPointsOffset_), 2766 element.m_mapCE_); 2767 } 2768 } 2769 else { 2770 t.m_mapping_.setValue(element.m_cPoints_.charAt( 2771 element.m_cPointsOffset_), 2772 element.m_mapCE_); 2773 } 2774 } 2775 return CE; 2776 } 2777 2778 2785 private static int addContraction(BuildTable t, int CE, Elements element) 2786 { 2787 ContractionTable contractions = t.m_contractions_; 2788 contractions.m_currentTag_ = CE_CONTRACTION_TAG_; 2789 2790 int cp = UTF16.charAt(element.m_cPoints_, 0); 2792 int cpsize = 1; 2793 if (UCharacter.isSupplementary(cp)) { 2794 cpsize = 2; 2795 } 2796 if (cpsize < element.m_cPoints_.length()) { 2797 int size = element.m_cPoints_.length() - element.m_cPointsOffset_; 2800 for (int j = 1; j < size; j ++) { 2801 if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt( 2805 element.m_cPointsOffset_ + j))) { 2806 unsafeCPSet(t.m_unsafeCP_, 2807 element.m_cPoints_.charAt( 2808 element.m_cPointsOffset_ + j)); 2809 } 2810 } 2811 if (!UTF16.isTrailSurrogate(element.m_cPoints_.charAt( 2815 element.m_cPoints_.length() -1))) { 2816 ContrEndCPSet(t.m_contrEndCP_, 2817 element.m_cPoints_.charAt( 2818 element.m_cPoints_.length() -1)); 2819 } 2820 2821 if (isJamo(element.m_cPoints_.charAt(element.m_cPointsOffset_))) { 2824 t.m_collator_.m_isJamoSpecial_ = true; 2825 } 2826 element.m_cPointsOffset_ += cpsize; 2829 if (!isContraction(CE)) { 2830 int firstContractionOffset = addContraction(contractions, 2832 CONTRACTION_TABLE_NEW_ELEMENT_, (char)0, CE); 2833 int newCE = processContraction(contractions, element, 2834 CE_NOT_FOUND_); 2835 addContraction(contractions, firstContractionOffset, 2836 element.m_cPoints_.charAt(element.m_cPointsOffset_), 2837 newCE); 2838 addContraction(contractions, firstContractionOffset, 2839 (char)0xFFFF, CE); 2840 CE = constructSpecialCE(CE_CONTRACTION_TAG_, 2841 firstContractionOffset); 2842 } 2843 else { 2844 int position = findCP(contractions, CE, 2850 element.m_cPoints_.charAt(element.m_cPointsOffset_)); 2851 if (position > 0) { 2852 int eCE = getCE(contractions, CE, position); 2854 int newCE = processContraction(contractions, element, eCE); 2855 setContraction(contractions, CE, position, 2856 element.m_cPoints_.charAt(element.m_cPointsOffset_), 2857 newCE); 2858 } 2859 else { 2860 int newCE = processContraction(contractions, element, 2862 CE_NOT_FOUND_); 2863 insertContraction(contractions, CE, 2864 element.m_cPoints_.charAt(element.m_cPointsOffset_), 2865 newCE); 2866 } 2867 } 2868 element.m_cPointsOffset_ -= cpsize; 2869 t.m_mapping_.setValue(cp, CE); 2870 } 2871 else if (!isContraction(CE)) { 2872 t.m_mapping_.setValue(cp, element.m_mapCE_); 2874 } 2875 else { 2876 changeContraction(contractions, CE, (char)0, element.m_mapCE_); 2879 changeContraction(contractions, CE, (char)0xFFFF, element.m_mapCE_); 2880 } 2881 return CE; 2882 } 2883 2884 2891 private static final int changeLastCE(ContractionTable table, int element, 2892 int value) 2893 { 2894 BasicContractionTable tbl = getBasicContractionTable(table, element); 2895 if (tbl == null) { 2896 return 0; 2897 } 2898 2899 tbl.m_CEs_.set(tbl.m_CEs_.size() - 1, new Integer (value)); 2900 return constructSpecialCE(table.m_currentTag_, element & 0xFFFFFF); 2901 } 2902 2903 2910 private static int nextWeight(CEGenerator cegenerator) 2911 { 2912 if (cegenerator.m_rangesLength_ > 0) { 2913 int maxByte = cegenerator.m_ranges_[0].m_count_; 2915 int weight = cegenerator.m_ranges_[0].m_start_; 2917 if (weight == cegenerator.m_ranges_[0].m_end_) { 2918 cegenerator.m_rangesLength_ --; 2921 if (cegenerator.m_rangesLength_ > 0) { 2922 System.arraycopy(cegenerator.m_ranges_, 1, 2923 cegenerator.m_ranges_, 0, 2924 cegenerator.m_rangesLength_); 2925 cegenerator.m_ranges_[0].m_count_ = maxByte; 2926 } 2928 } 2929 else { 2930 cegenerator.m_ranges_[0].m_start_ 2932 = incWeight(weight, cegenerator.m_ranges_[0].m_length2_, 2933 maxByte); 2934 } 2935 return weight; 2936 } 2937 return -1; 2938 } 2939 2940 2947 private static final int incWeight(int weight, int length, int maxByte) 2948 { 2949 while (true) { 2950 int b = getWeightByte(weight, length); 2951 if (b < maxByte) { 2952 return setWeightByte(weight, length, b + 1); 2953 } 2954 else { 2955 weight = setWeightByte(weight, length, 2958 RuleBasedCollator.BYTE_FIRST_TAILORED_); 2959 -- length; 2960 } 2961 } 2962 } 2963 2964 2970 private static final int getWeightByte(int weight, int index) 2971 { 2972 return (weight >> ((4 - index) << 3)) & 0xff; 2973 } 2974 2975 2981 private static final int setWeightByte(int weight, int index, int b) 2982 { 2983 index <<= 3; 2984 int mask = 0xffffffff >>> index; 2986 index = 32 - index; 2987 mask |= 0xffffff00 << index; 2988 return (weight & mask) | (b << index); 2989 } 2990 2991 3001 private int allocateWeights(int lowerLimit, int upperLimit, int n, 3002 int maxByte, WeightRange ranges[]) 3003 { 3004 int countBytes = maxByte - RuleBasedCollator.BYTE_FIRST_TAILORED_ + 1; 3006 m_utilLongBuffer_[0] = 1; 3010 m_utilLongBuffer_[1] = countBytes; 3011 m_utilLongBuffer_[2] = m_utilLongBuffer_[1] * countBytes; 3012 m_utilLongBuffer_[3] = m_utilLongBuffer_[2] * countBytes; 3013 m_utilLongBuffer_[4] = m_utilLongBuffer_[3] * countBytes; 3014 int rangeCount = getWeightRanges(lowerLimit, upperLimit, maxByte, 3015 countBytes, ranges); 3016 if (rangeCount <= 0) { 3017 return 0; 3018 } 3019 long maxCount = 0; 3021 for (int i = 0; i < rangeCount; ++ i) { 3022 maxCount += (long)ranges[i].m_count_ 3023 * m_utilLongBuffer_[4 - ranges[i].m_length_]; 3024 } 3025 if (maxCount < n) { 3026 return 0; 3027 } 3028 for (int i = 0; i < rangeCount; ++ i) { 3030 ranges[i].m_length2_ = ranges[i].m_length_; 3031 ranges[i].m_count2_ = ranges[i].m_count_; 3032 } 3033 while (true) { 3035 int minLength = ranges[0].m_length2_; 3037 Arrays.fill(m_utilCountBuffer_, 0); 3040 for (int i = 0; i < rangeCount; ++ i) { 3041 m_utilCountBuffer_[ranges[i].m_length2_] += ranges[i].m_count2_; 3042 } 3043 if (n <= m_utilCountBuffer_[minLength] 3045 + m_utilCountBuffer_[minLength + 1]) { 3046 maxCount = 0; 3048 rangeCount = 0; 3049 do { 3050 maxCount += ranges[rangeCount].m_count2_; 3051 ++ rangeCount; 3052 } while (n > maxCount); 3053 break; 3054 } 3055 else if (n <= ranges[0].m_count2_ * countBytes) { 3056 rangeCount = 1; 3059 long power_1 3062 = m_utilLongBuffer_[minLength - ranges[0].m_length_]; 3063 long power = power_1 * countBytes; 3064 int count2 = (int)((n + power - 1) / power); 3065 int count1 = ranges[0].m_count_ - count2; 3066 if (count1 < 1) { 3068 lengthenRange(ranges, 0, maxByte, countBytes); 3070 } 3071 else { 3072 rangeCount = 2; 3076 ranges[1].m_end_ = ranges[0].m_end_; 3077 ranges[1].m_length_ = ranges[0].m_length_; 3078 ranges[1].m_length2_ = minLength; 3079 int i = ranges[0].m_length_; 3081 int b = getWeightByte(ranges[0].m_start_, i) + count1 - 1; 3082 if (b <= maxByte) { 3085 ranges[0].m_end_ = setWeightByte(ranges[0].m_start_, i, 3086 b); 3087 } 3088 else { 3089 ranges[0].m_end_ = setWeightByte( 3090 incWeight(ranges[0].m_start_, i - 1, 3091 maxByte), 3092 i, b - countBytes); 3093 } 3094 b = (maxByte << 24) | (maxByte << 16) | (maxByte << 8) 3097 | maxByte; ranges[0].m_end_ = truncateWeight(ranges[0].m_end_, i) 3099 | (b >>> (i << 3)) 3100 & (b << ((4 - minLength) << 3)); 3101 ranges[1].m_start_ = incWeight(ranges[0].m_end_, minLength, 3104 maxByte); 3105 ranges[0].m_count_ = count1; 3107 ranges[1].m_count_ = count2; 3108 3109 ranges[0].m_count2_ = (int)(count1 * power_1); 3110 ranges[1].m_count2_ = (int)(count2 * power_1); 3112 3113 lengthenRange(ranges, 1, maxByte, countBytes); 3115 } 3116 break; 3117 } 3118 for (int i=0; ranges[i].m_length2_ == minLength; ++ i) { 3120 lengthenRange(ranges, i, maxByte, countBytes); 3121 } 3122 } 3123 3124 if (rangeCount > 1) { 3125 Arrays.sort(ranges, 0, rangeCount); 3127 } 3128 3129 ranges[0].m_count_ = maxByte; 3131 3132 return rangeCount; 3133 } 3134 3135 3143 private static final int lengthenRange(WeightRange range[], int offset, 3144 int maxByte, int countBytes) 3145 { 3146 int length = range[offset].m_length2_ + 1; 3147 range[offset].m_start_ = setWeightTrail(range[offset].m_start_, length, 3148 RuleBasedCollator.BYTE_FIRST_TAILORED_); 3149 range[offset].m_end_ = setWeightTrail(range[offset].m_end_, length, 3150 maxByte); 3151 range[offset].m_count2_ *= countBytes; 3152 range[offset].m_length2_ = length; 3153 return length; 3154 } 3155 3156 3163 private static final int setWeightTrail(int weight, int length, int trail) 3164 { 3165 length = (4 - length) << 3; 3166 return (weight & (0xffffff00 << length)) | (trail << length); 3167 } 3168 3169 3180 private int getWeightRanges(int lowerLimit, int upperLimit, int maxByte, 3181 int countBytes, WeightRange ranges[]) 3182 { 3183 int lowerLength = lengthOfWeight(lowerLimit); 3186 int upperLength = lengthOfWeight(upperLimit); 3187 if (Utility.compareUnsigned(lowerLimit, upperLimit) >= 0) { 3188 return 0; 3189 } 3190 if (lowerLength < upperLength) { 3192 if (lowerLimit == truncateWeight(upperLimit, lowerLength)) { 3193 return 0; 3194 } 3195 } 3196 3213 for(int length = 0; length < 5; length++) { 3216 m_utilLowerWeightRange_[length].clear(); 3217 m_utilUpperWeightRange_[length].clear(); 3218 } 3219 m_utilWeightRange_.clear(); 3220 3221 int weight = lowerLimit; 3222 for (int length = lowerLength; length >= 2; -- length) { 3223 m_utilLowerWeightRange_[length].clear(); 3224 int trail = getWeightByte(weight, length); 3225 if (trail < maxByte) { 3226 m_utilLowerWeightRange_[length].m_start_ 3227 = incWeightTrail(weight, length); 3228 m_utilLowerWeightRange_[length].m_end_ 3229 = setWeightTrail(weight, length, maxByte); 3230 m_utilLowerWeightRange_[length].m_length_ = length; 3231 m_utilLowerWeightRange_[length].m_count_ = maxByte - trail; 3232 } 3233 weight = truncateWeight(weight, length - 1); 3234 } 3235 m_utilWeightRange_.m_start_ = incWeightTrail(weight, 1); 3236 3237 weight = upperLimit; 3238 3241 for (int length = upperLength; length >= 2; length --) { 3242 int trail = getWeightByte(weight, length); 3243 if (trail > RuleBasedCollator.BYTE_FIRST_TAILORED_) { 3244 m_utilUpperWeightRange_[length].m_start_ 3245 = setWeightTrail(weight, length, 3246 RuleBasedCollator.BYTE_FIRST_TAILORED_); 3247 m_utilUpperWeightRange_[length].m_end_ 3248 = decWeightTrail(weight, length); 3249 m_utilUpperWeightRange_[length].m_length_ = length; 3250 m_utilUpperWeightRange_[length].m_count_ = trail 3251 - RuleBasedCollator.BYTE_FIRST_TAILORED_; 3252 } 3253 weight = truncateWeight(weight, length - 1); 3254 } 3255 m_utilWeightRange_.m_end_ = decWeightTrail(weight, 1); 3256 3257 m_utilWeightRange_.m_length_ = 1; 3259 if (Utility.compareUnsigned(m_utilWeightRange_.m_end_, m_utilWeightRange_.m_start_) >= 0) { 3260 m_utilWeightRange_.m_count_ 3262 = ((m_utilWeightRange_.m_end_ - m_utilWeightRange_.m_start_) 3263 >>> 24) + 1; 3264 } 3265 else { 3266 m_utilWeightRange_.m_count_ = 0; 3269 for (int length = 4; length >= 2; -- length) { 3271 if (m_utilLowerWeightRange_[length].m_count_ > 0 3272 && m_utilUpperWeightRange_[length].m_count_ > 0) { 3273 int start = m_utilUpperWeightRange_[length].m_start_; 3274 int end = m_utilLowerWeightRange_[length].m_end_; 3275 if (end >= start || incWeight(end, length, maxByte) 3276 == start) { 3277 start = m_utilLowerWeightRange_[length].m_start_; 3281 end = m_utilLowerWeightRange_[length].m_end_ 3282 = m_utilUpperWeightRange_[length].m_end_; 3283 m_utilLowerWeightRange_[length].m_count_ 3287 = getWeightByte(end, length) 3288 - getWeightByte(start, length) + 1 3289 + countBytes * (getWeightByte(end, length - 1) 3290 - getWeightByte(start, 3291 length - 1)); 3292 m_utilUpperWeightRange_[length].m_count_ = 0; 3293 while (-- length >= 2) { 3294 m_utilLowerWeightRange_[length].m_count_ 3295 = m_utilUpperWeightRange_[length].m_count_ = 0; 3296 } 3297 break; 3298 } 3299 } 3300 } 3301 } 3302 3303 int rangeCount = 0; 3305 if (m_utilWeightRange_.m_count_ > 0) { 3306 ranges[0] = new WeightRange(m_utilWeightRange_); 3307 rangeCount = 1; 3308 } 3309 for (int length = 2; length <= 4; ++ length) { 3310 if (m_utilUpperWeightRange_[length].m_count_ > 0) { 3313 ranges[rangeCount] 3314 = new WeightRange(m_utilUpperWeightRange_[length]); 3315 ++ rangeCount; 3316 } 3317 if (m_utilLowerWeightRange_[length].m_count_ > 0) { 3318 ranges[rangeCount] 3319 = new WeightRange(m_utilLowerWeightRange_[length]); 3320 ++ rangeCount; 3321 } 3322 } 3323 return rangeCount; 3324 } 3325 3326 3332 private static final int truncateWeight(int weight, int length) 3333 { 3334 return weight & (0xffffffff << ((4 - length) << 3)); 3335 } 3336 3337 3342 private static final int lengthOfWeight(int weight) 3343 { 3344 if ((weight & 0xffffff) == 0) { 3345 return 1; 3346 } 3347 else if ((weight & 0xffff) == 0) { 3348 return 2; 3349 } 3350 else if ((weight & 0xff) == 0) { 3351 return 3; 3352 } 3353 return 4; 3354 } 3355 3356 3362 private static final int incWeightTrail(int weight, int length) 3363 { 3364 return weight + (1 << ((4-length) << 3)); 3365 } 3366 3367 3373 private static int decWeightTrail(int weight, int length) 3374 { 3375 return weight - (1 << ((4 - length) << 3)); 3376 } 3377 3378 3384 private static int findCP(BasicContractionTable tbl, char codePoint) 3385 { 3386 int position = 0; 3387 while (codePoint > tbl.m_codePoints_.charAt(position)) { 3388 position ++; 3389 if (position > tbl.m_codePoints_.length()) { 3390 return -1; 3391 } 3392 } 3393 if (codePoint == tbl.m_codePoints_.charAt(position)) { 3394 return position; 3395 } 3396 else { 3397 return -1; 3398 } 3399 } 3400 3401 3408 private static int findCE(ContractionTable table, int element, char ch) 3409 { 3410 if (table == null) { 3411 return CE_NOT_FOUND_; 3412 } 3413 BasicContractionTable tbl = getBasicContractionTable(table, element); 3414 if (tbl == null) { 3415 return CE_NOT_FOUND_; 3416 } 3417 int position = findCP(tbl, ch); 3418 if (position > tbl.m_CEs_.size() || position < 0) { 3419 return CE_NOT_FOUND_; 3420 } 3421 return ((Integer )tbl.m_CEs_.get(position)).intValue(); 3422 } 3423 3424 3432 private static boolean isTailored(ContractionTable table, int element, 3433 char array[], int offset) 3434 { 3435 while (array[offset] != 0) { 3436 element = findCE(table, element, array[offset]); 3437 if (element == CE_NOT_FOUND_) { 3438 return false; 3439 } 3440 if (!isContractionTableElement(element)) { 3441 return true; 3442 } 3443 offset ++; 3444 } 3445 if (getCE(table, element, 0) != CE_NOT_FOUND_) { 3446 return true; 3447 } 3448 else { 3449 return false; 3450 } 3451 } 3452 3453 3458 private void assembleTable(BuildTable t, RuleBasedCollator collator) 3459 { 3460 IntTrieBuilder mapping = t.m_mapping_; 3461 Vector expansions = t.m_expansions_; 3462 ContractionTable contractions = t.m_contractions_; 3463 MaxExpansionTable maxexpansion = t.m_maxExpansions_; 3464 3465 collator.m_contractionOffset_ = 0; 3470 int contractionsSize = constructTable(contractions); 3471 3472 getMaxExpansionJamo(mapping, maxexpansion, t.m_maxJamoExpansions_, 3476 collator.m_isJamoSpecial_); 3477 3478 setAttributes(collator, t.m_options_); 3481 int size = expansions.size(); 3483 collator.m_expansion_ = new int[size]; 3484 for (int i = 0; i < size; i ++) { 3485 collator.m_expansion_[i] = ((Integer )expansions.get(i)).intValue(); 3486 } 3487 if (contractionsSize != 0) { 3489 collator.m_contractionIndex_ = new char[contractionsSize]; 3491 contractions.m_codePoints_.getChars(0, contractionsSize, 3492 collator.m_contractionIndex_, 3493 0); 3494 collator.m_contractionCE_ = new int[contractionsSize]; 3496 for (int i = 0; i < contractionsSize; i ++) { 3497 collator.m_contractionCE_[i] = ((Integer ) 3498 contractions.m_CEs_.get(i)).intValue(); 3499 } 3500 } 3501 collator.m_trie_ = mapping.serialize(t, 3503 RuleBasedCollator.DataManipulate.getInstance()); 3504 collator.m_expansionOffset_ = 0; 3510 size = maxexpansion.m_endExpansionCE_.size(); 3511 collator.m_expansionEndCE_ = new int[size - 1]; 3512 for (int i = 1; i < size; i ++) { 3513 collator.m_expansionEndCE_[i - 1] = ((Integer ) 3514 maxexpansion.m_endExpansionCE_.get(i)).intValue(); 3515 } 3516 collator.m_expansionEndCEMaxSize_ = new byte[size - 1]; 3517 for (int i = 1; i < size; i ++) { 3518 collator.m_expansionEndCEMaxSize_[i - 1] 3519 = ((Byte )maxexpansion.m_expansionCESize_.get(i)).byteValue(); 3520 } 3521 unsafeCPAddCCNZ(t); 3523 for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i ++) { 3525 t.m_unsafeCP_[i] |= RuleBasedCollator.UCA_.m_unsafe_[i]; 3526 } 3527 collator.m_unsafe_ = t.m_unsafeCP_; 3528 3529 for (int i = 0; i < UNSAFECP_TABLE_SIZE_; i ++) { 3533 t.m_contrEndCP_[i] |= RuleBasedCollator.UCA_.m_contractionEnd_[i]; 3534 } 3535 collator.m_contractionEnd_ = t.m_contrEndCP_; 3536 } 3537 3538 3543 private static final void setAttributes(RuleBasedCollator collator, 3544 CollationRuleParser.OptionSet option) 3545 { 3546 collator.latinOneFailed_ = true; 3547 collator.m_caseFirst_ = option.m_caseFirst_; 3548 collator.setDecomposition(option.m_decomposition_); 3549 collator.setAlternateHandlingShifted( 3550 option.m_isAlternateHandlingShifted_); 3551 collator.setCaseLevel(option.m_isCaseLevel_); 3552 collator.setFrenchCollation(option.m_isFrenchCollation_); 3553 collator.m_isHiragana4_ = option.m_isHiragana4_; 3554 collator.setStrength(option.m_strength_); 3555 collator.m_variableTopValue_ = option.m_variableTopValue_; 3556 collator.latinOneFailed_ = false; 3557 } 3558 3559 3564 private int constructTable(ContractionTable table) 3565 { 3566 int tsize = table.m_elements_.size(); 3568 if (tsize == 0) { 3569 return 0; 3570 } 3571 table.m_offsets_.clear(); 3572 int position = 0; 3573 for (int i = 0; i < tsize; i ++) { 3574 table.m_offsets_.add(new Integer (position)); 3575 position += ((BasicContractionTable) 3576 table.m_elements_.get(i)).m_CEs_.size(); 3577 } 3578 table.m_CEs_.clear(); 3579 table.m_codePoints_.delete(0, table.m_codePoints_.length()); 3580 StringBuffer cpPointer = table.m_codePoints_; 3582 Vector CEPointer = table.m_CEs_; 3583 for (int i = 0; i < tsize; i ++) { 3584 BasicContractionTable bct = (BasicContractionTable) 3585 table.m_elements_.get(i); 3586 int size = bct.m_CEs_.size(); 3587 char ccMax = 0; 3588 char ccMin = 255; 3589 int offset = CEPointer.size(); 3590 CEPointer.add(bct.m_CEs_.get(0)); 3591 for (int j = 1; j < size; j ++) { 3592 char ch = bct.m_codePoints_.charAt(j); 3593 char cc = (char)(UCharacter.getCombiningClass(ch) & 0xFF); 3594 if (cc > ccMax) { 3595 ccMax = cc; 3596 } 3597 if (cc < ccMin) { 3598 ccMin = cc; 3599 } 3600 cpPointer.append(ch); 3601 CEPointer.add(bct.m_CEs_.get(j)); 3602 } 3603 cpPointer.insert(offset, 3604 (char)(((ccMin == ccMax) ? 1 : 0 << 8) | ccMax)); 3605 for (int j = 0; j < size; j ++) { 3606 if (isContractionTableElement(((Integer ) 3607 CEPointer.get(offset + j)).intValue())) { 3608 int ce = ((Integer )CEPointer.get(offset + j)).intValue(); 3609 CEPointer.set(offset + j, 3610 new Integer (constructSpecialCE(getCETag(ce), 3611 ((Integer )table.m_offsets_.get( 3612 getContractionOffset(ce))).intValue()))); 3613 } 3614 } 3615 } 3616 3617 for (int i = 0; i <= 0x10FFFF; i ++) { 3618 int CE = table.m_mapping_.getValue(i); 3619 if (isContractionTableElement(CE)) { 3620 CE = constructSpecialCE(getCETag(CE), 3621 ((Integer )table.m_offsets_.get( 3622 getContractionOffset(CE))).intValue()); 3623 table.m_mapping_.setValue(i, CE); 3624 } 3625 } 3626 return position; 3627 } 3628 3629 3634 private static final int getContractionOffset(int ce) 3635 { 3636 return ce & 0xFFFFFF; 3637 } 3638 3639 3646 private static void getMaxExpansionJamo(IntTrieBuilder mapping, 3647 MaxExpansionTable maxexpansion, 3648 MaxJamoExpansionTable 3649 maxjamoexpansion, 3650 boolean jamospecial) 3651 { 3652 int VBASE = 0x1161; 3653 int TBASE = 0x11A8; 3654 int VCOUNT = 21; 3655 int TCOUNT = 28; 3656 int v = VBASE + VCOUNT - 1; 3657 int t = TBASE + TCOUNT - 1; 3658 3659 while (v >= VBASE) { 3660 int ce = mapping.getValue(v); 3661 if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) 3662 != RuleBasedCollator.CE_SPECIAL_FLAG_) { 3663 setMaxExpansion(ce, (byte)2, maxexpansion); 3664 } 3665 v --; 3666 } 3667 3668 while (t >= TBASE) 3669 { 3670 int ce = mapping.getValue(t); 3671 if ((ce & RuleBasedCollator.CE_SPECIAL_FLAG_) 3672 != RuleBasedCollator.CE_SPECIAL_FLAG_) { 3673 setMaxExpansion(ce, (byte)3, maxexpansion); 3674 } 3675 t --; 3676 } 3677 if (jamospecial) { 3679 int count = maxjamoexpansion.m_endExpansionCE_.size(); 3681 byte maxTSize = (byte)(maxjamoexpansion.m_maxLSize_ + 3682 maxjamoexpansion.m_maxVSize_ + 3683 maxjamoexpansion.m_maxTSize_); 3684 byte maxVSize = (byte)(maxjamoexpansion.m_maxLSize_ + 3685 maxjamoexpansion.m_maxVSize_); 3686 3687 while (count > 0) { 3688 count --; 3689 if (((Boolean )maxjamoexpansion.m_isV_.get(count)).booleanValue() 3690 == true) { 3691 setMaxExpansion(((Integer ) 3692 maxjamoexpansion.m_endExpansionCE_.get(count)).intValue(), 3693 maxVSize, maxexpansion); 3694 } 3695 else { 3696 setMaxExpansion(((Integer ) 3697 maxjamoexpansion.m_endExpansionCE_.get(count)).intValue(), 3698 maxTSize, maxexpansion); 3699 } 3700 } 3701 } 3702 } 3703 3704 3708 private static final void unsafeCPAddCCNZ(BuildTable t) 3709 { 3710 3711 for (char c = 0; c < 0xffff; c ++) { 3712 char fcd = NormalizerImpl.getFCD16(c); 3713 if (fcd >= 0x100 || (UTF16.isLeadSurrogate(c) && fcd != 0)) { 3715 unsafeCPSet(t.m_unsafeCP_, c); 3717 } 3718 } 3719 3720 if (t.m_prefixLookup_ != null) { 3721 Enumeration els = t.m_prefixLookup_.elements(); 3722 while (els.hasMoreElements()) { 3723 Elements e = (Elements)els.nextElement(); 3724 String comp = Normalizer.compose(e.m_cPoints_, false); 3732 unsafeCPSet(t.m_unsafeCP_, comp.charAt(0)); 3733 } 3734 } 3735 } 3736 3737 3747 private boolean enumCategoryRangeClosureCategory(BuildTable t, 3748 RuleBasedCollator collator, 3749 CollationElementIterator colEl, 3750 int start, int limit, int type) 3751 { 3752 if (type != UCharacterCategory.UNASSIGNED 3753 && type != UCharacterCategory.PRIVATE_USE) { 3754 3756 for (int u32 = start; u32 < limit; u32 ++) { 3757 int noOfDec = NormalizerImpl.getDecomposition(u32, false, 3758 m_utilCharBuffer_, 3759 0, 256); 3760 if (noOfDec > 0) { 3761 String comp = UCharacter.toString(u32); 3763 String decomp = new String (m_utilCharBuffer_, 0, noOfDec); 3764 if (!collator.equals(comp, decomp)) { 3765 m_utilElement_.m_cPoints_ = decomp; 3766 m_utilElement_.m_prefix_ = 0; 3767 Elements prefix 3768 = (Elements)t.m_prefixLookup_.get(m_utilElement_); 3769 if (prefix == null) { 3770 m_utilElement_.m_cPoints_ = comp; 3771 m_utilElement_.m_prefix_ = 0; 3772 m_utilElement_.m_prefixChars_ = null; 3773 colEl.setText(decomp); 3774 int ce = colEl.next(); 3775 m_utilElement_.m_CELength_ = 0; 3776 while (ce != CollationElementIterator.NULLORDER) { 3777 m_utilElement_.m_CEs_[ 3778 m_utilElement_.m_CELength_ ++] 3779 = ce; 3780 ce = colEl.next(); 3781 } 3782 } 3783 else { 3784 m_utilElement_.m_cPoints_ = comp; 3785 m_utilElement_.m_prefix_ = 0; 3786 m_utilElement_.m_prefixChars_ = null; 3787 m_utilElement_.m_CELength_ = 1; 3788 m_utilElement_.m_CEs_[0] = prefix.m_mapCE_; 3789 } 3797 addAnElement(t, m_utilElement_); 3798 } 3799 } 3800 } 3801 } 3802 return true; 3803 } 3804 3805 3810 private static final boolean isJamo(char ch) 3811 { 3812 return (ch >= 0x1100 && ch <= 0x1112) 3813 || (ch >= 0x1175 && ch <= 0x1161) 3814 || (ch >= 0x11A8 && ch <= 0x11C2); 3815 } 3816 3817 3820 private void canonicalClosure(BuildTable t) 3821 { 3822 BuildTable temp = new BuildTable(t); 3823 assembleTable(temp, temp.m_collator_); 3824 CollationElementIterator coleiter 3826 = temp.m_collator_.getCollationElementIterator(""); 3827 RangeValueIterator typeiter = UCharacter.getTypeIterator(); 3828 RangeValueIterator.Element element = new RangeValueIterator.Element(); 3829 while (typeiter.next(element)) { 3830 enumCategoryRangeClosureCategory(t, temp.m_collator_, coleiter, 3831 element.start, element.limit, 3832 element.value); 3833 } 3834 } 3835 3836 private void processUCACompleteIgnorables(BuildTable t) 3837 { 3838 TrieIterator trieiterator 3839 = new TrieIterator(RuleBasedCollator.UCA_.m_trie_); 3840 RangeValueIterator.Element element = new RangeValueIterator.Element(); 3841 while (trieiterator.next(element)) { 3842 int start = element.start; 3843 int limit = element.limit; 3844 if (element.value == 0) { 3845 while (start < limit) { 3846 int CE = t.m_mapping_.getValue(start); 3847 if (CE == CE_NOT_FOUND_) { 3848 m_utilElement_.m_prefix_ = 0; 3849 m_utilElement_.m_uchars_ = UCharacter.toString(start); 3850 m_utilElement_.m_cPoints_ = m_utilElement_.m_uchars_; 3851 m_utilElement_.m_cPointsOffset_ = 0; 3852 m_utilElement_.m_CELength_ = 1; 3853 m_utilElement_.m_CEs_[0] = 0; 3854 addAnElement(t, m_utilElement_); 3855 } 3856 start ++; 3857 } 3858 } 3859 } 3860 } 3861} 3862 | Popular Tags |