1 8 package com.ibm.icu.text; 9 10 import java.io.IOException ; 11 import java.text.CharacterIterator ; 12 import java.text.ParseException ; 13 import java.util.Arrays ; 14 import java.util.MissingResourceException ; 15 16 import com.ibm.icu.impl.ByteBuffer; 20 22 import com.ibm.icu.impl.BOCU; 23 import com.ibm.icu.impl.ICUDebug; 24 import com.ibm.icu.impl.ICUResourceBundle; 25 import com.ibm.icu.impl.ImplicitCEGenerator; 26 import com.ibm.icu.impl.IntTrie; 27 import com.ibm.icu.impl.StringUCharacterIterator; 28 import com.ibm.icu.impl.Trie; 29 import com.ibm.icu.impl.TrieIterator; 30 import com.ibm.icu.impl.Utility; 31 import com.ibm.icu.lang.UCharacter; 32 import com.ibm.icu.util.RangeValueIterator; 33 import com.ibm.icu.util.ULocale; 34 import com.ibm.icu.util.UResourceBundle; 35 import com.ibm.icu.util.VersionInfo; 36 37 193 public final class RuleBasedCollator extends Collator 194 { 195 197 214 public RuleBasedCollator(String rules) throws Exception 215 { 216 checkUCA(); 217 if (rules == null) { 218 throw new IllegalArgumentException ( 219 "Collation rules can not be null"); 220 } 221 init(rules); 222 } 223 224 226 231 public Object clone() throws CloneNotSupportedException 232 { 233 RuleBasedCollator result = (RuleBasedCollator)super.clone(); 234 if (latinOneCEs_ != null) { 235 result.m_reallocLatinOneCEs_ = true; 236 } 237 result.initUtility(false); return result; 242 } 243 244 249 public CollationElementIterator getCollationElementIterator(String source) 250 { 251 return new CollationElementIterator(source, this); 252 } 253 254 261 public CollationElementIterator getCollationElementIterator( 262 CharacterIterator source) 263 { 264 CharacterIterator newsource = (CharacterIterator )source.clone(); 265 return new CollationElementIterator(newsource, this); 266 } 267 268 275 public CollationElementIterator getCollationElementIterator( 276 UCharacterIterator source) 277 { 278 return new CollationElementIterator(source, this); 279 } 280 281 283 295 public void setHiraganaQuaternary(boolean flag) 296 { 297 m_isHiragana4_ = flag; 298 updateInternalState(); 299 } 300 301 309 public void setHiraganaQuaternaryDefault() 310 { 311 m_isHiragana4_ = m_defaultIsHiragana4_; 312 updateInternalState(); 313 } 314 315 330 public void setUpperCaseFirst(boolean upperfirst) 331 { 332 if (upperfirst) { 333 if(m_caseFirst_ != AttributeValue.UPPER_FIRST_) { 334 latinOneRegenTable_ = true; 335 } 336 m_caseFirst_ = AttributeValue.UPPER_FIRST_; 337 } 338 else { 339 if(m_caseFirst_ != AttributeValue.OFF_) { 340 latinOneRegenTable_ = true; 341 } 342 m_caseFirst_ = AttributeValue.OFF_; 343 } 344 updateInternalState(); 345 } 346 347 364 public void setLowerCaseFirst(boolean lowerfirst) 365 { 366 if (lowerfirst) { 367 if(m_caseFirst_ != AttributeValue.LOWER_FIRST_) { 368 latinOneRegenTable_ = true; 369 } 370 m_caseFirst_ = AttributeValue.LOWER_FIRST_; 371 } 372 else { 373 if(m_caseFirst_ != AttributeValue.OFF_) { 374 latinOneRegenTable_ = true; 375 } 376 m_caseFirst_ = AttributeValue.OFF_; 377 } 378 updateInternalState(); 379 } 380 381 392 public final void setCaseFirstDefault() 393 { 394 if(m_caseFirst_ != m_defaultCaseFirst_) { 395 latinOneRegenTable_ = true; 396 } 397 m_caseFirst_ = m_defaultCaseFirst_; 398 updateInternalState(); 399 } 400 401 409 public void setAlternateHandlingDefault() 410 { 411 m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; 412 updateInternalState(); 413 } 414 415 423 public void setCaseLevelDefault() 424 { 425 m_isCaseLevel_ = m_defaultIsCaseLevel_; 426 updateInternalState(); 427 } 428 429 437 public void setDecompositionDefault() 438 { 439 setDecomposition(m_defaultDecomposition_); 440 updateInternalState(); 441 } 442 443 451 public void setFrenchCollationDefault() 452 { 453 if(m_isFrenchCollation_ != m_defaultIsFrenchCollation_) { 454 latinOneRegenTable_ = true; 455 } 456 m_isFrenchCollation_ = m_defaultIsFrenchCollation_; 457 updateInternalState(); 458 } 459 460 468 public void setStrengthDefault() 469 { 470 setStrength(m_defaultStrength_); 471 updateInternalState(); 472 } 473 474 483 public void setNumericCollationDefault() 484 { 485 setNumericCollation(m_defaultIsNumericCollation_); 486 updateInternalState(); 487 } 488 489 503 public void setFrenchCollation(boolean flag) 504 { 505 if(m_isFrenchCollation_ != flag) { 506 latinOneRegenTable_ = true; 507 } 508 m_isFrenchCollation_ = flag; 509 updateInternalState(); 510 } 511 512 533 public void setAlternateHandlingShifted(boolean shifted) 534 { 535 m_isAlternateHandlingShifted_ = shifted; 536 updateInternalState(); 537 } 538 539 561 public void setCaseLevel(boolean flag) 562 { 563 m_isCaseLevel_ = flag; 564 updateInternalState(); 565 } 566 567 586 public void setStrength(int newStrength) 587 { 588 super.setStrength(newStrength); 589 updateInternalState(); 590 } 591 592 619 public int setVariableTop(String varTop) 620 { 621 if (varTop == null || varTop.length() == 0) { 622 throw new IllegalArgumentException ( 623 "Variable top argument string can not be null or zero in length."); 624 } 625 if (m_srcUtilIter_ == null) { 626 initUtility(true); 627 } 628 629 m_srcUtilColEIter_.setText(varTop); 630 int ce = m_srcUtilColEIter_.next(); 631 632 if (m_srcUtilColEIter_.getOffset() != varTop.length() 636 || ce == CollationElementIterator.NULLORDER) { 637 throw new IllegalArgumentException ( 638 "Variable top argument string is a contraction that does not exist " 639 + "in the Collation order"); 640 } 641 642 int nextCE = m_srcUtilColEIter_.next(); 643 644 if ((nextCE != CollationElementIterator.NULLORDER) 645 && (!isContinuation(nextCE) || (nextCE & CE_PRIMARY_MASK_) != 0)) { 646 throw new IllegalArgumentException ( 647 "Variable top argument string can only have a single collation " 648 + "element that has less than or equal to two PRIMARY strength " 649 + "bytes"); 650 } 651 652 m_variableTopValue_ = (ce & CE_PRIMARY_MASK_) >> 16; 653 654 return ce & CE_PRIMARY_MASK_; 655 } 656 657 667 public void setVariableTop(int varTop) 668 { 669 m_variableTopValue_ = (varTop & CE_PRIMARY_MASK_) >> 16; 670 } 671 672 681 public void setNumericCollation(boolean flag) 682 { 683 m_isNumericCollation_ = flag; 685 updateInternalState(); 686 } 687 688 690 697 public String getRules() 698 { 699 return m_rules_; 700 } 701 702 712 public String getRules(boolean fullrules) 713 { 714 if (!fullrules) { 715 return m_rules_; 716 } 717 return UCA_.m_rules_.concat(m_rules_); 719 } 720 721 731 public UnicodeSet getTailoredSet() 732 { 733 try { 734 CollationRuleParser src = new CollationRuleParser(getRules()); 735 return src.getTailoredSet(); 736 } catch(Exception e) { 737 throw new IllegalStateException ("A tailoring rule should not " + 738 "have errors. Something is quite wrong!"); 739 } 740 } 741 742 private class contContext { 743 RuleBasedCollator coll; 744 UnicodeSet contractions; 745 UnicodeSet expansions; 746 UnicodeSet removedContractions; 747 boolean addPrefixes; 748 contContext(RuleBasedCollator coll, UnicodeSet contractions, UnicodeSet expansions, 749 UnicodeSet removedContractions, boolean addPrefixes) { 750 this.coll = coll; 751 this.contractions = contractions; 752 this.expansions = expansions; 753 this.removedContractions = removedContractions; 754 this.addPrefixes = addPrefixes; 755 } 756 } 757 758 private void 759 addSpecial(contContext c, StringBuffer buffer, int CE) 760 { 761 StringBuffer b = new StringBuffer (); 762 int offset = (CE & 0xFFFFFF) - c.coll.m_contractionOffset_; 763 int newCE = c.coll.m_contractionCE_[offset]; 764 if(newCE != CollationElementIterator.CE_NOT_FOUND_) { 766 if(isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_ 767 && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_ 768 && c.addPrefixes) { 769 addSpecial(c, buffer, newCE); 770 } 771 if(buffer.length() > 1) { 772 if(c.contractions != null) { 773 c.contractions.add(buffer.toString()); 774 } 775 if(c.expansions != null && isSpecial(CE) && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) { 776 c.expansions.add(buffer.toString()); 777 } 778 } 779 } 780 781 offset++; 782 if(getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) { 784 while(c.coll.m_contractionIndex_[offset] != 0xFFFF) { 785 b.delete(0, b.length()); 786 b.append(buffer); 787 newCE = c.coll.m_contractionCE_[offset]; 788 b.insert(0, c.coll.m_contractionIndex_[offset]); 789 if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) { 790 addSpecial(c, b, newCE); 791 } else { 792 if(c.contractions != null) { 793 c.contractions.add(b.toString()); 794 } 795 if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) { 796 c.expansions.add(b.toString()); 797 } 798 } 799 offset++; 800 } 801 } else if(getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_) { 802 while(c.coll.m_contractionIndex_[offset] != 0xFFFF) { 803 b.delete(0, b.length()); 804 b.append(buffer); 805 newCE = c.coll.m_contractionCE_[offset]; 806 b.append(c.coll.m_contractionIndex_[offset]); 807 if(isSpecial(newCE) && (getTag(newCE) == CollationElementIterator.CE_CONTRACTION_TAG_ || getTag(newCE) == CollationElementIterator.CE_SPEC_PROC_TAG_)) { 808 addSpecial(c, b, newCE); 809 } else { 810 if(c.contractions != null) { 811 c.contractions.add(b.toString()); 812 } 813 if(c.expansions != null && isSpecial(newCE) && getTag(newCE) == CollationElementIterator.CE_EXPANSION_TAG_) { 814 c.expansions.add(b.toString()); 815 } 816 } 817 offset++; 818 } 819 } 820 } 821 822 private 823 void processSpecials(contContext c) 824 { 825 int internalBufferSize = 512; 826 TrieIterator trieiterator 827 = new TrieIterator(c.coll.m_trie_); 828 RangeValueIterator.Element element = new RangeValueIterator.Element(); 829 while (trieiterator.next(element)) { 830 int start = element.start; 831 int limit = element.limit; 832 int CE = element.value; 833 StringBuffer contraction = new StringBuffer (internalBufferSize); 834 835 if(isSpecial(CE)) { 836 if(((getTag(CE) == CollationElementIterator.CE_SPEC_PROC_TAG_ && c.addPrefixes) || getTag(CE) == CollationElementIterator.CE_CONTRACTION_TAG_)) { 837 while(start < limit) { 838 if(c.removedContractions != null && c.removedContractions.contains(start)) { 841 start++; 842 continue; 843 } 844 contraction.append((char) start); 847 addSpecial(c, contraction, CE); 848 start++; 849 } 850 } else if(c.expansions != null && getTag(CE) == CollationElementIterator.CE_EXPANSION_TAG_) { 851 while(start < limit) { 852 c.expansions.add(start++); 853 } 854 } 855 } 856 } 857 } 858 859 868 public void 869 getContractionsAndExpansions(UnicodeSet contractions, UnicodeSet expansions, 870 boolean addPrefixes) throws Exception { 871 if(contractions != null) { 872 contractions.clear(); 873 } 874 if(expansions != null) { 875 expansions.clear(); 876 } 877 int rulesLen = 0; 878 String rules = getRules(); 879 try { 880 CollationRuleParser src = new CollationRuleParser(rules); 881 contContext c = new contContext(RuleBasedCollator.UCA_, 882 contractions, expansions, src.m_removeSet_, addPrefixes); 883 884 processSpecials(c); 886 c.coll = this; 888 c.removedContractions = null; 889 processSpecials(c); 890 } catch (Exception e) { 891 throw e; 892 } 893 } 894 895 920 public CollationKey getCollationKey(String source) { 921 if (source == null) { 922 return null; 923 } 924 m_utilRawCollationKey_ = getRawCollationKey(source, 925 m_utilRawCollationKey_); 926 return new CollationKey(source, m_utilRawCollationKey_); 927 } 928 929 946 public RawCollationKey getRawCollationKey(String source, 947 RawCollationKey key) 948 { 949 if (source == null) { 950 return null; 951 } 952 int strength = getStrength(); 953 m_utilCompare0_ = m_isCaseLevel_; 954 m_utilCompare1_ = true; 955 m_utilCompare2_ = strength >= SECONDARY; 956 m_utilCompare3_ = strength >= TERTIARY; 957 m_utilCompare4_ = strength >= QUATERNARY; 958 m_utilCompare5_ = strength == IDENTICAL; 959 960 m_utilBytesCount0_ = 0; 961 m_utilBytesCount1_ = 0; 962 m_utilBytesCount2_ = 0; 963 m_utilBytesCount3_ = 0; 964 m_utilBytesCount4_ = 0; 965 m_utilBytesCount5_ = 0; 966 m_utilCount0_ = 0; 967 m_utilCount1_ = 0; 968 m_utilCount2_ = 0; 969 m_utilCount3_ = 0; 970 m_utilCount4_ = 0; 971 m_utilCount5_ = 0; 972 boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_; 973 int commonBottom4 = ((m_variableTopValue_ >>> 8) + 1) & LAST_BYTE_MASK_; 977 byte hiragana4 = 0; 978 if (m_isHiragana4_ && m_utilCompare4_) { 979 hiragana4 = (byte)commonBottom4; 981 commonBottom4 ++; 982 } 983 984 int bottomCount4 = 0xFF - commonBottom4; 985 if (m_utilCompare5_ && Normalizer.quickCheck(source, Normalizer.NFD,0) 987 != Normalizer.YES) { 988 source = Normalizer.decompose(source, false); 992 } 993 else if (getDecomposition() != NO_DECOMPOSITION 994 && Normalizer.quickCheck(source, Normalizer.FCD,0) 995 != Normalizer.YES) { 996 source = Normalizer.normalize(source,Normalizer.FCD); 999 } 1000 getSortKeyBytes(source, doFrench, hiragana4, commonBottom4, 1001 bottomCount4); 1002 if (key == null) { 1003 key = new RawCollationKey(); 1004 } 1005 getSortKey(source, doFrench, commonBottom4, bottomCount4, key); 1006 return key; 1007 } 1008 1009 1020 public boolean isUpperCaseFirst() 1021 { 1022 return (m_caseFirst_ == AttributeValue.UPPER_FIRST_); 1023 } 1024 1025 1036 public boolean isLowerCaseFirst() 1037 { 1038 return (m_caseFirst_ == AttributeValue.LOWER_FIRST_); 1039 } 1040 1041 1053 public boolean isAlternateHandlingShifted() 1054 { 1055 return m_isAlternateHandlingShifted_; 1056 } 1057 1058 1067 public boolean isCaseLevel() 1068 { 1069 return m_isCaseLevel_; 1070 } 1071 1072 1080 public boolean isFrenchCollation() 1081 { 1082 return m_isFrenchCollation_; 1083 } 1084 1085 1093 public boolean isHiraganaQuaternary() 1094 { 1095 return m_isHiragana4_; 1096 } 1097 1098 1105 public int getVariableTop() 1106 { 1107 return m_variableTopValue_ << 16; 1108 } 1109 1110 1120 public boolean getNumericCollation() 1121 { 1122 return m_isNumericCollation_; 1123 } 1124 1125 1127 1136 public boolean equals(Object obj) 1137 { 1138 if (obj == null) { 1139 return false; } 1141 if (this == obj) { 1142 return true; 1143 } 1144 if (getClass() != obj.getClass()) { 1145 return false; 1146 } 1147 RuleBasedCollator other = (RuleBasedCollator)obj; 1148 if (getStrength() != other.getStrength() 1150 || getDecomposition() != other.getDecomposition() 1151 || other.m_caseFirst_ != m_caseFirst_ 1152 || other.m_caseSwitch_ != m_caseSwitch_ 1153 || other.m_isAlternateHandlingShifted_ 1154 != m_isAlternateHandlingShifted_ 1155 || other.m_isCaseLevel_ != m_isCaseLevel_ 1156 || other.m_isFrenchCollation_ != m_isFrenchCollation_ 1157 || other.m_isHiragana4_ != m_isHiragana4_) { 1158 return false; 1159 } 1160 boolean rules = m_rules_ == other.m_rules_; 1161 if (!rules && (m_rules_ != null && other.m_rules_ != null)) { 1162 rules = m_rules_.equals(other.m_rules_); 1163 } 1164 if (!rules || !ICUDebug.enabled("collation")) { 1165 return rules; 1166 } 1167 if (m_addition3_ != other.m_addition3_ 1168 || m_bottom3_ != other.m_bottom3_ 1169 || m_bottomCount3_ != other.m_bottomCount3_ 1170 || m_common3_ != other.m_common3_ 1171 || m_isSimple3_ != other.m_isSimple3_ 1172 || m_mask3_ != other.m_mask3_ 1173 || m_minContractionEnd_ != other.m_minContractionEnd_ 1174 || m_minUnsafe_ != other.m_minUnsafe_ 1175 || m_top3_ != other.m_top3_ 1176 || m_topCount3_ != other.m_topCount3_ 1177 || !Arrays.equals(m_unsafe_, other.m_unsafe_)) { 1178 return false; 1179 } 1180 if (!m_trie_.equals(other.m_trie_)) { 1181 for (int i = UCharacter.MAX_VALUE; i >= UCharacter.MIN_VALUE; i --) 1184 { 1185 int v = m_trie_.getCodePointValue(i); 1186 int otherv = other.m_trie_.getCodePointValue(i); 1187 if (v != otherv) { 1188 int mask = v & (CE_TAG_MASK_ | CE_SPECIAL_FLAG_); 1189 if (mask == (otherv & 0xff000000)) { 1190 v &= 0xffffff; 1191 otherv &= 0xffffff; 1192 if (mask == 0xf1000000) { 1193 v -= (m_expansionOffset_ << 4); 1194 otherv -= (other.m_expansionOffset_ << 4); 1195 } 1196 else if (mask == 0xf2000000) { 1197 v -= m_contractionOffset_; 1198 otherv -= other.m_contractionOffset_; 1199 } 1200 if (v == otherv) { 1201 continue; 1202 } 1203 } 1204 return false; 1205 } 1206 } 1207 } 1208 if (Arrays.equals(m_contractionCE_, other.m_contractionCE_) 1209 && Arrays.equals(m_contractionEnd_, other.m_contractionEnd_) 1210 && Arrays.equals(m_contractionIndex_, other.m_contractionIndex_) 1211 && Arrays.equals(m_expansion_, other.m_expansion_) 1212 && Arrays.equals(m_expansionEndCE_, other.m_expansionEndCE_)) { 1213 for (int i = 0; i < m_expansionEndCE_.length; i ++) { 1215 if (m_expansionEndCEMaxSize_[i] 1216 != other.m_expansionEndCEMaxSize_[i]) { 1217 return false; 1218 } 1219 return true; 1220 } 1221 } 1222 return false; 1223 } 1224 1225 1230 public int hashCode() 1231 { 1232 String rules = getRules(); 1233 if (rules == null) { 1234 rules = ""; 1235 } 1236 return rules.hashCode(); 1237 } 1238 1239 1272 public int compare(String source, String target) 1273 { 1274 if (source == target) { 1275 return 0; 1276 } 1277 1278 int offset = getFirstUnmatchedOffset(source, target); 1280 if(latinOneUse_) { 1282 if ((offset < source.length() 1283 && source.charAt(offset) > ENDOFLATINONERANGE_) 1284 || (offset < target.length() 1285 && target.charAt(offset) > ENDOFLATINONERANGE_)) { 1286 return compareRegular(source, target, offset); 1288 } else { 1289 return compareUseLatin1(source, target, offset); 1290 } 1291 } else { 1292 return compareRegular(source, target, offset); 1293 } 1294 } 1295 1296 1298 1301 static interface AttributeValue 1302 { 1303 1307 static final int DEFAULT_ = -1; 1308 1311 static final int PRIMARY_ = Collator.PRIMARY; 1312 1315 static final int SECONDARY_ = Collator.SECONDARY; 1316 1319 static final int TERTIARY_ = Collator.TERTIARY; 1320 1323 static final int DEFAULT_STRENGTH_ = Collator.TERTIARY; 1324 1327 static final int CE_STRENGTH_LIMIT_ = Collator.TERTIARY + 1; 1328 1331 static final int QUATERNARY_ = 3; 1332 1335 static final int IDENTICAL_ = Collator.IDENTICAL; 1336 1339 static final int STRENGTH_LIMIT_ = Collator.IDENTICAL + 1; 1340 1344 static final int OFF_ = 16; 1345 1349 static final int ON_ = 17; 1350 1353 static final int SHIFTED_ = 20; 1354 1358 static final int NON_IGNORABLE_ = 21; 1359 1362 static final int LOWER_FIRST_ = 24; 1363 1366 static final int UPPER_FIRST_ = 25; 1367 1370 static final int LIMIT_ = 29; 1371 } 1372 1373 1377 static interface Attribute 1378 { 1379 1385 static final int FRENCH_COLLATION_ = 0; 1386 1394 static final int ALTERNATE_HANDLING_ = 1; 1395 1402 static final int CASE_FIRST_ = 2; 1403 1412 static final int CASE_LEVEL_ = 3; 1413 1423 static final int NORMALIZATION_MODE_ = 4; 1424 1436 static final int STRENGTH_ = 5; 1437 1442 static final int HIRAGANA_QUATERNARY_MODE_ = 6; 1443 1446 static final int LIMIT_ = 7; 1447 } 1448 1449 1452 static class DataManipulate implements Trie.DataManipulate 1453 { 1454 1456 1463 public final int getFoldingOffset(int ce) 1464 { 1465 if (isSpecial(ce) && getTag(ce) == CE_SURROGATE_TAG_) { 1466 return (ce & 0xFFFFFF); 1467 } 1468 return 0; 1469 } 1470 1471 1474 public static final DataManipulate getInstance() 1475 { 1476 if (m_instance_ == null) { 1477 m_instance_ = new DataManipulate(); 1478 } 1479 return m_instance_; 1480 } 1481 1482 1484 1487 private static DataManipulate m_instance_; 1488 1489 1491 1494 private DataManipulate() 1495 { 1496 } 1497 } 1498 1499 1502 static final class UCAConstants 1503 { 1504 int FIRST_TERTIARY_IGNORABLE_[] = new int[2]; int LAST_TERTIARY_IGNORABLE_[] = new int[2]; int FIRST_PRIMARY_IGNORABLE_[] = new int[2]; int FIRST_SECONDARY_IGNORABLE_[] = new int[2]; int LAST_SECONDARY_IGNORABLE_[] = new int[2]; int LAST_PRIMARY_IGNORABLE_[] = new int[2]; int FIRST_VARIABLE_[] = new int[2]; int LAST_VARIABLE_[] = new int[2]; int FIRST_NON_VARIABLE_[] = new int[2]; int LAST_NON_VARIABLE_[] = new int[2]; int RESET_TOP_VALUE_[] = new int[2]; int FIRST_IMPLICIT_[] = new int[2]; 1516 int LAST_IMPLICIT_[] = new int[2]; 1517 int FIRST_TRAILING_[] = new int[2]; 1518 int LAST_TRAILING_[] = new int[2]; 1519 int PRIMARY_TOP_MIN_; 1520 int PRIMARY_IMPLICIT_MIN_; int PRIMARY_IMPLICIT_MAX_; int PRIMARY_TRAILING_MIN_; int PRIMARY_TRAILING_MAX_; int PRIMARY_SPECIAL_MIN_; int PRIMARY_SPECIAL_MAX_; } 1527 1528 1530 static final byte BYTE_FIRST_TAILORED_ = (byte)0x04; 1531 static final byte BYTE_COMMON_ = (byte)0x05; 1532 static final int COMMON_TOP_2_ = 0x86; static final int COMMON_BOTTOM_2_ = BYTE_COMMON_; 1534 1537 static final int CE_CASE_BIT_MASK_ = 0xC0; 1538 static final int CE_TAG_SHIFT_ = 24; 1539 static final int CE_TAG_MASK_ = 0x0F000000; 1540 1541 static final int CE_SPECIAL_FLAG_ = 0xF0000000; 1542 1545 static final int CE_SURROGATE_TAG_ = 5; 1546 1549 static final int CE_PRIMARY_MASK_ = 0xFFFF0000; 1550 1553 static final int CE_SECONDARY_MASK_ = 0xFF00; 1554 1557 static final int CE_TERTIARY_MASK_ = 0xFF; 1558 1561 static final int CE_PRIMARY_SHIFT_ = 16; 1562 1565 static final int CE_SECONDARY_SHIFT_ = 8; 1566 1569 static final int CE_CONTINUATION_MARKER_ = 0xC0; 1570 1571 1578 int m_expansionOffset_; 1579 1586 int m_contractionOffset_; 1587 1590 boolean m_isJamoSpecial_; 1591 1592 1594 int m_defaultVariableTopValue_; 1595 boolean m_defaultIsFrenchCollation_; 1596 boolean m_defaultIsAlternateHandlingShifted_; 1597 int m_defaultCaseFirst_; 1598 boolean m_defaultIsCaseLevel_; 1599 int m_defaultDecomposition_; 1600 int m_defaultStrength_; 1601 boolean m_defaultIsHiragana4_; 1602 boolean m_defaultIsNumericCollation_; 1603 1604 1607 int m_variableTopValue_; 1608 1611 boolean m_isHiragana4_; 1612 1615 int m_caseFirst_; 1616 1619 boolean m_isNumericCollation_; 1620 1621 1623 1626 int m_expansion_[]; 1627 1630 char m_contractionIndex_[]; 1631 1634 int m_contractionCE_[]; 1635 1638 IntTrie m_trie_; 1639 1643 int m_expansionEndCE_[]; 1644 1649 byte m_expansionEndCEMaxSize_[]; 1650 1658 byte m_unsafe_[]; 1659 1663 byte m_contractionEnd_[]; 1664 1667 String m_rules_; 1668 1671 char m_minUnsafe_; 1672 1675 char m_minContractionEnd_; 1676 1679 VersionInfo m_version_; 1680 1683 VersionInfo m_UCA_version_; 1684 1687 VersionInfo m_UCD_version_; 1688 1689 1692 static final RuleBasedCollator UCA_; 1693 1696 static final UCAConstants UCA_CONSTANTS_; 1697 1700 static final char UCA_CONTRACTIONS_[]; 1701 1702 private static boolean UCA_INIT_COMPLETE; 1703 1704 1707 static final ImplicitCEGenerator impCEGen_; 1708 static final byte SORT_LEVEL_TERMINATOR_ = 1; 1728 1729 static final int maxRegularPrimary = 0xA0; 1734 static final int minImplicitPrimary = 0xE0; 1735 static final int maxImplicitPrimary = 0xE4; 1736 1737 1738 static 1740 { 1741 1745 RuleBasedCollator iUCA_ = null; 1746 UCAConstants iUCA_CONSTANTS_ = null; 1747 char iUCA_CONTRACTIONS_[] = null; 1748 ImplicitCEGenerator iimpCEGen_ = null; 1749 try 1750 { 1751 iUCA_ = new RuleBasedCollator(); 1757 iUCA_CONSTANTS_ = new UCAConstants(); 1758 iUCA_CONTRACTIONS_ = CollatorReader.read(iUCA_, iUCA_CONSTANTS_); 1759 1760 iimpCEGen_ = new ImplicitCEGenerator(minImplicitPrimary, maxImplicitPrimary); 1762 iUCA_.init(); 1764 ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, ULocale.ENGLISH); 1765 iUCA_.m_rules_ = (String )rb.getObject("UCARules"); 1766 } 1767 catch (MissingResourceException ex) 1768 { 1769 } 1771 catch (IOException e) 1772 { 1773 } 1776 1777 UCA_ = iUCA_; 1778 UCA_CONSTANTS_ = iUCA_CONSTANTS_; 1779 UCA_CONTRACTIONS_ = iUCA_CONTRACTIONS_; 1780 impCEGen_ = iimpCEGen_; 1781 1782 UCA_INIT_COMPLETE = true; 1783 } 1784 1785 1786 private static void checkUCA() throws MissingResourceException { 1787 if (UCA_INIT_COMPLETE && UCA_ == null) { 1788 throw new MissingResourceException ("Collator UCA data unavailable", "", ""); 1789 } 1790 } 1791 1792 1794 1803 RuleBasedCollator() 1804 { 1805 checkUCA(); 1806 initUtility(false); 1807 } 1808 1809 1815 RuleBasedCollator(ULocale locale) 1816 { 1817 checkUCA(); 1818 ICUResourceBundle rb = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_COLLATION_BASE_NAME, locale); 1819 initUtility(false); 1820 if (rb != null) { 1821 try { 1822 String collkey = locale.getKeywordValue("collation"); 1824 if(collkey == null) { 1825 collkey = rb.getStringWithFallback("collations/default"); 1826 } 1827 1828 ICUResourceBundle elements = rb.getWithFallback("collations/" + collkey); 1832 if (elements != null) { 1833 ULocale uloc = rb.getULocale(); 1835 setLocale(uloc, uloc); 1836 1837 m_rules_ = elements.getString("Sequence"); 1838 ByteBuffer buf = elements.get("%%CollationBin").getBinary(); 1839 if(buf!=null){ 1841 byte map[] = buf.array(); 1843 CollatorReader.initRBC(this, map); 1844 1860 if(!m_UCA_version_.equals(UCA_.m_UCA_version_) || 1864 !m_UCD_version_.equals(UCA_.m_UCD_version_)) { 1865 init(m_rules_); 1866 return; 1867 } 1868 init(); 1869 return; 1870 } 1871 else { 1872 1876 init(m_rules_); 1877 return; 1878 } 1879 } 1880 } 1881 catch (Exception e) { 1882 } 1885 } 1886 setWithUCAData(); 1887 } 1888 1889 1891 1895 final void setWithUCATables() 1896 { 1897 m_contractionOffset_ = UCA_.m_contractionOffset_; 1898 m_expansionOffset_ = UCA_.m_expansionOffset_; 1899 m_expansion_ = UCA_.m_expansion_; 1900 m_contractionIndex_ = UCA_.m_contractionIndex_; 1901 m_contractionCE_ = UCA_.m_contractionCE_; 1902 m_trie_ = UCA_.m_trie_; 1903 m_expansionEndCE_ = UCA_.m_expansionEndCE_; 1904 m_expansionEndCEMaxSize_ = UCA_.m_expansionEndCEMaxSize_; 1905 m_unsafe_ = UCA_.m_unsafe_; 1906 m_contractionEnd_ = UCA_.m_contractionEnd_; 1907 m_minUnsafe_ = UCA_.m_minUnsafe_; 1908 m_minContractionEnd_ = UCA_.m_minContractionEnd_; 1909 } 1910 1911 1914 final void setWithUCAData() 1915 { 1916 latinOneFailed_ = true; 1917 1918 m_addition3_ = UCA_.m_addition3_; 1919 m_bottom3_ = UCA_.m_bottom3_; 1920 m_bottomCount3_ = UCA_.m_bottomCount3_; 1921 m_caseFirst_ = UCA_.m_caseFirst_; 1922 m_caseSwitch_ = UCA_.m_caseSwitch_; 1923 m_common3_ = UCA_.m_common3_; 1924 m_contractionOffset_ = UCA_.m_contractionOffset_; 1925 setDecomposition(UCA_.getDecomposition()); 1926 m_defaultCaseFirst_ = UCA_.m_defaultCaseFirst_; 1927 m_defaultDecomposition_ = UCA_.m_defaultDecomposition_; 1928 m_defaultIsAlternateHandlingShifted_ 1929 = UCA_.m_defaultIsAlternateHandlingShifted_; 1930 m_defaultIsCaseLevel_ = UCA_.m_defaultIsCaseLevel_; 1931 m_defaultIsFrenchCollation_ = UCA_.m_defaultIsFrenchCollation_; 1932 m_defaultIsHiragana4_ = UCA_.m_defaultIsHiragana4_; 1933 m_defaultStrength_ = UCA_.m_defaultStrength_; 1934 m_defaultVariableTopValue_ = UCA_.m_defaultVariableTopValue_; 1935 m_defaultIsNumericCollation_ = UCA_.m_defaultIsNumericCollation_; 1936 m_expansionOffset_ = UCA_.m_expansionOffset_; 1937 m_isAlternateHandlingShifted_ = UCA_.m_isAlternateHandlingShifted_; 1938 m_isCaseLevel_ = UCA_.m_isCaseLevel_; 1939 m_isFrenchCollation_ = UCA_.m_isFrenchCollation_; 1940 m_isHiragana4_ = UCA_.m_isHiragana4_; 1941 m_isJamoSpecial_ = UCA_.m_isJamoSpecial_; 1942 m_isSimple3_ = UCA_.m_isSimple3_; 1943 m_mask3_ = UCA_.m_mask3_; 1944 m_minContractionEnd_ = UCA_.m_minContractionEnd_; 1945 m_minUnsafe_ = UCA_.m_minUnsafe_; 1946 m_rules_ = UCA_.m_rules_; 1947 setStrength(UCA_.getStrength()); 1948 m_top3_ = UCA_.m_top3_; 1949 m_topCount3_ = UCA_.m_topCount3_; 1950 m_variableTopValue_ = UCA_.m_variableTopValue_; 1951 m_isNumericCollation_ = UCA_.m_isNumericCollation_; 1952 setWithUCATables(); 1953 latinOneFailed_ = false; 1954 } 1955 1956 1966 final boolean isUnsafe(char ch) 1967 { 1968 if (ch < m_minUnsafe_) { 1969 return false; 1970 } 1971 1972 if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { 1973 if (UTF16.isLeadSurrogate(ch) 1974 || UTF16.isTrailSurrogate(ch)) { 1975 return true; 1977 } 1978 ch &= HEURISTIC_OVERFLOW_MASK_; 1979 ch += HEURISTIC_OVERFLOW_OFFSET_; 1980 } 1981 int value = m_unsafe_[ch >> HEURISTIC_SHIFT_]; 1982 return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0; 1983 } 1984 1985 1991 final boolean isContractionEnd(char ch) 1992 { 1993 if (UTF16.isTrailSurrogate(ch)) { 1994 return true; 1995 } 1996 1997 if (ch < m_minContractionEnd_) { 1998 return false; 1999 } 2000 2001 if (ch >= (HEURISTIC_SIZE_ << HEURISTIC_SHIFT_)) { 2002 ch &= HEURISTIC_OVERFLOW_MASK_; 2003 ch += HEURISTIC_OVERFLOW_OFFSET_; 2004 } 2005 int value = m_contractionEnd_[ch >> HEURISTIC_SHIFT_]; 2006 return ((value >> (ch & HEURISTIC_MASK_)) & 1) != 0; 2007 } 2008 2009 2014 static int getTag(int ce) 2015 { 2016 return (ce & CE_TAG_MASK_) >> CE_TAG_SHIFT_; 2017 } 2018 2019 2024 static boolean isSpecial(int ce) 2025 { 2026 return (ce & CE_SPECIAL_FLAG_) == CE_SPECIAL_FLAG_; 2027 } 2028 2029 2034 static final boolean isContinuation(int ce) 2035 { 2036 return ce != CollationElementIterator.NULLORDER 2037 && (ce & CE_CONTINUATION_TAG_) == CE_CONTINUATION_TAG_; 2038 } 2039 2040 2042 2044 2049 private static final int DEFAULT_MIN_HEURISTIC_ = 0x300; 2050 2055 private static final char HEURISTIC_SIZE_ = 1056; 2056 2060 private static final char HEURISTIC_OVERFLOW_MASK_ = 0x1fff; 2061 2064 private static final int HEURISTIC_SHIFT_ = 3; 2065 2069 private static final char HEURISTIC_OVERFLOW_OFFSET_ = 256; 2070 2073 private static final char HEURISTIC_MASK_ = 7; 2074 2075 private int m_caseSwitch_; 2076 private int m_common3_; 2077 private int m_mask3_; 2078 2081 private int m_addition3_; 2082 2085 private int m_top3_; 2086 2089 private int m_bottom3_; 2090 private int m_topCount3_; 2091 private int m_bottomCount3_; 2092 2095 private static final int CASE_SWITCH_ = 0xC0; 2096 private static final int NO_CASE_SWITCH_ = 0; 2097 2100 private static final int CE_REMOVE_CASE_ = 0x3F; 2101 private static final int CE_KEEP_CASE_ = 0xFF; 2102 2105 private static final int CE_CASE_MASK_3_ = 0xFF; 2106 2109 private static final double PROPORTION_2_ = 0.5; 2110 private static final double PROPORTION_3_ = 0.667; 2111 2112 2114 2118 private static final byte BYTE_ZERO_ = 0x0; 2119 private static final byte BYTE_LEVEL_SEPARATOR_ = (byte)0x01; 2120 private static final byte BYTE_SORTKEY_GLUE_ = (byte)0x02; 2121 private static final byte BYTE_SHIFT_PREFIX_ = (byte)0x03; 2122 static final byte BYTE_UNSHIFTED_MIN_ = BYTE_SHIFT_PREFIX_; 2123 private static final byte BYTE_FIRST_UCA_ = BYTE_COMMON_; 2124 static final byte CODAN_PLACEHOLDER = 0x24; 2125 private static final byte BYTE_LAST_LATIN_PRIMARY_ = (byte)0x4C; 2126 private static final byte BYTE_FIRST_NON_LATIN_PRIMARY_ = (byte)0x4D; 2127 private static final byte BYTE_UNSHIFTED_MAX_ = (byte)0xFF; 2128 private static final int TOTAL_2_ = COMMON_TOP_2_ - COMMON_BOTTOM_2_ - 1; 2129 private static final int FLAG_BIT_MASK_CASE_SWITCH_OFF_ = 0x80; 2130 private static final int FLAG_BIT_MASK_CASE_SWITCH_ON_ = 0x40; 2131 private static final int COMMON_TOP_CASE_SWITCH_OFF_3_ = 0x85; 2132 private static final int COMMON_TOP_CASE_SWITCH_LOWER_3_ = 0x45; 2133 private static final int COMMON_TOP_CASE_SWITCH_UPPER_3_ = 0xC5; 2134 private static final int COMMON_BOTTOM_3_ = 0x05; 2135 private static final int COMMON_BOTTOM_CASE_SWITCH_UPPER_3_ = 0x86; 2136 private static final int COMMON_BOTTOM_CASE_SWITCH_LOWER_3_ = 2137 COMMON_BOTTOM_3_; 2138 private static final int TOP_COUNT_2_ = (int)(PROPORTION_2_ * TOTAL_2_); 2139 private static final int BOTTOM_COUNT_2_ = TOTAL_2_ - TOP_COUNT_2_; 2140 private static final int COMMON_2_ = COMMON_BOTTOM_2_; 2141 private static final int COMMON_UPPER_FIRST_3_ = 0xC5; 2142 private static final int COMMON_NORMAL_3_ = COMMON_BOTTOM_3_; 2143 private static final int COMMON_4_ = (byte)0xFF; 2144 2145 2146 2147 2151 2153 2156 private boolean m_isSimple3_; 2157 2158 2161 private boolean m_isFrenchCollation_; 2162 2167 private boolean m_isAlternateHandlingShifted_; 2168 2171 private boolean m_isCaseLevel_; 2172 2173 private static final int SORT_BUFFER_INIT_SIZE_ = 128; 2174 private static final int SORT_BUFFER_INIT_SIZE_1_ = 2175 SORT_BUFFER_INIT_SIZE_ << 3; 2176 private static final int SORT_BUFFER_INIT_SIZE_2_ = SORT_BUFFER_INIT_SIZE_; 2177 private static final int SORT_BUFFER_INIT_SIZE_3_ = SORT_BUFFER_INIT_SIZE_; 2178 private static final int SORT_BUFFER_INIT_SIZE_CASE_ = 2179 SORT_BUFFER_INIT_SIZE_ >> 2; 2180 private static final int SORT_BUFFER_INIT_SIZE_4_ = SORT_BUFFER_INIT_SIZE_; 2181 2182 private static final int CE_CONTINUATION_TAG_ = 0xC0; 2183 private static final int CE_REMOVE_CONTINUATION_MASK_ = 0xFFFFFF3F; 2184 2185 private static final int LAST_BYTE_MASK_ = 0xFF; 2186 2187 private static final int CE_RESET_TOP_VALUE_ = 0x9F000303; 2188 private static final int CE_NEXT_TOP_VALUE_ = 0xE8960303; 2189 2190 private static final byte SORT_CASE_BYTE_START_ = (byte)0x80; 2191 private static final byte SORT_CASE_SHIFT_START_ = (byte)7; 2192 2193 2196 private static final int CE_BUFFER_SIZE_ = 512; 2197 2198 boolean latinOneUse_ = false; 2200 boolean latinOneRegenTable_ = false; 2201 boolean latinOneFailed_ = false; 2202 2203 int latinOneTableLen_ = 0; 2204 int latinOneCEs_[] = null; 2205 2208 private StringUCharacterIterator m_srcUtilIter_; 2209 private CollationElementIterator m_srcUtilColEIter_; 2210 private StringUCharacterIterator m_tgtUtilIter_; 2211 private CollationElementIterator m_tgtUtilColEIter_; 2212 2215 private boolean m_utilCompare0_; 2216 private boolean m_utilCompare1_; 2217 private boolean m_utilCompare2_; 2218 private boolean m_utilCompare3_; 2219 private boolean m_utilCompare4_; 2220 private boolean m_utilCompare5_; 2221 2224 private byte m_utilBytes0_[]; 2225 private byte m_utilBytes1_[]; 2226 private byte m_utilBytes2_[]; 2227 private byte m_utilBytes3_[]; 2228 private byte m_utilBytes4_[]; 2229 private byte m_utilBytes5_[]; 2230 private RawCollationKey m_utilRawCollationKey_; 2231 2232 private int m_utilBytesCount0_; 2233 private int m_utilBytesCount1_; 2234 private int m_utilBytesCount2_; 2235 private int m_utilBytesCount3_; 2236 private int m_utilBytesCount4_; 2237 private int m_utilBytesCount5_; 2238 private int m_utilCount0_; 2239 private int m_utilCount1_; 2240 private int m_utilCount2_; 2241 private int m_utilCount3_; 2242 private int m_utilCount4_; 2243 private int m_utilCount5_; 2244 2245 private int m_utilFrenchStart_; 2246 private int m_utilFrenchEnd_; 2247 2248 2251 private int m_srcUtilCEBuffer_[]; 2252 private int m_tgtUtilCEBuffer_[]; 2253 private int m_srcUtilCEBufferSize_; 2254 private int m_tgtUtilCEBufferSize_; 2255 2256 private int m_srcUtilContOffset_; 2257 private int m_tgtUtilContOffset_; 2258 2259 private int m_srcUtilOffset_; 2260 private int m_tgtUtilOffset_; 2261 2262 2264 private void init(String rules) throws Exception 2265 { 2266 setWithUCAData(); 2267 CollationParsedRuleBuilder builder 2268 = new CollationParsedRuleBuilder(rules); 2269 builder.setRules(this); 2270 m_rules_ = rules; 2271 init(); 2272 initUtility(false); 2273 } 2274 2275 private final int compareRegular(String source, String target, int offset) { 2276 if (m_srcUtilIter_ == null) { 2277 initUtility(true); 2278 } 2279 int strength = getStrength(); 2280 m_utilCompare0_ = m_isCaseLevel_; 2282 m_utilCompare1_ = true; 2283 m_utilCompare2_ = strength >= SECONDARY; 2284 m_utilCompare3_ = strength >= TERTIARY; 2285 m_utilCompare4_ = strength >= QUATERNARY; 2286 m_utilCompare5_ = strength == IDENTICAL; 2287 boolean doFrench = m_isFrenchCollation_ && m_utilCompare2_; 2288 boolean doShift4 = m_isAlternateHandlingShifted_ && m_utilCompare4_; 2289 boolean doHiragana4 = m_isHiragana4_ && m_utilCompare4_; 2290 2291 if (doHiragana4 && doShift4) { 2292 String sourcesub = source.substring(offset); 2293 String targetsub = target.substring(offset); 2294 return compareBySortKeys(sourcesub, targetsub); 2295 } 2296 2297 int lowestpvalue = m_isAlternateHandlingShifted_ 2299 ? m_variableTopValue_ << 16 : 0; 2300 m_srcUtilCEBufferSize_ = 0; 2301 m_tgtUtilCEBufferSize_ = 0; 2302 int result = doPrimaryCompare(doHiragana4, lowestpvalue, source, 2303 target, offset); 2304 if (m_srcUtilCEBufferSize_ == -1 2305 && m_tgtUtilCEBufferSize_ == -1) { 2306 return result; 2310 } 2311 2312 int hiraganaresult = result; 2313 2314 if (m_utilCompare2_) { 2315 result = doSecondaryCompare(doFrench); 2316 if (result != 0) { 2317 return result; 2318 } 2319 } 2320 if (m_utilCompare0_) { 2322 result = doCaseCompare(); 2323 if (result != 0) { 2324 return result; 2325 } 2326 } 2327 if (m_utilCompare3_) { 2329 result = doTertiaryCompare(); 2330 if (result != 0) { 2331 return result; 2332 } 2333 } 2334 2335 if (doShift4) { result = doQuaternaryCompare(lowestpvalue); 2337 if (result != 0) { 2338 return result; 2339 } 2340 } 2341 else if (doHiragana4 && hiraganaresult != 0) { 2342 return hiraganaresult; 2345 } 2346 2347 if (m_utilCompare5_) { 2352 return doIdenticalCompare(source, target, offset, true); 2353 } 2354 return 0; 2355 } 2356 2357 2368 private final int doPrimaryBytes(int ce, boolean notIsContinuation, 2369 boolean doShift, int leadPrimary, 2370 int commonBottom4, int bottomCount4) 2371 { 2372 2373 int p2 = (ce >>= 16) & LAST_BYTE_MASK_; int p1 = ce >>> 8; if (doShift) { 2376 if (m_utilCount4_ > 0) { 2377 while (m_utilCount4_ > bottomCount4) { 2378 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2379 (byte)(commonBottom4 + bottomCount4)); 2380 m_utilBytesCount4_ ++; 2381 m_utilCount4_ -= bottomCount4; 2382 } 2383 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2384 (byte)(commonBottom4 2385 + (m_utilCount4_ - 1))); 2386 m_utilBytesCount4_ ++; 2387 m_utilCount4_ = 0; 2388 } 2389 if (p1 != 0) { 2392 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2394 (byte)p1); 2395 m_utilBytesCount4_ ++; 2396 } 2397 if (p2 != 0) { 2398 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2399 (byte)p2); 2400 m_utilBytesCount4_ ++; 2401 } 2402 } 2403 else { 2404 if (p1 != CollationElementIterator.IGNORABLE) { 2410 if (notIsContinuation) { 2411 if (leadPrimary == p1) { 2412 m_utilBytes1_ = append(m_utilBytes1_, 2413 m_utilBytesCount1_, (byte)p2); 2414 m_utilBytesCount1_ ++; 2415 } 2416 else { 2417 if (leadPrimary != 0) { 2418 m_utilBytes1_ = append(m_utilBytes1_, 2419 m_utilBytesCount1_, 2420 ((p1 > leadPrimary) 2421 ? BYTE_UNSHIFTED_MAX_ 2422 : BYTE_UNSHIFTED_MIN_)); 2423 m_utilBytesCount1_ ++; 2424 } 2425 if (p2 == CollationElementIterator.IGNORABLE) { 2426 m_utilBytes1_ = append(m_utilBytes1_, 2428 m_utilBytesCount1_, 2429 (byte)p1); 2430 m_utilBytesCount1_ ++; 2431 leadPrimary = 0; 2432 } 2433 else if (p1 < BYTE_FIRST_NON_LATIN_PRIMARY_ 2434 || (p1 > maxRegularPrimary 2435 && p1 < minImplicitPrimary 2438 )) { 2441 leadPrimary = 0; 2443 m_utilBytes1_ = append(m_utilBytes1_, 2444 m_utilBytesCount1_, 2445 (byte)p1); 2446 m_utilBytesCount1_ ++; 2447 m_utilBytes1_ = append(m_utilBytes1_, 2448 m_utilBytesCount1_, 2449 (byte)p2); 2450 m_utilBytesCount1_ ++; 2451 } 2452 else { leadPrimary = p1; 2454 m_utilBytes1_ = append(m_utilBytes1_, 2455 m_utilBytesCount1_, 2456 (byte)p1); 2457 m_utilBytesCount1_ ++; 2458 m_utilBytes1_ = append(m_utilBytes1_, 2459 m_utilBytesCount1_, (byte)p2); 2460 m_utilBytesCount1_ ++; 2461 } 2462 } 2463 } 2464 else { 2465 m_utilBytes1_ = append(m_utilBytes1_, 2467 m_utilBytesCount1_, (byte)p1); 2468 m_utilBytesCount1_ ++; 2469 if (p2 != CollationElementIterator.IGNORABLE) { 2470 m_utilBytes1_ = append(m_utilBytes1_, 2471 m_utilBytesCount1_, (byte)p2); 2472 m_utilBytesCount1_ ++; 2474 } 2475 } 2476 } 2477 } 2478 return leadPrimary; 2479 } 2480 2481 2488 private final void doSecondaryBytes(int ce, boolean notIsContinuation, 2489 boolean doFrench) 2490 { 2491 int s = (ce >>= 8) & LAST_BYTE_MASK_; if (s != 0) { 2493 if (!doFrench) { 2494 if (s == COMMON_2_ && notIsContinuation) { 2496 m_utilCount2_ ++; 2497 } 2498 else { 2499 if (m_utilCount2_ > 0) { 2500 if (s > COMMON_2_) { while (m_utilCount2_ > TOP_COUNT_2_) { 2502 m_utilBytes2_ = append(m_utilBytes2_, 2503 m_utilBytesCount2_, 2504 (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); 2505 m_utilBytesCount2_ ++; 2506 m_utilCount2_ -= TOP_COUNT_2_; 2507 } 2508 m_utilBytes2_ = append(m_utilBytes2_, 2509 m_utilBytesCount2_, 2510 (byte)(COMMON_TOP_2_ 2511 - (m_utilCount2_ - 1))); 2512 m_utilBytesCount2_ ++; 2513 } 2514 else { 2515 while (m_utilCount2_ > BOTTOM_COUNT_2_) { 2516 m_utilBytes2_ = append(m_utilBytes2_, 2517 m_utilBytesCount2_, 2518 (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_)); 2519 m_utilBytesCount2_ ++; 2520 m_utilCount2_ -= BOTTOM_COUNT_2_; 2521 } 2522 m_utilBytes2_ = append(m_utilBytes2_, 2523 m_utilBytesCount2_, 2524 (byte)(COMMON_BOTTOM_2_ 2525 + (m_utilCount2_ - 1))); 2526 m_utilBytesCount2_ ++; 2527 } 2528 m_utilCount2_ = 0; 2529 } 2530 m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, 2531 (byte)s); 2532 m_utilBytesCount2_ ++; 2533 } 2534 } 2535 else { 2536 m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, 2537 (byte)s); 2538 m_utilBytesCount2_ ++; 2539 if (notIsContinuation) { 2545 if (m_utilFrenchStart_ != -1) { 2546 reverseBuffer(m_utilBytes2_); 2549 m_utilFrenchStart_ = -1; 2550 } 2551 } 2552 else { 2553 if (m_utilFrenchStart_ == -1) { 2554 m_utilFrenchStart_ = m_utilBytesCount2_ - 2; 2555 } 2556 m_utilFrenchEnd_ = m_utilBytesCount2_ - 1; 2557 } 2558 } 2559 } 2560 } 2561 2562 2566 private void reverseBuffer(byte buffer[]) 2567 { 2568 int start = m_utilFrenchStart_; 2569 int end = m_utilFrenchEnd_; 2570 while (start < end) { 2571 byte b = buffer[start]; 2572 buffer[start ++] = buffer[end]; 2573 buffer[end --] = b; 2574 } 2575 } 2576 2577 2582 private final int doCaseShift(int caseshift) 2583 { 2584 if (caseshift == 0) { 2585 m_utilBytes0_ = append(m_utilBytes0_, m_utilBytesCount0_, 2586 SORT_CASE_BYTE_START_); 2587 m_utilBytesCount0_ ++; 2588 caseshift = SORT_CASE_SHIFT_START_; 2589 } 2590 return caseshift; 2591 } 2592 2593 2601 private final int doCaseBytes(int tertiary, boolean notIsContinuation, 2602 int caseshift) 2603 { 2604 caseshift = doCaseShift(caseshift); 2605 2606 if (notIsContinuation && tertiary != 0) { 2607 byte casebits = (byte)(tertiary & 0xC0); 2608 if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { 2609 if (casebits == 0) { 2610 m_utilBytes0_[m_utilBytesCount0_ - 1] 2611 |= (1 << (-- caseshift)); 2612 } 2613 else { 2614 caseshift = doCaseShift(caseshift - 1); 2616 m_utilBytes0_[m_utilBytesCount0_ - 1] 2617 |= ((casebits >> 6) & 1) << (-- caseshift); 2618 } 2619 } 2620 else { 2621 if (casebits != 0) { 2622 m_utilBytes0_[m_utilBytesCount0_ - 1] 2623 |= 1 << (-- caseshift); 2624 caseshift = doCaseShift(caseshift); 2626 m_utilBytes0_[m_utilBytesCount0_ - 1] 2627 |= ((casebits >> 7) & 1) << (-- caseshift); 2628 } 2629 else { 2630 caseshift --; 2631 } 2632 } 2633 } 2634 2635 return caseshift; 2636 } 2637 2638 2644 private final void doTertiaryBytes(int tertiary, boolean notIsContinuation) 2645 { 2646 if (tertiary != 0) { 2647 if (tertiary == m_common3_ && notIsContinuation) { 2650 m_utilCount3_ ++; 2651 } 2652 else { 2653 int common3 = m_common3_ & LAST_BYTE_MASK_; 2654 if (tertiary > common3 && m_common3_ == COMMON_NORMAL_3_) { 2655 tertiary += m_addition3_; 2656 } 2657 else if (tertiary <= common3 2658 && m_common3_ == COMMON_UPPER_FIRST_3_) { 2659 tertiary -= m_addition3_; 2660 } 2661 if (m_utilCount3_ > 0) { 2662 if (tertiary > common3) { 2663 while (m_utilCount3_ > m_topCount3_) { 2664 m_utilBytes3_ = append(m_utilBytes3_, 2665 m_utilBytesCount3_, 2666 (byte)(m_top3_ - m_topCount3_)); 2667 m_utilBytesCount3_ ++; 2668 m_utilCount3_ -= m_topCount3_; 2669 } 2670 m_utilBytes3_ = append(m_utilBytes3_, 2671 m_utilBytesCount3_, 2672 (byte)(m_top3_ 2673 - (m_utilCount3_ - 1))); 2674 m_utilBytesCount3_ ++; 2675 } 2676 else { 2677 while (m_utilCount3_ > m_bottomCount3_) { 2678 m_utilBytes3_ = append(m_utilBytes3_, 2679 m_utilBytesCount3_, 2680 (byte)(m_bottom3_ + m_bottomCount3_)); 2681 m_utilBytesCount3_ ++; 2682 m_utilCount3_ -= m_bottomCount3_; 2683 } 2684 m_utilBytes3_ = append(m_utilBytes3_, 2685 m_utilBytesCount3_, 2686 (byte)(m_bottom3_ 2687 + (m_utilCount3_ - 1))); 2688 m_utilBytesCount3_ ++; 2689 } 2690 m_utilCount3_ = 0; 2691 } 2692 m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, 2693 (byte)tertiary); 2694 m_utilBytesCount3_ ++; 2695 } 2696 } 2697 } 2698 2699 2707 private final void doQuaternaryBytes(boolean isCodePointHiragana, 2708 int commonBottom4, int bottomCount4, 2709 byte hiragana4) 2710 { 2711 if (isCodePointHiragana) { if (m_utilCount4_ > 0) { while (m_utilCount4_ > bottomCount4) { 2714 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2715 (byte)(commonBottom4 2716 + bottomCount4)); 2717 m_utilBytesCount4_ ++; 2718 m_utilCount4_ -= bottomCount4; 2719 } 2720 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2721 (byte)(commonBottom4 2722 + (m_utilCount4_ - 1))); 2723 m_utilBytesCount4_ ++; 2724 m_utilCount4_ = 0; 2725 } 2726 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 2727 hiragana4); m_utilBytesCount4_ ++; 2729 } 2730 else { m_utilCount4_ ++; 2732 } 2733 } 2734 2735 2745 private final void getSortKeyBytes(String source, boolean doFrench, 2746 byte hiragana4, int commonBottom4, 2747 int bottomCount4) 2748 2749 { 2750 if (m_srcUtilIter_ == null) { 2751 initUtility(true); 2752 } 2753 int backupDecomposition = getDecomposition(); 2754 setDecomposition(NO_DECOMPOSITION); m_srcUtilIter_.setText(source); 2756 m_srcUtilColEIter_.setText(m_srcUtilIter_); 2757 m_utilFrenchStart_ = -1; 2758 m_utilFrenchEnd_ = -1; 2759 2760 2763 boolean doShift = false; 2764 boolean notIsContinuation = false; 2765 2766 int leadPrimary = 0; int caseShift = 0; 2768 2769 while (true) { 2770 int ce = m_srcUtilColEIter_.next(); 2771 if (ce == CollationElementIterator.NULLORDER) { 2772 break; 2773 } 2774 2775 if (ce == CollationElementIterator.IGNORABLE) { 2776 continue; 2777 } 2778 2779 notIsContinuation = !isContinuation(ce); 2780 2781 2787 boolean isPrimaryByteIgnorable = (ce & CE_PRIMARY_MASK_) == 0; 2788 boolean isSmallerThanVariableTop = (ce >>> CE_PRIMARY_SHIFT_) 2791 <= m_variableTopValue_; 2792 doShift = (m_isAlternateHandlingShifted_ 2793 && ((notIsContinuation && isSmallerThanVariableTop 2794 && !isPrimaryByteIgnorable) || (!notIsContinuation && doShift)) 2796 || (doShift && isPrimaryByteIgnorable)); 2797 if (doShift && isPrimaryByteIgnorable) { 2798 continue; 2804 } 2805 leadPrimary = doPrimaryBytes(ce, notIsContinuation, doShift, 2806 leadPrimary, commonBottom4, 2807 bottomCount4); 2808 if (doShift) { 2809 continue; 2810 } 2811 if (m_utilCompare2_) { 2812 doSecondaryBytes(ce, notIsContinuation, doFrench); 2813 } 2814 2815 int t = ce & LAST_BYTE_MASK_; 2816 if (!notIsContinuation) { 2817 t = ce & CE_REMOVE_CONTINUATION_MASK_; 2818 } 2819 2820 if (m_utilCompare0_ && (!isPrimaryByteIgnorable || m_utilCompare2_)) { 2821 caseShift = doCaseBytes(t, notIsContinuation, caseShift); 2825 } 2826 else if (notIsContinuation) { 2827 t ^= m_caseSwitch_; 2828 } 2829 2830 t &= m_mask3_; 2831 2832 if (m_utilCompare3_) { 2833 doTertiaryBytes(t, notIsContinuation); 2834 } 2835 2836 if (m_utilCompare4_ && notIsContinuation) { doQuaternaryBytes(m_srcUtilColEIter_.m_isCodePointHiragana_, 2838 commonBottom4, bottomCount4, hiragana4); 2839 } 2840 } 2841 setDecomposition(backupDecomposition); if (m_utilFrenchStart_ != -1) { 2843 reverseBuffer(m_utilBytes2_); 2845 } 2846 } 2847 2848 2858 private final void getSortKey(String source, boolean doFrench, 2859 int commonBottom4, 2860 int bottomCount4, 2861 RawCollationKey key) 2862 { 2863 if (m_utilCompare2_) { 2866 doSecondary(doFrench); 2867 } 2868 if (m_utilCompare0_) { 2870 doCase(); 2871 } 2872 if (m_utilCompare3_) { 2873 doTertiary(); 2874 if (m_utilCompare4_) { 2875 doQuaternary(commonBottom4, bottomCount4); 2876 if (m_utilCompare5_) { 2877 doIdentical(source); 2878 } 2879 2880 } 2881 } 2882 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, (byte)0); 2883 m_utilBytesCount1_ ++; 2884 2885 key.set(m_utilBytes1_, 0, m_utilBytesCount1_); 2886 } 2887 2888 2891 private final void doFrench() 2892 { 2893 for (int i = 0; i < m_utilBytesCount2_; i ++) { 2894 byte s = m_utilBytes2_[m_utilBytesCount2_ - i - 1]; 2895 if (s == COMMON_2_) { 2897 ++ m_utilCount2_; 2898 } 2899 else { 2900 if (m_utilCount2_ > 0) { 2901 if ((s & LAST_BYTE_MASK_) > COMMON_2_) { 2903 while (m_utilCount2_ > TOP_COUNT_2_) { 2905 m_utilBytes1_ = append(m_utilBytes1_, 2906 m_utilBytesCount1_, 2907 (byte)(COMMON_TOP_2_ - TOP_COUNT_2_)); 2908 m_utilBytesCount1_ ++; 2909 m_utilCount2_ -= TOP_COUNT_2_; 2910 } 2911 m_utilBytes1_ = append(m_utilBytes1_, 2912 m_utilBytesCount1_, 2913 (byte)(COMMON_TOP_2_ 2914 - (m_utilCount2_ - 1))); 2915 m_utilBytesCount1_ ++; 2916 } 2917 else { 2918 while (m_utilCount2_ > BOTTOM_COUNT_2_) { 2919 m_utilBytes1_ = append(m_utilBytes1_, 2920 m_utilBytesCount1_, 2921 (byte)(COMMON_BOTTOM_2_ + BOTTOM_COUNT_2_)); 2922 m_utilBytesCount1_ ++; 2923 m_utilCount2_ -= BOTTOM_COUNT_2_; 2924 } 2925 m_utilBytes1_ = append(m_utilBytes1_, 2926 m_utilBytesCount1_, 2927 (byte)(COMMON_BOTTOM_2_ 2928 + (m_utilCount2_ - 1))); 2929 m_utilBytesCount1_ ++; 2930 } 2931 m_utilCount2_ = 0; 2932 } 2933 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, s); 2934 m_utilBytesCount1_ ++; 2935 } 2936 } 2937 if (m_utilCount2_ > 0) { 2938 while (m_utilCount2_ > BOTTOM_COUNT_2_) { 2939 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 2940 (byte)(COMMON_BOTTOM_2_ 2941 + BOTTOM_COUNT_2_)); 2942 m_utilBytesCount1_ ++; 2943 m_utilCount2_ -= BOTTOM_COUNT_2_; 2944 } 2945 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 2946 (byte)(COMMON_BOTTOM_2_ 2947 + (m_utilCount2_ - 1))); 2948 m_utilBytesCount1_ ++; 2949 } 2950 } 2951 2952 2956 private final void doSecondary(boolean doFrench) 2957 { 2958 if (m_utilCount2_ > 0) { 2959 while (m_utilCount2_ > BOTTOM_COUNT_2_) { 2960 m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, 2961 (byte)(COMMON_BOTTOM_2_ 2962 + BOTTOM_COUNT_2_)); 2963 m_utilBytesCount2_ ++; 2964 m_utilCount2_ -= BOTTOM_COUNT_2_; 2965 } 2966 m_utilBytes2_ = append(m_utilBytes2_, m_utilBytesCount2_, 2967 (byte)(COMMON_BOTTOM_2_ + 2968 (m_utilCount2_ - 1))); 2969 m_utilBytesCount2_ ++; 2970 } 2971 2972 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 2973 SORT_LEVEL_TERMINATOR_); 2974 m_utilBytesCount1_ ++; 2975 2976 if (doFrench) { doFrench(); 2978 } 2979 else { 2980 if (m_utilBytes1_.length <= m_utilBytesCount1_ 2981 + m_utilBytesCount2_) { 2982 m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, 2983 m_utilBytesCount2_); 2984 } 2985 System.arraycopy(m_utilBytes2_, 0, m_utilBytes1_, 2986 m_utilBytesCount1_, m_utilBytesCount2_); 2987 m_utilBytesCount1_ += m_utilBytesCount2_; 2988 } 2989 } 2990 2991 2998 private static final byte[] increase(byte buffer[], int size, 2999 int incrementsize) 3000 { 3001 byte result[] = new byte[buffer.length + incrementsize]; 3002 System.arraycopy(buffer, 0, result, 0, size); 3003 return result; 3004 } 3005 3006 3013 private static final int[] increase(int buffer[], int size, 3014 int incrementsize) 3015 { 3016 int result[] = new int[buffer.length + incrementsize]; 3017 System.arraycopy(buffer, 0, result, 0, size); 3018 return result; 3019 } 3020 3021 3024 private final void doCase() 3025 { 3026 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 3027 SORT_LEVEL_TERMINATOR_); 3028 m_utilBytesCount1_ ++; 3029 if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount0_) { 3030 m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, 3031 m_utilBytesCount0_); 3032 } 3033 System.arraycopy(m_utilBytes0_, 0, m_utilBytes1_, m_utilBytesCount1_, 3034 m_utilBytesCount0_); 3035 m_utilBytesCount1_ += m_utilBytesCount0_; 3036 } 3037 3038 3041 private final void doTertiary() 3042 { 3043 if (m_utilCount3_ > 0) { 3044 if (m_common3_ != COMMON_BOTTOM_3_) { 3045 while (m_utilCount3_ >= m_topCount3_) { 3046 m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, 3047 (byte)(m_top3_ - m_topCount3_)); 3048 m_utilBytesCount3_ ++; 3049 m_utilCount3_ -= m_topCount3_; 3050 } 3051 m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, 3052 (byte)(m_top3_ - m_utilCount3_)); 3053 m_utilBytesCount3_ ++; 3054 } 3055 else { 3056 while (m_utilCount3_ > m_bottomCount3_) { 3057 m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, 3058 (byte)(m_bottom3_ 3059 + m_bottomCount3_)); 3060 m_utilBytesCount3_ ++; 3061 m_utilCount3_ -= m_bottomCount3_; 3062 } 3063 m_utilBytes3_ = append(m_utilBytes3_, m_utilBytesCount3_, 3064 (byte)(m_bottom3_ 3065 + (m_utilCount3_ - 1))); 3066 m_utilBytesCount3_ ++; 3067 } 3068 } 3069 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 3070 SORT_LEVEL_TERMINATOR_); 3071 m_utilBytesCount1_ ++; 3072 if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount3_) { 3073 m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, 3074 m_utilBytesCount3_); 3075 } 3076 System.arraycopy(m_utilBytes3_, 0, m_utilBytes1_, m_utilBytesCount1_, 3077 m_utilBytesCount3_); 3078 m_utilBytesCount1_ += m_utilBytesCount3_; 3079 } 3080 3081 3084 private final void doQuaternary(int commonbottom4, int bottomcount4) 3085 { 3086 if (m_utilCount4_ > 0) { 3087 while (m_utilCount4_ > bottomcount4) { 3088 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 3089 (byte)(commonbottom4 + bottomcount4)); 3090 m_utilBytesCount4_ ++; 3091 m_utilCount4_ -= bottomcount4; 3092 } 3093 m_utilBytes4_ = append(m_utilBytes4_, m_utilBytesCount4_, 3094 (byte)(commonbottom4 3095 + (m_utilCount4_ - 1))); 3096 m_utilBytesCount4_ ++; 3097 } 3098 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 3099 SORT_LEVEL_TERMINATOR_); 3100 m_utilBytesCount1_ ++; 3101 if (m_utilBytes1_.length <= m_utilBytesCount1_ + m_utilBytesCount4_) { 3102 m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, 3103 m_utilBytesCount4_); 3104 } 3105 System.arraycopy(m_utilBytes4_, 0, m_utilBytes1_, m_utilBytesCount1_, 3106 m_utilBytesCount4_); 3107 m_utilBytesCount1_ += m_utilBytesCount4_; 3108 } 3109 3110 3116 private final void doIdentical(String source) 3117 { 3118 int isize = BOCU.getCompressionLength(source); 3119 m_utilBytes1_ = append(m_utilBytes1_, m_utilBytesCount1_, 3120 SORT_LEVEL_TERMINATOR_); 3121 m_utilBytesCount1_ ++; 3122 if (m_utilBytes1_.length <= m_utilBytesCount1_ + isize) { 3123 m_utilBytes1_ = increase(m_utilBytes1_, m_utilBytesCount1_, 3124 1 + isize); 3125 } 3126 m_utilBytesCount1_ = BOCU.compress(source, m_utilBytes1_, 3127 m_utilBytesCount1_); 3128 } 3129 3130 3139 private final int getFirstUnmatchedOffset(String source, String target) 3140 { 3141 int result = 0; 3142 int slength = source.length(); 3143 int tlength = target.length(); 3144 int minlength = slength; 3145 if (minlength > tlength) { 3146 minlength = tlength; 3147 } 3148 while (result < minlength 3149 && source.charAt(result) == target.charAt(result)) { 3150 result ++; 3151 } 3152 if (result > 0) { 3153 char schar = 0; 3158 char tchar = 0; 3159 if (result < minlength) { 3160 schar = source.charAt(result); tchar = target.charAt(result); 3162 } 3163 else { 3164 schar = source.charAt(minlength - 1); 3165 if (isUnsafe(schar)) { 3166 tchar = schar; 3167 } 3168 else if (slength == tlength) { 3169 return result; 3170 } 3171 else if (slength < tlength) { 3172 tchar = target.charAt(result); 3173 } 3174 else { 3175 schar = source.charAt(result); 3176 } 3177 } 3178 if (isUnsafe(schar) || isUnsafe(tchar)) 3179 { 3180 do { 3187 result --; 3188 } 3189 while (result > 0 && isUnsafe(source.charAt(result))); 3190 } 3191 } 3192 return result; 3193 } 3194 3195 3204 private static final byte[] append(byte array[], int appendindex, 3205 byte value) 3206 { 3207 try { 3208 array[appendindex] = value; 3209 } 3210 catch (ArrayIndexOutOfBoundsException e) { 3211 array = increase(array, appendindex, SORT_BUFFER_INIT_SIZE_); 3212 array[appendindex] = value; 3213 } 3214 return array; 3215 } 3216 3217 3224 private final int compareBySortKeys(String source, String target) 3225 3226 { 3227 m_utilRawCollationKey_ = getRawCollationKey(source, 3228 m_utilRawCollationKey_); 3229 RawCollationKey targetkey = getRawCollationKey(target, null); 3231 return m_utilRawCollationKey_.compareTo(targetkey); 3232 } 3233 3234 3254 private final int doPrimaryCompare(boolean doHiragana4, int lowestpvalue, 3255 String source, String target, 3256 int textoffset) 3257 3258 { 3259 m_srcUtilIter_.setText(source); 3261 m_srcUtilColEIter_.setText(m_srcUtilIter_, textoffset); 3262 m_tgtUtilIter_.setText(target); 3263 m_tgtUtilColEIter_.setText(m_tgtUtilIter_, textoffset); 3264 3265 if (!m_isAlternateHandlingShifted_) { 3267 int hiraganaresult = 0; 3268 while (true) { 3269 int sorder = 0; 3270 do { 3272 sorder = m_srcUtilColEIter_.next(); 3273 m_srcUtilCEBuffer_ = append(m_srcUtilCEBuffer_, 3274 m_srcUtilCEBufferSize_, sorder); 3275 m_srcUtilCEBufferSize_ ++; 3276 sorder &= CE_PRIMARY_MASK_; 3277 } while (sorder == CollationElementIterator.IGNORABLE); 3278 3279 int torder = 0; 3280 do { 3281 torder = m_tgtUtilColEIter_.next(); 3282 m_tgtUtilCEBuffer_ = append(m_tgtUtilCEBuffer_, 3283 m_tgtUtilCEBufferSize_, torder); 3284 m_tgtUtilCEBufferSize_ ++; 3285 torder &= CE_PRIMARY_MASK_; 3286 } while (torder == CollationElementIterator.IGNORABLE); 3287 3288 if (sorder == torder) { 3290 if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] 3293 == CollationElementIterator.NULLORDER) { 3294 if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] 3295 != CollationElementIterator.NULLORDER) { 3296 return -1; 3297 } 3298 break; 3299 } 3300 else if (m_tgtUtilCEBuffer_[m_tgtUtilCEBufferSize_ - 1] 3301 == CollationElementIterator.NULLORDER) { 3302 return 1; 3303 } 3304 if (doHiragana4 && hiraganaresult == 0 3305 && m_srcUtilColEIter_.m_isCodePointHiragana_ != 3306 m_tgtUtilColEIter_.m_isCodePointHiragana_) { 3307 if (m_srcUtilColEIter_.m_isCodePointHiragana_) { 3308 hiraganaresult = -1; 3309 } 3310 else { 3311 hiraganaresult = 1; 3312 } 3313 } 3314 } 3315 else { 3316 return endPrimaryCompare(sorder, torder); 3318 } 3319 } 3320 return hiraganaresult; 3322 } 3323 else { while (true) { 3325 int sorder = getPrimaryShiftedCompareCE(m_srcUtilColEIter_, 3326 lowestpvalue, true); 3327 int torder = getPrimaryShiftedCompareCE(m_tgtUtilColEIter_, 3328 lowestpvalue, false); 3329 if (sorder == torder) { 3330 if (m_srcUtilCEBuffer_[m_srcUtilCEBufferSize_ - 1] 3331 == CollationElementIterator.NULLORDER) { 3332 break; 3333 } 3334 else { 3335 continue; 3336 } 3337 } 3338 else { 3339 return endPrimaryCompare(sorder, torder); 3340 } 3341 } } 3343 return 0; 3344 } 3345 3346 3355 private final int endPrimaryCompare(int sorder, int torder) 3356 { 3357 boolean isSourceNullOrder = (m_srcUtilCEBuffer_[ 3360 m_srcUtilCEBufferSize_ - 1] 3361 == CollationElementIterator.NULLORDER); 3362 boolean isTargetNullOrder = (m_tgtUtilCEBuffer_[ 3363 m_tgtUtilCEBufferSize_ - 1] 3364 == CollationElementIterator.NULLORDER); 3365 m_srcUtilCEBufferSize_ = -1; 3366 m_tgtUtilCEBufferSize_ = -1; 3367 if (isSourceNullOrder) { 3368 return -1; 3369 } 3370 if (isTargetNullOrder) { 3371 return 1; 3372 } 3373 sorder >>>= CE_PRIMARY_SHIFT_; 3375 torder >>>= CE_PRIMARY_SHIFT_; 3376 if (sorder < torder) { 3377 return -1; 3378 } 3379 return 1; 3380 } 3381 3382 3392 private final int getPrimaryShiftedCompareCE( 3393 CollationElementIterator coleiter, 3394 int lowestpvalue, boolean isSrc) 3395 3396 { 3397 boolean shifted = false; 3398 int result = CollationElementIterator.IGNORABLE; 3399 int cebuffer[] = m_srcUtilCEBuffer_; 3400 int cebuffersize = m_srcUtilCEBufferSize_; 3401 if (!isSrc) { 3402 cebuffer = m_tgtUtilCEBuffer_; 3403 cebuffersize = m_tgtUtilCEBufferSize_; 3404 } 3405 while (true) { 3406 result = coleiter.next(); 3407 if (result == CollationElementIterator.NULLORDER) { 3408 cebuffer = append(cebuffer, cebuffersize, result); 3409 cebuffersize ++; 3410 break; 3411 } 3412 else if (result == CollationElementIterator.IGNORABLE 3413 || (shifted 3414 && (result & CE_PRIMARY_MASK_) 3415 == CollationElementIterator.IGNORABLE)) { 3416 continue; 3419 } 3420 else if (isContinuation(result)) { 3421 if ((result & CE_PRIMARY_MASK_) 3422 != CollationElementIterator.IGNORABLE) { 3423 if (shifted) { 3425 result = (result & CE_PRIMARY_MASK_) 3426 | CE_CONTINUATION_MARKER_; 3427 cebuffer = append(cebuffer, cebuffersize, result); 3429 cebuffersize ++; 3430 continue; 3431 } 3432 else { 3433 cebuffer = append(cebuffer, cebuffersize, result); 3434 cebuffersize ++; 3435 break; 3436 } 3437 } 3438 else { if (!shifted) { 3440 cebuffer = append(cebuffer, cebuffersize, result); 3441 cebuffersize ++; 3442 } 3443 } 3444 } 3445 else { if (Utility.compareUnsigned(result & CE_PRIMARY_MASK_, 3447 lowestpvalue) > 0) { 3448 cebuffer = append(cebuffer, cebuffersize, result); 3449 cebuffersize ++; 3450 break; 3451 } 3452 else { 3453 if ((result & CE_PRIMARY_MASK_) != 0) { 3454 shifted = true; 3455 result &= CE_PRIMARY_MASK_; 3456 cebuffer = append(cebuffer, cebuffersize, result); 3457 cebuffersize ++; 3458 continue; 3459 } 3460 else { 3461 cebuffer = append(cebuffer, cebuffersize, result); 3462 cebuffersize ++; 3463 shifted = false; 3464 continue; 3465 } 3466 } 3467 } 3468 } 3469 if (isSrc) { 3470 m_srcUtilCEBuffer_ = cebuffer; 3471 m_srcUtilCEBufferSize_ = cebuffersize; 3472 } 3473 else { 3474 m_tgtUtilCEBuffer_ = cebuffer; 3475 m_tgtUtilCEBufferSize_ = cebuffersize; 3476 } 3477 result &= CE_PRIMARY_MASK_; 3478 return result; 3479 } 3480 3481 3490 private static final int[] append(int array[], int appendindex, int value) 3491 { 3492 if (appendindex + 1 >= array.length) { 3493 array = increase(array, appendindex, CE_BUFFER_SIZE_); 3494 } 3495 array[appendindex] = value; 3496 return array; 3497 } 3498 3499 3504 private final int doSecondaryCompare(boolean doFrench) 3505 { 3506 if (!doFrench) { int soffset = 0; 3509 int toffset = 0; 3510 while (true) { 3511 int sorder = CollationElementIterator.IGNORABLE; 3512 while (sorder == CollationElementIterator.IGNORABLE) { 3513 sorder = m_srcUtilCEBuffer_[soffset ++] 3514 & CE_SECONDARY_MASK_; 3515 } 3516 int torder = CollationElementIterator.IGNORABLE; 3517 while (torder == CollationElementIterator.IGNORABLE) { 3518 torder = m_tgtUtilCEBuffer_[toffset ++] 3519 & CE_SECONDARY_MASK_; 3520 } 3521 3522 if (sorder == torder) { 3523 if (m_srcUtilCEBuffer_[soffset - 1] 3524 == CollationElementIterator.NULLORDER) { 3525 if (m_tgtUtilCEBuffer_[toffset - 1] 3526 != CollationElementIterator.NULLORDER) { 3527 return -1; 3528 } 3529 break; 3530 } 3531 else if (m_tgtUtilCEBuffer_[toffset - 1] 3532 == CollationElementIterator.NULLORDER) { 3533 return 1; 3534 } 3535 } 3536 else { 3537 if (m_srcUtilCEBuffer_[soffset - 1] == 3538 CollationElementIterator.NULLORDER) { 3539 return -1; 3540 } 3541 if (m_tgtUtilCEBuffer_[toffset - 1] == 3542 CollationElementIterator.NULLORDER) { 3543 return 1; 3544 } 3545 return (sorder < torder) ? -1 : 1; 3546 } 3547 } 3548 } 3549 else { m_srcUtilContOffset_ = 0; 3551 m_tgtUtilContOffset_ = 0; 3552 m_srcUtilOffset_ = m_srcUtilCEBufferSize_ - 2; 3553 m_tgtUtilOffset_ = m_tgtUtilCEBufferSize_ - 2; 3554 while (true) { 3555 int sorder = getSecondaryFrenchCE(true); 3556 int torder = getSecondaryFrenchCE(false); 3557 if (sorder == torder) { 3558 if ((m_srcUtilOffset_ < 0 && m_tgtUtilOffset_ < 0) 3559 || (m_srcUtilOffset_ >= 0 3560 && m_srcUtilCEBuffer_[m_srcUtilOffset_] 3561 == CollationElementIterator.NULLORDER)) { 3562 break; 3563 } 3564 } 3565 else { 3566 return (sorder < torder) ? -1 : 1; 3567 } 3568 } 3569 } 3570 return 0; 3571 } 3572 3573 3578 private final int getSecondaryFrenchCE(boolean isSrc) 3579 { 3580 int result = CollationElementIterator.IGNORABLE; 3581 int offset = m_srcUtilOffset_; 3582 int continuationoffset = m_srcUtilContOffset_; 3583 int cebuffer[] = m_srcUtilCEBuffer_; 3584 if (!isSrc) { 3585 offset = m_tgtUtilOffset_; 3586 continuationoffset = m_tgtUtilContOffset_; 3587 cebuffer = m_tgtUtilCEBuffer_; 3588 } 3589 3590 while (result == CollationElementIterator.IGNORABLE 3591 && offset >= 0) { 3592 if (continuationoffset == 0) { 3593 result = cebuffer[offset]; 3594 while (isContinuation(cebuffer[offset --])){ 3595 } 3596 if (isContinuation(cebuffer[offset + 1])) { 3599 continuationoffset = offset; 3601 offset += 2; 3602 } 3603 } 3604 else { 3605 result = cebuffer[offset ++]; 3606 if (!isContinuation(result)) { 3607 offset = continuationoffset; 3609 continuationoffset = 0; 3611 continue; 3612 } 3613 } 3614 result &= CE_SECONDARY_MASK_; } 3616 if (isSrc) { 3617 m_srcUtilOffset_ = offset; 3618 m_srcUtilContOffset_ = continuationoffset; 3619 } 3620 else { 3621 m_tgtUtilOffset_ = offset; 3622 m_tgtUtilContOffset_ = continuationoffset; 3623 } 3624 return result; 3625 } 3626 3627 3631 private final int doCaseCompare() 3632 { 3633 int soffset = 0; 3634 int toffset = 0; 3635 while (true) { 3636 int sorder = CollationElementIterator.IGNORABLE; 3637 int torder = CollationElementIterator.IGNORABLE; 3638 while ((sorder & CE_REMOVE_CASE_) 3639 == CollationElementIterator.IGNORABLE) { 3640 sorder = m_srcUtilCEBuffer_[soffset ++]; 3641 if (!isContinuation(sorder) && ((sorder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) { 3642 sorder &= CE_CASE_MASK_3_; 3645 sorder ^= m_caseSwitch_; 3646 } 3647 else { 3648 sorder = CollationElementIterator.IGNORABLE; 3649 } 3650 } 3651 3652 while ((torder & CE_REMOVE_CASE_) 3653 == CollationElementIterator.IGNORABLE) { 3654 torder = m_tgtUtilCEBuffer_[toffset ++]; 3655 if (!isContinuation(torder) && ((torder & CE_PRIMARY_MASK_) != 0 || m_utilCompare2_ == true)) { 3656 torder &= CE_CASE_MASK_3_; 3659 torder ^= m_caseSwitch_; 3660 } 3661 else { 3662 torder = CollationElementIterator.IGNORABLE; 3663 } 3664 } 3665 3666 sorder &= CE_CASE_BIT_MASK_; 3667 torder &= CE_CASE_BIT_MASK_; 3668 if (sorder == torder) { 3669 if (m_srcUtilCEBuffer_[soffset - 1] 3671 == CollationElementIterator.NULLORDER) { 3672 if (m_tgtUtilCEBuffer_[toffset - 1] 3673 != CollationElementIterator.NULLORDER) { 3674 return -1; 3675 } 3676 break; 3677 } 3678 else if (m_tgtUtilCEBuffer_[toffset - 1] 3679 == CollationElementIterator.NULLORDER) { 3680 return 1; 3681 } 3682 } 3683 else { 3684 if (m_srcUtilCEBuffer_[soffset - 1] 3685 == CollationElementIterator.NULLORDER) { 3686 return -1; 3687 } 3688 if (m_tgtUtilCEBuffer_[soffset - 1] 3689 == CollationElementIterator.NULLORDER) { 3690 return 1; 3691 } 3692 return (sorder < torder) ? -1 : 1; 3693 } 3694 } 3695 return 0; 3696 } 3697 3698 3702 private final int doTertiaryCompare() 3703 { 3704 int soffset = 0; 3705 int toffset = 0; 3706 while (true) { 3707 int sorder = CollationElementIterator.IGNORABLE; 3708 int torder = CollationElementIterator.IGNORABLE; 3709 while ((sorder & CE_REMOVE_CASE_) 3710 == CollationElementIterator.IGNORABLE) { 3711 sorder = m_srcUtilCEBuffer_[soffset ++] & m_mask3_; 3712 if (!isContinuation(sorder)) { 3713 sorder ^= m_caseSwitch_; 3714 } 3715 else { 3716 sorder &= CE_REMOVE_CASE_; 3717 } 3718 } 3719 3720 while ((torder & CE_REMOVE_CASE_) 3721 == CollationElementIterator.IGNORABLE) { 3722 torder = m_tgtUtilCEBuffer_[toffset ++] & m_mask3_; 3723 if (!isContinuation(torder)) { 3724 torder ^= m_caseSwitch_; 3725 } 3726 else { 3727 torder &= CE_REMOVE_CASE_; 3728 } 3729 } 3730 3731 if (sorder == torder) { 3732 if (m_srcUtilCEBuffer_[soffset - 1] 3733 == CollationElementIterator.NULLORDER) { 3734 if (m_tgtUtilCEBuffer_[toffset - 1] 3735 != CollationElementIterator.NULLORDER) { 3736 return -1; 3737 } 3738 break; 3739 } 3740 else if (m_tgtUtilCEBuffer_[toffset - 1] 3741 == CollationElementIterator.NULLORDER) { 3742 return 1; 3743 } 3744 } 3745 else { 3746 if (m_srcUtilCEBuffer_[soffset - 1] == 3747 CollationElementIterator.NULLORDER) { 3748 return -1; 3749 } 3750 if (m_tgtUtilCEBuffer_[toffset - 1] == 3751 CollationElementIterator.NULLORDER) { 3752 return 1; 3753 } 3754 return (sorder < torder) ? -1 : 1; 3755 } 3756 } 3757 return 0; 3758 } 3759 3760 3766 private final int doQuaternaryCompare(int lowestpvalue) 3767 { 3768 boolean sShifted = true; 3769 boolean tShifted = true; 3770 int soffset = 0; 3771 int toffset = 0; 3772 while (true) { 3773 int sorder = CollationElementIterator.IGNORABLE; 3774 int torder = CollationElementIterator.IGNORABLE; 3775 while (sorder == CollationElementIterator.IGNORABLE 3776 || (isContinuation(sorder) && !sShifted)) { 3777 sorder = m_srcUtilCEBuffer_[soffset ++]; 3778 if (isContinuation(sorder)) { 3779 if (!sShifted) { 3780 continue; 3781 } 3782 } 3783 else if (Utility.compareUnsigned(sorder, lowestpvalue) > 0 3784 || (sorder & CE_PRIMARY_MASK_) 3785 == CollationElementIterator.IGNORABLE) { 3786 sorder = CE_PRIMARY_MASK_; 3788 sShifted = false; 3789 } 3790 else { 3791 sShifted = true; 3792 } 3793 } 3794 sorder >>>= CE_PRIMARY_SHIFT_; 3795 while (torder == CollationElementIterator.IGNORABLE 3796 || (isContinuation(torder) && !tShifted)) { 3797 torder = m_tgtUtilCEBuffer_[toffset ++]; 3798 if (isContinuation(torder)) { 3799 if (!tShifted) { 3800 continue; 3801 } 3802 } 3803 else if (Utility.compareUnsigned(torder, lowestpvalue) > 0 3804 || (torder & CE_PRIMARY_MASK_) 3805 == CollationElementIterator.IGNORABLE) { 3806 torder = CE_PRIMARY_MASK_; 3808 tShifted = false; 3809 } 3810 else { 3811 tShifted = true; 3812 } 3813 } 3814 torder >>>= CE_PRIMARY_SHIFT_; 3815 3816 if (sorder == torder) { 3817 if (m_srcUtilCEBuffer_[soffset - 1] 3818 == CollationElementIterator.NULLORDER) { 3819 if (m_tgtUtilCEBuffer_[toffset - 1] 3820 != CollationElementIterator.NULLORDER) { 3821 return -1; 3822 } 3823 break; 3824 } 3825 else if (m_tgtUtilCEBuffer_[toffset - 1] 3826 == CollationElementIterator.NULLORDER) { 3827 return 1; 3828 } 3829 } 3830 else { 3831 if (m_srcUtilCEBuffer_[soffset - 1] == 3832 CollationElementIterator.NULLORDER) { 3833 return -1; 3834 } 3835 if (m_tgtUtilCEBuffer_[toffset - 1] == 3836 CollationElementIterator.NULLORDER) { 3837 return 1; 3838 } 3839 return (sorder < torder) ? -1 : 1; 3840 } 3841 } 3842 return 0; 3843 } 3844 3845 3857 private static final int doIdenticalCompare(String source, String target, 3858 int offset, boolean normalize) 3859 3860 { 3861 if (normalize) { 3862 if (Normalizer.quickCheck(source, Normalizer.NFD,0) 3863 != Normalizer.YES) { 3864 source = Normalizer.decompose(source, false); 3865 } 3866 3867 if (Normalizer.quickCheck(target, Normalizer.NFD,0) 3868 != Normalizer.YES) { 3869 target = Normalizer.decompose(target, false); 3870 } 3871 offset = 0; 3872 } 3873 3874 return doStringCompare(source, target, offset); 3875 } 3876 3877 3886 private static final int doStringCompare(String source, 3887 String target, 3888 int offset) 3889 { 3890 char schar = 0; 3892 char tchar = 0; 3893 int slength = source.length(); 3894 int tlength = target.length(); 3895 int minlength = Math.min(slength, tlength); 3896 while (offset < minlength) { 3897 schar = source.charAt(offset); 3898 tchar = target.charAt(offset ++); 3899 if (schar != tchar) { 3900 break; 3901 } 3902 } 3903 3904 if (schar == tchar && offset == minlength) { 3905 if (slength > minlength) { 3906 return 1; 3907 } 3908 if (tlength > minlength) { 3909 return -1; 3910 } 3911 return 0; 3912 } 3913 3914 if (schar >= UTF16.LEAD_SURROGATE_MIN_VALUE 3916 && tchar >= UTF16.LEAD_SURROGATE_MIN_VALUE) { 3917 schar = fixupUTF16(schar); 3918 tchar = fixupUTF16(tchar); 3919 } 3920 3921 return (schar < tchar) ? -1 : 1; } 3924 3925 3928 private static final char fixupUTF16(char ch) 3929 { 3930 if (ch >= 0xe000) { 3931 ch -= 0x800; 3932 } 3933 else { 3934 ch += 0x2000; 3935 } 3936 return ch; 3937 } 3938 3939 3942 private void updateInternalState() 3943 { 3944 if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { 3945 m_caseSwitch_ = CASE_SWITCH_; 3946 } 3947 else { 3948 m_caseSwitch_ = NO_CASE_SWITCH_; 3949 } 3950 3951 if (m_isCaseLevel_ || m_caseFirst_ == AttributeValue.OFF_) { 3952 m_mask3_ = CE_REMOVE_CASE_; 3953 m_common3_ = COMMON_NORMAL_3_; 3954 m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_OFF_; 3955 m_top3_ = COMMON_TOP_CASE_SWITCH_OFF_3_; 3956 m_bottom3_ = COMMON_BOTTOM_3_; 3957 } 3958 else { 3959 m_mask3_ = CE_KEEP_CASE_; 3960 m_addition3_ = FLAG_BIT_MASK_CASE_SWITCH_ON_; 3961 if (m_caseFirst_ == AttributeValue.UPPER_FIRST_) { 3962 m_common3_ = COMMON_UPPER_FIRST_3_; 3963 m_top3_ = COMMON_TOP_CASE_SWITCH_UPPER_3_; 3964 m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_UPPER_3_; 3965 } else { 3966 m_common3_ = COMMON_NORMAL_3_; 3967 m_top3_ = COMMON_TOP_CASE_SWITCH_LOWER_3_; 3968 m_bottom3_ = COMMON_BOTTOM_CASE_SWITCH_LOWER_3_; 3969 } 3970 } 3971 3972 int total3 = m_top3_ - COMMON_BOTTOM_3_ - 1; 3974 m_topCount3_ = (int)(PROPORTION_3_ * total3); 3976 m_bottomCount3_ = total3 - m_topCount3_; 3977 3978 if (!m_isCaseLevel_ && getStrength() == AttributeValue.TERTIARY_ 3979 && !m_isFrenchCollation_ && !m_isAlternateHandlingShifted_) { 3980 m_isSimple3_ = true; 3981 } 3982 else { 3983 m_isSimple3_ = false; 3984 } 3985 if(!m_isCaseLevel_ && getStrength() <= AttributeValue.TERTIARY_ && !m_isNumericCollation_ 3986 && !m_isAlternateHandlingShifted_ && !latinOneFailed_) { 3987 if(latinOneCEs_ == null || latinOneRegenTable_) { 3988 if(setUpLatinOne()) { latinOneUse_ = true; 3990 } else { 3991 latinOneUse_ = false; 3992 latinOneFailed_ = true; 3993 } 3994 latinOneRegenTable_ = false; 3995 } else { latinOneUse_ = true; 3997 } 3998 } else { 3999 latinOneUse_ = false; 4000 } 4001 4002 } 4003 4004 4007 private final void init() 4008 { 4009 for (m_minUnsafe_ = 0; m_minUnsafe_ < DEFAULT_MIN_HEURISTIC_; 4010 m_minUnsafe_ ++) { 4011 if (isUnsafe(m_minUnsafe_)) { 4013 break; 4014 } 4015 } 4016 4017 for (m_minContractionEnd_ = 0; 4018 m_minContractionEnd_ < DEFAULT_MIN_HEURISTIC_; 4019 m_minContractionEnd_ ++) { 4020 if (isContractionEnd(m_minContractionEnd_)) { 4022 break; 4023 } 4024 } 4025 latinOneFailed_ = true; 4026 setStrength(m_defaultStrength_); 4027 setDecomposition(m_defaultDecomposition_); 4028 m_variableTopValue_ = m_defaultVariableTopValue_; 4029 m_isFrenchCollation_ = m_defaultIsFrenchCollation_; 4030 m_isAlternateHandlingShifted_ = m_defaultIsAlternateHandlingShifted_; 4031 m_isCaseLevel_ = m_defaultIsCaseLevel_; 4032 m_caseFirst_ = m_defaultCaseFirst_; 4033 m_isHiragana4_ = m_defaultIsHiragana4_; 4034 m_isNumericCollation_ = m_defaultIsNumericCollation_; 4035 latinOneFailed_ = false; 4036 updateInternalState(); 4037 } 4038 4039 4042 private final void initUtility(boolean allocate) { 4043 if (allocate) { 4044 if (m_srcUtilIter_ == null) { 4045 m_srcUtilIter_ = new StringUCharacterIterator(); 4046 m_srcUtilColEIter_ = new CollationElementIterator(m_srcUtilIter_, this); 4047 m_tgtUtilIter_ = new StringUCharacterIterator(); 4048 m_tgtUtilColEIter_ = new CollationElementIterator(m_tgtUtilIter_, this); 4049 m_utilBytes0_ = new byte[SORT_BUFFER_INIT_SIZE_CASE_]; m_utilBytes1_ = new byte[SORT_BUFFER_INIT_SIZE_1_]; m_utilBytes2_ = new byte[SORT_BUFFER_INIT_SIZE_2_]; m_utilBytes3_ = new byte[SORT_BUFFER_INIT_SIZE_3_]; m_utilBytes4_ = new byte[SORT_BUFFER_INIT_SIZE_4_]; m_srcUtilCEBuffer_ = new int[CE_BUFFER_SIZE_]; 4055 m_tgtUtilCEBuffer_ = new int[CE_BUFFER_SIZE_]; 4056 } 4057 } else { 4058 m_srcUtilIter_ = null; 4059 m_srcUtilColEIter_ = null; 4060 m_tgtUtilIter_ = null; 4061 m_tgtUtilColEIter_ = null; 4062 m_utilBytes0_ = null; 4063 m_utilBytes1_ = null; 4064 m_utilBytes2_ = null; 4065 m_utilBytes3_ = null; 4066 m_utilBytes4_ = null; 4067 m_srcUtilCEBuffer_ = null; 4068 m_tgtUtilCEBuffer_ = null; 4069 } 4070 } 4071 4072 private static final int ENDOFLATINONERANGE_ = 0xFF; 4074 private static final int LATINONETABLELEN_ = (ENDOFLATINONERANGE_+50); 4075 private static final int BAIL_OUT_CE_ = 0xFF000000; 4076 4077 4080 4081 private class shiftValues { 4082 int primShift = 24; 4083 int secShift = 24; 4084 int terShift = 24; 4085 } 4086 4087 private final void 4088 addLatinOneEntry(char ch, int CE, shiftValues sh) { 4089 int primary1 = 0, primary2 = 0, secondary = 0, tertiary = 0; 4090 boolean reverseSecondary = false; 4091 if(!isContinuation(CE)) { 4092 tertiary = ((CE & m_mask3_)); 4093 tertiary ^= m_caseSwitch_; 4094 reverseSecondary = true; 4095 } else { 4096 tertiary = (byte)((CE & CE_REMOVE_CONTINUATION_MASK_)); 4097 tertiary &= CE_REMOVE_CASE_; 4098 reverseSecondary = false; 4099 } 4100 4101 secondary = ((CE >>>= 8) & LAST_BYTE_MASK_); 4102 primary2 = ((CE >>>= 8) & LAST_BYTE_MASK_); 4103 primary1 = (CE >>> 8); 4104 4105 if(primary1 != 0) { 4106 latinOneCEs_[ch] |= (primary1 << sh.primShift); 4107 sh.primShift -= 8; 4108 } 4109 if(primary2 != 0) { 4110 if(sh.primShift < 0) { 4111 latinOneCEs_[ch] = BAIL_OUT_CE_; 4112 latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_; 4113 latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_; 4114 return; 4115 } 4116 latinOneCEs_[ch] |= (primary2 << sh.primShift); 4117 sh.primShift -= 8; 4118 } 4119 if(secondary != 0) { 4120 if(reverseSecondary && m_isFrenchCollation_) { latinOneCEs_[latinOneTableLen_+ch] >>>= 8; latinOneCEs_[latinOneTableLen_+ch] |= (secondary << 24); 4123 } else { latinOneCEs_[latinOneTableLen_+ch] |= (secondary << sh.secShift); 4125 } 4126 sh.secShift -= 8; 4127 } 4128 if(tertiary != 0) { 4129 latinOneCEs_[2*latinOneTableLen_+ch] |= (tertiary << sh.terShift); 4130 sh.terShift -= 8; 4131 } 4132 } 4133 4134 private final void 4135 resizeLatinOneTable(int newSize) { 4136 int newTable[] = new int[3*newSize]; 4137 int sizeToCopy = ((newSize<latinOneTableLen_)?newSize:latinOneTableLen_); 4138 System.arraycopy(latinOneCEs_, 0, newTable, 0, sizeToCopy); 4140 System.arraycopy(latinOneCEs_, latinOneTableLen_, newTable, newSize, sizeToCopy); 4141 System.arraycopy(latinOneCEs_, 2*latinOneTableLen_, newTable, 2*newSize, sizeToCopy); 4142 latinOneTableLen_ = newSize; 4143 latinOneCEs_ = newTable; 4144 } 4145 4146 private final boolean setUpLatinOne() { 4147 if(latinOneCEs_ == null || m_reallocLatinOneCEs_) { 4148 latinOneCEs_ = new int[3*LATINONETABLELEN_]; 4149 latinOneTableLen_ = LATINONETABLELEN_; 4150 m_reallocLatinOneCEs_ = false; 4151 } else { 4152 Arrays.fill(latinOneCEs_, 0); 4153 } 4154 if(m_ContInfo_ == null) { 4155 m_ContInfo_ = new ContractionInfo(); 4156 } 4157 char ch = 0; 4158 CollationElementIterator it = getCollationElementIterator(""); 4161 4162 shiftValues s = new shiftValues(); 4163 int CE = 0; 4164 char contractionOffset = ENDOFLATINONERANGE_+1; 4165 4166 for(ch = 0; ch <= ENDOFLATINONERANGE_; ch++) { 4167 s.primShift = 24; s.secShift = 24; s.terShift = 24; 4168 if(ch < 0x100) { 4169 CE = m_trie_.getLatin1LinearValue(ch); 4170 } else { 4171 CE = m_trie_.getLeadValue(ch); 4172 if(CE == CollationElementIterator.CE_NOT_FOUND_) { 4173 CE = UCA_.m_trie_.getLeadValue(ch); 4174 } 4175 } 4176 if(!isSpecial(CE)) { 4177 addLatinOneEntry(ch, CE, s); 4178 } else { 4179 switch (RuleBasedCollator.getTag(CE)) { 4180 case CollationElementIterator.CE_EXPANSION_TAG_: 4181 case CollationElementIterator.CE_DIGIT_TAG_: 4182 it.setText(UCharacter.toString(ch)); 4186 while((CE = it.next()) != CollationElementIterator.NULLORDER) { 4187 if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) { 4188 latinOneCEs_[ch] = BAIL_OUT_CE_; 4189 latinOneCEs_[latinOneTableLen_+ch] = BAIL_OUT_CE_; 4190 latinOneCEs_[2*latinOneTableLen_+ch] = BAIL_OUT_CE_; 4191 break; 4192 } 4193 addLatinOneEntry(ch, CE, s); 4194 } 4195 break; 4196 case CollationElementIterator.CE_CONTRACTION_TAG_: 4197 { 4203 if((CE & 0x00FFF000) != 0) { 4204 latinOneFailed_ = true; 4205 return false; 4206 } 4207 4208 int UCharOffset = (CE & 0xFFFFFF) - m_contractionOffset_; 4210 CE |= (contractionOffset & 0xFFF) << 12; 4212 latinOneCEs_[ch] = CE; 4213 latinOneCEs_[latinOneTableLen_+ch] = CE; 4214 latinOneCEs_[2*latinOneTableLen_+ch] = CE; 4215 4216 do { 4219 CE = m_contractionCE_[UCharOffset]; 4221 if(isSpecial(CE) 4222 && getTag(CE) 4223 == CollationElementIterator.CE_EXPANSION_TAG_) { 4224 int i; 4225 int offset = ((CE & 0xFFFFF0) >> 4) - m_expansionOffset_; int size = CE & 0xF; if(size != 0) { 4230 for(i = 0; i<size; i++) { 4231 if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) { 4232 latinOneCEs_[contractionOffset] = BAIL_OUT_CE_; 4233 latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_; 4234 latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_; 4235 break; 4236 } 4237 addLatinOneEntry(contractionOffset, m_expansion_[offset+i], s); 4238 } 4239 } else { 4240 while(m_expansion_[offset] != 0) { 4241 if(s.primShift < 0 || s.secShift < 0 || s.terShift < 0) { 4242 latinOneCEs_[contractionOffset] = BAIL_OUT_CE_; 4243 latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_; 4244 latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_; 4245 break; 4246 } 4247 addLatinOneEntry(contractionOffset, m_expansion_[offset++], s); 4248 } 4249 } 4250 contractionOffset++; 4251 } else if(!isSpecial(CE)) { 4252 addLatinOneEntry(contractionOffset++, CE, s); 4253 } else { 4254 latinOneCEs_[contractionOffset] = BAIL_OUT_CE_; 4255 latinOneCEs_[latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_; 4256 latinOneCEs_[2*latinOneTableLen_+contractionOffset] = BAIL_OUT_CE_; 4257 contractionOffset++; 4258 } 4259 UCharOffset++; 4260 s.primShift = 24; s.secShift = 24; s.terShift = 24; 4261 if(contractionOffset == latinOneTableLen_) { resizeLatinOneTable(2*latinOneTableLen_); 4263 } 4264 } while(m_contractionIndex_[UCharOffset] != 0xFFFF); 4265 } 4266 break; 4267 default: 4268 latinOneFailed_ = true; 4269 return false; 4270 } 4271 } 4272 } 4273 if(contractionOffset < latinOneTableLen_) { 4275 resizeLatinOneTable(contractionOffset); 4276 } 4277 return true; 4278 } 4279 4280 private class ContractionInfo { 4281 int index; 4282 } 4283 4284 ContractionInfo m_ContInfo_; 4285 4286 private int 4287 getLatinOneContraction(int strength, int CE, String s) { 4288 int len = s.length(); 4290 int UCharOffset = (CE & 0xFFF) - m_contractionOffset_; 4292 int offset = 1; 4293 int latinOneOffset = (CE & 0x00FFF000) >>> 12; 4294 char schar = 0, tchar = 0; 4295 4296 for(;;) { 4297 4306 if(m_ContInfo_.index == len) { 4307 return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]); 4308 } else { 4309 schar = s.charAt(m_ContInfo_.index); 4310 } 4311 4313 while(schar > (tchar = m_contractionIndex_[UCharOffset+offset])) { 4314 offset++; 4315 } 4316 4317 if (schar == tchar) { 4318 m_ContInfo_.index++; 4319 return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset+offset]); 4320 } 4321 else 4322 { 4323 if(schar > ENDOFLATINONERANGE_ ) { 4324 return BAIL_OUT_CE_; 4325 } 4326 int isZeroCE = m_trie_.getLeadValue(schar); if(isZeroCE == 0) { m_ContInfo_.index++; 4330 continue; 4331 } 4332 4333 return(latinOneCEs_[strength*latinOneTableLen_+latinOneOffset]); 4334 } 4335 } 4336 } 4337 4338 4339 4348 private final int 4349 compareUseLatin1(String source, String target, int startOffset) 4350 { 4351 int sLen = source.length(); 4352 int tLen = target.length(); 4353 4354 int strength = getStrength(); 4355 4356 int sIndex = startOffset, tIndex = startOffset; 4357 char sChar = 0, tChar = 0; 4358 int sOrder=0, tOrder=0; 4359 4360 boolean endOfSource = false; 4361 4362 4364 boolean haveContractions = false; 4367 int offset = latinOneTableLen_; 4368 4369 primLoop: 4371 for(;;) { 4372 while(sOrder==0) { if(sIndex==sLen) { 4375 endOfSource = true; 4376 break; 4377 } 4378 sChar=source.charAt(sIndex++); if(sChar > ENDOFLATINONERANGE_) { return compareRegular(source, target, startOffset); 4383 } 4384 sOrder = latinOneCEs_[sChar]; 4385 if(isSpecial(sOrder)) { if(getTag(sOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) { 4389 m_ContInfo_.index = sIndex; 4390 sOrder = getLatinOneContraction(0, sOrder, source); 4391 sIndex = m_ContInfo_.index; 4392 haveContractions = true; } 4396 if(isSpecial(sOrder) ) { 4397 return compareRegular(source, target, startOffset); 4399 } 4400 } 4401 } 4402 4403 while(tOrder==0) { if(tIndex==tLen) { 4406 if(endOfSource) { 4407 break primLoop; 4408 } else { 4409 return 1; 4410 } 4411 } 4412 tChar=target.charAt(tIndex++); if(tChar > ENDOFLATINONERANGE_) { return compareRegular(source, target, startOffset); 4416 } 4417 tOrder = latinOneCEs_[tChar]; 4418 if(isSpecial(tOrder)) { 4419 if(getTag(tOrder) == CollationElementIterator.CE_CONTRACTION_TAG_) { 4421 m_ContInfo_.index = tIndex; 4422 tOrder = getLatinOneContraction(0, tOrder, target); 4423 tIndex = m_ContInfo_.index; 4424 haveContractions = true; 4425 } 4426 if(isSpecial(tOrder)) { 4427 return compareRegular(source, target, startOffset); 4429 } 4430 } 4431 } 4432 if(endOfSource) { return -1; 4434 } 4435 4436 if(sOrder == tOrder) { sOrder = 0; tOrder = 0; 4438 continue; 4439 } else { 4440 if(((sOrder^tOrder)&0xFF000000)!=0) { 4442 if(sOrder >>> 8 < tOrder >>> 8) { 4444 return -1; 4445 } else { 4446 return 1; 4447 } 4448 } 4451 4452 sOrder<<=8; 4454 tOrder<<=8; 4455 } 4456 } 4457 4458 if(strength >= SECONDARY) { 4462 endOfSource = false; 4465 4466 if(!m_isFrenchCollation_) { sIndex = startOffset; tIndex = startOffset; 4473 secLoop: 4474 for(;;) { 4475 while(sOrder==0) { 4476 if(sIndex==sLen) { 4477 endOfSource = true; 4478 break; 4479 } 4480 sChar=source.charAt(sIndex++); sOrder = latinOneCEs_[offset+sChar]; 4482 if(isSpecial(sOrder)) { 4483 m_ContInfo_.index = sIndex; 4484 sOrder = getLatinOneContraction(1, sOrder, source); 4485 sIndex = m_ContInfo_.index; 4486 } 4487 } 4488 4489 while(tOrder==0) { 4490 if(tIndex==tLen) { 4491 if(endOfSource) { 4492 break secLoop; 4493 } else { 4494 return 1; 4495 } 4496 } 4497 tChar=target.charAt(tIndex++); tOrder = latinOneCEs_[offset+tChar]; 4499 if(isSpecial(tOrder)) { 4500 m_ContInfo_.index = tIndex; 4501 tOrder = getLatinOneContraction(1, tOrder, target); 4502 tIndex = m_ContInfo_.index; 4503 } 4504 } 4505 if(endOfSource) { 4506 return -1; 4507 } 4508 4509 if(sOrder == tOrder) { 4510 sOrder = 0; tOrder = 0; 4511 continue; 4512 } else { 4513 if(((sOrder^tOrder)&0xFF000000)!=0) { 4515 if(sOrder >>> 8 < tOrder >>> 8) { 4516 return -1; 4517 } else { 4518 return 1; 4519 } 4520 } 4521 sOrder<<=8; 4522 tOrder<<=8; 4523 } 4524 } 4525 } else { if(haveContractions) { return compareRegular(source, target, startOffset); 4529 } 4530 sIndex = sLen; tIndex = tLen; 4532 secFLoop: 4533 for(;;) { 4534 while(sOrder==0) { 4535 if(sIndex==startOffset) { 4536 endOfSource = true; 4537 break; 4538 } 4539 sChar=source.charAt(--sIndex); sOrder = latinOneCEs_[offset+sChar]; 4541 } 4543 4544 while(tOrder==0) { 4545 if(tIndex==startOffset) { 4546 if(endOfSource) { 4547 break secFLoop; 4548 } else { 4549 return 1; 4550 } 4551 } 4552 tChar=target.charAt(--tIndex); tOrder = latinOneCEs_[offset+tChar]; 4554 } 4556 if(endOfSource) { 4557 return -1; 4558 } 4559 4560 if(sOrder == tOrder) { 4561 sOrder = 0; tOrder = 0; 4562 continue; 4563 } else { 4564 if(((sOrder^tOrder)&0xFF000000)!=0) { 4566 if(sOrder >>> 8 < tOrder >>> 8) { 4567 return -1; 4568 } else { 4569 return 1; 4570 } 4571 } 4572 sOrder<<=8; 4573 tOrder<<=8; 4574 } 4575 } 4576 } 4577 } 4578 4579 if(strength >= TERTIARY) { 4580 offset += latinOneTableLen_; 4582 sIndex = startOffset; tIndex = startOffset; 4584 endOfSource = false; 4585 for(;;) { 4586 while(sOrder==0) { 4587 if(sIndex==sLen) { 4588 endOfSource = true; 4589 break; 4590 } 4591 sChar=source.charAt(sIndex++); sOrder = latinOneCEs_[offset+sChar]; 4593 if(isSpecial(sOrder)) { 4594 m_ContInfo_.index = sIndex; 4595 sOrder = getLatinOneContraction(2, sOrder, source); 4596 sIndex = m_ContInfo_.index; 4597 } 4598 } 4599 while(tOrder==0) { 4600 if(tIndex==tLen) { 4601 if(endOfSource) { 4602 return 0; } else { 4604 return 1; 4605 } 4606 } 4607 tChar=target.charAt(tIndex++); tOrder = latinOneCEs_[offset+tChar]; 4609 if(isSpecial(tOrder)) { 4610 m_ContInfo_.index = tIndex; 4611 tOrder = getLatinOneContraction(2, tOrder, target); 4612 tIndex = m_ContInfo_.index; 4613 } 4614 } 4615 if(endOfSource) { 4616 return -1; 4617 } 4618 if(sOrder == tOrder) { 4619 sOrder = 0; tOrder = 0; 4620 continue; 4621 } else { 4622 if(((sOrder^tOrder)&0xff000000)!=0) { 4623 if(sOrder >>> 8 < tOrder >>> 8) { 4624 return -1; 4625 } else { 4626 return 1; 4627 } 4628 } 4629 sOrder<<=8; 4630 tOrder<<=8; 4631 } 4632 } 4633 } 4634 return 0; 4635 } 4636 4641 public VersionInfo getVersion() { 4642 4643 int rtVersion = VersionInfo.UCOL_RUNTIME_VERSION.getMajor(); 4644 4645 int bdVersion = m_version_.getMajor(); 4646 4647 4651 int csVersion = 0; 4652 4653 4654 int cmbVersion = ((rtVersion<<11) | (bdVersion<<6) | (csVersion)) & 0xFFFF; 4655 4656 4657 return VersionInfo.getInstance(cmbVersion>>8, 4658 cmbVersion & 0xFF, 4659 m_version_.getMinor(), 4660 UCA_.m_UCA_version_.getMajor()); 4661 4662 } 4667 4668 4673 public VersionInfo getUCAVersion() { 4674 return UCA_.m_UCA_version_; 4675 } 4676 4677 private transient boolean m_reallocLatinOneCEs_; 4678} 4679 | Popular Tags |