1 10 package com.ibm.icu.text; 11 12 16 import com.ibm.icu.impl.NormalizerImpl; 17 import com.ibm.icu.impl.UCharacterProperty; 18 import com.ibm.icu.impl.StringUCharacterIterator; 19 import com.ibm.icu.impl.CharacterIteratorWrapper; 20 import com.ibm.icu.impl.ICUDebug; 21 import com.ibm.icu.lang.UCharacter; 22 import java.text.CharacterIterator ; 23 import java.util.MissingResourceException ; 24 25 101 public final class CollationElementIterator 102 { 103 104 105 107 117 public final static int NULLORDER = 0xffffffff; 118 119 128 public static final int IGNORABLE = 0; 129 130 132 134 161 public int getOffset() 162 { 163 if (m_bufferOffset_ != -1) { 164 if (m_isForwards_) { 165 return m_FCDLimit_; 166 } 167 return m_FCDStart_; 168 } 169 return m_source_.getIndex(); 170 } 171 172 173 183 public int getMaxExpansion(int ce) 184 { 185 int start = 0; 186 int limit = m_collator_.m_expansionEndCE_.length; 187 long unsignedce = ce & 0xFFFFFFFFl; 188 while (start < limit - 1) { 189 int mid = start + ((limit - start) >> 1); 190 long midce = m_collator_.m_expansionEndCE_[mid] & 0xFFFFFFFFl; 191 if (unsignedce <= midce) { 192 limit = mid; 193 } 194 else { 195 start = mid; 196 } 197 } 198 int result = 1; 199 if (m_collator_.m_expansionEndCE_[start] == ce) { 200 result = m_collator_.m_expansionEndCEMaxSize_[start]; 201 } 202 else if (limit < m_collator_.m_expansionEndCE_.length && 203 m_collator_.m_expansionEndCE_[limit] == ce) { 204 result = m_collator_.m_expansionEndCEMaxSize_[limit]; 205 } 206 else if ((ce & 0xFFFF) == 0x00C0) { 207 result = 2; 208 } 209 return result; 210 } 211 212 214 225 public void reset() 226 { 227 m_source_.setToStart(); 228 updateInternalState(); 229 } 230 231 253 public int next() 254 { 255 m_isForwards_ = true; 256 if (m_CEBufferSize_ > 0) { 257 if (m_CEBufferOffset_ < m_CEBufferSize_) { 258 return m_CEBuffer_[m_CEBufferOffset_ ++]; 260 } 261 m_CEBufferSize_ = 0; 262 m_CEBufferOffset_ = 0; 263 } 264 265 int ch_int = nextChar(); 266 267 if (ch_int == UCharacterIterator.DONE) { 268 return NULLORDER; 269 } 270 char ch = (char)ch_int; 271 if (m_collator_.m_isHiragana4_) { 272 m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309e) 273 && !(ch > 0x3094 && ch < 0x309d); 274 } 275 276 int result = NULLORDER; 277 if (ch <= 0xFF) { 278 result = m_collator_.m_trie_.getLatin1LinearValue(ch); 282 if (RuleBasedCollator.isSpecial(result)) { 283 result = nextSpecial(m_collator_, result, ch); 284 } 285 } 286 else { 287 result = m_collator_.m_trie_.getLeadValue(ch); 288 if (RuleBasedCollator.isSpecial(result)) { 290 result = nextSpecial(m_collator_, result, ch); 292 } 293 if (result == CE_NOT_FOUND_ && RuleBasedCollator.UCA_ != null) { 294 result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch); 298 if (RuleBasedCollator.isSpecial(result)) { 299 result = nextSpecial(RuleBasedCollator.UCA_, result, ch); 301 } 302 } 303 } 304 if(result == CE_NOT_FOUND_) { 305 result = nextImplicit(ch); 307 } 308 return result; 309 } 310 311 333 public int previous() 334 { 335 if (m_source_.getIndex() <= 0 && m_isForwards_) { 336 m_source_.setToLimit(); 339 updateInternalState(); 340 } 341 m_isForwards_ = false; 342 int result = NULLORDER; 343 if (m_CEBufferSize_ > 0) { 344 if (m_CEBufferOffset_ > 0) { 345 return m_CEBuffer_[-- m_CEBufferOffset_]; 346 } 347 m_CEBufferSize_ = 0; 348 m_CEBufferOffset_ = 0; 349 } 350 int ch_int = previousChar(); 351 if (ch_int == UCharacterIterator.DONE) { 352 return NULLORDER; 353 } 354 char ch = (char)ch_int; 355 if (m_collator_.m_isHiragana4_) { 356 m_isCodePointHiragana_ = (ch >= 0x3040 && ch <= 0x309f); 357 } 358 if (m_collator_.isContractionEnd(ch) && !isBackwardsStart()) { 359 result = previousSpecial(m_collator_, CE_CONTRACTION_, ch); 360 } 361 else { 362 if (ch <= 0xFF) { 363 result = m_collator_.m_trie_.getLatin1LinearValue(ch); 364 } 365 else { 366 result = m_collator_.m_trie_.getLeadValue(ch); 367 } 368 if (RuleBasedCollator.isSpecial(result)) { 369 result = previousSpecial(m_collator_, result, ch); 370 } 371 if (result == CE_NOT_FOUND_) { 372 if (!isBackwardsStart() 373 && m_collator_.isContractionEnd(ch)) { 374 result = CE_CONTRACTION_; 375 } 376 else { 377 if(RuleBasedCollator.UCA_ != null) { 378 result = RuleBasedCollator.UCA_.m_trie_.getLeadValue(ch); 379 } 380 } 381 382 if (RuleBasedCollator.isSpecial(result)) { 383 if(RuleBasedCollator.UCA_ != null) { 384 result = previousSpecial(RuleBasedCollator.UCA_, result, ch); 385 } 386 } 387 } 388 } 389 if(result == CE_NOT_FOUND_) { 390 result = previousImplicit(ch); 391 } 392 return result; 393 } 394 395 402 public final static int primaryOrder(int ce) 403 { 404 return (ce & RuleBasedCollator.CE_PRIMARY_MASK_) 405 >>> RuleBasedCollator.CE_PRIMARY_SHIFT_; 406 } 407 414 public final static int secondaryOrder(int ce) 415 { 416 return (ce & RuleBasedCollator.CE_SECONDARY_MASK_) 417 >> RuleBasedCollator.CE_SECONDARY_SHIFT_; 418 } 419 420 427 public final static int tertiaryOrder(int ce) 428 { 429 return ce & RuleBasedCollator.CE_TERTIARY_MASK_; 430 } 431 432 454 public void setOffset(int offset) 455 { 456 m_source_.setIndex(offset); 457 int ch_int = m_source_.current(); 458 char ch = (char)ch_int; 459 if (ch_int != UCharacterIterator.DONE && m_collator_.isUnsafe(ch)) { 460 if (UTF16.isTrailSurrogate(ch)) { 463 char prevch = (char)m_source_.previous(); 465 if (!UTF16.isLeadSurrogate(prevch)) { 466 m_source_.setIndex(offset); } 468 } 469 else { 470 while (m_source_.getIndex() > 0) { 473 if (!m_collator_.isUnsafe(ch)) { 474 break; 475 } 476 ch = (char)m_source_.previous(); 477 } 478 updateInternalState(); 479 int prevoffset = 0; 480 while (m_source_.getIndex() <= offset) { 481 prevoffset = m_source_.getIndex(); 482 next(); 483 } 484 m_source_.setIndex(prevoffset); 485 } 486 } 487 updateInternalState(); 488 offset = m_source_.getIndex(); 491 if (offset == 0) { 492 m_isForwards_ = false; 495 } 496 else if (offset == m_source_.getLength()) { 497 m_isForwards_ = true; 500 } 501 } 502 503 510 public void setText(String source) 511 { 512 m_srcUtilIter_.setText(source); 513 m_source_ = m_srcUtilIter_; 514 updateInternalState(); 515 } 516 517 526 public void setText(UCharacterIterator source) 527 { 528 m_srcUtilIter_.setText(source.getText()); 529 m_source_ = m_srcUtilIter_; 530 updateInternalState(); 531 } 532 533 540 public void setText(CharacterIterator source) 541 { 542 m_source_ = new CharacterIteratorWrapper(source); 543 m_source_.setToStart(); 544 updateInternalState(); 545 } 546 547 549 557 public boolean equals(Object that) 558 { 559 if (that == this) { 560 return true; 561 } 562 if (that instanceof CollationElementIterator) { 563 CollationElementIterator thatceiter 564 = (CollationElementIterator)that; 565 if (!m_collator_.equals(thatceiter.m_collator_)) { 566 return false; 567 } 568 return m_source_.getIndex() == thatceiter.m_source_.getIndex() 570 && m_source_.getText().equals( 571 thatceiter.m_source_.getText()); 572 } 573 return false; 574 } 575 576 578 589 CollationElementIterator(String source, RuleBasedCollator collator) 590 { 591 m_srcUtilIter_ = new StringUCharacterIterator(source); 592 m_utilStringBuffer_ = new StringBuffer (); 593 m_source_ = m_srcUtilIter_; 594 m_collator_ = collator; 595 m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; 596 m_buffer_ = new StringBuffer (); 597 m_utilSpecialBackUp_ = new Backup(); 598 updateInternalState(); 599 } 600 601 612 CollationElementIterator(CharacterIterator source, 613 RuleBasedCollator collator) 614 { 615 m_srcUtilIter_ = new StringUCharacterIterator(); 616 m_utilStringBuffer_ = new StringBuffer (); 617 m_source_ = new CharacterIteratorWrapper(source); 618 m_collator_ = collator; 619 m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; 620 m_buffer_ = new StringBuffer (); 621 m_utilSpecialBackUp_ = new Backup(); 622 updateInternalState(); 623 } 624 625 636 CollationElementIterator(UCharacterIterator source, 637 RuleBasedCollator collator) 638 { 639 m_srcUtilIter_ = new StringUCharacterIterator(); 640 m_utilStringBuffer_ = new StringBuffer (); 641 m_srcUtilIter_.setText(source.getText()); 642 m_source_ = m_srcUtilIter_; 643 m_collator_ = collator; 644 m_CEBuffer_ = new int[CE_BUFFER_INIT_SIZE_]; 645 m_buffer_ = new StringBuffer (); 646 m_utilSpecialBackUp_ = new Backup(); 647 updateInternalState(); 648 } 649 650 652 655 boolean m_isCodePointHiragana_; 656 659 int m_FCDStart_; 660 668 int m_CEBufferOffset_; 669 670 676 int m_CEBufferSize_; 677 static final int CE_NOT_FOUND_ = 0xF0000000; 678 static final int CE_EXPANSION_TAG_ = 1; 679 static final int CE_CONTRACTION_TAG_ = 2; 680 683 static final int CE_DIGIT_TAG_ = 13; 684 685 687 692 void setCollator(RuleBasedCollator collator) 693 { 694 m_collator_ = collator; 695 updateInternalState(); 696 } 697 698 710 void setExactOffset(int offset) 711 { 712 m_source_.setIndex(offset); 713 updateInternalState(); 714 } 715 716 720 boolean isInBuffer() 721 { 722 return m_bufferOffset_ > 0; 723 } 724 725 726 740 void setText(UCharacterIterator source, int offset) 741 { 742 m_srcUtilIter_.setText(source.getText()); 743 m_source_ = m_srcUtilIter_; 744 m_source_.setIndex(offset); 745 updateInternalState(); 746 } 747 748 750 753 private static final class Backup 754 { 755 757 760 protected int m_FCDLimit_; 761 764 protected int m_FCDStart_; 765 768 protected boolean m_isCodePointHiragana_; 769 772 protected int m_bufferOffset_; 773 776 protected int m_offset_; 777 780 protected StringBuffer m_buffer_; 781 782 784 787 protected Backup() 788 { 789 m_buffer_ = new StringBuffer (); 790 } 791 } 792 794 797 private boolean m_isForwards_; 798 801 private UCharacterIterator m_source_; 802 805 private int m_bufferOffset_; 806 810 private StringBuffer m_buffer_; 811 814 private int m_FCDLimit_; 815 818 private RuleBasedCollator m_collator_; 819 822 private boolean m_isHiragana4_; 823 826 private int m_CEBuffer_[]; 827 835 private static final int CE_BUFFER_INIT_SIZE_ = 512; 836 839 private Backup m_utilSpecialBackUp_; 840 843 private Backup m_utilSpecialEntryBackUp_; 844 847 private Backup m_utilSpecialDiscontiguousBackUp_; 848 851 private StringUCharacterIterator m_srcUtilIter_; 852 private StringBuffer m_utilStringBuffer_; 853 private StringBuffer m_utilSkippedBuffer_; 854 private CollationElementIterator m_utilColEIter_; 855 858 private static final int FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0xC0; 859 863 private static final int LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ = 0x300; 864 867 private static final int LAST_BYTE_MASK_ = 0xFF; 868 871 private static final int SECOND_LAST_BYTE_SHIFT_ = 8; 872 873 875 private static final int CE_EXPANSION_ = 0xF1000000; 876 private static final int CE_CONTRACTION_ = 0xF2000000; 877 881 private static final int CE_NO_MORE_CES_ = 0x00010101; 882 private static final int CE_NO_MORE_CES_PRIMARY_ = 0x00010000; 883 private static final int CE_NO_MORE_CES_SECONDARY_ = 0x00000100; 884 private static final int CE_NO_MORE_CES_TERTIARY_ = 0x00000001; 885 886 private static final int CE_NOT_FOUND_TAG_ = 0; 887 890 private static final int CE_CHARSET_TAG_ = 4; 891 894 private static final int CE_HANGUL_SYLLABLE_TAG_ = 6; 895 898 private static final int CE_LEAD_SURROGATE_TAG_ = 7; 899 902 private static final int CE_TRAIL_SURROGATE_TAG_ = 8; 903 906 private static final int CE_CJK_IMPLICIT_TAG_ = 9; 907 private static final int CE_IMPLICIT_TAG_ = 10; 908 static final int CE_SPEC_PROC_TAG_ = 11; 909 914 private static final int CE_LONG_PRIMARY_TAG_ = 12; 915 916 private static final int CE_CE_TAGS_COUNT = 14; 917 private static final int CE_BYTE_COMMON_ = 0x05; 918 919 921 private static final int HANGUL_SBASE_ = 0xAC00; 922 private static final int HANGUL_LBASE_ = 0x1100; 923 private static final int HANGUL_VBASE_ = 0x1161; 924 private static final int HANGUL_TBASE_ = 0x11A7; 925 private static final int HANGUL_VCOUNT_ = 21; 926 private static final int HANGUL_TCOUNT_ = 28; 927 928 930 private static final int CJK_BASE_ = 0x4E00; 931 private static final int CJK_LIMIT_ = 0x9FFF+1; 932 private static final int CJK_COMPAT_USED_BASE_ = 0xFA0E; 933 private static final int CJK_COMPAT_USED_LIMIT_ = 0xFA2F + 1; 934 private static final int CJK_A_BASE_ = 0x3400; 935 private static final int CJK_A_LIMIT_ = 0x4DBF + 1; 936 private static final int CJK_B_BASE_ = 0x20000; 937 private static final int CJK_B_LIMIT_ = 0x2A6DF + 1; 938 private static final int NON_CJK_OFFSET_ = 0x110000; 939 940 private static final boolean DEBUG = ICUDebug.enabled("collator"); 941 942 944 947 private void updateInternalState() 948 { 949 m_isCodePointHiragana_ = false; 950 m_buffer_.setLength(0); 951 m_bufferOffset_ = -1; 952 m_CEBufferOffset_ = 0; 953 m_CEBufferSize_ = 0; 954 m_FCDLimit_ = -1; 955 m_FCDStart_ = m_source_.getLength(); 956 m_isHiragana4_ = m_collator_.m_isHiragana4_; 957 m_isForwards_ = true; 958 } 959 960 964 private void backupInternalState(Backup backup) 965 { 966 backup.m_offset_ = m_source_.getIndex(); 967 backup.m_FCDLimit_ = m_FCDLimit_; 968 backup.m_FCDStart_ = m_FCDStart_; 969 backup.m_isCodePointHiragana_ = m_isCodePointHiragana_; 970 backup.m_bufferOffset_ = m_bufferOffset_; 971 backup.m_buffer_.setLength(0); 972 if (m_bufferOffset_ >= 0) { 973 if(ICUDebug.isJDK14OrHigher){ 975 backup.m_buffer_.append(m_buffer_); 976 }else{ 977 backup.m_buffer_.append(m_buffer_.toString()); 978 } 979 } 980 } 981 982 986 private void updateInternalState(Backup backup) 987 { 988 m_source_.setIndex(backup.m_offset_); 989 m_isCodePointHiragana_ = backup.m_isCodePointHiragana_; 990 m_bufferOffset_ = backup.m_bufferOffset_; 991 m_FCDLimit_ = backup.m_FCDLimit_; 992 m_FCDStart_ = backup.m_FCDStart_; 993 m_buffer_.setLength(0); 994 if (m_bufferOffset_ >= 0) { 995 m_buffer_.append(backup.m_buffer_.toString()); 997 } 998 } 999 1000 1005 private int getCombiningClass(int ch) 1006 { 1007 if (ch >= LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ && 1008 m_collator_.isUnsafe((char)ch) || ch > 0xFFFF) { 1009 return NormalizerImpl.getCombiningClass(ch); 1010 } 1011 return 0; 1012 } 1013 1014 1021 private void normalize() 1022 { 1023 int size = m_FCDLimit_ - m_FCDStart_; 1024 m_buffer_.setLength(0); 1025 m_source_.setIndex(m_FCDStart_); 1026 for (int i = 0; i < size; i ++) { 1027 m_buffer_.append((char)m_source_.next()); 1028 } 1029 String decomp = Normalizer.decompose(m_buffer_.toString(), false); 1030 m_buffer_.setLength(0); 1031 m_buffer_.append(decomp); 1032 m_bufferOffset_ = 0; 1033 } 1034 1035 1052 private boolean FCDCheck(char ch, int offset) 1053 { 1054 boolean result = true; 1055 1056 m_FCDStart_ = offset; 1059 m_source_.setIndex(offset); 1060 char fcd = NormalizerImpl.getFCD16(ch); 1062 if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { 1063 m_source_.next(); 1064 ch = (char)m_source_.current(); 1065 if (UTF16.isTrailSurrogate(ch)) { 1067 fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch); 1068 } else { 1069 fcd = 0; 1070 } 1071 } 1072 1073 int prevTrailCC = fcd & LAST_BYTE_MASK_; 1074 1075 if (prevTrailCC != 0) { 1076 while (true) { 1079 m_source_.next(); 1080 int ch_int = m_source_.current(); 1081 if (ch_int == UCharacterIterator.DONE) { 1082 break; 1083 } 1084 ch = (char)ch_int; 1085 fcd = NormalizerImpl.getFCD16(ch); 1087 if (fcd != 0 && UTF16.isLeadSurrogate(ch)) { 1088 m_source_.next(); 1089 ch = (char)m_source_.current(); 1090 if (UTF16.isTrailSurrogate(ch)) { 1091 fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, ch); 1092 } else { 1093 fcd = 0; 1094 } 1095 } 1096 int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_; 1097 if (leadCC == 0) { 1098 break; 1100 } 1101 1102 if (leadCC < prevTrailCC) { 1103 result = false; 1104 } 1105 1106 prevTrailCC = fcd & LAST_BYTE_MASK_; 1107 } 1108 } 1109 m_FCDLimit_ = m_source_.getIndex(); 1110 m_source_.setIndex(m_FCDStart_); 1111 m_source_.next(); 1112 return result; 1113 } 1114 1115 1121 private int nextChar() 1122 { 1123 int result; 1124 1125 if (m_bufferOffset_ < 0) { 1127 result = m_source_.current(); 1130 } 1131 else { 1132 if (m_bufferOffset_ >= m_buffer_.length()) { 1134 m_source_.setIndex(m_FCDLimit_); 1137 m_bufferOffset_ = -1; 1138 m_buffer_.setLength(0); 1139 return nextChar(); 1140 } 1141 return m_buffer_.charAt(m_bufferOffset_ ++); 1142 } 1143 int startoffset = m_source_.getIndex(); 1144 if (result < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_ 1145 || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION 1147 || m_bufferOffset_ >= 0 || m_FCDLimit_ > startoffset) { 1148 m_source_.next(); 1150 return result; 1151 } 1152 1153 if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { 1154 m_source_.next(); 1157 int next = m_source_.current(); 1158 if (next == UCharacterIterator.DONE 1159 || next < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_) { 1160 return result; } 1163 } 1164 1165 if (!FCDCheck((char)result, startoffset)) { 1167 normalize(); 1168 result = m_buffer_.charAt(0); 1169 m_bufferOffset_ = 1; 1170 } 1171 return result; 1172 } 1173 1174 1180 private void normalizeBackwards() 1181 { 1182 normalize(); 1183 m_bufferOffset_ = m_buffer_.length(); 1184 } 1185 1186 1204 private boolean FCDCheckBackwards(char ch, int offset) 1205 { 1206 boolean result = true; 1207 char fcd = 0; 1208 m_FCDLimit_ = offset + 1; 1209 m_source_.setIndex(offset); 1210 if (!UTF16.isSurrogate(ch)) { 1211 fcd = NormalizerImpl.getFCD16(ch); 1212 } 1213 else if (UTF16.isTrailSurrogate(ch) && m_FCDLimit_ > 0) { 1214 char trailch = ch; 1216 ch = (char)m_source_.previous(); 1217 if (UTF16.isLeadSurrogate(ch)) { 1218 fcd = NormalizerImpl.getFCD16(ch); 1219 if (fcd != 0) { 1220 fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, 1221 trailch); 1222 } 1223 } 1224 else { 1225 fcd = 0; } 1227 } 1228 1229 int leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_; 1230 1233 while (leadCC != 0) { 1234 offset = m_source_.getIndex(); 1235 if (offset == 0) { 1236 break; 1237 } 1238 ch = (char)m_source_.previous(); 1239 if (!UTF16.isSurrogate(ch)) { 1240 fcd = NormalizerImpl.getFCD16(ch); 1241 } 1242 else if (UTF16.isTrailSurrogate(ch) && m_source_.getIndex() > 0) { 1243 char trail = ch; 1244 ch = (char)m_source_.previous(); 1245 if (UTF16.isLeadSurrogate(ch)) { 1246 fcd = NormalizerImpl.getFCD16(ch); 1247 } 1248 if (fcd != 0) { 1249 fcd = NormalizerImpl.getFCD16FromSurrogatePair(fcd, trail); 1250 } 1251 } 1252 else { 1253 fcd = 0; } 1255 int prevTrailCC = fcd & LAST_BYTE_MASK_; 1256 if (leadCC < prevTrailCC) { 1257 result = false; 1258 } 1259 leadCC = fcd >>> SECOND_LAST_BYTE_SHIFT_; 1260 } 1261 1262 if (fcd == 0) { 1265 m_FCDStart_ = offset; 1266 } 1267 else { 1268 m_FCDStart_ = m_source_.getIndex(); 1269 } 1270 m_source_.setIndex(m_FCDLimit_); 1271 return result; 1272 } 1273 1274 1280 private int previousChar() 1281 { 1282 if (m_bufferOffset_ >= 0) { 1283 m_bufferOffset_ --; 1284 if (m_bufferOffset_ >= 0) { 1285 return m_buffer_.charAt(m_bufferOffset_); 1286 } 1287 else { 1288 m_buffer_.setLength(0); 1290 if (m_FCDStart_ == 0) { 1291 m_FCDStart_ = -1; 1292 m_source_.setIndex(0); 1293 return UCharacterIterator.DONE; 1294 } 1295 else { 1296 m_FCDLimit_ = m_FCDStart_; 1297 m_source_.setIndex(m_FCDStart_); 1298 return previousChar(); 1299 } 1300 } 1301 } 1302 int result = m_source_.previous(); 1303 int startoffset = m_source_.getIndex(); 1304 if (result < LEAD_ZERO_COMBINING_CLASS_FAST_LIMIT_ 1305 || m_collator_.getDecomposition() == Collator.NO_DECOMPOSITION 1306 || m_FCDStart_ <= startoffset || m_source_.getIndex() == 0) { 1307 return result; 1308 } 1309 int ch = m_source_.previous(); 1310 if (ch < FULL_ZERO_COMBINING_CLASS_FAST_LIMIT_) { 1311 m_source_.next(); 1313 return result; 1314 } 1315 if (!FCDCheckBackwards((char)result, startoffset)) { 1317 normalizeBackwards(); 1318 m_bufferOffset_ --; 1319 result = m_buffer_.charAt(m_bufferOffset_); 1320 } 1321 else { 1322 m_source_.setIndex(startoffset); 1324 } 1325 return result; 1326 } 1327 1328 1332 private final boolean isBackwardsStart() 1333 { 1334 return (m_bufferOffset_ < 0 && m_source_.getIndex() == 0) 1335 || (m_bufferOffset_ == 0 && m_FCDStart_ <= 0); 1336 } 1337 1338 1342 private final boolean isEnd() 1343 { 1344 if (m_bufferOffset_ >= 0) { 1345 if (m_bufferOffset_ != m_buffer_.length()) { 1346 return false; 1347 } 1348 else { 1349 return m_FCDLimit_ == m_source_.getLength(); 1351 } 1352 } 1353 return m_source_.getLength() == m_source_.getIndex(); 1354 } 1355 1356 1367 private final int nextSurrogate(RuleBasedCollator collator, int ce, 1368 char trail) 1369 { 1370 if (!UTF16.isTrailSurrogate(trail)) { 1371 updateInternalState(m_utilSpecialBackUp_); 1372 return IGNORABLE; 1373 } 1374 int result = collator.m_trie_.getTrailValue(ce, trail); 1377 if (result == CE_NOT_FOUND_) { 1378 updateInternalState(m_utilSpecialBackUp_); 1379 } 1380 return result; 1381 } 1382 1383 1389 private int getExpansionOffset(RuleBasedCollator collator, int ce) 1390 { 1391 return ((ce & 0xFFFFF0) >> 4) - collator.m_expansionOffset_; 1392 } 1393 1394 1395 1401 private int getContractionOffset(RuleBasedCollator collator, int ce) 1402 { 1403 return (ce & 0xFFFFFF) - collator.m_contractionOffset_; 1404 } 1405 1406 1411 private boolean isSpecialPrefixTag(int ce) 1412 { 1413 return RuleBasedCollator.isSpecial(ce) && 1414 RuleBasedCollator.getTag(ce) == CE_SPEC_PROC_TAG_; 1415 } 1416 1417 1430 private int nextSpecialPrefix(RuleBasedCollator collator, int ce, 1431 Backup entrybackup) 1432 { 1433 backupInternalState(m_utilSpecialBackUp_); 1434 updateInternalState(entrybackup); 1435 previousChar(); 1436 1438 while (true) { 1439 int entryoffset = getContractionOffset(collator, ce); 1444 int offset = entryoffset; 1445 if (isBackwardsStart()) { 1446 ce = collator.m_contractionCE_[offset]; 1447 break; 1448 } 1449 char previous = (char)previousChar(); 1450 while (previous > collator.m_contractionIndex_[offset]) { 1451 offset ++; 1453 } 1454 1455 if (previous == collator.m_contractionIndex_[offset]) { 1456 ce = collator.m_contractionCE_[offset]; 1459 } 1460 else { 1461 ce = collator.m_contractionCE_[entryoffset]; 1463 } 1464 1465 if (!isSpecialPrefixTag(ce)) { 1466 break; 1472 } 1473 } 1474 if (ce != CE_NOT_FOUND_) { 1475 updateInternalState(m_utilSpecialBackUp_); 1477 } 1478 else { updateInternalState(entrybackup); 1481 } 1482 return ce; 1483 } 1484 1485 1490 private boolean isContractionTag(int ce) 1491 { 1492 return RuleBasedCollator.isSpecial(ce) && 1493 RuleBasedCollator.getTag(ce) == CE_CONTRACTION_TAG_; 1494 } 1495 1496 1509 private void setDiscontiguous(StringBuffer skipped) 1510 { 1511 if (m_bufferOffset_ >= 0) { 1512 m_buffer_.replace(0, m_bufferOffset_, skipped.toString()); 1513 } 1514 else { 1515 m_FCDLimit_ = m_source_.getIndex(); 1516 m_buffer_.setLength(0); 1517 m_buffer_.append(skipped.toString()); 1518 } 1519 1520 m_bufferOffset_ = 0; 1521 } 1522 1523 1527 private int currentChar() 1528 { 1529 if (m_bufferOffset_ < 0) { 1530 m_source_.previous(); 1531 return m_source_.next(); 1532 } 1533 1534 return m_buffer_.charAt(m_bufferOffset_ - 1); 1538 } 1539 1540 1549 private int nextDiscontiguous(RuleBasedCollator collator, int entryoffset) 1550 { 1551 int offset = entryoffset; 1552 boolean multicontraction = false; 1553 if (m_utilSkippedBuffer_ == null) { 1555 m_utilSkippedBuffer_ = new StringBuffer (); 1556 } 1557 else { 1558 m_utilSkippedBuffer_.setLength(0); 1559 } 1560 char ch = (char)currentChar(); 1561 m_utilSkippedBuffer_.append((char)currentChar()); 1562 if (m_utilSpecialDiscontiguousBackUp_ == null) { 1564 m_utilSpecialDiscontiguousBackUp_ = new Backup(); 1565 } 1566 backupInternalState(m_utilSpecialDiscontiguousBackUp_); 1567 char nextch = ch; 1568 while (true) { 1569 ch = nextch; 1570 int ch_int = nextChar(); 1571 nextch = (char)ch_int; 1572 if (ch_int == UCharacterIterator.DONE 1573 || getCombiningClass(nextch) == 0) { 1574 if (multicontraction) { 1578 if (ch_int != UCharacterIterator.DONE) { 1579 previousChar(); } 1581 setDiscontiguous(m_utilSkippedBuffer_); 1582 return collator.m_contractionCE_[offset]; 1583 } 1584 break; 1585 } 1586 1587 offset ++; while (nextch > collator.m_contractionIndex_[offset]) { 1589 offset ++; 1590 } 1591 1592 int ce = CE_NOT_FOUND_; 1593 if (nextch != collator.m_contractionIndex_[offset] 1594 || getCombiningClass(nextch) == getCombiningClass(ch)) { 1595 m_utilSkippedBuffer_.append(nextch); 1597 continue; 1598 } 1599 else { 1600 ce = collator.m_contractionCE_[offset]; 1601 } 1602 1603 if (ce == CE_NOT_FOUND_) { 1604 break; 1605 } 1606 else if (isContractionTag(ce)) { 1607 offset = getContractionOffset(collator, ce); 1609 if (collator.m_contractionCE_[offset] != CE_NOT_FOUND_) { 1610 multicontraction = true; 1611 backupInternalState(m_utilSpecialDiscontiguousBackUp_); 1612 } 1613 } 1614 else { 1615 setDiscontiguous(m_utilSkippedBuffer_); 1616 return ce; 1617 } 1618 } 1619 1620 updateInternalState(m_utilSpecialDiscontiguousBackUp_); 1621 previousChar(); 1624 return collator.m_contractionCE_[entryoffset]; 1625 } 1626 1627 1634 private int nextContraction(RuleBasedCollator collator, int ce) 1635 { 1636 backupInternalState(m_utilSpecialBackUp_); 1637 int entryce = collator.m_contractionCE_[getContractionOffset(collator, ce)]; while (true) { 1639 int entryoffset = getContractionOffset(collator, ce); 1640 int offset = entryoffset; 1641 1642 if (isEnd()) { 1643 ce = collator.m_contractionCE_[offset]; 1644 if (ce == CE_NOT_FOUND_) { 1645 ce = entryce; 1648 updateInternalState(m_utilSpecialBackUp_); 1649 } 1650 break; 1651 } 1652 1653 int maxCC = (collator.m_contractionIndex_[offset] & 0xFF); 1655 byte allSame = (byte)(collator.m_contractionIndex_[offset] >> 8); 1657 char ch = (char)nextChar(); 1658 offset ++; 1659 while (ch > collator.m_contractionIndex_[offset]) { 1660 offset ++; 1662 } 1663 1664 if (ch == collator.m_contractionIndex_[offset]) { 1665 ce = collator.m_contractionCE_[offset]; 1668 } 1669 else { 1670 int miss = ch; 1673 if(UTF16.isLeadSurrogate(ch)) { miss = UCharacterProperty.getRawSupplementary(ch, (char) nextChar()); 1676 } 1677 int sCC; 1678 if (maxCC == 0 || (sCC = getCombiningClass(miss)) == 0 1679 || sCC > maxCC || (allSame != 0 && sCC == maxCC) || 1680 isEnd()) { 1681 previousChar(); 1683 if(miss > 0xFFFF) { 1684 previousChar(); 1685 } 1686 ce = collator.m_contractionCE_[entryoffset]; 1687 } 1688 else { 1689 int ch_int = nextChar(); 1692 if (ch_int != UCharacterIterator.DONE) { 1693 previousChar(); 1694 } 1695 char nextch = (char)ch_int; 1696 if (getCombiningClass(nextch) == 0) { 1697 previousChar(); 1698 if(miss > 0xFFFF) { 1699 previousChar(); 1700 } 1701 ce = collator.m_contractionCE_[entryoffset]; 1703 } 1704 else { 1705 ce = nextDiscontiguous(collator, entryoffset); 1706 } 1707 } 1708 } 1709 1710 if (ce == CE_NOT_FOUND_) { 1711 updateInternalState(m_utilSpecialBackUp_); 1713 ce = entryce; 1714 break; 1715 } 1716 1717 if (!isContractionTag(ce)) { 1719 break; 1720 } 1721 1722 if (collator.m_contractionCE_[entryoffset] != CE_NOT_FOUND_) { 1724 entryce = collator.m_contractionCE_[entryoffset]; 1728 backupInternalState(m_utilSpecialBackUp_); 1729 if (m_utilSpecialBackUp_.m_bufferOffset_ >= 0) { 1730 m_utilSpecialBackUp_.m_bufferOffset_ --; 1731 } 1732 else { 1733 m_utilSpecialBackUp_.m_offset_ --; 1734 } 1735 } 1736 } 1737 return ce; 1738 } 1739 1740 1746 private int nextLongPrimary(int ce) 1747 { 1748 m_CEBuffer_[1] = ((ce & 0xFF) << 24) 1749 | RuleBasedCollator.CE_CONTINUATION_MARKER_; 1750 m_CEBufferOffset_ = 1; 1751 m_CEBufferSize_ = 2; 1752 m_CEBuffer_[0] = ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | 1753 CE_BYTE_COMMON_; 1754 return m_CEBuffer_[0]; 1755 } 1756 1757 1762 private int getExpansionCount(int ce) 1763 { 1764 return ce & 0xF; 1765 } 1766 1767 1774 private int nextExpansion(RuleBasedCollator collator, int ce) 1775 { 1776 int offset = getExpansionOffset(collator, ce); 1780 m_CEBufferSize_ = getExpansionCount(ce); 1781 m_CEBufferOffset_ = 1; 1782 m_CEBuffer_[0] = collator.m_expansion_[offset]; 1783 if (m_CEBufferSize_ != 0) { 1784 for (int i = 1; i < m_CEBufferSize_; i ++) { 1786 m_CEBuffer_[i] = collator.m_expansion_[offset + i]; 1787 } 1788 } 1789 else { 1790 m_CEBufferSize_ = 1; 1792 while (collator.m_expansion_[offset] != 0) { 1793 m_CEBuffer_[m_CEBufferSize_ ++] = 1794 collator.m_expansion_[++ offset]; 1795 } 1796 } 1797 if (m_CEBufferSize_ == 1) { 1800 m_CEBufferSize_ = 0; 1801 m_CEBufferOffset_ = 0; 1802 } 1803 return m_CEBuffer_[0]; 1804 } 1805 1806 1813 private int nextDigit(RuleBasedCollator collator, int ce, int cp) 1814 { 1815 1819 if (m_collator_.m_isNumericCollation_){ 1820 int collateVal = 0; 1821 int trailingZeroIndex = 0; 1822 boolean nonZeroValReached = false; 1823 1824 m_utilStringBuffer_.setLength(3); 1829 1830 int digVal = UCharacter.digit(cp); 1835 int digIndx = 1; 1842 for (;;) { 1843 if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) { 1845 m_utilStringBuffer_.setLength(m_utilStringBuffer_.length() 1846 << 1); 1847 } 1848 if (digVal != 0 || nonZeroValReached) { 1850 if (digVal != 0 && !nonZeroValReached) { 1851 nonZeroValReached = true; 1852 } 1853 if (digIndx % 2 == 1) { 1865 collateVal += digVal; 1866 if (collateVal == 0 && trailingZeroIndex == 0) { 1868 trailingZeroIndex = ((digIndx - 1) >>> 1) + 2; 1869 } 1870 else if (trailingZeroIndex != 0) { 1871 trailingZeroIndex = 0; 1872 } 1873 m_utilStringBuffer_.setCharAt( 1874 ((digIndx - 1) >>> 1) + 2, 1875 (char)((collateVal << 1) + 6)); 1876 collateVal = 0; 1877 } 1878 else { 1879 collateVal = digVal * 10; 1883 m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2, 1884 (char)((collateVal << 1) + 6)); 1885 } 1886 digIndx ++; 1887 } 1888 1889 if (!isEnd()){ 1891 backupInternalState(m_utilSpecialBackUp_); 1892 int char32 = nextChar(); 1893 char ch = (char)char32; 1894 if (UTF16.isLeadSurrogate(ch)){ 1895 if (!isEnd()) { 1896 char trail = (char)nextChar(); 1897 if (UTF16.isTrailSurrogate(trail)) { 1898 char32 = UCharacterProperty.getRawSupplementary( 1899 ch, trail); 1900 } 1901 else { 1902 goBackOne(); 1903 } 1904 } 1905 } 1906 1907 digVal = UCharacter.digit(char32); 1908 if (digVal == -1) { 1909 updateInternalState(m_utilSpecialBackUp_); 1913 break; 1914 } 1915 } 1916 else { 1917 break; 1918 } 1919 } 1920 1921 if (nonZeroValReached == false){ 1922 digIndx = 2; 1923 m_utilStringBuffer_.setCharAt(2, (char)6); 1924 } 1925 1926 int endIndex = trailingZeroIndex != 0 ? trailingZeroIndex 1927 : (digIndx >>> 1) + 2; 1928 if (digIndx % 2 != 0){ 1929 for (int i = 2; i < endIndex; i ++){ 1938 m_utilStringBuffer_.setCharAt(i, 1939 (char)((((((m_utilStringBuffer_.charAt(i) - 6) >>> 1) 1940 % 10) * 10) 1941 + (((m_utilStringBuffer_.charAt(i + 1) - 6) 1942 >>> 1) / 10) << 1) + 6)); 1943 } 1944 -- digIndx; 1945 } 1946 1947 m_utilStringBuffer_.setCharAt(endIndex - 1, 1949 (char)(m_utilStringBuffer_.charAt(endIndex - 1) - 1)); 1950 1951 m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER); 1956 m_utilStringBuffer_.setCharAt(1, 1957 (char)(0x80 + ((digIndx >>> 1) & 0x7F))); 1958 1959 ce = (((m_utilStringBuffer_.charAt(0) << 8) 1962 | m_utilStringBuffer_.charAt(1)) 1964 << RuleBasedCollator.CE_PRIMARY_SHIFT_) 1965 | (RuleBasedCollator.BYTE_COMMON_ 1967 << RuleBasedCollator.CE_SECONDARY_SHIFT_) 1968 | RuleBasedCollator.BYTE_COMMON_; int i = 2; 1971 m_CEBuffer_[0] = ce; 1972 m_CEBufferSize_ = 1; 1973 m_CEBufferOffset_ = 1; 1974 while (i < endIndex) 1975 { 1976 int primWeight = m_utilStringBuffer_.charAt(i ++) << 8; 1977 if (i < endIndex) { 1978 primWeight |= m_utilStringBuffer_.charAt(i ++); 1979 } 1980 m_CEBuffer_[m_CEBufferSize_ ++] 1981 = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_) 1982 | RuleBasedCollator.CE_CONTINUATION_MARKER_; 1983 } 1984 return ce; 1985 } 1986 1987 return collator.m_expansion_[getExpansionOffset(collator, ce)]; 1991 } 1992 1993 1998 private int nextImplicit(int codepoint) 1999 { 2000 if (!UCharacter.isLegal(codepoint)) { 2001 return IGNORABLE; 2004 } 2005 int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint); 2006 m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_) 2007 | 0x00000505; 2008 m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0; 2009 m_CEBufferOffset_ = 1; 2010 m_CEBufferSize_ = 2; 2011 return m_CEBuffer_[0]; 2012 } 2013 2014 2019 private int nextSurrogate(char ch) 2020 { 2021 int ch_int = nextChar(); 2022 char nextch = (char)ch_int; 2023 if (ch_int != CharacterIterator.DONE && 2024 UTF16.isTrailSurrogate(nextch)) { 2025 int codepoint = UCharacterProperty.getRawSupplementary(ch, nextch); 2026 return nextImplicit(codepoint); 2027 } 2028 if (nextch != CharacterIterator.DONE) { 2029 previousChar(); } 2031 return IGNORABLE; } 2033 2034 2041 private int nextHangul(RuleBasedCollator collator, char ch) 2042 { 2043 char L = (char)(ch - HANGUL_SBASE_); 2044 2045 char T = (char)(L % HANGUL_TCOUNT_); 2049 L /= HANGUL_TCOUNT_; 2050 char V = (char)(L % HANGUL_VCOUNT_); 2051 L /= HANGUL_VCOUNT_; 2052 2053 L += HANGUL_LBASE_; 2055 V += HANGUL_VBASE_; 2056 T += HANGUL_TBASE_; 2057 2058 m_CEBufferSize_ = 0; 2061 if (!collator.m_isJamoSpecial_) { m_CEBuffer_[m_CEBufferSize_ ++] = 2063 collator.m_trie_.getLeadValue(L); 2064 m_CEBuffer_[m_CEBufferSize_ ++] = 2065 collator.m_trie_.getLeadValue(V); 2066 2067 if (T != HANGUL_TBASE_) { 2068 m_CEBuffer_[m_CEBufferSize_ ++] = 2069 collator.m_trie_.getLeadValue(T); 2070 } 2071 m_CEBufferOffset_ = 1; 2072 return m_CEBuffer_[0]; 2073 } 2074 else { 2075 m_buffer_.append((char)L); 2081 m_buffer_.append((char)V); 2082 if (T != HANGUL_TBASE_) { 2083 m_buffer_.append((char)T); 2084 } 2085 m_FCDLimit_ = m_source_.getIndex(); 2086 m_FCDStart_ = m_FCDLimit_ - 1; 2087 return IGNORABLE; 2090 } 2091 } 2092 2093 2100 private int nextSpecial(RuleBasedCollator collator, int ce, char ch) 2101 { 2102 int codepoint = ch; 2103 Backup entrybackup = m_utilSpecialEntryBackUp_; 2104 if (entrybackup != null) { 2106 m_utilSpecialEntryBackUp_ = null; 2107 } 2108 else { 2109 entrybackup = new Backup(); 2110 } 2111 backupInternalState(entrybackup); 2112 try { while (true) { 2114 switch(RuleBasedCollator.getTag(ce)) { 2117 case CE_NOT_FOUND_TAG_: 2118 return ce; 2120 case RuleBasedCollator.CE_SURROGATE_TAG_: 2121 if (isEnd()) { 2122 return IGNORABLE; 2123 } 2124 backupInternalState(m_utilSpecialBackUp_); 2125 char trail = (char)nextChar(); 2126 ce = nextSurrogate(collator, ce, trail); 2127 codepoint = 2130 UCharacterProperty.getRawSupplementary(ch, trail); 2131 break; 2132 case CE_SPEC_PROC_TAG_: 2133 ce = nextSpecialPrefix(collator, ce, entrybackup); 2134 break; 2135 case CE_CONTRACTION_TAG_: 2136 ce = nextContraction(collator, ce); 2137 break; 2138 case CE_LONG_PRIMARY_TAG_: 2139 return nextLongPrimary(ce); 2140 case CE_EXPANSION_TAG_: 2141 return nextExpansion(collator, ce); 2142 case CE_DIGIT_TAG_: 2143 ce = nextDigit(collator, ce, codepoint); 2144 break; 2145 case CE_CJK_IMPLICIT_TAG_: 2147 return nextImplicit(codepoint); 2149 case CE_IMPLICIT_TAG_: return nextImplicit(codepoint); 2151 case CE_TRAIL_SURROGATE_TAG_: 2152 return IGNORABLE; case CE_LEAD_SURROGATE_TAG_: return nextSurrogate(ch); 2155 case CE_HANGUL_SYLLABLE_TAG_: return nextHangul(collator, ch); 2157 case CE_CHARSET_TAG_: 2158 return CE_NOT_FOUND_; 2160 default: 2161 ce = IGNORABLE; 2162 } 2164 if (!RuleBasedCollator.isSpecial(ce)) { 2165 break; 2166 } 2167 } 2168 } 2169 finally { 2170 m_utilSpecialEntryBackUp_ = entrybackup; 2171 } 2172 return ce; 2173 } 2174 2175 2186 private int previousSpecialPrefix(RuleBasedCollator collator, int ce) 2187 { 2188 backupInternalState(m_utilSpecialBackUp_); 2189 while (true) { 2190 int offset = getContractionOffset(collator, ce); 2192 int entryoffset = offset; 2193 if (isBackwardsStart()) { 2194 ce = collator.m_contractionCE_[offset]; 2195 break; 2196 } 2197 char prevch = (char)previousChar(); 2198 while (prevch > collator.m_contractionIndex_[offset]) { 2199 offset ++; 2202 } 2203 if (prevch == collator.m_contractionIndex_[offset]) { 2204 ce = collator.m_contractionCE_[offset]; 2205 } 2206 else { 2207 int isZeroCE = collator.m_trie_.getLeadValue(prevch); 2214 if (isZeroCE == 0) { 2216 continue; 2217 } 2218 else if (UTF16.isTrailSurrogate(prevch) 2219 || UTF16.isLeadSurrogate(prevch)) { 2220 if (!isBackwardsStart()) { 2227 char lead = (char)previousChar(); 2228 if (UTF16.isLeadSurrogate(lead)) { 2229 isZeroCE = collator.m_trie_.getLeadValue(lead); 2230 if (RuleBasedCollator.getTag(isZeroCE) 2231 == RuleBasedCollator.CE_SURROGATE_TAG_) { 2232 int finalCE = collator.m_trie_.getTrailValue( 2233 isZeroCE, 2234 prevch); 2235 if (finalCE == 0) { 2236 continue; 2239 } 2240 } 2241 } 2242 else { 2243 nextChar(); continue; 2246 } 2247 nextChar(); } 2249 else { 2250 continue; 2252 } 2253 } 2254 2255 ce = collator.m_contractionCE_[entryoffset]; 2257 } 2258 2259 if (!isSpecialPrefixTag(ce)) { 2260 break; 2264 } 2265 } 2266 updateInternalState(m_utilSpecialBackUp_); 2267 return ce; 2268 } 2269 2270 2280 private int previousContraction(RuleBasedCollator collator, int ce, char ch) 2281 { 2282 m_utilStringBuffer_.setLength(0); 2283 char prevch = (char)previousChar(); 2286 boolean atStart = false; 2287 while (collator.isUnsafe(ch)) { 2290 m_utilStringBuffer_.insert(0, ch); 2291 ch = prevch; 2292 if (isBackwardsStart()) { 2293 atStart = true; 2294 break; 2295 } 2296 prevch = (char)previousChar(); 2297 } 2298 if (!atStart) { 2299 nextChar(); 2301 } 2302 m_utilStringBuffer_.insert(0, ch); 2304 2305 int originaldecomp = collator.getDecomposition(); 2310 collator.setDecomposition(Collator.NO_DECOMPOSITION); 2312 if (m_utilColEIter_ == null) { 2313 m_utilColEIter_ = new CollationElementIterator( 2314 m_utilStringBuffer_.toString(), 2315 collator); 2316 } 2317 else { 2318 m_utilColEIter_.m_collator_ = collator; 2319 m_utilColEIter_.setText(m_utilStringBuffer_.toString()); 2320 } 2321 ce = m_utilColEIter_.next(); 2322 m_CEBufferSize_ = 0; 2323 while (ce != NULLORDER) { 2324 if (m_CEBufferSize_ == m_CEBuffer_.length) { 2325 try { 2326 int tempbuffer[] = new int[m_CEBuffer_.length + 50]; 2328 System.arraycopy(m_CEBuffer_, 0, tempbuffer, 0, 2329 m_CEBuffer_.length); 2330 m_CEBuffer_ = tempbuffer; 2331 } 2332 catch( MissingResourceException e) 2333 { 2334 throw e; 2335 } 2336 catch (Exception e) { 2337 if(DEBUG){ 2338 e.printStackTrace(); 2339 } 2340 return NULLORDER; 2341 } 2342 } 2343 m_CEBuffer_[m_CEBufferSize_ ++] = ce; 2344 ce = m_utilColEIter_.next(); 2345 } 2346 collator.setDecomposition(originaldecomp); 2347 m_CEBufferOffset_ = m_CEBufferSize_ - 1; 2348 return m_CEBuffer_[m_CEBufferOffset_]; 2349 } 2350 2351 2356 private int previousLongPrimary(int ce) 2357 { 2358 m_CEBufferSize_ = 0; 2359 m_CEBuffer_[m_CEBufferSize_ ++] = 2360 ((ce & 0xFFFF00) << 8) | (CE_BYTE_COMMON_ << 8) | CE_BYTE_COMMON_; 2361 m_CEBuffer_[m_CEBufferSize_ ++] = ((ce & 0xFF) << 24) 2362 | RuleBasedCollator.CE_CONTINUATION_MARKER_; 2363 m_CEBufferOffset_ = m_CEBufferSize_ - 1; 2364 return m_CEBuffer_[m_CEBufferOffset_]; 2365 } 2366 2367 2373 private int previousExpansion(RuleBasedCollator collator, int ce) 2374 { 2375 int offset = getExpansionOffset(collator, ce); 2377 m_CEBufferSize_ = getExpansionCount(ce); 2378 if (m_CEBufferSize_ != 0) { 2379 for (int i = 0; i < m_CEBufferSize_; i ++) { 2381 m_CEBuffer_[i] = collator.m_expansion_[offset + i]; 2382 } 2383 2384 } 2385 else { 2386 while (collator.m_expansion_[offset + m_CEBufferSize_] != 0) { 2388 m_CEBuffer_[m_CEBufferSize_] = 2389 collator.m_expansion_[offset + m_CEBufferSize_]; 2390 m_CEBufferSize_ ++; 2391 } 2392 } 2393 m_CEBufferOffset_ = m_CEBufferSize_ - 1; 2394 return m_CEBuffer_[m_CEBufferOffset_]; 2395 } 2396 2397 2404 private int previousDigit(RuleBasedCollator collator, int ce, char ch) 2405 { 2406 if (m_collator_.m_isNumericCollation_){ 2409 int leadingZeroIndex = 0; 2410 int collateVal = 0; 2411 boolean nonZeroValReached = false; 2412 2413 m_utilStringBuffer_.setLength(3); 2415 2416 int char32 = ch; 2420 if (UTF16.isTrailSurrogate(ch)) { 2421 if (!isBackwardsStart()){ 2422 char lead = (char)previousChar(); 2423 if (UTF16.isLeadSurrogate(lead)) { 2424 char32 = UCharacterProperty.getRawSupplementary(lead, 2425 ch); 2426 } 2427 else { 2428 goForwardOne(); 2429 } 2430 } 2431 } 2432 int digVal = UCharacter.digit(char32); 2433 int digIndx = 0; 2434 for (;;) { 2435 if (digIndx >= ((m_utilStringBuffer_.length() - 2) << 1)) { 2437 m_utilStringBuffer_.setLength(m_utilStringBuffer_.length() 2438 << 1); 2439 } 2440 if (digVal != 0 || nonZeroValReached) { 2442 if (digVal != 0 && !nonZeroValReached) { 2443 nonZeroValReached = true; 2444 } 2445 2446 2460 if (digIndx % 2 == 1){ 2461 collateVal += digVal * 10; 2462 2463 if (collateVal == 0 && leadingZeroIndex == 0) { 2465 leadingZeroIndex = ((digIndx - 1) >>> 1) + 2; 2466 } 2467 else if (leadingZeroIndex != 0) { 2468 leadingZeroIndex = 0; 2469 } 2470 2471 m_utilStringBuffer_.setCharAt(((digIndx - 1) >>> 1) + 2, 2472 (char)((collateVal << 1) + 6)); 2473 collateVal = 0; 2474 } 2475 else { 2476 collateVal = digVal; 2477 } 2478 } 2479 digIndx ++; 2480 2481 if (!isBackwardsStart()){ 2482 backupInternalState(m_utilSpecialBackUp_); 2483 char32 = previousChar(); 2484 ch = (char)ch; 2485 if (UTF16.isTrailSurrogate(ch)){ 2486 if (!isBackwardsStart()) { 2487 char lead = (char)previousChar(); 2488 if (UTF16.isLeadSurrogate(lead)) { 2489 char32 2490 = UCharacterProperty.getRawSupplementary( 2491 lead, ch); 2492 } 2493 else { 2494 updateInternalState(m_utilSpecialBackUp_); 2495 } 2496 } 2497 } 2498 2499 digVal = UCharacter.digit(char32); 2500 if (digVal == -1) { 2501 updateInternalState(m_utilSpecialBackUp_); 2502 break; 2503 } 2504 } 2505 else { 2506 break; 2507 } 2508 } 2509 2510 if (nonZeroValReached == false) { 2511 digIndx = 2; 2512 m_utilStringBuffer_.setCharAt(2, (char)6); 2513 } 2514 2515 if (digIndx % 2 != 0) { 2516 if (collateVal == 0 && leadingZeroIndex == 0) { 2517 leadingZeroIndex = ((digIndx - 1) >>> 1) + 2; 2520 } 2521 else { 2522 m_utilStringBuffer_.setCharAt((digIndx >>> 1) + 2, 2524 (char)((collateVal << 1) + 6)); 2525 digIndx ++; 2526 } 2527 } 2528 2529 int endIndex = leadingZeroIndex != 0 ? leadingZeroIndex 2530 : ((digIndx >>> 1) + 2) ; 2531 digIndx = ((endIndex - 2) << 1) + 1; m_utilStringBuffer_.setCharAt(2, 2535 (char)(m_utilStringBuffer_.charAt(2) - 1)); 2536 m_utilStringBuffer_.setCharAt(0, (char)RuleBasedCollator.CODAN_PLACEHOLDER); 2541 m_utilStringBuffer_.setCharAt(1, 2542 (char)(0x80 + ((digIndx >>> 1) & 0x7F))); 2543 2544 m_CEBufferSize_ = 0; 2548 m_CEBuffer_[m_CEBufferSize_ ++] 2549 = (((m_utilStringBuffer_.charAt(0) << 8) 2550 | m_utilStringBuffer_.charAt(1)) 2552 << RuleBasedCollator.CE_PRIMARY_SHIFT_) 2553 | (RuleBasedCollator.BYTE_COMMON_ 2555 << RuleBasedCollator.CE_SECONDARY_SHIFT_) 2556 | RuleBasedCollator.BYTE_COMMON_; 2558 int i = endIndex - 1; while (i >= 2) { 2560 int primWeight = m_utilStringBuffer_.charAt(i --) << 8; 2561 if (i >= 2) { 2562 primWeight |= m_utilStringBuffer_.charAt(i --); 2563 } 2564 m_CEBuffer_[m_CEBufferSize_ ++] 2565 = (primWeight << RuleBasedCollator.CE_PRIMARY_SHIFT_) 2566 | RuleBasedCollator.CE_CONTINUATION_MARKER_; 2567 } 2568 m_CEBufferOffset_ = m_CEBufferSize_ - 1; 2569 return m_CEBuffer_[m_CEBufferOffset_]; 2570 } 2571 else { 2572 return collator.m_expansion_[getExpansionOffset(collator, ce)]; 2573 } 2574 } 2575 2576 2582 private int previousHangul(RuleBasedCollator collator, char ch) 2583 { 2584 char L = (char)(ch - HANGUL_SBASE_); 2585 char T = (char)(L % HANGUL_TCOUNT_); 2588 L /= HANGUL_TCOUNT_; 2589 char V = (char)(L % HANGUL_VCOUNT_); 2590 L /= HANGUL_VCOUNT_; 2591 2592 L += HANGUL_LBASE_; 2594 V += HANGUL_VBASE_; 2595 T += HANGUL_TBASE_; 2596 2597 m_CEBufferSize_ = 0; 2598 if (!collator.m_isJamoSpecial_) { 2599 m_CEBuffer_[m_CEBufferSize_ ++] = 2600 collator.m_trie_.getLeadValue(L); 2601 m_CEBuffer_[m_CEBufferSize_ ++] = 2602 collator.m_trie_.getLeadValue(V); 2603 if (T != HANGUL_TBASE_) { 2604 m_CEBuffer_[m_CEBufferSize_ ++] = 2605 collator.m_trie_.getLeadValue(T); 2606 } 2607 m_CEBufferOffset_ = m_CEBufferSize_ - 1; 2608 return m_CEBuffer_[m_CEBufferOffset_]; 2609 } 2610 else { 2611 m_buffer_.append(L); 2615 m_buffer_.append(V); 2616 if (T != HANGUL_TBASE_) { 2617 m_buffer_.append(T); 2618 } 2619 2620 m_FCDStart_ = m_source_.getIndex(); 2621 m_FCDLimit_ = m_FCDStart_ + 1; 2622 return IGNORABLE; 2623 } 2624 } 2625 2626 2631 private int previousImplicit(int codepoint) 2632 { 2633 if (!UCharacter.isLegal(codepoint)) { 2634 return IGNORABLE; } 2636 int result = RuleBasedCollator.impCEGen_.getImplicitFromCodePoint(codepoint); 2637 m_CEBufferSize_ = 2; 2638 m_CEBufferOffset_ = 1; 2639 m_CEBuffer_[0] = (result & RuleBasedCollator.CE_PRIMARY_MASK_) 2640 | 0x00000505; 2641 m_CEBuffer_[1] = ((result & 0x0000FFFF) << 16) | 0x000000C0; 2642 return m_CEBuffer_[1]; 2643 } 2644 2645 2650 private int previousSurrogate(char ch) 2651 { 2652 if (isBackwardsStart()) { 2653 return IGNORABLE; 2655 } 2656 char prevch = (char)previousChar(); 2657 if (UTF16.isLeadSurrogate(prevch)) { 2659 return previousImplicit( 2660 UCharacterProperty.getRawSupplementary(prevch, ch)); 2661 } 2662 if (prevch != CharacterIterator.DONE) { 2663 nextChar(); 2664 } 2665 return IGNORABLE; } 2667 2668 2675 private int previousSpecial(RuleBasedCollator collator, int ce, char ch) 2676 { 2677 while(true) { 2678 switch (RuleBasedCollator.getTag(ce)) { 2681 case CE_NOT_FOUND_TAG_: return ce; 2683 case RuleBasedCollator.CE_SURROGATE_TAG_: 2684 return IGNORABLE; 2687 case CE_SPEC_PROC_TAG_: 2688 ce = previousSpecialPrefix(collator, ce); 2689 break; 2690 case CE_CONTRACTION_TAG_: 2691 if (isBackwardsStart()) { 2693 ce = collator.m_contractionCE_[ 2695 getContractionOffset(collator, ce)]; 2696 break; 2697 } 2698 return previousContraction(collator, ce, ch); case CE_LONG_PRIMARY_TAG_: 2700 return previousLongPrimary(ce); 2701 case CE_EXPANSION_TAG_: return previousExpansion(collator, ce); 2703 case CE_DIGIT_TAG_: 2704 ce = previousDigit(collator, ce, ch); 2705 break; 2706 case CE_HANGUL_SYLLABLE_TAG_: return previousHangul(collator, ch); 2708 case CE_LEAD_SURROGATE_TAG_: return IGNORABLE; case CE_TRAIL_SURROGATE_TAG_: return previousSurrogate(ch); 2712 case CE_CJK_IMPLICIT_TAG_: 2713 return previousImplicit(ch); 2715 case CE_IMPLICIT_TAG_: return previousImplicit(ch); 2718 case CE_CHARSET_TAG_: return CE_NOT_FOUND_; 2720 default: ce = IGNORABLE; 2722 } 2723 if (!RuleBasedCollator.isSpecial(ce)) { 2724 break; 2725 } 2726 } 2727 return ce; 2728 } 2729 2730 2735 2777 2812 2821 private char peekCharacter(int offset) 2822 { 2823 if (offset != 0) { 2824 int currentoffset = m_source_.getIndex(); 2825 m_source_.setIndex(currentoffset + offset); 2826 char result = (char)m_source_.current(); 2827 m_source_.setIndex(currentoffset); 2828 return result; 2829 } 2830 else { 2831 return (char)m_source_.current(); 2832 } 2833 } 2834 2835 2844 private void goBackOne() 2845 { 2846 if (m_bufferOffset_ >= 0) { 2847 m_bufferOffset_ --; 2848 } 2849 else { 2850 m_source_.setIndex(m_source_.getIndex() - 1); 2851 } 2852 } 2853 2854 2863 private void goForwardOne() 2864 { 2865 if (m_bufferOffset_ < 0) { 2866 m_source_.setIndex(m_source_.getIndex() + 1); 2869 } 2870 else { 2871 m_bufferOffset_ ++; 2873 } 2874 } 2875} 2876 | Popular Tags |