1 7 package com.ibm.icu.text; 8 9 import java.text.ParseException ; 10 import java.util.Hashtable ; 11 import java.util.Arrays ; 12 import com.ibm.icu.lang.UCharacter; 13 import com.ibm.icu.impl.UCharacterProperty; 14 15 22 final class CollationRuleParser 23 { 24 26 28 38 CollationRuleParser(String rules) throws ParseException 39 { 40 extractSetsFromRules(rules); 41 m_source_ = new StringBuffer (Normalizer.decompose(rules, false).trim()); 42 m_rules_ = m_source_.toString(); 43 m_current_ = 0; 44 m_extraCurrent_ = m_source_.length(); 45 m_variableTop_ = null; 46 m_parsedToken_ = new ParsedToken(); 47 m_hashTable_ = new Hashtable (); 48 m_options_ = new OptionSet(RuleBasedCollator.UCA_); 49 m_listHeader_ = new TokenListHeader[512]; 50 m_resultLength_ = 0; 51 } 55 56 58 61 static class OptionSet 62 { 63 65 69 OptionSet(RuleBasedCollator collator) 70 { 71 m_variableTopValue_ = collator.m_variableTopValue_; 72 m_isFrenchCollation_ = collator.isFrenchCollation(); 73 m_isAlternateHandlingShifted_ 74 = collator.isAlternateHandlingShifted(); 75 m_caseFirst_ = collator.m_caseFirst_; 76 m_isCaseLevel_ = collator.isCaseLevel(); 77 m_decomposition_ = collator.getDecomposition(); 78 m_strength_ = collator.getStrength(); 79 m_isHiragana4_ = collator.m_isHiragana4_; 80 } 81 82 84 int m_variableTopValue_; 85 boolean m_isFrenchCollation_; 86 89 boolean m_isAlternateHandlingShifted_; 90 93 int m_caseFirst_; 94 97 boolean m_isCaseLevel_; 98 101 int m_decomposition_; 102 105 int m_strength_; 106 109 boolean m_isHiragana4_; 110 } 111 112 115 static class TokenListHeader 116 { 117 Token m_first_; 118 Token m_last_; 119 Token m_reset_; 120 boolean m_indirect_; 121 int m_baseCE_; 122 int m_baseContCE_; 123 int m_nextCE_; 124 int m_nextContCE_; 125 int m_previousCE_; 126 int m_previousContCE_; 127 int m_pos_[] = new int[Collator.IDENTICAL + 1]; 128 int m_gapsLo_[] = new int[3 * (Collator.TERTIARY + 1)]; 129 int m_gapsHi_[] = new int[3 * (Collator.TERTIARY + 1)]; 130 int m_numStr_[] = new int[3 * (Collator.TERTIARY + 1)]; 131 Token m_fStrToken_[] = new Token[Collator.TERTIARY + 1]; 132 Token m_lStrToken_[] = new Token[Collator.TERTIARY + 1]; 133 } 134 135 138 static class Token 139 { 140 142 int m_CE_[]; 143 int m_CELength_; 144 int m_expCE_[]; 145 int m_expCELength_; 146 int m_source_; 147 int m_expansion_; 148 int m_prefix_; 149 int m_strength_; 150 int m_toInsert_; 151 int m_polarity_; TokenListHeader m_listHeader_; 153 Token m_previous_; 154 Token m_next_; 155 StringBuffer m_rules_; 156 char m_flags_; 157 158 160 Token() 161 { 162 m_CE_ = new int[128]; 163 m_expCE_ = new int[128]; 164 m_polarity_ = TOKEN_POLARITY_POSITIVE_; 166 m_next_ = null; 167 m_previous_ = null; 168 m_CELength_ = 0; 169 m_expCELength_ = 0; 170 } 171 172 174 178 public int hashCode() 179 { 180 int result = 0; 181 int len = (m_source_ & 0xFF000000) >>> 24; 182 int inc = ((len - 32) / 32) + 1; 183 184 int start = m_source_ & 0x00FFFFFF; 185 int limit = start + len; 186 187 while (start < limit) { 188 result = (result * 37) + m_rules_.charAt(start); 189 start += inc; 190 } 191 return result; 192 } 193 194 199 public boolean equals(Object target) 200 { 201 if (target == this) { 202 return true; 203 } 204 if (target instanceof Token) { 205 Token t = (Token)target; 206 int sstart = m_source_ & 0x00FFFFFF; 207 int tstart = t.m_source_ & 0x00FFFFFF; 208 int slimit = (m_source_ & 0xFF000000) >> 24; 209 int tlimit = (m_source_ & 0xFF000000) >> 24; 210 211 int end = sstart + slimit - 1; 212 213 if (m_source_ == 0 || t.m_source_ == 0) { 214 return false; 215 } 216 if (slimit != tlimit) { 217 return false; 218 } 219 if (m_source_ == t.m_source_) { 220 return true; 221 } 222 223 while (sstart < end 224 && m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) 225 { 226 ++ sstart; 227 ++ tstart; 228 } 229 if (m_rules_.charAt(sstart) == t.m_rules_.charAt(tstart)) { 230 return true; 231 } 232 } 233 return false; 234 } 235 } 236 237 239 242 static final int TOKEN_RESET_ = 0xDEADBEEF; 243 244 247 int m_resultLength_; 248 251 TokenListHeader m_listHeader_[]; 252 255 Token m_variableTop_; 256 259 OptionSet m_options_; 260 263 StringBuffer m_source_; 264 267 Hashtable m_hashTable_; 268 269 271 void setDefaultOptionsInCollator(RuleBasedCollator collator) 272 { 273 collator.m_defaultStrength_ = m_options_.m_strength_; 274 collator.m_defaultDecomposition_ = m_options_.m_decomposition_; 275 collator.m_defaultIsFrenchCollation_ = m_options_.m_isFrenchCollation_; 276 collator.m_defaultIsAlternateHandlingShifted_ 277 = m_options_.m_isAlternateHandlingShifted_; 278 collator.m_defaultIsCaseLevel_ = m_options_.m_isCaseLevel_; 279 collator.m_defaultCaseFirst_ = m_options_.m_caseFirst_; 280 collator.m_defaultIsHiragana4_ = m_options_.m_isHiragana4_; 281 collator.m_defaultVariableTopValue_ = m_options_.m_variableTopValue_; 282 } 283 284 286 290 private static class ParsedToken 291 { 292 294 297 ParsedToken() 298 { 299 m_charsLen_ = 0; 300 m_charsOffset_ = 0; 301 m_extensionLen_ = 0; 302 m_extensionOffset_ = 0; 303 m_prefixLen_ = 0; 304 m_prefixOffset_ = 0; 305 m_flags_ = 0; 306 m_strength_ = TOKEN_UNSET_; 307 } 308 309 311 int m_strength_; 312 int m_charsOffset_; 313 int m_charsLen_; 314 int m_extensionOffset_; 315 int m_extensionLen_; 316 int m_prefixOffset_; 317 int m_prefixLen_; 318 char m_flags_; 319 char m_indirectIndex_; 320 } 321 322 325 private static class IndirectBoundaries 326 { 327 329 IndirectBoundaries(int startce[], int limitce[]) 330 { 331 m_startCE_ = startce[0]; 334 m_startContCE_ = startce[1]; 335 if (limitce != null) { 336 m_limitCE_ = limitce[0]; 337 m_limitContCE_ = limitce[1]; 338 } 339 else { 340 m_limitCE_ = 0; 341 m_limitContCE_ = 0; 342 } 343 } 344 345 347 int m_startCE_; 348 int m_startContCE_; 349 int m_limitCE_; 350 int m_limitContCE_; 351 } 352 353 356 private static class TokenOption 357 { 358 360 TokenOption(String name, int attribute, String suboptions[], 361 int suboptionattributevalue[]) 362 { 363 m_name_ = name; 364 m_attribute_ = attribute; 365 m_subOptions_ = suboptions; 366 m_subOptionAttributeValues_ = suboptionattributevalue; 367 } 368 369 371 private String m_name_; 372 private int m_attribute_; 373 private String m_subOptions_[]; 374 private int m_subOptionAttributeValues_[]; 375 } 376 377 379 382 private ParsedToken m_parsedToken_; 383 386 private String m_rules_; 387 private int m_current_; 388 392 private int m_optionEnd_; 393 396 private int m_sourceLimit_; 397 400 private int m_extraCurrent_; 401 402 405 UnicodeSet m_copySet_; 406 407 412 UnicodeSet m_removeSet_; 413 417 private static final int TOKEN_EXTRA_RULE_SPACE_SIZE_ = 2048; 418 421 private static final int TOKEN_UNSET_ = 0xFFFFFFFF; 422 426 private static final int TOKEN_POLARITY_NEGATIVE_ = 0; 427 431 private static final int TOKEN_POLARITY_POSITIVE_ = 1; 432 435 private static final int TOKEN_TOP_MASK_ = 0x04; 436 439 private static final int TOKEN_VARIABLE_TOP_MASK_ = 0x08; 440 443 private static final int TOKEN_BEFORE_ = 0x03; 444 447 private static final int TOKEN_SUCCESS_MASK_ = 0x10; 448 449 464 private static final IndirectBoundaries INDIRECT_BOUNDARIES_[]; 465 466 469 private static final int INVERSE_SIZE_MASK_ = 0xFFF00000; 470 private static final int INVERSE_OFFSET_MASK_ = 0x000FFFFF; 471 private static final int INVERSE_SHIFT_VALUE_ = 20; 472 473 481 private static final TokenOption RULES_OPTIONS_[]; 482 483 static 484 { 485 INDIRECT_BOUNDARIES_ = new IndirectBoundaries[15]; 486 INDIRECT_BOUNDARIES_[0] = new IndirectBoundaries( 488 RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_, 489 RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_); 490 INDIRECT_BOUNDARIES_[1] = new IndirectBoundaries( 492 RuleBasedCollator.UCA_CONSTANTS_.FIRST_PRIMARY_IGNORABLE_, 493 null); 494 INDIRECT_BOUNDARIES_[2] = new IndirectBoundaries( 496 RuleBasedCollator.UCA_CONSTANTS_.LAST_PRIMARY_IGNORABLE_, 497 null); 498 499 INDIRECT_BOUNDARIES_[3] = new IndirectBoundaries( 501 RuleBasedCollator.UCA_CONSTANTS_.FIRST_SECONDARY_IGNORABLE_, 502 null); 503 INDIRECT_BOUNDARIES_[4] = new IndirectBoundaries( 505 RuleBasedCollator.UCA_CONSTANTS_.LAST_SECONDARY_IGNORABLE_, 506 null); 507 INDIRECT_BOUNDARIES_[5] = new IndirectBoundaries( 509 RuleBasedCollator.UCA_CONSTANTS_.FIRST_TERTIARY_IGNORABLE_, 510 null); 511 INDIRECT_BOUNDARIES_[6] = new IndirectBoundaries( 513 RuleBasedCollator.UCA_CONSTANTS_.LAST_TERTIARY_IGNORABLE_, 514 null); 515 INDIRECT_BOUNDARIES_[7] = new IndirectBoundaries( 517 RuleBasedCollator.UCA_CONSTANTS_.FIRST_VARIABLE_, 518 null); 519 INDIRECT_BOUNDARIES_[8] = new IndirectBoundaries( 521 RuleBasedCollator.UCA_CONSTANTS_.LAST_VARIABLE_, 522 null); 523 INDIRECT_BOUNDARIES_[9] = new IndirectBoundaries( 525 RuleBasedCollator.UCA_CONSTANTS_.FIRST_NON_VARIABLE_, 526 null); 527 INDIRECT_BOUNDARIES_[10] = new IndirectBoundaries( 529 RuleBasedCollator.UCA_CONSTANTS_.LAST_NON_VARIABLE_, 530 RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_); 531 INDIRECT_BOUNDARIES_[11] = new IndirectBoundaries( 533 RuleBasedCollator.UCA_CONSTANTS_.FIRST_IMPLICIT_, 534 null); 535 INDIRECT_BOUNDARIES_[12] = new IndirectBoundaries( 537 RuleBasedCollator.UCA_CONSTANTS_.LAST_IMPLICIT_, 538 RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_); 539 INDIRECT_BOUNDARIES_[13] = new IndirectBoundaries( 541 RuleBasedCollator.UCA_CONSTANTS_.FIRST_TRAILING_, 542 null); 543 INDIRECT_BOUNDARIES_[14] = new IndirectBoundaries( 545 RuleBasedCollator.UCA_CONSTANTS_.LAST_TRAILING_, 546 null); 547 INDIRECT_BOUNDARIES_[14].m_limitCE_ 548 = RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_SPECIAL_MIN_ << 24; 549 550 RULES_OPTIONS_ = new TokenOption[19]; 551 String option[] = {"non-ignorable", "shifted"}; 552 int value[] = {RuleBasedCollator.AttributeValue.NON_IGNORABLE_, 553 RuleBasedCollator.AttributeValue.SHIFTED_}; 554 RULES_OPTIONS_[0] = new TokenOption("alternate", 555 RuleBasedCollator.Attribute.ALTERNATE_HANDLING_, 556 option, value); 557 option = new String [1]; 558 option[0] = "2"; 559 value = new int[1]; 560 value[0] = RuleBasedCollator.AttributeValue.ON_; 561 RULES_OPTIONS_[1] = new TokenOption("backwards", 562 RuleBasedCollator.Attribute.FRENCH_COLLATION_, 563 option, value); 564 String offonoption[] = new String [2]; 565 offonoption[0] = "off"; 566 offonoption[1] = "on"; 567 int offonvalue[] = new int[2]; 568 offonvalue[0] = RuleBasedCollator.AttributeValue.OFF_; 569 offonvalue[1] = RuleBasedCollator.AttributeValue.ON_; 570 RULES_OPTIONS_[2] = new TokenOption("caseLevel", 571 RuleBasedCollator.Attribute.CASE_LEVEL_, 572 offonoption, offonvalue); 573 option = new String [3]; 574 option[0] = "lower"; 575 option[1] = "upper"; 576 option[2] = "off"; 577 value = new int[3]; 578 value[0] = RuleBasedCollator.AttributeValue.LOWER_FIRST_; 579 value[1] = RuleBasedCollator.AttributeValue.UPPER_FIRST_; 580 value[2] = RuleBasedCollator.AttributeValue.OFF_; 581 RULES_OPTIONS_[3] = new TokenOption("caseFirst", 582 RuleBasedCollator.Attribute.CASE_FIRST_, 583 option, value); 584 RULES_OPTIONS_[4] = new TokenOption("normalization", 585 RuleBasedCollator.Attribute.NORMALIZATION_MODE_, 586 offonoption, offonvalue); 587 RULES_OPTIONS_[5] = new TokenOption("hiraganaQ", 588 RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_, 589 offonoption, offonvalue); 590 option = new String [5]; 591 option[0] = "1"; 592 option[1] = "2"; 593 option[2] = "3"; 594 option[3] = "4"; 595 option[4] = "I"; 596 value = new int[5]; 597 value[0] = RuleBasedCollator.AttributeValue.PRIMARY_; 598 value[1] = RuleBasedCollator.AttributeValue.SECONDARY_; 599 value[2] = RuleBasedCollator.AttributeValue.TERTIARY_; 600 value[3] = RuleBasedCollator.AttributeValue.QUATERNARY_; 601 value[4] = RuleBasedCollator.AttributeValue.IDENTICAL_; 602 RULES_OPTIONS_[6] = new TokenOption("strength", 603 RuleBasedCollator.Attribute.STRENGTH_, 604 option, value); 605 RULES_OPTIONS_[7] = new TokenOption("variable top", 606 RuleBasedCollator.Attribute.LIMIT_, 607 null, null); 608 RULES_OPTIONS_[8] = new TokenOption("rearrange", 609 RuleBasedCollator.Attribute.LIMIT_, 610 null, null); 611 option = new String [3]; 612 option[0] = "1"; 613 option[1] = "2"; 614 option[2] = "3"; 615 value = new int[3]; 616 value[0] = RuleBasedCollator.AttributeValue.PRIMARY_; 617 value[1] = RuleBasedCollator.AttributeValue.SECONDARY_; 618 value[2] = RuleBasedCollator.AttributeValue.TERTIARY_; 619 RULES_OPTIONS_[9] = new TokenOption("before", 620 RuleBasedCollator.Attribute.LIMIT_, 621 option, value); 622 RULES_OPTIONS_[10] = new TokenOption("top", 623 RuleBasedCollator.Attribute.LIMIT_, 624 null, null); 625 String firstlastoption[] = new String [7]; 626 firstlastoption[0] = "primary"; 627 firstlastoption[1] = "secondary"; 628 firstlastoption[2] = "tertiary"; 629 firstlastoption[3] = "variable"; 630 firstlastoption[4] = "regular"; 631 firstlastoption[5] = "implicit"; 632 firstlastoption[6] = "trailing"; 633 634 int firstlastvalue[] = new int[7]; 635 Arrays.fill(firstlastvalue, RuleBasedCollator.AttributeValue.PRIMARY_); 636 637 RULES_OPTIONS_[11] = new TokenOption("first", 638 RuleBasedCollator.Attribute.LIMIT_, 639 firstlastoption, firstlastvalue); 640 RULES_OPTIONS_[12] = new TokenOption("last", 641 RuleBasedCollator.Attribute.LIMIT_, 642 firstlastoption, firstlastvalue); 643 RULES_OPTIONS_[13] = new TokenOption("optimize", 644 RuleBasedCollator.Attribute.LIMIT_, 645 null, null); 646 RULES_OPTIONS_[14] = new TokenOption("suppressContractions", 647 RuleBasedCollator.Attribute.LIMIT_, 648 null, null); 649 RULES_OPTIONS_[15] = new TokenOption("undefined", 650 RuleBasedCollator.Attribute.LIMIT_, 651 null, null); 652 RULES_OPTIONS_[16] = new TokenOption("scriptOrder", 653 RuleBasedCollator.Attribute.LIMIT_, 654 null, null); 655 RULES_OPTIONS_[17] = new TokenOption("charsetname", 656 RuleBasedCollator.Attribute.LIMIT_, 657 null, null); 658 RULES_OPTIONS_[18] = new TokenOption("charset", 659 RuleBasedCollator.Attribute.LIMIT_, 660 null, null); 661 } 662 663 666 private Token m_utilToken_ = new Token(); 667 private CollationElementIterator m_UCAColEIter_ 668 = RuleBasedCollator.UCA_.getCollationElementIterator(""); 669 private int m_utilCEBuffer_[] = new int[2]; 670 671 673 677 int assembleTokenList() throws ParseException 678 { 679 Token lastToken = null; 680 m_parsedToken_.m_strength_ = TOKEN_UNSET_; 681 int sourcelimit = m_source_.length(); 682 int expandNext = 0; 683 684 while (m_current_ < sourcelimit) { 685 m_parsedToken_.m_prefixOffset_ = 0; 686 if (parseNextToken(lastToken == null) < 0) { 687 continue; 689 } 690 char specs = m_parsedToken_.m_flags_; 691 boolean variableTop = ((specs & TOKEN_VARIABLE_TOP_MASK_) != 0); 692 boolean top = ((specs & TOKEN_TOP_MASK_) != 0); 693 int lastStrength = TOKEN_UNSET_; 694 if (lastToken != null) { 695 lastStrength = lastToken.m_strength_; 696 } 697 m_utilToken_.m_source_ = m_parsedToken_.m_charsLen_ << 24 698 | m_parsedToken_.m_charsOffset_; 699 m_utilToken_.m_rules_ = m_source_; 700 Token sourceToken = (Token)m_hashTable_.get(m_utilToken_); 703 if (m_parsedToken_.m_strength_ != TOKEN_RESET_) { 704 if (lastToken == null) { 705 throwParseException(m_source_.toString(), 0); 707 } 708 if (sourceToken == null) { 710 sourceToken = new Token(); 712 sourceToken.m_rules_ = m_source_; 713 sourceToken.m_source_ = m_parsedToken_.m_charsLen_ << 24 714 | m_parsedToken_.m_charsOffset_; 715 sourceToken.m_prefix_ = m_parsedToken_.m_prefixLen_ << 24 716 | m_parsedToken_.m_prefixOffset_; 717 sourceToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_; 719 sourceToken.m_next_ = null; 720 sourceToken.m_previous_ = null; 721 sourceToken.m_CELength_ = 0; 722 sourceToken.m_expCELength_ = 0; 723 m_hashTable_.put(sourceToken, sourceToken); 724 } 725 else { 726 if (sourceToken.m_strength_ != TOKEN_RESET_ 728 && lastToken != sourceToken) { 729 if (sourceToken.m_next_ != null) { 731 if (sourceToken.m_next_.m_strength_ 732 > sourceToken.m_strength_) { 733 sourceToken.m_next_.m_strength_ 734 = sourceToken.m_strength_; 735 } 736 sourceToken.m_next_.m_previous_ 737 = sourceToken.m_previous_; 738 } 739 else { 740 sourceToken.m_listHeader_.m_last_ 741 = sourceToken.m_previous_; 742 } 743 if (sourceToken.m_previous_ != null) { 744 sourceToken.m_previous_.m_next_ 745 = sourceToken.m_next_; 746 } 747 else { 748 sourceToken.m_listHeader_.m_first_ 749 = sourceToken.m_next_; 750 } 751 sourceToken.m_next_ = null; 752 sourceToken.m_previous_ = null; 753 } 754 } 755 sourceToken.m_strength_ = m_parsedToken_.m_strength_; 756 sourceToken.m_listHeader_ = lastToken.m_listHeader_; 757 758 if (lastStrength == TOKEN_RESET_ 761 || sourceToken.m_listHeader_.m_first_ == null) { 762 if (sourceToken.m_listHeader_.m_first_ == null) { 764 sourceToken.m_listHeader_.m_first_ = sourceToken; 765 sourceToken.m_listHeader_.m_last_ = sourceToken; 766 } 767 else { if (sourceToken.m_listHeader_.m_first_.m_strength_ 770 <= sourceToken.m_strength_) { 771 sourceToken.m_next_ 772 = sourceToken.m_listHeader_.m_first_; 773 sourceToken.m_next_.m_previous_ = sourceToken; 774 sourceToken.m_listHeader_.m_first_ = sourceToken; 775 sourceToken.m_previous_ = null; 776 } 777 else { 778 lastToken = sourceToken.m_listHeader_.m_first_; 779 while (lastToken.m_next_ != null 780 && lastToken.m_next_.m_strength_ 781 > sourceToken.m_strength_) { 782 lastToken = lastToken.m_next_; 783 } 784 if (lastToken.m_next_ != null) { 785 lastToken.m_next_.m_previous_ = sourceToken; 786 } 787 else { 788 sourceToken.m_listHeader_.m_last_ 789 = sourceToken; 790 } 791 sourceToken.m_previous_ = lastToken; 792 sourceToken.m_next_ = lastToken.m_next_; 793 lastToken.m_next_ = sourceToken; 794 } 795 } 796 } 797 else { 798 if (sourceToken != lastToken) { 805 if (lastToken.m_polarity_ == sourceToken.m_polarity_) { 806 while (lastToken.m_next_ != null 807 && lastToken.m_next_.m_strength_ 808 > sourceToken.m_strength_) { 809 lastToken = lastToken.m_next_; 810 } 811 sourceToken.m_previous_ = lastToken; 812 if (lastToken.m_next_ != null) { 813 lastToken.m_next_.m_previous_ = sourceToken; 814 } 815 else { 816 sourceToken.m_listHeader_.m_last_ = sourceToken; 817 } 818 sourceToken.m_next_ = lastToken.m_next_; 819 lastToken.m_next_ = sourceToken; 820 } 821 else { 822 while (lastToken.m_previous_ != null 823 && lastToken.m_previous_.m_strength_ 824 > sourceToken.m_strength_) { 825 lastToken = lastToken.m_previous_; 826 } 827 sourceToken.m_next_ = lastToken; 828 if (lastToken.m_previous_ != null) { 829 lastToken.m_previous_.m_next_ = sourceToken; 830 } 831 else { 832 sourceToken.m_listHeader_.m_first_ 833 = sourceToken; 834 } 835 sourceToken.m_previous_ = lastToken.m_previous_; 836 lastToken.m_previous_ = sourceToken; 837 } 838 } 839 else { if (lastStrength < sourceToken.m_strength_) { 842 sourceToken.m_strength_ = lastStrength; 843 } 844 } 845 } 846 if (variableTop == true && m_variableTop_ == null) { 848 variableTop = false; 849 m_variableTop_ = sourceToken; 850 } 851 sourceToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 857 | m_parsedToken_.m_extensionOffset_; 858 if (expandNext != 0) { 859 if (sourceToken.m_strength_ == RuleBasedCollator.PRIMARY) { 860 expandNext = 0; 862 } 863 else if (sourceToken.m_expansion_ == 0) { 864 sourceToken.m_expansion_ = expandNext; 867 } 868 else { 869 int start = expandNext & 0xFFFFFF; 872 int size = expandNext >>> 24; 873 if (size > 0) { 874 m_source_.append(m_source_.substring(start, 875 start + size)); 876 } 877 start = m_parsedToken_.m_extensionOffset_; 878 m_source_.append(m_source_.substring(start, 879 start + m_parsedToken_.m_extensionLen_)); 880 sourceToken.m_expansion_ = (size 881 + m_parsedToken_.m_extensionLen_) << 24 882 | m_extraCurrent_; 883 m_extraCurrent_ += size + m_parsedToken_.m_extensionLen_; 884 } 885 } 886 if((lastToken.m_flags_ & TOKEN_BEFORE_) != 0) { 892 int beforeStrength = (lastToken.m_flags_ & TOKEN_BEFORE_) - 1; 893 if(beforeStrength != sourceToken.m_strength_) { 894 throwParseException(m_source_.toString(), m_current_); 895 } 896 } 897 898 } 899 else { 900 if (lastToken != null && lastStrength == TOKEN_RESET_) { 901 if (m_resultLength_ > 0 && m_listHeader_[m_resultLength_ - 1].m_first_ == null) { 905 m_resultLength_ --; 906 } 907 } 908 if (sourceToken == null) { 909 int searchCharsLen = m_parsedToken_.m_charsLen_; 912 while (searchCharsLen > 1 && sourceToken == null) { 913 searchCharsLen --; 914 m_utilToken_.m_source_ = searchCharsLen << 24 916 | m_parsedToken_.m_charsOffset_; 917 m_utilToken_.m_rules_ = m_source_; 918 sourceToken = (Token)m_hashTable_.get(m_utilToken_); 919 } 920 if (sourceToken != null) { 921 expandNext = (m_parsedToken_.m_charsLen_ 922 - searchCharsLen) << 24 923 | (m_parsedToken_.m_charsOffset_ 924 + searchCharsLen); 925 } 926 } 927 if ((specs & TOKEN_BEFORE_) != 0) { 928 if (top == false) { 929 int strength = (specs & TOKEN_BEFORE_) - 1; 931 if (sourceToken != null 932 && sourceToken.m_strength_ != TOKEN_RESET_) { 933 while (sourceToken.m_strength_ > strength 936 && sourceToken.m_previous_ != null) { 937 sourceToken = sourceToken.m_previous_; 938 } 939 if (sourceToken.m_strength_ == strength) { 941 if (sourceToken.m_previous_ != null) { 942 sourceToken = sourceToken.m_previous_; 943 } 944 else { sourceToken 946 = sourceToken.m_listHeader_.m_reset_; 947 } 948 } 949 else { sourceToken 951 = sourceToken.m_listHeader_.m_reset_; 952 sourceToken = getVirginBefore(sourceToken, 953 strength); 954 } 955 } 956 else { 957 sourceToken 958 = getVirginBefore(sourceToken, strength); 959 } 960 } 961 else { 962 top = false; 964 m_listHeader_[m_resultLength_] = new TokenListHeader(); 965 m_listHeader_[m_resultLength_].m_previousCE_ = 0; 966 m_listHeader_[m_resultLength_].m_previousContCE_ = 0; 967 m_listHeader_[m_resultLength_].m_indirect_ = true; 968 int strength = (specs & TOKEN_BEFORE_) - 1; 972 int baseCE = INDIRECT_BOUNDARIES_[ 973 m_parsedToken_.m_indirectIndex_].m_startCE_; 974 int baseContCE = INDIRECT_BOUNDARIES_[ 975 m_parsedToken_.m_indirectIndex_].m_startContCE_; 976 int ce[] = new int[2]; 977 if((baseCE >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_) 978 && (baseCE >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { 979 int primary = baseCE & RuleBasedCollator.CE_PRIMARY_MASK_ | (baseContCE & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16; 980 int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary); 981 int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1); 982 ce[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505; 983 ce[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_; 984 } else { 985 CollationParsedRuleBuilder.InverseUCA invuca 986 = CollationParsedRuleBuilder.INVERSE_UCA_; 987 invuca.getInversePrevCE(baseCE, baseContCE, strength, 988 ce); 989 } 990 m_listHeader_[m_resultLength_].m_baseCE_ = ce[0]; 991 m_listHeader_[m_resultLength_].m_baseContCE_ = ce[1]; 992 m_listHeader_[m_resultLength_].m_nextCE_ = 0; 993 m_listHeader_[m_resultLength_].m_nextContCE_ = 0; 994 995 sourceToken = new Token(); 996 expandNext = initAReset(0, sourceToken); 997 } 998 } 999 if (sourceToken == null) { 1005 if (m_listHeader_[m_resultLength_] == null) { 1006 m_listHeader_[m_resultLength_] = new TokenListHeader(); 1007 } 1008 if (top == false) { 1018 CollationElementIterator coleiter 1019 = RuleBasedCollator.UCA_.getCollationElementIterator( 1020 m_source_.substring(m_parsedToken_.m_charsOffset_, 1021 m_parsedToken_.m_charsOffset_ 1022 + m_parsedToken_.m_charsLen_)); 1023 1024 int CE = coleiter.next(); 1025 int expand = coleiter.getOffset() 1027 + m_parsedToken_.m_charsOffset_; 1028 int SecondCE = coleiter.next(); 1029 1030 m_listHeader_[m_resultLength_].m_baseCE_ 1031 = CE & 0xFFFFFF3F; 1032 if (RuleBasedCollator.isContinuation(SecondCE)) { 1033 m_listHeader_[m_resultLength_].m_baseContCE_ 1034 = SecondCE; 1035 } 1036 else { 1037 m_listHeader_[m_resultLength_].m_baseContCE_ = 0; 1038 } 1039 m_listHeader_[m_resultLength_].m_nextCE_ = 0; 1040 m_listHeader_[m_resultLength_].m_nextContCE_ = 0; 1041 m_listHeader_[m_resultLength_].m_previousCE_ = 0; 1042 m_listHeader_[m_resultLength_].m_previousContCE_ = 0; 1043 m_listHeader_[m_resultLength_].m_indirect_ = false; 1044 sourceToken = new Token(); 1045 expandNext = initAReset(expand, sourceToken); 1046 } 1047 else { top = false; 1049 m_listHeader_[m_resultLength_].m_previousCE_ = 0; 1050 m_listHeader_[m_resultLength_].m_previousContCE_ = 0; 1051 m_listHeader_[m_resultLength_].m_indirect_ = true; 1052 IndirectBoundaries ib = INDIRECT_BOUNDARIES_[ 1053 m_parsedToken_.m_indirectIndex_]; 1054 m_listHeader_[m_resultLength_].m_baseCE_ 1055 = ib.m_startCE_; 1056 m_listHeader_[m_resultLength_].m_baseContCE_ 1057 = ib.m_startContCE_; 1058 m_listHeader_[m_resultLength_].m_nextCE_ 1059 = ib.m_limitCE_; 1060 m_listHeader_[m_resultLength_].m_nextContCE_ 1061 = ib.m_limitContCE_; 1062 sourceToken = new Token(); 1063 expandNext = initAReset(0, sourceToken); 1064 } 1065 } 1066 else { top = false; 1068 } 1069 } 1070 lastToken = sourceToken; 1073 } 1074 1075 if (m_resultLength_ > 0 1076 && m_listHeader_[m_resultLength_ - 1].m_first_ == null) { 1077 m_resultLength_ --; 1078 } 1079 return m_resultLength_; 1080 } 1081 1082 1088 private static final void throwParseException(String rules, int offset) 1089 throws ParseException 1090 { 1091 String precontext = rules.substring(0, offset); 1093 String postcontext = rules.substring(offset, rules.length()); 1094 StringBuffer error = new StringBuffer ( 1095 "Parse error occurred in rule at offset "); 1096 error.append(offset); 1097 error.append("\n after the prefix \""); 1098 error.append(precontext); 1099 error.append("\" before the suffix \""); 1100 error.append(postcontext); 1101 throw new ParseException (error.toString(), offset); 1102 } 1103 1104 private final boolean doSetTop() { 1105 m_parsedToken_.m_charsOffset_ = m_extraCurrent_; 1106 m_source_.append((char)0xFFFE); 1107 IndirectBoundaries ib = 1108 INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_]; 1109 m_source_.append((char)(ib.m_startCE_ >> 16)); 1110 m_source_.append((char)(ib.m_startCE_ & 0xFFFF)); 1111 m_extraCurrent_ += 3; 1112 if (INDIRECT_BOUNDARIES_[m_parsedToken_.m_indirectIndex_ 1113 ].m_startContCE_ == 0) { 1114 m_parsedToken_.m_charsLen_ = 3; 1115 } 1116 else { 1117 m_source_.append((char)(INDIRECT_BOUNDARIES_[ 1118 m_parsedToken_.m_indirectIndex_ 1119 ].m_startContCE_ >> 16)); 1120 m_source_.append((char)(INDIRECT_BOUNDARIES_[ 1121 m_parsedToken_.m_indirectIndex_ 1122 ].m_startContCE_ & 0xFFFF)); 1123 m_extraCurrent_ += 2; 1124 m_parsedToken_.m_charsLen_ = 5; 1125 } 1126 return true; 1127 } 1128 1129 private static boolean isCharNewLine(char c) { 1130 switch (c) { 1131 case 0x000A: 1132 case 0x000D: 1133 case 0x000C: 1134 case 0x0085: 1135 case 0x2028: 1136 case 0x2029: 1137 return true; 1138 default: 1139 return false; 1140 } 1141 } 1142 1143 1152 private int parseNextToken(boolean startofrules) throws ParseException 1153 { 1154 boolean variabletop = false; 1156 boolean top = false; 1157 boolean inchars = true; 1158 boolean inquote = false; 1159 boolean wasinquote = false; 1160 byte before = 0; 1161 boolean isescaped = false; 1162 int newextensionlen = 0; 1163 int extensionoffset = 0; 1164 int newstrength = TOKEN_UNSET_; 1165 1166 m_parsedToken_.m_charsLen_ = 0; 1167 m_parsedToken_.m_charsOffset_ = 0; 1168 m_parsedToken_.m_prefixOffset_ = 0; 1169 m_parsedToken_.m_prefixLen_ = 0; 1170 m_parsedToken_.m_indirectIndex_ = 0; 1171 1172 int limit = m_rules_.length(); 1173 while (m_current_ < limit) { 1174 char ch = m_source_.charAt(m_current_); 1175 if (inquote) { 1176 if (ch == 0x0027) { inquote = false; 1178 } 1179 else { 1180 if ((m_parsedToken_.m_charsLen_ == 0) || inchars) { 1181 if (m_parsedToken_.m_charsLen_ == 0) { 1182 m_parsedToken_.m_charsOffset_ = m_extraCurrent_; 1183 } 1184 m_parsedToken_.m_charsLen_ ++; 1185 } 1186 else { 1187 if (newextensionlen == 0) { 1188 extensionoffset = m_extraCurrent_; 1189 } 1190 newextensionlen ++; 1191 } 1192 } 1193 } 1194 else if (isescaped) { 1195 isescaped = false; 1196 if (newstrength == TOKEN_UNSET_) { 1197 throwParseException(m_rules_, m_current_); 1198 } 1199 if (ch != 0 && m_current_ != limit) { 1200 if (inchars) { 1201 if (m_parsedToken_.m_charsLen_ == 0) { 1202 m_parsedToken_.m_charsOffset_ = m_current_; 1203 } 1204 m_parsedToken_.m_charsLen_ ++; 1205 } 1206 else { 1207 if (newextensionlen == 0) { 1208 extensionoffset = m_current_; 1209 } 1210 newextensionlen ++; 1211 } 1212 } 1213 } 1214 else { 1215 if (!UCharacterProperty.isRuleWhiteSpace(ch)) { 1216 switch (ch) { 1218 case 0x003D : if (newstrength != TOKEN_UNSET_) { 1220 return doEndParseNextToken(newstrength, 1221 top, 1222 extensionoffset, 1223 newextensionlen, 1224 variabletop, before); 1225 } 1226 if (startofrules == true) { 1228 m_parsedToken_.m_indirectIndex_ = 5; 1229 top = doSetTop(); 1230 return doEndParseNextToken(TOKEN_RESET_, 1231 top, 1232 extensionoffset, 1233 newextensionlen, 1234 variabletop, before); 1235 } 1236 newstrength = Collator.IDENTICAL; 1237 break; 1238 case 0x002C : if (newstrength != TOKEN_UNSET_) { 1240 return doEndParseNextToken(newstrength, 1241 top, 1242 extensionoffset, 1243 newextensionlen, 1244 variabletop, before); 1245 } 1246 if (startofrules == true) { 1248 m_parsedToken_.m_indirectIndex_ = 5; 1249 top = doSetTop(); 1250 return doEndParseNextToken(TOKEN_RESET_, 1251 top, 1252 extensionoffset, 1253 newextensionlen, 1254 variabletop, before); 1255 } 1256 newstrength = Collator.TERTIARY; 1257 break; 1258 case 0x003B : if (newstrength != TOKEN_UNSET_) { 1260 return doEndParseNextToken(newstrength, 1261 top, 1262 extensionoffset, 1263 newextensionlen, 1264 variabletop, before); 1265 } 1266 if (startofrules == true) { 1268 m_parsedToken_.m_indirectIndex_ = 5; 1269 top = doSetTop(); 1270 return doEndParseNextToken(TOKEN_RESET_, 1271 top, 1272 extensionoffset, 1273 newextensionlen, 1274 variabletop, before); 1275 } 1276 newstrength = Collator.SECONDARY; 1277 break; 1278 case 0x003C : if (newstrength != TOKEN_UNSET_) { 1280 return doEndParseNextToken(newstrength, 1281 top, 1282 extensionoffset, 1283 newextensionlen, 1284 variabletop, before); 1285 } 1286 if (startofrules == true) { 1288 m_parsedToken_.m_indirectIndex_ = 5; 1289 top = doSetTop(); 1290 return doEndParseNextToken(TOKEN_RESET_, 1291 top, 1292 extensionoffset, 1293 newextensionlen, 1294 variabletop, before); 1295 } 1296 if (m_source_.charAt(m_current_ + 1) == 0x003C) { 1299 m_current_ ++; 1300 if (m_source_.charAt(m_current_ + 1) == 0x003C) { 1301 m_current_ ++; newstrength = Collator.TERTIARY; 1303 } 1304 else { newstrength = Collator.SECONDARY; 1306 } 1307 } 1308 else { newstrength = Collator.PRIMARY; 1310 } 1311 break; 1312 case 0x0026 : if (newstrength != TOKEN_UNSET_) { 1314 return doEndParseNextToken(newstrength, 1315 top, 1316 extensionoffset, 1317 newextensionlen, 1318 variabletop, before); 1319 } 1320 newstrength = TOKEN_RESET_; break; 1322 case 0x005b : m_optionEnd_ = m_rules_.indexOf(0x005d, m_current_); 1325 if (m_optionEnd_ != -1) { byte result = readAndSetOption(); 1327 m_current_ = m_optionEnd_; 1328 if ((result & TOKEN_TOP_MASK_) != 0) { 1329 if (newstrength == TOKEN_RESET_) { 1330 top = doSetTop(); 1331 if (before != 0) { 1332 m_source_.append((char)0x002d); 1336 m_source_.append((char)before); 1337 m_extraCurrent_ += 2; 1338 m_parsedToken_.m_charsLen_ += 2; 1339 } 1340 m_current_ ++; 1341 return doEndParseNextToken(newstrength, 1342 true, 1343 extensionoffset, 1344 newextensionlen, 1345 variabletop, before); 1346 } 1347 else { 1348 throwParseException(m_rules_, m_current_); 1349 } 1350 } 1351 else if ((result & TOKEN_VARIABLE_TOP_MASK_) != 0) { 1352 if (newstrength != TOKEN_RESET_ 1353 && newstrength != TOKEN_UNSET_) { 1354 variabletop = true; 1355 m_parsedToken_.m_charsOffset_ 1356 = m_extraCurrent_; 1357 m_source_.append((char)0xFFFF); 1358 m_extraCurrent_ ++; 1359 m_current_ ++; 1360 m_parsedToken_.m_charsLen_ = 1; 1361 return doEndParseNextToken(newstrength, 1362 top, 1363 extensionoffset, 1364 newextensionlen, 1365 variabletop, before); 1366 } 1367 else { 1368 throwParseException(m_rules_, m_current_); 1369 } 1370 } 1371 else if ((result & TOKEN_BEFORE_) != 0){ 1372 if (newstrength == TOKEN_RESET_) { 1373 before = (byte)(result & TOKEN_BEFORE_); 1374 } 1375 else { 1376 throwParseException(m_rules_, m_current_); 1377 } 1378 } 1379 } 1380 break; 1381 case 0x002F : wasinquote = false; inchars = false; break; 1386 case 0x005C : isescaped = true; 1388 break; 1389 case 0x0027 : if (newstrength == TOKEN_UNSET_) { 1392 throwParseException(m_rules_, m_current_); 1394 } 1395 inquote = true; 1396 if (inchars) { if (wasinquote == false) { 1398 m_parsedToken_.m_charsOffset_ = m_extraCurrent_; 1399 } 1400 if (m_parsedToken_.m_charsLen_ != 0) { 1401 m_source_.append(m_source_.substring( 1402 m_current_ - m_parsedToken_.m_charsLen_, 1403 m_current_)); 1404 m_extraCurrent_ += m_parsedToken_.m_charsLen_; 1405 } 1406 m_parsedToken_.m_charsLen_ ++; 1407 } 1408 else { if (wasinquote == false) { 1410 extensionoffset = m_extraCurrent_; 1411 } 1412 if (newextensionlen != 0) { 1413 m_source_.append(m_source_.substring( 1414 m_current_ - newextensionlen, 1415 m_current_)); 1416 m_extraCurrent_ += newextensionlen; 1417 } 1418 newextensionlen ++; 1419 } 1420 wasinquote = true; 1421 m_current_ ++; 1422 ch = m_source_.charAt(m_current_); 1423 if (ch == 0x0027) { m_source_.append(ch); 1425 m_extraCurrent_ ++; 1426 inquote = false; 1427 } 1428 break; 1429 case 0x0040 : if (newstrength == TOKEN_UNSET_) { 1433 m_options_.m_isFrenchCollation_ = true; 1434 break; 1435 } 1436 case 0x007C : m_parsedToken_.m_prefixOffset_ 1446 = m_parsedToken_.m_charsOffset_; 1447 m_parsedToken_.m_prefixLen_ 1448 = m_parsedToken_.m_charsLen_; 1449 if (inchars) { if (wasinquote == false) { 1451 m_parsedToken_.m_charsOffset_ = m_extraCurrent_; 1452 } 1453 if (m_parsedToken_.m_charsLen_ != 0) { 1454 String prefix = m_source_.substring( 1455 m_current_ - m_parsedToken_.m_charsLen_, 1456 m_current_); 1457 m_source_.append(prefix); 1458 m_extraCurrent_ += m_parsedToken_.m_charsLen_; 1459 } 1460 m_parsedToken_.m_charsLen_ ++; 1461 } 1462 wasinquote = true; 1463 do { 1464 m_current_ ++; 1465 ch = m_source_.charAt(m_current_); 1466 } while (UCharacterProperty.isRuleWhiteSpace(ch)); 1468 break; 1469 case 0x0023: do { 1471 m_current_ ++; 1472 ch = m_source_.charAt(m_current_); 1473 } while (!isCharNewLine(ch)); 1474 break; 1475 case 0x0021: break; 1477 default : 1478 if (newstrength == TOKEN_UNSET_) { 1479 throwParseException(m_rules_, m_current_); 1480 } 1481 if (isSpecialChar(ch) && (inquote == false)) { 1482 throwParseException(m_rules_, m_current_); 1483 } 1484 if (ch == 0x0000 && m_current_ + 1 == limit) { 1485 break; 1486 } 1487 if (inchars) { 1488 if (m_parsedToken_.m_charsLen_ == 0) { 1489 m_parsedToken_.m_charsOffset_ = m_current_; 1490 } 1491 m_parsedToken_.m_charsLen_++; 1492 } 1493 else { 1494 if (newextensionlen == 0) { 1495 extensionoffset = m_current_; 1496 } 1497 newextensionlen ++; 1498 } 1499 break; 1500 } 1501 } 1502 } 1503 if (wasinquote) { 1504 if (ch != 0x27) { 1505 m_source_.append(ch); 1506 m_extraCurrent_ ++; 1507 } 1508 } 1509 m_current_ ++; 1510 } 1511 return doEndParseNextToken(newstrength, top, 1512 extensionoffset, newextensionlen, 1513 variabletop, before); 1514 } 1515 1516 1521 private int doEndParseNextToken(int newstrength, 1522 boolean top, 1523 int extensionoffset, int newextensionlen, 1524 boolean variabletop, int before) 1525 throws ParseException 1526 { 1527 if (newstrength == TOKEN_UNSET_) { 1528 return -1; 1529 } 1530 if (m_parsedToken_.m_charsLen_ == 0 && top == false) { 1531 throwParseException(m_rules_, m_current_); 1532 } 1533 1534 m_parsedToken_.m_strength_ = newstrength; 1535 m_parsedToken_.m_extensionOffset_ = extensionoffset; 1538 m_parsedToken_.m_extensionLen_ = newextensionlen; 1539 m_parsedToken_.m_flags_ = (char) 1540 ((variabletop ? TOKEN_VARIABLE_TOP_MASK_ : 0) 1541 | (top ? TOKEN_TOP_MASK_ : 0) | before); 1542 return m_current_; 1543 } 1544 1545 1552 private Token getVirginBefore(Token sourcetoken, int strength) 1553 throws ParseException 1554 { 1555 if (sourcetoken != null) { 1557 int offset = sourcetoken.m_source_ & 0xFFFFFF; 1558 m_UCAColEIter_.setText(m_source_.substring(offset, offset + 1)); 1559 } 1560 else { 1561 m_UCAColEIter_.setText( 1562 m_source_.substring(m_parsedToken_.m_charsOffset_, 1563 m_parsedToken_.m_charsOffset_ + 1)); 1564 } 1565 1566 int basece = m_UCAColEIter_.next() & 0xFFFFFF3F; 1567 int basecontce = m_UCAColEIter_.next(); 1568 if (basecontce == CollationElementIterator.NULLORDER) { 1569 basecontce = 0; 1570 } 1571 1572 int ch = 0; 1573 1574 1575 if((basece >>> 24 >= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MIN_) 1576 && (basece >>> 24 <= RuleBasedCollator.UCA_CONSTANTS_.PRIMARY_IMPLICIT_MAX_)) { 1577 1578 int primary = basece & RuleBasedCollator.CE_PRIMARY_MASK_ | (basecontce & RuleBasedCollator.CE_PRIMARY_MASK_) >> 16; 1579 int raw = RuleBasedCollator.impCEGen_.getRawFromImplicit(primary); 1580 ch = RuleBasedCollator.impCEGen_.getCodePointFromRaw(raw-1); 1581 int primaryCE = RuleBasedCollator.impCEGen_.getImplicitFromRaw(raw-1); 1582 m_utilCEBuffer_[0] = primaryCE & RuleBasedCollator.CE_PRIMARY_MASK_ | 0x0505; 1583 m_utilCEBuffer_[1] = (primaryCE << 16) & RuleBasedCollator.CE_PRIMARY_MASK_ | RuleBasedCollator.CE_CONTINUATION_MARKER_; 1584 1585 m_parsedToken_.m_charsOffset_ = m_extraCurrent_; 1586 m_source_.append('\uFFFE'); 1587 m_source_.append((char)ch); 1588 m_extraCurrent_ += 2; 1589 m_parsedToken_.m_charsLen_++; 1590 1591 m_utilToken_.m_source_ = (m_parsedToken_.m_charsLen_ << 24) 1592 | m_parsedToken_.m_charsOffset_; 1593 m_utilToken_.m_rules_ = m_source_; 1594 sourcetoken = (Token)m_hashTable_.get(m_utilToken_); 1595 1596 if(sourcetoken == null) { 1597 m_listHeader_[m_resultLength_] = new TokenListHeader(); 1598 m_listHeader_[m_resultLength_].m_baseCE_ 1599 = m_utilCEBuffer_[0] & 0xFFFFFF3F; 1600 if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) { 1601 m_listHeader_[m_resultLength_].m_baseContCE_ 1602 = m_utilCEBuffer_[1]; 1603 } 1604 else { 1605 m_listHeader_[m_resultLength_].m_baseContCE_ = 0; 1606 } 1607 m_listHeader_[m_resultLength_].m_nextCE_ = 0; 1608 m_listHeader_[m_resultLength_].m_nextContCE_ = 0; 1609 m_listHeader_[m_resultLength_].m_previousCE_ = 0; 1610 m_listHeader_[m_resultLength_].m_previousContCE_ = 0; 1611 m_listHeader_[m_resultLength_].m_indirect_ = false; 1612 1613 sourcetoken = new Token(); 1614 initAReset(-1, sourcetoken); 1615 } 1616 1617 } else { 1618 1619 int invpos = CollationParsedRuleBuilder.INVERSE_UCA_.getInversePrevCE( 1621 basece, basecontce, 1622 strength, m_utilCEBuffer_); 1623 if(CollationParsedRuleBuilder.INVERSE_UCA_.getCEStrengthDifference(basece, basecontce, m_utilCEBuffer_[0], m_utilCEBuffer_[1]) < strength) { 1628 if(strength == Collator.SECONDARY) { 1632 m_utilCEBuffer_[0] = basece - 0x0200; 1633 } else { m_utilCEBuffer_[0] = basece - 0x02; 1635 } 1636 if(RuleBasedCollator.isContinuation(basecontce)) { 1637 if(strength == Collator.SECONDARY) { 1638 m_utilCEBuffer_[1] = basecontce - 0x0200; 1639 } else { m_utilCEBuffer_[1] = basecontce - 0x02; 1641 } 1642 } 1643 } 1644 1645 1675 1676 1683 m_parsedToken_.m_charsOffset_ -= 10; 1694 m_parsedToken_.m_charsLen_ += 10; 1695 m_listHeader_[m_resultLength_] = new TokenListHeader(); 1696 m_listHeader_[m_resultLength_].m_baseCE_ 1697 = m_utilCEBuffer_[0] & 0xFFFFFF3F; 1698 if (RuleBasedCollator.isContinuation(m_utilCEBuffer_[1])) { 1699 m_listHeader_[m_resultLength_].m_baseContCE_ 1700 = m_utilCEBuffer_[1]; 1701 } 1702 else { 1703 m_listHeader_[m_resultLength_].m_baseContCE_ = 0; 1704 } 1705 m_listHeader_[m_resultLength_].m_nextCE_ = 0; 1706 m_listHeader_[m_resultLength_].m_nextContCE_ = 0; 1707 m_listHeader_[m_resultLength_].m_previousCE_ = 0; 1708 m_listHeader_[m_resultLength_].m_previousContCE_ = 0; 1709 m_listHeader_[m_resultLength_].m_indirect_ = false; 1710 sourcetoken = new Token(); 1711 initAReset(-1, sourcetoken); 1712 } 1714 return sourcetoken; 1715 } 1716 1717 1729 private int initAReset(int expand, Token targetToken) throws ParseException 1730 { 1731 if (m_resultLength_ == m_listHeader_.length - 1) { 1732 TokenListHeader temp[] = new TokenListHeader[m_resultLength_ << 1]; 1735 System.arraycopy(m_listHeader_, 0, temp, 0, m_resultLength_ + 1); 1736 m_listHeader_ = temp; 1737 } 1738 targetToken.m_rules_ = m_source_; 1740 targetToken.m_source_ = m_parsedToken_.m_charsLen_ << 24 1741 | m_parsedToken_.m_charsOffset_; 1742 targetToken.m_expansion_ = m_parsedToken_.m_extensionLen_ << 24 1743 | m_parsedToken_.m_extensionOffset_; 1744 targetToken.m_flags_ = m_parsedToken_.m_flags_; 1746 1747 if (m_parsedToken_.m_prefixOffset_ != 0) { 1748 throwParseException(m_rules_, m_parsedToken_.m_charsOffset_ - 1); 1749 } 1750 1751 targetToken.m_prefix_ = 0; 1752 targetToken.m_polarity_ = TOKEN_POLARITY_POSITIVE_; 1754 targetToken.m_strength_ = TOKEN_RESET_; 1755 targetToken.m_next_ = null; 1756 targetToken.m_previous_ = null; 1757 targetToken.m_CELength_ = 0; 1758 targetToken.m_expCELength_ = 0; 1759 targetToken.m_listHeader_ = m_listHeader_[m_resultLength_]; 1760 m_listHeader_[m_resultLength_].m_first_ = null; 1761 m_listHeader_[m_resultLength_].m_last_ = null; 1762 m_listHeader_[m_resultLength_].m_first_ = null; 1763 m_listHeader_[m_resultLength_].m_last_ = null; 1764 m_listHeader_[m_resultLength_].m_reset_ = targetToken; 1765 1766 1775 int result = 0; 1776 if (expand > 0) { 1777 if (m_parsedToken_.m_charsLen_ > 1) { 1779 targetToken.m_source_ = ((expand 1780 - m_parsedToken_.m_charsOffset_ ) 1781 << 24) 1782 | m_parsedToken_.m_charsOffset_; 1783 result = ((m_parsedToken_.m_charsLen_ 1784 + m_parsedToken_.m_charsOffset_ - expand) << 24) 1785 | expand; 1786 } 1787 } 1788 1789 m_resultLength_ ++; 1790 m_hashTable_.put(targetToken, targetToken); 1791 return result; 1792 } 1793 1794 1799 private static final boolean isSpecialChar(char ch) 1800 { 1801 return (ch <= 0x002F && ch >= 0x0020) || (ch <= 0x003F && ch >= 0x003A) 1802 || (ch <= 0x0060 && ch >= 0x005B) 1803 || (ch <= 0x007E && ch >= 0x007D) || ch == 0x007B; 1804 } 1805 1806 private 1807 UnicodeSet readAndSetUnicodeSet(String source, int start) throws ParseException 1808 { 1809 while(source.charAt(start) != '[') { 1810 start++; 1811 } 1812 int noOpenBraces = 1; 1815 int current = 1; while(start+current < source.length() && noOpenBraces != 0) { 1817 if(source.charAt(start+current) == '[') { 1818 noOpenBraces++; 1819 } else if(source.charAt(start+current) == ']') { noOpenBraces--; 1821 } 1822 current++; 1823 } 1824 1826 if(noOpenBraces != 0 || ( source.indexOf("]", start+current) ) == -1) { 1827 throwParseException(m_rules_, start); 1828 } 1829 return new UnicodeSet(source.substring(start, start+current)); } 1831 1832 1833 1836 private int m_optionarg_ = 0; 1837 1838 private int readOption(String rules, int start, int optionend) 1839 { 1840 m_optionarg_ = 0; 1841 int i = 0; 1842 while (i < RULES_OPTIONS_.length) { 1843 String option = RULES_OPTIONS_[i].m_name_; 1844 int optionlength = option.length(); 1845 if (rules.length() > start + optionlength 1846 && option.equalsIgnoreCase(rules.substring(start, 1847 start + optionlength))) { 1848 if (optionend - start > optionlength) { 1849 m_optionarg_ = start + optionlength; 1850 while (m_optionarg_ < optionend && UCharacter.isWhitespace(rules.charAt(m_optionarg_))) 1852 { m_optionarg_ ++; 1854 } 1855 } 1856 break; 1857 } 1858 i ++; 1859 } 1860 if(i == RULES_OPTIONS_.length) { 1861 i = -1; 1862 } 1863 return i; 1864 } 1865 1870 private byte readAndSetOption() throws ParseException 1871 { 1872 int start = m_current_ + 1; int i = readOption(m_rules_, start, m_optionEnd_); 1874 1875 int optionarg = m_optionarg_; 1876 1877 if (i < 0) { 1878 throwParseException(m_rules_, start); 1879 } 1880 1881 if (i < 7) { 1882 if (optionarg != 0) { 1883 for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; 1884 j ++) { 1885 String subname = RULES_OPTIONS_[i].m_subOptions_[j]; 1886 int size = optionarg + subname.length(); 1887 if (m_rules_.length() > size 1888 && subname.equalsIgnoreCase(m_rules_.substring( 1889 optionarg, size))) { 1890 setOptions(m_options_, RULES_OPTIONS_[i].m_attribute_, 1891 RULES_OPTIONS_[i].m_subOptionAttributeValues_[j]); 1892 return TOKEN_SUCCESS_MASK_; 1893 } 1894 } 1895 } 1896 throwParseException(m_rules_, optionarg); 1897 } 1898 else if (i == 7) { return TOKEN_SUCCESS_MASK_ | TOKEN_VARIABLE_TOP_MASK_; 1900 } 1901 else if (i == 8) { return TOKEN_SUCCESS_MASK_; 1903 } 1904 else if (i == 9) { if (optionarg != 0) { 1906 for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; 1907 j ++) { 1908 String subname = RULES_OPTIONS_[i].m_subOptions_[j]; 1909 int size = optionarg + subname.length(); 1910 if (m_rules_.length() > size 1911 && subname.equalsIgnoreCase( 1912 m_rules_.substring(optionarg, 1913 optionarg + subname.length()))) { 1914 return (byte)(TOKEN_SUCCESS_MASK_ 1915 | RULES_OPTIONS_[i].m_subOptionAttributeValues_[j] 1916 + 1); 1917 } 1918 } 1919 } 1920 throwParseException(m_rules_, optionarg); 1921 } 1922 else if (i == 10) { m_parsedToken_.m_indirectIndex_ = 0; 1926 return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_; 1927 } 1928 else if (i < 13) { for (int j = 0; j < RULES_OPTIONS_[i].m_subOptions_.length; j ++) { 1930 String subname = RULES_OPTIONS_[i].m_subOptions_[j]; 1931 int size = optionarg + subname.length(); 1932 if (m_rules_.length() > size 1933 && subname.equalsIgnoreCase(m_rules_.substring(optionarg, 1934 size))) { 1935 m_parsedToken_.m_indirectIndex_ = (char)(i - 10 + (j << 1)); 1936 return TOKEN_SUCCESS_MASK_ | TOKEN_TOP_MASK_; 1937 } 1938 } 1939 throwParseException(m_rules_, optionarg); 1940 } 1941 else if(i == 13 || i == 14) { int noOpenBraces = 1; 1944 m_current_++; while(m_current_ < m_source_.length() && noOpenBraces != 0) { 1946 if(m_source_.charAt(m_current_) == '[') { 1947 noOpenBraces++; 1948 } else if(m_source_.charAt(m_current_) == ']') { noOpenBraces--; 1950 } 1951 m_current_++; 1952 } 1953 m_optionEnd_ = m_current_-1; 1954 return TOKEN_SUCCESS_MASK_; 1955 } 1956 else { 1957 throwParseException(m_rules_, optionarg); 1958 } 1959 return TOKEN_SUCCESS_MASK_; } 1961 1962 1968 private void setOptions(OptionSet optionset, int attribute, int value) 1969 { 1970 switch (attribute) { 1971 case RuleBasedCollator.Attribute.HIRAGANA_QUATERNARY_MODE_ : 1972 optionset.m_isHiragana4_ 1973 = (value == RuleBasedCollator.AttributeValue.ON_); 1974 break; 1975 case RuleBasedCollator.Attribute.FRENCH_COLLATION_ : 1976 optionset.m_isFrenchCollation_ 1977 = (value == RuleBasedCollator.AttributeValue.ON_); 1978 break; 1979 case RuleBasedCollator.Attribute.ALTERNATE_HANDLING_ : 1980 optionset.m_isAlternateHandlingShifted_ 1981 = (value 1982 == RuleBasedCollator.AttributeValue.SHIFTED_); 1983 break; 1984 case RuleBasedCollator.Attribute.CASE_FIRST_ : 1985 optionset.m_caseFirst_ = value; 1986 break; 1987 case RuleBasedCollator.Attribute.CASE_LEVEL_ : 1988 optionset.m_isCaseLevel_ 1989 = (value == RuleBasedCollator.AttributeValue.ON_); 1990 break; 1991 case RuleBasedCollator.Attribute.NORMALIZATION_MODE_ : 1992 if (value == RuleBasedCollator.AttributeValue.ON_) { 1993 value = Collator.CANONICAL_DECOMPOSITION; 1994 } 1995 optionset.m_decomposition_ = value; 1996 break; 1997 case RuleBasedCollator.Attribute.STRENGTH_ : 1998 optionset.m_strength_ = value; 1999 break; 2000 default : 2001 break; 2002 } 2003 } 2004 2005 UnicodeSet getTailoredSet() throws ParseException 2006 { 2007 boolean startOfRules = true; 2008 UnicodeSet tailored = new UnicodeSet(); 2009 String pattern; 2010 CanonicalIterator it = new CanonicalIterator(""); 2011 2012 m_parsedToken_.m_strength_ = TOKEN_UNSET_; 2013 int sourcelimit = m_source_.length(); 2014 2016 while (m_current_ < sourcelimit) { 2017 m_parsedToken_.m_prefixOffset_ = 0; 2018 if (parseNextToken(startOfRules) < 0) { 2019 continue; 2021 } 2022 startOfRules = false; 2023 if(m_parsedToken_.m_strength_ != TOKEN_RESET_) { 2026 it.setSource(m_source_.substring( 2027 m_parsedToken_.m_charsOffset_, 2028 m_parsedToken_.m_charsOffset_+m_parsedToken_.m_charsLen_)); 2029 pattern = it.next(); 2030 while(pattern != null) { 2031 if(Normalizer.quickCheck(pattern, Normalizer.FCD,0) != Normalizer.NO) { 2032 tailored.add(pattern); 2033 } 2034 pattern = it.next(); 2035 } 2036 } 2037 } 2038 return tailored; 2039 } 2040 2041 final private void extractSetsFromRules(String rules) throws ParseException { 2042 int optionNumber = -1; 2043 int setStart = 0; 2044 int i = 0; 2045 while(i < rules.length()) { 2046 if(rules.charAt(i) == 0x005B) { 2047 optionNumber = readOption(rules, i+1, rules.length()); 2048 setStart = m_optionarg_; 2049 if(optionNumber == 13) { 2050 UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart); 2051 if(m_copySet_ == null) { 2052 m_copySet_ = newSet; 2053 } else { 2054 m_copySet_.addAll(newSet); 2055 } 2056 } else if(optionNumber == 14) { 2057 UnicodeSet newSet = readAndSetUnicodeSet(rules, setStart); 2058 if(m_removeSet_ == null) { 2059 m_removeSet_ = newSet; 2060 } else { 2061 m_removeSet_.addAll(newSet); 2062 } 2063 } 2064 } 2065 i++; 2066 } 2067 } 2068} 2069 | Popular Tags |