package com.ibm.icu.text;

import java.io.IOException;
import java.io.InputStream;
import java.text.CharacterIterator;

import com.ibm.icu.impl.Assert;

/**
 * Break iterator driven by compiled state tables held in an
 * {@link RBBIDataWrapper}.
 *
 * <p>The iterator walks a {@link CharacterIterator} and runs a table-driven
 * state machine over the category of each code point (looked up through
 * <code>fRData.fTrie</code>) to locate boundaries. Supplementary characters are
 * handled by the private <code>CI*32</code> helpers, which combine surrogate
 * pairs into a single 32-bit code point.
 *
 * <p>Instances are not synchronized; every navigation call mutates the position
 * of the underlying text iterator and the cached rule-status fields.
 */
public class RuleBasedBreakIterator_New extends RuleBasedBreakIterator {
    /** Compile-time switch for the internal consistency checks. */
    private static final boolean ASSERT = false;

    /** State in which every run of the state machine begins. */
    private static final int START_STATE = 1;

    /** State that terminates a run of the state machine. */
    private static final int STOP_STATE = 0;

    /**
     * Creates an iterator with no rule data and no text. Used by
     * {@link #getInstanceFromCompiledRules(InputStream)}, which fills both in.
     */
    RuleBasedBreakIterator_New() {
    }

    /** The text being iterated over; may be null. */
    private CharacterIterator fText;

    /** The compiled rule data (state tables, trie, status table, rule source). */
    private RBBIDataWrapper fRData;

    /**
     * Index into <code>fRData.fStatusTable</code> of the status group that
     * belongs to the most recently returned boundary.
     */
    private int fLastRuleStatusIndex;

    /**
     * True when {@link #fLastRuleStatusIndex} is up to date for the current
     * position. When false, {@link #makeRuleStatusValid()} recomputes it on
     * demand.
     */
    private boolean fLastStatusIndexValid;

    /** When true, the state machine prints a trace of every step to System.out. */
    public static boolean fTrace;


    /**
     * Dumps the compiled rule data by delegating to <code>fRData.dump()</code>.
     */
    public void dump() {
        this.fRData.dump();
    }


    /**
     * Returns a copy of this iterator. The copy receives its own clone of the
     * text iterator, so moving one iterator does not move the other; this
     * object is left untouched.
     *
     * @return the cloned iterator
     */
    public Object clone()
    {
        RuleBasedBreakIterator_New result = (RuleBasedBreakIterator_New) super.clone();
        if (fText != null) {
            // The clone, not the source, must receive the fresh text iterator.
            result.fText = (CharacterIterator) fText.clone();
        }
        return result;
    }

    /**
     * Compares this iterator with another object. Two iterators are equal when
     * their rule sources are equal (or both have no rule data) and their text
     * iterators are equal (or both have no text).
     *
     * @param that the object to compare with; may be null
     * @return true if <code>that</code> is an equivalent iterator, false
     *         otherwise (including when <code>that</code> is null or of an
     *         unrelated type)
     */
    public boolean equals(Object that) {
        // instanceof rejects both null and foreign types without relying on
        // exceptions for control flow.
        if (!(that instanceof RuleBasedBreakIterator_New)) {
            return false;
        }
        RuleBasedBreakIterator_New other = (RuleBasedBreakIterator_New) that;

        if (fRData != other.fRData && (fRData == null || other.fRData == null)) {
            return false;
        }
        if (fRData != null && other.fRData != null &&
                (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) {
            return false;
        }
        if (fText == null && other.fText == null) {
            return true;
        }
        if (fText == null || other.fText == null) {
            return false;
        }
        return fText.equals(other.fText);
    }

    /**
     * Returns the rule source this iterator was built from.
     *
     * @return <code>fRData.fRuleSource</code>, or null when no rule data is set
     */
    public String toString() {
        String retStr = null;
        if (fRData != null) {
            retStr = fRData.fRuleSource;
        }
        return retStr;
    }

    /**
     * Returns a hash code derived from the rule source, consistent with
     * {@link #equals(Object)}.
     *
     * @return the hash of the rule source, or 0 when no rule data is set
     */
    public int hashCode()
    {
        // equals() tolerates missing rule data, so hashCode() must as well.
        if (fRData == null) {
            return 0;
        }
        return fRData.fRuleSource.hashCode();
    }


    /**
     * Creates an iterator from rule data read from a stream. The new iterator
     * starts out positioned over an empty string.
     *
     * @param is the stream handed to <code>RBBIDataWrapper.get</code>
     * @return the new iterator
     * @throws IOException if reading the rule data fails
     */
    public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
        RuleBasedBreakIterator_New This = new RuleBasedBreakIterator_New();
        This.fRData = RBBIDataWrapper.get(is);
        This.fText = new java.text.StringCharacterIterator("");
        return This;
    }


    /**
     * Moves to the beginning of the text and resets the rule status.
     *
     * @return the begin index of the text, or {@link BreakIterator#DONE} when
     *         there is no text
     */
    public int first() {
        fLastRuleStatusIndex = 0;
        fLastStatusIndexValid = true;
        if (fText == null) {
            return BreakIterator.DONE;
        }
        fText.first();
        return fText.getIndex();
    }


    /**
     * Moves to the end of the text. The rule status for that position is not
     * computed here; it is marked stale and recomputed lazily.
     *
     * @return the end index of the text, or {@link BreakIterator#DONE} when
     *         there is no text
     */
    public int last() {
        if (fText == null) {
            fLastRuleStatusIndex = 0;
            fLastStatusIndexValid = true;
            return BreakIterator.DONE;
        }

        fLastStatusIndexValid = false;
        int pos = fText.getEndIndex();
        fText.setIndex(pos);
        return pos;
    }


    /**
     * Advances by <code>n</code> boundaries; a negative <code>n</code> moves
     * backwards and zero leaves the position unchanged.
     *
     * @param n the number of boundaries to move
     * @return the position reached; the current position when <code>n</code>
     *         is zero
     */
    public int next(int n) {
        int result = current();
        while (n > 0) {
            result = handleNext(fRData.fFTable);
            --n;
        }
        while (n < 0) {
            result = previous();
            ++n;
        }
        return result;
    }


    /**
     * Advances to the next boundary by running the state machine over
     * <code>fRData.fFTable</code>.
     *
     * @return the position of the next boundary, or {@link BreakIterator#DONE}
     */
    public int next() {
        return handleNext(fRData.fFTable);
    }


    /**
     * Moves to the boundary preceding the current position.
     *
     * @return the position of the preceding boundary, or
     *         {@link BreakIterator#DONE} when there is no text or the iterator
     *         is already at the beginning
     */
    public int previous() {
        if (fText == null || current() == fText.getBeginIndex()) {
            fLastRuleStatusIndex = 0;
            fLastStatusIndexValid = true;
            return BreakIterator.DONE;
        }

        // When either of the fSRTable / fSFTable tables is present, fRTable is
        // run directly and its result is taken as the boundary.
        if (fRData.fSRTable != null || fRData.fSFTable != null) {
            return handlePrevious(fRData.fRTable);
        }

        // Otherwise: back up to a starting point, then walk forward boundary
        // by boundary until reaching or passing the original position. The
        // last boundary seen before that is the answer.
        int start = current();

        CIPrevious32(fText);
        int lastResult = handlePrevious();
        int result = lastResult;
        int lastTag = 0;
        boolean breakTagValid = false;

        for (;;) {
            result = handleNext(fRData.fFTable);
            if (result == BreakIterator.DONE || result >= start) {
                break;
            }
            lastResult = result;
            lastTag = fLastRuleStatusIndex;
            breakTagValid = true;
        }

        // The forward walk overshot; restore position and status to the last
        // boundary found before the starting position.
        fText.setIndex(lastResult);
        fLastRuleStatusIndex = lastTag;
        fLastStatusIndexValid = breakTagValid;
        return lastResult;
    }

    /**
     * Moves to the first boundary after the given offset.
     *
     * @param offset the offset to start from
     * @return the first boundary greater than <code>offset</code>; the result
     *         of <code>last()</code> followed by <code>next()</code> when the
     *         offset is at or past the end (or there is no text); the result of
     *         <code>first()</code> when the offset precedes the text
     */
    public int following(int offset) {
        fLastRuleStatusIndex = 0;
        fLastStatusIndexValid = true;
        if (fText == null || offset >= fText.getEndIndex()) {
            last();
            return next();
        }
        else if (offset < fText.getBeginIndex()) {
            return first();
        }

        int result = 0;

        if (fRData.fSRTable != null) {
            // Step one code point past the offset, back up using fSRTable,
            // then move forward until passing the offset.
            fText.setIndex(offset);
            CINext32(fText);
            handlePrevious(fRData.fSRTable);
            result = next();
            while (result <= offset) {
                result = next();
            }
            return result;
        }
        if (fRData.fSFTable != null) {
            // Step one code point before the offset, run fSFTable forward,
            // then walk backwards until reaching or crossing the offset.
            fText.setIndex(offset);
            CIPrevious32(fText);
            handleNext(fRData.fSFTable);
            int oldresult = previous();
            while (oldresult > offset) {
                result = previous();
                if (result <= offset) {
                    return oldresult;
                }
                oldresult = result;
            }
            result = next();
            if (result <= offset) {
                return next();
            }
            return result;
        }

        // Neither table is available: position at the offset, find the
        // preceding boundary, and move forward until passing the offset.
        fText.setIndex(offset);
        if (offset == fText.getBeginIndex()) {
            return handleNext(fRData.fFTable);
        }
        result = previous();

        while (result != BreakIterator.DONE && result <= offset) {
            result = next();
        }

        return result;
    }

    /**
     * Moves to the last boundary before the given offset.
     *
     * @param offset the offset to start from
     * @return the last boundary less than <code>offset</code>; the result of
     *         <code>last()</code> when the offset is past the end (or there is
     *         no text); the result of <code>first()</code> when the offset
     *         precedes the text
     */
    public int preceding(int offset) {
        if (fText == null || offset > fText.getEndIndex()) {
            return last();
        }
        else if (offset < fText.getBeginIndex()) {
            return first();
        }

        int result;
        if (fRData.fSFTable != null) {
            // Step one code point before the offset, run fSFTable forward,
            // then move backwards until strictly before the offset.
            fText.setIndex(offset);
            CIPrevious32(fText);
            handleNext(fRData.fSFTable);
            result = previous();
            while (result >= offset) {
                result = previous();
            }
            return result;
        }
        if (fRData.fSRTable != null) {
            // Step one code point past the offset, back up using fSRTable,
            // then walk forward until reaching or crossing the offset.
            fText.setIndex(offset);
            CINext32(fText);
            handlePrevious(fRData.fSRTable);

            int oldresult = next();
            while (oldresult < offset) {
                result = next();
                if (result >= offset) {
                    return oldresult;
                }
                oldresult = result;
            }
            result = previous();
            if (result >= offset) {
                return previous();
            }
            return result;
        }

        fText.setIndex(offset);
        return previous();
    }

    /**
     * Verifies that an offset lies within the bounds of the given text.
     *
     * @param offset the offset to check
     * @param text   the text whose begin and end indices bound the offset
     * @throws IllegalArgumentException if the offset is before the begin index
     *         or after the end index
     */
    protected static final void checkOffset(int offset, CharacterIterator text) {
        if (offset < text.getBeginIndex() || offset > text.getEndIndex()) {
            throw new IllegalArgumentException("offset out of bounds");
        }
    }


    /**
     * Reports whether the given offset is a boundary. The begin and end indices
     * of the text are always boundaries. As a side effect the iterator is
     * repositioned.
     *
     * @param offset the offset to test
     * @return true if <code>offset</code> is a boundary
     * @throws IllegalArgumentException if the offset is outside the text
     */
    public boolean isBoundary(int offset) {
        // Rejects anything outside [begin, end], so no further range handling
        // is needed below.
        checkOffset(offset, fText);

        if (offset == fText.getBeginIndex()) {
            first();
            return true;
        }

        if (offset == fText.getEndIndex()) {
            last();
            return true;
        }

        return following(offset - 1) == offset;
    }

    /**
     * Returns the current position.
     *
     * @return the index of the text iterator, or {@link BreakIterator#DONE}
     *         when there is no text
     */
    public int current() {
        return (fText != null) ? fText.getIndex() : BreakIterator.DONE;
    }


    /**
     * Brings {@link #fLastRuleStatusIndex} up to date for the current position
     * if it has been marked stale. At the beginning of the text (or with no
     * text) the status index is simply reset to 0; elsewhere the status is
     * recomputed by stepping back one boundary and forward again.
     */
    private void makeRuleStatusValid() {
        if (fLastStatusIndexValid == false) {
            if (fText == null || current() == fText.getBeginIndex()) {
                fLastRuleStatusIndex = 0;
                fLastStatusIndexValid = true;
            } else {
                // next() runs handleNext, which records the status index and
                // marks it valid.
                int pa = current();
                previous();
                int pb = next();
                if (ASSERT) Assert.assrt("pa == pb", pa == pb);
            }
        }
        if (ASSERT) {
            Assert.assrt("fLastStatusIndexValid == true", fLastStatusIndexValid == true);
            Assert.assrt("fLastRuleStatusIndex >= 0  &&  fLastRuleStatusIndex < fRData.fStatusTable.length",
                    fLastRuleStatusIndex >= 0 && fLastRuleStatusIndex < fRData.fStatusTable.length);
        }
    }


    /**
     * Returns a status value for the most recently returned boundary.
     *
     * <p>The status table stores, at the current status index, a count followed
     * by that many values; this method returns the last value of that group.
     *
     * @return the last status value of the current status group
     */
    public int getRuleStatus() {
        makeRuleStatusValid();
        // Count at [index]; values at [index+1 .. index+count]. Adding the
        // count to the index lands on the last value.
        int idx = fLastRuleStatusIndex + fRData.fStatusTable[fLastRuleStatusIndex];
        int tagVal = fRData.fStatusTable[idx];

        return tagVal;
    }


    /**
     * Copies the status values for the most recently returned boundary into the
     * supplied array.
     *
     * @param fillInArray destination for the values; may be null, in which case
     *                    nothing is copied. If it is shorter than the number of
     *                    values, only as many as fit are copied.
     * @return the total number of status values available, which may exceed the
     *         number copied
     */
    public int getRuleStatusVec(int[] fillInArray) {
        makeRuleStatusValid();
        int numStatusVals = fRData.fStatusTable[fLastRuleStatusIndex];
        if (fillInArray != null) {
            int numToCopy = Math.min(numStatusVals, fillInArray.length);
            for (int i = 0; i < numToCopy; i++) {
                fillInArray[i] = fRData.fStatusTable[fLastRuleStatusIndex + i + 1];
            }
        }
        return numStatusVals;
    }


    /**
     * Returns the text iterator in use. The returned object is the live
     * iterator, not a copy.
     *
     * @return the text being iterated over; may be null
     */
    public CharacterIterator getText() {
        return fText;
    }


    /**
     * Sets the text to iterate over and moves to its beginning.
     *
     * @param newText the new text; stored as-is, not copied
     */
    public void setText(CharacterIterator newText) {
        fText = newText;
        this.first();
    }

    /**
     * End-of-text marker returned by the 32-bit iteration helpers. It is
     * distinct from {@link CharacterIterator#DONE} (0xFFFF), which can also
     * occur as a character in the text.
     */
    private static final int CI_DONE32 = 0x7fffffff;

    /**
     * Advances the iterator by one code point and returns the code point at the
     * new position. The iterator is left on the first code unit of that code
     * point.
     *
     * @param ci the iterator to advance
     * @return the code point at the new position, or {@link #CI_DONE32} at the
     *         end of the text
     */
    private static int CINext32(CharacterIterator ci) {
        // If the current position holds a surrogate pair, step over its lead
        // unit first so that the next() below clears the whole pair.
        int c = ci.current();
        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE && c <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
            c = ci.next();
            if (c < UTF16.TRAIL_SURROGATE_MIN_VALUE || c > UTF16.TRAIL_SURROGATE_MAX_VALUE) {
                c = ci.previous();
            }
        }

        c = ci.next();

        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
            c = CINextTrail32(ci, c);
        }

        if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != CI_DONE32) {
            // CINextTrail32 consumed the trail unit; back up onto the lead.
            ci.previous();
        }
        return c;
    }


    /**
     * Completes the decoding of a code unit that is at or above the lead
     * surrogate range. For a lead surrogate followed by a trail surrogate, the
     * combined supplementary code point is returned and the iterator is left on
     * the trail unit. For {@link CharacterIterator#DONE} at the end of the
     * text, {@link #CI_DONE32} is returned. Otherwise the input is returned
     * unchanged.
     *
     * @param ci   the iterator, positioned on the code unit <code>lead</code>
     * @param lead the code unit at the current position
     * @return the decoded code point or {@link #CI_DONE32}
     */
    private static int CINextTrail32(CharacterIterator ci, int lead) {
        int retVal = lead;
        if (lead <= UTF16.LEAD_SURROGATE_MAX_VALUE) {
            char cTrail = ci.next();
            if (UTF16.isTrailSurrogate(cTrail)) {
                retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                        (cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                        UTF16.SUPPLEMENTARY_MIN_VALUE;
            } else {
                // Unpaired lead surrogate: undo the look-ahead.
                ci.previous();
            }
        } else {
            if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
                retVal = CI_DONE32;
            }
        }
        return retVal;
    }

    /**
     * Moves the iterator back by one code point and returns it.
     *
     * @param ci the iterator to move
     * @return the code point at the new position, or {@link #CI_DONE32} when
     *         the iterator is already at the beginning
     */
    private static int CIPrevious32(CharacterIterator ci) {
        if (ci.getIndex() <= ci.getBeginIndex()) {
            return CI_DONE32;
        }
        char trail = ci.previous();
        int retVal = trail;
        if (UTF16.isTrailSurrogate(trail)) {
            char lead = ci.previous();
            if (UTF16.isLeadSurrogate(lead)) {
                retVal = (((int) lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                        ((int) trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                        UTF16.SUPPLEMENTARY_MIN_VALUE;
            } else {
                // Unpaired trail surrogate: undo the look-behind.
                ci.next();
            }
        }
        return retVal;
    }


    /**
     * Returns the code point at the current position without moving the
     * iterator.
     *
     * @param ci the iterator to read from
     * @return the code point at the current position, or {@link #CI_DONE32} at
     *         the end of the text
     */
    private static int CICurrent32(CharacterIterator ci) {
        char lead = ci.current();
        int retVal = lead;
        if (retVal < UTF16.LEAD_SURROGATE_MIN_VALUE) {
            return retVal;
        }
        if (UTF16.isLeadSurrogate(lead)) {
            // Peek at the following unit, then restore the position.
            int trail = (int) ci.next();
            ci.previous();
            if (UTF16.isTrailSurrogate((char) trail)) {
                retVal = ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10) +
                        (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE) +
                        UTF16.SUPPLEMENTARY_MIN_VALUE;
            }
        } else {
            if (lead == CharacterIterator.DONE) {
                if (ci.getIndex() >= ci.getEndIndex()) {
                    retVal = CI_DONE32;
                }
            }
        }
        return retVal;
    }


    /**
     * Runs the state machine forward from the current position using the given
     * state table and moves the iterator to the boundary it finds. Also records
     * the rule status index for that boundary and marks it valid.
     *
     * @param stateTable the state table to run
     * @return the position of the boundary found, or {@link BreakIterator#DONE}
     *         when there is no text or the iterator is already at the end
     */
    private int handleNext(short[] stateTable) {
        if (fTrace) {
            System.out.println("Handle Next pos char state category");
        }

        fLastStatusIndexValid = true;

        if (fText == null) {
            fLastRuleStatusIndex = 0;
            return BreakIterator.DONE;
        }

        int initialPosition = fText.getIndex();
        int result = initialPosition;
        int lookaheadResult = 0;

        int state = START_STATE;
        short category;
        int c = fText.current();
        if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
            c = CINextTrail32(fText, c);
            if (c == CI_DONE32) {
                fLastRuleStatusIndex = 0;
                return BreakIterator.DONE;
            }
        }
        int row = fRData.getRowIndex(state);
        int lookaheadStatus = 0;
        int lookaheadTagIdx = 0;

        fLastRuleStatusIndex = 0;

        category = (short) fRData.fTrie.getCodePointValue(c);

        while (state != STOP_STATE) {
            if (c == CI_DONE32) {
                // Ran off the end of the text.
                if (lookaheadResult > result) {
                    // A pending look-ahead position beats the best accepting
                    // position seen so far.
                    result = lookaheadResult;
                    fLastRuleStatusIndex = lookaheadTagIdx;
                    lookaheadStatus = 0;
                } else if (result == initialPosition) {
                    // No match at all: make sure at least one code point is
                    // consumed.
                    fText.setIndex(initialPosition);
                    CINext32(fText);
                }
                break;
            }
            category = (short) fRData.fTrie.getCodePointValue(c);

            if (fTrace) {
                System.out.print(" " + RBBIDataWrapper.intToString(fText.getIndex(), 5));
                System.out.print(RBBIDataWrapper.intToHexString(c, 10));
                System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
            }

            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
            row = fRData.getRowIndex(state);

            // Fetch the next code point before examining the new state, so
            // that getIndex() below refers to the position after the
            // character just consumed.
            c = (int) fText.next();
            if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
                c = CINextTrail32(fText, c);
            }

            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
                // Unconditional accepting state: remember this position.
                result = fText.getIndex();
                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != CI_DONE32) {
                    // The iterator sits on the trail unit of the look-ahead
                    // character; the boundary is before its lead unit.
                    result--;
                }

                fLastRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
            }

            if (stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0) {
                if (lookaheadStatus != 0
                        && stateTable[row + RBBIDataWrapper.ACCEPTING] == lookaheadStatus) {
                    // The look-ahead rule completed: the boundary is where the
                    // look-ahead position was recorded.
                    result = lookaheadResult;
                    fLastRuleStatusIndex = lookaheadTagIdx;
                    lookaheadStatus = 0;
                    continue;
                }

                // Record a candidate look-ahead position.
                lookaheadResult = fText.getIndex();
                if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c != CI_DONE32) {
                    lookaheadResult--;
                }
                lookaheadStatus = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
                lookaheadTagIdx = stateTable[row + RBBIDataWrapper.TAGIDX];
                continue;
            }

            if (stateTable[row + RBBIDataWrapper.ACCEPTING] != 0) {
                // Any other accepting state cancels a pending look-ahead.
                lookaheadStatus = 0;
            }
        }

        if (result == initialPosition) {
            // Guarantee forward progress: advance by one code point.
            fText.setIndex(initialPosition);
            CINext32(fText);
            result = fText.getIndex();
        }

        fText.setIndex(result);
        if (fTrace) {
            System.out.println("result = " + result);
        }
        return result;
    }


    /**
     * Runs the state machine backwards from the current position using
     * <code>fRData.fRTable</code> and moves the iterator to the position found.
     * {@link #previous()} uses the result as a starting point from which to
     * walk forward.
     *
     * @return the position found; 0 when there is no text or no rule data; the
     *         begin index when <code>fRData.fRTable</code> is null or the start
     *         of the text is reached
     */
    private int handlePrevious() {
        if (fText == null || fRData == null) {
            return 0;
        }
        if (fRData.fRTable == null) {
            fText.first();
            return fText.getIndex();
        }

        short[] stateTable = fRData.fRTable;
        int state = START_STATE;
        int category;
        int result = fText.getIndex();
        int lookaheadResult = 0;
        int lookaheadTagIdx = 0;
        int c = CICurrent32(fText);
        int row;

        row = fRData.getRowIndex(state);
        category = (short) fRData.fTrie.getCodePointValue(c);

        if (fTrace) {
            System.out.println("Handle Prev pos char state category ");
        }

        for (;;) {
            if (c == CI_DONE32) {
                break;
            }

            category = (short) fRData.fTrie.getCodePointValue(c);

            if (fTrace) {
                System.out.print(" " + fText.getIndex() + " ");
                if (0x20 <= c && c < 0x7f) {
                    System.out.print(" " + c + " ");
                } else {
                    System.out.print(" " + Integer.toHexString(c) + " ");
                }
                System.out.println(" " + state + " " + category + " ");
            }

            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
            row = fRData.getRowIndex(state);

            continueOn: {
                if (stateTable[row + RBBIDataWrapper.ACCEPTING] == 0 &&
                        stateTable[row + RBBIDataWrapper.LOOKAHEAD] == 0) {
                    // Ordinary state: nothing to record.
                    break continueOn;
                }

                if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
                    // Unconditional accepting state: remember this position.
                    result = fText.getIndex();
                    break continueOn;
                }

                if (stateTable[row + RBBIDataWrapper.ACCEPTING] == 0 &&
                        stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0) {
                    // Look-ahead marker: record the candidate only when it
                    // lies beyond the best accepting position so far.
                    int r = fText.getIndex();
                    if (r > result) {
                        lookaheadResult = r;
                        lookaheadTagIdx = stateTable[row + RBBIDataWrapper.TAGIDX];
                    }
                    break continueOn;
                }

                if (stateTable[row + RBBIDataWrapper.ACCEPTING] != 0 &&
                        stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0) {
                    // Look-ahead completion. The accepting value is not
                    // compared against the recorded look-ahead status here.
                    if (lookaheadResult > result) {
                        result = lookaheadResult;
                        fLastRuleStatusIndex = lookaheadTagIdx;
                    }
                    break continueOn;
                }
            }

            if (state == STOP_STATE) {
                break;
            }

            c = CIPrevious32(fText);
        }

        if (c == CI_DONE32) {
            // Reached the start of the text.
            result = fText.getBeginIndex();
        }
        fText.setIndex(result);

        return result;
    }


    /**
     * Runs the state machine backwards from the current position using the
     * given state table and moves the iterator to the position found. Marks the
     * cached rule status as stale.
     *
     * @param stateTable the state table to run; may be null
     * @return the position found, or 0 when there is no text or
     *         <code>stateTable</code> is null (the iterator is not moved in
     *         that case)
     */
    private int handlePrevious(short[] stateTable) {
        if (fText == null || stateTable == null) {
            return 0;
        }
        fLastStatusIndexValid = false;

        int state = START_STATE;
        int category;
        int c = CIPrevious32(fText);
        int result = fText.getIndex();
        int lookaheadStatus = 0;
        int lookaheadResult = 0;
        boolean lookAheadHardBreak =
                (stateTable[RBBIDataWrapper.FLAGS+1] & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;

        int row = fRData.getRowIndex(state);

        category = (short) fRData.fTrie.getCodePointValue(c);

        if (fTrace) {
            System.out.println("Handle Prev pos char state category ");
        }

        for (;;) {
            if (c == CI_DONE32) {
                // Reached the start of the text.
                if (fRData.fHeader.fVersion == 1) {
                    // Version 1 data: stop here.
                    if (stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0 &&
                            lookaheadResult == 0) {
                        result = 0;
                    }
                    break;
                }
                // Other versions: feed category 1 to the state machine for the
                // start of text and keep going.
                category = 1;
            } else {
                category = (short) fRData.fTrie.getCodePointValue(c);
            }

            if (fTrace) {
                System.out.print(" " + fText.getIndex() + " ");
                if (0x20 <= c && c < 0x7f) {
                    System.out.print(" " + c + " ");
                } else {
                    System.out.print(" " + Integer.toHexString(c) + " ");
                }
                System.out.println(" " + state + " " + category + " ");
            }

            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
            row = fRData.getRowIndex(state);

            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
                // Unconditional accepting state: remember this position.
                result = fText.getIndex();
            }

            if (stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0) {
                if (lookaheadStatus != 0
                        && stateTable[row + RBBIDataWrapper.ACCEPTING] == lookaheadStatus) {
                    // The look-ahead rule completed.
                    result = lookaheadResult;
                    lookaheadStatus = 0;

                    if (lookAheadHardBreak) {
                        break;
                    }
                    // Resume scanning from the look-ahead position.
                    fText.setIndex(result);
                } else {
                    // Record a candidate look-ahead position.
                    lookaheadResult = fText.getIndex();
                    lookaheadStatus = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
                }
            } else {
                if (stateTable[row + RBBIDataWrapper.ACCEPTING] != 0) {
                    if (!lookAheadHardBreak) {
                        // Another accepting state cancels a pending look-ahead,
                        // unless the table's hard-break flag is set.
                        lookaheadStatus = 0;
                    }
                }
            }

            if (state == STOP_STATE) {
                break;
            }

            c = CIPrevious32(fText);
        }

        fText.setIndex(result);

        return result;
    }

}