package java.text;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.Vector;
import java.util.Stack;
import java.util.Hashtable;
import java.util.Enumeration;
import java.util.MissingResourceException;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import sun.text.CompactByteArray;
import sun.text.SupplementaryCharacterData;

/**
 * A {@link BreakIterator} whose boundaries are found by running the text
 * through state-transition tables loaded from a binary data file.
 *
 * <p>Each character is first mapped to a category (see
 * {@link #lookupCategory(int)}); the category and the current state then
 * select the next state from a forward table ({@link #lookupState(int, int)})
 * or a backward table ({@link #lookupBackwardState(int, int)}).
 *
 * <p>Data file layout, as consumed by {@link #readFile(String)} and
 * {@link #readTables(String)}:
 * <ol>
 *   <li>the magic label {@link #LABEL} ({@code "BIdata\0"}),</li>
 *   <li>one version byte, which must equal {@link #supportedVersion},</li>
 *   <li>a 4-byte length of the remaining data,</li>
 *   <li>a 36-byte header: seven ints (lengths of the forward state table,
 *       backward state table, end-state flags, lookahead-state flags, BMP
 *       data, non-BMP data and additional data) followed by a long
 *       checksum,</li>
 *   <li>the tables themselves, in the order listed above, with the BMP data
 *       preceded by 512 short indices.</li>
 * </ol>
 */
class RuleBasedBreakIterator extends BreakIterator {

    /**
     * Category value for characters that must not cause a state transition.
     */
    protected static final byte IGNORE = -1;

    /**
     * State in which every scan of the state tables begins.
     */
    private static final short START_STATE = 1;

    /**
     * State that terminates a scan of the state tables.
     */
    private static final short STOP_STATE = 0;

    /**
     * Magic number expected at the very start of a data file.
     */
    static final byte[] LABEL = {
        (byte)'B', (byte)'I', (byte)'d', (byte)'a', (byte)'t', (byte)'a',
        (byte)'\0'
    };
    static final int    LABEL_LENGTH = LABEL.length;

    /**
     * The only data file version this class accepts.
     */
    static final byte supportedVersion = 1;

    /**
     * Size in bytes of the table header: seven ints plus one long.
     */
    private static final int HEADER_LENGTH = 36;

    /**
     * Number of short index entries that precede the BMP category data.
     */
    private static final int BMP_INDICES_LENGTH = 512;

    /**
     * Category lookup for code points below
     * {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
     */
    private CompactByteArray charCategoryTable = null;

    /**
     * Category lookup for supplementary code points.
     */
    private SupplementaryCharacterData supplementaryCharCategoryTable = null;

    /**
     * Forward state table, indexed by
     * {@code state * numCategories + category}.
     */
    private short[] stateTable = null;

    /**
     * Backward state table, indexed the same way as {@link #stateTable};
     * used by {@link #handlePrevious()}.
     */
    private short[] backwardsStateTable = null;

    /**
     * Per-state flags; {@code true} marks a state in which a break position
     * is recorded by {@link #handleNext()}.
     */
    private boolean[] endStates = null;

    /**
     * Per-state flags; {@code true} marks a state that takes part in
     * lookahead handling in {@link #handleNext()}.
     */
    private boolean[] lookaheadStates = null;

    /**
     * Optional trailing data from the file; {@code null} when the file
     * declares a length of zero. Not interpreted by this class.
     */
    private byte[] additionalData = null;

    /**
     * Number of columns in the state tables, computed as
     * {@code stateTable.length / endStates.length}.
     */
    private int numCategories;

    /**
     * The text being iterated over; created lazily by {@link #getText()}.
     */
    private CharacterIterator text = null;

    /**
     * Checksum read from the data file header. It identifies the rule set
     * for {@link #equals(Object)}, {@link #hashCode()} and
     * {@link #toString()}.
     */
    private long checksum;

    /**
     * Creates an iterator whose tables are read from the given data file.
     *
     * @param datafile name of the resource, relative to
     *                 {@code /sun/text/resources/}
     * @throws IOException if reading the resource fails
     * @throws MissingResourceException if the resource content is malformed
     */
    public RuleBasedBreakIterator(String datafile)
        throws IOException, MissingResourceException {
        readTables(datafile);
    }

    /**
     * Loads the data file and decodes every table from it.
     *
     * <p>The decoding walks a single running offset through the buffer, so
     * the order of the sections below must match the file layout exactly.
     *
     * @param datafile name of the resource, relative to
     *                 {@code /sun/text/resources/}
     * @throws IOException if reading the resource fails
     * @throws MissingResourceException if the resource content is malformed
     */
    protected void readTables(String datafile)
        throws IOException, MissingResourceException {

        byte[] buffer = readFile(datafile);

        /* Read header_info. */
        int stateTableLength = BreakIterator.getInt(buffer, 0);
        int backwardsStateTableLength = BreakIterator.getInt(buffer, 4);
        int endStatesLength = BreakIterator.getInt(buffer, 8);
        int lookaheadStatesLength = BreakIterator.getInt(buffer, 12);
        int BMPdataLength = BreakIterator.getInt(buffer, 16);
        int nonBMPdataLength = BreakIterator.getInt(buffer, 20);
        int additionalDataLength = BreakIterator.getInt(buffer, 24);
        checksum = BreakIterator.getLong(buffer, 28);

        /* Read stateTable[numCategories * numRows] */
        stateTable = new short[stateTableLength];
        int offset = HEADER_LENGTH;
        for (int i = 0; i < stateTableLength; i++, offset+=2) {
           stateTable[i] = BreakIterator.getShort(buffer, offset);
        }

        /* Read backwardsStateTable[numCategories * numRows] */
        backwardsStateTable = new short[backwardsStateTableLength];
        for (int i = 0; i < backwardsStateTableLength; i++, offset+=2) {
           backwardsStateTable[i] = BreakIterator.getShort(buffer, offset);
        }

        /* Read endStates[numRows]; a byte value of 1 means "true". */
        endStates = new boolean[endStatesLength];
        for (int i = 0; i < endStatesLength; i++, offset++) {
           endStates[i] = buffer[offset] == 1;
        }

        /* Read lookaheadStates[numRows]; a byte value of 1 means "true". */
        lookaheadStates = new boolean[lookaheadStatesLength];
        for (int i = 0; i < lookaheadStatesLength; i++, offset++) {
           lookaheadStates[i] = buffer[offset] == 1;
        }

        /* Read the BMP category table: 512 short indices, then the data. */
        short[] temp1 = new short[BMP_INDICES_LENGTH];
        for (int i = 0; i < BMP_INDICES_LENGTH; i++, offset+=2) {
            temp1[i] = BreakIterator.getShort(buffer, offset);
        }
        byte[] temp2 = new byte[BMPdataLength];
        System.arraycopy(buffer, offset, temp2, 0, BMPdataLength);
        offset += BMPdataLength;
        charCategoryTable = new CompactByteArray(temp1, temp2);

        /* Read the supplementary (non-BMP) category table. */
        int[] temp3 = new int[nonBMPdataLength];
        for (int i = 0; i < nonBMPdataLength; i++, offset+=4) {
            temp3[i] = BreakIterator.getInt(buffer, offset);
        }
        supplementaryCharCategoryTable = new SupplementaryCharacterData(temp3);

        /* Read additional data, if the header declares any. */
        if (additionalDataLength > 0) {
            additionalData = new byte[additionalDataLength];
            System.arraycopy(buffer, offset, additionalData, 0, additionalDataLength);
        }

        /* Set numCategories: one end-state flag exists per table row. */
        numCategories = stateTable.length / endStates.length;
    }

    /**
     * Opens the named resource, validates its label and version, and
     * returns the data that follows the 4-byte length field.
     *
     * <p>The stream is closed on every path, including validation failures.
     *
     * @param datafile name of the resource, relative to
     *                 {@code /sun/text/resources/}
     * @return the bytes following the label, version and length fields
     * @throws IOException if reading or closing the stream fails
     * @throws MissingResourceException if the label, version or a length
     *         does not match what the file declares or this class supports
     */
    protected byte[] readFile(final String datafile)
        throws IOException, MissingResourceException {

        BufferedInputStream is;
        try {
            is = (BufferedInputStream)AccessController.doPrivileged(
                new PrivilegedExceptionAction() {
                    public Object run() throws Exception {
                        return new BufferedInputStream(getClass().getResourceAsStream("/sun/text/resources/" + datafile));
                    }
                }
            );
        }
        catch (PrivilegedActionException e) {
            throw new InternalError(e.toString());
        }

        byte[] data;
        try {
            data = readValidatedData(is, datafile);
        }
        catch (IOException e) {
            // Release the stream, but let the original failure propagate.
            closeQuietly(is);
            throw e;
        }
        catch (RuntimeException e) {
            // Covers MissingResourceException from the validation steps.
            closeQuietly(is);
            throw e;
        }

        is.close();

        return data;
    }

    /**
     * Validates the label/version header and reads the data section.
     *
     * @param is       stream positioned at the start of the data file
     * @param datafile resource name, used only in exception details
     * @return the data section of the file
     * @throws IOException if the underlying read fails
     * @throws MissingResourceException if the file content is malformed
     */
    private static byte[] readValidatedData(BufferedInputStream is,
                                            String datafile)
        throws IOException, MissingResourceException {

        int offset = 0;

        /* First, read magic, version, and header_info. */
        int len = LABEL_LENGTH + 5;
        byte[] buf = new byte[len];
        if (readFully(is, buf) != len) {
            throw new MissingResourceException("Wrong header length",
                                               datafile, "");
        }

        /* Validate the magic number. */
        for (int i = 0; i < LABEL_LENGTH; i++, offset++) {
            if (buf[offset] != LABEL[i]) {
                throw new MissingResourceException("Wrong magic number",
                                                   datafile, "");
            }
        }

        /* Validate the version number. */
        if (buf[offset] != supportedVersion) {
            throw new MissingResourceException("Unsupported version(" + buf[offset] + ")",
                                               datafile, "");
        }

        /* Read data: totalDataSize + 8(for checksum) */
        len = BreakIterator.getInt(buf, ++offset);
        if (len < 0) {
            // A corrupt length must not surface as NegativeArraySizeException.
            throw new MissingResourceException("Wrong data length",
                                               datafile, "");
        }
        buf = new byte[len];
        if (readFully(is, buf) != len) {
            throw new MissingResourceException("Wrong data length",
                                               datafile, "");
        }

        return buf;
    }

    /**
     * Reads until {@code dest} is full or the stream ends.
     *
     * <p>A single {@code read} call may return fewer bytes than requested
     * even when more data follows, so the call is repeated.
     *
     * @param in   stream to read from
     * @param dest buffer to fill from index 0
     * @return the number of bytes actually stored in {@code dest}
     * @throws IOException if the underlying read fails
     */
    private static int readFully(BufferedInputStream in, byte[] dest)
        throws IOException {
        int total = 0;
        while (total < dest.length) {
            int count = in.read(dest, total, dest.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Closes the stream while another exception is already propagating.
     *
     * @param in stream to close
     */
    private static void closeQuietly(BufferedInputStream in) {
        try {
            in.close();
        }
        catch (IOException ignored) {
            // Deliberately ignored: the caller is about to rethrow the
            // failure that made the close necessary.
        }
    }

    /**
     * Returns the additional data read from the file, or whatever was last
     * passed to {@link #setAdditionalData(byte[])}. The array is not copied.
     */
    byte[] getAdditionalData() {
        return additionalData;
    }

    /**
     * Replaces the additional data. The array is stored without copying.
     */
    void setAdditionalData(byte[] b) {
        additionalData = b;
    }

    /**
     * Clones this iterator. The text iterator, if any, is cloned as well so
     * that the copy keeps an independent position.
     *
     * @return a clone of this iterator
     */
    public Object clone() {
        RuleBasedBreakIterator result = (RuleBasedBreakIterator) super.clone();
        if (text != null) {
            result.text = (CharacterIterator) text.clone();
        }
        return result;
    }

    /**
     * Returns {@code true} if {@code that} is a
     * {@code RuleBasedBreakIterator} with the same checksum and an equal
     * text iterator (or both have none).
     */
    public boolean equals(Object that) {
        // instanceof also rejects null, so no separate null check is needed.
        if (!(that instanceof RuleBasedBreakIterator)) {
            return false;
        }

        RuleBasedBreakIterator other = (RuleBasedBreakIterator) that;
        if (checksum != other.checksum) {
            return false;
        }
        if (text == null) {
            return other.text == null;
        } else {
            return text.equals(other.text);
        }
    }

    /**
     * Returns a string of the form {@code [checksum=0x<hex>]}.
     */
    public String toString() {
        return "[" + "checksum=0x" + Long.toHexString(checksum) + "]";
    }

    /**
     * Returns the low 32 bits of the checksum.
     */
    public int hashCode() {
        return (int)checksum;
    }

    /**
     * Moves the iterator to the beginning of the text.
     *
     * @return the index of the beginning of the text
     */
    public int first() {
        CharacterIterator t = getText();

        t.first();
        return t.getIndex();
    }

    /**
     * Moves the iterator to the end index of the text.
     *
     * @return the end index of the text
     */
    public int last() {
        CharacterIterator t = getText();

        // setIndex() is used because CharacterIterator.last() stops on the
        // last character rather than at the end index.
        t.setIndex(t.getEndIndex());
        return t.getIndex();
    }

    /**
     * Advances {@code n} boundaries forward, or {@code -n} boundaries
     * backward when {@code n} is negative. With {@code n == 0} the current
     * position is returned unchanged.
     *
     * @param n number of boundaries to move; the sign selects the direction
     * @return the result of the last step taken, or the current position
     *         if no step was taken
     */
    public int next(int n) {
        int result = current();
        while (n > 0) {
            result = handleNext();
            --n;
        }
        while (n < 0) {
            result = previous();
            ++n;
        }
        return result;
    }

    /**
     * Advances to the next boundary.
     *
     * @return the result of {@link #handleNext()}
     */
    public int next() {
        return handleNext();
    }

    /**
     * Moves to the boundary preceding the current position.
     *
     * @return the new position, or {@link BreakIterator#DONE} if the
     *         iterator is already at the beginning of the text
     */
    public int previous() {
        CharacterIterator text = getText();
        if (current() == text.getBeginIndex()) {
            return BreakIterator.DONE;
        }

        // Step back one character, let handlePrevious() back up further,
        // then walk forward again with handleNext(), remembering the last
        // position found that is still before the starting point.
        int start = current();
        getPrevious();
        int lastResult = handlePrevious();
        int result = lastResult;

        while (result != BreakIterator.DONE && result < start) {
            lastResult = result;
            result = handleNext();
        }

        // The loop overshoots by one step, so reposition explicitly.
        text.setIndex(lastResult);
        return lastResult;
    }

    /**
     * Moves the text iterator back by one code point and returns it.
     * A low surrogate preceded by a high surrogate is combined into one
     * supplementary code point.
     */
    private int getPrevious() {
        char c2 = text.previous();
        if (Character.isLowSurrogate(c2) &&
            text.getIndex() > text.getBeginIndex()) {
            char c1 = text.previous();
            if (Character.isHighSurrogate(c1)) {
                return Character.toCodePoint(c1, c2);
            } else {
                // Unpaired low surrogate: undo the extra step back.
                text.next();
            }
        }
        return (int)c2;
    }

    /**
     * Returns the code point at the current position without moving the
     * text iterator. A high surrogate followed by a low surrogate is
     * combined into one supplementary code point.
     */
    int getCurrent() {
        char c1 = text.current();
        if (Character.isHighSurrogate(c1) &&
            text.getIndex() < text.getEndIndex()) {
            char c2 = text.next();
            text.previous();
            if (Character.isLowSurrogate(c2)) {
                return Character.toCodePoint(c1, c2);
            }
        }
        return (int)c1;
    }

    /**
     * Returns the number of chars occupied by the code point at the current
     * position: 2 for a valid surrogate pair, otherwise 1.
     */
    private int getCurrentCodePointCount() {
        char c1 = text.current();
        if (Character.isHighSurrogate(c1) &&
            text.getIndex() < text.getEndIndex()) {
            char c2 = text.next();
            text.previous();
            if (Character.isLowSurrogate(c2)) {
                return 2;
            }
        }
        return 1;
    }

    /**
     * Advances the text iterator by one code point and returns the code
     * point at the new position.
     *
     * @return the next code point, or {@link CharacterIterator#DONE} if
     *         advancing would reach or pass the end index; in that case
     *         the iterator position is left unchanged
     */
    int getNext() {
        int index = text.getIndex();
        int endIndex = text.getEndIndex();
        if (index == endIndex ||
            (index = index + getCurrentCodePointCount()) >= endIndex) {
            return CharacterIterator.DONE;
        }
        text.setIndex(index);
        return getCurrent();
    }

    /**
     * Returns the index just past the current code point, clamped to the
     * end index. The text iterator is not moved.
     */
    private int getNextIndex() {
        int index = text.getIndex() + getCurrentCodePointCount();
        int endIndex = text.getEndIndex();
        if (index > endIndex) {
            return endIndex;
        } else {
            return index;
        }
    }

    /**
     * Verifies that {@code offset} lies in
     * {@code [text.getBeginIndex(), text.getEndIndex())}.
     *
     * @throws IllegalArgumentException if the offset is outside that range
     */
    protected static final void checkOffset(int offset, CharacterIterator text) {
        if (offset < text.getBeginIndex() || offset >= text.getEndIndex()) {
            throw new IllegalArgumentException("offset out of bounds");
        }
    }

    /**
     * Moves to the first boundary after the given offset.
     *
     * @param offset the position to start searching from
     * @return the first boundary found after {@code offset}, or
     *         {@link BreakIterator#DONE} if {@link #handleNext()} reports
     *         none
     * @throws IllegalArgumentException if {@code offset} is out of bounds
     */
    public int following(int offset) {

        CharacterIterator text = getText();
        checkOffset(offset, text);

        // At the very beginning there is nothing to back up over.
        text.setIndex(offset);
        if (offset == text.getBeginIndex()) {
            return handleNext();
        }

        // Otherwise back up with handlePrevious() and then step forward
        // until a boundary beyond the requested offset is found.
        int result = handlePrevious();
        while (result != BreakIterator.DONE && result <= offset) {
            result = handleNext();
        }
        return result;
    }

    /**
     * Moves to the last boundary before the given offset.
     *
     * @param offset the position to start searching from
     * @return the result of {@link #previous()} started at {@code offset}
     * @throws IllegalArgumentException if {@code offset} is out of bounds
     */
    public int preceding(int offset) {
        CharacterIterator text = getText();
        checkOffset(offset, text);
        text.setIndex(offset);
        return previous();
    }

    /**
     * Reports whether the given offset is a boundary. As a side effect the
     * iterator may be repositioned by {@link #following(int)}.
     *
     * @param offset the position to test
     * @return {@code true} if {@code offset} is the beginning of the text
     *         or is the boundary following {@code offset - 1}
     * @throws IllegalArgumentException if {@code offset} is out of bounds
     */
    public boolean isBoundary(int offset) {
        CharacterIterator text = getText();
        checkOffset(offset, text);
        if (offset == text.getBeginIndex()) {
            return true;
        }

        else {
            return following(offset - 1) == offset;
        }
    }

    /**
     * Returns the current position of the text iterator.
     */
    public int current() {
        return getText().getIndex();
    }

    /**
     * Returns the text iterator in use. If no text has been set yet, an
     * iterator over the empty string is created and stored first, so this
     * method never returns {@code null}.
     */
    public CharacterIterator getText() {
        if (text == null) {
            text = new StringCharacterIterator("");
        }
        return text;
    }

    /**
     * Sets the text to iterate over and moves to its beginning.
     *
     * <p>The iterator is probed first: if it cannot be positioned at its
     * own end index, it is wrapped in a {@link SafeCharIterator} that can.
     *
     * @param newText the text to analyze
     */
    public void setText(CharacterIterator newText) {
        int end = newText.getEndIndex();
        boolean goodIterator;
        try {
            newText.setIndex(end);
            goodIterator = newText.getIndex() == end;
        }
        catch(IllegalArgumentException e) {
            goodIterator = false;
        }

        if (goodIterator) {
            text = newText;
        }
        else {
            text = new SafeCharIterator(newText);
        }
        text.first();
    }

    /**
     * Runs the forward state table from the current position to find the
     * next boundary, and leaves the text iterator there.
     *
     * @return the position of the next boundary, or
     *         {@link BreakIterator#DONE} if the iterator is already at the
     *         end of the text
     */
    protected int handleNext() {
        CharacterIterator text = getText();
        if (text.getIndex() == text.getEndIndex()) {
            return BreakIterator.DONE;
        }

        // Default result: the position just past the current code point.
        int result = getNextIndex();
        int lookaheadResult = 0;

        int state = START_STATE;
        int category;
        int c = getCurrent();

        while (c != CharacterIterator.DONE && state != STOP_STATE) {

            category = lookupCategory(c);

            // IGNORE characters leave the state untouched.
            if (category != IGNORE) {
                state = lookupState(state, category);
            }

            if (lookaheadStates[state]) {
                // A lookahead state that is also an end state commits the
                // position remembered earlier; otherwise remember this one.
                if (endStates[state]) {
                    result = lookaheadResult;
                }
                else {
                    lookaheadResult = getNextIndex();
                }
            }

            else {
                if (endStates[state]) {
                    result = getNextIndex();
                }
            }

            c = getNext();
        }

        // Text ran out while a remembered lookahead position sits exactly
        // at the end: use that position.
        if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) {
            result = lookaheadResult;
        }

        text.setIndex(result);
        return result;
    }

    /**
     * Runs the backward state table from the current position, moving the
     * text iterator backwards until the stop state or the text boundary is
     * reached.
     *
     * @return the position of the text iterator after backing up
     */
    protected int handlePrevious() {
        CharacterIterator text = getText();
        int state = START_STATE;
        int category = 0;
        int lastCategory = 0;
        int c = getCurrent();

        while (c != CharacterIterator.DONE && state != STOP_STATE) {

            // Remember the previous category; it decides how far to step
            // forward again after the loop.
            lastCategory = category;
            category = lookupCategory(c);

            if (category != IGNORE) {
                state = lookupBackwardState(state, category);
            }

            c = getPrevious();
        }

        // The loop stopped on the stop state rather than by running out of
        // text, so it overshot: move forward by two code points, or by one
        // if the previous category was IGNORE.
        if (c != CharacterIterator.DONE) {
            if (lastCategory != IGNORE) {
                getNext();
                getNext();
            }
            else {
                getNext();
            }
        }
        return text.getIndex();
    }

    /**
     * Returns the category of the given code point, using the BMP table
     * for code points below {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}
     * and the supplementary table otherwise.
     */
    protected int lookupCategory(int c) {
        if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
            return charCategoryTable.elementAt((char)c);
        } else {
            return supplementaryCharCategoryTable.getValue(c);
        }
    }

    /**
     * Returns the forward-table entry for the given state and category.
     */
    protected int lookupState(int state, int category) {
        return stateTable[state * numCategories + category];
    }

    /**
     * Returns the backward-table entry for the given state and category.
     */
    protected int lookupBackwardState(int state, int category) {
        return backwardsStateTable[state * numCategories + category];
    }

    /**
     * Wrapper that tracks the index itself, so a wrapped iterator that
     * cannot be positioned at its end index can still be used. The index
     * may range from the begin index to the end index inclusive, and the
     * base iterator is only consulted for positions that hold a character.
     */
    private static final class SafeCharIterator implements CharacterIterator,
                                                           Cloneable {

        private CharacterIterator base;
        private int rangeStart;
        private int rangeLimit;
        private int currentIndex;

        SafeCharIterator(CharacterIterator base) {
            this.base = base;
            this.rangeStart = base.getBeginIndex();
            this.rangeLimit = base.getEndIndex();
            this.currentIndex = base.getIndex();
        }

        public char first() {
            return setIndex(rangeStart);
        }

        /**
         * Moves to the last character. For an empty range the index is set
         * to the end index and {@code DONE} is returned, as the
         * {@link CharacterIterator#last()} contract requires; asking for
         * {@code rangeLimit - 1} there would be rejected by
         * {@link #setIndex(int)}.
         */
        public char last() {
            if (rangeLimit > rangeStart) {
                return setIndex(rangeLimit - 1);
            }
            return setIndex(rangeLimit);
        }

        public char current() {
            if (currentIndex < rangeStart || currentIndex >= rangeLimit) {
                return DONE;
            }
            else {
                return base.setIndex(currentIndex);
            }
        }

        public char next() {

            currentIndex++;
            if (currentIndex >= rangeLimit) {
                // Park at the end index rather than running past it.
                currentIndex = rangeLimit;
                return DONE;
            }
            else {
                return base.setIndex(currentIndex);
            }
        }

        public char previous() {

            currentIndex--;
            if (currentIndex < rangeStart) {
                // Park at the begin index rather than running before it.
                currentIndex = rangeStart;
                return DONE;
            }
            else {
                return base.setIndex(currentIndex);
            }
        }

        public char setIndex(int i) {

            // The end index itself is a legal position here.
            if (i < rangeStart || i > rangeLimit) {
                throw new IllegalArgumentException("Invalid position");
            }
            currentIndex = i;
            return current();
        }

        public int getBeginIndex() {
            return rangeStart;
        }

        public int getEndIndex() {
            return rangeLimit;
        }

        public int getIndex() {
            return currentIndex;
        }

        public Object clone() {

            SafeCharIterator copy = null;
            try {
                copy = (SafeCharIterator) super.clone();
            }
            catch(CloneNotSupportedException e) {
                throw new Error("Clone not supported: " + e);
            }

            // Clone the base too so the copy does not share its position.
            CharacterIterator copyOfBase = (CharacterIterator) base.clone();
            copy.base = copyOfBase;
            return copy;
        }
    }
}