1 7 8 9 13 14 package com.ibm.icu.text; 15 16 import com.ibm.icu.util.CompactByteArray; 17 import com.ibm.icu.impl.Utility; 18 import java.util.Vector ; 19 import java.util.Stack ; 20 import java.util.Hashtable ; 21 import java.util.Enumeration ; 22 import java.text.CharacterIterator ; 23 import java.text.StringCharacterIterator ; 24 25 import java.io.*; 26 27 228 public class RuleBasedBreakIterator_Old extends RuleBasedBreakIterator { 229 230 234 protected static final byte IGNORE = -1; 235 236 240 private static final String IGNORE_VAR = "_ignore_"; 241 242 245 private static final short START_STATE = 1; 246 247 250 private static final short STOP_STATE = 0; 251 252 255 private String description; 256 257 260 private CompactByteArray charCategoryTable = null; 261 262 265 private short[] stateTable = null; 266 267 271 private short[] backwardsStateTable = null; 272 273 277 private boolean[] endStates = null; 278 279 283 private boolean[] lookaheadStates = null; 284 285 289 private int numCategories; 290 291 294 private CharacterIterator text = null; 295 296 300 310 public RuleBasedBreakIterator_Old(String description) { 311 this.description = description; 313 314 Builder builder = makeBuilder(); 316 builder.buildBreakIterator(); 317 } 319 320 324 protected Builder makeBuilder() { 325 return new Builder(); 326 } 327 328 337 public Object clone() 338 { 339 RuleBasedBreakIterator_Old result = (RuleBasedBreakIterator_Old) super.clone(); 340 if (text != null) { 341 result.text = (CharacterIterator ) text.clone(); 342 } 343 return result; 344 } 345 346 351 public boolean equals(Object that) { 352 try { 353 RuleBasedBreakIterator_Old other = (RuleBasedBreakIterator_Old) that; 354 if (!description.equals(other.description)) { 355 return false; 356 } 357 return getText().equals(other.getText()); 358 } 359 catch(ClassCastException e) { 360 return false; 361 } 362 } 363 364 368 public String toString() { 369 return description; 370 } 371 372 377 public int hashCode() 378 { 379 return description.hashCode(); 380 } 381 382 387 public void debugDumpTables() { 389 System.out.println("Character Classes:"); 390 int currentCharClass = 257; 391 int startCurrentRange = 0; 392 int initialStringLength = 0; 393 394 StringBuffer [] charClassRanges = new StringBuffer [numCategories]; 395 for (int i=0; i<numCategories; i++) { 396 charClassRanges[i] = new StringBuffer (); 397 } 398 399 for (int i = 0; i < 0xffff; i++) { 400 if ((int)charCategoryTable.elementAt((char)i) != currentCharClass) { 401 if (currentCharClass != 257) { 402 if (i != startCurrentRange+1) { 404 charClassRanges[currentCharClass].append("-"+ Integer.toHexString(i-1)); 405 } 406 if (charClassRanges[currentCharClass].length() % 72 < initialStringLength % 72) { 407 charClassRanges[currentCharClass].append("\n "); 408 } 409 } 410 411 currentCharClass = (int)charCategoryTable.elementAt((char)i); 413 startCurrentRange = i; 414 initialStringLength = charClassRanges[currentCharClass].length(); 415 if (charClassRanges[currentCharClass].length() > 0) 416 charClassRanges[currentCharClass].append(", "); 417 charClassRanges[currentCharClass].append(Integer.toHexString(i)); 418 } 419 } 420 421 for (int i=0; i<numCategories; i++) { 422 System.out.println(i + ": " + charClassRanges[i]); 423 } 424 425 426 System.out.println("\n\nState Table. *: end state %: look ahead state"); 427 System.out.print("C:\t"); 428 for (int i = 0; i < numCategories; i++) 429 System.out.print(Integer.toString(i) + "\t"); 430 System.out.println(); System.out.print("================================================="); 431 for (int i = 0; i < stateTable.length; i++) { 432 if (i % numCategories == 0) { 433 System.out.println(); 434 if (endStates[i / numCategories]) 435 System.out.print("*"); 436 else 437 System.out.print(" "); 438 if (lookaheadStates[i / numCategories]) { 439 System.out.print("%"); 440 } 441 else 442 System.out.print(" "); 443 System.out.print(Integer.toString(i / numCategories) + ":\t"); 444 } 445 if (stateTable[i] == 0) { 446 System.out.print(".\t"); 447 } else { 448 System.out.print(Integer.toString(stateTable[i]) + "\t"); 449 } 450 } 451 System.out.println(); 452 } 453 455 463 public void writeTablesToFile(FileOutputStream file, boolean littleEndian) throws IOException { 464 DataOutputStream out = new DataOutputStream(file); 467 468 byte[] comment = "Copyright (C) 1999, International Business Machines Corp. and others. All Rights Reserved.".getBytes("US-ASCII"); 470 short headerSize = (short)(comment.length + 1 + 24); short realHeaderSize = (short)(headerSize + ((headerSize % 16 == 0) ? 0 : 16 - (headerSize % 16))); 474 writeSwappedShort(realHeaderSize, out, littleEndian); 475 out.write(0xda); 477 out.write(0x27); 478 writeSwappedShort((short)20, out, littleEndian); 480 writeSwappedShort((short)0, out, littleEndian); 482 483 if (littleEndian) { 485 out.write(0); 486 } else { 487 out.write(1); 488 } 489 490 out.write(0); 492 out.write(2); 494 out.write(0); 496 out.writeInt(0x42524b53); out.writeInt(0); 500 out.writeInt(0); 502 out.write(comment); 504 out.write(0); 505 while (headerSize < realHeaderSize) { 507 out.write(0); 508 ++headerSize; 509 } 510 511 writeSwappedInt(numCategories, out, littleEndian); 514 int fileEnd = 36; 515 writeSwappedInt(fileEnd, out, littleEndian); 517 fileEnd += (description.length() + 1) * 2; 518 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 519 writeSwappedInt(fileEnd, out, littleEndian); 521 fileEnd += charCategoryTable.getIndexArray().length * 2; 522 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 523 writeSwappedInt(fileEnd, out, littleEndian); 525 fileEnd += charCategoryTable.getValueArray().length; 526 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 527 writeSwappedInt(fileEnd, out, littleEndian); 529 fileEnd += stateTable.length * 2; 530 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 531 writeSwappedInt(fileEnd, out, littleEndian); 533 fileEnd += backwardsStateTable.length * 2; 534 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 535 writeSwappedInt(fileEnd, out, littleEndian); 537 fileEnd += endStates.length; 538 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 539 writeSwappedInt(fileEnd, out, littleEndian); 541 fileEnd += lookaheadStates.length; 542 fileEnd += (fileEnd % 4 == 0) ? 0 : 4 - (fileEnd % 4); 543 writeSwappedInt(fileEnd, out, littleEndian); 545 546 for (int i = 0; i < description.length(); i++) 549 writeSwappedShort((short)description.charAt(i), out, littleEndian); 550 out.writeShort(0); 551 if ((description.length() + 1) % 2 == 1) 552 out.writeShort(0); 553 char[] temp1 = charCategoryTable.getIndexArray(); 555 for (int i = 0; i < temp1.length; i++) 556 writeSwappedShort((short)temp1[i], out, littleEndian); 557 if (temp1.length % 2 == 1) 558 out.writeShort(0); 559 byte[] temp2 = charCategoryTable.getValueArray(); 560 out.write(temp2); 561 switch (temp2.length % 4) { 562 case 1: out.write(0); 563 case 2: out.write(0); 564 case 3: out.write(0); 565 default: break; 566 } 567 for (int i = 0; i < stateTable.length; i++) 569 writeSwappedShort(stateTable[i], out, littleEndian); 570 if (stateTable.length % 2 == 1) 571 out.writeShort(0); 572 for (int i = 0; i < backwardsStateTable.length; i++) 573 writeSwappedShort(backwardsStateTable[i], out, littleEndian); 574 if (backwardsStateTable.length % 2 == 1) 575 out.writeShort(0); 576 for (int i = 0; i < endStates.length; i++) 578 out.writeBoolean(endStates[i]); 579 switch (endStates.length % 4) { 580 case 1: out.write(0); 581 case 2: out.write(0); 582 case 3: out.write(0); 583 default: break; 584 } 585 for (int i = 0; i < lookaheadStates.length; i++) 586 out.writeBoolean(lookaheadStates[i]); 587 switch (lookaheadStates.length % 4) { 588 case 1: out.write(0); 589 case 2: out.write(0); 590 case 3: out.write(0); 591 default: break; 592 } 593 } 594 595 598 protected void writeSwappedShort(short x, DataOutputStream out, boolean littleEndian) 599 throws IOException{ 600 if (littleEndian) { 601 out.write((byte)(x & 0xff)); 602 out.write((byte)((x >> 8) & 0xff)); 603 } 604 else { 605 out.write((byte)((x >> 8) & 0xff)); 606 out.write((byte)(x & 0xff)); 607 } 608 } 609 610 613 protected void writeSwappedInt(int x, DataOutputStream out, boolean littleEndian) 614 throws IOException { 615 if (littleEndian) { 616 out.write((byte)(x & 0xff)); 617 out.write((byte)((x >> 8) & 0xff)); 618 out.write((byte)((x >> 16) & 0xff)); 619 out.write((byte)((x >> 24) & 0xff)); 620 } 621 else { 622 out.write((byte)((x >> 24) & 0xff)); 623 out.write((byte)((x >> 16) & 0xff)); 624 out.write((byte)((x >> 8) & 0xff)); 625 out.write((byte)(x & 0xff)); 626 } 627 } 628 630 634 640 public int first() { 641 CharacterIterator t = getText(); 642 643 t.first(); 644 return t.getIndex(); 645 } 646 647 653 public int last() { 654 CharacterIterator t = getText(); 655 656 t.setIndex(t.getEndIndex()); 659 return t.getIndex(); 660 } 661 662 672 public int next(int n) { 673 int result = current(); 674 while (n > 0) { 675 result = handleNext(); 676 --n; 677 } 678 while (n < 0) { 679 result = previous(); 680 ++n; 681 } 682 return result; 683 } 684 685 690 public int next() { 691 return handleNext(); 692 } 693 694 699 public int previous() { 700 CharacterIterator text = getText(); 702 if (current() == text.getBeginIndex()) { 703 return BreakIterator.DONE; 704 } 705 706 int start = current(); 712 text.previous(); 713 int lastResult = handlePrevious(); 714 int result = lastResult; 715 716 while (result != BreakIterator.DONE && result < start) { 720 lastResult = result; 721 result = handleNext(); 722 } 723 724 text.setIndex(lastResult); 727 return lastResult; 728 } 729 730 734 protected static final void checkOffset(int offset, CharacterIterator text) { 735 if (offset < text.getBeginIndex() || offset > text.getEndIndex()) { 736 throw new IllegalArgumentException ("offset out of bounds"); 737 } 738 } 739 740 747 public int following(int offset) { 748 CharacterIterator text = getText(); 751 if (offset == text.getEndIndex()) { 752 return BreakIterator.DONE; 753 } 754 checkOffset(offset, text); 755 756 text.setIndex(offset); 760 if (offset == text.getBeginIndex()) { 761 return handleNext(); 762 } 763 764 int result = handlePrevious(); 772 while (result != BreakIterator.DONE && result <= offset) { 773 result = handleNext(); 774 } 775 return result; 776 } 777 778 785 public int preceding(int offset) { 786 CharacterIterator text = getText(); 790 checkOffset(offset, text); 791 text.setIndex(offset); 792 return previous(); 793 } 794 795 803 public boolean isBoundary(int offset) { 804 CharacterIterator text = getText(); 805 checkOffset(offset, text); 806 if (offset == text.getBeginIndex()) { 807 return true; 808 } 809 810 else { 814 return following(offset - 1) == offset; 815 } 816 } 817 818 823 public int current() { 824 return getText().getIndex(); 825 } 826 827 828 842 public int getRuleStatus() { 843 return 0; 844 } 845 846 847 848 860 public int getRuleStatusVec(int[] fillInArray) { 861 if (fillInArray != null && fillInArray.length >= 1) { 862 fillInArray[0] = 0; 863 } 864 return 1; 865 } 866 867 868 869 877 public CharacterIterator getText() { 878 if (text == null) { 882 text = new StringCharacterIterator(""); 883 } 884 return text; 885 } 886 887 893 public void setText(CharacterIterator newText) { 894 int end = newText.getEndIndex(); 896 newText.setIndex(end); 897 if (newText.getIndex() != end) { 898 text = new SafeCharIterator(newText); 900 } 901 else { 902 text = newText; 903 } 904 text.first(); 905 } 906 907 908 912 920 protected int handleNext() { 921 CharacterIterator text = getText(); 923 if (text.getIndex() == text.getEndIndex()) { 924 return BreakIterator.DONE; 925 } 926 927 int result = text.getIndex() + 1; 929 int lookaheadResult = 0; 930 931 int state = START_STATE; 933 int category; 934 char c = text.current(); 935 char lastC = c; 936 int lastCPos = 0; 937 938 if (lookupCategory(c) == IGNORE) { 943 while (lookupCategory(c) == IGNORE) 944 c = text.next(); 945 946 if (Character.getType(c) == Character.NON_SPACING_MARK || Character.getType(c) 947 == Character.ENCLOSING_MARK) { 948 return text.getIndex(); 949 } 950 } 951 952 while (c != CharacterIterator.DONE && state != STOP_STATE) { 954 955 category = lookupCategory(c); 958 959 if (category != IGNORE) { 962 state = lookupState(state, category); 963 } 964 965 if (lookaheadStates[state]) { 970 if (endStates[state]) { 971 if (lookaheadResult > 0) { 972 result = lookaheadResult; 973 } 974 else { 975 result = text.getIndex() + 1; 976 } 977 } 978 else { 979 lookaheadResult = text.getIndex() + 1; 980 } 981 } 982 983 else { 986 if (endStates[state]) { 987 result = text.getIndex() + 1; 988 } 989 } 990 991 if (category != IGNORE && state != STOP_STATE) { 994 lastC = c; 995 lastCPos = text.getIndex(); 996 } 997 c = text.next(); 998 } 999 1000 if (c == CharacterIterator.DONE && lookaheadResult == text.getEndIndex()) { 1005 result = lookaheadResult; 1006 } 1007 1008 else if ("\n\r\f\u2028\u2029".indexOf(lastC) != -1) { 1013 result = lastCPos + 1; 1014 } 1015 1016 text.setIndex(result); 1017 return result; 1018 } 1019 1020 1028 protected int handlePrevious() { 1029 CharacterIterator text = getText(); 1030 int state = START_STATE; 1031 int category = 0; 1032 int lastCategory = 0; 1033 char c = text.current(); 1034 1035 while (c != CharacterIterator.DONE && state != STOP_STATE) { 1037 1039 lastCategory = category; 1042 category = lookupCategory(c); 1043 1044 if (category != IGNORE) { 1047 state = lookupBackwardState(state, category); 1048 } 1049 1050 c = text.previous(); 1052 } 1053 1054 if (c != CharacterIterator.DONE) { 1060 if (lastCategory != IGNORE) { 1061 text.setIndex(text.getIndex() + 2); 1062 } 1063 else { 1064 text.next(); 1065 } 1066 } 1067 return text.getIndex(); 1069 } 1070 1071 1077 protected int lookupCategory(char c) { 1078 return charCategoryTable.elementAt(c); 1080 } 1081 1082 1088 1089 1094 protected int lookupState(int state, int category) { 1095 return stateTable[state * numCategories + category]; 1096 } 1097 1098 1103 protected int lookupBackwardState(int state, int category) { 1104 return backwardsStateTable[state * numCategories + category]; 1105 } 1106 1107 1113 private static UnicodeSet intersection(UnicodeSet a, UnicodeSet b) 1114 { 1115 UnicodeSet result = new UnicodeSet(a); 1116 1117 result.retainAll(b); 1118 1119 return result; 1120 } 1121 1122 1148 protected class Builder { 1149 1154 protected Vector categories = null; 1155 1156 1161 protected Hashtable expressions = null; 1162 1163 1167 protected UnicodeSet ignoreChars = null; 1168 1169 1173 protected Vector tempStateTable = null; 1174 1175 1181 protected Vector decisionPointList = null; 1182 1183 1188 protected Stack decisionPointStack = null; 1189 1190 1194 protected Vector loopingStates = null; 1195 1196 1202 protected Vector statesToBackfill = null; 1203 1204 1211 protected Vector mergeList = null; 1212 1213 1218 protected boolean clearLoopingStates = false; 1219 1220 1225 protected static final int END_STATE_FLAG = 0x8000; 1226 1227 1232 protected static final int DONT_LOOP_FLAG = 0x4000; 1233 1234 1239 protected static final int LOOKAHEAD_STATE_FLAG = 0x2000; 1240 1241 1246 protected static final int ALL_FLAGS = END_STATE_FLAG | LOOKAHEAD_STATE_FLAG 1247 | DONT_LOOP_FLAG; 1248 1249 1253 public Builder() { 1254 } 1255 1256 1261 public void buildBreakIterator() { 1262 Vector tempRuleList = buildRuleList(description); 1263 buildCharCategories(tempRuleList); 1264 buildStateTable(tempRuleList); 1265 buildBackwardsStateTable(tempRuleList); 1266 } 1267 1268 1276 private Vector buildRuleList(String description) { 1277 1296 Vector tempRuleList = new Vector (); 1300 Stack parenStack = new Stack (); 1301 1302 int p = 0; 1303 int ruleStart = 0; 1304 char c = '\u0000'; 1305 char lastC = '\u0000'; 1306 char lastOpen = '\u0000'; 1307 boolean haveEquals = false; 1308 boolean havePipe = false; 1309 boolean sawVarName = false; 1310 boolean sawIllegalChar = false; 1311 int illegalCharPos = 0; 1312 final String charsThatCantPrecedeAsterisk = "=/<(|>*+?;\u0000"; 1313 1314 if (description.length() != 0 && description.charAt(description.length() - 1) != ';') { 1316 description = description + ";"; 1317 } 1318 1319 while (p < description.length()) { 1321 c = description.charAt(p); 1322 switch (c) { 1323 case '{': 1326 case '[': 1327 case '(': 1328 if (lastOpen == '{') { 1329 error("Can't nest brackets inside {}", p, description); 1330 } 1331 if (lastOpen == '[' && c != '[') { 1332 error("Can't nest anything in [] but []", p, description); 1333 } 1334 1335 if (c == '{' && (haveEquals || havePipe)) { 1338 error("Unknown variable name", p, description); 1339 } 1340 1341 lastOpen = c; 1342 parenStack.push(new Character (c)); 1343 if (c == '{') { 1344 sawVarName = true; 1345 } 1346 break; 1347 1348 case '}': 1352 case ']': 1353 case ')': 1354 char expectedClose = '\u0000'; 1355 switch (lastOpen) { 1356 case '{': 1357 expectedClose = '}'; 1358 break; 1359 case '[': 1360 expectedClose = ']'; 1361 break; 1362 case '(': 1363 expectedClose = ')'; 1364 break; 1365 } 1366 if (c != expectedClose) { 1367 error("Unbalanced parentheses", p, description); 1368 } 1369 if (lastC == lastOpen) { 1370 error("Parens don't contain anything", p, description); 1371 } 1372 parenStack.pop(); 1373 if (!parenStack.empty()) { 1374 lastOpen = ((Character )(parenStack.peek())).charValue(); 1375 } 1376 else { 1377 lastOpen = '\u0000'; 1378 } 1379 1380 break; 1381 1382 case '*': case '+': case '?': 1385 if (charsThatCantPrecedeAsterisk.indexOf(lastC) != -1 1386 && (c != '?' || lastC != '*')) { 1387 error("Misplaced *, +, or ?", p, description); 1388 } 1389 break; 1390 1391 case '=': 1394 if (haveEquals || havePipe) { 1395 error("More than one = or / in rule", p, description); 1396 } 1397 haveEquals = true; 1398 sawIllegalChar = false; 1399 break; 1400 1401 case '/': 1404 if (haveEquals || havePipe) { 1405 error("More than one = or / in rule", p, description); 1406 } 1407 if (sawVarName) { 1408 error("Unknown variable name", p, description); 1409 } 1410 havePipe = true; 1411 break; 1412 1413 case '!': 1416 if (lastC != ';' && lastC != '\u0000') { 1417 error("! can only occur at the beginning of a rule", p, description); 1418 } 1419 break; 1420 1421 case '\\': 1424 ++p; 1425 break; 1426 1427 case '.': 1429 break; 1430 1431 case '^': 1435 case '-': 1436 case ':': 1437 case '&': 1438 if (lastOpen != '[' && lastOpen != '{' && !sawIllegalChar) { 1439 sawIllegalChar = true; 1440 illegalCharPos = p; 1441 } 1442 break; 1443 1444 case ';': 1446 if (sawIllegalChar) { 1449 error("Illegal character", illegalCharPos, description); 1450 } 1451 1452 if (lastC == ';' || lastC == '\u0000') { 1455 error("Empty rule", p, description); 1456 } 1457 if (!parenStack.empty()) { 1458 error("Unbalanced parenheses", p, description); 1459 } 1460 1461 if (parenStack.empty()) { 1462 if (haveEquals) { 1466 description = processSubstitution(description.substring(ruleStart, 1467 p), description, p + 1); 1468 } 1469 else { 1470 if (sawVarName) { 1473 error("Unknown variable name", p, description); 1474 } 1475 1476 tempRuleList.addElement(description.substring(ruleStart, p)); 1478 } 1479 1480 ruleStart = p + 1; 1482 haveEquals = havePipe = sawVarName = sawIllegalChar = false; 1483 } 1484 break; 1485 1486 case '|': 1490 if (lastC == '|') { 1491 error("Empty alternative", p, description); 1492 } 1493 if (parenStack.empty() || lastOpen != '(') { 1494 error("Misplaced |", p, description); 1495 } 1496 break; 1497 1498 default: 1501 if (c >= ' ' && c < '\u007f' && !Character.isLetter(c) 1502 && !Character.isDigit(c) && !sawIllegalChar) { 1503 sawIllegalChar = true; 1504 illegalCharPos = p; 1505 } 1506 break; 1507 } 1508 lastC = c; 1509 ++p; 1510 } 1511 if (tempRuleList.size() == 0) { 1512 error("No valid rules in description", p, description); 1513 } 1514 return tempRuleList; 1515 } 1516 1517 1525 protected String processSubstitution(String substitutionRule, String description, 1526 int startPos) { 1527 String replace; 1529 String replaceWith; 1530 int equalPos = substitutionRule.indexOf('='); 1531 if (substitutionRule.charAt(0) != '$') { 1532 error("Missing '$' on left-hand side of =", startPos, description); 1533 } 1534 replace = substitutionRule.substring(1, equalPos); 1535 replaceWith = substitutionRule.substring(equalPos + 1); 1536 1537 handleSpecialSubstitution(replace, replaceWith, startPos, description); 1542 1543 if (replaceWith.length() == 0) { 1545 error("Nothing on right-hand side of =", startPos, description); 1546 } 1547 if (replace.length() == 0) { 1548 error("Nothing on left-hand side of =", startPos, description); 1549 } 1550 if (!(replaceWith.charAt(0) == '[' && replaceWith.charAt(replaceWith.length() - 1) 1551 == ']') && !(replaceWith.charAt(0) == '(' && replaceWith.charAt( 1552 replaceWith.length() - 1) == ')')) { 1553 error("Illegal right-hand side for =", startPos, description); 1554 } 1555 1556 replace = "$" + replace; 1560 StringBuffer result = new StringBuffer (); 1561 result.append(description.substring(0, startPos)); 1562 int lastPos = startPos; 1563 int pos = description.indexOf(replace, startPos); 1564 while (pos != -1) { 1565 if (description.charAt(pos-1) == ';' && 1568 description.charAt(pos + replace.length()) == '=') { 1569 error("Attempt to redefine " + replace, pos, description); 1570 } 1571 result.append(description.substring(lastPos, pos)); 1572 result.append(replaceWith); 1573 lastPos = pos + replace.length(); 1574 pos = description.indexOf(replace, lastPos); 1575 } 1576 result.append(description.substring(lastPos)); 1577 return result.toString(); 1578 } 1579 1580 1590 protected void handleSpecialSubstitution(String replace, String replaceWith, 1591 int startPos, String description) { 1592 if (replace.equals(IGNORE_VAR)) { 1597 if (replaceWith.charAt(0) == '(') { 1598 error("Ignore group can't be enclosed in (", startPos, description); 1599 } 1600 ignoreChars = new UnicodeSet(replaceWith, false); 1601 } 1602 } 1603 1604 1613 protected void buildCharCategories(Vector tempRuleList) { 1614 int bracketLevel = 0; 1615 int p = 0; 1616 int lineNum = 0; 1617 1618 expressions = new Hashtable (); 1621 while (lineNum < tempRuleList.size()) { 1622 String line = (String )(tempRuleList.elementAt(lineNum)); 1623 p = 0; 1624 while (p < line.length()) { 1625 char c = line.charAt(p); 1626 switch (c) { 1627 case '(': case ')': case '*': case '.': case '/': 1629 case '|': case ';': case '?': case '!': case '+': 1630 break; 1631 1632 case '[': 1635 int q = p + 1; 1636 ++bracketLevel; 1637 while (q < line.length() && bracketLevel != 0) { 1638 c = line.charAt(q); 1639 if (c == '[') { 1640 ++bracketLevel; 1641 } 1642 else if (c == ']') { 1643 --bracketLevel; 1644 } 1645 ++q; 1646 } 1647 if (expressions.get(line.substring(p, q)) == null) { 1648 expressions.put(line.substring(p, q), new UnicodeSet(line. 1649 substring(p, q), false)); 1650 } 1652 p = q - 1; 1653 break; 1654 1655 case '\\': 1658 ++p; 1659 c = line.charAt(p); 1660 1662 default: 1664 UnicodeSet s = new UnicodeSet(); 1665 s.add(line.charAt(p)); 1666 expressions.put(line.substring(p, p + 1), s); 1667 break; 1669 } 1670 ++p; 1671 } 1672 ++lineNum; 1673 } 1674 1675 categories = new Vector (); 1677 if (ignoreChars != null) { 1678 categories.addElement(ignoreChars); 1679 } 1680 else { 1681 categories.addElement(new UnicodeSet()); 1682 } 1683 ignoreChars = null; 1684 1685 mungeExpressionList(expressions); 1687 1688 1700 Enumeration iter = expressions.elements(); 1702 while (iter.hasMoreElements()) { 1703 UnicodeSet work = new UnicodeSet((UnicodeSet)iter.nextElement()); 1705 1706 for (int j = categories.size() - 1; !work.isEmpty() && j > 0; j--) { 1708 1709 UnicodeSet cat = (UnicodeSet)(categories.elementAt(j)); 1712 UnicodeSet overlap = intersection(work, cat); 1713 1714 if (!overlap.isEmpty()) { 1715 if (!overlap.equals(cat)) { 1720 cat.removeAll(overlap); 1721 categories.addElement(overlap); 1722 } 1723 1724 work.removeAll(overlap); 1727 } 1728 } 1729 1730 if (!work.isEmpty()) { 1733 categories.addElement(work); 1734 } 1735 } 1736 1737 UnicodeSet allChars = new UnicodeSet(); 1741 for (int i = 1; i < categories.size(); i++) 1742 allChars.addAll((UnicodeSet)(categories.elementAt(i))); 1743 UnicodeSet ignoreChars = (UnicodeSet)(categories.elementAt(0)); 1744 ignoreChars.removeAll(allChars); 1745 1746 iter = expressions.keys(); 1752 while (iter.hasMoreElements()) { 1753 String key = (String )iter.nextElement(); 1754 UnicodeSet cs = (UnicodeSet)expressions.get(key); 1755 StringBuffer cats = new StringBuffer (); 1756 1757 for (int j = 1; j < categories.size(); j++) { 1759 UnicodeSet cat = new UnicodeSet((UnicodeSet) categories.elementAt(j)); 1760 1761 if (cs.containsAll(cat)) { 1763 1764 cats.append((char)(0x100 + j)); 1767 if (cs.equals(cat)) { 1768 break; 1769 } 1770 } 1771 } 1772 1773 expressions.put(key, cats.toString()); 1776 } 1777 1778 charCategoryTable = new CompactByteArray((byte)0); 1782 1783 for (int i = 0; i < categories.size(); i++) { 1785 UnicodeSet chars = (UnicodeSet)(categories.elementAt(i)); 1786 int n = chars.getRangeCount(); 1787 1788 for (int j = 0; j < n; ++j) { 1790 int rangeStart = chars.getRangeStart(j); 1791 1792 if (rangeStart >= 0x10000) { 1794 break; 1795 } 1796 1797 if (i != 0) { 1799 charCategoryTable.setElementAt((char)rangeStart, 1800 (char)chars.getRangeEnd(j), (byte)i); 1801 } 1802 1803 else { 1808 charCategoryTable.setElementAt((char)rangeStart, 1809 (char)chars.getRangeEnd(j), IGNORE); 1810 } 1811 } 1812 } 1813 1814 charCategoryTable.compact(); 1816 1817 numCategories = categories.size(); 1819 } 1820 1821 1822 protected void mungeExpressionList(Hashtable expressions) { 1823 } 1826 1827 1832 private void buildStateTable(Vector tempRuleList) { 1833 tempStateTable = new Vector (); 1838 tempStateTable.addElement(new short[numCategories + 1]); 1839 tempStateTable.addElement(new short[numCategories + 1]); 1840 1841 for (int i = 0; i < tempRuleList.size(); i++) { 1845 String rule = (String )tempRuleList.elementAt(i); 1846 if (rule.charAt(0) != '!') { 1847 parseRule(rule, true); 1848 } 1849 } 1850 1851 finishBuildingStateTable(true); 1854 1877 } 1878 1879 1886 private void parseRule(String rule, boolean forward) { 1887 1950 int p = 0; 1951 int currentState = 1; int lastState = currentState; 1953 String pendingChars = ""; 1954 1955 decisionPointStack = new Stack (); 1956 decisionPointList = new Vector (); 1957 loopingStates = new Vector (); 1958 statesToBackfill = new Vector (); 1959 1960 short[] state; 1961 boolean sawEarlyBreak = false; 1962 1963 if (!forward) { 1966 loopingStates.addElement(new Integer (1)); 1967 } 1968 1969 decisionPointList.addElement(new Integer (currentState)); currentState = tempStateTable.size() - 1; while (p < rule.length()) { 1975 char c = rule.charAt(p); 1976 clearLoopingStates = false; 1977 1978 if (c == '[' 1981 || c == '\\' 1982 || Character.isLetter(c) 1983 || Character.isDigit(c) 1984 || c < ' ' 1985 || c == '.' 1986 || c >= '\u007f') { 1987 1988 if (c != '.') { 1991 int q = p; 1992 1993 if (c == '\\') { 1996 q = p + 2; 1997 ++p; 1998 } 1999 2000 else if (c == '[') { 2003 int bracketLevel = 1; 2004 while (bracketLevel > 0) { 2005 ++q; 2006 c = rule.charAt(q); 2007 if (c == '[') { 2008 ++bracketLevel; 2009 } 2010 else if (c == ']') { 2011 --bracketLevel; 2012 } 2013 else if (c == '\\') { 2014 ++q; 2015 } 2016 } 2017 ++q; 2018 } 2019 2020 else { 2022 q = p + 1; 2023 } 2024 2025 pendingChars = (String )expressions.get(rule.substring(p, q)); 2028 2029 p = q - 1; 2031 } 2032 2033 else { 2035 int rowNum = ((Integer )decisionPointList.lastElement()).intValue(); 2036 state = (short[])tempStateTable.elementAt(rowNum); 2037 2038 if (p + 1 < rule.length() && rule.charAt(p + 1) == '*' && state[0] != 0) { 2041 decisionPointList.addElement(new Integer (state[0])); 2042 pendingChars = ""; 2043 ++p; 2044 if (p + 1 < rule.length() && rule.charAt(p + 1) == '?') { 2045 setLoopingStates(decisionPointList, decisionPointList); 2047 ++p; 2048 } 2049 } 2051 2052 else { 2055 StringBuffer temp = new StringBuffer (); 2056 for (int i = 0; i < numCategories; i++) 2057 temp.append((char)(i + 0x100)); 2058 pendingChars = temp.toString(); 2059 } 2060 } 2061 2062 if (pendingChars.length() != 0) { 2065 2066 if (p + 1 < rule.length() && ( 2069 rule.charAt(p + 1) == '*' || 2070 rule.charAt(p + 1) == '?' 2071 )) { 2072 decisionPointStack.push(decisionPointList.clone()); 2073 } 2074 2075 int newState = tempStateTable.size(); 2080 if (loopingStates.size() != 0) { 2081 statesToBackfill.addElement(new Integer (newState)); 2082 } 2083 state = new short[numCategories + 1]; 2084 if (sawEarlyBreak) { 2085 state[numCategories] = DONT_LOOP_FLAG; 2086 } 2087 tempStateTable.addElement(state); 2088 2089 updateStateTable(decisionPointList, pendingChars, (short)newState); 2094 decisionPointList.removeAllElements(); 2095 2096 lastState = currentState; 2099 do { 2100 ++currentState; 2101 decisionPointList.addElement(new Integer (currentState)); 2102 } while (currentState + 1 < tempStateTable.size()); 2103 } 2104 } 2105 2106 if (c == '+' || c == '*' || c == '?') { 2109 if (c == '*' || c == '+') { 2112 for (int i = lastState + 1; i < tempStateTable.size(); i++) { 2115 Vector temp = new Vector (); 2116 temp.addElement(new Integer (i)); 2117 updateStateTable(temp, pendingChars, (short)(lastState + 1)); 2118 } 2119 2120 while (currentState + 1 < tempStateTable.size()) { 2124 decisionPointList.addElement(new Integer (++currentState)); 2125 } 2126 } 2127 2128 if (c == '*' || c == '?') { 2133 Vector temp = (Vector )decisionPointStack.pop(); 2134 for (int i = 0; i < decisionPointList.size(); i++) 2135 temp.addElement(decisionPointList.elementAt(i)); 2136 decisionPointList = temp; 2137 2138 if (c == '*' && p + 1 < rule.length() && rule.charAt(p + 1) == '?') { 2154 setLoopingStates(decisionPointList, decisionPointList); 2156 ++p; 2157 } 2158 } 2159 } 2160 2161 if (c == '(') { 2179 2180 tempStateTable.addElement(new short[numCategories + 1]); 2183 2184 lastState = currentState; 2187 ++currentState; 2188 2189 decisionPointList.insertElementAt(new Integer (currentState), 0); 2192 2193 decisionPointStack.push(decisionPointList.clone()); 2198 decisionPointStack.push(new Vector ()); 2199 } 2200 2201 if (c == '|') { 2205 2206 Vector oneDown = (Vector )decisionPointStack.pop(); 2208 Vector twoDown = (Vector )decisionPointStack.peek(); 2209 decisionPointStack.push(oneDown); 2210 2211 for (int i = 0; i < decisionPointList.size(); i++) 2215 oneDown.addElement(decisionPointList.elementAt(i)); 2216 decisionPointList = (Vector )twoDown.clone(); 2217 } 2218 2219 if (c == ')') { 2230 2231 Vector exitPoints = (Vector )decisionPointStack.pop(); 2235 for (int i = 0; i < decisionPointList.size(); i++) 2236 exitPoints.addElement(decisionPointList.elementAt(i)); 2237 decisionPointList = exitPoints; 2238 2239 if (p + 1 >= rule.length() || ( 2242 rule.charAt(p + 1) != '*' && 2243 rule.charAt(p + 1) != '+' && 2244 rule.charAt(p + 1) != '?') 2245 ) { 2246 decisionPointStack.pop(); 2247 } 2248 2249 else { 2252 2253 exitPoints = (Vector )decisionPointList.clone(); 2256 2257 Vector temp = (Vector )decisionPointStack.pop(); 2259 2260 int tempStateNum = ((Integer )temp.firstElement()).intValue(); 2264 short[] tempState = (short[])tempStateTable.elementAt(tempStateNum); 2265 2266 if (rule.charAt(p + 1) == '?' || rule.charAt(p + 1) == '*') { 2269 for (int i = 0; i < decisionPointList.size(); i++) 2270 temp.addElement(decisionPointList.elementAt(i)); 2271 decisionPointList = temp; 2272 } 2273 2274 if (rule.charAt(p + 1) == '+' || rule.charAt(p + 1) == '*') { 2277 for (int i = 0; i < tempState.length; i++) { 2278 if (tempState[i] > tempStateNum) { 2279 updateStateTable(exitPoints, 2280 new Character ((char)(i + 0x100)).toString(), 2281 tempState[i]); 2282 } 2283 } 2284 } 2285 2286 lastState = currentState; 2288 currentState = tempStateTable.size() - 1; 2289 ++p; 2290 } 2291 } 2292 2293 if (c == '/') { 2298 sawEarlyBreak = true; 2299 for (int i = 0; i < decisionPointList.size(); i++) { 2300 state = (short[])tempStateTable.elementAt(((Integer )decisionPointList. 2301 elementAt(i)).intValue()); 2302 state[numCategories] |= LOOKAHEAD_STATE_FLAG; 2303 } 2304 } 2305 2306 2319 2320 if (clearLoopingStates) { 2326 setLoopingStates(null, decisionPointList); 2327 } 2328 2329 ++p; 2332 } 2333 2334 setLoopingStates(null, decisionPointList); 2336 2337 for (int i = 0; i < decisionPointList.size(); i++) { 2354 int rowNum = ((Integer )decisionPointList.elementAt(i)).intValue(); 2355 state = (short[])tempStateTable.elementAt(rowNum); 2356 state[numCategories] |= END_STATE_FLAG; 2357 if (sawEarlyBreak) { 2358 state[numCategories] |= LOOKAHEAD_STATE_FLAG; 2359 } 2360 } 2361 2372 } 2373 2374 2375 2383 private void updateStateTable(Vector rows, 2384 String pendingChars, 2385 short newValue) { 2386 short[] newValues = new short[numCategories + 1]; 2390 for (int i = 0; i < pendingChars.length(); i++) 2391 newValues[(int)(pendingChars.charAt(i)) - 0x100] = newValue; 2392 2393 for (int i = 0; i < rows.size(); i++) { 2396 mergeStates(((Integer )rows.elementAt(i)).intValue(), newValues, rows); 2397 } 2398 } 2399 2400 2420 private void mergeStates(int rowNum, 2421 short[] newValues, 2422 Vector rowsBeingUpdated) { 2423 short[] oldValues = (short[])(tempStateTable.elementAt(rowNum)); 2424 2432 2433 boolean isLoopingState = loopingStates.contains(new Integer (rowNum)); 2434 2435 for (int i = 0; i < oldValues.length; i++) { 2437 2438 if (oldValues[i] == newValues[i]) { 2440 continue; 2441 } 2442 2443 else if (isLoopingState && loopingStates.contains(new Integer (oldValues[i]))) { 2447 if (newValues[i] != 0) { 2448 if (oldValues[i] == 0) { 2449 clearLoopingStates = true; 2450 } 2451 oldValues[i] = newValues[i]; 2452 } 2453 } 2454 2455 else if (oldValues[i] == 0) { 2458 oldValues[i] = newValues[i]; 2459 } 2460 2461 else if (i == numCategories) { 2464 oldValues[i] = (short)((newValues[i] & ALL_FLAGS) | oldValues[i]); 2465 } 2466 2467 else if (oldValues[i] != 0 && newValues[i] != 0) { 2470 2471 int combinedRowNum = searchMergeList(oldValues[i], newValues[i]); 2474 if (combinedRowNum != 0) { 2475 oldValues[i] = (short)combinedRowNum; 2476 } 2477 2478 else { 2480 int oldRowNum = oldValues[i]; 2482 int newRowNum = newValues[i]; 2483 combinedRowNum = tempStateTable.size(); 2484 2485 if (mergeList == null) { 2488 mergeList = new Vector (); 2489 } 2490 mergeList.addElement(new int[] { oldRowNum, newRowNum, combinedRowNum }); 2491 2492 2494 short[] newRow = new short[numCategories + 1]; 2499 short[] oldRow = (short[])(tempStateTable.elementAt(oldRowNum)); 2500 System.arraycopy(oldRow, 0, newRow, 0, numCategories + 1); 2501 tempStateTable.addElement(newRow); 2502 oldValues[i] = (short)combinedRowNum; 2503 2504 2505 2510 if ((decisionPointList.contains(new Integer (oldRowNum)) 2513 || decisionPointList.contains(new Integer (newRowNum))) 2514 && !decisionPointList.contains(new Integer (combinedRowNum)) 2515 ) { 2516 decisionPointList.addElement(new Integer (combinedRowNum)); 2517 } 2518 2519 if ((rowsBeingUpdated.contains(new Integer (oldRowNum)) 2521 || rowsBeingUpdated.contains(new Integer (newRowNum))) 2522 && !rowsBeingUpdated.contains(new Integer (combinedRowNum)) 2523 ) { 2524 decisionPointList.addElement(new Integer (combinedRowNum)); 2525 } 2526 for (int k = 0; k < decisionPointStack.size(); k++) { 2529 Vector dpl = (Vector )decisionPointStack.elementAt(k); 2530 if ((dpl.contains(new Integer (oldRowNum)) 2531 || dpl.contains(new Integer (newRowNum))) 2532 && !dpl.contains(new Integer (combinedRowNum)) 2533 ) { 2534 dpl.addElement(new Integer (combinedRowNum)); 2535 } 2536 } 2537 2538 mergeStates(combinedRowNum, (short[])(tempStateTable.elementAt( 2542 newValues[i])), rowsBeingUpdated); 2543 } 2544 } 2545 } 2546 return; 2547 } 2548 2549 2556 private int searchMergeList(int a, int b) { 2557 if (mergeList == null) { 2559 return 0; 2560 } 2561 2562 else { 2564 int[] entry; 2565 for (int i = 0; i < mergeList.size(); i++) { 2566 entry = (int[])(mergeList.elementAt(i)); 2567 2568 if ((entry[0] == a && entry[1] == b) || (entry[0] == b && entry[1] == a)) { 2572 return entry[2]; 2573 } 2574 2575 if ((entry[2] == a && (entry[0] == b || entry[1] == b))) { 2578 return entry[2]; 2579 } 2580 if ((entry[2] == b && (entry[0] == a || entry[1] == a))) { 2581 return entry[2]; 2582 } 2583 } 2584 return 0; 2585 } 2586 } 2587 2588 2598 private void setLoopingStates(Vector newLoopingStates, Vector endStates) { 2599 2600 if (!loopingStates.isEmpty()) { 2604 int loopingState = ((Integer )loopingStates.lastElement()).intValue(); 2605 int rowNum; 2606 2607 for (int i = 0; i < endStates.size(); i++) { 2611 eliminateBackfillStates(((Integer )endStates.elementAt(i)).intValue()); 2612 } 2613 2614 for (int i = 0; i < statesToBackfill.size(); i++) { 2622 rowNum = ((Integer )statesToBackfill.elementAt(i)).intValue(); 2623 short[] state = (short[])tempStateTable.elementAt(rowNum); 2624 state[numCategories] = 2625 (short)((state[numCategories] & ALL_FLAGS) | loopingState); 2626 } 2627 statesToBackfill.removeAllElements(); 2628 loopingStates.removeAllElements(); 2629 } 2630 2631 if (newLoopingStates != null) { 2632 loopingStates = (Vector )newLoopingStates.clone(); 2633 } 2634 } 2635 2636 2641 private void eliminateBackfillStates(int baseState) { 2642 2643 if (statesToBackfill.contains(new Integer (baseState))) { 2645 2646 statesToBackfill.removeElement(new Integer (baseState)); 2648 2649 short[] state = (short[])tempStateTable.elementAt(baseState); 2652 for (int i = 0; i < numCategories; i++) { 2653 if (state[i] != 0) { 2654 eliminateBackfillStates(state[i]); 2655 } 2656 } 2657 } 2658 } 2659 2660 2664 private void backfillLoopingStates() { 2665 short[] state; 2666 short[] loopingState = null; 2667 int loopingStateRowNum = 0; 2668 int fromState; 2669 2670 for (int i = 0; i < tempStateTable.size(); i++) { 2672 state = (short[])tempStateTable.elementAt(i); 2673 2674 fromState = state[numCategories] & ~ALL_FLAGS; 2679 if (fromState > 0) { 2680 2681 if (fromState != loopingStateRowNum) { 2683 loopingStateRowNum = fromState; 2684 loopingState = (short[])tempStateTable.elementAt(loopingStateRowNum); 2685 } 2686 2687 state[numCategories] &= ALL_FLAGS; 2689 2690 for (int j = 0; j < state.length; j++) { 2693 if (state[j] == 0) { 2694 state[j] = loopingState[j]; 2695 } 2696 else if (state[j] == DONT_LOOP_FLAG) { 2697 state[j] = 0; 2698 } 2699 } 2700 } 2701 } 2702 } 2703 2704 2710 private void finishBuildingStateTable(boolean forward) { 2711 backfillLoopingStates(); 2714 2716 int[] rowNumMap = new int[tempStateTable.size()]; 2717 Stack rowsToFollow = new Stack (); 2718 rowsToFollow.push(new Integer (1)); 2719 rowNumMap[1] = 1; 2720 2721 while (rowsToFollow.size() != 0) { 2725 int rowNum = ((Integer )rowsToFollow.pop()).intValue(); 2726 short[] row = (short[])(tempStateTable.elementAt(rowNum)); 2727 2728 for (int i = 0; i < numCategories; i++) { 2729 if (row[i] != 0) { 2730 if (rowNumMap[row[i]] == 0) { 2731 rowNumMap[row[i]] = row[i]; 2732 rowsToFollow.push(new Integer (row[i])); 2733 } 2734 } 2735 } 2736 } 2737 2743 2744 int newRowNum; 2746 2747 2752 int[] stateClasses = new int[tempStateTable.size()]; 2755 int nextClass = numCategories + 1; 2756 short[] state1, state2; 2757 for (int i = 1; i < stateClasses.length; i++) { 2758 if (rowNumMap[i] == 0) { 2759 continue; 2760 } 2761 state1 = (short[])tempStateTable.elementAt(i); 2762 for (int j = 0; j < numCategories; j++) { 2763 if (state1[j] != 0) { 2764 ++stateClasses[i]; 2765 } 2766 } 2767 if (stateClasses[i] == 0) { 2768 stateClasses[i] = nextClass; 2769 } 2770 } 2771 ++nextClass; 2772 2773 int currentClass; 2780 int lastClass; 2781 boolean split; 2782 2783 do { 2784 currentClass = 1; 2786 lastClass = nextClass; 2787 while (currentClass < nextClass) { 2788 split = false; 2790 state1 = state2 = null; 2791 for (int i = 0; i < stateClasses.length; i++) { 2792 if (stateClasses[i] == currentClass) { 2793 if (state1 == null) { 2795 state1 = (short[])tempStateTable.elementAt(i); 2796 } 2797 else { 2798 state2 = (short[])tempStateTable.elementAt(i); 2799 for (int j = 0; j < state2.length; j++) 2800 if ((j == numCategories && state1[j] != state2[j] && forward) 2801 || (j != numCategories && stateClasses[state1[j]] 2802 != stateClasses[state2[j]])) { 2803 stateClasses[i] = nextClass; 2804 split = true; 2805 break; 2806 } 2807 } 2808 } 2809 } 2810 if (split) { 2811 ++nextClass; 2812 } 2813 ++currentClass; 2814 } 2816 } while (lastClass != nextClass); 2817 2818 int[] representatives = new int[nextClass]; 2821 for (int i = 1; i < stateClasses.length; i++) 2822 if (representatives[stateClasses[i]] == 0) { 2823 representatives[stateClasses[i]] = i; 2824 } 2825 else { 2826 rowNumMap[i] = representatives[stateClasses[i]]; 2827 } 2828 2830 for (int i = 1; i < rowNumMap.length; i++) { 2833 if (rowNumMap[i] != i) { 2834 tempStateTable.setElementAt(null, i); 2835 } 2836 } 2837 2838 newRowNum = 1; 2844 for (int i = 1; i < rowNumMap.length; i++) { 2845 if (tempStateTable.elementAt(i) != null) { 2846 rowNumMap[i] = newRowNum++; 2847 } 2848 } 2849 for (int i = 1; i < rowNumMap.length; i++) { 2850 if (tempStateTable.elementAt(i) == null) { 2851 rowNumMap[i] = rowNumMap[rowNumMap[i]]; 2852 } 2853 } 2854 2856 2859 if (forward) { 2861 endStates = new boolean[newRowNum]; 2862 lookaheadStates = new boolean[newRowNum]; 2863 stateTable = new short[newRowNum * numCategories]; 2864 int p = 0; 2865 int p2 = 0; 2866 for (int i = 0; i < tempStateTable.size(); i++) { 2867 short[] row = (short[])(tempStateTable.elementAt(i)); 2868 if (row == null) { 2869 continue; 2870 } 2871 for (int j = 0; j < numCategories; j++) { 2872 stateTable[p] = (short)(rowNumMap[row[j]]); 2873 ++p; 2874 } 2875 endStates[p2] = ((row[numCategories] & END_STATE_FLAG) != 0); 2876 lookaheadStates[p2] = ((row[numCategories] & LOOKAHEAD_STATE_FLAG) != 0); 2877 ++p2; 2878 } 2879 } 2880 2881 else { 2883 backwardsStateTable = new short[newRowNum * numCategories]; 2884 int p = 0; 2885 for (int i = 0; i < tempStateTable.size(); i++) { 2886 short[] row = (short[])(tempStateTable.elementAt(i)); 2887 if (row == null) { 2888 continue; 2889 } 2890 for (int j = 0; j < numCategories; j++) { 2891 backwardsStateTable[p] = (short)(rowNumMap[row[j]]); 2892 ++p; 2893 } 2894 } 2895 } 2896 } 2897 2898 2903 private void buildBackwardsStateTable(Vector tempRuleList) { 2904 2905 tempStateTable = new Vector (); 2909 tempStateTable.addElement(new short[numCategories + 1]); 2910 tempStateTable.addElement(new short[numCategories + 1]); 2911 2912 for (int i = 0; i < tempRuleList.size(); i++) { 2920 String rule = (String )tempRuleList.elementAt(i); 2921 if (rule.charAt(0) == '!') { 2922 parseRule(rule.substring(1), false); 2923 } 2924 } 2925 backfillLoopingStates(); 2926 2927 2946 int backTableOffset = tempStateTable.size(); 2953 if (backTableOffset > 2) { 2954 ++backTableOffset; 2955 } 2956 2957 for (int i = 0; i < numCategories + 1; i++) 2963 tempStateTable.addElement(new short[numCategories + 1]); 2964 2965 short[] state = (short[])tempStateTable.elementAt(backTableOffset - 1); 2966 for (int i = 0; i < numCategories; i++) 2967 state[i] = (short)(i + backTableOffset); 2968 2969 int numRows = stateTable.length / numCategories; 2982 for (int column = 0; column < numCategories; column++) { 2983 for (int row = 0; row < numRows; row++) { 2984 int nextRow = lookupState(row, column); 2985 if (nextRow != 0) { 2986 for (int nextColumn = 0; nextColumn < numCategories; nextColumn++) { 2987 int cellValue = lookupState(nextRow, nextColumn); 2988 if (cellValue != 0) { 2989 state = (short[])tempStateTable.elementAt(nextColumn + 2990 backTableOffset); 2991 state[column] = (short)(column + backTableOffset); 2992 } 2993 } 2994 } 2995 } 2996 } 2997 2998 if (backTableOffset > 1) { 3006 3007 state = (short[])tempStateTable.elementAt(1); 3012 for (int i = backTableOffset - 1; i < tempStateTable.size(); i++) { 3013 short[] state2 = (short[])tempStateTable.elementAt(i); 3014 for (int j = 0; j < numCategories; j++) { 3015 if (state[j] != 0 && state2[j] != 0) { 3016 state2[j] = state[j]; 3017 } 3018 } 3019 } 3020 3021 state = (short[])tempStateTable.elementAt(backTableOffset - 1); 3026 for (int i = 1; i < backTableOffset - 1; i++) { 3027 short[] state2 = (short[])tempStateTable.elementAt(i); 3028 if ((state2[numCategories] & END_STATE_FLAG) == 0) { 3029 for (int j = 0; j < numCategories; j++) { 3030 if (state2[j] == 0) { 3031 state2[j] = state[j]; 3032 } 3033 } 3034 } 3035 } 3036 } 3037 3038 3040 finishBuildingStateTable(false); 3043 3057 } 3058 3059 3068 protected void error(String message, int position, String context) { 3069 throw new IllegalArgumentException ("Parse error: " + message + "\n" + 3070 Utility.escape(context.substring(0, position)) + "\n\n" + 3071 Utility.escape(context.substring(position))); 3072 } 3073 3074 3078 protected void debugPrintVector(String label, Vector v) { 3079 System.out.print(label); 3080 for (int i = 0; i < v.size(); i++) 3081 System.out.print(v.elementAt(i).toString() + "\t"); 3082 System.out.println(); 3083 } 3084 3085 3088 protected void debugPrintVectorOfVectors(String label1, String label2, Vector v) { 3089 System.out.println(label1); 3090 for (int i = 0; i < v.size(); i++) 3091 debugPrintVector(label2, (Vector )v.elementAt(i)); 3092 } 3093 3094 3097 protected void debugPrintTempStateTable() { 3098 System.out.println(" tempStateTable:"); 3099 System.out.print(" C:\t"); 3100 for (int i = 0; i <= numCategories; i++) 3101 System.out.print(Integer.toString(i) + "\t"); 3102 System.out.println(); 3103 for (int i = 1; i < tempStateTable.size(); i++) { 3104 short[] row = (short[])(tempStateTable.elementAt(i)); 3105 System.out.print(" " + i + ":\t"); 3106 for (int j = 0; j < row.length; j++) { 3107 if (row[j] == 0) { 3108 System.out.print(".\t"); 3109 } 3110 else { 3111 System.out.print(Integer.toString(row[j]) + "\t"); 3112 } 3113 } 3114 System.out.println(); 3115 } 3116 } 3117 3118 } 3119 3121 3130 private static final class SafeCharIterator implements CharacterIterator , 3133 Cloneable { 3134 3135 private CharacterIterator base; 3136 private int rangeStart; 3137 private int rangeLimit; 3138 private int currentIndex; 3139 3140 SafeCharIterator(CharacterIterator base) { 3141 this.base = base; 3142 this.rangeStart = base.getBeginIndex(); 3143 this.rangeLimit = base.getEndIndex(); 3144 this.currentIndex = base.getIndex(); 3145 } 3146 3147 public char first() { 3148 return setIndex(rangeStart); 3149 } 3150 3151 public char last() { 3152 return setIndex(rangeLimit - 1); 3153 } 3154 3155 public char current() { 3156 if (currentIndex < rangeStart || currentIndex >= rangeLimit) { 3157 return DONE; 3158 } 3159 else { 3160 return base.setIndex(currentIndex); 3161 } 3162 } 3163 3164 public char next() { 3165 3166 currentIndex++; 3167 if (currentIndex >= rangeLimit) { 3168 currentIndex = rangeLimit; 3169 return DONE; 3170 } 3171 else { 3172 return base.setIndex(currentIndex); 3173 } 3174 } 3175 3176 public char previous() { 3177 3178 currentIndex--; 3179 if (currentIndex < rangeStart) { 3180 currentIndex = rangeStart; 3181 return DONE; 3182 } 3183 else { 3184 return base.setIndex(currentIndex); 3185 } 3186 } 3187 3188 public char setIndex(int i) { 3189 3190 if (i < rangeStart || i > rangeLimit) { 3191 throw new IllegalArgumentException ("Invalid position"); 3192 } 3193 currentIndex = i; 3194 return current(); 3195 } 3196 3197 public int getBeginIndex() { 3198 return rangeStart; 3199 } 3200 3201 public int getEndIndex() { 3202 return rangeLimit; 3203 } 3204 3205 public int getIndex() { 3206 return currentIndex; 3207 } 3208 3209 public Object clone() { 3210 3211 SafeCharIterator copy = null; 3212 try { 3213 copy = (SafeCharIterator) super.clone(); 3214 } 3215 catch(CloneNotSupportedException e) { 3216 throw new Error ("Clone not supported: " + e); 3217 } 3218 3219 CharacterIterator copyOfBase = (CharacterIterator ) base.clone(); 3220 copy.base = copyOfBase; 3221 return copy; 3222 } 3223 } 3224 3226 3230 public static void debugPrintln(String s) { 3231 final String zeros = "0000"; 3232 String temp; 3233 StringBuffer out = new StringBuffer (); 3234 for (int i = 0; i < s.length(); i++) { 3235 char c = s.charAt(i); 3236 if (c >= ' ' && c < '\u007f') { 3237 out.append(c); 3238 } 3239 else { 3240 out.append("\\u"); 3241 temp = Integer.toHexString((int)c); 3242 out.append(zeros.substring(0, 4 - temp.length())); 3243 out.append(temp); 3244 } 3245 } 3246 System.out.println(out); 3247 } 3248 } 3250 3251 | Popular Tags |