package com.ibm.icu.text;

import com.ibm.icu.impl.data.ResourceReader;
import com.ibm.icu.impl.Utility;
import java.util.Vector;
import java.util.Hashtable;
import java.text.ParsePosition;
import com.ibm.icu.lang.*;
import com.ibm.icu.impl.UCharacterProperty;

/**
 * Parser for transliterator rule text.
 *
 * <p>A call to one of the {@code parse} methods reads a sequence of rule
 * lines and fills in {@link #dataVector}, {@link #idBlockVector} and
 * {@link #compoundFilter}. Rule text consists of {@code ::ID} statements,
 * {@code use ...;} pragmas, variable definitions ({@code $x = ...;}) and
 * transliteration rules joined by one of the operators in
 * {@link #OPERATORS}.
 *
 * <p>Syntax errors are reported as {@link IllegalArgumentException}; up to
 * 30 messages are collected and thrown together at the end of parsing.
 * Exhaustion of the stand-in character range is reported as a plain
 * {@link RuntimeException} and is not collected.
 */
class TransliteratorParser {

    //----------------------------------------------------------------------
    // Data members
    //----------------------------------------------------------------------

    /**
     * Output: the {@code RuleBasedTransliterator.Data} objects, one per
     * contiguous block of rules. In the reverse direction blocks are
     * inserted at the front, so the vector is in execution order.
     */
    public Vector dataVector;

    /**
     * Output: strings, each holding the canonical IDs of one contiguous
     * block of {@code ::ID} statements, each ID followed by ';'.
     */
    public Vector idBlockVector;

    /** The data object that rules are currently being added to. */
    private RuleBasedTransliterator.Data curData;

    /** Output: the global filter parsed from the rules, or null if none. */
    public UnicodeSet compoundFilter;

    /** Direction being parsed (Transliterator.FORWARD or REVERSE). */
    private int direction;

    /** Symbol table handed to UnicodeSet when parsing embedded sets. */
    private ParseData parseData;

    /**
     * Objects (sets, matchers, replacers) represented by stand-in
     * characters. The stand-in for element {@code i} is
     * {@code curData.variablesBase + i}. Set to null once parsing finishes.
     */
    private Vector variablesVector;

    /** Maps variable name (String, without the '$') to its char[] value. */
    private Hashtable variableNames;

    /**
     * Stand-in characters for the segments of the rule being parsed;
     * index {@code n-1} holds the stand-in for segment {@code n}, or 0 if
     * none has been assigned yet.
     */
    private StringBuffer segmentStandins;

    /**
     * StringMatcher objects for the segments of the rule being parsed;
     * index {@code n-1} holds segment {@code n}, or null if the segment
     * has been referenced but not yet defined.
     */
    private Vector segmentObjects;

    /** The next stand-in character to hand out. */
    private char variableNext;

    /**
     * Exclusive upper limit of the stand-in range. Temporarily decremented
     * by {@link #appendVariableDef} to reserve a placeholder for a variable
     * that is being defined, and restored by {@link #parseRule}.
     */
    private char variableLimit;

    /**
     * Name of the (single) undefined variable seen in the rule currently
     * being parsed, or null. A variable-definition statement requires it
     * to be non-null; any other statement requires it to be null.
     */
    private String undefinedVariableName;

    /** Stand-in character for the DOT set, or -1 if not yet allocated. */
    private int dotStandIn = -1;

    //----------------------------------------------------------------------
    // Constants
    //----------------------------------------------------------------------

    // Token that introduces an ID statement.
    private static final String ID_TOKEN = "::";
    private static final int ID_TOKEN_LEN = 2;

    // Operators
    private static final char VARIABLE_DEF_OP = '=';
    private static final char FORWARD_RULE_OP = '>';
    private static final char REVERSE_RULE_OP = '<';
    // Internal code for "<>"; never appears literally in rule text.
    private static final char FWDREV_RULE_OP = '~';

    // Characters accepted as a rule operator, and characters that end a
    // rule half (the operators plus END_OF_RULE).
    private static final String OPERATORS = "=><\u2190\u2192\u2194";
    private static final String HALF_ENDERS = "=><\u2190\u2192\u2194;";

    // Other special characters
    private static final char QUOTE = '\'';
    private static final char ESCAPE = '\\';
    private static final char END_OF_RULE = ';';
    private static final char RULE_COMMENT_CHAR = '#';

    private static final char CONTEXT_ANTE = '{';
    private static final char CONTEXT_POST = '}';
    private static final char CURSOR_POS = '|';
    private static final char CURSOR_OFFSET = '@';
    private static final char ANCHOR_START = '^';

    private static final char KLEENE_STAR = '*';
    private static final char ONE_OR_MORE = '+';
    private static final char ZERO_OR_ONE = '?';

    private static final char DOT = '.';
    private static final String DOT_SET = "[^[:Zp:][:Zl:]\\r\\n$]";

    private static final char SEGMENT_OPEN = '(';
    private static final char SEGMENT_CLOSE = ')';

    private static final char FUNCTION = '&';

    // Non-ASCII aliases for the operators and the function marker.
    private static final char ALT_REVERSE_RULE_OP = '\u2190';
    private static final char ALT_FORWARD_RULE_OP = '\u2192';
    private static final char ALT_FWDREV_RULE_OP = '\u2194';
    private static final char ALT_FUNCTION = '\u2206';

    // Characters that may not appear unquoted at the top level of a rule
    // half, inside a segment, and inside a function argument respectively.
    private static final UnicodeSet ILLEGAL_TOP = new UnicodeSet("[\\)]");

    private static final UnicodeSet ILLEGAL_SEG = new UnicodeSet("[\\{\\}\\|\\@]");

    private static final UnicodeSet ILLEGAL_FUNC = new UnicodeSet("[\\^\\(\\.\\*\\+\\?\\{\\}\\|\\@]");

    //----------------------------------------------------------------------
    // class ParseData
    //----------------------------------------------------------------------

    /**
     * SymbolTable view of the enclosing parser's variable tables, used
     * while the rules are being parsed.
     */
    private class ParseData implements SymbolTable {

        /**
         * Returns the value of the named variable, or null if it is not
         * defined.
         */
        public char[] lookup(String name) {
            return (char[]) variableNames.get(name);
        }

        /**
         * Returns the object represented by stand-in character {@code ch},
         * or null if {@code ch} is not a stand-in currently in use.
         */
        public UnicodeMatcher lookupMatcher(int ch) {
            // Stand-ins are allocated consecutively from variablesBase.
            int i = ch - curData.variablesBase;
            if (i >= 0 && i < variablesVector.size()) {
                return (UnicodeMatcher) variablesVector.elementAt(i);
            }
            return null;
        }

        /**
         * Parses a variable name (a Unicode identifier) from {@code text}
         * starting at {@code pos}. On success advances {@code pos} past
         * the name and returns it; otherwise leaves {@code pos} unchanged
         * and returns null.
         */
        public String parseReference(String text, ParsePosition pos, int limit) {
            int start = pos.getIndex();
            int i = start;
            while (i < limit) {
                char c = text.charAt(i);
                if ((i == start && !Character.isUnicodeIdentifierStart(c)) ||
                    !Character.isUnicodeIdentifierPart(c)) {
                    break;
                }
                ++i;
            }
            if (i == start) { // No valid name characters
                return null;
            }
            pos.setIndex(i);
            return text.substring(start, i);
        }

        /**
         * Returns true if {@code ch} is either not a stand-in currently in
         * use (i.e. a literal) or a stand-in for a UnicodeMatcher.
         */
        public boolean isMatcher(int ch) {
            int i = ch - curData.variablesBase;
            if (i >= 0 && i < variablesVector.size()) {
                return variablesVector.elementAt(i) instanceof UnicodeMatcher;
            }
            return true;
        }

        /**
         * Returns true if {@code ch} is either not a stand-in currently in
         * use (i.e. a literal) or a stand-in for a UnicodeReplacer.
         */
        public boolean isReplacer(int ch) {
            int i = ch - curData.variablesBase;
            if (i >= 0 && i < variablesVector.size()) {
                return variablesVector.elementAt(i) instanceof UnicodeReplacer;
            }
            return true;
        }
    }

    //----------------------------------------------------------------------
    // classes RuleBody, RuleArray, and RuleReader
    //----------------------------------------------------------------------

    /**
     * A source of rule lines. Subclasses supply raw lines through
     * {@link #handleNextLine}; {@link #nextLine} joins lines that end in a
     * backslash with the line that follows.
     */
    private static abstract class RuleBody {

        /**
         * Returns the next logical line, or null at end of input. While a
         * raw line ends with a backslash, the backslash is removed and the
         * following raw line is appended.
         */
        String nextLine() {
            String s = handleNextLine();
            if (s != null &&
                s.length() > 0 &&
                s.charAt(s.length() - 1) == '\\') {

                StringBuffer b = new StringBuffer(s);
                do {
                    b.deleteCharAt(b.length() - 1);
                    s = handleNextLine();
                    if (s == null) {
                        break;
                    }
                    b.append(s);
                } while (s.length() > 0 &&
                         s.charAt(s.length() - 1) == '\\');

                s = b.toString();
            }
            return s;
        }

        /** Rewinds this source to its first line. */
        abstract void reset();

        /** Returns the next raw line, or null at end of input. */
        abstract String handleNextLine();
    }

    /** RuleBody backed by an array of strings. */
    private static class RuleArray extends RuleBody {
        String[] array;
        int i;
        public RuleArray(String[] array) { this.array = array; i = 0; }
        public String handleNextLine() {
            return (i < array.length) ? array[i++] : null;
        }
        public void reset() {
            i = 0;
        }
    }

    /** RuleBody backed by a ResourceReader. */
    private static class RuleReader extends RuleBody {
        ResourceReader reader;
        public RuleReader(ResourceReader reader) { this.reader = reader; }
        public String handleNextLine() {
            try {
                return reader.readLine();
            } catch (java.io.IOException ignored) {
                // A read failure is treated the same as end of input.
            }
            return null;
        }
        public void reset() {
            reader.reset();
        }
    }

    //----------------------------------------------------------------------
    // class RuleHalf
    //----------------------------------------------------------------------

    /**
     * One side (left or right of the operator) of a rule. Parsing converts
     * the rule text into {@link #text}, in which sets, segments, functions
     * and quantified elements are replaced by stand-in characters, and
     * records the positions of context markers, cursor and anchors.
     */
    private static class RuleHalf {

        /** The parsed text, with stand-ins substituted. */
        public String text;

        /** Position in {@link #text} of the cursor marker, or -1. */
        public int cursor = -1;
        /** Position in {@link #text} of the ante-context marker, or -1. */
        public int ante = -1;
        /** Position in {@link #text} of the post-context marker, or -1. */
        public int post = -1;

        /**
         * Count of cursor-offset characters: negative when they precede
         * the text (after a leading cursor), positive when they follow it
         * (before a trailing cursor), 0 if there are none.
         */
        public int cursorOffset = 0;

        /**
         * Buffer length at which the first trailing cursor-offset
         * character was seen.
         */
        private int cursorOffsetPos = 0;

        public boolean anchorStart = false;
        public boolean anchorEnd = false;

        /** Number to assign to the next segment opened in this half. */
        private int nextSegmentNumber = 1;

        /**
         * Parses one half of a rule.
         *
         * @param rule   the rule text
         * @param pos    index of the first character to parse
         * @param limit  index after the last character to parse
         * @param parser the owning parser
         * @return the index after the character that terminated this half,
         *         or {@code limit} if the text was exhausted
         * @throws IllegalArgumentException on a syntax error
         */
        public int parse(String rule, int pos, int limit,
                         TransliteratorParser parser) {
            int start = pos;
            StringBuffer buf = new StringBuffer();
            pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_TOP, false);
            text = buf.toString();

            // Trailing cursor-offset characters must be followed
            // immediately by the cursor itself.
            if (cursorOffset > 0 && cursor != cursorOffsetPos) {
                syntaxError("Misplaced " + CURSOR_POS, rule, start);
            }

            return pos;
        }

        /**
         * Parses a section of a rule half: either the whole half, or the
         * body of a segment or function argument (invoked recursively).
         *
         * @param rule      the rule text
         * @param pos       index of the first character to parse
         * @param limit     index after the last character to parse
         * @param parser    the owning parser
         * @param buf       buffer the parsed text is appended to
         * @param illegal   characters that may not appear unquoted here
         * @param isSegment true when parsing inside parentheses; reaching
         *                  a half-ender is then an "Unclosed segment" error
         * @return the index after the terminating character (a half-ender
         *         or a segment close), or {@code limit}
         * @throws IllegalArgumentException on a syntax error
         */
        private int parseSection(String rule, int pos, int limit,
                                 TransliteratorParser parser,
                                 StringBuffer buf,
                                 UnicodeSet illegal,
                                 boolean isSegment) {
            int start = pos;
            ParsePosition pp = null;
            // Buffer range of the most recent quoted string and variable
            // expansion; a quantifier directly following one of them
            // applies to the whole range.
            int quoteStart = -1;
            int quoteLimit = -1;
            int varStart = -1;
            int varLimit = -1;
            int[] iref = new int[1];
            int bufStart = buf.length();

            main:
            while (pos < limit) {
                // Since all syntax characters are in the BMP, fetching
                // 16-bit code units suffices here.
                char c = rule.charAt(pos++);
                if (UCharacterProperty.isRuleWhiteSpace(c)) {
                    continue;
                }
                // HALF_ENDERS is all chars that end a rule half: "<>=;"
                // and the non-ASCII operator aliases.
                if (HALF_ENDERS.indexOf(c) >= 0) {
                    if (isSegment) {
                        syntaxError("Unclosed segment", rule, start);
                    }
                    break main;
                }
                if (anchorEnd) {
                    // Text after a presumed end anchor is a syntax error.
                    syntaxError("Malformed variable reference", rule, start);
                }
                if (UnicodeSet.resemblesPattern(rule, pos - 1)) {
                    if (pp == null) {
                        pp = new ParsePosition(0);
                    }
                    pp.setIndex(pos - 1); // Backup to opening '['
                    buf.append(parser.parseSet(rule, pp));
                    pos = pp.getIndex();
                    continue;
                }
                // Handle escapes
                if (c == ESCAPE) {
                    if (pos == limit) {
                        syntaxError("Trailing backslash", rule, start);
                    }
                    iref[0] = pos;
                    int escaped = Utility.unescapeAt(rule, iref);
                    pos = iref[0];
                    if (escaped == -1) {
                        syntaxError("Malformed escape", rule, start);
                    }
                    parser.checkVariableRange(escaped, rule, start);
                    UTF16.append(buf, escaped);
                    continue;
                }
                // Handle quoted matter
                if (c == QUOTE) {
                    int iq = rule.indexOf(QUOTE, pos);
                    if (iq == pos) {
                        buf.append(c); // Parse [''] outside quotes as [']
                        ++pos;
                    } else {
                        // Remember where the quoted text lands in the
                        // buffer so that a following quantifier can apply
                        // to the entire quoted string.
                        quoteStart = buf.length();
                        for (;;) {
                            if (iq < 0) {
                                syntaxError("Unterminated quote", rule, start);
                            }
                            buf.append(rule.substring(pos, iq));
                            pos = iq + 1;
                            if (pos < limit && rule.charAt(pos) == QUOTE) {
                                // Parse [''] inside quotes as [']
                                iq = rule.indexOf(QUOTE, pos + 1);
                                // Continue looping
                            } else {
                                break;
                            }
                        }
                        quoteLimit = buf.length();

                        for (iq = quoteStart; iq < quoteLimit; ++iq) {
                            parser.checkVariableRange(buf.charAt(iq), rule, start);
                        }
                    }
                    continue;
                }

                parser.checkVariableRange(c, rule, start);

                if (illegal.contains(c)) {
                    syntaxError("Illegal character '" + c + '\'', rule, start);
                }

                switch (c) {

                //------------------------------------------------------
                // Elements allowed within and out of segments
                //------------------------------------------------------
                case ANCHOR_START:
                    if (buf.length() == 0 && !anchorStart) {
                        anchorStart = true;
                    } else {
                        syntaxError("Misplaced anchor start",
                                    rule, start);
                    }
                    break;
                case SEGMENT_OPEN:
                    {
                        // bufSegStart is the offset in buf to the first
                        // character of the segment being parsed.
                        int bufSegStart = buf.length();

                        // Number the segment by its opening parenthesis,
                        // before any nested segments are parsed.
                        int segmentNumber = nextSegmentNumber++;

                        // Parse the segment body recursively.
                        pos = parseSection(rule, pos, limit, parser, buf, ILLEGAL_SEG, true);

                        // Wrap what was just parsed in a StringMatcher and
                        // replace it in the buffer with the segment's
                        // stand-in character.
                        StringMatcher m =
                            new StringMatcher(buf.substring(bufSegStart),
                                              segmentNumber, parser.curData);

                        // Record and associate object and segment number
                        parser.setSegmentObject(segmentNumber, m);
                        buf.setLength(bufSegStart);
                        buf.append(parser.getSegmentStandin(segmentNumber));
                    }
                    break;
                case FUNCTION:
                case ALT_FUNCTION:
                    {
                        iref[0] = pos;
                        TransliteratorIDParser.SingleID single = TransliteratorIDParser.parseFilterID(rule, iref);
                        // The next character MUST be a segment open
                        if (single == null ||
                            !Utility.parseChar(rule, iref, SEGMENT_OPEN)) {
                            syntaxError("Invalid function", rule, start);
                        }

                        Transliterator t = single.getInstance();
                        if (t == null) {
                            syntaxError("Invalid function ID", rule, start);
                        }

                        // bufSegStart is the offset in buf to the first
                        // character of the function argument.
                        int bufSegStart = buf.length();

                        // Parse the argument recursively.
                        pos = parseSection(rule, iref[0], limit, parser, buf, ILLEGAL_FUNC, true);

                        // Wrap the argument in a FunctionReplacer and
                        // replace it in the buffer with a stand-in.
                        FunctionReplacer r =
                            new FunctionReplacer(t,
                                new StringReplacer(buf.substring(bufSegStart), parser.curData));

                        buf.setLength(bufSegStart);
                        buf.append(parser.generateStandInFor(r));
                    }
                    break;
                case SymbolTable.SYMBOL_REF:
                    // Handle variable references and segment references.
                    {
                        if (pos == limit) {
                            // A symbol-reference character at the end of
                            // the text is an end anchor.
                            anchorEnd = true;
                            break;
                        }
                        // Parse "$1" "$2" .. "$9" .. (no upper limit)
                        c = rule.charAt(pos);
                        int r = UCharacter.digit(c, 10);
                        if (r >= 1 && r <= 9) {
                            iref[0] = pos;
                            r = Utility.parseNumber(rule, iref, 10);
                            if (r < 0) {
                                syntaxError("Undefined segment reference",
                                            rule, start);
                            }
                            pos = iref[0];
                            buf.append(parser.getSegmentStandin(r));
                        } else {
                            if (pp == null) { // Lazy create
                                pp = new ParsePosition(0);
                            }
                            pp.setIndex(pos);
                            String name = parser.parseData.
                                parseReference(rule, pp, limit);
                            if (name == null) {
                                // Not followed by a variable name, so
                                // treat it as an end anchor. If anything
                                // other than whitespace or a half-ender
                                // follows, the anchorEnd check at the top
                                // of the loop reports an error.
                                anchorEnd = true;
                                break;
                            }
                            pos = pp.getIndex();
                            // Remember where the expansion lands in the
                            // buffer so that a following quantifier can
                            // apply to the entire variable value.
                            varStart = buf.length();
                            parser.appendVariableDef(name, buf);
                            varLimit = buf.length();
                        }
                    }
                    break;
                case DOT:
                    buf.append(parser.getDotStandIn());
                    break;
                case KLEENE_STAR:
                case ONE_OR_MORE:
                case ZERO_OR_ONE:
                    // Quantifiers apply to the preceding quoted string,
                    // variable expansion, or single buffer character
                    // (which may itself be a stand-in).
                    {
                        if (isSegment && buf.length() == bufStart) {
                            // The quantifier is the first thing in a segment.
                            syntaxError("Misplaced quantifier", rule, start);
                            break;
                        }

                        int qstart, qlimit;
                        if (buf.length() == quoteLimit) {
                            // The quantifier follows a quoted string
                            qstart = quoteStart;
                            qlimit = quoteLimit;
                        } else if (buf.length() == varLimit) {
                            // The quantifier follows a variable expansion
                            qstart = varStart;
                            qlimit = varLimit;
                        } else {
                            // The quantifier follows a single character,
                            // possibly a segment or set stand-in.
                            qstart = buf.length() - 1;
                            qlimit = qstart + 1;
                        }

                        UnicodeMatcher m;
                        try {
                            m = new StringMatcher(buf.toString(), qstart, qlimit,
                                                  0, parser.curData);
                        } catch (RuntimeException e) {
                            throw new IllegalArgumentException("Failure in rule: " + rule.substring(pos, limit));
                        }
                        int min = 0;
                        int max = Quantifier.MAX;
                        switch (c) {
                        case ONE_OR_MORE:
                            min = 1;
                            break;
                        case ZERO_OR_ONE:
                            min = 0;
                            max = 1;
                            break;
                        // case KLEENE_STAR: keep the defaults
                        }
                        m = new Quantifier(m, min, max);
                        buf.setLength(qstart);
                        buf.append(parser.generateStandInFor(m));
                    }
                    break;

                //------------------------------------------------------
                // Elements allowed ONLY WITHIN segments
                //------------------------------------------------------
                case SEGMENT_CLOSE:
                    // A closing parenthesis ends the current section; at
                    // the top level it is rejected earlier via ILLEGAL_TOP.
                    break main;

                //------------------------------------------------------
                // Elements allowed ONLY OUTSIDE segments
                //------------------------------------------------------
                case CONTEXT_ANTE:
                    if (ante >= 0) {
                        syntaxError("Multiple ante contexts", rule, start);
                    }
                    ante = buf.length();
                    break;
                case CONTEXT_POST:
                    if (post >= 0) {
                        syntaxError("Multiple post contexts", rule, start);
                    }
                    post = buf.length();
                    break;
                case CURSOR_POS:
                    if (cursor >= 0) {
                        syntaxError("Multiple cursors", rule, start);
                    }
                    cursor = buf.length();
                    break;
                case CURSOR_OFFSET:
                    if (cursorOffset < 0) {
                        // Continuing a leading run: no text may intervene.
                        if (buf.length() > 0) {
                            syntaxError("Misplaced " + c, rule, start);
                        }
                        --cursorOffset;
                    } else if (cursorOffset > 0) {
                        // Continuing a trailing run: must be contiguous
                        // and the cursor must not have been seen yet.
                        if (buf.length() != cursorOffsetPos || cursor >= 0) {
                            syntaxError("Misplaced " + c, rule, start);
                        }
                        ++cursorOffset;
                    } else {
                        if (cursor == 0 && buf.length() == 0) {
                            // First offset char directly after a leading cursor.
                            cursorOffset = -1;
                        } else if (cursor < 0) {
                            // First offset char of a trailing run.
                            cursorOffsetPos = buf.length();
                            cursorOffset = 1;
                        } else {
                            syntaxError("Misplaced " + c, rule, start);
                        }
                    }
                    break;

                //------------------------------------------------------
                // Non-special characters
                //------------------------------------------------------
                default:
                    // Printable ASCII other than [0-9A-Za-z] is reserved
                    // and must be quoted or escaped.
                    if (c >= 0x0021 && c <= 0x007E &&
                        !((c >= '0' && c <= '9') ||
                          (c >= 'A' && c <= 'Z') ||
                          (c >= 'a' && c <= 'z'))) {
                        syntaxError("Unquoted " + c, rule, start);
                    }
                    buf.append(c);
                    break;
                }
            }
            return pos;
        }

        /**
         * Reduces {@link #text} to the part between the ante and post
         * context markers, and clears the markers and anchors.
         */
        void removeContext() {
            text = text.substring(ante < 0 ? 0 : ante,
                                  post < 0 ? text.length() : post);
            ante = post = -1;
            anchorStart = anchorEnd = false;
        }

        /**
         * Returns true if every code point of {@link #text} is a literal
         * or a stand-in for a UnicodeReplacer.
         */
        public boolean isValidOutput(TransliteratorParser parser) {
            for (int i = 0; i < text.length(); ) {
                int c = UTF16.charAt(text, i);
                i += UTF16.getCharCount(c);
                if (!parser.parseData.isReplacer(c)) {
                    return false;
                }
            }
            return true;
        }

        /**
         * Returns true if every code point of {@link #text} is a literal
         * or a stand-in for a UnicodeMatcher.
         */
        public boolean isValidInput(TransliteratorParser parser) {
            for (int i = 0; i < text.length(); ) {
                int c = UTF16.charAt(text, i);
                i += UTF16.getCharCount(c);
                if (!parser.parseData.isMatcher(c)) {
                    return false;
                }
            }
            return true;
        }
    }

    //----------------------------------------------------------------------
    // PUBLIC methods
    //----------------------------------------------------------------------

    /** Constructor. */
    public TransliteratorParser() {
    }

    /**
     * Parses a set of rules given as a single string. After the call the
     * results are available in {@link #dataVector}, {@link #idBlockVector}
     * and {@link #compoundFilter}.
     *
     * @throws IllegalArgumentException if the rules contain syntax errors
     */
    public void parse(String rules, int direction) {
        parseRules(new RuleArray(new String[] { rules }), direction);
    }

    /**
     * Parses a set of rules read line by line from a ResourceReader.
     * After the call the results are available in {@link #dataVector},
     * {@link #idBlockVector} and {@link #compoundFilter}.
     *
     * @throws IllegalArgumentException if the rules contain syntax errors
     */
    public void parse(ResourceReader rules, int direction) {
        parseRules(new RuleReader(rules), direction);
    }

    //----------------------------------------------------------------------
    // PRIVATE methods
    //----------------------------------------------------------------------

    /**
     * Adds {@code obj} to the end of {@code v} when parsing in the forward
     * direction, and to the front otherwise, so that {@code v} ends up in
     * execution order for the requested direction.
     */
    private void addInDirection(Vector v, Object obj) {
        if (direction == Transliterator.FORWARD) {
            v.add(obj);
        } else {
            v.insertElementAt(obj, 0);
        }
    }

    /**
     * Parses all lines supplied by {@code ruleArray}, resetting and then
     * filling in this parser's output fields.
     *
     * @param ruleArray source of rule lines
     * @param dir       Transliterator.FORWARD or Transliterator.REVERSE
     * @throws IllegalArgumentException if any syntax errors were found;
     *         the message contains up to 30 error messages, one per line
     */
    void parseRules(RuleBody ruleArray, int dir) {
        boolean parsingIDs = true;
        int ruleCount = 0;

        dataVector = new Vector();
        idBlockVector = new Vector();
        curData = null;
        direction = dir;
        compoundFilter = null;
        variablesVector = new Vector();
        variableNames = new Hashtable();
        parseData = new ParseData();
        // The cached DOT stand-in refers to a slot in variablesVector,
        // which has just been replaced; forget it so that a parser that
        // is used more than once does not emit a stale stand-in.
        dotStandIn = -1;

        StringBuffer errors = null;
        int errorCount = 0;

        ruleArray.reset();

        StringBuffer idBlockResult = new StringBuffer();

        // Ordinal (by ruleCount) of the statement that supplied the
        // compound filter, used below to check that it is the first
        // (forward) or last (reverse) statement.
        int compoundFilterOffset = -1;

        main:
        for (;;) {
            String rule = ruleArray.nextLine();
            if (rule == null) {
                break;
            }
            int pos = 0;
            int limit = rule.length();
            while (pos < limit) {
                char c = rule.charAt(pos++);
                if (UCharacterProperty.isRuleWhiteSpace(c)) {
                    continue;
                }
                // Skip lines starting with the comment character
                if (c == RULE_COMMENT_CHAR) {
                    pos = rule.indexOf("\n", pos) + 1;
                    if (pos == 0) {
                        break; // No "\n" found; rest of rule is a comment
                    }
                    continue; // Either fall out or restart with next line
                }

                // Skip empty rules
                if (c == END_OF_RULE) {
                    continue;
                }

                // Often a rule file contains multiple errors. It is
                // better to report them all (up to a limit) at once.
                try {
                    ++ruleCount;

                    // We've found the start of a rule or ID. c is its
                    // first character, and pos points past c.
                    --pos;
                    // Look for an ID token. It must have at least
                    // ID_TOKEN_LEN + 1 characters.
                    if ((pos + ID_TOKEN_LEN + 1) <= limit &&
                        rule.regionMatches(pos, ID_TOKEN, 0, ID_TOKEN_LEN)) {
                        pos += ID_TOKEN_LEN;
                        // Skip whitespace after the token. The bound is
                        // tested before each read so that a token followed
                        // only by whitespace cannot index past the end of
                        // the rule.
                        while (pos < limit &&
                               UCharacterProperty.isRuleWhiteSpace(rule.charAt(pos))) {
                            ++pos;
                        }
                        int[] p = new int[] { pos };

                        if (!parsingIDs) {
                            // Switching from rules to IDs: the current
                            // block of rules is complete.
                            if (curData != null) {
                                addInDirection(dataVector, curData);
                                curData = null;
                            }
                            parsingIDs = true;
                        }

                        TransliteratorIDParser.SingleID id =
                            TransliteratorIDParser.parseSingleID(
                                          rule, p, direction);
                        if (p[0] != pos && Utility.parseChar(rule, p, END_OF_RULE)) {
                            // Successful ::ID parse.
                            if (direction == Transliterator.FORWARD) {
                                idBlockResult.append(id.canonID).append(END_OF_RULE);
                            } else {
                                idBlockResult.insert(0, id.canonID + END_OF_RULE);
                            }

                        } else {
                            // Couldn't parse an ID. Try to parse a global filter
                            int[] withParens = new int[] { -1 };
                            UnicodeSet f = TransliteratorIDParser.parseGlobalFilter(rule, p, direction, withParens, null);
                            if (f != null && Utility.parseChar(rule, p, END_OF_RULE)) {
                                if ((direction == Transliterator.FORWARD) ==
                                    (withParens[0] == 0)) {
                                    if (compoundFilter != null) {
                                        // Multiple compound filters
                                        syntaxError("Multiple global filters", rule, pos);
                                    }
                                    compoundFilter = f;
                                    compoundFilterOffset = ruleCount;
                                }
                            } else {
                                // Invalid ::id; can be parsed as neither
                                // an ID nor a global filter.
                                syntaxError("Invalid ::ID", rule, pos);
                            }
                        }

                        pos = p[0];
                    } else {
                        if (parsingIDs) {
                            // Switching from IDs to rules: the current
                            // block of IDs is complete, and a new data
                            // object is started for the rules.
                            addInDirection(idBlockVector, idBlockResult.toString());
                            idBlockResult.delete(0, idBlockResult.length());
                            parsingIDs = false;
                            curData = new RuleBasedTransliterator.Data();

                            // By default, use part of the private use
                            // area for variables and other stand-ins.
                            setVariableRange(0xF000, 0xF8FF);
                        }

                        if (resemblesPragma(rule, pos, limit)) {
                            int ppp = parsePragma(rule, pos, limit);
                            if (ppp < 0) {
                                syntaxError("Unrecognized pragma", rule, pos);
                            }
                            pos = ppp;
                        } else {
                            // Parse a rule
                            pos = parseRule(rule, pos, limit);
                        }
                    }
                } catch (IllegalArgumentException e) {
                    if (errorCount == 30) {
                        errors.append("\nMore than 30 errors; further messages squelched");
                        break main;
                    }
                    if (errors == null) {
                        errors = new StringBuffer(e.getMessage());
                    } else {
                        errors.append("\n" + e.getMessage());
                    }
                    ++errorCount;
                    pos = ruleEnd(rule, pos, limit) + 1; // +1 advances past ';'
                }
            }
        }
        // Flush whichever block (IDs or rules) was still open.
        if (parsingIDs && idBlockResult.length() > 0) {
            addInDirection(idBlockVector, idBlockResult.toString());
        }
        else if (!parsingIDs && curData != null) {
            addInDirection(dataVector, curData);
        }

        // Give every data object its own copy of the variable tables.
        for (int i = 0; i < dataVector.size(); i++) {
            RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data) dataVector.get(i);
            data.variables = new Object[variablesVector.size()];
            variablesVector.copyInto(data.variables);
            data.variableNames = new Hashtable();
            data.variableNames.putAll(variableNames);
        }
        variablesVector = null;

        // Do more syntax checking and index the rules
        try {
            if (compoundFilter != null) {
                if ((direction == Transliterator.FORWARD &&
                     compoundFilterOffset != 1) ||
                    (direction == Transliterator.REVERSE &&
                     compoundFilterOffset != ruleCount)) {
                    throw new IllegalArgumentException("Compound filters misplaced");
                }
            }

            for (int i = 0; i < dataVector.size(); i++) {
                RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data) dataVector.get(i);
                data.ruleSet.freeze();
            }

            // Drop the empty ID block recorded when the rules did not
            // start with any ::ID statement.
            if (idBlockVector.size() == 1 && ((String) idBlockVector.get(0)).length() == 0) {
                idBlockVector.remove(0);
            }

        } catch (IllegalArgumentException e) {
            if (errors == null) {
                errors = new StringBuffer(e.getMessage());
            } else {
                errors.append("\n").append(e.getMessage());
            }
        }

        if (errors != null) {
            throw new IllegalArgumentException(errors.toString());
        }
    }

    /**
     * Parses a single rule or variable definition starting at {@code pos}.
     * A rule in the direction being parsed is added to
     * {@code curData.ruleSet}; a rule for the other direction only is
     * parsed and discarded; a variable definition is recorded in
     * {@link #variableNames}.
     *
     * @param rule  the text being parsed
     * @param pos   index of the first character of the rule
     * @param limit index after the last character to parse
     * @return the index after the terminating ';', or {@code limit} if the
     *         text ended first
     * @throws IllegalArgumentException on a syntax error
     */
    private int parseRule(String rule, int pos, int limit) {
        // Locate the left side, operator, and right side
        int start = pos;
        char operator = 0;

        // Set up segments data
        segmentStandins = new StringBuffer();
        segmentObjects = new Vector();

        RuleHalf left = new RuleHalf();
        RuleHalf right = new RuleHalf();

        undefinedVariableName = null;
        pos = left.parse(rule, pos, limit, this);

        // left.parse stops just after the terminating character; back up
        // one to read it and make sure it is an operator.
        if (pos == limit ||
            OPERATORS.indexOf(operator = rule.charAt(--pos)) < 0) {
            syntaxError("No operator pos=" + pos, rule, start);
        }
        ++pos;

        // Found an operator char. Check for forward-reverse operator.
        if (operator == REVERSE_RULE_OP &&
            (pos < limit && rule.charAt(pos) == FORWARD_RULE_OP)) {
            ++pos;
            operator = FWDREV_RULE_OP;
        }

        // Translate alternate op characters.
        switch (operator) {
        case ALT_FORWARD_RULE_OP:
            operator = FORWARD_RULE_OP;
            break;
        case ALT_REVERSE_RULE_OP:
            operator = REVERSE_RULE_OP;
            break;
        case ALT_FWDREV_RULE_OP:
            operator = FWDREV_RULE_OP;
            break;
        }

        pos = right.parse(rule, pos, limit, this);

        if (pos < limit) {
            if (rule.charAt(--pos) == END_OF_RULE) {
                ++pos;
            } else {
                // RuleHalf parser must have terminated at an operator
                syntaxError("Unquoted operator", rule, start);
            }
        }

        if (operator == VARIABLE_DEF_OP) {
            // A variable definition: the left side must be exactly the
            // placeholder that appendVariableDef emitted for the
            // not-yet-defined variable, and the right side is its value.
            if (undefinedVariableName == null) {
                syntaxError("Missing '$' or duplicate definition", rule, start);
            }
            if (left.text.length() != 1 || left.text.charAt(0) != variableLimit) {
                syntaxError("Malformed LHS", rule, start);
            }
            if (left.anchorStart || left.anchorEnd ||
                right.anchorStart || right.anchorEnd) {
                syntaxError("Malformed variable def", rule, start);
            }
            // We allow anything on the right, including an empty string.
            int n = right.text.length();
            char[] value = new char[n];
            right.text.getChars(0, n, value, 0);
            variableNames.put(undefinedVariableName, value);

            // Release the placeholder reserved by appendVariableDef.
            ++variableLimit;
            return pos;
        }

        // If this is not a variable definition rule, we shouldn't have
        // any undefined variable names.
        if (undefinedVariableName != null) {
            syntaxError("Undefined variable $" + undefinedVariableName,
                        rule, start);
        }

        // Verify segments
        if (segmentStandins.length() > segmentObjects.size()) {
            syntaxError("Undefined segment reference", rule, start);
        }
        for (int i = 0; i < segmentStandins.length(); ++i) {
            if (segmentStandins.charAt(i) == 0) {
                syntaxError("Internal error", rule, start); // will never happen
            }
        }
        for (int i = 0; i < segmentObjects.size(); ++i) {
            if (segmentObjects.elementAt(i) == null) {
                syntaxError("Internal error", rule, start); // will never happen
            }
        }

        // If the direction we want doesn't match the rule direction, do
        // nothing.
        if (operator != FWDREV_RULE_OP &&
            ((direction == Transliterator.FORWARD) != (operator == FORWARD_RULE_OP))) {
            return pos;
        }

        // Transform the rule into a forward rule by swapping the sides
        // if necessary.
        if (direction == Transliterator.REVERSE) {
            RuleHalf temp = left;
            left = right;
            right = temp;
        }

        // Remove elements of a two-way rule that do not apply to the
        // side now acting as output / input.
        if (operator == FWDREV_RULE_OP) {
            right.removeContext();
            left.cursor = -1;
            left.cursorOffset = 0;
        }

        // Normalize context
        if (left.ante < 0) {
            left.ante = 0;
        }
        if (left.post < 0) {
            left.post = left.text.length();
        }

        // Context is only allowed on the input side, the cursor only on
        // the output side, anchors only on the input side, and each side
        // may only contain stand-ins of the appropriate kind.
        if (right.ante >= 0 || right.post >= 0 || left.cursor >= 0 ||
            (right.cursorOffset != 0 && right.cursor < 0) ||
            right.anchorStart || right.anchorEnd ||
            !left.isValidInput(this) || !right.isValidOutput(this) ||
            left.ante > left.post) {
            syntaxError("Malformed rule", rule, start);
        }

        // Flatten segment objects vector to an array
        UnicodeMatcher[] segmentsArray = null;
        if (segmentObjects.size() > 0) {
            segmentsArray = new UnicodeMatcher[segmentObjects.size()];
            segmentObjects.toArray(segmentsArray);
        }

        curData.ruleSet.addRule(new TransliterationRule(
                                     left.text, left.ante, left.post,
                                     right.text, right.cursor, right.cursorOffset,
                                     segmentsArray,
                                     left.anchorStart, left.anchorEnd,
                                     curData));

        return pos;
    }

    /**
     * Sets the range of characters used as stand-ins.
     *
     * @param start first stand-in character, inclusive
     * @param end   last stand-in character, inclusive
     * @throws IllegalArgumentException if the range is empty or lies
     *         outside 0..0xFFFF
     */
    private void setVariableRange(int start, int end) {
        if (start > end || start < 0 || end > 0xFFFF) {
            throw new IllegalArgumentException("Invalid variable range " + start + ", " + end);
        }

        curData.variablesBase = (char) start; // first private use
        // The allocation cursor is only reset for the first block of
        // rules; later blocks keep allocating from the shared table.
        if (dataVector.size() == 0) {
            variableNext = (char) start;
            variableLimit = (char) (end + 1);
        }
    }

    /**
     * Throws an IllegalArgumentException if {@code ch} lies in the range
     * reserved for stand-ins, since such a character may not appear
     * literally in a rule.
     */
    private void checkVariableRange(int ch, String rule, int start) {
        if (ch >= curData.variablesBase && ch < variableLimit) {
            syntaxError("Variable range character in rule", rule, start);
        }
    }

    /**
     * Handler for the "use maximum backup" pragma. Not implemented;
     * always throws IllegalArgumentException.
     */
    private void pragmaMaximumBackup(int backup) {
        throw new IllegalArgumentException("use maximum backup pragma not implemented yet");
    }

    /**
     * Handler for the "use nfd rules" / "use nfc rules" pragmas. Not
     * implemented; always throws IllegalArgumentException.
     */
    private void pragmaNormalizeRules(Normalizer.Mode mode) {
        throw new IllegalArgumentException("use normalize rules pragma not implemented yet");
    }

    /**
     * Returns true if the text at {@code pos} matches the pattern
     * {@code "use "}, i.e. looks like the start of a pragma.
     */
    static boolean resemblesPragma(String rule, int pos, int limit) {
        // Must start with /use\s/i
        return Utility.parsePattern(rule, pos, limit, "use ", null) >= 0;
    }

    /**
     * Parses a pragma. The caller must already have established, via
     * {@link #resemblesPragma}, that the text at {@code pos} starts with
     * the "use" keyword.
     *
     * @return the position after the pragma, or -1 if the text is not a
     *         recognized pragma
     */
    private int parsePragma(String rule, int pos, int limit) {
        int[] array = new int[2];

        // resemblesPragma() has already matched the 4-character "use "
        // prefix, so skip it.
        pos += 4;

        int p = Utility.parsePattern(rule, pos, limit, "~variable range # #~;", array);
        if (p >= 0) {
            setVariableRange(array[0], array[1]);
            return p;
        }

        p = Utility.parsePattern(rule, pos, limit, "~maximum backup #~;", array);
        if (p >= 0) {
            pragmaMaximumBackup(array[0]);
            return p;
        }

        p = Utility.parsePattern(rule, pos, limit, "~nfd rules~;", null);
        if (p >= 0) {
            pragmaNormalizeRules(Normalizer.NFD);
            return p;
        }

        p = Utility.parsePattern(rule, pos, limit, "~nfc rules~;", null);
        if (p >= 0) {
            pragmaNormalizeRules(Normalizer.NFC);
            return p;
        }

        // Syntax error: unable to parse pragma
        return -1;
    }

    /**
     * Throws an IllegalArgumentException whose message is {@code msg}
     * followed by the escaped text of the rule that starts at
     * {@code start}.
     */
    static final void syntaxError(String msg, String rule, int start) {
        int end = ruleEnd(rule, start, rule.length());
        throw new IllegalArgumentException(msg + " in \"" +
                                           Utility.escape(rule.substring(start, end)) + '"');
    }

    /**
     * Returns the index of the first unquoted ';' in {@code rule} between
     * {@code start} and {@code limit}, or {@code limit} if there is none.
     */
    static final int ruleEnd(String rule, int start, int limit) {
        int end = Utility.quotedIndexOf(rule, start, limit, ";");
        if (end < 0) {
            end = limit;
        }
        return end;
    }

    /**
     * Parses a UnicodeSet pattern out of {@code rule} at {@code pos},
     * advancing {@code pos} past it, and returns the stand-in character
     * that represents the set.
     */
    private final char parseSet(String rule, ParsePosition pos) {
        UnicodeSet set = new UnicodeSet(rule, pos, parseData);
        if (variableNext >= variableLimit) {
            throw new RuntimeException("Private use variables exhausted");
        }
        set.compact();
        return generateStandInFor(set);
    }

    /**
     * Returns the stand-in character for {@code obj}, allocating a new
     * one if this exact object (compared by identity) has not been seen.
     *
     * @throws RuntimeException if the stand-in range is exhausted
     */
    char generateStandInFor(Object obj) {
        // Look up previous stand-in, if any. This is a short list
        // (typically fewer than 100 elements).
        for (int i = 0; i < variablesVector.size(); ++i) {
            if (variablesVector.elementAt(i) == obj) { // [sic] pointer comparison
                return (char) (curData.variablesBase + i);
            }
        }

        if (variableNext >= variableLimit) {
            throw new RuntimeException("Variable range exhausted");
        }
        variablesVector.addElement(obj);
        return variableNext++;
    }

    /**
     * Returns the stand-in character for segment {@code seg} (1-based),
     * allocating one, with a null placeholder in the variables table, if
     * the segment has not been seen before in the current rule.
     *
     * @throws RuntimeException if the stand-in range is exhausted
     */
    public char getSegmentStandin(int seg) {
        if (segmentStandins.length() < seg) {
            segmentStandins.setLength(seg);
        }
        char c = segmentStandins.charAt(seg - 1);
        if (c == 0) {
            if (variableNext >= variableLimit) {
                throw new RuntimeException("Variable range exhausted");
            }
            c = variableNext++;
            // Reserve the slot; setSegmentObject() fills it in when the
            // segment itself is parsed, which may be after a reference.
            variablesVector.addElement(null);
            segmentStandins.setCharAt(seg - 1, c);
        }
        return c;
    }

    /**
     * Records {@code obj} as the matcher for segment {@code seg}
     * (1-based), both in the per-rule segment list and in the variables
     * table slot of the segment's stand-in.
     *
     * @throws RuntimeException if the segment already has an object
     */
    public void setSegmentObject(int seg, StringMatcher obj) {
        // Grow the list so that index seg-1 exists; Vector.setSize pads
        // with null.
        if (segmentObjects.size() < seg) {
            segmentObjects.setSize(seg);
        }
        int index = getSegmentStandin(seg) - curData.variablesBase;
        if (segmentObjects.elementAt(seg - 1) != null ||
            variablesVector.elementAt(index) != null) {
            throw new RuntimeException("Internal error: segment " + seg + " already defined"); // should never happen
        }
        segmentObjects.setElementAt(obj, seg - 1);
        variablesVector.setElementAt(obj, index);
    }

    /**
     * Returns the stand-in for the set that the DOT character matches
     * ({@link #DOT_SET}), creating it on first use within a parse.
     */
    char getDotStandIn() {
        if (dotStandIn == -1) {
            dotStandIn = generateStandInFor(new UnicodeSet(DOT_SET));
        }
        return (char) dotStandIn;
    }

    /**
     * Appends the value of the named variable to {@code buf}. If the
     * variable is undefined and it is the first undefined variable in the
     * current rule, its name is remembered and a placeholder character
     * taken from the top of the stand-in range is appended instead, on
     * the assumption that the rule is the variable's definition.
     *
     * @throws IllegalArgumentException if a second undefined variable is
     *         encountered in the same rule
     * @throws RuntimeException if the stand-in range is exhausted
     */
    private void appendVariableDef(String name, StringBuffer buf) {
        char[] ch = (char[]) variableNames.get(name);
        if (ch == null) {
            // Allow one undefined variable so that variable definition
            // statements work. The placeholder is released again by
            // parseRule() once the definition has been recorded.
            if (undefinedVariableName == null) {
                undefinedVariableName = name;
                if (variableNext >= variableLimit) {
                    throw new RuntimeException("Private use variables exhausted");
                }
                buf.append((char) --variableLimit);
            } else {
                throw new IllegalArgumentException("Undefined variable $"
                                                   + name);
            }
        } else {
            buf.append(ch);
        }
    }
}