package com.ibm.icu.text;

import com.ibm.icu.impl.ICUResourceBundle;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.impl.UtilityExtensions;
import com.ibm.icu.util.CaseInsensitiveString;
import com.ibm.icu.util.ULocale;
import com.ibm.icu.util.UResourceBundle;
import com.ibm.icu.impl.UCharacterProperty;

import java.text.MessageFormat;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.MissingResourceException;
import java.util.Vector;

/**
 * Abstract base class for objects that rewrite a range of a
 * {@link Replaceable} text in place. Each instance carries a string ID
 * and an optional {@link UnicodeFilter} that restricts which characters
 * the subclass is allowed to see. Instances are obtained from a static
 * registry through {@link #getInstance(String)} or built from rule text
 * through {@link #createFromRules}.
 *
 * <p>Subclasses implement {@link #handleTransliterate}; filtering and
 * incremental rollback are handled by this class before the subclass is
 * invoked.
 */
public abstract class Transliterator {

    /**
     * Direction constant passed to {@link #getInstance(String, int)} and
     * {@link #createFromRules} to request the forward transliterator.
     */
    public static final int FORWARD = 0;

    /**
     * Direction constant passed to {@link #getInstance(String, int)} and
     * {@link #createFromRules} to request the reverse transliterator.
     */
    public static final int REVERSE = 1;

    /**
     * Mutable set of four offsets describing the state of a
     * transliteration over a text. {@link #validate} enforces
     * <code>0 &lt;= contextStart &lt;= start &lt;= limit &lt;=
     * contextLimit &lt;= text length</code>.
     */
    public static class Position {

        /** Lower bound of the context; must be &gt;= 0 and &lt;= {@link #start}. */
        public int contextStart;

        /**
         * Upper bound of the context; must be &gt;= {@link #limit} and
         * &lt;= the length of the text.
         */
        public int contextLimit;

        /** Beginning of the range still to be transliterated. */
        public int start;

        /** End of the range still to be transliterated. */
        public int limit;

        /** Constructs a position with all four offsets set to zero. */
        public Position() {
            this(0, 0, 0, 0);
        }

        /**
         * Constructs a position whose <code>limit</code> equals
         * <code>contextLimit</code>.
         */
        public Position(int contextStart, int contextLimit, int start) {
            this(contextStart, contextLimit, start, contextLimit);
        }

        /** Constructs a position from four explicit offsets. */
        public Position(int contextStart, int contextLimit,
                        int start, int limit) {
            this.contextStart = contextStart;
            this.contextLimit = contextLimit;
            this.start = start;
            this.limit = limit;
        }

        /** Constructs a copy of <code>pos</code>. */
        public Position(Position pos) {
            set(pos);
        }

        /** Copies all four offsets from <code>pos</code> into this object. */
        public void set(Position pos) {
            contextStart = pos.contextStart;
            contextLimit = pos.contextLimit;
            start = pos.start;
            limit = pos.limit;
        }

        /**
         * Returns true if <code>obj</code> is a <code>Position</code>
         * with the same four offsets.
         */
        public boolean equals(Object obj) {
            if (obj instanceof Position) {
                Position pos = (Position) obj;
                return contextStart == pos.contextStart &&
                    contextLimit == pos.contextLimit &&
                    start == pos.start &&
                    limit == pos.limit;
            }
            return false;
        }

        /**
         * Returns a hash code consistent with {@link #equals}: two
         * positions with the same four offsets hash identically. The
         * fields are mutable, so the value changes when they change.
         */
        public int hashCode() {
            int h = contextStart;
            h = 31 * h + contextLimit;
            h = 31 * h + start;
            h = 31 * h + limit;
            return h;
        }

        /**
         * Returns the offsets formatted as
         * <code>[cs=..., s=..., l=..., cl=...]</code>.
         */
        public String toString() {
            return "[cs=" + contextStart
                + ", s=" + start
                + ", l=" + limit
                + ", cl=" + contextLimit
                + "]";
        }

        /**
         * Checks the ordering invariant of the four offsets against a
         * text of the given length.
         *
         * @param length length of the text this position refers to
         * @throws IllegalArgumentException if the offsets are out of
         * order or out of range
         */
        public final void validate(int length) {
            if (contextStart < 0 ||
                start < contextStart ||
                limit < start ||
                contextLimit < limit ||
                length < contextLimit) {
                throw new IllegalArgumentException("Invalid Position {cs=" +
                                                   contextStart + ", s=" +
                                                   start + ", l=" +
                                                   limit + ", cl=" +
                                                   contextLimit + "}, len=" +
                                                   length);
            }
        }
    }

    /** Programmatic identifier of this transliterator; see {@link #getID}. */
    private String ID;

    /**
     * Optional filter. When non-null, only runs of characters it
     * contains are handed to {@link #handleTransliterate}.
     */
    private UnicodeFilter filter;

    /** Value reported by {@link #getMaximumContextLength}. */
    private int maximumContextLength = 0;

    /** Registry backing the static lookup/registration methods. */
    private static TransliteratorRegistry registry;

    /**
     * Display names supplied through {@link #registerClass}, keyed by
     * {@link CaseInsensitiveString} ID.
     */
    private static Hashtable displayNameCache;

    /**
     * Prefix of the resource key that holds a complete display name
     * for an ID.
     */
    private static final String RB_DISPLAY_NAME_PREFIX = "%Translit%%";

    /**
     * Prefix of the resource key that holds the localized name of a
     * single source or target component of an ID.
     */
    private static final String RB_SCRIPT_DISPLAY_NAME_PREFIX = "%Translit%";

    /**
     * Resource key of the {@link MessageFormat} pattern used to combine
     * source and target names into a display name.
     */
    private static final String RB_DISPLAY_NAME_PATTERN = "TransliteratorNamePattern";

    /** Delimiter appended after an ID in rule text. */
    static final char ID_DELIM = ';';

    /** Separator between the source and target parts of an ID. */
    static final char ID_SEP = '-';

    /** Separator between the target and the variant parts of an ID. */
    static final char VARIANT_SEP = '/';

    /**
     * When true, the filtered transliteration driver prints a trace of
     * every pass to <code>System.out</code>.
     */
    static final boolean DEBUG = false;

    private static final String COPYRIGHT =
        "\u00A9 IBM Corporation 1999. All rights reserved.";

    /**
     * Constructor for subclasses.
     *
     * @param ID the string identifier for this transliterator
     * @param filter filter restricting the characters processed, or
     * null for no filtering
     * @throws NullPointerException if <code>ID</code> is null
     */
    protected Transliterator(String ID, UnicodeFilter filter) {
        if (ID == null) {
            throw new NullPointerException();
        }
        this.ID = ID;
        this.filter = filter;
    }

    /**
     * Transliterates the range <code>[start, limit)</code> of
     * <code>text</code> in place, non-incrementally.
     *
     * @param text the text to be modified
     * @param start beginning of the range, inclusive
     * @param limit end of the range, exclusive
     * @return the new limit of the range after transliteration, or -1
     * if <code>start</code>/<code>limit</code> are out of order or out
     * of bounds (in which case the text is not touched)
     */
    public final int transliterate(Replaceable text, int start, int limit) {
        if (start < 0 ||
            limit < start ||
            text.length() < limit) {
            return -1;
        }

        Position pos = new Position(start, limit, start);
        filteredTransliterate(text, pos, false, true);
        return pos.limit;
    }

    /**
     * Transliterates the entire <code>text</code> in place.
     *
     * @param text the text to be modified
     */
    public final void transliterate(Replaceable text) {
        transliterate(text, 0, text.length());
    }

    /**
     * Transliterates an entire string and returns the result; the
     * argument itself is not modified.
     *
     * @param text the source string
     * @return the transliterated string
     */
    public final String transliterate(String text) {
        ReplaceableString result = new ReplaceableString(text);
        transliterate(result);
        return result.toString();
    }

    /**
     * Incrementally transliterates <code>text</code> after optionally
     * inserting new text at <code>index.limit</code>.
     *
     * <p>If <code>insertion</code> is non-null it is inserted at
     * <code>index.limit</code>, and <code>index.limit</code> and
     * <code>index.contextLimit</code> are advanced by its length. If the
     * code unit just before <code>index.limit</code> is then a lead
     * surrogate, the method returns without transliterating (presumably
     * so that a later call can supply the trail surrogate).
     *
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index the position state, updated in place
     * @param insertion text to insert at <code>index.limit</code>, or
     * null
     * @throws IllegalArgumentException if <code>index</code> is invalid
     * for <code>text</code>
     */
    public final void transliterate(Replaceable text, Position index,
                                    String insertion) {
        index.validate(text.length());

        if (insertion != null) {
            text.replace(index.limit, index.limit, insertion);
            index.limit += insertion.length();
            index.contextLimit += insertion.length();
        }

        if (index.limit > 0 &&
            UTF16.isLeadSurrogate(text.charAt(index.limit - 1))) {
            // Do not process half of a surrogate pair.
            return;
        }

        filteredTransliterate(text, index, true, true);
    }

    /**
     * Incrementally transliterates <code>text</code> after inserting a
     * single code point; equivalent to the <code>String</code> overload
     * with <code>UTF16.valueOf(insertion)</code>.
     *
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index the position state, updated in place
     * @param insertion code point to insert at <code>index.limit</code>
     */
    public final void transliterate(Replaceable text, Position index,
                                    int insertion) {
        transliterate(text, index, UTF16.valueOf(insertion));
    }

    /**
     * Incrementally transliterates <code>text</code> without inserting
     * anything.
     *
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index the position state, updated in place
     */
    public final void transliterate(Replaceable text, Position index) {
        transliterate(text, index, null);
    }

    /**
     * Completes an incremental transliteration by running one final
     * non-incremental pass over the range described by
     * <code>index</code>.
     *
     * @param text the buffer holding transliterated and
     * untransliterated text
     * @param index the position state, updated in place
     * @throws IllegalArgumentException if <code>index</code> is invalid
     * for <code>text</code>
     */
    public final void finishTransliteration(Replaceable text,
                                            Position index) {
        index.validate(text.length());
        filteredTransliterate(text, index, false, true);
    }

    /**
     * Subclass hook that performs the actual transliteration of the
     * range <code>[pos.start, pos.limit)</code> of <code>text</code>.
     *
     * <p>As used by this class: when <code>incremental</code> is false
     * the implementation must leave <code>pos.start == pos.limit</code>
     * on return, otherwise the caller throws a
     * <code>RuntimeException</code>. Any change in text length must be
     * reflected in <code>pos.limit</code>; the caller derives the length
     * delta from it.
     *
     * @param text the buffer to modify
     * @param pos the range to process, updated in place
     * @param incremental true if more text may follow later
     */
    protected abstract void handleTransliterate(Replaceable text,
                                                Position pos, boolean incremental);

    /**
     * Core driver: splits the range <code>[index.start,
     * index.limit)</code> into runs of characters accepted by the
     * filter and passes each run to {@link #handleTransliterate}.
     *
     * <p>When <code>rollback</code> is true and the run is incremental,
     * the run is fed to the handler one code point at a time. If the
     * handler stops before consuming everything it was given, the text
     * is restored from a backup copy so that the partial change is
     * undone.
     *
     * @param text the buffer to modify
     * @param index the range to process, updated in place
     * @param incremental true if more text may follow later
     * @param rollback true to enable the restore-on-partial-match
     * behavior described above
     */
    private void filteredTransliterate(Replaceable text,
                                       Position index,
                                       boolean incremental,
                                       boolean rollback) {
        // Short circuit: no filter and no rollback means the handler
        // can be called directly on the whole range.
        if (filter == null && !rollback) {
            handleTransliterate(text, index, incremental);
            return;
        }

        // index.limit is narrowed to each run below; globalLimit keeps
        // track of the limit of the overall range and is adjusted as
        // the text grows or shrinks.
        int globalLimit = index.limit;

        StringBuffer log = null;
        if (DEBUG) {
            log = new StringBuffer();
        }

        for (;;) {

            if (filter != null) {
                // Narrow [start, limit) to the next maximal run of
                // characters contained in the filter.
                int c;
                while (index.start < globalLimit &&
                       !filter.contains(c=text.char32At(index.start))) {
                    index.start += UTF16.getCharCount(c);
                }

                index.limit = index.start;
                while (index.limit < globalLimit &&
                       filter.contains(c=text.char32At(index.limit))) {
                    index.limit += UTF16.getCharCount(c);
                }
            }

            // Empty run: nothing (more) to do.
            if (index.start == index.limit) {
                break;
            }

            // Only a run that reaches the global limit can be
            // incremental; earlier runs are complete.
            boolean isIncrementalRun =
                (index.limit < globalLimit ? false : incremental);

            int delta;

            if (rollback && isIncrementalRun) {

                if (DEBUG) {
                    log.setLength(0);
                    System.out.println("filteredTransliterate{"+getID()+"}i: IN=" +
                                       UtilityExtensions.formatInput(text, index));
                }

                int runStart = index.start;
                int runLimit = index.limit;
                int runLength = runLimit - runStart;

                // Append a backup copy of the run to the end of the
                // text; it is removed again after the pass loop.
                int rollbackOrigin = text.length();
                text.copy(runStart, runLimit, rollbackOrigin);

                // Start of the text not yet committed, in the live run...
                int passStart = runStart;
                // ...and the corresponding offset in the backup copy.
                int rollbackStart = rollbackOrigin;

                // Limit of the current pass; grows one code point at a time.
                int passLimit = index.start;

                // Number of code units handed to the handler since the
                // last commit.
                int uncommittedLength = 0;

                // Net change in text length caused by committed passes.
                int totalDelta = 0;

                for (;;) {
                    // Extend the pass by one code point.
                    int charLength =
                        UTF16.getCharCount(text.char32At(passLimit));
                    passLimit += charLength;
                    if (passLimit > runLimit) {
                        break;
                    }
                    uncommittedLength += charLength;

                    index.limit = passLimit;

                    if (DEBUG) {
                        log.setLength(0);
                        log.append("filteredTransliterate{"+getID()+"}i: ");
                        UtilityExtensions.formatInput(log, text, index);
                    }

                    handleTransliterate(text, index, true);

                    if (DEBUG) {
                        log.append(" => ");
                        UtilityExtensions.formatInput(log, text, index);
                    }

                    delta = index.limit - passLimit;
                    if (index.start != index.limit) {
                        // The handler did not consume everything: undo
                        // its changes by replacing the live text with
                        // the matching stretch of the backup copy.
                        int rs = rollbackStart + delta - (index.limit - passStart);

                        text.replace(passStart, index.limit, "");

                        text.copy(rs, rs + uncommittedLength, passStart);

                        index.start = passStart;
                        index.limit = passLimit;
                        index.contextLimit -= delta;

                        if (DEBUG) {
                            log.append(" (ROLLBACK)");
                        }
                    }

                    else {
                        // The handler consumed everything: commit and
                        // start the next pass where this one ended.
                        passStart = passLimit = index.start;

                        rollbackStart += delta + uncommittedLength;
                        uncommittedLength = 0;

                        runLimit += delta;
                        totalDelta += delta;
                    }

                    if (DEBUG) {
                        System.out.println(Utility.escape(log.toString()));
                    }
                }

                // Account for committed length changes, then delete the
                // backup copy from the end of the text.
                rollbackOrigin += totalDelta;
                globalLimit += totalDelta;

                text.replace(rollbackOrigin, rollbackOrigin + runLength, "");

                index.start = passStart;
            }

            else {
                if (DEBUG) {
                    log.setLength(0);
                    log.append("filteredTransliterate{"+getID()+"}: ");
                    UtilityExtensions.formatInput(log, text, index);
                }

                // Single pass over the whole run.
                int limit = index.limit;
                handleTransliterate(text, index, isIncrementalRun);
                delta = index.limit - limit;
                if (DEBUG) {
                    log.append(" => ");
                    UtilityExtensions.formatInput(log, text, index);
                }

                // A non-incremental call must consume the whole run.
                if (!isIncrementalRun && index.start != index.limit) {
                    throw new RuntimeException("ERROR: Incomplete non-incremental transliteration by " + getID());
                }

                globalLimit += delta;

                if (DEBUG) {
                    System.out.println(Utility.escape(log.toString()));
                }
            }

            // Without a filter there is exactly one run; an incremental
            // run is by construction the last one.
            if (filter == null || isIncrementalRun) {
                break;
            }

        }

        // Restore the overall limit, adjusted for length changes.
        index.limit = globalLimit;

        if (DEBUG) {
            System.out.println("filteredTransliterate{"+getID()+"}: OUT=" +
                               UtilityExtensions.formatInput(text, index));
        }
    }

    /**
     * Applies the filter and then {@link #handleTransliterate} to the
     * range described by <code>index</code>, without the rollback
     * behavior used by the public <code>transliterate</code> methods.
     *
     * @param text the buffer to modify
     * @param index the range to process, updated in place
     * @param incremental true if more text may follow later
     */
    public void filteredTransliterate(Replaceable text,
                                      Position index,
                                      boolean incremental) {
        filteredTransliterate(text, index, incremental, false);
    }

    /**
     * Returns the value last passed to
     * {@link #setMaximumContextLength}, or zero if it was never called.
     */
    public final int getMaximumContextLength() {
        return maximumContextLength;
    }

    /**
     * Sets the value returned by {@link #getMaximumContextLength}.
     *
     * @param a the new maximum context length
     * @throws IllegalArgumentException if <code>a</code> is negative
     */
    protected void setMaximumContextLength(int a) {
        if (a < 0) {
            throw new IllegalArgumentException("Invalid context length " + a);
        }
        maximumContextLength = a;
    }

    /** Returns the programmatic identifier of this transliterator. */
    public final String getID() {
        return ID;
    }

    /**
     * Replaces the programmatic identifier of this transliterator.
     *
     * @param id the new identifier
     */
    protected final void setID(String id) {
        ID = id;
    }

    /**
     * Returns a display name for the given ID in the default
     * {@link ULocale}.
     *
     * @param ID the transliterator ID
     * @see #getDisplayName(String, ULocale)
     */
    public final static String getDisplayName(String ID) {
        return getDisplayName(ID, ULocale.getDefault());
    }

    /**
     * Returns a display name for the given ID in the given
     * {@link Locale}.
     *
     * @param id the transliterator ID
     * @param inLocale the locale in which the name is wanted
     * @see #getDisplayName(String, ULocale)
     */
    public static String getDisplayName(String id, Locale inLocale) {
        return getDisplayName(id, ULocale.forLocale(inLocale));
    }

    /**
     * Returns a display name for the given ID in the given locale.
     *
     * <p>Lookup order: (1) a name registered through
     * {@link #registerClass}; (2) a complete name stored in the
     * resource bundle for the normalized ID; (3) a name built from the
     * bundle's name pattern, using localized source and target names
     * where the bundle has them and the raw ID components otherwise.
     *
     * @param id the transliterator ID
     * @param inLocale the locale in which the name is wanted
     * @return the display name, or the empty string if <code>id</code>
     * cannot be parsed
     * @throws RuntimeException if the bundle contains neither a name
     * for the ID nor the name pattern
     */
    public static String getDisplayName(String id, ULocale inLocale) {

        ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.
            getBundleInstance(ICUResourceBundle.ICU_TRANSLIT_BASE_NAME, inLocale);

        // Normalize the ID into source-target[/variant].
        String stv[] = TransliteratorIDParser.IDtoSTV(id);
        if (stv == null) {
            // Unparseable ID.
            return "";
        }
        // Evaluate the variant test once, null-safely, so that both
        // uses below agree.
        boolean hasVariant = stv[2] != null && stv[2].length() > 0;
        String ID = stv[0] + ID_SEP + stv[1];
        if (hasVariant) {
            ID = ID + VARIANT_SEP + stv[2];
        }

        // Explicitly registered names take precedence.
        String n = (String) displayNameCache.get(new CaseInsensitiveString(ID));
        if (n != null) {
            return n;
        }

        try {
            return bundle.getString(RB_DISPLAY_NAME_PREFIX + ID);
        } catch (MissingResourceException ignored) {
            // No complete name in the bundle; fall through and build one.
        }

        try {
            MessageFormat format = new MessageFormat(
                    bundle.getString(RB_DISPLAY_NAME_PATTERN));
            // args[0] is presumably the number of names that follow.
            Object[] args = new Object[] { new Integer(2), stv[0], stv[1] };

            // Localize source and target individually where possible.
            for (int j=1; j<=2; ++j) {
                try {
                    args[j] = bundle.getString(RB_SCRIPT_DISPLAY_NAME_PREFIX +
                                               (String) args[j]);
                } catch (MissingResourceException ignoredToo) {
                    // Keep the unlocalized component.
                }
            }

            return hasVariant ?
                (format.format(args) + VARIANT_SEP + stv[2]) :
                format.format(args);
        } catch (MissingResourceException ignored2) {
            // No name pattern either; report below.
        }

        throw new RuntimeException(
            "No display name or name pattern available for transliterator ID " + id);
    }

    /**
     * Returns the filter used by this transliterator, or null if it
     * has none.
     */
    public final UnicodeFilter getFilter() {
        return filter;
    }

    /**
     * Replaces the filter used by this transliterator.
     *
     * @param filter the new filter, or null for no filtering
     */
    public void setFilter(UnicodeFilter filter) {
        this.filter = filter;
    }

    /**
     * Returns the forward transliterator for the given ID.
     *
     * @param ID a (possibly compound) transliterator ID
     * @throws IllegalArgumentException if <code>ID</code> is invalid
     */
    public static final Transliterator getInstance(String ID) {
        return getInstance(ID, FORWARD);
    }

    /**
     * Returns a transliterator for the given ID and direction.
     *
     * <p>The ID is parsed as a compound ID. If it yields more than one
     * element, or its canonical form contains <code>';'</code>, a
     * {@link CompoundTransliterator} is returned; otherwise the single
     * element is returned. The result's ID is set to the canonical ID
     * and a global filter parsed from the ID, if any, is installed.
     *
     * @param ID a (possibly compound) transliterator ID
     * @param dir {@link #FORWARD} or {@link #REVERSE}
     * @throws IllegalArgumentException if <code>ID</code> is invalid
     */
    public static Transliterator getInstance(String ID,
                                             int dir) {
        StringBuffer canonID = new StringBuffer();
        Vector list = new Vector();
        UnicodeSet[] globalFilter = new UnicodeSet[1];
        if (!TransliteratorIDParser.parseCompoundID(ID, dir, canonID, list, globalFilter)) {
            throw new IllegalArgumentException("Invalid ID " + ID);
        }

        TransliteratorIDParser.instantiateList(list);

        Transliterator t = null;
        if (list.size() > 1 || Utility.indexOf(canonID, ";") >= 0) {
            t = new CompoundTransliterator(list);
        }
        else {
            t = (Transliterator)list.elementAt(0);
        }

        t.setID(canonID.toString());
        if (globalFilter[0] != null) {
            t.setFilter(globalFilter[0]);
        }
        return t;
    }

    /**
     * Looks up a single (non-compound) ID in the registry.
     *
     * <p>If the registry answers with an alias (non-empty buffer), the
     * alias is resolved through {@link #getInstance(String, int)} in
     * the forward direction.
     *
     * @param id the ID to look up
     * @param canonID if non-null, the ID to set on the result
     * @return the transliterator, or null if the registry has none
     */
    static Transliterator getBasicInstance(String id, String canonID) {
        StringBuffer s = new StringBuffer();
        Transliterator t = registry.get(id, s);
        if (s.length() != 0) {
            // The registry returned an alias instead of an instance.
            t = getInstance(s.toString(), FORWARD);
        }
        if (t != null && canonID != null) {
            t.setID(canonID);
        }
        return t;
    }

    /**
     * Builds a transliterator from rule text.
     *
     * <p>The shape of the result depends on what the parser found:
     * nothing yields a {@link NullTransliterator}; one rule set yields
     * a {@link RuleBasedTransliterator}; one ID block yields the
     * transliterator named by that block (prefixed by the compound
     * filter, if any); anything else yields a
     * {@link CompoundTransliterator} interleaving the ID blocks and
     * rule sets in order.
     *
     * @param ID the ID to assign to the result
     * @param rules the rule text
     * @param dir {@link #FORWARD} or {@link #REVERSE}
     * @return the new transliterator
     */
    public static final Transliterator createFromRules(String ID, String rules, int dir) {
        Transliterator t = null;

        TransliteratorParser parser = new TransliteratorParser();
        parser.parse(rules, dir);

        if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) {
            // No IDs and no rules.
            t = new NullTransliterator();
        }
        else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) {
            t = new RuleBasedTransliterator(ID, (RuleBasedTransliterator.Data)parser.dataVector.get(0), null);
        }
        else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) {
            // Only an ID block: delegate to getInstance, carrying the
            // compound filter along as an ID prefix.
            if (parser.compoundFilter != null) {
                t = getInstance(parser.compoundFilter.toPattern(false) + ";"
                        + (String)parser.idBlockVector.get(0));
            } else {
                t = getInstance((String)parser.idBlockVector.get(0));
            }

            if (t != null) {
                t.setID(ID);
            }
        }
        else {
            Vector transliterators = new Vector();
            int passNumber = 1;

            int limit = Math.max(parser.idBlockVector.size(), parser.dataVector.size());
            for (int i = 0; i < limit; i++) {
                if (i < parser.idBlockVector.size()) {
                    String idBlock = (String)parser.idBlockVector.get(i);
                    if (idBlock.length() > 0) {
                        // Reuse the instance created for the type test
                        // instead of instantiating the block twice.
                        Transliterator temp = getInstance(idBlock);
                        if (!(temp instanceof NullTransliterator)) {
                            transliterators.add(temp);
                        }
                    }
                }
                if (i < parser.dataVector.size()) {
                    RuleBasedTransliterator.Data data = (RuleBasedTransliterator.Data)parser.dataVector.get(i);
                    transliterators.add(new RuleBasedTransliterator("%Pass" + passNumber++, data, null));
                }
            }

            t = new CompoundTransliterator(transliterators, passNumber - 1);
            t.setID(ID);
            if (parser.compoundFilter != null) {
                t.setFilter(parser.compoundFilter);
            }
        }

        return t;
    }

    /**
     * Returns rule text that recreates this transliterator. This
     * implementation delegates to {@link #baseToRules}.
     *
     * @param escapeUnprintable if true, unprintable characters are
     * escaped
     */
    public String toRules(boolean escapeUnprintable) {
        return baseToRules(escapeUnprintable);
    }

    /**
     * Returns the rule <code>"::" + getID() + ';'</code>, optionally
     * escaping unprintable characters of the ID.
     *
     * @param escapeUnprintable if true, each code point of the ID is
     * passed through <code>Utility.escapeUnprintable</code>
     */
    protected final String baseToRules(boolean escapeUnprintable) {
        if (escapeUnprintable) {
            StringBuffer rulesSource = new StringBuffer();
            String id = getID();
            // Iterate by code point, not by code unit.
            for (int i=0; i<id.length();) {
                int c = UTF16.charAt(id, i);
                if (!Utility.escapeUnprintable(rulesSource, c)) {
                    UTF16.append(rulesSource, c);
                }
                i += UTF16.getCharCount(c);
            }
            rulesSource.insert(0, "::");
            rulesSource.append(ID_DELIM);
            return rulesSource.toString();
        }
        return "::" + getID() + ID_DELIM;
    }

    /**
     * Returns the transliterators this object is made of: the
     * components if this is a {@link CompoundTransliterator}, otherwise
     * a one-element array containing this object.
     */
    public Transliterator[] getElements() {
        Transliterator result[];
        if (this instanceof CompoundTransliterator) {
            CompoundTransliterator cpd = (CompoundTransliterator) this;
            result = new Transliterator[cpd.getCount()];
            for (int i=0; i<result.length; ++i) {
                result[i] = cpd.getTransliterator(i);
            }
        } else {
            result = new Transliterator[] { this };
        }
        return result;
    }

    /**
     * Returns the set from {@link #handleGetSourceSet}, intersected
     * with the filter if one is set.
     */
    public final UnicodeSet getSourceSet() {
        UnicodeSet set = handleGetSourceSet();
        if (filter != null) {
            UnicodeSet filterSet;
            // Use the filter directly when it already is a UnicodeSet;
            // otherwise ask it for its match set.
            if (filter instanceof UnicodeSet) {
                filterSet = (UnicodeSet) filter;
            } else {
                filterSet = new UnicodeSet();
                filter.addMatchSetTo(filterSet);
            }
            set.retainAll(filterSet);
        }
        return set;
    }

    /**
     * Subclass hook for {@link #getSourceSet}; the filter is applied by
     * the caller. This implementation returns a new empty set.
     */
    protected UnicodeSet handleGetSourceSet() {
        return new UnicodeSet();
    }

    /**
     * Returns the target set of this transliterator. This
     * implementation returns a new empty set.
     */
    public UnicodeSet getTargetSet() {
        return new UnicodeSet();
    }

    /**
     * Returns the transliterator obtained by looking up this object's
     * ID in the {@link #REVERSE} direction.
     *
     * @throws IllegalArgumentException if the ID cannot be resolved
     */
    public final Transliterator getInverse() {
        return getInstance(ID, REVERSE);
    }

    /**
     * Registers a transliterator class under the given ID.
     *
     * @param ID the ID to register
     * @param transClass the class to register under that ID
     * @param displayName if non-null, the name that
     * {@link #getDisplayName} returns for this ID
     */
    public static void registerClass(String ID, Class transClass, String displayName) {
        registry.put(ID, transClass, true);
        if (displayName != null) {
            displayNameCache.put(new CaseInsensitiveString(ID), displayName);
        }
    }

    /**
     * Registers a factory under the given ID.
     *
     * @param ID the ID to register
     * @param factory the factory to register under that ID
     */
    public static void registerFactory(String ID, Factory factory) {
        registry.put(ID, factory, true);
    }

    /**
     * Registers a transliterator instance under its own ID.
     *
     * @param trans the instance to register
     */
    public static void registerInstance(Transliterator trans) {
        registry.put(trans.getID(), trans, true);
    }

    /**
     * Registers a transliterator instance under its own ID, with an
     * explicit visibility flag that is passed through to the registry.
     *
     * @param trans the instance to register
     * @param visible visibility flag for the registry entry
     */
    static void registerInstance(Transliterator trans, boolean visible) {
        registry.put(trans.getID(), trans, visible);
    }

    /**
     * Registers <code>aliasID</code> as another name for
     * <code>realID</code>.
     *
     * @param aliasID the new ID
     * @param realID the ID it resolves to
     */
    public static void registerAlias(String aliasID, String realID) {
        registry.put(aliasID, realID, true);
    }

    /**
     * Registers a special inverse relationship between two targets;
     * delegates to
     * <code>TransliteratorIDParser.registerSpecialInverse</code>.
     *
     * @param target the target of the forward ID
     * @param inverseTarget the target of its inverse
     * @param bidirectional passed through to the parser
     */
    static void registerSpecialInverse(String target,
                                       String inverseTarget,
                                       boolean bidirectional) {
        TransliteratorIDParser.registerSpecialInverse(target, inverseTarget, bidirectional);
    }

    /**
     * Removes the given ID from the registry and drops any display
     * name registered for it.
     *
     * @param ID the ID to remove
     */
    public static void unregister(String ID) {
        displayNameCache.remove(new CaseInsensitiveString(ID));
        registry.remove(ID);
    }

    /** Returns an enumeration over the IDs reported by the registry. */
    public static final Enumeration getAvailableIDs() {
        return registry.getAvailableIDs();
    }

    /** Returns an enumeration over the sources reported by the registry. */
    public static final Enumeration getAvailableSources() {
        return registry.getAvailableSources();
    }

    /**
     * Returns an enumeration over the targets the registry reports for
     * the given source.
     */
    public static final Enumeration getAvailableTargets(String source) {
        return registry.getAvailableTargets(source);
    }

    /**
     * Returns an enumeration over the variants the registry reports
     * for the given source and target.
     */
    public static final Enumeration getAvailableVariants(String source,
                                                         String target) {
        return registry.getAvailableVariants(source, target);
    }

    private static final String INDEX = "index",
                                RB_RULE_BASED_IDS ="RuleBasedTransliteratorIDs";

    /*
     * Populates the registry: first from the RuleBasedTransliteratorIDs
     * table of the index resource bundle, then with the built-in
     * transliterator classes.
     */
    static {
        registry = new TransliteratorRegistry();

        displayNameCache = new Hashtable();

        ICUResourceBundle bundle, transIDs, colBund;
        bundle = (ICUResourceBundle)UResourceBundle.getBundleInstance(ICUResourceBundle.ICU_TRANSLIT_BASE_NAME, INDEX);
        transIDs = bundle.get(RB_RULE_BASED_IDS);

        int row, maxRows;
        maxRows = transIDs.getSize();
        for (row = 0; row < maxRows; row++) {
            colBund = transIDs.get(row);
            String ID = colBund.getKey();
            ICUResourceBundle res = colBund.get(0);
            String type = res.getKey();
            if (type.equals("file") || type.equals("internal")) {
                // Rule resource: needs a resource name and a direction
                // whose first character is 'F' or 'R'.
                String resString = res.getString("resource");
                int dir;
                String direction = res.getString("direction");
                switch (direction.charAt(0)) {
                case 'F':
                    dir = FORWARD;
                    break;
                case 'R':
                    dir = REVERSE;
                    break;
                default:
                    throw new RuntimeException("Can't parse direction: " + direction);
                }
                // "internal" entries are registered as not visible.
                registry.put(ID,
                             resString, "UTF-16", dir,
                             !type.equals("internal"));
            } else if (type.equals("alias")) {
                String resString = res.getString();
                registry.put(ID, resString, true);
            } else {
                throw new RuntimeException("Unknown type: " + type);
            }
        }

        registerSpecialInverse(NullTransliterator.SHORT_ID, NullTransliterator.SHORT_ID, false);

        registerClass(NullTransliterator._ID,
                      NullTransliterator.class, null);
        RemoveTransliterator.register();
        EscapeTransliterator.register();
        UnescapeTransliterator.register();
        LowercaseTransliterator.register();
        UppercaseTransliterator.register();
        TitlecaseTransliterator.register();
        UnicodeNameTransliterator.register();
        NameUnicodeTransliterator.register();
        NormalizationTransliterator.register();
        BreakTransliterator.register();
        AnyTransliterator.register();
    }

    /**
     * Factory that can be registered through {@link #registerFactory}
     * to create transliterators on demand.
     */
    public static interface Factory {

        /**
         * Returns a transliterator for the given ID.
         *
         * @param ID the ID under which the factory was asked
         */
        Transliterator getInstance(String ID);
    }
}