KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > AnyTransliterator


/*
 *****************************************************************
 * Copyright (c) 2002-2006, International Business Machines Corporation
 * and others.  All Rights Reserved.
 *****************************************************************
 *   Date        Name        Description
 *   06/06/2002  aliu        Creation.
 *****************************************************************
 */

10 package com.ibm.icu.text;
11 import com.ibm.icu.lang.UScript;
12 import java.lang.Math JavaDoc;
13 import java.util.Enumeration JavaDoc;
14 import java.util.HashSet JavaDoc;
15 import java.util.HashMap JavaDoc;
16 import java.util.Map JavaDoc;
17 import java.util.MissingResourceException JavaDoc;
18 /**
19  * A transliterator that translates multiple input scripts to a single
20  * output script. It is named Any-T or Any-T/V, where T is the target
21  * and V is the optional variant. The target T is a script.
22  *
23  * <p>An AnyTransliterator partitions text into runs of the same
24  * script, together with adjacent COMMON or INHERITED characters.
25  * After determining the script of each run, it transliterates from
26  * that script to the given target/variant. It does so by
27  * instantiating a transliterator from the source script to the
28  * target/variant. If a run consists only of the target script,
29  * COMMON, or INHERITED characters, then the run is not changed.
30  *
31  * <p>At startup, all possible AnyTransliterators are registered with
32  * the system, as determined by examining the registered script
33  * transliterators.
34  *
35  * @since ICU 2.2
36  * @author Alan Liu
37  */

38 class AnyTransliterator extends Transliterator {
39
40     //------------------------------------------------------------
41
// Constants
42

43     static final char TARGET_SEP = '-';
44     static final char VARIANT_SEP = '/';
45     static final String JavaDoc ANY = "Any";
46     static final String JavaDoc NULL_ID = "Null";
47     static final String JavaDoc LATIN_PIVOT = "-Latin;Latin-";
48
49     /**
50      * Cache mapping UScriptCode values to Transliterator*.
51      */

52     private Map JavaDoc cache;
53
54     /**
55      * The target or target/variant string.
56      */

57     private String JavaDoc target;
58
59     /**
60      * The target script code. Never USCRIPT_INVALID_CODE.
61      */

62     private int targetScript;
63
64     /**
65      * Implements {@link Transliterator#handleTransliterate}.
66      */

67     protected void handleTransliterate(Replaceable text,
68                                        Position pos, boolean isIncremental) {
69         int allStart = pos.start;
70         int allLimit = pos.limit;
71
72         ScriptRunIterator it =
73             new ScriptRunIterator(text, pos.contextStart, pos.contextLimit);
74
75         while (it.next()) {
76             // Ignore runs in the ante context
77
if (it.limit <= allStart) continue;
78
79             // Try to instantiate transliterator from it.scriptCode to
80
// our target or target/variant
81
Transliterator t = getTransliterator(it.scriptCode);
82
83             if (t == null) {
84                 // We have no transliterator. Do nothing, but keep
85
// pos.start up to date.
86
pos.start = it.limit;
87                 continue;
88             }
89
90             // If the run end is before the transliteration limit, do
91
// a non-incremental transliteration. Otherwise do an
92
// incremental one.
93
boolean incremental = isIncremental && (it.limit >= allLimit);
94
95             pos.start = Math.max(allStart, it.start);
96             pos.limit = Math.min(allLimit, it.limit);
97             int limit = pos.limit;
98             t.filteredTransliterate(text, pos, incremental);
99             int delta = pos.limit - limit;
100             allLimit += delta;
101             it.adjustLimit(delta);
102
103             // We're done if we enter the post context
104
if (it.limit >= allLimit) break;
105         }
106
107         // Restore limit. pos.start is fine where the last transliterator
108
// left it, or at the end of the last run.
109
pos.limit = allLimit;
110     }
111
112     /**
113      * Private constructor
114      * @param id the ID of the form S-T or S-T/V, where T is theTarget
115      * and V is theVariant. Must not be empty.
116      * @param theTarget the target name. Must not be empty, and must
117      * name a script corresponding to theTargetScript.
118      * @param theVariant the variant name, or the empty string if
119      * there is no variant
120      * @param theTargetScript the script code corresponding to
121      * theTarget.
122      */

123     private AnyTransliterator(String JavaDoc id,
124                               String JavaDoc theTarget,
125                               String JavaDoc theVariant,
126                               int theTargetScript) {
127         super(id, null);
128         targetScript = theTargetScript;
129         cache = new HashMap JavaDoc();
130
131         target = theTarget;
132         if (theVariant.length() > 0) {
133             target = theTarget + VARIANT_SEP + theVariant;
134         }
135     }
136
137     /**
138      * Returns a transliterator from the given source to our target or
139      * target/variant. Returns NULL if the source is the same as our
140      * target script, or if the source is USCRIPT_INVALID_CODE.
141      * Caches the result and returns the same transliterator the next
142      * time. The caller does NOT own the result and must not delete
143      * it.
144      */

145     private Transliterator getTransliterator(int source) {
146         if (source == targetScript || source == UScript.INVALID_CODE) {
147             return null;
148         }
149
150         Integer JavaDoc key = new Integer JavaDoc(source);
151         Transliterator t = (Transliterator) cache.get(key);
152         if (t == null) {
153             String JavaDoc sourceName = UScript.getName(source);
154             String JavaDoc id = sourceName + TARGET_SEP + target;
155
156             try {
157                 t = Transliterator.getInstance(id, FORWARD);
158             } catch (RuntimeException JavaDoc e) { }
159             if (t == null) {
160
161                 // Try to pivot around Latin, our most common script
162
id = sourceName + LATIN_PIVOT + target;
163                 try {
164                     t = Transliterator.getInstance(id, FORWARD);
165                 } catch (RuntimeException JavaDoc e) { }
166             }
167
168             if (t != null) {
169                 cache.put(key, t);
170             }
171         }
172
173         return t;
174     }
175
176     /**
177      * Registers standard transliterators with the system. Called by
178      * Transliterator during initialization. Scan all current targets
179      * and register those that are scripts T as Any-T/V.
180      */

181     static void register() {
182
183         HashSet JavaDoc seen = new HashSet JavaDoc();
184
185         for (Enumeration JavaDoc s=Transliterator.getAvailableSources(); s.hasMoreElements(); ) {
186             String JavaDoc source = (String JavaDoc) s.nextElement();
187
188             // Ignore the "Any" source
189
if (source.equalsIgnoreCase(ANY)) continue;
190
191             for (Enumeration JavaDoc t=Transliterator.getAvailableTargets(source);
192                  t.hasMoreElements(); ) {
193                 String JavaDoc target = (String JavaDoc) t.nextElement();
194
195                 // Only process each target once
196
if (seen.contains(target)) continue;
197                 seen.add(target);
198
199                 // Get the script code for the target. If not a script, ignore.
200
int targetScript = scriptNameToCode(target);
201                 if (targetScript == UScript.INVALID_CODE) continue;
202
203                 for (Enumeration JavaDoc v=Transliterator.getAvailableVariants(source, target);
204                      v.hasMoreElements(); ) {
205                     String JavaDoc variant = (String JavaDoc) v.nextElement();
206
207                     String JavaDoc id;
208                     id = TransliteratorIDParser.STVtoID(ANY, target, variant);
209                     AnyTransliterator trans = new AnyTransliterator(id, target, variant,
210                                                                     targetScript);
211                     Transliterator.registerInstance(trans);
212                     Transliterator.registerSpecialInverse(target, NULL_ID, false);
213                 }
214             }
215         }
216     }
217
218     /**
219      * Return the script code for a given name, or
220      * UScript.INVALID_CODE if not found.
221      */

222     private static int scriptNameToCode(String JavaDoc name) {
223         try{
224             int[] codes = UScript.getCode(name);
225             return codes != null ? codes[0] : UScript.INVALID_CODE;
226         }catch( MissingResourceException JavaDoc e){
227             return UScript.INVALID_CODE;
228         }
229     }
230
231     //------------------------------------------------------------
232
// ScriptRunIterator
233

234     /**
235      * Returns a series of ranges corresponding to scripts. They will be
236      * of the form:
237      *
238      * ccccSScSSccccTTcTcccc - c = common, S = first script, T = second
239      * | | - first run (start, limit)
240      * | | - second run (start, limit)
241      *
242      * That is, the runs will overlap. The reason for this is so that a
243      * transliterator can consider common characters both before and after
244      * the scripts.
245      */

246     private static class ScriptRunIterator {
247
248         private Replaceable text;
249         private int textStart;
250         private int textLimit;
251
252         /**
253          * The code of the current run, valid after next() returns. May
254          * be UScript.INVALID_CODE if and only if the entire text is
255          * COMMON/INHERITED.
256          */

257         public int scriptCode;
258
259         /**
260          * The start of the run, inclusive, valid after next() returns.
261          */

262         public int start;
263
264         /**
265          * The end of the run, exclusive, valid after next() returns.
266          */

267         public int limit;
268
269         /**
270          * Constructs a run iterator over the given text from start
271          * (inclusive) to limit (exclusive).
272          */

273         public ScriptRunIterator(Replaceable text, int start, int limit) {
274             this.text = text;
275             this.textStart = start;
276             this.textLimit = limit;
277             this.limit = start;
278         }
279
280
281         /**
282          * Returns TRUE if there are any more runs. TRUE is always
283          * returned at least once. Upon return, the caller should
284          * examine scriptCode, start, and limit.
285          */

286         public boolean next() {
287             int ch;
288             int s;
289
290             scriptCode = UScript.INVALID_CODE; // don't know script yet
291
start = limit;
292
293             // Are we done?
294
if (start == textLimit) {
295                 return false;
296             }
297
298             // Move start back to include adjacent COMMON or INHERITED
299
// characters
300
while (start > textStart) {
301                 ch = text.char32At(start - 1); // look back
302
s = UScript.getScript(ch);
303                 if (s == UScript.COMMON || s == UScript.INHERITED) {
304                     --start;
305                 } else {
306                     break;
307                 }
308             }
309
310             // Move limit ahead to include COMMON, INHERITED, and characters
311
// of the current script.
312
while (limit < textLimit) {
313                 ch = text.char32At(limit); // look ahead
314
s = UScript.getScript(ch);
315                 if (s != UScript.COMMON && s != UScript.INHERITED) {
316                     if (scriptCode == UScript.INVALID_CODE) {
317                         scriptCode = s;
318                     } else if (s != scriptCode) {
319                         break;
320                     }
321                 }
322                 ++limit;
323             }
324
325             // Return TRUE even if the entire text is COMMON / INHERITED, in
326
// which case scriptCode will be UScript.INVALID_CODE.
327
return true;
328         }
329
330         /**
331          * Adjusts internal indices for a change in the limit index of the
332          * given delta. A positive delta means the limit has increased.
333          */

334         public void adjustLimit(int delta) {
335             limit += delta;
336             textLimit += delta;
337         }
338     }
339 }
340
341 //eof
342
Popular Tags