KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > UnescapeTransliterator


/*
**********************************************************************
* Copyright (c) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 11/19/2001 aliu Creation.
**********************************************************************
*/

10 package com.ibm.icu.text;
11 import com.ibm.icu.lang.*;
12
13 /**
14  * A transliterator that converts Unicode escape forms to the
15  * characters they represent. Escape forms have a prefix, a suffix, a
16  * radix, and minimum and maximum digit counts.
17  *
18  * <p>This class is package private. It registers several standard
19  * variants with the system which are then accessed via their IDs.
20  *
21  * @author Alan Liu
22  */

23 class UnescapeTransliterator extends Transliterator {
24
25     /**
26      * The encoded pattern specification. The pattern consists of
27      * zero or more forms. Each form consists of a prefix, suffix,
28      * radix, minimum digit count, and maximum digit count. These
29      * values are stored as a five character header. That is, their
30      * numeric values are cast to 16-bit characters and stored in the
31      * string. Following these five characters, the prefix
32      * characters, then suffix characters are stored. Each form thus
33      * takes n+5 characters, where n is the total length of the prefix
34      * and suffix. The end is marked by a header of length one
35      * consisting of the character END.
36      */

37     private char spec[];
38
39     /**
40      * Special character marking the end of the spec[] array.
41      */

42     private static final char END = 0xFFFF;
43
44     /**
45      * Registers standard variants with the system. Called by
46      * Transliterator during initialization.
47      */

48     static void register() {
49         // Unicode: "U+10FFFF" hex, min=4, max=6
50
Transliterator.registerFactory("Hex-Any/Unicode", new Transliterator.Factory() {
51             public Transliterator getInstance(String JavaDoc ID) {
52                 return new UnescapeTransliterator("Hex-Any/Unicode", new char[] {
53                     2, 0, 16, 4, 6, 'U', '+',
54                     END
55                 });
56             }
57         });
58         
59         // Java: "\\uFFFF" hex, min=4, max=4
60
Transliterator.registerFactory("Hex-Any/Java", new Transliterator.Factory() {
61             public Transliterator getInstance(String JavaDoc ID) {
62                 return new UnescapeTransliterator("Hex-Any/Java", new char[] {
63                     2, 0, 16, 4, 4, '\\', 'u',
64                     END
65                 });
66             }
67         });
68         
69         // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
70
Transliterator.registerFactory("Hex-Any/C", new Transliterator.Factory() {
71             public Transliterator getInstance(String JavaDoc ID) {
72                 return new UnescapeTransliterator("Hex-Any/C", new char[] {
73                     2, 0, 16, 4, 4, '\\', 'u',
74                     2, 0, 16, 8, 8, '\\', 'U',
75                     END
76                 });
77             }
78         });
79         
80         // XML: "&#x10FFFF;" hex, min=1, max=6
81
Transliterator.registerFactory("Hex-Any/XML", new Transliterator.Factory() {
82             public Transliterator getInstance(String JavaDoc ID) {
83                 return new UnescapeTransliterator("Hex-Any/XML", new char[] {
84                     3, 1, 16, 1, 6, '&', '#', 'x', ';',
85                     END
86                 });
87             }
88         });
89
90         // XML10: "&1114111;" dec, min=1, max=7 (not really "Hex-Any")
91
Transliterator.registerFactory("Hex-Any/XML10", new Transliterator.Factory() {
92             public Transliterator getInstance(String JavaDoc ID) {
93                 return new UnescapeTransliterator("Hex-Any/XML10", new char[] {
94                     2, 1, 10, 1, 7, '&', '#', ';',
95                     END
96                 });
97             }
98         });
99
100         // Perl: "\\x{263A}" hex, min=1, max=6
101
Transliterator.registerFactory("Hex-Any/Perl", new Transliterator.Factory() {
102             public Transliterator getInstance(String JavaDoc ID) {
103                 return new UnescapeTransliterator("Hex-Any/Perl", new char[] {
104                     3, 1, 16, 1, 6, '\\', 'x', '{', '}',
105                     END
106                 });
107             }
108         });
109
110         // All: Java, C, Perl, XML, XML10, Unicode
111
Transliterator.registerFactory("Hex-Any", new Transliterator.Factory() {
112             public Transliterator getInstance(String JavaDoc ID) {
113                 return new UnescapeTransliterator("Hex-Any", new char[] {
114                     2, 0, 16, 4, 6, 'U', '+', // Unicode
115
2, 0, 16, 4, 4, '\\', 'u', // Java
116
2, 0, 16, 8, 8, '\\', 'U', // C (surrogates)
117
3, 1, 16, 1, 6, '&', '#', 'x', ';', // XML
118
2, 1, 10, 1, 7, '&', '#', ';', // XML10
119
3, 1, 16, 1, 6, '\\', 'x', '{', '}', // Perl
120
END
121                 });
122             }
123         });
124     }
125
126     /**
127      * Package private constructor. Takes the encoded spec array.
128      */

129     UnescapeTransliterator(String JavaDoc ID, char spec[]) {
130         super(ID, null);
131         this.spec = spec;
132     }
133
134     /**
135      * Implements {@link Transliterator#handleTransliterate}.
136      */

137     protected void handleTransliterate(Replaceable text,
138                                        Position pos, boolean isIncremental) {
139         int start = pos.start;
140         int limit = pos.limit;
141         int i, j, ipat;
142
143       loop:
144         while (start < limit) {
145             // Loop over the forms in spec[]. Exit this loop when we
146
// match one of the specs. Exit the outer loop if a
147
// partial match is detected and isIncremental is true.
148
for (j=0, ipat=0; spec[ipat] != END; ++j) {
149
150                 // Read the header
151
int prefixLen = spec[ipat++];
152                 int suffixLen = spec[ipat++];
153                 int radix = spec[ipat++];
154                 int minDigits = spec[ipat++];
155                 int maxDigits = spec[ipat++];
156
157                 // s is a copy of start that is advanced over the
158
// characters as we parse them.
159
int s = start;
160                 boolean match = true;
161
162                 for (i=0; i<prefixLen; ++i) {
163                     if (s >= limit) {
164                         if (i > 0) {
165                             // We've already matched a character. This is
166
// a partial match, so we return if in
167
// incremental mode. In non-incremental mode,
168
// go to the next spec.
169
if (isIncremental) {
170                                 break loop;
171                             }
172                             match = false;
173                             break;
174                         }
175                     }
176                     char c = text.charAt(s++);
177                     if (c != spec[ipat + i]) {
178                         match = false;
179                         break;
180                     }
181                 }
182
183                 if (match) {
184                     int u = 0;
185                     int digitCount = 0;
186                     for (;;) {
187                         if (s >= limit) {
188                             // Check for partial match in incremental mode.
189
if (s > start && isIncremental) {
190                                 break loop;
191                             }
192                             break;
193                         }
194                         int ch = text.char32At(s);
195                         int digit = UCharacter.digit(ch, radix);
196                         if (digit < 0) {
197                             break;
198                         }
199                         s += UTF16.getCharCount(ch);
200                         u = (u * radix) + digit;
201                         if (++digitCount == maxDigits) {
202                             break;
203                         }
204                     }
205
206                     match = (digitCount >= minDigits);
207
208                     if (match) {
209                         for (i=0; i<suffixLen; ++i) {
210                             if (s >= limit) {
211                                 // Check for partial match in incremental mode.
212
if (s > start && isIncremental) {
213                                     break loop;
214                                 }
215                                 match = false;
216                                 break;
217                             }
218                             char c = text.charAt(s++);
219                             if (c != spec[ipat + prefixLen + i]) {
220                                 match = false;
221                                 break;
222                             }
223                         }
224
225                         if (match) {
226                             // At this point, we have a match
227
String JavaDoc str = UTF16.valueOf(u);
228                             text.replace(start, s, str);
229                             limit -= s - start - str.length();
230                             // The following break statement leaves the
231
// loop that is traversing the forms in
232
// spec[]. We then parse the next input
233
// character.
234
break;
235                         }
236                     }
237                 }
238
239                 ipat += prefixLen + suffixLen;
240             }
241
242             if (start < limit) {
243                 start += UTF16.getCharCount(text.char32At(start));
244             }
245         }
246
247         pos.contextLimit += limit - pos.limit;
248         pos.limit = limit;
249         pos.start = start;
250     }
251 }
252
Popular Tags