KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > xquark > xpath > datamodel > xerces > utils > XMLCharacterProperties


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.xquark.xpath.datamodel.xerces.utils;
59
/**
 * Character classification tables and validity checks for lexical productions
 * of the XML 1.0 and Namespaces in XML 1.0 recommendations.
 *
 * <p>The {@code valid*} methods are convenience wrappers around the lookup
 * tables declared in this class. The tables are public so that
 * performance-critical routines can index them directly; they are shared,
 * mutable arrays and must be treated as read-only.
 *
 * <p>Characters up to and including {@code 'z'} are classified through the
 * 128-entry {@code fgAscii*} tables. All other characters are classified
 * through {@link #fgCharFlags}, which is built on demand by
 * {@link #initCharFlags()}.
 */
public final class XMLCharacterProperties {

    /*
     * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
     *
     * Note: This is the same as the ASCII portion of the NameChar definition.
     */

    /**
     * Checks whether a string is a valid version string according to
     * production [26] of the XML 1.0 Recommendation.
     *
     * @param version string to check
     * @return {@code true} if {@code version} is non-empty and consists only
     *         of ASCII name characters
     * @throws NullPointerException if {@code version} is {@code null}
     */
    public static boolean validVersionNum(String version) {
        int len = version.length();
        if (len == 0) {
            return false;
        }
        for (int i = 0; i < len; i++) {
            char ch = version.charAt(i);
            // The ASCII tables only have 128 entries; nothing above 'z' is legal here.
            if (ch > 'z' || fgAsciiNameChar[ch] == 0) {
                return false;
            }
        }
        return true;
    }

    /*
     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
     */

    /**
     * Checks whether a string is a valid encoding name according to
     * production [81] of the XML 1.0 Recommendation.
     *
     * @param encoding string to check
     * @return {@code true} if {@code encoding} is a valid encoding name
     * @throws NullPointerException if {@code encoding} is {@code null}
     */
    public static boolean validEncName(String encoding) {
        int len = encoding.length();
        if (len == 0) {
            return false;
        }
        char ch = encoding.charAt(0);
        if (ch > 'z' || fgAsciiAlphaChar[ch] == 0) {
            return false;
        }
        for (int i = 1; i < len; i++) {
            ch = encoding.charAt(i);
            if (ch > 'z' || fgAsciiEncNameChar[ch] == 0) {
                return false;
            }
        }
        return true;
    }

    /*
     * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
     */

    /**
     * Locates the first character of a public identifier that is not a
     * PubidChar according to production [13] of the XML 1.0 Recommendation.
     *
     * @param publicId string to check
     * @return the index of the first invalid character, or {@code -1} if
     *         every character is a valid PubidChar (this includes the empty
     *         string)
     * @throws NullPointerException if {@code publicId} is {@code null}
     */
    public static int validPublicId(String publicId) {
        int len = publicId.length();
        if (len == 0) {
            return -1;
        }
        for (int i = 0; i < len; i++) {
            char ch = publicId.charAt(i);
            if (ch > 'z' || fgAsciiPubidChar[ch] == 0) {
                return i;
            }
        }
        return -1;
    }

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     */

    /**
     * Checks whether a string is a valid Name according to production [5]
     * of the XML 1.0 Recommendation.
     *
     * @param name string to check
     * @return {@code true} if {@code name} is a valid Name
     * @throws NullPointerException if {@code name} is {@code null}
     */
    public static boolean validName(String name) {
        int len = name.length();
        if (len == 0) {
            return false;
        }
        if (!hasNameProperty(name.charAt(0), fgAsciiInitialNameChar, E_InitialNameCharFlag)) {
            return false;
        }
        for (int i = 1; i < len; i++) {
            if (!hasNameProperty(name.charAt(i), fgAsciiNameChar, E_NameCharFlag)) {
                return false;
            }
        }
        return true;
    }

    /*
     * From the Namespaces in XML 1.0 Recommendation:
     * [4] NCName ::= (Letter | '_') (NCNameChar)*
     * [5] NCNameChar ::= Letter | Digit | '.' | '-' | '_' | CombiningChar | Extender
     */

    /**
     * Checks whether a string is a valid NCName (a Name that contains no
     * colon) according to production [4] of the Namespaces in XML 1.0
     * Recommendation.
     *
     * @param name string to check
     * @return {@code true} if {@code name} is a valid NCName
     * @throws NullPointerException if {@code name} is {@code null}
     */
    public static boolean validNCName(String name) {
        int len = name.length();
        if (len == 0) {
            return false;
        }
        // ':' is ASCII, so only the ASCII tables differ from the ones used by validName.
        if (!hasNameProperty(name.charAt(0), fgAsciiInitialNCNameChar, E_InitialNameCharFlag)) {
            return false;
        }
        for (int i = 1; i < len; i++) {
            if (!hasNameProperty(name.charAt(i), fgAsciiNCNameChar, E_NameCharFlag)) {
                return false;
            }
        }
        return true;
    }

    /*
     * [7] Nmtoken ::= (NameChar)+
     */

    /**
     * Checks whether a string is a valid Nmtoken according to production [7]
     * of the XML 1.0 Recommendation.
     *
     * @param nmtoken string to check
     * @return {@code true} if {@code nmtoken} is a valid Nmtoken
     * @throws NullPointerException if {@code nmtoken} is {@code null}
     */
    public static boolean validNmtoken(String nmtoken) {
        int len = nmtoken.length();
        if (len == 0) {
            return false;
        }
        for (int i = 0; i < len; i++) {
            if (!hasNameProperty(nmtoken.charAt(i), fgAsciiNameChar, E_NameCharFlag)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Classifies one character: characters up to 'z' are looked up in the
     * given ASCII table, everything else is tested against {@code flag} in
     * the full character flag table.
     */
    private static boolean hasNameProperty(char ch, byte[] asciiTable, byte flag) {
        if (ch <= 'z') {
            return asciiTable[ch] != 0;
        }
        return (charFlags()[ch] & flag) != 0;
    }

    /**
     * Returns the full character flag table, building it first if nobody has
     * called {@link #initCharFlags()} yet. Without this the validators would
     * fail with a NullPointerException on the first non-ASCII character.
     */
    private static byte[] charFlags() {
        byte[] flags = fgCharFlags;
        if (flags == null) {
            initCharFlags();
            flags = fgCharFlags;
        }
        return flags;
    }

    /*
     * Here are tables used to build character properties.
     *
     * Each fgAscii* table has 128 entries, laid out as 8 rows of 16 code
     * points (row n covers 0x(n)0 - 0x(n)F).
     */

    /** Non-zero for ASCII hexadecimal digits. */
    public static final byte[] fgAsciiXDigitChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // '0' - '9'
        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'A' - 'F'
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 'a' - 'f'
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /** Non-zero for ASCII letters. */
    public static final byte[] fgAsciiAlphaChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 'P' - 'Z'
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /** Non-zero for characters allowed after the first character of an EncName. */
    public static final byte[] fgAsciiEncNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, // '-' is 0x2D and '.' is 0x2E
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // '0' - '9'
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 'P' - 'Z' and '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /** Non-zero for PubidChar characters. */
    public static final byte[] fgAsciiPubidChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, // LF is 0x0A, CR is 0x0D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, // ' ', '!', '#', '$', '%',
                                                        // '\'', '(', ')', '*', '+', ',', '-', '.', '/'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, // '0' - '9', ':', ';', '=', '?'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '@', 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 'P' - 'Z' and '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /** Non-zero for ASCII characters that may start a Name. */
    public static final byte[] fgAsciiInitialNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, // ':' is 0x3A
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 'P' - 'Z' and '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /** Non-zero for ASCII NameChar characters. */
    public static final byte[] fgAsciiNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, // '-' is 0x2D and '.' is 0x2E
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // '0' - '9' and ':' is 0x3A
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 'P' - 'Z' and '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /** Non-zero for ASCII characters that may start an NCName (no ':'). */
    public static final byte[] fgAsciiInitialNCNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ':' (0x3A) is deliberately excluded
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 'P' - 'Z' and '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /** Non-zero for ASCII NCNameChar characters (NameChar without ':'). */
    public static final byte[] fgAsciiNCNameChar = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, // '-' is 0x2D and '.' is 0x2E
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // '0' - '9'; ':' (0x3A) is excluded
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A' - 'O'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 'P' - 'Z' and '_' is 0x5F
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a' - 'o'
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0  // 'p' - 'z'
    };

    /**
     * Per-character codes for scanning character data: 0 for ordinary
     * content (including tab), 1 for '&lt;', 2 for '&amp;', 3 for ']' and 4
     * for the remaining control characters below 0x20.
     * NOTE(review): the codes are not interpreted in this class; presumably
     * they are dispatch values for a scanner elsewhere - confirm against callers.
     */
    public static final byte[] fgAsciiCharData = {
        4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, // tab is 0x09
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, // '&' is 0x26
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '<' is 0x3C
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, // ']' is 0x5D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /**
     * Same layout as {@link #fgAsciiCharData}, except that the whitespace
     * characters tab, LF, CR and space carry the separate code 5.
     */
    public static final byte[] fgAsciiWSCharData = {
        4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 4, 4, 5, 4, 4, // tab is 0x09, LF is 0x0A, CR is 0x0D
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ' ' is 0x20, '&' is 0x26
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // '<' is 0x3C
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, // ']' is 0x5D
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };

    /** Bit set in {@link #fgCharFlags} for characters listed in the character data ranges. */
    public static final byte E_CharDataFlag = 1 << 0;

    /** Bit set in {@link #fgCharFlags} for characters that may start a Name. */
    public static final byte E_InitialNameCharFlag = 1 << 1;

    /** Bit set in {@link #fgCharFlags} for characters that may appear in a Name. */
    public static final byte E_NameCharFlag = 1 << 2;

    /**
     * Flag bits for every UTF-16 code unit (0x10000 entries), or {@code null}
     * until {@link #initCharFlags()} has run. Code that indexes this table
     * directly must call {@link #initCharFlags()} first.
     */
    public static byte[] fgCharFlags = null;

    /**
     * Builds {@link #fgCharFlags} if it has not been built yet. Subsequent
     * calls are no-ops.
     */
    public static synchronized void initCharFlags() {
        if (fgCharFlags == null) {
            // Fill a local table and publish it last, so the public field
            // never refers to a partially populated array.
            byte[] flags = new byte[0x10000];
            setFlagForRange(flags, fgCharDataRanges, E_CharDataFlag);
            setFlagForRange(flags, fgInitialNameCharRanges,
                    (byte) (E_InitialNameCharFlag | E_NameCharFlag));
            setFlagForRange(flags, fgNameCharRanges, E_NameCharFlag);
            fgCharFlags = flags;
        }
    }

    /**
     * ORs {@code flag} into {@code flags} for every character described by
     * {@code ranges}.
     *
     * <p>{@code ranges} holds inclusive (start, end) pairs terminated by
     * 0x0000, followed by individual characters terminated by 0x0000.
     */
    private static void setFlagForRange(byte[] flags, char[] ranges, byte flag) {
        int i;
        int ch;
        for (i = 0; (ch = ranges[i]) != 0; i += 2) {
            int endch = ranges[i + 1];
            while (ch <= endch) {
                flags[ch++] |= flag;
            }
        }
        // i now indexes the range terminator; the single values start right after it.
        for (i++; (ch = ranges[i]) != 0; i++) {
            flags[ch] |= flag;
        }
    }

    /*
     * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF]       // any Unicode character, excluding the
     *            | [#xE000-#xFFFD] | [#x10000-#x10FFFF]  // surrogate blocks, FFFE, and FFFF.
     * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
     *
     * The table below is Char minus '<', '&', ']', #xA and #xD; those
     * special cases are handled inline by the code that scans content.
     */
    private static final char[] fgCharDataRanges = {
        0x0020, 0x0025, // '&' is 0x0026
        0x0027, 0x003B, // '<' is 0x003C
        0x003D, 0x005C, // ']' is 0x005D
        0x005E, 0xD7FF,
        0xE000, 0xFFFD,
        0x0000,         // ranges end marker
        0x0009,         // tab
        0x0000          // singles end marker
    };

    /*
     * [5] Name ::= (Letter | '_' | ':') (NameChar)*
     * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
     * [84] Letter ::= BaseChar | Ideographic
     * [85] BaseChar ::= <see standard>
     * [86] Ideographic ::= <see standard>
     * [87] CombiningChar ::= <see standard>
     * [88] Digit ::= <see standard>
     * [89] Extender ::= <see standard>
     */
    private static final char[] fgInitialNameCharRanges = {
        //
        // Ranges:
        //
        // BaseChar ranges
        //
        0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
        0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
        0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
        0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
        0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
        0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
        0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
        0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
        0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
        0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
        0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
        0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
        0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
        0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
        0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
        0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
        0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
        0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
        0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
        0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
        0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
        0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
        0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
        0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
        0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
        0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
        0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
        0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
        0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
        0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
        0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
        0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
        0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
        0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
        0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
        0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
        0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
        0xAC00, 0xD7A3,
        //
        // Ideographic ranges
        //
        0x3021, 0x3029, 0x4E00, 0x9FA5,
        //
        // Ranges end marker
        //
        0x0000,
        //
        // Single char values
        //
        0x003A, // ':'
        0x005F, // '_'
        //
        // BaseChar singles
        //
        0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
        0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
        0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
        0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
        0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
        0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
        0x1F5D, 0x1FBE, 0x2126, 0x212E,
        //
        // Ideographic singles
        //
        0x3007,
        //
        // Singles end marker
        //
        0x0000
    };

    /*
     * Characters that are NameChar but not valid as the first character of
     * a Name: '-', '.', CombiningChar, Digit and Extender.
     */
    private static final char[] fgNameCharRanges = {
        //
        // Ranges:
        //
        0x002D, 0x002E, // '-' and '.'
        //
        // CombiningChar ranges
        //
        0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
        0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
        0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
        0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
        0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
        0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
        0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
        0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
        0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
        0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
        0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
        0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
        0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
        0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
        0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
        0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
        0x20D0, 0x20DC, 0x302A, 0x302F,
        //
        // Digit ranges
        //
        0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
        0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
        0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
        0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
        //
        // Extender ranges
        //
        0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
        //
        // Ranges end marker
        //
        0x0000,
        //
        // Single char values
        //
        // CombiningChar singles
        //
        0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
        0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
        0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
        0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
        //
        // Extender singles
        //
        0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
        //
        // Singles end marker
        //
        0x0000
    };
}
521
Popular Tags