KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > xml2 > XmlChar


1 /*
2  * Copyright (c) 1998-2006 Caucho Technology -- all rights reserved
3  *
4  * This file is part of Resin(R) Open Source
5  *
6  * Each copy or derived work must preserve the copyright notice and this
7  * notice unmodified.
8  *
9  * Resin Open Source is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * Resin Open Source is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
17  * of NON-INFRINGEMENT. See the GNU General Public License for more
18  * details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with Resin Open Source; if not, write to the
22  * Free SoftwareFoundation, Inc.
23  * 59 Temple Place, Suite 330
24  * Boston, MA 02111-1307 USA
25  *
26  * @author Scott Ferguson
27  */

28
29 package com.caucho.xml2;
30
/**
 * XmlChar contains the XML character classes.
 *
 * <p>All methods are static and take a character value as an {@code int}.
 * The range tables in the private helpers follow the XML 1.0 character-class
 * productions (BaseChar, Ideographic, CombiningChar, Digit, Extender).
 * Within the large tables, the ranges are grouped under a leading
 * {@code ch <= limit} guard so that a whole group is skipped with a single
 * comparison when the character lies above it.
 */
public class XmlChar {
  /**
   * Lookup table indexed by ASCII code (0..127); an entry is true when that
   * character may appear in an XML name.  Filled in by the static
   * initializer at the bottom of the class.
   */
  static boolean isAsciiNameChar[];

  /** Not instantiable: the class only exposes static predicates. */
  private XmlChar() {}

  /**
   * Returns true if the character is XML whitespace: space (0x20),
   * tab (0x9), line feed (0xa) or carriage return (0xd).
   *
   * @param ch the character to test
   */
  public static boolean isWhitespace(int ch)
  {
    // The leading range test rejects almost every character with one compare.
    return ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
  }

  /**
   * Returns true if the character is a legal XML character, i.e. matches
   * the XML 1.0 <code>Char</code> production:
   * <code>#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
   * [#x10000-#x10FFFF]</code>.
   *
   * <p>Surrogate code units (0xd800-0xdfff) and the non-characters
   * 0xfffe and 0xffff are rejected.
   *
   * @param ch the character (code point) to test
   */
  public static boolean isChar(int ch)
  {
    return (ch >= 0x20 && ch <= 0xd7ff ||
            ch == 0x9 ||
            ch == 0xa ||
            ch == 0xd ||
            ch >= 0xe000 && ch <= 0xfffd ||
            ch >= 0x10000 && ch <= 0x10ffff);
  }

  /**
   * Returns true if the character may start an XML name: an ASCII letter,
   * '_' or ':', or a non-ASCII base character or ideograph.
   *
   * @param ch the character to test
   */
  public static boolean isNameStart(int ch)
  {
    return (ch >= 0x41 && ch <= 0x5a ||
            ch >= 0x61 && ch <= 0x7a ||
            ch == '_' || ch == ':' ||
            ch > 0x7f && (isBaseChar(ch) ||
                          isIdeographic(ch)));
  }

  /**
   * Returns a boolean array testing for ascii name characters.
   *
   * <p>The returned array is the shared 128-entry table used by
   * {@link #isNameChar(int)}, not a copy; callers must treat it as
   * read-only.
   */
  public static boolean []getAsciiNameCharArray()
  {
    return isAsciiNameChar;
  }

  /**
   * Returns true if the character is an XML name character.
   *
   * <p>Characters below 0x20 (including negative values) are never name
   * characters, ASCII characters are answered from the lookup table, and
   * everything else is classified with the XML character-class tables.
   *
   * @param ch the character to test
   */
  public static boolean isNameChar(int ch)
  {
    if (ch < 0x20) {
      return false;
    }
    else if (ch < 128) {
      return isAsciiNameChar[ch];
    }
    else {
      return (isBaseChar(ch) ||
              isIdeographic(ch) ||
              isCombiningChar(ch) ||
              isExtender(ch) ||
              isDigit(ch));
    }
  }

  /**
   * Returns true if the character is in the XML 1.0 BaseChar class.
   */
  private static boolean isBaseChar(int ch)
  {
    return (ch <= 0xff && (ch >= 0x0041 && ch <= 0x005A ||
                           ch >= 0x0061 && ch <= 0x007A ||
                           ch >= 0x00C0 && ch <= 0x00D6 ||
                           ch >= 0x00D8 && ch <= 0x00F6 ||
                           ch >= 0x00F8 && ch <= 0x00FF) ||
            ch <= 0x1f5 && (ch >= 0x0100 && ch <= 0x0131 ||
                            ch >= 0x0134 && ch <= 0x013E ||
                            ch >= 0x0141 && ch <= 0x0148 ||
                            ch >= 0x014A && ch <= 0x017E ||
                            ch >= 0x0180 && ch <= 0x01C3 ||
                            ch >= 0x01CD && ch <= 0x01F0 ||
                            ch >= 0x01F4 && ch <= 0x01F5) ||
            ch <= 0x2ff && (ch >= 0x01FA && ch <= 0x0217 ||
                            ch >= 0x0250 && ch <= 0x02A8 ||
                            ch >= 0x02BB && ch <= 0x02C1) ||
            ch <= 0x3ff && (ch == 0x0386 ||
                            ch >= 0x0388 && ch <= 0x038A ||
                            ch == 0x038C ||
                            ch >= 0x038E && ch <= 0x03A1 ||
                            ch >= 0x03A3 && ch <= 0x03CE ||
                            ch >= 0x03D0 && ch <= 0x03D6 ||
                            ch == 0x03DA ||
                            ch == 0x03DC ||
                            ch == 0x03DE ||
                            ch == 0x03E0 ||
                            ch >= 0x03E2 && ch <= 0x03F3) ||
            ch <= 0x4ff && (ch >= 0x0401 && ch <= 0x040C ||
                            ch >= 0x040E && ch <= 0x044F ||
                            ch >= 0x0451 && ch <= 0x045C ||
                            ch >= 0x045E && ch <= 0x0481 ||
                            ch >= 0x0490 && ch <= 0x04C4 ||
                            ch >= 0x04C7 && ch <= 0x04C8 ||
                            ch >= 0x04CB && ch <= 0x04CC ||
                            ch >= 0x04D0 && ch <= 0x04EB ||
                            ch >= 0x04EE && ch <= 0x04F5 ||
                            ch >= 0x04F8 && ch <= 0x04F9) ||
            ch <= 0x5ff && (ch >= 0x0531 && ch <= 0x0556 ||
                            ch == 0x0559 ||
                            ch >= 0x0561 && ch <= 0x0586 ||
                            ch >= 0x05D0 && ch <= 0x05EA ||
                            ch >= 0x05F0 && ch <= 0x05F2) ||
            ch <= 0x6ff && (ch >= 0x0621 && ch <= 0x063A ||
                            ch >= 0x0641 && ch <= 0x064A ||
                            ch >= 0x0671 && ch <= 0x06B7 ||
                            ch >= 0x06BA && ch <= 0x06BE ||
                            ch >= 0x06C0 && ch <= 0x06CE ||
                            ch >= 0x06D0 && ch <= 0x06D3 ||
                            ch == 0x06D5 ||
                            ch >= 0x06E5 && ch <= 0x06E6) ||
            ch <= 0x9ff && (ch >= 0x0905 && ch <= 0x0939 ||
                            ch == 0x093D ||
                            ch >= 0x0958 && ch <= 0x0961 ||
                            ch >= 0x0985 && ch <= 0x098C ||
                            ch >= 0x098F && ch <= 0x0990 ||
                            ch >= 0x0993 && ch <= 0x09A8 ||
                            ch >= 0x09AA && ch <= 0x09B0 ||
                            ch == 0x09B2 ||
                            ch >= 0x09B6 && ch <= 0x09B9 ||
                            ch >= 0x09DC && ch <= 0x09DD ||
                            ch >= 0x09DF && ch <= 0x09E1 ||
                            ch >= 0x09F0 && ch <= 0x09F1) ||
            ch <= 0xaff && (ch >= 0x0A05 && ch <= 0x0A0A ||
                            ch >= 0x0A0F && ch <= 0x0A10 ||
                            ch >= 0x0A13 && ch <= 0x0A28 ||
                            ch >= 0x0A2A && ch <= 0x0A30 ||
                            ch >= 0x0A32 && ch <= 0x0A33 ||
                            ch >= 0x0A35 && ch <= 0x0A36 ||
                            ch >= 0x0A38 && ch <= 0x0A39 ||
                            ch >= 0x0A59 && ch <= 0x0A5C ||
                            ch == 0x0A5E ||
                            ch >= 0x0A72 && ch <= 0x0A74 ||
                            ch >= 0x0A85 && ch <= 0x0A8B ||
                            ch == 0x0A8D ||
                            ch >= 0x0A8F && ch <= 0x0A91 ||
                            ch >= 0x0A93 && ch <= 0x0AA8 ||
                            ch >= 0x0AAA && ch <= 0x0AB0 ||
                            ch >= 0x0AB2 && ch <= 0x0AB3 ||
                            ch >= 0x0AB5 && ch <= 0x0AB9 ||
                            ch == 0x0ABD ||
                            ch == 0x0AE0) ||
            ch <= 0xbff && (ch >= 0x0B05 && ch <= 0x0B0C ||
                            ch >= 0x0B0F && ch <= 0x0B10 ||
                            ch >= 0x0B13 && ch <= 0x0B28 ||
                            ch >= 0x0B2A && ch <= 0x0B30 ||
                            ch >= 0x0B32 && ch <= 0x0B33 ||
                            ch >= 0x0B36 && ch <= 0x0B39 ||
                            ch == 0x0B3D ||
                            ch >= 0x0B5C && ch <= 0x0B5D ||
                            ch >= 0x0B5F && ch <= 0x0B61 ||
                            ch >= 0x0B85 && ch <= 0x0B8A ||
                            ch >= 0x0B8E && ch <= 0x0B90 ||
                            ch >= 0x0B92 && ch <= 0x0B95 ||
                            ch >= 0x0B99 && ch <= 0x0B9A ||
                            ch == 0x0B9C ||
                            ch >= 0x0B9E && ch <= 0x0B9F ||
                            ch >= 0x0BA3 && ch <= 0x0BA4 ||
                            ch >= 0x0BA8 && ch <= 0x0BAA ||
                            ch >= 0x0BAE && ch <= 0x0BB5 ||
                            ch >= 0x0BB7 && ch <= 0x0BB9) ||
            ch <= 0xcff && (ch >= 0x0C05 && ch <= 0x0C0C ||
                            ch >= 0x0C0E && ch <= 0x0C10 ||
                            ch >= 0x0C12 && ch <= 0x0C28 ||
                            ch >= 0x0C2A && ch <= 0x0C33 ||
                            ch >= 0x0C35 && ch <= 0x0C39 ||
                            ch >= 0x0C60 && ch <= 0x0C61 ||
                            ch >= 0x0C85 && ch <= 0x0C8C ||
                            ch >= 0x0C8E && ch <= 0x0C90 ||
                            ch >= 0x0C92 && ch <= 0x0CA8 ||
                            ch >= 0x0CAA && ch <= 0x0CB3 ||
                            ch >= 0x0CB5 && ch <= 0x0CB9 ||
                            ch == 0x0CDE ||
                            ch >= 0x0CE0 && ch <= 0x0CE1) ||
            ch <= 0xdff && (ch >= 0x0D05 && ch <= 0x0D0C ||
                            ch >= 0x0D0E && ch <= 0x0D10 ||
                            ch >= 0x0D12 && ch <= 0x0D28 ||
                            ch >= 0x0D2A && ch <= 0x0D39 ||
                            ch >= 0x0D60 && ch <= 0x0D61) ||
            ch <= 0xfff && (ch >= 0x0E01 && ch <= 0x0E2E ||
                            ch == 0x0E30 ||
                            ch >= 0x0E32 && ch <= 0x0E33 ||
                            ch >= 0x0E40 && ch <= 0x0E45 ||
                            ch >= 0x0E81 && ch <= 0x0E82 ||
                            ch == 0x0E84 ||
                            ch >= 0x0E87 && ch <= 0x0E88 ||
                            ch == 0x0E8A ||
                            ch == 0x0E8D ||
                            ch >= 0x0E94 && ch <= 0x0E97 ||
                            ch >= 0x0E99 && ch <= 0x0E9F ||
                            ch >= 0x0EA1 && ch <= 0x0EA3 ||
                            ch == 0x0EA5 ||
                            ch == 0x0EA7 ||
                            ch >= 0x0EAA && ch <= 0x0EAB ||
                            ch >= 0x0EAD && ch <= 0x0EAE ||
                            ch == 0x0EB0 ||
                            ch >= 0x0EB2 && ch <= 0x0EB3 ||
                            ch == 0x0EBD ||
                            ch >= 0x0EC0 && ch <= 0x0EC4 ||
                            ch >= 0x0F40 && ch <= 0x0F47 ||
                            ch >= 0x0F49 && ch <= 0x0F69) ||
            ch <= 0x10ff && (ch >= 0x10A0 && ch <= 0x10C5 ||
                             ch >= 0x10D0 && ch <= 0x10F6) ||
            ch <= 0x11ff && (ch == 0x1100 ||
                             ch >= 0x1102 && ch <= 0x1103 ||
                             ch >= 0x1105 && ch <= 0x1107 ||
                             ch == 0x1109 ||
                             ch >= 0x110B && ch <= 0x110C ||
                             ch >= 0x110E && ch <= 0x1112 ||
                             ch == 0x113C ||
                             ch == 0x113E ||
                             ch == 0x1140 ||
                             ch == 0x114C ||
                             ch == 0x114E ||
                             ch == 0x1150 ||
                             ch >= 0x1154 && ch <= 0x1155 ||
                             ch == 0x1159 ||
                             ch >= 0x115F && ch <= 0x1161 ||
                             ch == 0x1163 ||
                             ch == 0x1165 ||
                             ch == 0x1167 ||
                             ch == 0x1169 ||
                             ch >= 0x116D && ch <= 0x116E ||
                             ch >= 0x1172 && ch <= 0x1173 ||
                             ch == 0x1175 ||
                             ch == 0x119E ||
                             ch == 0x11A8 ||
                             ch == 0x11AB ||
                             ch >= 0x11AE && ch <= 0x11AF ||
                             ch >= 0x11B7 && ch <= 0x11B8 ||
                             ch == 0x11BA ||
                             ch >= 0x11BC && ch <= 0x11C2 ||
                             ch == 0x11EB ||
                             ch == 0x11F0 ||
                             ch == 0x11F9) ||
            ch <= 0x1fff && (ch >= 0x1E00 && ch <= 0x1E9B ||
                             ch >= 0x1EA0 && ch <= 0x1EF9 ||
                             ch >= 0x1F00 && ch <= 0x1F15 ||
                             ch >= 0x1F18 && ch <= 0x1F1D ||
                             ch >= 0x1F20 && ch <= 0x1F45 ||
                             ch >= 0x1F48 && ch <= 0x1F4D ||
                             ch >= 0x1F50 && ch <= 0x1F57 ||
                             ch == 0x1F59 ||
                             ch == 0x1F5B ||
                             ch == 0x1F5D ||
                             ch >= 0x1F5F && ch <= 0x1F7D ||
                             ch >= 0x1F80 && ch <= 0x1FB4 ||
                             ch >= 0x1FB6 && ch <= 0x1FBC ||
                             ch == 0x1FBE ||
                             ch >= 0x1FC2 && ch <= 0x1FC4 ||
                             ch >= 0x1FC6 && ch <= 0x1FCC ||
                             ch >= 0x1FD0 && ch <= 0x1FD3 ||
                             ch >= 0x1FD6 && ch <= 0x1FDB ||
                             ch >= 0x1FE0 && ch <= 0x1FEC ||
                             ch >= 0x1FF2 && ch <= 0x1FF4 ||
                             ch >= 0x1FF6 && ch <= 0x1FFC) ||
            ch == 0x2126 ||
            ch >= 0x212A && ch <= 0x212B ||
            ch == 0x212E ||
            ch >= 0x2180 && ch <= 0x2182 ||
            ch >= 0x3041 && ch <= 0x3094 ||
            ch >= 0x30A1 && ch <= 0x30FA ||
            ch >= 0x3105 && ch <= 0x312C ||
            ch >= 0xAC00 && ch <= 0xD7A3);
  }

  /**
   * Returns true if the character is in the XML 1.0 Ideographic class.
   */
  private static boolean isIdeographic(int ch)
  {
    return (ch >= 0x4e00 && ch <= 0x9fa5 || ch == 0x3007 ||
            ch >= 0x3021 && ch <= 0x3029);
  }

  /**
   * Returns true if the character is in the XML 1.0 CombiningChar class.
   */
  private static boolean isCombiningChar(int ch)
  {
    // No combining character lies below U+0300.
    if (ch < 0x300) {
      return false;
    }

    return (ch <= 0x6ff && (ch >= 0x0300 && ch <= 0x0345 ||
                            ch >= 0x0360 && ch <= 0x0361 ||
                            ch >= 0x0483 && ch <= 0x0486 ||
                            ch >= 0x0591 && ch <= 0x05A1 ||
                            ch >= 0x05A3 && ch <= 0x05B9 ||
                            ch >= 0x05BB && ch <= 0x05BD ||
                            ch == 0x05BF ||
                            ch >= 0x05C1 && ch <= 0x05C2 ||
                            ch == 0x05C4 ||
                            ch >= 0x064B && ch <= 0x0652 ||
                            ch == 0x0670 ||
                            ch >= 0x06D6 && ch <= 0x06DC ||
                            ch >= 0x06DD && ch <= 0x06DF ||
                            ch >= 0x06E0 && ch <= 0x06E4 ||
                            ch >= 0x06E7 && ch <= 0x06E8 ||
                            ch >= 0x06EA && ch <= 0x06ED) ||
            ch <= 0x9ff && (ch >= 0x0901 && ch <= 0x0903 ||
                            ch == 0x093C ||
                            ch >= 0x093E && ch <= 0x094C ||
                            ch == 0x094D ||
                            ch >= 0x0951 && ch <= 0x0954 ||
                            ch >= 0x0962 && ch <= 0x0963 ||
                            ch >= 0x0981 && ch <= 0x0983 ||
                            ch == 0x09BC ||
                            ch == 0x09BE ||
                            ch == 0x09BF ||
                            ch >= 0x09C0 && ch <= 0x09C4 ||
                            ch >= 0x09C7 && ch <= 0x09C8 ||
                            ch >= 0x09CB && ch <= 0x09CD ||
                            ch == 0x09D7 ||
                            ch >= 0x09E2 && ch <= 0x09E3) ||
            ch <= 0xaff && (ch == 0x0A02 ||
                            ch == 0x0A3C ||
                            ch == 0x0A3E ||
                            ch == 0x0A3F ||
                            ch >= 0x0A40 && ch <= 0x0A42 ||
                            ch >= 0x0A47 && ch <= 0x0A48 ||
                            ch >= 0x0A4B && ch <= 0x0A4D ||
                            ch >= 0x0A70 && ch <= 0x0A71 ||
                            ch >= 0x0A81 && ch <= 0x0A83 ||
                            ch == 0x0ABC ||
                            ch >= 0x0ABE && ch <= 0x0AC5 ||
                            ch >= 0x0AC7 && ch <= 0x0AC9 ||
                            ch >= 0x0ACB && ch <= 0x0ACD) ||
            ch <= 0xbff && (ch >= 0x0B01 && ch <= 0x0B03 ||
                            ch == 0x0B3C ||
                            ch >= 0x0B3E && ch <= 0x0B43 ||
                            ch >= 0x0B47 && ch <= 0x0B48 ||
                            ch >= 0x0B4B && ch <= 0x0B4D ||
                            ch >= 0x0B56 && ch <= 0x0B57 ||
                            ch >= 0x0B82 && ch <= 0x0B83 ||
                            ch >= 0x0BBE && ch <= 0x0BC2 ||
                            ch >= 0x0BC6 && ch <= 0x0BC8 ||
                            ch >= 0x0BCA && ch <= 0x0BCD ||
                            ch == 0x0BD7) ||
            // The guard must cover the whole 0x0C00 block; a limit of 0xc00
            // would make every range in this group unreachable.
            ch <= 0xcff && (ch >= 0x0C01 && ch <= 0x0C03 ||
                            ch >= 0x0C3E && ch <= 0x0C44 ||
                            ch >= 0x0C46 && ch <= 0x0C48 ||
                            ch >= 0x0C4A && ch <= 0x0C4D ||
                            ch >= 0x0C55 && ch <= 0x0C56 ||
                            ch >= 0x0C82 && ch <= 0x0C83 ||
                            ch >= 0x0CBE && ch <= 0x0CC4 ||
                            ch >= 0x0CC6 && ch <= 0x0CC8 ||
                            ch >= 0x0CCA && ch <= 0x0CCD ||
                            ch >= 0x0CD5 && ch <= 0x0CD6) ||
            ch <= 0xeff && (ch >= 0x0D02 && ch <= 0x0D03 ||
                            ch >= 0x0D3E && ch <= 0x0D43 ||
                            ch >= 0x0D46 && ch <= 0x0D48 ||
                            ch >= 0x0D4A && ch <= 0x0D4D ||
                            ch == 0x0D57 ||
                            ch == 0x0E31 ||
                            ch >= 0x0E34 && ch <= 0x0E3A ||
                            ch >= 0x0E47 && ch <= 0x0E4E ||
                            ch == 0x0EB1 ||
                            ch >= 0x0EB4 && ch <= 0x0EB9 ||
                            ch >= 0x0EBB && ch <= 0x0EBC ||
                            ch >= 0x0EC8 && ch <= 0x0ECD) ||
            ch <= 0xfff && (ch >= 0x0F18 && ch <= 0x0F19 ||
                            ch == 0x0F35 ||
                            ch == 0x0F37 ||
                            ch == 0x0F39 ||
                            ch == 0x0F3E ||
                            ch == 0x0F3F ||
                            ch >= 0x0F71 && ch <= 0x0F84 ||
                            ch >= 0x0F86 && ch <= 0x0F8B ||
                            ch >= 0x0F90 && ch <= 0x0F95 ||
                            ch == 0x0F97 ||
                            ch >= 0x0F99 && ch <= 0x0FAD ||
                            ch >= 0x0FB1 && ch <= 0x0FB7 ||
                            ch == 0x0FB9) ||
            ch >= 0x20D0 && ch <= 0x20DC ||
            ch == 0x20E1 ||
            ch >= 0x302A && ch <= 0x302F ||
            ch == 0x3099 ||
            ch == 0x309A);
  }

  /**
   * Returns true if the character is in the XML 1.0 Digit class.
   */
  private static boolean isDigit(int ch)
  {
    return (ch >= 0x0030 && ch <= 0x0039 ||
            ch >= 0x0660 && ch <= 0x0669 ||
            ch >= 0x06F0 && ch <= 0x06F9 ||
            ch >= 0x0966 && ch <= 0x096F ||
            ch >= 0x09E6 && ch <= 0x09EF ||
            ch >= 0x0A66 && ch <= 0x0A6F ||
            ch >= 0x0AE6 && ch <= 0x0AEF ||
            ch >= 0x0B66 && ch <= 0x0B6F ||
            ch >= 0x0BE7 && ch <= 0x0BEF ||
            ch >= 0x0C66 && ch <= 0x0C6F ||
            ch >= 0x0CE6 && ch <= 0x0CEF ||
            ch >= 0x0D66 && ch <= 0x0D6F ||
            ch >= 0x0E50 && ch <= 0x0E59 ||
            ch >= 0x0ED0 && ch <= 0x0ED9 ||
            ch >= 0x0F20 && ch <= 0x0F29);
  }

  /**
   * Returns true if the character is in the XML 1.0 Extender class.
   */
  private static boolean isExtender(int ch)
  {
    return (ch == 0x00B7 ||
            ch == 0x02D0 ||
            ch == 0x02D1 ||
            ch == 0x0387 ||
            ch == 0x0640 ||
            ch == 0x0E46 ||
            ch == 0x0EC6 ||
            ch == 0x3005 ||
            ch >= 0x3031 && ch <= 0x3035 ||
            ch >= 0x309D && ch <= 0x309E ||
            ch >= 0x30FC && ch <= 0x30FE);
  }

  // Build the ASCII name-character table: digits, upper- and lower-case
  // letters, plus '_', ':', '.' and '-'.
  static {
    isAsciiNameChar = new boolean[128];
    for (int i = 0x30; i <= 0x39; i++) {
      isAsciiNameChar[i] = true;
    }
    for (int i = 0x41; i <= 0x5a; i++) {
      isAsciiNameChar[i] = true;
    }
    for (int i = 0x61; i <= 0x7a; i++) {
      isAsciiNameChar[i] = true;
    }
    isAsciiNameChar['_'] = true;
    isAsciiNameChar[':'] = true;
    isAsciiNameChar['.'] = true;
    isAsciiNameChar['-'] = true;
  }
}
449
Popular Tags