KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > w3c > tidy > TidyUtils


1 /*
2  * Java HTML Tidy - JTidy
3  * HTML parser and pretty printer
4  *
5  * Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
6  * Institute of Technology, Institut National de Recherche en
7  * Informatique et en Automatique, Keio University). All Rights
8  * Reserved.
9  *
10  * Contributing Author(s):
11  *
12  * Dave Raggett <dsr@w3.org>
13  * Andy Quick <ac.quick@sympatico.ca> (translation to Java)
14  * Gary L Peskin <garyp@firstech.com> (Java development)
15  * Sami Lempinen <sami@lempinen.net> (release management)
16  * Fabrizio Giustina <fgiust at users.sourceforge.net>
17  *
18  * The contributing author(s) would like to thank all those who
19  * helped with testing, bug fixes, and patience. This wouldn't
20  * have been possible without all of you.
21  *
22  * COPYRIGHT NOTICE:
23  *
24  * This software and documentation is provided "as is," and
25  * the copyright holders and contributing author(s) make no
26  * representations or warranties, express or implied, including
27  * but not limited to, warranties of merchantability or fitness
28  * for any particular purpose or that the use of the software or
29  * documentation will not infringe any third party patents,
30  * copyrights, trademarks or other rights.
31  *
32  * The copyright holders and contributing author(s) will not be
33  * liable for any direct, indirect, special or consequential damages
34  * arising out of any use of the software or documentation, even if
35  * advised of the possibility of such damage.
36  *
37  * Permission is hereby granted to use, copy, modify, and distribute
38  * this source code, or portions hereof, documentation and executables,
39  * for any purpose, without fee, subject to the following restrictions:
40  *
41  * 1. The origin of this source code must not be misrepresented.
42  * 2. Altered versions must be plainly marked as such and must
43  * not be misrepresented as being the original source.
44  * 3. This Copyright notice may not be removed or altered from any
45  * source or altered source distribution.
46  *
47  * The copyright holders and contributing author(s) specifically
48  * permit, without fee, and encourage the use of this source code
49  * as a component for supporting the Hypertext Markup Language in
50  * commercial products. If you use this source code in a product,
51  * acknowledgment is not required but would be appreciated.
52  *
53  */

54
55 package org.w3c.tidy;
56
57 /**
58  * Utility class with handy methods, mainly for String handling or for reproducing c behaviours.
59  * @author Fabrizio Giustina
60  * @version $Revision $ ($Author $)
61  */

62 public final class TidyUtils
63 {
64
/**
 * Char type flag: decimal digit.
 */
private static final short DIGIT = 1;

/**
 * Char type flag: letter.
 */
private static final short LETTER = 2;

/**
 * Char type flag: character allowed inside a name.
 */
private static final short NAMECHAR = 4;

/**
 * Char type flag: whitespace.
 */
private static final short WHITE = 8;

/**
 * Char type flag: newline.
 */
private static final short NEWLINE = 16;

/**
 * Char type flag: lowercase letter.
 */
private static final short LOWERCASE = 32;

/**
 * Char type flag: uppercase letter.
 */
private static final short UPPERCASE = 64;

/**
 * Lexical classification table. Each slot holds the OR-ed char type flags for the character whose code is the
 * slot index; only codes 0-127 have a slot. The reference is final because the table is filled once by the static
 * initialiser below and never replaced.
 */
private static final short[] lexmap = new short[128];

static
{
    // line terminators (and form feed) count as both newline and whitespace
    mapStr("\r\n\f", (short) (NEWLINE | WHITE));
    mapStr(" \t", WHITE);
    mapStr("-.:_", NAMECHAR);
    mapStr("0123456789", (short) (DIGIT | NAMECHAR));
    mapStr("abcdefghijklmnopqrstuvwxyz", (short) (LOWERCASE | LETTER | NAMECHAR));
    mapStr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", (short) (UPPERCASE | LETTER | NAMECHAR));
}
114
115     /**
116      * utility class, don't instantiate.
117      */

118     private TidyUtils()
119     {
120         // unused
121
}
122
123     /**
124      * Converts a int to a boolean.
125      * @param value int value
126      * @return <code>true</code> if value is != 0
127      */

128     static boolean toBoolean(int value)
129     {
130         return value != 0;
131     }
132
133     /**
134      * convert an int to unsigned (& 0xFF).
135      * @param c signed int
136      * @return unsigned int
137      */

138     static int toUnsigned(int c)
139     {
140         return c & 0xFF;
141     }
142
143     /**
144      * check if the first String contains the second one.
145      * @param s1 full String
146      * @param len1 maximum position in String
147      * @param s2 String to search for
148      * @return true if s1 contains s2 in the range 0-len1
149      */

150     static boolean wsubstrn(String JavaDoc s1, int len1, String JavaDoc s2)
151     {
152         int searchIndex = s1.indexOf(s2);
153         return searchIndex > -1 && searchIndex <= len1;
154     }
155
156     /**
157      * check if the first String contains the second one (ignore case).
158      * @param s1 full String
159      * @param len1 maximum position in String
160      * @param s2 String to search for
161      * @return true if s1 contains s2 in the range 0-len1
162      */

163     static boolean wsubstrncase(String JavaDoc s1, int len1, String JavaDoc s2)
164     {
165         return wsubstrn(s1.toLowerCase(), len1, s2.toLowerCase());
166     }
167
168     /**
169      * return offset of cc from beginning of s1, -1 if not found.
170      * @param s1 String
171      * @param len1 maximum offset (values > than lenl are ignored and returned as -1)
172      * @param cc character to search for
173      * @return index of cc in s1
174      */

175     static int wstrnchr(String JavaDoc s1, int len1, char cc)
176     {
177         int indexOf = s1.indexOf(cc);
178         if (indexOf < len1)
179         {
180             return indexOf;
181         }
182
183         return -1;
184     }
185
186     /**
187      * Same as wsubstrn, but without a specified length.
188      * @param s1 full String
189      * @param s2 String to search for
190      * @return <code>true</code> if s2 is found in s2 (case insensitive search)
191      */

192     static boolean wsubstr(String JavaDoc s1, String JavaDoc s2)
193     {
194         int i;
195         int len1 = s1.length();
196         int len2 = s2.length();
197
198         for (i = 0; i <= len1 - len2; ++i)
199         {
200             if (s2.equalsIgnoreCase(s1.substring(i)))
201             {
202                 return true;
203             }
204         }
205
206         return false;
207     }
208
209     /**
210      * Is the character a hex digit?
211      * @param c char
212      * @return <code>true</code> if he given character is a hex digit
213      */

214     static boolean isxdigit(char c)
215     {
216         return Character.isDigit(c) || (Character.toLowerCase(c) >= 'a' && Character.toLowerCase(c) <= 'f');
217     }
218
219     /**
220      * Check if the string valueToCheck is contained in validValues array (case insesitie comparison).
221      * @param validValues array of valid values
222      * @param valueToCheck value to search for
223      * @return <code>true</code> if valueToCheck is found in validValues
224      */

225     static boolean isInValuesIgnoreCase(String JavaDoc[] validValues, String JavaDoc valueToCheck)
226     {
227         int len = validValues.length;
228         for (int j = 0; j < len; j++)
229         {
230             if (validValues[j].equalsIgnoreCase(valueToCheck))
231             {
232                 return true;
233             }
234         }
235         return false;
236     }
237
238     /**
239      * Return true if substring s is in p and isn't all in upper case. This is used to check the case of SYSTEM, PUBLIC,
240      * DTD and EN.
241      * @param s substring
242      * @param p full string
243      * @param len how many chars to check in p
244      * @return true if substring s is in p and isn't all in upper case
245      */

246     public static boolean findBadSubString(String JavaDoc s, String JavaDoc p, int len)
247     {
248         int n = s.length();
249         int i = 0;
250         String JavaDoc ps;
251
252         while (n < len)
253         {
254             ps = p.substring(i, i + n);
255             if (s.equalsIgnoreCase(ps))
256             {
257                 return (!ps.equals(s.substring(0, n)));
258             }
259
260             ++i;
261             --len;
262         }
263
264         return false;
265     }
266
267     /**
268      * Is the given char a valid xml letter?
269      * @param c char
270      * @return <code>true</code> if the char is a valid xml letter
271      */

272     static boolean isXMLLetter(char c)
273     {
274         return ((c >= 0x41 && c <= 0x5a)
275             || (c >= 0x61 && c <= 0x7a)
276             || (c >= 0xc0 && c <= 0xd6)
277             || (c >= 0xd8 && c <= 0xf6)
278             || (c >= 0xf8 && c <= 0xff)
279             || (c >= 0x100 && c <= 0x131)
280             || (c >= 0x134 && c <= 0x13e)
281             || (c >= 0x141 && c <= 0x148)
282             || (c >= 0x14a && c <= 0x17e)
283             || (c >= 0x180 && c <= 0x1c3)
284             || (c >= 0x1cd && c <= 0x1f0)
285             || (c >= 0x1f4 && c <= 0x1f5)
286             || (c >= 0x1fa && c <= 0x217)
287             || (c >= 0x250 && c <= 0x2a8)
288             || (c >= 0x2bb && c <= 0x2c1)
289             || c == 0x386
290             || (c >= 0x388 && c <= 0x38a)
291             || c == 0x38c
292             || (c >= 0x38e && c <= 0x3a1)
293             || (c >= 0x3a3 && c <= 0x3ce)
294             || (c >= 0x3d0 && c <= 0x3d6)
295             || c == 0x3da
296             || c == 0x3dc
297             || c == 0x3de
298             || c == 0x3e0
299             || (c >= 0x3e2 && c <= 0x3f3)
300             || (c >= 0x401 && c <= 0x40c)
301             || (c >= 0x40e && c <= 0x44f)
302             || (c >= 0x451 && c <= 0x45c)
303             || (c >= 0x45e && c <= 0x481)
304             || (c >= 0x490 && c <= 0x4c4)
305             || (c >= 0x4c7 && c <= 0x4c8)
306             || (c >= 0x4cb && c <= 0x4cc)
307             || (c >= 0x4d0 && c <= 0x4eb)
308             || (c >= 0x4ee && c <= 0x4f5)
309             || (c >= 0x4f8 && c <= 0x4f9)
310             || (c >= 0x531 && c <= 0x556)
311             || c == 0x559
312             || (c >= 0x561 && c <= 0x586)
313             || (c >= 0x5d0 && c <= 0x5ea)
314             || (c >= 0x5f0 && c <= 0x5f2)
315             || (c >= 0x621 && c <= 0x63a)
316             || (c >= 0x641 && c <= 0x64a)
317             || (c >= 0x671 && c <= 0x6b7)
318             || (c >= 0x6ba && c <= 0x6be)
319             || (c >= 0x6c0 && c <= 0x6ce)
320             || (c >= 0x6d0 && c <= 0x6d3)
321             || c == 0x6d5
322             || (c >= 0x6e5 && c <= 0x6e6)
323             || (c >= 0x905 && c <= 0x939)
324             || c == 0x93d
325             || (c >= 0x958 && c <= 0x961)
326             || (c >= 0x985 && c <= 0x98c)
327             || (c >= 0x98f && c <= 0x990)
328             || (c >= 0x993 && c <= 0x9a8)
329             || (c >= 0x9aa && c <= 0x9b0)
330             || c == 0x9b2
331             || (c >= 0x9b6 && c <= 0x9b9)
332             || (c >= 0x9dc && c <= 0x9dd)
333             || (c >= 0x9df && c <= 0x9e1)
334             || (c >= 0x9f0 && c <= 0x9f1)
335             || (c >= 0xa05 && c <= 0xa0a)
336             || (c >= 0xa0f && c <= 0xa10)
337             || (c >= 0xa13 && c <= 0xa28)
338             || (c >= 0xa2a && c <= 0xa30)
339             || (c >= 0xa32 && c <= 0xa33)
340             || (c >= 0xa35 && c <= 0xa36)
341             || (c >= 0xa38 && c <= 0xa39)
342             || (c >= 0xa59 && c <= 0xa5c)
343             || c == 0xa5e
344             || (c >= 0xa72 && c <= 0xa74)
345             || (c >= 0xa85 && c <= 0xa8b)
346             || c == 0xa8d
347             || (c >= 0xa8f && c <= 0xa91)
348             || (c >= 0xa93 && c <= 0xaa8)
349             || (c >= 0xaaa && c <= 0xab0)
350             || (c >= 0xab2 && c <= 0xab3)
351             || (c >= 0xab5 && c <= 0xab9)
352             || c == 0xabd
353             || c == 0xae0
354             || (c >= 0xb05 && c <= 0xb0c)
355             || (c >= 0xb0f && c <= 0xb10)
356             || (c >= 0xb13 && c <= 0xb28)
357             || (c >= 0xb2a && c <= 0xb30)
358             || (c >= 0xb32 && c <= 0xb33)
359             || (c >= 0xb36 && c <= 0xb39)
360             || c == 0xb3d
361             || (c >= 0xb5c && c <= 0xb5d)
362             || (c >= 0xb5f && c <= 0xb61)
363             || (c >= 0xb85 && c <= 0xb8a)
364             || (c >= 0xb8e && c <= 0xb90)
365             || (c >= 0xb92 && c <= 0xb95)
366             || (c >= 0xb99 && c <= 0xb9a)
367             || c == 0xb9c
368             || (c >= 0xb9e && c <= 0xb9f)
369             || (c >= 0xba3 && c <= 0xba4)
370             || (c >= 0xba8 && c <= 0xbaa)
371             || (c >= 0xbae && c <= 0xbb5)
372             || (c >= 0xbb7 && c <= 0xbb9)
373             || (c >= 0xc05 && c <= 0xc0c)
374             || (c >= 0xc0e && c <= 0xc10)
375             || (c >= 0xc12 && c <= 0xc28)
376             || (c >= 0xc2a && c <= 0xc33)
377             || (c >= 0xc35 && c <= 0xc39)
378             || (c >= 0xc60 && c <= 0xc61)
379             || (c >= 0xc85 && c <= 0xc8c)
380             || (c >= 0xc8e && c <= 0xc90)
381             || (c >= 0xc92 && c <= 0xca8)
382             || (c >= 0xcaa && c <= 0xcb3)
383             || (c >= 0xcb5 && c <= 0xcb9)
384             || c == 0xcde
385             || (c >= 0xce0 && c <= 0xce1)
386             || (c >= 0xd05 && c <= 0xd0c)
387             || (c >= 0xd0e && c <= 0xd10)
388             || (c >= 0xd12 && c <= 0xd28)
389             || (c >= 0xd2a && c <= 0xd39)
390             || (c >= 0xd60 && c <= 0xd61)
391             || (c >= 0xe01 && c <= 0xe2e)
392             || c == 0xe30
393             || (c >= 0xe32 && c <= 0xe33)
394             || (c >= 0xe40 && c <= 0xe45)
395             || (c >= 0xe81 && c <= 0xe82)
396             || c == 0xe84
397             || (c >= 0xe87 && c <= 0xe88)
398             || c == 0xe8a
399             || c == 0xe8d
400             || (c >= 0xe94 && c <= 0xe97)
401             || (c >= 0xe99 && c <= 0xe9f)
402             || (c >= 0xea1 && c <= 0xea3)
403             || c == 0xea5
404             || c == 0xea7
405             || (c >= 0xeaa && c <= 0xeab)
406             || (c >= 0xead && c <= 0xeae)
407             || c == 0xeb0
408             || (c >= 0xeb2 && c <= 0xeb3)
409             || c == 0xebd
410             || (c >= 0xec0 && c <= 0xec4)
411             || (c >= 0xf40 && c <= 0xf47)
412             || (c >= 0xf49 && c <= 0xf69)
413             || (c >= 0x10a0 && c <= 0x10c5)
414             || (c >= 0x10d0 && c <= 0x10f6)
415             || c == 0x1100
416             || (c >= 0x1102 && c <= 0x1103)
417             || (c >= 0x1105 && c <= 0x1107)
418             || c == 0x1109
419             || (c >= 0x110b && c <= 0x110c)
420             || (c >= 0x110e && c <= 0x1112)
421             || c == 0x113c
422             || c == 0x113e
423             || c == 0x1140
424             || c == 0x114c
425             || c == 0x114e
426             || c == 0x1150
427             || (c >= 0x1154 && c <= 0x1155)
428             || c == 0x1159
429             || (c >= 0x115f && c <= 0x1161)
430             || c == 0x1163
431             || c == 0x1165
432             || c == 0x1167
433             || c == 0x1169
434             || (c >= 0x116d && c <= 0x116e)
435             || (c >= 0x1172 && c <= 0x1173)
436             || c == 0x1175
437             || c == 0x119e
438             || c == 0x11a8
439             || c == 0x11ab
440             || (c >= 0x11ae && c <= 0x11af)
441             || (c >= 0x11b7 && c <= 0x11b8)
442             || c == 0x11ba
443             || (c >= 0x11bc && c <= 0x11c2)
444             || c == 0x11eb
445             || c == 0x11f0
446             || c == 0x11f9
447             || (c >= 0x1e00 && c <= 0x1e9b)
448             || (c >= 0x1ea0 && c <= 0x1ef9)
449             || (c >= 0x1f00 && c <= 0x1f15)
450             || (c >= 0x1f18 && c <= 0x1f1d)
451             || (c >= 0x1f20 && c <= 0x1f45)
452             || (c >= 0x1f48 && c <= 0x1f4d)
453             || (c >= 0x1f50 && c <= 0x1f57)
454             || c == 0x1f59
455             || c == 0x1f5b
456             || c == 0x1f5d
457             || (c >= 0x1f5f && c <= 0x1f7d)
458             || (c >= 0x1f80 && c <= 0x1fb4)
459             || (c >= 0x1fb6 && c <= 0x1fbc)
460             || c == 0x1fbe
461             || (c >= 0x1fc2 && c <= 0x1fc4)
462             || (c >= 0x1fc6 && c <= 0x1fcc)
463             || (c >= 0x1fd0 && c <= 0x1fd3)
464             || (c >= 0x1fd6 && c <= 0x1fdb)
465             || (c >= 0x1fe0 && c <= 0x1fec)
466             || (c >= 0x1ff2 && c <= 0x1ff4)
467             || (c >= 0x1ff6 && c <= 0x1ffc)
468             || c == 0x2126
469             || (c >= 0x212a && c <= 0x212b)
470             || c == 0x212e
471             || (c >= 0x2180 && c <= 0x2182)
472             || (c >= 0x3041 && c <= 0x3094)
473             || (c >= 0x30a1 && c <= 0x30fa)
474             || (c >= 0x3105 && c <= 0x312c)
475             || (c >= 0xac00 && c <= 0xd7a3)
476             || (c >= 0x4e00 && c <= 0x9fa5)
477             || c == 0x3007
478             || (c >= 0x3021 && c <= 0x3029)
479             || (c >= 0x4e00 && c <= 0x9fa5)
480             || c == 0x3007 || (c >= 0x3021 && c <= 0x3029));
481     }
482
483     /**
484      * Is the given char valid in xml name?
485      * @param c char
486      * @return <code>true</code> if the char is a valid xml name char
487      */

488     static boolean isXMLNamechar(char c)
489     {
490         return (isXMLLetter(c)
491             || c == '.'
492             || c == '_'
493             || c == ':'
494             || c == '-'
495             || (c >= 0x300 && c <= 0x345)
496             || (c >= 0x360 && c <= 0x361)
497             || (c >= 0x483 && c <= 0x486)
498             || (c >= 0x591 && c <= 0x5a1)
499             || (c >= 0x5a3 && c <= 0x5b9)
500             || (c >= 0x5bb && c <= 0x5bd)
501             || c == 0x5bf
502             || (c >= 0x5c1 && c <= 0x5c2)
503             || c == 0x5c4
504             || (c >= 0x64b && c <= 0x652)
505             || c == 0x670
506             || (c >= 0x6d6 && c <= 0x6dc)
507             || (c >= 0x6dd && c <= 0x6df)
508             || (c >= 0x6e0 && c <= 0x6e4)
509             || (c >= 0x6e7 && c <= 0x6e8)
510             || (c >= 0x6ea && c <= 0x6ed)
511             || (c >= 0x901 && c <= 0x903)
512             || c == 0x93c
513             || (c >= 0x93e && c <= 0x94c)
514             || c == 0x94d
515             || (c >= 0x951 && c <= 0x954)
516             || (c >= 0x962 && c <= 0x963)
517             || (c >= 0x981 && c <= 0x983)
518             || c == 0x9bc
519             || c == 0x9be
520             || c == 0x9bf
521             || (c >= 0x9c0 && c <= 0x9c4)
522             || (c >= 0x9c7 && c <= 0x9c8)
523             || (c >= 0x9cb && c <= 0x9cd)
524             || c == 0x9d7
525             || (c >= 0x9e2 && c <= 0x9e3)
526             || c == 0xa02
527             || c == 0xa3c
528             || c == 0xa3e
529             || c == 0xa3f
530             || (c >= 0xa40 && c <= 0xa42)
531             || (c >= 0xa47 && c <= 0xa48)
532             || (c >= 0xa4b && c <= 0xa4d)
533             || (c >= 0xa70 && c <= 0xa71)
534             || (c >= 0xa81 && c <= 0xa83)
535             || c == 0xabc
536             || (c >= 0xabe && c <= 0xac5)
537             || (c >= 0xac7 && c <= 0xac9)
538             || (c >= 0xacb && c <= 0xacd)
539             || (c >= 0xb01 && c <= 0xb03)
540             || c == 0xb3c
541             || (c >= 0xb3e && c <= 0xb43)
542             || (c >= 0xb47 && c <= 0xb48)
543             || (c >= 0xb4b && c <= 0xb4d)
544             || (c >= 0xb56 && c <= 0xb57)
545             || (c >= 0xb82 && c <= 0xb83)
546             || (c >= 0xbbe && c <= 0xbc2)
547             || (c >= 0xbc6 && c <= 0xbc8)
548             || (c >= 0xbca && c <= 0xbcd)
549             || c == 0xbd7
550             || (c >= 0xc01 && c <= 0xc03)
551             || (c >= 0xc3e && c <= 0xc44)
552             || (c >= 0xc46 && c <= 0xc48)
553             || (c >= 0xc4a && c <= 0xc4d)
554             || (c >= 0xc55 && c <= 0xc56)
555             || (c >= 0xc82 && c <= 0xc83)
556             || (c >= 0xcbe && c <= 0xcc4)
557             || (c >= 0xcc6 && c <= 0xcc8)
558             || (c >= 0xcca && c <= 0xccd)
559             || (c >= 0xcd5 && c <= 0xcd6)
560             || (c >= 0xd02 && c <= 0xd03)
561             || (c >= 0xd3e && c <= 0xd43)
562             || (c >= 0xd46 && c <= 0xd48)
563             || (c >= 0xd4a && c <= 0xd4d)
564             || c == 0xd57
565             || c == 0xe31
566             || (c >= 0xe34 && c <= 0xe3a)
567             || (c >= 0xe47 && c <= 0xe4e)
568             || c == 0xeb1
569             || (c >= 0xeb4 && c <= 0xeb9)
570             || (c >= 0xebb && c <= 0xebc)
571             || (c >= 0xec8 && c <= 0xecd)
572             || (c >= 0xf18 && c <= 0xf19)
573             || c == 0xf35
574             || c == 0xf37
575             || c == 0xf39
576             || c == 0xf3e
577             || c == 0xf3f
578             || (c >= 0xf71 && c <= 0xf84)
579             || (c >= 0xf86 && c <= 0xf8b)
580             || (c >= 0xf90 && c <= 0xf95)
581             || c == 0xf97
582             || (c >= 0xf99 && c <= 0xfad)
583             || (c >= 0xfb1 && c <= 0xfb7)
584             || c == 0xfb9
585             || (c >= 0x20d0 && c <= 0x20dc)
586             || c == 0x20e1
587             || (c >= 0x302a && c <= 0x302f)
588             || c == 0x3099
589             || c == 0x309a
590             || (c >= 0x30 && c <= 0x39)
591             || (c >= 0x660 && c <= 0x669)
592             || (c >= 0x6f0 && c <= 0x6f9)
593             || (c >= 0x966 && c <= 0x96f)
594             || (c >= 0x9e6 && c <= 0x9ef)
595             || (c >= 0xa66 && c <= 0xa6f)
596             || (c >= 0xae6 && c <= 0xaef)
597             || (c >= 0xb66 && c <= 0xb6f)
598             || (c >= 0xbe7 && c <= 0xbef)
599             || (c >= 0xc66 && c <= 0xc6f)
600             || (c >= 0xce6 && c <= 0xcef)
601             || (c >= 0xd66 && c <= 0xd6f)
602             || (c >= 0xe50 && c <= 0xe59)
603             || (c >= 0xed0 && c <= 0xed9)
604             || (c >= 0xf20 && c <= 0xf29)
605             || c == 0xb7
606             || c == 0x2d0
607             || c == 0x2d1
608             || c == 0x387
609             || c == 0x640
610             || c == 0xe46
611             || c == 0xec6
612             || c == 0x3005
613             || (c >= 0x3031 && c <= 0x3035)
614             || (c >= 0x309d && c <= 0x309e) || (c >= 0x30fc && c <= 0x30fe));
615     }
616
617     /**
618      * Is the given character a single or double quote?
619      * @param c char
620      * @return <code>true</code> if c is " or '
621      */

622     static boolean isQuote(int c)
623     {
624         return (c == '\'' || c == '\"');
625     }
626
627     /**
628      * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding
629      * throws declarations in lots of methods.
630      * @param str String
631      * @return utf8 bytes
632      * @see String#getBytes()
633      */

634     public static byte[] getBytes(String JavaDoc str)
635     {
636         try
637         {
638             return str.getBytes("UTF8");
639         }
640         catch (java.io.UnsupportedEncodingException JavaDoc e)
641         {
642             throw new Error JavaDoc("String to UTF-8 conversion failed: " + e.getMessage());
643         }
644     }
645
646     /**
647      * Should always be able convert to/from UTF-8, so encoding exceptions are converted to an Error to avoid adding
648      * throws declarations in lots of methods.
649      * @param bytes byte array
650      * @param offset starting offset in byte array
651      * @param length length in byte array starting from offset
652      * @return same as <code>new String(bytes, offset, length, "UTF8")</code>
653      */

654     public static String JavaDoc getString(byte[] bytes, int offset, int length)
655     {
656         try
657         {
658             return new String JavaDoc(bytes, offset, length, "UTF8");
659         }
660         catch (java.io.UnsupportedEncodingException JavaDoc e)
661         {
662             throw new Error JavaDoc("UTF-8 to string conversion failed: " + e.getMessage());
663         }
664     }
665
666     /**
667      * Return the last char in string. This is useful when trailing quotemark is missing on an attribute
668      * @param str String
669      * @return last char in String
670      */

671     public static int lastChar(String JavaDoc str)
672     {
673         if (str != null && str.length() > 0)
674         {
675             return str.charAt(str.length() - 1);
676         }
677
678         return 0;
679     }
680
681     /**
682      * Determines if the specified character is whitespace.
683      * @param c char
684      * @return <code>true</code> if char is whitespace.
685      */

686     public static boolean isWhite(char c)
687     {
688         short m = map(c);
689         return TidyUtils.toBoolean(m & WHITE);
690     }
691
692     /**
693      * Is the given char a digit?
694      * @param c char
695      * @return <code>true</code> if the given char is a digit
696      */

697     public static boolean isDigit(char c)
698     {
699         short m;
700         m = map(c);
701         return TidyUtils.toBoolean(m & DIGIT);
702     }
703
704     /**
705      * Is the given char a letter?
706      * @param c char
707      * @return <code>true</code> if the given char is a letter
708      */

709     public static boolean isLetter(char c)
710     {
711         short m;
712         m = map(c);
713         return TidyUtils.toBoolean(m & LETTER);
714     }
715
716     /**
717      * Is the given char valid in name? (letter, digit or "-", ".", ":", "_")
718      * @param c char
719      * @return <code>true</code> if char is a name char.
720      */

721     public static boolean isNamechar(char c)
722     {
723         short map = map(c);
724
725         return TidyUtils.toBoolean(map & NAMECHAR);
726     }
727
728     /**
729      * Determines if the specified character is a lowercase character.
730      * @param c char
731      * @return <code>true</code> if char is lower case.
732      */

733     public static boolean isLower(char c)
734     {
735         short map = map(c);
736
737         return TidyUtils.toBoolean(map & LOWERCASE);
738     }
739
740     /**
741      * Determines if the specified character is a uppercase character.
742      * @param c char
743      * @return <code>true</code> if char is upper case.
744      */

745     public static boolean isUpper(char c)
746     {
747         short map = map(c);
748
749         return TidyUtils.toBoolean(map & UPPERCASE);
750     }
751
752     /**
753      * Maps the given character to its lowercase equivalent.
754      * @param c char
755      * @return lowercase char.
756      */

757     public static char toLower(char c)
758     {
759         short m = map(c);
760
761         if (TidyUtils.toBoolean(m & UPPERCASE))
762         {
763             c = (char) (c + 'a' - 'A');
764         }
765
766         return c;
767     }
768
769     /**
770      * Maps the given character to its uppercase equivalent.
771      * @param c char
772      * @return uppercase char.
773      */

774     public static char toUpper(char c)
775     {
776         short m = map(c);
777
778         if (TidyUtils.toBoolean(m & LOWERCASE))
779         {
780             c = (char) (c + 'A' - 'a');
781         }
782
783         return c;
784     }
785
786     /**
787      * Fold case of a char.
788      * @param c char
789      * @param tocaps convert to caps
790      * @param xmlTags use xml tags? If true no change will be performed
791      * @return folded char
792      * @todo check the use of xmlTags parameter
793      */

794     public static char foldCase(char c, boolean tocaps, boolean xmlTags)
795     {
796
797         if (!xmlTags)
798         {
799
800             if (tocaps)
801             {
802                 if (isLower(c))
803                 {
804                     c = toUpper(c);
805                 }
806             }
807             else
808             {
809                 // force to lower case
810
if (isUpper(c))
811                 {
812                     c = toLower(c);
813                 }
814             }
815         }
816
817         return c;
818     }
819
820     /**
821      * Classify chars in String and put them in lexmap.
822      * @param str String
823      * @param code code associated to chars in the String
824      */

825     private static void mapStr(String JavaDoc str, short code)
826     {
827         int c;
828         for (int i = 0; i < str.length(); i++)
829         {
830             c = str.charAt(i);
831             lexmap[c] |= code;
832         }
833     }
834
835     /**
836      * Returns the constant which defines the classification of char in lexmap.
837      * @param c char
838      * @return char type
839      */

840     private static short map(char c)
841     {
842         return (c < 128 ? lexmap[c] : 0);
843     }
844
845     /**
846      * Is the given character encoding supported?
847      * @param name character encoding name
848      * @return <code>true</code> if encoding is supported, false otherwhise.
849      */

850     public static boolean isCharEncodingSupported(String JavaDoc name)
851     {
852         name = EncodingNameMapper.toJava(name);
853         if (name == null)
854         {
855             return false;
856         }
857
858         try
859         {
860             "".getBytes(name);
861         }
862         catch (java.io.UnsupportedEncodingException JavaDoc e)
863         {
864             return false;
865         }
866         return true;
867     }
868 }
Popular Tags