KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > batik > xml > XMLUtilities


1 /*
2
3    Copyright 1999-2003 The Apache Software Foundation
4
5    Licensed under the Apache License, Version 2.0 (the "License");
6    you may not use this file except in compliance with the License.
7    You may obtain a copy of the License at
8
9        http://www.apache.org/licenses/LICENSE-2.0
10
11    Unless required by applicable law or agreed to in writing, software
12    distributed under the License is distributed on an "AS IS" BASIS,
13    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14    See the License for the specific language governing permissions and
15    limitations under the License.
16
17 */

18
19 package org.apache.batik.xml;
20
21 import java.io.ByteArrayInputStream JavaDoc;
22 import java.io.IOException JavaDoc;
23 import java.io.InputStream JavaDoc;
24 import java.io.InputStreamReader JavaDoc;
25 import java.io.PushbackInputStream JavaDoc;
26 import java.io.Reader JavaDoc;
27
28 import org.apache.batik.util.EncodingUtilities;
29
30 /**
31  * A collection of utility functions for XML.
32  *
33  * @author <a HREF="mailto:stephane@hillion.org">Stephane Hillion</a>
34  * @version $Id: XMLUtilities.java,v 1.8 2005/02/22 09:13:03 cam Exp $
35  */

36 public class XMLUtilities extends XMLCharacters {
37
38     /**
39      * This class does not need to be instantiated.
40      */

41     protected XMLUtilities() {
42     }
43
44     /**
45      * Tests whether the given character is a valid space.
46      */

47     public static boolean isXMLSpace(char c) {
48       return (c <= 0x0020) &&
49              (((((1L << 0x0009) |
50                  (1L << 0x000A) |
51                  (1L << 0x000D) |
52                  (1L << 0x0020)) >> c) & 1L) != 0);
53     }
54
55     /**
56      * Tests whether the given character is usable as the
57      * first character of an XML name.
58      */

59     public static boolean isXMLNameFirstCharacter(char c) {
60     return (NAME_FIRST_CHARACTER[c / 32] & (1 << (c % 32))) != 0;
61     }
62
63     /**
64      * Tests whether the given character is a valid XML name character.
65      */

66     public static boolean isXMLNameCharacter(char c) {
67     return (NAME_CHARACTER[c / 32] & (1 << (c % 32))) != 0;
68     }
69
70     /**
71      * Tests whether the given 32 bits character is valid in XML documents.
72      */

73     public static boolean isXMLCharacter(int c) {
74     return (c >= 0x10000 && c <= 0x10ffff) ||
75         (XML_CHARACTER[c / 32] & (1 << (c % 32))) != 0;
76     }
77
78     /**
79      * Tests whether the given character is a valid XML public ID character.
80      */

81     public static boolean isXMLPublicIdCharacter(char c) {
82     return (c < 128) &&
83             (PUBLIC_ID_CHARACTER[c / 32] & (1 << (c % 32))) != 0;
84     }
85
86     /**
87      * Tests whether the given character is a valid XML version character.
88      */

89     public static boolean isXMLVersionCharacter(char c) {
90     return (c < 128) &&
91             (VERSION_CHARACTER[c / 32] & (1 << (c % 32))) != 0;
92     }
93
94     /**
95      * Tests whether the given character is a valid aphabetic character.
96      */

97     public static boolean isXMLAlphabeticCharacter(char c) {
98     return (c < 128) &&
99             (ALPHABETIC_CHARACTER[c / 32] & (1 << (c % 32))) != 0;
100     }
101
102     /**
103      * Creates a Reader initialized to scan the characters in the given
104      * XML document's InputStream.
105      * @param is The input stream positionned at the beginning of an
106      * XML document.
107      * @return a Reader positionned at the beginning of the XML document
108      * It is created from an encoding figured out from the first
109      * few bytes of the document. As a consequence the given
110      * input stream is not positionned anymore at the beginning
111      * of the document when this method returns.
112      */

113     public static Reader JavaDoc createXMLDocumentReader(InputStream JavaDoc is)
114         throws IOException JavaDoc {
115         PushbackInputStream JavaDoc pbis = new PushbackInputStream JavaDoc(is, 128);
116         byte[] buf = new byte[4];
117
118         int len = pbis.read(buf);
119         if (len > 0) {
120             pbis.unread(buf, 0, len);
121         }
122
123         if (len == 4) {
124             switch (buf[0] & 0x00FF) {
125             case 0:
126                 if (buf[1] == 0x003c && buf[2] == 0x0000 && buf[3] == 0x003f) {
127                     return new InputStreamReader JavaDoc(pbis, "UnicodeBig");
128                 }
129                 break;
130
131             case '<':
132                 switch (buf[1] & 0x00FF) {
133                 case 0:
134                     if (buf[2] == 0x003f && buf[3] == 0x0000) {
135                         return new InputStreamReader JavaDoc(pbis, "UnicodeLittle");
136                     }
137                     break;
138
139                 case '?':
140                     if (buf[2] == 'x' && buf[3] == 'm') {
141                         Reader JavaDoc r = createXMLDeclarationReader(pbis, "UTF8");
142                         String JavaDoc enc = getXMLDeclarationEncoding(r, "UTF8");
143                         return new InputStreamReader JavaDoc(pbis, enc);
144                     }
145                 }
146                 break;
147
148             case 0x004C:
149                 if (buf[1] == 0x006f &&
150                     (buf[2] & 0x00FF) == 0x00a7 &&
151                     (buf[3] & 0x00FF) == 0x0094) {
152                     Reader JavaDoc r = createXMLDeclarationReader(pbis, "CP037");
153                     String JavaDoc enc = getXMLDeclarationEncoding(r, "CP037");
154                     return new InputStreamReader JavaDoc(pbis, enc);
155                 }
156                 break;
157
158             case 0x00FE:
159                 if ((buf[1] & 0x00FF) == 0x00FF) {
160                     return new InputStreamReader JavaDoc(pbis, "Unicode");
161                 }
162                 break;
163
164             case 0x00FF:
165                 if ((buf[1] & 0x00FF) == 0x00FE) {
166                     return new InputStreamReader JavaDoc(pbis, "Unicode");
167                 }
168             }
169         }
170
171         return new InputStreamReader JavaDoc(pbis, "UTF8");
172     }
173
174     /**
175      * Creates a reader from the given input stream and encoding.
176      * This method assumes the input stream working buffer is at least
177      * 128 byte long. The input stream is restored before this method
178      * returns. The 4 first bytes are skipped before creating the reader.
179      */

180     protected static Reader JavaDoc createXMLDeclarationReader(PushbackInputStream JavaDoc pbis,
181                                                        String JavaDoc enc)
182         throws IOException JavaDoc {
183         byte[] buf = new byte[128];
184         int len = pbis.read(buf);
185
186         if (len > 0) {
187             pbis.unread(buf, 0, len);
188         }
189
190         return new InputStreamReader JavaDoc(new ByteArrayInputStream JavaDoc(buf, 4, len), enc);
191     }
192
193     /**
194      * Reads an XML declaration to get the encoding declaration value.
195      * @param r a reader positionned just after '<?xm'.
196      * @param e the encoding to return by default or on error.
197      */

198     protected static String JavaDoc getXMLDeclarationEncoding(Reader JavaDoc r, String JavaDoc e)
199         throws IOException JavaDoc {
200         int c;
201
202         if ((c = r.read()) != 'l') {
203             return e;
204         }
205
206         if (!isXMLSpace((char)(c = r.read()))) {
207             return e;
208         }
209
210         while (isXMLSpace((char)(c = r.read())));
211             
212         if (c != 'v') {
213             return e;
214         }
215         if ((c = r.read()) != 'e') {
216             return e;
217         }
218         if ((c = r.read()) != 'r') {
219             return e;
220         }
221         if ((c = r.read()) != 's') {
222             return e;
223         }
224         if ((c = r.read()) != 'i') {
225             return e;
226         }
227         if ((c = r.read()) != 'o') {
228             return e;
229         }
230         if ((c = r.read()) != 'n') {
231             return e;
232         }
233              
234         c = r.read();
235         while (isXMLSpace((char)c)) {
236             c = r.read();
237         }
238
239         if (c != '=') {
240             return e;
241         }
242
243         while (isXMLSpace((char)(c = r.read())));
244             
245         if (c != '"' && c != '\'') {
246             return e;
247         }
248         char sc = (char)c;
249
250         for (;;) {
251             c = r.read();
252             if (c == sc) {
253                 break;
254             }
255             if (!isXMLVersionCharacter((char)c)) {
256                 return e;
257             }
258         }
259
260         if (!isXMLSpace((char)(c = r.read()))) {
261             return e;
262         }
263         while (isXMLSpace((char)(c = r.read())));
264
265         if (c != 'e') {
266             return e;
267         }
268         if ((c = r.read()) != 'n') {
269             return e;
270         }
271         if ((c = r.read()) != 'c') {
272             return e;
273         }
274         if ((c = r.read()) != 'o') {
275             return e;
276         }
277         if ((c = r.read()) != 'd') {
278             return e;
279         }
280         if ((c = r.read()) != 'i') {
281             return e;
282         }
283         if ((c = r.read()) != 'n') {
284             return e;
285         }
286         if ((c = r.read()) != 'g') {
287             return e;
288         }
289
290         c = r.read();
291         while (isXMLSpace((char)c)) {
292             c = r.read();
293         }
294
295         if (c != '=') {
296             return e;
297         }
298
299         while (isXMLSpace((char)(c = r.read())));
300             
301         if (c != '"' && c != '\'') {
302             return e;
303         }
304         sc = (char)c;
305
306         StringBuffer JavaDoc enc = new StringBuffer JavaDoc();
307         for (;;) {
308             c = r.read();
309             if (c == -1) {
310                 return e;
311             }
312             if (c == sc) {
313                 return encodingToJavaEncoding(enc.toString(), e);
314             }
315             enc.append((char)c);
316         }
317     }
318
319     /**
320      * Converts the given standard encoding representation to the
321      * corresponding Java encoding string.
322      * @param e the encoding string to convert.
323      * @param de the encoding string if no corresponding encoding was found.
324      */

325     public static String JavaDoc encodingToJavaEncoding(String JavaDoc e, String JavaDoc de) {
326         String JavaDoc result = EncodingUtilities.javaEncoding(e);
327         return (result == null) ? de : result;
328     }
329 }
330
Popular Tags