1 18 19 package org.apache.batik.xml; 20 21 import java.io.ByteArrayInputStream ; 22 import java.io.IOException ; 23 import java.io.InputStream ; 24 import java.io.InputStreamReader ; 25 import java.io.PushbackInputStream ; 26 import java.io.Reader ; 27 28 import org.apache.batik.util.EncodingUtilities; 29 30 36 public class XMLUtilities extends XMLCharacters { 37 38 41 protected XMLUtilities() { 42 } 43 44 47 public static boolean isXMLSpace(char c) { 48 return (c <= 0x0020) && 49 (((((1L << 0x0009) | 50 (1L << 0x000A) | 51 (1L << 0x000D) | 52 (1L << 0x0020)) >> c) & 1L) != 0); 53 } 54 55 59 public static boolean isXMLNameFirstCharacter(char c) { 60 return (NAME_FIRST_CHARACTER[c / 32] & (1 << (c % 32))) != 0; 61 } 62 63 66 public static boolean isXMLNameCharacter(char c) { 67 return (NAME_CHARACTER[c / 32] & (1 << (c % 32))) != 0; 68 } 69 70 73 public static boolean isXMLCharacter(int c) { 74 return (c >= 0x10000 && c <= 0x10ffff) || 75 (XML_CHARACTER[c / 32] & (1 << (c % 32))) != 0; 76 } 77 78 81 public static boolean isXMLPublicIdCharacter(char c) { 82 return (c < 128) && 83 (PUBLIC_ID_CHARACTER[c / 32] & (1 << (c % 32))) != 0; 84 } 85 86 89 public static boolean isXMLVersionCharacter(char c) { 90 return (c < 128) && 91 (VERSION_CHARACTER[c / 32] & (1 << (c % 32))) != 0; 92 } 93 94 97 public static boolean isXMLAlphabeticCharacter(char c) { 98 return (c < 128) && 99 (ALPHABETIC_CHARACTER[c / 32] & (1 << (c % 32))) != 0; 100 } 101 102 113 public static Reader createXMLDocumentReader(InputStream is) 114 throws IOException { 115 PushbackInputStream pbis = new PushbackInputStream (is, 128); 116 byte[] buf = new byte[4]; 117 118 int len = pbis.read(buf); 119 if (len > 0) { 120 pbis.unread(buf, 0, len); 121 } 122 123 if (len == 4) { 124 switch (buf[0] & 0x00FF) { 125 case 0: 126 if (buf[1] == 0x003c && buf[2] == 0x0000 && buf[3] == 0x003f) { 127 return new InputStreamReader (pbis, "UnicodeBig"); 128 } 129 break; 130 131 case '<': 132 switch (buf[1] & 0x00FF) { 133 case 0: 134 if (buf[2] == 0x003f && buf[3] == 0x0000) { 135 return new InputStreamReader (pbis, "UnicodeLittle"); 136 } 137 break; 138 139 case '?': 140 if (buf[2] == 'x' && buf[3] == 'm') { 141 Reader r = createXMLDeclarationReader(pbis, "UTF8"); 142 String enc = getXMLDeclarationEncoding(r, "UTF8"); 143 return new InputStreamReader (pbis, enc); 144 } 145 } 146 break; 147 148 case 0x004C: 149 if (buf[1] == 0x006f && 150 (buf[2] & 0x00FF) == 0x00a7 && 151 (buf[3] & 0x00FF) == 0x0094) { 152 Reader r = createXMLDeclarationReader(pbis, "CP037"); 153 String enc = getXMLDeclarationEncoding(r, "CP037"); 154 return new InputStreamReader (pbis, enc); 155 } 156 break; 157 158 case 0x00FE: 159 if ((buf[1] & 0x00FF) == 0x00FF) { 160 return new InputStreamReader (pbis, "Unicode"); 161 } 162 break; 163 164 case 0x00FF: 165 if ((buf[1] & 0x00FF) == 0x00FE) { 166 return new InputStreamReader (pbis, "Unicode"); 167 } 168 } 169 } 170 171 return new InputStreamReader (pbis, "UTF8"); 172 } 173 174 180 protected static Reader createXMLDeclarationReader(PushbackInputStream pbis, 181 String enc) 182 throws IOException { 183 byte[] buf = new byte[128]; 184 int len = pbis.read(buf); 185 186 if (len > 0) { 187 pbis.unread(buf, 0, len); 188 } 189 190 return new InputStreamReader (new ByteArrayInputStream (buf, 4, len), enc); 191 } 192 193 198 protected static String getXMLDeclarationEncoding(Reader r, String e) 199 throws IOException { 200 int c; 201 202 if ((c = r.read()) != 'l') { 203 return e; 204 } 205 206 if (!isXMLSpace((char)(c = r.read()))) { 207 return e; 208 } 209 210 while (isXMLSpace((char)(c = r.read()))); 211 212 if (c != 'v') { 213 return e; 214 } 215 if ((c = r.read()) != 'e') { 216 return e; 217 } 218 if ((c = r.read()) != 'r') { 219 return e; 220 } 221 if ((c = r.read()) != 's') { 222 return e; 223 } 224 if ((c = r.read()) != 'i') { 225 return e; 226 } 227 if ((c = r.read()) != 'o') { 228 return e; 229 } 230 if ((c = r.read()) != 'n') { 231 return e; 232 } 233 234 c = r.read(); 235 while (isXMLSpace((char)c)) { 236 c = r.read(); 237 } 238 239 if (c != '=') { 240 return e; 241 } 242 243 while (isXMLSpace((char)(c = r.read()))); 244 245 if (c != '"' && c != '\'') { 246 return e; 247 } 248 char sc = (char)c; 249 250 for (;;) { 251 c = r.read(); 252 if (c == sc) { 253 break; 254 } 255 if (!isXMLVersionCharacter((char)c)) { 256 return e; 257 } 258 } 259 260 if (!isXMLSpace((char)(c = r.read()))) { 261 return e; 262 } 263 while (isXMLSpace((char)(c = r.read()))); 264 265 if (c != 'e') { 266 return e; 267 } 268 if ((c = r.read()) != 'n') { 269 return e; 270 } 271 if ((c = r.read()) != 'c') { 272 return e; 273 } 274 if ((c = r.read()) != 'o') { 275 return e; 276 } 277 if ((c = r.read()) != 'd') { 278 return e; 279 } 280 if ((c = r.read()) != 'i') { 281 return e; 282 } 283 if ((c = r.read()) != 'n') { 284 return e; 285 } 286 if ((c = r.read()) != 'g') { 287 return e; 288 } 289 290 c = r.read(); 291 while (isXMLSpace((char)c)) { 292 c = r.read(); 293 } 294 295 if (c != '=') { 296 return e; 297 } 298 299 while (isXMLSpace((char)(c = r.read()))); 300 301 if (c != '"' && c != '\'') { 302 return e; 303 } 304 sc = (char)c; 305 306 StringBuffer enc = new StringBuffer (); 307 for (;;) { 308 c = r.read(); 309 if (c == -1) { 310 return e; 311 } 312 if (c == sc) { 313 return encodingToJavaEncoding(enc.toString(), e); 314 } 315 enc.append((char)c); 316 } 317 } 318 319 325 public static String encodingToJavaEncoding(String e, String de) { 326 String result = EncodingUtilities.javaEncoding(e); 327 return (result == null) ? de : result; 328 } 329 } 330 | Popular Tags |