package com.sun.org.apache.xerces.internal.xinclude;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Locale;

import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.impl.XMLEntityManager;
import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;
import com.sun.org.apache.xerces.internal.util.EncodingMap;
import com.sun.org.apache.xerces.internal.util.MessageFormatter;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;

/**
 * Reads the resource described by an {@link XMLInputSource} as plain text,
 * checks every character with {@link #isValid(int)}, and hands the collected
 * characters to an {@link XIncludeHandler}.
 *
 * <p>An {@link XMLErrorReporter} must be supplied through
 * {@link #setErrorReporter(XMLErrorReporter)} before {@link #parse()} is
 * called: it is used both to build the byte-stream readers and to report
 * invalid characters.</p>
 */
public class XIncludeTextReader {

    /** Reader opened by {@link #parse()}; {@code null} until then. */
    private Reader fReader;
    /** Receives the characters that were read; may be {@code null}. */
    private final XIncludeHandler fHandler;
    /** Describes the resource to read. */
    private final XMLInputSource fSource;
    /** Supplies message formatter and locale, and receives error reports. */
    private XMLErrorReporter fErrorReporter;

    /** Value for the HTTP "Accept" request header, or {@code null}/empty for none. */
    private String fAccept;
    /** Value for the HTTP "Accept-Language" request header, or {@code null}/empty for none. */
    private String fAcceptLanguage;

    /**
     * Creates a text reader for the given source. Nothing is opened here;
     * the resource is only accessed by {@link #parse()}.
     *
     * @param source  the resource to read
     * @param handler the handler that receives the characters; when
     *                {@code null}, {@link #parse()} still reads and validates
     *                the content but delivers it nowhere
     * @throws IOException declared for interface compatibility; this
     *                     constructor does not perform any I/O itself
     */
    public XIncludeTextReader(XMLInputSource source, XIncludeHandler handler)
        throws IOException {
        fHandler = handler;
        fSource = source;
    }

    /**
     * Sets the error reporter used for message formatting and for reporting
     * invalid characters.
     *
     * @param errorReporter the error reporter to use
     */
    public void setErrorReporter(XMLErrorReporter errorReporter) {
        fErrorReporter = errorReporter;
    }

    /**
     * Sets the HTTP request header values sent when the resource is fetched
     * through an {@link HttpURLConnection}.
     *
     * @param accept         value of the accept header; ignored when
     *                       {@code null} or empty
     * @param acceptLanguage value of the accept-language header; ignored when
     *                       {@code null} or empty
     */
    public void setHttpProperties(String accept, String acceptLanguage) {
        fAccept = accept;
        fAcceptLanguage = acceptLanguage;
    }

    /**
     * Returns a {@link Reader} over the content of {@code source}.
     *
     * <p>The source's character stream is used directly when present.
     * Otherwise the byte stream (or, failing that, a connection opened from
     * the expanded system id) is decoded. The encoding is taken from the
     * source, defaulting to UTF-8; for a URL connection it may be overridden
     * by the charset of the reported content type or, for XML media types
     * without a charset, by sniffing the first bytes of the stream.</p>
     *
     * @param source the resource to open
     * @return a reader positioned after any byte order mark
     * @throws IOException if the resource cannot be opened or read, or if the
     *                     encoding name has no known Java mapping
     */
    protected Reader getReader(XMLInputSource source) throws IOException {
        Reader characterStream = source.getCharacterStream();
        if (characterStream != null) {
            return characterStream;
        }

        String encoding = source.getEncoding();
        if (encoding == null) {
            encoding = "UTF-8";
        }

        InputStream stream = source.getByteStream();
        if (stream != null) {
            // consumeBOM() relies on mark/reset, which BufferedInputStream provides.
            if (!(stream instanceof BufferedInputStream)) {
                stream = new BufferedInputStream(stream);
            }
        }
        else {
            String expandedSystemId = XMLEntityManager.expandSystemId(
                source.getSystemId(), source.getBaseSystemId(), false);

            URL url = new URL(expandedSystemId);
            URLConnection urlCon = url.openConnection();

            // Request headers only make sense for HTTP connections.
            if (urlCon instanceof HttpURLConnection) {
                if (fAccept != null && fAccept.length() > 0) {
                    urlCon.setRequestProperty(XIncludeHandler.HTTP_ACCEPT, fAccept);
                }
                if (fAcceptLanguage != null && fAcceptLanguage.length() > 0) {
                    urlCon.setRequestProperty(XIncludeHandler.HTTP_ACCEPT_LANGUAGE, fAcceptLanguage);
                }
            }

            stream = new BufferedInputStream(urlCon.getInputStream());

            // The content type looks like "text/xml; charset=UTF-8" or
            // "text/xml"; it is null when the connection does not report one.
            String rawContentType = urlCon.getContentType();
            String contentType = null;
            String charset = null;
            if (rawContentType != null) {
                int index = rawContentType.indexOf(';');
                if (index != -1) {
                    contentType = rawContentType.substring(0, index).trim();
                    charset = extractCharset(rawContentType.substring(index + 1).trim());
                }
                else {
                    contentType = rawContentType.trim();
                }
            }

            String detectedEncoding = detectEncoding(contentType, charset, stream);
            if (detectedEncoding != null) {
                encoding = detectedEncoding;
            }
        }

        encoding = encoding.toUpperCase(Locale.ENGLISH);

        // Skip a leading byte order mark so it is not delivered as content.
        consumeBOM(stream, encoding);

        if (encoding.equals("UTF-8")) {
            return new UTF8Reader(stream,
                XMLEntityManager.DEFAULT_BUFFER_SIZE,
                fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
                fErrorReporter.getLocale());
        }

        String javaEncoding = EncodingMap.getIANA2JavaMapping(encoding);

        if (javaEncoding == null) {
            // No Java mapping for this encoding name: report it as invalid.
            MessageFormatter aFormatter =
                fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN);
            Locale aLocale = fErrorReporter.getLocale();
            throw new IOException(aFormatter.formatMessage(aLocale,
                "EncodingDeclInvalid",
                new Object[] {encoding}));
        }
        else if (javaEncoding.equals("ASCII")) {
            return new ASCIIReader(stream,
                XMLEntityManager.DEFAULT_BUFFER_SIZE,
                fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
                fErrorReporter.getLocale());
        }

        return new InputStreamReader(stream, javaEncoding);
    }

    /**
     * Extracts the charset value from the parameter part of a content type
     * (the text after the first {@code ';'}, already trimmed).
     *
     * @param parameter the parameter text, e.g. {@code charset="UTF-8"}
     * @return the charset with one pair of surrounding single or double
     *         quotes removed, or {@code null} when the parameter is not a
     *         {@code charset=} parameter or its value is empty
     */
    private static String extractCharset(String parameter) {
        if (!parameter.startsWith("charset=")) {
            return null;
        }
        String charset = parameter.substring(8).trim();
        int length = charset.length();
        // Only unquote when there is room for both an opening and a closing
        // quote; a lone quote character is left as-is and rejected later as
        // an unknown encoding instead of causing an index error here.
        if (length >= 2) {
            char first = charset.charAt(0);
            char last = charset.charAt(length - 1);
            if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) {
                charset = charset.substring(1, length - 1);
            }
        }
        // An empty value carries no information; treat it like a missing charset.
        return (charset.length() > 0) ? charset : null;
    }

    /**
     * Chooses an encoding from the media type reported by a connection.
     *
     * @param contentType the media type without parameters, or {@code null}
     * @param charset     the charset parameter, or {@code null} if absent
     * @param stream      the content stream, sniffed when the media type is
     *                    XML but carries no charset
     * @return the encoding to use, or {@code null} to keep the caller's choice
     * @throws IOException if sniffing the stream fails
     */
    private String detectEncoding(String contentType, String charset, InputStream stream)
        throws IOException {
        if (contentType == null) {
            return null;
        }
        if (contentType.equals("text/xml")) {
            // Without a charset, text/xml falls back to US-ASCII
            // (presumably following RFC 3023 -- confirm against the spec).
            return (charset != null) ? charset : "US-ASCII";
        }
        if (contentType.equals("application/xml")) {
            return (charset != null) ? charset : getEncodingName(stream);
        }
        if (contentType.endsWith("+xml")) {
            return getEncodingName(stream);
        }
        return null;
    }

    /**
     * Reads up to {@code length} bytes into {@code buffer}, repeating the
     * read until the bytes are available or the stream ends. A single
     * {@link InputStream#read(byte[], int, int)} call may legally return
     * fewer bytes than requested even when more will follow.
     *
     * @return the number of bytes actually read, between 0 and {@code length}
     */
    private static int readFully(InputStream stream, byte[] buffer, int length)
        throws IOException {
        int total = 0;
        while (total < length) {
            int n = stream.read(buffer, total, length - total);
            if (n <= 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * Sniffs the encoding from the first four bytes of the stream without
     * consuming them.
     *
     * @param stream a stream that supports {@code mark}/{@code reset}
     * @return the detected encoding name, or {@code null} when fewer than
     *         four bytes are available or no known pattern matches
     * @throws IOException if reading or resetting the stream fails
     */
    protected String getEncodingName(InputStream stream) throws IOException {
        final byte[] b4 = new byte[4];
        String encoding = null;

        stream.mark(4);
        int count = readFully(stream, b4, 4);
        stream.reset();
        if (count == 4) {
            encoding = getEncodingName(b4);
        }

        return encoding;
    }

    /**
     * Removes a byte order mark from the start of the stream when the
     * encoding is UTF-8 or starts with "UTF-16". If no BOM is found the
     * stream is reset so that no content bytes are lost. For any other
     * encoding the stream is left untouched apart from being marked.
     *
     * @param stream   a stream that supports {@code mark}/{@code reset}
     * @param encoding the upper-case encoding name
     * @throws IOException if reading or resetting the stream fails
     */
    protected void consumeBOM(InputStream stream, String encoding)
        throws IOException {

        byte[] b = new byte[3];
        stream.mark(3);
        if (encoding.equals("UTF-8")) {
            boolean hasBom = readFully(stream, b, 3) == 3
                && (b[0] & 0xFF) == 0xEF
                && (b[1] & 0xFF) == 0xBB
                && (b[2] & 0xFF) == 0xBF;
            if (!hasBom) {
                stream.reset();
            }
        }
        else if (encoding.startsWith("UTF-16")) {
            boolean hasBom = false;
            if (readFully(stream, b, 2) == 2) {
                int b0 = b[0] & 0xFF;
                int b1 = b[1] & 0xFF;
                // Accept either byte order: FE FF or FF FE.
                hasBom = (b0 == 0xFE && b1 == 0xFF) || (b0 == 0xFF && b1 == 0xFE);
            }
            if (!hasBom) {
                stream.reset();
            }
        }
    }

    /**
     * Maps the first four bytes of a document to an encoding name, using
     * byte order marks and the byte patterns of a leading {@code '<'} or
     * {@code "<?"} (compare the XML 1.0 specification, Appendix F).
     *
     * @param b4 the first four bytes of the document; must have length
     *           of at least four
     * @return the encoding name, or {@code null} if no pattern matches
     */
    protected String getEncodingName(byte[] b4) {

        // UTF-16 with a byte order mark.
        int b0 = b4[0] & 0xFF;
        int b1 = b4[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            return "UTF-16BE";
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            return "UTF-16LE";
        }

        // UTF-8 with a byte order mark.
        int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return "UTF-8";
        }

        // A '<' (0x3C) spread over four bytes, in any of four byte orders.
        int b3 = b4[3] & 0xFF;
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        // "<?" as two 16-bit units without a byte order mark.
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
            return "UTF-16BE";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
            return "UTF-16LE";
        }
        // "<?xm" in EBCDIC.
        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
            return "CP037";
        }

        return null;
    }

    /**
     * Reads the whole resource, validates each character and passes the
     * valid content to the handler in a single {@code characters} call.
     *
     * <p>Invalid characters, unpaired high surrogates and invalid
     * supplemental code points are reported to the error reporter as
     * {@code InvalidCharInContent} with fatal severity and are not added to
     * the content. The handler is not called when it is {@code null} or when
     * no valid characters were read.</p>
     *
     * @throws IOException if the resource cannot be opened or read
     */
    public void parse() throws IOException {
        XMLStringBuffer buffer = new XMLStringBuffer();
        fReader = getReader(fSource);
        int ch;
        while ((ch = fReader.read()) != -1) {
            if (isValid(ch)) {
                buffer.append((char) ch);
            }
            else if (XMLChar.isHighSurrogate(ch)) {
                // A high surrogate is only acceptable as the first half of a pair.
                int ch2 = fReader.read();
                if (XMLChar.isLowSurrogate(ch2)) {

                    // Combine the pair and validate the resulting code point.
                    int sup = XMLChar.supplemental((char) ch, (char) ch2);

                    if (!isValid(sup)) {
                        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                            "InvalidCharInContent",
                            new Object[] { Integer.toString(sup, 16) },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                        continue;
                    }
                    buffer.append((char) ch);
                    buffer.append((char) ch2);
                }
                else {
                    // Note: ch2 has already been consumed and is not re-examined.
                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                        "InvalidCharInContent",
                        new Object[] { Integer.toString(ch, 16) },
                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
                }
            }
            else {
                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                    "InvalidCharInContent",
                    new Object[] { Integer.toString(ch, 16) },
                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
            }
        }
        if (fHandler != null && buffer.length > 0) {
            fHandler.characters(
                buffer,
                fHandler.modifyAugmentations(null, true));
        }
    }

    /**
     * Closes the reader opened by {@link #parse()}, if any.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        if (fReader != null) {
            fReader.close();
        }
    }

    /**
     * Tells whether a character may appear in the included text. This
     * implementation delegates to {@link XMLChar#isValid(int)}; subclasses
     * may override it.
     *
     * @param ch the character or code point to check
     * @return {@code true} if the character is acceptable
     */
    protected boolean isValid(int ch) {
        return XMLChar.isValid(ch);
    }
}