package org.apache.xerces.xinclude;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.impl.XMLErrorReporter;
import org.apache.xerces.impl.io.ASCIIReader;
import org.apache.xerces.impl.io.UTF8Reader;
import org.apache.xerces.impl.msg.XMLMessageFormatter;
import org.apache.xerces.util.EncodingMap;
import org.apache.xerces.util.HTTPInputSource;
import org.apache.xerces.util.MessageFormatter;
import org.apache.xerces.util.XMLChar;
import org.apache.xerces.xni.XMLString;
import org.apache.xerces.xni.parser.XMLInputSource;

/**
 * Reads the content of an {@link XMLInputSource} as plain text, checks every
 * character with {@link #isValid(int)}, and forwards the text in buffer-sized
 * chunks to an {@link XIncludeHandler} through its {@code characters} callback.
 *
 * <p>An error reporter must be supplied with
 * {@link #setErrorReporter(XMLErrorReporter)} before {@link #parse()} is
 * called: both invalid characters and unsupported encodings are reported
 * through it.</p>
 */
public class XIncludeTextReader {

    /** Reader over the current source; opened by {@link #parse()}, released by {@link #close()}. */
    private Reader fReader;

    /** Receiver of the text chunks; may be {@code null}, in which case text is only validated. */
    private XIncludeHandler fHandler;

    /** Source still waiting to be parsed; cleared as soon as a reader has been opened on it. */
    private XMLInputSource fSource;

    /** Reports invalid characters and supplies the message formatter/locale for encoding errors. */
    private XMLErrorReporter fErrorReporter;

    /**
     * Chunk buffer. Its array is always one char longer than the requested
     * buffer size so that the low half of a surrogate pair split across two
     * reads can be appended to the current chunk.
     */
    private XMLString fTempString;

    /**
     * Creates a text reader for the given source.
     *
     * @param source     the input source to read text from
     * @param handler    the handler that receives the text; may be {@code null}
     * @param bufferSize the number of characters to read per chunk
     * @throws IOException declared for callers; not thrown by this constructor itself
     */
    public XIncludeTextReader(XMLInputSource source, XIncludeHandler handler, int bufferSize)
        throws IOException {
        fHandler = handler;
        fSource = source;
        // One spare slot is reserved for a trailing low surrogate (see parse()).
        fTempString = new XMLString(new char[bufferSize + 1], 0, 0);
    }

    /**
     * Sets the error reporter used for invalid-character and encoding errors.
     *
     * @param errorReporter the error reporter to use
     */
    public void setErrorReporter(XMLErrorReporter errorReporter) {
        fErrorReporter = errorReporter;
    }

    /**
     * Opens a {@link Reader} over the given source.
     *
     * <p>The source's character stream is returned as-is when present.
     * Otherwise the byte stream (or, failing that, a connection opened on the
     * expanded system identifier) is decoded. The encoding starts out as the
     * source's declared encoding, defaulting to UTF-8. For URL connections it
     * may be overridden based on the {@code Content-Type} response header:
     * {@code text/xml} uses the charset parameter or US-ASCII,
     * {@code application/xml} uses the charset parameter or sniffs the stream,
     * and any {@code +xml} type sniffs the stream. A missing content type, or
     * a missing/empty charset parameter, leaves the corresponding default in
     * place.</p>
     *
     * @param source the input source to open
     * @return a reader over the decoded content of {@code source}
     * @throws IOException if the stream cannot be opened or read, or if the
     *                     resolved encoding has no known Java mapping
     */
    protected Reader getReader(XMLInputSource source) throws IOException {
        if (source.getCharacterStream() != null) {
            return source.getCharacterStream();
        }

        InputStream stream = null;

        String encoding = source.getEncoding();
        if (encoding == null) {
            encoding = "UTF-8";
        }

        if (source.getByteStream() != null) {
            stream = source.getByteStream();
            // mark()/reset() are needed below for BOM handling.
            if (!(stream instanceof BufferedInputStream)) {
                stream = new BufferedInputStream(stream, fTempString.ch.length);
            }
        }
        else {
            String expandedSystemId = XMLEntityManager.expandSystemId(
                source.getSystemId(), source.getBaseSystemId(), false);

            URL url = new URL(expandedSystemId);
            URLConnection urlCon = url.openConnection();

            // Apply caller-supplied HTTP settings, if any.
            if (urlCon instanceof HttpURLConnection && source instanceof HTTPInputSource) {
                final HttpURLConnection urlConnection = (HttpURLConnection) urlCon;
                final HTTPInputSource httpInputSource = (HTTPInputSource) source;

                Iterator propIter = httpInputSource.getHTTPRequestProperties();
                while (propIter.hasNext()) {
                    Map.Entry entry = (Map.Entry) propIter.next();
                    urlConnection.setRequestProperty(
                        (String) entry.getKey(), (String) entry.getValue());
                }

                // Only touch the connection's redirect setting when redirects
                // were explicitly disabled on the input source.
                boolean followRedirects = httpInputSource.getFollowHTTPRedirects();
                if (!followRedirects) {
                    XMLEntityManager.setInstanceFollowRedirects(urlConnection, followRedirects);
                }
            }

            stream = new BufferedInputStream(urlCon.getInputStream());

            // getContentType() returns null when the type is not known, so
            // every use of it below has to be null-safe.
            String rawContentType = urlCon.getContentType();
            String contentType = null;
            String charset = null;
            if (rawContentType != null) {
                int index = rawContentType.indexOf(';');
                if (index != -1) {
                    // Only a single optional parameter is considered.
                    contentType = rawContentType.substring(0, index).trim();
                    charset = parseCharsetParameter(rawContentType.substring(index + 1));
                }
                else {
                    contentType = rawContentType.trim();
                }
            }

            String detectedEncoding = null;
            if (contentType != null) {
                if (contentType.equals("text/xml")) {
                    // Without a charset parameter text/xml is treated as US-ASCII.
                    detectedEncoding = (charset != null) ? charset : "US-ASCII";
                }
                else if (contentType.equals("application/xml")) {
                    detectedEncoding = (charset != null) ? charset : getEncodingName(stream);
                }
                else if (contentType.endsWith("+xml")) {
                    detectedEncoding = getEncodingName(stream);
                }
            }

            if (detectedEncoding != null) {
                encoding = detectedEncoding;
            }
        }

        encoding = encoding.toUpperCase(Locale.ENGLISH);

        // Skip a byte order mark, which may also pin down UTF-16 endianness.
        encoding = consumeBOM(stream, encoding);

        if (encoding.equals("UTF-8")) {
            return new UTF8Reader(stream,
                fTempString.ch.length,
                fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
                fErrorReporter.getLocale());
        }

        String javaEncoding = EncodingMap.getIANA2JavaMapping(encoding);

        if (javaEncoding == null) {
            MessageFormatter aFormatter =
                fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN);
            Locale aLocale = fErrorReporter.getLocale();
            throw new IOException(aFormatter.formatMessage(aLocale,
                "EncodingDeclInvalid",
                new Object[] {encoding}));
        }
        else if (javaEncoding.equals("ASCII")) {
            return new ASCIIReader(stream,
                fTempString.ch.length,
                fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
                fErrorReporter.getLocale());
        }

        return new InputStreamReader(stream, javaEncoding);
    }

    /**
     * Extracts the charset value from the parameter portion of a content type
     * (the text following the first {@code ';'}).
     *
     * @param parameter the raw parameter text, e.g. {@code " charset=\"utf-8\""}
     * @return the charset with one pair of matching surrounding quotes
     *         removed, or {@code null} if the parameter is not a
     *         {@code charset=} parameter or its value is empty
     */
    private static String parseCharsetParameter(String parameter) {
        String charset = parameter.trim();
        if (!charset.startsWith("charset=")) {
            return null;
        }
        charset = charset.substring(8).trim();

        // Need at least two characters before the first and last can be a
        // quote pair; this also guards charAt() against an empty value.
        final int length = charset.length();
        if (length >= 2) {
            final char first = charset.charAt(0);
            final char last = charset.charAt(length - 1);
            if ((first == '"' && last == '"') || (first == '\'' && last == '\'')) {
                charset = charset.substring(1, length - 1);
            }
        }

        // An empty value carries no information; treat it as absent.
        return (charset.length() == 0) ? null : charset;
    }

    /**
     * Sniffs the encoding from the first four bytes of a stream without
     * consuming them. The stream must support {@code mark}/{@code reset}.
     *
     * @param stream the stream to inspect
     * @return the detected encoding name, or {@code null} if fewer than four
     *         bytes were read or no known signature matched
     * @throws IOException if reading or resetting the stream fails
     */
    protected String getEncodingName(InputStream stream) throws IOException {
        final byte[] b4 = new byte[4];
        String encoding = null;

        stream.mark(4);
        int count = stream.read(b4, 0, 4);
        stream.reset();
        if (count == 4) {
            encoding = getEncodingName(b4);
        }

        return encoding;
    }

    /**
     * Consumes a byte order mark from the front of the stream if one matching
     * the given encoding is present; otherwise leaves the stream position
     * unchanged. The stream must support {@code mark}/{@code reset}.
     *
     * @param stream   the stream positioned at the start of the content
     * @param encoding the upper-cased encoding name currently assumed
     * @return {@code "UTF-16BE"} or {@code "UTF-16LE"} when the encoding
     *         starts with {@code "UTF-16"} and a matching BOM was consumed;
     *         otherwise {@code encoding} unchanged
     * @throws IOException if reading or resetting the stream fails
     */
    protected String consumeBOM(InputStream stream, String encoding)
        throws IOException {

        byte[] b = new byte[3];
        int count = 0;
        stream.mark(3);
        if (encoding.equals("UTF-8")) {
            count = stream.read(b, 0, 3);
            if (count == 3) {
                final int b0 = b[0] & 0xFF;
                final int b1 = b[1] & 0xFF;
                final int b2 = b[2] & 0xFF;
                if (b0 != 0xEF || b1 != 0xBB || b2 != 0xBF) {
                    // Not a BOM: put the bytes back.
                    stream.reset();
                }
            }
            else {
                stream.reset();
            }
        }
        else if (encoding.startsWith("UTF-16")) {
            count = stream.read(b, 0, 2);
            if (count == 2) {
                final int b0 = b[0] & 0xFF;
                final int b1 = b[1] & 0xFF;
                if (b0 == 0xFE && b1 == 0xFF) {
                    return "UTF-16BE";
                }
                else if (b0 == 0xFF && b1 == 0xFE) {
                    return "UTF-16LE";
                }
            }
            // No BOM: put the bytes back.
            stream.reset();
        }
        return encoding;
    }

    /**
     * Maps the first four bytes of a document to an encoding name by matching
     * them against byte order marks and against the byte patterns that
     * {@code '<'} (0x3C) and {@code '?'} (0x3F) produce in various encodings.
     *
     * @param b4 the first four bytes of the document; must have length &gt;= 4
     * @return the matching encoding name, or {@code null} if nothing matched
     */
    protected String getEncodingName(byte[] b4) {

        // UTF-16 byte order marks
        int b0 = b4[0] & 0xFF;
        int b1 = b4[1] & 0xFF;
        if (b0 == 0xFE && b1 == 0xFF) {
            return "UTF-16BE";
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            return "UTF-16LE";
        }

        // UTF-8 byte order mark
        int b2 = b4[2] & 0xFF;
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            return "UTF-8";
        }

        // 0x3C padded to four bytes, in each of the four possible byte orders
        int b3 = b4[3] & 0xFF;
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
            return "ISO-10646-UCS-4";
        }

        // 0x3C 0x3F as two-byte units without a BOM
        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
            return "UTF-16BE";
        }
        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
            return "UTF-16LE";
        }

        // Signature mapped to the CP037 code page
        // (presumably "<?xm" in EBCDIC - confirm against the XML spec, Appendix F).
        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
            return "CP037";
        }

        return null;
    }

    /**
     * Reads the current input source to the end, validating every character
     * and passing each chunk to the handler's {@code characters} callback.
     * Invalid characters are reported through the error reporter as fatal
     * errors. The input source reference is cleared once its reader has been
     * opened; the reader stays open until {@link #close()} is called.
     *
     * @throws IOException if opening or reading the source fails
     */
    public void parse() throws IOException {

        fReader = getReader(fSource);
        fSource = null;
        // Fill at most length - 1 chars so one slot stays free for a low
        // surrogate that has to be pulled in from the next read.
        int readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1);
        while (readSize != -1) {
            for (int i = 0; i < readSize; ++i) {
                char ch = fTempString.ch[i];
                if (!isValid(ch)) {
                    if (XMLChar.isHighSurrogate(ch)) {
                        int ch2;
                        // Fetch the second half of the pair.
                        if (++i < readSize) {
                            ch2 = fTempString.ch[i];
                        }
                        else {
                            // Pair straddles the chunk boundary: read one more
                            // char and append it in the reserved slot.
                            ch2 = fReader.read();
                            if (ch2 != -1) {
                                fTempString.ch[readSize++] = (char) ch2;
                            }
                        }
                        if (XMLChar.isLowSurrogate(ch2)) {
                            // Validate the combined supplemental code point.
                            int sup = XMLChar.supplemental(ch, (char) ch2);
                            if (!isValid(sup)) {
                                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                    "InvalidCharInContent",
                                    new Object[] { Integer.toString(sup, 16) },
                                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
                            }
                        }
                        else {
                            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                "InvalidCharInContent",
                                new Object[] { Integer.toString(ch2, 16) },
                                XMLErrorReporter.SEVERITY_FATAL_ERROR);
                        }
                    }
                    else {
                        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                            "InvalidCharInContent",
                            new Object[] { Integer.toString(ch, 16) },
                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
                    }
                }
            }
            if (fHandler != null && readSize > 0) {
                fTempString.offset = 0;
                fTempString.length = readSize;
                fHandler.characters(
                    fTempString,
                    fHandler.modifyAugmentations(null, true));
            }
            readSize = fReader.read(fTempString.ch, 0, fTempString.ch.length - 1);
        }

    }

    /**
     * Sets the input source to be read by the next call to {@link #parse()}.
     *
     * @param source the input source
     */
    public void setInputSource(XMLInputSource source) {
        fSource = source;
    }

    /**
     * Closes the reader opened by {@link #parse()}, if any. Calling this when
     * no reader is open does nothing.
     *
     * @throws IOException if closing the underlying reader fails
     */
    public void close() throws IOException {
        if (fReader != null) {
            fReader.close();
            fReader = null;
        }
    }

    /**
     * Tells whether a character or code point is acceptable in the included
     * text. Delegates to {@link XMLChar#isValid(int)}; subclasses may override
     * this to apply different validity rules.
     *
     * @param ch the character or supplemental code point to check
     * @return {@code true} if the character is valid
     */
    protected boolean isValid(int ch) {
        return XMLChar.isValid(ch);
    }

    /**
     * Resizes the chunk buffer. The array is allocated one char larger than
     * {@code bufferSize} and is only replaced when its size actually changes.
     *
     * @param bufferSize the number of characters to read per chunk
     */
    protected void setBufferSize(int bufferSize) {
        if (fTempString.ch.length != ++bufferSize) {
            fTempString.ch = new char[bufferSize];
        }
    }

}