|                                                                                                              1
 57
 58  package com.sun.org.apache.xerces.internal.xinclude;
 59
 60  import java.io.BufferedInputStream
  ; 61  import java.io.IOException
  ; 62  import java.io.InputStream
  ; 63  import java.io.InputStreamReader
  ; 64  import java.io.Reader
  ; 65  import java.net.HttpURLConnection
  ; 66  import java.net.URL
  ; 67  import java.net.URLConnection
  ; 68  import java.util.Locale
  ; 69
 70  import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
 71  import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
 72  import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
 73  import com.sun.org.apache.xerces.internal.impl.XMLEntityManager;
 74  import com.sun.org.apache.xerces.internal.impl.XMLErrorReporter;
 75  import com.sun.org.apache.xerces.internal.util.EncodingMap;
 76  import com.sun.org.apache.xerces.internal.util.MessageFormatter;
 77  import com.sun.org.apache.xerces.internal.util.XMLChar;
 78  import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
 79  import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
 80
 81
 101 public class XIncludeTextReader {
 102
 103     private Reader
  fReader; 104     private XIncludeHandler fHandler;
 105     private XMLInputSource fSource;
 106     private XMLErrorReporter fErrorReporter;
 107
 108         private String
  fAccept; 110     private String
  fAcceptLanguage; 111
 112
 118     public XIncludeTextReader(XMLInputSource source, XIncludeHandler handler)
 119         throws IOException
  { 120         fHandler = handler;
 121         fSource = source;
 122     }
 123
 124
 131     public void setErrorReporter(XMLErrorReporter errorReporter) {
 132         fErrorReporter = errorReporter;
 133     }
 134
 135
 141     public void setHttpProperties(String
  accept, String  acceptLanguage) { 142         fAccept = accept;
 143         fAcceptLanguage = acceptLanguage;
 144     }
 145
 146
 151     protected Reader
  getReader(XMLInputSource source) throws IOException  { 152         if (source.getCharacterStream() != null) {
 153             return source.getCharacterStream();
 154         }
 155         else {
 156             InputStream
  stream = null; 157
 158             String
  encoding = source.getEncoding(); 159             if (encoding == null) {
 160                 encoding = "UTF-8";
 161             }
 162             if (source.getByteStream() != null) {
 163                 stream = source.getByteStream();
 164                                 if (!(stream instanceof BufferedInputStream
  )) { 166                     stream = new BufferedInputStream
  (stream); 167                 }
 168             }
 169             else {
 170                 String
  expandedSystemId = XMLEntityManager.expandSystemId(source.getSystemId(), source.getBaseSystemId(), false); 171
 172                 URL
  url = new URL  (expandedSystemId); 173                 URLConnection
  urlCon = url.openConnection(); 174
 175                                                 if (urlCon instanceof HttpURLConnection
  ) { 178                     if( fAccept != null && fAccept.length() > 0) {
 179                         urlCon.setRequestProperty(XIncludeHandler.HTTP_ACCEPT, fAccept);
 180                     }
 181                     if( fAcceptLanguage != null && fAcceptLanguage.length() > 0) {
 182                         urlCon.setRequestProperty(XIncludeHandler.HTTP_ACCEPT_LANGUAGE, fAcceptLanguage);
 183                     }
 184                 }
 185
 186                                 stream = new BufferedInputStream
  (urlCon.getInputStream()); 188
 189                                 String
  rawContentType = urlCon.getContentType(); 191
 192                                 int index = (rawContentType != null) ? rawContentType.indexOf(';') : -1;
 194
 195                 String
  contentType = null; 196                 String
  charset = null; 197                 if (index != -1) {
 198                                         contentType = rawContentType.substring(0, index).trim();
 200
 201                                                             charset = rawContentType.substring(index + 1).trim();
 204                     if (charset.startsWith("charset=")) {
 205                                                 charset = charset.substring(8).trim();
 207                                                 if ((charset.charAt(0) == '"'
 209                             && charset.charAt(charset.length() - 1) == '"')
 210                             || (charset.charAt(0) == '\''
 211                                 && charset.charAt(charset.length() - 1)
 212                                     == '\'')) {
 213                             charset =
 214                                 charset.substring(1, charset.length() - 1);
 215                         }
 216                     }
 217                     else {
 218                         charset = null;
 219                     }
 220                 }
 221                 else {
 222                     contentType = rawContentType.trim();
 223                 }
 224
 225                 String
  detectedEncoding = null; 226
 233                 if (contentType.equals("text/xml")) {
 234                     if (charset != null) {
 235                         detectedEncoding = charset;
 236                     }
 237                     else {
 238                                                 detectedEncoding = "US-ASCII";
 240                     }
 241                 }
 242                 else if (contentType.equals("application/xml")) {
 243                     if (charset != null) {
 244                         detectedEncoding = charset;
 245                     }
 246                     else {
 247                                                 detectedEncoding = getEncodingName(stream);
 249                     }
 250                 }
 251                 else if (contentType.endsWith("+xml")) {
 252                     detectedEncoding = getEncodingName(stream);
 253                 }
 254
 255                 if (detectedEncoding != null) {
 256                     encoding = detectedEncoding;
 257                 }
 258                             }
 260
 261             encoding = encoding.toUpperCase(Locale.ENGLISH);
 262
 263                         consumeBOM(stream, encoding);
 265
 266                                                             if (encoding.equals("UTF-8")) {
 271                 return new UTF8Reader(stream,
 272                     XMLEntityManager.DEFAULT_BUFFER_SIZE,
 273                     fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
 274                     fErrorReporter.getLocale() );
 275             }
 276
 277                         String
  javaEncoding = EncodingMap.getIANA2JavaMapping(encoding); 279
 280                                                 if (javaEncoding == null) {
 284                 MessageFormatter aFormatter =
 285                     fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN);
 286                 Locale
  aLocale = fErrorReporter.getLocale(); 287                 throw new IOException
  ( aFormatter.formatMessage( aLocale, 288                     "EncodingDeclInvalid",
 289                     new Object
  [] {encoding} ) ); 290             }
 291             else if (javaEncoding.equals("ASCII")) {
 292                 return new ASCIIReader(stream,
 293                     XMLEntityManager.DEFAULT_BUFFER_SIZE,
 294                     fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN),
 295                     fErrorReporter.getLocale() );
 296             }
 297
 298             return new InputStreamReader
  (stream, javaEncoding); 299         }
 300     }
 301
 302
 308     protected String
  getEncodingName(InputStream  stream) throws IOException  { 309         final byte[] b4 = new byte[4];
 310         String
  encoding = null; 311
 312                         stream.mark(4);
 315         int count = stream.read(b4, 0, 4);
 316         stream.reset();
 317         if (count == 4) {
 318             encoding = getEncodingName(b4);
 319         }
 320
 321         return encoding;
 322     }
 323
 324
 330     protected void consumeBOM(InputStream
  stream, String  encoding) 331         throws IOException
  { 332
 333         byte[] b = new byte[3];
 334         int count = 0;
 335         stream.mark(3);
 336         if (encoding.equals("UTF-8")) {
 337             count = stream.read(b, 0, 3);
 338             if (count == 3) {
 339                 int b0 = b[0] & 0xFF;
 340                 int b1 = b[1] & 0xFF;
 341                 int b2 = b[2] & 0xFF;
 342                 if (b0 != 0xEF || b1 != 0xBB || b2 != 0xBF) {
 343                                         stream.reset();
 345                 }
 346             }
 347             else {
 348                 stream.reset();
 349             }
 350         }
 351         else if (encoding.startsWith("UTF-16")) {
 352             count = stream.read(b, 0, 2);
 353             if (count == 2) {
 354                 int b0 = b[0] & 0xFF;
 355                 int b1 = b[1] & 0xFF;
 356                 if ((b0 != 0xFE || b1 != 0xFF)
 357                     && (b0 != 0xFF || b1 != 0xFE)) {
 358                                         stream.reset();
 360                 }
 361             }
 362             else {
 363                 stream.reset();
 364             }
 365         }
 366                                     }
 371
 372
 383     protected String
  getEncodingName(byte[] b4) { 384
 385                 int b0 = b4[0] & 0xFF;
 387         int b1 = b4[1] & 0xFF;
 388         if (b0 == 0xFE && b1 == 0xFF) {
 389                         return "UTF-16BE";
 391         }
 392         if (b0 == 0xFF && b1 == 0xFE) {
 393                         return "UTF-16LE";
 395         }
 396
 397                 int b2 = b4[2] & 0xFF;
 399         if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
 400             return "UTF-8";
 401         }
 402
 403                 int b3 = b4[3] & 0xFF;
 405         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
 406                         return "ISO-10646-UCS-4";
 408         }
 409         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
 410                         return "ISO-10646-UCS-4";
 412         }
 413         if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
 414                         return "ISO-10646-UCS-4";
 416         }
 417         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
 418                         return "ISO-10646-UCS-4";
 420         }
 421         if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
 422                                     return "UTF-16BE";
 425         }
 426         if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
 427                                     return "UTF-16LE";
 430         }
 431         if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
 432                                     return "CP037";
 435         }
 436
 437                 return null;
 439
 440     }
 442
 449     public void parse() throws IOException
  { 450                                                         XMLStringBuffer buffer = new XMLStringBuffer();
 457         fReader = getReader(fSource);
 458         int ch;
 459         while((ch = fReader.read()) != -1) {
 460             if (isValid(ch)) {
 461                 buffer.append((char)ch);
 462             }
 463             else if (XMLChar.isHighSurrogate(ch)) {
 464                 int ch2 = fReader.read();
 465                 if (XMLChar.isLowSurrogate(ch2)) {
 466
 467                                         int sup = XMLChar.supplemental((char)ch, (char)ch2);
 469
 470                                         if (!isValid(sup)) {
 472                         fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 473                                                    "InvalidCharInContent",
 474                                                    new Object
  [] { Integer.toString(sup, 16) }, 475                                                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
 476                         continue;
 477                     }
 478                     buffer.append((char) ch);
 479                     buffer.append((char) ch2);
 480                 }
 481                 else {
 482                     fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 483                                                "InvalidCharInContent",
 484                                                new Object
  [] { Integer.toString(ch, 16) }, 485                                                XMLErrorReporter.SEVERITY_FATAL_ERROR);
 486                 }
 487             }
 488             else {
 489                 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
 490                                            "InvalidCharInContent",
 491                                            new Object
  [] { Integer.toString(ch, 16) }, 492                                            XMLErrorReporter.SEVERITY_FATAL_ERROR);
 493             }
 494         }
 495         if (fHandler != null && buffer.length > 0) {
 496             fHandler.characters(
 497                 buffer,
 498                 fHandler.modifyAugmentations(null, true));
 499         }
 500     }
 501
 502
 508     public void close() throws IOException
  { 509         if (fReader != null) {
 510             fReader.close();
 511         }
 512     }
 513
 514
 520     protected boolean isValid(int ch) {
 521         return XMLChar.isValid(ch);
 522     }
 523 }
 524
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |