1 18 package org.apache.batik.xml; 19 20 import java.io.IOException ; 21 import java.io.InputStream ; 22 import java.io.PushbackInputStream ; 23 import java.io.Reader ; 24 25 import org.apache.batik.util.io.StreamNormalizingReader; 26 import org.apache.batik.util.io.UTF16Decoder; 27 28 35 public class XMLStreamNormalizingReader extends StreamNormalizingReader { 36 37 42 public XMLStreamNormalizingReader(InputStream is, String encod) 43 throws IOException { 44 PushbackInputStream pbis = new PushbackInputStream (is, 128); 45 byte[] buf = new byte[4]; 46 47 int len = pbis.read(buf); 48 if (len > 0) { 49 pbis.unread(buf, 0, len); 50 } 51 52 if (len == 4) { 53 switch (buf[0] & 0x00FF) { 54 case 0: 55 if (buf[1] == 0x003c && buf[2] == 0x0000 && buf[3] == 0x003f) { 56 charDecoder = new UTF16Decoder(pbis, true); 57 return; 58 } 59 break; 60 61 case '<': 62 switch (buf[1] & 0x00FF) { 63 case 0: 64 if (buf[2] == 0x003f && buf[3] == 0x0000) { 65 charDecoder = new UTF16Decoder(pbis, false); 66 return; 67 } 68 break; 69 70 case '?': 71 if (buf[2] == 'x' && buf[3] == 'm') { 72 Reader r = XMLUtilities.createXMLDeclarationReader 73 (pbis, "UTF8"); 74 String enc = XMLUtilities.getXMLDeclarationEncoding 75 (r, "UTF-8"); 76 charDecoder = createCharDecoder(pbis, enc); 77 return; 78 } 79 } 80 break; 81 82 case 0x004C: 83 if (buf[1] == 0x006f && 84 (buf[2] & 0x00FF) == 0x00a7 && 85 (buf[3] & 0x00FF) == 0x0094) { 86 Reader r = XMLUtilities.createXMLDeclarationReader 87 (pbis, "CP037"); 88 String enc = XMLUtilities.getXMLDeclarationEncoding 89 (r, "EBCDIC-CP-US"); 90 charDecoder = createCharDecoder(pbis, enc); 91 return; 92 } 93 break; 94 95 case 0x00FE: 96 if ((buf[1] & 0x00FF) == 0x00FF) { 97 charDecoder = createCharDecoder(pbis, "UTF-16"); 98 return; 99 } 100 break; 101 102 case 0x00FF: 103 if ((buf[1] & 0x00FF) == 0x00FE) { 104 charDecoder = createCharDecoder(pbis, "UTF-16"); 105 return; 106 } 107 } 108 } 109 110 encod = (encod == null) ? "UTF-8" : encod; 111 charDecoder = createCharDecoder(pbis, encod); 112 } 113 } 114 | Popular Tags |