1 19 package org.netbeans.modules.tasklist.providers; 20 21 import org.openide.ErrorManager; 22 23 import java.io.*; 24 25 31 final class XMLEncodingHelper extends Object { 32 33 37 private static final int EXPECTED_PROLOG_LENGTH = 1000; 39 40 45 public static String detectEncoding(InputStream in) throws IOException { 46 47 if (! in.markSupported()) { 48 ErrorManager.getDefault().log("XMLEncodingHelper got unmarkable stream: " + in.getClass()); return null; 50 } 51 52 try { 53 in.mark(EXPECTED_PROLOG_LENGTH); 54 55 byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH]; 56 for (int i = 0; i<bytes.length; i++) { 57 try { 58 int datum = in.read(); 59 if (datum == -1) break; 60 bytes[i] = (byte) datum; 61 } catch (EOFException ex) { 62 } 63 } 64 65 String enc = autoDetectEncoding(bytes); 66 if (enc == null) return null; 67 68 enc = detectDeclaredEncoding(bytes, enc); 69 if (enc == null) return null; 70 71 return enc; 72 } finally { 73 in.reset(); 74 } 75 } 76 77 78 81 static String autoDetectEncoding(byte[] buf) throws IOException { 82 83 84 if (buf.length >= 4) { 85 switch (buf[0]) { 86 case 0: 87 if (buf[1] == (byte)0x3c && buf[2] == (byte)0x00 && buf[3] == (byte)0x3f) { 90 return "UnicodeBigUnmarked"; } 92 break; 94 95 case 0x3c: 96 switch (buf[1]) { 97 101 case 0x00: 103 if (buf [2] == (byte)0x3f && buf [3] == (byte)0x00) { 104 return "UnicodeLittleUnmarked"; } 106 break; 107 108 case '?': 110 if (buf [2] == 'x' && buf [3] == 'm') { return "UTF8"; } 113 break; 114 } 115 break; 116 117 case 0x4c: 119 if (buf[1] == (byte)0x6f && buf[2] == (byte)0xa7 && buf[3] == (byte)0x94) { 120 return "Cp037"; } 122 break; 123 124 case (byte)0xfe: 126 if (buf[1] == (byte)0xff && (buf[2] != 0 || buf[3] != 0)) { 127 return "UnicodeBig"; } 129 break; 130 131 case (byte)0xff: 133 if (buf[1] == (byte)0xfe && (buf[2] != 0 || buf[3] != 0)) { 134 return "UnicodeLittle"; } 136 break; 137 138 case (byte)0xef: 140 if (buf[1] == (byte)0xbb && buf[2] == (byte)0xbf) { 141 return "UTF8"; } 143 break; 144 145 } 146 } 147 148 return null; 149 } 150 151 155 static String detectDeclaredEncoding(byte[] data, String baseEncoding) throws IOException { 156 157 StringBuffer buf = new StringBuffer (); 158 Reader r; 159 char delimiter = '"'; 160 161 r = new InputStreamReader(new ByteArrayInputStream(data), baseEncoding); 162 try { 163 for (int c = r.read(); c != -1; c = r.read()) { 164 buf.append((char)c); 165 } 166 } catch (IOException ex) { 167 } 170 171 String s = buf.toString(); 172 173 int iend = s.indexOf("?>"); 174 iend = iend == -1 ? s.length() : iend; 175 176 int iestart = s.indexOf("encoding"); if (iestart == -1 || iestart > iend) return null; 178 179 char[] chars = s.toCharArray(); 180 181 int i = iestart; 182 183 for (; i<iend; i++) { 184 if (chars[i] == '=') break; 185 } 186 187 for (; i<iend; i++) { 188 if (chars[i] == '\'' || chars[i] == '"') { 189 delimiter = chars[i]; 190 break; 191 } 192 193 } 194 195 i++; 196 197 int ivalstart = i; 198 for (; i<iend; i++) { 199 if (chars[i] == delimiter) { 200 return new String (chars, ivalstart, i - ivalstart); 201 } 202 } 203 204 return null; 205 } 206 207 } 208 | Popular Tags |