package org.netbeans.modules.diff;

import org.openide.ErrorManager;

import java.io.*;

/**
 * Guesses the character encoding of an XML document by peeking at the first
 * bytes of a stream.
 *
 * <p>Detection is done in two steps: the leading bytes are matched against
 * known byte-order marks and encoded forms of {@code "<?"} / {@code "<?xm"}
 * to pick a bootstrap encoding, and that encoding is then used to read the
 * value of the {@code encoding} pseudo-attribute of the XML declaration.
 */
final class XMLEncodingHelper {

    /** Number of leading bytes that are marked, read and analysed. */
    private static final int EXPECTED_PROLOG_LENGTH = 1000;

    /**
     * Detects the encoding declared in the prolog of an XML stream.
     *
     * <p>The stream is marked, at most {@link #EXPECTED_PROLOG_LENGTH} bytes
     * are read from it, and it is reset to the marked position before this
     * method returns (also when reading fails).
     *
     * <p>NOTE(review): when a byte signature is recognised but the prolog
     * carries no {@code encoding} pseudo-attribute, the result is
     * {@code null} rather than the auto-detected encoding. This is kept as-is
     * because callers may rely on it - confirm before changing.
     *
     * @param in stream to inspect; must support {@code mark}/{@code reset}
     * @return the declared encoding name, or {@code null} if the stream does
     *         not support marking, no known byte signature is present, or no
     *         encoding declaration is found
     * @throws IOException if reading or resetting the stream fails
     */
    public static String detectEncoding(InputStream in) throws IOException {

        if (!in.markSupported()) {
            ErrorManager.getDefault().log("XMLEncodingHelper got unmarkable stream: " + in.getClass());
            return null;
        }

        try {
            in.mark(EXPECTED_PROLOG_LENGTH);

            // Unread positions stay zero; a zero byte never matches any of the
            // signatures or the characters searched for below.
            byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH];
            try {
                for (int i = 0; i < bytes.length; i++) {
                    int datum = in.read();
                    if (datum == -1) {
                        break;
                    }
                    bytes[i] = (byte) datum;
                }
            } catch (EOFException ignored) {
                // End of data signalled by exception: stop reading (instead of
                // retrying for every remaining slot) and analyse what we have.
            }

            String enc = autoDetectEncoding(bytes);
            if (enc == null) {
                return null;
            }

            return detectDeclaredEncoding(bytes, enc);
        } finally {
            in.reset();
        }
    }

    /**
     * Picks a bootstrap encoding from the leading bytes of a document.
     *
     * <p>Recognised signatures: the UTF-8 and UTF-16 byte-order marks, and
     * the byte sequences matched below for a document starting with
     * {@code "<?"} (two-byte units) or {@code "<?xm"} (one-byte units,
     * ASCII-compatible or the Cp037 pattern).
     *
     * @param buf leading bytes of the document
     * @return a Java encoding name usable with {@link InputStreamReader}, or
     *         {@code null} if no signature matches or {@code buf} is too short
     * @throws IOException never thrown by this implementation; kept in the
     *         signature for existing callers
     */
    static String autoDetectEncoding(byte[] buf) throws IOException {

        // The UTF-8 byte-order mark is only three bytes long, so it must not
        // depend on the four-byte minimum required by the other signatures.
        if (buf.length >= 3
                && buf[0] == (byte) 0xef && buf[1] == (byte) 0xbb && buf[2] == (byte) 0xbf) {
            return "UTF8";
        }

        if (buf.length < 4) {
            return null;
        }

        switch (buf[0]) {
            case 0:
                // 00 3C 00 3F: "<?" as big-endian two-byte units, no BOM
                if (buf[1] == (byte) 0x3c && buf[2] == (byte) 0x00 && buf[3] == (byte) 0x3f) {
                    return "UnicodeBigUnmarked";
                }
                break;

            case 0x3c:
                switch (buf[1]) {
                    case 0x00:
                        // 3C 00 3F 00: "<?" as little-endian two-byte units, no BOM
                        if (buf[2] == (byte) 0x3f && buf[3] == (byte) 0x00) {
                            return "UnicodeLittleUnmarked";
                        }
                        break;

                    case '?':
                        // "<?xm" in an ASCII-compatible single-byte form
                        if (buf[2] == 'x' && buf[3] == 'm') {
                            return "UTF8";
                        }
                        break;
                }
                break;

            case 0x4c:
                // 4C 6F A7 94: "<?xm" in EBCDIC
                if (buf[1] == (byte) 0x6f && buf[2] == (byte) 0xa7 && buf[3] == (byte) 0x94) {
                    return "Cp037";
                }
                break;

            case (byte) 0xfe:
                // FE FF byte-order mark; FE FF 00 00 is deliberately excluded
                if (buf[1] == (byte) 0xff && (buf[2] != 0 || buf[3] != 0)) {
                    return "UnicodeBig";
                }
                break;

            case (byte) 0xff:
                // FF FE byte-order mark; FF FE 00 00 is deliberately excluded
                if (buf[1] == (byte) 0xfe && (buf[2] != 0 || buf[3] != 0)) {
                    return "UnicodeLittle";
                }
                break;
        }

        return null;
    }

    /**
     * Extracts the value of the {@code encoding} pseudo-attribute from the
     * XML declaration contained in {@code data}.
     *
     * <p>The search is limited to the text before the first {@code "?>"}
     * (or the whole decoded text when there is none). The value is the text
     * between the first quote character following the first {@code '='}
     * after the word {@code encoding} and the next occurrence of that same
     * quote character.
     *
     * @param data leading bytes of the document
     * @param baseEncoding encoding used to decode {@code data}
     * @return the declared encoding exactly as written in the document, or
     *         {@code null} if none is found
     * @throws IOException if {@code baseEncoding} is not supported
     */
    static String detectDeclaredEncoding(byte[] data, String baseEncoding) throws IOException {

        StringBuilder text = new StringBuilder(data.length);
        Reader r = new InputStreamReader(new ByteArrayInputStream(data), baseEncoding);
        try {
            for (int c = r.read(); c != -1; c = r.read()) {
                text.append((char) c);
            }
        } catch (IOException ignored) {
            // Best effort: continue with whatever could be decoded so far.
        } finally {
            r.close();
        }

        String s = text.toString();

        int declEnd = s.indexOf("?>");
        if (declEnd == -1) {
            declEnd = s.length();
        }

        int attrStart = s.indexOf("encoding");
        if (attrStart == -1 || attrStart > declEnd) {
            return null;
        }

        // Advance to the '=' that follows the attribute name.
        int i = attrStart;
        while (i < declEnd && s.charAt(i) != '=') {
            i++;
        }

        // Advance to the opening quote and remember which kind it is.
        char delimiter = '"';
        for (; i < declEnd; i++) {
            char ch = s.charAt(i);
            if (ch == '\'' || ch == '"') {
                delimiter = ch;
                break;
            }
        }

        // The value runs from just after the opening quote to the matching one.
        int valueStart = i + 1;
        for (i = valueStart; i < declEnd; i++) {
            if (s.charAt(i) == delimiter) {
                return s.substring(valueStart, i);
            }
        }

        return null;
    }

}