1 19 package org.netbeans.modules.xml.core.lib; 20 21 import java.io.*; 22 import javax.swing.text.*; 23 24 30 public class EncodingHelper extends Object { 31 32 private static final int EXPECTED_PROLOG_LENGTH = 1000; 34 35 40 public static String detectEncoding(InputStream in) throws IOException { 41 42 if (! in.markSupported()) { 43 if ( Util.THIS.isLoggable() ) Util.THIS.debug("EncodingHelper got unmarkable stream: " + in.getClass()); return null; 45 } 46 47 try { 48 in.mark(EXPECTED_PROLOG_LENGTH); 49 50 byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH]; 51 for (int i = 0; i<bytes.length; i++) { 52 try { 53 int datum = in.read(); 54 if (datum == -1) break; 55 bytes[i] = (byte) datum; 56 } catch (EOFException ex) { 57 } 58 } 59 60 String enc = autoDetectEncoding(bytes); 61 if (enc == null) return null; 62 63 enc = detectDeclaredEncoding(bytes, enc); 64 if (enc == null) return null; 65 66 return Convertors.iana2java (enc); 67 } finally { 68 in.reset(); 69 } 70 } 71 72 73 76 static String autoDetectEncoding(byte[] buf) throws IOException { 77 78 79 if (buf.length >= 4) { 80 switch (buf[0]) { 81 case 0: 82 if (buf[1] == (byte)0x3c && buf[2] == (byte)0x00 && buf[3] == (byte)0x3f) { 85 return "UnicodeBigUnmarked"; 86 } 87 break; 89 90 case 0x3c: 91 switch (buf[1]) { 92 96 case 0x00: 98 if (buf [2] == (byte)0x3f && buf [3] == (byte)0x00) { 99 return "UnicodeLittleUnmarked"; 100 } 101 break; 102 103 case '?': 105 if (buf [2] == 'x' && buf [3] == 'm') { 106 return "UTF8"; } 108 break; 109 } 110 break; 111 112 case 0x4c: 114 if (buf[1] == (byte)0x6f && buf[2] == (byte)0xa7 && buf[3] == (byte)0x94) { 115 return "Cp037"; } 117 break; 118 119 case (byte)0xfe: 121 if (buf[1] == (byte)0xff && (buf[2] != 0 || buf[3] != 0)) { 122 return "UnicodeBig"; } 124 break; 125 126 case (byte)0xff: 128 if (buf[1] == (byte)0xfe && (buf[2] != 0 || buf[3] != 0)) { 129 return "UnicodeLittle"; } 131 break; 132 133 case (byte)0xef: 135 if (buf[1] == (byte)0xbb && buf[2] == (byte)0xbf) { 136 return "UTF8"; } 138 break; 139 140 } 141 } 142 143 return null; 144 } 145 146 150 static String detectDeclaredEncoding(byte[] data, String baseEncoding) throws IOException { 151 152 StringBuffer buf = new StringBuffer (); 153 Reader r; 154 char delimiter = '"'; 155 156 r = new InputStreamReader(new ByteArrayInputStream(data), baseEncoding); 157 try { 158 for (int c = r.read(); c != -1; c = r.read()) { 159 buf.append((char)c); 160 } 161 } catch (IOException ex) { 162 } 165 166 String s = buf.toString(); 167 168 int iend = s.indexOf("?>"); 169 iend = iend == -1 ? s.length() : iend; 170 171 int iestart = s.indexOf("encoding"); 172 if (iestart == -1 || iestart > iend) return null; 173 174 char[] chars = s.toCharArray(); 175 176 int i = iestart; 177 178 for (; i<iend; i++) { 179 if (chars[i] == '=') break; 180 } 181 182 for (; i<iend; i++) { 183 if (chars[i] == '\'' || chars[i] == '"') { 184 delimiter = chars[i]; 185 break; 186 } 187 188 } 189 190 i++; 191 192 int ivalstart = i; 193 for (; i<iend; i++) { 194 if (chars[i] == delimiter) { 195 return new String (chars, ivalstart, i - ivalstart); 196 } 197 } 198 199 return null; 200 } 201 202 205 static String parseMIMECharSet(String mime) { 206 207 final String CHARSET = "charset"; 208 209 if (mime != null) { 210 int i; 211 212 mime = mime.toLowerCase (); 213 i = mime.indexOf (';'); 214 if (i != -1) { 215 String attributes; 216 217 attributes = mime.substring (i + 1); 218 mime = mime.substring (0, i); 219 220 i = attributes.indexOf (CHARSET); if (i != -1) { 223 attributes = attributes.substring (i + CHARSET.length()); 224 if ((i = attributes.indexOf (';')) != -1) 226 attributes = attributes.substring (0, i); 227 if ((i = attributes.indexOf ('=')) != -1) { 229 attributes = attributes.substring (i + 1); 230 if ((i = attributes.indexOf ('(')) != -1) 232 attributes = attributes.substring (0, i); 233 if ((i = attributes.indexOf ('"')) != -1) { 235 attributes = attributes.substring (i + 1); 236 attributes = attributes.substring (0, 237 attributes.indexOf ('"')); 238 } 239 return attributes.trim(); 240 } 242 } 243 } 244 } 245 246 return null; 247 } 248 249 250 251 255 public static String detectEncoding(Document doc) throws IOException { 256 257 if (doc == null) return null; 258 259 try { 260 261 String text = doc.getText(0, 262 doc.getLength() > EXPECTED_PROLOG_LENGTH ? 263 EXPECTED_PROLOG_LENGTH : doc.getLength() 264 ); 265 InputStream in = new ByteArrayInputStream(text.getBytes()); 266 return detectEncoding(in); 267 268 } catch (BadLocationException ex) { 269 throw new RuntimeException (ex.toString()); 270 } 271 272 } 273 274 } 275 | Popular Tags |