package org.netbeans.modules.xml.schema.core;

import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.Locale;
import javax.swing.text.BadLocationException;
import javax.swing.text.Document;

/**
 * Static helpers that guess the character encoding of XML content by looking
 * at its first bytes (byte order mark / start of the XML declaration) and at
 * the <code>encoding</code> pseudo-attribute of the XML declaration.
 */
public class EncodingHelper {

    /** Maximum number of leading bytes (or characters) examined when detecting an encoding. */
    private static final int EXPECTED_PROLOG_LENGTH = 1000;

    /**
     * Detects the encoding declared at the start of the given stream.
     * <p>
     * The stream is marked before reading and reset afterwards, so the
     * caller can re-read it from the same position.
     *
     * @param in stream to examine; must support {@link InputStream#mark(int)}
     * @return the declared encoding passed through <code>Convertors.iana2java</code>,
     *         or <code>null</code> if the stream does not support marking,
     *         the encoding family cannot be guessed from the first bytes,
     *         or no encoding declaration is found
     * @throws IOException if reading or resetting the stream fails
     */
    public static String detectEncoding(InputStream in) throws IOException {

        if (!in.markSupported()) {
            return null;
        }

        try {
            in.mark(EXPECTED_PROLOG_LENGTH);

            // Unread positions stay zero, exactly as when the prolog is
            // shorter than the buffer.
            byte[] bytes = new byte[EXPECTED_PROLOG_LENGTH];
            int filled = 0;
            try {
                while (filled < bytes.length) {
                    int count = in.read(bytes, filled, bytes.length - filled);
                    if (count <= 0) {
                        break; // end of stream (or a stream that makes no progress)
                    }
                    filled += count;
                }
            } catch (EOFException ex) {
                // Some streams signal end of data with an exception; whatever
                // was read so far is all there is to examine.
            }

            String enc = autoDetectEncoding(bytes);
            if (enc == null) {
                return null;
            }

            enc = detectDeclaredEncoding(bytes, enc);
            if (enc == null) {
                return null;
            }

            return Convertors.iana2java(enc);
        } finally {
            in.reset();
        }
    }

    /**
     * Guesses the encoding family from the first four bytes of the content:
     * a byte order mark, or the byte pattern of the leading
     * <code>&lt;?xm</code> characters.
     *
     * @param buf leading bytes of the content
     * @return a Java encoding name (<code>"UTF8"</code>, <code>"UnicodeBig"</code>,
     *         <code>"UnicodeLittle"</code>, <code>"UnicodeBigUnmarked"</code>,
     *         <code>"UnicodeLittleUnmarked"</code> or <code>"Cp037"</code>),
     *         or <code>null</code> if fewer than four bytes are given or no
     *         known pattern matches
     * @throws IOException never thrown by this implementation; kept in the
     *         signature for existing callers
     */
    static String autoDetectEncoding(byte[] buf) throws IOException {

        if (buf.length < 4) {
            return null;
        }

        final byte b0 = buf[0];
        final byte b1 = buf[1];
        final byte b2 = buf[2];
        final byte b3 = buf[3];

        // 00 3C 00 3F: "<?" as 16-bit big-endian units without a byte order mark
        if (b0 == 0x00 && b1 == 0x3c && b2 == 0x00 && b3 == 0x3f) {
            return "UnicodeBigUnmarked";
        }

        // 3C 00 3F 00: "<?" as 16-bit little-endian units without a byte order mark
        if (b0 == 0x3c && b1 == 0x00 && b2 == 0x3f && b3 == 0x00) {
            return "UnicodeLittleUnmarked";
        }

        // 3C 3F 78 6D: "<?xm" in an ASCII-compatible encoding
        if (b0 == 0x3c && b1 == '?' && b2 == 'x' && b3 == 'm') {
            return "UTF8";
        }

        // 4C 6F A7 94: "<?xm" in EBCDIC
        if (b0 == 0x4c && b1 == 0x6f && b2 == (byte) 0xa7 && b3 == (byte) 0x94) {
            return "Cp037";
        }

        // FE FF followed by anything but 00 00: big-endian byte order mark
        if (b0 == (byte) 0xfe && b1 == (byte) 0xff && (b2 != 0 || b3 != 0)) {
            return "UnicodeBig";
        }

        // FF FE followed by anything but 00 00: little-endian byte order mark
        if (b0 == (byte) 0xff && b1 == (byte) 0xfe && (b2 != 0 || b3 != 0)) {
            return "UnicodeLittle";
        }

        // EF BB BF: UTF-8 byte order mark
        if (b0 == (byte) 0xef && b1 == (byte) 0xbb && b2 == (byte) 0xbf) {
            return "UTF8";
        }

        return null;
    }

    /**
     * Decodes the given bytes with <code>baseEncoding</code> and extracts the
     * quoted value following the first occurrence of <code>encoding</code>
     * that precedes the first <code>?&gt;</code> (or the end of the decoded
     * text when no <code>?&gt;</code> is present).
     *
     * @param data leading bytes of the content
     * @param baseEncoding Java encoding name used to decode <code>data</code>
     * @return the declared encoding value exactly as written, or
     *         <code>null</code> if no complete declaration is found
     * @throws IOException if <code>baseEncoding</code> is not supported
     */
    static String detectDeclaredEncoding(byte[] data, String baseEncoding) throws IOException {

        StringBuffer decoded = new StringBuffer();
        Reader r = new InputStreamReader(new ByteArrayInputStream(data), baseEncoding);
        try {
            for (int c = r.read(); c != -1; c = r.read()) {
                decoded.append((char) c);
            }
        } catch (IOException ex) {
            // Deliberate best effort: the data is only a prefix of the real
            // content, so a decoding failure part-way through is tolerated and
            // whatever was decoded so far is searched.
        }

        String s = decoded.toString();

        int iend = s.indexOf("?>");
        if (iend == -1) {
            iend = s.length();
        }

        int iestart = s.indexOf("encoding");
        if (iestart == -1 || iestart > iend) {
            return null;
        }

        char[] chars = s.toCharArray();
        int i = iestart;

        // skip to the '=' after the attribute name
        for (; i < iend; i++) {
            if (chars[i] == '=') {
                break;
            }
        }

        // find the opening quote and remember which kind it is
        char delimiter = '"';
        for (; i < iend; i++) {
            if (chars[i] == '\'' || chars[i] == '"') {
                delimiter = chars[i];
                break;
            }
        }

        i++; // step past the opening quote

        int ivalstart = i;
        for (; i < iend; i++) {
            if (chars[i] == delimiter) {
                return new String(chars, ivalstart, i - ivalstart);
            }
        }

        return null;
    }

    /**
     * Extracts the <code>charset</code> parameter from a MIME content type
     * such as <code>text/xml; charset="utf-8"</code>.
     * <p>
     * The input is lower-cased first, so the returned value is lower case.
     * A trailing parenthesised comment and surrounding double quotes are
     * removed. If an opening quote has no matching closing quote, everything
     * after the opening quote is taken as the value.
     *
     * @param mime content type string, may be <code>null</code>
     * @return the trimmed charset value, or <code>null</code> if
     *         <code>mime</code> is <code>null</code> or contains no
     *         <code>charset=</code> parameter
     */
    static String parseMIMECharSet(String mime) {

        final String CHARSET = "charset";

        if (mime == null) {
            return null;
        }

        // Locale-independent lower-casing: under e.g. a Turkish default locale
        // 'I' would otherwise become a dotless i and corrupt names like ISO-8859-1.
        String lower = mime.toLowerCase(Locale.ENGLISH);

        int i = lower.indexOf(';');
        if (i == -1) {
            return null;
        }

        String attributes = lower.substring(i + 1);

        i = attributes.indexOf(CHARSET);
        if (i == -1) {
            return null;
        }
        attributes = attributes.substring(i + CHARSET.length());

        // drop any parameters that follow the charset one
        i = attributes.indexOf(';');
        if (i != -1) {
            attributes = attributes.substring(0, i);
        }

        i = attributes.indexOf('=');
        if (i == -1) {
            return null;
        }
        attributes = attributes.substring(i + 1);

        // drop a trailing "(comment)"
        i = attributes.indexOf('(');
        if (i != -1) {
            attributes = attributes.substring(0, i);
        }

        // unwrap a quoted value
        i = attributes.indexOf('"');
        if (i != -1) {
            attributes = attributes.substring(i + 1);
            int closing = attributes.indexOf('"');
            if (closing != -1) {
                attributes = attributes.substring(0, closing);
            }
        }

        return attributes.trim();
    }

    /**
     * Detects the encoding declared at the start of the given document.
     * <p>
     * At most the first {@link #EXPECTED_PROLOG_LENGTH} characters are
     * examined. They are encoded as UTF-8 and handed to
     * {@link #detectEncoding(InputStream)}, so the result does not depend on
     * the platform default charset.
     *
     * @param doc document to examine, may be <code>null</code>
     * @return the detected encoding, or <code>null</code> if <code>doc</code>
     *         is <code>null</code> or no encoding declaration is found
     * @throws IOException if detection on the encoded text fails
     * @throws RuntimeException wrapping a {@link BadLocationException} raised
     *         while reading the document text
     */
    public static String detectEncoding(Document doc) throws IOException {

        if (doc == null) {
            return null;
        }

        try {
            // Read the length once so the requested range is self-consistent.
            int length = Math.min(doc.getLength(), EXPECTED_PROLOG_LENGTH);
            String text = doc.getText(0, length);
            InputStream in = new ByteArrayInputStream(text.getBytes("UTF-8"));
            return detectEncoding(in);
        } catch (BadLocationException ex) {
            throw new RuntimeException(ex.toString(), ex);
        }
    }
}