package org.netbeans.modules.xml.core.lib;

import java.io.*;
import java.text.MessageFormat;
import java.util.*;
import javax.swing.text.*;
import org.netbeans.modules.xml.core.parser.ParserLoader;
import junit.framework.*;

/**
 * Exercises {@code EncodingHelper.autoDetectEncoding} and
 * {@code EncodingHelper.detectDeclaredEncoding} against an XML prolog
 * serialized in each of the encodings listed in {@link #JAVA_ENCODINGS}.
 */
public class EncodingHelperTest extends TestCase {

    /** Java encoding names that the round-trip test iterates over. */
    static final String[] JAVA_ENCODINGS = new String[] {
        "ASCII",
        "ISO8859_1",
        "ISO8859_2",
        "ISO8859_3",
        "ISO8859_4",
        "ISO8859_5",
        "ISO8859_6",
        "ISO8859_7",
        "ISO8859_8",
        "ISO8859_9",
        "Big5",
        "Cp037",
        "Cp1006",
        "Cp1025",
        "Cp1026",
        "Cp1046",
        "Cp1097",
        "Cp1098",
        "Cp1112",
        "Cp1122",
        "Cp1123",
        "Cp1124",
        "Cp1250",
        "Cp1251",
        "Cp1252",
        "Cp1253",
        "Cp1254",
        "Cp1255",
        "Cp1256",
        "Cp1257",
        "Cp1258",
        "Cp1381",
        "Cp1383",
        "Cp273",
        "Cp277",
        "Cp278",
        "Cp280",
        "Cp284",
        "Cp285",
        "Cp297",
        "Cp33722",
        "Cp420",
        "Cp424",
        "Cp437",
        "Cp500",
        "Cp737",
        "Cp775",
        "Cp838",
        "Cp850",
        "Cp852",
        "Cp855",
        "Cp857",
        "Cp860",
        "Cp861",
        "Cp862",
        "Cp863",
        "Cp864",
        "Cp865",
        "Cp866",
        "Cp868",
        "Cp869",
        "Cp870",
        "Cp871",
        "Cp874",
        "Cp875",
        "Cp918",
        "Cp921",
        "Cp922",
        "Cp930",
        "Cp933",
        "Cp935",
        "Cp937",
        "Cp939",
        "Cp942",
        "Cp948",
        "Cp949",
        "Cp950",
        "Cp964",
        "Cp970",
        "EUC_CN",
        "EUC_JP",
        "EUC_KR",
        "EUC_TW",
        "GBK",
        "ISO2022JP",
        "ISO2022KR",
        "JIS0201",
        "KOI8_R",
        "MS874",
        "MacArabic",
        "MacCentralEurope",
        "MacCroatian",
        "MacCyrillic",
        "MacDingbat",
        "MacGreek",
        "MacHebrew",
        "MacIceland",
        "MacRoman",
        "MacRomania",
        "MacSymbol",
        "MacThai",
        "MacTurkish",
        "MacUkraine",
        "SJIS",
        "UTF8",
        "Unicode",
        "UTF-16",
        "UnicodeLittle",
        "UnicodeLittleUnmarked",
        "UnicodeBig",
        "UnicodeBigUnmarked",
    };

    /**
     * Encodings for which a {@code null} result from
     * {@code autoDetectEncoding} is tolerated instead of failing the test.
     */
    private static final String[] UNDETECTABLE_ENCODINGS = new String[] {
        "Cp930",
        "MacDingbat",
        "MacSymbol",
    };

    /**
     * Creates the test case.
     *
     * @param testName name passed through to the {@code TestCase} constructor
     */
    public EncodingHelperTest(java.lang.String testName) {
        super(testName);
    }

    /**
     * For every entry of {@link #JAVA_ENCODINGS}: writes an XML declaration
     * naming that encoding, asks {@code EncodingHelper} to auto-detect the
     * encoding family from the bytes, verifies the bytes decode back to the
     * original characters, and verifies that the declared encoding is
     * recovered. Afterwards checks that the four UCS-4 byte-order-mark
     * patterns are reported as undetected ({@code null}).
     *
     * @throws IOException declared by the original signature; I/O problems
     *         inside the loop are converted to test failures
     */
    public void testEncodingDetection() throws IOException {

        // MessageFormat pattern: the doubled single quotes yield literal quotes
        // around the substituted encoding name.
        String fmt = "<?xml version=\"1.0\" encoding=''{0}'' ?> <?pi abcdefghijklmnopqrtsuvwxyz_1234567890\"ABCDEFGHIJKLMNOPQRTSUVWXYZ-.?>";

        for (int i = 0; i < JAVA_ENCODINGS.length; i++) {
            String encoding = JAVA_ENCODINGS[i];
            char[] xml = MessageFormat.format(fmt, new Object[] {encoding}).toCharArray();

            ByteArrayOutputStream os = new ByteArrayOutputStream();
            try {
                OutputStreamWriter wr = new OutputStreamWriter(os, encoding);
                try {
                    wr.write(xml);
                    wr.flush();
                } finally {
                    // Close on the failure path too, not only after a successful write.
                    wr.close();
                }
            } catch (IOException ex) {
                fail("While writing as " + encoding + ":" + ex);
            }

            byte[] out = os.toByteArray();
            char[] chars = new char[xml.length];

            String enc = EncodingHelper.autoDetectEncoding(out);

            if (enc != null) {

                try {
                    InputStreamReader reader =
                        new InputStreamReader(new ByteArrayInputStream(out), encoding);
                    try {
                        // A single read() call is not guaranteed to fill the buffer,
                        // so keep reading until it is full or the stream ends.
                        // Positions left unread stay '\0' and are reported by the
                        // comparison below.
                        readFully(reader, chars);
                    } finally {
                        reader.close();
                    }

                    for (int j = 0; j < chars.length; j++) {
                        if (chars[j] != xml[j]) {
                            fail(encoding + " cannot write '" + xml[j] + "'");
                        }
                    }

                    String denc = EncodingHelper.detectDeclaredEncoding(out, enc);
                    if (!encoding.equals(denc)) {
                        fail("detectDeclaredEncoding() failure got " + denc + " instead of " + encoding);
                    }

                } catch (IOException ex) {
                    System.out.println(encoding + " detected as \t" + enc);
                    fail("Cannot read: " + encoding + " due to: " + ex);
                }

            } else {
                if (!Arrays.asList(UNDETECTABLE_ENCODINGS).contains(encoding)) {
                    fail(encoding + " indetermined \t" + out[0] + ", " + out[1] + ", " + out[2] + ", " + out[3]);
                }
            }

        }

        // Four-byte byte-order-mark patterns; the test requires all of them to be
        // reported as undetected (null).
        byte[] usc4_1234 = new byte[] {(byte)0,(byte)0,(byte)0xfe,(byte)0xff};
        byte[] usc4_4321 = new byte[] {(byte)0xff,(byte)0xfe,(byte)0,(byte)0};
        byte[] usc4_2143 = new byte[] {(byte)0,(byte)0,(byte)0xff,(byte)0xfe};
        byte[] usc4_3412 = new byte[] {(byte)0xfe,(byte)0xff,(byte)0,(byte)0};

        // NOTE(review): the next three arrays are not referenced by any active
        // code here; presumably they belong to the disabled BOM round-trip test
        // announced below. Confirm before removing.
        byte[] utf16_be = new byte[] {(byte)0xfe,(byte)0xff,(byte)'<',(byte)'?'};
        byte[] utf16_le = new byte[] {(byte)0xff,(byte)0xfe,(byte)'<',(byte)'?'};
        byte[] utf8 = new byte[] {(byte)0xef,(byte)0xbb,(byte)0xbf,(byte)'<'};

        if (EncodingHelper.autoDetectEncoding(usc4_1234) != null) fail("usc4_1234");
        if (EncodingHelper.autoDetectEncoding(usc4_4321) != null) fail("usc4_4321");
        if (EncodingHelper.autoDetectEncoding(usc4_2143) != null) fail("usc4_2143");
        if (EncodingHelper.autoDetectEncoding(usc4_3412) != null) fail("usc4_3412");

        System.out.println("Warning: BOM encoding roundtrip test disabled.");

    }

    /**
     * Reads from {@code reader} until {@code buffer} is full or end of stream
     * is reached.
     *
     * @param reader source of characters
     * @param buffer destination, filled from index 0
     * @return number of characters actually stored in {@code buffer}
     * @throws IOException if the underlying read fails
     */
    private static int readFully(Reader reader, char[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = reader.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

}