package nu.xom.tests;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import nu.xom.Attribute;
import nu.xom.Builder;
import nu.xom.Document;
import nu.xom.Element;
import nu.xom.ParsingException;
import nu.xom.Serializer;

import com.ibm.icu.text.UTF16;

/**
 * Round-trip tests for character encodings. A document holding one
 * element per character is serialized in the encoding under test,
 * parsed back, and each element's text is compared with the code
 * point recorded in its <code>c</code> attribute.
 */
public class EncodingTest extends XOMTestCase {


    /** Size hint, in bytes, for the buffer that receives the serialized document. */
    private static final int INITIAL_BUFFER_SIZE = 100000;

    /**
     * True when the running VM reports version 1.4 or later. Some tests
     * only run when this is set.
     */
    private static final boolean java14OrLater
      = isJava14OrLater(System.getProperty("java.version"));

    /** The document serialized by every test; rebuilt in {@link #setUp()}. */
    private Document doc;


    public EncodingTest(String name) {
        super(name);
    }


    /**
     * Builds the test document: one <code>d</code> element for every
     * character from 0x20 through 0xD7FF and from 0xE000 through 0xFFFD
     * (the surrogate block in between is skipped), followed by the 256
     * code points starting at 0x1D100, which lie outside the Basic
     * Multilingual Plane. Each element's text is the character itself and
     * its <code>c</code> attribute is the decimal code point.
     */
    protected void setUp() {

        Element root = new Element("root");
        doc = new Document(root);

        for (int i = 0x20; i <= 0xD7FF; i++) {
            appendSample(root, i, String.valueOf((char) i));
        }

        for (int i = 0xE000; i <= 0xFFFD; i++) {
            appendSample(root, i, String.valueOf((char) i));
        }

        for (int i = 0; i < 256; i++) {
            int u = 0x1D100 + i;
            // Beyond the BMP a single char cannot hold the code point,
            // so ICU builds the string for it.
            appendSample(root, u, UTF16.valueOf(u));
        }

    }


    /** Drops the document and requests a garbage collection. */
    protected void tearDown() {
        doc = null;
        System.gc();
    }


    public void testEUCJP() throws ParsingException, IOException {
        checkAll("EUC-JP");
    }


    public void testShift_JIS() throws ParsingException, IOException {
        checkAll("Shift_JIS");
    }


    public void testISO2022JP() throws ParsingException, IOException {
        checkAll("ISO-2022-JP");
    }


    public void testGeneric() throws ParsingException, IOException {
        checkAll("Cp1252");
    }


    public void testMacRoman() throws ParsingException, IOException {
        checkAll("MacRoman");
    }


    public void testBig5() throws ParsingException, IOException {
        checkAll("Big5");
    }


    public void testUSASCII() throws ParsingException, IOException {
        checkAll("US-ASCII");
    }


    public void testASCII() throws ParsingException, IOException {
        checkAll("ASCII");
    }


    public void testLatin1() throws ParsingException, IOException {
        checkAll("ISO-8859-1");
    }


    public void testLatin2() throws ParsingException, IOException {
        checkAll("ISO-8859-2");
    }


    public void testLatin3() throws ParsingException, IOException {
        checkAll("ISO-8859-3");
    }


    public void testLatin4() throws ParsingException, IOException {
        checkAll("ISO-8859-4");
    }


    public void testCyrillic() throws ParsingException, IOException {
        checkAll("ISO-8859-5");
    }


    public void testArabic() throws ParsingException, IOException {
        checkAll("ISO-8859-6");
    }


    public void testGreek() throws ParsingException, IOException {
        checkAll("ISO-8859-7");
    }


    public void testThai() throws ParsingException, IOException {
        checkAll("TIS-620");
    }


    public void testHebrew() throws ParsingException, IOException {
        checkAll("ISO-8859-8");
    }


    public void testLatin5() throws ParsingException, IOException {
        checkAll("ISO-8859-9");
    }


    public void testUTF8() throws ParsingException, IOException {
        checkAll("UTF-8");
    }


    public void testUTF16() throws ParsingException, IOException {
        checkAll("UTF-16");
    }


    public void testUCS2() throws ParsingException, IOException {
        checkAll("ISO-10646-UCS-2");
    }


    public void testEBCDIC() throws ParsingException, IOException {
        checkAll("Cp037");
    }


    // The next three tests are silently skipped on VMs older than 1.4.

    public void testLatin7() throws ParsingException, IOException {
        if (java14OrLater) {
            checkAll("ISO-8859-13");
        }
    }


    public void testLatin9() throws ParsingException, IOException {
        if (java14OrLater) {
            checkAll("ISO-8859-15");
        }
    }


    public void testGB18030() throws ParsingException, IOException {
        if (java14OrLater) {
            checkAll("GB18030");
        }
    }


    // The next four tests are silently skipped when the VM does not
    // provide the charset.

    public void testUCS4() throws ParsingException, IOException {
        if (charsetAvailable("ISO-10646-UCS-4")) {
            checkAll("ISO-10646-UCS-4");
        }
    }


    public void testLatin6() throws ParsingException, IOException {
        if (charsetAvailable("ISO-8859-10")) {
            checkAll("ISO-8859-10");
        }
    }


    public void testLatin8() throws ParsingException, IOException {
        if (charsetAvailable("ISO-8859-14")) {
            checkAll("ISO-8859-14");
        }
    }


    public void testLatin10() throws ParsingException, IOException {
        if (charsetAvailable("ISO-8859-16")) {
            checkAll("ISO-8859-16");
        }
    }


    // NOTE(review): this exercises the same encoding as testGeneric();
    // presumably it was meant to cover an encoding the serializer has no
    // dedicated support for -- confirm the intended encoding name.
    public void testUnsupportedEncoding()
      throws ParsingException, IOException {
        checkAll("Cp1252");
    }


    /**
     * Decides from a <code>java.version</code> value whether the VM is
     * version 1.4 or later.
     *
     * <p>Both the legacy <code>1.x.y_zz</code> scheme and the newer scheme
     * in which the string starts with the feature release (for example
     * <code>9</code>, <code>17.0.2</code> or <code>21-ea</code>) are
     * understood. Only the leading major and minor numbers are examined.
     * The scan is done by hand so that it needs no API newer than the
     * versions it is trying to detect.</p>
     *
     * @param version the version string; may be null
     * @return false only when the string clearly names a release older
     *     than 1.4; a null or unparseable string yields true so that the
     *     guarded tests run rather than being skipped silently
     */
    private static boolean isJava14OrLater(String version) {

        if (version == null) {
            return true;
        }

        int length = version.length();
        int index = 0;

        int major = 0;
        while (index < length && isAsciiDigit(version.charAt(index))) {
            major = major * 10 + (version.charAt(index) - '0');
            index++;
        }
        if (index == 0) {
            // No leading number at all: version unknown.
            return true;
        }

        int minor = 0;
        if (index < length && version.charAt(index) == '.') {
            index++;
            while (index < length && isAsciiDigit(version.charAt(index))) {
                minor = minor * 10 + (version.charAt(index) - '0');
                index++;
            }
        }

        return major > 1 || (major == 1 && minor >= 4);

    }


    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }


    /**
     * Reports whether this VM can encode strings in the named charset.
     *
     * @param name the charset name to probe
     * @return true if <code>String.getBytes(name)</code> succeeds
     */
    private static boolean charsetAvailable(String name) {
        try {
            "d".getBytes(name);
            return true;
        }
        catch (UnsupportedEncodingException ex) {
            return false;
        }
    }


    /**
     * Appends one sample element to <code>root</code>.
     *
     * @param root the element that receives the new child
     * @param codePoint the code point, stored in decimal in the
     *     <code>c</code> attribute
     * @param text the character as a string, used as the element's text
     */
    private static void appendSample(
      Element root, int codePoint, String text) {
        Element data = new Element("d");
        data.appendChild(text);
        data.addAttribute(new Attribute("c", String.valueOf(codePoint)));
        root.appendChild(data);
    }


    /**
     * Serializes the test document in the given encoding and parses the
     * resulting bytes back into a new document.
     *
     * @param encoding the encoding name handed to the serializer
     * @return the reparsed document
     * @throws ParsingException if the serialized bytes cannot be parsed
     * @throws IOException if serializing or reading fails
     */
    private Document roundTrip(String encoding)
      throws ParsingException, IOException {

        ByteArrayOutputStream out
          = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
        Serializer serializer = new Serializer(out, encoding);
        serializer.write(doc);
        serializer.flush();
        out.close();

        InputStream in = new ByteArrayInputStream(out.toByteArray());
        try {
            return new Builder().build(in);
        }
        finally {
            in.close();
        }

    }


    /**
     * Returns the first code point of an element's text: the single char
     * for a one-char string, otherwise whatever ICU reports at offset 0.
     *
     * @param value the element text
     * @return the code point at the start of <code>value</code>
     */
    private static int firstCodePoint(String value) {
        int result = value.charAt(0);
        if (value.length() > 1) {
            result = UTF16.charAt(value, 0);
        }
        return result;
    }


    /**
     * Identifies the one character/encoding pair excluded from the
     * comparison: code point 133 in Cp037.
     * NOTE(review): presumably U+0085 (NEL) is read back as a line break
     * from EBCDIC input and so cannot round-trip -- confirm.
     */
    private static boolean isKnownLossy(int codePoint, String encoding) {
        return codePoint == 133 && encoding.equalsIgnoreCase("Cp037");
    }


    /**
     * Round-trips the test document through <code>encoding</code> and
     * asserts that every element still holds the character named by its
     * <code>c</code> attribute.
     *
     * @param encoding the encoding to test
     * @throws ParsingException if the serialized document cannot be parsed
     * @throws IOException if serializing or reading fails
     */
    private void checkAll(String encoding)
      throws ParsingException, IOException {

        Element reparsedRoot = roundTrip(encoding).getRootElement();
        int childCount = reparsedRoot.getChildCount();
        for (int i = 0; i < childCount; i++) {
            Element test = (Element) reparsedRoot.getChild(i);
            int expected = Integer.parseInt(test.getAttributeValue("c"));
            if (isKnownLossy(expected, encoding)) {
                continue;
            }
            int actual = firstCodePoint(test.getValue());
            assertEquals("Expected 0x"
              + Integer.toHexString(expected).toUpperCase()
              + " but was 0x"
              + Integer.toHexString(actual).toUpperCase(), expected, actual);
        }

    }


    /**
     * Diagnostic variant of {@link #checkAll(String)}: instead of failing
     * on the first mismatch it prints every mismatching code point (in
     * decimal) to <code>System.err</code> and asserts nothing. No test in
     * this class calls it.
     *
     * @param encoding the encoding to examine
     * @throws ParsingException if the serialized document cannot be parsed
     * @throws IOException if serializing or reading fails
     */
    private void checkSome(String encoding)
      throws ParsingException, IOException {

        Element reparsedRoot = roundTrip(encoding).getRootElement();
        int childCount = reparsedRoot.getChildCount();
        for (int i = 0; i < childCount; i++) {
            Element test = (Element) reparsedRoot.getChild(i);
            int expected = Integer.parseInt(test.getAttributeValue("c"));
            if (isKnownLossy(expected, encoding)) {
                continue;
            }
            int actual = firstCodePoint(test.getValue());
            if (expected != actual) {
                System.err.println(expected);
            }
        }

    }


}