KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > xml > core > lib > EncodingHelperTest


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
 * Microsystems, Inc. All Rights Reserved.
 */

package org.netbeans.modules.xml.core.lib;

import java.io.*;
import java.text.MessageFormat;
import java.util.*;
import javax.swing.text.*;
import org.netbeans.modules.xml.core.parser.ParserLoader;
import junit.framework.*;

28 public class EncodingHelperTest extends TestCase {
29     
30     static final String JavaDoc[] JAVA_ENCODINGS = new String JavaDoc[] {
31        "ASCII",
32        "ISO8859_1",
33        "ISO8859_2",
34        "ISO8859_3",
35        "ISO8859_4",
36        "ISO8859_5",
37        "ISO8859_6",
38        "ISO8859_7",
39        "ISO8859_8",
40        "ISO8859_9",
41        "Big5",
42        "Cp037",
43        "Cp1006",
44        "Cp1025",
45        "Cp1026",
46        "Cp1046",
47        "Cp1097",
48        "Cp1098",
49        "Cp1112",
50        "Cp1122",
51        "Cp1123",
52        "Cp1124",
53        "Cp1250",
54        "Cp1251",
55        "Cp1252",
56        "Cp1253",
57        "Cp1254",
58        "Cp1255",
59        "Cp1256",
60        "Cp1257",
61        "Cp1258",
62        "Cp1381",
63        "Cp1383",
64        "Cp273",
65        "Cp277",
66        "Cp278",
67        "Cp280",
68        "Cp284",
69        "Cp285",
70        "Cp297",
71        "Cp33722",
72        "Cp420",
73        "Cp424",
74        "Cp437",
75        "Cp500",
76        "Cp737",
77        "Cp775",
78        "Cp838",
79        "Cp850",
80        "Cp852",
81        "Cp855",
82        "Cp857",
83        "Cp860",
84        "Cp861",
85        "Cp862",
86        "Cp863",
87        "Cp864",
88        "Cp865",
89        "Cp866",
90        "Cp868",
91        "Cp869",
92        "Cp870",
93        "Cp871",
94        "Cp874",
95        "Cp875",
96        "Cp918",
97        "Cp921",
98        "Cp922",
99        "Cp930",
100        "Cp933",
101        "Cp935",
102        "Cp937",
103        "Cp939",
104        "Cp942",
105        "Cp948",
106        "Cp949",
107        "Cp950",
108        "Cp964",
109        "Cp970",
110        "EUC_CN",
111        "EUC_JP",
112        "EUC_KR",
113        "EUC_TW",
114        "GBK",
115 // "ISO2022CN", // unsupported on write see http://developer.java.sun.com/developer/bugParade/bugs/4296969.html
116
// "ISO2022CN_CNS", // unsupported on read
117
// "ISO2022CN_GB", // unsupported on read
118
"ISO2022JP",
119        "ISO2022KR",
120        "JIS0201",
121 // "JIS0208", // cannot write '<'
122
// "JIS0212", // cannot write '<'
123
"KOI8_R",
124        "MS874",
125        "MacArabic",
126        "MacCentralEurope",
127        "MacCroatian",
128        "MacCyrillic",
129        "MacDingbat",
130        "MacGreek",
131        "MacHebrew",
132        "MacIceland",
133        "MacRoman",
134        "MacRomania",
135        "MacSymbol",
136        "MacThai",
137        "MacTurkish",
138        "MacUkraine",
139        "SJIS",
140        "UTF8",
141        "Unicode",
142        "UTF-16",
143        "UnicodeLittle",
144        "UnicodeLittleUnmarked",
145        "UnicodeBig",
146        "UnicodeBigUnmarked",
147     };
148     
149     public EncodingHelperTest(java.lang.String JavaDoc testName) {
150         super(testName);
151     }
152     
153     /** Test of autoDetectEncoding method, of class org.netbeans.modules.xml.core.lib.EncodingHelper. */
154     public void testEncodingDetection() throws IOException {
155
156         // typical xml prolog with all allowed IANA encoding names
157
String JavaDoc fmt = "<?xml version=\"1.0\" encoding=''{0}'' ?> <?pi abcdefghijklmnopqrtsuvwxyz_1234567890\"ABCDEFGHIJKLMNOPQRTSUVWXYZ-.?>";
158         String JavaDoc enc = null;
159         
160         for (int i = 0; i<JAVA_ENCODINGS.length; i++) {
161             char xml[] = MessageFormat.format(fmt, new Object JavaDoc[] {JAVA_ENCODINGS[i]}).toCharArray();
162             
163             ByteArrayOutputStream os = new ByteArrayOutputStream();
164             try {
165                 OutputStreamWriter wr = new OutputStreamWriter(os, JAVA_ENCODINGS[i]);
166                 wr.write(xml);
167                 wr.flush();
168                 wr.close();
169             } catch (IOException ex) {
170                 fail("While writing as " + JAVA_ENCODINGS[i] + ":" + ex);
171             }
172             
173             //InputStream in = new ByteArrayInputStream(os.toByteArray());
174
byte[] out = os.toByteArray();
175             char[] chars = new char[xml.length];
176             
177             enc = EncodingHelper.autoDetectEncoding(out);
178             
179             if (enc != null) {
180                 
181                 try {
182                     ByteArrayInputStream in = new ByteArrayInputStream(out);
183                     InputStreamReader reader = new InputStreamReader(in, JAVA_ENCODINGS[i]);
184                     reader.read(chars);
185
186                     // check read characters for identity
187

188                     for(int j = 0; j<chars.length; j++) {
189                         if (chars[j] != xml[j]) {
190                             fail(JAVA_ENCODINGS[i] + " cannot write '" + xml[j] + "'");
191                         }
192                     }
193
194                     // try to decode encoding
195
String JavaDoc denc = EncodingHelper.detectDeclaredEncoding(out, enc);
196                     if (JAVA_ENCODINGS[i].equals(denc) == false) {
197                         fail("detectDeclaredEncoding() failure got " + denc + " instead of " + JAVA_ENCODINGS[i]);
198                     }
199                     
200                 } catch (IOException ex) {
201                     System.out.println(JAVA_ENCODINGS[i] + " detected as \t" + enc);
202                     fail("Cannot read: " + JAVA_ENCODINGS[i] + " due to: " + ex);
203                 }
204                 
205             } else {
206                 Set known = new HashSet();
207                 known.add("Cp930");
208                 known.add("MacDingbat");
209                 known.add("MacSymbol");
210                 
211                 if (known.contains(JAVA_ENCODINGS[i]) == false) {
212                     fail(JAVA_ENCODINGS[i] + " indetermined \t" + out[0] + ", " + out[1] + ", " + out[2] + ", " + out[3]);
213                 }
214             }
215             
216         }
217         
218
219         // Byte Order marks recognition test
220

221         byte[] usc4_1234 = new byte[] {(byte)0,(byte)0,(byte)0xfe,(byte)0xff};
222         byte[] usc4_4321 = new byte[] {(byte)0xff,(byte)0xfe,(byte)0,(byte)0};
223         byte[] usc4_2143 = new byte[] {(byte)0,(byte)0,(byte)0xff,(byte)0xfe};
224         byte[] usc4_3412 = new byte[] {(byte)0xfe,(byte)0xff,(byte)0,(byte)0};
225         byte[] utf16_be = new byte[] {(byte)0xfe,(byte)0xff,(byte)'<',(byte)'?'};
226         byte[] utf16_le = new byte[] {(byte)0xff,(byte)0xfe,(byte)'<',(byte)'?'};
227         byte[] utf8 = new byte[] {(byte)0xef,(byte)0xbb,(byte)0xbf,(byte)'<'};
228
229         if (EncodingHelper.autoDetectEncoding(usc4_1234) != null) fail("usc4_1234");
230         if (EncodingHelper.autoDetectEncoding(usc4_4321) != null) fail("usc4_4321");
231         if (EncodingHelper.autoDetectEncoding(usc4_2143) != null) fail("usc4_2143");
232         if (EncodingHelper.autoDetectEncoding(usc4_3412) != null) fail("usc4_3412");
233         
234         
235         // test roundtrip on recognized
236

237         System.out.println("Warning: BOM encoding roundtrip test disabled.");
238         
239 /* There are probably bugs in JDK that recognizes BOM as '?'
240         // parameters
241         byte[] out, datab, mark;
242         String data;
243         char[] buf, outch;
244         int mark_le;
245         InputStreamReader r;
246         
247         //
248         
249         mark = utf16_be;
250         enc = EncodingHelper.autoDetectEncoding(mark);
251         enc = "UnicodeBig";
252         mark_le = 2;
253         
254         data = MessageFormat.format(fmt, new String[] {enc});
255         outch = data.toCharArray();
256         datab = data.getBytes(enc);
257         
258         out = new byte[datab.length + mark_le];
259         System.arraycopy(mark, 0, out, 0, 4);
260         System.arraycopy(datab, 0, out, mark_le, datab.length);
261         
262         r = new InputStreamReader(new ByteArrayInputStream(out), enc);
263         buf = new char[outch.length];
264         r.read(buf);
265
266         for(int j = 0; j<outch.length; j++) {
267             if (buf[j] != outch[j]) {
268                 fail(enc + " cannot write '" + outch[j] + "'" + " got'" + buf[j] + buf[j+1] + "'");
269             }
270         }
271
272
273         //
274         
275         mark = utf16_le;
276         enc = EncodingHelper.autoDetectEncoding(mark);
277         mark_le = 2;
278         
279         data = MessageFormat.format(fmt, new String[] {enc});
280         outch = data.toCharArray();
281         datab = data.getBytes(enc);
282         
283         out = new byte[datab.length + mark_le];
284         System.arraycopy(mark, 0, out, 0, 4);
285         System.arraycopy(datab, 0, out, mark_le, datab.length);
286         
287         r = new InputStreamReader(new ByteArrayInputStream(out), enc);
288         buf = new char[outch.length];
289         r.read(buf);
290
291         for(int j = 0; j<outch.length; j++) {
292             if (buf[j] != outch[j]) {
293                 fail(enc + " cannot write '" + outch[j] + "'" + " got'" + buf[j] + "'");
294             }
295         }
296                 
297         //
298         
299         mark = utf8;
300         enc = EncodingHelper.autoDetectEncoding(mark);
301         mark_le = 3;
302         
303         data = MessageFormat.format(fmt, new String[] {enc});
304         outch = data.toCharArray();
305         datab = data.getBytes(enc);
306         
307         out = new byte[datab.length + mark_le];
308         System.arraycopy(mark, 0, out, 0, 4);
309         System.arraycopy(datab, 0, out, mark_le, datab.length);
310         
311         r = new InputStreamReader(new ByteArrayInputStream(mark), enc);
312         buf = new char[outch.length];
313         r.read(buf);
314
315         for(int j = 0; j<outch.length; j++) {
316             if (buf[j] != outch[j]) {
317                 fail(enc + " cannot write '" + outch[j] + "'" + " got'" + buf[j] + buf[j+1] + buf[j+2] + "'");
318             }
319         }
320   */

321     }
322     
323     
324 }
325
Popular Tags