KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > syndication > unittest > TestXmlReader


1 /*
2  * Copyright 2004 Sun Microsystems, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17 package com.sun.syndication.unittest;
18
19 import com.sun.syndication.io.XmlReader;
20 import junit.framework.TestCase;
21
22 import java.io.*;
23 import java.text.MessageFormat JavaDoc;
24 import java.util.HashMap JavaDoc;
25 import java.util.Map JavaDoc;
26
27 /**
28  * @author pat, tucu
29  *
30  */

31 public class TestXmlReader extends TestCase {
32
33     public static void main(String JavaDoc[] args) throws Exception JavaDoc {
34         TestXmlReader test = new TestXmlReader();
35         test.testRawBom();
36         test.testRawNoBom();
37         test.testHttp();
38     }
39
40     protected void _testRawNoBomValid(String JavaDoc encoding) throws Exception JavaDoc {
41         InputStream is = getXmlStream("no-bom","xml",encoding,encoding);
42         XmlReader xmlReader = new XmlReader(is,false);
43         assertEquals(xmlReader.getEncoding(),"UTF-8");
44
45         is = getXmlStream("no-bom","xml-prolog",encoding,encoding);
46         xmlReader = new XmlReader(is);
47         assertEquals(xmlReader.getEncoding(),"UTF-8");
48
49         is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
50         xmlReader = new XmlReader(is);
51         assertEquals(xmlReader.getEncoding(),encoding);
52     }
53
54     protected void _testRawNoBomInvalid(String JavaDoc encoding) throws Exception JavaDoc {
55         InputStream is = getXmlStream("no-bom","xml-prolog-encoding",encoding,encoding);
56         try {
57             XmlReader xmlReader = new XmlReader(is,false);
58             fail("It should have failed");
59         }
60         catch (IOException ex) {
61             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
62         }
63      }
64
65     public void testRawNoBom() throws Exception JavaDoc {
66         _testRawNoBomValid("US-ASCII");
67         _testRawNoBomValid("UTF-8");
68         _testRawNoBomValid("ISO-8859-1");
69     }
70
71     protected void _testRawBomValid(String JavaDoc encoding) throws Exception JavaDoc {
72         InputStream is = getXmlStream(encoding+"-bom","xml-prolog-encoding",encoding,encoding);
73         XmlReader xmlReader = new XmlReader(is,false);
74         if (!encoding.equals("UTF-16")) {
75             assertEquals(xmlReader.getEncoding(),encoding);
76         }
77         else {
78             assertEquals(xmlReader.getEncoding().substring(0,encoding.length()),encoding);
79         }
80     }
81
82     protected void _testRawBomInvalid(String JavaDoc bomEnc,String JavaDoc streamEnc,String JavaDoc prologEnc) throws Exception JavaDoc {
83         InputStream is = getXmlStream(bomEnc,"xml-prolog-encoding",streamEnc,prologEnc);
84         try {
85             XmlReader xmlReader = new XmlReader(is,false);
86             fail("It should have failed for BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
87         }
88         catch (IOException ex) {
89             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
90         }
91      }
92
93     public void testRawBom() throws Exception JavaDoc {
94         _testRawBomValid("UTF-8");
95         _testRawBomValid("UTF-16BE");
96         _testRawBomValid("UTF-16LE");
97         _testRawBomValid("UTF-16");
98
99         _testRawBomInvalid("UTF-8-bom","US-ASCII","US-ASCII");
100         _testRawBomInvalid("UTF-8-bom","ISO-8859-1","ISO-8859-1");
101         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16");
102         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16BE");
103         _testRawBomInvalid("UTF-8-bom","UTF-8","UTF-16LE");
104         _testRawBomInvalid("UTF-16BE-bom","UTF-16BE","UTF-16LE");
105         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-16BE");
106         _testRawBomInvalid("UTF-16LE-bom","UTF-16LE","UTF-8");
107     }
108
109     public void testHttp() throws Exception JavaDoc {
110         _testHttpValid("application/xml","no-bom","US-ASCII",null);
111         _testHttpValid("application/xml","UTF-8-bom","US-ASCII",null);
112         _testHttpValid("application/xml","UTF-8-bom","UTF-8",null);
113         _testHttpValid("application/xml","UTF-8-bom","UTF-8","UTF-8");
114         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
115         _testHttpValid("application/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
116         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
117         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
118         _testHttpValid("application/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
119
120         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
121         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
122         _testHttpInvalid("application/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
123         _testHttpInvalid("application/xml","UTF-8-bom","US-ASCII","US-ASCII");
124         _testHttpInvalid("application/xml;charset=UTF-16","UTF-16LE","UTF-8","UTF-8");
125         _testHttpInvalid("application/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
126
127         _testHttpValid("text/xml","no-bom","US-ASCII",null);
128         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8");
129         _testHttpValid("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null);
130         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null);
131         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16");
132         _testHttpValid("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE");
133         _testHttpValid("text/xml","UTF-8-bom","US-ASCII",null);
134
135         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null);
136         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16");
137         _testHttpInvalid("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE");
138         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE");
139         _testHttpInvalid("text/xml;charset=UTF-16","no-bom","UTF-16BE",null);
140
141         _testHttpLenient("text/xml","no-bom","US-ASCII",null, "US-ASCII");
142         _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8","UTF-8", "UTF-8");
143         _testHttpLenient("text/xml;charset=UTF-8","UTF-8-bom","UTF-8",null, "UTF-8");
144         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
145         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
146         _testHttpLenient("text/xml;charset=UTF-16","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
147         _testHttpLenient("text/xml","UTF-8-bom","US-ASCII",null, "US-ASCII");
148
149         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE",null, "UTF-16BE");
150         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16", "UTF-16");
151         _testHttpLenient("text/xml;charset=UTF-16BE","UTF-16BE-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
152         _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE","UTF-16BE", "UTF-16BE");
153         _testHttpLenient("text/xml;charset=UTF-16","no-bom","UTF-16BE",null, "UTF-16");
154
155         _testHttpLenient("text/html","no-bom","US-ASCII","US-ASCII", "US-ASCII");
156         _testHttpLenient("text/html","no-bom","US-ASCII",null, "US-ASCII");
157         _testHttpLenient("text/html;charset=UTF-8","no-bom","US-ASCII","UTF-8", "UTF-8");
158         _testHttpLenient("text/html;charset=UTF-16BE","no-bom","US-ASCII","UTF-8", "UTF-8");
159     }
160
161     public void _testHttpValid(String JavaDoc cT,String JavaDoc bomEnc,String JavaDoc streamEnc,String JavaDoc prologEnc) throws Exception JavaDoc {
162         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml":"xml-prolog-encoding",streamEnc,prologEnc);
163         XmlReader xmlReader = new XmlReader(is,cT,false);
164         if (!streamEnc.equals("UTF-16")) {
165             // we can not assert things here becuase UTF-8, US-ASCII and ISO-8859-1 look alike for the chars used for detection
166
}
167         else {
168             assertEquals(xmlReader.getEncoding().substring(0,streamEnc.length()),streamEnc);
169         }
170     }
171
172     protected void _testHttpInvalid(String JavaDoc cT,String JavaDoc bomEnc,String JavaDoc streamEnc,String JavaDoc prologEnc) throws Exception JavaDoc {
173         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
174         try {
175             XmlReader xmlReader = new XmlReader(is,cT,false);
176             fail("It should have failed for HTTP Content-type "+cT+", BOM "+bomEnc+", streamEnc "+streamEnc+" and prologEnc "+prologEnc);
177         }
178         catch (IOException ex) {
179             assertTrue(ex.getMessage().indexOf("Invalid encoding,")>-1);
180         }
181      }
182
183     protected void _testHttpLenient(String JavaDoc cT, String JavaDoc bomEnc, String JavaDoc streamEnc, String JavaDoc prologEnc, String JavaDoc shouldbe) throws Exception JavaDoc {
184         InputStream is = getXmlStream(bomEnc,(prologEnc==null)?"xml-prolog":"xml-prolog-encoding",streamEnc,prologEnc);
185         XmlReader xmlReader = new XmlReader(is,cT,true);
186         assertEquals(xmlReader.getEncoding(),shouldbe);
187     }
188
189     // XML Stream generator
190

191     private static final int[] NO_BOM_BYTES = {};
192     private static final int[] UTF_16BE_BOM_BYTES = {0xFE,0xFF};
193     private static final int[] UTF_16LE_BOM_BYTES = {0xFF,0XFE};
194     private static final int[] UTF_8_BOM_BYTES = {0xEF,0xBB,0xBF};
195
196     private static final Map JavaDoc BOMs = new HashMap JavaDoc();
197
198     static {
199         BOMs.put("no-bom",NO_BOM_BYTES);
200         BOMs.put("UTF-16BE-bom",UTF_16BE_BOM_BYTES);
201         BOMs.put("UTF-16LE-bom",UTF_16LE_BOM_BYTES);
202         BOMs.put("UTF-16-bom",NO_BOM_BYTES); // it's added by the writer
203
BOMs.put("UTF-8-bom",UTF_8_BOM_BYTES);
204     }
205
206     private static final MessageFormat JavaDoc XML = new MessageFormat JavaDoc(
207             "<root>{2}</root>");
208     private static final MessageFormat JavaDoc XML_WITH_PROLOG = new MessageFormat JavaDoc(
209             "<?xml version=\"1.0\"?>\n<root>{2}</root>");
210     private static final MessageFormat JavaDoc XML_WITH_PROLOG_AND_ENCODING = new MessageFormat JavaDoc(
211             "<?xml version=\"1.0\" encoding=\"{1}\"?>\n<root>{2}</root>");
212
213     private static final MessageFormat JavaDoc INFO = new MessageFormat JavaDoc(
214             "\nBOM : {0}\nDoc : {1}\nStream Enc : {2}\nProlog Enc : {3}\n");
215
216     private static final Map JavaDoc XMLs = new HashMap JavaDoc();
217
218     static {
219         XMLs.put("xml",XML);
220         XMLs.put("xml-prolog",XML_WITH_PROLOG);
221         XMLs.put("xml-prolog-encoding",XML_WITH_PROLOG_AND_ENCODING);
222     }
223
224     /**
225      *
226      * @param bomType no-bom, UTF-16BE-bom, UTF-16LE-bom, UTF-8-bom
227      * @param xmlType xml, xml-prolog, xml-prolog-charset
228      * @return XML stream
229      */

230     protected InputStream getXmlStream(String JavaDoc bomType,String JavaDoc xmlType,String JavaDoc streamEnc,String JavaDoc prologEnc) throws IOException {
231         ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
232         int[] bom = (int[]) BOMs.get(bomType);
233         if (bom==null) {
234             bom = new int[0];
235         }
236         MessageFormat JavaDoc xml = (MessageFormat JavaDoc) XMLs.get(xmlType);
237         for (int i=0;i<bom.length;i++) {
238             baos.write(bom[i]);
239         }
240         Writer writer = new OutputStreamWriter(baos,streamEnc);
241         String JavaDoc info = INFO.format(new Object JavaDoc[]{bomType,xmlType,prologEnc});
242         String JavaDoc xmlDoc = xml.format(new Object JavaDoc[]{streamEnc,prologEnc,info});
243         writer.write(xmlDoc);
244
245         // PADDDING TO TEST THINGS WORK BEYOND PUSHBACK_SIZE
246
writer.write("<da>\n");
247         for (int i=0;i<10000;i++) {
248             writer.write("<do/>\n");
249         }
250         writer.write("</da>\n");
251
252         writer.close();
253         return new ByteArrayInputStream(baos.toByteArray());
254     }
255
256
257 }
258
Popular Tags