KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > readers > UTF8Recognizer


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.readers;
59
60 import java.io.IOException JavaDoc;
61 import java.io.InputStreamReader JavaDoc;
62 import java.io.UnsupportedEncodingException JavaDoc;
63
64 import org.enhydra.apache.xerces.framework.XMLErrorReporter;
65 import org.enhydra.apache.xerces.utils.ChunkyByteArray;
66 import org.enhydra.apache.xerces.utils.QName;
67 import org.enhydra.apache.xerces.utils.StringPool;
68
69 /**
70  *
71  * @version
72  */

73 final class UTF8Recognizer extends XMLDeclRecognizer {
74     private byte[] fUTF8BOM = {(byte)0xEF, (byte)0xBB, (byte)0xBF};
75     //
76
//
77
//
78
public XMLEntityHandler.EntityReader recognize(XMLEntityReaderFactory readerFactory,
79                                                    XMLEntityHandler entityHandler,
80                                                    XMLErrorReporter errorReporter,
81                                                    boolean sendCharDataAsCharArray,
82                                                    StringPool stringPool,
83                                                    ChunkyByteArray data,
84                                                    boolean xmlDecl,
85                                                    boolean allowJavaEncodingName) throws Exception JavaDoc {
86         XMLEntityHandler.EntityReader reader = null;
87
88         //check to see if there is a UTF8 BOM, if see one, skip past it.
89
boolean seeBOM = false;
90         byte bom0 = data.byteAt(0);
91         if (bom0 == fUTF8BOM[0]) {
92             byte bom1 = data.byteAt(1);
93             if (bom1 == fUTF8BOM[1]) {
94                 byte bom2 = data.byteAt(2);
95                 if (bom2 == fUTF8BOM[2]) {
96                     seeBOM = true;
97                 }
98             }
99         }
100         if (seeBOM) {
101             // it will have the same content anyway.
102
data.read(fUTF8BOM, 0, 3);
103         }
104
105         byte b0 = data.byteAt(0);
106         boolean debug = false;
107
108         if (b0 == '<') {
109             int b1 = data.byteAt(1);
110             if (b1 == '?') {
111                 if (data.byteAt(2) == 'x' && data.byteAt(3) == 'm' && data.byteAt(4) == 'l') {
112                     int b5 = data.byteAt(5);
113                     if (b5 == 0x20 || b5 == 0x09 || b5 == 0x0a || b5 == 0x0d) {
114                         XMLEntityHandler.EntityReader declReader = new XMLDeclReader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool);
115                         int encoding = prescanXMLDeclOrTextDecl(declReader, xmlDecl);
116                         if (encoding != -1) {
117                             String JavaDoc encname = stringPool.orphanString(encoding);
118                             String JavaDoc enc = encname.toUpperCase();
119                             if ("ISO-10646-UCS-2".equals(enc)) throw new UnsupportedEncodingException JavaDoc(encname);
120                             if ("ISO-10646-UCS-4".equals(enc)) throw new UnsupportedEncodingException JavaDoc(encname);
121                             if ("UTF-16".equals(enc)) throw new UnsupportedEncodingException JavaDoc(encname);
122
123                             String JavaDoc javaencname = MIME2Java.convert(enc);
124                             if (null == javaencname) {
125                                 // Not supported
126
if (allowJavaEncodingName) {
127                                     javaencname = encname;
128                                 } else {
129                                     throw new UnsupportedEncodingException JavaDoc(encname);
130                                 }
131                             }
132                             try {
133                                 data.rewind();
134                                 if ("UTF-8".equalsIgnoreCase(javaencname) || "UTF8".equalsIgnoreCase(javaencname)) {
135                                     reader = readerFactory.createUTF8Reader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool);
136                                 } else {
137                                     reader = readerFactory.createCharReader(entityHandler, errorReporter, sendCharDataAsCharArray,
138                                                                             new InputStreamReader JavaDoc(data, javaencname), stringPool);
139                                 }
140                             } catch (UnsupportedEncodingException JavaDoc e) {
141                                 throw new UnsupportedEncodingException JavaDoc(encname);
142                             } catch (Exception JavaDoc e) {
143                                 if( debug == true )
144                                    e.printStackTrace(); // Internal Error
145
}
146                         } else {
147                             data.rewind();
148                             reader = readerFactory.createUTF8Reader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool);
149                         }
150                     }
151                 }
152             }
153         }
154         return reader;
155     }
156
157     final class XMLDeclReader extends XMLEntityReader {
158         //
159
//
160
//
161
private StringPool fStringPool = null;
162         private ChunkyByteArray fData = null;
163         //
164
//
165
//
166
XMLDeclReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, StringPool stringPool) {
167             super(entityHandler, errorReporter, sendCharDataAsCharArray);
168             fStringPool = stringPool;
169             fData = data;
170         }
171         //
172
// These methods are used to parse XMLDecl/TextDecl.
173
//
174
public boolean lookingAtChar(char ch, boolean skipPastChar) throws IOException JavaDoc {
175             if (fData.byteAt(fCurrentOffset) != ch)
176                 return false;
177             if (skipPastChar)
178                 fCurrentOffset++;
179             return true;
180         }
181         public boolean lookingAtSpace(boolean skipPastChar) throws IOException JavaDoc {
182             int ch = fData.byteAt(fCurrentOffset) & 0xff;
183             if (ch != 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D)
184                 return false;
185             if (skipPastChar)
186                 fCurrentOffset++;
187             return true;
188         }
189         public void skipPastSpaces() throws IOException JavaDoc {
190             while (true) {
191                 int ch = fData.byteAt(fCurrentOffset) & 0xff;
192                 if (ch != 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D)
193                     return;
194                 fCurrentOffset++;
195             }
196         }
197         public boolean skippedString(char[] s) throws IOException JavaDoc {
198             int offset = fCurrentOffset;
199             for (int i = 0; i < s.length; i++) {
200                 if (fData.byteAt(offset) != s[i])
201                     return false;
202                 offset++;
203             }
204             fCurrentOffset = offset;
205             return true;
206         }
207         public int scanStringLiteral() throws Exception JavaDoc {
208             boolean single;
209             if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) {
210                 return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
211             }
212             int offset = fCurrentOffset;
213             char qchar = single ? '\'' : '\"';
214             while (true) {
215                 byte b = fData.byteAt(fCurrentOffset);
216                 if (b == qchar)
217                     break;
218                 if (b == -1)
219                     return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED;
220                 fCurrentOffset++;
221             }
222             int length = fCurrentOffset - offset;
223             StringBuffer JavaDoc str = new StringBuffer JavaDoc(length);
224             for (int i = 0; i < length; i++) {
225                 str.append((char)fData.byteAt(offset + i));
226             }
227             int stringIndex = fStringPool.addString(str.toString());
228             fCurrentOffset++; // move past qchar
229
return stringIndex;
230         }
231         //
232
// The rest of the methods in XMLReader are not used for parsing XMLDecl/TextDecl.
233
//
234
public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) {
235             throw new RuntimeException JavaDoc("RDR002 cannot happen");
236         }
237         public int addString(int offset, int length) {
238             throw new RuntimeException JavaDoc("RDR002 cannot happen");
239         }
240         public int addSymbol(int offset, int length) {
241             throw new RuntimeException JavaDoc("RDR002 cannot happen");
242         }
243         public void skipToChar(char ch) throws IOException JavaDoc {
244             throw new IOException JavaDoc("RDR002 cannot happen");
245         }
246         public void skipPastName(char fastcheck) throws IOException JavaDoc {
247             throw new IOException JavaDoc("RDR002 cannot happen");
248         }
249         public void skipPastNmtoken(char fastcheck) throws IOException JavaDoc {
250             throw new IOException JavaDoc("RDR002 cannot happen");
251         }
252         public boolean lookingAtValidChar(boolean skipPastChar) throws IOException JavaDoc {
253             throw new IOException JavaDoc("RDR002 cannot happen");
254         }
255         public int scanInvalidChar() throws IOException JavaDoc {
256             throw new IOException JavaDoc("RDR002 cannot happen");
257         }
258         public int scanCharRef(boolean hex) throws IOException JavaDoc {
259             throw new IOException JavaDoc("RDR002 cannot happen");
260         }
261         public int scanAttValue(char qchar, boolean asSymbol) throws IOException JavaDoc {
262             throw new IOException JavaDoc("RDR002 cannot happen");
263         }
264         public int scanEntityValue(int qchar, boolean createString) throws IOException JavaDoc {
265             throw new IOException JavaDoc("RDR002 cannot happen");
266         }
267         public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws IOException JavaDoc {
268             throw new IOException JavaDoc("RDR002 cannot happen");
269         }
270         public void scanQName(char fastcheck, QName qname) throws IOException JavaDoc {
271             throw new IOException JavaDoc("RDR002 cannot happen");
272         }
273         public int scanName(char fastcheck) throws IOException JavaDoc {
274             throw new IOException JavaDoc("RDR002 cannot happen");
275         }
276         public int scanContent(QName element) throws IOException JavaDoc {
277             throw new IOException JavaDoc("RDR002 cannot happen");
278         }
279     }
280 }
281
Popular Tags