1 57 58 package org.enhydra.apache.xerces.readers; 59 60 import java.io.IOException ; 61 import java.io.InputStreamReader ; 62 import java.io.UnsupportedEncodingException ; 63 64 import org.enhydra.apache.xerces.framework.XMLErrorReporter; 65 import org.enhydra.apache.xerces.utils.ChunkyByteArray; 66 import org.enhydra.apache.xerces.utils.QName; 67 import org.enhydra.apache.xerces.utils.StringPool; 68 69 73 final class UTF8Recognizer extends XMLDeclRecognizer { 74 private byte[] fUTF8BOM = {(byte)0xEF, (byte)0xBB, (byte)0xBF}; 75 public XMLEntityHandler.EntityReader recognize(XMLEntityReaderFactory readerFactory, 79 XMLEntityHandler entityHandler, 80 XMLErrorReporter errorReporter, 81 boolean sendCharDataAsCharArray, 82 StringPool stringPool, 83 ChunkyByteArray data, 84 boolean xmlDecl, 85 boolean allowJavaEncodingName) throws Exception { 86 XMLEntityHandler.EntityReader reader = null; 87 88 boolean seeBOM = false; 90 byte bom0 = data.byteAt(0); 91 if (bom0 == fUTF8BOM[0]) { 92 byte bom1 = data.byteAt(1); 93 if (bom1 == fUTF8BOM[1]) { 94 byte bom2 = data.byteAt(2); 95 if (bom2 == fUTF8BOM[2]) { 96 seeBOM = true; 97 } 98 } 99 } 100 if (seeBOM) { 101 data.read(fUTF8BOM, 0, 3); 103 } 104 105 byte b0 = data.byteAt(0); 106 boolean debug = false; 107 108 if (b0 == '<') { 109 int b1 = data.byteAt(1); 110 if (b1 == '?') { 111 if (data.byteAt(2) == 'x' && data.byteAt(3) == 'm' && data.byteAt(4) == 'l') { 112 int b5 = data.byteAt(5); 113 if (b5 == 0x20 || b5 == 0x09 || b5 == 0x0a || b5 == 0x0d) { 114 XMLEntityHandler.EntityReader declReader = new XMLDeclReader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool); 115 int encoding = prescanXMLDeclOrTextDecl(declReader, xmlDecl); 116 if (encoding != -1) { 117 String encname = stringPool.orphanString(encoding); 118 String enc = encname.toUpperCase(); 119 if ("ISO-10646-UCS-2".equals(enc)) throw new UnsupportedEncodingException (encname); 120 if ("ISO-10646-UCS-4".equals(enc)) throw new UnsupportedEncodingException (encname); 121 if ("UTF-16".equals(enc)) throw new UnsupportedEncodingException (encname); 122 123 String javaencname = MIME2Java.convert(enc); 124 if (null == javaencname) { 125 if (allowJavaEncodingName) { 127 javaencname = encname; 128 } else { 129 throw new UnsupportedEncodingException (encname); 130 } 131 } 132 try { 133 data.rewind(); 134 if ("UTF-8".equalsIgnoreCase(javaencname) || "UTF8".equalsIgnoreCase(javaencname)) { 135 reader = readerFactory.createUTF8Reader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool); 136 } else { 137 reader = readerFactory.createCharReader(entityHandler, errorReporter, sendCharDataAsCharArray, 138 new InputStreamReader (data, javaencname), stringPool); 139 } 140 } catch (UnsupportedEncodingException e) { 141 throw new UnsupportedEncodingException (encname); 142 } catch (Exception e) { 143 if( debug == true ) 144 e.printStackTrace(); } 146 } else { 147 data.rewind(); 148 reader = readerFactory.createUTF8Reader(entityHandler, errorReporter, sendCharDataAsCharArray, data, stringPool); 149 } 150 } 151 } 152 } 153 } 154 return reader; 155 } 156 157 final class XMLDeclReader extends XMLEntityReader { 158 private StringPool fStringPool = null; 162 private ChunkyByteArray fData = null; 163 XMLDeclReader(XMLEntityHandler entityHandler, XMLErrorReporter errorReporter, boolean sendCharDataAsCharArray, ChunkyByteArray data, StringPool stringPool) { 167 super(entityHandler, errorReporter, sendCharDataAsCharArray); 168 fStringPool = stringPool; 169 fData = data; 170 } 171 public boolean lookingAtChar(char ch, boolean skipPastChar) throws IOException { 175 if (fData.byteAt(fCurrentOffset) != ch) 176 return false; 177 if (skipPastChar) 178 fCurrentOffset++; 179 return true; 180 } 181 public boolean lookingAtSpace(boolean skipPastChar) throws IOException { 182 int ch = fData.byteAt(fCurrentOffset) & 0xff; 183 if (ch != 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D) 184 return false; 185 if (skipPastChar) 186 fCurrentOffset++; 187 return true; 188 } 189 public void skipPastSpaces() throws IOException { 190 while (true) { 191 int ch = fData.byteAt(fCurrentOffset) & 0xff; 192 if (ch != 0x20 && ch != 0x09 && ch != 0x0A && ch != 0x0D) 193 return; 194 fCurrentOffset++; 195 } 196 } 197 public boolean skippedString(char[] s) throws IOException { 198 int offset = fCurrentOffset; 199 for (int i = 0; i < s.length; i++) { 200 if (fData.byteAt(offset) != s[i]) 201 return false; 202 offset++; 203 } 204 fCurrentOffset = offset; 205 return true; 206 } 207 public int scanStringLiteral() throws Exception { 208 boolean single; 209 if (!(single = lookingAtChar('\'', true)) && !lookingAtChar('\"', true)) { 210 return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED; 211 } 212 int offset = fCurrentOffset; 213 char qchar = single ? '\'' : '\"'; 214 while (true) { 215 byte b = fData.byteAt(fCurrentOffset); 216 if (b == qchar) 217 break; 218 if (b == -1) 219 return XMLEntityHandler.STRINGLIT_RESULT_QUOTE_REQUIRED; 220 fCurrentOffset++; 221 } 222 int length = fCurrentOffset - offset; 223 StringBuffer str = new StringBuffer (length); 224 for (int i = 0; i < length; i++) { 225 str.append((char)fData.byteAt(offset + i)); 226 } 227 int stringIndex = fStringPool.addString(str.toString()); 228 fCurrentOffset++; return stringIndex; 230 } 231 public void append(XMLEntityHandler.CharBuffer charBuffer, int offset, int length) { 235 throw new RuntimeException ("RDR002 cannot happen"); 236 } 237 public int addString(int offset, int length) { 238 throw new RuntimeException ("RDR002 cannot happen"); 239 } 240 public int addSymbol(int offset, int length) { 241 throw new RuntimeException ("RDR002 cannot happen"); 242 } 243 public void skipToChar(char ch) throws IOException { 244 throw new IOException ("RDR002 cannot happen"); 245 } 246 public void skipPastName(char fastcheck) throws IOException { 247 throw new IOException ("RDR002 cannot happen"); 248 } 249 public void skipPastNmtoken(char fastcheck) throws IOException { 250 throw new IOException ("RDR002 cannot happen"); 251 } 252 public boolean lookingAtValidChar(boolean skipPastChar) throws IOException { 253 throw new IOException ("RDR002 cannot happen"); 254 } 255 public int scanInvalidChar() throws IOException { 256 throw new IOException ("RDR002 cannot happen"); 257 } 258 public int scanCharRef(boolean hex) throws IOException { 259 throw new IOException ("RDR002 cannot happen"); 260 } 261 public int scanAttValue(char qchar, boolean asSymbol) throws IOException { 262 throw new IOException ("RDR002 cannot happen"); 263 } 264 public int scanEntityValue(int qchar, boolean createString) throws IOException { 265 throw new IOException ("RDR002 cannot happen"); 266 } 267 public boolean scanExpectedName(char fastcheck, StringPool.CharArrayRange expectedName) throws IOException { 268 throw new IOException ("RDR002 cannot happen"); 269 } 270 public void scanQName(char fastcheck, QName qname) throws IOException { 271 throw new IOException ("RDR002 cannot happen"); 272 } 273 public int scanName(char fastcheck) throws IOException { 274 throw new IOException ("RDR002 cannot happen"); 275 } 276 public int scanContent(QName element) throws IOException { 277 throw new IOException ("RDR002 cannot happen"); 278 } 279 } 280 } 281 | Popular Tags |