package com.hp.hpl.jena.rdf.arp;

import com.hp.hpl.jena.util.CharEncoding;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UTFDataFormatException;
import java.nio.charset.Charset;

import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.parsers.StandardParserConfiguration;
import org.apache.xerces.util.EncodingMap;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLPullParserConfiguration;
import org.xml.sax.*;

/**
 * An {@link XMLHandler} that drives a Xerces pull-parser configuration and a
 * Xerces {@link SAXParser} sharing that configuration. The handler is
 * installed on the SAX parser via {@code SAX2RDF.installHandlers}, and the
 * XML input is consumed incrementally through {@link #parseSome()}.
 *
 * Instances are obtained from {@link #create()}.
 */
class SingleThreadedParser extends XMLHandler {

    /** Pull configuration used to feed input and to parse incrementally. */
    private XMLPullParserConfiguration pullParser;

    /** SAX parser built on the same configuration as {@link #pullParser}. */
    private SAXParser saxParser;

    /**
     * Encoding name derived from an {@link InputStreamReader} supplied as the
     * character stream of the current input; {@code null} when the input is
     * not such a reader.
     */
    private String readerXMLEncoding = null;

    /**
     * Encoding recorded by {@link #setEncoding(String)} for the current
     * document; {@code null} until the XML declaration has been processed.
     */
    private String xmlEncoding = null;

    /**
     * Wires this handler to the given parser and configuration.
     *
     * @param rdr    the SAX parser on which this handler is installed
     * @param config the pull configuration used for incremental parsing
     * @throws RuntimeException if installing the handlers raises a
     *                          {@link SAXException}
     */
    private SingleThreadedParser(SAXParser rdr,
            XMLPullParserConfiguration config) {
        super();
        pullParser = config;
        saxParser = rdr;
        try {
            SAX2RDF.installHandlers(rdr, this);
        } catch (SAXException e) {
            throw new RuntimeException("Supposedly impossible:", e);
        }
    }

    /**
     * @return the SAX parser this handler is installed on
     */
    SAXParser getSAXParser() {
        return saxParser;
    }

    /**
     * SAX parser subclass that turns string interning off and forwards the
     * encoding found in the XML declaration to the owning
     * {@link SingleThreadedParser}.
     */
    static private class MySAXParser extends SAXParser {
        MySAXParser(StandardParserConfiguration c) {
            super(c);
            try {
                setFeature("http://xml.org/sax/features/string-interning",
                        false);
            } catch (SAXException ignored) {
                // Best effort: a failure to change the feature is not
                // propagated. NOTE(review): the original rationale is not
                // visible in this file.
            }
        }

        /** Owning parser; assigned by {@link SingleThreadedParser#create()}. */
        SingleThreadedParser a;

        /**
         * Reports the declared encoding to the owner before delegating to
         * the superclass. A missing encoding is reported as {@code "UTF"}.
         */
        public void xmlDecl(String version, String encoding, String standalone,
                Augmentations augs) {
            a.setEncoding(encoding == null ? "UTF" : encoding);
            super.xmlDecl(version, encoding, standalone, augs);
        }
    }

    /**
     * Builds a parser whose SAX parser and pull configuration share a single
     * {@link StandardParserConfiguration}.
     *
     * @return a new, fully wired instance
     */
    static SingleThreadedParser create() {
        StandardParserConfiguration c = new StandardParserConfiguration();
        MySAXParser msp = new MySAXParser(c);
        SingleThreadedParser a = new SingleThreadedParser(msp, c);
        msp.a = a;
        return a;
    }

    /**
     * Runs one incremental step of the pull parser ({@code parse(false)}).
     *
     * @return the value returned by the pull parser, or {@code false} when
     *         an I/O error or a {@code FatalParsingErrorException} occurred
     */
    boolean parseSome() {
        try {
            return pullParser.parse(false);
        } catch (UTFDataFormatException e) {
            try {
                generalError(ERR_UTF_ENCODING, e);
            } catch (SAXParseException ignored) {
                // generalError may itself throw; that exception is dropped
                // so this method only signals failure by returning false.
            }
            return false;
        } catch (IOException e) {
            try {
                generalError(ERR_GENERIC_IO, e);
            } catch (SAXParseException ignored) {
                // Same as above: failure is signalled by the return value.
            }
            return false;
        } catch (FatalParsingErrorException e) {
            return false;
        }
    }

    /** RDF parser created for the most recent call to {@code parse}. */
    RDFParser rdfParser;

    /**
     * Parses the input using its system identifier as the base.
     *
     * @param input the XML input
     * @throws IOException  declared for callers; see {@link #parse(InputSource, String)}
     * @throws SAXException declared for callers; see {@link #parse(InputSource, String)}
     */
    synchronized public void parse(InputSource input) throws IOException,
            SAXException {
        parse(input, input.getSystemId());
    }

    /**
     * Parses the input against the given base.
     *
     * Initialises the parse, creates a fresh token pipe, hands the converted
     * input to the pull parser, re-installs the handlers and resets the SAX
     * parser, then runs the RDF parser in embedded or plain-file mode
     * according to the options. {@code endBnodeScope()} is always called
     * afterwards.
     *
     * @param input the XML input
     * @param base  the base passed to {@code initParse}
     * @throws IOException  declared for callers
     * @throws SAXException declared for callers
     */
    synchronized public void parse(InputSource input, String base)
            throws IOException, SAXException {
        initParse(base);
        pipe = new PullingTokenPipe(this);
        pullParser.setInputSource(convert(input));

        SAX2RDF.installHandlers(saxParser, this);
        saxParser.reset();

        try {
            try {
                rdfParser = new RDFParser(pipe, SingleThreadedParser.this);
                if (getOptions().getEmbedding())
                    rdfParser.embeddedFile(documentContext);
                else
                    rdfParser.rdfFile(documentContext);
            } catch (WrappedException wrapped) {
                wrapped.throwMe();
            } catch (ParseException parse) {
                userError(parse);
            }
        } finally {
            endBnodeScope();
        }
    }

    /**
     * Converts a SAX {@link InputSource} into a Xerces {@link XMLInputSource}
     * and resets the per-document encoding state.
     *
     * The character stream is used when present, even if a byte stream is
     * also set; this follows the precedence the SAX {@code InputSource}
     * documentation gives to character streams. (Previously that case
     * returned {@code null}.) Otherwise the byte stream is used, and if
     * neither is set the source is identified by its public and system
     * identifiers only.
     *
     * @param in the SAX input source
     * @return the equivalent Xerces input source, never {@code null}
     */
    XMLInputSource convert(InputSource in) {
        Reader rdr = in.getCharacterStream();
        InputStream str = in.getByteStream();
        String publicID = in.getPublicId();
        String systemID = in.getSystemId();

        // Per-document state: cleared so that a reused parser checks the
        // encoding of every document, not only the first one.
        readerXMLEncoding = null;
        xmlEncoding = null;
        encodingProblems = false;

        if (rdr != null) {
            if (rdr instanceof InputStreamReader) {
                // Remember the reader's encoding so setEncoding can compare
                // it with the one in the XML declaration.
                String javaEnc = ((InputStreamReader) rdr).getEncoding();
                readerXMLEncoding = CharEncoding.create(javaEnc).name();
            }
            return new XMLInputSource(publicID, systemID, systemID, rdr, null);
        }
        if (str != null) {
            return new XMLInputSource(publicID, systemID, systemID, str, null);
        }
        return new XMLInputSource(publicID, systemID, systemID);
    }

    /**
     * Records the encoding of the current document and issues warnings about
     * it. Only the first call per document has any effect.
     *
     * Warnings are raised when the reader's encoding differs from the
     * document's, when the encoding is not an IANA one, or when the name
     * given is not the canonical IANA name. A {@link SAXParseException}
     * thrown while warning is not propagated.
     *
     * @param original the encoding name from the XML declaration, or
     *                 {@code "UTF"} when the declaration gave none
     */
    void setEncoding(String original) {
        CharEncoding encodingInfo = CharEncoding.create(original);
        String e = encodingInfo.name();

        if (xmlEncoding != null) {
            return;
        }

        // No explicit encoding, and the reader is some UTF variant: adopt
        // the reader's encoding without any warning.
        if (e.equals("UTF") && readerXMLEncoding != null
                && readerXMLEncoding.startsWith("UTF")) {
            xmlEncoding = readerXMLEncoding;
            return;
        }

        xmlEncoding = e;
        try {
            if (readerXMLEncoding != null
                    && !readerXMLEncoding.equalsIgnoreCase(e)) {
                putWarning(
                        WARN_ENCODING_MISMATCH,
                        new Location(locator),
                        "Encoding on InputStreamReader or FileReader does not match that of XML document. Use FileInputStream. ["
                                + readerXMLEncoding + " != " + e + "]");
                encodingProblems = true;
            }

            if (e.equals("UTF"))
                return;

            if (!encodingInfo.isIANA()) {
                putWarning(
                        encodingInfo.isInNIO() ? WARN_NON_IANA_ENCODING
                                : WARN_UNSUPPORTED_ENCODING,
                        new Location(locator),
                        encodingInfo.warningMessage());
            } else if (!original.equalsIgnoreCase(e)) {
                putWarning(
                        WARN_NONCANONICAL_IANA_NAME,
                        new Location(locator),
                        "The encoding \""
                                + original
                                + "\" is not the canonical name at IANA, suggest \""
                                + e
                                + "\" would give more interoperability.");
            }
        } catch (SAXParseException ignored) {
            // An exception raised while issuing a warning is deliberately
            // not propagated from here. NOTE(review): the original rationale
            // is not visible in this file.
        }
    }

}