1 23 24 package org.enhydra.xml.xmlc.compiler; 25 26 import java.io.IOException ; 27 import java.io.PrintWriter ; 28 import java.io.Reader ; 29 30 import org.enhydra.xml.io.ErrorReporter; 31 import org.enhydra.xml.io.InputSourceOps; 32 import org.enhydra.xml.xmlc.XMLCError; 33 import org.enhydra.xml.xmlc.XMLCException; 34 import org.enhydra.xml.xmlc.dom.XMLCDocument; 35 import org.enhydra.xml.xmlc.dom.XMLCDomFactory; 36 import org.enhydra.xml.xmlc.dom.XMLCDomFactoryCache; 37 import org.enhydra.xml.xmlc.html.parsers.swing.SwingHTMLParser; 38 import org.enhydra.xml.xmlc.html.parsers.tidy.TidyHTMLParser; 39 import org.enhydra.xml.xmlc.metadata.CompileOptions; 40 import org.enhydra.xml.xmlc.metadata.DocumentClass; 41 import org.enhydra.xml.xmlc.metadata.DocumentFormat; 42 import org.enhydra.xml.xmlc.metadata.InputDocument; 43 import org.enhydra.xml.xmlc.metadata.MetaData; 44 import org.enhydra.xml.xmlc.metadata.Parser; 45 import org.enhydra.xml.xmlc.metadata.ParserType; 46 import org.enhydra.xml.xmlc.misc.LineNumberMap; 47 import org.enhydra.xml.xmlc.misc.SSIReader; 48 import org.enhydra.xml.xmlc.parsers.ParseTracer; 49 import org.enhydra.xml.xmlc.parsers.XMLCParser; 50 import org.enhydra.xml.xmlc.parsers.xerces.XercesParser; 51 import org.w3c.dom.Element ; 52 import org.xml.sax.InputSource ; 53 import org.xml.sax.SAXException ; 54 55 56 59 public class Parse { 60 63 private XMLCParser fXMLCParser; 64 65 68 private ErrorReporter fErrorReporter; 69 70 73 private PrintWriter fVerboseOut; 74 75 78 private boolean fVerbose; 79 80 86 public Parse(ErrorReporter errorReporter, 87 PrintWriter verboseOut) { 88 fErrorReporter = errorReporter; 89 fVerboseOut = verboseOut; 90 } 91 92 95 private boolean isHtmlParser(ParserType parser) { 96 return (parser == ParserType.SWING) 97 || (parser == ParserType.TIDY); 98 } 99 100 103 private void setupParser(MetaData metaData, 104 ParserType parser, 105 boolean isHtmlDocument) 106 throws XMLCException, IOException { 107 if (parser == null) { 108 if (isHtmlDocument) { 110 parser = ParserType.TIDY; 111 } else { 112 parser = ParserType.XERCES; 113 } 114 } 115 116 if (isHtmlDocument) { 118 if (!isHtmlParser(parser)) { 119 throw new XMLCException("Document appears to be an HTML document; the " 120 + parser + " parser only supports XML" 121 + " (does the document start with `<?xml ... ?>'?): " 122 + getInputSourceDesc(metaData)); 123 } 124 } else { 125 if (isHtmlParser(parser)) { 126 throw new XMLCException("Document appears to be an XML document; the " 127 + parser + " parser only supports HTML: " 128 + getInputSourceDesc(metaData)); 129 } 130 if (metaData.getHTMLSection() != null) { 131 } 134 } 135 136 if (parser == ParserType.SWING) { 138 fXMLCParser = new SwingHTMLParser(); 139 } else if (parser == ParserType.TIDY) { 140 fXMLCParser = new TidyHTMLParser(); 141 } else if (parser == ParserType.XERCES) { 142 fXMLCParser = new XercesParser(); 143 } else { 144 throw new XMLCError("Unknown parser \"" + parser + "\""); 145 } 146 } 147 148 152 private boolean isXMLDocument(MetaData metaData) throws IOException { 153 InputDocument inputDoc = metaData.getInputDocument(); 154 DocumentFormat docFormat = inputDoc.getDocumentFormat(); 155 156 if (docFormat == DocumentFormat.XML) { 157 return true; 158 } else if (docFormat == DocumentFormat.HTML) { 159 return false; 160 } else { 161 return InputSourceOps.isXMLDocument(inputDoc.getInputSource()); 162 } 163 } 164 165 168 private InputSource getInputSource(MetaData metaData) throws IOException { 169 InputDocument inputDoc = metaData.getInputDocument(); 170 InputSource inputSource = inputDoc.getInputSource(); 171 if (inputDoc.getProcessSSI()) { 172 return SSIReader.create(inputSource, inputDoc.getSSIBase()); 175 } else { 177 return inputSource; 178 } 179 } 180 181 182 private String getInputSourceDesc(MetaData metaData) { 183 InputDocument inputDoc = metaData.getInputDocument(); 184 return InputSourceOps.getName(inputDoc.getInputSource()); 185 } 186 187 191 private LineNumberMap getLineNumberMap(InputSource input) { 192 Reader reader = input.getCharacterStream(); 193 if (reader instanceof SSIReader) { 194 return ((SSIReader)reader).getLineNumberMap(); 195 } else { 196 return null; 197 } 198 } 199 200 206 public XMLCDocument parse(MetaData metaData) 207 throws XMLCException, IOException { 208 209 Parser parser = metaData.getParser(); 210 CompileOptions compileOptions = metaData.getCompileOptions(); 211 DocumentClass documentClass = metaData.getDocumentClass(); 212 213 if (fVerboseOut != null) { 215 fVerbose = compileOptions.getVerbose(); 218 } 219 boolean printParseInfo 220 = (compileOptions.getPrintParseInfo() && (fVerboseOut != null)); 221 222 ParseTracer traceOut = new ParseTracer(printParseInfo ? fVerboseOut : null); 223 224 InputSource inputSource = getInputSource(metaData); 225 LineNumberMap lineNumberMap = getLineNumberMap(inputSource); 226 227 boolean isHtmlDocument = !isXMLDocument(metaData); 228 XMLCDomFactory domFactory 229 = XMLCDomFactoryCache.createFactory(documentClass.getDomFactoryClass(isHtmlDocument), 230 isHtmlDocument); 231 if (fVerbose) { 232 fVerboseOut.println(">>> using DOM Factory class: " + domFactory.getClass().getName()); 233 } 234 XMLCDomFactoryCache.checkForOutdatedClass(domFactory); 235 236 setupParser(metaData, parser.getName(), isHtmlDocument); 237 238 boolean saveWarnings = fErrorReporter.getPrintWarnings(); 239 fErrorReporter.setPrintWarnings(parser.getWarnings()); 240 241 XMLCDocument xmlcDoc; 242 try { 243 xmlcDoc = fXMLCParser.parse(inputSource, 244 lineNumberMap, 245 domFactory, 246 metaData, 247 fErrorReporter, 248 traceOut); 249 } catch (SAXException except) { 250 Exception useExcept = except.getException(); 251 if (useExcept == null) { 252 useExcept = except; 253 } 254 throw new XMLCException("Parse of \"" + inputSource.getSystemId() 255 + "\" failed: " + useExcept, useExcept); 256 } finally { 257 fErrorReporter.setPrintWarnings(saveWarnings); 258 } 259 int cnt = fErrorReporter.getErrorCnt(); 260 if (cnt > 0) { 261 throw new XMLCException(cnt + " error" + ((cnt == 1) ? "" : "s") 262 + " parsing document"); 263 } 264 265 Element root = xmlcDoc.getDocument().getDocumentElement(); 267 if (root != null) { 268 root.normalize(); 269 } 270 271 return xmlcDoc; 272 } 273 } 274 | Popular Tags |