Parse


1   /*
2    * Enhydra Java Application Server Project
3    * 
4    * The contents of this file are subject to the Enhydra Public License
5    * Version 1.1 (the "License"); you may not use this file except in
6    * compliance with the License. You may obtain a copy of the License on
7    * the Enhydra web site ( http://www.enhydra.org/ ).
8    * 
9    * Software distributed under the License is distributed on an "AS IS"
10   * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See 
11   * the License for the specific terms governing rights and limitations
12   * under the License.
13   * 
14   * The Initial Developer of the Enhydra Application Server is Lutris
15   * Technologies, Inc. The Enhydra Application Server and portions created
16   * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
17   * All Rights Reserved.
18   * 
19   * Contributor(s):
20   * 
21   * $Id: Parse.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
22   */
23  
24  package org.enhydra.xml.xmlc.compiler;
25  
26  import java.io.IOException  ;
27  import java.io.PrintWriter  ;
28  import java.io.Reader  ;
29  
30  import org.enhydra.xml.io.ErrorReporter;
31  import org.enhydra.xml.io.InputSourceOps;
32  import org.enhydra.xml.xmlc.XMLCError;
33  import org.enhydra.xml.xmlc.XMLCException;
34  import org.enhydra.xml.xmlc.dom.XMLCDocument;
35  import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
36  import org.enhydra.xml.xmlc.dom.XMLCDomFactoryCache;
37  import org.enhydra.xml.xmlc.html.parsers.swing.SwingHTMLParser;
38  import org.enhydra.xml.xmlc.html.parsers.tidy.TidyHTMLParser;
39  import org.enhydra.xml.xmlc.metadata.CompileOptions;
40  import org.enhydra.xml.xmlc.metadata.DocumentClass;
41  import org.enhydra.xml.xmlc.metadata.DocumentFormat;
42  import org.enhydra.xml.xmlc.metadata.InputDocument;
43  import org.enhydra.xml.xmlc.metadata.MetaData;
44  import org.enhydra.xml.xmlc.metadata.Parser;
45  import org.enhydra.xml.xmlc.metadata.ParserType;
46  import org.enhydra.xml.xmlc.misc.LineNumberMap;
47  import org.enhydra.xml.xmlc.misc.SSIReader;
48  import org.enhydra.xml.xmlc.parsers.ParseTracer;
49  import org.enhydra.xml.xmlc.parsers.XMLCParser;
50  import org.enhydra.xml.xmlc.parsers.xerces.XercesParser;
51  import org.w3c.dom.Element  ;
52  import org.xml.sax.InputSource  ;
53  import org.xml.sax.SAXException  ;
54  
55  
56  /**
57   * Parse a XML or HTML document into a DOM.
58   */
59  public class Parse {
60      /**
61       * XML parser object.
62       */
63      private XMLCParser fXMLCParser;
64  
65      /**
66       * Error output.
67       */
68      private ErrorReporter fErrorReporter;
69  
70      /**
71       * Verbose output stream.
72       */
73      private PrintWriter   fVerboseOut;
74  
75      /**
76       * Print verbose messages.
77       */
78      private boolean fVerbose;
79  
80      /**
81       * Construct a new file parser.
82       *
83       * @param errorReporter Object used to handle errors.
84       * @param verboseOut Output stream for verbose and trace information.
85       */
86      public Parse(ErrorReporter errorReporter,
87                   PrintWriter   verboseOut) {
88          fErrorReporter = errorReporter;
89          fVerboseOut = verboseOut;
90      }
91  
92      /*
93       * Is this an the name of an HTML parser?
94       */
95      private boolean isHtmlParser(ParserType parser) {
96          return (parser == ParserType.SWING)
97              || (parser == ParserType.TIDY);
98      }
99  
100     /**
101      * Determine the parser to use.
102      */
103     private void setupParser(MetaData metaData,
104                              ParserType parser,
105                              boolean isHtmlDocument)
106         throws XMLCException, IOException   {
107         if (parser == null) {
108             // Use default parser based on document type.
109             if (isHtmlDocument) {
110                 parser = ParserType.TIDY;
111             } else {
112                 parser = ParserType.XERCES;
113             }
114         }
115         
116         // Check for conflicts with parser and document type.
117         if (isHtmlDocument) {
118             if (!isHtmlParser(parser)) {
119                 throw new XMLCException("Document appears to be an HTML document; the "
120                                         + parser + " parser only supports XML"
121                                         + " (does the document start with `<?xml ... ?>'?): "
122                                         + getInputSourceDesc(metaData));
123             }
124         } else {
125             if (isHtmlParser(parser)) {
126                 throw new XMLCException("Document appears to be an XML document; the "
127                                         + parser + " parser only supports HTML: "
128                                         + getInputSourceDesc(metaData));
129             }
130             if (metaData.getHTMLSection() != null) {
131                 //FIXME: need to include check for options being defaulted.
132                 //throw new XMLCException("HTML options may not be specified for a XML document");
133             }
134         }
135 
136         // Load and initialize parser.
137         if (parser == ParserType.SWING) {
138             fXMLCParser = new SwingHTMLParser();
139         } else if (parser == ParserType.TIDY) {
140             fXMLCParser = new TidyHTMLParser(); 
141         } else if (parser == ParserType.XERCES) {
142             fXMLCParser = new XercesParser();
143         } else {
144             throw new XMLCError("Unknown parser \"" + parser + "\"");
145         }
146     }
147 
148     /**
149      * Determine if this is an XML or HTML document.  Its either explictly
150      * specified or must be determined by looking at the file.
151      */
152     private boolean isXMLDocument(MetaData metaData) throws IOException   {
153         InputDocument inputDoc = metaData.getInputDocument();
154         DocumentFormat docFormat = inputDoc.getDocumentFormat();
155         
156         if (docFormat == DocumentFormat.XML) {
157             return true;
158         } else if (docFormat == DocumentFormat.HTML) {
159             return false;
160         } else {
161             return InputSourceOps.isXMLDocument(inputDoc.getInputSource());
162         }
163     }
164 
165     /**
166      * Get the input source, handling SSI filtering.
167      */
168     private InputSource   getInputSource(MetaData metaData) throws IOException   {
169         InputDocument inputDoc = metaData.getInputDocument();
170         InputSource   inputSource = inputDoc.getInputSource();
171         if (inputDoc.getProcessSSI()) {
172             // dbr_20020128.1_start
173             //return SSIReader.create(inputSource);
174             return SSIReader.create(inputSource, inputDoc.getSSIBase());
175             // dbr_20020128.1_end
176         } else {
177             return inputSource;
178         }
179     }
180 
181     /** Get a description of the input source for error messages */
182     private String   getInputSourceDesc(MetaData metaData) {
183         InputDocument inputDoc = metaData.getInputDocument();
184         return InputSourceOps.getName(inputDoc.getInputSource());
185     }
186     
187     /**
188      * Get the line number map to pass to the parse, or null
189      * if one is not in uses.
190      */
191     private LineNumberMap getLineNumberMap(InputSource   input) {
192         Reader   reader = input.getCharacterStream();
193         if (reader instanceof SSIReader) {
194             return ((SSIReader)reader).getLineNumberMap();
195         } else {
196             return null;
197         }
198     }
199 
200     /*
201      * Parse the page into the DOM and perform various checks and edits.
202      *
203      * @param metaData Document metadata.
204      * @param verboseOut Write verbose and trace information output stream.
205      */
206     public XMLCDocument parse(MetaData metaData)
207         throws XMLCException, IOException   {
208 
209         Parser parser = metaData.getParser();
210         CompileOptions compileOptions = metaData.getCompileOptions();
211         DocumentClass documentClass = metaData.getDocumentClass();
212 
213         // Setup tracing
214         if (fVerboseOut != null) {
215             // Only enable verbose output if a stream is available
216             // and its requested.
217             fVerbose = compileOptions.getVerbose();
218         }
219         boolean printParseInfo
220             = (compileOptions.getPrintParseInfo() && (fVerboseOut != null));
221 
222         ParseTracer traceOut = new ParseTracer(printParseInfo ? fVerboseOut : null);
223 
224         InputSource   inputSource = getInputSource(metaData);
225         LineNumberMap lineNumberMap = getLineNumberMap(inputSource);
226 
227         boolean isHtmlDocument = !isXMLDocument(metaData);
228         XMLCDomFactory domFactory
229             = XMLCDomFactoryCache.createFactory(documentClass.getDomFactoryClass(isHtmlDocument),
230                                                 isHtmlDocument);
231         if (fVerbose) {
232             fVerboseOut.println(">>> using DOM Factory class: " + domFactory.getClass().getName());
233         }
234         XMLCDomFactoryCache.checkForOutdatedClass(domFactory);
235 
236         setupParser(metaData, parser.getName(), isHtmlDocument);
237 
238         boolean saveWarnings = fErrorReporter.getPrintWarnings();
239         fErrorReporter.setPrintWarnings(parser.getWarnings());
240 
241         XMLCDocument xmlcDoc;
242         try {
243             xmlcDoc = fXMLCParser.parse(inputSource, 
244                                         lineNumberMap,
245                                         domFactory,
246                                         metaData,
247                                         fErrorReporter,
248                                         traceOut);
249         } catch (SAXException   except) {
250             Exception   useExcept = except.getException();
251             if (useExcept == null) {
252                 useExcept = except;
253             }
254             throw new XMLCException("Parse of \"" + inputSource.getSystemId()
255                                     + "\" failed: " + useExcept, useExcept);
256         } finally {
257             fErrorReporter.setPrintWarnings(saveWarnings);
258         }
259         int cnt = fErrorReporter.getErrorCnt();
260         if (cnt > 0) {
261             throw new XMLCException(cnt + " error" + ((cnt == 1) ? "" : "s")
262                                     + " parsing document");
263         }
264 
265         // Normalize the text nodes.
266         Element   root = xmlcDoc.getDocument().getDocumentElement();
267         if (root != null) {
268             root.normalize();
269         }
270 
271         return xmlcDoc;
272     }    
273 }
274
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags