KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > xml > xmlc > compiler > Parse


1 /*
2  * Enhydra Java Application Server Project
3  *
4  * The contents of this file are subject to the Enhydra Public License
5  * Version 1.1 (the "License"); you may not use this file except in
6  * compliance with the License. You may obtain a copy of the License on
7  * the Enhydra web site ( http://www.enhydra.org/ ).
8  *
9  * Software distributed under the License is distributed on an "AS IS"
10  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
11  * the License for the specific terms governing rights and limitations
12  * under the License.
13  *
14  * The Initial Developer of the Enhydra Application Server is Lutris
15  * Technologies, Inc. The Enhydra Application Server and portions created
16  * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
17  * All Rights Reserved.
18  *
19  * Contributor(s):
20  *
21  * $Id: Parse.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
22  */

23
24 package org.enhydra.xml.xmlc.compiler;
25
26 import java.io.IOException JavaDoc;
27 import java.io.PrintWriter JavaDoc;
28 import java.io.Reader JavaDoc;
29
30 import org.enhydra.xml.io.ErrorReporter;
31 import org.enhydra.xml.io.InputSourceOps;
32 import org.enhydra.xml.xmlc.XMLCError;
33 import org.enhydra.xml.xmlc.XMLCException;
34 import org.enhydra.xml.xmlc.dom.XMLCDocument;
35 import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
36 import org.enhydra.xml.xmlc.dom.XMLCDomFactoryCache;
37 import org.enhydra.xml.xmlc.html.parsers.swing.SwingHTMLParser;
38 import org.enhydra.xml.xmlc.html.parsers.tidy.TidyHTMLParser;
39 import org.enhydra.xml.xmlc.metadata.CompileOptions;
40 import org.enhydra.xml.xmlc.metadata.DocumentClass;
41 import org.enhydra.xml.xmlc.metadata.DocumentFormat;
42 import org.enhydra.xml.xmlc.metadata.InputDocument;
43 import org.enhydra.xml.xmlc.metadata.MetaData;
44 import org.enhydra.xml.xmlc.metadata.Parser;
45 import org.enhydra.xml.xmlc.metadata.ParserType;
46 import org.enhydra.xml.xmlc.misc.LineNumberMap;
47 import org.enhydra.xml.xmlc.misc.SSIReader;
48 import org.enhydra.xml.xmlc.parsers.ParseTracer;
49 import org.enhydra.xml.xmlc.parsers.XMLCParser;
50 import org.enhydra.xml.xmlc.parsers.xerces.XercesParser;
51 import org.w3c.dom.Element JavaDoc;
52 import org.xml.sax.InputSource JavaDoc;
53 import org.xml.sax.SAXException JavaDoc;
54
55
56 /**
57  * Parse a XML or HTML document into a DOM.
58  */

59 public class Parse {
60     /**
61      * XML parser object.
62      */

63     private XMLCParser fXMLCParser;
64
65     /**
66      * Error output.
67      */

68     private ErrorReporter fErrorReporter;
69
70     /**
71      * Verbose output stream.
72      */

73     private PrintWriter JavaDoc fVerboseOut;
74
75     /**
76      * Print verbose messages.
77      */

78     private boolean fVerbose;
79
80     /**
81      * Construct a new file parser.
82      *
83      * @param errorReporter Object used to handle errors.
84      * @param verboseOut Output stream for verbose and trace information.
85      */

86     public Parse(ErrorReporter errorReporter,
87                  PrintWriter JavaDoc verboseOut) {
88         fErrorReporter = errorReporter;
89         fVerboseOut = verboseOut;
90     }
91
92     /*
93      * Is this an the name of an HTML parser?
94      */

95     private boolean isHtmlParser(ParserType parser) {
96         return (parser == ParserType.SWING)
97             || (parser == ParserType.TIDY);
98     }
99
100     /**
101      * Determine the parser to use.
102      */

103     private void setupParser(MetaData metaData,
104                              ParserType parser,
105                              boolean isHtmlDocument)
106         throws XMLCException, IOException JavaDoc {
107         if (parser == null) {
108             // Use default parser based on document type.
109
if (isHtmlDocument) {
110                 parser = ParserType.TIDY;
111             } else {
112                 parser = ParserType.XERCES;
113             }
114         }
115         
116         // Check for conflicts with parser and document type.
117
if (isHtmlDocument) {
118             if (!isHtmlParser(parser)) {
119                 throw new XMLCException("Document appears to be an HTML document; the "
120                                         + parser + " parser only supports XML"
121                                         + " (does the document start with `<?xml ... ?>'?): "
122                                         + getInputSourceDesc(metaData));
123             }
124         } else {
125             if (isHtmlParser(parser)) {
126                 throw new XMLCException("Document appears to be an XML document; the "
127                                         + parser + " parser only supports HTML: "
128                                         + getInputSourceDesc(metaData));
129             }
130             if (metaData.getHTMLSection() != null) {
131                 //FIXME: need to include check for options being defaulted.
132
//throw new XMLCException("HTML options may not be specified for a XML document");
133
}
134         }
135
136         // Load and initialize parser.
137
if (parser == ParserType.SWING) {
138             fXMLCParser = new SwingHTMLParser();
139         } else if (parser == ParserType.TIDY) {
140             fXMLCParser = new TidyHTMLParser();
141         } else if (parser == ParserType.XERCES) {
142             fXMLCParser = new XercesParser();
143         } else {
144             throw new XMLCError("Unknown parser \"" + parser + "\"");
145         }
146     }
147
148     /**
149      * Determine if this is an XML or HTML document. Its either explictly
150      * specified or must be determined by looking at the file.
151      */

152     private boolean isXMLDocument(MetaData metaData) throws IOException JavaDoc {
153         InputDocument inputDoc = metaData.getInputDocument();
154         DocumentFormat docFormat = inputDoc.getDocumentFormat();
155         
156         if (docFormat == DocumentFormat.XML) {
157             return true;
158         } else if (docFormat == DocumentFormat.HTML) {
159             return false;
160         } else {
161             return InputSourceOps.isXMLDocument(inputDoc.getInputSource());
162         }
163     }
164
165     /**
166      * Get the input source, handling SSI filtering.
167      */

168     private InputSource JavaDoc getInputSource(MetaData metaData) throws IOException JavaDoc {
169         InputDocument inputDoc = metaData.getInputDocument();
170         InputSource JavaDoc inputSource = inputDoc.getInputSource();
171         if (inputDoc.getProcessSSI()) {
172             // dbr_20020128.1_start
173
//return SSIReader.create(inputSource);
174
return SSIReader.create(inputSource, inputDoc.getSSIBase());
175             // dbr_20020128.1_end
176
} else {
177             return inputSource;
178         }
179     }
180
181     /** Get a description of the input source for error messages */
182     private String JavaDoc getInputSourceDesc(MetaData metaData) {
183         InputDocument inputDoc = metaData.getInputDocument();
184         return InputSourceOps.getName(inputDoc.getInputSource());
185     }
186     
187     /**
188      * Get the line number map to pass to the parse, or null
189      * if one is not in uses.
190      */

191     private LineNumberMap getLineNumberMap(InputSource JavaDoc input) {
192         Reader JavaDoc reader = input.getCharacterStream();
193         if (reader instanceof SSIReader) {
194             return ((SSIReader)reader).getLineNumberMap();
195         } else {
196             return null;
197         }
198     }
199
200     /*
201      * Parse the page into the DOM and perform various checks and edits.
202      *
203      * @param metaData Document metadata.
204      * @param verboseOut Write verbose and trace information output stream.
205      */

206     public XMLCDocument parse(MetaData metaData)
207         throws XMLCException, IOException JavaDoc {
208
209         Parser parser = metaData.getParser();
210         CompileOptions compileOptions = metaData.getCompileOptions();
211         DocumentClass documentClass = metaData.getDocumentClass();
212
213         // Setup tracing
214
if (fVerboseOut != null) {
215             // Only enable verbose output if a stream is available
216
// and its requested.
217
fVerbose = compileOptions.getVerbose();
218         }
219         boolean printParseInfo
220             = (compileOptions.getPrintParseInfo() && (fVerboseOut != null));
221
222         ParseTracer traceOut = new ParseTracer(printParseInfo ? fVerboseOut : null);
223
224         InputSource JavaDoc inputSource = getInputSource(metaData);
225         LineNumberMap lineNumberMap = getLineNumberMap(inputSource);
226
227         boolean isHtmlDocument = !isXMLDocument(metaData);
228         XMLCDomFactory domFactory
229             = XMLCDomFactoryCache.createFactory(documentClass.getDomFactoryClass(isHtmlDocument),
230                                                 isHtmlDocument);
231         if (fVerbose) {
232             fVerboseOut.println(">>> using DOM Factory class: " + domFactory.getClass().getName());
233         }
234         XMLCDomFactoryCache.checkForOutdatedClass(domFactory);
235
236         setupParser(metaData, parser.getName(), isHtmlDocument);
237
238         boolean saveWarnings = fErrorReporter.getPrintWarnings();
239         fErrorReporter.setPrintWarnings(parser.getWarnings());
240
241         XMLCDocument xmlcDoc;
242         try {
243             xmlcDoc = fXMLCParser.parse(inputSource,
244                                         lineNumberMap,
245                                         domFactory,
246                                         metaData,
247                                         fErrorReporter,
248                                         traceOut);
249         } catch (SAXException JavaDoc except) {
250             Exception JavaDoc useExcept = except.getException();
251             if (useExcept == null) {
252                 useExcept = except;
253             }
254             throw new XMLCException("Parse of \"" + inputSource.getSystemId()
255                                     + "\" failed: " + useExcept, useExcept);
256         } finally {
257             fErrorReporter.setPrintWarnings(saveWarnings);
258         }
259         int cnt = fErrorReporter.getErrorCnt();
260         if (cnt > 0) {
261             throw new XMLCException(cnt + " error" + ((cnt == 1) ? "" : "s")
262                                     + " parsing document");
263         }
264
265         // Normalize the text nodes.
266
Element JavaDoc root = xmlcDoc.getDocument().getDocumentElement();
267         if (root != null) {
268             root.normalize();
269         }
270
271         return xmlcDoc;
272     }
273 }
274
Popular Tags