KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > excalibur > xml > impl > JaxpParser


/*
 * Copyright 2002-2004 The Apache Software Foundation
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.excalibur.xml.impl;

import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;

import org.apache.avalon.excalibur.pool.Poolable;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.logger.AbstractLogEnabled;
import org.apache.avalon.framework.parameters.ParameterException;
import org.apache.avalon.framework.parameters.Parameterizable;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.excalibur.xml.EntityResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.sax.SAXParser;
import org.w3c.dom.Document;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;

48 /**
49  * An XMLParser that is only dependant on JAXP 1.1 compliant parsers.
50  *
51  * The configuration can contain the following parameters :
52  * <ul>
53  * <li>validate (boolean, default = <code>false</code>) : should the parser
54  * validate parsed documents ?
55  * </li>
56  * <li>namespace-prefixes (boolean, default = <code>false</code>) : do we want
57  * namespaces declarations also as 'xmlns:' attributes ?<br>
58  * <i>Note</i> : setting this to <code>true</code> confuses some XSL
59  * processors (e.g. Saxon).
60  * </li>
61  * <li>stop-on-warning (boolean, default = <code>true</code>) : should the parser
62  * stop parsing if a warning occurs ?
63  * </li>
64  * <li>stop-on-recoverable-error (boolean, default = <code>true</code>) : should the parser
65  * stop parsing if a recoverable error occurs ?
66  * </li>
67  * <li>reuse-parsers (boolean, default = <code>true</code>) : do we want to reuse
68  * parsers or create a new parser for each parse ?<br>
69  * <i>Note</i> : even if this parameter is <code>true</code>, parsers are not
70  * recycled in case of parsing errors : some parsers (e.g. Xerces) don't like
71  * to be reused after failure.
72  * </li>
73  * <li>sax-parser-factory (string, optional) : the name of the <code>SAXParserFactory</code>
74  * implementation class to be used instead of using the standard JAXP mechanism
75  * (<code>SAXParserFactory.newInstance()</code>). This allows to choose
76  * unambiguously the JAXP implementation to be used when several of them are
77  * available in the classpath.
78  * </li>
79  * <li>drop-dtd-comments : should comment() events from DTD's be dropped? Since this implementation
80  * does not support the DeclHandler interface anyway, it is quite useless to only have the comments
81  * from DTD. And the comment events from the internal DTD subset would appear in the serialized output
82  * again.
83  * </li>
84  * </ul>
85  *
86  * @author <a HREF="mailto:dev@avalon.apache.org">Avalon Development Team</a>
87  * @version CVS $Revision: 1.6 $ $Date: 2004/02/28 11:47:20 $
88  * @avalon.component
89  */

90 public final class JaxpParser
91     extends AbstractLogEnabled
92     implements SAXParser, DOMParser,
93                 Poolable, Component, Parameterizable, Serviceable, Disposable, ErrorHandler JavaDoc
94 {
95     /** the SAX Parser factory */
96     private SAXParserFactory JavaDoc m_factory;
97
98     /** The SAX reader. It is created lazily by {@link #setupXMLReader()}
99      and cleared if a parsing error occurs. */

100     private XMLReader JavaDoc m_reader;
101
102     /** the Entity Resolver */
103     private EntityResolver m_resolver;
104
105     /** do we want namespaces also as attributes ? */
106     private boolean m_nsPrefixes;
107
108     /** do we want to reuse parsers ? */
109     private boolean m_reuseParsers;
110
111     /** do we stop on warnings ? */
112     private boolean m_stopOnWarning;
113
114     /** do we stop on recoverable errors ? */
115     private boolean m_stopOnRecoverableError;
116
117     /** the Document Builder factory */
118     private DocumentBuilderFactory JavaDoc m_docFactory;
119
120     /** The DOM builder. It is created lazily by {@link #setupDocumentBuilder()}
121      and cleared if a parsing error occurs. */

122     private DocumentBuilder JavaDoc m_docBuilder;
123
124     /** Should comments appearing between start/endDTD events be dropped ? */
125     private boolean m_dropDtdComments;
126
127     /** The serviec manager */
128     private ServiceManager m_manager;
129     
130     /**
131      * Get the Entity Resolver from the component m_manager
132      *
133      * @avalon.dependency type="EntityResolver" optional="true"
134      */

135     public void service( final ServiceManager manager )
136         throws ServiceException
137     {
138         m_manager = manager;
139         
140         if( manager.hasService( EntityResolver.ROLE ) )
141         {
142             m_resolver = (EntityResolver)manager.lookup( EntityResolver.ROLE );
143             if( getLogger().isDebugEnabled() )
144             {
145                 getLogger().debug( "JaxpParser: Using EntityResolver: " + m_resolver );
146             }
147         }
148     }
149
150     /* (non-Javadoc)
151      * @see org.apache.avalon.framework.activity.Disposable#dispose()
152      */

153     public void dispose()
154     {
155         if ( m_manager != null )
156         {
157             m_manager.release( m_resolver );
158             m_manager = null;
159             m_resolver = null;
160         }
161     }
162
163     public void parameterize( final Parameters params )
164         throws ParameterException
165     {
166         // Validation and namespace prefixes parameters
167
boolean validate = params.getParameterAsBoolean( "validate", false );
168         m_nsPrefixes = params.getParameterAsBoolean( "namespace-prefixes", false );
169         m_reuseParsers = params.getParameterAsBoolean( "reuse-parsers", true );
170         m_stopOnWarning = params.getParameterAsBoolean( "stop-on-warning", true );
171         m_stopOnRecoverableError = params.getParameterAsBoolean( "stop-on-recoverable-error", true );
172         m_dropDtdComments = params.getParameterAsBoolean( "drop-dtd-comments", false );
173
174         // Get the SAXFactory
175
final String JavaDoc saxParserFactoryName = params.getParameter( "sax-parser-factory",
176                                                                  "javax.xml.parsers.SAXParserFactory" );
177         if( "javax.xml.parsers.SAXParserFactory".equals( saxParserFactoryName ) )
178         {
179             m_factory = SAXParserFactory.newInstance();
180         }
181         else
182         {
183             try
184             {
185                 final Class JavaDoc factoryClass = loadClass( saxParserFactoryName );
186                 m_factory = (SAXParserFactory JavaDoc)factoryClass.newInstance();
187             }
188             catch( Exception JavaDoc e )
189             {
190                 throw new ParameterException( "Cannot load SAXParserFactory class " + saxParserFactoryName, e );
191             }
192         }
193         m_factory.setNamespaceAware( true );
194         m_factory.setValidating( validate );
195
196         // Get the DocumentFactory
197
final String JavaDoc documentBuilderFactoryName = params.getParameter( "document-builder-factory",
198                                                                        "javax.xml.parsers.DocumentBuilderFactory" );
199         if( "javax.xml.parsers.DocumentBuilderFactory".equals( documentBuilderFactoryName ) )
200         {
201             m_docFactory = DocumentBuilderFactory.newInstance();
202         }
203         else
204         {
205             try
206             {
207                 final Class JavaDoc factoryClass = loadClass( documentBuilderFactoryName );
208                 m_docFactory = (DocumentBuilderFactory JavaDoc)factoryClass.newInstance();
209             }
210             catch( Exception JavaDoc e )
211             {
212                 throw new ParameterException( "Cannot load DocumentBuilderFactory class " + documentBuilderFactoryName, e );
213             }
214         }
215         m_docFactory.setNamespaceAware( true );
216         m_docFactory.setValidating( validate );
217
218         if( getLogger().isDebugEnabled() )
219         {
220             getLogger().debug( "JaxpParser: validating: " + validate +
221                                ", namespace-prefixes: " + m_nsPrefixes +
222                                ", reuse parser: " + m_reuseParsers +
223                                ", stop on warning: " + m_stopOnWarning +
224                                ", stop on recoverable-error: " + m_stopOnRecoverableError +
225                                ", saxParserFactory: " + saxParserFactoryName +
226                                ", documentBuilderFactory: " + documentBuilderFactoryName );
227         }
228     }
229
230     /**
231      * Load a class
232      */

233     private Class JavaDoc loadClass( String JavaDoc name ) throws Exception JavaDoc
234     {
235         ClassLoader JavaDoc loader = Thread.currentThread().getContextClassLoader();
236         if( loader == null )
237         {
238             loader = getClass().getClassLoader();
239         }
240         return loader.loadClass( name );
241     }
242
243     /**
244      * Parse the <code>InputSource</code> and send
245      * SAX events to the consumer.
246      * Attention: the consumer can implement the
247      * <code>LexicalHandler</code> as well.
248      * The parse should take care of this.
249      */

250     public void parse( final InputSource JavaDoc in,
251                        final ContentHandler JavaDoc contentHandler,
252                        final LexicalHandler JavaDoc lexicalHandler )
253         throws SAXException JavaDoc, IOException JavaDoc
254     {
255         setupXMLReader();
256
257         // Ensure we will use a fresh new parser at next parse in case of failure
258
XMLReader JavaDoc tmpReader = m_reader;
259         m_reader = null;
260
261         try
262         {
263             LexicalHandler JavaDoc theLexicalHandler = null;
264             if ( null == lexicalHandler
265                  && contentHandler instanceof LexicalHandler JavaDoc)
266             {
267                 theLexicalHandler = (LexicalHandler JavaDoc)contentHandler;
268             }
269             if( null != lexicalHandler )
270             {
271                 theLexicalHandler = lexicalHandler;
272             }
273             if (theLexicalHandler != null)
274             {
275                 if (m_dropDtdComments)
276                     theLexicalHandler = new DtdCommentEater(theLexicalHandler);
277                 tmpReader.setProperty( "http://xml.org/sax/properties/lexical-handler",
278                                        theLexicalHandler );
279             }
280         }
281         catch( final SAXException JavaDoc e )
282         {
283             final String JavaDoc message =
284                 "SAX2 driver does not support property: " +
285                 "'http://xml.org/sax/properties/lexical-handler'";
286             getLogger().warn( message );
287         }
288
289         tmpReader.setErrorHandler( this );
290         tmpReader.setContentHandler( contentHandler );
291         if( null != m_resolver )
292         {
293             tmpReader.setEntityResolver( m_resolver );
294         }
295
296         tmpReader.parse( in );
297
298         // Here, parsing was successful : restore reader
299
if( m_reuseParsers )
300         {
301             m_reader = tmpReader;
302         }
303     }
304
305     /**
306      * Parse the {@link InputSource} and send
307      * SAX events to the consumer.
308      * Attention: the consumer can implement the
309      * {@link LexicalHandler} as well.
310      * The parse should take care of this.
311      */

312     public void parse( InputSource JavaDoc in, ContentHandler JavaDoc consumer )
313         throws SAXException JavaDoc, IOException JavaDoc
314     {
315         this.parse( in, consumer,
316                     (consumer instanceof LexicalHandler JavaDoc ? (LexicalHandler JavaDoc)consumer : null));
317     }
318
319     /**
320      * Creates a new {@link XMLReader} if needed.
321      */

322     private void setupXMLReader()
323         throws SAXException JavaDoc
324     {
325         if( null == m_reader )
326         {
327             // Create the XMLReader
328
try
329             {
330                 m_reader = m_factory.newSAXParser().getXMLReader();
331             }
332             catch( final ParserConfigurationException JavaDoc pce )
333             {
334                 final String JavaDoc message = "Cannot produce a valid parser";
335                 throw new SAXException JavaDoc( message, pce );
336             }
337             
338             m_reader.setFeature( "http://xml.org/sax/features/namespaces", true );
339             
340             if( m_nsPrefixes )
341             {
342                 try
343                 {
344                     m_reader.setFeature( "http://xml.org/sax/features/namespace-prefixes",
345                                          m_nsPrefixes );
346                 }
347                 catch( final SAXException JavaDoc se )
348                 {
349                     final String JavaDoc message =
350                         "SAX2 XMLReader does not support setting feature: " +
351                         "'http://xml.org/sax/features/namespace-prefixes'";
352                     getLogger().warn( message );
353                 }
354             }
355         }
356     }
357
358     /**
359      * Parses a new Document object from the given InputSource.
360      */

361     public Document JavaDoc parseDocument( final InputSource JavaDoc input )
362         throws SAXException JavaDoc, IOException JavaDoc
363     {
364         setupDocumentBuilder();
365
366         // Ensure we will use a fresh new parser at next parse in case of failure
367
DocumentBuilder JavaDoc tmpBuilder = m_docBuilder;
368         m_docBuilder = null;
369
370         if( null != m_resolver )
371         {
372             tmpBuilder.setEntityResolver( m_resolver );
373         }
374
375         Document JavaDoc result = tmpBuilder.parse( input );
376
377         // Here, parsing was successful : restore builder
378
if( m_reuseParsers )
379         {
380             m_docBuilder = tmpBuilder;
381         }
382
383         return result;
384     }
385
386     /**
387      * Creates a new {@link DocumentBuilder} if needed.
388      */

389     private void setupDocumentBuilder()
390         throws SAXException JavaDoc
391     {
392         if( null == m_docBuilder )
393         {
394             try
395             {
396                 m_docBuilder = m_docFactory.newDocumentBuilder();
397             }
398             catch( final ParserConfigurationException JavaDoc pce )
399             {
400                 final String JavaDoc message = "Could not create DocumentBuilder";
401                 throw new SAXException JavaDoc( message, pce );
402             }
403         }
404     }
405
406     /**
407      * Return a new {@link Document}.
408      */

409     public Document JavaDoc createDocument()
410         throws SAXException JavaDoc
411     {
412         setupDocumentBuilder();
413         return m_docBuilder.newDocument();
414     }
415
416     /**
417      * Receive notification of a recoverable error.
418      */

419     public void error( final SAXParseException JavaDoc spe )
420         throws SAXException JavaDoc
421     {
422         final String JavaDoc message =
423             "Error parsing " + spe.getSystemId() + " (line " +
424             spe.getLineNumber() + " col. " + spe.getColumnNumber() +
425             "): " + spe.getMessage();
426         if( m_stopOnRecoverableError )
427         {
428             throw new SAXException JavaDoc( message, spe );
429         }
430         getLogger().error( message, spe );
431     }
432
433     /**
434      * Receive notification of a fatal error.
435      */

436     public void fatalError( final SAXParseException JavaDoc spe )
437         throws SAXException JavaDoc
438     {
439         final String JavaDoc message =
440             "Fatal error parsing " + spe.getSystemId() + " (line " +
441             spe.getLineNumber() + " col. " + spe.getColumnNumber() +
442             "): " + spe.getMessage();
443         throw new SAXException JavaDoc( message, spe );
444     }
445
446     /**
447      * Receive notification of a warning.
448      */

449     public void warning( final SAXParseException JavaDoc spe )
450         throws SAXException JavaDoc
451     {
452         final String JavaDoc message =
453             "Warning parsing " + spe.getSystemId() + " (line " +
454             spe.getLineNumber() + " col. " + spe.getColumnNumber() +
455             "): " + spe.getMessage();
456
457         if( m_stopOnWarning )
458         {
459             throw new SAXException JavaDoc( message, spe );
460         }
461         getLogger().warn( message, spe );
462     }
463
464     /**
465      * A LexicalHandler implementation that strips all comment events between
466      * startDTD and endDTD. In all other cases the events are forwarded to another
467      * LexicalHandler.
468      */

469     private static class DtdCommentEater implements LexicalHandler JavaDoc
470     {
471         private LexicalHandler JavaDoc next;
472         private boolean inDTD;
473
474         public DtdCommentEater(LexicalHandler JavaDoc nextHandler)
475         {
476             this.next = nextHandler;
477         }
478
479         public void startDTD (String JavaDoc name, String JavaDoc publicId, String JavaDoc systemId)
480             throws SAXException JavaDoc
481         {
482             inDTD = true;
483             next.startDTD(name, publicId, systemId);
484         }
485
486         public void endDTD ()
487             throws SAXException JavaDoc
488         {
489             inDTD = false;
490             next.endDTD();
491         }
492
493         public void startEntity (String JavaDoc name)
494             throws SAXException JavaDoc
495         {
496             next.startEntity(name);
497         }
498
499         public void endEntity (String JavaDoc name)
500             throws SAXException JavaDoc
501         {
502             next.endEntity(name);
503         }
504
505         public void startCDATA ()
506             throws SAXException JavaDoc
507         {
508             next.startCDATA();
509         }
510
511         public void endCDATA ()
512             throws SAXException JavaDoc
513         {
514             next.endCDATA();
515         }
516
517         public void comment (char ch[], int start, int length)
518             throws SAXException JavaDoc
519         {
520             if (!inDTD)
521                 next.comment(ch, start, length);
522         }
523     }
524
525 }
526
Popular Tags