KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > excalibur > xml > sax > JTidyHTMLParser


1 /*
2  * Copyright 2002-2004 The Apache Software Foundation
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12  * implied.
13  *
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17 package org.apache.excalibur.xml.sax;
18
19 import java.io.IOException JavaDoc;
20 import java.util.Properties JavaDoc;
21
22 import org.apache.avalon.framework.activity.Initializable;
23 import org.apache.avalon.framework.component.Component;
24 import org.apache.avalon.framework.configuration.Configurable;
25 import org.apache.avalon.framework.configuration.Configuration;
26 import org.apache.avalon.framework.configuration.ConfigurationException;
27 import org.apache.avalon.framework.logger.AbstractLogEnabled;
28 import org.apache.avalon.framework.parameters.Parameters;
29 import org.apache.avalon.framework.service.ServiceException;
30 import org.apache.avalon.framework.service.ServiceManager;
31 import org.apache.avalon.framework.service.Serviceable;
32 import org.apache.avalon.framework.thread.ThreadSafe;
33 import org.apache.excalibur.xml.dom.DOMSerializer;
34 import org.w3c.dom.Document JavaDoc;
35 import org.w3c.tidy.Tidy;
36 import org.xml.sax.ContentHandler JavaDoc;
37 import org.xml.sax.InputSource JavaDoc;
38 import org.xml.sax.SAXException JavaDoc;
39 import org.xml.sax.ext.LexicalHandler JavaDoc;
40
41 /**
42  * Converter for transforming an input stream contain text/html data
43  * to SAX events.
44  *
45  * @author <a HREF="mailto:dev@avalon.apache.org">Avalon Development Team</a>
46  * @version CVS $Revision: 1.4 $ $Date: 2004/02/28 11:47:20 $
47  */

48 public final class JTidyHTMLParser extends AbstractLogEnabled
49         implements SAXParser, Serviceable, Configurable, Initializable, ThreadSafe, Component
50 {
51     private DOMSerializer m_serializer;
52     private Tidy m_tidy;
53     private Properties JavaDoc m_properties;
54
55     public void service( ServiceManager serviceManager ) throws ServiceException
56     {
57         m_serializer = (DOMSerializer) serviceManager.lookup( DOMSerializer.ROLE );
58     }
59
60     public void configure( Configuration configuration ) throws ConfigurationException
61     {
62         final Parameters parameters = Parameters.fromConfiguration( configuration );
63         m_properties = Parameters.toProperties( parameters );
64     }
65
66     public void initialize() throws Exception JavaDoc
67     {
68         m_tidy = new Tidy();
69
70         //default options.
71
m_tidy.setXmlOut( true );
72         m_tidy.setXHTML( true );
73         m_tidy.setShowWarnings( false );
74
75         m_tidy.setConfigurationFromProps( m_properties );
76     }
77
78     public void parse( InputSource JavaDoc in,
79                        ContentHandler JavaDoc contentHandler,
80                        LexicalHandler JavaDoc lexicalHandler )
81             throws SAXException JavaDoc, IOException JavaDoc
82     {
83         final Document JavaDoc document = m_tidy.parseDOM( in.getByteStream(), null );
84         m_serializer.serialize( document, contentHandler, lexicalHandler );
85     }
86     
87     /**
88      * Parse the {@link InputSource} and send
89      * SAX events to the consumer.
90      * Attention: the consumer can implement the
91      * {@link LexicalHandler} as well.
92      * The parse should take care of this.
93      */

94     public void parse( InputSource JavaDoc in, ContentHandler JavaDoc consumer )
95         throws SAXException JavaDoc, IOException JavaDoc
96     {
97         this.parse( in, consumer,
98                     (consumer instanceof LexicalHandler JavaDoc ? (LexicalHandler JavaDoc)consumer : null));
99     }
100     
101 }
102
103
Popular Tags