1 17 package org.apache.excalibur.xml.sax; 18 19 import java.io.IOException ; 20 import java.util.Properties ; 21 22 import org.apache.avalon.framework.activity.Initializable; 23 import org.apache.avalon.framework.component.Component; 24 import org.apache.avalon.framework.configuration.Configurable; 25 import org.apache.avalon.framework.configuration.Configuration; 26 import org.apache.avalon.framework.configuration.ConfigurationException; 27 import org.apache.avalon.framework.logger.AbstractLogEnabled; 28 import org.apache.avalon.framework.parameters.Parameters; 29 import org.apache.avalon.framework.service.ServiceException; 30 import org.apache.avalon.framework.service.ServiceManager; 31 import org.apache.avalon.framework.service.Serviceable; 32 import org.apache.avalon.framework.thread.ThreadSafe; 33 import org.apache.excalibur.xml.dom.DOMSerializer; 34 import org.w3c.dom.Document ; 35 import org.w3c.tidy.Tidy; 36 import org.xml.sax.ContentHandler ; 37 import org.xml.sax.InputSource ; 38 import org.xml.sax.SAXException ; 39 import org.xml.sax.ext.LexicalHandler ; 40 41 48 public final class JTidyHTMLParser extends AbstractLogEnabled 49 implements SAXParser, Serviceable, Configurable, Initializable, ThreadSafe, Component 50 { 51 private DOMSerializer m_serializer; 52 private Tidy m_tidy; 53 private Properties m_properties; 54 55 public void service( ServiceManager serviceManager ) throws ServiceException 56 { 57 m_serializer = (DOMSerializer) serviceManager.lookup( DOMSerializer.ROLE ); 58 } 59 60 public void configure( Configuration configuration ) throws ConfigurationException 61 { 62 final Parameters parameters = Parameters.fromConfiguration( configuration ); 63 m_properties = Parameters.toProperties( parameters ); 64 } 65 66 public void initialize() throws Exception 67 { 68 m_tidy = new Tidy(); 69 70 m_tidy.setXmlOut( true ); 72 m_tidy.setXHTML( true ); 73 m_tidy.setShowWarnings( false ); 74 75 m_tidy.setConfigurationFromProps( m_properties ); 76 } 77 78 public void parse( InputSource in, 79 ContentHandler contentHandler, 80 LexicalHandler lexicalHandler ) 81 throws SAXException , IOException 82 { 83 final Document document = m_tidy.parseDOM( in.getByteStream(), null ); 84 m_serializer.serialize( document, contentHandler, lexicalHandler ); 85 } 86 87 94 public void parse( InputSource in, ContentHandler consumer ) 95 throws SAXException , IOException 96 { 97 this.parse( in, consumer, 98 (consumer instanceof LexicalHandler ? (LexicalHandler )consumer : null)); 99 } 100 101 } 102 103 | Popular Tags |