KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > transformation > HTMLTransformer


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.transformation;
17
18 import java.io.BufferedInputStream JavaDoc;
19 import java.io.ByteArrayInputStream JavaDoc;
20 import java.io.IOException JavaDoc;
21 import java.io.PrintWriter JavaDoc;
22 import java.io.StringWriter JavaDoc;
23 import java.util.HashMap JavaDoc;
24 import java.util.Map JavaDoc;
25 import java.util.Properties JavaDoc;
26 import java.util.StringTokenizer JavaDoc;
27
28 import org.apache.avalon.framework.configuration.Configurable;
29 import org.apache.avalon.framework.configuration.Configuration;
30 import org.apache.avalon.framework.configuration.ConfigurationException;
31 import org.apache.avalon.framework.parameters.Parameters;
32 import org.apache.cocoon.ProcessingException;
33 import org.apache.cocoon.environment.SourceResolver;
34 import org.apache.cocoon.transformation.AbstractSAXTransformer;
35 import org.apache.cocoon.xml.XMLUtils;
36 import org.apache.cocoon.xml.IncludeXMLConsumer;
37 import org.apache.excalibur.source.Source;
38 import org.w3c.tidy.Tidy;
39 import org.xml.sax.Attributes JavaDoc;
40 import org.xml.sax.SAXException JavaDoc;
41
42 /**
43  * Converts (escaped) HTML snippets into JTidied HTML.
44  * This transformer expects a list of elements, passed as comma separated
45  * values of the "tags" parameter. It records the text enclosed in such
46  * elements and pass it thru JTidy to obtain valid XHTML.
47  *
48  * <p>TODO: Add namespace support.
49  * <p><strong>WARNING:</strong> This transformer should be considered unstable.
50  *
51  * @author <a HREF="mailto:d.madama@pro-netics.com">Daniele Madama</a>
52  * @author <a HREF="mailto:gianugo@apache.org">Gianugo Rabellino</a>
53  *
54  * @version CVS $Id: HTMLTransformer.java 164601 2005-04-25 17:03:55Z vgritsenko $
55  */

56 public class HTMLTransformer
57     extends AbstractSAXTransformer
58     implements Configurable {
59
60     /**
61      * Properties for Tidy format
62      */

63     private Properties JavaDoc properties;
64
65     /**
66      * Tags that must be normalized
67      */

68     private Map JavaDoc tags;
69
70     /**
71      * React on endElement calls that contain a tag to be
72      * tidied and run Jtidy on it, otherwise passthru.
73      *
74      * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
75      */

76     public void endElement(String JavaDoc uri, String JavaDoc name, String JavaDoc raw)
77         throws SAXException JavaDoc {
78         if (this.tags.containsKey(name)) {
79             String JavaDoc toBeNormalized = this.endTextRecording();
80             try {
81                 this.normalize(toBeNormalized);
82             } catch (ProcessingException e) {
83                 e.printStackTrace();
84             }
85         }
86         super.endElement(uri, name, raw);
87     }
88
89     /**
90      * Start buffering text if inside a tag to be normalized,
91      * passthru otherwise.
92      *
93      * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
94      */

95     public void startElement(
96         String JavaDoc uri,
97         String JavaDoc name,
98         String JavaDoc raw,
99         Attributes JavaDoc attr)
100         throws SAXException JavaDoc {
101         super.startElement(uri, name, raw, attr);
102         if (this.tags.containsKey(name)) {
103             this.startTextRecording();
104         }
105     }
106
107     /**
108      * Configure this transformer, possibly passing to it
109      * a jtidy configuration file location.
110      */

111     public void configure(Configuration config) throws ConfigurationException {
112         super.configure(config);
113
114         String JavaDoc configUrl = config.getChild("jtidy-config").getValue(null);
115         if (configUrl != null) {
116             org.apache.excalibur.source.SourceResolver resolver = null;
117             Source configSource = null;
118             try {
119                 resolver = (org.apache.excalibur.source.SourceResolver)
120                            this.manager.lookup(org.apache.excalibur.source.SourceResolver.ROLE);
121                 configSource = resolver.resolveURI(configUrl);
122                 if (getLogger().isDebugEnabled()) {
123                     getLogger().debug(
124                         "Loading configuration from " + configSource.getURI());
125                 }
126                 this.properties = new Properties JavaDoc();
127                 this.properties.load(configSource.getInputStream());
128
129             } catch (Exception JavaDoc e) {
130                 getLogger().warn("Cannot load configuration from " + configUrl);
131                 throw new ConfigurationException(
132                     "Cannot load configuration from " + configUrl,
133                     e);
134             } finally {
135                 if (null != resolver) {
136                     this.manager.release(resolver);
137                     resolver.release(configSource);
138                 }
139             }
140         }
141     }
142
143     /**
144      * The beef: run JTidy on the buffered text and stream
145      * the result
146      *
147      * @param text the string to be tidied
148      */

149     private void normalize(String JavaDoc text) throws ProcessingException {
150         try {
151             // Setup an instance of Tidy.
152
Tidy tidy = new Tidy();
153             tidy.setXmlOut(true);
154
155             if (this.properties == null) {
156                 tidy.setXHTML(true);
157             } else {
158                 tidy.setConfigurationFromProps(this.properties);
159             }
160
161             //Set Jtidy warnings on-off
162
tidy.setShowWarnings(getLogger().isWarnEnabled());
163             //Set Jtidy final result summary on-off
164
tidy.setQuiet(!getLogger().isInfoEnabled());
165             //Set Jtidy infos to a String (will be logged) instead of System.out
166
StringWriter JavaDoc stringWriter = new StringWriter JavaDoc();
167             PrintWriter JavaDoc errorWriter = new PrintWriter JavaDoc(stringWriter);
168             tidy.setErrout(errorWriter);
169
170             // Extract the document using JTidy and stream it.
171
ByteArrayInputStream JavaDoc bais =
172                 new ByteArrayInputStream JavaDoc(text.getBytes());
173             org.w3c.dom.Document JavaDoc doc =
174                 tidy.parseDOM(new BufferedInputStream JavaDoc(bais), null);
175
176             // FIXME: Jtidy doesn't warn or strip duplicate attributes in same
177
// tag; stripping.
178
XMLUtils.stripDuplicateAttributes(doc, null);
179
180             errorWriter.flush();
181             errorWriter.close();
182             if (getLogger().isWarnEnabled()) {
183                 getLogger().warn(stringWriter.toString());
184             }
185
186             IncludeXMLConsumer.includeNode(doc, this.contentHandler, this.lexicalHandler);
187         } catch (Exception JavaDoc e) {
188             throw new ProcessingException(
189                 "Exception in HTMLTransformer.normalize()",
190                 e);
191         }
192     }
193
194     /**
195      * Setup this component, passing the tag names to be tidied.
196      */

197
198     public void setup(
199         SourceResolver resolver,
200         Map JavaDoc objectModel,
201         String JavaDoc src,
202         Parameters par)
203         throws ProcessingException, SAXException JavaDoc, IOException JavaDoc {
204         super.setup(resolver, objectModel, src, par);
205         String JavaDoc tagsParam = par.getParameter("tags", "");
206         if (getLogger().isDebugEnabled()) {
207             getLogger().debug("tags: " + tagsParam);
208         }
209         this.tags = new HashMap JavaDoc();
210         StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(tagsParam, ",");
211         while (tokenizer.hasMoreElements()) {
212             String JavaDoc tok = tokenizer.nextToken().trim();
213             this.tags.put(tok, tok);
214         }
215     }
216 }
217
Popular Tags