KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > portal > util > HtmlSaxParser


/*
 * Copyright 2005 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

16 package org.apache.cocoon.portal.util;
17
import java.io.IOException;
import java.io.StringReader;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Properties;

import org.apache.cocoon.xml.ContentHandlerWrapper;
import org.apache.excalibur.xml.sax.XMLConsumer;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
32
33 /**
34  * This parser uses the nekohtml parser to parse html and generate sax streams.
35  *
36  * @version $Id: HtmlSaxParser.java 329454 2005-10-29 17:44:45Z cziegeler $
37  */

38 public class HtmlSaxParser extends AbstractSAXParser {
39
40     public HtmlSaxParser(Properties JavaDoc properties) {
41         super(getConfig(properties));
42     }
43
44     protected static HTMLConfiguration getConfig(Properties JavaDoc properties) {
45         HTMLConfiguration config = new HTMLConfiguration();
46         config.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
47         if (properties != null) {
48             for (Iterator JavaDoc i = properties.keySet().iterator();i.hasNext();) {
49                 String JavaDoc name = (String JavaDoc) i.next();
50                 config.setProperty(name, properties.getProperty(name));
51             }
52         }
53         return config;
54     }
55
56     /**
57      * Parse html stored in the string.
58      */

59     public static void parseString(String JavaDoc content, ContentHandler JavaDoc ch)
60     throws SAXException JavaDoc {
61         final HtmlSaxParser parser = new HtmlSaxParser(null);
62         parser.setContentHandler(ch);
63         if ( ch instanceof LexicalHandler JavaDoc ) {
64             parser.setLexicalHandler((LexicalHandler JavaDoc)ch);
65         }
66         final InputSource JavaDoc is = new InputSource JavaDoc(new StringReader JavaDoc(content));
67         try {
68             parser.parse(is);
69         } catch (IOException JavaDoc ioe) {
70             throw new SAXException JavaDoc("Error during parsing of html markup.", ioe);
71         }
72     }
73
74     public static XMLConsumer getContentFilter(ContentHandler JavaDoc ch) {
75         return new ContentFilter(ch);
76     }
77
78     protected static final class ContentFilter extends ContentHandlerWrapper {
79
80         public ContentFilter(ContentHandler JavaDoc ch) {
81             this.setContentHandler(ch);
82             if ( ch instanceof LexicalHandler JavaDoc ) {
83                 this.setLexicalHandler((LexicalHandler JavaDoc)ch);
84             }
85         }
86
87         /**
88          * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
89          */

90         public void endElement(String JavaDoc uri, String JavaDoc loc, String JavaDoc raw) throws SAXException JavaDoc {
91             if ( !loc.equals("html") && !loc.equals("body") ) {
92                 super.endElement(uri, loc, raw);
93             }
94         }
95
96         /**
97          * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
98          */

99         public void startElement(String JavaDoc uri, String JavaDoc loc, String JavaDoc raw, Attributes JavaDoc a) throws SAXException JavaDoc {
100             if ( !loc.equals("html") && !loc.equals("body") ) {
101                 super.startElement(uri, loc, raw, a);
102             }
103         }
104     }
105
106 }
107
Popular Tags