KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > web > html > Impl > DefaultHTMLParser


1 package org.jahia.clipbuilder.html.web.html.Impl;
2
3 import java.io.*;
4 import org.jahia.clipbuilder.html.web.html.*;
5 import org.apache.xerces.parsers.*;
6 import org.cyberneko.html.*;
7 import org.w3c.dom.*;
8 import org.xml.sax.*;
9
10 /**
11  * Default implementation of HTMLParser
12  *
13  *@author Tlili Khaled
14  */

15 public class DefaultHTMLParser implements HTMLParser {
16     private HTMLConfiguration configuration;
17     private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(DefaultHTMLParser.class);
18
19
20     /**
21      * Constructor for the NekoHTMLParser object
22      */

23     public DefaultHTMLParser() {
24         init();
25     }
26
27
28
29     /**
30      * Sets the Transformer attribute of the NekoHTMLParser object
31      *
32      *@param configuration The new Configuration value
33      */

34     public void setConfiguration(HTMLConfiguration configuration) {
35         logger.debug("[ Set configutation ]");
36
37
38         this.configuration = configuration;
39     }
40
41
42     /**
43      * Gets the Transformer attribute of the NekoHTMLParser object
44      *
45      *@return The Transformer value
46      */

47     public HTMLConfiguration getConfiguration() {
48
49         return configuration;
50     }
51
52
53     /**
54      * parse
55      *
56      *@param html Description of Parameter
57      *@return Document
58      *@exception SAXException Description of Exception
59      *@exception IOException Description of Exception method
60      */

61     public Document parse(String JavaDoc html) throws IOException, SAXException {
62
63         Document htmlDocument = null;
64         try {
65             //Parser
66
java.io.InputStream JavaDoc in = new java.io.StringBufferInputStream JavaDoc(html);
67
68             DOMParser parser = new DOMParser(configuration);
69             parser.parse(new InputSource(in));
70             htmlDocument = parser.getDocument();
71             logger.debug("[ Parsing finished. ]");
72         }
73         catch (IOException ex) {
74             ex.printStackTrace();
75         }
76         catch (SAXException ex) {
77             ex.printStackTrace();
78         }
79
80         return htmlDocument;
81     }
82
83
84
85     /**
86      * init the parser
87      */

88     private void init() {
89         //Configure the parser
90
configuration = new org.cyberneko.html.HTMLConfiguration();
91         configuration.setFeature("http://cyberneko.org/html/features/augmentations", true);
92         configuration.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
93         configuration.setFeature("http://cyberneko.org/html/features/report-errors", false);
94         configuration.setFeature("http://cyberneko.org/html/features/balance-tags/ignore-outside-content", false);
95                 configuration.setFeature("http://apache.org/xml/features/scanner/notify-char-refs", false);
96                 configuration.setFeature("http://apache.org/xml/features/scanner/notify-builtin-refs", true);
97                 configuration.setFeature("http://cyberneko.org/html/features/scanner/ignore-specified-charset",true);
98
99         //script and css: remove <!-- --> due to encoding problem whith xerces and jdom
100
configuration.setFeature("http://cyberneko.org/html/features/scanner/script/strip-comment-delims", true);
101         configuration.setFeature("http://cyberneko.org/html/features/scanner/style/strip-comment-delims", true);
102                 // Deal whith namespace
103
configuration.setFeature("http://xml.org/sax/features/namespaces", false);
104                 configuration.setFeature("http://cyberneko.org/html/features/insert-namespaces", false);
105                 configuration.setFeature("http://cyberneko.org/html/features/override-namespaces", false);
106
107
108     }
109
110 }
111
Popular Tags