KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlcleaner > HtmlTagProvider


/* Copyright (c) 2006-2007, Vladimir Nikic
    All rights reserved.

    Redistribution and use of this software in source and binary forms,
    with or without modification, are permitted provided that the following
    conditions are met:

    * Redistributions of source code must retain the above
      copyright notice, this list of conditions and the
      following disclaimer.

    * Redistributions in binary form must reproduce the above
      copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other
      materials provided with the distribution.

    * The name of HtmlCleaner may not be used to endorse or promote
      products derived from this software without specific prior
      written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.

    You can contact Vladimir Nikic by sending e-mail to
    nikic_vladimir@yahoo.com. Please include the word "HtmlCleaner" in the
    subject line.
*/

package org.htmlcleaner;

import java.util.HashMap;
import java.util.Locale;
import java.util.StringTokenizer;

43 /**
44  * <p>
45  * Default HTML tag info provider. Here the basic set of HTML tags is defined, including
46  * depricated tags and some Microsoft specific tags. Rules for tag balancing are similar
47  * to that used in most web-browsers.
48  * </p>
49  *
50  * Created by: Vladimir Nikic<br/>
51  * Date: November, 2006.
52  */

53 public class HtmlTagProvider extends HashMap JavaDoc implements ITagInfoProvider {
54
55     // singleton instance, used if no other TagInfoProvider is specified
56
private static HtmlTagProvider _instance;
57
58     /**
59      * Returns singleton instance of this class.
60      */

61     public static synchronized HtmlTagProvider getInstance() {
62         if (_instance == null) {
63             _instance = new HtmlTagProvider();
64         }
65
66         return _instance;
67     }
68
69     /**
70      * Default constructor - creates tags and rules for balancing.
71      */

72     public HtmlTagProvider() {
73         defineTags();
74     }
75
76     /**
77      * Shortcut to creating TagInfo instance and storing it to the map.
78      * @param name
79      * @param contentType
80      * @param belongsTo
81      * @param dependancies
82      */

83     protected void addTag(String JavaDoc name, String JavaDoc contentType, int belongsTo, String JavaDoc dependancies) {
84         this.put( name.toLowerCase(), new TagInfo(name, contentType, belongsTo, false, false, false, dependancies) );
85     }
86
87     /**
88      * Definition of all HTML tags together with rules for tag balancing.
89      */

90     protected void defineTags() {
91         // Structure
92
addTag("div", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
93         addTag("span", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
94
95         // Meta Information
96
addTag("meta", TagInfo.CONTENT_NONE, TagInfo.HEAD, null);
97         addTag("link", TagInfo.CONTENT_NONE, TagInfo.HEAD, null);
98         addTag("title", TagInfo.CONTENT_TEXT, TagInfo.HEAD, null);
99         addTag("style", TagInfo.CONTENT_ALL, TagInfo.HEAD, null);
100         addTag("bgsound", TagInfo.CONTENT_NONE, TagInfo.HEAD, null);
101
102         // Text
103
addTag("h1", TagInfo.CONTENT_ALL, TagInfo.BODY, "h1,h2,h3,h4,h5,h6");
104         addTag("h2", TagInfo.CONTENT_ALL, TagInfo.BODY, "h1,h2,h3,h4,h5,h6");
105         addTag("h3", TagInfo.CONTENT_ALL, TagInfo.BODY, "h1,h2,h3,h4,h5,h6");
106         addTag("h4", TagInfo.CONTENT_ALL, TagInfo.BODY, "h1,h2,h3,h4,h5,h6");
107         addTag("h5", TagInfo.CONTENT_ALL, TagInfo.BODY, "h1,h2,h3,h4,h5,h6");
108         addTag("h6", TagInfo.CONTENT_ALL, TagInfo.BODY, "h1,h2,h3,h4,h5,h6");
109         addTag("p", TagInfo.CONTENT_ALL, TagInfo.BODY, "p");
110         addTag("strong", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
111         addTag("em", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
112         addTag("abbr", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
113         addTag("acronym", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
114         addTag("address", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
115         addTag("bdo", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
116         addTag("blockquote",TagInfo.CONTENT_ALL, TagInfo.BODY, null);
117         addTag("cite", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
118         addTag("q", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
119         addTag("code", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
120         addTag("ins", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
121         addTag("del", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
122         addTag("dfn", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
123         addTag("kbd", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
124         addTag("pre", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
125         addTag("samp", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
126         addTag("listing", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
127         addTag("var", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
128         addTag("br", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
129         addTag("wbr", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
130         addTag("nobr", TagInfo.CONTENT_ALL, TagInfo.BODY, "nobr");
131         addTag("xmp", TagInfo.CONTENT_TEXT, TagInfo.BODY, null);
132
133         // Links
134
addTag("a", TagInfo.CONTENT_ALL, TagInfo.BODY, "a");
135         addTag("base", TagInfo.CONTENT_NONE, TagInfo.HEAD, null);
136
137         // Images and Objects
138
addTag("img", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
139         addTag("area", TagInfo.CONTENT_NONE, TagInfo.BODY, "!map,area");
140         addTag("map", TagInfo.CONTENT_ALL, TagInfo.BODY, "map");
141         addTag("object", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
142         addTag("param", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
143         addTag("applet", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
144         addTag("xml", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
145
146         // Lists
147
addTag("ul", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
148         addTag("ol", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
149         addTag("li", TagInfo.CONTENT_ALL, TagInfo.BODY, "li");
150         addTag("dl", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
151         addTag("dt", TagInfo.CONTENT_ALL, TagInfo.BODY, "dt,dd");
152         addTag("dd", TagInfo.CONTENT_ALL, TagInfo.BODY, "dt,dd");
153         addTag("menu", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
154         addTag("dir", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
155
156         // Tables
157
addTag("table", TagInfo.CONTENT_ALL, TagInfo.BODY, "#tr,#tbody,#thead,#tfoot,#colgroup,#caption,#tr,tr,thead,tbody,tfoot,caption,colgroup,table");
158         addTag("tr", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,+tbody,^thead,^tfoot,#td,#th,tr,caption,colgroup");
159         addTag("td", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,+tr,td,th,caption,colgroup");
160         addTag("th", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,+tr,td,th,caption,colgroup");
161         addTag("tbody", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,#tr,td,th,tr,tbody,thead,tfoot,caption,colgroup");
162         addTag("thead", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,#tr,td,th,tr,tbody,thead,tfoot,caption,colgroup");
163         addTag("tfoot", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,#tr,td,th,tr,tbody,thead,tfoot,caption,colgroup");
164         addTag("col", TagInfo.CONTENT_NONE, TagInfo.BODY, "!colgroup");
165         addTag("colgroup", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,#col,td,th,tr,tbody,thead,tfoot,caption,colgroup");
166         addTag("caption", TagInfo.CONTENT_ALL, TagInfo.BODY, "!table,td,th,tr,tbody,thead,tfoot,caption,colgroup");
167
168         // Forms
169
addTag("form", TagInfo.CONTENT_ALL, TagInfo.BODY, "-form,option,optgroup,textarea,select,fieldset");
170         addTag("input", TagInfo.CONTENT_NONE, TagInfo.BODY, "select,optgroup,option");
171         addTag("textarea", TagInfo.CONTENT_ALL, TagInfo.BODY, "select,optgroup,option");
172         addTag("select", TagInfo.CONTENT_ALL, TagInfo.BODY, "#option,#optgroup,option,optgroup,select");
173         addTag("option", TagInfo.CONTENT_TEXT, TagInfo.BODY, "!select,option");
174         addTag("optgroup", TagInfo.CONTENT_ALL, TagInfo.BODY, "!select,#option,optgroup");
175         addTag("button", TagInfo.CONTENT_ALL, TagInfo.BODY, "select,optgroup,option");
176         addTag("label", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
177         addTag("fieldset", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
178         addTag("isindex", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
179
180         // Scripting
181
addTag("script", TagInfo.CONTENT_ALL, TagInfo.HEAD_AND_BODY, null);
182         addTag("noscript", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
183
184         // Presentational
185
addTag("b", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
186         addTag("i", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
187         addTag("u", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
188         addTag("tt", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
189         addTag("sub", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
190         addTag("sup", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
191         addTag("big", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
192         addTag("small", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
193         addTag("strike", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
194         addTag("blink", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
195         addTag("marquee", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
196         addTag("s", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
197         addTag("hr", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
198         addTag("font", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
199         addTag("basefont", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
200         addTag("center", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
201
202         addTag("comment", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
203         addTag("server", TagInfo.CONTENT_ALL, TagInfo.BODY, null);
204         addTag("iframe", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
205         addTag("embed", TagInfo.CONTENT_NONE, TagInfo.BODY, null);
206
207         getTagInfo("title").setUnique(true);
208         getTagInfo("form").setIgnorePermitted(true);
209         getTagInfo("select").setIgnorePermitted(true);
210         getTagInfo("option").setIgnorePermitted(true);
211         getTagInfo("optgroup").setIgnorePermitted(true);
212
213         String JavaDoc commonTags = "div,p,address,h1,h2,h3,h4,h5,h6,blockquote,pre,listing,ul,ol,li,dl,menu,dir,table,form,fieldset,isindex,marquee,center,embed,param,hr";
214
215         addDependancy("p", commonTags);
216         addDependancy("address", commonTags);
217         addDependancy("label", commonTags);
218         addDependancy("abbr", commonTags);
219         addDependancy("acronym", commonTags);
220         addDependancy("dfn", commonTags);
221         addDependancy("kbd", commonTags);
222         addDependancy("samp", commonTags);
223         addDependancy("var", commonTags);
224         addDependancy("cite", commonTags);
225         addDependancy("code", commonTags);
226         addDependancy("param", commonTags);
227         addDependancy("xml", commonTags);
228
229         addDependancy("&a", commonTags);
230         addDependancy("&bdo", commonTags);
231         addDependancy("&strong", commonTags);
232         addDependancy("&em", commonTags);
233         addDependancy("&q", commonTags);
234         addDependancy("&b", commonTags);
235         addDependancy("&i", commonTags);
236         addDependancy("&u", commonTags);
237         addDependancy("&tt", commonTags);
238         addDependancy("&sub", commonTags);
239         addDependancy("&sup", commonTags);
240         addDependancy("&big", commonTags);
241         addDependancy("&small", commonTags);
242         addDependancy("&strike", commonTags);
243         addDependancy("&s", commonTags);
244         addDependancy("&font", commonTags);
245
246         getTagInfo("applet").setDeprecated(true);
247         getTagInfo("basefont").setDeprecated(true);
248         getTagInfo("center").setDeprecated(true);
249         getTagInfo("dir").setDeprecated(true);
250         getTagInfo("font").setDeprecated(true);
251         getTagInfo("isindex").setDeprecated(true);
252         getTagInfo("menu").setDeprecated(true);
253         getTagInfo("s").setDeprecated(true);
254         getTagInfo("strike").setDeprecated(true);
255         getTagInfo("u").setDeprecated(true);
256     }
257
258     protected void addDependancy(String JavaDoc tagName, String JavaDoc tagList) {
259         if (tagList != null) {
260             StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(tagList, ",.");
261             while (tokenizer.hasMoreTokens()) {
262                 TagInfo curr = getTagInfo(tokenizer.nextToken().trim());
263                 curr.addDependancy(tagName);
264             }
265         }
266     }
267
268     /**
269      * Implementation of the interface method.
270      * @param tagName
271      * @return TagInfo instance from the map, for the specified tag name.
272      */

273     public TagInfo getTagInfo(String JavaDoc tagName) {
274         if (tagName != null) {
275             return (TagInfo) get( tagName.toLowerCase() );
276         }
277
278         return null;
279     }
280
281 }
Popular Tags