KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlcleaner > TagInfo


1 /* Copyright (c) 2006-2007, Vladimir Nikic
2     All rights reserved.
3     
4     Redistribution and use of this software in source and binary forms,
5     with or without modification, are permitted provided that the following
6     conditions are met:
7     
8     * Redistributions of source code must retain the above
9       copyright notice, this list of conditions and the
10       following disclaimer.
11     
12     * Redistributions in binary form must reproduce the above
13       copyright notice, this list of conditions and the
14       following disclaimer in the documentation and/or other
15       materials provided with the distribution.
16     
17     * The name of HtmlCleaner may not be used to endorse or promote
18       products derived from this software without specific prior
19       written permission.
20
21     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31     POSSIBILITY OF SUCH DAMAGE.
32     
33     You can contact Vladimir Nikic by sending e-mail to
34     nikic_vladimir@yahoo.com. Please include the word "HtmlCleaner" in the
35     subject line.
36 */

37
38 package org.htmlcleaner;
39
40 import java.util.*;
41
42 /**
43  * <p>
44  * Class contains information about single HTML tag.<br/>
45  * It also contains rules to for tag balancing. For each tag, list of dependant
46  * tags may be defined. In order to more easely describe those rules, several
47  * prefixed are introduced.
48  * </p>
49  * <p>
50  * For each tag, list of dependant tags may be specified using following prefixes:
51  * <ul>
52  * <li>
53  * <h3>!</h3> fatal tag - required outer tag - the tag will be ignored during
54  * parsing (will be skipped) if this fatal tag is missing. For example, most web
55  * browsers ignore elements TD, TR, TBODY if they are not in the context of TABLE tag.
56  * </li>
57  * <li>
58  * <h3>+</h3> required enclosing tag - if there is no such, it is implicitely
59  * created. For example if TD is out of TR - open TR is created before.
60  * </li>
61  * <li>
62  * <h3>-</h3> permitted tag - it is not allowed to occure inside - for example
63  * FORM cannot be inside other FORM and it will be ignored during cleanup.
64  * </li>
65  * <li>
66  * <h3>#</h3> allowed children tags - for example TR allowes TD and TH. If there
67  * are some dependant allowed tags defined then cleaner ignores other tags, treating
68  * them as unallowed, unless they are in some other relationship with this tag.
69  * </li>
70  * <li>
71  * <h3>^</h3> higher level tags - for example for TR higher tags are THEAD, TBODY, TFOOT.
72  * </li>
73  * <li>
74  * <h3>&</h3> tags that must be closed and copied - for example, in
75  * <code>&lt;a HREF="#"&gt;&lt;div&gt;....</code> tag A must be closed before DIV but
76  * copied again inside DIV.
77  * </li>
78  * </ul>
79  * </p>
80  *
81  * <p>
82  * Tag TR for instance (table row) may define the following dependancies:
83  * <code>!table,+tbody,^thead,^tfoot,#td,#th,tr,caption,colgroup</code>
84  * meaning the following: <br>
85  * <li>TR must be in context of TABLE, otherwise it will be ignored,</li>
86  * <li>TR may can be directly inside TBODY, TFOOT and THEAD, otherwise TBODY will be
87  * implicitely created in front of it.</li>
88  * <li>TR can contain TD and TD, all other tags and content will be pushed out of current
89  * limiting context, in the case of html tables, in front of enclosing TABLE tag.</li>
90  * <li>if previous open tag is one of TR, CAPTION or COLGROUP, it will be implicitely closed.</li>
91  * </p>
92  *
93  * Created by Vladimir Nikic.<br/>
94  * Date: November, 2006
95  */

96 public class TagInfo {
97
98     static final int HEAD_AND_BODY = 0;
99     static final int HEAD = 1;
100     static final int BODY = 2;
101     
102     static String JavaDoc CONTENT_ALL = "ALL";
103     static String JavaDoc CONTENT_NONE = "NONE";
104     static String JavaDoc CONTENT_TEXT = "TEXT";
105
106     private String JavaDoc name;
107     private String JavaDoc contentType;
108     private Set mustCloseTags = new HashSet();
109     private Set higherTags = new HashSet();
110     private Set childTags = new HashSet();
111     private Set permittedTags = new HashSet();
112     private Set copyTags = new HashSet();
113     private int belongsTo = BODY;
114     private String JavaDoc requiredParent = null;
115     private String JavaDoc fatalTag = null;
116     private boolean deprecated = false;
117     private boolean unique = false;
118     private boolean ignorePermitted = false;
119
120     public TagInfo( String JavaDoc name, String JavaDoc contentType, int belongsTo, boolean depricated,
121                     boolean unique, boolean ignorePermitted, String JavaDoc dependancies ) {
122         this.name = name;
123         this.contentType = contentType;
124         this.belongsTo = belongsTo;
125         this.deprecated = depricated;
126         this.unique = unique;
127         this.ignorePermitted = ignorePermitted;
128
129         // defines dependant tags
130
if (dependancies != null) {
131             StringTokenizer tokenizer = new StringTokenizer(dependancies, ",.;| ");
132             while (tokenizer.hasMoreTokens()) {
133                 String JavaDoc currTag = tokenizer.nextToken().toLowerCase();
134                 addDependancy(currTag);
135             }
136         }
137     }
138     
139     public void addDependancy(String JavaDoc dependantTagName) {
140         if (dependantTagName.startsWith("!")) {
141             String JavaDoc tagName = dependantTagName.substring(1);
142             this.fatalTag = tagName;
143             this.higherTags.add(tagName);
144         } else if (dependantTagName.startsWith("+")) {
145             String JavaDoc tagName = dependantTagName.substring(1);
146             this.requiredParent = dependantTagName.substring(1);
147             this.higherTags.add(tagName);
148         } else if (dependantTagName.startsWith("-")) {
149             this.permittedTags.add( dependantTagName.substring(1) );
150         } else if (dependantTagName.startsWith("#")) {
151             this.childTags.add( dependantTagName.substring(1) );
152         } else if (dependantTagName.startsWith("^")) {
153             this.higherTags.add( dependantTagName.substring(1) );
154         } else if (dependantTagName.startsWith("&")) {
155             String JavaDoc tagName = dependantTagName.substring(1);
156             this.copyTags.add(tagName);
157             this.mustCloseTags.add(tagName);
158         } else if ( !"".equals(dependantTagName.trim()) ) {
159             this.mustCloseTags.add(dependantTagName);
160         }
161     }
162
163     // getters and setters
164

165     public String JavaDoc getName() {
166         return name;
167     }
168
169     public void setName(String JavaDoc name) {
170         this.name = name;
171     }
172
173     public String JavaDoc getContentType() {
174         return contentType;
175     }
176
177     public void setContentType(String JavaDoc contentType) {
178         this.contentType = contentType;
179     }
180
181     public Set getMustCloseTags() {
182         return mustCloseTags;
183     }
184
185     public void setMustCloseTags(Set mustCloseTags) {
186         this.mustCloseTags = mustCloseTags;
187     }
188
189     public Set getHigherTags() {
190         return higherTags;
191     }
192
193     public void setHigherTags(Set higherTags) {
194         this.higherTags = higherTags;
195     }
196
197     public Set getChildTags() {
198         return childTags;
199     }
200
201     public void setChildTags(Set childTags) {
202         this.childTags = childTags;
203     }
204
205     public Set getPermittedTags() {
206         return permittedTags;
207     }
208
209     public void setPermittedTags(Set permittedTags) {
210         this.permittedTags = permittedTags;
211     }
212
213     public Set getCopyTags() {
214         return copyTags;
215     }
216
217     public void setCopyTags(Set copyTags) {
218         this.copyTags = copyTags;
219     }
220
221     public String JavaDoc getRequiredParent() {
222         return requiredParent;
223     }
224
225     public void setRequiredParent(String JavaDoc requiredParent) {
226         this.requiredParent = requiredParent;
227     }
228
229     public int getBelongsTo() {
230         return belongsTo;
231     }
232
233     public void setBelongsTo(int belongsTo) {
234         this.belongsTo = belongsTo;
235     }
236
237     public String JavaDoc getFatalTag() {
238         return fatalTag;
239     }
240
241     public void setFatalTag(String JavaDoc fatalTag) {
242         this.fatalTag = fatalTag;
243     }
244
245     public boolean isDeprecated() {
246         return deprecated;
247     }
248
249     public void setDeprecated(boolean deprecated) {
250         this.deprecated = deprecated;
251     }
252
253     public boolean isUnique() {
254         return unique;
255     }
256
257     public void setUnique(boolean unique) {
258         this.unique = unique;
259     }
260
261     public boolean isIgnorePermitted() {
262         return ignorePermitted;
263     }
264
265     public void setIgnorePermitted(boolean ignorePermitted) {
266         this.ignorePermitted = ignorePermitted;
267     }
268
269     // other functionality
270

271     boolean allowsBody() {
272         return !CONTENT_NONE.equals(contentType);
273     }
274     
275     boolean isHigher(String JavaDoc tagName) {
276         return higherTags.contains(tagName);
277     }
278     
279     boolean isCopy(String JavaDoc tagName) {
280         return copyTags.contains(tagName);
281     }
282
283     boolean hasCopyTags() {
284         return !copyTags.isEmpty();
285     }
286     
287     boolean hasPermittedTags() {
288         return !permittedTags.isEmpty();
289     }
290
291     boolean isHeadTag() {
292         return belongsTo == HEAD;
293     }
294     
295     boolean isHeadAndBodyTag() {
296         return belongsTo == HEAD || belongsTo == HEAD_AND_BODY;
297     }
298
299     boolean isMustCloseTag(TagInfo tagInfo) {
300         if (tagInfo != null) {
301             return mustCloseTags.contains( tagInfo.getName() ) || tagInfo.contentType == CONTENT_TEXT;
302         }
303
304         return false;
305     }
306
307     boolean allowsItem(BaseToken token) {
308         if ( contentType != CONTENT_NONE && token instanceof TagToken ) {
309             TagToken tagToken = (TagToken) token;
310             String JavaDoc tagName = tagToken.getName();
311             if ( "script".equals(tagName) ) {
312                 return true;
313             }
314         }
315
316         if (contentType == CONTENT_ALL) {
317             if ( !childTags.isEmpty() ) {
318                 return token instanceof TagToken ? childTags.contains( ((TagToken)token).getName() ) : false;
319             } else if ( !permittedTags.isEmpty() ) {
320                 return token instanceof TagToken ? !permittedTags.contains( ((TagToken)token).getName() ) : true;
321             } else {
322                 return true;
323             }
324         } else if (contentType == CONTENT_TEXT) {
325             return !(token instanceof TagToken);
326         }
327         
328         return false;
329     }
330     
331     boolean allowsAnything() {
332         return contentType == CONTENT_ALL && childTags.size() == 0;
333     }
334
335 }
Popular Tags