KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > jforum > util > SafeHtml


1 /*
2  * Copyright (c) Rafael Steil
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms,
6  * with or without modification, are permitted provided
7  * that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above
10  * copyright notice, this list of conditions and the
11  * following disclaimer.
12  * 2) Redistributions in binary form must reproduce the
13  * above copyright notice, this list of conditions and
14  * the following disclaimer in the documentation and/or
15  * other materials provided with the distribution.
16  * 3) Neither the name of "Rafael Steil" nor
17  * the names of its contributors may be used to endorse
18  * or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
22  * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
24  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
27  * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
32  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
34  * IN CONTRACT, STRICT LIABILITY, OR TORT
35  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
36  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
37  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
38  *
39  * This file creation date: 27/09/2004 23:59:10
40  * The JForum Project
41  * http://www.jforum.net
42  */

43 package net.jforum.util;
44
45 import java.util.HashSet JavaDoc;
46 import java.util.Iterator JavaDoc;
47 import java.util.Set JavaDoc;
48 import java.util.Vector JavaDoc;
49
50 import net.jforum.exceptions.ForumException;
51 import net.jforum.util.preferences.ConfigKeys;
52 import net.jforum.util.preferences.SystemGlobals;
53
54 import org.apache.log4j.Logger;
55 import org.htmlparser.Attribute;
56 import org.htmlparser.Node;
57 import org.htmlparser.Tag;
58 import org.htmlparser.lexer.Lexer;
59 import org.htmlparser.nodes.TextNode;
60
61 /**
62  * Process text with html and remove possible
63  * malicious tags and attributes.
64  *
65  * @author Rafael Steil
66  * @version $Id: SafeHtml.java,v 1.11 2006/02/21 13:59:50 rafaelsteil Exp $
67  */

68 public class SafeHtml
69 {
70     private static final Logger logger = Logger.getLogger(SafeHtml.class);
71     private static Set JavaDoc welcomeTags;
72     
73     static {
74         welcomeTags = new HashSet JavaDoc();
75         String JavaDoc[] tags = SystemGlobals.getValue(ConfigKeys.HTML_TAGS_WELCOME).toUpperCase().split(",");
76
77         for (int i = 0; i < tags.length; i++) {
78             welcomeTags.add(tags[i].trim());
79         }
80     }
81     
82     public SafeHtml() {}
83     
84     private String JavaDoc processAllNodes(String JavaDoc contents, boolean onlyEvaluateJs) throws Exception JavaDoc
85     {
86         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(512);
87         
88         Lexer lexer = new Lexer(contents);
89         Node node;
90         
91         while ((node = lexer.nextNode()) != null) {
92             boolean isTextNode = node instanceof TextNode;
93             
94             if (isTextNode) {
95                 String JavaDoc text = node.toHtml();
96                 
97                 if (text.indexOf('>') > -1 || text.indexOf('<') > -1) {
98                     text = text.replaceAll("<", "&lt;")
99                         .replaceAll(">", "&gt;")
100                         .replaceAll("\"", "&quot;");
101                     node.setText(text);
102                 }
103             }
104             else if (onlyEvaluateJs) {
105                 this.checkAndValidateAttributes((Tag)node);
106             }
107             
108             if (isTextNode || onlyEvaluateJs || this.isTagWelcome(node)) {
109                 sb.append(node.toHtml());
110             }
111             else {
112                 sb.append(node.toHtml().replaceAll("<", "&lt;").replaceAll(">", "&gt;"));
113             }
114         }
115         
116         return sb.toString();
117     }
118     
119     private boolean isTagWelcome(Node node)
120     {
121         Tag tag = (Tag)node;
122
123         if (!welcomeTags.contains(tag.getTagName())) {
124             return false;
125         }
126         
127         this.checkAndValidateAttributes(tag);
128         
129         return true;
130     }
131     
132     private void checkAndValidateAttributes(Tag tag)
133     {
134         Vector JavaDoc newAttributes = new Vector JavaDoc();
135
136         for (Iterator JavaDoc iter = tag.getAttributesEx().iterator(); iter.hasNext(); ) {
137             Attribute a = (Attribute)iter.next();
138
139             String JavaDoc name = a.getName();
140             if (name != null) {
141                 name = name.toLowerCase();
142                 if (("href".equals(name) || "src".equals(name)) && a.getValue() != null) {
143                     if (a.getValue().toLowerCase().indexOf("javascript:") > -1) {
144                         a.setValue("#");
145                     }
146                     else if (a.getValue().indexOf("&#") > -1) {
147                         a.setValue(a.getValue().replaceAll("&#", "&amp;#"));
148                     }
149                     
150                     newAttributes.add(a);
151                 }
152                 else if (!name.startsWith("on") && !name.startsWith("style")) {
153                     newAttributes.add(a);
154                 }
155             }
156             else {
157                 newAttributes.add(a);
158             }
159         }
160         
161         tag.setAttributesEx(newAttributes);
162     }
163     
164     /**
165      * Given a string input, tries to avoid all javascript input
166      * @param contents
167      * @return the filtered data
168      */

169     public static String JavaDoc avoidJavascript(String JavaDoc contents)
170     {
171         try {
172             return new SafeHtml().processAllNodes(contents, true);
173         }
174         catch (Exception JavaDoc e) {
175             throw new ForumException("Problems while parsing HTML: " + e, e);
176         }
177     }
178
179     /**
180      * Parers a text and removes all unwanted tags and javascript code
181      * @param contents the contents to parse
182      * @return the filtered data
183      */

184     public static String JavaDoc makeSafe(String JavaDoc contents)
185     {
186         if (contents == null || contents.trim().length() == 0) {
187             return contents;
188         }
189         
190         try {
191             contents = new SafeHtml().processAllNodes(contents, false);
192         }
193         catch (Exception JavaDoc e) {
194             throw new ForumException("Problems while parsing HTML: " + e, e);
195         }
196         
197         return contents;
198     }
199 }
200
Popular Tags