KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > jcorporate > expresso > core > security > filters > HtmlPlusURLFilter


1 /* ====================================================================
2  * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3  *
4  * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * 3. The end-user documentation included with the redistribution,
19  * if any, must include the following acknowledgment:
20  * "This product includes software developed by Jcorporate Ltd.
21  * (http://www.jcorporate.com/)."
22  * Alternately, this acknowledgment may appear in the software itself,
23  * if and wherever such third-party acknowledgments normally appear.
24  *
25  * 4. "Jcorporate" and product names such as "Expresso" must
26  * not be used to endorse or promote products derived from this
27  * software without prior written permission. For written permission,
28  * please contact info@jcorporate.com.
29  *
30  * 5. Products derived from this software may not be called "Expresso",
31  * or other Jcorporate product names; nor may "Expresso" or other
32  * Jcorporate product names appear in their name, without prior
33  * written permission of Jcorporate Ltd.
34  *
35  * 6. No product derived from this software may compete in the same
36  * market space, i.e. framework, without prior written permission
37  * of Jcorporate Ltd. For written permission, please contact
38  * partners@jcorporate.com.
39  *
40  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43  * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * ====================================================================
53  *
54  * This software consists of voluntary contributions made by many
55  * individuals on behalf of the Jcorporate Ltd. Contributions back
56  * to the project(s) are encouraged when you make modifications.
57  * Please send them to support@jcorporate.com. For more information
58  * on Jcorporate Ltd. and its products, please see
59  * <http://www.jcorporate.com/>.
60  *
61  * Portions of this software are based upon other open source
62  * products and are subject to their respective licenses.
63  */

64
65 package com.jcorporate.expresso.core.security.filters;
66
67 import com.jcorporate.expresso.core.controller.ControllerRequest;
68 import com.jcorporate.expresso.core.controller.ServletControllerRequest;
69 import com.jcorporate.expresso.core.db.DBConnection;
70 import com.jcorporate.expresso.core.misc.StringUtil;
71 import com.jcorporate.expresso.services.dbobj.Setup;
72
73 import javax.servlet.http.HttpServletRequest JavaDoc;
74
/**
 * This class provides a filter implementation for HTML output, protecting against
 * XSS exploits. In addition, it creates anchor ({@code <a>}) tags for anything
 * that starts with 'http://', 'www.', etc.
 *
 * @author Larry Hamel and Patti Schank
 */

82 public class HtmlPlusURLFilter
83         extends HtmlFilter {
84
85     // URL types to search for (to add anchor tag)
86
public static final String JavaDoc[] URL_TYPES = {
87         "http://", "https://", "ftp://", "mailto:", "news:"
88     };
89
90     // These will be prepended with 'http://"
91
public static final String JavaDoc[] URL_INFORMAL_PREFIXES = {
92         "www.", "www2."
93     };
94
95     /**
96      * Name for Setup value which decides if we are limiting anchor labels
97      */

98     public static final String JavaDoc MAX_CHARS_IN_URL_LABEL = "MaxCharsURL_Label";
99
100
101     /**
102      * No-args constructor required
103      */

104     public HtmlPlusURLFilter()
105             throws IllegalArgumentException JavaDoc {
106     }
107
108     /**
109      * Constructor for passing strings and their replacements
110      *
111      * @param specialStringList Strings to replace
112      * @param replaceList The replacement strings
113      */

114     public HtmlPlusURLFilter(String JavaDoc[] specialStringList, String JavaDoc[] replaceList)
115             throws IllegalArgumentException JavaDoc {
116         super(specialStringList, replaceList);
117     }
118
119     /**
120      * This filter HTML encodes all special characters defined by the replacement
121      * list. If a particular character doesn't exist in the map, then the chracter
122      * is passed appended into the result set.
123      * If it does exist, then the value the special character maps to is appended
124      * into the list instead.
125      *
126      * @param data The string to encode.
127      * @return The filtered string
128      */

129     public String JavaDoc standardFilter(String JavaDoc data) {
130         String JavaDoc result = super.standardFilter(data);
131
132         return insertHrefTags(result);
133     } /* standardFilter(String) */
134
135
136     /**
137      * Given a url string, if it's null or equals "" then just return
138      * it as is. Otherwise check if it is valid form, that is, starts
139      * with http:// or ftp:// or some other valid url prefix. If not,
140      * prepend http://.
141      *
142      * @param url The url string
143      * @return The url with http:// prepended, if needed
144      */

145     public static String JavaDoc addHttpPrefixIfNeeded(String JavaDoc url) {
146         if (StringUtil.isBlankOrNull(url)) {
147             return url;
148         }
149         String JavaDoc validUrl = url;
150         if (!hasValidUrlPrefix(url)) {
151             validUrl = "http://" + url;
152         }
153         return validUrl;
154     }
155
156     /**
157      * Return true if the url has a valid prefix, like http://
158      *
159      * @param url
160      * @return
161      */

162     public static boolean hasValidUrlPrefix(String JavaDoc url) {
163         boolean valid = false;
164         for (int i = 0; i < URL_TYPES.length; i++) {
165             if (url.startsWith(URL_TYPES[i])) {
166                 valid = true;
167             }
168         }
169         return valid;
170     }
171
172     /**
173      * Return true if the url is valid. Checks that it is not
174      * null, that it has a valid prefix, and that it contains
175      * a dot (must, to have a domain name) and at least 2 characters
176      * after the dot (the domain). Add more tests here as appropriate.
177      *
178      * @param url The candidate URL to check
179      * @return True if the URL is valid
180      */

181     public static boolean isValidUrl(String JavaDoc url) {
182         if (StringUtil.isBlankOrNull(url)) {
183             return false;
184         }
185         if (!hasValidUrlPrefix(url)) {
186             return false;
187         }
188         // does it have a domain name after a dot?
189
int dotIndex = url.indexOf(".");
190         if (dotIndex < 0) {
191             return false;
192         }
193         String JavaDoc domain = url.substring(dotIndex);
194         if (domain.length() < 2) {
195             return false;
196         }
197         return true;
198     }
199
200
201     /**
202      * Get web server address
203      *
204      * @return the address of this web server
205      */

206     public static String JavaDoc getWebHostPort(ControllerRequest request) {
207         ServletControllerRequest sreq = (ServletControllerRequest) request;
208         HttpServletRequest JavaDoc hreq = (HttpServletRequest JavaDoc) sreq.getServletRequest();
209         String JavaDoc serverDomainName = hreq.getServerName();
210         int serverPort = hreq.getServerPort();
211
212         if (serverPort != 80) {
213             serverDomainName = serverDomainName + ":" + serverPort;
214         }
215         return serverDomainName;
216     }
217
218     /**
219      * Insert a href tag around any http, https, www, or www2 strings
220      *
221      * @param s The string to search in and insert
222      * @return A String with <a href></a> tags and http:// if needed
223      */

224     public static String JavaDoc insertHrefTags(String JavaDoc s) {
225         boolean appendHttp = false;
226         String JavaDoc result = s;
227
228         int hIndex = -1;
229         for (int i = 0; i < URL_TYPES.length; i++) {
230             String JavaDoc urlType = URL_TYPES[i];
231             hIndex = s.indexOf(urlType);
232             if (hIndex != -1) {
233                 break;
234             }
235         }
236
237         if (hIndex == -1) {
238             for (int i = 0; i < URL_INFORMAL_PREFIXES.length; i++) {
239                 String JavaDoc urlType = URL_INFORMAL_PREFIXES[i];
240                 hIndex = s.indexOf(urlType);
241                 if (hIndex != -1) {
242                     appendHttp = true;
243                     break;
244                 }
245
246                 hIndex = s.indexOf(urlType.toUpperCase());
247                 if (hIndex != -1) {
248                     appendHttp = true;
249                     break;
250                 }
251             }
252
253         }
254
255         if (hIndex >= 0) {
256             int endIndex = findEndOfHref(s, hIndex);
257             String JavaDoc href = s.substring(hIndex, endIndex);
258
259             // XSS protection: cannot have '<', and we have already stripped for this
260
// in initial filtering. However, within anchor, cannot have equivalent
261
// hex or &lt; within URL, so that <script> cannot be entered
262
// todo use String.replace() regexp in jdk1.4 after expresso raises requirements
263
href = StringUtil.replaceAll(href, "&lt;", "");
264             href = StringUtil.replaceAll(href, "&LT;", "");
265             href = StringUtil.replaceAll(href, "&lT;", "");
266             href = StringUtil.replaceAll(href, "&Lt;", "");
267             href = StringUtil.replaceAll(href, "%3c", "");
268             href = StringUtil.replaceAll(href, "%3C", "");
269
270             StringBuffer JavaDoc link = new StringBuffer JavaDoc();
271             link.append(" <a HREF=\"");
272             if (appendHttp) {
273                 link.append("http://");
274             }
275             link.append(href);
276             link.append("\" target=\"_blank\">");
277
278             // If string is long, use ellipses if this setup value is set
279
// Unfortunately, we don't have access to dbname here
280
String JavaDoc max = Setup.getValueUnrequired(DBConnection.DEFAULT_DB_CONTEXT_NAME, MAX_CHARS_IN_URL_LABEL);
281             if (max != null) {
282                 try {
283                     int maxchars = Integer.parseInt(max);
284                     if (href.length() > maxchars) {
285                         link.append(href.substring(0, maxchars));
286                         link.append("&#133"); // ellipses
287
} else {
288                         link.append(href);
289                     }
290                 } catch (Exception JavaDoc e) {
291                     e.printStackTrace();
292                 }
293             } else {
294                 link.append(href);
295             }
296             link.append("</a>");
297
298             String JavaDoc linksBefore = "";
299             String JavaDoc linksAfter = "";
300
301             // recurse
302
if (hIndex > 5) {
303                 linksBefore = insertHrefTags(s.substring(0, hIndex));
304             }
305             if (endIndex != s.length()) {
306                 linksAfter = insertHrefTags(s.substring(endIndex));
307             }
308
309
310             return linksBefore + link.toString() + linksAfter;
311
312         } else {
313             return result;
314         }
315     }
316
317     /**
318      * Finds the end of a hyperlink
319      *
320      * @param s The string
321      * @param start The url's starting index
322      */

323     public static int findEndOfHref(String JavaDoc s, int start) {
324         char[] chars = s.toCharArray();
325         int end = s.length();
326
327         for (int i = start; i < end; i++) {
328             char c = chars[i];
329
330             if (Character.isLetterOrDigit(c)) {
331                 continue;
332             }
333
334             /**
335              * Legal punctuation in URLs (see RFC 2396
336              * ftp://ftp.isi.edu/in-notes/rfc2396.txt)
337              ; / ? : @ & = +
338              $ , - _" . ! ~ * ' ( )
339              % #
340              */

341             switch (c) { // legal punctuation in URLS
342
case '.':
343                 case ',':
344                 case ')':
345                 case '(':
346                 case '@':
347                 case '?':
348                 case '&':
349                 case '=':
350                 case '-':
351                 case '_':
352                 case '/':
353                 case '#':
354                 case ':':
355                 case '~':
356                 case '+':
357                 case ';':
358                 case '!':
359                 case '*':
360                 case '\'':
361                 case '$':
362                     continue; // legal punctuation in URL
363

364                 case '%':
365                     // Special case, indicates a URL encoding follows
366
// Malicious XSS could abuse encoding to slip scripts
367
// through. Only allow encoding of safe hex characters
368
if ((i < (end - 2)) && (isSafeURLEncoding(chars[i + 1], chars[i + 2]))) {
369                         continue;
370                     } else {
371                         return i; // unsafe URL encoding
372
}
373
374                 default:
375                     return i; // illegal punctuation--must be end of URL
376
}
377         }
378         return end;
379     }
380
381     /**
382      * Return true if we allow the given URL encoding (after a %).
383      * See http://www.blooberry.com/indexdot/html/topics/urlencoding.htm
384      * To stop XSS attacks, definitely don't allow:
385      * the less than symbol < (3C) and the greater than symbol > (3E)
386      * 00-1F hex (ascii control characters)
387      * 80-FF hex (non-ascii, by definition not legal)
388      * <p/>
389      * For extra safety, let's not allow the following (add later if needed)
390      * quote (%27), left paren (%28), right paren (%29)
391      * left bracket (7B), right bracket (7D)
392      * <p/>
393      * Okay to allow as encoded (might be misunderstood within URLS):s
394      * space (20), ! (21), " (22), # (23), $ (24)
395      * % (25), & (26), * (2A), + (2B), comma (2C)
396      * - (2D), period (2E), / (2F), : (3A), ; (3B),
397      * = (3D), ? (3F), @ (40)
398      * | (7C), \ (5C), ~ (7E)
399      * <p/>
400      * The following are also okay, but shouldn't be encoded anyway, so don't
401      * bother checking for these for now:
402      * digits: 30-39
403      * uppercase letters: 41 - 5A
404      * lowercase letters: 61 - 7A
405      */

406     private static boolean isSafeURLEncoding(char c1, char c2) {
407         String JavaDoc[] allowedEncodings = {"20", "21", "22", "23", "24", "25",
408                                      "26", "2A", "2B", "2C", "2D", "2E", "2F",
409                                      "3A", "3B", "3D", "3F", "40", "7C",
410                                      "5C", "7E"};
411
412         String JavaDoc encodedCharStr = String.valueOf(c1) + String.valueOf(c2);
413
414         // test for containment of safe encoding characters
415
for (int i = 0; i < allowedEncodings.length; i++) {
416             if (encodedCharStr.startsWith(allowedEncodings[i])) {
417                 return true;
418             }
419         }
420         // otherwise assume encoded characters are unsafe
421
return false;
422     }
423 }
Popular Tags