KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > jodd > servlet > HtmlEncoder


1 package jodd.servlet;
2
3 import java.io.UnsupportedEncodingException;
4 import java.net.URLDecoder;
5 import java.net.URLEncoder;
6
/**
 * Encodes text strings and URLs to be HTML-safe.
 *
 * <p>All methods are static and keep no per-call state. The two lookup tables
 * are public arrays for backward compatibility; treat them as read-only,
 * because every encoding method reads them on each call.
 */
public final class HtmlEncoder {

    /** Initial output buffer capacity, expressed as a multiple of the input length. */
    private static final float NEW_SIZE_FACTOR = 1.3f;

    /** Space handling: use the strict table entry, i.e. every space becomes {@code &nbsp;}. */
    private static final int SPACES_STRICT = 0;

    /** Space handling: spaces are copied through unchanged. */
    private static final int SPACES_PLAIN = 1;

    /** Space handling: runs of spaces alternate between a plain space and {@code &nbsp;}. */
    private static final int SPACES_SMART = 2;

    /**
     * Lookup table for use in encode() method.
     *
     * @see #encode
     */
    public static final String[] TABLE_HTML = new String[256];

    /**
     * Lookup table for use in encodeTextXxx() methods.
     *
     * @see #encodeText
     * @see #encodeTextSmart
     * @see #encodeTextStrict
     */
    public static final String[] TABLE_HTML_STRICT = new String[256];

    static {
        // Control characters are written as zero-padded, three-digit decimal references.
        for (int i = 0; i < 10; i++) {
            TABLE_HTML[i] = "&#00" + i + ";";
        }
        for (int i = 10; i < 32; i++) {
            TABLE_HTML[i] = "&#0" + i + ";";
        }
        // Codes 32..127 map to themselves (the five specials are overridden below).
        for (int i = 32; i < 128; i++) {
            TABLE_HTML[i] = String.valueOf((char) i);
        }
        // Codes 128..255 are written as decimal references.
        for (int i = 128; i < 256; i++) {
            TABLE_HTML[i] = "&#" + i + ";";
        }

        // special characters
        TABLE_HTML['\''] = "&#039;"; // apostrophe ('&apos;' doesn't work - it is not by the w3 specs)
        TABLE_HTML['\"'] = "&quot;"; // double quote
        TABLE_HTML['&'] = "&amp;";   // ampersand
        TABLE_HTML['<'] = "&lt;";    // lower than
        TABLE_HTML['>'] = "&gt;";    // greater than

        // strict table: same as above, plus format-preserving replacements
        System.arraycopy(TABLE_HTML, 0, TABLE_HTML_STRICT, 0, 256);
        TABLE_HTML_STRICT[' '] = "&nbsp;";
        TABLE_HTML_STRICT['\n'] = "<br>"; // ascii 10
        TABLE_HTML_STRICT['\r'] = "<br>"; // ascii 13
    }

    // ---------------------------------------------------------------- encoding

    /**
     * Encodes a string to HTML-safe text. Characters without a literal entry in
     * {@link #TABLE_HTML} are encoded as decimal character references, and five
     * special characters are replaced with their HTML values:
     * <ul>
     * <li>' with &amp;#039;</li>
     * <li>" with &amp;quot;</li>
     * <li>&amp; with &amp;amp;</li>
     * <li>&lt; with &amp;lt;</li>
     * <li>&gt; with &amp;gt;</li>
     * </ul>
     * A surrogate pair is written as one reference to the supplementary code
     * point it represents, not as two references to the individual surrogates.
     *
     * @param string input string, may be <code>null</code>
     *
     * @return HTML-safe string; empty string for <code>null</code> or empty input
     * @see #encodeText
     */
    public static String encode(String string) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }
        int n = string.length();
        StringBuilder out = new StringBuilder((int) (n * NEW_SIZE_FACTOR));
        int i = 0;
        while (i < n) {
            int codePoint = string.codePointAt(i);
            i += Character.charCount(codePoint);
            appendCodePoint(out, TABLE_HTML, codePoint);
        }
        return out.toString();
    }

    /**
     * Encodes text into HTML-safe text and preserves format. Additionally, the
     * following characters are replaced:
     * <ul>
     * <li>' ' with &amp;nbsp;</li>
     * <li>\n with &lt;br&gt;</li>
     * <li>\r with &lt;br&gt;</li>
     * </ul>
     * A CR immediately followed by LF produces a single &lt;br&gt;, so both CRLF
     * and LF texts are handled.
     *
     * <p>A common problem with this method is that the resulting spaces are not
     * breakable, so they may break the layout of the page.
     *
     * @param string input string, may be <code>null</code>
     *
     * @return HTML-safe format; empty string for <code>null</code> or empty input
     */
    public static String encodeTextStrict(String string) {
        return encodeFormatted(string, SPACES_STRICT);
    }

    /**
     * Encodes text into HTML-safe text and preserves format except spaces, which
     * are copied through unchanged. Additionally, the following characters are
     * replaced:
     * <ul>
     * <li>\n with &lt;br&gt;</li>
     * <li>\r with &lt;br&gt;</li>
     * </ul>
     * A CR immediately followed by LF produces a single &lt;br&gt;, so both CRLF
     * and LF texts are handled.
     *
     * @param string input string, may be <code>null</code>
     *
     * @return HTML-safe format; empty string for <code>null</code> or empty input
     */
    public static String encodeText(String string) {
        return encodeFormatted(string, SPACES_PLAIN);
    }

    /**
     * Encodes text into HTML-safe text and preserves format using smart spaces.
     * Additionally, the following characters are replaced:
     * <ul>
     * <li>\n with &lt;br&gt;</li>
     * <li>\r with &lt;br&gt;</li>
     * </ul>
     * A CR immediately followed by LF produces a single &lt;br&gt;, so both CRLF
     * and LF texts are handled.
     *
     * <p>Within a run of consecutive spaces the output alternates between a plain
     * space and &amp;nbsp;, starting with a plain space, so the width of the run
     * is preserved while line breaks remain available.
     *
     * @param string input string, may be <code>null</code>
     *
     * @return HTML-safe format; empty string for <code>null</code> or empty input
     */
    public static String encodeTextSmart(String string) {
        return encodeFormatted(string, SPACES_SMART);
    }

    /**
     * Shared worker of the encodeTextXxx() methods: escapes through
     * {@link #TABLE_HTML_STRICT}, collapses CRLF into one line break and treats
     * spaces according to the given mode.
     *
     * @param string    input string, may be <code>null</code>
     * @param spaceMode one of SPACES_STRICT, SPACES_PLAIN or SPACES_SMART
     *
     * @return encoded text; empty string for <code>null</code> or empty input
     */
    private static String encodeFormatted(String string, int spaceMode) {
        if ((string == null) || (string.length() == 0)) {
            return "";
        }
        int n = string.length();
        StringBuilder out = new StringBuilder((int) (n * NEW_SIZE_FACTOR));
        int prev = 0;
        boolean nbspNext = false; // smart mode only: should the next space of the run be &nbsp;?
        int i = 0;
        while (i < n) {
            int codePoint = string.codePointAt(i);
            i += Character.charCount(codePoint);

            if ((codePoint == ' ') && (spaceMode != SPACES_STRICT)) {
                if (spaceMode == SPACES_SMART) {
                    if (prev != ' ') {
                        nbspNext = false; // a new run always starts with a breakable space
                    }
                    out.append(nbspNext ? "&nbsp;" : " ");
                    nbspNext = !nbspNext;
                } else {
                    out.append(' ');
                }
            } else if ((codePoint == '\n') && (prev == '\r')) {
                // previously '\r' (CR) was encoded, so skip '\n' (LF)
            } else {
                appendCodePoint(out, TABLE_HTML_STRICT, codePoint);
            }
            prev = codePoint;
        }
        return out.toString();
    }

    /**
     * Appends the table entry for a code point, or a decimal character
     * reference when the code point lies beyond the table.
     */
    private static void appendCodePoint(StringBuilder out, String[] table, int codePoint) {
        if (codePoint < table.length) {
            out.append(table[codePoint]);
        } else {
            out.append("&#").append(codePoint).append(';');
        }
    }

    // ---------------------------------------------------------------- URL encode/decode

    /**
     * URL-encodes a string using ISO-8859-1 encoding. Null strings are converted
     * to empty ones. Unfortunately, this encoding is not compatible with the
     * JavaScript functions escape/unescape.
     *
     * @param string input string, may be <code>null</code>
     *
     * @return URL-encoded string
     */
    public static String encodeUrl(String string) {
        return encodeUrl(string, "ISO-8859-1");
    }

    /**
     * URL-encodes a string using the given encoding. Null strings are converted
     * to empty ones.
     *
     * @param string   input string, may be <code>null</code>
     * @param encoding name of the character encoding to use
     *
     * @return URL-encoded string, or <code>null</code> if the encoding is not
     *         supported
     */
    public static String encodeUrl(String string, String encoding) {
        if (string == null) {
            return "";
        }
        try {
            return URLEncoder.encode(string, encoding);
        } catch (UnsupportedEncodingException e) {
            // Kept for backward compatibility: callers are signalled with null, not an exception.
            return null;
        }
    }

    /**
     * Decodes a URL-encoded string using ISO-8859-1 encoding. Null strings are
     * converted to empty ones.
     *
     * @param string URL-encoded input, may be <code>null</code>
     *
     * @return decoded string
     */
    public static String decodeUrl(String string) {
        return decodeUrl(string, "ISO-8859-1");
    }

    /**
     * Decodes a URL-encoded string using the given encoding. Null strings are
     * converted to empty ones.
     *
     * @param string   URL-encoded input, may be <code>null</code>
     * @param encoding name of the character encoding to use
     *
     * @return decoded string, or <code>null</code> if the encoding is not
     *         supported
     */
    public static String decodeUrl(String string, String encoding) {
        if (string == null) {
            return "";
        }
        try {
            return URLDecoder.decode(string, encoding);
        } catch (UnsupportedEncodingException e) {
            // Kept for backward compatibility: callers are signalled with null, not an exception.
            return null;
        }
    }

}
295
Popular Tags