KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > lang > StringEscapeUtils


1 /*
2  * Copyright 2002-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.commons.lang;
17
18 import java.io.IOException JavaDoc;
19 import java.io.Writer JavaDoc;
20
21 import org.apache.commons.lang.exception.NestableRuntimeException;
22
23 /**
24  * <p>Escapes and unescapes <code>String</code>s for
25  * Java, Java Script, HTML, XML, and SQL.</p>
26  *
27  * @author Apache Jakarta Turbine
28  * @author GenerationJavaCore library
29  * @author Purple Technology
30  * @author <a HREF="mailto:bayard@generationjava.com">Henri Yandell</a>
31  * @author <a HREF="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
32  * @author Antony Riley
33  * @author Helge Tesgaard
34  * @author <a HREF="sean@boohai.com">Sean Brown</a>
35  * @author <a HREF="mailto:ggregory@seagullsw.com">Gary Gregory</a>
36  * @author Phil Steitz
37  * @author Pete Gieser
38  * @since 2.0
39  * @version $Id: StringEscapeUtils.java 165657 2005-05-02 18:31:49Z ggregory $
40  */

41 public class StringEscapeUtils {
42
43     /**
44      * <p><code>StringEscapeUtils</code> instances should NOT be constructed in
45      * standard programming.</p>
46      *
47      * <p>Instead, the class should be used as:
48      * <pre>StringEscapeUtils.escapeJava("foo");</pre></p>
49      *
50      * <p>This constructor is public to permit tools that require a JavaBean
51      * instance to operate.</p>
52      */

53     public StringEscapeUtils() {
54     }
55
56     // Java and JavaScript
57
//--------------------------------------------------------------------------
58
/**
59      * <p>Escapes the characters in a <code>String</code> using Java String rules.</p>
60      *
61      * <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
62      *
63      * <p>So a tab becomes the characters <code>'\\'</code> and
64      * <code>'t'</code>.</p>
65      *
66      * <p>The only difference between Java strings and JavaScript strings
67      * is that in JavaScript, a single quote must be escaped.</p>
68      *
69      * <p>Example:
70      * <pre>
71      * input string: He didn't say, "Stop!"
72      * output string: He didn't say, \"Stop!\"
73      * </pre>
74      * </p>
75      *
76      * @param str String to escape values in, may be null
77      * @return String with escaped values, <code>null</code> if null string input
78      */

79     public static String JavaDoc escapeJava(String JavaDoc str) {
80         return escapeJavaStyleString(str, false);
81     }
82
83     /**
84      * <p>Escapes the characters in a <code>String</code> using Java String rules to
85      * a <code>Writer</code>.</p>
86      *
87      * <p>A <code>null</code> string input has no effect.</p>
88      *
89      * @see #escapeJava(java.lang.String)
90      * @param out Writer to write escaped string into
91      * @param str String to escape values in, may be null
92      * @throws IllegalArgumentException if the Writer is <code>null</code>
93      * @throws IOException if error occurs on underlying Writer
94      */

95     public static void escapeJava(Writer JavaDoc out, String JavaDoc str) throws IOException JavaDoc {
96         escapeJavaStyleString(out, str, false);
97     }
98
99     /**
100      * <p>Escapes the characters in a <code>String</code> using JavaScript String rules.</p>
101      * <p>Escapes any values it finds into their JavaScript String form.
102      * Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
103      *
104      * <p>So a tab becomes the characters <code>'\\'</code> and
105      * <code>'t'</code>.</p>
106      *
107      * <p>The only difference between Java strings and JavaScript strings
108      * is that in JavaScript, a single quote must be escaped.</p>
109      *
110      * <p>Example:
111      * <pre>
112      * input string: He didn't say, "Stop!"
113      * output string: He didn\'t say, \"Stop!\"
114      * </pre>
115      * </p>
116      *
117      * @param str String to escape values in, may be null
118      * @return String with escaped values, <code>null</code> if null string input
119      */

120     public static String JavaDoc escapeJavaScript(String JavaDoc str) {
121         return escapeJavaStyleString(str, true);
122     }
123
124     /**
125      * <p>Escapes the characters in a <code>String</code> using JavaScript String rules
126      * to a <code>Writer</code>.</p>
127      *
128      * <p>A <code>null</code> string input has no effect.</p>
129      *
130      * @see #escapeJavaScript(java.lang.String)
131      * @param out Writer to write escaped string into
132      * @param str String to escape values in, may be null
133      * @throws IllegalArgumentException if the Writer is <code>null</code>
134      * @throws IOException if error occurs on underlying Writer
135      **/

136     public static void escapeJavaScript(Writer JavaDoc out, String JavaDoc str) throws IOException JavaDoc {
137         escapeJavaStyleString(out, str, true);
138     }
139
140     private static String JavaDoc escapeJavaStyleString(String JavaDoc str, boolean escapeSingleQuotes) {
141         if (str == null) {
142             return null;
143         }
144         try {
145             StringPrintWriter writer = new StringPrintWriter(str.length() * 2);
146             escapeJavaStyleString(writer, str, escapeSingleQuotes);
147             return writer.getString();
148         } catch (IOException JavaDoc ioe) {
149             // this should never ever happen while writing to a StringWriter
150
ioe.printStackTrace();
151             return null;
152         }
153     }
154
155     private static void escapeJavaStyleString(Writer JavaDoc out, String JavaDoc str, boolean escapeSingleQuote) throws IOException JavaDoc {
156         if (out == null) {
157             throw new IllegalArgumentException JavaDoc("The Writer must not be null");
158         }
159         if (str == null) {
160             return;
161         }
162         int sz;
163         sz = str.length();
164         for (int i = 0; i < sz; i++) {
165             char ch = str.charAt(i);
166
167             // handle unicode
168
if (ch > 0xfff) {
169                 out.write("\\u" + hex(ch));
170             } else if (ch > 0xff) {
171                 out.write("\\u0" + hex(ch));
172             } else if (ch > 0x7f) {
173                 out.write("\\u00" + hex(ch));
174             } else if (ch < 32) {
175                 switch (ch) {
176                     case '\b':
177                         out.write('\\');
178                         out.write('b');
179                         break;
180                     case '\n':
181                         out.write('\\');
182                         out.write('n');
183                         break;
184                     case '\t':
185                         out.write('\\');
186                         out.write('t');
187                         break;
188                     case '\f':
189                         out.write('\\');
190                         out.write('f');
191                         break;
192                     case '\r':
193                         out.write('\\');
194                         out.write('r');
195                         break;
196                     default :
197                         if (ch > 0xf) {
198                             out.write("\\u00" + hex(ch));
199                         } else {
200                             out.write("\\u000" + hex(ch));
201                         }
202                         break;
203                 }
204             } else {
205                 switch (ch) {
206                     case '\'':
207                         if (escapeSingleQuote) {
208                           out.write('\\');
209                         }
210                         out.write('\'');
211                         break;
212                     case '"':
213                         out.write('\\');
214                         out.write('"');
215                         break;
216                     case '\\':
217                         out.write('\\');
218                         out.write('\\');
219                         break;
220                     default :
221                         out.write(ch);
222                         break;
223                 }
224             }
225         }
226     }
227
228     /**
229      * <p>Returns an upper case hexadecimal <code>String</code> for the given
230      * character.</p>
231      *
232      * @param ch The character to convert.
233      * @return An upper case hexadecimal <code>String</code>
234      */

235     private static String JavaDoc hex(char ch) {
236         return Integer.toHexString(ch).toUpperCase();
237     }
238
239     /**
240      * <p>Unescapes any Java literals found in the <code>String</code>.
241      * For example, it will turn a sequence of <code>'\'</code> and
242      * <code>'n'</code> into a newline character, unless the <code>'\'</code>
243      * is preceded by another <code>'\'</code>.</p>
244      *
245      * @param str the <code>String</code> to unescape, may be null
246      * @return a new unescaped <code>String</code>, <code>null</code> if null string input
247      */

248     public static String JavaDoc unescapeJava(String JavaDoc str) {
249         if (str == null) {
250             return null;
251         }
252         try {
253             StringPrintWriter writer = new StringPrintWriter(str.length());
254             unescapeJava(writer, str);
255             return writer.getString();
256         } catch (IOException JavaDoc ioe) {
257             // this should never ever happen while writing to a StringWriter
258
ioe.printStackTrace();
259             return null;
260         }
261     }
262
263     /**
264      * <p>Unescapes any Java literals found in the <code>String</code> to a
265      * <code>Writer</code>.</p>
266      *
267      * <p>For example, it will turn a sequence of <code>'\'</code> and
268      * <code>'n'</code> into a newline character, unless the <code>'\'</code>
269      * is preceded by another <code>'\'</code>.</p>
270      *
271      * <p>A <code>null</code> string input has no effect.</p>
272      *
273      * @param out the <code>Writer</code> used to output unescaped characters
274      * @param str the <code>String</code> to unescape, may be null
275      * @throws IllegalArgumentException if the Writer is <code>null</code>
276      * @throws IOException if error occurs on underlying Writer
277      */

278     public static void unescapeJava(Writer JavaDoc out, String JavaDoc str) throws IOException JavaDoc {
279         if (out == null) {
280             throw new IllegalArgumentException JavaDoc("The Writer must not be null");
281         }
282         if (str == null) {
283             return;
284         }
285         int sz = str.length();
286         StringBuffer JavaDoc unicode = new StringBuffer JavaDoc(4);
287         boolean hadSlash = false;
288         boolean inUnicode = false;
289         for (int i = 0; i < sz; i++) {
290             char ch = str.charAt(i);
291             if (inUnicode) {
292                 // if in unicode, then we're reading unicode
293
// values in somehow
294
unicode.append(ch);
295                 if (unicode.length() == 4) {
296                     // unicode now contains the four hex digits
297
// which represents our unicode character
298
try {
299                         int value = Integer.parseInt(unicode.toString(), 16);
300                         out.write((char) value);
301                         unicode.setLength(0);
302                         inUnicode = false;
303                         hadSlash = false;
304                     } catch (NumberFormatException JavaDoc nfe) {
305                         throw new NestableRuntimeException("Unable to parse unicode value: " + unicode, nfe);
306                     }
307                 }
308                 continue;
309             }
310             if (hadSlash) {
311                 // handle an escaped value
312
hadSlash = false;
313                 switch (ch) {
314                     case '\\':
315                         out.write('\\');
316                         break;
317                     case '\'':
318                         out.write('\'');
319                         break;
320                     case '\"':
321                         out.write('"');
322                         break;
323                     case 'r':
324                         out.write('\r');
325                         break;
326                     case 'f':
327                         out.write('\f');
328                         break;
329                     case 't':
330                         out.write('\t');
331                         break;
332                     case 'n':
333                         out.write('\n');
334                         break;
335                     case 'b':
336                         out.write('\b');
337                         break;
338                     case 'u':
339                         {
340                             // uh-oh, we're in unicode country....
341
inUnicode = true;
342                             break;
343                         }
344                     default :
345                         out.write(ch);
346                         break;
347                 }
348                 continue;
349             } else if (ch == '\\') {
350                 hadSlash = true;
351                 continue;
352             }
353             out.write(ch);
354         }
355         if (hadSlash) {
356             // then we're in the weird case of a \ at the end of the
357
// string, let's output it anyway.
358
out.write('\\');
359         }
360     }
361
362     /**
363      * <p>Unescapes any JavaScript literals found in the <code>String</code>.</p>
364      *
365      * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
366      * into a newline character, unless the <code>'\'</code> is preceded by another
367      * <code>'\'</code>.</p>
368      *
369      * @see #unescapeJava(String)
370      * @param str the <code>String</code> to unescape, may be null
371      * @return A new unescaped <code>String</code>, <code>null</code> if null string input
372      */

373     public static String JavaDoc unescapeJavaScript(String JavaDoc str) {
374         return unescapeJava(str);
375     }
376
377     /**
378      * <p>Unescapes any JavaScript literals found in the <code>String</code> to a
379      * <code>Writer</code>.</p>
380      *
381      * <p>For example, it will turn a sequence of <code>'\'</code> and <code>'n'</code>
382      * into a newline character, unless the <code>'\'</code> is preceded by another
383      * <code>'\'</code>.</p>
384      *
385      * <p>A <code>null</code> string input has no effect.</p>
386      *
387      * @see #unescapeJava(Writer,String)
388      * @param out the <code>Writer</code> used to output unescaped characters
389      * @param str the <code>String</code> to unescape, may be null
390      * @throws IllegalArgumentException if the Writer is <code>null</code>
391      * @throws IOException if error occurs on underlying Writer
392      */

393     public static void unescapeJavaScript(Writer JavaDoc out, String JavaDoc str) throws IOException JavaDoc {
394         unescapeJava(out, str);
395     }
396
397     // HTML and XML
398
//--------------------------------------------------------------------------
399
/**
400      * <p>Escapes the characters in a <code>String</code> using HTML entities.</p>
401      *
402      * <p>
403      * For example:
404      * </p>
405      * <p><code>"bread" & "butter"</code></p>
406      * becomes:
407      * <p>
408      * <code>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</code>.
409      * </p>
410      *
411      * <p>Supports all known HTML 4.0 entities, including funky accents.</p>
412      *
413      * @param str the <code>String</code> to escape, may be null
414      * @return a new escaped <code>String</code>, <code>null</code> if null string input
415      *
416      * @see #unescapeHtml(String)
417      * @see </br><a HREF="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
418      * @see </br><a HREF="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
419      * @see </br><a HREF="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
420      * @see </br><a HREF="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
421      * @see </br><a HREF="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
422      **/

423     public static String JavaDoc escapeHtml(String JavaDoc str) {
424         if (str == null) {
425             return null;
426         }
427         //todo: add a version that takes a Writer
428
//todo: rewrite underlying method to use a Writer instead of a StringBuffer
429
return Entities.HTML40.escape(str);
430     }
431
432     /**
433      * <p>Unescapes a string containing entity escapes to a string
434      * containing the actual Unicode characters corresponding to the
435      * escapes. Supports HTML 4.0 entities.</p>
436      *
437      * <p>For example, the string "&amp;lt;Fran&amp;ccedil;ais&amp;gt;"
438      * will become "&lt;Fran&ccedil;ais&gt;"</p>
439      *
440      * <p>If an entity is unrecognized, it is left alone, and inserted
441      * verbatim into the result string. e.g. "&amp;gt;&amp;zzzz;x" will
442      * become "&gt;&amp;zzzz;x".</p>
443      *
444      * @param str the <code>String</code> to unescape, may be null
445      * @return a new unescaped <code>String</code>, <code>null</code> if null string input
446      * @see #escapeHtml(String)
447      **/

448     public static String JavaDoc unescapeHtml(String JavaDoc str) {
449         if (str == null) {
450             return null;
451         }
452         return Entities.HTML40.unescape(str);
453     }
454
455     /**
456      * <p>Escapes the characters in a <code>String</code> using XML entities.</p>
457      *
458      * <p>For example: <tt>"bread" & "butter"</tt> =>
459      * <tt>&amp;quot;bread&amp;quot; &amp;amp; &amp;quot;butter&amp;quot;</tt>.
460      * </p>
461      *
462      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
463      * Does not support DTDs or external entities.</p>
464      *
465      * @param str the <code>String</code> to escape, may be null
466      * @return a new escaped <code>String</code>, <code>null</code> if null string input
467      * @see #unescapeXml(java.lang.String)
468      **/

469     public static String JavaDoc escapeXml(String JavaDoc str) {
470         if (str == null) {
471             return null;
472         }
473         return Entities.XML.escape(str);
474     }
475
476     /**
477      * <p>Unescapes a string containing XML entity escapes to a string
478      * containing the actual Unicode characters corresponding to the
479      * escapes.</p>
480      *
481      * <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
482      * Does not support DTDs or external entities.</p>
483      *
484      * @param str the <code>String</code> to unescape, may be null
485      * @return a new unescaped <code>String</code>, <code>null</code> if null string input
486      * @see #escapeXml(String)
487      **/

488     public static String JavaDoc unescapeXml(String JavaDoc str) {
489         if (str == null) {
490             return null;
491         }
492         return Entities.XML.unescape(str);
493     }
494
495     /**
496      * <p>Escapes the characters in a <code>String</code> to be suitable to pass to
497      * an SQL query.</p>
498      *
499      * <p>For example,
500      * <pre>statement.executeQuery("SELECT * FROM MOVIES WHERE TITLE='" +
501      * StringEscapeUtils.escapeSql("McHale's Navy") +
502      * "'");</pre>
503      * </p>
504      *
505      * <p>At present, this method only turns single-quotes into doubled single-quotes
506      * (<code>"McHale's Navy"</code> => <code>"McHale''s Navy"</code>). It does not
507      * handle the cases of percent (%) or underscore (_) for use in LIKE clauses.</p>
508      *
509      * see http://www.jguru.com/faq/view.jsp?EID=8881
510      * @param str the string to escape, may be null
511      * @return a new String, escaped for SQL, <code>null</code> if null string input
512      */

513     public static String JavaDoc escapeSql(String JavaDoc str) {
514         if (str == null) {
515             return null;
516         }
517         return StringUtils.replace(str, "'", "''");
518     }
519
520 }
521
522
Popular Tags