KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > net > URLDecoder


1 /*
2  * @(#)URLDecoder.java 1.28 05/11/17
3  *
4  * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7
8 package java.net;
9
10 import java.io.*;
11
12 /**
13  * Utility class for HTML form decoding. This class contains static methods
14  * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
15  * MIME format.
16  * <p>
17  * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
18  * that all characters in the encoded string are one of the following:
19  * &quot;<code>a</code>&quot; through &quot;<code>z</code>&quot;,
20  * &quot;<code>A</code>&quot; through &quot;<code>Z</code>&quot;,
21  * &quot;<code>0</code>&quot; through &quot;<code>9</code>&quot;, and
22  * &quot;<code>-</code>&quot;, &quot;<code>_</code>&quot;,
23  * &quot;<code>.</code>&quot;, and &quot;<code>*</code>&quot;. The
24  * character &quot;<code>%</code>&quot; is allowed but is interpreted
25  * as the start of a special escaped sequence.
26  * <p>
27  * The following rules are applied in the conversion:
28  * <p>
29  * <ul>
30  * <li>The alphanumeric characters &quot;<code>a</code>&quot; through
31  * &quot;<code>z</code>&quot;, &quot;<code>A</code>&quot; through
32  * &quot;<code>Z</code>&quot; and &quot;<code>0</code>&quot;
33  * through &quot;<code>9</code>&quot; remain the same.
34  * <li>The special characters &quot;<code>.</code>&quot;,
35  * &quot;<code>-</code>&quot;, &quot;<code>*</code>&quot;, and
36  * &quot;<code>_</code>&quot; remain the same.
37  * <li>The plus sign &quot;<code>+</code>&quot; is converted into a
38  * space character &quot;<code>&nbsp;</code>&quot; .
39  * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
40  * treated as representing a byte where <i>xy</i> is the two-digit
41  * hexadecimal representation of the 8 bits. Then, all substrings
42  * that contain one or more of these byte sequences consecutively
43  * will be replaced by the character(s) whose encoding would result
44  * in those consecutive bytes.
45  * The encoding scheme used to decode these characters may be specified,
46  * or if unspecified, the default encoding of the platform will be used.
47  * </ul>
48  * <p>
49  * There are two possible ways in which this decoder could deal with
50  * illegal strings. It could either leave illegal characters alone or
51  * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
52  * Which approach the decoder takes is left to the
53  * implementation.
54  *
55  * @author Mark Chamness
56  * @author Michael McCloskey
57  * @version 1.28, 11/17/05
58  * @since 1.2
59  */

60
61 public class URLDecoder {
62
63     // The platform default encoding
64
static String JavaDoc dfltEncName = URLEncoder.dfltEncName;
65
66     /**
67      * Decodes a <code>x-www-form-urlencoded</code> string.
68      * The platform's default encoding is used to determine what characters
69      * are represented by any consecutive sequences of the form
70      * "<code>%<i>xy</i></code>".
71      * @param s the <code>String</code> to decode
72      * @deprecated The resulting string may vary depending on the platform's
73      * default encoding. Instead, use the decode(String,String) method
74      * to specify the encoding.
75      * @return the newly decoded <code>String</code>
76      */

77     @Deprecated JavaDoc
78     public static String JavaDoc decode(String JavaDoc s) {
79
80     String JavaDoc str = null;
81
82     try {
83         str = decode(s, dfltEncName);
84     } catch (UnsupportedEncodingException e) {
85         // The system should always have the platform default
86
}
87
88     return str;
89     }
90
91     /**
92      * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
93      * encoding scheme.
94      * The supplied encoding is used to determine
95      * what characters are represented by any consecutive sequences of the
96      * form "<code>%<i>xy</i></code>".
97      * <p>
98      * <em><strong>Note:</strong> The <a HREF=
99      * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
100      * World Wide Web Consortium Recommendation</a> states that
101      * UTF-8 should be used. Not doing so may introduce
102      * incompatibilites.</em>
103      *
104      * @param s the <code>String</code> to decode
105      * @param enc The name of a supported
106      * <a HREF="../lang/package-summary.html#charenc">character
107      * encoding</a>.
108      * @return the newly decoded <code>String</code>
109      * @exception UnsupportedEncodingException
110      * If character encoding needs to be consulted, but
111      * named character encoding is not supported
112      * @see URLEncoder#encode(java.lang.String, java.lang.String)
113      * @since 1.4
114      */

115     public static String JavaDoc decode(String JavaDoc s, String JavaDoc enc)
116     throws UnsupportedEncodingException{
117     
118     boolean needToChange = false;
119     int numChars = s.length();
120     StringBuffer JavaDoc sb = new StringBuffer JavaDoc(numChars > 500 ? numChars / 2 : numChars);
121     int i = 0;
122
123     if (enc.length() == 0) {
124         throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
125     }
126
127     char c;
128     byte[] bytes = null;
129     while (i < numChars) {
130             c = s.charAt(i);
131             switch (c) {
132         case '+':
133         sb.append(' ');
134         i++;
135         needToChange = true;
136         break;
137         case '%':
138         /*
139          * Starting with this instance of %, process all
140          * consecutive substrings of the form %xy. Each
141          * substring %xy will yield a byte. Convert all
142          * consecutive bytes obtained this way to whatever
143          * character(s) they represent in the provided
144          * encoding.
145          */

146
147         try {
148
149             // (numChars-i)/3 is an upper bound for the number
150
// of remaining bytes
151
if (bytes == null)
152             bytes = new byte[(numChars-i)/3];
153             int pos = 0;
154             
155             while ( ((i+2) < numChars) &&
156                 (c=='%')) {
157             bytes[pos++] =
158                 (byte)Integer.parseInt(s.substring(i+1,i+3),16);
159             i+= 3;
160             if (i < numChars)
161                 c = s.charAt(i);
162             }
163
164             // A trailing, incomplete byte encoding such as
165
// "%x" will cause an exception to be thrown
166

167             if ((i < numChars) && (c=='%'))
168             throw new IllegalArgumentException JavaDoc(
169                  "URLDecoder: Incomplete trailing escape (%) pattern");
170             
171             sb.append(new String JavaDoc(bytes, 0, pos, enc));
172         } catch (NumberFormatException JavaDoc e) {
173             throw new IllegalArgumentException JavaDoc(
174                     "URLDecoder: Illegal hex characters in escape (%) pattern - "
175             + e.getMessage());
176         }
177         needToChange = true;
178         break;
179         default:
180         sb.append(c);
181         i++;
182         break;
183             }
184         }
185
186         return (needToChange? sb.toString() : s);
187     }
188 }
189
Popular Tags