KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > jcorporate > expresso > core > misc > URLUTF8Encoder


1 /* ====================================================================
2  * The Jcorporate Apache Style Software License, Version 1.2 05-07-2002
3  *
4  * Copyright (c) 1995-2002 Jcorporate Ltd. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * 3. The end-user documentation included with the redistribution,
19  * if any, must include the following acknowledgment:
20  * "This product includes software developed by Jcorporate Ltd.
21  * (http://www.jcorporate.com/)."
22  * Alternately, this acknowledgment may appear in the software itself,
23  * if and wherever such third-party acknowledgments normally appear.
24  *
25  * 4. "Jcorporate" and product names such as "Expresso" must
26  * not be used to endorse or promote products derived from this
27  * software without prior written permission. For written permission,
28  * please contact info@jcorporate.com.
29  *
30  * 5. Products derived from this software may not be called "Expresso",
31  * or other Jcorporate product names; nor may "Expresso" or other
32  * Jcorporate product names appear in their name, without prior
33  * written permission of Jcorporate Ltd.
34  *
35  * 6. No product derived from this software may compete in the same
36  * market space, i.e. framework, without prior written permission
37  * of Jcorporate Ltd. For written permission, please contact
38  * partners@jcorporate.com.
39  *
40  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
41  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43  * DISCLAIMED. IN NO EVENT SHALL JCORPORATE LTD OR ITS CONTRIBUTORS
44  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
45  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
46  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
47  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
48  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
49  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
50  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  * ====================================================================
53  *
54  * This software consists of voluntary contributions made by many
55  * individuals on behalf of the Jcorporate Ltd. Contributions back
56  * to the project(s) are encouraged when you make modifications.
57  * Please send them to support@jcorporate.com. For more information
58  * on Jcorporate Ltd. and its products, please see
59  * <http://www.jcorporate.com/>.
60  *
61  * Portions of this software are based upon other open source
62  * products and are subject to their respective licenses.
63  */

64
65 package com.jcorporate.expresso.core.misc;
66
67 /**
68  * URLUTF8Encoder.java
69  *
70  * Copyright 2001 Jcorporate Ltd.
71  */

72
73 import com.jcorporate.expresso.kernel.util.FastStringBuffer;
74
75 import java.lang.ref.SoftReference JavaDoc;
76 import java.util.ArrayList JavaDoc;
77 import java.util.Iterator JavaDoc;
78
79
/**
 * Converts strings to and from the URL-safe "x-www-form-urlencoded" form,
 * using UTF-8 as the byte encoding for non-ASCII characters.
 * <p>
 * When encoding, every character outside {@code [A-Za-z0-9]} is first turned
 * into its UTF-8 byte sequence (one to four bytes) and each byte is then
 * written as a lower-case {@code %xy} escape; the space character is written
 * as {@code '+'}.
 * <p>
 * This class is a static utility and cannot be instantiated.
 */
public class URLUTF8Encoder {

    /** Escape text for every byte value: {@code HEX[0x0a]} is {@code "%0a"}. Never modified. */
    private static final String[] HEX = buildHexTable();

    /** Emitted by {@link #decode(String)} in place of a malformed UTF-8 sequence (U+FFFD). */
    private static final char REPLACEMENT = '\uFFFD';

    private URLUTF8Encoder() {
        // static utility, no instances
    }

    /**
     * Encodes a string to the "x-www-form-urlencoded" form, with non-ASCII
     * characters encoded as UTF-8:
     * <ul>
     * <li>{@code 'a'..'z'}, {@code 'A'..'Z'} and {@code '0'..'9'} are copied unchanged;</li>
     * <li>the space character becomes {@code '+'};</li>
     * <li>every other ASCII character becomes {@code %xy}, where {@code xy} is
     * its two-digit lower-case hexadecimal code;</li>
     * <li>every non-ASCII character is converted to its UTF-8 byte sequence
     * and each byte is written as {@code %xy}. A valid surrogate pair is
     * encoded as one four-byte sequence; an unpaired surrogate is encoded
     * on its own as three bytes so that it still survives a round trip
     * through {@link #decode(String)}.</li>
     * </ul>
     *
     * @param s the string to be encoded, may be {@code null}
     * @return the encoded string, or {@code null} if {@code s} is {@code null}
     */
    public static String encode(String s) {
        if (s == null) {
            return null;
        }

        final int len = s.length();
        final StringBuilder out = new StringBuilder(len + len);

        for (int i = 0; i < len; i++) {
            final char ch = s.charAt(i);

            if (isAsciiAlphanumeric(ch)) {
                out.append(ch);
            } else if (ch == ' ') {
                out.append('+');
            } else if (ch <= 0x007F) { // other ASCII: one byte
                out.append(HEX[ch]);
            } else if (ch <= 0x07FF) { // two-byte sequence
                out.append(HEX[0xC0 | (ch >> 6)]);
                out.append(HEX[0x80 | (ch & 0x3F)]);
            } else if (Character.isHighSurrogate(ch)
                    && i + 1 < len
                    && Character.isLowSurrogate(s.charAt(i + 1))) {
                // Supplementary character: combine the pair into one code
                // point and emit a single four-byte sequence.
                final int cp = Character.toCodePoint(ch, s.charAt(++i));
                out.append(HEX[0xF0 | (cp >> 18)]);
                out.append(HEX[0x80 | ((cp >> 12) & 0x3F)]);
                out.append(HEX[0x80 | ((cp >> 6) & 0x3F)]);
                out.append(HEX[0x80 | (cp & 0x3F)]);
            } else { // rest of the BMP (and unpaired surrogates): three bytes
                out.append(HEX[0xE0 | (ch >> 12)]);
                out.append(HEX[0x80 | ((ch >> 6) & 0x3F)]);
                out.append(HEX[0x80 | (ch & 0x3F)]);
            }
        }

        return out.toString();
    }

    /**
     * Decodes a string produced by {@link #encode(String)} or by any other
     * "x-www-form-urlencoded" encoder that uses UTF-8.
     * <ul>
     * <li>{@code '+'} becomes a space and {@code %xy} becomes the byte
     * {@code 0xXY}; the resulting bytes are interpreted as UTF-8 sequences of
     * one to four bytes.</li>
     * <li>Three-byte sequences that yield a surrogate are passed through
     * unchanged, so text that was encoded one surrogate at a time still
     * decodes to the original pair.</li>
     * <li>Characters above U+007F that appear literally (unescaped) in the
     * input are copied to the result as they are.</li>
     * <li>A malformed UTF-8 sequence (stray continuation byte, truncated or
     * overlong sequence, value above U+10FFFF) is replaced by U+FFFD.</li>
     * <li>Leading and trailing characters up to and including U+0020 are
     * stripped from both the input and the decoded result, so a value that
     * begins or ends with whitespace does not survive a round trip.</li>
     * </ul>
     *
     * @param s the string to decode, may be {@code null}
     * @return the decoded string, or {@code null} if {@code s} is {@code null}
     * @throws IllegalArgumentException if a {@code '%'} is not followed by two
     *                                  hexadecimal digits
     */
    public static String decode(String s) {
        if (s == null) {
            return null;
        }

        s = s.trim();

        final int len = s.length();
        final StringBuilder out = new StringBuilder(len);

        int codePoint = 0; // bits gathered for the multi-byte sequence in progress
        int pending = 0;   // continuation bytes still expected for that sequence
        int minimum = 0;   // smallest value that legitimately needs that many bytes

        for (int i = 0; i < len; i++) {
            final char ch = s.charAt(i);
            final int b;

            if (ch == '%') {
                if (i + 2 >= len) {
                    throw malformedEscape(s, i);
                }
                final int high = hexValue(s.charAt(i + 1));
                final int low = hexValue(s.charAt(i + 2));
                if (high < 0 || low < 0) {
                    throw malformedEscape(s, i);
                }
                b = (high << 4) | low;
                i += 2;
            } else if (ch == '+') {
                b = ' ';
            } else if (ch > 0x007F) {
                // A literal non-ASCII character is already decoded text, not
                // a UTF-8 byte; it also ends any sequence still in progress.
                if (pending > 0) {
                    out.append(REPLACEMENT);
                    pending = 0;
                }
                out.append(ch);
                continue;
            } else {
                b = ch;
            }

            if ((b & 0xC0) == 0x80) { // 10xxxxxx: continuation byte
                if (pending == 0) {
                    out.append(REPLACEMENT); // nothing to continue
                } else {
                    codePoint = (codePoint << 6) | (b & 0x3F);
                    if (--pending == 0) {
                        appendDecoded(out, codePoint, minimum);
                    }
                }
                continue;
            }

            // Any other byte starts something new, so a sequence that is
            // still waiting for continuation bytes was truncated.
            if (pending > 0) {
                out.append(REPLACEMENT);
                pending = 0;
            }

            if (b < 0x80) { // 0xxxxxxx: plain ASCII
                out.append((char) b);
            } else if ((b & 0xE0) == 0xC0) { // 110xxxxx: two-byte lead
                codePoint = b & 0x1F;
                pending = 1;
                minimum = 0x80;
            } else if ((b & 0xF0) == 0xE0) { // 1110xxxx: three-byte lead
                codePoint = b & 0x0F;
                pending = 2;
                minimum = 0x800;
            } else if ((b & 0xF8) == 0xF0) { // 11110xxx: four-byte lead
                codePoint = b & 0x07;
                pending = 3;
                minimum = 0x10000;
            } else { // 0xF8..0xFF never occur in UTF-8
                out.append(REPLACEMENT);
            }
        }

        if (pending > 0) { // input ended in the middle of a sequence
            out.append(REPLACEMENT);
        }

        return out.toString().trim();
    }

    /**
     * Round-trips a fixed set of sample strings through {@link #encode(String)}
     * and {@link #decode(String)} and reports every mismatch on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        final String[] testStrings = {
            "this is a test",
            "Now\nWe\nGet\tMore%04Complicated",
            "%Leading percent",
            "Trailing%",
            "%Leading and trailing%",
            "Even@$%%More!&Comlicated^@%",
            "Even@$%%More!&C|omlic|ated^@%",
            "|Even@$%%More!&C|omlic|ated^@%|",
            "LoginName|Admin%|",
            "|Even@$%%More!&C|omld^@%||Even@$%%More!&C|oml"
                    + "icic|ated^@%||Even@$%%More!&C|omlic|ated^@%||Even@$%%M"
                    + "ore!&C|omlic|ated^@%|",
            ""
        };

        for (int i = 0; i < testStrings.length; i++) {
            final String testString = testStrings[i];
            final String encoded = encode(testString);
            final String decoded = decode(encoded);

            if (!decoded.equals(testString)) {
                System.out.println("Error encoding/decoding string '"
                        + testString + "' (length "
                        + testString.length() + "). Encoded as '"
                        + encoded + "' (length " + encoded.length()
                        + ") and decoded to '" + decoded
                        + "' (length " + decoded.length() + ")");
            }
        }

        System.out.println("Tests Complete");
    }

    /** Returns whether {@code ch} is one of the characters that are never escaped. */
    private static boolean isAsciiAlphanumeric(char ch) {
        return ('A' <= ch && ch <= 'Z')
                || ('a' <= ch && ch <= 'z')
                || ('0' <= ch && ch <= '9');
    }

    /** Returns the value of the ASCII hexadecimal digit {@code c}, or -1 if it is not one. */
    private static int hexValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('a' <= c && c <= 'f') {
            return c - 'a' + 10;
        }
        if ('A' <= c && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Appends a completely assembled multi-byte value to {@code out}, or
     * U+FFFD when the value is overlong for its sequence length or lies
     * beyond the Unicode range.
     */
    private static void appendDecoded(StringBuilder out, int codePoint, int minimum) {
        if (codePoint < minimum || codePoint > 0x10FFFF) {
            out.append(REPLACEMENT);
        } else if (codePoint >= 0x10000) {
            out.append(Character.toChars(codePoint)); // surrogate pair
        } else {
            out.append((char) codePoint);
        }
    }

    /** Builds the exception thrown when the {@code '%'} at {@code index} is not a valid escape. */
    private static IllegalArgumentException malformedEscape(String s, int index) {
        return new IllegalArgumentException("Malformed escape sequence at index "
                + index + " while decoding string '" + s
                + "' (length " + s.length() + ")");
    }

    /** Builds the 256-entry table {@code "%00"} .. {@code "%ff"} (lower-case digits). */
    private static String[] buildHexTable() {
        final char[] digits = "0123456789abcdef".toCharArray();
        final String[] table = new String[256];

        for (int value = 0; value < table.length; value++) {
            table[value] = new String(
                    new char[] {'%', digits[value >> 4], digits[value & 0x0F]});
        }

        return table;
    }
}
350
351 /* URLUTF8Encoder */
Popular Tags