KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > httpclient > util > URIUtil


1 /*
2  * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.21.2.1 2004/02/22 18:21:16 olegk Exp $
3  * $Revision: 1.21.2.1 $
4  * $Date: 2004/02/22 18:21:16 $
5  *
6  * ====================================================================
7  *
8  * Copyright 2002-2004 The Apache Software Foundation
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  * ====================================================================
22  *
23  * This software consists of voluntary contributions made by many
24  * individuals on behalf of the Apache Software Foundation. For more
25  * information on the Apache Software Foundation, please see
26  * <http://www.apache.org/>.
27  *
28  * [Additional notices, if required by prior licensing conditions]
29  *
30  */

31
32 package org.apache.commons.httpclient.util;
33
34 import java.io.UnsupportedEncodingException JavaDoc;
35 import java.util.BitSet JavaDoc;
36 import org.apache.commons.httpclient.URI;
37 import org.apache.commons.httpclient.URIException;
38
39 /**
40  * The URI escape and character encoding and decoding utility.
41  * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
42  * than {@link org.apache.commons.httpclient.URI}.
43  *
44  * @author <a HREF="mailto:jericho@apache.org">Sung-Gu</a>
45  * @version $Revision: 1.21.2.1 $ $Date: 2002/03/14 15:14:01
46  */

47
48 public class URIUtil {
49
50     // ----------------------------------------------------- Instance variables
51

52     protected static final BitSet JavaDoc empty = new BitSet JavaDoc(1);
53
54     // ---------------------------------------------------------- URI utilities
55

56     /**
57      * Get the basename of an URI. It's possibly an empty string.
58      *
59      * @param uri a string regarded an URI
60      * @return the basename string; an empty string if the path ends with slash
61      */

62     public static String JavaDoc getName(String JavaDoc uri) {
63         if (uri == null || uri.length() == 0) { return uri; }
64         String JavaDoc path = URIUtil.getPath(uri);
65         int at = path.lastIndexOf("/");
66         int to = path.length();
67         return (at >= 0) ? path.substring(at + 1, to) : path;
68     }
69
70
71     /**
72      * Get the query of an URI.
73      *
74      * @param uri a string regarded an URI
75      * @return the query string; <code>null</code> if empty or undefined
76      */

77     public static String JavaDoc getQuery(String JavaDoc uri) {
78         if (uri == null || uri.length() == 0) { return null; }
79         // consider of net_path
80
int at = uri.indexOf("//");
81         int from = uri.indexOf(
82             "/",
83             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
84         );
85         // the authority part of URI ignored
86
int to = uri.length();
87         // reuse the at and from variables to consider the query
88
at = uri.indexOf("?", from);
89         if (at >= 0) {
90             from = at + 1;
91         } else {
92             return null;
93         }
94         // check the fragment
95
if (uri.lastIndexOf("#") > from) {
96             to = uri.lastIndexOf("#");
97         }
98         // get the path and query.
99
return (from < 0 || from == to) ? null : uri.substring(from, to);
100     }
101
102
103     /**
104      * Get the path of an URI.
105      *
106      * @param uri a string regarded an URI
107      * @return the path string
108      */

109     public static String JavaDoc getPath(String JavaDoc uri) {
110         if (uri == null) {
111             return null;
112         }
113         // consider of net_path
114
int at = uri.indexOf("//");
115         int from = uri.indexOf(
116             "/",
117             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
118         );
119         // the authority part of URI ignored
120
int to = uri.length();
121         // check the query
122
if (uri.indexOf('?', from) != -1) {
123             to = uri.indexOf('?', from);
124         }
125         // check the fragment
126
if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
127             to = uri.lastIndexOf("#");
128         }
129         // get only the path.
130
return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
131     }
132
133
134     /**
135      * Get the path and query of an URI.
136      *
137      * @param uri a string regarded an URI
138      * @return the path and query string
139      */

140     public static String JavaDoc getPathQuery(String JavaDoc uri) {
141         if (uri == null) {
142             return null;
143         }
144         // consider of net_path
145
int at = uri.indexOf("//");
146         int from = uri.indexOf(
147             "/",
148             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
149         );
150         // the authority part of URI ignored
151
int to = uri.length();
152         // Ignore the '?' mark so to ignore the query.
153
// check the fragment
154
if (uri.lastIndexOf("#") > from) {
155             to = uri.lastIndexOf("#");
156         }
157         // get the path and query.
158
return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
159     }
160
161
162     /**
163      * Get the path of an URI and its rest part.
164      *
165      * @param uri a string regarded an URI
166      * @return the string from the path part
167      */

168     public static String JavaDoc getFromPath(String JavaDoc uri) {
169         if (uri == null) {
170             return null;
171         }
172         // consider of net_path
173
int at = uri.indexOf("//");
174         int from = uri.indexOf(
175             "/",
176             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
177         );
178         // get the path and its rest.
179
return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
180     }
181
182     // ----------------------------------------------------- Encoding utilities
183

184     /**
185      * Get the all escaped and encoded string with the default protocol charset.
186      * It's the same function to use <code>encode(String unescaped, Bitset
187      * empty, URI.getDefaultProtocolCharset())</code>.
188      *
189      * @param unescaped an unescaped string
190      * @return the escaped string
191      *
192      * @throws URIException if the default protocol charset is not supported
193      *
194      * @see URI#getDefaultProtocolCharset
195      * @see #encode
196      */

197     public static String JavaDoc encodeAll(String JavaDoc unescaped) throws URIException {
198         return encodeAll(unescaped, URI.getDefaultProtocolCharset());
199     }
200  
201
202     /**
203      * Get the all escaped and encoded string with a given charset.
204      * It's the same function to use <code>encode(String unescaped, Bitset
205      * empty, String charset)</code>.
206      *
207      * @param unescaped an unescaped string
208      * @param charset the charset
209      * @return the escaped string
210      *
211      * @throws URIException if the charset is not supported
212      *
213      * @see #encode
214      */

215     public static String JavaDoc encodeAll(String JavaDoc unescaped, String JavaDoc charset)
216         throws URIException {
217
218         return encode(unescaped, empty, charset);
219     }
220   
221
222     /**
223      * Escape and encode a string regarded as within the authority component of
224      * an URI with the default protocol charset.
225      * Within the authority component, the characters ";", ":", "@", "?", and
226      * "/" are reserved.
227      *
228      * @param unescaped an unescaped string
229      * @return the escaped string
230      *
231      * @throws URIException if the default protocol charset is not supported
232      *
233      * @see URI#getDefaultProtocolCharset
234      * @see #encode
235      */

236     public static String JavaDoc encodeWithinAuthority(String JavaDoc unescaped)
237         throws URIException {
238
239         return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
240     }
241
242
243     /**
244      * Escape and encode a string regarded as within the authority component of
245      * an URI with a given charset.
246      * Within the authority component, the characters ";", ":", "@", "?", and
247      * "/" are reserved.
248      *
249      * @param unescaped an unescaped string
250      * @param charset the charset
251      * @return the escaped string
252      *
253      * @throws URIException if the charset is not supported
254      *
255      * @see #encode
256      */

257     public static String JavaDoc encodeWithinAuthority(String JavaDoc unescaped, String JavaDoc charset)
258         throws URIException {
259
260         return encode(unescaped, URI.allowed_within_authority, charset);
261     }
262
263
264     /**
265      * Escape and encode a string regarded as the path and query components of
266      * an URI with the default protocol charset.
267      *
268      * @param unescaped an unescaped string
269      * @return the escaped string
270      *
271      * @throws URIException if the default protocol charset is not supported
272      *
273      * @see URI#getDefaultProtocolCharset
274      * @see #encode
275      */

276     public static String JavaDoc encodePathQuery(String JavaDoc unescaped) throws URIException {
277         return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
278     }
279
280
281     /**
282      * Escape and encode a string regarded as the path and query components of
283      * an URI with a given charset.
284      *
285      * @param unescaped an unescaped string
286      * @param charset the charset
287      * @return the escaped string
288      *
289      * @throws URIException if the charset is not supported
290      *
291      * @see #encode
292      */

293     public static String JavaDoc encodePathQuery(String JavaDoc unescaped, String JavaDoc charset)
294         throws URIException {
295
296         int at = unescaped.indexOf('?');
297         if (at < 0) {
298             return encode(unescaped, URI.allowed_abs_path, charset);
299         }
300         // else
301
return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
302             + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
303     }
304
305
306     /**
307      * Escape and encode a string regarded as within the path component of an
308      * URI with the default protocol charset.
309      * The path may consist of a sequence of path segments separated by a
310      * single slash "/" character. Within a path segment, the characters
311      * "/", ";", "=", and "?" are reserved.
312      *
313      * @param unescaped an unescaped string
314      * @return the escaped string
315      *
316      * @throws URIException if the default protocol charset is not supported
317      *
318      * @see URI#getDefaultProtocolCharset
319      * @see #encode
320      */

321     public static String JavaDoc encodeWithinPath(String JavaDoc unescaped)
322         throws URIException {
323
324         return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
325     }
326
327
328     /**
329      * Escape and encode a string regarded as within the path component of an
330      * URI with a given charset.
331      * The path may consist of a sequence of path segments separated by a
332      * single slash "/" character. Within a path segment, the characters
333      * "/", ";", "=", and "?" are reserved.
334      *
335      * @param unescaped an unescaped string
336      * @param charset the charset
337      * @return the escaped string
338      *
339      * @throws URIException if the charset is not supported
340      *
341      * @see #encode
342      */

343     public static String JavaDoc encodeWithinPath(String JavaDoc unescaped, String JavaDoc charset)
344         throws URIException {
345
346         return encode(unescaped, URI.allowed_within_path, charset);
347     }
348
349
350     /**
351      * Escape and encode a string regarded as the path component of an URI with
352      * the default protocol charset.
353      *
354      * @param unescaped an unescaped string
355      * @return the escaped string
356      *
357      * @throws URIException if the default protocol charset is not supported
358      *
359      * @see URI#getDefaultProtocolCharset
360      * @see #encode
361      */

362     public static String JavaDoc encodePath(String JavaDoc unescaped) throws URIException {
363         return encodePath(unescaped, URI.getDefaultProtocolCharset());
364     }
365
366
367     /**
368      * Escape and encode a string regarded as the path component of an URI with
369      * a given charset.
370      *
371      * @param unescaped an unescaped string
372      * @param charset the charset
373      * @return the escaped string
374      *
375      * @throws URIException if the charset is not supported
376      *
377      * @see #encode
378      */

379     public static String JavaDoc encodePath(String JavaDoc unescaped, String JavaDoc charset)
380         throws URIException {
381
382         return encode(unescaped, URI.allowed_abs_path, charset);
383     }
384
385
386     /**
387      * Escape and encode a string regarded as within the query component of an
388      * URI with the default protocol charset.
389      * When a query comprise the name and value pairs, it is used in order
390      * to encode each name and value string. The reserved special characters
391      * within a query component are being included in encoding the query.
392      *
393      * @param unescaped an unescaped string
394      * @return the escaped string
395      *
396      * @throws URIException if the default protocol charset is not supported
397      *
398      * @see URI#getDefaultProtocolCharset
399      * @see #encode
400      */

401     public static String JavaDoc encodeWithinQuery(String JavaDoc unescaped)
402         throws URIException {
403
404         return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
405     }
406
407
408     /**
409      * Escape and encode a string regarded as within the query component of an
410      * URI with a given charset.
411      * When a query comprise the name and value pairs, it is used in order
412      * to encode each name and value string. The reserved special characters
413      * within a query component are being included in encoding the query.
414      *
415      * @param unescaped an unescaped string
416      * @param charset the charset
417      * @return the escaped string
418      *
419      * @throws URIException if the charset is not supported
420      *
421      * @see #encode
422      */

423     public static String JavaDoc encodeWithinQuery(String JavaDoc unescaped, String JavaDoc charset)
424         throws URIException {
425
426         return encode(unescaped, URI.allowed_within_query, charset);
427     }
428
429
430     /**
431      * Escape and encode a string regarded as the query component of an URI with
432      * the default protocol charset.
433      * When a query string is not misunderstood the reserved special characters
434      * ("&amp;", "=", "+", ",", and "$") within a query component, this method
435      * is recommended to use in encoding the whole query.
436      *
437      * @param unescaped an unescaped string
438      * @return the escaped string
439      *
440      * @throws URIException if the default protocol charset is not supported
441      *
442      * @see URI#getDefaultProtocolCharset
443      * @see #encode
444      */

445     public static String JavaDoc encodeQuery(String JavaDoc unescaped) throws URIException {
446         return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
447     }
448
449
450     /**
451      * Escape and encode a string regarded as the query component of an URI with
452      * a given charset.
453      * When a query string is not misunderstood the reserved special characters
454      * ("&amp;", "=", "+", ",", and "$") within a query component, this method
455      * is recommended to use in encoding the whole query.
456      *
457      * @param unescaped an unescaped string
458      * @param charset the charset
459      * @return the escaped string
460      *
461      * @throws URIException if the charset is not supported
462      *
463      * @see #encode
464      */

465     public static String JavaDoc encodeQuery(String JavaDoc unescaped, String JavaDoc charset)
466         throws URIException {
467
468         return encode(unescaped, URI.allowed_query, charset);
469     }
470
471
472     /**
473      * Escape and encode a given string with allowed characters not to be
474      * escaped and the default protocol charset.
475      *
476      * @param unescaped a string
477      * @param allowed allowed characters not to be escaped
478      * @return the escaped string
479      *
480      * @throws URIException if the default protocol charset is not supported
481      *
482      * @see URI#getDefaultProtocolCharset
483      * @see Coder#encode
484      */

485     public static String JavaDoc encode(String JavaDoc unescaped, BitSet JavaDoc allowed)
486         throws URIException {
487
488         return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
489     }
490
491
492     /**
493      * Escape and encode a given string with allowed characters not to be
494      * escaped and a given charset.
495      *
496      * @param unescaped a string
497      * @param allowed allowed characters not to be escaped
498      * @param charset the charset
499      * @return the escaped string
500      *
501      * @throws URIException if the charset is not supported
502      *
503      * @see Coder#encode
504      */

505     public static String JavaDoc encode(String JavaDoc unescaped, BitSet JavaDoc allowed,
506             String JavaDoc charset) throws URIException {
507
508         return new String JavaDoc(Coder.encode(unescaped, allowed, charset));
509     }
510
511
512     /**
513      * Unescape and decode a given string regarded as an escaped string with the
514      * default protocol charset.
515      *
516      * @param escaped a string
517      * @return the unescaped string
518      *
519      * @throws URIException if the default protocol charset is not supported
520      *
521      * @see URI#getDefaultProtocolCharset
522      * @see Coder#decode
523      */

524     public static String JavaDoc decode(String JavaDoc escaped) throws URIException {
525         return Coder.decode(escaped.toCharArray(), URI.getDefaultProtocolCharset());
526     }
527
528
529     /**
530      * Unescape and decode a given string regarded as an escaped string.
531      *
532      * @param escaped a string
533      * @param charset the charset
534      * @return the unescaped string
535      *
536      * @throws URIException if the charset is not supported
537      *
538      * @see Coder#decode
539      */

540     public static String JavaDoc decode(String JavaDoc escaped, String JavaDoc charset)
541         throws URIException {
542
543         return Coder.decode(escaped.toCharArray(), charset);
544     }
545
546     // --------------------------------- transforming a string between charsets
547

548     /**
549      * Convert a target string to the specified character encoded string with
550      * the default protocol charset.
551      *
552      * @param target a target string
553      * @return the protocol character encoded string
554      *
555      * @throws URIException if the default protocol charset is not supported
556      *
557      * @see URI#getDefaultProtocolCharset
558      *
559      * @deprecated Do not use. To be removed
560      */

561     public static String JavaDoc toProtocolCharset(String JavaDoc target) throws URIException {
562         return toUsingCharset(
563             target,
564             URI.getDefaultDocumentCharset(),
565             URI.getDefaultProtocolCharset());
566     }
567
568
569     /**
570      * Convert a target string to the specified character encoded string with
571      * a given protocol charset.
572      *
573      * @param target a target string
574      * @param charset the transformed protocol charset
575      * @return the protocol character encoded string
576      *
577      * @throws URIException if the charset is not supported
578      *
579      * @deprecated Do not use. To be removed
580      */

581     public static String JavaDoc toProtocolCharset(String JavaDoc target, String JavaDoc charset)
582         throws URIException {
583
584         return toUsingCharset(target, URI.getDefaultDocumentCharset(), charset);
585     }
586
587
588     /**
589      * Convert a target string to the specified character encoded string with
590      * the default document charset.
591      *
592      * @param target a target string
593      * @return the document character encoded string
594      *
595      * @throws URIException if the default protocol charset is not supported
596      *
597      * @see URI#getDefaultDocumentCharset
598      *
599      * @deprecated Do not use. To be removed
600      */

601     public static String JavaDoc toDocumentCharset(String JavaDoc target) throws URIException {
602         return toUsingCharset(target, URI.getDefaultProtocolCharset(),
603                 URI.getDefaultDocumentCharset());
604     }
605
606
607     /**
608      * Convert a target string to the specified character encoded string with
609      * a given document charset.
610      *
611      * @param target a target string
612      * @param charset the transformed document charset
613      * @return the document character encoded string
614      *
615      * @throws URIException if the charset is not supported
616      *
617      * @deprecated Do not use. To be removed
618      */

619     public static String JavaDoc toDocumentCharset(String JavaDoc target, String JavaDoc charset)
620         throws URIException {
621
622         return toUsingCharset(target, URI.getDefaultProtocolCharset(), charset);
623     }
624
625
626     /**
627      * Convert a target string from the <code>fromCharset</code> charset to
628      * the <code>toCharset</code> charset.
629      * <p>
630      * What if the document charset is ISO-8859-1 and the protocol charset is
631      * UTF-8, when it's read from the document part and is used in the protocol
632      * part, the use of the method will be <code>toUsingCharset(the string,
633      * "ISO-8859-1", "UTF-8")</code>.
634      *
635      * @param target a target string
636      * @param fromCharset the previous charset
637      * @param toCharset the changing charset
638      * @return the document character encoded string
639      *
640      * @throws URIException if either of the charsets are not supported
641      *
642      * @deprecated Do not use. To be removed
643      */

644
645     public static String JavaDoc toUsingCharset(String JavaDoc target, String JavaDoc fromCharset,
646             String JavaDoc toCharset) throws URIException {
647
648         try {
649             return new String JavaDoc(target.getBytes(fromCharset), toCharset);
650         } catch (UnsupportedEncodingException JavaDoc error) {
651             throw new URIException(URIException.UNSUPPORTED_ENCODING,
652                     error.getMessage());
653         }
654     }
655
656     // ---------------------------------------------------------- Inner classes
657

658     /**
659      * The basic and internal utility for URI escape and character encoding and
660      * decoding.
661      */

662     protected static class Coder extends URI {
663
664         /**
665          * Escape and encode a given string with allowed characters not to be
666          * escaped.
667          *
668          * @param unescapedComponent an unescaped component
669          * @param allowed allowed characters not to be escaped
670          * @param charset the charset to encode
671          * @return the escaped and encoded string
672          *
673          * @throws URIException if the charset is not supported
674          */

675         public static char[] encode(String JavaDoc unescapedComponent, BitSet JavaDoc allowed, String JavaDoc charset)
676             throws URIException {
677
678             return URI.encode(unescapedComponent, allowed, charset);
679         }
680
681
682         /**
683          * Unescape and decode a given string.
684          *
685          * @param escapedComponent the escaped component to be unescaped
686          * @param charset the charset to decode
687          * @return the unescaped and decoded string
688          *
689          * @throws URIException if the charset is not supported
690          */

691         public static String JavaDoc decode(char[] escapedComponent, String JavaDoc charset)
692             throws URIException {
693
694             return URI.decode(escapedComponent, charset);
695         }
696
697
698         /**
699          * Verify whether a given string is escaped or not
700          *
701          * @param original given characters
702          * @return true if the given character array is 7 bit ASCII-compatible.
703          */

704         public static boolean verifyEscaped(char[] original) {
705             for (int i = 0; i < original.length; i++) {
706                 int c = original[i];
707                 if (c > 128) {
708                     return false;
709                 } else if (c == '%') {
710                     if (Character.digit(original[++i], 16) == -1
711                         || Character.digit(original[++i], 16) == -1) {
712                         return false;
713                     }
714                 }
715             }
716             return true;
717         }
718
719
720         /**
721          * Replace from a given character to given character in an array order
722          * for a given string.
723          *
724          * @param original a given string
725          * @param from a replacing character array
726          * @param to a replaced character array
727          * @return the replaced string
728          */

729         public static String JavaDoc replace(String JavaDoc original, char[] from, char[] to) {
730             for (int i = from.length; i > 0; --i) {
731                 original = replace(original, from[i], to[i]);
732             }
733             return original.toString();
734         }
735
736
737         /**
738          * Replace from a given character to given character for a given string.
739          *
740          * @param original a given string
741          * @param from a replacing character array
742          * @param to a replaced character array
743          * @return the replaced string
744          */

745         public static String JavaDoc replace(String JavaDoc original, char from, char to) {
746             StringBuffer JavaDoc result = new StringBuffer JavaDoc(original.length());
747             int at, saved = 0;
748             do {
749                 at = original.indexOf(from);
750                 if (at >= 0) {
751                     result.append(original.substring(0, at));
752                     result.append(to);
753                 } else {
754                     result.append(original.substring(saved));
755                 }
756                 saved = at;
757             } while (at >= 0);
758             return result.toString();
759         }
760     }
761
762 }
763
764
Popular Tags