KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > httpclient > URI


1 /*
2  * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.36.2.5 2004/02/22 18:21:13 olegk Exp $
3  * $Revision: 1.36.2.5 $
4  * $Date: 2004/02/22 18:21:13 $
5  *
6  * ====================================================================
7  *
8  * Copyright 2002-2004 The Apache Software Foundation
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  * ====================================================================
22  *
23  * This software consists of voluntary contributions made by many
24  * individuals on behalf of the Apache Software Foundation. For more
25  * information on the Apache Software Foundation, please see
26  * <http://www.apache.org/>.
27  *
28  * [Additional notices, if required by prior licensing conditions]
29  *
30  */

31
32 package org.apache.commons.httpclient;
33
34 import java.io.IOException JavaDoc;
35 import java.io.ObjectInputStream JavaDoc;
36 import java.io.ObjectOutputStream JavaDoc;
37 import java.io.Serializable JavaDoc;
38 import java.io.UnsupportedEncodingException JavaDoc;
39 import java.util.Locale JavaDoc;
40 import java.util.BitSet JavaDoc;
41 import java.util.Hashtable JavaDoc;
42 import java.net.URL JavaDoc;
43
44 /**
45  * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
46  * This class has the purpose of supporting the parsing of a URI reference to
47  * extend any specific protocols, the character encoding of the protocol to
48  * be transported and the charset of the document.
49  * <p>
50  * A URI is always in an "escaped" form, since escaping or unescaping a
51  * completed URI might change its semantics.
52  * <p>
53  * Implementers should be careful not to escape or unescape the same string
54  * more than once, since unescaping an already unescaped string might lead to
55  * misinterpreting a percent data character as another escaped character,
56  * or vice versa in the case of escaping an already escaped string.
57  * <p>
58  * In order to avoid these problems, data types used as follows:
59  * <p><blockquote><pre>
60  * URI character sequence: char
61  * octet sequence: byte
62  * original character sequence: String
63  * </pre></blockquote><p>
64  *
65  * So, a URI is a sequence of characters as an array of a char type, which
66  * is not always represented as a sequence of octets as an array of byte.
67  * <p>
68  *
69  * URI Syntactic Components
70  * <p><blockquote><pre>
71  * - In general, written as follows:
72  * Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
73  * Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
74  *
75  * - Syntax
76  * absoluteURI = scheme ":" ( hier_part | opaque_part )
77  * hier_part = ( net_path | abs_path ) [ "?" query ]
78  * net_path = "//" authority [ abs_path ]
79  * abs_path = "/" path_segments
80  * </pre></blockquote><p>
81  *
82  * The following examples illustrate URI that are in common use.
83  * <pre>
84  * ftp://ftp.is.co.za/rfc/rfc1808.txt
85  * -- ftp scheme for File Transfer Protocol services
86  * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
87  * -- gopher scheme for Gopher and Gopher+ Protocol services
88  * http://www.math.uio.no/faq/compression-faq/part1.html
89  * -- http scheme for Hypertext Transfer Protocol services
90  * mailto:mduerst@ifi.unizh.ch
91  * -- mailto scheme for electronic mail addresses
92  * news:comp.infosystems.www.servers.unix
93  * -- news scheme for USENET news groups and articles
94  * telnet://melvyl.ucop.edu/
95  * -- telnet scheme for interactive services via the TELNET Protocol
96  * </pre>
97  * Please, notice that there are many modifications from URL(RFC 1738) and
98  * relative URL(RFC 1808).
99  * <p>
100  * <b>The expressions for a URI</b>
101  * <p><pre>
102  * For escaped URI forms
103  * - URI(char[]) // constructor
104  * - char[] getRawXxx() // method
105  * - String getEscapedXxx() // method
106  * - String toString() // method
107  * <p>
108  * For unescaped URI forms
109  * - URI(String) // constructor
110  * - String getXXX() // method
111  * </pre><p>
112  *
113  * @author <a HREF="mailto:jericho@apache.org">Sung-Gu</a>
114  * @author <a HREF="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
115  * @version $Revision: 1.36.2.5 $ $Date: 2002/03/14 15:14:01
116  */

117 public class URI implements Cloneable JavaDoc, Comparable JavaDoc, Serializable JavaDoc {
118
119
120     // ----------------------------------------------------------- Constructors
121

122     /** Create an instance as an internal use */
123     protected URI() {
124     }
125
126
127     /**
128      * Construct a URI as an escaped form of a character array with the given
129      * charset.
130      *
131      * @param escaped the URI character sequence
132      * @param charset the charset string to do escape encoding
133      * @throws URIException If the URI cannot be created.
134      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
135      * @see #getProtocolCharset
136      */

137     public URI(char[] escaped, String JavaDoc charset)
138         throws URIException, NullPointerException JavaDoc {
139         protocolCharset = charset;
140         parseUriReference(new String JavaDoc(escaped), true);
141     }
142
143
144     /**
145      * Construct a URI as an escaped form of a character array.
146      * An URI can be placed within double-quotes or angle brackets like
147      * "http://test.com/" and &lt;http://test.com/&gt;
148      *
149      * @param escaped the URI character sequence
150      * @throws URIException If the URI cannot be created.
151      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
152      * @see #getDefaultProtocolCharset
153      */

154     public URI(char[] escaped)
155         throws URIException, NullPointerException JavaDoc {
156         parseUriReference(new String JavaDoc(escaped), true);
157     }
158
159
160     /**
161      * Construct a URI from the given string with the given charset.
162      *
163      * @param original the string to be represented to URI character sequence
164      * It is one of absoluteURI and relativeURI.
165      * @param charset the charset string to do escape encoding
166      * @throws URIException If the URI cannot be created.
167      * @see #getProtocolCharset
168      */

169     public URI(String JavaDoc original, String JavaDoc charset) throws URIException {
170         protocolCharset = charset;
171         parseUriReference(original, false);
172     }
173
174
175     /**
176      * Construct a URI from the given string.
177      * <p><blockquote><pre>
178      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
179      * </pre></blockquote><p>
180      * An URI can be placed within double-quotes or angle brackets like
181      * "http://test.com/" and &lt;http://test.com/&gt;
182      *
183      * @param original the string to be represented to URI character sequence
184      * It is one of absoluteURI and relativeURI.
185      * @throws URIException If the URI cannot be created.
186      * @see #getDefaultProtocolCharset
187      */

188     public URI(String JavaDoc original) throws URIException {
189         parseUriReference(original, false);
190     }
191
192
193     /**
194      * Construct a URI from a URL.
195      *
196      * @param url a valid URL.
197      * @throws URIException If the URI cannot be created.
198      * @since 2.0
199      * @deprecated currently somewhat wrong and diffrent with java.net.URL usage
200      */

201     public URI(URL JavaDoc url) throws URIException {
202         this(url.toString());
203     }
204
205
206     /**
207      * Construct a general URI from the given components.
208      * <p><blockquote><pre>
209      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
210      * absoluteURI = scheme ":" ( hier_part | opaque_part )
211      * opaque_part = uric_no_slash *uric
212      * </pre></blockquote><p>
213      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
214      * &lt;fragment&gt;.
215      *
216      * @param scheme the scheme string
217      * @param schemeSpecificPart scheme_specific_part
218      * @param fragment the fragment string
219      * @throws URIException If the URI cannot be created.
220      * @see #getDefaultProtocolCharset
221      */

222     public URI(String JavaDoc scheme, String JavaDoc schemeSpecificPart, String JavaDoc fragment)
223         throws URIException {
224
225         // validate and contruct the URI character sequence
226
if (scheme == null) {
227            throw new URIException(URIException.PARSING, "scheme required");
228         }
229         char[] s = scheme.toLowerCase().toCharArray();
230         if (validate(s, URI.scheme)) {
231             _scheme = s; // is_absoluteURI
232
} else {
233             throw new URIException(URIException.PARSING, "incorrect scheme");
234         }
235         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
236                 getProtocolCharset());
237         // Set flag
238
_is_opaque_part = true;
239         _fragment = fragment.toCharArray();
240
241         setURI();
242     }
243
244
245     /**
246      * Construct a general URI from the given components.
247      * <p><blockquote><pre>
248      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
249      * absoluteURI = scheme ":" ( hier_part | opaque_part )
250      * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
251      * hier_part = ( net_path | abs_path ) [ "?" query ]
252      * </pre></blockquote><p>
253      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
254      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
255      * &gt;.
256      *
257      * @param scheme the scheme string
258      * @param authority the authority string
259      * @param path the path string
260      * @param query the query string
261      * @param fragment the fragment string
262      * @throws URIException If the new URI cannot be created.
263      * @see #getDefaultProtocolCharset
264      */

265     public URI(String JavaDoc scheme, String JavaDoc authority, String JavaDoc path, String JavaDoc query,
266                String JavaDoc fragment) throws URIException {
267
268         // validate and contruct the URI character sequence
269
StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
270         if (scheme != null) {
271             buff.append(scheme);
272             buff.append(':');
273         }
274         if (authority != null) {
275             buff.append("//");
276             buff.append(authority);
277         }
278         if (path != null) { // accept empty path
279
if ((scheme != null || authority != null)
280                     && !path.startsWith("/")) {
281                 throw new URIException(URIException.PARSING,
282                         "abs_path requested");
283             }
284             buff.append(path);
285         }
286         if (query != null) {
287             buff.append('?');
288             buff.append(query);
289         }
290         if (fragment != null) {
291             buff.append('#');
292             buff.append(fragment);
293         }
294         parseUriReference(buff.toString(), false);
295     }
296
297
298     /**
299      * Construct a general URI from the given components.
300      *
301      * @param scheme the scheme string
302      * @param userinfo the userinfo string
303      * @param host the host string
304      * @param port the port number
305      * @throws URIException If the new URI cannot be created.
306      * @see #getDefaultProtocolCharset
307      */

308     public URI(String JavaDoc scheme, String JavaDoc userinfo, String JavaDoc host, int port)
309         throws URIException {
310
311         this(scheme, userinfo, host, port, null, null, null);
312     }
313
314
315     /**
316      * Construct a general URI from the given components.
317      *
318      * @param scheme the scheme string
319      * @param userinfo the userinfo string
320      * @param host the host string
321      * @param port the port number
322      * @param path the path string
323      * @throws URIException If the new URI cannot be created.
324      * @see #getDefaultProtocolCharset
325      */

326     public URI(String JavaDoc scheme, String JavaDoc userinfo, String JavaDoc host, int port,
327             String JavaDoc path) throws URIException {
328
329         this(scheme, userinfo, host, port, path, null, null);
330     }
331
332
333     /**
334      * Construct a general URI from the given components.
335      *
336      * @param scheme the scheme string
337      * @param userinfo the userinfo string
338      * @param host the host string
339      * @param port the port number
340      * @param path the path string
341      * @param query the query string
342      * @throws URIException If the new URI cannot be created.
343      * @see #getDefaultProtocolCharset
344      */

345     public URI(String JavaDoc scheme, String JavaDoc userinfo, String JavaDoc host, int port,
346             String JavaDoc path, String JavaDoc query) throws URIException {
347
348         this(scheme, userinfo, host, port, path, query, null);
349     }
350
351
352     /**
353      * Construct a general URI from the given components.
354      *
355      * @param scheme the scheme string
356      * @param userinfo the userinfo string
357      * @param host the host string
358      * @param port the port number
359      * @param path the path string
360      * @param query the query string
361      * @param fragment the fragment string
362      * @throws URIException If the new URI cannot be created.
363      * @see #getDefaultProtocolCharset
364      */

365     public URI(String JavaDoc scheme, String JavaDoc userinfo, String JavaDoc host, int port,
366             String JavaDoc path, String JavaDoc query, String JavaDoc fragment) throws URIException {
367
368         this(scheme, (host == null) ? null
369             : ((userinfo != null) ? userinfo + '@' : "") + host
370                 + ((port != -1) ? ":" + port : ""), path, query, fragment);
371     }
372
373
374     /**
375      * Construct a general URI from the given components.
376      *
377      * @param scheme the scheme string
378      * @param host the host string
379      * @param path the path string
380      * @param fragment the fragment string
381      * @throws URIException If the new URI cannot be created.
382      * @see #getDefaultProtocolCharset
383      */

384     public URI(String JavaDoc scheme, String JavaDoc host, String JavaDoc path, String JavaDoc fragment)
385         throws URIException {
386
387         this(scheme, host, path, null, fragment);
388     }
389
390
391     /**
392      * Construct a general URI with the given relative URI string.
393      *
394      * @param base the base URI
395      * @param relative the relative URI string
396      * @throws URIException If the new URI cannot be created.
397      */

398     public URI(URI base, String JavaDoc relative) throws URIException {
399         this(base, new URI(relative));
400     }
401
402
403     /**
404      * Construct a general URI with the given relative URI.
405      * <p><blockquote><pre>
406      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
407      * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
408      * </pre></blockquote><p>
409      * Resolving Relative References to Absolute Form.
410      *
411      * <strong>Examples of Resolving Relative URI References</strong>
412      *
413      * Within an object with a well-defined base URI of
414      * <p><blockquote><pre>
415      * http://a/b/c/d;p?q
416      * </pre></blockquote><p>
417      * the relative URI would be resolved as follows:
418      *
419      * Normal Examples
420      *
421      * <p><blockquote><pre>
422      * g:h = g:h
423      * g = http://a/b/c/g
424      * ./g = http://a/b/c/g
425      * g/ = http://a/b/c/g/
426      * /g = http://a/g
427      * //g = http://g
428      * ?y = http://a/b/c/?y
429      * g?y = http://a/b/c/g?y
430      * #s = (current document)#s
431      * g#s = http://a/b/c/g#s
432      * g?y#s = http://a/b/c/g?y#s
433      * ;x = http://a/b/c/;x
434      * g;x = http://a/b/c/g;x
435      * g;x?y#s = http://a/b/c/g;x?y#s
436      * . = http://a/b/c/
437      * ./ = http://a/b/c/
438      * .. = http://a/b/
439      * ../ = http://a/b/
440      * ../g = http://a/b/g
441      * ../.. = http://a/
442      * ../../ = http://a/
443      * ../../g = http://a/g
444      * </pre></blockquote><p>
445      *
446      * Some URI schemes do not allow a hierarchical syntax matching the
447      * <hier_part> syntax, and thus cannot use relative references.
448      *
449      * @param base the base URI
450      * @param relative the relative URI
451      * @throws URIException If the new URI cannot be created.
452      */

453     public URI(URI base, URI relative) throws URIException {
454
455         if (base._scheme == null) {
456             throw new URIException(URIException.PARSING, "base URI required");
457         }
458         if (base._scheme != null) {
459             this._scheme = base._scheme;
460             this._authority = base._authority;
461         }
462         if (base._is_opaque_part || relative._is_opaque_part) {
463             this._scheme = base._scheme;
464             this._is_opaque_part = base._is_opaque_part
465                 || relative._is_opaque_part;
466             this._opaque = relative._opaque;
467             this._fragment = relative._fragment;
468             this.setURI();
469             return;
470         }
471         if (relative._scheme != null) {
472             this._scheme = relative._scheme;
473             this._is_net_path = relative._is_net_path;
474             this._authority = relative._authority;
475             if (relative._is_server) {
476                 this._is_server = relative._is_server;
477                 this._userinfo = relative._userinfo;
478                 this._host = relative._host;
479                 this._port = relative._port;
480             } else if (relative._is_reg_name) {
481                 this._is_reg_name = relative._is_reg_name;
482             }
483             this._is_abs_path = relative._is_abs_path;
484             this._is_rel_path = relative._is_rel_path;
485             this._path = relative._path;
486         } else if (base._authority != null && relative._scheme == null) {
487             this._is_net_path = base._is_net_path;
488             this._authority = base._authority;
489             if (base._is_server) {
490                 this._is_server = base._is_server;
491                 this._userinfo = base._userinfo;
492                 this._host = base._host;
493                 this._port = base._port;
494             } else if (base._is_reg_name) {
495                 this._is_reg_name = base._is_reg_name;
496             }
497         }
498         if (relative._authority != null) {
499             this._is_net_path = relative._is_net_path;
500             this._authority = relative._authority;
501             if (relative._is_server) {
502                 this._is_server = relative._is_server;
503                 this._userinfo = relative._userinfo;
504                 this._host = relative._host;
505                 this._port = relative._port;
506             } else if (relative._is_reg_name) {
507                 this._is_reg_name = relative._is_reg_name;
508             }
509             this._is_abs_path = relative._is_abs_path;
510             this._is_rel_path = relative._is_rel_path;
511             this._path = relative._path;
512         }
513         // resolve the path and query if necessary
514
if (relative._scheme == null && relative._authority == null) {
515             if ((relative._path == null || relative._path.length == 0)
516                 && relative._query == null) {
517                 // handle a reference to the current document, see RFC 2396
518
// section 5.2 step 2
519
this._path = base._path;
520                 this._query = base._query;
521             } else {
522                 this._path = resolvePath(base._path, relative._path);
523             }
524         }
525         // base._query removed
526
if (relative._query != null) {
527             this._query = relative._query;
528         }
529         // base._fragment removed
530
if (relative._fragment != null) {
531             this._fragment = relative._fragment;
532         }
533         this.setURI();
534         // reparse the newly built URI, this will ensure that all flags are set correctly.
535
// TODO there must be a better way to do this
536
parseUriReference(new String JavaDoc(_uri), true);
537     }
538
539     // --------------------------------------------------- Instance Variables
540

541     /** Version ID for serialization */
542     static final long serialVersionUID = 604752400577948726L;
543
544
545     /**
546      * Cache the hash code for this URI.
547      */

548     protected int hash = 0;
549
550
551     /**
552      * This Uniform Resource Identifier (URI).
553      * The URI is always in an "escaped" form, since escaping or unescaping
554      * a completed URI might change its semantics.
555      */

556     protected char[] _uri = null;
557
558
559     /**
560      * The charset of the protocol used by this URI instance.
561      */

562     protected String JavaDoc protocolCharset = null;
563
564
565     /**
566      * The default charset of the protocol. RFC 2277, 2396
567      */

568     protected static String JavaDoc defaultProtocolCharset = "UTF-8";
569
570
571     /**
572      * The default charset of the document. RFC 2277, 2396
573      * The platform's charset is used for the document by default.
574      */

575     protected static String JavaDoc defaultDocumentCharset = null;
576     protected static String JavaDoc defaultDocumentCharsetByLocale = null;
577     protected static String JavaDoc defaultDocumentCharsetByPlatform = null;
578     // Static initializer for defaultDocumentCharset
579
static {
580         Locale JavaDoc locale = Locale.getDefault();
581         // in order to support backward compatiblity
582
if (locale != null) {
583             defaultDocumentCharsetByLocale =
584                 LocaleToCharsetMap.getCharset(locale);
585             // set the default document charset
586
defaultDocumentCharset = defaultDocumentCharsetByLocale;
587         }
588         // in order to support platform encoding
589
try {
590             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
591         } catch(SecurityException JavaDoc ignore) {
592         }
593         if (defaultDocumentCharset == null) {
594             // set the default document charset
595
defaultDocumentCharset = defaultDocumentCharsetByPlatform;
596         }
597     }
598
599
600     /**
601      * The scheme.
602      */

603     protected char[] _scheme = null;
604
605
606     /**
607      * The opaque.
608      */

609     protected char[] _opaque = null;
610
611
612     /**
613      * The authority.
614      */

615     protected char[] _authority = null;
616
617
618     /**
619      * The userinfo.
620      */

621     protected char[] _userinfo = null;
622
623
624     /**
625      * The host.
626      */

627     protected char[] _host = null;
628
629
630     /**
631      * The port.
632      */

633     protected int _port = -1;
634
635
636     /**
637      * The path.
638      */

639     protected char[] _path = null;
640
641
642     /**
643      * The query.
644      */

645     protected char[] _query = null;
646
647
648     /**
649      * The fragment.
650      */

651     protected char[] _fragment = null;
652
653
654     /**
655      * The root path.
656      */

657     protected static char[] rootPath = { '/' };
658
659     // ---------------------- Generous characters for each component validation
660

661     /**
662      * The percent "%" character always has the reserved purpose of being the
663      * escape indicator, it must be escaped as "%25" in order to be used as
664      * data within a URI.
665      */

666     protected static final BitSet JavaDoc percent = new BitSet JavaDoc(256);
667     // Static initializer for percent
668
static {
669         percent.set('%');
670     }
671
672
673     /**
674      * BitSet for digit.
675      * <p><blockquote><pre>
676      * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
677      * "8" | "9"
678      * </pre></blockquote><p>
679      */

680     protected static final BitSet JavaDoc digit = new BitSet JavaDoc(256);
681     // Static initializer for digit
682
static {
683         for (int i = '0'; i <= '9'; i++) {
684             digit.set(i);
685         }
686     }
687
688
689     /**
690      * BitSet for alpha.
691      * <p><blockquote><pre>
692      * alpha = lowalpha | upalpha
693      * </pre></blockquote><p>
694      */

695     protected static final BitSet JavaDoc alpha = new BitSet JavaDoc(256);
696     // Static initializer for alpha
697
static {
698         for (int i = 'a'; i <= 'z'; i++) {
699             alpha.set(i);
700         }
701         for (int i = 'A'; i <= 'Z'; i++) {
702             alpha.set(i);
703         }
704     }
705
706
707     /**
708      * BitSet for alphanum (join of alpha &amp; digit).
709      * <p><blockquote><pre>
710      * alphanum = alpha | digit
711      * </pre></blockquote><p>
712      */

713     protected static final BitSet JavaDoc alphanum = new BitSet JavaDoc(256);
714     // Static initializer for alphanum
715
static {
716         alphanum.or(alpha);
717         alphanum.or(digit);
718     }
719
720
721     /**
722      * BitSet for hex.
723      * <p><blockquote><pre>
724      * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
725      * "a" | "b" | "c" | "d" | "e" | "f"
726      * </pre></blockquote><p>
727      */

728     protected static final BitSet JavaDoc hex = new BitSet JavaDoc(256);
729     // Static initializer for hex
730
static {
731         hex.or(digit);
732         for (int i = 'a'; i <= 'f'; i++) {
733             hex.set(i);
734         }
735         for (int i = 'A'; i <= 'F'; i++) {
736             hex.set(i);
737         }
738     }
739
740
741     /**
742      * BitSet for escaped.
743      * <p><blockquote><pre>
744      * escaped = "%" hex hex
745      * </pre></blockquote><p>
746      */

747     protected static final BitSet JavaDoc escaped = new BitSet JavaDoc(256);
748     // Static initializer for escaped
749
static {
750         escaped.or(percent);
751         escaped.or(hex);
752     }
753
754
755     /**
756      * BitSet for mark.
757      * <p><blockquote><pre>
758      * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" |
759      * "(" | ")"
760      * </pre></blockquote><p>
761      */

762     protected static final BitSet JavaDoc mark = new BitSet JavaDoc(256);
763     // Static initializer for mark
764
static {
765         mark.set('-');
766         mark.set('_');
767         mark.set('.');
768         mark.set('!');
769         mark.set('~');
770         mark.set('*');
771         mark.set('\'');
772         mark.set('(');
773         mark.set(')');
774     }
775
776
777     /**
778      * Data characters that are allowed in a URI but do not have a reserved
779      * purpose are called unreserved.
780      * <p><blockquote><pre>
781      * unreserved = alphanum | mark
782      * </pre></blockquote><p>
783      */

784     protected static final BitSet JavaDoc unreserved = new BitSet JavaDoc(256);
785     // Static initializer for unreserved
786
static {
787         unreserved.or(alphanum);
788         unreserved.or(mark);
789     }
790
791
792     /**
793      * BitSet for reserved.
794      * <p><blockquote><pre>
795      * reserved = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
796      * "$" | ","
797      * </pre></blockquote><p>
798      */

799     protected static final BitSet JavaDoc reserved = new BitSet JavaDoc(256);
800     // Static initializer for reserved
801
static {
802         reserved.set(';');
803         reserved.set('/');
804         reserved.set('?');
805         reserved.set(':');
806         reserved.set('@');
807         reserved.set('&');
808         reserved.set('=');
809         reserved.set('+');
810         reserved.set('$');
811         reserved.set(',');
812     }
813
814
815     /**
816      * BitSet for uric.
817      * <p><blockquote><pre>
818      * uric = reserved | unreserved | escaped
819      * </pre></blockquote><p>
820      */

821     protected static final BitSet JavaDoc uric = new BitSet JavaDoc(256);
822     // Static initializer for uric
823
static {
824         uric.or(reserved);
825         uric.or(unreserved);
826         uric.or(escaped);
827     }
828
829
830     /**
831      * BitSet for fragment (alias for uric).
832      * <p><blockquote><pre>
833      * fragment = *uric
834      * </pre></blockquote><p>
835      */

836     protected static final BitSet JavaDoc fragment = uric;
837
838
839     /**
840      * BitSet for query (alias for uric).
841      * <p><blockquote><pre>
842      * query = *uric
843      * </pre></blockquote><p>
844      */

845     protected static final BitSet JavaDoc query = uric;
846
847
848     /**
849      * BitSet for pchar.
850      * <p><blockquote><pre>
851      * pchar = unreserved | escaped |
852      * ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
853      * </pre></blockquote><p>
854      */

855     protected static final BitSet JavaDoc pchar = new BitSet JavaDoc(256);
856     // Static initializer for pchar
857
static {
858         pchar.or(unreserved);
859         pchar.or(escaped);
860         pchar.set(':');
861         pchar.set('@');
862         pchar.set('&');
863         pchar.set('=');
864         pchar.set('+');
865         pchar.set('$');
866         pchar.set(',');
867     }
868
869
870     /**
871      * BitSet for param (alias for pchar).
872      * <p><blockquote><pre>
873      * param = *pchar
874      * </pre></blockquote><p>
875      */

876     protected static final BitSet JavaDoc param = pchar;
877
878
879     /**
880      * BitSet for segment.
881      * <p><blockquote><pre>
882      * segment = *pchar *( ";" param )
883      * </pre></blockquote><p>
884      */

885     protected static final BitSet JavaDoc segment = new BitSet JavaDoc(256);
886     // Static initializer for segment
887
static {
888         segment.or(pchar);
889         segment.set(';');
890         segment.or(param);
891     }
892
893
894     /**
895      * BitSet for path segments.
896      * <p><blockquote><pre>
897      * path_segments = segment *( "/" segment )
898      * </pre></blockquote><p>
899      */

900     protected static final BitSet JavaDoc path_segments = new BitSet JavaDoc(256);
901     // Static initializer for path_segments
902
static {
903         path_segments.set('/');
904         path_segments.or(segment);
905     }
906
907
908     /**
909      * URI absolute path.
910      * <p><blockquote><pre>
911      * abs_path = "/" path_segments
912      * </pre></blockquote><p>
913      */

914     protected static final BitSet JavaDoc abs_path = new BitSet JavaDoc(256);
915     // Static initializer for abs_path
916
static {
917         abs_path.set('/');
918         abs_path.or(path_segments);
919     }
920
921
922     /**
923      * URI bitset for encoding typical non-slash characters.
924      * <p><blockquote><pre>
925      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
926      * "&amp;" | "=" | "+" | "$" | ","
927      * </pre></blockquote><p>
928      */

929     protected static final BitSet JavaDoc uric_no_slash = new BitSet JavaDoc(256);
930     // Static initializer for uric_no_slash
931
static {
932         uric_no_slash.or(unreserved);
933         uric_no_slash.or(escaped);
934         uric_no_slash.set(';');
935         uric_no_slash.set('?');
936         uric_no_slash.set(';');
937         uric_no_slash.set('@');
938         uric_no_slash.set('&');
939         uric_no_slash.set('=');
940         uric_no_slash.set('+');
941         uric_no_slash.set('$');
942         uric_no_slash.set(',');
943     }
944     
945
946     /**
947      * URI bitset that combines uric_no_slash and uric.
948      * <p><blockquote><pre>
949      * opaque_part = uric_no_slash *uric
950      * </pre></blockquote><p>
951      */

952     protected static final BitSet JavaDoc opaque_part = new BitSet JavaDoc(256);
953     // Static initializer for opaque_part
954
static {
955         // it's generous. because first character must not include a slash
956
opaque_part.or(uric_no_slash);
957         opaque_part.or(uric);
958     }
959     
960
961     /**
962      * URI bitset that combines absolute path and opaque part.
963      * <p><blockquote><pre>
964      * path = [ abs_path | opaque_part ]
965      * </pre></blockquote><p>
966      */

967     protected static final BitSet JavaDoc path = new BitSet JavaDoc(256);
968     // Static initializer for path
969
static {
970         path.or(abs_path);
971         path.or(opaque_part);
972     }
973
974
975     /**
976      * Port, a logical alias for digit.
977      */

978     protected static final BitSet JavaDoc port = digit;
979
980
981     /**
982      * Bitset that combines digit and dot fo IPv$address.
983      * <p><blockquote><pre>
984      * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
985      * </pre></blockquote><p>
986      */

987     protected static final BitSet JavaDoc IPv4address = new BitSet JavaDoc(256);
988     // Static initializer for IPv4address
989
static {
990         IPv4address.or(digit);
991         IPv4address.set('.');
992     }
993
994
995     /**
996      * RFC 2373.
997      * <p><blockquote><pre>
998      * IPv6address = hexpart [ ":" IPv4address ]
999      * </pre></blockquote><p>
1000     */

1001    protected static final BitSet JavaDoc IPv6address = new BitSet JavaDoc(256);
1002    // Static initializer for IPv6address reference
1003
static {
1004        IPv6address.or(hex); // hexpart
1005
IPv6address.set(':');
1006        IPv6address.or(IPv4address);
1007    }
1008
1009
1010    /**
1011     * RFC 2732, 2373.
1012     * <p><blockquote><pre>
1013     * IPv6reference = "[" IPv6address "]"
1014     * </pre></blockquote><p>
1015     */

1016    protected static final BitSet JavaDoc IPv6reference = new BitSet JavaDoc(256);
1017    // Static initializer for IPv6reference
1018
static {
1019        IPv6reference.set('[');
1020        IPv6reference.or(IPv6address);
1021        IPv6reference.set(']');
1022    }
1023
1024
1025    /**
1026     * BitSet for toplabel.
1027     * <p><blockquote><pre>
1028     * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1029     * </pre></blockquote><p>
1030     */

1031    protected static final BitSet JavaDoc toplabel = new BitSet JavaDoc(256);
1032    // Static initializer for toplabel
1033
static {
1034        toplabel.or(alphanum);
1035        toplabel.set('-');
1036    }
1037
1038
1039    /**
1040     * BitSet for domainlabel.
1041     * <p><blockquote><pre>
1042     * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1043     * </pre></blockquote><p>
1044     */

1045    protected static final BitSet JavaDoc domainlabel = toplabel;
1046
1047
1048    /**
1049     * BitSet for hostname.
1050     * <p><blockquote><pre>
1051     * hostname = *( domainlabel "." ) toplabel [ "." ]
1052     * </pre></blockquote><p>
1053     */

1054    protected static final BitSet JavaDoc hostname = new BitSet JavaDoc(256);
1055    // Static initializer for hostname
1056
static {
1057        hostname.or(toplabel);
1058        // hostname.or(domainlabel);
1059
hostname.set('.');
1060    }
1061
1062
1063    /**
1064     * BitSet for host.
1065     * <p><blockquote><pre>
1066     * host = hostname | IPv4address | IPv6reference
1067     * </pre></blockquote><p>
1068     */

1069    protected static final BitSet JavaDoc host = new BitSet JavaDoc(256);
1070    // Static initializer for host
1071
static {
1072        host.or(hostname);
1073        // host.or(IPv4address);
1074
host.or(IPv6reference); // IPv4address
1075
}
1076
1077
1078    /**
1079     * BitSet for hostport.
1080     * <p><blockquote><pre>
1081     * hostport = host [ ":" port ]
1082     * </pre></blockquote><p>
1083     */

1084    protected static final BitSet JavaDoc hostport = new BitSet JavaDoc(256);
1085    // Static initializer for hostport
1086
static {
1087        hostport.or(host);
1088        hostport.set(':');
1089        hostport.or(port);
1090    }
1091
1092
1093    /**
1094     * Bitset for userinfo.
1095     * <p><blockquote><pre>
1096     * userinfo = *( unreserved | escaped |
1097     * ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
1098     * </pre></blockquote><p>
1099     */

1100    protected static final BitSet JavaDoc userinfo = new BitSet JavaDoc(256);
1101    // Static initializer for userinfo
1102
static {
1103        userinfo.or(unreserved);
1104        userinfo.or(escaped);
1105        userinfo.set(';');
1106        userinfo.set(':');
1107        userinfo.set('&');
1108        userinfo.set('=');
1109        userinfo.set('+');
1110        userinfo.set('$');
1111        userinfo.set(',');
1112    }
1113
1114
1115    /**
1116     * BitSet for within the userinfo component like user and password.
1117     */

1118    public static final BitSet JavaDoc within_userinfo = new BitSet JavaDoc(256);
1119    // Static initializer for within_userinfo
1120
static {
1121        within_userinfo.or(userinfo);
1122        within_userinfo.clear(';'); // reserved within authority
1123
within_userinfo.clear(':');
1124        within_userinfo.clear('@');
1125        within_userinfo.clear('?');
1126        within_userinfo.clear('/');
1127    }
1128
1129
1130    /**
1131     * Bitset for server.
1132     * <p><blockquote><pre>
1133     * server = [ [ userinfo "@" ] hostport ]
1134     * </pre></blockquote><p>
1135     */

1136    protected static final BitSet JavaDoc server = new BitSet JavaDoc(256);
1137    // Static initializer for server
1138
static {
1139        server.or(userinfo);
1140        server.set('@');
1141        server.or(hostport);
1142    }
1143
1144
1145    /**
1146     * BitSet for reg_name.
1147     * <p><blockquote><pre>
1148     * reg_name = 1*( unreserved | escaped | "$" | "," |
1149     * ";" | ":" | "@" | "&amp;" | "=" | "+" )
1150     * </pre></blockquote><p>
1151     */

1152    protected static final BitSet JavaDoc reg_name = new BitSet JavaDoc(256);
1153    // Static initializer for reg_name
1154
static {
1155        reg_name.or(unreserved);
1156        reg_name.or(escaped);
1157        reg_name.set('$');
1158        reg_name.set(',');
1159        reg_name.set(';');
1160        reg_name.set(':');
1161        reg_name.set('@');
1162        reg_name.set('&');
1163        reg_name.set('=');
1164        reg_name.set('+');
1165    }
1166
1167
1168    /**
1169     * BitSet for authority.
1170     * <p><blockquote><pre>
1171     * authority = server | reg_name
1172     * </pre></blockquote><p>
1173     */

1174    protected static final BitSet JavaDoc authority = new BitSet JavaDoc(256);
1175    // Static initializer for authority
1176
static {
1177        authority.or(server);
1178        authority.or(reg_name);
1179    }
1180
1181
1182    /**
1183     * BitSet for scheme.
1184     * <p><blockquote><pre>
1185     * scheme = alpha *( alpha | digit | "+" | "-" | "." )
1186     * </pre></blockquote><p>
1187     */

1188    protected static final BitSet JavaDoc scheme = new BitSet JavaDoc(256);
1189    // Static initializer for scheme
1190
static {
1191        scheme.or(alpha);
1192        scheme.or(digit);
1193        scheme.set('+');
1194        scheme.set('-');
1195        scheme.set('.');
1196    }
1197
1198
1199    /**
1200     * BitSet for rel_segment.
1201     * <p><blockquote><pre>
1202     * rel_segment = 1*( unreserved | escaped |
1203     * ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
1204     * </pre></blockquote><p>
1205     */

1206    protected static final BitSet JavaDoc rel_segment = new BitSet JavaDoc(256);
1207    // Static initializer for rel_segment
1208
static {
1209        rel_segment.or(unreserved);
1210        rel_segment.or(escaped);
1211        rel_segment.set(';');
1212        rel_segment.set('@');
1213        rel_segment.set('&');
1214        rel_segment.set('=');
1215        rel_segment.set('+');
1216        rel_segment.set('$');
1217        rel_segment.set(',');
1218    }
1219
1220
1221    /**
1222     * BitSet for rel_path.
1223     * <p><blockquote><pre>
1224     * rel_path = rel_segment [ abs_path ]
1225     * </pre></blockquote><p>
1226     */

1227    protected static final BitSet JavaDoc rel_path = new BitSet JavaDoc(256);
1228    // Static initializer for rel_path
1229
static {
1230        rel_path.or(rel_segment);
1231        rel_path.or(abs_path);
1232    }
1233
1234
1235    /**
1236     * BitSet for net_path.
1237     * <p><blockquote><pre>
1238     * net_path = "//" authority [ abs_path ]
1239     * </pre></blockquote><p>
1240     */

1241    protected static final BitSet JavaDoc net_path = new BitSet JavaDoc(256);
1242    // Static initializer for net_path
1243
static {
1244        net_path.set('/');
1245        net_path.or(authority);
1246        net_path.or(abs_path);
1247    }
1248    
1249
1250    /**
1251     * BitSet for hier_part.
1252     * <p><blockquote><pre>
1253     * hier_part = ( net_path | abs_path ) [ "?" query ]
1254     * </pre></blockquote><p>
1255     */

1256    protected static final BitSet JavaDoc hier_part = new BitSet JavaDoc(256);
1257    // Static initializer for hier_part
1258
static {
1259        hier_part.or(net_path);
1260        hier_part.or(abs_path);
1261        // hier_part.set('?'); aleady included
1262
hier_part.or(query);
1263    }
1264
1265
1266    /**
1267     * BitSet for relativeURI.
1268     * <p><blockquote><pre>
1269     * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1270     * </pre></blockquote><p>
1271     */

1272    protected static final BitSet JavaDoc relativeURI = new BitSet JavaDoc(256);
1273    // Static initializer for relativeURI
1274
static {
1275        relativeURI.or(net_path);
1276        relativeURI.or(abs_path);
1277        relativeURI.or(rel_path);
1278        // relativeURI.set('?'); aleady included
1279
relativeURI.or(query);
1280    }
1281
1282
1283    /**
1284     * BitSet for absoluteURI.
1285     * <p><blockquote><pre>
1286     * absoluteURI = scheme ":" ( hier_part | opaque_part )
1287     * </pre></blockquote><p>
1288     */

1289    protected static final BitSet JavaDoc absoluteURI = new BitSet JavaDoc(256);
1290    // Static initializer for absoluteURI
1291
static {
1292        absoluteURI.or(scheme);
1293        absoluteURI.set(':');
1294        absoluteURI.or(hier_part);
1295        absoluteURI.or(opaque_part);
1296    }
1297
1298
1299    /**
1300     * BitSet for URI-reference.
1301     * <p><blockquote><pre>
1302     * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1303     * </pre></blockquote><p>
1304     */

1305    protected static final BitSet JavaDoc URI_reference = new BitSet JavaDoc(256);
1306    // Static initializer for URI_reference
1307
static {
1308        URI_reference.or(absoluteURI);
1309        URI_reference.or(relativeURI);
1310        URI_reference.set('#');
1311        URI_reference.or(fragment);
1312    }
1313
1314    // ---------------------------- Characters disallowed within the URI syntax
1315
// Excluded US-ASCII Characters are like control, space, delims and unwise
1316

1317    /**
1318     * BitSet for control.
1319     */

1320    public static final BitSet JavaDoc control = new BitSet JavaDoc(256);
1321    // Static initializer for control
1322
static {
1323        for (int i = 0; i <= 0x1F; i++) {
1324            control.set(i);
1325        }
1326        control.set(0x7F);
1327    }
1328
1329    /**
1330     * BitSet for space.
1331     */

1332    public static final BitSet JavaDoc space = new BitSet JavaDoc(256);
1333    // Static initializer for space
1334
static {
1335        space.set(0x20);
1336    }
1337
1338
1339    /**
1340     * BitSet for delims.
1341     */

1342    public static final BitSet JavaDoc delims = new BitSet JavaDoc(256);
1343    // Static initializer for delims
1344
static {
1345        delims.set('<');
1346        delims.set('>');
1347        delims.set('#');
1348        delims.set('%');
1349        delims.set('"');
1350    }
1351
1352
1353    /**
1354     * BitSet for unwise.
1355     */

1356    public static final BitSet JavaDoc unwise = new BitSet JavaDoc(256);
1357    // Static initializer for unwise
1358
static {
1359        unwise.set('{');
1360        unwise.set('}');
1361        unwise.set('|');
1362        unwise.set('\\');
1363        unwise.set('^');
1364        unwise.set('[');
1365        unwise.set(']');
1366        unwise.set('`');
1367    }
1368
1369
1370    /**
1371     * Disallowed rel_path before escaping.
1372     */

1373    public static final BitSet JavaDoc disallowed_rel_path = new BitSet JavaDoc(256);
1374    // Static initializer for disallowed_rel_path
1375
static {
1376        disallowed_rel_path.or(uric);
1377        disallowed_rel_path.andNot(rel_path);
1378    }
1379
1380
1381    /**
1382     * Disallowed opaque_part before escaping.
1383     */

1384    public static final BitSet JavaDoc disallowed_opaque_part = new BitSet JavaDoc(256);
1385    // Static initializer for disallowed_opaque_part
1386
static {
1387        disallowed_opaque_part.or(uric);
1388        disallowed_opaque_part.andNot(opaque_part);
1389    }
1390
1391    // ----------------------- Characters allowed within and for each component
1392

1393    /**
1394     * Those characters that are allowed for the authority component.
1395     */

1396    public static final BitSet JavaDoc allowed_authority = new BitSet JavaDoc(256);
1397    // Static initializer for allowed_authority
1398
static {
1399        allowed_authority.or(authority);
1400        allowed_authority.clear('%');
1401    }
1402
1403
1404    /**
1405     * Those characters that are allowed for the opaque_part.
1406     */

1407    public static final BitSet JavaDoc allowed_opaque_part = new BitSet JavaDoc(256);
1408    // Static initializer for allowed_opaque_part
1409
static {
1410        allowed_opaque_part.or(opaque_part);
1411        allowed_opaque_part.clear('%');
1412    }
1413
1414
1415    /**
1416     * Those characters that are allowed for the reg_name.
1417     */

1418    public static final BitSet JavaDoc allowed_reg_name = new BitSet JavaDoc(256);
1419    // Static initializer for allowed_reg_name
1420
static {
1421        allowed_reg_name.or(reg_name);
1422        // allowed_reg_name.andNot(percent);
1423
allowed_reg_name.clear('%');
1424    }
1425
1426
1427    /**
1428     * Those characters that are allowed for the userinfo component.
1429     */

1430    public static final BitSet JavaDoc allowed_userinfo = new BitSet JavaDoc(256);
1431    // Static initializer for allowed_userinfo
1432
static {
1433        allowed_userinfo.or(userinfo);
1434        // allowed_userinfo.andNot(percent);
1435
allowed_userinfo.clear('%');
1436    }
1437
1438
1439    /**
1440     * Those characters that are allowed for within the userinfo component.
1441     */

1442    public static final BitSet JavaDoc allowed_within_userinfo = new BitSet JavaDoc(256);
1443    // Static initializer for allowed_within_userinfo
1444
static {
1445        allowed_within_userinfo.or(within_userinfo);
1446        allowed_within_userinfo.clear('%');
1447    }
1448
1449
1450    /**
1451     * Those characters that are allowed for the IPv6reference component.
1452     * The characters '[', ']' in IPv6reference should be excluded.
1453     */

1454    public static final BitSet JavaDoc allowed_IPv6reference = new BitSet JavaDoc(256);
1455    // Static initializer for allowed_IPv6reference
1456
static {
1457        allowed_IPv6reference.or(IPv6reference);
1458        // allowed_IPv6reference.andNot(unwise);
1459
allowed_IPv6reference.clear('[');
1460        allowed_IPv6reference.clear(']');
1461    }
1462
1463
1464    /**
1465     * Those characters that are allowed for the host component.
1466     * The characters '[', ']' in IPv6reference should be excluded.
1467     */

1468    public static final BitSet JavaDoc allowed_host = new BitSet JavaDoc(256);
1469    // Static initializer for allowed_host
1470
static {
1471        allowed_host.or(hostname);
1472        allowed_host.or(allowed_IPv6reference);
1473    }
1474
1475
1476    /**
1477     * Those characters that are allowed for the authority component.
1478     */

1479    public static final BitSet JavaDoc allowed_within_authority = new BitSet JavaDoc(256);
1480    // Static initializer for allowed_within_authority
1481
static {
1482        allowed_within_authority.or(server);
1483        allowed_within_authority.or(reg_name);
1484        allowed_within_authority.clear(';');
1485        allowed_within_authority.clear(':');
1486        allowed_within_authority.clear('@');
1487        allowed_within_authority.clear('?');
1488        allowed_within_authority.clear('/');
1489    }
1490
1491
1492    /**
1493     * Those characters that are allowed for the abs_path.
1494     */

1495    public static final BitSet JavaDoc allowed_abs_path = new BitSet JavaDoc(256);
1496    // Static initializer for allowed_abs_path
1497
static {
1498        allowed_abs_path.or(abs_path);
1499        // allowed_abs_path.set('/'); // aleady included
1500
allowed_abs_path.andNot(percent);
1501    }
1502
1503
1504    /**
1505     * Those characters that are allowed for the rel_path.
1506     */

1507    public static final BitSet JavaDoc allowed_rel_path = new BitSet JavaDoc(256);
1508    // Static initializer for allowed_rel_path
1509
static {
1510        allowed_rel_path.or(rel_path);
1511        allowed_rel_path.clear('%');
1512    }
1513
1514
1515    /**
1516     * Those characters that are allowed within the path.
1517     */

1518    public static final BitSet JavaDoc allowed_within_path = new BitSet JavaDoc(256);
1519    // Static initializer for allowed_within_path
1520
static {
1521        allowed_within_path.or(abs_path);
1522        allowed_within_path.clear('/');
1523        allowed_within_path.clear(';');
1524        allowed_within_path.clear('=');
1525        allowed_within_path.clear('?');
1526    }
1527
1528
1529    /**
1530     * Those characters that are allowed for the query component.
1531     */

1532    public static final BitSet JavaDoc allowed_query = new BitSet JavaDoc(256);
1533    // Static initializer for allowed_query
1534
static {
1535        allowed_query.or(uric);
1536        allowed_query.clear('%');
1537    }
1538
1539
1540    /**
1541     * Those characters that are allowed within the query component.
1542     */

1543    public static final BitSet JavaDoc allowed_within_query = new BitSet JavaDoc(256);
1544    // Static initializer for allowed_within_query
1545
static {
1546        allowed_within_query.or(allowed_query);
1547        allowed_within_query.andNot(reserved); // excluded 'reserved'
1548
}
1549
1550
1551    /**
1552     * Those characters that are allowed for the fragment component.
1553     */

1554    public static final BitSet JavaDoc allowed_fragment = new BitSet JavaDoc(256);
1555    // Static initializer for allowed_fragment
1556
static {
1557        allowed_fragment.or(uric);
1558        allowed_fragment.clear('%');
1559    }
1560
1561    // ------------------------------------------- Flags for this URI-reference
1562

1563    // TODO: Figure out what all these variables are for and provide javadoc
1564

1565    // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1566
// absoluteURI = scheme ":" ( hier_part | opaque_part )
1567
protected boolean _is_hier_part;
1568    protected boolean _is_opaque_part;
1569    // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
1570
// hier_part = ( net_path | abs_path ) [ "?" query ]
1571
protected boolean _is_net_path;
1572    protected boolean _is_abs_path;
1573    protected boolean _is_rel_path;
1574    // net_path = "//" authority [ abs_path ]
1575
// authority = server | reg_name
1576
protected boolean _is_reg_name;
1577    protected boolean _is_server; // = _has_server
1578
// server = [ [ userinfo "@" ] hostport ]
1579
// host = hostname | IPv4address | IPv6reference
1580
protected boolean _is_hostname;
1581    protected boolean _is_IPv4address;
1582    protected boolean _is_IPv6reference;
1583
1584    // ------------------------------------------ Character and escape encoding
1585

1586    /**
1587     * Encodes URI string.
1588     *
1589     * This is a two mapping, one from original characters to octets, and
1590     * subsequently a second from octets to URI characters:
1591     * <p><blockquote><pre>
1592     * original character sequence->octet sequence->URI character sequence
1593     * </pre></blockquote><p>
1594     *
1595     * An escaped octet is encoded as a character triplet, consisting of the
1596     * percent character "%" followed by the two hexadecimal digits
1597     * representing the octet code. For example, "%20" is the escaped
1598     * encoding for the US-ASCII space character.
1599     * <p>
1600     * Conversion from the local filesystem character set to UTF-8 will
1601     * normally involve a two step process. First convert the local character
1602     * set to the UCS; then convert the UCS to UTF-8.
1603     * The first step in the process can be performed by maintaining a mapping
1604     * table that includes the local character set code and the corresponding
1605     * UCS code.
1606     * The next step is to convert the UCS character code to the UTF-8 encoding.
1607     * <p>
1608     * Mapping between vendor codepages can be done in a very similar manner
1609     * as described above.
1610     * <p>
1611     * The only time escape encodings can allowedly be made is when a URI is
1612     * being created from its component parts. The escape and validate methods
1613     * are internally performed within this method.
1614     *
1615     * @param original the original character sequence
1616     * @param allowed those characters that are allowed within a component
1617     * @param charset the protocol charset
1618     * @return URI character sequence
1619     * @throws URIException null component or unsupported character encoding
1620     */

1621    protected static char[] encode(String JavaDoc original, BitSet JavaDoc allowed,
1622            String JavaDoc charset) throws URIException {
1623
1624        // encode original to uri characters.
1625
if (original == null) {
1626            throw new URIException(URIException.PARSING, "null");
1627        }
1628        // escape octet to uri characters.
1629
if (allowed == null) {
1630            throw new URIException(URIException.PARSING,
1631                    "null allowed characters");
1632        }
1633        byte[] octets;
1634        try {
1635            octets = original.getBytes(charset);
1636        } catch (UnsupportedEncodingException JavaDoc error) {
1637            throw new URIException(URIException.UNSUPPORTED_ENCODING, charset);
1638        }
1639        StringBuffer JavaDoc buf = new StringBuffer JavaDoc(octets.length);
1640        for (int i = 0; i < octets.length; i++) {
1641            char c = (char) octets[i];
1642            if (allowed.get(c)) {
1643                buf.append(c);
1644            } else {
1645                buf.append('%');
1646                byte b = octets[i]; // use the original byte value
1647
char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1648                buf.append(Character.toUpperCase(hexadecimal)); // high
1649
hexadecimal = Character.forDigit(b & 0xF, 16);
1650                buf.append(Character.toUpperCase(hexadecimal)); // low
1651
}
1652        }
1653
1654        return buf.toString().toCharArray();
1655    }
1656
1657
1658    /**
1659     * Decodes URI encoded string.
1660     *
1661     * This is a two mapping, one from URI characters to octets, and
1662     * subsequently a second from octets to original characters:
1663     * <p><blockquote><pre>
1664     * URI character sequence->octet sequence->original character sequence
1665     * </pre></blockquote><p>
1666     *
1667     * A URI must be separated into its components before the escaped
1668     * characters within those components can be allowedly decoded.
1669     * <p>
1670     * Notice that there is a chance that URI characters that are non UTF-8
1671     * may be parsed as valid UTF-8. A recent non-scientific analysis found
1672     * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1673     * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1674     * false reading.
1675     * <p>
1676     * The percent "%" character always has the reserved purpose of being
1677     * the escape indicator, it must be escaped as "%25" in order to be used
1678     * as data within a URI.
1679     * <p>
1680     * The unescape method is internally performed within this method.
1681     *
1682     * @param component the URI character sequence
1683     * @param charset the protocol charset
1684     * @return original character sequence
1685     * @throws URIException incomplete trailing escape pattern or unsupported
1686     * character encoding
1687     */

1688    protected static String JavaDoc decode(char[] component, String JavaDoc charset)
1689        throws URIException {
1690
1691        // unescape uri characters to octets
1692
if (component == null) {
1693            return null;
1694        }
1695
1696        byte[] octets;
1697        try {
1698            octets = new String JavaDoc(component).getBytes(charset);
1699        } catch (UnsupportedEncodingException JavaDoc error) {
1700            throw new URIException(URIException.UNSUPPORTED_ENCODING,
1701                    "not supported " + charset + " encoding");
1702        }
1703        int length = octets.length;
1704        int oi = 0; // output index
1705
for (int ii = 0; ii < length; oi++) {
1706            byte aByte = (byte) octets[ii++];
1707            if (aByte == '%' && ii + 2 <= length) {
1708                byte high = (byte) Character.digit((char) octets[ii++], 16);
1709                byte low = (byte) Character.digit((char) octets[ii++], 16);
1710                if (high == -1 || low == -1) {
1711                    throw new URIException(URIException.ESCAPING,
1712                            "incomplete trailing escape pattern");
1713                            
1714                }
1715                aByte = (byte) ((high << 4) + low);
1716            }
1717            octets[oi] = (byte) aByte;
1718        }
1719
1720        String JavaDoc result;
1721        try {
1722            result = new String JavaDoc(octets, 0, oi, charset);
1723        } catch (UnsupportedEncodingException JavaDoc error) {
1724            throw new URIException(URIException.UNSUPPORTED_ENCODING,
1725                    "not supported " + charset + " encoding");
1726        }
1727
1728        return result;
1729    }
1730
1731
1732    /**
1733     * Pre-validate the unescaped URI string within a specific component.
1734     *
1735     * @param component the component string within the component
1736     * @param disallowed those characters disallowed within the component
1737     * @return if true, it doesn't have the disallowed characters
1738     * if false, the component is undefined or an incorrect one
1739     */

1740    protected boolean prevalidate(String JavaDoc component, BitSet JavaDoc disallowed) {
1741        // prevalidate the given component by disallowed characters
1742
if (component == null) {
1743            return false; // undefined
1744
}
1745        char[] target = component.toCharArray();
1746        for (int i = 0; i < target.length; i++) {
1747            if (disallowed.get(target[i])) {
1748                return false;
1749            }
1750        }
1751        return true;
1752    }
1753
1754
1755    /**
1756     * Validate the URI characters within a specific component.
1757     * The component must be performed after escape encoding. Or it doesn't
1758     * include escaped characters.
1759     *
1760     * @param component the characters sequence within the component
1761     * @param generous those characters that are allowed within a component
1762     * @return if true, it's the correct URI character sequence
1763     */

1764    protected boolean validate(char[] component, BitSet JavaDoc generous) {
1765        // validate each component by generous characters
1766
return validate(component, 0, -1, generous);
1767    }
1768
1769
1770    /**
1771     * Validate the URI characters within a specific component.
1772     * The component must be performed after escape encoding. Or it doesn't
1773     * include escaped characters.
1774     * <p>
1775     * It's not that much strict, generous. The strict validation might be
1776     * performed before being called this method.
1777     *
1778     * @param component the characters sequence within the component
1779     * @param soffset the starting offset of the given component
1780     * @param eoffset the ending offset of the given component
1781     * if -1, it means the length of the component
1782     * @param generous those characters that are allowed within a component
1783     * @return if true, it's the correct URI character sequence
1784     */

1785    protected boolean validate(char[] component, int soffset, int eoffset,
1786            BitSet JavaDoc generous) {
1787        // validate each component by generous characters
1788
if (eoffset == -1) {
1789            eoffset = component.length - 1;
1790        }
1791        for (int i = soffset; i <= eoffset; i++) {
1792            if (!generous.get(component[i])) {
1793                return false;
1794            }
1795        }
1796        return true;
1797    }
1798
1799
1800    /**
1801     * In order to avoid any possilbity of conflict with non-ASCII characters,
1802     * Parse a URI reference as a <code>String</code> with the character
1803     * encoding of the local system or the document.
1804     * <p>
1805     * The following line is the regular expression for breaking-down a URI
1806     * reference into its components.
1807     * <p><blockquote><pre>
1808     * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1809     * 12 3 4 5 6 7 8 9
1810     * </pre></blockquote><p>
1811     * For example, matching the above expression to
1812     * http://jakarta.apache.org/ietf/uri/#Related
1813     * results in the following subexpression matches:
1814     * <p><blockquote><pre>
1815     * $1 = http:
1816     * scheme = $2 = http
1817     * $3 = //jakarta.apache.org
1818     * authority = $4 = jakarta.apache.org
1819     * path = $5 = /ietf/uri/
1820     * $6 = <undefined>
1821     * query = $7 = <undefined>
1822     * $8 = #Related
1823     * fragment = $9 = Related
1824     * </pre></blockquote><p>
1825     *
1826     * @param original the original character sequence
1827     * @param escaped <code>true</code> if <code>original</code> is escaped
1828     * @throws URIException If an error occurs.
1829     */

1830    protected void parseUriReference(String JavaDoc original, boolean escaped)
1831        throws URIException {
1832
1833        // validate and contruct the URI character sequence
1834
if (original == null) {
1835            throw new URIException("URI-Reference required");
1836        }
1837
1838        /* @
1839         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1840         */

1841        String JavaDoc tmp = original.trim();
1842        
1843        /*
1844         * The length of the string sequence of characters.
1845         * It may not be equal to the length of the byte array.
1846         */

1847        int length = tmp.length();
1848
1849        /*
1850         * Remove the delimiters like angle brackets around an URI.
1851         */

1852        if (length > 0) {
1853            char[] firstDelimiter = { tmp.charAt(0) };
1854            if (validate(firstDelimiter, delims)) {
1855                if (length >= 2) {
1856                    char[] lastDelimiter = { tmp.charAt(length - 1) };
1857                    if (validate(lastDelimiter, delims)) {
1858                        tmp = tmp.substring(1, length - 1);
1859                        length = length - 2;
1860                    }
1861                }
1862            }
1863        }
1864
1865        /*
1866         * The starting index
1867         */

1868        int from = 0;
1869
1870        /*
1871         * The test flag whether the URI is started from the path component.
1872         */

1873        boolean isStartedFromPath = false;
1874        int atColon = tmp.indexOf(':');
1875        int atSlash = tmp.indexOf('/');
1876        if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1877            isStartedFromPath = true;
1878        }
1879
1880        /*
1881         * <p><blockquote><pre>
1882         * @@@@@@@@
1883         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1884         * </pre></blockquote><p>
1885         */

1886        int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1887        if (at == -1) {
1888            at = 0;
1889        }
1890
1891        /*
1892         * Parse the scheme.
1893         * <p><blockquote><pre>
1894         * scheme = $2 = http
1895         * @
1896         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1897         * </pre></blockquote><p>
1898         */

1899        if (at < length && tmp.charAt(at) == ':') {
1900            char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1901            if (validate(target, scheme)) {
1902                _scheme = target;
1903            } else {
1904                throw new URIException("incorrect scheme");
1905            }
1906            from = ++at;
1907        }
1908
1909        /*
1910         * Parse the authority component.
1911         * <p><blockquote><pre>
1912         * authority = $4 = jakarta.apache.org
1913         * @@
1914         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1915         * </pre></blockquote><p>
1916         */

1917        // Reset flags
1918
_is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1919        if (0 <= at && at < length && tmp.charAt(at) == '/') {
1920            // Set flag
1921
_is_hier_part = true;
1922            if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1923                // the temporary index to start the search from
1924
int next = indexFirstOf(tmp, "/?#", at + 2);
1925                if (next == -1) {
1926                    next = (tmp.substring(at + 2).length() == 0) ? at + 2
1927                        : tmp.length();
1928                }
1929                parseAuthority(tmp.substring(at + 2, next), escaped);
1930                from = at = next;
1931                // Set flag
1932
_is_net_path = true;
1933            }
1934            if (from == at) {
1935                // Set flag
1936
_is_abs_path = true;
1937            }
1938        }
1939
1940        /*
1941         * Parse the path component.
1942         * <p><blockquote><pre>
1943         * path = $5 = /ietf/uri/
1944         * @@@@@@
1945         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1946         * </pre></blockquote><p>
1947         */

1948        if (from < length) {
1949            // rel_path = rel_segment [ abs_path ]
1950
int next = indexFirstOf(tmp, "?#", from);
1951            if (next == -1) {
1952                next = tmp.length();
1953            }
1954            if (!_is_abs_path) {
1955                if (!escaped
1956                    && prevalidate(tmp.substring(from, next), disallowed_rel_path)
1957                    || escaped
1958                    && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
1959                    // Set flag
1960
_is_rel_path = true;
1961                } else if (!escaped
1962                    && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
1963                    || escaped
1964                    && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
1965                    // Set flag
1966
_is_opaque_part = true;
1967                } else {
1968                    // the path component may be empty
1969
_path = null;
1970                }
1971            }
1972            if (escaped) {
1973                setRawPath(tmp.substring(from, next).toCharArray());
1974            } else {
1975                setPath(tmp.substring(from, next));
1976            }
1977            at = next;
1978        }
1979
1980        // set the charset to do escape encoding
1981
String JavaDoc charset = getProtocolCharset();
1982
1983        /*
1984         * Parse the query component.
1985         * <p><blockquote><pre>
1986         * query = $7 = <undefined>
1987         * @@@@@@@@@
1988         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1989         * </pre></blockquote><p>
1990         */

1991        if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
1992            int next = tmp.indexOf('#', at + 1);
1993            if (next == -1) {
1994                next = tmp.length();
1995            }
1996            _query = (escaped) ? tmp.substring(at + 1, next).toCharArray()
1997                : encode(tmp.substring(at + 1, next), allowed_query, charset);
1998            at = next;
1999        }
2000
2001        /*
2002         * Parse the fragment component.
2003         * <p><blockquote><pre>
2004         * fragment = $9 = Related
2005         * @@@@@@@@
2006         * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2007         * </pre></blockquote><p>
2008         */

2009        if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2010            if (at + 1 == length) { // empty fragment
2011
_fragment = "".toCharArray();
2012            } else {
2013                _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
2014                    : encode(tmp.substring(at + 1), allowed_fragment, charset);
2015            }
2016        }
2017
2018        // set this URI.
2019
setURI();
2020    }
2021
2022
2023    /**
2024     * Get the earlier index that to be searched for the first occurrance in
2025     * one of any of the given string.
2026     *
2027     * @param s the string to be indexed
2028     * @param delims the delimiters used to index
2029     * @return the earlier index if there are delimiters
2030     */

2031    protected int indexFirstOf(String JavaDoc s, String JavaDoc delims) {
2032        return indexFirstOf(s, delims, -1);
2033    }
2034
2035
2036    /**
2037     * Get the earlier index that to be searched for the first occurrance in
2038     * one of any of the given string.
2039     *
2040     * @param s the string to be indexed
2041     * @param delims the delimiters used to index
2042     * @param offset the from index
2043     * @return the earlier index if there are delimiters
2044     */

2045    protected int indexFirstOf(String JavaDoc s, String JavaDoc delims, int offset) {
2046        if (s == null || s.length() == 0) {
2047            return -1;
2048        }
2049        if (delims == null || delims.length() == 0) {
2050            return -1;
2051        }
2052        // check boundaries
2053
if (offset < 0) {
2054            offset = 0;
2055        } else if (offset > s.length()) {
2056            return -1;
2057        }
2058        // s is never null
2059
int min = s.length();
2060        char[] delim = delims.toCharArray();
2061        for (int i = 0; i < delim.length; i++) {
2062            int at = s.indexOf(delim[i], offset);
2063            if (at >= 0 && at < min) {
2064                min = at;
2065            }
2066        }
2067        return (min == s.length()) ? -1 : min;
2068    }
2069
2070
2071    /**
2072     * Get the earlier index that to be searched for the first occurrance in
2073     * one of any of the given array.
2074     *
2075     * @param s the character array to be indexed
2076     * @param delim the delimiter used to index
2077     * @return the ealier index if there are a delimiter
2078     */

2079    protected int indexFirstOf(char[] s, char delim) {
2080        return indexFirstOf(s, delim, 0);
2081    }
2082
2083
2084    /**
2085     * Get the earlier index that to be searched for the first occurrance in
2086     * one of any of the given array.
2087     *
2088     * @param s the character array to be indexed
2089     * @param delim the delimiter used to index
2090     * @param offset The offset.
2091     * @return the ealier index if there is a delimiter
2092     */

2093    protected int indexFirstOf(char[] s, char delim, int offset) {
2094        if (s == null || s.length == 0) {
2095            return -1;
2096        }
2097        // check boundaries
2098
if (offset < 0) {
2099            offset = 0;
2100        } else if (offset > s.length) {
2101            return -1;
2102        }
2103        for (int i = offset; i < s.length; i++) {
2104            if (s[i] == delim) {
2105                return i;
2106            }
2107        }
2108        return -1;
2109    }
2110
2111
2112    /**
2113     * Parse the authority component.
2114     *
2115     * @param original the original character sequence of authority component
2116     * @param escaped <code>true</code> if <code>original</code> is escaped
2117     * @throws URIException If an error occurs.
2118     */

2119    protected void parseAuthority(String JavaDoc original, boolean escaped)
2120        throws URIException {
2121
2122        // Reset flags
2123
_is_reg_name = _is_server =
2124        _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2125
2126        // set the charset to do escape encoding
2127
String JavaDoc charset = getProtocolCharset();
2128
2129        boolean hasPort = true;
2130        int from = 0;
2131        int next = original.indexOf('@');
2132        if (next != -1) { // neither -1 and 0
2133
// each protocol extented from URI supports the specific userinfo
2134
_userinfo = (escaped) ? original.substring(0, next).toCharArray()
2135                : encode(original.substring(0, next), allowed_userinfo,
2136                        charset);
2137            from = next + 1;
2138        }
2139        next = original.indexOf('[', from);
2140        if (next >= from) {
2141            next = original.indexOf(']', from);
2142            if (next == -1) {
2143                throw new URIException(URIException.PARSING, "IPv6reference");
2144            } else {
2145                next++;
2146            }
2147            // In IPv6reference, '[', ']' should be excluded
2148
_host = (escaped) ? original.substring(from, next).toCharArray()
2149                : encode(original.substring(from, next), allowed_IPv6reference,
2150                        charset);
2151            // Set flag
2152
_is_IPv6reference = true;
2153        } else { // only for !_is_IPv6reference
2154
next = original.indexOf(':', from);
2155            if (next == -1) {
2156                next = original.length();
2157                hasPort = false;
2158            }
2159            // REMINDME: it doesn't need the pre-validation
2160
_host = original.substring(from, next).toCharArray();
2161            if (validate(_host, IPv4address)) {
2162                // Set flag
2163
_is_IPv4address = true;
2164            } else if (validate(_host, hostname)) {
2165                // Set flag
2166
_is_hostname = true;
2167            } else {
2168                // Set flag
2169
_is_reg_name = true;
2170            }
2171        }
2172        if (_is_reg_name) {
2173            // Reset flags for a server-based naming authority
2174
_is_server = _is_hostname = _is_IPv4address =
2175            _is_IPv6reference = false;
2176            // set a registry-based naming authority
2177
_authority = (escaped) ? original.toString().toCharArray()
2178                : encode(original.toString(), allowed_reg_name, charset);
2179        } else {
2180            if (original.length() - 1 > next && hasPort
2181                && original.charAt(next) == ':') { // not empty
2182
from = next + 1;
2183                try {
2184                    _port = Integer.parseInt(original.substring(from));
2185                } catch (NumberFormatException JavaDoc error) {
2186                    throw new URIException(URIException.PARSING,
2187                            "invalid port number");
2188                }
2189            }
2190            // set a server-based naming authority
2191
StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
2192            if (_userinfo != null) { // has_userinfo
2193
buf.append(_userinfo);
2194                buf.append('@');
2195            }
2196            if (_host != null) {
2197                buf.append(_host);
2198                if (_port != -1) {
2199                    buf.append(':');
2200                    buf.append(_port);
2201                }
2202            }
2203            _authority = buf.toString().toCharArray();
2204            // Set flag
2205
_is_server = true;
2206        }
2207    }
2208
2209
2210    /**
2211     * Once it's parsed successfully, set this URI.
2212     *
2213     * @see #getRawURI
2214     */

2215    protected void setURI() {
2216        // set _uri
2217
StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
2218        // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2219
if (_scheme != null) {
2220            buf.append(_scheme);
2221            buf.append(':');
2222        }
2223        if (_is_net_path) {
2224            buf.append("//");
2225            if (_authority != null) { // has_authority
2226
if (_userinfo != null) { // by default, remove userinfo part
2227
if (_host != null) {
2228                        buf.append(_host);
2229                        if (_port != -1) {
2230                            buf.append(':');
2231                            buf.append(_port);
2232                        }
2233                    }
2234                } else {
2235                    buf.append(_authority);
2236                }
2237            }
2238        }
2239        if (_opaque != null && _is_opaque_part) {
2240            buf.append(_opaque);
2241        } else if (_path != null) {
2242            // _is_hier_part or _is_relativeURI
2243
if (_path.length != 0) {
2244                buf.append(_path);
2245            }
2246        }
2247        if (_query != null) { // has_query
2248
buf.append('?');
2249            buf.append(_query);
2250        }
2251        // ignore the fragment identifier
2252
_uri = buf.toString().toCharArray();
2253        hash = 0;
2254    }
2255
2256    // ----------------------------------------------------------- Test methods
2257

2258
2259    /**
2260     * Tell whether or not this URI is absolute.
2261     *
2262     * @return true iif this URI is absoluteURI
2263     */

2264    public boolean isAbsoluteURI() {
2265        return (_scheme != null);
2266    }
2267  
2268
2269    /**
2270     * Tell whether or not this URI is relative.
2271     *
2272     * @return true iif this URI is relativeURI
2273     */

2274    public boolean isRelativeURI() {
2275        return (_scheme == null);
2276    }
2277
2278
2279    /**
2280     * Tell whether or not the absoluteURI of this URI is hier_part.
2281     *
2282     * @return true iif the absoluteURI is hier_part
2283     */

2284    public boolean isHierPart() {
2285        return _is_hier_part;
2286    }
2287
2288
2289    /**
2290     * Tell whether or not the absoluteURI of this URI is opaque_part.
2291     *
2292     * @return true iif the absoluteURI is opaque_part
2293     */

2294    public boolean isOpaquePart() {
2295        return _is_opaque_part;
2296    }
2297
2298
2299    /**
2300     * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2301     * It's the same function as the has_authority() method.
2302     *
2303     * @return true iif the relativeURI or heir_part is net_path
2304     * @see #hasAuthority
2305     */

2306    public boolean isNetPath() {
2307        return _is_net_path || (_authority != null);
2308    }
2309
2310
2311    /**
2312     * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2313     *
2314     * @return true iif the relativeURI or hier_part is abs_path
2315     */

2316    public boolean isAbsPath() {
2317        return _is_abs_path;
2318    }
2319
2320
2321    /**
2322     * Tell whether or not the relativeURI of this URI is rel_path.
2323     *
2324     * @return true iif the relativeURI is rel_path
2325     */

2326    public boolean isRelPath() {
2327        return _is_rel_path;
2328    }
2329
2330
2331    /**
2332     * Tell whether or not this URI has authority.
2333     * It's the same function as the is_net_path() method.
2334     *
2335     * @return true iif this URI has authority
2336     * @see #isNetPath
2337     */

2338    public boolean hasAuthority() {
2339        return (_authority != null) || _is_net_path;
2340    }
2341
2342    /**
2343     * Tell whether or not the authority component of this URI is reg_name.
2344     *
2345     * @return true iif the authority component is reg_name
2346     */

2347    public boolean isRegName() {
2348        return _is_reg_name;
2349    }
2350  
2351
2352    /**
2353     * Tell whether or not the authority component of this URI is server.
2354     *
2355     * @return true iif the authority component is server
2356     */

2357    public boolean isServer() {
2358        return _is_server;
2359    }
2360  
2361
2362    /**
2363     * Tell whether or not this URI has userinfo.
2364     *
2365     * @return true iif this URI has userinfo
2366     */

2367    public boolean hasUserinfo() {
2368        return (_userinfo != null);
2369    }
2370  
2371
2372    /**
2373     * Tell whether or not the host part of this URI is hostname.
2374     *
2375     * @return true iif the host part is hostname
2376     */

2377    public boolean isHostname() {
2378        return _is_hostname;
2379    }
2380
2381
2382    /**
2383     * Tell whether or not the host part of this URI is IPv4address.
2384     *
2385     * @return true iif the host part is IPv4address
2386     */

2387    public boolean isIPv4address() {
2388        return _is_IPv4address;
2389    }
2390
2391
2392    /**
2393     * Tell whether or not the host part of this URI is IPv6reference.
2394     *
2395     * @return true iif the host part is IPv6reference
2396     */

2397    public boolean isIPv6reference() {
2398        return _is_IPv6reference;
2399    }
2400
2401
2402    /**
2403     * Tell whether or not this URI has query.
2404     *
2405     * @return true iif this URI has query
2406     */

2407    public boolean hasQuery() {
2408        return (_query != null);
2409    }
2410   
2411
2412    /**
2413     * Tell whether or not this URI has fragment.
2414     *
2415     * @return true iif this URI has fragment
2416     */

2417    public boolean hasFragment() {
2418        return (_fragment != null);
2419    }
2420   
2421   
2422    // ---------------------------------------------------------------- Charset
2423

2424
2425    /**
2426     * Set the default charset of the protocol.
2427     * <p>
2428     * The character set used to store files SHALL remain a local decision and
2429     * MAY depend on the capability of local operating systems. Prior to the
2430     * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2431     * and UTF-8 encoded. This approach, while allowing international exchange
2432     * of URIs, will still allow backward compatibility with older systems
2433     * because the code set positions for ASCII characters are identical to the
2434     * one byte sequence in UTF-8.
2435     * <p>
2436     * An individual URI scheme may require a single charset, define a default
2437     * charset, or provide a way to indicate the charset used.
2438     *
2439     * <p>
2440     * Always all the time, the setter method is always succeeded and throws
2441     * <code>DefaultCharsetChanged</code> exception.
2442     *
2443     * So API programmer must follow the following way:
2444     * <code><pre>
2445     * import org.apache.util.URI$DefaultCharsetChanged;
2446     * .
2447     * .
2448     * .
2449     * try {
2450     * URI.setDefaultProtocolCharset("UTF-8");
2451     * } catch (DefaultCharsetChanged cc) {
2452     * // CASE 1: the exception could be ignored, when it is set by user
2453     * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2454     * // CASE 2: let user know the default protocol charset changed
2455     * } else {
2456     * // CASE 2: let user know the default document charset changed
2457     * }
2458     * }
2459     * </pre></code>
2460     *
2461     * The API programmer is responsible to set the correct charset.
2462     * And each application should remember its own charset to support.
2463     *
2464     * @param charset the default charset for each protocol
2465     * @throws DefaultCharsetChanged default charset changed
2466     */

2467    public static void setDefaultProtocolCharset(String JavaDoc charset)
2468        throws DefaultCharsetChanged {
2469            
2470        defaultProtocolCharset = charset;
2471        throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2472                "the default protocol charset changed");
2473    }
2474
2475
2476    /**
2477     * Get the default charset of the protocol.
2478     * <p>
2479     * An individual URI scheme may require a single charset, define a default
2480     * charset, or provide a way to indicate the charset used.
2481     * <p>
2482     * To work globally either requires support of a number of character sets
2483     * and to be able to convert between them, or the use of a single preferred
2484     * character set.
2485     * For support of global compatibility it is STRONGLY RECOMMENDED that
2486     * clients and servers use UTF-8 encoding when exchanging URIs.
2487     *
2488     * @return the default charset string
2489     */

2490    public static String JavaDoc getDefaultProtocolCharset() {
2491        return defaultProtocolCharset;
2492    }
2493
2494
2495    /**
2496     * Get the protocol charset used by this current URI instance.
2497     * It was set by the constructor for this instance. If it was not set by
2498     * contructor, it will return the default protocol charset.
2499     *
2500     * @return the protocol charset string
2501     * @see #getDefaultProtocolCharset
2502     */

2503    public String JavaDoc getProtocolCharset() {
2504        return (protocolCharset != null)
2505            ? protocolCharset
2506            : defaultProtocolCharset;
2507    }
2508
2509
2510    /**
2511     * Set the default charset of the document.
2512     * <p>
2513     * Notice that it will be possible to contain mixed characters (e.g.
2514     * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2515     * display of these character sets, the protocol charset could be simply
2516     * used again. Because it's not yet implemented that the insertion of BIDI
2517     * control characters at different points during composition is extracted.
2518     * <p>
2519     *
2520     * Always all the time, the setter method is always succeeded and throws
2521     * <code>DefaultCharsetChanged</code> exception.
2522     *
2523     * So API programmer must follow the following way:
2524     * <code><pre>
2525     * import org.apache.util.URI$DefaultCharsetChanged;
2526     * .
2527     * .
2528     * .
2529     * try {
2530     * URI.setDefaultDocumentCharset("EUC-KR");
2531     * } catch (DefaultCharsetChanged cc) {
2532     * // CASE 1: the exception could be ignored, when it is set by user
2533     * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2534     * // CASE 2: let user know the default document charset changed
2535     * } else {
2536     * // CASE 2: let user know the default protocol charset changed
2537     * }
2538     * }
2539     * </pre></code>
2540     *
2541     * The API programmer is responsible to set the correct charset.
2542     * And each application should remember its own charset to support.
2543     *
2544     * @param charset the default charset for the document
2545     * @throws DefaultCharsetChanged default charset changed
2546     */

2547    public static void setDefaultDocumentCharset(String JavaDoc charset)
2548        throws DefaultCharsetChanged {
2549            
2550        defaultDocumentCharset = charset;
2551        throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2552                "the default document charset changed");
2553    }
2554
2555
2556    /**
2557     * Get the recommended default charset of the document.
2558     *
2559     * @return the default charset string
2560     */

2561    public static String JavaDoc getDefaultDocumentCharset() {
2562        return defaultDocumentCharset;
2563    }
2564
2565
2566    /**
2567     * Get the default charset of the document by locale.
2568     *
2569     * @return the default charset string by locale
2570     */

2571    public static String JavaDoc getDefaultDocumentCharsetByLocale() {
2572        return defaultDocumentCharsetByLocale;
2573    }
2574
2575
2576    /**
2577     * Get the default charset of the document by platform.
2578     *
2579     * @return the default charset string by platform
2580     */

2581    public static String JavaDoc getDefaultDocumentCharsetByPlatform() {
2582        return defaultDocumentCharsetByPlatform;
2583    }
2584
2585    // ------------------------------------------------------------- The scheme
2586

2587    /**
2588     * Get the scheme.
2589     *
2590     * @return the scheme
2591     */

2592    public char[] getRawScheme() {
2593        return _scheme;
2594    }
2595
2596
2597    /**
2598     * Get the scheme.
2599     *
2600     * @return the scheme
2601     * null if undefined scheme
2602     */

2603    public String JavaDoc getScheme() {
2604        return (_scheme == null) ? null : new String JavaDoc(_scheme);
2605    }
2606
2607    // ---------------------------------------------------------- The authority
2608

2609    /**
2610     * Set the authority. It can be one type of server, hostport, hostname,
2611     * IPv4address, IPv6reference and reg_name.
2612     * <p><blockquote><pre>
2613     * authority = server | reg_name
2614     * </pre></blockquote><p>
2615     *
2616     * @param escapedAuthority the raw escaped authority
2617     * @throws URIException If {@link
2618     * #parseAuthority(java.lang.String,boolean)} fails
2619     * @throws NullPointerException null authority
2620     */

2621    public void setRawAuthority(char[] escapedAuthority)
2622        throws URIException, NullPointerException JavaDoc {
2623            
2624        parseAuthority(new String JavaDoc(escapedAuthority), true);
2625        setURI();
2626    }
2627
2628
2629    /**
2630     * Set the authority. It can be one type of server, hostport, hostname,
2631     * IPv4address, IPv6reference and reg_name.
2632     * Note that there is no setAuthority method by the escape encoding reason.
2633     *
2634     * @param escapedAuthority the escaped authority string
2635     * @throws URIException If {@link
2636     * #parseAuthority(java.lang.String,boolean)} fails
2637     */

2638    public void setEscapedAuthority(String JavaDoc escapedAuthority)
2639        throws URIException {
2640
2641        parseAuthority(escapedAuthority, true);
2642        setURI();
2643    }
2644
2645
2646    /**
2647     * Get the raw-escaped authority.
2648     *
2649     * @return the raw-escaped authority
2650     */

2651    public char[] getRawAuthority() {
2652        return _authority;
2653    }
2654
2655
2656    /**
2657     * Get the escaped authority.
2658     *
2659     * @return the escaped authority
2660     */

2661    public String JavaDoc getEscapedAuthority() {
2662        return (_authority == null) ? null : new String JavaDoc(_authority);
2663    }
2664
2665
2666    /**
2667     * Get the authority.
2668     *
2669     * @return the authority
2670     * @throws URIException If {@link #decode} fails
2671     */

2672    public String JavaDoc getAuthority() throws URIException {
2673        return (_authority == null) ? null : decode(_authority,
2674                getProtocolCharset());
2675    }
2676
2677    // ----------------------------------------------------------- The userinfo
2678

2679    /**
2680     * Get the raw-escaped userinfo.
2681     *
2682     * @return the raw-escaped userinfo
2683     * @see #getAuthority
2684     */

2685    public char[] getRawUserinfo() {
2686        return _userinfo;
2687    }
2688
2689
2690    /**
2691     * Get the escaped userinfo.
2692     *
2693     * @return the escaped userinfo
2694     * @see #getAuthority
2695     */

2696    public String JavaDoc getEscapedUserinfo() {
2697        return (_userinfo == null) ? null : new String JavaDoc(_userinfo);
2698    }
2699
2700
2701    /**
2702     * Get the userinfo.
2703     *
2704     * @return the userinfo
2705     * @throws URIException If {@link #decode} fails
2706     * @see #getAuthority
2707     */

2708    public String JavaDoc getUserinfo() throws URIException {
2709        return (_userinfo == null) ? null : decode(_userinfo,
2710                getProtocolCharset());
2711    }
2712
2713    // --------------------------------------------------------------- The host
2714

2715    /**
2716     * Get the host.
2717     * <p><blockquote><pre>
2718     * host = hostname | IPv4address | IPv6reference
2719     * </pre></blockquote><p>
2720     *
2721     * @return the host
2722     * @see #getAuthority
2723     */

2724    public char[] getRawHost() {
2725        return _host;
2726    }
2727
2728
2729    /**
2730     * Get the host.
2731     * <p><blockquote><pre>
2732     * host = hostname | IPv4address | IPv6reference
2733     * </pre></blockquote><p>
2734     *
2735     * @return the host
2736     * @throws URIException If {@link #decode} fails
2737     * @see #getAuthority
2738     */

2739    public String JavaDoc getHost() throws URIException {
2740        return decode(_host, getProtocolCharset());
2741    }
2742
2743    // --------------------------------------------------------------- The port
2744

2745    /**
2746     * Get the port. In order to get the specfic default port, the specific
2747     * protocol-supported class extended from the URI class should be used.
2748     * It has the server-based naming authority.
2749     *
2750     * @return the port
2751     * if -1, it has the default port for the scheme or the server-based
2752     * naming authority is not supported in the specific URI.
2753     */

2754    public int getPort() {
2755        return _port;
2756    }
2757
2758    // --------------------------------------------------------------- The path
2759

2760    /**
2761     * Set the raw-escaped path.
2762     *
2763     * @param escapedPath the path character sequence
2764     * @throws URIException encoding error or not proper for initial instance
2765     * @see #encode
2766     */

2767    public void setRawPath(char[] escapedPath) throws URIException {
2768        if (escapedPath == null || escapedPath.length == 0) {
2769            _path = _opaque = escapedPath;
2770            setURI();
2771            return;
2772        }
2773        // remove the fragment identifier
2774
escapedPath = removeFragmentIdentifier(escapedPath);
2775        if (_is_net_path || _is_abs_path) {
2776            if (escapedPath[0] != '/') {
2777                throw new URIException(URIException.PARSING,
2778                        "not absolute path");
2779            }
2780            if (!validate(escapedPath, abs_path)) {
2781                throw new URIException(URIException.ESCAPING,
2782                        "escaped absolute path not valid");
2783            }
2784            _path = escapedPath;
2785        } else if (_is_rel_path) {
2786            int at = indexFirstOf(escapedPath, '/');
2787            if (at == 0) {
2788                throw new URIException(URIException.PARSING, "incorrect path");
2789            }
2790            if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment)
2791                && !validate(escapedPath, at, -1, abs_path)
2792                || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2793            
2794                throw new URIException(URIException.ESCAPING,
2795                        "escaped relative path not valid");
2796            }
2797            _path = escapedPath;
2798        } else if (_is_opaque_part) {
2799            if (!uric_no_slash.get(escapedPath[0])
2800                && !validate(escapedPath, 1, -1, uric)) {
2801                throw new URIException(URIException.ESCAPING,
2802                    "escaped opaque part not valid");
2803            }
2804            _opaque = escapedPath;
2805        } else {
2806            throw new URIException(URIException.PARSING, "incorrect path");
2807        }
2808        setURI();
2809    }
2810
2811
2812    /**
2813     * Set the escaped path.
2814     *
2815     * @param escapedPath the escaped path string
2816     * @throws URIException encoding error or not proper for initial instance
2817     * @see #encode
2818     */

2819    public void setEscapedPath(String JavaDoc escapedPath) throws URIException {
2820        if (escapedPath == null) {
2821            _path = _opaque = null;
2822            setURI();
2823            return;
2824        }
2825        setRawPath(escapedPath.toCharArray());
2826    }
2827
2828
2829    /**
2830     * Set the path.
2831     *
2832     * @param path the path string
2833     * @throws URIException set incorrectly or fragment only
2834     * @see #encode
2835     */

2836    public void setPath(String JavaDoc path) throws URIException {
2837
2838        if (path == null || path.length() == 0) {
2839            _path = _opaque = (path == null) ? null : path.toCharArray();
2840            setURI();
2841            return;
2842        }
2843        // set the charset to do escape encoding
2844
String JavaDoc charset = getProtocolCharset();
2845
2846        if (_is_net_path || _is_abs_path) {
2847            _path = encode(path, allowed_abs_path, charset);
2848        } else if (_is_rel_path) {
2849            StringBuffer JavaDoc buff = new StringBuffer JavaDoc(path.length());
2850            int at = path.indexOf('/');
2851            if (at == 0) { // never 0
2852
throw new URIException(URIException.PARSING,
2853                        "incorrect relative path");
2854            }
2855            if (at > 0) {
2856                buff.append(encode(path.substring(0, at), allowed_rel_path,
2857                            charset));
2858                buff.append(encode(path.substring(at), allowed_abs_path,
2859                            charset));
2860            } else {
2861                buff.append(encode(path, allowed_rel_path, charset));
2862            }
2863            _path = buff.toString().toCharArray();
2864        } else if (_is_opaque_part) {
2865            StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
2866            buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2867            buf.insert(1, encode(path.substring(1), uric, charset));
2868            _opaque = buf.toString().toCharArray();
2869        } else {
2870            throw new URIException(URIException.PARSING, "incorrect path");
2871        }
2872        setURI();
2873    }
2874
2875
2876    /**
2877     * Resolve the base and relative path.
2878     *
2879     * @param basePath a character array of the basePath
2880     * @param relPath a character array of the relPath
2881     * @return the resolved path
2882     * @throws URIException no more higher path level to be resolved
2883     */

2884    protected char[] resolvePath(char[] basePath, char[] relPath)
2885        throws URIException {
2886
2887        // REMINDME: paths are never null
2888
String JavaDoc base = (basePath == null) ? "" : new String JavaDoc(basePath);
2889        int at = base.lastIndexOf('/');
2890        if (at != -1) {
2891            basePath = base.substring(0, at + 1).toCharArray();
2892        }
2893        // _path could be empty
2894
if (relPath == null || relPath.length == 0) {
2895            return normalize(basePath);
2896        } else if (relPath[0] == '/') {
2897            return normalize(relPath);
2898        } else {
2899            StringBuffer JavaDoc buff = new StringBuffer JavaDoc(base.length()
2900                + relPath.length);
2901            buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2902            buff.append(relPath);
2903            return normalize(buff.toString().toCharArray());
2904        }
2905    }
2906
2907
2908    /**
2909     * Get the raw-escaped current hierarchy level in the given path.
2910     * If the last namespace is a collection, the slash mark ('/') should be
2911     * ended with at the last character of the path string.
2912     *
2913     * @param path the path
2914     * @return the current hierarchy level
2915     * @throws URIException no hierarchy level
2916     */

2917    protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2918
2919        if (_is_opaque_part) {
2920            throw new URIException(URIException.PARSING, "no hierarchy level");
2921        }
2922        if (path == null) {
2923            throw new URIException(URIException.PARSING, "empty path");
2924        }
2925        String JavaDoc buff = new String JavaDoc(path);
2926        int first = buff.indexOf('/');
2927        int last = buff.lastIndexOf('/');
2928        if (last == 0) {
2929            return rootPath;
2930        } else if (first != last && last != -1) {
2931            return buff.substring(0, last).toCharArray();
2932        }
2933        // FIXME: it could be a document on the server side
2934
return path;
2935    }
2936
2937
2938    /**
2939     * Get the raw-escaped current hierarchy level.
2940     *
2941     * @return the raw-escaped current hierarchy level
2942     * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2943     */

2944    public char[] getRawCurrentHierPath() throws URIException {
2945        return (_path == null) ? null : getRawCurrentHierPath(_path);
2946    }
2947 
2948
2949    /**
2950     * Get the escaped current hierarchy level.
2951     *
2952     * @return the escaped current hierarchy level
2953     * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2954     */

2955    public String JavaDoc getEscapedCurrentHierPath() throws URIException {
2956        char[] path = getRawCurrentHierPath();
2957        return (path == null) ? null : new String JavaDoc(path);
2958    }
2959 
2960
2961    /**
2962     * Get the current hierarchy level.
2963     *
2964     * @return the current hierarchy level
2965     * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2966     * @see #decode
2967     */

2968    public String JavaDoc getCurrentHierPath() throws URIException {
2969        char[] path = getRawCurrentHierPath();
2970        return (path == null) ? null : decode(path, getProtocolCharset());
2971    }
2972
2973
2974    /**
2975     * Get the level above the this hierarchy level.
2976     *
2977     * @return the raw above hierarchy level
2978     * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2979     */

2980    public char[] getRawAboveHierPath() throws URIException {
2981        char[] path = getRawCurrentHierPath();
2982        return (path == null) ? null : getRawCurrentHierPath(path);
2983    }
2984
2985
2986    /**
2987     * Get the level above the this hierarchy level.
2988     *
2989     * @return the raw above hierarchy level
2990     * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
2991     */

2992    public String JavaDoc getEscapedAboveHierPath() throws URIException {
2993        char[] path = getRawAboveHierPath();
2994        return (path == null) ? null : new String JavaDoc(path);
2995    }
2996
2997
2998    /**
2999     * Get the level above the this hierarchy level.
3000     *
3001     * @return the above hierarchy level
3002     * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3003     * @see #decode
3004     */

3005    public String JavaDoc getAboveHierPath() throws URIException {
3006        char[] path = getRawAboveHierPath();
3007        return (path == null) ? null : decode(path, getProtocolCharset());
3008    }
3009
3010
3011    /**
3012     * Get the raw-escaped path.
3013     * <p><blockquote><pre>
3014     * path = [ abs_path | opaque_part ]
3015     * </pre></blockquote><p>
3016     *
3017     * @return the raw-escaped path
3018     */

3019    public char[] getRawPath() {
3020        return _is_opaque_part ? _opaque : _path;
3021    }
3022
3023
3024    /**
3025     * Get the escaped path.
3026     * <p><blockquote><pre>
3027     * path = [ abs_path | opaque_part ]
3028     * abs_path = "/" path_segments
3029     * opaque_part = uric_no_slash *uric
3030     * </pre></blockquote><p>
3031     *
3032     * @return the escaped path string
3033     */

3034    public String JavaDoc getEscapedPath() {
3035        char[] path = getRawPath();
3036        return (path == null) ? null : new String JavaDoc(path);
3037    }
3038
3039
3040    /**
3041     * Get the path.
3042     * <p><blockquote><pre>
3043     * path = [ abs_path | opaque_part ]
3044     * </pre></blockquote><p>
3045     * @return the path string
3046     * @throws URIException If {@link #decode} fails.
3047     * @see #decode
3048     */

3049    public String JavaDoc getPath() throws URIException {
3050        char[] path = getRawPath();
3051        return (path == null) ? null : decode(path, getProtocolCharset());
3052    }
3053
3054
3055    /**
3056     * Get the raw-escaped basename of the path.
3057     *
3058     * @return the raw-escaped basename
3059     */

3060    public char[] getRawName() {
3061        if (_path == null) {
3062            return null;
3063        }
3064
3065        int at = 0;
3066        for (int i = _path.length - 1; i >= 0; i--) {
3067            if (_path[i] == '/') {
3068                at = i + 1;
3069                break;
3070            }
3071        }
3072        int len = _path.length - at;
3073        char[] basename = new char[len];
3074        System.arraycopy(_path, at, basename, 0, len);
3075        return basename;
3076    }
3077
3078
3079    /**
3080     * Get the escaped basename of the path.
3081     *
3082     * @return the escaped basename string
3083     */

3084    public String JavaDoc getEscapedName() {
3085        char[] basename = getRawName();
3086        return (basename == null) ? null : new String JavaDoc(basename);
3087    }
3088
3089
3090    /**
3091     * Get the basename of the path.
3092     *
3093     * @return the basename string
3094     * @throws URIException incomplete trailing escape pattern or unsupported
3095     * character encoding
3096     * @see #decode
3097     */

3098    public String JavaDoc getName() throws URIException {
3099        char[] basename = getRawName();
3100        return (basename == null) ? null : decode(getRawName(),
3101                getProtocolCharset());
3102    }
3103
3104    // ----------------------------------------------------- The path and query
3105

3106    /**
3107     * Get the raw-escaped path and query.
3108     *
3109     * @return the raw-escaped path and query
3110     */

3111    public char[] getRawPathQuery() {
3112
3113        if (_path == null && _query == null) {
3114            return null;
3115        }
3116        StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
3117        if (_path != null) {
3118            buff.append(_path);
3119        }
3120        if (_query != null) {
3121            buff.append('?');
3122            buff.append(_query);
3123        }
3124        return buff.toString().toCharArray();
3125    }
3126
3127
3128    /**
3129     * Get the escaped query.
3130     *
3131     * @return the escaped path and query string
3132     */

3133    public String JavaDoc getEscapedPathQuery() {
3134        char[] rawPathQuery = getRawPathQuery();
3135        return (rawPathQuery == null) ? null : new String JavaDoc(rawPathQuery);
3136    }
3137
3138
3139    /**
3140     * Get the path and query.
3141     *
3142     * @return the path and query string.
3143     * @throws URIException incomplete trailing escape pattern or unsupported
3144     * character encoding
3145     * @see #decode
3146     */

3147    public String JavaDoc getPathQuery() throws URIException {
3148        char[] rawPathQuery = getRawPathQuery();
3149        return (rawPathQuery == null) ? null : decode(rawPathQuery,
3150                getProtocolCharset());
3151    }
3152
3153    // -------------------------------------------------------------- The query
3154

3155    /**
3156     * Set the raw-escaped query.
3157     *
3158     * @param escapedQuery the raw-escaped query
3159     * @throws URIException escaped query not valid
3160     */

3161    public void setRawQuery(char[] escapedQuery) throws URIException {
3162        if (escapedQuery == null || escapedQuery.length == 0) {
3163            _query = escapedQuery;
3164            setURI();
3165            return;
3166        }
3167        // remove the fragment identifier
3168
escapedQuery = removeFragmentIdentifier(escapedQuery);
3169        if (!validate(escapedQuery, query)) {
3170            throw new URIException(URIException.ESCAPING,
3171                    "escaped query not valid");
3172        }
3173        _query = escapedQuery;
3174        setURI();
3175    }
3176
3177
3178    /**
3179     * Set the escaped query string.
3180     *
3181     * @param escapedQuery the escaped query string
3182     * @throws URIException escaped query not valid
3183     */

3184    public void setEscapedQuery(String JavaDoc escapedQuery) throws URIException {
3185        if (escapedQuery == null) {
3186            _query = null;
3187            setURI();
3188            return;
3189        }
3190        setRawQuery(escapedQuery.toCharArray());
3191    }
3192
3193
3194    /**
3195     * Set the query.
3196     * <p>
3197     * When a query string is not misunderstood the reserved special characters
3198     * ("&amp;", "=", "+", ",", and "$") within a query component, it is
3199     * recommended to use in encoding the whole query with this method.
3200     * <p>
3201     * The additional APIs for the special purpose using by the reserved
3202     * special characters used in each protocol are implemented in each protocol
3203     * classes inherited from <code>URI</code>. So refer to the same-named APIs
3204     * implemented in each specific protocol instance.
3205     *
3206     * @param query the query string.
3207     * @throws URIException incomplete trailing escape pattern or unsupported
3208     * character encoding
3209     * @see #encode
3210     */

3211    public void setQuery(String JavaDoc query) throws URIException {
3212        if (query == null || query.length() == 0) {
3213            _query = (query == null) ? null : query.toCharArray();
3214            setURI();
3215            return;
3216        }
3217        setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3218    }
3219
3220
3221    /**
3222     * Get the raw-escaped query.
3223     *
3224     * @return the raw-escaped query
3225     */

3226    public char[] getRawQuery() {
3227        return _query;
3228    }
3229
3230
3231    /**
3232     * Get the escaped query.
3233     *
3234     * @return the escaped query string
3235     */

3236    public String JavaDoc getEscapedQuery() {
3237        return (_query == null) ? null : new String JavaDoc(_query);
3238    }
3239
3240
3241    /**
3242     * Get the query.
3243     *
3244     * @return the query string.
3245     * @throws URIException incomplete trailing escape pattern or unsupported
3246     * character encoding
3247     * @see #decode
3248     */

3249    public String JavaDoc getQuery() throws URIException {
3250        return (_query == null) ? null : decode(_query, getProtocolCharset());
3251    }
3252
3253    // ----------------------------------------------------------- The fragment
3254

3255    /**
3256     * Set the raw-escaped fragment.
3257     *
3258     * @param escapedFragment the raw-escaped fragment
3259     * @throws URIException escaped fragment not valid
3260     */

3261    public void setRawFragment(char[] escapedFragment) throws URIException {
3262        if (escapedFragment == null || escapedFragment.length == 0) {
3263            _fragment = escapedFragment;
3264            hash = 0;
3265            return;
3266        }
3267        if (!validate(escapedFragment, fragment)) {
3268            throw new URIException(URIException.ESCAPING,
3269                    "escaped fragment not valid");
3270        }
3271        _fragment = escapedFragment;
3272        hash = 0;
3273    }
3274
3275
3276    /**
3277     * Set the escaped fragment string.
3278     *
3279     * @param escapedFragment the escaped fragment string
3280     * @throws URIException escaped fragment not valid
3281     */

3282    public void setEscapedFragment(String JavaDoc escapedFragment) throws URIException {
3283        if (escapedFragment == null) {
3284            _fragment = null;
3285            hash = 0;
3286            return;
3287        }
3288        setRawFragment(escapedFragment.toCharArray());
3289    }
3290
3291
3292    /**
3293     * Set the fragment.
3294     *
3295     * @param fragment the fragment string.
3296     * @throws URIException If an error occurs.
3297     */

3298    public void setFragment(String JavaDoc fragment) throws URIException {
3299        if (fragment == null || fragment.length() == 0) {
3300            _fragment = (fragment == null) ? null : fragment.toCharArray();
3301            hash = 0;
3302            return;
3303        }
3304        _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3305        hash = 0;
3306    }
3307
3308
3309    /**
3310     * Get the raw-escaped fragment.
3311     * <p>
3312     * The optional fragment identifier is not part of a URI, but is often used
3313     * in conjunction with a URI.
3314     * <p>
3315     * The format and interpretation of fragment identifiers is dependent on
3316     * the media type [RFC2046] of the retrieval result.
3317     * <p>
3318     * A fragment identifier is only meaningful when a URI reference is
3319     * intended for retrieval and the result of that retrieval is a document
3320     * for which the identified fragment is consistently defined.
3321     *
3322     * @return the raw-escaped fragment
3323     */

3324    public char[] getRawFragment() {
3325        return _fragment;
3326    }
3327
3328
3329    /**
3330     * Get the escaped fragment.
3331     *
3332     * @return the escaped fragment string
3333     */

3334    public String JavaDoc getEscapedFragment() {
3335        return (_fragment == null) ? null : new String JavaDoc(_fragment);
3336    }
3337
3338
3339    /**
3340     * Get the fragment.
3341     *
3342     * @return the fragment string
3343     * @throws URIException incomplete trailing escape pattern or unsupported
3344     * character encoding
3345     * @see #decode
3346     */

3347    public String JavaDoc getFragment() throws URIException {
3348        return (_fragment == null) ? null : decode(_fragment,
3349                getProtocolCharset());
3350    }
3351
3352    // ------------------------------------------------------------- Utilities
3353

3354    /**
3355     * Remove the fragment identifier of the given component.
3356     *
3357     * @param component the component that a fragment may be included
3358     * @return the component that the fragment identifier is removed
3359     */

3360    protected char[] removeFragmentIdentifier(char[] component) {
3361        if (component == null) {
3362            return null;
3363        }
3364        int lastIndex = new String JavaDoc(component).indexOf('#');
3365        if (lastIndex != -1) {
3366            component = new String JavaDoc(component).substring(0,
3367                    lastIndex).toCharArray();
3368        }
3369        return component;
3370    }
3371
3372
3373    /**
3374     * Normalize the given hier path part.
3375     *
3376     * <p>Algorithm taken from URI reference parser at
3377     * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3378     *
3379     * @param path the path to normalize
3380     * @return the normalized path
3381     * @throws URIException no more higher path level to be normalized
3382     */

3383    protected char[] normalize(char[] path) throws URIException {
3384
3385        if (path == null) {
3386            return null;
3387        }
3388
3389        String JavaDoc normalized = new String JavaDoc(path);
3390
3391        // If the buffer begins with "./" or "../", the "." or ".." is removed.
3392
if (normalized.startsWith("./")) {
3393            normalized = normalized.substring(1);
3394        } else if (normalized.startsWith("../")) {
3395            normalized = normalized.substring(2);
3396        } else if (normalized.startsWith("..")) {
3397            normalized = normalized.substring(2);
3398        }
3399
3400        // All occurrences of "/./" in the buffer are replaced with "/"
3401
int index = -1;
3402        while ((index = normalized.indexOf("/./")) != -1) {
3403            normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3404        }
3405
3406        // If the buffer ends with "/.", the "." is removed.
3407
if (normalized.endsWith("/.")) {
3408            normalized = normalized.substring(0, normalized.length() - 1);
3409        }
3410
3411        int startIndex = 0;
3412
3413        // All occurrences of "/<segment>/../" in the buffer, where ".."
3414
// and <segment> are complete path segments, are iteratively replaced
3415
// with "/" in order from left to right until no matching pattern remains.
3416
// If the buffer ends with "/<segment>/..", that is also replaced
3417
// with "/". Note that <segment> may be empty.
3418
while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3419            int slashIndex = normalized.lastIndexOf('/', index - 1);
3420            if (slashIndex >= 0) {
3421                normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3422            } else {
3423                startIndex = index + 3;
3424            }
3425        }
3426        if (normalized.endsWith("/..")) {
3427            int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3428            if (slashIndex >= 0) {
3429                normalized = normalized.substring(0, slashIndex + 1);
3430            }
3431        }
3432
3433        // All prefixes of "<segment>/../" in the buffer, where ".."
3434
// and <segment> are complete path segments, are iteratively replaced
3435
// with "/" in order from left to right until no matching pattern remains.
3436
// If the buffer ends with "<segment>/..", that is also replaced
3437
// with "/". Note that <segment> may be empty.
3438
while ((index = normalized.indexOf("/../")) != -1) {
3439            int slashIndex = normalized.lastIndexOf('/', index - 1);
3440            if (slashIndex >= 0) {
3441                break;
3442            } else {
3443                normalized = normalized.substring(index + 3);
3444            }
3445        }
3446        if (normalized.endsWith("/..")) {
3447            int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3448            if (slashIndex < 0) {
3449                normalized = "/";
3450            }
3451        }
3452
3453        return normalized.toCharArray();
3454    }
3455
3456
3457    /**
3458     * Normalizes the path part of this URI. Normalization is only meant to be performed on
3459     * URIs with an absolute path. Calling this method on a relative path URI will have no
3460     * effect.
3461     *
3462     * @throws URIException no more higher path level to be normalized
3463     *
3464     * @see #isAbsPath()
3465     */

3466    public void normalize() throws URIException {
3467        if (isAbsPath()) {
3468            _path = normalize(_path);
3469            setURI();
3470        }
3471    }
3472
3473
3474    /**
3475     * Test if the first array is equal to the second array.
3476     *
3477     * @param first the first character array
3478     * @param second the second character array
3479     * @return true if they're equal
3480     */

3481    protected boolean equals(char[] first, char[] second) {
3482
3483        if (first == null && second == null) {
3484            return true;
3485        }
3486        if (first == null || second == null) {
3487            return false;
3488        }
3489        if (first.length != second.length) {
3490            return false;
3491        }
3492        for (int i = 0; i < first.length; i++) {
3493            if (first[i] != second[i]) {
3494                return false;
3495            }
3496        }
3497        return true;
3498    }
3499
3500
3501    /**
3502     * Test an object if this URI is equal to another.
3503     *
3504     * @param obj an object to compare
3505     * @return true if two URI objects are equal
3506     */

3507    public boolean equals(Object JavaDoc obj) {
3508
3509        // normalize and test each components
3510
if (obj == this) {
3511            return true;
3512        }
3513        if (!(obj instanceof URI)) {
3514            return false;
3515        }
3516        URI another = (URI) obj;
3517        // scheme
3518
if (!equals(_scheme, another._scheme)) {
3519            return false;
3520        }
3521        // is_opaque_part or is_hier_part? and opaque
3522
if (!equals(_opaque, another._opaque)) {
3523            return false;
3524        }
3525        // is_hier_part
3526
// has_authority
3527
if (!equals(_authority, another._authority)) {
3528            return false;
3529        }
3530        // path
3531
if (!equals(_path, another._path)) {
3532            return false;
3533        }
3534        // has_query
3535
if (!equals(_query, another._query)) {
3536            return false;
3537        }
3538        // has_fragment? should be careful of the only fragment case.
3539
if (!equals(_fragment, another._fragment)) {
3540            return false;
3541        }
3542        return true;
3543    }
3544
3545    // ---------------------------------------------------------- Serialization
3546

3547    /**
3548     * Write the content of this URI.
3549     *
3550     * @param oos the object-output stream
3551     * @throws IOException If an IO problem occurs.
3552     */

3553    protected void writeObject(ObjectOutputStream JavaDoc oos)
3554        throws IOException JavaDoc {
3555
3556        oos.defaultWriteObject();
3557    }
3558
3559
3560    /**
3561     * Read a URI.
3562     *
3563     * @param ois the object-input stream
3564     * @throws ClassNotFoundException If one of the classes specified in the
3565     * input stream cannot be found.
3566     * @throws IOException If an IO problem occurs.
3567     */

3568    protected void readObject(ObjectInputStream JavaDoc ois)
3569        throws ClassNotFoundException JavaDoc, IOException JavaDoc {
3570
3571        ois.defaultReadObject();
3572    }
3573
3574    // -------------------------------------------------------------- Hash code
3575

3576    /**
3577     * Return a hash code for this URI.
3578     *
3579     * @return a has code value for this URI
3580     */

3581    public int hashCode() {
3582        if (hash == 0) {
3583            char[] c = _uri;
3584            if (c != null) {
3585                for (int i = 0, len = c.length; i < len; i++) {
3586                    hash = 31 * hash + c[i];
3587                }
3588            }
3589            c = _fragment;
3590            if (c != null) {
3591                for (int i = 0, len = c.length; i < len; i++) {
3592                    hash = 31 * hash + c[i];
3593                }
3594            }
3595        }
3596        return hash;
3597    }
3598
3599    // ------------------------------------------------------------- Comparison
3600

3601    /**
3602     * Compare this URI to another object.
3603     *
3604     * @param obj the object to be compared.
3605     * @return 0, if it's same,
3606     * -1, if failed, first being compared with in the authority component
3607     * @throws ClassCastException not URI argument
3608     */

3609    public int compareTo(Object JavaDoc obj) throws ClassCastException JavaDoc {
3610
3611        URI another = (URI) obj;
3612        if (!equals(_authority, another.getRawAuthority())) {
3613            return -1;
3614        }
3615        return toString().compareTo(another.toString());
3616    }
3617
3618    // ------------------------------------------------------------------ Clone
3619

3620    /**
3621     * Create and return a copy of this object, the URI-reference containing
3622     * the userinfo component. Notice that the whole URI-reference including
3623     * the userinfo component counld not be gotten as a <code>String</code>.
3624     * <p>
3625     * To copy the identical <code>URI</code> object including the userinfo
3626     * component, it should be used.
3627     *
3628     * @return a clone of this instance
3629     */

3630    public synchronized Object JavaDoc clone() {
3631
3632        URI instance = new URI();
3633
3634        instance._uri = _uri;
3635        instance._scheme = _scheme;
3636        instance._opaque = _opaque;
3637        instance._authority = _authority;
3638        instance._userinfo = _userinfo;
3639        instance._host = _host;
3640        instance._port = _port;
3641        instance._path = _path;
3642        instance._query = _query;
3643        instance._fragment = _fragment;
3644        // the charset to do escape encoding for this instance
3645
instance.protocolCharset = protocolCharset;
3646        // flags
3647
instance._is_hier_part = _is_hier_part;
3648        instance._is_opaque_part = _is_opaque_part;
3649        instance._is_net_path = _is_net_path;
3650        instance._is_abs_path = _is_abs_path;
3651        instance._is_rel_path = _is_rel_path;
3652        instance._is_reg_name = _is_reg_name;
3653        instance._is_server = _is_server;
3654        instance._is_hostname = _is_hostname;
3655        instance._is_IPv4address = _is_IPv4address;
3656        instance._is_IPv6reference = _is_IPv6reference;
3657
3658        return instance;
3659    }
3660
3661    // ------------------------------------------------------------ Get the URI
3662

3663    /**
3664     * It can be gotten the URI character sequence. It's raw-escaped.
3665     * For the purpose of the protocol to be transported, it will be useful.
3666     * <p>
3667     * It is clearly unwise to use a URL that contains a password which is
3668     * intended to be secret. In particular, the use of a password within
3669     * the 'userinfo' component of a URL is strongly disrecommended except
3670     * in those rare cases where the 'password' parameter is intended to be
3671     * public.
3672     * <p>
3673     * When you want to get each part of the userinfo, you need to use the
3674     * specific methods in the specific URL. It depends on the specific URL.
3675     *
3676     * @return the URI character sequence
3677     */

3678    public char[] getRawURI() {
3679        return _uri;
3680    }
3681
3682
3683    /**
3684     * It can be gotten the URI character sequence. It's escaped.
3685     * For the purpose of the protocol to be transported, it will be useful.
3686     *
3687     * @return the escaped URI string
3688     */

3689    public String JavaDoc getEscapedURI() {
3690        return (_uri == null) ? null : new String JavaDoc(_uri);
3691    }
3692    
3693
3694    /**
3695     * It can be gotten the URI character sequence.
3696     *
3697     * @return the original URI string
3698     * @throws URIException incomplete trailing escape pattern or unsupported
3699     * character encoding
3700     * @see #decode
3701     */

3702    public String JavaDoc getURI() throws URIException {
3703        return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3704    }
3705
3706
3707    /**
3708     * Get the URI reference character sequence.
3709     *
3710     * @return the URI reference character sequence
3711     */

3712    public char[] getRawURIReference() {
3713        if (_fragment == null) {
3714            return _uri;
3715        }
3716        if (_uri == null) {
3717            return _fragment;
3718        }
3719        // if _uri != null && _fragment != null
3720
String JavaDoc uriReference = new String JavaDoc(_uri) + "#" + new String JavaDoc(_fragment);
3721        return uriReference.toCharArray();
3722    }
3723
3724
3725    /**
3726     * Get the escaped URI reference string.
3727     *
3728     * @return the escaped URI reference string
3729     */

3730    public String JavaDoc getEscapedURIReference() {
3731        char[] uriReference = getRawURIReference();
3732        return (uriReference == null) ? null : new String JavaDoc(uriReference);
3733    }
3734
3735
3736    /**
3737     * Get the original URI reference string.
3738     *
3739     * @return the original URI reference string
3740     * @throws URIException If {@link #decode} fails.
3741     */

3742    public String JavaDoc getURIReference() throws URIException {
3743        char[] uriReference = getRawURIReference();
3744        return (uriReference == null) ? null : decode(uriReference,
3745                getProtocolCharset());
3746    }
3747
3748
3749    /**
3750     * Get the escaped URI string.
3751     * <p>
3752     * On the document, the URI-reference form is only used without the userinfo
3753     * component like http://jakarta.apache.org/ by the security reason.
3754     * But the URI-reference form with the userinfo component could be parsed.
3755     * <p>
3756     * In other words, this URI and any its subclasses must not expose the
3757     * URI-reference expression with the userinfo component like
3758     * http://user:password@hostport/restricted_zone.<br>
3759     * It means that the API client programmer should extract each user and
3760     * password to access manually. Probably it will be supported in the each
3761     * subclass, however, not a whole URI-reference expression.
3762     *
3763     * @return the escaped URI string
3764     * @see #clone()
3765     */

3766    public String JavaDoc toString() {
3767        return getEscapedURI();
3768    }
3769
3770
3771    // ------------------------------------------------------------ Inner class
3772

3773    /**
3774     * The charset-changed normal operation to represent to be required to
3775     * alert to user the fact the default charset is changed.
3776     */

3777    public static class DefaultCharsetChanged extends RuntimeException JavaDoc {
3778
3779        // ------------------------------------------------------- constructors
3780

3781        /**
3782         * The constructor with a reason string and its code arguments.
3783         *
3784         * @param reasonCode the reason code
3785         * @param reason the reason
3786         */

3787        public DefaultCharsetChanged(int reasonCode, String JavaDoc reason) {
3788            super(reason);
3789            this.reason = reason;
3790            this.reasonCode = reasonCode;
3791        }
3792
3793        // ---------------------------------------------------------- constants
3794

3795        /** No specified reason code. */
3796        public static final int UNKNOWN = 0;
3797
3798        /** Protocol charset changed. */
3799        public static final int PROTOCOL_CHARSET = 1;
3800
3801        /** Document charset changed. */
3802        public static final int DOCUMENT_CHARSET = 2;
3803
3804        // ------------------------------------------------- instance variables
3805

3806        /** The reason code. */
3807        private int reasonCode;
3808
3809        /** The reason message. */
3810        private String JavaDoc reason;
3811
3812        // ------------------------------------------------------------ methods
3813

3814        /**
3815         * Get the reason code.
3816         *
3817         * @return the reason code
3818         */

3819        public int getReasonCode() {
3820            return reasonCode;
3821        }
3822
3823        /**
3824         * Get the reason message.
3825         *
3826         * @return the reason message
3827         */

3828        public String JavaDoc getReason() {
3829            return reason;
3830        }
3831
3832    }
3833
3834
3835    /**
3836     * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3837     * given locale. Supports all locales recognized in JDK 1.1.
3838     * <p>
3839     * The distribution of this class is Servlets.com. It was originally
3840     * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3841     */

3842    public static class LocaleToCharsetMap {
3843
3844        /** A mapping of language code to charset */
3845        private static final Hashtable JavaDoc LOCALE_TO_CHARSET_MAP;
3846        static {
3847            LOCALE_TO_CHARSET_MAP = new Hashtable JavaDoc();
3848            LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3849            LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3850            LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3851            LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3852            LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3853            LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3854            LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3855            LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3856            LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3857            LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3858            LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3859            LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3860            LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3861            LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3862            LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3863            LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3864            LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3865            LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3866            LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3867            LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3868            LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3869            LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3870            LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3871            LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3872            LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3873            LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3874            LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3875            LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3876            LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3877            LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3878            LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3879            LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3880            LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3881            LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3882            LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3883            LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3884            LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3885            LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3886            LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3887        }
3888       
3889        /**
3890         * Get the preferred charset for the given locale.
3891         *
3892         * @param locale the locale
3893         * @return the preferred charset or null if the locale is not
3894         * recognized.
3895         */

3896        public static String JavaDoc getCharset(Locale JavaDoc locale) {
3897            // try for an full name match (may include country)
3898
String JavaDoc charset =
3899                (String JavaDoc) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3900            if (charset != null) {
3901                return charset;
3902            }
3903           
3904            // if a full name didn't match, try just the language
3905
charset = (String JavaDoc) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3906            return charset; // may be null
3907
}
3908
3909    }
3910
3911}
3912
3913
Popular Tags