KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > util > URI


1 /*
2  * Copyright 1999-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.util;
18
19 import java.io.IOException JavaDoc;
20 import java.io.Serializable JavaDoc;
21
22 /**********************************************************************
23 * A class to represent a Uniform Resource Identifier (URI). This class
24 * is designed to handle the parsing of URIs and provide access to
25 * the various components (scheme, host, port, userinfo, path, query
26 * string and fragment) that may constitute a URI.
27 * <p>
28 * Parsing of a URI specification is done according to the URI
29 * syntax described in
30 * <a HREF="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>,
31 * and amended by
32 * <a HREF="http://www.ietf.org/rfc/rfc2732.txt?number=2732">RFC 2732</a>.
33 * <p>
34 * Every absolute URI consists of a scheme, followed by a colon (':'),
35 * followed by a scheme-specific part. For URIs that follow the
36 * "generic URI" syntax, the scheme-specific part begins with two
37 * slashes ("//") and may be followed by an authority segment (comprised
38 * of user information, host, and port), path segment, query segment
39 * and fragment. Note that RFC 2396 no longer specifies the use of the
40 * parameters segment and excludes the "user:password" syntax as part of
41 * the authority segment. If "user:password" appears in a URI, the entire
42 * user/password string is stored as userinfo.
43 * <p>
44 * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
45 * the entire scheme-specific part is treated as the "path" portion
46 * of the URI.
47 * <p>
48 * Note that, unlike the java.net.URL class, this class does not provide
49 * any built-in network access functionality nor does it provide any
50 * scheme-specific functionality (for example, it does not know a
51 * default port for a specific scheme). Rather, it only knows the
52 * grammar and basic set of operations that can be applied to a URI.
53 *
54 * @version $Id: URI.java,v 1.23 2005/06/24 01:06:07 mrglavas Exp $
55 *
56 **********************************************************************/

57  public class URI implements Serializable JavaDoc {
58
59   /*******************************************************************
60   * MalformedURIExceptions are thrown in the process of building a URI
61   * or setting fields on a URI when an operation would result in an
62   * invalid URI specification.
63   *
64   ********************************************************************/

65   public static class MalformedURIException extends IOException JavaDoc {
66
67    /** Serialization version. */
68    static final long serialVersionUID = -6695054834342951930L;
69    
70    /******************************************************************
71     * Constructs a <code>MalformedURIException</code> with no specified
72     * detail message.
73     ******************************************************************/

74     public MalformedURIException() {
75       super();
76     }
77
78     /*****************************************************************
79     * Constructs a <code>MalformedURIException</code> with the
80     * specified detail message.
81     *
82     * @param p_msg the detail message.
83     ******************************************************************/

84     public MalformedURIException(String JavaDoc p_msg) {
85       super(p_msg);
86     }
87   }
88
89   /** Serialization version. */
90   static final long serialVersionUID = 1601921774685357214L;
91
92   private static final byte [] fgLookupTable = new byte[128];
93   
94   /**
95    * Character Classes
96    */

97   
98   /** reserved characters ;/?:@&=+$,[] */
99   //RFC 2732 added '[' and ']' as reserved characters
100
private static final int RESERVED_CHARACTERS = 0x01;
101   
102   /** URI punctuation mark characters: -_.!~*'() - these, combined with
103       alphanumerics, constitute the "unreserved" characters */

104   private static final int MARK_CHARACTERS = 0x02;
105   
106   /** scheme can be composed of alphanumerics and these characters: +-. */
107   private static final int SCHEME_CHARACTERS = 0x04;
108   
109   /** userinfo can be composed of unreserved, escaped and these
110       characters: ;:&=+$, */

111   private static final int USERINFO_CHARACTERS = 0x08;
112   
113   /** ASCII letter characters */
114   private static final int ASCII_ALPHA_CHARACTERS = 0x10;
115   
116   /** ASCII digit characters */
117   private static final int ASCII_DIGIT_CHARACTERS = 0x20;
118   
119   /** ASCII hex characters */
120   private static final int ASCII_HEX_CHARACTERS = 0x40;
121   
122   /** Path characters */
123   private static final int PATH_CHARACTERS = 0x80;
124
125   /** Mask for alpha-numeric characters */
126   private static final int MASK_ALPHA_NUMERIC = ASCII_ALPHA_CHARACTERS | ASCII_DIGIT_CHARACTERS;
127   
128   /** Mask for unreserved characters */
129   private static final int MASK_UNRESERVED_MASK = MASK_ALPHA_NUMERIC | MARK_CHARACTERS;
130   
131   /** Mask for URI allowable characters except for % */
132   private static final int MASK_URI_CHARACTER = MASK_UNRESERVED_MASK | RESERVED_CHARACTERS;
133   
134   /** Mask for scheme characters */
135   private static final int MASK_SCHEME_CHARACTER = MASK_ALPHA_NUMERIC | SCHEME_CHARACTERS;
136   
137   /** Mask for userinfo characters */
138   private static final int MASK_USERINFO_CHARACTER = MASK_UNRESERVED_MASK | USERINFO_CHARACTERS;
139   
140   /** Mask for path characters */
141   private static final int MASK_PATH_CHARACTER = MASK_UNRESERVED_MASK | PATH_CHARACTERS;
142
143   static {
144       // Add ASCII Digits and ASCII Hex Numbers
145
for (int i = '0'; i <= '9'; ++i) {
146           fgLookupTable[i] |= ASCII_DIGIT_CHARACTERS | ASCII_HEX_CHARACTERS;
147       }
148
149       // Add ASCII Letters and ASCII Hex Numbers
150
for (int i = 'A'; i <= 'F'; ++i) {
151           fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
152           fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
153       }
154
155       // Add ASCII Letters
156
for (int i = 'G'; i <= 'Z'; ++i) {
157           fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS;
158           fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS;
159       }
160
161       // Add Reserved Characters
162
fgLookupTable[';'] |= RESERVED_CHARACTERS;
163       fgLookupTable['/'] |= RESERVED_CHARACTERS;
164       fgLookupTable['?'] |= RESERVED_CHARACTERS;
165       fgLookupTable[':'] |= RESERVED_CHARACTERS;
166       fgLookupTable['@'] |= RESERVED_CHARACTERS;
167       fgLookupTable['&'] |= RESERVED_CHARACTERS;
168       fgLookupTable['='] |= RESERVED_CHARACTERS;
169       fgLookupTable['+'] |= RESERVED_CHARACTERS;
170       fgLookupTable['$'] |= RESERVED_CHARACTERS;
171       fgLookupTable[','] |= RESERVED_CHARACTERS;
172       fgLookupTable['['] |= RESERVED_CHARACTERS;
173       fgLookupTable[']'] |= RESERVED_CHARACTERS;
174
175       // Add Mark Characters
176
fgLookupTable['-'] |= MARK_CHARACTERS;
177       fgLookupTable['_'] |= MARK_CHARACTERS;
178       fgLookupTable['.'] |= MARK_CHARACTERS;
179       fgLookupTable['!'] |= MARK_CHARACTERS;
180       fgLookupTable['~'] |= MARK_CHARACTERS;
181       fgLookupTable['*'] |= MARK_CHARACTERS;
182       fgLookupTable['\''] |= MARK_CHARACTERS;
183       fgLookupTable['('] |= MARK_CHARACTERS;
184       fgLookupTable[')'] |= MARK_CHARACTERS;
185
186       // Add Scheme Characters
187
fgLookupTable['+'] |= SCHEME_CHARACTERS;
188       fgLookupTable['-'] |= SCHEME_CHARACTERS;
189       fgLookupTable['.'] |= SCHEME_CHARACTERS;
190
191       // Add Userinfo Characters
192
fgLookupTable[';'] |= USERINFO_CHARACTERS;
193       fgLookupTable[':'] |= USERINFO_CHARACTERS;
194       fgLookupTable['&'] |= USERINFO_CHARACTERS;
195       fgLookupTable['='] |= USERINFO_CHARACTERS;
196       fgLookupTable['+'] |= USERINFO_CHARACTERS;
197       fgLookupTable['$'] |= USERINFO_CHARACTERS;
198       fgLookupTable[','] |= USERINFO_CHARACTERS;
199       
200       // Add Path Characters
201
fgLookupTable[';'] |= PATH_CHARACTERS;
202       fgLookupTable['/'] |= PATH_CHARACTERS;
203       fgLookupTable[':'] |= PATH_CHARACTERS;
204       fgLookupTable['@'] |= PATH_CHARACTERS;
205       fgLookupTable['&'] |= PATH_CHARACTERS;
206       fgLookupTable['='] |= PATH_CHARACTERS;
207       fgLookupTable['+'] |= PATH_CHARACTERS;
208       fgLookupTable['$'] |= PATH_CHARACTERS;
209       fgLookupTable[','] |= PATH_CHARACTERS;
210   }
211
212   /** Stores the scheme (usually the protocol) for this URI. */
213   private String JavaDoc m_scheme = null;
214
215   /** If specified, stores the userinfo for this URI; otherwise null */
216   private String JavaDoc m_userinfo = null;
217
218   /** If specified, stores the host for this URI; otherwise null */
219   private String JavaDoc m_host = null;
220
221   /** If specified, stores the port for this URI; otherwise -1 */
222   private int m_port = -1;
223   
224   /** If specified, stores the registry based authority for this URI; otherwise null */
225   private String JavaDoc m_regAuthority = null;
226
227   /** If specified, stores the path for this URI; otherwise null */
228   private String JavaDoc m_path = null;
229
230   /** If specified, stores the query string for this URI; otherwise
231       null. */

232   private String JavaDoc m_queryString = null;
233
234   /** If specified, stores the fragment for this URI; otherwise null */
235   private String JavaDoc m_fragment = null;
236
237   private static boolean DEBUG = false;
238
239   /**
240   * Construct a new and uninitialized URI.
241   */

242   public URI() {
243   }
244
/**
 * Copy constructor: every component of the new URI is taken from
 * <code>p_other</code>.
 *
 * @param p_other the URI to copy (cannot be null)
 */
public URI(URI p_other) {
    initialize(p_other);
}
254
/**
 * Parses a URI specification string with no base URI.
 * <p>
 * A specification in "generic URI" syntax (two slashes after the first
 * colon) is split into scheme, userinfo, host, port, path, query string
 * and fragment as applicable. Any other specification is split into a
 * scheme and a scheme-specific part, the latter stored as the path.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty)
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(String p_uriSpec) throws MalformedURIException {
    // The cast selects the (URI, String) overload for the null base.
    this((URI) null, p_uriSpec);
}
273   
/**
 * Parses a URI specification string with no base URI, optionally
 * accepting a specification that is not an absolute URI.
 * <p>
 * A specification in "generic URI" syntax (two slashes after the first
 * colon) is split into scheme, userinfo, host, port, path, query string
 * and fragment as applicable. Any other specification is split into a
 * scheme and a scheme-specific part, the latter stored as the path.
 * When <code>allowNonAbsoluteURI</code> is true and the specification is
 * not a valid absolute URI, a relative URI is constructed instead of an
 * exception being thrown.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty)
 * @param allowNonAbsoluteURI true to permit non-absolute URIs,
 *                            false otherwise.
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException {
    // The cast selects the (URI, String, boolean) overload for the null base.
    this((URI) null, p_uriSpec, allowNonAbsoluteURI);
}
296   
/**
 * Builds a URI by parsing a specification string and resolving it
 * against a base URI. The specification may be a relative URI.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 *               empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty if p_base is null)
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
    initialize(p_base, p_uriSpec);
}
312   
/**
 * Builds a URI by parsing a specification string and resolving it
 * against a base URI. The specification may be a relative URI.
 * <p>
 * When <code>allowNonAbsoluteURI</code> is true, p_base is null and the
 * specification is not a valid absolute URI, a relative URI is
 * constructed instead of an exception being thrown.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 *               empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty if p_base is null)
 * @param allowNonAbsoluteURI true to permit non-absolute URIs,
 *                            false otherwise.
 *
 * @exception MalformedURIException if p_uriSpec violates any syntax
 *                                  rules
 */
public URI(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException {
    initialize(p_base, p_uriSpec, allowNonAbsoluteURI);
}
333
334  /**
335   * Construct a new URI that does not follow the generic URI syntax.
336   * Only the scheme and scheme-specific part (stored as the path) are
337   * initialized.
338   *
339   * @param p_scheme the URI scheme (cannot be null or empty)
340   * @param p_schemeSpecificPart the scheme-specific part (cannot be
341   * null or empty)
342   *
343   * @exception MalformedURIException if p_scheme violates any
344   * syntax rules
345   */

346   public URI(String JavaDoc p_scheme, String JavaDoc p_schemeSpecificPart)
347              throws MalformedURIException {
348     if (p_scheme == null || p_scheme.trim().length() == 0) {
349       throw new MalformedURIException(
350             "Cannot construct URI with null/empty scheme!");
351     }
352     if (p_schemeSpecificPart == null ||
353         p_schemeSpecificPart.trim().length() == 0) {
354       throw new MalformedURIException(
355           "Cannot construct URI with null/empty scheme-specific part!");
356     }
357     setScheme(p_scheme);
358     setPath(p_schemeSpecificPart);
359   }
360
/**
 * Builds a generic-syntax URI from its component parts, without userinfo
 * and with the port left unspecified (-1). All validation is done by
 * the seven-argument constructor this one delegates to.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_host the hostname, IPv4 address or IPv6 reference for the URI
 * @param p_path the URI path - if the path contains '?' or '#',
 *               then the query string and/or fragment will be
 *               set from the path; however, if the query and
 *               fragment are specified both in the path and as
 *               separate parameters, an exception is thrown
 * @param p_queryString the URI query string (cannot be specified
 *                      if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 *                   is null)
 *
 * @exception MalformedURIException if any of the parameters violates
 *                                  syntax rules or semantic rules
 */
public URI(String p_scheme, String p_host, String p_path,
           String p_queryString, String p_fragment)
        throws MalformedURIException {
    this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
387
388  /**
389   * Construct a new URI that follows the generic URI syntax from its
390   * component parts. Each component is validated for syntax and some
391   * basic semantic checks are performed as well. See the individual
392   * setter methods for specifics.
393   *
394   * @param p_scheme the URI scheme (cannot be null or empty)
395   * @param p_userinfo the URI userinfo (cannot be specified if host
396   * is null)
397   * @param p_host the hostname, IPv4 address or IPv6 reference for the URI
398   * @param p_port the URI port (may be -1 for "unspecified"; cannot
399   * be specified if host is null)
400   * @param p_path the URI path - if the path contains '?' or '#',
401   * then the query string and/or fragment will be
402   * set from the path; however, if the query and
403   * fragment are specified both in the path and as
404   * separate parameters, an exception is thrown
405   * @param p_queryString the URI query string (cannot be specified
406   * if path is null)
407   * @param p_fragment the URI fragment (cannot be specified if path
408   * is null)
409   *
410   * @exception MalformedURIException if any of the parameters violates
411   * syntax rules or semantic rules
412   */

413   public URI(String JavaDoc p_scheme, String JavaDoc p_userinfo,
414              String JavaDoc p_host, int p_port, String JavaDoc p_path,
415              String JavaDoc p_queryString, String JavaDoc p_fragment)
416          throws MalformedURIException {
417     if (p_scheme == null || p_scheme.trim().length() == 0) {
418       throw new MalformedURIException("Scheme is required!");
419     }
420
421     if (p_host == null) {
422       if (p_userinfo != null) {
423         throw new MalformedURIException(
424              "Userinfo may not be specified if host is not specified!");
425       }
426       if (p_port != -1) {
427         throw new MalformedURIException(
428              "Port may not be specified if host is not specified!");
429       }
430     }
431
432     if (p_path != null) {
433       if (p_path.indexOf('?') != -1 && p_queryString != null) {
434         throw new MalformedURIException(
435           "Query string cannot be specified in path and query string!");
436       }
437
438       if (p_path.indexOf('#') != -1 && p_fragment != null) {
439         throw new MalformedURIException(
440           "Fragment cannot be specified in both the path and fragment!");
441       }
442     }
443
444     setScheme(p_scheme);
445     setHost(p_host);
446     setPort(p_port);
447     setUserinfo(p_userinfo);
448     setPath(p_path);
449     setQueryString(p_queryString);
450     setFragment(p_fragment);
451   }
452
453  /**
454   * Initialize all fields of this URI from another URI.
455   *
456   * @param p_other the URI to copy (cannot be null)
457   */

458   private void initialize(URI p_other) {
459     m_scheme = p_other.getScheme();
460     m_userinfo = p_other.getUserinfo();
461     m_host = p_other.getHost();
462     m_port = p_other.getPort();
463     m_regAuthority = p_other.getRegBasedAuthority();
464     m_path = p_other.getPath();
465     m_queryString = p_other.getQueryString();
466     m_fragment = p_other.getFragment();
467   }
468   
469   /**
470    * Initializes this URI from a base URI and a URI specification string.
471    * See RFC 2396 Section 4 and Appendix B for specifications on parsing
472    * the URI and Section 5 for specifications on resolving relative URIs
473    * and relative paths.
474    *
475    * @param p_base the base URI (may be null if p_uriSpec is an absolute
476    * URI)
477    * @param p_uriSpec the URI spec string which may be an absolute or
478    * relative URI (can only be null/empty if p_base
479    * is not null)
480    * @param allowNonAbsoluteURI true to permit non-absolute URIs,
481    * in case of relative URI, false otherwise.
482    *
483    * @exception MalformedURIException if p_base is null and p_uriSpec
484    * is not an absolute URI or if
485    * p_uriSpec violates syntax rules
486    */

487   private void initialize(URI p_base, String JavaDoc p_uriSpec, boolean allowNonAbsoluteURI)
488       throws MalformedURIException {
489       
490       String JavaDoc uriSpec = p_uriSpec;
491       int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
492       
493       if (p_base == null && uriSpecLen == 0) {
494           if (allowNonAbsoluteURI) {
495               m_path = "";
496               return;
497           }
498           throw new MalformedURIException("Cannot initialize URI with empty parameters.");
499       }
500       
501       // just make a copy of the base if spec is empty
502
if (uriSpecLen == 0) {
503           initialize(p_base);
504           return;
505       }
506       
507       int index = 0;
508       
509       // Check for scheme, which must be before '/', '?' or '#'.
510
int colonIdx = uriSpec.indexOf(':');
511       if (colonIdx != -1) {
512           final int searchFrom = colonIdx - 1;
513           // search backwards starting from character before ':'.
514
int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
515           int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
516           int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
517           
518           if (colonIdx == 0 || slashIdx != -1 ||
519               queryIdx != -1 || fragmentIdx != -1) {
520               // A standalone base is a valid URI according to spec
521
if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowNonAbsoluteURI)) {
522                   throw new MalformedURIException("No scheme found in URI.");
523               }
524           }
525           else {
526               initializeScheme(uriSpec);
527               index = m_scheme.length()+1;
528               
529               // Neither 'scheme:' or 'scheme:#fragment' are valid URIs.
530
if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
531                   throw new MalformedURIException("Scheme specific part cannot be empty.");
532               }
533           }
534       }
535       else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowNonAbsoluteURI) {
536           throw new MalformedURIException("No scheme found in URI.");
537       }
538       
539       // Two slashes means we may have authority, but definitely means we're either
540
// matching net_path or abs_path. These two productions are ambiguous in that
541
// every net_path (except those containing an IPv6Reference) is an abs_path.
542
// RFC 2396 resolves this ambiguity by applying a greedy left most matching rule.
543
// Try matching net_path first, and if that fails we don't have authority so
544
// then attempt to match abs_path.
545
//
546
// net_path = "//" authority [ abs_path ]
547
// abs_path = "/" path_segments
548
if (((index+1) < uriSpecLen) &&
549           (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
550           index += 2;
551           int startPos = index;
552           
553           // Authority will be everything up to path, query or fragment
554
char testChar = '\0';
555           while (index < uriSpecLen) {
556               testChar = uriSpec.charAt(index);
557               if (testChar == '/' || testChar == '?' || testChar == '#') {
558                   break;
559               }
560               index++;
561           }
562           
563           // Attempt to parse authority. If the section is an empty string
564
// this is a valid server based authority, so set the host to this
565
// value.
566
if (index > startPos) {
567               // If we didn't find authority we need to back up. Attempt to
568
// match against abs_path next.
569
if (!initializeAuthority(uriSpec.substring(startPos, index))) {
570                   index = startPos - 2;
571               }
572           }
573           else {
574               m_host = "";
575           }
576       }
577       
578       initializePath(uriSpec, index);
579       
580       // Resolve relative URI to base URI - see RFC 2396 Section 5.2
581
// In some cases, it might make more sense to throw an exception
582
// (when scheme is specified is the string spec and the base URI
583
// is also specified, for example), but we're just following the
584
// RFC specifications
585
if (p_base != null) {
586           absolutize(p_base);
587       }
588   }
589
590  /**
591   * Initializes this URI from a base URI and a URI specification string.
592   * See RFC 2396 Section 4 and Appendix B for specifications on parsing
593   * the URI and Section 5 for specifications on resolving relative URIs
594   * and relative paths.
595   *
596   * @param p_base the base URI (may be null if p_uriSpec is an absolute
597   * URI)
598   * @param p_uriSpec the URI spec string which may be an absolute or
599   * relative URI (can only be null/empty if p_base
600   * is not null)
601   *
602   * @exception MalformedURIException if p_base is null and p_uriSpec
603   * is not an absolute URI or if
604   * p_uriSpec violates syntax rules
605   */

606   private void initialize(URI p_base, String JavaDoc p_uriSpec)
607                          throws MalformedURIException {
608       
609     String JavaDoc uriSpec = p_uriSpec;
610     int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
611     
612     if (p_base == null && uriSpecLen == 0) {
613       throw new MalformedURIException(
614                   "Cannot initialize URI with empty parameters.");
615     }
616
617     // just make a copy of the base if spec is empty
618
if (uriSpecLen == 0) {
619       initialize(p_base);
620       return;
621     }
622
623     int index = 0;
624
625     // Check for scheme, which must be before '/', '?' or '#'.
626
int colonIdx = uriSpec.indexOf(':');
627     if (colonIdx != -1) {
628         final int searchFrom = colonIdx - 1;
629         // search backwards starting from character before ':'.
630
int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
631         int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
632         int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
633        
634         if (colonIdx == 0 || slashIdx != -1 ||
635             queryIdx != -1 || fragmentIdx != -1) {
636             // A standalone base is a valid URI according to spec
637
if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) {
638                 throw new MalformedURIException("No scheme found in URI.");
639             }
640         }
641         else {
642             initializeScheme(uriSpec);
643             index = m_scheme.length()+1;
644             
645             // Neither 'scheme:' or 'scheme:#fragment' are valid URIs.
646
if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
647                 throw new MalformedURIException("Scheme specific part cannot be empty.");
648             }
649         }
650     }
651     else if (p_base == null && uriSpec.indexOf('#') != 0) {
652         throw new MalformedURIException("No scheme found in URI.");
653     }
654
655     // Two slashes means we may have authority, but definitely means we're either
656
// matching net_path or abs_path. These two productions are ambiguous in that
657
// every net_path (except those containing an IPv6Reference) is an abs_path.
658
// RFC 2396 resolves this ambiguity by applying a greedy left most matching rule.
659
// Try matching net_path first, and if that fails we don't have authority so
660
// then attempt to match abs_path.
661
//
662
// net_path = "//" authority [ abs_path ]
663
// abs_path = "/" path_segments
664
if (((index+1) < uriSpecLen) &&
665         (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
666       index += 2;
667       int startPos = index;
668
669       // Authority will be everything up to path, query or fragment
670
char testChar = '\0';
671       while (index < uriSpecLen) {
672         testChar = uriSpec.charAt(index);
673         if (testChar == '/' || testChar == '?' || testChar == '#') {
674           break;
675         }
676         index++;
677       }
678
679       // Attempt to parse authority. If the section is an empty string
680
// this is a valid server based authority, so set the host to this
681
// value.
682
if (index > startPos) {
683         // If we didn't find authority we need to back up. Attempt to
684
// match against abs_path next.
685
if (!initializeAuthority(uriSpec.substring(startPos, index))) {
686           index = startPos - 2;
687         }
688       }
689       else {
690         m_host = "";
691       }
692     }
693
694     initializePath(uriSpec, index);
695
696     // Resolve relative URI to base URI - see RFC 2396 Section 5.2
697
// In some cases, it might make more sense to throw an exception
698
// (when scheme is specified is the string spec and the base URI
699
// is also specified, for example), but we're just following the
700
// RFC specifications
701
if (p_base != null) {
702         absolutize(p_base);
703     }
704   }
705
706   /**
707    * Absolutize URI with given base URI.
708    *
709    * @param p_base base URI for absolutization
710    */

711   public void absolutize(URI p_base) {
712
713       // check to see if this is the current doc - RFC 2396 5.2 #2
714
// note that this is slightly different from the RFC spec in that
715
// we don't include the check for query string being null
716
// - this handles cases where the urispec is just a query
717
// string or a fragment (e.g. "?y" or "#s") -
718
// see <http://www.ics.uci.edu/~fielding/url/test1.html> which
719
// identified this as a bug in the RFC
720
if (m_path.length() == 0 && m_scheme == null &&
721           m_host == null && m_regAuthority == null) {
722           m_scheme = p_base.getScheme();
723           m_userinfo = p_base.getUserinfo();
724           m_host = p_base.getHost();
725           m_port = p_base.getPort();
726           m_regAuthority = p_base.getRegBasedAuthority();
727           m_path = p_base.getPath();
728           
729           if (m_queryString == null) {
730               m_queryString = p_base.getQueryString();
731               
732               if (m_fragment == null) {
733                   m_fragment = p_base.getFragment();
734               }
735           }
736           return;
737       }
738       
739       // check for scheme - RFC 2396 5.2 #3
740
// if we found a scheme, it means absolute URI, so we're done
741
if (m_scheme == null) {
742           m_scheme = p_base.getScheme();
743       }
744       else {
745           return;
746       }
747       
748       // check for authority - RFC 2396 5.2 #4
749
// if we found a host, then we've got a network path, so we're done
750
if (m_host == null && m_regAuthority == null) {
751           m_userinfo = p_base.getUserinfo();
752           m_host = p_base.getHost();
753           m_port = p_base.getPort();
754           m_regAuthority = p_base.getRegBasedAuthority();
755       }
756       else {
757           return;
758       }
759       
760       // check for absolute path - RFC 2396 5.2 #5
761
if (m_path.length() > 0 &&
762               m_path.startsWith("/")) {
763           return;
764       }
765       
766       // if we get to this point, we need to resolve relative path
767
// RFC 2396 5.2 #6
768
String JavaDoc path = "";
769       String JavaDoc basePath = p_base.getPath();
770       
771       // 6a - get all but the last segment of the base URI path
772
if (basePath != null && basePath.length() > 0) {
773           int lastSlash = basePath.lastIndexOf('/');
774           if (lastSlash != -1) {
775               path = basePath.substring(0, lastSlash+1);
776           }
777       }
778       else if (m_path.length() > 0) {
779           path = "/";
780       }
781       
782       // 6b - append the relative URI path
783
path = path.concat(m_path);
784       
785       // 6c - remove all "./" where "." is a complete path segment
786
int index = -1;
787       while ((index = path.indexOf("/./")) != -1) {
788           path = path.substring(0, index+1).concat(path.substring(index+3));
789       }
790       
791       // 6d - remove "." if path ends with "." as a complete path segment
792
if (path.endsWith("/.")) {
793           path = path.substring(0, path.length()-1);
794       }
795       
796       // 6e - remove all "<segment>/../" where "<segment>" is a complete
797
// path segment not equal to ".."
798
index = 1;
799       int segIndex = -1;
800       String JavaDoc tempString = null;
801       
802       while ((index = path.indexOf("/../", index)) > 0) {
803           tempString = path.substring(0, path.indexOf("/../"));
804           segIndex = tempString.lastIndexOf('/');
805           if (segIndex != -1) {
806               if (!tempString.substring(segIndex).equals("..")) {
807                   path = path.substring(0, segIndex+1).concat(path.substring(index+4));
808                   index = segIndex;
809               }
810               else {
811                   index += 4;
812               }
813           }
814           else {
815               index += 4;
816           }
817       }
818       
819       // 6f - remove ending "<segment>/.." where "<segment>" is a
820
// complete path segment
821
if (path.endsWith("/..")) {
822           tempString = path.substring(0, path.length()-3);
823           segIndex = tempString.lastIndexOf('/');
824           if (segIndex != -1) {
825               path = path.substring(0, segIndex+1);
826           }
827       }
828       m_path = path;
829   }
830
831  /**
832   * Initialize the scheme for this URI from a URI string spec.
833   *
834   * @param p_uriSpec the URI specification (cannot be null)
835   *
836   * @exception MalformedURIException if URI does not have a conformant
837   * scheme
838   */

839   private void initializeScheme(String JavaDoc p_uriSpec)
840                  throws MalformedURIException {
841     int uriSpecLen = p_uriSpec.length();
842     int index = 0;
843     String JavaDoc scheme = null;
844     char testChar = '\0';
845
846     while (index < uriSpecLen) {
847       testChar = p_uriSpec.charAt(index);
848       if (testChar == ':' || testChar == '/' ||
849           testChar == '?' || testChar == '#') {
850         break;
851       }
852       index++;
853     }
854     scheme = p_uriSpec.substring(0, index);
855
856     if (scheme.length() == 0) {
857       throw new MalformedURIException("No scheme found in URI.");
858     }
859     else {
860       setScheme(scheme);
861     }
862   }
863
864  /**
865   * Initialize the authority (either server or registry based)
866   * for this URI from a URI string spec.
867   *
868   * @param p_uriSpec the URI specification (cannot be null)
869   *
870   * @return true if the given string matched server or registry
871   * based authority
872   */

873   private boolean initializeAuthority(String JavaDoc p_uriSpec) {
874     
875     int index = 0;
876     int start = 0;
877     int end = p_uriSpec.length();
878
879     char testChar = '\0';
880     String JavaDoc userinfo = null;
881
882     // userinfo is everything up to @
883
if (p_uriSpec.indexOf('@', start) != -1) {
884       while (index < end) {
885         testChar = p_uriSpec.charAt(index);
886         if (testChar == '@') {
887           break;
888         }
889         index++;
890       }
891       userinfo = p_uriSpec.substring(start, index);
892       index++;
893     }
894
895     // host is everything up to last ':', or up to
896
// and including ']' if followed by ':'.
897
String JavaDoc host = null;
898     start = index;
899     boolean hasPort = false;
900     if (index < end) {
901       if (p_uriSpec.charAt(start) == '[') {
902         int bracketIndex = p_uriSpec.indexOf(']', start);
903         index = (bracketIndex != -1) ? bracketIndex : end;
904         if (index+1 < end && p_uriSpec.charAt(index+1) == ':') {
905           ++index;
906           hasPort = true;
907         }
908         else {
909           index = end;
910         }
911       }
912       else {
913         int colonIndex = p_uriSpec.lastIndexOf(':', end);
914         index = (colonIndex > start) ? colonIndex : end;
915         hasPort = (index != end);
916       }
917     }
918     host = p_uriSpec.substring(start, index);
919     int port = -1;
920     if (host.length() > 0) {
921       // port
922
if (hasPort) {
923         index++;
924         start = index;
925         while (index < end) {
926           index++;
927         }
928         String JavaDoc portStr = p_uriSpec.substring(start, index);
929         if (portStr.length() > 0) {
930           // REVISIT: Remove this code.
931
/** for (int i = 0; i < portStr.length(); i++) {
932             if (!isDigit(portStr.charAt(i))) {
933               throw new MalformedURIException(
934                    portStr +
935                    " is invalid. Port should only contain digits!");
936             }
937           }**/

938           // REVISIT: Remove this code.
939
// Store port value as string instead of integer.
940
try {
941             port = Integer.parseInt(portStr);
942             if (port == -1) --port;
943           }
944           catch (NumberFormatException JavaDoc nfe) {
945             port = -2;
946           }
947         }
948       }
949     }
950     
951     if (isValidServerBasedAuthority(host, port, userinfo)) {
952       m_host = host;
953       m_port = port;
954       m_userinfo = userinfo;
955       return true;
956     }
957     // Note: Registry based authority is being removed from a
958
// new spec for URI which would obsolete RFC 2396. If the
959
// spec is added to XML errata, processing of reg_name
960
// needs to be removed. - mrglavas.
961
else if (isValidRegistryBasedAuthority(p_uriSpec)) {
962       m_regAuthority = p_uriSpec;
963       return true;
964     }
965     return false;
966   }
967   
968   /**
969    * Determines whether the components host, port, and user info
970    * are valid as a server authority.
971    *
972    * @param host the host component of authority
973    * @param port the port number component of authority
974    * @param userinfo the user info component of authority
975    *
976    * @return true if the given host, port, and userinfo compose
977    * a valid server authority
978    */

979   private boolean isValidServerBasedAuthority(String JavaDoc host, int port, String JavaDoc userinfo) {
980     
981     // Check if the host is well formed.
982
if (!isWellFormedAddress(host)) {
983       return false;
984     }
985     
986     // Check that port is well formed if it exists.
987
// REVISIT: There's no restriction on port value ranges, but
988
// perform the same check as in setPort to be consistent. Pass
989
// in a string to this method instead of an integer.
990
if (port < -1 || port > 65535) {
991       return false;
992     }
993     
994     // Check that userinfo is well formed if it exists.
995
if (userinfo != null) {
996       // Userinfo can contain alphanumerics, mark characters, escaped
997
// and ';',':','&','=','+','$',','
998
int index = 0;
999       int end = userinfo.length();
1000      char testChar = '\0';
1001      while (index < end) {
1002        testChar = userinfo.charAt(index);
1003        if (testChar == '%') {
1004          if (index+2 >= end ||
1005            !isHex(userinfo.charAt(index+1)) ||
1006            !isHex(userinfo.charAt(index+2))) {
1007            return false;
1008          }
1009          index += 2;
1010        }
1011        else if (!isUserinfoCharacter(testChar)) {
1012          return false;
1013        }
1014        ++index;
1015      }
1016    }
1017    return true;
1018  }
1019  
1020  /**
1021   * Determines whether the given string is a registry based authority.
1022   *
1023   * @param authority the authority component of a URI
1024   *
1025   * @return true if the given string is a registry based authority
1026   */

1027  private boolean isValidRegistryBasedAuthority(String JavaDoc authority) {
1028    int index = 0;
1029    int end = authority.length();
1030    char testChar;
1031    
1032    while (index < end) {
1033      testChar = authority.charAt(index);
1034      
1035      // check for valid escape sequence
1036
if (testChar == '%') {
1037        if (index+2 >= end ||
1038            !isHex(authority.charAt(index+1)) ||
1039            !isHex(authority.charAt(index+2))) {
1040            return false;
1041        }
1042        index += 2;
1043      }
1044      // can check against path characters because the set
1045
// is the same except for '/' which we've already excluded.
1046
else if (!isPathCharacter(testChar)) {
1047        return false;
1048      }
1049      ++index;
1050    }
1051    return true;
1052  }
1053    
1054 /**
1055  * Initialize the path for this URI from a URI string spec.
1056  *
1057  * @param p_uriSpec the URI specification (cannot be null)
1058  * @param p_nStartIndex the index to begin scanning from
1059  *
1060  * @exception MalformedURIException if p_uriSpec violates syntax rules
1061  */

1062  private void initializePath(String JavaDoc p_uriSpec, int p_nStartIndex)
1063                 throws MalformedURIException {
1064    if (p_uriSpec == null) {
1065      throw new MalformedURIException(
1066                "Cannot initialize path from null string!");
1067    }
1068
1069    int index = p_nStartIndex;
1070    int start = p_nStartIndex;
1071    int end = p_uriSpec.length();
1072    char testChar = '\0';
1073
1074    // path - everything up to query string or fragment
1075
if (start < end) {
1076        // RFC 2732 only allows '[' and ']' to appear in the opaque part.
1077
if (getScheme() == null || p_uriSpec.charAt(start) == '/') {
1078        
1079            // Scan path.
1080
// abs_path = "/" path_segments
1081
// rel_path = rel_segment [ abs_path ]
1082
while (index < end) {
1083                testChar = p_uriSpec.charAt(index);
1084            
1085                // check for valid escape sequence
1086
if (testChar == '%') {
1087                    if (index+2 >= end ||
1088                    !isHex(p_uriSpec.charAt(index+1)) ||
1089                    !isHex(p_uriSpec.charAt(index+2))) {
1090                        throw new MalformedURIException(
1091                            "Path contains invalid escape sequence!");
1092                    }
1093                    index += 2;
1094                }
1095                // Path segments cannot contain '[' or ']' since pchar
1096
// production was not changed by RFC 2732.
1097
else if (!isPathCharacter(testChar)) {
1098                    if (testChar == '?' || testChar == '#') {
1099                        break;
1100                    }
1101                    throw new MalformedURIException(
1102                        "Path contains invalid character: " + testChar);
1103                }
1104                ++index;
1105            }
1106        }
1107        else {
1108            
1109            // Scan opaque part.
1110
// opaque_part = uric_no_slash *uric
1111
while (index < end) {
1112                testChar = p_uriSpec.charAt(index);
1113            
1114                if (testChar == '?' || testChar == '#') {
1115                    break;
1116                }
1117                
1118                // check for valid escape sequence
1119
if (testChar == '%') {
1120                    if (index+2 >= end ||
1121                    !isHex(p_uriSpec.charAt(index+1)) ||
1122                    !isHex(p_uriSpec.charAt(index+2))) {
1123                        throw new MalformedURIException(
1124                            "Opaque part contains invalid escape sequence!");
1125                    }
1126                    index += 2;
1127                }
1128                // If the scheme specific part is opaque, it can contain '['
1129
// and ']'. uric_no_slash wasn't modified by RFC 2732, which
1130
// I've interpreted as an error in the spec, since the
1131
// production should be equivalent to (uric - '/'), and uric
1132
// contains '[' and ']'. - mrglavas
1133
else if (!isURICharacter(testChar)) {
1134                    throw new MalformedURIException(
1135                        "Opaque part contains invalid character: " + testChar);
1136                }
1137                ++index;
1138            }
1139        }
1140    }
1141    m_path = p_uriSpec.substring(start, index);
1142
1143    // query - starts with ? and up to fragment or end
1144
if (testChar == '?') {
1145      index++;
1146      start = index;
1147      while (index < end) {
1148        testChar = p_uriSpec.charAt(index);
1149        if (testChar == '#') {
1150          break;
1151        }
1152        if (testChar == '%') {
1153           if (index+2 >= end ||
1154              !isHex(p_uriSpec.charAt(index+1)) ||
1155              !isHex(p_uriSpec.charAt(index+2))) {
1156            throw new MalformedURIException(
1157                    "Query string contains invalid escape sequence!");
1158           }
1159           index += 2;
1160        }
1161        else if (!isURICharacter(testChar)) {
1162          throw new MalformedURIException(
1163                "Query string contains invalid character: " + testChar);
1164        }
1165        index++;
1166      }
1167      m_queryString = p_uriSpec.substring(start, index);
1168    }
1169
1170    // fragment - starts with #
1171
if (testChar == '#') {
1172      index++;
1173      start = index;
1174      while (index < end) {
1175        testChar = p_uriSpec.charAt(index);
1176
1177        if (testChar == '%') {
1178           if (index+2 >= end ||
1179              !isHex(p_uriSpec.charAt(index+1)) ||
1180              !isHex(p_uriSpec.charAt(index+2))) {
1181            throw new MalformedURIException(
1182                    "Fragment contains invalid escape sequence!");
1183           }
1184           index += 2;
1185        }
1186        else if (!isURICharacter(testChar)) {
1187          throw new MalformedURIException(
1188                "Fragment contains invalid character: "+testChar);
1189        }
1190        index++;
1191      }
1192      m_fragment = p_uriSpec.substring(start, index);
1193    }
1194  }
1195
1196 /**
1197  * Get the scheme for this URI.
1198  *
1199  * @return the scheme for this URI
1200  */

1201  public String JavaDoc getScheme() {
1202    return m_scheme;
1203  }
1204
1205 /**
1206  * Get the scheme-specific part for this URI (everything following the
1207  * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
1208  *
1209  * @return the scheme-specific part for this URI
1210  */

1211  public String JavaDoc getSchemeSpecificPart() {
1212    StringBuffer JavaDoc schemespec = new StringBuffer JavaDoc();
1213
1214    if (m_host != null || m_regAuthority != null) {
1215      schemespec.append("//");
1216    
1217      // Server based authority.
1218
if (m_host != null) {
1219
1220        if (m_userinfo != null) {
1221          schemespec.append(m_userinfo);
1222          schemespec.append('@');
1223        }
1224        
1225        schemespec.append(m_host);
1226        
1227        if (m_port != -1) {
1228          schemespec.append(':');
1229          schemespec.append(m_port);
1230        }
1231      }
1232      // Registry based authority.
1233
else {
1234        schemespec.append(m_regAuthority);
1235      }
1236    }
1237
1238    if (m_path != null) {
1239      schemespec.append((m_path));
1240    }
1241
1242    if (m_queryString != null) {
1243      schemespec.append('?');
1244      schemespec.append(m_queryString);
1245    }
1246
1247    if (m_fragment != null) {
1248      schemespec.append('#');
1249      schemespec.append(m_fragment);
1250    }
1251
1252    return schemespec.toString();
1253  }
1254
1255 /**
1256  * Get the userinfo for this URI.
1257  *
1258  * @return the userinfo for this URI (null if not specified).
1259  */

1260  public String JavaDoc getUserinfo() {
1261    return m_userinfo;
1262  }
1263
1264  /**
1265  * Get the host for this URI.
1266  *
1267  * @return the host for this URI (null if not specified).
1268  */

1269  public String JavaDoc getHost() {
1270    return m_host;
1271  }
1272
1273 /**
1274  * Get the port for this URI.
1275  *
1276  * @return the port for this URI (-1 if not specified).
1277  */

1278  public int getPort() {
1279    return m_port;
1280  }
1281  
1282  /**
1283   * Get the registry based authority for this URI.
1284   *
1285   * @return the registry based authority (null if not specified).
1286   */

1287  public String JavaDoc getRegBasedAuthority() {
1288    return m_regAuthority;
1289  }
1290  
1291  /**
1292   * Get the authority for this URI.
1293   *
1294   * @return the authority
1295   */

1296  public String JavaDoc getAuthority() {
1297      StringBuffer JavaDoc authority = new StringBuffer JavaDoc();
1298      if (m_host != null || m_regAuthority != null) {
1299          authority.append("//");
1300          
1301          // Server based authority.
1302
if (m_host != null) {
1303              
1304              if (m_userinfo != null) {
1305                  authority.append(m_userinfo);
1306                  authority.append('@');
1307              }
1308              
1309              authority.append(m_host);
1310              
1311              if (m_port != -1) {
1312                  authority.append(':');
1313                  authority.append(m_port);
1314              }
1315          }
1316          // Registry based authority.
1317
else {
1318              authority.append(m_regAuthority);
1319          }
1320      }
1321      return authority.toString();
1322  }
1323
1324 /**
1325  * Get the path for this URI (optionally with the query string and
1326  * fragment).
1327  *
1328  * @param p_includeQueryString if true (and query string is not null),
1329  * then a "?" followed by the query string
1330  * will be appended
1331  * @param p_includeFragment if true (and fragment is not null),
1332  * then a "#" followed by the fragment
1333  * will be appended
1334  *
1335  * @return the path for this URI possibly including the query string
1336  * and fragment
1337  */

1338  public String JavaDoc getPath(boolean p_includeQueryString,
1339                        boolean p_includeFragment) {
1340    StringBuffer JavaDoc pathString = new StringBuffer JavaDoc(m_path);
1341
1342    if (p_includeQueryString && m_queryString != null) {
1343      pathString.append('?');
1344      pathString.append(m_queryString);
1345    }
1346
1347    if (p_includeFragment && m_fragment != null) {
1348      pathString.append('#');
1349      pathString.append(m_fragment);
1350    }
1351    return pathString.toString();
1352  }
1353
1354 /**
1355  * Get the path for this URI. Note that the value returned is the path
1356  * only and does not include the query string or fragment.
1357  *
1358  * @return the path for this URI.
1359  */

1360  public String JavaDoc getPath() {
1361    return m_path;
1362  }
1363
1364 /**
1365  * Get the query string for this URI.
1366  *
1367  * @return the query string for this URI. Null is returned if there
1368  * was no "?" in the URI spec, empty string if there was a
1369  * "?" but no query string following it.
1370  */

1371  public String JavaDoc getQueryString() {
1372    return m_queryString;
1373  }
1374
1375 /**
1376  * Get the fragment for this URI.
1377  *
1378  * @return the fragment for this URI. Null is returned if there
1379  * was no "#" in the URI spec, empty string if there was a
1380  * "#" but no fragment following it.
1381  */

1382  public String JavaDoc getFragment() {
1383    return m_fragment;
1384  }
1385
1386 /**
1387  * Set the scheme for this URI. The scheme is converted to lowercase
1388  * before it is set.
1389  *
1390  * @param p_scheme the scheme for this URI (cannot be null)
1391  *
1392  * @exception MalformedURIException if p_scheme is not a conformant
1393  * scheme name
1394  */

1395  public void setScheme(String JavaDoc p_scheme) throws MalformedURIException {
1396    if (p_scheme == null) {
1397      throw new MalformedURIException(
1398                "Cannot set scheme from null string!");
1399    }
1400    if (!isConformantSchemeName(p_scheme)) {
1401      throw new MalformedURIException("The scheme is not conformant.");
1402    }
1403
1404    m_scheme = p_scheme.toLowerCase();
1405  }
1406
1407 /**
1408  * Set the userinfo for this URI. If a non-null value is passed in and
1409  * the host value is null, then an exception is thrown.
1410  *
1411  * @param p_userinfo the userinfo for this URI
1412  *
1413  * @exception MalformedURIException if p_userinfo contains invalid
1414  * characters
1415  */

1416  public void setUserinfo(String JavaDoc p_userinfo) throws MalformedURIException {
1417    if (p_userinfo == null) {
1418      m_userinfo = null;
1419      return;
1420    }
1421    else {
1422      if (m_host == null) {
1423        throw new MalformedURIException(
1424                     "Userinfo cannot be set when host is null!");
1425      }
1426
1427      // userinfo can contain alphanumerics, mark characters, escaped
1428
// and ';',':','&','=','+','$',','
1429
int index = 0;
1430      int end = p_userinfo.length();
1431      char testChar = '\0';
1432      while (index < end) {
1433        testChar = p_userinfo.charAt(index);
1434        if (testChar == '%') {
1435          if (index+2 >= end ||
1436              !isHex(p_userinfo.charAt(index+1)) ||
1437              !isHex(p_userinfo.charAt(index+2))) {
1438            throw new MalformedURIException(
1439                  "Userinfo contains invalid escape sequence!");
1440          }
1441        }
1442        else if (!isUserinfoCharacter(testChar)) {
1443          throw new MalformedURIException(
1444                  "Userinfo contains invalid character:"+testChar);
1445        }
1446        index++;
1447      }
1448    }
1449    m_userinfo = p_userinfo;
1450  }
1451
1452 /**
1453  * <p>Set the host for this URI. If null is passed in, the userinfo
1454  * field is also set to null and the port is set to -1.</p>
1455  *
1456  * <p>Note: This method overwrites registry based authority if it
1457  * previously existed in this URI.</p>
1458  *
1459  * @param p_host the host for this URI
1460  *
1461  * @exception MalformedURIException if p_host is not a valid IP
1462  * address or DNS hostname.
1463  */

1464  public void setHost(String JavaDoc p_host) throws MalformedURIException {
1465    if (p_host == null || p_host.length() == 0) {
1466      if (p_host != null) {
1467        m_regAuthority = null;
1468      }
1469      m_host = p_host;
1470      m_userinfo = null;
1471      m_port = -1;
1472      return;
1473    }
1474    else if (!isWellFormedAddress(p_host)) {
1475      throw new MalformedURIException("Host is not a well formed address!");
1476    }
1477    m_host = p_host;
1478    m_regAuthority = null;
1479  }
1480
1481 /**
1482  * Set the port for this URI. -1 is used to indicate that the port is
1483  * not specified, otherwise valid port numbers are between 0 and 65535.
1484  * If a valid port number is passed in and the host field is null,
1485  * an exception is thrown.
1486  *
1487  * @param p_port the port number for this URI
1488  *
1489  * @exception MalformedURIException if p_port is not -1 and not a
1490  * valid port number
1491  */

1492  public void setPort(int p_port) throws MalformedURIException {
1493    if (p_port >= 0 && p_port <= 65535) {
1494      if (m_host == null) {
1495        throw new MalformedURIException(
1496                      "Port cannot be set when host is null!");
1497      }
1498    }
1499    else if (p_port != -1) {
1500      throw new MalformedURIException("Invalid port number!");
1501    }
1502    m_port = p_port;
1503  }
1504  
1505  /**
1506   * <p>Sets the registry based authority for this URI.</p>
1507   *
1508   * <p>Note: This method overwrites server based authority
1509   * if it previously existed in this URI.</p>
1510   *
1511   * @param authority the registry based authority for this URI
1512   *
1513   * @exception MalformedURIException it authority is not a
1514   * well formed registry based authority
1515   */

1516  public void setRegBasedAuthority(String JavaDoc authority)
1517    throws MalformedURIException {
1518
1519    if (authority == null) {
1520      m_regAuthority = null;
1521      return;
1522    }
1523    // reg_name = 1*( unreserved | escaped | "$" | "," |
1524
// ";" | ":" | "@" | "&" | "=" | "+" )
1525
else if (authority.length() < 1 ||
1526      !isValidRegistryBasedAuthority(authority) ||
1527      authority.indexOf('/') != -1) {
1528      throw new MalformedURIException("Registry based authority is not well formed.");
1529    }
1530    m_regAuthority = authority;
1531    m_host = null;
1532    m_userinfo = null;
1533    m_port = -1;
1534  }
1535
1536 /**
1537  * Set the path for this URI. If the supplied path is null, then the
1538  * query string and fragment are set to null as well. If the supplied
1539  * path includes a query string and/or fragment, these fields will be
1540  * parsed and set as well. Note that, for URIs following the "generic
1541  * URI" syntax, the path specified should start with a slash.
1542  * For URIs that do not follow the generic URI syntax, this method
1543  * sets the scheme-specific part.
1544  *
1545  * @param p_path the path for this URI (may be null)
1546  *
1547  * @exception MalformedURIException if p_path contains invalid
1548  * characters
1549  */

1550  public void setPath(String JavaDoc p_path) throws MalformedURIException {
1551    if (p_path == null) {
1552      m_path = null;
1553      m_queryString = null;
1554      m_fragment = null;
1555    }
1556    else {
1557      initializePath(p_path, 0);
1558    }
1559  }
1560
1561 /**
1562  * Append to the end of the path of this URI. If the current path does
1563  * not end in a slash and the path to be appended does not begin with
1564  * a slash, a slash will be appended to the current path before the
1565  * new segment is added. Also, if the current path ends in a slash
1566  * and the new segment begins with a slash, the extra slash will be
1567  * removed before the new segment is appended.
1568  *
1569  * @param p_addToPath the new segment to be added to the current path
1570  *
1571  * @exception MalformedURIException if p_addToPath contains syntax
1572  * errors
1573  */

1574  public void appendPath(String JavaDoc p_addToPath)
1575                         throws MalformedURIException {
1576    if (p_addToPath == null || p_addToPath.trim().length() == 0) {
1577      return;
1578    }
1579
1580    if (!isURIString(p_addToPath)) {
1581      throw new MalformedURIException(
1582              "Path contains invalid character!");
1583    }
1584
1585    if (m_path == null || m_path.trim().length() == 0) {
1586      if (p_addToPath.startsWith("/")) {
1587        m_path = p_addToPath;
1588      }
1589      else {
1590        m_path = "/" + p_addToPath;
1591      }
1592    }
1593    else if (m_path.endsWith("/")) {
1594      if (p_addToPath.startsWith("/")) {
1595        m_path = m_path.concat(p_addToPath.substring(1));
1596      }
1597      else {
1598        m_path = m_path.concat(p_addToPath);
1599      }
1600    }
1601    else {
1602      if (p_addToPath.startsWith("/")) {
1603        m_path = m_path.concat(p_addToPath);
1604      }
1605      else {
1606        m_path = m_path.concat("/" + p_addToPath);
1607      }
1608    }
1609  }
1610
1611 /**
1612  * Set the query string for this URI. A non-null value is valid only
1613  * if this is an URI conforming to the generic URI syntax and
1614  * the path value is not null.
1615  *
1616  * @param p_queryString the query string for this URI
1617  *
1618  * @exception MalformedURIException if p_queryString is not null and this
1619  * URI does not conform to the generic
1620  * URI syntax or if the path is null
1621  */

1622  public void setQueryString(String JavaDoc p_queryString) throws MalformedURIException {
1623    if (p_queryString == null) {
1624      m_queryString = null;
1625    }
1626    else if (!isGenericURI()) {
1627      throw new MalformedURIException(
1628              "Query string can only be set for a generic URI!");
1629    }
1630    else if (getPath() == null) {
1631      throw new MalformedURIException(
1632              "Query string cannot be set when path is null!");
1633    }
1634    else if (!isURIString(p_queryString)) {
1635      throw new MalformedURIException(
1636              "Query string contains invalid character!");
1637    }
1638    else {
1639      m_queryString = p_queryString;
1640    }
1641  }
1642
1643 /**
1644  * Set the fragment for this URI. A non-null value is valid only
1645  * if this is a URI conforming to the generic URI syntax and
1646  * the path value is not null.
1647  *
1648  * @param p_fragment the fragment for this URI
1649  *
1650  * @exception MalformedURIException if p_fragment is not null and this
1651  * URI does not conform to the generic
1652  * URI syntax or if the path is null
1653  */

1654  public void setFragment(String JavaDoc p_fragment) throws MalformedURIException {
1655    if (p_fragment == null) {
1656      m_fragment = null;
1657    }
1658    else if (!isGenericURI()) {
1659      throw new MalformedURIException(
1660         "Fragment can only be set for a generic URI!");
1661    }
1662    else if (getPath() == null) {
1663      throw new MalformedURIException(
1664              "Fragment cannot be set when path is null!");
1665    }
1666    else if (!isURIString(p_fragment)) {
1667      throw new MalformedURIException(
1668              "Fragment contains invalid character!");
1669    }
1670    else {
1671      m_fragment = p_fragment;
1672    }
1673  }
1674
1675 /**
1676  * Determines if the passed-in Object is equivalent to this URI.
1677  *
1678  * @param p_test the Object to test for equality.
1679  *
1680  * @return true if p_test is a URI with all values equal to this
1681  * URI, false otherwise
1682  */

1683  public boolean equals(Object JavaDoc p_test) {
1684    if (p_test instanceof URI) {
1685      URI testURI = (URI) p_test;
1686      if (((m_scheme == null && testURI.m_scheme == null) ||
1687           (m_scheme != null && testURI.m_scheme != null &&
1688            m_scheme.equals(testURI.m_scheme))) &&
1689          ((m_userinfo == null && testURI.m_userinfo == null) ||
1690           (m_userinfo != null && testURI.m_userinfo != null &&
1691            m_userinfo.equals(testURI.m_userinfo))) &&
1692          ((m_host == null && testURI.m_host == null) ||
1693           (m_host != null && testURI.m_host != null &&
1694            m_host.equals(testURI.m_host))) &&
1695            m_port == testURI.m_port &&
1696          ((m_path == null && testURI.m_path == null) ||
1697           (m_path != null && testURI.m_path != null &&
1698            m_path.equals(testURI.m_path))) &&
1699          ((m_queryString == null && testURI.m_queryString == null) ||
1700           (m_queryString != null && testURI.m_queryString != null &&
1701            m_queryString.equals(testURI.m_queryString))) &&
1702          ((m_fragment == null && testURI.m_fragment == null) ||
1703           (m_fragment != null && testURI.m_fragment != null &&
1704            m_fragment.equals(testURI.m_fragment)))) {
1705        return true;
1706      }
1707    }
1708    return false;
1709  }
1710
1711 /**
1712  * Get the URI as a string specification. See RFC 2396 Section 5.2.
1713  *
1714  * @return the URI string specification
1715  */

1716  public String JavaDoc toString() {
1717    StringBuffer JavaDoc uriSpecString = new StringBuffer JavaDoc();
1718
1719    if (m_scheme != null) {
1720      uriSpecString.append(m_scheme);
1721      uriSpecString.append(':');
1722    }
1723    uriSpecString.append(getSchemeSpecificPart());
1724    return uriSpecString.toString();
1725  }
1726
1727 /**
1728  * Get the indicator as to whether this URI uses the "generic URI"
1729  * syntax.
1730  *
1731  * @return true if this URI uses the "generic URI" syntax, false
1732  * otherwise
1733  */

1734  public boolean isGenericURI() {
1735    // presence of the host (whether valid or empty) means
1736
// double-slashes which means generic uri
1737
return (m_host != null);
1738  }
1739  
1740  /**
1741   * Returns whether this URI represents an absolute URI.
1742   *
1743   * @return true if this URI represents an absolute URI, false
1744   * otherwise
1745   */

1746  public boolean isAbsoluteURI() {
1747      // presence of the scheme means absolute uri
1748
return (m_scheme != null);
1749  }
1750
1751 /**
1752  * Determine whether a scheme conforms to the rules for a scheme name.
1753  * A scheme is conformant if it starts with an alphanumeric, and
1754  * contains only alphanumerics, '+','-' and '.'.
1755  *
1756  * @return true if the scheme is conformant, false otherwise
1757  */

1758  public static boolean isConformantSchemeName(String JavaDoc p_scheme) {
1759    if (p_scheme == null || p_scheme.trim().length() == 0) {
1760      return false;
1761    }
1762
1763    if (!isAlpha(p_scheme.charAt(0))) {
1764      return false;
1765    }
1766
1767    char testChar;
1768    int schemeLength = p_scheme.length();
1769    for (int i = 1; i < schemeLength; ++i) {
1770      testChar = p_scheme.charAt(i);
1771      if (!isSchemeCharacter(testChar)) {
1772        return false;
1773      }
1774    }
1775
1776    return true;
1777  }
1778
1779 /**
1780  * Determine whether a string is syntactically capable of representing
1781  * a valid IPv4 address, IPv6 reference or the domain name of a network host.
1782  * A valid IPv4 address consists of four decimal digit groups separated by a
1783  * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3,
1784  * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname
1785  * consists of domain labels (each of which must begin and end with an alphanumeric
1786  * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2.
1787  *
1788  * @return true if the string is a syntactically valid IPv4 address,
1789  * IPv6 reference or hostname
1790  */

1791  public static boolean isWellFormedAddress(String JavaDoc address) {
1792    if (address == null) {
1793      return false;
1794    }
1795
1796    int addrLength = address.length();
1797    if (addrLength == 0) {
1798      return false;
1799    }
1800    
1801    // Check if the host is a valid IPv6reference.
1802
if (address.startsWith("[")) {
1803      return isWellFormedIPv6Reference(address);
1804    }
1805
1806    // Cannot start with a '.', '-', or end with a '-'.
1807
if (address.startsWith(".") ||
1808        address.startsWith("-") ||
1809        address.endsWith("-")) {
1810      return false;
1811    }
1812
1813    // rightmost domain label starting with digit indicates IP address
1814
// since top level domain label can only start with an alpha
1815
// see RFC 2396 Section 3.2.2
1816
int index = address.lastIndexOf('.');
1817    if (address.endsWith(".")) {
1818      index = address.substring(0, index).lastIndexOf('.');
1819    }
1820
1821    if (index+1 < addrLength && isDigit(address.charAt(index+1))) {
1822      return isWellFormedIPv4Address(address);
1823    }
1824    else {
1825      // hostname = *( domainlabel "." ) toplabel [ "." ]
1826
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
1827
// toplabel = alpha | alpha *( alphanum | "-" ) alphanum
1828

1829      // RFC 2396 states that hostnames take the form described in
1830
// RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
1831
// to RFC 1034, hostnames are limited to 255 characters.
1832
if (addrLength > 255) {
1833        return false;
1834      }
1835      
1836      // domain labels can contain alphanumerics and '-"
1837
// but must start and end with an alphanumeric
1838
char testChar;
1839      int labelCharCount = 0;
1840
1841      for (int i = 0; i < addrLength; i++) {
1842        testChar = address.charAt(i);
1843        if (testChar == '.') {
1844          if (!isAlphanum(address.charAt(i-1))) {
1845            return false;
1846          }
1847          if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) {
1848            return false;
1849          }
1850          labelCharCount = 0;
1851        }
1852        else if (!isAlphanum(testChar) && testChar != '-') {
1853          return false;
1854        }
1855        // RFC 1034: Labels must be 63 characters or less.
1856
else if (++labelCharCount > 63) {
1857          return false;
1858        }
1859      }
1860    }
1861    return true;
1862  }
1863  
1864  /**
1865   * <p>Determines whether a string is an IPv4 address as defined by
1866   * RFC 2373, and under the further constraint that it must be a 32-bit
1867   * address. Though not expressed in the grammar, in order to satisfy
1868   * the 32-bit address constraint, each segment of the address cannot
1869   * be greater than 255 (8 bits of information).</p>
1870   *
1871   * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</code></p>
1872   *
1873   * @return true if the string is a syntactically valid IPv4 address
1874   */

1875  public static boolean isWellFormedIPv4Address(String JavaDoc address) {
1876      
1877      int addrLength = address.length();
1878      char testChar;
1879      int numDots = 0;
1880      int numDigits = 0;
1881
1882      // make sure that 1) we see only digits and dot separators, 2) that
1883
// any dot separator is preceded and followed by a digit and
1884
// 3) that we find 3 dots
1885
//
1886
// RFC 2732 amended RFC 2396 by replacing the definition
1887
// of IPv4address with the one defined by RFC 2373. - mrglavas
1888
//
1889
// IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
1890
//
1891
// One to three digits must be in each segment.
1892
for (int i = 0; i < addrLength; i++) {
1893        testChar = address.charAt(i);
1894        if (testChar == '.') {
1895          if ((i > 0 && !isDigit(address.charAt(i-1))) ||
1896              (i+1 < addrLength && !isDigit(address.charAt(i+1)))) {
1897            return false;
1898          }
1899          numDigits = 0;
1900          if (++numDots > 3) {
1901            return false;
1902          }
1903        }
1904        else if (!isDigit(testChar)) {
1905          return false;
1906        }
1907        // Check that that there are no more than three digits
1908
// in this segment.
1909
else if (++numDigits > 3) {
1910          return false;
1911        }
1912        // Check that this segment is not greater than 255.
1913
else if (numDigits == 3) {
1914          char first = address.charAt(i-2);
1915          char second = address.charAt(i-1);
1916          if (!(first < '2' ||
1917               (first == '2' &&
1918               (second < '5' ||
1919               (second == '5' && testChar <= '5'))))) {
1920            return false;
1921          }
1922        }
1923      }
1924      return (numDots == 3);
1925  }
1926  
1927  /**
1928   * <p>Determines whether a string is an IPv6 reference as defined
1929   * by RFC 2732, where IPv6address is defined in RFC 2373. The
1930   * IPv6 address is parsed according to Section 2.2 of RFC 2373,
1931   * with the additional constraint that the address be composed of
1932   * 128 bits of information.</p>
1933   *
1934   * <p><code>IPv6reference = "[" IPv6address "]"</code></p>
1935   *
1936   * <p>Note: The BNF expressed in RFC 2373 Appendix B does not
1937   * accurately describe section 2.2, and was in fact removed from
1938   * RFC 3513, the successor of RFC 2373.</p>
1939   *
1940   * @return true if the string is a syntactically valid IPv6 reference
1941   */

1942  public static boolean isWellFormedIPv6Reference(String JavaDoc address) {
1943
1944      int addrLength = address.length();
1945      int index = 1;
1946      int end = addrLength-1;
1947      
1948      // Check if string is a potential match for IPv6reference.
1949
if (!(addrLength > 2 && address.charAt(0) == '['
1950          && address.charAt(end) == ']')) {
1951          return false;
1952      }
1953      
1954      // Counter for the number of 16-bit sections read in the address.
1955
int [] counter = new int[1];
1956      
1957      // Scan hex sequence before possible '::' or IPv4 address.
1958
index = scanHexSequence(address, index, end, counter);
1959      if (index == -1) {
1960          return false;
1961      }
1962      // Address must contain 128-bits of information.
1963
else if (index == end) {
1964          return (counter[0] == 8);
1965      }
1966      
1967      if (index+1 < end && address.charAt(index) == ':') {
1968          if (address.charAt(index+1) == ':') {
1969              // '::' represents at least one 16-bit group of zeros.
1970
if (++counter[0] > 8) {
1971                  return false;
1972              }
1973              index += 2;
1974              // Trailing zeros will fill out the rest of the address.
1975
if (index == end) {
1976                 return true;
1977              }
1978          }
1979          // If the second character wasn't ':', in order to be valid,
1980
// the remainder of the string must match IPv4Address,
1981
// and we must have read exactly 6 16-bit groups.
1982
else {
1983              return (counter[0] == 6) &&
1984                  isWellFormedIPv4Address(address.substring(index+1, end));
1985          }
1986      }
1987      else {
1988          return false;
1989      }
1990      
1991      // 3. Scan hex sequence after '::'.
1992
int prevCount = counter[0];
1993      index = scanHexSequence(address, index, end, counter);
1994
1995      // We've either reached the end of the string, the address ends in
1996
// an IPv4 address, or it is invalid. scanHexSequence has already
1997
// made sure that we have the right number of bits.
1998
return (index == end) ||
1999          (index != -1 && isWellFormedIPv4Address(
2000          address.substring((counter[0] > prevCount) ? index+1 : index, end)));
2001  }
2002  
2003  /**
2004   * Helper method for isWellFormedIPv6Reference which scans the
2005   * hex sequences of an IPv6 address. It returns the index of the
2006   * next character to scan in the address, or -1 if the string
2007   * cannot match a valid IPv6 address.
2008   *
2009   * @param address the string to be scanned
2010   * @param index the beginning index (inclusive)
2011   * @param end the ending index (exclusive)
2012   * @param counter a counter for the number of 16-bit sections read
2013   * in the address
2014   *
2015   * @return the index of the next character to scan, or -1 if the
2016   * string cannot match a valid IPv6 address
2017   */

2018  private static int scanHexSequence (String JavaDoc address, int index, int end, int [] counter) {
2019    
2020      char testChar;
2021      int numDigits = 0;
2022      int start = index;
2023      
2024      // Trying to match the following productions:
2025
// hexseq = hex4 *( ":" hex4)
2026
// hex4 = 1*4HEXDIG
2027
for (; index < end; ++index) {
2028        testChar = address.charAt(index);
2029        if (testChar == ':') {
2030            // IPv6 addresses are 128-bit, so there can be at most eight sections.
2031
if (numDigits > 0 && ++counter[0] > 8) {
2032                return -1;
2033            }
2034            // This could be '::'.
2035
if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':')) {
2036                return index;
2037            }
2038            numDigits = 0;
2039        }
2040        // This might be invalid or an IPv4address. If it's potentially an IPv4address,
2041
// backup to just after the last valid character that matches hexseq.
2042
else if (!isHex(testChar)) {
2043            if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 6) {
2044                int back = index - numDigits - 1;
2045                return (back >= start) ? back : (back+1);
2046            }
2047            return -1;
2048        }
2049        // There can be at most 4 hex digits per group.
2050
else if (++numDigits > 4) {
2051            return -1;
2052        }
2053      }
2054      return (numDigits > 0 && ++counter[0] <= 8) ? end : -1;
2055  }
2056
2057
2058 /**
2059  * Determine whether a char is a digit.
2060  *
2061  * @return true if the char is betweeen '0' and '9', false otherwise
2062  */

2063  private static boolean isDigit(char p_char) {
2064    return p_char >= '0' && p_char <= '9';
2065  }
2066
2067 /**
2068  * Determine whether a character is a hexadecimal character.
2069  *
2070  * @return true if the char is betweeen '0' and '9', 'a' and 'f'
2071  * or 'A' and 'F', false otherwise
2072  */

2073  private static boolean isHex(char p_char) {
2074    return (p_char <= 'f' && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0);
2075  }
2076
2077 /**
2078  * Determine whether a char is an alphabetic character: a-z or A-Z
2079  *
2080  * @return true if the char is alphabetic, false otherwise
2081  */

2082  private static boolean isAlpha(char p_char) {
2083      return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z' ));
2084  }
2085
2086 /**
2087  * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
2088  *
2089  * @return true if the char is alphanumeric, false otherwise
2090  */

2091  private static boolean isAlphanum(char p_char) {
2092     return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0);
2093  }
2094
2095 /**
2096  * Determine whether a character is a reserved character:
2097  * ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']'
2098  *
2099  * @return true if the string contains any reserved characters
2100  */

2101  private static boolean isReservedCharacter(char p_char) {
2102     return (p_char <= ']' && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0);
2103  }
2104
2105 /**
2106  * Determine whether a char is an unreserved character.
2107  *
2108  * @return true if the char is unreserved, false otherwise
2109  */

2110  private static boolean isUnreservedCharacter(char p_char) {
2111     return (p_char <= '~' && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0);
2112  }
2113
2114 /**
2115  * Determine whether a char is a URI character (reserved or
2116  * unreserved, not including '%' for escaped octets).
2117  *
2118  * @return true if the char is a URI character, false otherwise
2119  */

2120  private static boolean isURICharacter (char p_char) {
2121      return (p_char <= '~' && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0);
2122  }
2123
2124 /**
2125  * Determine whether a char is a scheme character.
2126  *
2127  * @return true if the char is a scheme character, false otherwise
2128  */

2129  private static boolean isSchemeCharacter (char p_char) {
2130      return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0);
2131  }
2132
2133 /**
2134  * Determine whether a char is a userinfo character.
2135  *
2136  * @return true if the char is a userinfo character, false otherwise
2137  */

2138  private static boolean isUserinfoCharacter (char p_char) {
2139      return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0);
2140  }
2141  
2142 /**
2143  * Determine whether a char is a path character.
2144  *
2145  * @return true if the char is a path character, false otherwise
2146  */

2147  private static boolean isPathCharacter (char p_char) {
2148      return (p_char <= '~' && (fgLookupTable[p_char] & MASK_PATH_CHARACTER) != 0);
2149  }
2150
2151
2152 /**
2153  * Determine whether a given string contains only URI characters (also
2154  * called "uric" in RFC 2396). uric consist of all reserved
2155  * characters, unreserved characters and escaped characters.
2156  *
2157  * @return true if the string is comprised of uric, false otherwise
2158  */

2159  private static boolean isURIString(String JavaDoc p_uric) {
2160    if (p_uric == null) {
2161      return false;
2162    }
2163    int end = p_uric.length();
2164    char testChar = '\0';
2165    for (int i = 0; i < end; i++) {
2166      testChar = p_uric.charAt(i);
2167      if (testChar == '%') {
2168        if (i+2 >= end ||
2169            !isHex(p_uric.charAt(i+1)) ||
2170            !isHex(p_uric.charAt(i+2))) {
2171          return false;
2172        }
2173        else {
2174          i += 2;
2175          continue;
2176        }
2177      }
2178      if (isURICharacter(testChar)) {
2179          continue;
2180      }
2181      else {
2182        return false;
2183      }
2184    }
2185    return true;
2186  }
2187}
2188
Popular Tags