KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xml > utils > URI


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 /*
17  * $Id: URI.java,v 1.11 2004/02/17 04:21:14 minchau Exp $
18  */

19 package org.apache.xml.utils;
20
21 import java.io.IOException JavaDoc;
22 import java.io.Serializable JavaDoc;
23
24 import org.apache.xml.res.XMLErrorResources;
25 import org.apache.xml.res.XMLMessages;
26
27 /**
28  * A class to represent a Uniform Resource Identifier (URI). This class
29  * is designed to handle the parsing of URIs and provide access to
30  * the various components (scheme, host, port, userinfo, path, query
31  * string and fragment) that may constitute a URI.
32  * <p>
33  * Parsing of a URI specification is done according to the URI
34  * syntax described in RFC 2396
35  * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
36  * of a scheme, followed by a colon (':'), followed by a scheme-specific
37  * part. For URIs that follow the "generic URI" syntax, the scheme-
38  * specific part begins with two slashes ("//") and may be followed
39  * by an authority segment (comprised of user information, host, and
40  * port), path segment, query segment and fragment. Note that RFC 2396
41  * no longer specifies the use of the parameters segment and excludes
42  * the "user:password" syntax as part of the authority segment. If
43  * "user:password" appears in a URI, the entire user/password string
44  * is stored as userinfo.
45  * <p>
46  * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
47  * the entire scheme-specific part is treated as the "path" portion
48  * of the URI.
49  * <p>
50  * Note that, unlike the java.net.URL class, this class does not provide
51  * any built-in network access functionality nor does it provide any
52  * scheme-specific functionality (for example, it does not know a
53  * default port for a specific scheme). Rather, it only knows the
54  * grammar and basic set of operations that can be applied to a URI.
55  *
56  *
57  */

58 public class URI implements Serializable JavaDoc
59 {
60
61   /**
62    * MalformedURIExceptions are thrown in the process of building a URI
63    * or setting fields on a URI when an operation would result in an
64    * invalid URI specification.
65    *
66    */

67   public static class MalformedURIException extends IOException JavaDoc
68   {
69
70     /**
71      * Constructs a <code>MalformedURIException</code> with no specified
72      * detail message.
73      */

74     public MalformedURIException()
75     {
76       super();
77     }
78
79     /**
80      * Constructs a <code>MalformedURIException</code> with the
81      * specified detail message.
82      *
83      * @param p_msg the detail message.
84      */

85     public MalformedURIException(String JavaDoc p_msg)
86     {
87       super(p_msg);
88     }
89   }
90
91   /** reserved characters */
92   private static final String JavaDoc RESERVED_CHARACTERS = ";/?:@&=+$,";
93
94   /**
95    * URI punctuation mark characters - these, combined with
96    * alphanumerics, constitute the "unreserved" characters
97    */

98   private static final String JavaDoc MARK_CHARACTERS = "-_.!~*'() ";
99
100   /** scheme can be composed of alphanumerics and these characters */
101   private static final String JavaDoc SCHEME_CHARACTERS = "+-.";
102
103   /**
104    * userinfo can be composed of unreserved, escaped and these
105    * characters
106    */

107   private static final String JavaDoc USERINFO_CHARACTERS = ";:&=+$,";
108
109   /** Stores the scheme (usually the protocol) for this URI.
110    * @serial */

111   private String JavaDoc m_scheme = null;
112
113   /** If specified, stores the userinfo for this URI; otherwise null.
114    * @serial */

115   private String JavaDoc m_userinfo = null;
116
117   /** If specified, stores the host for this URI; otherwise null.
118    * @serial */

119   private String JavaDoc m_host = null;
120
121   /** If specified, stores the port for this URI; otherwise -1.
122    * @serial */

123   private int m_port = -1;
124
125   /** If specified, stores the path for this URI; otherwise null.
126    * @serial */

127   private String JavaDoc m_path = null;
128
129   /**
130    * If specified, stores the query string for this URI; otherwise
131    * null.
132    * @serial
133    */

134   private String JavaDoc m_queryString = null;
135
136   /** If specified, stores the fragment for this URI; otherwise null.
137    * @serial */

138   private String JavaDoc m_fragment = null;
139
140   /** Indicate whether in DEBUG mode */
141   private static boolean DEBUG = false;
142
143   /**
144    * Construct a new and uninitialized URI.
145    */

146   public URI(){}
147
/**
 * Creates a URI as a copy of another one: every field of the new
 * URI takes the value of the matching field of <code>p_other</code>.
 *
 * @param p_other the URI to duplicate (must not be null)
 */
public URI(URI p_other)
{
  this.initialize(p_other);
}
158
/**
 * Creates a URI by parsing a specification string, with no base URI.
 * A specification in "generic URI" syntax (two slashes after the
 * first colon) is split into scheme, userinfo, host, port, path,
 * query string and fragment as applicable; any other specification
 * is split into a scheme and a scheme-specific part only, the latter
 * being stored as the path.
 *
 * @param p_uriSpec the URI specification (must not be null or empty)
 *
 * @throws MalformedURIException if p_uriSpec breaks a syntax rule
 */
public URI(String p_uriSpec) throws MalformedURIException
{
  // delegate to the base-aware constructor with "no base"
  this((URI) null, p_uriSpec);
}
178
/**
 * Creates a URI from a specification string that may be relative,
 * interpreting it against the given base URI.
 *
 * @param p_base the base URI (must not be null when p_uriSpec is
 *               null or empty)
 * @param p_uriSpec the URI specification (must not be null or empty
 *                  when p_base is null)
 *
 * @throws MalformedURIException if p_uriSpec breaks a syntax rule
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException
{
  this.initialize(p_base, p_uriSpec);
}
195
196   /**
197    * Construct a new URI that does not follow the generic URI syntax.
198    * Only the scheme and scheme-specific part (stored as the path) are
199    * initialized.
200    *
201    * @param p_scheme the URI scheme (cannot be null or empty)
202    * @param p_schemeSpecificPart the scheme-specific part (cannot be
203    * null or empty)
204    *
205    * @throws MalformedURIException if p_scheme violates any
206    * syntax rules
207    */

208   public URI(String JavaDoc p_scheme, String JavaDoc p_schemeSpecificPart)
209           throws MalformedURIException
210   {
211
212     if (p_scheme == null || p_scheme.trim().length() == 0)
213     {
214       throw new MalformedURIException(
215         "Cannot construct URI with null/empty scheme!");
216     }
217
218     if (p_schemeSpecificPart == null
219             || p_schemeSpecificPart.trim().length() == 0)
220     {
221       throw new MalformedURIException(
222         "Cannot construct URI with null/empty scheme-specific part!");
223     }
224
225     setScheme(p_scheme);
226     setPath(p_schemeSpecificPart);
227   }
228
/**
 * Creates a generic-syntax URI from its parts, without userinfo and
 * without a port. This simply forwards to the seven-argument
 * constructor, passing null for the userinfo and -1 for the port.
 *
 * @param p_scheme the URI scheme (must not be null or empty)
 * @param p_host the hostname or IPv4 address of the URI
 * @param p_path the URI path; it must not contain a '?' when
 *               p_queryString is given, nor a '#' when p_fragment
 *               is given
 * @param p_queryString the URI query string
 * @param p_fragment the URI fragment
 *
 * @throws MalformedURIException if an argument breaks a syntax or
 *                               semantic rule
 */
public URI(String p_scheme, String p_host, String p_path,
           String p_queryString, String p_fragment)
        throws MalformedURIException
{
  this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
255
/**
 * Creates a generic-syntax URI from its parts. A few cross-field
 * checks are made here; each value is then handed to its setter,
 * which performs the per-component validation.
 *
 * @param p_scheme the URI scheme (must not be null or blank)
 * @param p_userinfo the URI userinfo (must be null when p_host is
 *                   null)
 * @param p_host the hostname or IPv4 address of the URI
 * @param p_port the URI port, or -1 for "unspecified" (must be -1
 *               when p_host is null)
 * @param p_path the URI path; it must not contain a '?' when
 *               p_queryString is given, nor a '#' when p_fragment
 *               is given
 * @param p_queryString the URI query string
 * @param p_fragment the URI fragment
 *
 * @throws MalformedURIException if an argument breaks a syntax or
 *                               semantic rule
 */
public URI(String p_scheme, String p_userinfo, String p_host, int p_port,
           String p_path, String p_queryString, String p_fragment)
        throws MalformedURIException
{
  if (p_scheme == null || p_scheme.trim().length() == 0)
  {
    // "Scheme is required!"
    throw new MalformedURIException(
      XMLMessages.createXMLMessage(
        XMLErrorResources.ER_SCHEME_REQUIRED, null));
  }

  // userinfo and port are only meaningful together with a host
  if (p_host == null && p_userinfo != null)
  {
    // "Userinfo may not be specified if host is not specified!"
    throw new MalformedURIException(
      XMLMessages.createXMLMessage(
        XMLErrorResources.ER_NO_USERINFO_IF_NO_HOST, null));
  }

  if (p_host == null && p_port != -1)
  {
    // "Port may not be specified if host is not specified!"
    throw new MalformedURIException(
      XMLMessages.createXMLMessage(
        XMLErrorResources.ER_NO_PORT_IF_NO_HOST, null));
  }

  // query and fragment may be given in the path or separately,
  // but not both ways at once
  if (p_path != null && p_queryString != null
          && p_path.indexOf('?') != -1)
  {
    // "Query string cannot be specified in path and query string!"
    throw new MalformedURIException(
      XMLMessages.createXMLMessage(
        XMLErrorResources.ER_NO_QUERY_STRING_IN_PATH, null));
  }

  if (p_path != null && p_fragment != null
          && p_path.indexOf('#') != -1)
  {
    // "Fragment cannot be specified in both the path and fragment!"
    throw new MalformedURIException(
      XMLMessages.createXMLMessage(
        XMLErrorResources.ER_NO_FRAGMENT_STRING_IN_PATH, null));
  }

  // the host is set before port and userinfo
  setScheme(p_scheme);
  setHost(p_host);
  setPort(p_port);
  setUserinfo(p_userinfo);
  setPath(p_path);
  setQueryString(p_queryString);
  setFragment(p_fragment);
}
328
329   /**
330    * Initialize all fields of this URI from another URI.
331    *
332    * @param p_other the URI to copy (cannot be null)
333    */

334   private void initialize(URI p_other)
335   {
336
337     m_scheme = p_other.getScheme();
338     m_userinfo = p_other.getUserinfo();
339     m_host = p_other.getHost();
340     m_port = p_other.getPort();
341     m_path = p_other.getPath();
342     m_queryString = p_other.getQueryString();
343     m_fragment = p_other.getFragment();
344   }
345
346   /**
347    * Initializes this URI from a base URI and a URI specification string.
348    * See RFC 2396 Section 4 and Appendix B for specifications on parsing
349    * the URI and Section 5 for specifications on resolving relative URIs
350    * and relative paths.
351    *
352    * @param p_base the base URI (may be null if p_uriSpec is an absolute
353    * URI)
354    * @param p_uriSpec the URI spec string which may be an absolute or
355    * relative URI (can only be null/empty if p_base
356    * is not null)
357    *
358    * @throws MalformedURIException if p_base is null and p_uriSpec
359    * is not an absolute URI or if
360    * p_uriSpec violates syntax rules
361    */

362   private void initialize(URI p_base, String JavaDoc p_uriSpec)
363           throws MalformedURIException
364   {
365
366     if (p_base == null
367             && (p_uriSpec == null || p_uriSpec.trim().length() == 0))
368     {
369       throw new MalformedURIException(
370         XMLMessages.createXMLMessage(XMLErrorResources.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters.");
371
}
372
373     // just make a copy of the base if spec is empty
374
if (p_uriSpec == null || p_uriSpec.trim().length() == 0)
375     {
376       initialize(p_base);
377
378       return;
379     }
380
381     String JavaDoc uriSpec = p_uriSpec.trim();
382     int uriSpecLen = uriSpec.length();
383     int index = 0;
384
385     // check for scheme
386
int colonIndex = uriSpec.indexOf(':');
387     if (colonIndex < 0)
388     {
389       if (p_base == null)
390       {
391         throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_SCHEME_IN_URI, new Object JavaDoc[]{uriSpec})); //"No scheme found in URI: "+uriSpec);
392
}
393     }
394     else
395     {
396       initializeScheme(uriSpec);
397       uriSpec = uriSpec.substring(colonIndex+1);
398       uriSpecLen = uriSpec.length();
399     }
400
401     // two slashes means generic URI syntax, so we get the authority
402
if (((index + 1) < uriSpecLen)
403             && (uriSpec.substring(index).startsWith("//")))
404     {
405       index += 2;
406
407       int startPos = index;
408
409       // get authority - everything up to path, query or fragment
410
char testChar = '\0';
411
412       while (index < uriSpecLen)
413       {
414         testChar = uriSpec.charAt(index);
415
416         if (testChar == '/' || testChar == '?' || testChar == '#')
417         {
418           break;
419         }
420
421         index++;
422       }
423
424       // if we found authority, parse it out, otherwise we set the
425
// host to empty string
426
if (index > startPos)
427       {
428         initializeAuthority(uriSpec.substring(startPos, index));
429       }
430       else
431       {
432         m_host = "";
433       }
434     }
435
436     initializePath(uriSpec.substring(index));
437
438     // Resolve relative URI to base URI - see RFC 2396 Section 5.2
439
// In some cases, it might make more sense to throw an exception
440
// (when scheme is specified is the string spec and the base URI
441
// is also specified, for example), but we're just following the
442
// RFC specifications
443
if (p_base != null)
444     {
445
446       // check to see if this is the current doc - RFC 2396 5.2 #2
447
// note that this is slightly different from the RFC spec in that
448
// we don't include the check for query string being null
449
// - this handles cases where the urispec is just a query
450
// string or a fragment (e.g. "?y" or "#s") -
451
// see <http://www.ics.uci.edu/~fielding/url/test1.html> which
452
// identified this as a bug in the RFC
453
if (m_path.length() == 0 && m_scheme == null && m_host == null)
454       {
455         m_scheme = p_base.getScheme();
456         m_userinfo = p_base.getUserinfo();
457         m_host = p_base.getHost();
458         m_port = p_base.getPort();
459         m_path = p_base.getPath();
460
461         if (m_queryString == null)
462         {
463           m_queryString = p_base.getQueryString();
464         }
465
466         return;
467       }
468
469       // check for scheme - RFC 2396 5.2 #3
470
// if we found a scheme, it means absolute URI, so we're done
471
if (m_scheme == null)
472       {
473         m_scheme = p_base.getScheme();
474       }
475
476       // check for authority - RFC 2396 5.2 #4
477
// if we found a host, then we've got a network path, so we're done
478
if (m_host == null)
479       {
480         m_userinfo = p_base.getUserinfo();
481         m_host = p_base.getHost();
482         m_port = p_base.getPort();
483       }
484       else
485       {
486         return;
487       }
488
489       // check for absolute path - RFC 2396 5.2 #5
490
if (m_path.length() > 0 && m_path.startsWith("/"))
491       {
492         return;
493       }
494
495       // if we get to this point, we need to resolve relative path
496
// RFC 2396 5.2 #6
497
String JavaDoc path = new String JavaDoc();
498       String JavaDoc basePath = p_base.getPath();
499
500       // 6a - get all but the last segment of the base URI path
501
if (basePath != null)
502       {
503         int lastSlash = basePath.lastIndexOf('/');
504
505         if (lastSlash != -1)
506         {
507           path = basePath.substring(0, lastSlash + 1);
508         }
509       }
510
511       // 6b - append the relative URI path
512
path = path.concat(m_path);
513
514       // 6c - remove all "./" where "." is a complete path segment
515
index = -1;
516
517       while ((index = path.indexOf("/./")) != -1)
518       {
519         path = path.substring(0, index + 1).concat(path.substring(index + 3));
520       }
521
522       // 6d - remove "." if path ends with "." as a complete path segment
523
if (path.endsWith("/."))
524       {
525         path = path.substring(0, path.length() - 1);
526       }
527
528       // 6e - remove all "<segment>/../" where "<segment>" is a complete
529
// path segment not equal to ".."
530
index = -1;
531
532       int segIndex = -1;
533       String JavaDoc tempString = null;
534
535       while ((index = path.indexOf("/../")) > 0)
536       {
537         tempString = path.substring(0, path.indexOf("/../"));
538         segIndex = tempString.lastIndexOf('/');
539
540         if (segIndex != -1)
541         {
542           if (!tempString.substring(segIndex++).equals(".."))
543           {
544             path = path.substring(0, segIndex).concat(path.substring(index
545                     + 4));
546           }
547         }
548       }
549
550       // 6f - remove ending "<segment>/.." where "<segment>" is a
551
// complete path segment
552
if (path.endsWith("/.."))
553       {
554         tempString = path.substring(0, path.length() - 3);
555         segIndex = tempString.lastIndexOf('/');
556
557         if (segIndex != -1)
558         {
559           path = path.substring(0, segIndex + 1);
560         }
561       }
562
563       m_path = path;
564     }
565   }
566
567   /**
568    * Initialize the scheme for this URI from a URI string spec.
569    *
570    * @param p_uriSpec the URI specification (cannot be null)
571    *
572    * @throws MalformedURIException if URI does not have a conformant
573    * scheme
574    */

575   private void initializeScheme(String JavaDoc p_uriSpec) throws MalformedURIException
576   {
577
578     int uriSpecLen = p_uriSpec.length();
579     int index = 0;
580     String JavaDoc scheme = null;
581     char testChar = '\0';
582
583     while (index < uriSpecLen)
584     {
585       testChar = p_uriSpec.charAt(index);
586
587       if (testChar == ':' || testChar == '/' || testChar == '?'
588               || testChar == '#')
589       {
590         break;
591       }
592
593       index++;
594     }
595
596     scheme = p_uriSpec.substring(0, index);
597
598     if (scheme.length() == 0)
599     {
600       throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI.");
601
}
602     else
603     {
604       setScheme(scheme);
605     }
606   }
607
608   /**
609    * Initialize the authority (userinfo, host and port) for this
610    * URI from a URI string spec.
611    *
612    * @param p_uriSpec the URI specification (cannot be null)
613    *
614    * @throws MalformedURIException if p_uriSpec violates syntax rules
615    */

616   private void initializeAuthority(String JavaDoc p_uriSpec)
617           throws MalformedURIException
618   {
619
620     int index = 0;
621     int start = 0;
622     int end = p_uriSpec.length();
623     char testChar = '\0';
624     String JavaDoc userinfo = null;
625
626     // userinfo is everything up @
627
if (p_uriSpec.indexOf('@', start) != -1)
628     {
629       while (index < end)
630       {
631         testChar = p_uriSpec.charAt(index);
632
633         if (testChar == '@')
634         {
635           break;
636         }
637
638         index++;
639       }
640
641       userinfo = p_uriSpec.substring(start, index);
642
643       index++;
644     }
645
646     // host is everything up to ':'
647
String JavaDoc host = null;
648
649     start = index;
650
651     while (index < end)
652     {
653       testChar = p_uriSpec.charAt(index);
654
655       if (testChar == ':')
656       {
657         break;
658       }
659
660       index++;
661     }
662
663     host = p_uriSpec.substring(start, index);
664
665     int port = -1;
666
667     if (host.length() > 0)
668     {
669
670       // port
671
if (testChar == ':')
672       {
673         index++;
674
675         start = index;
676
677         while (index < end)
678         {
679           index++;
680         }
681
682         String JavaDoc portStr = p_uriSpec.substring(start, index);
683
684         if (portStr.length() > 0)
685         {
686           for (int i = 0; i < portStr.length(); i++)
687           {
688             if (!isDigit(portStr.charAt(i)))
689             {
690               throw new MalformedURIException(
691                 portStr + " is invalid. Port should only contain digits!");
692             }
693           }
694
695           try
696           {
697             port = Integer.parseInt(portStr);
698           }
699           catch (NumberFormatException JavaDoc nfe)
700           {
701
702             // can't happen
703
}
704         }
705       }
706     }
707
708     setHost(host);
709     setPort(port);
710     setUserinfo(userinfo);
711   }
712
713   /**
714    * Initialize the path for this URI from a URI string spec.
715    *
716    * @param p_uriSpec the URI specification (cannot be null)
717    *
718    * @throws MalformedURIException if p_uriSpec violates syntax rules
719    */

720   private void initializePath(String JavaDoc p_uriSpec) throws MalformedURIException
721   {
722
723     if (p_uriSpec == null)
724     {
725       throw new MalformedURIException(
726         "Cannot initialize path from null string!");
727     }
728
729     int index = 0;
730     int start = 0;
731     int end = p_uriSpec.length();
732     char testChar = '\0';
733
734     // path - everything up to query string or fragment
735
while (index < end)
736     {
737       testChar = p_uriSpec.charAt(index);
738
739       if (testChar == '?' || testChar == '#')
740       {
741         break;
742       }
743
744       // check for valid escape sequence
745
if (testChar == '%')
746       {
747         if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
748                 ||!isHex(p_uriSpec.charAt(index + 2)))
749         {
750           throw new MalformedURIException(
751             XMLMessages.createXMLMessage(XMLErrorResources.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!");
752
}
753       }
754       else if (!isReservedCharacter(testChar)
755                &&!isUnreservedCharacter(testChar))
756       {
757         if ('\\' != testChar)
758           throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_PATH_INVALID_CHAR, new Object JavaDoc[]{String.valueOf(testChar)})); //"Path contains invalid character: "
759
//+ testChar);
760
}
761
762       index++;
763     }
764
765     m_path = p_uriSpec.substring(start, index);
766
767     // query - starts with ? and up to fragment or end
768
if (testChar == '?')
769     {
770       index++;
771
772       start = index;
773
774       while (index < end)
775       {
776         testChar = p_uriSpec.charAt(index);
777
778         if (testChar == '#')
779         {
780           break;
781         }
782
783         if (testChar == '%')
784         {
785           if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
786                   ||!isHex(p_uriSpec.charAt(index + 2)))
787           {
788             throw new MalformedURIException(
789               "Query string contains invalid escape sequence!");
790           }
791         }
792         else if (!isReservedCharacter(testChar)
793                  &&!isUnreservedCharacter(testChar))
794         {
795           throw new MalformedURIException(
796             "Query string contains invalid character:" + testChar);
797         }
798
799         index++;
800       }
801
802       m_queryString = p_uriSpec.substring(start, index);
803     }
804
805     // fragment - starts with #
806
if (testChar == '#')
807     {
808       index++;
809
810       start = index;
811
812       while (index < end)
813       {
814         testChar = p_uriSpec.charAt(index);
815
816         if (testChar == '%')
817         {
818           if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
819                   ||!isHex(p_uriSpec.charAt(index + 2)))
820           {
821             throw new MalformedURIException(
822               "Fragment contains invalid escape sequence!");
823           }
824         }
825         else if (!isReservedCharacter(testChar)
826                  &&!isUnreservedCharacter(testChar))
827         {
828           throw new MalformedURIException(
829             "Fragment contains invalid character:" + testChar);
830         }
831
832         index++;
833       }
834
835       m_fragment = p_uriSpec.substring(start, index);
836     }
837   }
838
839   /**
840    * Get the scheme for this URI.
841    *
842    * @return the scheme for this URI
843    */

844   public String JavaDoc getScheme()
845   {
846     return m_scheme;
847   }
848
849   /**
850    * Get the scheme-specific part for this URI (everything following the
851    * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
852    *
853    * @return the scheme-specific part for this URI
854    */

855   public String JavaDoc getSchemeSpecificPart()
856   {
857
858     StringBuffer JavaDoc schemespec = new StringBuffer JavaDoc();
859
860     if (m_userinfo != null || m_host != null || m_port != -1)
861     {
862       schemespec.append("//");
863     }
864
865     if (m_userinfo != null)
866     {
867       schemespec.append(m_userinfo);
868       schemespec.append('@');
869     }
870
871     if (m_host != null)
872     {
873       schemespec.append(m_host);
874     }
875
876     if (m_port != -1)
877     {
878       schemespec.append(':');
879       schemespec.append(m_port);
880     }
881
882     if (m_path != null)
883     {
884       schemespec.append((m_path));
885     }
886
887     if (m_queryString != null)
888     {
889       schemespec.append('?');
890       schemespec.append(m_queryString);
891     }
892
893     if (m_fragment != null)
894     {
895       schemespec.append('#');
896       schemespec.append(m_fragment);
897     }
898
899     return schemespec.toString();
900   }
901
902   /**
903    * Get the userinfo for this URI.
904    *
905    * @return the userinfo for this URI (null if not specified).
906    */

907   public String JavaDoc getUserinfo()
908   {
909     return m_userinfo;
910   }
911
912   /**
913    * Get the host for this URI.
914    *
915    * @return the host for this URI (null if not specified).
916    */

917   public String JavaDoc getHost()
918   {
919     return m_host;
920   }
921
922   /**
923    * Get the port for this URI.
924    *
925    * @return the port for this URI (-1 if not specified).
926    */

927   public int getPort()
928   {
929     return m_port;
930   }
931
932   /**
933    * Get the path for this URI (optionally with the query string and
934    * fragment).
935    *
936    * @param p_includeQueryString if true (and query string is not null),
937    * then a "?" followed by the query string
938    * will be appended
939    * @param p_includeFragment if true (and fragment is not null),
940    * then a "#" followed by the fragment
941    * will be appended
942    *
943    * @return the path for this URI possibly including the query string
944    * and fragment
945    */

946   public String JavaDoc getPath(boolean p_includeQueryString,
947                         boolean p_includeFragment)
948   {
949
950     StringBuffer JavaDoc pathString = new StringBuffer JavaDoc(m_path);
951
952     if (p_includeQueryString && m_queryString != null)
953     {
954       pathString.append('?');
955       pathString.append(m_queryString);
956     }
957
958     if (p_includeFragment && m_fragment != null)
959     {
960       pathString.append('#');
961       pathString.append(m_fragment);
962     }
963
964     return pathString.toString();
965   }
966
967   /**
968    * Get the path for this URI. Note that the value returned is the path
969    * only and does not include the query string or fragment.
970    *
971    * @return the path for this URI.
972    */

973   public String JavaDoc getPath()
974   {
975     return m_path;
976   }
977
978   /**
979    * Get the query string for this URI.
980    *
981    * @return the query string for this URI. Null is returned if there
982    * was no "?" in the URI spec, empty string if there was a
983    * "?" but no query string following it.
984    */

985   public String JavaDoc getQueryString()
986   {
987     return m_queryString;
988   }
989
990   /**
991    * Get the fragment for this URI.
992    *
993    * @return the fragment for this URI. Null is returned if there
994    * was no "#" in the URI spec, empty string if there was a
995    * "#" but no fragment following it.
996    */

997   public String JavaDoc getFragment()
998   {
999     return m_fragment;
1000  }
1001
1002  /**
1003   * Set the scheme for this URI. The scheme is converted to lowercase
1004   * before it is set.
1005   *
1006   * @param p_scheme the scheme for this URI (cannot be null)
1007   *
1008   * @throws MalformedURIException if p_scheme is not a conformant
1009   * scheme name
1010   */

1011  public void setScheme(String JavaDoc p_scheme) throws MalformedURIException
1012  {
1013
1014    if (p_scheme == null)
1015    {
1016      throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!");
1017
}
1018
1019    if (!isConformantSchemeName(p_scheme))
1020    {
1021      throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant.");
1022
}
1023
1024    m_scheme = p_scheme.toLowerCase();
1025  }
1026
1027  /**
1028   * Set the userinfo for this URI. If a non-null value is passed in and
1029   * the host value is null, then an exception is thrown.
1030   *
1031   * @param p_userinfo the userinfo for this URI
1032   *
1033   * @throws MalformedURIException if p_userinfo contains invalid
1034   * characters
1035   */

1036  public void setUserinfo(String JavaDoc p_userinfo) throws MalformedURIException
1037  {
1038
1039    if (p_userinfo == null)
1040    {
1041      m_userinfo = null;
1042    }
1043    else
1044    {
1045      if (m_host == null)
1046      {
1047        throw new MalformedURIException(
1048          "Userinfo cannot be set when host is null!");
1049      }
1050
1051      // userinfo can contain alphanumerics, mark characters, escaped
1052
// and ';',':','&','=','+','$',','
1053
int index = 0;
1054      int end = p_userinfo.length();
1055      char testChar = '\0';
1056
1057      while (index < end)
1058      {
1059        testChar = p_userinfo.charAt(index);
1060
1061        if (testChar == '%')
1062        {
1063          if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1))
1064                  ||!isHex(p_userinfo.charAt(index + 2)))
1065          {
1066            throw new MalformedURIException(
1067              "Userinfo contains invalid escape sequence!");
1068          }
1069        }
1070        else if (!isUnreservedCharacter(testChar)
1071                 && USERINFO_CHARACTERS.indexOf(testChar) == -1)
1072        {
1073          throw new MalformedURIException(
1074            "Userinfo contains invalid character:" + testChar);
1075        }
1076
1077        index++;
1078      }
1079    }
1080
1081    m_userinfo = p_userinfo;
1082  }
1083
1084  /**
1085   * Set the host for this URI. If null is passed in, the userinfo
1086   * field is also set to null and the port is set to -1.
1087   *
1088   * @param p_host the host for this URI
1089   *
1090   * @throws MalformedURIException if p_host is not a valid IP
1091   * address or DNS hostname.
1092   */

1093  public void setHost(String JavaDoc p_host) throws MalformedURIException
1094  {
1095
1096    if (p_host == null || p_host.trim().length() == 0)
1097    {
1098      m_host = p_host;
1099      m_userinfo = null;
1100      m_port = -1;
1101    }
1102    else if (!isWellFormedAddress(p_host))
1103    {
1104      throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!");
1105
}
1106
1107    m_host = p_host;
1108  }
1109
1110  /**
1111   * Set the port for this URI. -1 is used to indicate that the port is
1112   * not specified, otherwise valid port numbers are between 0 and 65535.
1113   * If a valid port number is passed in and the host field is null,
1114   * an exception is thrown.
1115   *
1116   * @param p_port the port number for this URI
1117   *
1118   * @throws MalformedURIException if p_port is not -1 and not a
1119   * valid port number
1120   */

1121  public void setPort(int p_port) throws MalformedURIException
1122  {
1123
1124    if (p_port >= 0 && p_port <= 65535)
1125    {
1126      if (m_host == null)
1127      {
1128        throw new MalformedURIException(
1129          XMLMessages.createXMLMessage(XMLErrorResources.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!");
1130
}
1131    }
1132    else if (p_port != -1)
1133    {
1134      throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_INVALID_PORT, null)); //"Invalid port number!");
1135
}
1136
1137    m_port = p_port;
1138  }
1139
1140  /**
1141   * Set the path for this URI. If the supplied path is null, then the
1142   * query string and fragment are set to null as well. If the supplied
1143   * path includes a query string and/or fragment, these fields will be
1144   * parsed and set as well. Note that, for URIs following the "generic
1145   * URI" syntax, the path specified should start with a slash.
1146   * For URIs that do not follow the generic URI syntax, this method
1147   * sets the scheme-specific part.
1148   *
1149   * @param p_path the path for this URI (may be null)
1150   *
1151   * @throws MalformedURIException if p_path contains invalid
1152   * characters
1153   */

1154  public void setPath(String JavaDoc p_path) throws MalformedURIException
1155  {
1156
1157    if (p_path == null)
1158    {
1159      m_path = null;
1160      m_queryString = null;
1161      m_fragment = null;
1162    }
1163    else
1164    {
1165      initializePath(p_path);
1166    }
1167  }
1168
1169  /**
1170   * Append to the end of the path of this URI. If the current path does
1171   * not end in a slash and the path to be appended does not begin with
1172   * a slash, a slash will be appended to the current path before the
1173   * new segment is added. Also, if the current path ends in a slash
1174   * and the new segment begins with a slash, the extra slash will be
1175   * removed before the new segment is appended.
1176   *
1177   * @param p_addToPath the new segment to be added to the current path
1178   *
1179   * @throws MalformedURIException if p_addToPath contains syntax
1180   * errors
1181   */

1182  public void appendPath(String JavaDoc p_addToPath) throws MalformedURIException
1183  {
1184
1185    if (p_addToPath == null || p_addToPath.trim().length() == 0)
1186    {
1187      return;
1188    }
1189
1190    if (!isURIString(p_addToPath))
1191    {
1192      throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_PATH_INVALID_CHAR, new Object JavaDoc[]{p_addToPath})); //"Path contains invalid character!");
1193
}
1194
1195    if (m_path == null || m_path.trim().length() == 0)
1196    {
1197      if (p_addToPath.startsWith("/"))
1198      {
1199        m_path = p_addToPath;
1200      }
1201      else
1202      {
1203        m_path = "/" + p_addToPath;
1204      }
1205    }
1206    else if (m_path.endsWith("/"))
1207    {
1208      if (p_addToPath.startsWith("/"))
1209      {
1210        m_path = m_path.concat(p_addToPath.substring(1));
1211      }
1212      else
1213      {
1214        m_path = m_path.concat(p_addToPath);
1215      }
1216    }
1217    else
1218    {
1219      if (p_addToPath.startsWith("/"))
1220      {
1221        m_path = m_path.concat(p_addToPath);
1222      }
1223      else
1224      {
1225        m_path = m_path.concat("/" + p_addToPath);
1226      }
1227    }
1228  }
1229
1230  /**
1231   * Set the query string for this URI. A non-null value is valid only
1232   * if this is an URI conforming to the generic URI syntax and
1233   * the path value is not null.
1234   *
1235   * @param p_queryString the query string for this URI
1236   *
1237   * @throws MalformedURIException if p_queryString is not null and this
1238   * URI does not conform to the generic
1239   * URI syntax or if the path is null
1240   */

1241  public void setQueryString(String JavaDoc p_queryString)
1242          throws MalformedURIException
1243  {
1244
1245    if (p_queryString == null)
1246    {
1247      m_queryString = null;
1248    }
1249    else if (!isGenericURI())
1250    {
1251      throw new MalformedURIException(
1252        "Query string can only be set for a generic URI!");
1253    }
1254    else if (getPath() == null)
1255    {
1256      throw new MalformedURIException(
1257        "Query string cannot be set when path is null!");
1258    }
1259    else if (!isURIString(p_queryString))
1260    {
1261      throw new MalformedURIException(
1262        "Query string contains invalid character!");
1263    }
1264    else
1265    {
1266      m_queryString = p_queryString;
1267    }
1268  }
1269
1270  /**
1271   * Set the fragment for this URI. A non-null value is valid only
1272   * if this is a URI conforming to the generic URI syntax and
1273   * the path value is not null.
1274   *
1275   * @param p_fragment the fragment for this URI
1276   *
1277   * @throws MalformedURIException if p_fragment is not null and this
1278   * URI does not conform to the generic
1279   * URI syntax or if the path is null
1280   */

1281  public void setFragment(String JavaDoc p_fragment) throws MalformedURIException
1282  {
1283
1284    if (p_fragment == null)
1285    {
1286      m_fragment = null;
1287    }
1288    else if (!isGenericURI())
1289    {
1290      throw new MalformedURIException(
1291        XMLMessages.createXMLMessage(XMLErrorResources.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!");
1292
}
1293    else if (getPath() == null)
1294    {
1295      throw new MalformedURIException(
1296        XMLMessages.createXMLMessage(XMLErrorResources.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!");
1297
}
1298    else if (!isURIString(p_fragment))
1299    {
1300      throw new MalformedURIException(XMLMessages.createXMLMessage(XMLErrorResources.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!");
1301
}
1302    else
1303    {
1304      m_fragment = p_fragment;
1305    }
1306  }
1307
1308  /**
1309   * Determines if the passed-in Object is equivalent to this URI.
1310   *
1311   * @param p_test the Object to test for equality.
1312   *
1313   * @return true if p_test is a URI with all values equal to this
1314   * URI, false otherwise
1315   */

1316  public boolean equals(Object JavaDoc p_test)
1317  {
1318
1319    if (p_test instanceof URI)
1320    {
1321      URI testURI = (URI) p_test;
1322
1323      if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals(
1324              testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals(
1325              testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals(
1326              testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals(
1327              testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals(
1328              testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals(
1329              testURI.m_fragment))))
1330      {
1331        return true;
1332      }
1333    }
1334
1335    return false;
1336  }
1337
1338  /**
1339   * Get the URI as a string specification. See RFC 2396 Section 5.2.
1340   *
1341   * @return the URI string specification
1342   */

1343  public String JavaDoc toString()
1344  {
1345
1346    StringBuffer JavaDoc uriSpecString = new StringBuffer JavaDoc();
1347
1348    if (m_scheme != null)
1349    {
1350      uriSpecString.append(m_scheme);
1351      uriSpecString.append(':');
1352    }
1353
1354    uriSpecString.append(getSchemeSpecificPart());
1355
1356    return uriSpecString.toString();
1357  }
1358
1359  /**
1360   * Get the indicator as to whether this URI uses the "generic URI"
1361   * syntax.
1362   *
1363   * @return true if this URI uses the "generic URI" syntax, false
1364   * otherwise
1365   */

1366  public boolean isGenericURI()
1367  {
1368
1369    // presence of the host (whether valid or empty) means
1370
// double-slashes which means generic uri
1371
return (m_host != null);
1372  }
1373
1374  /**
1375   * Determine whether a scheme conforms to the rules for a scheme name.
1376   * A scheme is conformant if it starts with an alphanumeric, and
1377   * contains only alphanumerics, '+','-' and '.'.
1378   *
1379   *
1380   * @param p_scheme The sheme name to check
1381   * @return true if the scheme is conformant, false otherwise
1382   */

1383  public static boolean isConformantSchemeName(String JavaDoc p_scheme)
1384  {
1385
1386    if (p_scheme == null || p_scheme.trim().length() == 0)
1387    {
1388      return false;
1389    }
1390
1391    if (!isAlpha(p_scheme.charAt(0)))
1392    {
1393      return false;
1394    }
1395
1396    char testChar;
1397
1398    for (int i = 1; i < p_scheme.length(); i++)
1399    {
1400      testChar = p_scheme.charAt(i);
1401
1402      if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1)
1403      {
1404        return false;
1405      }
1406    }
1407
1408    return true;
1409  }
1410
1411  /**
1412   * Determine whether a string is syntactically capable of representing
1413   * a valid IPv4 address or the domain name of a network host. A valid
1414   * IPv4 address consists of four decimal digit groups separated by a
1415   * '.'. A hostname consists of domain labels (each of which must
1416   * begin and end with an alphanumeric but may contain '-') separated
1417   * & by a '.'. See RFC 2396 Section 3.2.2.
1418   *
1419   *
1420   * @param p_address The address string to check
1421   * @return true if the string is a syntactically valid IPv4 address
1422   * or hostname
1423   */

1424  public static boolean isWellFormedAddress(String JavaDoc p_address)
1425  {
1426
1427    if (p_address == null)
1428    {
1429      return false;
1430    }
1431
1432    String JavaDoc address = p_address.trim();
1433    int addrLength = address.length();
1434
1435    if (addrLength == 0 || addrLength > 255)
1436    {
1437      return false;
1438    }
1439
1440    if (address.startsWith(".") || address.startsWith("-"))
1441    {
1442      return false;
1443    }
1444
1445    // rightmost domain label starting with digit indicates IP address
1446
// since top level domain label can only start with an alpha
1447
// see RFC 2396 Section 3.2.2
1448
int index = address.lastIndexOf('.');
1449
1450    if (address.endsWith("."))
1451    {
1452      index = address.substring(0, index).lastIndexOf('.');
1453    }
1454
1455    if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1)))
1456    {
1457      char testChar;
1458      int numDots = 0;
1459
1460      // make sure that 1) we see only digits and dot separators, 2) that
1461
// any dot separator is preceded and followed by a digit and
1462
// 3) that we find 3 dots
1463
for (int i = 0; i < addrLength; i++)
1464      {
1465        testChar = address.charAt(i);
1466
1467        if (testChar == '.')
1468        {
1469          if (!isDigit(address.charAt(i - 1))
1470                  || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1))))
1471          {
1472            return false;
1473          }
1474
1475          numDots++;
1476        }
1477        else if (!isDigit(testChar))
1478        {
1479          return false;
1480        }
1481      }
1482
1483      if (numDots != 3)
1484      {
1485        return false;
1486      }
1487    }
1488    else
1489    {
1490
1491      // domain labels can contain alphanumerics and '-"
1492
// but must start and end with an alphanumeric
1493
char testChar;
1494
1495      for (int i = 0; i < addrLength; i++)
1496      {
1497        testChar = address.charAt(i);
1498
1499        if (testChar == '.')
1500        {
1501          if (!isAlphanum(address.charAt(i - 1)))
1502          {
1503            return false;
1504          }
1505
1506          if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1)))
1507          {
1508            return false;
1509          }
1510        }
1511        else if (!isAlphanum(testChar) && testChar != '-')
1512        {
1513          return false;
1514        }
1515      }
1516    }
1517
1518    return true;
1519  }
1520
1521  /**
1522   * Determine whether a char is a digit.
1523   *
1524   *
1525   * @param p_char the character to check
1526   * @return true if the char is betweeen '0' and '9', false otherwise
1527   */

1528  private static boolean isDigit(char p_char)
1529  {
1530    return p_char >= '0' && p_char <= '9';
1531  }
1532
1533  /**
1534   * Determine whether a character is a hexadecimal character.
1535   *
1536   *
1537   * @param p_char the character to check
1538   * @return true if the char is betweeen '0' and '9', 'a' and 'f'
1539   * or 'A' and 'F', false otherwise
1540   */

1541  private static boolean isHex(char p_char)
1542  {
1543    return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f')
1544            || (p_char >= 'A' && p_char <= 'F'));
1545  }
1546
1547  /**
1548   * Determine whether a char is an alphabetic character: a-z or A-Z
1549   *
1550   *
1551   * @param p_char the character to check
1552   * @return true if the char is alphabetic, false otherwise
1553   */

1554  private static boolean isAlpha(char p_char)
1555  {
1556    return ((p_char >= 'a' && p_char <= 'z')
1557            || (p_char >= 'A' && p_char <= 'Z'));
1558  }
1559
1560  /**
1561   * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1562   *
1563   *
1564   * @param p_char the character to check
1565   * @return true if the char is alphanumeric, false otherwise
1566   */

1567  private static boolean isAlphanum(char p_char)
1568  {
1569    return (isAlpha(p_char) || isDigit(p_char));
1570  }
1571
1572  /**
1573   * Determine whether a character is a reserved character:
1574   * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1575   *
1576   *
1577   * @param p_char the character to check
1578   * @return true if the string contains any reserved characters
1579   */

1580  private static boolean isReservedCharacter(char p_char)
1581  {
1582    return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1583  }
1584
1585  /**
1586   * Determine whether a char is an unreserved character.
1587   *
1588   *
1589   * @param p_char the character to check
1590   * @return true if the char is unreserved, false otherwise
1591   */

1592  private static boolean isUnreservedCharacter(char p_char)
1593  {
1594    return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1595  }
1596
1597  /**
1598   * Determine whether a given string contains only URI characters (also
1599   * called "uric" in RFC 2396). uric consist of all reserved
1600   * characters, unreserved characters and escaped characters.
1601   *
1602   *
1603   * @param p_uric URI string
1604   * @return true if the string is comprised of uric, false otherwise
1605   */

1606  private static boolean isURIString(String JavaDoc p_uric)
1607  {
1608
1609    if (p_uric == null)
1610    {
1611      return false;
1612    }
1613
1614    int end = p_uric.length();
1615    char testChar = '\0';
1616
1617    for (int i = 0; i < end; i++)
1618    {
1619      testChar = p_uric.charAt(i);
1620
1621      if (testChar == '%')
1622      {
1623        if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1))
1624                ||!isHex(p_uric.charAt(i + 2)))
1625        {
1626          return false;
1627        }
1628        else
1629        {
1630          i += 2;
1631
1632          continue;
1633        }
1634      }
1635
1636      if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar))
1637      {
1638        continue;
1639      }
1640      else
1641      {
1642        return false;
1643      }
1644    }
1645
1646    return true;
1647  }
1648}
1649
Popular Tags