KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xml > internal > serializer > utils > URI


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 /*
17  * $Id: URI.java,v 1.1.4.1 2005/09/08 11:03:20 suresh_emailid Exp $
18  */

19 package com.sun.org.apache.xml.internal.serializer.utils;
20
21 import java.io.IOException JavaDoc;
22 import java.io.Serializable JavaDoc;
23
24
25 /**
26  * A class to represent a Uniform Resource Identifier (URI). This class
27  * is designed to handle the parsing of URIs and provide access to
28  * the various components (scheme, host, port, userinfo, path, query
29  * string and fragment) that may constitute a URI.
30  * <p>
31  * Parsing of a URI specification is done according to the URI
32  * syntax described in RFC 2396
33  * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
34  * of a scheme, followed by a colon (':'), followed by a scheme-specific
35  * part. For URIs that follow the "generic URI" syntax, the scheme-
36  * specific part begins with two slashes ("//") and may be followed
37  * by an authority segment (comprised of user information, host, and
38  * port), path segment, query segment and fragment. Note that RFC 2396
39  * no longer specifies the use of the parameters segment and excludes
40  * the "user:password" syntax as part of the authority segment. If
41  * "user:password" appears in a URI, the entire user/password string
42  * is stored as userinfo.
43  * <p>
44  * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
45  * the entire scheme-specific part is treated as the "path" portion
46  * of the URI.
47  * <p>
48  * Note that, unlike the java.net.URL class, this class does not provide
49  * any built-in network access functionality nor does it provide any
50  * scheme-specific functionality (for example, it does not know a
51  * default port for a specific scheme). Rather, it only knows the
52  * grammar and basic set of operations that can be applied to a URI.
53  *
54  * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
55  * It exists to cut the serializer's dependency on that package.
56  *
57  * A minor change from the original is that this class no longer implements
58  * Serializable, and the serialVersionUID magic field is dropped, and
59  * the class is no longer "public".
60  *
61  * @xsl.usage internal
62  */

63 final class URI
64 {
65   /**
66    * MalformedURIExceptions are thrown in the process of building a URI
67    * or setting fields on a URI when an operation would result in an
68    * invalid URI specification.
69    *
70    */

71   public static class MalformedURIException extends IOException JavaDoc
72   {
73
74     /**
75      * Constructs a <code>MalformedURIException</code> with no specified
76      * detail message.
77      */

78     public MalformedURIException()
79     {
80       super();
81     }
82
83     /**
84      * Constructs a <code>MalformedURIException</code> with the
85      * specified detail message.
86      *
87      * @param p_msg the detail message.
88      */

89     public MalformedURIException(String JavaDoc p_msg)
90     {
91       super(p_msg);
92     }
93   }
94
95   /** reserved characters */
96   private static final String JavaDoc RESERVED_CHARACTERS = ";/?:@&=+$,";
97
98   /**
99    * URI punctuation mark characters - these, combined with
100    * alphanumerics, constitute the "unreserved" characters
101    */

102   private static final String JavaDoc MARK_CHARACTERS = "-_.!~*'() ";
103
104   /** scheme can be composed of alphanumerics and these characters */
105   private static final String JavaDoc SCHEME_CHARACTERS = "+-.";
106
107   /**
108    * userinfo can be composed of unreserved, escaped and these
109    * characters
110    */

111   private static final String JavaDoc USERINFO_CHARACTERS = ";:&=+$,";
112
113   /** Stores the scheme (usually the protocol) for this URI.
114    * @serial */

115   private String JavaDoc m_scheme = null;
116
117   /** If specified, stores the userinfo for this URI; otherwise null.
118    * @serial */

119   private String JavaDoc m_userinfo = null;
120
121   /** If specified, stores the host for this URI; otherwise null.
122    * @serial */

123   private String JavaDoc m_host = null;
124
125   /** If specified, stores the port for this URI; otherwise -1.
126    * @serial */

127   private int m_port = -1;
128
129   /** If specified, stores the path for this URI; otherwise null.
130    * @serial */

131   private String JavaDoc m_path = null;
132
133   /**
134    * If specified, stores the query string for this URI; otherwise
135    * null.
136    * @serial
137    */

138   private String JavaDoc m_queryString = null;
139
140   /** If specified, stores the fragment for this URI; otherwise null.
141    * @serial */

142   private String JavaDoc m_fragment = null;
143
144   /** Indicate whether in DEBUG mode */
145   private static boolean DEBUG = false;
146
147   /**
148    * Construct a new and uninitialized URI.
149    */

150   public URI(){}
151
/**
 * Copy constructor. Every component of the new URI is taken from the
 * given URI.
 *
 * @param p_other the URI whose components are copied (must not be null)
 */
public URI(URI p_other)
{
  this.initialize(p_other);
}
162
/**
 * Creates a URI by parsing a specification string, with no base URI to
 * resolve against. Delegates to the (base, spec) constructor with a null
 * base.
 *
 * @param p_uriSpec the URI specification string (must not be null or
 *                  empty, since no base is supplied)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax rules
 */
public URI(String p_uriSpec) throws MalformedURIException
{
  // the cast selects the (URI, String) overload rather than (String, String)
  this((URI) null, p_uriSpec);
}
182
/**
 * Creates a URI from a base URI and a specification string, which may
 * be a relative URI.
 *
 * @param p_base    the base URI (must not be null when p_uriSpec is null
 *                  or empty)
 * @param p_uriSpec the URI specification string (must not be null or
 *                  empty when p_base is null)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax rules
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException
{
  this.initialize(p_base, p_uriSpec);
}
199
200   /**
201    * Construct a new URI that does not follow the generic URI syntax.
202    * Only the scheme and scheme-specific part (stored as the path) are
203    * initialized.
204    *
205    * @param p_scheme the URI scheme (cannot be null or empty)
206    * @param p_schemeSpecificPart the scheme-specific part (cannot be
207    * null or empty)
208    *
209    * @throws MalformedURIException if p_scheme violates any
210    * syntax rules
211    */

212   public URI(String JavaDoc p_scheme, String JavaDoc p_schemeSpecificPart)
213           throws MalformedURIException
214   {
215
216     if (p_scheme == null || p_scheme.trim().length() == 0)
217     {
218       throw new MalformedURIException(
219         "Cannot construct URI with null/empty scheme!");
220     }
221
222     if (p_schemeSpecificPart == null
223             || p_schemeSpecificPart.trim().length() == 0)
224     {
225       throw new MalformedURIException(
226         "Cannot construct URI with null/empty scheme-specific part!");
227     }
228
229     setScheme(p_scheme);
230     setPath(p_schemeSpecificPart);
231   }
232
/**
 * Creates a generic-syntax URI from its parts, without userinfo and
 * without a port. Delegates to the seven-argument constructor, passing
 * null for the userinfo and -1 for the port.
 *
 * @param p_scheme      the URI scheme (must not be null or blank)
 * @param p_host        the hostname or IPv4 address for the URI
 * @param p_path        the URI path; it may not contain '?' when
 *                      p_queryString is given, nor '#' when p_fragment
 *                      is given
 * @param p_queryString the URI query string
 * @param p_fragment    the URI fragment
 *
 * @throws MalformedURIException if any of the parameters violates
 *                               syntax rules or semantic rules
 */
public URI(String p_scheme, String p_host, String p_path,
           String p_queryString, String p_fragment)
        throws MalformedURIException
{
  this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
259
/**
 * Creates a generic-syntax URI from its parts. The cross-component
 * rules are checked first; the components are then assigned through
 * the individual setters in the order scheme, host, port, userinfo,
 * path, query string, fragment.
 *
 * @param p_scheme      the URI scheme (must not be null or blank)
 * @param p_userinfo    the URI userinfo (must be null when p_host is
 *                      null)
 * @param p_host        the hostname or IPv4 address for the URI
 * @param p_port        the URI port (-1 means "unspecified" and is
 *                      required when p_host is null)
 * @param p_path        the URI path; it may not contain '?' when
 *                      p_queryString is given, nor '#' when p_fragment
 *                      is given
 * @param p_queryString the URI query string
 * @param p_fragment    the URI fragment
 *
 * @throws MalformedURIException if any of the parameters violates
 *                               syntax rules or semantic rules
 */
public URI(String p_scheme, String p_userinfo, String p_host, int p_port,
           String p_path, String p_queryString, String p_fragment)
        throws MalformedURIException
{
  if (p_scheme == null || p_scheme.trim().length() == 0)
  {
    // "Scheme is required!"
    throw new MalformedURIException(
      Utils.messages.createMessage(MsgKey.ER_SCHEME_REQUIRED, null));
  }

  // userinfo and port only make sense together with a host
  if (p_host == null && p_userinfo != null)
  {
    // "Userinfo may not be specified if host is not specified!"
    throw new MalformedURIException(
      Utils.messages.createMessage(MsgKey.ER_NO_USERINFO_IF_NO_HOST, null));
  }

  if (p_host == null && p_port != -1)
  {
    // "Port may not be specified if host is not specified!"
    throw new MalformedURIException(
      Utils.messages.createMessage(MsgKey.ER_NO_PORT_IF_NO_HOST, null));
  }

  // query and fragment must not be given twice (in the path and separately)
  if (p_path != null && p_path.indexOf('?') != -1 && p_queryString != null)
  {
    // "Query string cannot be specified in path and query string!"
    throw new MalformedURIException(
      Utils.messages.createMessage(MsgKey.ER_NO_QUERY_STRING_IN_PATH, null));
  }

  if (p_path != null && p_path.indexOf('#') != -1 && p_fragment != null)
  {
    // "Fragment cannot be specified in both the path and fragment!"
    throw new MalformedURIException(
      Utils.messages.createMessage(MsgKey.ER_NO_FRAGMENT_STRING_IN_PATH, null));
  }

  setScheme(p_scheme);
  setHost(p_host);
  setPort(p_port);
  setUserinfo(p_userinfo);
  setPath(p_path);
  setQueryString(p_queryString);
  setFragment(p_fragment);
}
332
333   /**
334    * Initialize all fields of this URI from another URI.
335    *
336    * @param p_other the URI to copy (cannot be null)
337    */

338   private void initialize(URI p_other)
339   {
340
341     m_scheme = p_other.getScheme();
342     m_userinfo = p_other.getUserinfo();
343     m_host = p_other.getHost();
344     m_port = p_other.getPort();
345     m_path = p_other.getPath();
346     m_queryString = p_other.getQueryString();
347     m_fragment = p_other.getFragment();
348   }
349
350   /**
351    * Initializes this URI from a base URI and a URI specification string.
352    * See RFC 2396 Section 4 and Appendix B for specifications on parsing
353    * the URI and Section 5 for specifications on resolving relative URIs
354    * and relative paths.
355    *
356    * @param p_base the base URI (may be null if p_uriSpec is an absolute
357    * URI)
358    * @param p_uriSpec the URI spec string which may be an absolute or
359    * relative URI (can only be null/empty if p_base
360    * is not null)
361    *
362    * @throws MalformedURIException if p_base is null and p_uriSpec
363    * is not an absolute URI or if
364    * p_uriSpec violates syntax rules
365    */

366   private void initialize(URI p_base, String JavaDoc p_uriSpec)
367           throws MalformedURIException
368   {
369
370     if (p_base == null
371             && (p_uriSpec == null || p_uriSpec.trim().length() == 0))
372     {
373       throw new MalformedURIException(
374         Utils.messages.createMessage(MsgKey.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters.");
375
}
376
377     // just make a copy of the base if spec is empty
378
if (p_uriSpec == null || p_uriSpec.trim().length() == 0)
379     {
380       initialize(p_base);
381
382       return;
383     }
384
385     String JavaDoc uriSpec = p_uriSpec.trim();
386     int uriSpecLen = uriSpec.length();
387     int index = 0;
388
389     // check for scheme
390
int colonIndex = uriSpec.indexOf(':');
391     if (colonIndex < 0)
392     {
393       if (p_base == null)
394       {
395         throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_IN_URI, new Object JavaDoc[]{uriSpec})); //"No scheme found in URI: "+uriSpec);
396
}
397     }
398     else
399     {
400       initializeScheme(uriSpec);
401       uriSpec = uriSpec.substring(colonIndex+1);
402       uriSpecLen = uriSpec.length();
403     }
404
405     // two slashes means generic URI syntax, so we get the authority
406
if (((index + 1) < uriSpecLen)
407             && (uriSpec.substring(index).startsWith("//")))
408     {
409       index += 2;
410
411       int startPos = index;
412
413       // get authority - everything up to path, query or fragment
414
char testChar = '\0';
415
416       while (index < uriSpecLen)
417       {
418         testChar = uriSpec.charAt(index);
419
420         if (testChar == '/' || testChar == '?' || testChar == '#')
421         {
422           break;
423         }
424
425         index++;
426       }
427
428       // if we found authority, parse it out, otherwise we set the
429
// host to empty string
430
if (index > startPos)
431       {
432         initializeAuthority(uriSpec.substring(startPos, index));
433       }
434       else
435       {
436         m_host = "";
437       }
438     }
439
440     initializePath(uriSpec.substring(index));
441
442     // Resolve relative URI to base URI - see RFC 2396 Section 5.2
443
// In some cases, it might make more sense to throw an exception
444
// (when scheme is specified is the string spec and the base URI
445
// is also specified, for example), but we're just following the
446
// RFC specifications
447
if (p_base != null)
448     {
449
450       // check to see if this is the current doc - RFC 2396 5.2 #2
451
// note that this is slightly different from the RFC spec in that
452
// we don't include the check for query string being null
453
// - this handles cases where the urispec is just a query
454
// string or a fragment (e.g. "?y" or "#s") -
455
// see <http://www.ics.uci.edu/~fielding/url/test1.html> which
456
// identified this as a bug in the RFC
457
if (m_path.length() == 0 && m_scheme == null && m_host == null)
458       {
459         m_scheme = p_base.getScheme();
460         m_userinfo = p_base.getUserinfo();
461         m_host = p_base.getHost();
462         m_port = p_base.getPort();
463         m_path = p_base.getPath();
464
465         if (m_queryString == null)
466         {
467           m_queryString = p_base.getQueryString();
468         }
469
470         return;
471       }
472
473       // check for scheme - RFC 2396 5.2 #3
474
// if we found a scheme, it means absolute URI, so we're done
475
if (m_scheme == null)
476       {
477         m_scheme = p_base.getScheme();
478       }
479
480       // check for authority - RFC 2396 5.2 #4
481
// if we found a host, then we've got a network path, so we're done
482
if (m_host == null)
483       {
484         m_userinfo = p_base.getUserinfo();
485         m_host = p_base.getHost();
486         m_port = p_base.getPort();
487       }
488       else
489       {
490         return;
491       }
492
493       // check for absolute path - RFC 2396 5.2 #5
494
if (m_path.length() > 0 && m_path.startsWith("/"))
495       {
496         return;
497       }
498
499       // if we get to this point, we need to resolve relative path
500
// RFC 2396 5.2 #6
501
String JavaDoc path = new String JavaDoc();
502       String JavaDoc basePath = p_base.getPath();
503
504       // 6a - get all but the last segment of the base URI path
505
if (basePath != null)
506       {
507         int lastSlash = basePath.lastIndexOf('/');
508
509         if (lastSlash != -1)
510         {
511           path = basePath.substring(0, lastSlash + 1);
512         }
513       }
514
515       // 6b - append the relative URI path
516
path = path.concat(m_path);
517
518       // 6c - remove all "./" where "." is a complete path segment
519
index = -1;
520
521       while ((index = path.indexOf("/./")) != -1)
522       {
523         path = path.substring(0, index + 1).concat(path.substring(index + 3));
524       }
525
526       // 6d - remove "." if path ends with "." as a complete path segment
527
if (path.endsWith("/."))
528       {
529         path = path.substring(0, path.length() - 1);
530       }
531
532       // 6e - remove all "<segment>/../" where "<segment>" is a complete
533
// path segment not equal to ".."
534
index = -1;
535
536       int segIndex = -1;
537       String JavaDoc tempString = null;
538
539       while ((index = path.indexOf("/../")) > 0)
540       {
541         tempString = path.substring(0, path.indexOf("/../"));
542         segIndex = tempString.lastIndexOf('/');
543
544         if (segIndex != -1)
545         {
546           if (!tempString.substring(segIndex++).equals(".."))
547           {
548             path = path.substring(0, segIndex).concat(path.substring(index
549                     + 4));
550           }
551         }
552       }
553
554       // 6f - remove ending "<segment>/.." where "<segment>" is a
555
// complete path segment
556
if (path.endsWith("/.."))
557       {
558         tempString = path.substring(0, path.length() - 3);
559         segIndex = tempString.lastIndexOf('/');
560
561         if (segIndex != -1)
562         {
563           path = path.substring(0, segIndex + 1);
564         }
565       }
566
567       m_path = path;
568     }
569   }
570
571   /**
572    * Initialize the scheme for this URI from a URI string spec.
573    *
574    * @param p_uriSpec the URI specification (cannot be null)
575    *
576    * @throws MalformedURIException if URI does not have a conformant
577    * scheme
578    */

579   private void initializeScheme(String JavaDoc p_uriSpec) throws MalformedURIException
580   {
581
582     int uriSpecLen = p_uriSpec.length();
583     int index = 0;
584     String JavaDoc scheme = null;
585     char testChar = '\0';
586
587     while (index < uriSpecLen)
588     {
589       testChar = p_uriSpec.charAt(index);
590
591       if (testChar == ':' || testChar == '/' || testChar == '?'
592               || testChar == '#')
593       {
594         break;
595       }
596
597       index++;
598     }
599
600     scheme = p_uriSpec.substring(0, index);
601
602     if (scheme.length() == 0)
603     {
604       throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI.");
605
}
606     else
607     {
608       setScheme(scheme);
609     }
610   }
611
612   /**
613    * Initialize the authority (userinfo, host and port) for this
614    * URI from a URI string spec.
615    *
616    * @param p_uriSpec the URI specification (cannot be null)
617    *
618    * @throws MalformedURIException if p_uriSpec violates syntax rules
619    */

620   private void initializeAuthority(String JavaDoc p_uriSpec)
621           throws MalformedURIException
622   {
623
624     int index = 0;
625     int start = 0;
626     int end = p_uriSpec.length();
627     char testChar = '\0';
628     String JavaDoc userinfo = null;
629
630     // userinfo is everything up @
631
if (p_uriSpec.indexOf('@', start) != -1)
632     {
633       while (index < end)
634       {
635         testChar = p_uriSpec.charAt(index);
636
637         if (testChar == '@')
638         {
639           break;
640         }
641
642         index++;
643       }
644
645       userinfo = p_uriSpec.substring(start, index);
646
647       index++;
648     }
649
650     // host is everything up to ':'
651
String JavaDoc host = null;
652
653     start = index;
654
655     while (index < end)
656     {
657       testChar = p_uriSpec.charAt(index);
658
659       if (testChar == ':')
660       {
661         break;
662       }
663
664       index++;
665     }
666
667     host = p_uriSpec.substring(start, index);
668
669     int port = -1;
670
671     if (host.length() > 0)
672     {
673
674       // port
675
if (testChar == ':')
676       {
677         index++;
678
679         start = index;
680
681         while (index < end)
682         {
683           index++;
684         }
685
686         String JavaDoc portStr = p_uriSpec.substring(start, index);
687
688         if (portStr.length() > 0)
689         {
690           for (int i = 0; i < portStr.length(); i++)
691           {
692             if (!isDigit(portStr.charAt(i)))
693             {
694               throw new MalformedURIException(
695                 portStr + " is invalid. Port should only contain digits!");
696             }
697           }
698
699           try
700           {
701             port = Integer.parseInt(portStr);
702           }
703           catch (NumberFormatException JavaDoc nfe)
704           {
705
706             // can't happen
707
}
708         }
709       }
710     }
711
712     setHost(host);
713     setPort(port);
714     setUserinfo(userinfo);
715   }
716
717   /**
718    * Initialize the path for this URI from a URI string spec.
719    *
720    * @param p_uriSpec the URI specification (cannot be null)
721    *
722    * @throws MalformedURIException if p_uriSpec violates syntax rules
723    */

724   private void initializePath(String JavaDoc p_uriSpec) throws MalformedURIException
725   {
726
727     if (p_uriSpec == null)
728     {
729       throw new MalformedURIException(
730         "Cannot initialize path from null string!");
731     }
732
733     int index = 0;
734     int start = 0;
735     int end = p_uriSpec.length();
736     char testChar = '\0';
737
738     // path - everything up to query string or fragment
739
while (index < end)
740     {
741       testChar = p_uriSpec.charAt(index);
742
743       if (testChar == '?' || testChar == '#')
744       {
745         break;
746       }
747
748       // check for valid escape sequence
749
if (testChar == '%')
750       {
751         if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
752                 ||!isHex(p_uriSpec.charAt(index + 2)))
753         {
754           throw new MalformedURIException(
755             Utils.messages.createMessage(MsgKey.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!");
756
}
757       }
758       else if (!isReservedCharacter(testChar)
759                &&!isUnreservedCharacter(testChar))
760       {
761         if ('\\' != testChar)
762           throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object JavaDoc[]{String.valueOf(testChar)})); //"Path contains invalid character: "
763
//+ testChar);
764
}
765
766       index++;
767     }
768
769     m_path = p_uriSpec.substring(start, index);
770
771     // query - starts with ? and up to fragment or end
772
if (testChar == '?')
773     {
774       index++;
775
776       start = index;
777
778       while (index < end)
779       {
780         testChar = p_uriSpec.charAt(index);
781
782         if (testChar == '#')
783         {
784           break;
785         }
786
787         if (testChar == '%')
788         {
789           if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
790                   ||!isHex(p_uriSpec.charAt(index + 2)))
791           {
792             throw new MalformedURIException(
793               "Query string contains invalid escape sequence!");
794           }
795         }
796         else if (!isReservedCharacter(testChar)
797                  &&!isUnreservedCharacter(testChar))
798         {
799           throw new MalformedURIException(
800             "Query string contains invalid character:" + testChar);
801         }
802
803         index++;
804       }
805
806       m_queryString = p_uriSpec.substring(start, index);
807     }
808
809     // fragment - starts with #
810
if (testChar == '#')
811     {
812       index++;
813
814       start = index;
815
816       while (index < end)
817       {
818         testChar = p_uriSpec.charAt(index);
819
820         if (testChar == '%')
821         {
822           if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
823                   ||!isHex(p_uriSpec.charAt(index + 2)))
824           {
825             throw new MalformedURIException(
826               "Fragment contains invalid escape sequence!");
827           }
828         }
829         else if (!isReservedCharacter(testChar)
830                  &&!isUnreservedCharacter(testChar))
831         {
832           throw new MalformedURIException(
833             "Fragment contains invalid character:" + testChar);
834         }
835
836         index++;
837       }
838
839       m_fragment = p_uriSpec.substring(start, index);
840     }
841   }
842
843   /**
844    * Get the scheme for this URI.
845    *
846    * @return the scheme for this URI
847    */

848   public String JavaDoc getScheme()
849   {
850     return m_scheme;
851   }
852
853   /**
854    * Get the scheme-specific part for this URI (everything following the
855    * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
856    *
857    * @return the scheme-specific part for this URI
858    */

859   public String JavaDoc getSchemeSpecificPart()
860   {
861
862     StringBuffer JavaDoc schemespec = new StringBuffer JavaDoc();
863
864     if (m_userinfo != null || m_host != null || m_port != -1)
865     {
866       schemespec.append("//");
867     }
868
869     if (m_userinfo != null)
870     {
871       schemespec.append(m_userinfo);
872       schemespec.append('@');
873     }
874
875     if (m_host != null)
876     {
877       schemespec.append(m_host);
878     }
879
880     if (m_port != -1)
881     {
882       schemespec.append(':');
883       schemespec.append(m_port);
884     }
885
886     if (m_path != null)
887     {
888       schemespec.append((m_path));
889     }
890
891     if (m_queryString != null)
892     {
893       schemespec.append('?');
894       schemespec.append(m_queryString);
895     }
896
897     if (m_fragment != null)
898     {
899       schemespec.append('#');
900       schemespec.append(m_fragment);
901     }
902
903     return schemespec.toString();
904   }
905
906   /**
907    * Get the userinfo for this URI.
908    *
909    * @return the userinfo for this URI (null if not specified).
910    */

911   public String JavaDoc getUserinfo()
912   {
913     return m_userinfo;
914   }
915
916   /**
917    * Get the host for this URI.
918    *
919    * @return the host for this URI (null if not specified).
920    */

921   public String JavaDoc getHost()
922   {
923     return m_host;
924   }
925
926   /**
927    * Get the port for this URI.
928    *
929    * @return the port for this URI (-1 if not specified).
930    */

931   public int getPort()
932   {
933     return m_port;
934   }
935
936   /**
937    * Get the path for this URI (optionally with the query string and
938    * fragment).
939    *
940    * @param p_includeQueryString if true (and query string is not null),
941    * then a "?" followed by the query string
942    * will be appended
943    * @param p_includeFragment if true (and fragment is not null),
944    * then a "#" followed by the fragment
945    * will be appended
946    *
947    * @return the path for this URI possibly including the query string
948    * and fragment
949    */

950   public String JavaDoc getPath(boolean p_includeQueryString,
951                         boolean p_includeFragment)
952   {
953
954     StringBuffer JavaDoc pathString = new StringBuffer JavaDoc(m_path);
955
956     if (p_includeQueryString && m_queryString != null)
957     {
958       pathString.append('?');
959       pathString.append(m_queryString);
960     }
961
962     if (p_includeFragment && m_fragment != null)
963     {
964       pathString.append('#');
965       pathString.append(m_fragment);
966     }
967
968     return pathString.toString();
969   }
970
971   /**
972    * Get the path for this URI. Note that the value returned is the path
973    * only and does not include the query string or fragment.
974    *
975    * @return the path for this URI.
976    */

977   public String JavaDoc getPath()
978   {
979     return m_path;
980   }
981
982   /**
983    * Get the query string for this URI.
984    *
985    * @return the query string for this URI. Null is returned if there
986    * was no "?" in the URI spec, empty string if there was a
987    * "?" but no query string following it.
988    */

989   public String JavaDoc getQueryString()
990   {
991     return m_queryString;
992   }
993
994   /**
995    * Get the fragment for this URI.
996    *
997    * @return the fragment for this URI. Null is returned if there
998    * was no "#" in the URI spec, empty string if there was a
999    * "#" but no fragment following it.
1000   */

1001  public String JavaDoc getFragment()
1002  {
1003    return m_fragment;
1004  }
1005
1006  /**
1007   * Set the scheme for this URI. The scheme is converted to lowercase
1008   * before it is set.
1009   *
1010   * @param p_scheme the scheme for this URI (cannot be null)
1011   *
1012   * @throws MalformedURIException if p_scheme is not a conformant
1013   * scheme name
1014   */

1015  public void setScheme(String JavaDoc p_scheme) throws MalformedURIException
1016  {
1017
1018    if (p_scheme == null)
1019    {
1020      throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!");
1021
}
1022
1023    if (!isConformantSchemeName(p_scheme))
1024    {
1025      throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant.");
1026
}
1027
1028    m_scheme = p_scheme.toLowerCase();
1029  }
1030
1031  /**
1032   * Set the userinfo for this URI. If a non-null value is passed in and
1033   * the host value is null, then an exception is thrown.
1034   *
1035   * @param p_userinfo the userinfo for this URI
1036   *
1037   * @throws MalformedURIException if p_userinfo contains invalid
1038   * characters
1039   */

1040  public void setUserinfo(String JavaDoc p_userinfo) throws MalformedURIException
1041  {
1042
1043    if (p_userinfo == null)
1044    {
1045      m_userinfo = null;
1046    }
1047    else
1048    {
1049      if (m_host == null)
1050      {
1051        throw new MalformedURIException(
1052          "Userinfo cannot be set when host is null!");
1053      }
1054
1055      // userinfo can contain alphanumerics, mark characters, escaped
1056
// and ';',':','&','=','+','$',','
1057
int index = 0;
1058      int end = p_userinfo.length();
1059      char testChar = '\0';
1060
1061      while (index < end)
1062      {
1063        testChar = p_userinfo.charAt(index);
1064
1065        if (testChar == '%')
1066        {
1067          if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1))
1068                  ||!isHex(p_userinfo.charAt(index + 2)))
1069          {
1070            throw new MalformedURIException(
1071              "Userinfo contains invalid escape sequence!");
1072          }
1073        }
1074        else if (!isUnreservedCharacter(testChar)
1075                 && USERINFO_CHARACTERS.indexOf(testChar) == -1)
1076        {
1077          throw new MalformedURIException(
1078            "Userinfo contains invalid character:" + testChar);
1079        }
1080
1081        index++;
1082      }
1083    }
1084
1085    m_userinfo = p_userinfo;
1086  }
1087
1088  /**
1089   * Set the host for this URI. If null is passed in, the userinfo
1090   * field is also set to null and the port is set to -1.
1091   *
1092   * @param p_host the host for this URI
1093   *
1094   * @throws MalformedURIException if p_host is not a valid IP
1095   * address or DNS hostname.
1096   */

1097  public void setHost(String JavaDoc p_host) throws MalformedURIException
1098  {
1099
1100    if (p_host == null || p_host.trim().length() == 0)
1101    {
1102      m_host = p_host;
1103      m_userinfo = null;
1104      m_port = -1;
1105    }
1106    else if (!isWellFormedAddress(p_host))
1107    {
1108      throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!");
1109
}
1110
1111    m_host = p_host;
1112  }
1113
1114  /**
1115   * Set the port for this URI. -1 is used to indicate that the port is
1116   * not specified, otherwise valid port numbers are between 0 and 65535.
1117   * If a valid port number is passed in and the host field is null,
1118   * an exception is thrown.
1119   *
1120   * @param p_port the port number for this URI
1121   *
1122   * @throws MalformedURIException if p_port is not -1 and not a
1123   * valid port number
1124   */

1125  public void setPort(int p_port) throws MalformedURIException
1126  {
1127
1128    if (p_port >= 0 && p_port <= 65535)
1129    {
1130      if (m_host == null)
1131      {
1132        throw new MalformedURIException(
1133          Utils.messages.createMessage(MsgKey.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!");
1134
}
1135    }
1136    else if (p_port != -1)
1137    {
1138      throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_INVALID_PORT, null)); //"Invalid port number!");
1139
}
1140
1141    m_port = p_port;
1142  }
1143
1144  /**
1145   * Set the path for this URI. If the supplied path is null, then the
1146   * query string and fragment are set to null as well. If the supplied
1147   * path includes a query string and/or fragment, these fields will be
1148   * parsed and set as well. Note that, for URIs following the "generic
1149   * URI" syntax, the path specified should start with a slash.
1150   * For URIs that do not follow the generic URI syntax, this method
1151   * sets the scheme-specific part.
1152   *
1153   * @param p_path the path for this URI (may be null)
1154   *
1155   * @throws MalformedURIException if p_path contains invalid
1156   * characters
1157   */

1158  public void setPath(String JavaDoc p_path) throws MalformedURIException
1159  {
1160
1161    if (p_path == null)
1162    {
1163      m_path = null;
1164      m_queryString = null;
1165      m_fragment = null;
1166    }
1167    else
1168    {
1169      initializePath(p_path);
1170    }
1171  }
1172
1173  /**
1174   * Append to the end of the path of this URI. If the current path does
1175   * not end in a slash and the path to be appended does not begin with
1176   * a slash, a slash will be appended to the current path before the
1177   * new segment is added. Also, if the current path ends in a slash
1178   * and the new segment begins with a slash, the extra slash will be
1179   * removed before the new segment is appended.
1180   *
1181   * @param p_addToPath the new segment to be added to the current path
1182   *
1183   * @throws MalformedURIException if p_addToPath contains syntax
1184   * errors
1185   */

1186  public void appendPath(String JavaDoc p_addToPath) throws MalformedURIException
1187  {
1188
1189    if (p_addToPath == null || p_addToPath.trim().length() == 0)
1190    {
1191      return;
1192    }
1193
1194    if (!isURIString(p_addToPath))
1195    {
1196      throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object JavaDoc[]{p_addToPath})); //"Path contains invalid character!");
1197
}
1198
1199    if (m_path == null || m_path.trim().length() == 0)
1200    {
1201      if (p_addToPath.startsWith("/"))
1202      {
1203        m_path = p_addToPath;
1204      }
1205      else
1206      {
1207        m_path = "/" + p_addToPath;
1208      }
1209    }
1210    else if (m_path.endsWith("/"))
1211    {
1212      if (p_addToPath.startsWith("/"))
1213      {
1214        m_path = m_path.concat(p_addToPath.substring(1));
1215      }
1216      else
1217      {
1218        m_path = m_path.concat(p_addToPath);
1219      }
1220    }
1221    else
1222    {
1223      if (p_addToPath.startsWith("/"))
1224      {
1225        m_path = m_path.concat(p_addToPath);
1226      }
1227      else
1228      {
1229        m_path = m_path.concat("/" + p_addToPath);
1230      }
1231    }
1232  }
1233
1234  /**
1235   * Set the query string for this URI. A non-null value is valid only
1236   * if this is an URI conforming to the generic URI syntax and
1237   * the path value is not null.
1238   *
1239   * @param p_queryString the query string for this URI
1240   *
1241   * @throws MalformedURIException if p_queryString is not null and this
1242   * URI does not conform to the generic
1243   * URI syntax or if the path is null
1244   */

1245  public void setQueryString(String JavaDoc p_queryString)
1246          throws MalformedURIException
1247  {
1248
1249    if (p_queryString == null)
1250    {
1251      m_queryString = null;
1252    }
1253    else if (!isGenericURI())
1254    {
1255      throw new MalformedURIException(
1256        "Query string can only be set for a generic URI!");
1257    }
1258    else if (getPath() == null)
1259    {
1260      throw new MalformedURIException(
1261        "Query string cannot be set when path is null!");
1262    }
1263    else if (!isURIString(p_queryString))
1264    {
1265      throw new MalformedURIException(
1266        "Query string contains invalid character!");
1267    }
1268    else
1269    {
1270      m_queryString = p_queryString;
1271    }
1272  }
1273
1274  /**
1275   * Set the fragment for this URI. A non-null value is valid only
1276   * if this is a URI conforming to the generic URI syntax and
1277   * the path value is not null.
1278   *
1279   * @param p_fragment the fragment for this URI
1280   *
1281   * @throws MalformedURIException if p_fragment is not null and this
1282   * URI does not conform to the generic
1283   * URI syntax or if the path is null
1284   */

1285  public void setFragment(String JavaDoc p_fragment) throws MalformedURIException
1286  {
1287
1288    if (p_fragment == null)
1289    {
1290      m_fragment = null;
1291    }
1292    else if (!isGenericURI())
1293    {
1294      throw new MalformedURIException(
1295        Utils.messages.createMessage(MsgKey.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!");
1296
}
1297    else if (getPath() == null)
1298    {
1299      throw new MalformedURIException(
1300        Utils.messages.createMessage(MsgKey.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!");
1301
}
1302    else if (!isURIString(p_fragment))
1303    {
1304      throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!");
1305
}
1306    else
1307    {
1308      m_fragment = p_fragment;
1309    }
1310  }
1311
1312  /**
1313   * Determines if the passed-in Object is equivalent to this URI.
1314   *
1315   * @param p_test the Object to test for equality.
1316   *
1317   * @return true if p_test is a URI with all values equal to this
1318   * URI, false otherwise
1319   */

1320  public boolean equals(Object JavaDoc p_test)
1321  {
1322
1323    if (p_test instanceof URI)
1324    {
1325      URI testURI = (URI) p_test;
1326
1327      if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals(
1328              testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals(
1329              testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals(
1330              testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals(
1331              testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals(
1332              testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals(
1333              testURI.m_fragment))))
1334      {
1335        return true;
1336      }
1337    }
1338
1339    return false;
1340  }
1341
1342  /**
1343   * Get the URI as a string specification. See RFC 2396 Section 5.2.
1344   *
1345   * @return the URI string specification
1346   */

1347  public String JavaDoc toString()
1348  {
1349
1350    StringBuffer JavaDoc uriSpecString = new StringBuffer JavaDoc();
1351
1352    if (m_scheme != null)
1353    {
1354      uriSpecString.append(m_scheme);
1355      uriSpecString.append(':');
1356    }
1357
1358    uriSpecString.append(getSchemeSpecificPart());
1359
1360    return uriSpecString.toString();
1361  }
1362
1363  /**
1364   * Get the indicator as to whether this URI uses the "generic URI"
1365   * syntax.
1366   *
1367   * @return true if this URI uses the "generic URI" syntax, false
1368   * otherwise
1369   */

1370  public boolean isGenericURI()
1371  {
1372
1373    // presence of the host (whether valid or empty) means
1374
// double-slashes which means generic uri
1375
return (m_host != null);
1376  }
1377
1378  /**
1379   * Determine whether a scheme conforms to the rules for a scheme name.
1380   * A scheme is conformant if it starts with an alphanumeric, and
1381   * contains only alphanumerics, '+','-' and '.'.
1382   *
1383   *
1384   * @param p_scheme The sheme name to check
1385   * @return true if the scheme is conformant, false otherwise
1386   */

1387  public static boolean isConformantSchemeName(String JavaDoc p_scheme)
1388  {
1389
1390    if (p_scheme == null || p_scheme.trim().length() == 0)
1391    {
1392      return false;
1393    }
1394
1395    if (!isAlpha(p_scheme.charAt(0)))
1396    {
1397      return false;
1398    }
1399
1400    char testChar;
1401
1402    for (int i = 1; i < p_scheme.length(); i++)
1403    {
1404      testChar = p_scheme.charAt(i);
1405
1406      if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1)
1407      {
1408        return false;
1409      }
1410    }
1411
1412    return true;
1413  }
1414
1415  /**
1416   * Determine whether a string is syntactically capable of representing
1417   * a valid IPv4 address or the domain name of a network host. A valid
1418   * IPv4 address consists of four decimal digit groups separated by a
1419   * '.'. A hostname consists of domain labels (each of which must
1420   * begin and end with an alphanumeric but may contain '-') separated
1421   * & by a '.'. See RFC 2396 Section 3.2.2.
1422   *
1423   *
1424   * @param p_address The address string to check
1425   * @return true if the string is a syntactically valid IPv4 address
1426   * or hostname
1427   */

1428  public static boolean isWellFormedAddress(String JavaDoc p_address)
1429  {
1430
1431    if (p_address == null)
1432    {
1433      return false;
1434    }
1435
1436    String JavaDoc address = p_address.trim();
1437    int addrLength = address.length();
1438
1439    if (addrLength == 0 || addrLength > 255)
1440    {
1441      return false;
1442    }
1443
1444    if (address.startsWith(".") || address.startsWith("-"))
1445    {
1446      return false;
1447    }
1448
1449    // rightmost domain label starting with digit indicates IP address
1450
// since top level domain label can only start with an alpha
1451
// see RFC 2396 Section 3.2.2
1452
int index = address.lastIndexOf('.');
1453
1454    if (address.endsWith("."))
1455    {
1456      index = address.substring(0, index).lastIndexOf('.');
1457    }
1458
1459    if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1)))
1460    {
1461      char testChar;
1462      int numDots = 0;
1463
1464      // make sure that 1) we see only digits and dot separators, 2) that
1465
// any dot separator is preceded and followed by a digit and
1466
// 3) that we find 3 dots
1467
for (int i = 0; i < addrLength; i++)
1468      {
1469        testChar = address.charAt(i);
1470
1471        if (testChar == '.')
1472        {
1473          if (!isDigit(address.charAt(i - 1))
1474                  || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1))))
1475          {
1476            return false;
1477          }
1478
1479          numDots++;
1480        }
1481        else if (!isDigit(testChar))
1482        {
1483          return false;
1484        }
1485      }
1486
1487      if (numDots != 3)
1488      {
1489        return false;
1490      }
1491    }
1492    else
1493    {
1494
1495      // domain labels can contain alphanumerics and '-"
1496
// but must start and end with an alphanumeric
1497
char testChar;
1498
1499      for (int i = 0; i < addrLength; i++)
1500      {
1501        testChar = address.charAt(i);
1502
1503        if (testChar == '.')
1504        {
1505          if (!isAlphanum(address.charAt(i - 1)))
1506          {
1507            return false;
1508          }
1509
1510          if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1)))
1511          {
1512            return false;
1513          }
1514        }
1515        else if (!isAlphanum(testChar) && testChar != '-')
1516        {
1517          return false;
1518        }
1519      }
1520    }
1521
1522    return true;
1523  }
1524
1525  /**
1526   * Determine whether a char is a digit.
1527   *
1528   *
1529   * @param p_char the character to check
1530   * @return true if the char is betweeen '0' and '9', false otherwise
1531   */

1532  private static boolean isDigit(char p_char)
1533  {
1534    return p_char >= '0' && p_char <= '9';
1535  }
1536
1537  /**
1538   * Determine whether a character is a hexadecimal character.
1539   *
1540   *
1541   * @param p_char the character to check
1542   * @return true if the char is betweeen '0' and '9', 'a' and 'f'
1543   * or 'A' and 'F', false otherwise
1544   */

1545  private static boolean isHex(char p_char)
1546  {
1547    return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f')
1548            || (p_char >= 'A' && p_char <= 'F'));
1549  }
1550
1551  /**
1552   * Determine whether a char is an alphabetic character: a-z or A-Z
1553   *
1554   *
1555   * @param p_char the character to check
1556   * @return true if the char is alphabetic, false otherwise
1557   */

1558  private static boolean isAlpha(char p_char)
1559  {
1560    return ((p_char >= 'a' && p_char <= 'z')
1561            || (p_char >= 'A' && p_char <= 'Z'));
1562  }
1563
1564  /**
1565   * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1566   *
1567   *
1568   * @param p_char the character to check
1569   * @return true if the char is alphanumeric, false otherwise
1570   */

1571  private static boolean isAlphanum(char p_char)
1572  {
1573    return (isAlpha(p_char) || isDigit(p_char));
1574  }
1575
1576  /**
1577   * Determine whether a character is a reserved character:
1578   * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1579   *
1580   *
1581   * @param p_char the character to check
1582   * @return true if the string contains any reserved characters
1583   */

1584  private static boolean isReservedCharacter(char p_char)
1585  {
1586    return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1587  }
1588
1589  /**
1590   * Determine whether a char is an unreserved character.
1591   *
1592   *
1593   * @param p_char the character to check
1594   * @return true if the char is unreserved, false otherwise
1595   */

1596  private static boolean isUnreservedCharacter(char p_char)
1597  {
1598    return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1599  }
1600
1601  /**
1602   * Determine whether a given string contains only URI characters (also
1603   * called "uric" in RFC 2396). uric consist of all reserved
1604   * characters, unreserved characters and escaped characters.
1605   *
1606   *
1607   * @param p_uric URI string
1608   * @return true if the string is comprised of uric, false otherwise
1609   */

1610  private static boolean isURIString(String JavaDoc p_uric)
1611  {
1612
1613    if (p_uric == null)
1614    {
1615      return false;
1616    }
1617
1618    int end = p_uric.length();
1619    char testChar = '\0';
1620
1621    for (int i = 0; i < end; i++)
1622    {
1623      testChar = p_uric.charAt(i);
1624
1625      if (testChar == '%')
1626      {
1627        if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1))
1628                ||!isHex(p_uric.charAt(i + 2)))
1629        {
1630          return false;
1631        }
1632        else
1633        {
1634          i += 2;
1635
1636          continue;
1637        }
1638      }
1639
1640      if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar))
1641      {
1642        continue;
1643      }
1644      else
1645      {
1646        return false;
1647      }
1648    }
1649
1650    return true;
1651  }
1652}
1653
Popular Tags