KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > HTTPClient > URI


1 /*
2  * @(#)URI.java 0.3-2 18/06/1999
3  *
4  * This file is part of the HTTPClient package
5  * Copyright (C) 1996-1999 Ronald Tschalär
6  *
7  * This library is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2 of the License, or (at your option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this library; if not, write to the Free
19  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
20  * MA 02111-1307, USA
21  *
22  * For questions, suggestions, bug-reports, enhancement-requests etc.
23  * I may be contacted at:
24  *
25  * ronald@innovation.ch
26  *
27  */

28
29 package HTTPClient;
30
31
import java.net.MalformedURLException;
import java.net.URL;
import java.util.BitSet;
import java.util.Locale;
35
36 /**
37  * This class represents a generic URI, as defined in RFC-2396.
38  * This is similar to java.net.URL, with the following enhancements:
39  * <UL>
40  * <LI>it doesn't require a URLStreamhandler to exist for the scheme; this
41  * allows this class to be used to hold any URI, construct absolute
42  * URIs from relative ones, etc.
43  * <LI>it handles escapes correctly
44  * <LI>equals() works correctly
45  * <LI>relative URIs are correctly constructed
46  * <LI>it has methods for accessing various fields such as userinfo,
47  * fragment, params, etc.
48  * <LI>it handles less common forms of resources such as the "*" used in
49  * http URLs.
50  * </UL>
51  *
52  * <P>Ideally, java.net.URL should subclass URI.
53  *
54  * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
55  * @version 0.3-2 18/06/1999
56  * @author Ronald Tschalär
57  * @since V0.3-1
58  */

59
60 public class URI
61 {
62     /* various character classes as defined in the draft */
63     protected static BitSet JavaDoc alphanumChar;
64     protected static BitSet JavaDoc markChar;
65     protected static BitSet JavaDoc reservedChar;
66     protected static BitSet JavaDoc unreservedChar;
67     protected static BitSet JavaDoc uricChar;
68     protected static BitSet JavaDoc pcharChar;
69     protected static BitSet JavaDoc userinfoChar;
70     protected static BitSet JavaDoc schemeChar;
71     protected static BitSet JavaDoc reg_nameChar;
72
73     static
74     {
75     alphanumChar = new BitSet JavaDoc(128);
76     for (int ch='0'; ch<='9'; ch++) alphanumChar.set(ch);
77     for (int ch='A'; ch<='Z'; ch++) alphanumChar.set(ch);
78     for (int ch='a'; ch<='z'; ch++) alphanumChar.set(ch);
79
80     markChar = new BitSet JavaDoc(128);
81     markChar.set('-');
82     markChar.set('_');
83     markChar.set('.');
84     markChar.set('!');
85     markChar.set('~');
86     markChar.set('*');
87     markChar.set('\'');
88     markChar.set('(');
89     markChar.set(')');
90
91     reservedChar = new BitSet JavaDoc(128);
92     reservedChar.set(';');
93     reservedChar.set('/');
94     reservedChar.set('?');
95     reservedChar.set(':');
96     reservedChar.set('@');
97     reservedChar.set('&');
98     reservedChar.set('=');
99     reservedChar.set('+');
100     reservedChar.set('$');
101     reservedChar.set(',');
102
103     unreservedChar = new BitSet JavaDoc(128);
104     unreservedChar.or(alphanumChar);
105     unreservedChar.or(markChar);
106
107     uricChar = new BitSet JavaDoc(128);
108     uricChar.or(unreservedChar);
109     uricChar.or(reservedChar);
110
111     pcharChar = new BitSet JavaDoc(128);
112     pcharChar.or(unreservedChar);
113     pcharChar.set(':');
114     pcharChar.set('@');
115     pcharChar.set('&');
116     pcharChar.set('=');
117     pcharChar.set('+');
118     pcharChar.set('$');
119     pcharChar.set(',');
120
121     userinfoChar = new BitSet JavaDoc(128);
122     userinfoChar.or(unreservedChar);
123     userinfoChar.set(';');
124     userinfoChar.set(':');
125     userinfoChar.set('&');
126     userinfoChar.set('=');
127     userinfoChar.set('+');
128     userinfoChar.set('$');
129     userinfoChar.set(',');
130
131     // this actually shouldn't contain uppercase letters...
132
schemeChar = new BitSet JavaDoc(128);
133     schemeChar.or(alphanumChar);
134     schemeChar.set('+');
135     schemeChar.set('-');
136     schemeChar.set('.');
137
138     reg_nameChar = new BitSet JavaDoc(128);
139     reg_nameChar.or(unreservedChar);
140     reg_nameChar.set('$');
141     reg_nameChar.set(',');
142     reg_nameChar.set(';');
143     reg_nameChar.set(':');
144     reg_nameChar.set('@');
145     reg_nameChar.set('&');
146     reg_nameChar.set('=');
147     reg_nameChar.set('+');
148     }
149
150
151     /* our uri in pieces */
152
153     protected boolean is_generic;
154     protected String JavaDoc scheme;
155     protected String JavaDoc opaque;
156     protected String JavaDoc userinfo;
157     protected String JavaDoc host;
158     protected int port = -1;
159     protected String JavaDoc path;
160     protected String JavaDoc query;
161     protected String JavaDoc fragment;
162
163
164     /* cache the java.net.URL */
165
166     protected URL JavaDoc url = null;
167
168
169     // Constructors
170

171     /**
172      * Constructs a URI from the given string representation. The string
173      * must be an absolute URI.
174      *
175      * @param uri a String containing an absolute URI
176      * @exception ParseException if no scheme can be found or a specified
177      * port cannot be parsed as a number
178      */

public URI(String uri) throws ParseException
{
    // no base: the string must carry its own scheme
    this((URI) null, uri);
}
183
184
185     /**
186      * Constructs a URI from the given string representation, relative to
187      * the given base URI.
188      *
189      * @param base the base URI, relative to which <var>rel_uri</var>
190      * is to be parsed
191      * @param rel_uri a String containing a relative or absolute URI
192      * @exception ParseException if <var>base</var> is null and
193      * <var>rel_uri</var> is not an absolute URI, or
194      * if <var>base</var> is not null and the scheme
195      * is not known to use the generic syntax, or
196      * if a given port cannot be parsed as a number
197      */

198     public URI(URI base, String JavaDoc rel_uri) throws ParseException
199     {
200     /* Parsing is done according to the following RE:
201      *
202      * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
203      * 12 3 4 5 6 7 8 9
204      *
205      * 2: scheme
206      * 4: authority
207      * 5: path
208      * 7: query
209      * 9: fragment
210      */

211
212     char[] uri = rel_uri.toCharArray();
213     int pos = 0, idx, len = uri.length;
214
215
216     // trim()
217

218     while (pos < len && Character.isSpace(uri[pos])) pos++;
219     while (len > 0 && Character.isSpace(uri[len-1])) len--;
220
221
222     // strip the special "url" or "uri" scheme
223

224     if (pos < len-3 && uri[pos+3] == ':' &&
225         (uri[pos+0] == 'u' || uri[pos+0] == 'U') &&
226         (uri[pos+1] == 'r' || uri[pos+1] == 'R') &&
227         (uri[pos+2] == 'i' || uri[pos+2] == 'I' ||
228          uri[pos+2] == 'l' || uri[pos+2] == 'L'))
229         pos += 4;
230
231
232     // get scheme: (([^:/?#]+):)?
233

234     idx = pos;
235     while (idx < len && uri[idx] != ':' && uri[idx] != '/' &&
236            uri[idx] != '?' && uri[idx] != '#')
237         idx++;
238     if (idx < len && uri[idx] == ':')
239     {
240         scheme = rel_uri.substring(pos, idx).trim().toLowerCase();
241         pos = idx + 1;
242     }
243
244
245     // check and resolve scheme
246

247     String JavaDoc final_scheme = scheme;
248     if (scheme == null)
249     {
250         if (base == null)
251         throw new ParseException("No scheme found");
252         final_scheme = base.scheme;
253     }
254
255
256     // check for generic vs. opaque
257

258     is_generic = usesGenericSyntax(final_scheme);
259     if (!is_generic)
260     {
261         if (base != null && scheme == null)
262         throw new ParseException("Can't resolve relative URI for " +
263                      "scheme " + final_scheme);
264
265         opaque = rel_uri.substring(pos);
266         return;
267     }
268
269
270     // get authority: (//([^/?#]*))?
271

272     if (pos < len-1 && uri[pos] == '/' && uri[pos+1] == '/')
273     {
274         pos += 2;
275         idx = pos;
276         while (idx < len && uri[idx] != '/' && uri[idx] != '?' &&
277            uri[idx] != '#')
278         idx++;
279
280         parse_authority(rel_uri.substring(pos, idx), final_scheme);
281         pos = idx;
282     }
283
284
285     // get path: ([^?#]*)
286

287     idx = pos;
288     while (idx < len && uri[idx] != '?' && uri[idx] != '#')
289         idx++;
290     this.path = rel_uri.substring(pos, idx);
291     pos = idx;
292
293
294     // get query: (\?([^#]*))?
295

296     if (pos < len && uri[pos] == '?')
297     {
298         pos += 1;
299         idx = pos;
300         while (idx < len && uri[idx] != '#')
301         idx++;
302         this.query = unescape(rel_uri.substring(pos, idx));
303         pos = idx;
304     }
305
306
307     // get fragment: (#(.*))?
308

309     if (pos < len && uri[pos] == '#')
310         this.fragment = unescape(rel_uri.substring(pos+1, len));
311
312
313     // now resolve the parts relative to the base
314

315     if (base != null)
316     {
317         if (scheme != null) return; // resolve scheme
318
scheme = base.scheme;
319
320         if (host != null) return; // resolve authority
321
userinfo = base.userinfo;
322         host = base.host;
323         port = base.port;
324
325         if (path.length() == 0 && query == null) // current doc
326
{
327         path = base.path;
328         query = base.query;
329         return;
330         }
331
332         if (path.length() == 0 || path.charAt(0) != '/') // relative uri
333
{
334         idx = base.path.lastIndexOf('/');
335         if (idx == -1) return; // weird one
336
path = base.path.substring(0, idx+1) + path;
337
338         len = path.length();
339         if (!((idx = path.indexOf("/.")) != -1 &&
340               (idx == len-2 || path.charAt(idx+2) == '/' ||
341                (path.charAt(idx+2) == '.' &&
342             (idx == len-3 || path.charAt(idx+3) == '/')) )))
343             return;
344
345         char[] p = new char[path.length()]; // clean path
346
path.getChars(0, p.length, p, 0);
347
348         int beg = 0;
349         for (idx=1; idx<len; idx++)
350         {
351             if (p[idx] == '.' && p[idx-1] == '/')
352             {
353             int end;
354             if (idx == len-1) // trailing "/."
355
{
356                 end = idx;
357                 idx += 1;
358             }
359             else if (p[idx+1] == '/') // "/./"
360
{
361                 end = idx - 1;
362                 idx += 1;
363             }
364             else if (p[idx+1] == '.' &&
365                  (idx == len-2 || p[idx+2] == '/')) // "/../"
366
{
367                 if (idx < beg + 2) // keep from backing up too much
368
{
369                 beg = idx + 2;
370                 continue;
371                 }
372
373                 end = idx - 2;
374                 while (end > beg && p[end] != '/') end--;
375                 if (p[end] != '/') continue;
376                 if (idx == len-2) end++;
377                 idx += 2;
378             }
379             else
380                 continue;
381             System.arraycopy(p, idx, p, end, len-idx);
382             len -= idx - end;
383             idx = end;
384             }
385         }
386         path = new String JavaDoc(p, 0, len);
387         }
388     }
389     }
390
391
392     /**
393      * Parse the authority specific part
394      */

395     private void parse_authority(String JavaDoc authority, String JavaDoc scheme)
396         throws ParseException
397     {
398     /* The authority is further parsed according to:
399      *
400      * ^(([^@]*)@?)([^:]*)?(:(.*))?
401      * 12 3 4 5
402      *
403      * 2: userinfo
404      * 3: host
405      * 5: port
406      */

407
408     char[] uri = authority.toCharArray();
409     int pos = 0, idx, len = uri.length;
410
411
412     // get userinfo: (([^@]*)@?)
413

414     idx = pos;
415     while (idx < len && uri[idx] != '@')
416         idx++;
417     if (idx < len && uri[idx] == '@')
418     {
419         this.userinfo = unescape(authority.substring(pos, idx));
420         pos = idx + 1;
421     }
422
423
424     // get host: ([^:]*)?
425

426     idx = pos;
427     while (idx < len && uri[idx] != ':')
428         idx++;
429     this.host = authority.substring(pos, idx);
430     pos = idx;
431
432
433     // get port: (:(.*))?
434

435     if (pos < (len-1) && uri[pos] == ':')
436     {
437         int p;
438         try
439         {
440         p = Integer.parseInt(authority.substring(pos+1, len));
441         if (p < 0) throw new NumberFormatException JavaDoc();
442         }
443         catch (NumberFormatException JavaDoc e)
444         {
445         throw new ParseException(authority.substring(pos+1, len) +
446                      " is an invalid port number");
447         }
448         if (p == defaultPort(scheme))
449         this.port = -1;
450         else
451         this.port = p;
452     }
453     }
454
455
456     /**
457      * Construct a URI from the given URL.
458      *
459      * @param url the URL
460      * @exception ParseException if <code>url.toExternalForm()</code> generates
461      * an invalid string representation
462      */

public URI(URL url) throws ParseException
{
    // re-parse the URL's string form as an absolute URI
    this((URI) null, url.toExternalForm());
}
467
468
469     /**
470      * Constructs a URI from the given parts, using the default port for
471      * this scheme (if known).
472      *
473      * @param scheme the scheme (sometimes known as protocol)
474      * @param host the host
475      * @param path the path part
476      * @exception ParseException if <var>scheme</var> is null
477      */

public URI(String scheme, String host, String path) throws ParseException
{
    // no userinfo, query or fragment; -1 means no explicit port
    this(scheme, null, host, -1, path, null, null);
}
482
483
484     /**
485      * Constructs a URI from the given parts.
486      *
487      * @param scheme the scheme (sometimes known as protocol)
488      * @param host the host
489      * @param port the port
490      * @param path the path part
491      * @exception ParseException if <var>scheme</var> is null
492      */

public URI(String scheme, String host, int port, String path)
        throws ParseException
{
    // no userinfo, query or fragment
    this(scheme, null, host, port, path, null, null);
}
498
499
500     /**
501      * Constructs a URI from the given parts. Any part except for the
502      * the scheme may be null.
503      *
504      * @param scheme the scheme (sometimes known as protocol)
505      * @param userinfo the userinfo
506      * @param host the host
507      * @param port the port
508      * @param path the path part
509      * @param query the query string
510      * @param fragment the fragment identifier
511      * @exception ParseException if <var>scheme</var> is null
512      */

public URI(String scheme, String userinfo, String host, int port,
           String path, String query, String fragment)
        throws ParseException
{
    if (scheme == null)
        throw new ParseException("missing scheme");

    // fixed locale: the result must not depend on the default locale
    this.scheme = scheme.trim().toLowerCase(Locale.ENGLISH);
    if (userinfo != null)  this.userinfo = unescape(userinfo.trim());
    if (host != null)      this.host     = host.trim();
    // the scheme's default port is left as -1 (the field's initial value)
    if (port != defaultPort(scheme))  this.port = port;
    if (path != null)      this.path     = path.trim();      // ???
    if (query != null)     this.query    = query.trim();
    if (fragment != null)  this.fragment = fragment.trim();

    // URIs built from parts are always treated as generic
    this.is_generic = true;
}
529
530
531     /**
532      * Constructs an opaque URI from the given parts.
533      *
534      * @param scheme the scheme (sometimes known as protocol)
535      * @param opaque the opaque part
536      * @exception ParseException if <var>scheme</var> is null
537      */

public URI(String scheme, String opaque)
        throws ParseException
{
    if (scheme == null)
        throw new ParseException("missing scheme");

    // fixed locale: the result must not depend on the default locale
    this.scheme = scheme.trim().toLowerCase(Locale.ENGLISH);
    this.opaque = opaque;       // stored as given, may be null

    this.is_generic = false;
}
548
549
550     // Class Methods
551

552     /**
553      * @return true if the scheme should be parsed according to the
554      * generic-URI syntax
555      */

556     public static boolean usesGenericSyntax(String JavaDoc scheme)
557     {
558     scheme = scheme.trim();
559
560     if (scheme.equalsIgnoreCase("http") ||
561         scheme.equalsIgnoreCase("https") ||
562         scheme.equalsIgnoreCase("shttp") ||
563         scheme.equalsIgnoreCase("coffee") ||
564         scheme.equalsIgnoreCase("ftp") ||
565         scheme.equalsIgnoreCase("file") ||
566         scheme.equalsIgnoreCase("gopher") ||
567         scheme.equalsIgnoreCase("nntp") ||
568         scheme.equalsIgnoreCase("smtp") ||
569         scheme.equalsIgnoreCase("telnet") ||
570         scheme.equalsIgnoreCase("news") ||
571         scheme.equalsIgnoreCase("snews") ||
572         scheme.equalsIgnoreCase("hnews") ||
573         scheme.equalsIgnoreCase("rwhois") ||
574         scheme.equalsIgnoreCase("whois++") ||
575         scheme.equalsIgnoreCase("imap") ||
576         scheme.equalsIgnoreCase("pop") ||
577         scheme.equalsIgnoreCase("wais") ||
578         scheme.equalsIgnoreCase("irc") ||
579         scheme.equalsIgnoreCase("nfs") ||
580         scheme.equalsIgnoreCase("ldap") ||
581         scheme.equalsIgnoreCase("prospero")||
582         scheme.equalsIgnoreCase("z39.50r") ||
583         scheme.equalsIgnoreCase("z39.50s") ||
584         scheme.equalsIgnoreCase("sip") ||
585         scheme.equalsIgnoreCase("sips") ||
586         scheme.equalsIgnoreCase("sipt") ||
587         scheme.equalsIgnoreCase("sipu") ||
588         scheme.equalsIgnoreCase("vemmi") ||
589         scheme.equalsIgnoreCase("videotex"))
590         return true;
591
592     /* Note: schemes which definitely don't use the generic-URI syntax
593      * and must therefore never appear in the above list:
594      * "urn", "mailto", "sdp", "service", "tv", "gsm-sms", "tel", "fax",
595      * "modem", "eid", "cid", "mid", "data"
596      */

597     return false;
598     }
599
600
601     /**
602      * Return the default port used by a given protocol.
603      *
604      * @param protocol the protocol
605      * @return the port number, or 0 if unknown
606      */

607     public final static int defaultPort(String JavaDoc protocol)
608     {
609     String JavaDoc prot = protocol.trim();
610
611     if (prot.equalsIgnoreCase("http") ||
612         prot.equalsIgnoreCase("shttp") ||
613         prot.equalsIgnoreCase("http-ng") ||
614         prot.equalsIgnoreCase("coffee"))
615         return 80;
616     else if (prot.equalsIgnoreCase("https"))
617         return 443;
618     else if (prot.equalsIgnoreCase("ftp"))
619         return 21;
620     else if (prot.equalsIgnoreCase("telnet"))
621         return 23;
622     else if (prot.equalsIgnoreCase("nntp") ||
623          prot.equalsIgnoreCase("news"))
624         return 119;
625     else if (prot.equalsIgnoreCase("snews"))
626         return 563;
627     else if (prot.equalsIgnoreCase("hnews"))
628         return 80;
629     else if (prot.equalsIgnoreCase("smtp"))
630         return 25;
631     else if (prot.equalsIgnoreCase("gopher"))
632         return 70;
633     else if (prot.equalsIgnoreCase("wais"))
634         return 210;
635     else if (prot.equalsIgnoreCase("whois"))
636         return 43;
637     else if (prot.equalsIgnoreCase("whois++"))
638         return 63;
639     else if (prot.equalsIgnoreCase("rwhois"))
640         return 4321;
641     else if (prot.equalsIgnoreCase("imap"))
642         return 143;
643     else if (prot.equalsIgnoreCase("pop"))
644         return 110;
645     else if (prot.equalsIgnoreCase("prospero"))
646         return 1525;
647     else if (prot.equalsIgnoreCase("irc"))
648         return 194;
649     else if (prot.equalsIgnoreCase("ldap"))
650         return 389;
651     else if (prot.equalsIgnoreCase("nfs"))
652         return 2049;
653     else if (prot.equalsIgnoreCase("z39.50r") ||
654          prot.equalsIgnoreCase("z39.50s"))
655         return 210;
656     else if (prot.equalsIgnoreCase("vemmi"))
657         return 575;
658     else if (prot.equalsIgnoreCase("videotex"))
659         return 516;
660     else
661         return 0;
662     }
663
664
665     // Instance Methods
666

667     /**
668      * @return the scheme (often also referred to as protocol)
669      */

670     public String JavaDoc getScheme()
671     {
672     return scheme;
673     }
674
675
676     /**
677      * @return the opaque part, or null if this URI is generic
678      */

679     public String JavaDoc getOpaque()
680     {
681     return opaque;
682     }
683
684
685     /**
686      * @return the host
687      */

688     public String JavaDoc getHost()
689     {
690     return host;
691     }
692
693
694     /**
695      * @return the port, or -1 if it's the default port
696      */

697     public int getPort()
698     {
699     return port;
700     }
701
702
703     /**
704      * @return the user info
705      */

706     public String JavaDoc getUserinfo()
707     {
708     return userinfo;
709     }
710
711
712     /**
713      * @return the path; this includes the query string
714      */

715     public String JavaDoc getPath()
716     {
717     if (query != null)
718         if (path != null)
719         return path + "?" + query;
720         else
721         return "?" + query;
722     return path;
723     }
724
725
726     /**
727      * @return the query string
728      */

729     public String JavaDoc getQueryString()
730     {
731     return query;
732     }
733
734
735     /**
736      * @return the fragment
737      */

738     public String JavaDoc getFragment()
739     {
740     return fragment;
741     }
742
743
744     /**
745      * Does the scheme specific part of this URI use the generic-URI syntax?
746      *
747      * <P>In general URI are split into two categories: opaque-URI and
748      * generic-URI. The generic-URI syntax is the syntax most are familiar
749      * with from URLs such as ftp- and http-URLs, which is roughly:
750      * <PRE>
751      * generic-URI = scheme ":" [ "//" server ] [ "/" ] [ path_segments ] [ "?" query ]
752      * </PRE>
753      * (see draft-fielding-uri-syntax-03 for exact syntax). Only URLs
754      * using the generic-URI syntax can be used to create and resolve
755      * relative URIs.
756      *
757      * <P>Whether a given scheme is parsed according to the generic-URI
758      * syntax or whether it is treated as opaque is determined by an internal
759      * table of URI schemes.
760      *
761      * @see <A HREF="http://www.ics.uci.edu/pub/ietf/uri/rfc2396.txt">rfc-2396</A>
762      */

763     public boolean isGenericURI()
764     {
765     return is_generic;
766     }
767
768
769     /**
770      * Will try to create a java.net.URL object from this URI.
771      *
772      * @return the URL
773      * @exception MalformedURLException if no handler is available for the
774      * scheme
775      */

776     public URL JavaDoc toURL() throws MalformedURLException JavaDoc
777     {
778     if (url != null) return url;
779
780     if (opaque != null)
781         return (url = new URL JavaDoc(scheme + ":" + opaque));
782
783     String JavaDoc hostinfo;
784     if (userinfo != null && host != null)
785         hostinfo = userinfo + "@" + host;
786     else if (userinfo != null)
787         hostinfo = userinfo + "@";
788     else
789         hostinfo = host;
790
791     StringBuffer JavaDoc file = new StringBuffer JavaDoc(100);
792
793     if (path != null)
794         file.append(escape(path.toCharArray(), uricChar));
795
796     if (query != null)
797     {
798         file.append('?');
799         file.append(escape(query.toCharArray(), uricChar));
800     }
801
802     if (fragment != null)
803     {
804         file.append('#');
805         file.append(escape(fragment.toCharArray(), uricChar));
806     }
807
808     url = new URL JavaDoc(scheme, hostinfo, port, file.toString());
809     return url;
810     }
811
812
813     /**
814      * @return a string representation of this URI suitable for use in
815      * links, headers, etc.
816      */

817     public String JavaDoc toExternalForm()
818     {
819     StringBuffer JavaDoc uri = new StringBuffer JavaDoc(100);
820
821     if (scheme != null)
822     {
823         uri.append(escape(scheme.toCharArray(), schemeChar));
824         uri.append(':');
825     }
826
827     if (opaque != null) // it's an opaque-uri
828
{
829         uri.append(escape(opaque.toCharArray(), uricChar));
830         return uri.toString();
831     }
832
833     if (userinfo != null || host != null || port != -1)
834         uri.append("//");
835
836     if (userinfo != null)
837     {
838         uri.append(escape(userinfo.toCharArray(), userinfoChar));
839         uri.append('@');
840     }
841
842     if (host != null)
843         uri.append(host.toCharArray());
844
845     if (port != -1)
846     {
847         uri.append(':');
848         uri.append(port);
849     }
850
851     if (path != null)
852         uri.append(path.toCharArray());
853
854     if (query != null)
855     {
856         uri.append('?');
857         uri.append(escape(query.toCharArray(), uricChar));
858     }
859
860     if (fragment != null)
861     {
862         uri.append('#');
863         uri.append(escape(fragment.toCharArray(), uricChar));
864     }
865
866     return uri.toString();
867     }
868
869
870     /**
871      * @see #toExternalForm
872      */

873     public String JavaDoc toString()
874     {
875     return toExternalForm();
876     }
877
878
879     /**
880      * @return true if <var>other</var> is either a URI or URL and it
881      * matches the current URI
882      */

883     public boolean equals(Object JavaDoc other)
884     {
885     if (other instanceof URI)
886     {
887         URI o = (URI) other;
888         return (scheme.equals(o.scheme) &&
889             (
890              !is_generic &&
891              (opaque == null && o.opaque == null ||
892               opaque != null && o.opaque != null &&
893               opaque.equals(o.opaque)) ||
894
895              is_generic &&
896              (userinfo == null && o.userinfo == null ||
897               userinfo != null && o.userinfo != null &&
898               userinfo.equals(o.userinfo)) &&
899              (host == null && o.host == null ||
900               host != null && o.host != null &&
901               host.equalsIgnoreCase(o.host)) &&
902              port == o.port &&
903              (path == null && o.path == null ||
904               path != null && o.path != null &&
905               unescapeNoPE(path).equals(unescapeNoPE(o.path))) &&
906              (query == null && o.query == null ||
907               query != null && o.query != null &&
908               unescapeNoPE(query).equals(unescapeNoPE(o.query))) &&
909              (fragment == null && o.fragment == null ||
910               fragment != null && o.fragment != null &&
911               unescapeNoPE(fragment).equals(unescapeNoPE(o.fragment)))
912             ));
913     }
914
915     if (other instanceof URL JavaDoc)
916     {
917         URL JavaDoc o = (URL JavaDoc) other;
918         String JavaDoc h, f;
919
920         if (userinfo != null)
921         h = userinfo + "@" + host;
922         else
923         h = host;
924
925         if (query != null)
926         f = path + "?" + query;
927         else
928         f = path;
929
930         return (scheme.equalsIgnoreCase(o.getProtocol()) &&
931             (!is_generic && opaque.equals(o.getFile()) ||
932              is_generic &&
933              (h == null && o.getHost() == null ||
934               h != null && o.getHost() != null &&
935               h.equalsIgnoreCase(o.getHost())) &&
936              (port == o.getPort() ||
937               o.getPort() == defaultPort(scheme)) &&
938              (f == null && o.getFile() == null ||
939               f != null && o.getFile() != null &&
940               unescapeNoPE(f).equals(unescapeNoPE(o.getFile()))) &&
941              (fragment == null && o.getRef() == null ||
942               fragment != null && o.getRef() != null &&
943               unescapeNoPE(fragment).equals(unescapeNoPE(o.getRef())))
944              )
945             );
946     }
947
948     return false;
949     }
950
951
952     /**
953      * Escape any character not in the given character class.
954      *
955      * @param elem the array of characters to escape
956      * @param allowed_char the BitSet of all allowed characters
957      * @return the elem array with all characters not in allowed_char
958      * escaped
959      */

960     private static char[] escape(char[] elem, BitSet JavaDoc allowed_char)
961     {
962     int cnt=0;
963     for (int idx=0; idx<elem.length; idx++)
964         if (!allowed_char.get(elem[idx])) cnt++;
965
966     if (cnt == 0) return elem;
967
968     char[] tmp = new char[elem.length + 2*cnt];
969     for (int idx=0, pos=0; idx<elem.length; idx++, pos++)
970     {
971         if (allowed_char.get(elem[idx]))
972         tmp[pos] = elem[idx];
973         else
974         {
975         if (elem[idx] > 255)
976             throw new RuntimeException JavaDoc("Can't handle non 8-bt chars");
977         tmp[pos++] = '%';
978         tmp[pos++] = hex[(elem[idx] >> 4) & 0xf];
979         tmp[pos] = hex[elem[idx] & 0xf];
980         }
981     }
982
983     return tmp;
984     }
985
986     private static final char[] hex =
987         {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
988
989
990     /**
991      * Unescape escaped characters (i.e. %xx).
992      *
993      * @param str the string to unescape
994      * @return the unescaped string
995      * @exception ParseException if the two digits following a `%' are
996      * not a valid hex number
997      */

998     static final String JavaDoc unescape(String JavaDoc str) throws ParseException
999     {
1000    if (str == null || str.indexOf('%') == -1)
1001        return str; // an optimization
1002

1003    char[] buf = str.toCharArray();
1004    char[] res = new char[buf.length];
1005
1006    int didx=0;
1007    for (int sidx=0; sidx<buf.length; sidx++, didx++)
1008    {
1009        if (buf[sidx] == '%')
1010        {
1011        int ch;
1012                try
1013                {
1014            ch = Integer.parseInt(str.substring(sidx+1,sidx+3), 16);
1015            if (ch < 0) throw new NumberFormatException JavaDoc();
1016                }
1017                catch (NumberFormatException JavaDoc e)
1018                {
1019                    throw new ParseException(str.substring(sidx,sidx+3) +
1020                                            " is an invalid code");
1021                }
1022        res[didx] = (char) ch;
1023        sidx += 2;
1024        }
1025        else
1026        res[didx] = buf[sidx];
1027    }
1028
1029    return new String JavaDoc(res, 0, didx);
1030    }
1031
1032
1033    /**
1034     * Unescape escaped characters (i.e. %xx). If a ParseException would
1035     * be thrown then just return the original string.
1036     *
1037     * @param str the string to unescape
1038     * @return the unescaped string, or the original string if unescaping
1039     * would throw a ParseException
1040     * @see #unescape(java.lang.String)
1041     */

1042    private static final String JavaDoc unescapeNoPE(String JavaDoc str)
1043    {
1044    try
1045        { return unescape(str); }
1046    catch (ParseException pe)
1047        { return str; }
1048    }
1049
1050
1051    /**
1052     * Run test set.
1053     *
1054     * @exception Exception if any test fails
1055     */

1056    public static void main(String JavaDoc args[]) throws Exception JavaDoc
1057    {
1058    System.err.println();
1059    System.err.println("*** URI Tests ...");
1060
1061
1062    /* Relative URI test set, taken from Section C of rfc-2396 and
1063     * Roy's test1. All Roy's URI parser tests can be found at
1064     * http://www.ics.uci.edu/~fielding/url/
1065     */

1066
1067    URI base = new URI("http://a/b/c/d;p?q");
1068
1069    // normal examples
1070
testParser(base, "g:h", "g:h");
1071    testParser(base, "g", "http://a/b/c/g");
1072    testParser(base, "./g", "http://a/b/c/g");
1073    testParser(base, "g/", "http://a/b/c/g/");
1074    testParser(base, "/g", "http://a/g");
1075    testParser(base, "//g", "http://g");
1076    testParser(base, "?y", "http://a/b/c/?y");
1077    testParser(base, "g?y", "http://a/b/c/g?y");
1078    testParser(base, "#s", "http://a/b/c/d;p?q#s");
1079    testParser(base, "g#s", "http://a/b/c/g#s");
1080    testParser(base, "g?y#s", "http://a/b/c/g?y#s");
1081    testParser(base, ";x", "http://a/b/c/;x");
1082    testParser(base, "g;x", "http://a/b/c/g;x");
1083    testParser(base, "g;x?y#s", "http://a/b/c/g;x?y#s");
1084    testParser(base, ".", "http://a/b/c/");
1085    testParser(base, "./", "http://a/b/c/");
1086    testParser(base, "..", "http://a/b/");
1087    testParser(base, "../", "http://a/b/");
1088    testParser(base, "../g", "http://a/b/g");
1089    testParser(base, "../..", "http://a/");
1090    testParser(base, "../../", "http://a/");
1091    testParser(base, "../../g", "http://a/g");
1092
1093    // abnormal examples
1094
testParser(base, "", "http://a/b/c/d;p?q");
1095    testParser(base, "/./g", "http://a/./g");
1096    testParser(base, "/../g", "http://a/../g");
1097    testParser(base, "../../../g", "http://a/../g");
1098    testParser(base, "../../../../g", "http://a/../../g");
1099    testParser(base, "g.", "http://a/b/c/g.");
1100    testParser(base, ".g", "http://a/b/c/.g");
1101    testParser(base, "g..", "http://a/b/c/g..");
1102    testParser(base, "..g", "http://a/b/c/..g");
1103    testParser(base, "./../g", "http://a/b/g");
1104    testParser(base, "./g/.", "http://a/b/c/g/");
1105    testParser(base, "g/./h", "http://a/b/c/g/h");
1106    testParser(base, "g/../h", "http://a/b/c/h");
1107    testParser(base, "g;x=1/./y", "http://a/b/c/g;x=1/y");
1108    testParser(base, "g;x=1/../y", "http://a/b/c/y");
1109    testParser(base, "g?y/./x", "http://a/b/c/g?y/./x");
1110    testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
1111    testParser(base, "g#s/./x", "http://a/b/c/g#s/./x");
1112    testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
1113    testParser(base, "http:g", "http:g");
1114    testParser(base, "http:", "http:");
1115    testParser(base, "./g:h", "http://a/b/c/g:h");
1116
1117
1118    /* Roy's test2
1119     */

1120    base = new URI("http://a/b/c/d;p?q=1/2");
1121
1122    testParser(base, "g", "http://a/b/c/g");
1123    testParser(base, "./g", "http://a/b/c/g");
1124    testParser(base, "g/", "http://a/b/c/g/");
1125    testParser(base, "/g", "http://a/g");
1126    testParser(base, "//g", "http://g");
1127    testParser(base, "?y", "http://a/b/c/?y");
1128    testParser(base, "g?y", "http://a/b/c/g?y");
1129    testParser(base, "g?y/./x", "http://a/b/c/g?y/./x");
1130    testParser(base, "g?y/../x", "http://a/b/c/g?y/../x");
1131    testParser(base, "g#s", "http://a/b/c/g#s");
1132    testParser(base, "g#s/./x", "http://a/b/c/g#s/./x");
1133    testParser(base, "g#s/../x", "http://a/b/c/g#s/../x");
1134    testParser(base, "./", "http://a/b/c/");
1135    testParser(base, "../", "http://a/b/");
1136    testParser(base, "../g", "http://a/b/g");
1137    testParser(base, "../../", "http://a/");
1138    testParser(base, "../../g", "http://a/g");
1139
1140
1141    /* Roy's test3
1142     */

1143    base = new URI("http://a/b/c/d;p=1/2?q");
1144
1145    testParser(base, "g", "http://a/b/c/d;p=1/g");
1146    testParser(base, "./g", "http://a/b/c/d;p=1/g");
1147    testParser(base, "g/", "http://a/b/c/d;p=1/g/");
1148    testParser(base, "g?y", "http://a/b/c/d;p=1/g?y");
1149    testParser(base, ";x", "http://a/b/c/d;p=1/;x");
1150    testParser(base, "g;x", "http://a/b/c/d;p=1/g;x");
1151    testParser(base, "g;x=1/./y", "http://a/b/c/d;p=1/g;x=1/y");
1152    testParser(base, "g;x=1/../y", "http://a/b/c/d;p=1/y");
1153    testParser(base, "./", "http://a/b/c/d;p=1/");
1154    testParser(base, "../", "http://a/b/c/");
1155    testParser(base, "../g", "http://a/b/c/g");
1156    testParser(base, "../../", "http://a/b/");
1157    testParser(base, "../../g", "http://a/b/g");
1158
1159
1160    /* Roy's test4
1161     */

1162    base = new URI("fred:///s//a/b/c");
1163
1164    testParser(base, "g:h", "g:h");
1165    /* we have to skip these, as usesGeneraicSyntax("fred") returns false
1166     * and we therefore don't parse relative URI's here. But test5 is
1167     * the same except that the http scheme is used.
1168    testParser(base, "g", "fred:///s//a/b/g");
1169    testParser(base, "./g", "fred:///s//a/b/g");
1170    testParser(base, "g/", "fred:///s//a/b/g/");
1171    testParser(base, "/g", "fred:///g");
1172    testParser(base, "//g", "fred://g");
1173    testParser(base, "//g/x", "fred://g/x");
1174    testParser(base, "///g", "fred:///g");
1175    testParser(base, "./", "fred:///s//a/b/");
1176    testParser(base, "../", "fred:///s//a/");
1177    testParser(base, "../g", "fred:///s//a/g");
1178    testParser(base, "../../", "fred:///s//");
1179    testParser(base, "../../g", "fred:///s//g");
1180    testParser(base, "../../../g", "fred:///s/g");
1181    testParser(base, "../../../../g", "fred:///g");
1182     */

1183    testPE(base, "g");
1184
1185
1186    /* Roy's test5
1187     */

1188    base = new URI("http:///s//a/b/c");
1189
1190    testParser(base, "g:h", "g:h");
1191    testParser(base, "g", "http:///s//a/b/g");
1192    testParser(base, "./g", "http:///s//a/b/g");
1193    testParser(base, "g/", "http:///s//a/b/g/");
1194    testParser(base, "/g", "http:///g");
1195    testParser(base, "//g", "http://g");
1196    testParser(base, "//g/x", "http://g/x");
1197    testParser(base, "///g", "http:///g");
1198    testParser(base, "./", "http:///s//a/b/");
1199    testParser(base, "../", "http:///s//a/");
1200    testParser(base, "../g", "http:///s//a/g");
1201    testParser(base, "../../", "http:///s//");
1202    testParser(base, "../../g", "http:///s//g");
1203    testParser(base, "../../../g", "http:///s/g");
1204    testParser(base, "../../../../g", "http:///g");
1205
1206
1207    /* equality tests */
1208
1209    // protocol
1210
testNotEqual("http://a/", "nntp://a/");
1211    testNotEqual("http://a/", "https://a/");
1212    testNotEqual("http://a/", "shttp://a/");
1213    testEqual("http://a/", "Http://a/");
1214    testEqual("http://a/", "hTTP://a/");
1215    testEqual("url:http://a/", "hTTP://a/");
1216    testEqual("urI:http://a/", "hTTP://a/");
1217
1218    // host
1219
testEqual("http://a/", "Http://A/");
1220    testEqual("http://a.b.c/", "Http://A.b.C/");
1221    testEqual("http:///", "Http:///");
1222    testNotEqual("http:///", "Http://a/");
1223
1224    // port
1225
testEqual("http://a.b.c/", "Http://A.b.C:80/");
1226    testEqual("http://a.b.c:/", "Http://A.b.C:80/");
1227    testEqual("nntp://a", "nntp://a:119");
1228    testEqual("nntp://a:", "nntp://a:119");
1229    testEqual("nntp://a/", "nntp://a:119/");
1230    testNotEqual("nntp://a", "nntp://a:118");
1231    testNotEqual("nntp://a", "nntp://a:0");
1232    testNotEqual("nntp://a:", "nntp://a:0");
1233    testEqual("telnet://:23/", "telnet:///");
1234    testPE(null, "ftp://:a/");
1235    testPE(null, "ftp://:-1/");
1236    testPE(null, "ftp://::1/");
1237
1238    // userinfo
1239
testNotEqual("ftp://me@a", "ftp://a");
1240    testNotEqual("ftp://me@a", "ftp://Me@a");
1241    testEqual("ftp://Me@a", "ftp://Me@a");
1242    testEqual("ftp://Me:My@a:21", "ftp://Me:My@a");
1243    testEqual("ftp://Me:My@a:", "ftp://Me:My@a");
1244    testNotEqual("ftp://Me:My@a:21", "ftp://Me:my@a");
1245    testNotEqual("ftp://Me:My@a:", "ftp://Me:my@a");
1246
1247    // path
1248
testEqual("ftp://a/b%2b/", "ftp://a/b+/");
1249    testEqual("ftp://a/b%2b/", "ftp://a/b+/");
1250    testEqual("ftp://a/b%5E/", "ftp://a/b^/");
1251    testNotEqual("ftp://a/b%3f/", "ftp://a/b?/");
1252
1253    System.err.println("*** Tests finished successfuly");
1254    }
1255
1256    private static void testParser(URI base, String JavaDoc relURI, String JavaDoc result)
1257        throws Exception JavaDoc
1258    {
1259    if (!(new URI(base, relURI).toString().equals(result)))
1260    {
1261        String JavaDoc nl = System.getProperty("line.separator");
1262        throw new Exception JavaDoc("Test failed: " + nl +
1263                " base-URI = <" + base + ">" + nl +
1264                " rel-URI = <" + relURI + ">" + nl+
1265                " expected <" + result + ">" + nl+
1266                " but got <" + new URI(base, relURI) + ">");
1267    }
1268    }
1269
1270    private static void testEqual(String JavaDoc one, String JavaDoc two) throws Exception JavaDoc
1271    {
1272    if (!(new URI(one).equals(new URI(two))))
1273    {
1274        String JavaDoc nl = System.getProperty("line.separator");
1275        throw new Exception JavaDoc("Test failed: " + nl +
1276                " <" + one + "> != <" + two + ">");
1277    }
1278    }
1279
1280    private static void testNotEqual(String JavaDoc one, String JavaDoc two) throws Exception JavaDoc
1281    {
1282    if ((new URI(one).equals(new URI(two))))
1283    {
1284        String JavaDoc nl = System.getProperty("line.separator");
1285        throw new Exception JavaDoc("Test failed: " + nl +
1286                " <" + one + "> == <" + two + ">");
1287    }
1288    }
1289
1290    private static void testPE(URI base, String JavaDoc uri) throws Exception JavaDoc
1291    {
1292    boolean got_pe = false;
1293    try
1294        { new URI(base, uri); }
1295    catch (ParseException pe)
1296        { got_pe = true; }
1297    if (!got_pe)
1298    {
1299        String JavaDoc nl = System.getProperty("line.separator");
1300        throw new Exception JavaDoc("Test failed: " + nl +
1301                " <" + uri + "> should be invalid");
1302    }
1303    }
1304}
1305
1306
Popular Tags