KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > URIUtil


/* Copyright 2004 Elliotte Rusty Harold
   
   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.
   
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307 USA
   
   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/

21
22 package nu.xom;
23
import java.io.UnsupportedEncodingException;
25
26
27 class URIUtil {
28
29     
30     static boolean isOpaque(String JavaDoc uri) {
31         
32         int colon = uri.indexOf(':');
33         if (colon < 1) return false;
34         // This next line is the difference between absolute and opaque
35
if (uri.substring(colon+1).startsWith("/")) return false;
36         if (!Verifier.isAlpha(uri.charAt(0))) return false;
37         for (int i = 1; i < colon; i++) {
38              if (!Verifier.isSchemeCharacter(uri.charAt(i))) {
39                  return false;
40              }
41         }
42         return true;
43         
44     }
45
46     
47     static boolean isAbsolute(String JavaDoc uri) {
48         
49         int colon = uri.indexOf(':');
50         if (colon < 1) return false;
51         if (!Verifier.isAlpha(uri.charAt(0))) return false;
52         for (int i = 1; i < colon; i++) {
53              if (!Verifier.isSchemeCharacter(uri.charAt(i))) return false;
54         }
55         return true;
56         
57     }
58     
59     
60     // This doesn't do enough error checking to be a public API.
61
static String JavaDoc absolutize(String JavaDoc baseURI, String JavaDoc spec) {
62         
63         if ("".equals(baseURI) || baseURI == null) return spec;
64         
65         ParsedURI base = new ParsedURI(baseURI);
66         ParsedURI R = new ParsedURI(spec);
67         ParsedURI T = new ParsedURI();
68         
69         if (R.scheme != null) {
70             T.scheme = R.scheme;
71             T.authority = R.authority;
72             T.query = R.query;
73             T.path = removeDotSegments(R.path);
74         }
75         else {
76             if (R.authority != null) {
77                 T.authority = R.authority;
78                 T.query = R.query;
79                 T.path = removeDotSegments(R.path);
80             }
81             else {
82                 if ("".equals(R.path)) {
83                     T.path = base.path;
84                     if (R.query != null) {
85                         T.query = R.query;
86                     }
87                     else {
88                         T.query = base.query;
89                     }
90                 }
91                 else {
92                     if (R.path.startsWith("/")) {
93                        T.path = removeDotSegments(R.path);
94                     }
95                     else {
96                        T.path = merge(base, R.path);
97                        T.path = removeDotSegments(T.path);
98                     }
99                     T.query = R.query;
100                 }
101                 T.authority = base.authority;
102             }
103             T.scheme = base.scheme;
104         }
105         // Fragment ID of base URI is never considered
106
T.fragment = R.fragment;
107         
108         return T.toString();
109         
110     }
111     
112     
113     private static String JavaDoc merge(ParsedURI base, String JavaDoc relativePath) {
114     
115         if (base.authority != null && "".equals(base.path)
116           && !"".equals(base.authority)) {
117             return "/" + relativePath;
118         }
119     
120         int lastSlash = base.path.lastIndexOf('/');
121         if (lastSlash == -1) return relativePath;
122         String JavaDoc topPath = base.path.substring(0, lastSlash+1);
123         return topPath + relativePath;
124         
125     }
126     
127     
128     private static String JavaDoc removeDotSegments(String JavaDoc path) {
129     
130         String JavaDoc output = "";
131
132         while (path.length() > 0) {
133             if (path.startsWith("../")) {
134                 path = path.substring(3);
135             }
136             else if (path.startsWith("./")) {
137                 path = path.substring(2);
138             }
139             else if (path.startsWith("/./")) {
140                 path = '/' + path.substring(3);
141             }
142             else if (path.equals("/.")) {
143                 path = "/";
144             }
145             else if (path.startsWith("/../")) {
146                 path = '/' + path.substring(4);
147                 int lastSlash = output.lastIndexOf('/');
148                 if (lastSlash != -1) output = output.substring(0, lastSlash);
149             }
150             else if (path.equals("/..")) {
151                 path = "/";
152                 int lastSlash = output.lastIndexOf('/');
153                 if (lastSlash != -1) {
154                     output = output.substring(0, lastSlash);
155                 }
156             }
157             else if (path.equals(".") || path.equals("..")) {
158                 path = "";
159             }
160             else {
161                 int nextSlash = path.indexOf('/');
162                 if (nextSlash == 0) nextSlash = path.indexOf('/', 1);
163                 if (nextSlash == -1) {
164                     output += path;
165                     path = "";
166                 }
167                 else {
168                     output += path.substring(0, nextSlash);
169                     path = path.substring(nextSlash);
170                 }
171             }
172         }
173         
174         return output;
175         
176     }
177
178
179     // really just a struct
180
static class ParsedURI {
181      
182         String JavaDoc scheme;
183         String JavaDoc schemeSpecificPart;
184         String JavaDoc query;
185         String JavaDoc fragment;
186         String JavaDoc authority;
187         String JavaDoc path = "";
188         
189         ParsedURI(String JavaDoc spec) {
190             
191             int colon = spec.indexOf(':');
192             int question = -1;
193             
194             // URIs can only contain one sharp sign
195
int sharp = spec.lastIndexOf('#');
196             
197             // Fragment IDs can contain question marks so we only read
198
// the question mark before the fragment ID, if any
199
if (sharp == -1) question = spec.indexOf('?');
200             else question = spec.substring(0, sharp).indexOf('?');
201
202             if (colon != -1) scheme = spec.substring(0, colon);
203             
204             if (question == -1 && sharp == -1) {
205                 schemeSpecificPart = spec.substring(colon+1);
206             }
207             else if (question != -1) {
208                 if (question < colon) {
209                     MalformedURIException ex
210                       = new MalformedURIException("Unparseable URI");
211                     ex.setData(spec);
212                     throw ex;
213                 }
214                 schemeSpecificPart = spec.substring(colon+1, question);
215             }
216             else {
217                 if (sharp < colon) {
218                     MalformedURIException ex
219                       = new MalformedURIException("Unparseable URI");
220                     ex.setData(spec);
221                     throw ex;
222                 }
223                 schemeSpecificPart = spec.substring(colon+1, sharp);
224             }
225             
226             if (sharp != -1) {
227                 fragment = spec.substring(sharp+1);
228             }
229             
230             if (question != -1) {
231                 if (sharp == -1) {
232                     query = spec.substring(question+1);
233                 }
234                 else {
235                     query = spec.substring(question+1, sharp);
236                 }
237             }
238
239             if (schemeSpecificPart.startsWith("//")) {
240                 int authorityBegin = 2;
241                 int authorityEnd = schemeSpecificPart.indexOf('/', authorityBegin);
242                 if (authorityEnd == -1) {
243                     authority = schemeSpecificPart.substring(2);
244                     path = "";
245                 }
246                 else {
247                     authority = schemeSpecificPart.substring(authorityBegin, authorityEnd);
248                     path = schemeSpecificPart.substring(authorityEnd);
249                 }
250             }
251             else {
252                 path = schemeSpecificPart;
253             }
254             
255         }
256
257         ParsedURI() {}
258         
259         public String JavaDoc toString() {
260         
261             StringBuffer JavaDoc result = new StringBuffer JavaDoc(20);
262             if (scheme != null) result.append(scheme + ':');
263             if (schemeSpecificPart != null) {
264                 result.append(schemeSpecificPart);
265             }
266             else {
267                 result.append("//");
268                 if (authority != null) result.append(authority);
269                 result.append(path);
270             }
271             
272             if (query != null) result.append('?' + query);
273             if (fragment != null) result.append('#' + fragment);
274             
275             return result.toString();
276         }
277         
278     }
279
280
281     static String JavaDoc toURI(String JavaDoc iri) {
282     
283         int length = iri.length();
284         StringBuffer JavaDoc uri = new StringBuffer JavaDoc(length);
285         for (int i = 0; i < length; i++) {
286             char c = iri.charAt(i);
287             switch(c) {
288                 case ' ':
289                     uri.append("%20");
290                     break;
291                 case '!':
292                     uri.append(c);
293                     break;
294                 case '"':
295                     uri.append("%22");
296                     break;
297                 case '#':
298                     uri.append(c);
299                     break;
300                 case '$':
301                     uri.append(c);
302                     break;
303                 case '%':
304                     uri.append(c);
305                     break;
306                 case '&':
307                     uri.append(c);
308                     break;
309                 case '\'':
310                     uri.append(c);
311                     break;
312                 case '(':
313                     uri.append(c);
314                     break;
315                 case ')':
316                     uri.append(c);
317                     break;
318                 case '*':
319                     uri.append(c);
320                     break;
321                 case '+':
322                     uri.append(c);
323                     break;
324                 case ',':
325                     uri.append(c);
326                     break;
327                 case '-':
328                     uri.append(c);
329                     break;
330                 case '.':
331                     uri.append(c);
332                     break;
333                 case '/':
334                     uri.append(c);
335                     break;
336                 case '0':
337                     uri.append(c);
338                     break;
339                 case '1':
340                     uri.append(c);
341                     break;
342                 case '2':
343                     uri.append(c);
344                     break;
345                 case '3':
346                     uri.append(c);
347                     break;
348                 case '4':
349                     uri.append(c);
350                     break;
351                 case '5':
352                     uri.append(c);
353                     break;
354                 case '6':
355                     uri.append(c);
356                     break;
357                 case '7':
358                     uri.append(c);
359                     break;
360                 case '8':
361                     uri.append(c);
362                     break;
363                 case '9':
364                     uri.append(c);
365                     break;
366                 case ':':
367                     uri.append(c);
368                     break;
369                 case ';':
370                     uri.append(c);
371                     break;
372                 case '<':
373                     uri.append("%3C");
374                     break;
375                 case '=':
376                     uri.append(c);
377                     break;
378                 case '>':
379                     uri.append("%3E");
380                     break;
381                 case '?':
382                     uri.append(c);
383                     break;
384                 case '@':
385                     uri.append(c);
386                     break;
387                 case 'A':
388                     uri.append(c);
389                     break;
390                 case 'B':
391                     uri.append(c);
392                     break;
393                 case 'C':
394                     uri.append(c);
395                     break;
396                 case 'D':
397                     uri.append(c);
398                     break;
399                 case 'E':
400                     uri.append(c);
401                     break;
402                 case 'F':
403                     uri.append(c);
404                     break;
405                 case 'G':
406                     uri.append(c);
407                     break;
408                 case 'H':
409                     uri.append(c);
410                     break;
411                 case 'I':
412                     uri.append(c);
413                     break;
414                 case 'J':
415                     uri.append(c);
416                     break;
417                 case 'K':
418                     uri.append(c);
419                     break;
420                 case 'L':
421                     uri.append(c);
422                     break;
423                 case 'M':
424                     uri.append(c);
425                     break;
426                 case 'N':
427                     uri.append(c);
428                     break;
429                 case 'O':
430                     uri.append(c);
431                     break;
432                 case 'P':
433                     uri.append(c);
434                     break;
435                 case 'Q':
436                     uri.append(c);
437                     break;
438                 case 'R':
439                     uri.append(c);
440                     break;
441                 case 'S':
442                     uri.append(c);
443                     break;
444                 case 'T':
445                     uri.append(c);
446                     break;
447                 case 'U':
448                     uri.append(c);
449                     break;
450                 case 'V':
451                     uri.append(c);
452                     break;
453                 case 'W':
454                     uri.append(c);
455                     break;
456                 case 'X':
457                     uri.append(c);
458                     break;
459                 case 'Y':
460                     uri.append(c);
461                     break;
462                 case 'Z':
463                     uri.append(c);
464                     break;
465                 case '[':
466                     uri.append(c);
467                     break;
468                 case '\\':
469                     uri.append("%5C");
470                     break;
471                 case ']':
472                     uri.append(c);
473                     break;
474                 case '^':
475                     uri.append("%5E");
476                     break;
477                 case '_':
478                     uri.append(c);
479                     break;
480                 case '`':
481                     uri.append("%60");
482                     break;
483                 case 'a':
484                     uri.append(c);
485                     break;
486                 case 'b':
487                     uri.append(c);
488                     break;
489                 case 'c':
490                     uri.append(c);
491                     break;
492                 case 'd':
493                     uri.append(c);
494                     break;
495                 case 'e':
496                     uri.append(c);
497                     break;
498                 case 'f':
499                     uri.append(c);
500                     break;
501                 case 'g':
502                     uri.append(c);
503                     break;
504                 case 'h':
505                     uri.append(c);
506                     break;
507                 case 'i':
508                     uri.append(c);
509                     break;
510                 case 'j':
511                     uri.append(c);
512                     break;
513                 case 'k':
514                     uri.append(c);
515                     break;
516                 case 'l':
517                     uri.append(c);
518                     break;
519                 case 'm':
520                     uri.append(c);
521                     break;
522                 case 'n':
523                     uri.append(c);
524                     break;
525                 case 'o':
526                     uri.append(c);
527                     break;
528                 case 'p':
529                     uri.append(c);
530                     break;
531                 case 'q':
532                     uri.append(c);
533                     break;
534                 case 'r':
535                     uri.append(c);
536                     break;
537                 case 's':
538                     uri.append(c);
539                     break;
540                 case 't':
541                     uri.append(c);
542                     break;
543                 case 'u':
544                     uri.append(c);
545                     break;
546                 case 'v':
547                     uri.append(c);
548                     break;
549                 case 'w':
550                     uri.append(c);
551                     break;
552                 case 'x':
553                     uri.append(c);
554                     break;
555                 case 'y':
556                     uri.append(c);
557                     break;
558                 case 'z':
559                     uri.append(c);
560                     break;
561                 case '{':
562                     uri.append("%7B");
563                     break;
564                 case '|':
565                     uri.append("%7C");
566                     break;
567                 case '}':
568                     uri.append("%7D");
569                     break;
570                 case '~':
571                     uri.append(c);
572                     break;
573                 default:
574                     uri.append(percentEscape(c));
575             }
576         }
577         return uri.toString();
578         
579     }
580
581     
582     static String JavaDoc percentEscape(char c) {
583         
584         StringBuffer JavaDoc result = new StringBuffer JavaDoc(3);
585         String JavaDoc s = String.valueOf(c);
586         try {
587             byte[] data = s.getBytes("UTF8");
588             for (int i = 0; i < data.length; i++) {
589                 result.append('%');
590                 String JavaDoc hex = Integer.toHexString(data[i]).toUpperCase();
591                 if (c < 16) {
592                     result.append('0');
593                     result.append(hex);
594                 }
595                 else {
596                     // When c is negative as a byte, (e.g. greater
597
// than 128) the hex strings come out as 8
598
// characters rather than 2.
599
result.append(hex.substring(hex.length()-2));
600                 }
601             }
602             return result.toString();
603         }
604         catch (UnsupportedEncodingException JavaDoc ex) {
605             throw new RuntimeException JavaDoc(
606               "Broken VM: does not recognize UTF-8 encoding");
607         }
608         
609     }
610
611     
612 }
613
Popular Tags