// Listing of org.apache.excalibur.source.SourceUtil, taken from the KickJava "Java API By Example" archive.

/*
 * Copyright 2002-2004 The Apache Software Foundation
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 *
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.excalibur.source;

import java.io.*;
import java.util.BitSet;
import java.util.Iterator;

import org.apache.avalon.framework.parameters.Parameters;
24
/**
 * Utility class for source resolving.
 *
 * @author <a href="mailto:dev@avalon.apache.org">Avalon Development Team</a>
 * @version CVS $Revision: 1.5 $ $Date: 2004/02/28 11:47:26 $
 */
public final class SourceUtil
{
34     private static final char[] alphabet = new char[]
35     {
36         'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7
37
'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15
38
'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23
39
'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31
40
'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39
41
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47
42
'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55
43
'4', '5', '6', '7', '8', '9', '+', '/'}; // 56 to 63
44

45     /**
46      * Append parameters to the uri.
47      * Each parameter is appended to the uri with "parameter=value",
48      * the parameters are separated by "&".
49      */

50     public static String JavaDoc appendParameters( String JavaDoc uri,
51                                            Parameters parameters )
52     {
53         if( parameters != null )
54         {
55             StringBuffer JavaDoc buffer = new StringBuffer JavaDoc( uri );
56             String JavaDoc[] keys = parameters.getNames();
57             String JavaDoc current;
58             char separator = ( uri.indexOf( "?" ) == -1 ? '?' : '&' );
59
60             if( keys != null )
61             {
62                 for( int i = 0; i < keys.length; i++ )
63                 {
64                     current = keys[ i ];
65                     buffer.append( separator )
66                         .append( current )
67                         .append( '=' )
68                         .append( SourceUtil.encode( parameters.getParameter( current, null ) ) );
69                     separator = '&';
70                 }
71             }
72             return buffer.toString();
73         }
74
75         return uri;
76     }
77
78     /**
79      * Append parameters to the uri
80      * Each parameter is appended to the uri with "parameter=value",
81      * the parameters are separated by "&".
82      */

83     public static String JavaDoc appendParameters( String JavaDoc uri,
84                                            SourceParameters parameters )
85     {
86         if( parameters != null )
87         {
88             StringBuffer JavaDoc buffer = new StringBuffer JavaDoc( uri );
89             Iterator JavaDoc keys = parameters.getParameterNames();
90             String JavaDoc current;
91             char separator = ( uri.indexOf( "?" ) == -1 ? '?' : '&' );
92             Iterator JavaDoc values;
93
94             while( keys.hasNext() == true )
95             {
96                 current = (String JavaDoc)keys.next();
97                 values = parameters.getParameterValues( current );
98                 while( values.hasNext() == true )
99                 {
100                     buffer.append( separator )
101                         .append( current )
102                         .append( '=' )
103                         .append( SourceUtil.encode( (String JavaDoc)values.next() ) );
104                     separator = '&';
105                 }
106             }
107             return buffer.toString();
108         }
109
110         return uri;
111     }
112
113     /**
114      * BASE 64 encoding.
115      * See also RFC 1421
116      */

117     public static String JavaDoc encodeBASE64( String JavaDoc s )
118     {
119         return encodeBASE64( s.getBytes() );
120     }
121
122     /**
123      * BASE 64 encoding.
124      * See also RFC 1421
125      */

126     public static String JavaDoc encodeBASE64( byte[] octetString )
127     {
128         int bits24;
129         int bits6;
130
131         char[] out = new char[ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];
132
133         int outIndex = 0;
134         int i = 0;
135
136         while( ( i + 3 ) <= octetString.length )
137         {
138             // store the octets
139
bits24 = ( octetString[ i++ ] & 0xFF ) << 16;
140             bits24 |= ( octetString[ i++ ] & 0xFF ) << 8;
141             bits24 |= ( octetString[ i++ ] & 0xFF ) << 0;
142
143             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
144             out[ outIndex++ ] = alphabet[ bits6 ];
145             bits6 = ( bits24 & 0x0003F000 ) >> 12;
146             out[ outIndex++ ] = alphabet[ bits6 ];
147             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
148             out[ outIndex++ ] = alphabet[ bits6 ];
149             bits6 = ( bits24 & 0x0000003F );
150             out[ outIndex++ ] = alphabet[ bits6 ];
151         }
152
153         if( octetString.length - i == 2 )
154         {
155             // store the octets
156
bits24 = ( octetString[ i ] & 0xFF ) << 16;
157             bits24 |= ( octetString[ i + 1 ] & 0xFF ) << 8;
158
159             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
160             out[ outIndex++ ] = alphabet[ bits6 ];
161             bits6 = ( bits24 & 0x0003F000 ) >> 12;
162             out[ outIndex++ ] = alphabet[ bits6 ];
163             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
164             out[ outIndex++ ] = alphabet[ bits6 ];
165
166             // padding
167
out[ outIndex++ ] = '=';
168         }
169         else if( octetString.length - i == 1 )
170         {
171             // store the octets
172
bits24 = ( octetString[ i ] & 0xFF ) << 16;
173
174             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
175             out[ outIndex++ ] = alphabet[ bits6 ];
176             bits6 = ( bits24 & 0x0003F000 ) >> 12;
177             out[ outIndex++ ] = alphabet[ bits6 ];
178
179             // padding
180
out[ outIndex++ ] = '=';
181             out[ outIndex++ ] = '=';
182         }
183
184         return new String JavaDoc( out );
185     }
186
187     /** A BitSet defining the characters which don't need encoding */
188     static BitSet JavaDoc charactersDontNeedingEncoding;
189     static final int characterCaseDiff = ( 'a' - 'A' );
190
191     /** Initialize the BitSet */
192     static
193     {
194         charactersDontNeedingEncoding = new BitSet JavaDoc( 256 );
195         int i;
196         for( i = 'a'; i <= 'z'; i++ )
197         {
198             charactersDontNeedingEncoding.set( i );
199         }
200         for( i = 'A'; i <= 'Z'; i++ )
201         {
202             charactersDontNeedingEncoding.set( i );
203         }
204         for( i = '0'; i <= '9'; i++ )
205         {
206             charactersDontNeedingEncoding.set( i );
207         }
208         charactersDontNeedingEncoding.set( '-' );
209         charactersDontNeedingEncoding.set( '_' );
210         charactersDontNeedingEncoding.set( '.' );
211         charactersDontNeedingEncoding.set( '*' );
212         charactersDontNeedingEncoding.set( '"' );
213     }
214
215     /**
216      * Translates a string into <code>x-www-form-urlencoded</code> format.
217      *
218      * @param s <code>String</code> to be translated.
219      * @return the translated <code>String</code>.
220      */

221     public static String JavaDoc encode( String JavaDoc s )
222     {
223         final StringBuffer JavaDoc out = new StringBuffer JavaDoc( s.length() );
224         final ByteArrayOutputStream buf = new ByteArrayOutputStream( 32 );
225         final OutputStreamWriter writer = new OutputStreamWriter( buf );
226         for( int i = 0; i < s.length(); i++ )
227         {
228             int c = s.charAt( i );
229             if( charactersDontNeedingEncoding.get( c ) )
230             {
231                 out.append( (char)c );
232             }
233             else
234             {
235                 try
236                 {
237                     writer.write( c );
238                     writer.flush();
239                 }
240                 catch( IOException e )
241                 {
242                     buf.reset();
243                     continue;
244                 }
245                 byte[] ba = buf.toByteArray();
246                 for( int j = 0; j < ba.length; j++ )
247                 {
248                     out.append( '%' );
249                     char ch = Character.forDigit( ( ba[ j ] >> 4 ) & 0xF, 16 );
250                     // converting to use uppercase letter as part of
251
// the hex value if ch is a letter.
252
if( Character.isLetter( ch ) )
253                     {
254                         ch -= characterCaseDiff;
255                     }
256                     out.append( ch );
257                     ch = Character.forDigit( ba[ j ] & 0xF, 16 );
258                     if( Character.isLetter( ch ) )
259                     {
260                         ch -= characterCaseDiff;
261                     }
262                     out.append( ch );
263                 }
264                 buf.reset();
265             }
266         }
267
268         return out.toString();
269     }
270
271     /**
272      * Translates a string into <code>x-www-form-urlencoded</code> format
273      * with specified encoding
274      *
275      * @param s <code>String</code> to be translated.
276      * @param enc The name of a supported charset
277      * @return the translated <code>String</code>.
278      * @throws UnsupportedEncodingException
279      */

280     public static String JavaDoc encode( String JavaDoc s, String JavaDoc enc ) throws UnsupportedEncodingException
281     {
282         // Why not use the java.net.URLEncoder for this purpose?
283
final StringBuffer JavaDoc out = new StringBuffer JavaDoc( s.length() );
284         final ByteArrayOutputStream buf = new ByteArrayOutputStream( 32 );
285         final OutputStreamWriter writer = new OutputStreamWriter( buf, enc );
286         for( int i = 0; i < s.length(); i++ )
287         {
288             int c = s.charAt( i );
289             if( charactersDontNeedingEncoding.get( c ) )
290             {
291                 out.append( (char)c );
292             }
293             else
294             {
295                 try
296                 {
297                     writer.write( c );
298                     writer.flush();
299                 }
300                 catch( IOException e )
301                 {
302                     buf.reset();
303                     continue;
304                 }
305                 byte[] ba = buf.toByteArray();
306                 for( int j = 0; j < ba.length; j++ )
307                 {
308                     out.append( '%' );
309                     char ch = Character.forDigit( ( ba[ j ] >> 4 ) & 0xF, 16 );
310                     // converting to use uppercase letter as part of
311
// the hex value if ch is a letter.
312
if( Character.isLetter( ch ) )
313                     {
314                         ch -= characterCaseDiff;
315                     }
316                     out.append( ch );
317                     ch = Character.forDigit( ba[ j ] & 0xF, 16 );
318                     if( Character.isLetter( ch ) )
319                     {
320                         ch -= characterCaseDiff;
321                     }
322                     out.append( ch );
323                 }
324                 buf.reset();
325             }
326         }
327
328         return out.toString();
329     }
330
331     /**
332      * Return a <code>File</code> object associated with the <code>Source</code> object.
333      *
334      * @return The corresponding <code>File</code> object or null if the
335      * <code>Source</code> object does not point to a file URI.
336      */

337     public static File getFile( Source source )
338     {
339         final String JavaDoc systemId = source.getURI();
340         if( systemId.startsWith( "file:" ) )
341         {
342             return new File( systemId.substring( 5 ) );
343         }
344         return null;
345     }
346
347     /**
348      * Move the source to a specified destination.
349      *
350      * @param source Source of the source.
351      * @param destination Destination of the source.
352      *
353      * @throws SourceException If an exception occurs during
354      * the move.
355      */

356     static public void move(Source source,
357                               Source destination)
358     throws SourceException
359     {
360         if (source instanceof MoveableSource
361             && source.getClass().equals(destination.getClass()))
362         {
363             ((MoveableSource)source).moveTo(destination);
364         }
365         else if (source instanceof ModifiableSource)
366         {
367             copy(source, destination);
368             ((ModifiableSource) source).delete();
369         }
370         else
371         {
372             throw new SourceException("Source '"+source.getURI()+ "' is not writeable");
373         }
374     }
375
376     /**
377      * Get the position of the scheme-delimiting colon in an absolute URI, as specified
378      * by <a HREF="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, appendix A. This method is
379      * primarily useful for {@link Source} implementors that want to separate
380      * the scheme part from the specific part of an URI.
381      * <p>
382      * Use this method when you need both the scheme and the scheme-specific part of an URI,
383      * as calling successively {@link #getScheme(String)} and {@link #getSpecificPart(String)}
384      * will call this method twice, and as such won't be efficient.
385      *
386      * @param uri the URI
387      * @return int the scheme-delimiting colon, or <code>-1</code> if not found.
388      */

389     public static int indexOfSchemeColon(String JavaDoc uri)
390     {
391         // absoluteURI = scheme ":" ( hier_part | opaque_part )
392
//
393
// scheme = alpha *( alpha | digit | "+" | "-" | "." )
394
//
395
// alpha = lowalpha | upalpha
396
//
397
// lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
398
// "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
399
// "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
400
//
401
// upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
402
// "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
403
// "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
404
//
405
// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
406
// "8" | "9"
407

408         // Must have at least one character followed by a colon
409
if (uri == null || uri.length() < 2)
410         {
411             return -1;
412         }
413
414         // Check that first character is alpha
415
// (lowercase first since it's the most common case)
416
char ch = uri.charAt(0);
417         if ( (ch < 'a' || ch > 'z') &&
418              (ch < 'A' || ch > 'Z') )
419         {
420             // Invalid first character
421
return -1;
422         }
423
424         int pos = uri.indexOf(':');
425         if (pos != -1)
426         {
427             // Check that every character before the colon is in the allowed range
428
// (the first one was tested above)
429
for (int i = 1; i < pos; i++)
430             {
431                 ch = uri.charAt(i);
432                 if ( (ch < 'a' || ch > 'z') &&
433                      (ch < 'A' || ch > 'Z') &&
434                      (ch < '0' || ch > '9') &&
435                      ch != '+' && ch != '-' && ch != '.')
436                 {
437                     return -1;
438                 }
439             }
440         }
441
442         return pos;
443     }
444
445     /**
446      * Get the scheme of an absolute URI.
447      *
448      * @param uri the absolute URI
449      * @return the URI scheme
450      */

451     public static String JavaDoc getScheme(String JavaDoc uri)
452     {
453         int pos = indexOfSchemeColon(uri);
454         return (pos == -1) ? null : uri.substring(0, pos);
455     }
456
457     /**
458      * Get the scheme-specific part of an absolute URI. Note that this includes everything
459      * after the separating colon, including the fragment, if any (RFC 2396 separates it
460      * from the scheme-specific part).
461      *
462      * @param uri the absolute URI
463      * @return the scheme-specific part of the URI
464      */

465     public static String JavaDoc getSpecificPart(String JavaDoc uri)
466     {
467         int pos = indexOfSchemeColon(uri);
468         return (pos == -1) ? null : uri.substring(pos+1);
469     }
470
471     /**
472      * Copy the source to a specified destination.
473      *
474      * @param source Source of the source.
475      * @param destination Destination of the source.
476      *
477      * @throws SourceException If an exception occurs during
478      * the copy.
479      */

480     static public void copy(Source source,
481                             Source destination)
482     throws SourceException {
483         if (source instanceof MoveableSource
484             && source.getClass().equals(destination.getClass()))
485         {
486             ((MoveableSource) source).copyTo(destination);
487         }
488         else
489         {
490             if ( !(destination instanceof ModifiableSource)) {
491                 throw new SourceException("Source '"+
492                                           destination.getURI()+
493                                           "' is not writeable");
494             }
495
496             try {
497                 OutputStream out = ((ModifiableSource) destination).getOutputStream();
498                 InputStream in = source.getInputStream();
499
500                 copy(in, out);
501             } catch (IOException ioe) {
502                 throw new SourceException("Could not copy source '"+
503                                           source.getURI()+"' to '"+
504                                           destination.getURI()+"' :"+
505                                           ioe.getMessage(), ioe);
506             }
507         }
508     }
509
510     /**
511      * Copy the contents of an <code>InputStream</code> to an <code>OutputStream</code>.
512      *
513      * @param in
514      * @param out
515      * @throws IOException
516      */

517     static public void copy(InputStream in, OutputStream out) throws IOException
518     {
519         byte[] buffer = new byte[8192];
520         int length = -1;
521
522         while ((length = in.read(buffer))>-1) {
523             out.write(buffer, 0, length);
524         }
525         in.close();
526         out.flush();
527         out.close();
528     }
529
530     /**
531      * Calls absolutize(url1, url2, false).
532      */

533     public static String JavaDoc absolutize(String JavaDoc url1, String JavaDoc url2)
534     {
535         return absolutize(url1, url2, false, true);
536     }
537
538     /**
539      * Calls absolutize(url1, url2, false, true).
540      */

541     public static String JavaDoc absolutize(String JavaDoc url1, String JavaDoc url2, boolean treatAuthorityAsBelongingToPath)
542     {
543         return absolutize(url1, url2, treatAuthorityAsBelongingToPath, true);
544     }
545
546     /**
547      * Applies a location to a baseURI. This is done as described in RFC 2396 section 5.2.
548      *
549      * @param url1 the baseURI
550      * @param url2 the location
551      * @param treatAuthorityAsBelongingToPath considers the authority to belong to the path. These
552      * special kind of URIs are used in the Apache Cocoon project.
553      * @param normalizePath should the path be normalized, i.e. remove ../ and /./ etc.
554      */

555     public static String JavaDoc absolutize(String JavaDoc url1, String JavaDoc url2, boolean treatAuthorityAsBelongingToPath, boolean normalizePath)
556     {
557         if (url1 == null)
558             return url2;
559
560         // If the URL contains a scheme (and thus is already absolute), don't do any further work
561
if (getScheme(url2) != null)
562             return url2;
563
564         // parse the urls into parts
565
// if the second url contains a scheme, it is not relative so return it right away (part 3 of the algorithm)
566
String JavaDoc[] url1Parts = parseUrl(url1);
567         String JavaDoc[] url2Parts = parseUrl(url2);
568
569         if (treatAuthorityAsBelongingToPath)
570             return absolutizeWithoutAuthority(url1Parts, url2Parts);
571
572         // check if it is a reference to the current document (part 2 of the algorithm)
573
if (url2Parts[PATH].equals("") && url2Parts[QUERY] == null && url2Parts[AUTHORITY] == null)
574             return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url1Parts[PATH], url1Parts[QUERY], url2Parts[FRAGMENT]);
575
576         // it is a network reference (part 4 of the algorithm)
577
if (url2Parts[AUTHORITY] != null)
578             return makeUrl(url1Parts[SCHEME], url2Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[QUERY]);
579
580         String JavaDoc url1Path = url1Parts[PATH];
581         String JavaDoc url2Path = url2Parts[PATH];
582
583         // if the path starts with a slash (part 5 of the algorithm)
584
if (url2Path != null && url2Path.length() > 0 && url2Path.charAt(0) == '/')
585             return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[QUERY]);
586
587         // combine the 2 paths
588
String JavaDoc path = stripLastSegment(url1Path);
589         path = path + (path.endsWith("/") ? "" : "/") + url2Path;
590         if (normalizePath)
591             path = normalize(path);
592
593         return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], path, url2Parts[QUERY], url2Parts[FRAGMENT]);
594     }
595
596     /**
597      * Absolutizes URIs whereby the authority part is considered to be a part of the path.
598      * This special kind of URIs is used in the Apache Cocoon project for the cocoon and context protocols.
599      * This method is internally used by {@link #absolutize}.
600      */

601     private static String JavaDoc absolutizeWithoutAuthority(String JavaDoc[] url1Parts, String JavaDoc[] url2Parts)
602     {
603         String JavaDoc authority1 = url1Parts[AUTHORITY];
604         String JavaDoc authority2 = url2Parts[AUTHORITY];
605
606         String JavaDoc path1 = url1Parts[PATH];
607         String JavaDoc path2 = url2Parts[PATH];
608
609         if (authority1 != null)
610             path1 = "//" + authority1 + path1;
611         if (authority2 != null)
612             path2 = "//" + authority2 + path2;
613
614         String JavaDoc path = stripLastSegment(path1);
615         path = path + (path.endsWith("/") ? "" : "/") + path2;
616         path = normalize(path);
617
618         String JavaDoc scheme = url1Parts[SCHEME];
619         return scheme + ":" + path;
620     }
621
622     private static String JavaDoc stripLastSegment(String JavaDoc path)
623     {
624         int i = path.lastIndexOf('/');
625         if(i > -1)
626             return path.substring(0, i + 1);
627         return path;
628     }
629
630     /**
631      * Removes things like &lt;segment&gt;/../ or ./, as described in RFC 2396 in
632      * step 6 of section 5.2.
633      */

634     private static String JavaDoc normalize(String JavaDoc path)
635     {
636         // replace all /./ with /
637
int i = path.indexOf("/./");
638         while (i > -1)
639         {
640             path = path.substring(0, i + 1) + path.substring(i + 3);
641             i = path.indexOf("/./");
642         }
643
644         if (path.endsWith("/."))
645             path = path.substring(0, path.length() - 1);
646
647         int f = path.indexOf("/../");
648         while (f > 0)
649         {
650             int sb = path.lastIndexOf("/", f - 1);
651             if (sb > - 1)
652                 path = path.substring(0, sb + 1) + (path.length() >= f + 4 ? path.substring(f + 4) : "");
653             f = path.indexOf("/../");
654         }
655
656         if (path.length() > 3 && path.endsWith("/.."))
657         {
658             int sb = path.lastIndexOf("/", path.length() - 4);
659             String JavaDoc segment = path.substring(sb, path.length() - 3);
660             if (!segment.equals(".."))
661             {
662                 path = path.substring(0, sb + 1);
663             }
664         }
665
666         return path;
667     }
668
669     /**
670      * Assembles an URL from the given URL parts, each of these parts can be null.
671      * Used internally by {@link #absolutize}.
672      */

673     private static String JavaDoc makeUrl(String JavaDoc scheme, String JavaDoc authority, String JavaDoc path, String JavaDoc query, String JavaDoc fragment)
674     {
675         StringBuffer JavaDoc url = new StringBuffer JavaDoc();
676         if (scheme != null)
677             url.append(scheme).append(':');
678
679         if (authority != null)
680             url.append("//").append(authority);
681
682         if (path != null)
683             url.append(path);
684
685         if (query != null)
686             url.append('?').append(query);
687
688         if (fragment != null)
689             url.append('#').append(fragment);
690
691         return url.toString();
692     }
693
694     public static final int SCHEME = 0;
695     public static final int AUTHORITY = 1;
696     public static final int PATH = 2;
697     public static final int QUERY = 3;
698     public static final int FRAGMENT = 4;
699
700     /**
701      * Parses an URL into the following parts: scheme, authority, path, query and fragment identifier.
702      *
703      * <p>The parsing is designed to be robust in the sense that it will never fail, even when an invalid
704      * URL is given. The parser will simply look for the most important delimiter characters. Basically
705      * it does the same as what would be achieved using the following regular expression (from RFC 2396):
706      * <pre>
707      * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
708      * 12 3 4 5 6 7 8 9
709      * </pre>
710      * but without actually using the regular expression.
711      *
712      * <p>The result is returned as a string array, use the constants SCHEME, AUTHORITY, PATH,
713      * QUERY and FRAGMENT_IDENTIFIER to access the different parts.
714      *
715      * <p>If a part is missing, its corresponding entry in the array will be null, except for the
716      * path, which will never be null.
717      */

718     public static String JavaDoc[] parseUrl(String JavaDoc url) {
719         char[] urlchars = url.toCharArray();
720
721         int pos = 0;
722
723         String JavaDoc scheme = null;
724         String JavaDoc authority = null;
725         String JavaDoc path = null;
726         String JavaDoc query = null;
727         String JavaDoc fragid = null;
728
729         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
730

731         // the scheme
732
boolean keepgoing = true;
733         while (keepgoing && pos < urlchars.length)
734         {
735             switch (urlchars[pos])
736             {
737                 case ':':
738                     if (pos >= 1)
739                     {
740                         scheme = new String JavaDoc(urlchars, 0, pos);
741                         keepgoing = false;
742                         pos++;
743                         break;
744                     }
745                 case '/':
746                 case '?':
747                 case '#':
748                     keepgoing = false;
749                     break;
750                 default:
751                     pos++;
752             }
753         }
754
755         if (scheme == null)
756             pos = 0;
757
758         // the authority
759
if (pos + 1 < urlchars.length && urlchars[pos] == '/' && urlchars[pos+1] == '/')
760         {
761             pos += 2;
762             int authorityBeginPos = pos;
763             keepgoing = true;
764             while (keepgoing && pos < urlchars.length)
765             {
766                 switch (urlchars[pos])
767                 {
768                     case '/':
769                     case '?':
770                     case '#':
771                         keepgoing = false;
772                         break;
773                     default:
774                         pos++;
775                 }
776             }
777             authority = new String JavaDoc(urlchars, authorityBeginPos, pos - authorityBeginPos);
778         }
779
780         // the path
781
int pathBeginPos = pos;
782         keepgoing = true;
783         while (keepgoing && pos < urlchars.length)
784         {
785             switch (urlchars[pos])
786             {
787                 case '?':
788                 case '#':
789                     keepgoing = false;
790                     break;
791                 default:
792                     pos++;
793             }
794         }
795         path = new String JavaDoc(urlchars, pathBeginPos, pos - pathBeginPos);
796
797         // the query
798
if (pos < urlchars.length && urlchars[pos] == '?')
799         {
800             pos++;
801             int queryBeginPos = pos;
802             keepgoing = true;
803             while (keepgoing && pos < urlchars.length)
804             {
805                 switch (urlchars[pos])
806                 {
807                     case '#':
808                         keepgoing = false;
809                         break;
810                     default:
811                         pos++;
812                 }
813             }
814             query = new String JavaDoc(urlchars, queryBeginPos, pos - queryBeginPos);
815         }
816
817         // the fragment identifier
818
pos++;
819         if (pos < urlchars.length)
820             fragid = new String JavaDoc(urlchars, pos, urlchars.length - pos);
821
822         return new String JavaDoc[] {scheme, authority, path, query, fragid};
823     }
824
825     /**
826      * Decode a path.
827      *
828      * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
829      * <p>The validity of the input path is not checked (i.e. characters that
830      * were not encoded will not be reported as errors).
831      * <p>This method differs from URLDecoder.decode in that it always uses UTF-8
832      * (while URLDecoder uses the platform default encoding, often ISO-8859-1),
833      * and doesn't translate + characters to spaces.
834      *
835      * @param path the path to decode
836      * @return the decoded path
837      */

838     public static String JavaDoc decodePath(String JavaDoc path) {
839         StringBuffer JavaDoc translatedPath = new StringBuffer JavaDoc(path.length());
840         byte[] encodedchars = new byte[path.length() / 3];
841         int i = 0;
842         int length = path.length();
843         int encodedcharsLength = 0;
844         while (i < length) {
845             if (path.charAt(i) == '%') {
846                 // we must process all consecutive %-encoded characters in one go, because they represent
847
// an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
848
while (i < length && path.charAt(i) == '%') {
849                     if (i + 2 < length) {
850                         try {
851                             byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
852                             encodedchars[encodedcharsLength] = x;
853                         } catch (NumberFormatException JavaDoc e) {
854                             throw new IllegalArgumentException JavaDoc("Illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
855                         }
856                         encodedcharsLength++;
857                         i += 3;
858                     } else {
859                         throw new IllegalArgumentException JavaDoc("% character should be followed by 2 hexadecimal characters.");
860                     }
861                 }
862                 try {
863                     String JavaDoc translatedPart = new String JavaDoc(encodedchars, 0, encodedcharsLength, "UTF-8");
864                     translatedPath.append(translatedPart);
865                 } catch (UnsupportedEncodingException e) {
866                     // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
867
throw new RuntimeException JavaDoc("Problem in decodePath: UTF-8 encoding not supported.");
868                 }
869                 encodedcharsLength = 0;
870             } else {
871                 // a normal character
872
translatedPath.append(path.charAt(i));
873                 i++;
874             }
875         }
876         return translatedPath.toString();
877     }
878
}