SourceUtil


1   /* 
2    * Copyright 2002-2004 The Apache Software Foundation
3    * Licensed  under the  Apache License,  Version 2.0  (the "License");
4    * you may not use  this file  except in  compliance with the License.
5    * You may obtain a copy of the License at 
6    * 
7    *   http://www.apache.org/licenses/LICENSE-2.0
8    * 
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed  under the  License is distributed on an "AS IS" BASIS,
11   * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY KIND, either  express  or
12   * implied.
13   * 
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.excalibur.source;
18  
19  import java.io.*;
20  import java.util.BitSet  ;
21  import java.util.Iterator  ;
22  
23  import org.apache.avalon.framework.parameters.Parameters;
24  
25  /**
26   *
27   * Utility class for source resolving.
28   *
29   * @author <a HREF="mailto:dev@avalon.apache.org">Avalon Development Team</a>
30   * @version CVS $Revision: 1.5 $ $Date: 2004/02/28 11:47:26 $
31   */
32  public final class SourceUtil
33  {
34      private static final char[] alphabet = new char[]
35      {
36          'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7
37          'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15
38          'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23
39          'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31
40          'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39
41          'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47
42          'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55
43          '4', '5', '6', '7', '8', '9', '+', '/'}; // 56 to 63
44  
45      /**
46       * Append parameters to the uri.
47       * Each parameter is appended to the uri with "parameter=value",
48       * the parameters are separated by "&".
49       */
50      public static String   appendParameters( String   uri,
51                                             Parameters parameters )
52      {
53          if( parameters != null )
54          {
55              StringBuffer   buffer = new StringBuffer  ( uri );
56              String  [] keys = parameters.getNames();
57              String   current;
58              char separator = ( uri.indexOf( "?" ) == -1 ? '?' : '&' );
59  
60              if( keys != null )
61              {
62                  for( int i = 0; i < keys.length; i++ )
63                  {
64                      current = keys[ i ];
65                      buffer.append( separator )
66                          .append( current )
67                          .append( '=' )
68                          .append( SourceUtil.encode( parameters.getParameter( current, null ) ) );
69                      separator = '&';
70                  }
71              }
72              return buffer.toString();
73          }
74  
75          return uri;
76      }
77  
78      /**
79       * Append parameters to the uri
80       * Each parameter is appended to the uri with "parameter=value",
81       * the parameters are separated by "&".
82       */
83      public static String   appendParameters( String   uri,
84                                             SourceParameters parameters )
85      {
86          if( parameters != null )
87          {
88              StringBuffer   buffer = new StringBuffer  ( uri );
89              Iterator   keys = parameters.getParameterNames();
90              String   current;
91              char separator = ( uri.indexOf( "?" ) == -1 ? '?' : '&' );
92              Iterator   values;
93  
94              while( keys.hasNext() == true )
95              {
96                  current = (String  )keys.next();
97                  values = parameters.getParameterValues( current );
98                  while( values.hasNext() == true )
99                  {
100                     buffer.append( separator )
101                         .append( current )
102                         .append( '=' )
103                         .append( SourceUtil.encode( (String  )values.next() ) );
104                     separator = '&';
105                 }
106             }
107             return buffer.toString();
108         }
109 
110         return uri;
111     }
112 
113     /**
114      * BASE 64 encoding.
115      * See also RFC 1421
116      */
117     public static String   encodeBASE64( String   s )
118     {
119         return encodeBASE64( s.getBytes() );
120     }
121 
122     /**
123      * BASE 64 encoding.
124      * See also RFC 1421
125      */
126     public static String   encodeBASE64( byte[] octetString )
127     {
128         int bits24;
129         int bits6;
130 
131         char[] out = new char[ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];
132 
133         int outIndex = 0;
134         int i = 0;
135 
136         while( ( i + 3 ) <= octetString.length )
137         {
138             // store the octets
139             bits24 = ( octetString[ i++ ] & 0xFF ) << 16;
140             bits24 |= ( octetString[ i++ ] & 0xFF ) << 8;
141             bits24 |= ( octetString[ i++ ] & 0xFF ) << 0;
142 
143             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
144             out[ outIndex++ ] = alphabet[ bits6 ];
145             bits6 = ( bits24 & 0x0003F000 ) >> 12;
146             out[ outIndex++ ] = alphabet[ bits6 ];
147             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
148             out[ outIndex++ ] = alphabet[ bits6 ];
149             bits6 = ( bits24 & 0x0000003F );
150             out[ outIndex++ ] = alphabet[ bits6 ];
151         }
152 
153         if( octetString.length - i == 2 )
154         {
155             // store the octets
156             bits24 = ( octetString[ i ] & 0xFF ) << 16;
157             bits24 |= ( octetString[ i + 1 ] & 0xFF ) << 8;
158 
159             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
160             out[ outIndex++ ] = alphabet[ bits6 ];
161             bits6 = ( bits24 & 0x0003F000 ) >> 12;
162             out[ outIndex++ ] = alphabet[ bits6 ];
163             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
164             out[ outIndex++ ] = alphabet[ bits6 ];
165 
166             // padding
167             out[ outIndex++ ] = '=';
168         }
169         else if( octetString.length - i == 1 )
170         {
171             // store the octets
172             bits24 = ( octetString[ i ] & 0xFF ) << 16;
173 
174             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
175             out[ outIndex++ ] = alphabet[ bits6 ];
176             bits6 = ( bits24 & 0x0003F000 ) >> 12;
177             out[ outIndex++ ] = alphabet[ bits6 ];
178 
179             // padding
180             out[ outIndex++ ] = '=';
181             out[ outIndex++ ] = '=';
182         }
183 
184         return new String  ( out );
185     }
186 
187     /** A BitSet defining the characters which don't need encoding */
188     static BitSet   charactersDontNeedingEncoding;
189     static final int characterCaseDiff = ( 'a' - 'A' );
190 
191     /** Initialize the BitSet */
192     static
193     {
194         charactersDontNeedingEncoding = new BitSet  ( 256 );
195         int i;
196         for( i = 'a'; i <= 'z'; i++ )
197         {
198             charactersDontNeedingEncoding.set( i );
199         }
200         for( i = 'A'; i <= 'Z'; i++ )
201         {
202             charactersDontNeedingEncoding.set( i );
203         }
204         for( i = '0'; i <= '9'; i++ )
205         {
206             charactersDontNeedingEncoding.set( i );
207         }
208         charactersDontNeedingEncoding.set( '-' );
209         charactersDontNeedingEncoding.set( '_' );
210         charactersDontNeedingEncoding.set( '.' );
211         charactersDontNeedingEncoding.set( '*' );
212         charactersDontNeedingEncoding.set( '"' );
213     }
214 
215     /**
216      * Translates a string into <code>x-www-form-urlencoded</code> format.
217      *
218      * @param   s   <code>String</code> to be translated.
219      * @return  the translated <code>String</code>.
220      */
221     public static String   encode( String   s )
222     {
223         final StringBuffer   out = new StringBuffer  ( s.length() );
224         final ByteArrayOutputStream buf = new ByteArrayOutputStream( 32 );
225         final OutputStreamWriter writer = new OutputStreamWriter( buf );
226         for( int i = 0; i < s.length(); i++ )
227         {
228             int c = s.charAt( i );
229             if( charactersDontNeedingEncoding.get( c ) )
230             {
231                 out.append( (char)c );
232             }
233             else
234             {
235                 try
236                 {
237                     writer.write( c );
238                     writer.flush();
239                 }
240                 catch( IOException e )
241                 {
242                     buf.reset();
243                     continue;
244                 }
245                 byte[] ba = buf.toByteArray();
246                 for( int j = 0; j < ba.length; j++ )
247                 {
248                     out.append( '%' );
249                     char ch = Character.forDigit( ( ba[ j ] >> 4 ) & 0xF, 16 );
250                     // converting to use uppercase letter as part of
251                     // the hex value if ch is a letter.
252                     if( Character.isLetter( ch ) )
253                     {
254                         ch -= characterCaseDiff;
255                     }
256                     out.append( ch );
257                     ch = Character.forDigit( ba[ j ] & 0xF, 16 );
258                     if( Character.isLetter( ch ) )
259                     {
260                         ch -= characterCaseDiff;
261                     }
262                     out.append( ch );
263                 }
264                 buf.reset();
265             }
266         }
267 
268         return out.toString();
269     }
270 
271     /**
272      * Translates a string into <code>x-www-form-urlencoded</code> format
273      * with specified encoding
274      *
275      * @param   s   <code>String</code> to be translated.
276      * @param   enc The name of a supported charset
277      * @return  the translated <code>String</code>.
278      * @throws UnsupportedEncodingException
279      */
280     public static String   encode( String   s, String   enc ) throws UnsupportedEncodingException
281     {
282         // Why not use the java.net.URLEncoder for this purpose?
283         final StringBuffer   out = new StringBuffer  ( s.length() );
284         final ByteArrayOutputStream buf = new ByteArrayOutputStream( 32 );
285         final OutputStreamWriter writer = new OutputStreamWriter( buf, enc );
286         for( int i = 0; i < s.length(); i++ )
287         {
288             int c = s.charAt( i );
289             if( charactersDontNeedingEncoding.get( c ) )
290             {
291                 out.append( (char)c );
292             }
293             else
294             {
295                 try
296                 {
297                     writer.write( c );
298                     writer.flush();
299                 }
300                 catch( IOException e )
301                 {
302                     buf.reset();
303                     continue;
304                 }
305                 byte[] ba = buf.toByteArray();
306                 for( int j = 0; j < ba.length; j++ )
307                 {
308                     out.append( '%' );
309                     char ch = Character.forDigit( ( ba[ j ] >> 4 ) & 0xF, 16 );
310                     // converting to use uppercase letter as part of
311                     // the hex value if ch is a letter.
312                     if( Character.isLetter( ch ) )
313                     {
314                         ch -= characterCaseDiff;
315                     }
316                     out.append( ch );
317                     ch = Character.forDigit( ba[ j ] & 0xF, 16 );
318                     if( Character.isLetter( ch ) )
319                     {
320                         ch -= characterCaseDiff;
321                     }
322                     out.append( ch );
323                 }
324                 buf.reset();
325             }
326         }
327 
328         return out.toString();
329     }
330 
331     /**
332      * Return a <code>File</code> object associated with the <code>Source</code> object.
333      *
334      * @return The corresponding <code>File</code> object or null if the
335      *         <code>Source</code> object does not point to a file URI.
336      */
337     public static File getFile( Source source )
338     {
339         final String   systemId = source.getURI();
340         if( systemId.startsWith( "file:" ) )
341         {
342             return new File( systemId.substring( 5 ) );
343         }
344         return null;
345     }
346 
347     /**
348      * Move the source to a specified destination.
349      *
350      * @param source Source of the source.
351      * @param destination Destination of the source.
352      *
353      * @throws SourceException If an exception occurs during
354      *                         the move.
355      */
356     static public void move(Source source,
357                               Source destination)
358     throws SourceException
359     {
360         if (source instanceof MoveableSource
361             && source.getClass().equals(destination.getClass()))
362         {
363             ((MoveableSource)source).moveTo(destination);
364         }
365         else if (source instanceof ModifiableSource)
366         {
367             copy(source, destination);
368             ((ModifiableSource) source).delete();
369         }
370         else
371         {
372             throw new SourceException("Source '"+source.getURI()+ "' is not writeable");
373         }
374     }
375 
376     /**
377      * Get the position of the scheme-delimiting colon in an absolute URI, as specified
378      * by <a HREF="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, appendix A. This method is
379      * primarily useful for {@link Source} implementors that want to separate
380      * the scheme part from the specific part of an URI.
381      * <p>
382      * Use this method when you need both the scheme and the scheme-specific part of an URI,
383      * as calling successively {@link #getScheme(String)} and {@link #getSpecificPart(String)}
384      * will call this method twice, and as such won't be efficient.
385      *
386      * @param uri the URI
387      * @return int the scheme-delimiting colon, or <code>-1</code> if not found.
388      */
389     public static int indexOfSchemeColon(String   uri)
390     {
391         // absoluteURI   = scheme ":" ( hier_part | opaque_part )
392         //
393         // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
394         //
395         // alpha         = lowalpha | upalpha
396         //
397         // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
398         //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
399         //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
400         //
401         // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
402         //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
403         //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
404         //
405         // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
406         //            "8" | "9"
407 
408         // Must have at least one character followed by a colon
409         if (uri == null || uri.length() < 2)
410         {
411             return -1;
412         }
413 
414         // Check that first character is alpha
415         // (lowercase first since it's the most common case)
416         char ch = uri.charAt(0);
417         if ( (ch < 'a' || ch > 'z') &&
418              (ch < 'A' || ch > 'Z') )
419         {
420             // Invalid first character
421             return -1;
422         }
423 
424         int pos = uri.indexOf(':');
425         if (pos != -1)
426         {
427             // Check that every character before the colon is in the allowed range
428             // (the first one was tested above)
429             for (int i = 1; i < pos; i++)
430             {
431                 ch = uri.charAt(i);
432                 if ( (ch < 'a' || ch > 'z') &&
433                      (ch < 'A' || ch > 'Z') &&
434                      (ch < '0' || ch > '9') &&
435                      ch != '+' && ch != '-' && ch != '.')
436                 {
437                     return -1;
438                 }
439             }
440         }
441 
442         return pos;
443     }
444 
445     /**
446      * Get the scheme of an absolute URI.
447      *
448      * @param uri the absolute URI
449      * @return the URI scheme
450      */
451     public static String   getScheme(String   uri)
452     {
453         int pos = indexOfSchemeColon(uri);
454         return (pos == -1) ? null : uri.substring(0, pos);
455     }
456 
457     /**
458      * Get the scheme-specific part of an absolute URI. Note that this includes everything
459      * after the separating colon, including the fragment, if any (RFC 2396 separates it
460      * from the scheme-specific part).
461      *
462      * @param uri the absolute URI
463      * @return the scheme-specific part of the URI
464      */
465     public static String   getSpecificPart(String   uri)
466     {
467         int pos = indexOfSchemeColon(uri);
468         return (pos == -1) ? null : uri.substring(pos+1);
469     }
470 
471     /**
472      * Copy the source to a specified destination.
473      *
474      * @param source Source of the source.
475      * @param destination Destination of the source.
476      *
477      * @throws SourceException If an exception occurs during
478      *                         the copy.
479      */
480     static public void copy(Source source,
481                             Source destination)
482     throws SourceException {
483         if (source instanceof MoveableSource
484             && source.getClass().equals(destination.getClass()))
485         {
486             ((MoveableSource) source).copyTo(destination);
487         }
488         else
489         {
490             if ( !(destination instanceof ModifiableSource)) {
491                 throw new SourceException("Source '"+
492                                           destination.getURI()+
493                                           "' is not writeable");
494             }
495 
496             try {
497                 OutputStream out = ((ModifiableSource) destination).getOutputStream();
498                 InputStream in = source.getInputStream();
499 
500                 copy(in, out);
501             } catch (IOException ioe) {
502                 throw new SourceException("Could not copy source '"+
503                                           source.getURI()+"' to '"+
504                                           destination.getURI()+"' :"+
505                                           ioe.getMessage(), ioe);
506             }
507         }
508     }
509 
510     /**
511      * Copy the contents of an <code>InputStream</code> to an <code>OutputStream</code>.
512      *
513      * @param in
514      * @param out
515      * @throws IOException
516      */
517     static public void copy(InputStream in, OutputStream out) throws IOException
518     {
519         byte[] buffer = new byte[8192];
520         int length = -1;
521 
522         while ((length = in.read(buffer))>-1) {
523             out.write(buffer, 0, length);
524         }
525         in.close();
526         out.flush();
527         out.close();
528     }
529 
530     /**
531      * Calls absolutize(url1, url2, false).
532      */
533     public static String   absolutize(String   url1, String   url2)
534     {
535         return absolutize(url1, url2, false, true);
536     }
537 
538     /**
539      * Calls absolutize(url1, url2, false, true).
540      */
541     public static String   absolutize(String   url1, String   url2, boolean treatAuthorityAsBelongingToPath)
542     {
543         return absolutize(url1, url2, treatAuthorityAsBelongingToPath, true);
544     }
545 
546     /**
547      * Applies a location to a baseURI. This is done as described in RFC 2396 section 5.2.
548      *
549      * @param url1 the baseURI
550      * @param url2 the location
551      * @param treatAuthorityAsBelongingToPath considers the authority to belong to the path. These
552      * special kind of URIs are used in the Apache Cocoon project.
553      * @param normalizePath should the path be normalized, i.e. remove ../ and /./ etc.
554      */
555     public static String   absolutize(String   url1, String   url2, boolean treatAuthorityAsBelongingToPath, boolean normalizePath)
556     {
557         if (url1 == null)
558             return url2;
559 
560         // If the URL contains a scheme (and thus is already absolute), don't do any further work
561         if (getScheme(url2) != null)
562             return url2;
563 
564         // parse the urls into parts
565         // if the second url contains a scheme, it is not relative so return it right away (part 3 of the algorithm)
566         String  [] url1Parts = parseUrl(url1);
567         String  [] url2Parts = parseUrl(url2);
568 
569         if (treatAuthorityAsBelongingToPath)
570             return absolutizeWithoutAuthority(url1Parts, url2Parts);
571 
572         // check if it is a reference to the current document (part 2 of the algorithm)
573         if (url2Parts[PATH].equals("") && url2Parts[QUERY] == null && url2Parts[AUTHORITY] == null)
574             return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url1Parts[PATH], url1Parts[QUERY], url2Parts[FRAGMENT]);
575 
576         // it is a network reference (part 4 of the algorithm)
577         if (url2Parts[AUTHORITY] != null)
578             return makeUrl(url1Parts[SCHEME], url2Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[QUERY]);
579 
580         String   url1Path = url1Parts[PATH];
581         String   url2Path = url2Parts[PATH];
582 
583         // if the path starts with a slash (part 5 of the algorithm)
584         if (url2Path != null && url2Path.length() > 0 && url2Path.charAt(0) == '/')
585             return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], url2Parts[PATH], url2Parts[QUERY], url2Parts[QUERY]);
586 
587         // combine the 2 paths
588         String   path = stripLastSegment(url1Path);
589         path = path + (path.endsWith("/") ? "" : "/") + url2Path;
590         if (normalizePath)
591             path = normalize(path);
592 
593         return makeUrl(url1Parts[SCHEME], url1Parts[AUTHORITY], path, url2Parts[QUERY], url2Parts[FRAGMENT]);
594     }
595 
596     /**
597      * Absolutizes URIs whereby the authority part is considered to be a part of the path.
598      * This special kind of URIs is used in the Apache Cocoon project for the cocoon and context protocols.
599      * This method is internally used by {@link #absolutize}.
600      */
601     private static String   absolutizeWithoutAuthority(String  [] url1Parts, String  [] url2Parts)
602     {
603         String   authority1 = url1Parts[AUTHORITY];
604         String   authority2 = url2Parts[AUTHORITY];
605 
606         String   path1 = url1Parts[PATH];
607         String   path2 = url2Parts[PATH];
608 
609         if (authority1 != null)
610             path1 = "//" + authority1 + path1;
611         if (authority2 != null)
612             path2 = "//" + authority2 + path2;
613 
614         String   path = stripLastSegment(path1);
615         path = path + (path.endsWith("/") ? "" : "/") + path2;
616         path = normalize(path);
617 
618         String   scheme = url1Parts[SCHEME];
619         return scheme + ":" + path;
620     }
621 
622     private static String   stripLastSegment(String   path)
623     {
624         int i = path.lastIndexOf('/');
625         if(i > -1)
626             return path.substring(0, i + 1);
627         return path;
628     }
629 
630     /**
631      * Removes things like &lt;segment&gt;/../ or ./, as described in RFC 2396 in
632      * step 6 of section 5.2.
633      */
634     private static String   normalize(String   path)
635     {
636         // replace all /./ with /
637         int i = path.indexOf("/./");
638         while (i > -1)
639         {
640             path = path.substring(0, i + 1) + path.substring(i + 3);
641             i = path.indexOf("/./");
642         }
643 
644         if (path.endsWith("/."))
645             path = path.substring(0, path.length() - 1);
646 
647         int f = path.indexOf("/../");
648         while (f > 0)
649         {
650             int sb = path.lastIndexOf("/", f - 1);
651             if (sb > - 1)
652                 path = path.substring(0, sb + 1) + (path.length() >= f + 4 ? path.substring(f + 4) : "");
653             f = path.indexOf("/../");
654         }
655 
656         if (path.length() > 3 && path.endsWith("/.."))
657         {
658             int sb = path.lastIndexOf("/", path.length() - 4);
659             String   segment = path.substring(sb, path.length() - 3);
660             if (!segment.equals(".."))
661             {
662                 path = path.substring(0, sb + 1);
663             }
664         }
665 
666         return path;
667     }
668 
669     /**
670      * Assembles an URL from the given URL parts, each of these parts can be null.
671      * Used internally by {@link #absolutize}.
672      */
673     private static String   makeUrl(String   scheme, String   authority, String   path, String   query, String   fragment)
674     {
675         StringBuffer   url = new StringBuffer  ();
676         if (scheme != null)
677             url.append(scheme).append(':');
678 
679         if (authority != null)
680             url.append("//").append(authority);
681 
682         if (path != null)
683             url.append(path);
684 
685         if (query != null)
686             url.append('?').append(query);
687 
688         if (fragment != null)
689             url.append('#').append(fragment);
690 
691         return url.toString();
692     }
693 
    /** Index of the scheme in the array returned by {@link #parseUrl(String)}. */
    public static final int SCHEME = 0;
    /** Index of the authority in the array returned by {@link #parseUrl(String)}. */
    public static final int AUTHORITY = 1;
    /** Index of the path in the array returned by {@link #parseUrl(String)}. */
    public static final int PATH = 2;
    /** Index of the query in the array returned by {@link #parseUrl(String)}. */
    public static final int QUERY = 3;
    /** Index of the fragment identifier in the array returned by {@link #parseUrl(String)}. */
    public static final int FRAGMENT = 4;
699 
700     /**
701      * Parses an URL into the following parts: scheme, authority, path, query and fragment identifier.
702      *
703      * <p>The parsing is designed to be robust in the sense that it will never fail, even when an invalid
704      * URL is given. The parser will simply look for the most important delimiter characters. Basically
705      * it does the same as what would be achieved using the following regular expression (from RFC 2396):
706      * <pre>
707      * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
708      *  12            3  4          5       6  7        8 9
709      * </pre>
710      * but without actually using the regular expression.
711      *
712      * <p>The result is returned as a string array, use the constants SCHEME, AUTHORITY, PATH,
713      * QUERY and FRAGMENT_IDENTIFIER to access the different parts.
714      *
715      * <p>If a part is missing, its corresponding entry in the array will be null, except for the
716      * path, which will never be null.
717      */
718     public static String  [] parseUrl(String   url) {
719         char[] urlchars = url.toCharArray();
720 
721         int pos = 0;
722 
723         String   scheme = null;
724         String   authority = null;
725         String   path = null;
726         String   query = null;
727         String   fragid = null;
728 
729         //  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
730 
731         // the scheme
732         boolean keepgoing = true;
733         while (keepgoing && pos < urlchars.length)
734         {
735             switch (urlchars[pos])
736             {
737                 case ':':
738                     if (pos >= 1)
739                     {
740                         scheme = new String  (urlchars, 0, pos);
741                         keepgoing = false;
742                         pos++;
743                         break;
744                     }
745                 case '/':
746                 case '?':
747                 case '#':
748                     keepgoing = false;
749                     break;
750                 default:
751                     pos++;
752             }
753         }
754 
755         if (scheme == null)
756             pos = 0;
757 
758         //  the authority
759         if (pos + 1 < urlchars.length && urlchars[pos] == '/' && urlchars[pos+1] == '/')
760         {
761             pos += 2;
762             int authorityBeginPos = pos;
763             keepgoing = true;
764             while (keepgoing && pos < urlchars.length)
765             {
766                 switch (urlchars[pos])
767                 {
768                     case '/':
769                     case '?':
770                     case '#':
771                         keepgoing = false;
772                         break;
773                     default:
774                         pos++;
775                 }
776             }
777             authority = new String  (urlchars, authorityBeginPos, pos - authorityBeginPos);
778         }
779 
780         //  the path
781         int pathBeginPos = pos;
782         keepgoing = true;
783         while (keepgoing && pos < urlchars.length)
784         {
785             switch (urlchars[pos])
786             {
787                 case '?':
788                 case '#':
789                     keepgoing = false;
790                     break;
791                 default:
792                     pos++;
793             }
794         }
795         path = new String  (urlchars, pathBeginPos, pos - pathBeginPos);
796 
797         // the query
798         if (pos < urlchars.length && urlchars[pos] == '?')
799         {
800             pos++;
801             int queryBeginPos = pos;
802             keepgoing = true;
803             while (keepgoing && pos < urlchars.length)
804             {
805                 switch (urlchars[pos])
806                 {
807                     case '#':
808                         keepgoing = false;
809                         break;
810                     default:
811                         pos++;
812                 }
813             }
814             query = new String  (urlchars, queryBeginPos, pos - queryBeginPos);
815         }
816 
817         // the fragment identifier
818         pos++;
819         if (pos < urlchars.length)
820             fragid = new String  (urlchars, pos, urlchars.length - pos);
821 
822         return new String  [] {scheme, authority, path, query, fragid};
823     }
824 
825     /**
826      * Decode a path.
827      *
828      * <p>Interprets %XX (where XX is hexadecimal number) as UTF-8 encoded bytes.
829      * <p>The validity of the input path is not checked (i.e. characters that
830      * were not encoded will not be reported as errors).
831      * <p>This method differs from URLDecoder.decode in that it always uses UTF-8
832      * (while URLDecoder uses the platform default encoding, often ISO-8859-1),
833      * and doesn't translate + characters to spaces.
834      *
835      * @param path the path to decode
836      * @return the decoded path
837      */
838     public static String   decodePath(String   path) {
839         StringBuffer   translatedPath = new StringBuffer  (path.length());
840         byte[] encodedchars = new byte[path.length() / 3];
841         int i = 0;
842         int length = path.length();
843         int encodedcharsLength = 0;
844         while (i < length) {
845             if (path.charAt(i) == '%') {
846                 // we must process all consecutive %-encoded characters in one go, because they represent
847                 // an UTF-8 encoded string, and in UTF-8 one character can be encoded as multiple bytes
848                 while (i < length && path.charAt(i) == '%') {
849                     if (i + 2 < length) {
850                         try {
851                             byte x = (byte)Integer.parseInt(path.substring(i + 1, i + 3), 16);
852                             encodedchars[encodedcharsLength] = x;
853                         } catch (NumberFormatException   e) {
854                             throw new IllegalArgumentException  ("Illegal hex characters in pattern %" + path.substring(i + 1, i + 3));
855                         }
856                         encodedcharsLength++;
857                         i += 3;
858                     } else {
859                         throw new IllegalArgumentException  ("% character should be followed by 2 hexadecimal characters.");
860                     }
861                 }
862                 try {
863                     String   translatedPart = new String  (encodedchars, 0, encodedcharsLength, "UTF-8");
864                     translatedPath.append(translatedPart);
865                 } catch (UnsupportedEncodingException e) {
866                     // the situation that UTF-8 is not supported is quite theoretical, so throw a runtime exception
867                     throw new RuntimeException  ("Problem in decodePath: UTF-8 encoding not supported.");
868                 }
869                 encodedcharsLength = 0;
870             } else {
871                 // a normal character
872                 translatedPath.append(path.charAt(i));
873                 i++;
874             }
875         }
876         return translatedPath.toString();
877     }
878 
879 }
880
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags