KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > hp > hpl > jena > util > URIref


/*
 * (c) Copyright 2001, 2002, 2003, 2004, 2005 Hewlett-Packard Development Company, LP
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * * $Id: URIref.java,v 1.4 2005/02/21 12:18:58 andy_seaborne Exp $
 *
 * AUTHOR: Jeremy J. Carroll
 */
/*
 * URIref.java
 *
 * Created on September 20, 2001, 12:04 PM
 */

35
36 package com.hp.hpl.jena.util;
37
38
/**
 * This class provides methods to encode and decode URI References
 * in accordance with http://www.w3.org/TR/charmod/#sec-URIs .
 * The details of how the algorithms handle '%' are captured in
 * http://lists.w3.org/Archives/Public/uri/2001Sep/0009.html
 * @author jjc
 */
public class URIref {

    /** Message used when the JVM lacks one of the two charsets this class needs. */
    private static final String MISSING_CHARSET_MESSAGE =
        "The JVM is required to support UTF-8 and US-ASCII encodings.";

    /**
     * Punctuation that {@link #encode(String)} copies through unescaped
     * (in addition to ASCII letters and digits). Note that '%' is
     * deliberately absent: it is handled separately.
     */
    private static final String UNESCAPED_PUNCTUATION = ";/?:@&=+$,-_.!~*'()#[]";

    /**
     * Convert a Unicode string, first to UTF-8 and then to
     * an RFC 2396 compliant URI with optional fragment identifier
     * using the %NN escape mechanism as appropriate.
     * <p>
     * A '%' that is followed by two hexadecimal digits is assumed to already
     * introduce an escaped byte; it is kept, with the two digits normalised
     * to upper case. A '%' followed by two characters that are not both
     * hexadecimal digits is reported on <code>System.err</code> and escaped
     * as "%25". A '%' with fewer than two characters after it is escaped
     * as "%25" without a message.
     *
     * @param unicode The URI reference as a Unicode string.
     * @return The escaped URI reference, containing only US-ASCII characters.
     */
    public static String encode(String unicode) {
        try {
            byte[] utf8 = unicode.getBytes("UTF-8");
            // Each input byte produces at most three output bytes ("%XX").
            byte[] ascii = new byte[utf8.length * 3];
            int in = 0;
            int out = 0;
            while (in < utf8.length) {
                byte current = utf8[in];

                if (isUnescaped(current)) {
                    ascii[out++] = current;
                    in++;
                    continue;
                }

                if (current == (byte) '%' && in + 2 < utf8.length) {
                    try {
                        // Decode both digits before writing anything, so a bad
                        // digit leaves the output untouched.
                        byte first = hexEncode(hexDecode(utf8[in + 1]));
                        byte second = hexEncode(hexDecode(utf8[in + 2]));
                        ascii[out++] = (byte) '%';
                        ascii[out++] = first;
                        ascii[out++] = second;
                        in += 3;
                        continue;
                    } catch (IllegalArgumentException e) {
                        // Ill-formed escape: warn, then escape the '%' itself below.
                        System.err.println("Confusing IRI to encode - contains literal '%': " + unicode);
                    }
                }

                // Escape this single byte; mask to get rid of the sign.
                int value = current & 255;
                ascii[out++] = (byte) '%';
                ascii[out++] = hexEncode(value / 16);
                ascii[out++] = hexEncode(value % 16);
                in++;
            }
            return new String(ascii, 0, out, "US-ASCII");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new Error(MISSING_CHARSET_MESSAGE, e);
        }
    }

    /**
     * Convert a URI, in US-ASCII, with escaped characters taken from UTF-8,
     * to the corresponding Unicode string.
     * On ill-formed input the results are undefined, specifically if
     * the unescaped version is not a UTF-8 String, some String will be
     * returned.
     * Escaped '%' characters (i.e. "%25") are left unchanged.
     *
     * @param uri The uri, in characters specified by RFC 2396 + '#'.
     * @return The corresponding Unicode String.
     * @exception IllegalArgumentException If a % hex sequence is ill-formed
     *            (fewer than two characters follow the '%', or one of them
     *            is not a hexadecimal digit).
     */
    public static String decode(String uri) {
        try {
            byte[] ascii = uri.getBytes("US-ASCII");
            // Decoding never grows the data: each escape shrinks 3 bytes to 1.
            byte[] utf8 = new byte[ascii.length];
            int in = 0;
            int out = 0;
            while (in < ascii.length) {
                if (ascii[in] != (byte) '%') {
                    utf8[out++] = ascii[in++];
                    continue;
                }

                // A '%' must always be followed by two more characters.
                if (in + 2 >= ascii.length) {
                    throw new IllegalArgumentException("Incomplete Hex escape sequence in " + uri);
                }

                if (ascii[in + 1] == (byte) '2' && ascii[in + 2] == (byte) '5') {
                    // "%25" stays escaped: copy the '%' here, the "25" is
                    // copied as ordinary characters on the next iterations.
                    utf8[out++] = ascii[in++];
                    continue;
                }

                utf8[out++] = (byte) (hexDecode(ascii[in + 1]) * 16 | hexDecode(ascii[in + 2]));
                in += 3;
            }
            return new String(utf8, 0, out, "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            throw new Error(MISSING_CHARSET_MESSAGE, e);
        }
    }

    /** Tell whether encode() copies this byte through without escaping it. */
    private static boolean isUnescaped(byte b) {
        if (b >= (byte) 'a' && b <= (byte) 'z') {
            return true;
        }
        if (b >= (byte) 'A' && b <= (byte) 'Z') {
            return true;
        }
        if (b >= (byte) '0' && b <= (byte) '9') {
            return true;
        }
        // Negative bytes are UTF-8 multi-byte sequence parts: always escaped.
        return b > 0 && UNESCAPED_PUNCTUATION.indexOf(b) >= 0;
    }

    /** Map a value in the range 0..15 to its upper-case hexadecimal digit. */
    private static byte hexEncode(int i) {
        if (i < 10) {
            return (byte) ('0' + i);
        }
        return (byte) ('A' + i - 10);
    }

    /**
     * Map an ASCII hexadecimal digit (either case) to its value 0..15.
     *
     * @exception IllegalArgumentException If the byte is not a hexadecimal digit.
     */
    private static int hexDecode(byte b) {
        if (b >= (byte) '0' && b <= (byte) '9') {
            return b - (byte) '0';
        }
        if (b >= (byte) 'A' && b <= (byte) 'F') {
            return b - (byte) 'A' + 10;
        }
        if (b >= (byte) 'a' && b <= (byte) 'f') {
            return b - (byte) 'a' + 10;
        }
        throw new IllegalArgumentException("Bad Hex escape character: " + (b & 255));
    }

    /**
     * For simple testing: prints each argument, its decoded form, and the
     * re-encoded form of the decoded value.
     */
    public static void main(String[] args) {
        for (int i = 0; i < args.length; i++) {
            String decoded = decode(args[i]);
            System.out.println(args[i] + " => " + decoded + " => " + encode(decoded));
        }
    }

}
177
Popular Tags