1 35 36 package com.hp.hpl.jena.util; 37 38 39 46 public class URIref extends Object { 47 48 56 static public String encode(String unicode) { 57 try { 58 byte utf8[] = unicode.getBytes("UTF-8"); 59 byte rsltAscii[] = new byte[utf8.length*6]; 60 int in = 0; 61 int out = 0; 62 while ( in < utf8.length ) { 63 switch ( utf8[in] ) { 64 case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': case (byte)'g': case (byte)'h': case (byte)'i': case (byte)'j': case (byte)'k': case (byte)'l': case (byte)'m': case (byte)'n': case (byte)'o': case (byte)'p': case (byte)'q': case (byte)'r': case (byte)'s': case (byte)'t': case (byte)'u': case (byte)'v': case (byte)'w': case (byte)'x': case (byte)'y': case (byte)'z': 65 case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': case (byte)'G': case (byte)'H': case (byte)'I': case (byte)'J': case (byte)'K': case (byte)'L': case (byte)'M': case (byte)'N': case (byte)'O': case (byte)'P': case (byte)'Q': case (byte)'R': case (byte)'S': case (byte)'T': case (byte)'U': case (byte)'V': case (byte)'W': case (byte)'X': case (byte)'Y': case (byte)'Z': 66 case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9': 67 case (byte)';': case (byte)'/': case (byte)'?': case (byte)':': case (byte)'@': case (byte)'&': case (byte)'=': case (byte)'+': case (byte)'$': case (byte)',': 68 case (byte)'-': case (byte)'_': case (byte)'.': case (byte)'!': case (byte)'~': case (byte)'*': case (byte)'\'': case (byte)'(': case (byte)')': 69 case (byte)'#': 70 case (byte)'[': case (byte)']': 71 rsltAscii[out] = utf8[in]; 72 out++; 73 in++; 74 break; 75 case (byte) '%': 76 try { 77 if ( in+2 < utf8.length ) { 78 byte first = hexEncode(hexDecode(utf8[in+1])); 79 byte second = hexEncode(hexDecode(utf8[in+2])); 80 rsltAscii[out++] = (byte)'%'; 81 rsltAscii[out++] = first; 82 rsltAscii[out++] = second; 83 in += 3; 84 break; 85 } 86 } 87 catch (IllegalArgumentException e) { 88 System.err.println("Confusing IRI to encode - contains literal '%': " + unicode); 90 } 92 default: 93 rsltAscii[out++] = (byte)'%'; 94 int c = ((int)utf8[in])&255; 96 rsltAscii[out++] = hexEncode( c/16 ); 97 rsltAscii[out++] = hexEncode( c%16 ); 98 in++; 99 break; 100 } 101 } 102 return new String (rsltAscii,0,out,"US-ASCII"); 103 } 104 catch ( java.io.UnsupportedEncodingException e ) { 105 throw new Error ( "The JVM is required to support UTF-8 and US-ASCII encodings."); 106 } 107 } 108 109 119 static public String decode(String uri) { 120 try { 121 byte ascii[] = uri.getBytes("US-ASCII"); 122 byte utf8[] = new byte[ascii.length]; 123 int in = 0; 124 int out = 0; 125 while ( in < ascii.length ) { 126 if ( ascii[in] == (byte)'%' 127 && ( ascii[in+1] != '2' 128 || ascii[in+2] != '5' ) ) { 129 in++; 130 utf8[out++] = (byte)(hexDecode(ascii[in])*16 | hexDecode(ascii[in+1])); 131 in += 2; 132 } else { 133 utf8[out++] = ascii[in++]; 134 } 135 } 136 return new String (utf8,0,out,"UTF-8"); 137 } 138 catch ( java.io.UnsupportedEncodingException e ) { 139 throw new Error ( "The JVM is required to support UTF-8 and US-ASCII encodings."); 140 } 141 catch ( ArrayIndexOutOfBoundsException ee ) { 142 throw new IllegalArgumentException ("Incomplete Hex escape sequence in " + uri ); 143 } 144 } 145 146 static private byte hexEncode(int i ) { 147 if (i<10) 148 return (byte) ('0' + i); 149 else 150 return (byte)('A' + i - 10); 151 } 152 153 static private int hexDecode(byte b ) { 154 switch (b) { 155 case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f': 156 return (((int)b)&255)-'a'+10; 157 case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F': 158 return b - (byte)'A' + 10; 159 case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9': 160 return b - (byte)'0'; 161 default: 162 throw new IllegalArgumentException ("Bad Hex escape character: " + (((int)b)&255) ); 163 } 164 } 165 166 168 static public void main(String args[]) { 169 for (int i=0; i<args.length; i++) { 170 System.out.println(args[i] + " => " + decode(args[i]) + " => " + encode(decode(args[i]))); 171 } 172 } 173 174 175 176 } 177 | Popular Tags |