package org.archive.net;

import java.util.Arrays;
import java.util.BitSet;
import java.util.Locale;

import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.httpclient.util.EncodingUtil;

/**
 * A {@link URI} variant that is more permissive than its superclass.
 *
 * <p>It differs from the superclass in the following visible ways:
 * <ul>
 * <li>validation against the inherited {@code rel_segment}, {@code abs_path}
 *     and {@code query} character sets is redirected to wider sets that
 *     accept a few extra characters (see {@link #lax(BitSet)});</li>
 * <li>escaped components are decoded with
 *     {@code LaxURLCodec.decodeUrlLoose};</li>
 * <li>{@link #parseAuthority(String, boolean)} and {@link #setURI()} make
 *     instances share {@code char[]} arrays where the content allows it;</li>
 * <li>{@link #parseUriReference(String, boolean)} uses its own rule to decide
 *     whether a reference starts with a scheme or with a path.</li>
 * </ul>
 */
public class LaxURI extends URI {

    private static final long serialVersionUID = 5273922211722239537L;

    /** Shared array for the {@code http} scheme; substituted in {@link #setURI()}. */
    protected static final char[] HTTP_SCHEME = {'h', 't', 't', 'p'};

    /** Shared array for the {@code https} scheme; substituted in {@link #setURI()}. */
    protected static final char[] HTTPS_SCHEME = {'h', 't', 't', 'p', 's'};

    /** The inherited {@code rel_segment} set, additionally allowing {@code ':'}. */
    protected static final BitSet lax_rel_segment = new BitSet(256);
    static {
        lax_rel_segment.or(rel_segment);
        lax_rel_segment.set(':');
    }

    /** The inherited {@code abs_path} set, additionally allowing {@code '|'}. */
    protected static final BitSet lax_abs_path = new BitSet(256);
    static {
        lax_abs_path.or(abs_path);
        lax_abs_path.set('|');
    }

    /**
     * The inherited {@code query} set, additionally allowing
     * <code>{ } | [ ] ^</code>.
     */
    protected static final BitSet lax_query = new BitSet(256);
    static {
        lax_query.or(query);
        lax_query.set('{');
        lax_query.set('}');
        lax_query.set('|');
        lax_query.set('[');
        lax_query.set(']');
        lax_query.set('^');
    }

    /**
     * Delegates to the superclass constructor with the same arguments.
     *
     * @param uri the URI string
     * @param escaped whether {@code uri} is already escaped
     * @param charset the charset passed through to the superclass
     * @throws URIException if the superclass constructor rejects the input
     */
    public LaxURI(String uri, boolean escaped, String charset)
            throws URIException {
        super(uri, escaped, charset);
    }

    /**
     * Delegates to the superclass constructor with the same arguments.
     *
     * @param base the base URI
     * @param relative the relative URI
     * @throws URIException if the superclass constructor rejects the input
     */
    public LaxURI(URI base, URI relative) throws URIException {
        super(base, relative);
    }

    /**
     * Delegates to the superclass constructor with the same arguments.
     *
     * @param uri the URI string
     * @param escaped whether {@code uri} is already escaped
     * @throws URIException if the superclass constructor rejects the input
     */
    public LaxURI(String uri, boolean escaped) throws URIException {
        super(uri, escaped);
    }

    /** Delegates to the superclass no-argument constructor. */
    public LaxURI() {
        super();
    }

    /**
     * Returns the whole URI decoded with this class's {@code decode}.
     *
     * <p>The unqualified {@code decode} call below binds to the static
     * method declared in this class, i.e. the
     * {@code LaxURLCodec.decodeUrlLoose} based one.
     *
     * @return the decoded URI, or {@code null} when {@code _uri} is null
     * @throws URIException declared by {@code decode}
     */
    public String getURI() throws URIException {
        return (_uri == null) ? null : decode(_uri, getProtocolCharset());
    }

    /**
     * Returns the path decoded with this class's {@code decode}.
     *
     * @return the decoded path, or {@code null} when the raw path is null
     * @throws URIException declared by {@code decode}
     */
    public String getPath() throws URIException {
        char[] p = getRawPath();
        return (p == null) ? null : decode(p, getProtocolCharset());
    }

    /**
     * Returns the path-plus-query decoded with this class's {@code decode}.
     *
     * @return the decoded value, or {@code null} when the raw value is null
     * @throws URIException declared by {@code decode}
     */
    public String getPathQuery() throws URIException {
        char[] rawPathQuery = getRawPathQuery();
        return (rawPathQuery == null)
                ? null
                : decode(rawPathQuery, getProtocolCharset());
    }

    /**
     * Decodes an escaped component given as a character array by converting
     * it to a {@code String} and calling {@link #decode(String, String)}.
     *
     * @param component the escaped component; must not be {@code null}
     * @param charset the charset used to build the resulting string
     * @return the decoded component
     * @throws IllegalArgumentException if {@code component} is {@code null}
     * @throws URIException declared by {@link #decode(String, String)}
     */
    protected static String decode(char[] component, String charset)
            throws URIException {
        if (component == null) {
            throw new IllegalArgumentException(
                    "Component array of chars may not be null");
        }
        return decode(new String(component), charset);
    }

    /**
     * Decodes an escaped component: the text is turned into ASCII bytes,
     * run through {@code LaxURLCodec.decodeUrlLoose}, and the resulting
     * bytes are converted to a string using {@code charset}.
     *
     * @param component the escaped component; must not be {@code null}
     * @param charset the charset used to build the resulting string
     * @return the decoded component
     * @throws IllegalArgumentException if {@code component} is {@code null}
     * @throws URIException part of the declared signature.
     *         NOTE(review): no statement visible in this method throws it;
     *         confirm against LaxURLCodec before relying on that.
     */
    protected static String decode(String component, String charset)
            throws URIException {
        if (component == null) {
            throw new IllegalArgumentException(
                    "Component array of chars may not be null");
        }
        byte[] rawdata = LaxURLCodec.decodeUrlLoose(
                EncodingUtil.getAsciiBytes(component));
        return EncodingUtil.getString(rawdata, charset);
    }

    /**
     * Validates {@code component} with the superclass implementation, after
     * swapping {@code generous} for its lax counterpart if it has one.
     *
     * @param component the characters to validate
     * @param generous the allowed character set requested by the caller
     * @return the result of the superclass validation
     */
    protected boolean validate(char[] component, BitSet generous) {
        return super.validate(component, lax(generous));
    }

    /**
     * Range-limited variant of {@link #validate(char[], BitSet)}; the offsets
     * are passed through to the superclass unchanged.
     *
     * @param component the characters to validate
     * @param soffset start offset, as defined by the superclass
     * @param eoffset end offset, as defined by the superclass
     * @param generous the allowed character set requested by the caller
     * @return the result of the superclass validation
     */
    protected boolean validate(char[] component, int soffset, int eoffset,
            BitSet generous) {
        return super.validate(component, soffset, eoffset, lax(generous));
    }

    /**
     * Maps one of the inherited character sets to its wider replacement.
     *
     * <p>The match is by object identity ({@code ==}), so only the exact
     * inherited {@code rel_segment}, {@code abs_path} and {@code query}
     * instances are replaced; any other set is returned untouched.
     *
     * @param generous the set a caller asked to validate against
     * @return {@link #lax_rel_segment}, {@link #lax_abs_path} or
     *         {@link #lax_query} for the matching inherited set, otherwise
     *         {@code generous} itself
     */
    protected BitSet lax(BitSet generous) {
        if (generous == rel_segment) {
            return lax_rel_segment;
        }
        if (generous == abs_path) {
            return lax_abs_path;
        }
        if (generous == query) {
            return lax_query;
        }
        return generous;
    }

    /**
     * Parses the authority with the superclass, then lets {@code _host}
     * reference the {@code _authority} array when both have the same length.
     *
     * <p>NOTE(review): only the lengths are compared; presumably equal
     * length implies the authority consists of the host alone, so sharing
     * one array saves a duplicate. Confirm against the superclass parser.
     *
     * @param original the authority text
     * @param escaped whether {@code original} is already escaped
     * @throws URIException if the superclass rejects the authority
     */
    protected void parseAuthority(String original, boolean escaped)
            throws URIException {
        super.parseAuthority(original, escaped);
        if (_host != null && _authority != null
                && _host.length == _authority.length) {
            _host = _authority;
        }
    }

    /**
     * Replaces an {@code http} or {@code https} scheme array with the shared
     * {@link #HTTP_SCHEME} / {@link #HTTPS_SCHEME} constant, then delegates
     * to the superclass.
     *
     * <p>The content of {@code _scheme} is unchanged by the substitution;
     * only the array instance differs.
     */
    protected void setURI() {
        if (_scheme != null) {
            if (_scheme.length == 4 && Arrays.equals(_scheme, HTTP_SCHEME)) {
                _scheme = HTTP_SCHEME;
            } else if (_scheme.length == 5
                    && Arrays.equals(_scheme, HTTPS_SCHEME)) {
                // Compare against HTTPS_SCHEME: a five-character scheme can
                // never equal the four-character HTTP_SCHEME, which made
                // this branch unreachable before.
                _scheme = HTTPS_SCHEME;
            }
        }
        super.setURI();
    }

    /**
     * Splits a URI reference into scheme, authority, path, query and
     * fragment and stores them in the inherited fields, finishing with
     * {@link #setURI()}.
     *
     * @param original the URI reference text; must not be {@code null}
     * @param escaped {@code true} if {@code original} is already escaped, in
     *        which case components are validated and stored as-is;
     *        {@code false} if components must be encoded while parsing
     * @throws URIException if {@code original} is {@code null}, the scheme
     *         contains characters outside the inherited {@code scheme} set,
     *         an escaped query fails validation, or a delegated
     *         parse/encode step fails
     */
    protected void parseUriReference(String original, boolean escaped)
            throws URIException {

        if (original == null) {
            throw new URIException("URI-Reference required");
        }

        String tmp = original.trim();
        int length = tmp.length();

        // Strip one surrounding pair of delimiter characters (members of the
        // inherited 'delims' set), but only when both ends are delimiters.
        if (length > 0) {
            char[] firstDelimiter = {tmp.charAt(0)};
            if (validate(firstDelimiter, delims)) {
                if (length >= 2) {
                    char[] lastDelimiter = {tmp.charAt(length - 1)};
                    if (validate(lastDelimiter, delims)) {
                        tmp = tmp.substring(1, length - 1);
                        length = length - 2;
                    }
                }
            }
        }

        // Start index of the component currently being parsed.
        int from = 0;

        // The reference is treated as starting with a path (no scheme) when
        // there is no ':' past index 0 and it does not begin with "//", or
        // when a '/' occurs before the first ':'.
        boolean isStartedFromPath = false;
        int atColon = tmp.indexOf(':');
        int atSlash = tmp.indexOf('/');
        if ((atColon <= 0 && !tmp.startsWith("//"))
                || (atSlash >= 0 && atSlash < atColon)) {
            isStartedFromPath = true;
        }

        // First structural delimiter; ':' only counts when a scheme is
        // possible. No delimiter at all is treated as position 0.
        int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
        if (at == -1) {
            at = 0;
        }

        // Scheme: everything before the ':' found above, lower-cased.
        if (at > 0 && at < length && tmp.charAt(at) == ':') {
            // Use a fixed locale so the result does not depend on the JVM
            // default (e.g. Turkish maps 'I' to a dotless i, which would
            // fail scheme validation).
            char[] target = tmp.substring(0, at)
                    .toLowerCase(Locale.ENGLISH).toCharArray();
            if (validate(target, scheme)) {
                _scheme = target;
            } else {
                throw new URIException("incorrect scheme");
            }
            at++;
            from = at;
        }

        // Authority: present when the remainder starts with "//" and the
        // reference was not classified as starting with a path.
        _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
        if (0 <= at && at < length && tmp.charAt(at) == '/') {
            _is_hier_part = true;
            if (at + 2 < length && tmp.charAt(at + 1) == '/'
                    && !isStartedFromPath) {
                int next = indexFirstOf(tmp, "/?#", at + 2);
                if (next == -1) {
                    next = (tmp.substring(at + 2).length() == 0)
                            ? at + 2
                            : tmp.length();
                }
                parseAuthority(tmp.substring(at + 2, next), escaped);
                from = at = next;
                _is_net_path = true;
            }
            if (from == at) {
                _is_abs_path = true;
            }
        }

        // Path: runs up to the first '?' or '#', or to the end.
        if (from < length) {
            int next = indexFirstOf(tmp, "?#", from);
            if (next == -1) {
                next = tmp.length();
            }
            String pathPart = tmp.substring(from, next);
            if (!_is_abs_path) {
                // Unescaped input is checked against the disallowed sets,
                // escaped input against the allowed sets.
                if ((!escaped && prevalidate(pathPart, disallowed_rel_path))
                        || (escaped
                            && validate(pathPart.toCharArray(), rel_path))) {
                    _is_rel_path = true;
                } else if ((!escaped
                            && prevalidate(pathPart, disallowed_opaque_part))
                        || (escaped
                            && validate(pathPart.toCharArray(),
                                    opaque_part))) {
                    _is_opaque_part = true;
                } else {
                    _path = null;
                }
            }
            if (escaped) {
                setRawPath(pathPart.toCharArray());
            } else {
                setPath(pathPart);
            }
            at = next;
        }

        String charset = getProtocolCharset();

        // Query: between '?' and '#' (or the end). A '?' that is the last
        // character does not enter this branch.
        if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
            int next = tmp.indexOf('#', at + 1);
            if (next == -1) {
                next = tmp.length();
            }
            if (escaped) {
                _query = tmp.substring(at + 1, next).toCharArray();
                if (!validate(_query, query)) {
                    throw new URIException("Invalid query");
                }
            } else {
                _query = encode(tmp.substring(at + 1, next), allowed_query,
                        charset);
            }
            at = next;
        }

        // Fragment: everything after '#'; a trailing '#' gives an empty
        // (non-null) fragment.
        if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
            if (at + 1 == length) {
                _fragment = "".toCharArray();
            } else {
                _fragment = (escaped)
                        ? tmp.substring(at + 1).toCharArray()
                        : encode(tmp.substring(at + 1), allowed_fragment,
                                charset);
            }
        }

        setURI();
    }

}