1 25 package org.archive.net; 26 27 import java.io.File ; 28 import java.io.Serializable ; 29 import java.net.URI ; 30 import java.net.URISyntaxException ; 31 import java.util.logging.Level ; 32 import java.util.logging.Logger ; 33 34 import org.apache.commons.httpclient.URIException; 35 import org.archive.crawler.datamodel.CandidateURI; 36 import org.archive.util.SURT; 37 import org.archive.util.TextUtils; 38 39 40 62 public class UURI extends LaxURI 63 implements CharSequence , Serializable { 64 65 private static final long serialVersionUID = -1277570889914647093L; 66 67 private static Logger LOGGER = 68 Logger.getLogger(UURI.class.getName()); 69 70 73 public final static int MAX_URL_LENGTH = 2083; 74 75 public static final String MASSAGEHOST_PATTERN = "^www\\d*\\."; 76 77 83 private transient String cachedHost = null; 84 85 91 private transient String cachedEscapedURI = null; 92 93 99 private transient String cachedString = null; 100 101 104 private transient String cachedAuthorityMinusUserinfo = null; 105 106 109 private transient String surtForm = null; 110 111 static { 116 hostname.set('_'); 117 } 118 119 120 123 protected UURI() { 124 super(); 125 } 126 127 133 protected UURI(String uri, boolean escaped, String charset) 134 throws URIException { 135 super(uri, escaped, charset); 136 normalize(); 137 } 138 139 144 protected UURI(UURI base, UURI relative) throws URIException { 145 super(base, relative); 146 normalize(); 147 } 148 149 155 public UURI(String uri, boolean escaped) throws URIException, NullPointerException { 156 super(uri,escaped); 157 normalize(); 158 } 159 160 165 public UURI resolve(String uri) 166 throws URIException { 167 return resolve(uri, false, this.getProtocolCharset()); 169 } 170 171 177 public UURI resolve(String uri, boolean e) 178 throws URIException { 179 return resolve(uri, e, this.getProtocolCharset()); 180 } 181 182 189 public UURI resolve(String uri, boolean e, String charset) 190 throws URIException { 191 return new UURI(this, new UURI(uri, e, charset)); 192 } 193 194 200 public boolean equals(Object obj) { 201 202 if (obj == this) { 204 return true; 205 } 206 if (!(obj instanceof UURI)) { 207 return false; 208 } 209 UURI another = (UURI) obj; 210 if (!equals(this._scheme, another._scheme)) { 212 return false; 213 } 214 if (!equals(this._opaque, another._opaque)) { 216 return false; 217 } 218 if (!equals(this._authority, another._authority)) { 221 return false; 222 } 223 if (!equals(this._path, another._path)) { 225 return false; 226 } 227 if (!equals(this._query, another._query)) { 229 return false; 230 } 231 return true; 233 } 234 235 245 public String getHostBasename() throws URIException { 246 return (this.getReferencedHost() == null) 251 ? null 252 : TextUtils.replaceFirst(MASSAGEHOST_PATTERN, 253 this.getReferencedHost(), UURIFactory.EMPTY_STRING); 254 } 255 256 261 public synchronized String toString() { 262 if (this.cachedString == null) { 263 this.cachedString = super.toString(); 264 coalesceUriStrings(); 265 } 266 return this.cachedString; 267 } 268 269 public synchronized String getEscapedURI() { 270 if (this.cachedEscapedURI == null) { 271 this.cachedEscapedURI = super.getEscapedURI(); 272 coalesceUriStrings(); 273 } 274 return this.cachedEscapedURI; 275 } 276 277 281 protected void coalesceUriStrings() { 282 if (this.cachedString != null && this.cachedEscapedURI != null 283 && this.cachedString.length() == this.cachedEscapedURI.length()) { 284 this.cachedString = this.cachedEscapedURI; 288 } 289 } 290 291 public synchronized String getHost() throws URIException { 292 if (this.cachedHost == null) { 293 if (this._host != null) { 296 this.cachedHost = super.getHost(); 297 coalesceHostAuthorityStrings(); 298 } 299 } 300 return this.cachedHost; 301 } 302 303 307 protected void coalesceHostAuthorityStrings() { 308 if (this.cachedAuthorityMinusUserinfo != null 309 && this.cachedHost != null 310 && this.cachedHost.length() == 311 this.cachedAuthorityMinusUserinfo.length()) { 312 this.cachedAuthorityMinusUserinfo = this.cachedHost; 315 } 316 } 317 318 325 public String getReferencedHost() throws URIException { 326 String referencedHost = this.getHost(); 327 if(referencedHost==null && this.getScheme().equals("dns")) { 328 String possibleHost = this.getCurrentHierPath(); 330 if(possibleHost != null && possibleHost.matches("[-_\\w\\.:]+")) { 331 referencedHost = possibleHost; 332 } 333 } 334 return referencedHost; 335 } 336 337 340 public String getSurtForm() { 341 if (surtForm == null) { 342 surtForm = SURT.fromURI(this.toString()); 343 } 344 return surtForm; 345 } 346 347 355 public String getAuthorityMinusUserinfo() 356 throws URIException { 357 if (this.cachedAuthorityMinusUserinfo == null) { 358 String tmp = getAuthority(); 359 if (tmp != null && tmp.length() > 0) { 360 int index = tmp.indexOf('@'); 361 if (index >= 0 && index < tmp.length()) { 362 tmp = tmp.substring(index + 1); 363 } 364 } 365 this.cachedAuthorityMinusUserinfo = tmp; 366 coalesceHostAuthorityStrings(); 367 } 368 return this.cachedAuthorityMinusUserinfo; 369 } 370 371 374 public int length() { 375 return getEscapedURI().length(); 376 } 377 378 381 public char charAt(int index) { 382 return getEscapedURI().charAt(index); 383 } 384 385 388 public CharSequence subSequence(int start, int end) { 389 return getEscapedURI().subSequence(start,end); 390 } 391 392 395 public int compareTo(Object arg0) { 396 return getEscapedURI().compareTo(arg0.toString()); 397 } 398 399 406 public static UURI from(Object o) { 407 UURI u = null; 408 if (o instanceof UURI) { 409 u = (UURI)o; 410 } else if (o instanceof CandidateURI) { 411 u = ((CandidateURI) o).getUURI(); 412 } else if (o instanceof CharSequence ) { 413 String s = o.toString(); 414 try { 415 u = UURIFactory.getInstance(s); 416 } catch (URIException e) { 417 LOGGER.log(Level.FINE,"bad URI",e); 418 } 419 } 420 return u; 421 } 422 423 428 public static boolean hasScheme(String possibleUrl) { 429 boolean result = false; 430 for (int i = 0; i < possibleUrl.length(); i++) { 431 char c = possibleUrl.charAt(i); 432 if (c == ':') { 433 if (i != 0) { 434 result = true; 435 } 436 break; 437 } 438 if (!scheme.get(c)) { 439 break; 440 } 441 } 442 return result; 443 } 444 445 450 public static String parseFilename(final String pathOrUri) 451 throws URISyntaxException { 452 String path = pathOrUri; 453 if (UURI.hasScheme(pathOrUri)) { 454 URI url = new URI (pathOrUri); 455 path = url.getPath(); 456 } 457 return (new File (path)).getName(); 458 } 459 } 460 | Popular Tags |