package org.archive.crawler.deciderules;

import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.net.UURI;

/**
 * Predicate rule that matches a candidate URI when it was reached through a
 * redirect from the root ("/") page of a different host, and marks every
 * matching URI as a seed.
 *
 * <p>The predicate holds only when all of the following are true:
 * <ul>
 *   <li>the evaluated object is a {@link CandidateURI} with a non-null via
 *       URI;</li>
 *   <li>both the candidate and the via URI report a non-null host
 *       basename, and the two basenames differ;</li>
 *   <li>{@code curi.isLocation()} is true (presumably: the URI was found in
 *       a redirect "Location" header -- confirm against
 *       {@code CandidateURI});</li>
 *   <li>the path of the via URI is exactly {@code "/"}.</li>
 * </ul>
 *
 * <p>NOTE(review): the rule description says the via URI must be "within the
 * scope", but this class itself performs no scope check on the via URI;
 * presumably that is guaranteed elsewhere -- verify.
 */
public class AddRedirectFromRootServerToScope extends PredicatedDecideRule {

    private static final long serialVersionUID = 2644131585813079064L;

    private static final Logger LOGGER =
        Logger.getLogger(AddRedirectFromRootServerToScope.class.getName());

    /** Path that identifies a host's root page. */
    private static final String SLASH = "/";

    /**
     * Creates the rule and installs its human-readable description.
     *
     * @param name name of this rule, passed through to the superclass
     */
    public AddRedirectFromRootServerToScope(String name) {
        super(name);
        setDescription("Allow URI only if it is a redirect and via URI is a " +
            "root server (host's slash page) that is within the " +
            "scope. Also mark the URI as a seed.");
    }

    /**
     * Tests whether {@code object} is a redirect target reached from the root
     * page of a different host. As a side effect, a matching URI is flagged
     * as a seed via {@code setIsSeed(true)} and the promotion is logged at
     * INFO level.
     *
     * <p>Evaluation is best-effort: any exception raised while inspecting the
     * URIs is logged at WARNING level and treated as "no match".
     *
     * @param object the object to test; anything that is not a
     *        {@link CandidateURI} (including {@code null}) never matches
     * @return {@code true} if the URI matched and was marked as a seed,
     *         {@code false} otherwise
     */
    boolean evaluate(Object object) {
        UURI via = getVia(object);
        if (via == null) {
            return false;
        }
        // getVia() only yields a non-null value for CandidateURI instances,
        // so this cast cannot fail and the candidate cannot be null here.
        CandidateURI curi = (CandidateURI) object;
        try {
            if (curi.getUURI().getHostBasename() != null
                    && via.getHostBasename() != null
                    && !curi.getUURI().getHostBasename().equals(
                        via.getHostBasename())
                    && curi.isLocation()
                    // Constant-first comparison: a null path simply fails
                    // the test instead of raising NullPointerException.
                    && SLASH.equals(via.getPath())) {
                curi.setIsSeed(true);
                LOGGER.info("Adding " + curi + " to seeds via " + via);
                return true;
            }
        } catch (URIException e) {
            LOGGER.log(Level.WARNING, "Failed reading host or path while "
                + "evaluating " + curi + " via " + via, e);
        } catch (Exception e) {
            // Deliberately broad: a failure while inspecting one URI must
            // not abort rule processing; report it and answer "no match".
            LOGGER.log(Level.WARNING, "Unexpected failure while evaluating "
                + curi + " via " + via, e);
        }
        return false;
    }

    /**
     * Returns the via URI of {@code o}.
     *
     * @param o object under evaluation
     * @return {@code o}'s via URI when {@code o} is a {@link CandidateURI}
     *         (may itself be {@code null}); {@code null} for any other object
     */
    private UURI getVia(Object o) {
        return (o instanceof CandidateURI) ? ((CandidateURI) o).getVia() : null;
    }
}