1 26 package org.archive.crawler.frontier; 27 28 import org.apache.commons.httpclient.URIException; 29 import org.archive.crawler.datamodel.CandidateURI; 30 import org.archive.crawler.datamodel.CrawlHost; 31 import org.archive.crawler.framework.CrawlController; 32 33 39 public class BucketQueueAssignmentPolicy extends QueueAssignmentPolicy { 40 private static final int DEFAULT_NOIP_BITMASK = 1023; 41 private static final int DEFAULT_QUEUES_HOSTS_MODULO = 1021; 42 43 public String getClassKey(final CrawlController controller, 44 final CandidateURI curi) { 45 46 CrawlHost host; 47 try { 48 host = controller.getServerCache().getHostFor( 49 curi.getUURI().getReferencedHost()); 50 } catch (URIException e) { 51 e.printStackTrace(); 53 host = null; 54 } 55 if(host == null) { 56 return "NO-HOST"; 57 } else if(host.getIP() == null) { 58 return "NO-IP-".concat(Integer.toString(Math.abs(host.getHostName() 59 .hashCode()) 60 & DEFAULT_NOIP_BITMASK)); 61 } else { 62 return Integer.toString(Math.abs(host.getIP().hashCode()) 63 % DEFAULT_QUEUES_HOSTS_MODULO); 64 } 65 } 66 67 public int maximumNumberOfKeys() { 68 return DEFAULT_NOIP_BITMASK + DEFAULT_QUEUES_HOSTS_MODULO + 2; 69 } 70 } 71 | Popular Tags |