KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > frontier > BucketQueueAssignmentPolicy


1 /* BucketQueueAssignmentPolicy
2  *
3  * $Header: /cvsroot/archive-crawler/ArchiveOpenCrawler/src/java/org/archive/crawler/frontier/BucketQueueAssignmentPolicy.java,v 1.3 2005/06/13 20:59:38 stack-sf Exp $
4  *
5  * Created on May 06, 2005
6  *
7  * Copyright (C) 2005 Christian Kohlschuetter
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  *
25  */

26 package org.archive.crawler.frontier;
27
28 import org.apache.commons.httpclient.URIException;
29 import org.archive.crawler.datamodel.CandidateURI;
30 import org.archive.crawler.datamodel.CrawlHost;
31 import org.archive.crawler.framework.CrawlController;
32
33 /**
34  * Uses the target IPs as basis for queue-assignment,
35  * distributing them over a fixed number of sub-queues.
36  *
37  * @author Christian Kohlschuetter
38  */

39 public class BucketQueueAssignmentPolicy extends QueueAssignmentPolicy {
40     private static final int DEFAULT_NOIP_BITMASK = 1023;
41     private static final int DEFAULT_QUEUES_HOSTS_MODULO = 1021;
42
43     public String JavaDoc getClassKey(final CrawlController controller,
44         final CandidateURI curi) {
45         
46         CrawlHost host;
47         try {
48             host = controller.getServerCache().getHostFor(
49                 curi.getUURI().getReferencedHost());
50         } catch (URIException e) {
51             // FIXME error handling
52
e.printStackTrace();
53             host = null;
54         }
55         if(host == null) {
56             return "NO-HOST";
57         } else if(host.getIP() == null) {
58             return "NO-IP-".concat(Integer.toString(Math.abs(host.getHostName()
59                 .hashCode())
60                 & DEFAULT_NOIP_BITMASK));
61         } else {
62             return Integer.toString(Math.abs(host.getIP().hashCode())
63                 % DEFAULT_QUEUES_HOSTS_MODULO);
64         }
65     }
66
67     public int maximumNumberOfKeys() {
68         return DEFAULT_NOIP_BITMASK + DEFAULT_QUEUES_HOSTS_MODULO + 2;
69     }
70 }
71
Popular Tags