KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > datamodel > ServerCache


/* ServerCache
 *
 * Created on Nov 19, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.datamodel;

import java.util.Map;
import java.util.Hashtable;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.framework.CrawlController;
import org.archive.crawler.settings.SettingsHandler;

/**
 * Server and Host cache.
 * @author stack
 * @version $Date: 2007/01/13 01:31:12 $, $Revision: 1.32.4.1 $
 */

39 public class ServerCache {
40     private static Logger JavaDoc logger =
41         Logger.getLogger(ServerCache.class.getName());
42     
43     protected SettingsHandler settingsHandler = null;
44     
45     /**
46      * hostname[:port] -> CrawlServer.
47      * Set in the initialization.
48      */

49     protected Map JavaDoc<String JavaDoc,CrawlServer> servers = null;
50     
51     /**
52      * hostname -> CrawlHost.
53      * Set in the initialization.
54      */

55     protected Map JavaDoc<String JavaDoc,CrawlHost> hosts = null;
56     
57     /**
58      * Constructor.
59      * Shutdown access to the default constructor by making it protected.
60      */

61     protected ServerCache() {
62         super();
63     }
64     
65     /**
66      * This constructor creates a ServerCache that is all memory-based using
67      * Hashtables. Used for unit testing only
68      * (Use {@link #ServerCache(CrawlController)} when crawling).
69      * @param sh
70      * @throws Exception
71      */

72     public ServerCache(final SettingsHandler sh)
73     throws Exception JavaDoc {
74         this.settingsHandler = sh;
75         this.servers = new Hashtable JavaDoc<String JavaDoc,CrawlServer>();
76         this.hosts = new Hashtable JavaDoc<String JavaDoc,CrawlHost>();
77     }
78     
79     public ServerCache(final CrawlController c)
80     throws Exception JavaDoc {
81         this.settingsHandler = c.getSettingsHandler();
82         this.servers = c.getBigMap("servers", String JavaDoc.class, CrawlServer.class);
83         this.hosts = c.getBigMap("hosts", String JavaDoc.class, CrawlHost.class);
84     }
85     
86     /**
87      * Get the {@link CrawlServer} associated with <code>name</code>.
88      * @param serverKey Server name we're to return server for.
89      * @return CrawlServer instance that matches the passed server name.
90      */

91     public synchronized CrawlServer getServerFor(String JavaDoc serverKey) {
92         CrawlServer cserver = (CrawlServer)this.servers.get(serverKey);
93         return (cserver != null)? cserver: createServerFor(serverKey);
94     }
95     
96     protected CrawlServer createServerFor(String JavaDoc s) {
97         CrawlServer cserver = (CrawlServer)this.servers.get(s);
98         if (cserver != null) {
99             return cserver;
100         }
101         // Ensure key is private object
102
String JavaDoc skey = new String JavaDoc(s);
103         cserver = new CrawlServer(skey);
104         cserver.setSettingsHandler(settingsHandler);
105         servers.put(skey,cserver);
106         if (logger.isLoggable(Level.FINER)) {
107             logger.finer("Created server " + s);
108         }
109         return cserver;
110     }
111
112     /**
113      * Get the {@link CrawlServer} associated with <code>curi</code>.
114      * @param cauri CandidateURI we're to get server from.
115      * @return CrawlServer instance that matches the passed CandidateURI.
116      */

117     public CrawlServer getServerFor(CandidateURI cauri) {
118         CrawlServer cs = null;
119         try {
120             String JavaDoc key = CrawlServer.getServerKey(cauri);
121             // TODOSOMEDAY: make this robust against those rare cases
122
// where authority is not a hostname.
123
if (key != null) {
124                 cs = getServerFor(key);
125             }
126         } catch (URIException e) {
127             logger.severe(e.getMessage() + ": " + cauri);
128             e.printStackTrace();
129         } catch (NullPointerException JavaDoc npe) {
130             logger.severe(npe.getMessage() + ": " + cauri);
131             npe.printStackTrace();
132         }
133         return cs;
134     }
135     
136     /**
137      * Get the {@link CrawlHost} associated with <code>name</code>.
138      * @param hostname Host name we're to return Host for.
139      * @return CrawlHost instance that matches the passed Host name.
140      */

141     public synchronized CrawlHost getHostFor(String JavaDoc hostname) {
142         if (hostname == null || hostname.length() == 0) {
143             return null;
144         }
145         CrawlHost host = (CrawlHost)this.hosts.get(hostname);
146         return (host != null)? host: createHostFor(hostname);
147     }
148     
149     protected CrawlHost createHostFor(String JavaDoc hostname) {
150         if (hostname == null || hostname.length() == 0) {
151             return null;
152         }
153         CrawlHost host = (CrawlHost)this.hosts.get(hostname);
154         if (host != null) {
155             return host;
156         }
157         String JavaDoc hkey = new String JavaDoc(hostname);
158         host = new CrawlHost(hkey);
159         this.hosts.put(hkey, host);
160         if (logger.isLoggable(Level.FINE)) {
161             logger.fine("Created host " + hostname);
162         }
163         return host;
164     }
165     
166     /**
167      * Get the {@link CrawlHost} associated with <code>curi</code>.
168      * @param cauri CandidateURI we're to return Host for.
169      * @return CandidateURI instance that matches the passed Host name.
170      */

171     public CrawlHost getHostFor(CandidateURI cauri) {
172         CrawlHost h = null;
173         try {
174             h = getHostFor(cauri.getUURI().getReferencedHost());
175         } catch (URIException e) {
176             e.printStackTrace();
177         }
178         return h;
179     }
180
181     /**
182      * @param serverKey Key to use doing lookup.
183      * @return True if a server instance exists.
184      */

185     public boolean containsServer(String JavaDoc serverKey) {
186         return (CrawlServer) servers.get(serverKey) != null;
187     }
188
189     /**
190      * @param hostKey Key to use doing lookup.
191      * @return True if a host instance exists.
192      */

193     public boolean containsHost(String JavaDoc hostKey) {
194         return (CrawlHost) hosts.get(hostKey) != null;
195     }
196
197     /**
198      * Called when shutting down the cache so we can do clean up.
199      */

200     public void cleanup() {
201         if (this.hosts != null) {
202             // If we're using a bdb bigmap, the call to clear will
203
// close down the bdb database.
204
this.hosts.clear();
205             this.hosts = null;
206         }
207         if (this.servers != null) {
208             this.servers.clear();
209             this.servers = null;
210         }
211     }
212 }
Popular Tags