KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > AddRedirectFromRootServerToScope


1 /* AddRedirectFromRootServerToScope
2  *
3  * Created on May 25, 2005
4  *
5  * Copyright (C) 2005 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23
24 package org.archive.crawler.deciderules;
25
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.net.UURI;
30
31
32 public class AddRedirectFromRootServerToScope extends PredicatedDecideRule {
33
34     private static final long serialVersionUID = 2644131585813079064L;
35
36     private static final Logger JavaDoc LOGGER =
37             Logger.getLogger(AddRedirectFromRootServerToScope.class.getName());
38     private static final String JavaDoc SLASH = "/";
39     public AddRedirectFromRootServerToScope(String JavaDoc name) {
40         super(name);
41         setDescription("Allow URI only if it is a redirect and via URI is a " +
42                 "root server (host's slash page) that is within the " +
43                 "scope. Also mark the URI as a seed.");
44     }
45
46     boolean evaluate(Object JavaDoc object) {
47         UURI via = getVia(object);
48         if (via == null) {
49             return false;
50         }
51         CandidateURI curi = (CandidateURI) object;
52         if ( curi == null) {
53             return false;
54         }
55         try {
56             // Mark URI as seed if via is from different host, URI is not a seed
57
// already, URI is redirect and via is root server
58
if (curi.getUURI().getHostBasename() != null &&
59                     via.getHostBasename() != null &&
60                     !curi.getUURI().getHostBasename().equals(via.getHostBasename())
61                     && curi.isLocation()
62                     && via.getPath().equals(SLASH)) {
63                 curi.setIsSeed(true);
64                 LOGGER.info("Adding " + object.toString() + " to seeds via "
65                         + getVia(object).toString());
66                 return true;
67             }
68         } catch (URIException e) {
69             e.printStackTrace();
70         } catch (Exception JavaDoc e) {
71             e.printStackTrace();
72             // Return false since we could not get hostname or something else
73
// went wrong
74
}
75         return false;
76     }
77
78     private UURI getVia(Object JavaDoc o){
79         return (o instanceof CandidateURI)? ((CandidateURI)o).getVia(): null;
80     }
81 }
82
Popular Tags