KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > filter > SurtPrefixFilter


1 /* SurtPrefixFilter
2 *
3 * $Id: SurtPrefixFilter.java,v 1.9.2.1 2007/01/13 01:31:21 stack-sf Exp $
4 *
5 * Created on Jul 22, 2004
6 *
7 * Copyright (C) 2004 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.filter;
26
27 import java.io.File JavaDoc;
28 import java.io.FileReader JavaDoc;
29 import java.io.IOException JavaDoc;
30
31 import org.archive.crawler.deciderules.DecideRule;
32 import org.archive.crawler.deciderules.DecidingFilter;
33 import org.archive.crawler.framework.Filter;
34 import org.archive.crawler.settings.SimpleType;
35 import org.archive.util.SURT;
36 import org.archive.util.SurtPrefixSet;
37 /**
38  * A filter which tests a URI against a set of SURT
39  * prefixes, and if the URI's prefix is in the set,
40  * returns the chosen true/false accepts value.
41  *
42  * @author gojomo
43  * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
44  * equivalent {@link DecideRule}.
45  */

46 public class SurtPrefixFilter extends Filter {
47
48     private static final long serialVersionUID = -6933592892325852022L;
49
50     public static final String JavaDoc ATTR_SURTS_SOURCE_FILE = "surts-source-file";
51     public static final String JavaDoc ATTR_MATCH_RETURN_VALUE = "if-match-return";
52
53     SurtPrefixSet surtPrefixes = null;
54     
55     /**
56      * @param name
57      */

58     public SurtPrefixFilter(String JavaDoc name) {
59         super(name, "SURT prefix filter *Deprecated* Use" +
60                 "DecidingFilter and equivalent DecideRule instead.");
61         addElementToDefinition(
62             new SimpleType(ATTR_MATCH_RETURN_VALUE, "What to return when " +
63                     "a prefix matches.\n", new Boolean JavaDoc(true)));
64         addElementToDefinition(
65                 new SimpleType(ATTR_SURTS_SOURCE_FILE,
66                         "Source file from which to infer SURT prefixes. Any URLs " +
67                         "in file will be converted to the implied SURT prefix, and " +
68                         "literal SURT prefixes may be listed on lines beginning " +
69                         "with a '+' character.",
70                         ""));
71     }
72     
73     /* (non-Javadoc)
74      * @see org.archive.crawler.framework.Filter#accepts(java.lang.Object)
75      */

76     protected synchronized boolean innerAccepts(Object JavaDoc o) {
77         if (surtPrefixes == null) {
78             readPrefixes();
79         }
80         String JavaDoc s = SURT.fromURI(o.toString());
81         // also want to treat https as http
82
if(s.startsWith("https:")) {
83             s = "http:"+s.substring(6);
84         }
85         // TODO: consider other cases of scheme-indifference?
86
return surtPrefixes.containsPrefixOf(s);
87     }
88
89     private void readPrefixes() {
90         surtPrefixes = new SurtPrefixSet();
91         String JavaDoc sourcePath = (String JavaDoc) getUncheckedAttribute(null,
92                 ATTR_SURTS_SOURCE_FILE);
93         File JavaDoc source = new File JavaDoc(sourcePath);
94         if (!source.isAbsolute()) {
95             source = new File JavaDoc(getSettingsHandler().getOrder()
96                     .getController().getDisk(), sourcePath);
97         }
98         FileReader JavaDoc fr = null;
99         try {
100             fr = new FileReader JavaDoc(source);
101             try {
102                 surtPrefixes.importFromMixed(fr,true);
103             } finally {
104                 fr.close();
105             }
106         } catch (IOException JavaDoc e) {
107             e.printStackTrace();
108             throw new RuntimeException JavaDoc(e);
109         }
110     }
111     
112     /**
113      * Re-read prefixes after a settings update.
114      *
115      */

116     public synchronized void kickUpdate() {
117         super.kickUpdate();
118         // TODO: make conditional on file having actually changed,
119
// perhaps by remembering mod-time
120
readPrefixes();
121     }
122 }
123
Popular Tags