KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > framework > Scoper


1 /* Scoper
2  *
3  * Created on Jun 6, 2005
4  *
5  * Copyright (C) 2005 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.framework;
24
25 import java.util.logging.Level JavaDoc;
26 import java.util.logging.Logger JavaDoc;
27
28 import javax.management.AttributeNotFoundException JavaDoc;
29
30 import org.archive.crawler.datamodel.CandidateURI;
31 import org.archive.crawler.settings.SimpleType;
32 import org.archive.crawler.settings.Type;
33 import org.archive.crawler.util.LogUtils;
34
35 /**
36  * Base class for Scopers.
37  * Scopers test CandidateURIs against a scope.
38  * Scopers allow logging of rejected CandidateURIs.
39  * @author stack
40  * @version $Date: 2007/01/13 01:31:22 $, $Revision: 1.4.16.1 $
41  */

42 public abstract class Scoper extends Processor {
43     private static Logger JavaDoc LOGGER =
44         Logger.getLogger(Scoper.class.getName());
45     
46     /**
47      * Protected so avaiilable to subclasses.
48      */

49     protected static final String JavaDoc ATTR_OVERRIDE_LOGGER_ENABLED =
50         "override-logger";
51     
52     /**
53      * Constructor.
54      * @param name
55      * @param description
56      */

57     public Scoper(String JavaDoc name, String JavaDoc description) {
58         super(name, description);
59         Type t = addElementToDefinition(
60             new SimpleType(ATTR_OVERRIDE_LOGGER_ENABLED,
61             "If enabled, override default logger for this class (Default " +
62             "logger writes the console). Override " +
63             "logger will instead send all logging to a file named for this " +
64             "class in the job log directory. Set the logging level and " +
65             "other " +
66             "characteristics of the override logger such as rotation size, " +
67             "suffix pattern, etc. in heritrix.properties. This attribute " +
68             "is only checked once, on startup of a job.",
69             new Boolean JavaDoc(false)));
70         t.setExpertSetting(true);
71     }
72     
73     protected void initialTasks() {
74         super.initialTasks();
75         if (!isOverrideLogger(null)) {
76             return;
77         }
78         // Set up logger for this instance. May have special directives
79
// since this class can log scope-rejected URLs.
80
LogUtils.createFileLogger(getController().getLogsDir(),
81             this.getClass().getName(),
82             Logger.getLogger(this.getClass().getName()));
83     }
84     
85     /**
86      * @param context Context to use looking up attribute.
87      * @return True if we are to override default logger (default logs
88      * to console) with a logger that writes all loggings to a file
89      * named for this class.
90      */

91     protected boolean isOverrideLogger(Object JavaDoc context) {
92         boolean result = true;
93         try {
94             Boolean JavaDoc b = (Boolean JavaDoc)getAttribute(context,
95                 ATTR_OVERRIDE_LOGGER_ENABLED);
96             if (b != null) {
97                 result = b.booleanValue();
98             }
99         } catch (AttributeNotFoundException JavaDoc e) {
100             LOGGER.warning("Failed get of 'enabled' attribute.");
101         }
102
103         return result;
104     }
105     
106     /**
107      * Schedule the given {@link CandidateURI CandidateURI} with the Frontier.
108      * @param caUri The CandidateURI to be scheduled.
109      * @return true if CandidateURI was accepted by crawl scope, false
110      * otherwise.
111      */

112     protected boolean isInScope(CandidateURI caUri) {
113         boolean result = false;
114         if (getController().getScope().accepts(caUri)) {
115             result = true;
116             if (LOGGER.isLoggable(Level.FINER)) {
117                 LOGGER.finer("Accepted: " + caUri);
118             }
119         } else {
120             outOfScope(caUri);
121         }
122         return result;
123     }
124     
125     /**
126      * Called when a CandidateUri is ruled out of scope.
127      * Override if you don't want logs as coming from this class.
128      * @param caUri CandidateURI that is out of scope.
129      */

130     protected void outOfScope(CandidateURI caUri) {
131         if (!LOGGER.isLoggable(Level.INFO)) {
132             return;
133         }
134         LOGGER.info(caUri.getUURI().toString());
135     }
136 }
137
Popular Tags