KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > postprocessor > FrontierScheduler


1 /* FrontierScheduler
2  *
3  * $Id: FrontierScheduler.java,v 1.6.12.1 2007/01/13 01:31:24 stack-sf Exp $
4  *
5  * Created on June 6, 2005
6  *
7  * Copyright (C) 2005 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  *
25  */

26 package org.archive.crawler.postprocessor;
27
28
29 import java.util.logging.Level JavaDoc;
30 import java.util.logging.Logger JavaDoc;
31
32 import org.archive.crawler.datamodel.CandidateURI;
33 import org.archive.crawler.datamodel.CrawlURI;
34 import org.archive.crawler.datamodel.FetchStatusCodes;
35 import org.archive.crawler.framework.Processor;
36
37 /**
38  * 'Schedule' with the Frontier CandidateURIs being carried by the passed
39  * CrawlURI.
40  * Adds either prerequisites or whatever is in CrawlURI outlinks to the
41  * Frontier. Run a Scoper ahead of this processor so only links that
42  * are in-scope get scheduled.
43  * @author stack
44  */

45 public class FrontierScheduler extends Processor
46 implements FetchStatusCodes {
47
48     private static final long serialVersionUID = -5178775477602250542L;
49
50     private static Logger JavaDoc LOGGER =
51         Logger.getLogger(FrontierScheduler.class.getName());
52     
53     /**
54      * @param name Name of this filter.
55      */

56     public FrontierScheduler(String JavaDoc name) {
57         super(name, "FrontierScheduler. 'Schedule' with the Frontier " +
58             "any CandidateURIs carried by the passed CrawlURI. " +
59             "Run a Scoper before this " +
60             "processor so links that are not in-scope get bumped from the " +
61             "list of links (And so those in scope get promoted from Link " +
62             "to CandidateURI).");
63     }
64
65     protected void innerProcess(final CrawlURI curi) {
66         if (LOGGER.isLoggable(Level.FINEST)) {
67             LOGGER.finest(getName() + " processing " + curi);
68         }
69         
70         // Handle any prerequisites when S_DEFERRED for prereqs
71
if (curi.hasPrerequisiteUri() && curi.getFetchStatus() == S_DEFERRED) {
72             handlePrerequisites(curi);
73             return;
74         }
75
76         synchronized(this) {
77             for (CandidateURI cauri: curi.getOutCandidates()) {
78                 schedule(cauri);
79             }
80         }
81     }
82
83     protected void handlePrerequisites(CrawlURI curi) {
84         schedule((CandidateURI)curi.getPrerequisiteUri());
85     }
86
87     /**
88      * Schedule the given {@link CandidateURI CandidateURI} with the Frontier.
89      * @param caUri The CandidateURI to be scheduled.
90      */

91     protected void schedule(CandidateURI caUri) {
92         getController().getFrontier().schedule(caUri);
93     }
94 }
95
Popular Tags