package org.archive.crawler.postprocessor;

import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.datamodel.FetchStatusCodes;
import org.archive.crawler.framework.Processor;

/**
 * Processor that passes the candidate URIs carried by a processed
 * {@link CrawlURI} to the crawl controller's frontier for scheduling.
 *
 * <p>As the processor description states, a Scoper is expected to run before
 * this processor so that out-of-scope links have already been dropped and
 * in-scope links have been promoted to {@link CandidateURI}s.
 */
public class FrontierScheduler extends Processor
implements FetchStatusCodes {

    private static final long serialVersionUID = -5178775477602250542L;

    private static Logger LOGGER =
        Logger.getLogger(FrontierScheduler.class.getName());

    /** Human-readable description passed to the superclass constructor. */
    private static final String DESCRIPTION =
        "FrontierScheduler. 'Schedule' with the Frontier "
        + "any CandidateURIs carried by the passed CrawlURI. "
        + "Run a Scoper before this "
        + "processor so links that are not in-scope get bumped from the "
        + "list of links (And so those in scope get promoted from Link "
        + "to CandidateURI).";

    /**
     * Creates the processor.
     *
     * @param name name of this processor; forwarded to the superclass
     *        constructor together with a fixed description.
     */
    public FrontierScheduler(String name) {
        super(name, DESCRIPTION);
    }

    /**
     * Schedules whatever the given URI carries.
     *
     * <p>If the URI has a prerequisite URI and its fetch status equals
     * {@code S_DEFERRED}, only the prerequisite is scheduled (via
     * {@link #handlePrerequisites(CrawlURI)}). Otherwise every out-candidate
     * of the URI is scheduled while holding this processor's monitor.
     *
     * @param curi the URI being processed.
     */
    protected void innerProcess(final CrawlURI curi) {
        if (LOGGER.isLoggable(Level.FINEST)) {
            LOGGER.finest(getName() + " processing " + curi);
        }

        // S_DEFERRED is an inherited constant (presumably declared by
        // FetchStatusCodes -- confirm against that interface).
        final boolean deferredForPrerequisite =
            curi.hasPrerequisiteUri() && curi.getFetchStatus() == S_DEFERRED;

        if (deferredForPrerequisite) {
            handlePrerequisites(curi);
        } else {
            // All out-candidates of one URI are scheduled under this
            // processor's own lock.
            synchronized (this) {
                for (CandidateURI candidate : curi.getOutCandidates()) {
                    schedule(candidate);
                }
            }
        }
    }

    /**
     * Schedules the prerequisite URI of the given URI.
     *
     * @param curi the URI whose prerequisite is scheduled; its prerequisite
     *        is cast to {@link CandidateURI}.
     */
    protected void handlePrerequisites(CrawlURI curi) {
        final CandidateURI prerequisite =
            (CandidateURI) curi.getPrerequisiteUri();
        schedule(prerequisite);
    }

    /**
     * Hands a single candidate URI to the frontier obtained from the
     * controller.
     *
     * @param caUri the candidate URI to schedule.
     */
    protected void schedule(CandidateURI caUri) {
        getController().getFrontier().schedule(caUri);
    }
}