package org.archive.crawler.framework;

import java.io.IOException;
import java.util.ArrayList;

import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.CrawlSubstats;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.framework.exceptions.EndedException;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.framework.exceptions.InvalidFrontierMarkerException;
import org.archive.crawler.frontier.FrontierJournal;
import org.archive.net.UURI;
import org.archive.util.Reporter;

/**
 * Contract for a crawl frontier: a {@link Reporter} that accepts
 * {@link CandidateURI}s, hands out {@link CrawlURI}s, and exposes a set of
 * counters and lifecycle operations.
 *
 * <p>NOTE(review): this copy of the file carried no Javadoc. The member
 * descriptions below are derived from the signatures alone; anything marked
 * "presumably" is an assumption that must be confirmed against the
 * implementations before being relied upon.
 *
 * <p>All members are implicitly {@code public}; the redundant modifiers are
 * omitted uniformly.
 */
public interface Frontier extends Reporter {

    /** Attribute name constant; its value is the literal {@code "frontier"}. */
    String ATTR_NAME = "frontier";

    /**
     * Initializes the frontier with the given controller.
     *
     * @param c the crawl controller handed to this frontier
     * @throws FatalConfigurationException declared by the contract; presumably
     *         signals unusable configuration (TODO confirm)
     * @throws IOException declared by the contract; presumably signals an I/O
     *         failure during set-up (TODO confirm)
     */
    void initialize(CrawlController c)
            throws FatalConfigurationException, IOException;

    /**
     * Returns the next {@link CrawlURI}.
     *
     * <p>NOTE(review): the declared {@link InterruptedException} suggests this
     * call may block — confirm.
     *
     * @return a crawl URI
     * @throws InterruptedException declared by the contract
     * @throws EndedException declared by the contract; presumably thrown once
     *         the crawl has ended (TODO confirm)
     */
    CrawlURI next() throws InterruptedException, EndedException;

    /**
     * Reports whether the frontier is empty.
     *
     * @return {@code true} if empty; the exact definition of "empty" is not
     *         visible in this file
     */
    boolean isEmpty();

    /**
     * Schedules a candidate URI with this frontier.
     *
     * @param caURI the candidate URI to schedule
     */
    void schedule(CandidateURI caURI);

    /**
     * Notifies the frontier that a crawl URI is finished.
     *
     * @param cURI the crawl URI being reported as finished
     */
    void finished(CrawlURI cURI);

    /** @return a count of discovered URIs (exact counting rules: TODO confirm) */
    long discoveredUriCount();

    /** @return a count of queued URIs (exact counting rules: TODO confirm) */
    long queuedUriCount();

    /** @return a depth figure; presumably the depth of the deepest queue (TODO confirm) */
    long deepestUri();

    /** @return a depth figure; presumably an average over queues (TODO confirm) */
    long averageDepth();

    /** @return a ratio as a {@code float}; its definition is not visible here (TODO confirm) */
    float congestionRatio();

    /** @return a count of finished URIs (exact counting rules: TODO confirm) */
    long finishedUriCount();

    /** @return a count of successful fetches (exact counting rules: TODO confirm) */
    long succeededFetchCount();

    /** @return a count of failed fetches (exact counting rules: TODO confirm) */
    long failedFetchCount();

    /** @return a count of disregarded URIs (exact counting rules: TODO confirm) */
    long disregardedUriCount();

    /** @return a byte total; what is counted as "written" is not visible here (TODO confirm) */
    long totalBytesWritten();

    /**
     * Imports a recover log.
     *
     * @param pathToLog path of the log to import
     * @param retainFailures flag passed through to the implementation;
     *        presumably controls whether failed URIs are kept (TODO confirm)
     * @throws IOException declared by the contract
     */
    void importRecoverLog(String pathToLog, boolean retainFailures)
            throws IOException;

    /**
     * Creates an initial {@link FrontierMarker}.
     *
     * @param regexpr a string; presumably a regular expression selecting URIs
     *        (TODO confirm)
     * @param inCacheOnly flag passed through to the implementation (semantics
     *        not visible here)
     * @return a marker, presumably for use with
     *         {@link #getURIsList(FrontierMarker, int, boolean)}
     */
    FrontierMarker getInitialMarker(String regexpr, boolean inCacheOnly);

    /**
     * Returns a list of URIs starting from the given marker.
     *
     * <p>NOTE(review): the return type is a raw {@link ArrayList}; it is kept
     * raw here so existing implementers and callers are unaffected. The
     * element type is not visible in this file.
     *
     * @param marker the marker to read from
     * @param numberOfMatches presumably the maximum number of entries to
     *        return (TODO confirm)
     * @param verbose flag passed through to the implementation (semantics not
     *        visible here)
     * @return a list of results
     * @throws InvalidFrontierMarkerException declared by the contract;
     *         presumably thrown when {@code marker} is no longer usable
     *         (TODO confirm)
     */
    ArrayList getURIsList(FrontierMarker marker,
                          int numberOfMatches,
                          boolean verbose)
            throws InvalidFrontierMarkerException;

    /**
     * Deletes URIs selected by the given string.
     *
     * @param match selection string; whether it is a literal or a pattern is
     *        not visible here (TODO confirm)
     * @return a {@code long}; presumably the number of URIs deleted
     *         (TODO confirm)
     */
    long deleteURIs(String match);

    /**
     * Notifies the frontier that a crawl URI was deleted.
     *
     * @param curi the deleted crawl URI
     */
    void deleted(CrawlURI curi);

    /**
     * Tells the frontier to consider the given URI as included.
     *
     * @param u the URI to consider included
     */
    void considerIncluded(UURI u);

    /** Requests an update; what is updated is not visible in this file (TODO confirm). */
    void kickUpdate();

    /** Requests that the frontier pause. */
    void pause();

    /** Requests that the frontier resume after {@link #pause()}. */
    void unpause();

    /** Requests that the frontier terminate. */
    void terminate();

    /** @return the {@link FrontierJournal} associated with this frontier */
    FrontierJournal getFrontierJournal();

    /**
     * Returns the class key for a candidate URI.
     *
     * @param cauri the candidate URI
     * @return a key string; presumably used to group URIs (TODO confirm)
     */
    String getClassKey(CandidateURI cauri);

    /** Requests that seeds be loaded. */
    void loadSeeds();

    /** Requests that the frontier start. */
    void start();

    /**
     * Returns the {@link FrontierGroup} for a crawl URI.
     *
     * @param curi the crawl URI
     * @return the group associated with {@code curi}
     */
    FrontierGroup getGroup(CrawlURI curi);

    /**
     * A group within the frontier. Declares no members of its own; it only
     * inherits the {@link CrawlSubstats.HasCrawlSubstats} contract.
     */
    interface FrontierGroup extends CrawlSubstats.HasCrawlSubstats {
    }
}