1 package net.javacoding.jspider.core.task.work; 2 3 4 import net.javacoding.jspider.api.event.resource.ResourceForbiddenEvent; 5 import net.javacoding.jspider.api.event.resource.ResourceIgnoredForFetchingEvent; 6 import net.javacoding.jspider.api.model.*; 7 import net.javacoding.jspider.core.SpiderContext; 8 import net.javacoding.jspider.core.dispatch.EventDispatcher; 9 import net.javacoding.jspider.core.event.impl.URLFoundEvent; 10 import net.javacoding.jspider.core.rule.Ruleset; 11 import net.javacoding.jspider.core.storage.Storage; 12 import net.javacoding.jspider.core.task.WorkerTask; 13 import net.javacoding.jspider.core.util.URLUtil; 14 15 import java.net.URL ; 16 17 18 24 public class DecideOnSpideringTask extends BaseWorkerTaskImpl { 25 26 protected Storage storage; 27 protected URLFoundEvent event; 28 protected EventDispatcher eventDispatcher; 29 30 public DecideOnSpideringTask(SpiderContext context, URLFoundEvent urlFoundEvent) { 31 super(context, WorkerTask.WORKERTASK_THINKERTASK); 32 this.event = urlFoundEvent; 33 this.eventDispatcher = context.getEventDispatcher(); 34 this.storage = context.getStorage(); 35 } 36 37 public void prepare() { 38 } 39 40 public synchronized void execute() { 41 URL url = event.getURL(); 42 URL foundURL = event.getFoundURL(); 43 44 URL currentSiteURL = URLUtil.getSiteURL(url); 45 URL siteURL = URLUtil.getSiteURL(foundURL); 46 47 Site currentSite = null; 48 if ( currentSiteURL != null ) { 49 currentSite = storage.getSiteDAO().find(currentSiteURL); 50 } 51 Site site = storage.getSiteDAO().find(siteURL); 52 53 Resource foundResource = storage.getResourceDAO().getResource(foundURL); 54 55 Ruleset spiderRules = context.getSiteSpiderRules(site); 56 Decision spiderDecision = spiderRules.applyRules(context, currentSite, foundURL); 57 58 storage.getDecisionDAO().saveSpiderDecision(foundResource, spiderDecision); 59 60 switch (spiderDecision.getDecision()) { 61 case Decision.RULE_IGNORE: 62 storage.getResourceDAO().setIgnoredForFetching(foundURL, event); 63 eventDispatcher.dispatch(new ResourceIgnoredForFetchingEvent(foundResource)); 64 break; 65 case Decision.RULE_FORBIDDEN: 66 storage.getResourceDAO().setForbidden(foundURL, event); 67 eventDispatcher.dispatch(new ResourceForbiddenEvent(foundResource)); 68 break; 69 case Decision.RULE_ACCEPT: 70 case Decision.RULE_DONTCARE: 71 default: 72 context.getAgent().scheduleForSpidering(foundURL); 73 break; 74 } 75 } 76 77 } 78 | Popular Tags |