1 package net.javacoding.jspider.mod.rule; 2 3 import net.javacoding.jspider.core.rule.impl.BaseRuleImpl; 4 import net.javacoding.jspider.core.util.config.PropertySet; 5 import net.javacoding.jspider.core.util.URLUtil; 6 import net.javacoding.jspider.core.SpiderContext; 7 import net.javacoding.jspider.core.model.DecisionInternal; 8 import net.javacoding.jspider.core.logging.LogFactory; 9 import net.javacoding.jspider.core.logging.Log; 10 import net.javacoding.jspider.api.model.Decision; 11 import net.javacoding.jspider.api.model.Site; 12 13 import java.net.URL ; 14 import java.util.Map ; 15 import java.util.HashMap ; 16 17 20 public class MaxResourcesPerSiteRule extends BaseRuleImpl { 21 22 public static final String MAX = "max"; 23 24 protected int max; 25 protected Map counters; 26 27 public MaxResourcesPerSiteRule ( PropertySet config ) { 28 Log log = LogFactory.getLog(MaxResourcesPerSiteRule.class); 29 max = config.getInteger(MaxResourcesPerSiteRule.MAX, 0); 30 this.counters = new HashMap ( ); 31 log.info("maximum resources per site set to " + max ); 32 } 33 34 public synchronized Decision apply(SpiderContext context, Site currentSite, URL url) { 35 URL siteURL = URLUtil.getSiteURL(url); 36 37 Integer counter = (Integer ) counters.get(siteURL); 38 if ( counter == null ) { 39 counter = new Integer (0); 40 } 41 42 Decision decision = null; 43 44 if ( counter.intValue() + 1 > max ) { 45 decision = new DecisionInternal (Decision.RULE_IGNORE, "counter for site is " + counter.intValue() + ", max is " + max + ", so not allowed anymore!"); 46 } else { 47 decision = new DecisionInternal (Decision.RULE_ACCEPT, "counter for site is " + counter.intValue() + ", max is " + max + ", so allowed!"); 48 counter = new Integer (counter.intValue() + 1); 49 counters.put(siteURL, counter); 50 } 51 return decision; 52 } 53 54 } 55 | Popular Tags |