package net.javacoding.jspider.mod.rule;

import net.javacoding.jspider.core.rule.impl.BaseRuleImpl;
import net.javacoding.jspider.core.util.config.PropertySet;
import net.javacoding.jspider.core.logging.LogFactory;
import net.javacoding.jspider.core.logging.Log;
import net.javacoding.jspider.core.SpiderContext;
import net.javacoding.jspider.core.model.DecisionInternal;
import net.javacoding.jspider.api.model.Decision;
import net.javacoding.jspider.api.model.Site;

import java.net.URL;

/**
 * Rule that decides on a URL based on the number of parameters found in
 * its query string.
 *
 * <p>The limit is read from the {@link #MAX} property of the configuration
 * handed to the constructor. A URL whose parameter count is strictly greater
 * than the limit yields a {@code Decision.RULE_IGNORE} decision; any other
 * URL yields {@code Decision.RULE_ACCEPT}.</p>
 */
public class MaxNumberOfURLParamsRule extends BaseRuleImpl {

    /** Name of the configuration property holding the parameter limit. */
    public static final String MAX = "max";

    /** Highest number of query parameters a URL may carry and still be accepted. */
    protected int max;

    /**
     * Creates the rule and reads its limit from the given configuration.
     *
     * @param config property set queried for the {@link #MAX} property;
     *               {@code 0} is passed as the second argument of
     *               {@code getInteger} (presumably the fallback used when
     *               the property is absent - confirm against PropertySet)
     */
    public MaxNumberOfURLParamsRule(PropertySet config) {
        Log log = LogFactory.getLog(MaxNumberOfURLParamsRule.class);
        max = config.getInteger(MaxNumberOfURLParamsRule.MAX, 0);
        log.info("max set to " + max);
    }

    /**
     * Applies the rule to a URL.
     *
     * @param context     spider context; not used by this rule
     * @param currentSite site currently being handled; not used by this rule
     * @param url         URL whose query string is inspected
     * @return a {@code Decision.RULE_IGNORE} decision when the URL has more
     *         than {@code max} parameters, a {@code Decision.RULE_ACCEPT}
     *         decision otherwise; the decision comment reports both numbers
     */
    public Decision apply(SpiderContext context, Site currentSite, URL url) {
        int params = countParams(url.getQuery());

        if (params > max) {
            return new DecisionInternal(Decision.RULE_IGNORE, "params = " + params + ", max = " + max + ", url ingored");
        }
        return new DecisionInternal(Decision.RULE_ACCEPT, "params = " + params + ", max = " + max + ", url accepted");
    }

    /**
     * Counts the parameters in a query string as the number of {@code '&'}
     * separators plus one.
     *
     * @param query query part of a URL, may be {@code null}
     * @return {@code 0} when the query is {@code null} or shorter than two
     *         characters, otherwise the number of {@code '&'} characters
     *         plus one
     */
    private static int countParams(String query) {
        // A query of fewer than two characters is treated as having no parameters.
        if (query == null || query.length() < 2) {
            return 0;
        }

        // Every '&' separates two segments, so segments = separators + 1.
        // Empty segments (e.g. "a=1&&b=2") are counted as well.
        int params = 1;
        for (int pos = query.indexOf('&'); pos != -1; pos = query.indexOf('&', pos + 1)) {
            params++;
        }
        return params;
    }

}