import java.net.URL;

/**
 * A single {@code Allow:} or {@code Disallow:} rule read from a robots.txt file.
 *
 * <p>A rule couples a path prefix ({@link #getResourceURI()}) with a rule type
 * ({@link #ROBOTSTXT_RULE_ALLOW} or {@link #ROBOTSTXT_RULE_DISALLOW}).
 * Instances are normally obtained through {@link #parse(String)}.
 */
public class RobotsTXTLine {

    /** Field name (lower case, including the colon) that introduces an allow rule. */
    public static final String ALLOW = "allow:";

    /** Field name (lower case, including the colon) that introduces a disallow rule. */
    public static final String DISALLOW = "disallow:";

    /** Value of {@link #getType()} for a rule parsed from an {@code Allow:} line. */
    public static final int ROBOTSTXT_RULE_ALLOW = 0;

    /** Value of {@link #getType()} for a rule parsed from a {@code Disallow:} line. */
    public static final int ROBOTSTXT_RULE_DISALLOW = 1;

    /** Rule type: {@link #ROBOTSTXT_RULE_ALLOW} or {@link #ROBOTSTXT_RULE_DISALLOW}. */
    protected int type;

    /** Path prefix the rule applies to. */
    protected String resourceURI;

    /**
     * Creates a rule.
     *
     * @param resourceURI path prefix the rule applies to
     * @param type        {@link #ROBOTSTXT_RULE_ALLOW} or {@link #ROBOTSTXT_RULE_DISALLOW}
     */
    RobotsTXTLine(String resourceURI, int type) {
        this.type = type;
        this.resourceURI = resourceURI;
    }

    /**
     * Tells whether this rule applies to the given URL, i.e. whether the URL's
     * path starts with this rule's path prefix.
     *
     * @param url the URL to test; must not be {@code null}
     * @return {@code true} if the path of {@code url} starts with the rule's prefix
     */
    public boolean matches(URL url) {
        String path = url.getPath();
        if (path.length() == 0) {
            // A URL without any path addresses the site root, so compare it as "/".
            path = "/";
        }
        return path.startsWith(resourceURI);
    }

    /**
     * @return the rule type, {@link #ROBOTSTXT_RULE_ALLOW} or
     *         {@link #ROBOTSTXT_RULE_DISALLOW}
     */
    public int getType() {
        return type;
    }

    /**
     * @return the path prefix this rule applies to
     */
    public String getResourceURI() {
        return resourceURI;
    }

    /**
     * Parses one line of a robots.txt file into a rule.
     *
     * <p>The field name is recognised case-insensitively and independently of
     * the JVM's default locale. Surrounding whitespace and a trailing
     * {@code # comment} are removed from the path prefix.
     *
     * @param line the raw line, may be {@code null}
     * @return the parsed rule, or {@code null} when {@code line} is
     *         {@code null}, is not an allow/disallow line, or carries an
     *         empty path prefix
     */
    public static RobotsTXTLine parse(String line) {
        if (line == null) {
            return null;
        }

        String trimmed = line.trim();
        int type;
        int prefixLength;
        if (startsWithIgnoreCase(trimmed, DISALLOW)) {
            type = ROBOTSTXT_RULE_DISALLOW;
            prefixLength = DISALLOW.length();
        } else if (startsWithIgnoreCase(trimmed, ALLOW)) {
            type = ROBOTSTXT_RULE_ALLOW;
            prefixLength = ALLOW.length();
        } else {
            return null;
        }

        String resourceURI = stripComment(trimmed.substring(prefixLength)).trim();
        if (resourceURI.length() == 0) {
            return null;
        }
        return new RobotsTXTLine(resourceURI, type);
    }

    /**
     * Case-insensitive {@code startsWith}. Uses {@code regionMatches} rather
     * than {@code toLowerCase()} because the latter follows the default locale
     * (a Turkish locale maps {@code 'I'} to a dotless {@code 'ı'}, which would
     * make {@code "DISALLOW:"} unrecognisable).
     */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Drops everything from the first {@code '#'} on; returns the input unchanged if there is none. */
    private static String stripComment(String value) {
        int hash = value.indexOf('#');
        return hash >= 0 ? value.substring(0, hash) : value;
    }

}