1 package net.matuschek.spider; 2 3 6 7 8 import java.io.BufferedReader ; 9 import java.io.IOException ; 10 import java.io.Reader ; 11 import java.net.URL ; 12 import java.util.StringTokenizer ; 13 import java.util.Vector ; 14 15 import org.apache.regexp.RESyntaxException; 16 17 18 25 public class RegExpURLCheck 26 implements URLCheck 27 { 28 29 private Vector <RegExpRule> rules = null; 30 31 32 private boolean defaultResult = true; 33 34 35 public RegExpURLCheck() { 36 rules = new Vector <RegExpRule>(); 37 } 38 39 52 public RegExpURLCheck(Reader r) 53 throws IOException , 54 org.apache.regexp.RESyntaxException 55 { 56 this(); 57 58 BufferedReader reader = 59 new BufferedReader (r); 60 61 String line = ""; 62 int lineno=0; 63 64 while (line != null) { 65 line=reader.readLine(); 66 lineno++; 67 68 if ((line != null) && 69 (! line.trim().equals("")) && 70 (! line.startsWith("#"))) { 71 StringTokenizer st = new StringTokenizer (line); 72 if (st.countTokens() != 2) { 74 throw new IOException ("line "+lineno+" don't consists of 2 fields"); 75 } 76 77 String allowStr = st.nextToken(); 78 boolean allow = true; 79 String expression = st.nextToken(); 80 81 if (allowStr.equalsIgnoreCase("allow")) { 83 allow=true; 84 } else if (allowStr.equalsIgnoreCase("deny")) { 85 allow=false; 86 } else { 87 throw new IOException ("first token in line "+lineno+ 88 " has to be allow or deny"); 89 } 90 91 addRule(expression,allow); 92 } 93 } 94 } 95 96 97 102 public void setDefaultResult(boolean defaultResult) { 103 this.defaultResult = defaultResult; 104 } 105 106 111 public boolean getDefaultResult() { 112 return defaultResult; 113 } 114 115 119 public Vector getRules() { 120 return rules; 121 } 122 123 127 public void setRules(Vector <RegExpRule> rules) { 128 this.rules=rules; 129 } 130 131 132 137 public void addRule(String regExp, boolean allow) 138 throws RESyntaxException 139 { 140 RegExpRule rule = new RegExpRule(); 141 rule.setPattern(regExp); 142 rule.setAllow(allow); 143 rules.add(rule); 144 } 145 146 147 155 public boolean checkURL(URL u) { 156 String urlStr = u.toString(); 157 158 for (int i=0; i<rules.size(); i++) { 159 RegExpRule rule = rules.elementAt(i); 160 161 if (rule.match(urlStr)) { 162 return rule.getAllow(); 163 } 164 } 165 166 return defaultResult; 167 } 168 169 177 public boolean checkURLForProcessing(URL u) { 178 String urlStr = u.toString(); 179 180 for (int i=0; i<rules.size(); i++) { 181 RegExpRule rule = rules.elementAt(i); 182 183 if (rule.match(urlStr)) { 184 return rule.getProcessAllowed(); 185 } 186 } 187 188 return defaultResult; 189 } 190 191 } | Popular Tags |