1 23 package org.archive.crawler.deciderules; 24 25 import java.io.File ; 26 27 import javax.management.Attribute ; 28 import javax.management.AttributeNotFoundException ; 29 import javax.management.InvalidAttributeValueException ; 30 import javax.management.MBeanException ; 31 import javax.management.ReflectionException ; 32 33 import org.apache.commons.httpclient.URIException; 34 import org.archive.crawler.datamodel.CandidateURI; 35 import org.archive.crawler.datamodel.CrawlOrder; 36 import org.archive.crawler.settings.MapType; 37 import org.archive.crawler.settings.SettingsHandler; 38 import org.archive.crawler.settings.XMLSettingsHandler; 39 import org.archive.net.UURI; 40 import org.archive.net.UURIFactory; 41 import org.archive.util.SurtPrefixSet; 42 import org.archive.util.TmpDirTestCase; 43 44 48 public class DecideRuleSequenceTest extends TmpDirTestCase { 49 52 private DecideRuleSequence rule = null; 53 54 protected void setUp() throws Exception { 55 super.setUp(); 56 final String name = this.getClass().getName(); 57 SettingsHandler settingsHandler = new XMLSettingsHandler( 58 new File (getTmpDir(), name + ".order.xml")); 59 settingsHandler.initialize(); 60 this.rule = (DecideRuleSequence)((MapType)settingsHandler.getOrder(). 64 getAttribute(CrawlOrder.ATTR_RULES)).addElement(settingsHandler. 65 getSettingsObject(null), new DecideRuleSequence(name)); 66 } 67 68 public void testEmptySequence() { 69 Object decision = this.rule.decisionFor("test"); 70 assertTrue("Expect PASS but got " + decision, 71 decision == DecideRule.PASS); 72 } 73 74 public void testSingleACCEPT() throws InvalidAttributeValueException { 75 Object decision = addDecideRule(new AcceptDecideRule("ACCEPT")). 76 decisionFor("test"); 77 assertTrue("Expect ACCEPT but got " + decision, 78 decision == DecideRule.ACCEPT); 79 } 80 81 public void testSingleREJECT() throws InvalidAttributeValueException { 82 Object decision = addDecideRule(new RejectDecideRule("REJECT")). 83 decisionFor("test"); 84 assertTrue("Expect REJECT but got " + decision, 85 decision == DecideRule.REJECT); 86 } 87 88 public void testSinglePASS() throws InvalidAttributeValueException { 89 Object decision = addDecideRule(new DecideRule("PASS")). 90 decisionFor("test"); 91 assertTrue("Expect PASS but got " + decision, 92 decision == DecideRule.PASS); 93 } 94 95 96 public void testACCEPTWins() throws InvalidAttributeValueException { 97 addDecideRule(new DecideRule("PASS1")); 98 addDecideRule(new RejectDecideRule("REJECT1")); 99 addDecideRule(new DecideRule("PASS2")); 100 addDecideRule(new AcceptDecideRule("ACCEPT1")); 101 addDecideRule(new RejectDecideRule("REJECT2")); 102 addDecideRule(new DecideRule("PASS3")); 103 addDecideRule(new AcceptDecideRule("ACCEPT2")); 104 addDecideRule(new DecideRule("PASS4")); 105 Object decision = this.rule.decisionFor("test"); 106 assertTrue("Expect ACCEPT but got " + decision, 107 decision == DecideRule.ACCEPT); 108 } 109 110 public void testREJECTWins() throws InvalidAttributeValueException { 111 addDecideRule(new DecideRule("PASS1")); 112 addDecideRule(new RejectDecideRule("REJECT1")); 113 addDecideRule(new DecideRule("PASS2")); 114 addDecideRule(new AcceptDecideRule("ACCEPT1")); 115 addDecideRule(new RejectDecideRule("REJECT2")); 116 addDecideRule(new DecideRule("PASS3")); 117 addDecideRule(new AcceptDecideRule("ACCEPT2")); 118 addDecideRule(new DecideRule("PASS4")); 119 addDecideRule(new RejectDecideRule("REJECT3")); 120 Object decision = this.rule.decisionFor("test"); 121 assertTrue("Expect REJECT but got " + decision, 122 decision == DecideRule.REJECT); 123 } 124 125 public void testRegex() 126 throws InvalidAttributeValueException , AttributeNotFoundException , 127 MBeanException , ReflectionException { 128 final String regexName = "REGEX"; 129 DecideRule r = addDecideRule(new MatchesRegExpDecideRule(regexName)); 130 r.setAttribute(new Attribute (MatchesRegExpDecideRule.ATTR_REGEXP, 132 "^.*\\.archive\\.org")); 133 Object decision = this.rule.decisionFor("http://google.com"); 134 assertTrue("Expect PASS but got " + decision, 135 decision == DecideRule.PASS); 136 decision = this.rule.decisionFor("http://archive.org"); 137 assertTrue("Expect PASS but got " + decision, 138 decision == DecideRule.PASS); 139 decision = this.rule.decisionFor("http://www.archive.org"); 140 assertTrue("Expect ACCEPT but got " + decision, 141 decision == DecideRule.ACCEPT); 142 } 143 144 public void testNotRegex() 145 throws InvalidAttributeValueException , AttributeNotFoundException , 146 MBeanException , ReflectionException { 147 final String regexName = "NOT_REGEX"; 148 DecideRule r = addDecideRule(new NotMatchesRegExpDecideRule(regexName)); 149 r.setAttribute(new Attribute (MatchesRegExpDecideRule.ATTR_REGEXP, 151 "^.*\\.archive\\.org")); 152 Object decision = this.rule.decisionFor("http://google.com"); 153 assertTrue("Expect ACCEPT but got " + decision, 154 decision == DecideRule.ACCEPT); 155 decision = this.rule.decisionFor("http://www.archive.org"); 156 assertTrue("Expect PASS but got " + decision, 157 decision == DecideRule.PASS); 158 } 159 160 161 public void testPrerequisite() 162 throws InvalidAttributeValueException , URIException { 163 addDecideRule(new PrerequisiteAcceptDecideRule("PREREQUISITE")); 164 UURI uuri = UURIFactory.getInstance("http://archive.org"); 165 CandidateURI candidate = new CandidateURI(uuri); 166 Object decision = this.rule.decisionFor(candidate); 167 assertTrue("Expect PASS but got " + decision, 168 decision == DecideRule.PASS); 169 candidate = new CandidateURI(uuri, "LLP", null, null); 170 decision = this.rule.decisionFor(candidate); 171 assertTrue("Expect ACCEPT but got " + decision, 172 decision == DecideRule.ACCEPT); 173 } 174 175 public void testHops() 176 throws InvalidAttributeValueException , URIException { 177 addDecideRule(new TooManyHopsDecideRule("HOPS")); 178 testHopLimit(TooManyHopsDecideRule.DEFAULT_MAX_HOPS.intValue(), 'L', 179 DecideRule.PASS, DecideRule.REJECT); 180 } 181 182 public void testTransclusion() 183 throws InvalidAttributeValueException , URIException { 184 addDecideRule(new TransclusionDecideRule("TRANSCLUSION")); 185 final int max = 186 TransclusionDecideRule.DEFAULT_MAX_TRANS_HOPS.intValue(); 187 final char pathExpansion = 'X'; 188 UURI uuri = UURIFactory.getInstance("http://archive.org"); 189 CandidateURI candidate = new CandidateURI(uuri); 190 Object decision = this.rule.decisionFor(candidate); 191 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 192 decision == DecideRule.PASS); 193 StringBuffer path = new StringBuffer (max); 194 for (int i = 0; i < (max - 1); i++) { 195 path.append(pathExpansion); 196 } 197 candidate = new CandidateURI(uuri, path.toString(), null, null); 198 decision = this.rule.decisionFor(candidate); 199 assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision, 200 decision == DecideRule.ACCEPT); 201 String pathCopy = path.toString(); 202 path.append(pathExpansion); 203 candidate = new CandidateURI(uuri, path.toString(), null, null); 204 decision = this.rule.decisionFor(candidate); 205 assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision, 206 decision == DecideRule.ACCEPT); 207 path.append(pathExpansion); 208 candidate = new CandidateURI(uuri, path.toString(), null, null); 209 decision = this.rule.decisionFor(candidate); 210 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 211 decision == DecideRule.PASS); 212 candidate = new CandidateURI(uuri, pathCopy + 'L', null, null); 213 decision = this.rule.decisionFor(candidate); 214 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 215 decision == DecideRule.PASS); 216 } 217 218 public void testPathologicalPath() 219 throws InvalidAttributeValueException , URIException { 220 addDecideRule(new PathologicalPathDecideRule("PATHOLOGICAL")); 221 final int max = 222 PathologicalPathDecideRule.DEFAULT_REPETITIONS.intValue(); 223 String uri = "http://archive.org/"; 224 final String segment = "abc/"; 225 for (int i = 1; i < max; i++) { 226 uri = uri + segment; 227 } 228 final String baseUri = uri; 229 UURI uuri = UURIFactory.getInstance(uri); 230 CandidateURI candidate = new CandidateURI(uuri); 231 Object decision = this.rule.decisionFor(candidate); 232 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 233 decision == DecideRule.PASS); 234 uuri = UURIFactory.getInstance(baseUri + segment); 235 candidate = new CandidateURI(uuri); 236 decision = this.rule.decisionFor(candidate); 237 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 238 decision == DecideRule.PASS); 239 uuri = UURIFactory.getInstance(baseUri + segment + segment); 240 candidate = new CandidateURI(uuri); 241 decision = this.rule.decisionFor(candidate); 242 assertTrue("Expect " + DecideRule.REJECT + " but got " + decision, 243 decision == DecideRule.REJECT); 244 } 245 246 public void testTooManyPathSegments() 247 throws InvalidAttributeValueException , URIException { 248 addDecideRule(new TooManyPathSegmentsDecideRule("SEGMENTS")); 249 final int max = 250 TooManyPathSegmentsDecideRule.DEFAULT_MAX_PATH_DEPTH.intValue(); 251 StringBuffer baseUri = new StringBuffer ("http://archive.org"); 252 for (int i = 0; i < max; i++) { 253 baseUri.append('/'); 254 baseUri.append(Integer.toString(i + 1)); 255 } 256 UURI uuri = UURIFactory.getInstance(baseUri.toString()); 257 CandidateURI candidate = new CandidateURI(uuri); 258 Object decision = this.rule.decisionFor(candidate); 259 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 260 decision == DecideRule.PASS); 261 baseUri.append("/x"); 262 uuri = UURIFactory.getInstance(baseUri.toString()); 263 candidate = new CandidateURI(uuri); 264 decision = this.rule.decisionFor(candidate); 265 assertTrue("Expect " + DecideRule.REJECT + " but got " + decision, 266 decision == DecideRule.REJECT); 267 } 268 269 public void testMatchesFilePattern() 270 throws InvalidAttributeValueException , URIException { 271 addDecideRule(new MatchesFilePatternDecideRule("FILE_PATTERN")); 272 StringBuffer baseUri = new StringBuffer ("http://archive.org/"); 273 UURI uuri = UURIFactory.getInstance(baseUri.toString() + "ms.doc"); 274 CandidateURI candidate = new CandidateURI(uuri); 275 Object decision = this.rule.decisionFor(candidate); 276 assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision, 277 decision == DecideRule.ACCEPT); 278 uuri = UURIFactory.getInstance(baseUri.toString() + "index.html"); 279 candidate = new CandidateURI(uuri); 280 decision = this.rule.decisionFor(candidate); 281 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 282 decision == DecideRule.PASS); 283 } 284 285 public void testNotMatchesFilePattern() 286 throws InvalidAttributeValueException , URIException { 287 addDecideRule(new NotMatchesFilePatternDecideRule("NOT_FILE_PATTERN")); 288 StringBuffer baseUri = new StringBuffer ("http://archive.org/"); 289 UURI uuri = UURIFactory.getInstance(baseUri.toString() + "ms.doc"); 290 CandidateURI candidate = new CandidateURI(uuri); 291 Object decision = this.rule.decisionFor(candidate); 292 assertTrue("Expect " + DecideRule.PASS + " but got " + decision, 293 decision == DecideRule.PASS); 294 uuri = UURIFactory.getInstance(baseUri.toString() + "index.html"); 295 candidate = new CandidateURI(uuri); 296 decision = this.rule.decisionFor(candidate); 297 assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision, 298 decision == DecideRule.ACCEPT); 299 } 300 301 protected void testHopLimit(final int max, final char pathExpansion, 302 final String defaultDecision, final String overLimitDecision) 303 throws URIException { 304 UURI uuri = UURIFactory.getInstance("http://archive.org"); 305 CandidateURI candidate = new CandidateURI(uuri); 306 Object decision = this.rule.decisionFor(candidate); 307 assertTrue("Expect " + defaultDecision + " but got " + decision, 308 decision == defaultDecision); 309 StringBuffer path = new StringBuffer (max); 310 for (int i = 0; i < (max - 1); i++) { 311 path.append(pathExpansion); 312 } 313 candidate = new CandidateURI(uuri, path.toString(), null, null); 314 decision = this.rule.decisionFor(candidate); 315 assertTrue("Expect " + defaultDecision + " but got " + decision, 316 decision == defaultDecision); 317 path.append(pathExpansion); 318 candidate = new CandidateURI(uuri, path.toString(), null, null); 319 decision = this.rule.decisionFor(candidate); 320 assertTrue("Expect " + defaultDecision + " but got " + decision, 321 decision == defaultDecision); 322 path.append(pathExpansion); 323 candidate = new CandidateURI(uuri, path.toString(), null, null); 324 decision = this.rule.decisionFor(candidate); 325 assertTrue("Expect " + overLimitDecision + " but got " + decision, 326 decision == overLimitDecision); 327 } 328 329 public void testScopePlusOne() 330 throws URIException, InvalidAttributeValueException , 331 AttributeNotFoundException , MBeanException , 332 ReflectionException { 333 ScopePlusOneDecideRule t = new ScopePlusOneDecideRule("host"); 335 SurtPrefixSet mSet = new SurtPrefixSet(); 336 mSet.add(SurtPrefixSet.prefixFromPlain("http://audio.archive.org")); 337 mSet.convertAllPrefixesToHosts(); 338 t.surtPrefixes = mSet; 339 DecideRule s = addDecideRule(t); 340 s.setAttribute(new Attribute (ScopePlusOneDecideRule.ATTR_SCOPE, 341 ScopePlusOneDecideRule.HOST)); 342 343 344 UURI uuri = 345 UURIFactory.getInstance("http://audio.archive.org/examples"); 346 CandidateURI candidate = new CandidateURI(uuri); 347 Object decision = this.rule.decisionFor(candidate); 348 assertTrue("URI Expect " + DecideRule.ACCEPT + " for " + candidate + 349 " but got " + decision, decision == DecideRule.ACCEPT); 350 UURI uuriOne = UURIFactory.getInstance("http://movies.archive.org"); 351 CandidateURI plusOne = new CandidateURI(uuriOne); 352 plusOne.setVia(uuri); 353 decision = this.rule.decisionFor(plusOne); 354 assertTrue("PlusOne Expect " + DecideRule.ACCEPT + " for " + plusOne + 355 " with via " + plusOne.flattenVia() + " but got " + decision, 356 decision == DecideRule.ACCEPT); 357 UURI uuriTwo = UURIFactory.getInstance("http://sloan.archive.org"); 358 CandidateURI plusTwo = new CandidateURI(uuriTwo); 359 plusTwo.setVia(uuriOne); 360 decision = this.rule.decisionFor(plusTwo); 361 assertTrue("PlusTwo Expect " + DecideRule.PASS + " for " + plusTwo + 362 " with via " + plusTwo.flattenVia() + " but got " + decision, 363 decision == DecideRule.PASS); 364 365 366 ScopePlusOneDecideRule u = new ScopePlusOneDecideRule("domain"); 368 SurtPrefixSet mSet1 = new SurtPrefixSet(); 369 mSet1.add(SurtPrefixSet.prefixFromPlain("archive.org")); 370 mSet1.convertAllPrefixesToDomains(); 371 u.surtPrefixes = mSet1; 372 DecideRule v = addDecideRule(u); 373 v.setAttribute(new Attribute (ScopePlusOneDecideRule.ATTR_SCOPE, 374 ScopePlusOneDecideRule.DOMAIN)); 375 376 decision = this.rule.decisionFor(candidate); 377 assertTrue("Domain: URI Expect " + DecideRule.ACCEPT + " for " + 378 candidate + " but got " + decision, decision == DecideRule.ACCEPT); 379 decision = this.rule.decisionFor(plusOne); 380 assertTrue("Domain: PlusOne Expect " + DecideRule.ACCEPT + " for " + 381 plusOne + " with via " + plusOne.flattenVia() + " but got " + 382 decision, decision == DecideRule.ACCEPT); 383 decision = this.rule.decisionFor(plusTwo); 384 assertTrue("Domain: PlusTwo Expect " + DecideRule.ACCEPT + " for " + 385 plusTwo + " with via " + plusTwo.flattenVia() + " but got " + 386 decision, decision == DecideRule.ACCEPT); 387 UURI uuriThree = UURIFactory.getInstance("http://sloan.org"); 388 CandidateURI plusThree = new CandidateURI(uuriThree); 389 plusThree.setVia(uuriTwo); 390 decision = this.rule.decisionFor(plusThree); 391 assertTrue("Domain: PlusThree Expect " + DecideRule.ACCEPT + " for " + 392 plusThree + " with via " + plusThree.flattenVia() + " but got " + 393 decision, decision == DecideRule.ACCEPT); 394 UURI uuriFour = UURIFactory.getInstance("http://example.com"); 395 CandidateURI plusFour = new CandidateURI(uuriFour); 396 plusFour.setVia(uuriThree); 397 decision = this.rule.decisionFor(plusFour); 398 assertTrue("Domain: PlusFour Expect " + DecideRule.PASS + " for " + 399 plusFour + " with via " + plusFour.flattenVia() + " but got " + 400 decision, decision == DecideRule.PASS); 401 } 402 403 protected DecideRule addDecideRule(DecideRule dr) 404 throws InvalidAttributeValueException { 405 MapType rules = this.rule.getRules(null); 406 rules.addElement(null, dr); 407 return dr; 408 } 409 } 410 | Popular Tags |