KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > DecideRuleSequenceTest


/* DecideRuleSequenceTest
 *
 * Created on Apr 4, 2005
 *
 * Copyright (C) 2005 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.deciderules;

import java.io.File;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanException;
import javax.management.ReflectionException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.settings.MapType;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.SurtPrefixSet;
import org.archive.util.TmpDirTestCase;

44 /**
45  * @author stack
46  * @version $Date: 2005/09/01 19:18:05 $, $Revision: 1.11 $
47  */

48 public class DecideRuleSequenceTest extends TmpDirTestCase {
49     /**
50      * Gets setup by {@link #setUp()}.
51      */

52     private DecideRuleSequence rule = null;
53     
54     protected void setUp() throws Exception JavaDoc {
55         super.setUp();
56         final String JavaDoc name = this.getClass().getName();
57         SettingsHandler settingsHandler = new XMLSettingsHandler(
58             new File JavaDoc(getTmpDir(), name + ".order.xml"));
59         settingsHandler.initialize();
60         // Create a new ConfigureDecideRule instance and add it to a MapType
61
// (I can change MapTypes after instantiation). The chosen MapType
62
// is the rules canonicalization rules list.
63
this.rule = (DecideRuleSequence)((MapType)settingsHandler.getOrder().
64             getAttribute(CrawlOrder.ATTR_RULES)).addElement(settingsHandler.
65                 getSettingsObject(null), new DecideRuleSequence(name));
66     }
67     
68     public void testEmptySequence() {
69         Object JavaDoc decision = this.rule.decisionFor("test");
70         assertTrue("Expect PASS but got " + decision,
71             decision == DecideRule.PASS);
72     }
73     
74     public void testSingleACCEPT() throws InvalidAttributeValueException JavaDoc {
75         Object JavaDoc decision = addDecideRule(new AcceptDecideRule("ACCEPT")).
76             decisionFor("test");
77         assertTrue("Expect ACCEPT but got " + decision,
78             decision == DecideRule.ACCEPT);
79     }
80     
81     public void testSingleREJECT() throws InvalidAttributeValueException JavaDoc {
82         Object JavaDoc decision = addDecideRule(new RejectDecideRule("REJECT")).
83             decisionFor("test");
84         assertTrue("Expect REJECT but got " + decision,
85                 decision == DecideRule.REJECT);
86     }
87     
88     public void testSinglePASS() throws InvalidAttributeValueException JavaDoc {
89         Object JavaDoc decision = addDecideRule(new DecideRule("PASS")).
90             decisionFor("test");
91         assertTrue("Expect PASS but got " + decision,
92                 decision == DecideRule.PASS);
93     }
94     
95     
96     public void testACCEPTWins() throws InvalidAttributeValueException JavaDoc {
97         addDecideRule(new DecideRule("PASS1"));
98         addDecideRule(new RejectDecideRule("REJECT1"));
99         addDecideRule(new DecideRule("PASS2"));
100         addDecideRule(new AcceptDecideRule("ACCEPT1"));
101         addDecideRule(new RejectDecideRule("REJECT2"));
102         addDecideRule(new DecideRule("PASS3"));
103         addDecideRule(new AcceptDecideRule("ACCEPT2"));
104         addDecideRule(new DecideRule("PASS4"));
105         Object JavaDoc decision = this.rule.decisionFor("test");
106         assertTrue("Expect ACCEPT but got " + decision,
107             decision == DecideRule.ACCEPT);
108     }
109     
110     public void testREJECTWins() throws InvalidAttributeValueException JavaDoc {
111         addDecideRule(new DecideRule("PASS1"));
112         addDecideRule(new RejectDecideRule("REJECT1"));
113         addDecideRule(new DecideRule("PASS2"));
114         addDecideRule(new AcceptDecideRule("ACCEPT1"));
115         addDecideRule(new RejectDecideRule("REJECT2"));
116         addDecideRule(new DecideRule("PASS3"));
117         addDecideRule(new AcceptDecideRule("ACCEPT2"));
118         addDecideRule(new DecideRule("PASS4"));
119         addDecideRule(new RejectDecideRule("REJECT3"));
120         Object JavaDoc decision = this.rule.decisionFor("test");
121         assertTrue("Expect REJECT but got " + decision,
122             decision == DecideRule.REJECT);
123     }
124         
125     public void testRegex()
126     throws InvalidAttributeValueException JavaDoc, AttributeNotFoundException JavaDoc,
127     MBeanException JavaDoc, ReflectionException JavaDoc {
128         final String JavaDoc regexName = "REGEX";
129         DecideRule r = addDecideRule(new MatchesRegExpDecideRule(regexName));
130         // Set regex to be match anything that ends in archive.org.
131
r.setAttribute(new Attribute JavaDoc(MatchesRegExpDecideRule.ATTR_REGEXP,
132             "^.*\\.archive\\.org"));
133         Object JavaDoc decision = this.rule.decisionFor("http://google.com");
134         assertTrue("Expect PASS but got " + decision,
135             decision == DecideRule.PASS);
136         decision = this.rule.decisionFor("http://archive.org");
137         assertTrue("Expect PASS but got " + decision,
138             decision == DecideRule.PASS);
139         decision = this.rule.decisionFor("http://www.archive.org");
140         assertTrue("Expect ACCEPT but got " + decision,
141             decision == DecideRule.ACCEPT);
142     }
143     
144     public void testNotRegex()
145     throws InvalidAttributeValueException JavaDoc, AttributeNotFoundException JavaDoc,
146     MBeanException JavaDoc, ReflectionException JavaDoc {
147         final String JavaDoc regexName = "NOT_REGEX";
148         DecideRule r = addDecideRule(new NotMatchesRegExpDecideRule(regexName));
149         // Set regex to be match anything that ends in archive.org.
150
r.setAttribute(new Attribute JavaDoc(MatchesRegExpDecideRule.ATTR_REGEXP,
151             "^.*\\.archive\\.org"));
152         Object JavaDoc decision = this.rule.decisionFor("http://google.com");
153         assertTrue("Expect ACCEPT but got " + decision,
154             decision == DecideRule.ACCEPT);
155         decision = this.rule.decisionFor("http://www.archive.org");
156         assertTrue("Expect PASS but got " + decision,
157             decision == DecideRule.PASS);
158     }
159     
160     
161     public void testPrerequisite()
162     throws InvalidAttributeValueException JavaDoc, URIException {
163         addDecideRule(new PrerequisiteAcceptDecideRule("PREREQUISITE"));
164         UURI uuri = UURIFactory.getInstance("http://archive.org");
165         CandidateURI candidate = new CandidateURI(uuri);
166         Object JavaDoc decision = this.rule.decisionFor(candidate);
167         assertTrue("Expect PASS but got " + decision,
168             decision == DecideRule.PASS);
169         candidate = new CandidateURI(uuri, "LLP", null, null);
170         decision = this.rule.decisionFor(candidate);
171         assertTrue("Expect ACCEPT but got " + decision,
172             decision == DecideRule.ACCEPT);
173     }
174     
175     public void testHops()
176     throws InvalidAttributeValueException JavaDoc, URIException {
177         addDecideRule(new TooManyHopsDecideRule("HOPS"));
178         testHopLimit(TooManyHopsDecideRule.DEFAULT_MAX_HOPS.intValue(), 'L',
179             DecideRule.PASS, DecideRule.REJECT);
180     }
181     
182     public void testTransclusion()
183     throws InvalidAttributeValueException JavaDoc, URIException {
184         addDecideRule(new TransclusionDecideRule("TRANSCLUSION"));
185         final int max =
186             TransclusionDecideRule.DEFAULT_MAX_TRANS_HOPS.intValue();
187         final char pathExpansion = 'X';
188         UURI uuri = UURIFactory.getInstance("http://archive.org");
189         CandidateURI candidate = new CandidateURI(uuri);
190         Object JavaDoc decision = this.rule.decisionFor(candidate);
191         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
192             decision == DecideRule.PASS);
193         StringBuffer JavaDoc path = new StringBuffer JavaDoc(max);
194         for (int i = 0; i < (max - 1); i++) {
195             path.append(pathExpansion);
196         }
197         candidate = new CandidateURI(uuri, path.toString(), null, null);
198         decision = this.rule.decisionFor(candidate);
199         assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
200             decision == DecideRule.ACCEPT);
201         String JavaDoc pathCopy = path.toString();
202         path.append(pathExpansion);
203         candidate = new CandidateURI(uuri, path.toString(), null, null);
204         decision = this.rule.decisionFor(candidate);
205         assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
206             decision == DecideRule.ACCEPT);
207         path.append(pathExpansion);
208         candidate = new CandidateURI(uuri, path.toString(), null, null);
209         decision = this.rule.decisionFor(candidate);
210         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
211             decision == DecideRule.PASS);
212         candidate = new CandidateURI(uuri, pathCopy + 'L', null, null);
213         decision = this.rule.decisionFor(candidate);
214         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
215             decision == DecideRule.PASS);
216     }
217     
218     public void testPathologicalPath()
219     throws InvalidAttributeValueException JavaDoc, URIException {
220         addDecideRule(new PathologicalPathDecideRule("PATHOLOGICAL"));
221         final int max =
222             PathologicalPathDecideRule.DEFAULT_REPETITIONS.intValue();
223         String JavaDoc uri = "http://archive.org/";
224         final String JavaDoc segment = "abc/";
225         for (int i = 1; i < max; i++) {
226             uri = uri + segment;
227         }
228         final String JavaDoc baseUri = uri;
229         UURI uuri = UURIFactory.getInstance(uri);
230         CandidateURI candidate = new CandidateURI(uuri);
231         Object JavaDoc decision = this.rule.decisionFor(candidate);
232         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
233             decision == DecideRule.PASS);
234         uuri = UURIFactory.getInstance(baseUri + segment);
235         candidate = new CandidateURI(uuri);
236         decision = this.rule.decisionFor(candidate);
237         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
238             decision == DecideRule.PASS);
239         uuri = UURIFactory.getInstance(baseUri + segment + segment);
240         candidate = new CandidateURI(uuri);
241         decision = this.rule.decisionFor(candidate);
242         assertTrue("Expect " + DecideRule.REJECT + " but got " + decision,
243             decision == DecideRule.REJECT);
244     }
245     
246     public void testTooManyPathSegments()
247     throws InvalidAttributeValueException JavaDoc, URIException {
248         addDecideRule(new TooManyPathSegmentsDecideRule("SEGMENTS"));
249         final int max =
250             TooManyPathSegmentsDecideRule.DEFAULT_MAX_PATH_DEPTH.intValue();
251         StringBuffer JavaDoc baseUri = new StringBuffer JavaDoc("http://archive.org");
252         for (int i = 0; i < max; i++) {
253             baseUri.append('/');
254             baseUri.append(Integer.toString(i + 1));
255         }
256         UURI uuri = UURIFactory.getInstance(baseUri.toString());
257         CandidateURI candidate = new CandidateURI(uuri);
258         Object JavaDoc decision = this.rule.decisionFor(candidate);
259         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
260             decision == DecideRule.PASS);
261         baseUri.append("/x");
262         uuri = UURIFactory.getInstance(baseUri.toString());
263         candidate = new CandidateURI(uuri);
264         decision = this.rule.decisionFor(candidate);
265         assertTrue("Expect " + DecideRule.REJECT + " but got " + decision,
266             decision == DecideRule.REJECT);
267     }
268     
269     public void testMatchesFilePattern()
270     throws InvalidAttributeValueException JavaDoc, URIException {
271         addDecideRule(new MatchesFilePatternDecideRule("FILE_PATTERN"));
272         StringBuffer JavaDoc baseUri = new StringBuffer JavaDoc("http://archive.org/");
273         UURI uuri = UURIFactory.getInstance(baseUri.toString() + "ms.doc");
274         CandidateURI candidate = new CandidateURI(uuri);
275         Object JavaDoc decision = this.rule.decisionFor(candidate);
276         assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
277             decision == DecideRule.ACCEPT);
278         uuri = UURIFactory.getInstance(baseUri.toString() + "index.html");
279         candidate = new CandidateURI(uuri);
280         decision = this.rule.decisionFor(candidate);
281         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
282             decision == DecideRule.PASS);
283     }
284     
285     public void testNotMatchesFilePattern()
286     throws InvalidAttributeValueException JavaDoc, URIException {
287         addDecideRule(new NotMatchesFilePatternDecideRule("NOT_FILE_PATTERN"));
288         StringBuffer JavaDoc baseUri = new StringBuffer JavaDoc("http://archive.org/");
289         UURI uuri = UURIFactory.getInstance(baseUri.toString() + "ms.doc");
290         CandidateURI candidate = new CandidateURI(uuri);
291         Object JavaDoc decision = this.rule.decisionFor(candidate);
292         assertTrue("Expect " + DecideRule.PASS + " but got " + decision,
293             decision == DecideRule.PASS);
294         uuri = UURIFactory.getInstance(baseUri.toString() + "index.html");
295         candidate = new CandidateURI(uuri);
296         decision = this.rule.decisionFor(candidate);
297         assertTrue("Expect " + DecideRule.ACCEPT + " but got " + decision,
298             decision == DecideRule.ACCEPT);
299     }
300     
301     protected void testHopLimit(final int max, final char pathExpansion,
302         final String JavaDoc defaultDecision, final String JavaDoc overLimitDecision)
303     throws URIException {
304         UURI uuri = UURIFactory.getInstance("http://archive.org");
305         CandidateURI candidate = new CandidateURI(uuri);
306         Object JavaDoc decision = this.rule.decisionFor(candidate);
307         assertTrue("Expect " + defaultDecision + " but got " + decision,
308             decision == defaultDecision);
309         StringBuffer JavaDoc path = new StringBuffer JavaDoc(max);
310         for (int i = 0; i < (max - 1); i++) {
311             path.append(pathExpansion);
312         }
313         candidate = new CandidateURI(uuri, path.toString(), null, null);
314         decision = this.rule.decisionFor(candidate);
315         assertTrue("Expect " + defaultDecision + " but got " + decision,
316             decision == defaultDecision);
317         path.append(pathExpansion);
318         candidate = new CandidateURI(uuri, path.toString(), null, null);
319         decision = this.rule.decisionFor(candidate);
320         assertTrue("Expect " + defaultDecision + " but got " + decision,
321             decision == defaultDecision);
322         path.append(pathExpansion);
323         candidate = new CandidateURI(uuri, path.toString(), null, null);
324         decision = this.rule.decisionFor(candidate);
325         assertTrue("Expect " + overLimitDecision + " but got " + decision,
326             decision == overLimitDecision);
327     }
328           
329     public void testScopePlusOne()
330                 throws URIException, InvalidAttributeValueException JavaDoc,
331                 AttributeNotFoundException JavaDoc, MBeanException JavaDoc,
332                 ReflectionException JavaDoc {
333         // first test host scope
334
ScopePlusOneDecideRule t = new ScopePlusOneDecideRule("host");
335         SurtPrefixSet mSet = new SurtPrefixSet();
336         mSet.add(SurtPrefixSet.prefixFromPlain("http://audio.archive.org"));
337         mSet.convertAllPrefixesToHosts();
338         t.surtPrefixes = mSet;
339         DecideRule s = addDecideRule(t);
340         s.setAttribute(new Attribute JavaDoc(ScopePlusOneDecideRule.ATTR_SCOPE,
341             ScopePlusOneDecideRule.HOST));
342
343
344         UURI uuri =
345             UURIFactory.getInstance("http://audio.archive.org/examples");
346         CandidateURI candidate = new CandidateURI(uuri);
347         Object JavaDoc decision = this.rule.decisionFor(candidate);
348         assertTrue("URI Expect " + DecideRule.ACCEPT + " for " + candidate +
349             " but got " + decision, decision == DecideRule.ACCEPT);
350         UURI uuriOne = UURIFactory.getInstance("http://movies.archive.org");
351         CandidateURI plusOne = new CandidateURI(uuriOne);
352         plusOne.setVia(uuri);
353         decision = this.rule.decisionFor(plusOne);
354         assertTrue("PlusOne Expect " + DecideRule.ACCEPT + " for " + plusOne +
355             " with via " + plusOne.flattenVia() + " but got " + decision,
356             decision == DecideRule.ACCEPT);
357         UURI uuriTwo = UURIFactory.getInstance("http://sloan.archive.org");
358         CandidateURI plusTwo = new CandidateURI(uuriTwo);
359         plusTwo.setVia(uuriOne);
360         decision = this.rule.decisionFor(plusTwo);
361         assertTrue("PlusTwo Expect " + DecideRule.PASS + " for " + plusTwo +
362             " with via " + plusTwo.flattenVia() + " but got " + decision,
363             decision == DecideRule.PASS);
364         
365
366         //now test domain scope
367
ScopePlusOneDecideRule u = new ScopePlusOneDecideRule("domain");
368         SurtPrefixSet mSet1 = new SurtPrefixSet();
369         mSet1.add(SurtPrefixSet.prefixFromPlain("archive.org"));
370         mSet1.convertAllPrefixesToDomains();
371         u.surtPrefixes = mSet1;
372         DecideRule v = addDecideRule(u);
373         v.setAttribute(new Attribute JavaDoc(ScopePlusOneDecideRule.ATTR_SCOPE,
374             ScopePlusOneDecideRule.DOMAIN));
375         
376         decision = this.rule.decisionFor(candidate);
377         assertTrue("Domain: URI Expect " + DecideRule.ACCEPT + " for " +
378             candidate + " but got " + decision, decision == DecideRule.ACCEPT);
379         decision = this.rule.decisionFor(plusOne);
380         assertTrue("Domain: PlusOne Expect " + DecideRule.ACCEPT + " for " +
381             plusOne + " with via " + plusOne.flattenVia() + " but got " +
382             decision, decision == DecideRule.ACCEPT);
383         decision = this.rule.decisionFor(plusTwo);
384         assertTrue("Domain: PlusTwo Expect " + DecideRule.ACCEPT + " for " +
385             plusTwo + " with via " + plusTwo.flattenVia() + " but got " +
386             decision, decision == DecideRule.ACCEPT);
387         UURI uuriThree = UURIFactory.getInstance("http://sloan.org");
388         CandidateURI plusThree = new CandidateURI(uuriThree);
389         plusThree.setVia(uuriTwo);
390         decision = this.rule.decisionFor(plusThree);
391         assertTrue("Domain: PlusThree Expect " + DecideRule.ACCEPT + " for " +
392             plusThree + " with via " + plusThree.flattenVia() + " but got " +
393             decision, decision == DecideRule.ACCEPT);
394         UURI uuriFour = UURIFactory.getInstance("http://example.com");
395         CandidateURI plusFour = new CandidateURI(uuriFour);
396         plusFour.setVia(uuriThree);
397         decision = this.rule.decisionFor(plusFour);
398         assertTrue("Domain: PlusFour Expect " + DecideRule.PASS + " for " +
399             plusFour + " with via " + plusFour.flattenVia() + " but got " +
400             decision, decision == DecideRule.PASS);
401     }
402     
403     protected DecideRule addDecideRule(DecideRule dr)
404     throws InvalidAttributeValueException JavaDoc {
405         MapType rules = this.rule.getRules(null);
406         rules.addElement(null, dr);
407         return dr;
408     }
409 }
410
Popular Tags