KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > scope > DomainScopeTest


1 /*
2  * TestDomainScope
3  *
4  * $Id: DomainScopeTest.java,v 1.6.16.1 2007/01/13 01:31:25 stack-sf Exp $
5  *
6  * Created on May 17, 2004
7  *
8  * Copyright (C) 2004 Internet Archive.
9  *
10  * This file is part of the Heritrix web crawler (crawler.archive.org).
11  *
12  * Heritrix is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser Public License as published by
14  * the Free Software Foundation; either version 2.1 of the License, or
15  * any later version.
16  *
17  * Heritrix is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Lesser Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser Public License
23  * along with Heritrix; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  */

26
27 package org.archive.crawler.scope;
28
29 import java.util.ArrayList JavaDoc;
30 import java.util.Iterator JavaDoc;
31
32 import junit.framework.TestCase;
33
34 import org.apache.commons.httpclient.URIException;
35 import org.archive.net.UURI;
36 import org.archive.net.UURIFactory;
37
38 /**
39  * Test the domain scope focus filter.
40  *
41  * @author Igor Ranitovic
42  */

43 public class DomainScopeTest extends TestCase {
44
45     private ArrayList JavaDoc<UURI> testSeeds;
46     private ArrayList JavaDoc<UURI> urlsInScope;
47     private ArrayList JavaDoc<UURI> urlsOutOfScope;
48
49     private TestUnitDomainScope dc;
50
51     /**
52      * Since testing only focus filter overwrite all other filter to return
53      * false.
54      *
55      * Also override seedsIterator so the test seeds are used.
56      */

57     @SuppressWarnings JavaDoc("deprecation")
58     private class TestUnitDomainScope extends DomainScope {
59
60         private static final long serialVersionUID = 2509499903112690451L;
61
62         public TestUnitDomainScope(String JavaDoc name) {
63             super(name);
64         }
65         
66         /* Force test seeds to be used.
67          * @see org.archive.crawler.framework.CrawlScope#seedsIterator()
68          */

69         public Iterator JavaDoc<UURI> seedsIterator() {
70             return testSeeds.iterator();
71         }
72         
73         protected boolean additionalFocusAccepts(Object JavaDoc o) {
74             return false;
75         }
76
77         protected boolean transitiveAccepts(Object JavaDoc o) {
78             return false;
79         }
80
81         protected boolean excludeAccepts(Object JavaDoc o) {
82             return false;
83         }
84     }
85
86     public void setUp() throws URIException {
87         testSeeds = new ArrayList JavaDoc<UURI>();
88         urlsInScope = new ArrayList JavaDoc<UURI>();
89         urlsOutOfScope = new ArrayList JavaDoc<UURI>();
90         dc = new TestUnitDomainScope("TESTCASE");
91
92         // Add seeds
93
addURL(testSeeds, "http://www.a.com/");
94         addURL(testSeeds, "http://b.com/");
95         addURL(testSeeds, "http://www11.c.com");
96         addURL(testSeeds, "http://www.x.y.z.com/index.html");
97         addURL(testSeeds, "http://www.1.com/index.html");
98         addURL(testSeeds, "http://www.a_b.com/index.html");
99
100
101         // Add urls in domain scope
102
addURL(urlsInScope, "http://www.a.com/");
103         addURL(urlsInScope, "http://www1.a.com/");
104         addURL(urlsInScope, "http://a.com/");
105         addURL(urlsInScope, "http://a.a.com/");
106
107         addURL(urlsInScope, "http://www.b.com/");
108         addURL(urlsInScope, "http://www1.b.com/");
109         addURL(urlsInScope, "http://b.com/");
110         addURL(urlsInScope, "http://b.b.com/");
111
112         addURL(urlsInScope, "http://www.c.com/");
113         addURL(urlsInScope, "http://www1.c.com/");
114         addURL(urlsInScope, "http://c.com/");
115         addURL(urlsInScope, "http://c.c.com/");
116
117         addURL(urlsInScope, "http://www.x.y.z.com/");
118         addURL(urlsInScope, "http://www1.x.y.z.com/");
119         addURL(urlsInScope, "http://x.y.z.com/");
120         addURL(urlsInScope, "http://xyz.x.y.z.com/");
121         addURL(urlsInScope, "http://1.com/index.html");
122         addURL(urlsInScope, "http://a_b.com/index.html");
123
124         // Add urls out of scope
125
addURL(urlsOutOfScope, "http://a.co");
126         addURL(urlsOutOfScope, "http://a.comm");
127         addURL(urlsOutOfScope, "http://aa.com");
128         addURL(urlsOutOfScope, "http://z.com");
129         addURL(urlsOutOfScope, "http://y.z.com");
130     }
131
132     public void addURL(ArrayList JavaDoc<UURI> list, String JavaDoc url) throws URIException {
133         list.add(UURIFactory.getInstance(url));
134     }
135
136     public void testInScope() throws URIException {
137         for (Iterator JavaDoc i = this.urlsInScope.iterator(); i.hasNext();) {
138             Object JavaDoc url = i.next();
139             assertTrue("Should be in domain scope: " + url, dc.accepts(url));
140         }
141     }
142
143     public void testOutOfScope() throws URIException {
144         for (Iterator JavaDoc i = this.urlsOutOfScope.iterator(); i.hasNext();) {
145             Object JavaDoc url = i.next();
146             assertFalse(
147                 "Should not be in domain scope: " + url,
148                 dc.accepts(url));
149         }
150     }
151 }
152
Popular Tags