KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > scope > SeedCachingScopeTest


1 package org.archive.crawler.scope;
2
3 /* SeedCachingScopeTest
4 *
5 * $Id: SeedCachingScopeTest.java,v 1.5.16.1 2007/01/13 01:31:25 stack-sf Exp $
6 *
7 * Created on Mar 30, 2005
8 *
9 * Copyright (C) 2005 Internet Archive.
10 *
11 * This file is part of the Heritrix web crawler (crawler.archive.org).
12 *
13 * Heritrix is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU Lesser Public License as published by
15 * the Free Software Foundation; either version 2.1 of the License, or
16 * any later version.
17 *
18 * Heritrix is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser Public License
24 * along with Heritrix; if not, write to the Free Software
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 */

27
28 import java.io.File JavaDoc;
29 import java.io.FileWriter JavaDoc;
30 import java.io.IOException JavaDoc;
31 import java.io.PrintWriter JavaDoc;
32 import java.util.Comparator JavaDoc;
33 import java.util.Iterator JavaDoc;
34 import java.util.Set JavaDoc;
35 import java.util.TreeSet JavaDoc;
36
37 import org.apache.commons.httpclient.URIException;
38 import org.archive.crawler.datamodel.CrawlURI;
39 import org.archive.net.UURI;
40 import org.archive.net.UURIFactory;
41 import org.archive.util.TmpDirTestCase;
42
43
44 /**
45 * Test {@link SeedCachingScope}.
46 * @author stack gojomo
47 * @version $Revision: 1.5.16.1 $, $Date: 2007/01/13 01:31:25 $
48 */

49 public class SeedCachingScopeTest extends TmpDirTestCase {
50     /**
51      * Constrained SeedCachingScope subclass for testing
52      *
53      * @author gojomo
54      */

55     private class UnitTestSeedCachingScope extends SeedCachingScope {
56
57         private static final long serialVersionUID = -1651873833038665447L;
58
59         private File JavaDoc seedsfile;
60
61         public UnitTestSeedCachingScope(File JavaDoc seedsfile) {
62             super("test");
63             this.seedsfile = seedsfile;
64         }
65         
66         public File JavaDoc getSeedfile() {
67             return seedsfile;
68         }
69     }
70     
71    private static Set JavaDoc<UURI> seeds = null;
72
73    /**
74     * Comparator for treeset of uuris.
75     */

76    private static final Comparator JavaDoc<UURI> CMP = new Comparator JavaDoc<UURI> () {
77        public int compare(UURI o1, UURI o2) {
78            int result = -1;
79            if (o1 == null && o1 == null){
80                result = 0;
81            } else if (o1 == null) {
82                result = -1;
83            } else if (o2 == null) {
84                result = 1;
85            } else {
86                String JavaDoc s1 = o1.toString();
87                String JavaDoc s2 = o2.toString();
88                result = s1.compareTo(s2);
89                result = (result < 0)? result = -1:
90                    (result > 0)? result = 1: 0;
91            }
92            return result;
93        }
94    };
95
96
97    /**
98     * Seed file reference.
99     */

100    private File JavaDoc seedsfile;
101
102
103    /* (non-Javadoc)
104     * @see org.archive.util.TmpDirTestCase#setUp()
105     */

106    protected void setUp() throws Exception JavaDoc {
107        super.setUp();
108
109        // First create array of seeds and add to treeset.
110
SeedCachingScopeTest.seeds = new TreeSet JavaDoc<UURI>(SeedCachingScopeTest.CMP);
111        String JavaDoc [] uris = {"mailto:www.google.com",
112            "http://www.port.com:80/etc/motd2",
113            "http://a:b@userinfo.com/etc/motd2",
114            "news:www.google.com",
115            "http://www.google.com",
116            "https://www.google.com",
117            "gopher://www.google.com",
118            "news://www.google.com",
119            "rss://www.google.com",
120            "telnet://www.google.com",
121            "ftp://myname@example.com/etc/motd",
122            "ftp://example.com/etc/motd2"
123        };
124        for (int i = 0; i < uris.length; i++) {
125            SeedCachingScopeTest.seeds.add(UURIFactory.getInstance(uris[i]));
126        }
127
128        // Write a seeds file w/ our list of seeds.
129
this.seedsfile = new File JavaDoc(getTmpDir(),
130                SeedCachingScopeTest.class.getName() + ".seedfile");
131        PrintWriter JavaDoc writer = new PrintWriter JavaDoc(new FileWriter JavaDoc(this.seedsfile));
132        for (int i = 0; i < uris.length; i++) {
133            writer.println(uris[i]);
134        }
135        writer.close();
136    }
137
138
139    /* (non-Javadoc)
140     * @see org.archive.util.TmpDirTestCase#tearDown()
141     */

142    protected void tearDown() throws Exception JavaDoc {
143        super.tearDown();
144        if (this.seedsfile.exists()) {
145             this.seedsfile.delete();
146        }
147    }
148
149    public void testGeneral() throws URIException {
150        // First make sure that I can get the seed set from seed file.
151
SeedCachingScope sl = checkContent(SeedCachingScopeTest.seeds);
152        // Now do add and see if get set matches seed file content.
153
final CrawlURI curi = new CrawlURI(UURIFactory.getInstance("http://one.two.three"));
154        sl.addSeed(curi);
155        Set JavaDoc<UURI> set = new TreeSet JavaDoc<UURI>(SeedCachingScopeTest.CMP);
156        set.addAll(SeedCachingScopeTest.seeds);
157        set.add(curi.getUURI());
158        checkContent(sl, set);
159    }
160
161    public void testNoScheme() throws IOException JavaDoc {
162        final String JavaDoc NOSCHEME = "x.y.z";
163        FileWriter JavaDoc fw = new FileWriter JavaDoc(this.seedsfile, true);
164        // Write to new (last) line the URL.
165
fw.write("\n");
166        fw.write(NOSCHEME);
167        fw.flush();
168        fw.close();
169        boolean found = false;
170        SeedCachingScope sl = new UnitTestSeedCachingScope(seedsfile);
171        for (Iterator JavaDoc i = sl.seedsIterator(); i.hasNext();) {
172            UURI uuri = (UURI)i.next();
173            if (uuri.getHost() == null) {
174                continue;
175            }
176            if (uuri.getHost().equals(NOSCHEME)) {
177                found = true;
178                break;
179            }
180        }
181        assertTrue("Did not find " + NOSCHEME, found);
182    }
183
184    private SeedCachingScope checkContent(Set JavaDoc seedSet) {
185        return checkContent(null, seedSet);
186    }
187
188    private SeedCachingScope checkContent(SeedCachingScope sl, Set JavaDoc seedSet) {
189        if (sl == null) {
190            sl = new UnitTestSeedCachingScope(this.seedsfile);
191        }
192        int count = 0;
193        for (Iterator JavaDoc i = sl.seedsIterator(); i.hasNext();) {
194            count++;
195            UURI uuri = (UURI)i.next();
196            assertTrue("Does not contain: " + uuri.toString(),
197                seedSet.contains(uuri));
198        }
199        assertTrue("Different sizes: " + count + ", " + seedSet.size(),
200            count == seedSet.size());
201        return sl;
202    }
203 }
204
205
Popular Tags