KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > scope > SeedFileIteratorTest


1 /* SeedFileIteratorTest
2  *
3  * $Id: SeedFileIteratorTest.java,v 1.4.16.1 2007/01/13 01:31:25 stack-sf Exp $
4  *
5  * Created on May 31, 2005
6  *
7  * Copyright (C) 2005 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.scope;
26
27 import java.io.BufferedReader JavaDoc;
28 import java.io.BufferedWriter JavaDoc;
29 import java.io.IOException JavaDoc;
30 import java.io.StringReader JavaDoc;
31 import java.io.StringWriter JavaDoc;
32 import java.util.LinkedList JavaDoc;
33
34 import junit.framework.TestCase;
35
36 import org.archive.net.UURI;
37
38 /**
39  * Test {@link SeedFileIterator}.
40  * @author gojomo
41  * @version $Revision: 1.4.16.1 $, $Date: 2007/01/13 01:31:25 $
42  */

43 public class SeedFileIteratorTest extends TestCase {
44     public void testHyphenInHost() {
45         final String JavaDoc seedFileContent = "http://www.examp-le.com/";
46         StringWriter JavaDoc sw = new StringWriter JavaDoc();
47         StringReader JavaDoc sr = new StringReader JavaDoc(seedFileContent);
48         UURI seed =
49             (UURI)(new SeedFileIterator(new BufferedReader JavaDoc(sr), sw)).next();
50         assertEquals("Hyphen is problem", seed.toString(),
51             seedFileContent);
52     }
53
54     public void testGeneral() throws IOException JavaDoc {
55         String JavaDoc seedFile = "# comment\n" + // comment
56
"\n" + // blank line
57
"www.example.com\n" + // naked host, implied scheme
58
"www.example.org/foo\n" + // naked host+path, implied scheme
59
"http://www.example.net\n" + // full HTTP URL
60
"+http://www.example.us"; // 'directive' (should be ignored)
61
StringWriter JavaDoc ignored = new StringWriter JavaDoc();
62         SeedFileIterator iter = new SeedFileIterator(new BufferedReader JavaDoc(
63                 new StringReader JavaDoc(seedFile)), new BufferedWriter JavaDoc(ignored));
64         LinkedList JavaDoc<String JavaDoc> seeds = new LinkedList JavaDoc<String JavaDoc>();
65         while (iter.hasNext()) {
66             UURI n = iter.next();
67             if (n instanceof UURI) {
68                 seeds.add(n.getURI());
69             }
70         }
71         assertTrue("didn't get naked host", seeds
72                 .contains("http://www.example.com/"));
73         assertTrue("didn't get naked host+path", seeds
74                 .contains("http://www.example.org/foo"));
75         assertTrue("didn't get full http URL", seeds
76                 .contains("http://www.example.net/"));
77         assertTrue("got wrong number of URLs", seeds.size() == 3);
78         assertTrue("ignored entry not reported", ignored.toString().indexOf(
79                 "+http://www.example.us") >= 0);
80     }
81 }
82
83
Popular Tags