KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > fetcher > TestFetcher


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.fetcher;
5
6 import net.nutch.io.*;
7 import net.nutch.db.*;
8 import net.nutch.fs.*;
9 import net.nutch.util.*;
10 import net.nutch.pagedb.*;
11 import net.nutch.parse.*;
12 import net.nutch.protocol.*;
13
14 import java.io.*;
15 import java.util.logging.Level JavaDoc;
16 import junit.framework.TestCase;
17
18
19 public class TestFetcher extends TestCase {
20
21   public TestFetcher(String JavaDoc name) {
22       super(name);
23   }
24     
25
26   public void testFetcher() throws Exception JavaDoc {
27     NutchFileSystem nfs = new LocalFileSystem();
28     try {
29         String JavaDoc directory = System.getProperty("test.build.data",".");
30     
31         String JavaDoc fetchListFilename = directory + "/" + FetchListEntry.DIR_NAME;
32         
33         ArrayFile.Writer testFetchList =
34             new ArrayFile.Writer(nfs, fetchListFilename, FetchListEntry.class);
35
36         MD5Hash id1 = new MD5Hash(new byte[]{0,0,0,0, 0,0,0,0, 0,0,0,0, 1,2,3,4});
37         MD5Hash id2 = new MD5Hash(new byte[]{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0});
38     
39         String JavaDoc url1 = "http://sourceforge.net/projects/nutch/";
40         String JavaDoc url2 = "http://www.yahoo.com/";
41         String JavaDoc url3 = "http://jakarta.apache.org/lucene/";
42         String JavaDoc url4 = "http://www.nutch.org/docs/index.html";
43         String JavaDoc url5 = "ftp://ftp.redhat.com/";
44     
45         Page page1 = new Page(url1, id1);
46         Page page2 = new Page(url2, id2);
47         Page page3 = new Page(url3, id2);
48         Page page4 = new Page(url4, id2);
49         Page page5 = new Page(url5, id2);
50     
51         String JavaDoc[] anchors = new String JavaDoc[] {"foo", "bar"};
52
53         FetchListEntry fe1 = new FetchListEntry(true, page1, anchors);
54         FetchListEntry fe2 = new FetchListEntry(true, page2, anchors);
55         FetchListEntry fe3 = new FetchListEntry(true, page3, anchors);
56         FetchListEntry fe4 = new FetchListEntry(true, page4, anchors);
57         FetchListEntry fe5 = new FetchListEntry(false, page5, anchors);
58     
59         testFetchList.append(fe1);
60         testFetchList.append(fe2);
61         testFetchList.append(fe3);
62         testFetchList.append(fe4);
63         testFetchList.append(fe5);
64         testFetchList.close();
65         
66         Fetcher fetcher = new Fetcher(nfs, directory, true);
67         fetcher.setLogLevel(Level.FINE);
68         //fetcher.getHttp().setMaxContentLength(4096);
69
//fetcher.getHttp().setAgentString("NutchCVS");
70

71         fetcher.run();
72
73         ArrayFile.Reader fetcher_stripped;
74         String JavaDoc stripped = directory + "/" + ParseText.DIR_NAME;
75         ParseText s = new ParseText();
76         fetcher_stripped = new ArrayFile.Reader(nfs, stripped);
77
78         boolean yahoo = false;
79         boolean nutch = false;
80
81         while (fetcher_stripped.next(s) != null) {
82
83             if (s.toString().indexOf("Yahoo!") >= 0)
84                 yahoo = true;
85
86             if (s.toString().indexOf("Nutch") >= 0 )
87                 nutch = true;
88         }
89         fetcher_stripped.close();
90         assertTrue(yahoo);
91         assertTrue(nutch);
92     } finally {
93         nfs.close();
94     }
95   }
96 }
97
98
99
100
101
102
Popular Tags