1 2 3 4 package net.nutch.fetcher; 5 6 import net.nutch.io.*; 7 import net.nutch.db.*; 8 import net.nutch.fs.*; 9 import net.nutch.util.*; 10 import net.nutch.pagedb.*; 11 import net.nutch.parse.*; 12 import net.nutch.protocol.*; 13 14 import java.io.*; 15 import java.util.logging.Level ; 16 import junit.framework.TestCase; 17 18 19 public class TestFetcher extends TestCase { 20 21 public TestFetcher(String name) { 22 super(name); 23 } 24 25 26 public void testFetcher() throws Exception { 27 NutchFileSystem nfs = new LocalFileSystem(); 28 try { 29 String directory = System.getProperty("test.build.data","."); 30 31 String fetchListFilename = directory + "/" + FetchListEntry.DIR_NAME; 32 33 ArrayFile.Writer testFetchList = 34 new ArrayFile.Writer(nfs, fetchListFilename, FetchListEntry.class); 35 36 MD5Hash id1 = new MD5Hash(new byte[]{0,0,0,0, 0,0,0,0, 0,0,0,0, 1,2,3,4}); 37 MD5Hash id2 = new MD5Hash(new byte[]{0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0}); 38 39 String url1 = "http://sourceforge.net/projects/nutch/"; 40 String url2 = "http://www.yahoo.com/"; 41 String url3 = "http://jakarta.apache.org/lucene/"; 42 String url4 = "http://www.nutch.org/docs/index.html"; 43 String url5 = "ftp://ftp.redhat.com/"; 44 45 Page page1 = new Page(url1, id1); 46 Page page2 = new Page(url2, id2); 47 Page page3 = new Page(url3, id2); 48 Page page4 = new Page(url4, id2); 49 Page page5 = new Page(url5, id2); 50 51 String [] anchors = new String [] {"foo", "bar"}; 52 53 FetchListEntry fe1 = new FetchListEntry(true, page1, anchors); 54 FetchListEntry fe2 = new FetchListEntry(true, page2, anchors); 55 FetchListEntry fe3 = new FetchListEntry(true, page3, anchors); 56 FetchListEntry fe4 = new FetchListEntry(true, page4, anchors); 57 FetchListEntry fe5 = new FetchListEntry(false, page5, anchors); 58 59 testFetchList.append(fe1); 60 testFetchList.append(fe2); 61 testFetchList.append(fe3); 62 testFetchList.append(fe4); 63 testFetchList.append(fe5); 64 testFetchList.close(); 65 66 Fetcher fetcher = new Fetcher(nfs, directory, true); 67 fetcher.setLogLevel(Level.FINE); 68 71 fetcher.run(); 72 73 ArrayFile.Reader fetcher_stripped; 74 String stripped = directory + "/" + ParseText.DIR_NAME; 75 ParseText s = new ParseText(); 76 fetcher_stripped = new ArrayFile.Reader(nfs, stripped); 77 78 boolean yahoo = false; 79 boolean nutch = false; 80 81 while (fetcher_stripped.next(s) != null) { 82 83 if (s.toString().indexOf("Yahoo!") >= 0) 84 yahoo = true; 85 86 if (s.toString().indexOf("Nutch") >= 0 ) 87 nutch = true; 88 } 89 fetcher_stripped.close(); 90 assertTrue(yahoo); 91 assertTrue(nutch); 92 } finally { 93 nfs.close(); 94 } 95 } 96 } 97 98 99 100 101 102 | Popular Tags |