package org.archive.crawler.scope;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.LinkedList;

import junit.framework.TestCase;

import org.archive.net.UURI;

/**
 * Unit tests for {@link SeedFileIterator}, driven entirely from in-memory
 * seed text so no files are touched.
 */
public class SeedFileIteratorTest extends TestCase {

    /**
     * Checks that a seed whose host contains a hyphen comes back from the
     * iterator with exactly the same string form it was given.
     */
    public void testHyphenInHost() {
        final String seedFileContent = "http://www.examp-le.com/";
        StringWriter sw = new StringWriter();
        StringReader sr = new StringReader(seedFileContent);
        UURI seed = new SeedFileIterator(new BufferedReader(sr), sw).next();
        // JUnit's contract is (message, expected, actual); the seed text is
        // the expected value and the parsed UURI is the value under test.
        assertEquals("Hyphen is problem", seedFileContent, seed.toString());
    }

    /**
     * Feeds the iterator a seed list containing a comment line, a blank
     * line, a naked host, a host with a path, a full http URL and a
     * '+'-prefixed entry. Expects exactly three URIs to be produced and the
     * '+'-prefixed line to show up in the writer given for ignored entries.
     *
     * @throws IOException declared for parity with the I/O classes used;
     *         the in-memory reader and writer are not expected to raise it
     */
    public void testGeneral() throws IOException {
        String seedFile = "# comment\n"
                + "\n"
                + "www.example.com\n"
                + "www.example.org/foo\n"
                + "http://www.example.net\n"
                + "+http://www.example.us";
        StringWriter ignored = new StringWriter();
        SeedFileIterator iter = new SeedFileIterator(
                new BufferedReader(new StringReader(seedFile)),
                new BufferedWriter(ignored));

        LinkedList<String> seeds = new LinkedList<String>();
        while (iter.hasNext()) {
            UURI n = iter.next();
            // Only non-null results count as seeds.
            if (n != null) {
                seeds.add(n.getURI());
            }
        }

        assertTrue("didn't get naked host",
                seeds.contains("http://www.example.com/"));
        assertTrue("didn't get naked host+path",
                seeds.contains("http://www.example.org/foo"));
        assertTrue("didn't get full http URL",
                seeds.contains("http://www.example.net/"));
        // assertEquals reports the actual count on failure, unlike
        // assertTrue(size == 3).
        assertEquals("got wrong number of URLs", 3, seeds.size());
        assertTrue("ignored entry not reported",
                ignored.toString().contains("+http://www.example.us"));
    }
}