1 package org.archive.crawler.scope; 2 3 27 28 import java.io.File ; 29 import java.io.FileWriter ; 30 import java.io.IOException ; 31 import java.io.PrintWriter ; 32 import java.util.Comparator ; 33 import java.util.Iterator ; 34 import java.util.Set ; 35 import java.util.TreeSet ; 36 37 import org.apache.commons.httpclient.URIException; 38 import org.archive.crawler.datamodel.CrawlURI; 39 import org.archive.net.UURI; 40 import org.archive.net.UURIFactory; 41 import org.archive.util.TmpDirTestCase; 42 43 44 49 public class SeedCachingScopeTest extends TmpDirTestCase { 50 55 private class UnitTestSeedCachingScope extends SeedCachingScope { 56 57 private static final long serialVersionUID = -1651873833038665447L; 58 59 private File seedsfile; 60 61 public UnitTestSeedCachingScope(File seedsfile) { 62 super("test"); 63 this.seedsfile = seedsfile; 64 } 65 66 public File getSeedfile() { 67 return seedsfile; 68 } 69 } 70 71 private static Set <UURI> seeds = null; 72 73 76 private static final Comparator <UURI> CMP = new Comparator <UURI> () { 77 public int compare(UURI o1, UURI o2) { 78 int result = -1; 79 if (o1 == null && o1 == null){ 80 result = 0; 81 } else if (o1 == null) { 82 result = -1; 83 } else if (o2 == null) { 84 result = 1; 85 } else { 86 String s1 = o1.toString(); 87 String s2 = o2.toString(); 88 result = s1.compareTo(s2); 89 result = (result < 0)? result = -1: 90 (result > 0)? result = 1: 0; 91 } 92 return result; 93 } 94 }; 95 96 97 100 private File seedsfile; 101 102 103 106 protected void setUp() throws Exception { 107 super.setUp(); 108 109 SeedCachingScopeTest.seeds = new TreeSet <UURI>(SeedCachingScopeTest.CMP); 111 String [] uris = {"mailto:www.google.com", 112 "http://www.port.com:80/etc/motd2", 113 "http://a:b@userinfo.com/etc/motd2", 114 "news:www.google.com", 115 "http://www.google.com", 116 "https://www.google.com", 117 "gopher://www.google.com", 118 "news://www.google.com", 119 "rss://www.google.com", 120 "telnet://www.google.com", 121 "ftp://myname@example.com/etc/motd", 122 "ftp://example.com/etc/motd2" 123 }; 124 for (int i = 0; i < uris.length; i++) { 125 SeedCachingScopeTest.seeds.add(UURIFactory.getInstance(uris[i])); 126 } 127 128 this.seedsfile = new File (getTmpDir(), 130 SeedCachingScopeTest.class.getName() + ".seedfile"); 131 PrintWriter writer = new PrintWriter (new FileWriter (this.seedsfile)); 132 for (int i = 0; i < uris.length; i++) { 133 writer.println(uris[i]); 134 } 135 writer.close(); 136 } 137 138 139 142 protected void tearDown() throws Exception { 143 super.tearDown(); 144 if (this.seedsfile.exists()) { 145 this.seedsfile.delete(); 146 } 147 } 148 149 public void testGeneral() throws URIException { 150 SeedCachingScope sl = checkContent(SeedCachingScopeTest.seeds); 152 final CrawlURI curi = new CrawlURI(UURIFactory.getInstance("http://one.two.three")); 154 sl.addSeed(curi); 155 Set <UURI> set = new TreeSet <UURI>(SeedCachingScopeTest.CMP); 156 set.addAll(SeedCachingScopeTest.seeds); 157 set.add(curi.getUURI()); 158 checkContent(sl, set); 159 } 160 161 public void testNoScheme() throws IOException { 162 final String NOSCHEME = "x.y.z"; 163 FileWriter fw = new FileWriter (this.seedsfile, true); 164 fw.write("\n"); 166 fw.write(NOSCHEME); 167 fw.flush(); 168 fw.close(); 169 boolean found = false; 170 SeedCachingScope sl = new UnitTestSeedCachingScope(seedsfile); 171 for (Iterator i = sl.seedsIterator(); i.hasNext();) { 172 UURI uuri = (UURI)i.next(); 173 if (uuri.getHost() == null) { 174 continue; 175 } 176 if (uuri.getHost().equals(NOSCHEME)) { 177 found = true; 178 break; 179 } 180 } 181 assertTrue("Did not find " + NOSCHEME, found); 182 } 183 184 private SeedCachingScope checkContent(Set seedSet) { 185 return checkContent(null, seedSet); 186 } 187 188 private SeedCachingScope checkContent(SeedCachingScope sl, Set seedSet) { 189 if (sl == null) { 190 sl = new UnitTestSeedCachingScope(this.seedsfile); 191 } 192 int count = 0; 193 for (Iterator i = sl.seedsIterator(); i.hasNext();) { 194 count++; 195 UURI uuri = (UURI)i.next(); 196 assertTrue("Does not contain: " + uuri.toString(), 197 seedSet.contains(uuri)); 198 } 199 assertTrue("Different sizes: " + count + ", " + seedSet.size(), 200 count == seedSet.size()); 201 return sl; 202 } 203 } 204 205 | Popular Tags |