package org.archive.crawler.util;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;

import junit.framework.Test;
import junit.framework.TestSuite;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.UriUniqFilter;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.FileUtils;
import org.archive.util.TmpDirTestCase;

import com.sleepycat.je.DatabaseException;


/**
 * Exercises {@link BdbUriUniqFilter}: adding, noting and forgetting URIs,
 * fingerprint (key) creation, and a bulk-write run whose size can be
 * overridden with a system property.
 *
 * <p>The test registers itself as the filter's destination so it can
 * observe whether the filter passed an item on (see {@link #received}).
 */
public class BdbUriUniqFilterTest extends TmpDirTestCase
implements UriUniqFilter.HasUriReceiver {
    private static final Logger logger =
        Logger.getLogger(BdbUriUniqFilterTest.class.getName());

    /** Filter under test; created fresh in {@link #setUp()}. */
    private UriUniqFilter filter = null;

    /** Directory handed to the filter; a child of the test tmp dir. */
    private File bdbDir = null;

    /**
     * Set to true when {@link #receive(CandidateURI)} is called.
     */
    private boolean received = false;

    /**
     * Creates a clean directory named after this class under the tmp dir
     * and opens a new filter on it with this test as destination.
     */
    @Override
    protected void setUp() throws Exception {
        super.setUp();
        this.bdbDir = new File(getTmpDir(), this.getClass().getName());
        // Remove leftovers from an earlier run so counts start from zero.
        if (this.bdbDir.exists()) {
            FileUtils.deleteDir(bdbDir);
        }
        this.filter = new BdbUriUniqFilter(bdbDir, 50);
        this.filter.setDestination(this);
    }

    /**
     * Closes the filter, then runs the superclass tear-down.
     *
     * <p>The filter is closed first and the superclass tear-down runs in a
     * {@code finally} so that neither step is skipped if the other throws.
     * The null guard covers a {@link #setUp()} that failed before the
     * filter was created.
     */
    @Override
    protected void tearDown() throws Exception {
        try {
            if (this.filter != null) {
                ((BdbUriUniqFilter)this.filter).close();
            }
        } finally {
            super.tearDown();
        }
    }

    /**
     * Adds the same URI via add, addNow and addForce and checks that the
     * filter counts it once.
     */
    public void testAdding() throws URIException {
        this.filter.add(this.getUri(),
            new CandidateURI(UURIFactory.getInstance(this.getUri())));
        this.filter.addNow(this.getUri(),
            new CandidateURI(UURIFactory.getInstance(this.getUri())));
        this.filter.addForce(this.getUri(),
            new CandidateURI(UURIFactory.getInstance(this.getUri())));
        assertEquals("Count is off", 1L, this.filter.count());
    }

    /**
     * Checks {@link BdbUriUniqFilter#createKey} against two known
     * fingerprints.
     */
    public void testCreateKey() {
        String url = "dns:archive.org";
        long fingerprint = BdbUriUniqFilter.createKey(url);
        assertEquals("Fingerprint wrong " + url,
            8812917769287344085L, fingerprint);
        url = "http://archive.org/index.html";
        fingerprint = BdbUriUniqFilter.createKey(url);
        assertEquals("Fingerprint wrong " + url,
            6613237167064754714L, fingerprint);
    }

    /**
     * Verifies that two specific dns: URIs produce distinct fingerprints.
     */
    public void testCreateKeyCollisions() {
        Set<Long> fingerprints = new HashSet<Long>();
        fingerprints.add(Long.valueOf(BdbUriUniqFilter
            .createKey("dns:mail.daps.dla.mil")));
        fingerprints.add(Long.valueOf(BdbUriUniqFilter
            .createKey("dns:militaryreview.army.mil")));
        assertEquals("colliding fingerprints", 2, fingerprints.size());
    }

    /**
     * Runs the bulk-write test. Writes 1000 URIs by default; set the system
     * property <code>&lt;this class name&gt;.maxcount</code> to override.
     *
     * @throws IOException
     * @throws DatabaseException
     */
    public void testWriting()
    throws IOException, DatabaseException {
        long maxcount = 1000;
        // Look for a system property to override default max count.
        String key = this.getClass().getName() + ".maxcount";
        String maxcountStr = System.getProperty(key);
        logger.info("Looking for override system property " + key);
        if (maxcountStr != null && maxcountStr.length() > 0) {
            maxcount = Long.parseLong(maxcountStr);
        }
        runTestWriting(maxcount);
    }

    /**
     * Adds <code>max</code> distinct URIs, then re-adds a sample of them
     * twice, logging timings, and finally checks the filter's count equals
     * <code>max</code>.
     *
     * @param max Number of distinct URIs to add.
     * @throws DatabaseException
     * @throws URIException
     */
    protected void runTestWriting(long max)
    throws DatabaseException, URIException {
        long start = System.currentTimeMillis();
        // Every 100th URI is remembered here for the re-add passes below.
        List<UURI> list = new ArrayList<UURI>(1000);
        int count = 0;
        for (; count < max; count++) {
            UURI u = UURIFactory.getInstance("http://www" +
                count + ".archive.org/" + count + "/index.html");
            this.filter.add(u.toString(), new CandidateURI(u));
            if (count > 0 && ((count % 100) == 0)) {
                list.add(u);
            }
            if (count > 0 && ((count % 100000) == 0)) {
                logger.info("Added " + count + " in " +
                    (System.currentTimeMillis() - start) +
                    " misses " +
                    ((BdbUriUniqFilter)this.filter).getCacheMisses() +
                    " diff of misses " +
                    ((BdbUriUniqFilter)this.filter).getLastCacheMissDiff());
            }
        }
        logger.info("Added " + count + " in " +
            (System.currentTimeMillis() - start));

        start = System.currentTimeMillis();
        for (UURI uuri : list) {
            this.filter.add(uuri.toString(), new CandidateURI(uuri));
        }
        logger.info("Added random " + list.size() + " in " +
            (System.currentTimeMillis() - start));

        // This pass also only calls add(): nothing is removed, which is
        // what the count assertion below relies on. The log message used
        // to say "Deleted", which did not match what the loop does.
        start = System.currentTimeMillis();
        for (UURI uuri : list) {
            this.filter.add(uuri.toString(), new CandidateURI(uuri));
        }
        logger.info("Re-added random " + list.size() + " in " +
            (System.currentTimeMillis() - start));

        long actual = this.filter.count();
        assertEquals("Count is off: " + actual, max, actual);
    }

    /**
     * Notes a URI and checks that the receiver was not called.
     */
    public void testNote() {
        this.filter.note(this.getUri());
        assertFalse("Receiver was called", this.received);
    }

    /**
     * Forgets the test URI on a freshly created filter and checks that the
     * count is zero.
     */
    public void testForget() throws URIException {
        this.filter.forget(this.getUri(),
            new CandidateURI(UURIFactory.getInstance(getUri())));
        assertEquals("Didn't forget", 0L, this.filter.count());
    }

    /**
     * Receiver callback; records that the filter handed an item on.
     *
     * @param item Item passed by the filter (not inspected).
     */
    public void receive(CandidateURI item) {
        this.received = true;
    }

    /**
     * @return The fixed URI used by the single-URI tests.
     */
    public String getUri() {
        return "http://www.archive.org";
    }

    /**
     * Builds a suite containing every test in this class.
     *
     * @return Suite of all tests in this class.
     */
    public static Test suite() {
        return new TestSuite(BdbUriUniqFilterTest.class);
    }

    /**
     * Runs the suite with the JUnit text runner.
     *
     * @param args Ignored.
     */
    public static void main(String[] args) {
        junit.textui.TestRunner.run(suite());
    }
}