1 25 package org.archive.crawler.util; 26 27 import java.io.FileNotFoundException ; 28 import java.io.IOException ; 29 import java.util.ArrayList ; 30 import java.util.Iterator ; 31 import java.util.logging.Logger ; 32 33 import junit.framework.TestCase; 34 35 import org.apache.commons.httpclient.URIException; 36 import org.archive.crawler.datamodel.CandidateURI; 37 import org.archive.crawler.datamodel.UriUniqFilter; 38 import org.archive.net.UURI; 39 import org.archive.net.UURIFactory; 40 import org.archive.util.fingerprint.MemLongFPSet; 41 42 43 47 public class FPUriUniqFilterTest extends TestCase 48 implements UriUniqFilter.HasUriReceiver { 49 private Logger logger = 50 Logger.getLogger(FPUriUniqFilterTest.class.getName()); 51 52 private UriUniqFilter filter = null; 53 54 57 private boolean received = false; 58 59 protected void setUp() throws Exception { 60 super.setUp(); 61 this.filter = new FPUriUniqFilter(new MemLongFPSet(10, 0.75f)); 63 this.filter.setDestination(this); 64 } 65 66 public void testAdding() throws URIException { 67 this.filter.add(this.getUri(), 68 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 69 this.filter.addNow(this.getUri(), 70 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 71 this.filter.addForce(this.getUri(), 72 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 73 assertTrue("Count is off", this.filter.count() == 1); 75 } 76 77 82 public void testWriting() throws FileNotFoundException , IOException { 83 long start = System.currentTimeMillis(); 84 ArrayList <UURI> list = new ArrayList <UURI>(1000); 85 int count = 0; 86 final int MAX_COUNT = 1000; 87 for (; count < MAX_COUNT; count++) { 88 UURI u = UURIFactory.getInstance("http://www" + 89 count + ".archive.org/" + count + "/index.html"); 90 this.filter.add(u.toString(), new CandidateURI(u)); 91 if (count > 0 && ((count % 100) == 0)) { 92 list.add(u); 93 } 94 } 95 this.logger.info("Added " + count + " in " + 96 (System.currentTimeMillis() - start)); 97 98 start = System.currentTimeMillis(); 99 for (Iterator i = list.iterator(); i.hasNext();) { 100 UURI uuri = (UURI)i.next(); 101 this.filter.add(uuri.toString(), new CandidateURI(uuri)); 102 } 103 this.logger.info("Added random " + list.size() + " in " + 104 (System.currentTimeMillis() - start)); 105 106 start = System.currentTimeMillis(); 107 for (Iterator i = list.iterator(); i.hasNext();) { 108 UURI uuri = (UURI)i.next(); 109 this.filter.add(uuri.toString(), new CandidateURI(uuri)); 110 } 111 this.logger.info("Deleted random " + list.size() + " in " + 112 (System.currentTimeMillis() - start)); 113 assertTrue("Count is off: " + this.filter.count(), 115 this.filter.count() == MAX_COUNT); 116 } 117 118 public void testNote() { 119 this.filter.note(this.getUri()); 120 assertFalse("Receiver was called", this.received); 121 } 122 123 public void testForget() throws URIException { 124 this.filter.forget(this.getUri(), 125 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 126 assertTrue("Didn't forget", this.filter.count() == 0); 127 } 128 129 public void receive(CandidateURI item) { 130 this.received = true; 131 } 132 133 public String getUri() { 134 return "http://www.archive.org"; 135 } 136 } 137 | Popular Tags |