| 1 25 package org.archive.crawler.util; 26 27 import java.io.FileNotFoundException ; 28 import java.io.IOException ; 29 import java.util.ArrayList ; 30 import java.util.Iterator ; 31 import java.util.logging.Logger ; 32 33 import junit.framework.TestCase; 34 35 import org.apache.commons.httpclient.URIException; 36 import org.archive.crawler.datamodel.CandidateURI; 37 import org.archive.crawler.datamodel.UriUniqFilter; 38 import org.archive.net.UURI; 39 import org.archive.net.UURIFactory; 40 41 42 46 public class BloomUriUniqFilterTest extends TestCase 47 implements UriUniqFilter.HasUriReceiver { 48 private Logger logger = 49 Logger.getLogger(BloomUriUniqFilterTest.class.getName()); 50 51 private BloomUriUniqFilter filter = null; 52 53 56 private boolean received = false; 57 58 protected void setUp() throws Exception { 59 super.setUp(); 60 this.filter = new BloomUriUniqFilter(2000,24); 61 this.filter.setDestination(this); 62 } 63 64 public void testAdding() throws URIException { 65 this.filter.add(this.getUri(), 66 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 67 this.filter.addNow(this.getUri(), 68 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 69 this.filter.addForce(this.getUri(), 70 new CandidateURI(UURIFactory.getInstance(this.getUri()))); 71 assertTrue("Count is off", this.filter.count() == 1); 73 } 74 75 81 public void testWriting() throws URIException { 82 long start = System.currentTimeMillis(); 83 ArrayList <UURI> list = new ArrayList <UURI>(1000); 84 int count = 0; 85 final int MAX_COUNT = 1000; 86 for (; count < MAX_COUNT; count++) { 87 assertEquals("count off",count,filter.count()); 88 UURI u = UURIFactory.getInstance("http://www" + 89 count + ".archive.org/" + count + "/index.html"); 90 assertFalse("already contained "+u.toString(),filter.bloom.contains(u.toString())); 91 logger.fine("adding "+u.toString()); 92 filter.add(u.toString(), new CandidateURI(u)); 93 assertTrue("not in bloom",filter.bloom.contains(u.toString())); 94 if (count > 0 && ((count % 100) == 0)) { 95 list.add(u); 96 } 97 } 98 logger.fine("Added " + count + " in " + 99 (System.currentTimeMillis() - start)); 100 101 start = System.currentTimeMillis(); 102 for (Iterator i = list.iterator(); i.hasNext();) { 103 UURI uuri = (UURI)i.next(); 104 filter.add(uuri.toString(), new CandidateURI(uuri)); 105 } 106 logger.fine("Readded subset " + list.size() + " in " + 107 (System.currentTimeMillis() - start)); 108 109 assertTrue("Count is off: " + filter.count(), 110 filter.count() == MAX_COUNT); 111 } 112 113 public void testNote() { 114 filter.note(this.getUri()); 115 assertFalse("Receiver was called", this.received); 116 } 117 118 125 public void receive(CandidateURI item) { 126 this.received = true; 127 } 128 129 public String getUri() { 130 return "http://www.archive.org"; 131 } 132 } 133 | Popular Tags |