KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > util > BdbUriUniqFilterTest


1 /* BdbUriUniqFilterTest
2  *
3  * $Id: BdbUriUniqFilterTest.java,v 1.8.16.1 2007/01/13 01:31:29 stack-sf Exp $
4  *
5  * Created on Sep 15, 2004.
6  *
7  * Copyright (C) 2003 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.util;
26 import java.io.File JavaDoc;
27 import java.io.IOException JavaDoc;
28 import java.util.ArrayList JavaDoc;
29 import java.util.HashSet JavaDoc;
30 import java.util.Iterator JavaDoc;
31 import java.util.logging.Logger JavaDoc;
32
33 import junit.framework.Test;
34 import junit.framework.TestSuite;
35
36 import org.apache.commons.httpclient.URIException;
37 import org.archive.crawler.datamodel.CandidateURI;
38 import org.archive.crawler.datamodel.UriUniqFilter;
39 import org.archive.net.UURI;
40 import org.archive.net.UURIFactory;
41 import org.archive.util.FileUtils;
42 import org.archive.util.TmpDirTestCase;
43
44 import com.sleepycat.je.DatabaseException;
45
46
47 /**
48  * Test BdbUriUniqFilter.
49  * @author stack
50  */

51 public class BdbUriUniqFilterTest extends TmpDirTestCase
52 implements UriUniqFilter.HasUriReceiver {
53     private Logger JavaDoc logger =
54         Logger.getLogger(BdbUriUniqFilterTest.class.getName());
55     
56     private UriUniqFilter filter = null;
57     private File JavaDoc bdbDir = null;
58     
59     /**
60      * Set to true if we visited received.
61      */

62     private boolean received = false;
63     
64     protected void setUp() throws Exception JavaDoc {
65         super.setUp();
66         // Remove any bdb that already exists.
67
this.bdbDir = new File JavaDoc(getTmpDir(), this.getClass().getName());
68         if (this.bdbDir.exists()) {
69             FileUtils.deleteDir(bdbDir);
70         }
71         this.filter = new BdbUriUniqFilter(bdbDir, 50);
72         this.filter.setDestination(this);
73     }
74     
75     protected void tearDown() throws Exception JavaDoc {
76         super.tearDown();
77         ((BdbUriUniqFilter)this.filter).close();
78         // if (this.bdbDir.exists()) {
79
// FileUtils.deleteDir(bdbDir);
80
// }
81
}
82     
83     public void testAdding() throws URIException {
84         this.filter.add(this.getUri(),
85             new CandidateURI(UURIFactory.getInstance(this.getUri())));
86         this.filter.addNow(this.getUri(),
87             new CandidateURI(UURIFactory.getInstance(this.getUri())));
88         this.filter.addForce(this.getUri(),
89             new CandidateURI(UURIFactory.getInstance(this.getUri())));
90         // Should only have add 'this' once.
91
assertTrue("Count is off", this.filter.count() == 1);
92     }
93     
94     public void testCreateKey() {
95         String JavaDoc url = "dns:archive.org";
96         long fingerprint = BdbUriUniqFilter.createKey(url);
97         assertTrue("Fingerprint wrong " + url,
98             fingerprint == 8812917769287344085L);
99         url = "http://archive.org/index.html";
100         fingerprint = BdbUriUniqFilter.createKey(url);
101         assertTrue("Fingerprint wrong " + url,
102             fingerprint == 6613237167064754714L);
103     }
104     
105     /**
106      * Verify that two URIs which gave colliding hashes, when previously
107      * the last 40bits of the composite did not sufficiently vary with certain
108      * inputs, no longer collide.
109      */

110     public void testCreateKeyCollisions() {
111         HashSet JavaDoc<Long JavaDoc> fingerprints = new HashSet JavaDoc<Long JavaDoc>();
112         fingerprints.add(new Long JavaDoc(BdbUriUniqFilter
113                 .createKey("dns:mail.daps.dla.mil")));
114         fingerprints.add(new Long JavaDoc(BdbUriUniqFilter
115                 .createKey("dns:militaryreview.army.mil")));
116         assertEquals("colliding fingerprints",2,fingerprints.size());
117     }
118     
119     /**
120      * Time import of recovery log.
121      * REMOVE
122      * @throws IOException
123      * @throws DatabaseException
124      */

125     public void testWriting()
126     throws IOException JavaDoc, DatabaseException {
127         long maxcount = 1000;
128         // Look for a system property to override default max count.
129
String JavaDoc key = this.getClass().getName() + ".maxcount";
130         String JavaDoc maxcountStr = System.getProperty(key);
131         logger.info("Looking for override system property " + key);
132         if (maxcountStr != null && maxcountStr.length() > 0) {
133             maxcount = Long.parseLong(maxcountStr);
134         }
135         runTestWriting(maxcount);
136     }
137     
138     protected void runTestWriting(long max)
139     throws DatabaseException, URIException {
140         long start = System.currentTimeMillis();
141         ArrayList JavaDoc<UURI> list = new ArrayList JavaDoc<UURI>(1000);
142         int count = 0;
143         for (; count < max; count++) {
144             UURI u = UURIFactory.getInstance("http://www" +
145                 count + ".archive.org/" + count + "/index.html");
146             this.filter.add(u.toString(), new CandidateURI(u));
147             if (count > 0 && ((count % 100) == 0)) {
148                 list.add(u);
149             }
150             if (count > 0 && ((count % 100000) == 0)) {
151                 this.logger.info("Added " + count + " in " +
152                     (System.currentTimeMillis() - start) +
153                     " misses " +
154                     ((BdbUriUniqFilter)this.filter).getCacheMisses() +
155                     " diff of misses " +
156                     ((BdbUriUniqFilter)this.filter).getLastCacheMissDiff());
157             }
158         }
159         this.logger.info("Added " + count + " in " +
160             (System.currentTimeMillis() - start));
161         
162         start = System.currentTimeMillis();
163         for (Iterator JavaDoc i = list.iterator(); i.hasNext();) {
164             UURI uuri = (UURI)i.next();
165             this.filter.add(uuri.toString(), new CandidateURI(uuri));
166         }
167         this.logger.info("Added random " + list.size() + " in " +
168                 (System.currentTimeMillis() - start));
169         
170         start = System.currentTimeMillis();
171         for (Iterator JavaDoc i = list.iterator(); i.hasNext();) {
172             UURI uuri = (UURI)i.next();
173             this.filter.add(uuri.toString(), new CandidateURI(uuri));
174         }
175         this.logger.info("Deleted random " + list.size() + " in " +
176             (System.currentTimeMillis() - start));
177         // Looks like delete doesn't work.
178
assertTrue("Count is off: " + this.filter.count(),
179             this.filter.count() == max);
180     }
181     
182     public void testNote() {
183         this.filter.note(this.getUri());
184         assertFalse("Receiver was called", this.received);
185     }
186     
187     public void testForget() throws URIException {
188         this.filter.forget(this.getUri(),
189             new CandidateURI(UURIFactory.getInstance(getUri())));
190         assertTrue("Didn't forget", this.filter.count() == 0);
191     }
192     
193     public void receive(CandidateURI item) {
194         this.received = true;
195     }
196
197     public String JavaDoc getUri() {
198         return "http://www.archive.org";
199     }
200     
201     /**
202      * return the suite of tests for MemQueueTest
203      *
204      * @return the suite of test
205      */

206     public static Test suite() {
207         return new TestSuite(BdbUriUniqFilterTest.class);
208     }
209
210     public static void main(String JavaDoc[] args) {
211         junit.textui.TestRunner.run(suite());
212     }
213 }
Popular Tags