KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > util > BloomUriUniqFilterTest


/* BloomUriUniqFilterTest
 *
 * $Id: BloomUriUniqFilterTest.java,v 1.4.14.1 2007/01/13 01:31:29 stack-sf Exp $
 *
 * Created on Sep 15, 2004.
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;

import junit.framework.TestCase;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.UriUniqFilter;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;

42 /**
43  * Test BloomUriUniqFilter.
44  * @author gojomo
45  */

46 public class BloomUriUniqFilterTest extends TestCase
47 implements UriUniqFilter.HasUriReceiver {
48     private Logger JavaDoc logger =
49         Logger.getLogger(BloomUriUniqFilterTest.class.getName());
50
51     private BloomUriUniqFilter filter = null;
52
53     /**
54      * Set to true if we visited received.
55      */

56     private boolean received = false;
57
58     protected void setUp() throws Exception JavaDoc {
59         super.setUp();
60         this.filter = new BloomUriUniqFilter(2000,24);
61         this.filter.setDestination(this);
62     }
63
64     public void testAdding() throws URIException {
65         this.filter.add(this.getUri(),
66             new CandidateURI(UURIFactory.getInstance(this.getUri())));
67         this.filter.addNow(this.getUri(),
68             new CandidateURI(UURIFactory.getInstance(this.getUri())));
69         this.filter.addForce(this.getUri(),
70             new CandidateURI(UURIFactory.getInstance(this.getUri())));
71         // Should only have add 'this' once.
72
assertTrue("Count is off", this.filter.count() == 1);
73     }
74
75     /**
76      * Test inserting.
77      * @throws URIException
78      * @throws IOException
79      * @throws FileNotFoundException
80      */

81     public void testWriting() throws URIException {
82         long start = System.currentTimeMillis();
83         ArrayList JavaDoc<UURI> list = new ArrayList JavaDoc<UURI>(1000);
84         int count = 0;
85         final int MAX_COUNT = 1000;
86         for (; count < MAX_COUNT; count++) {
87             assertEquals("count off",count,filter.count());
88             UURI u = UURIFactory.getInstance("http://www" +
89                     count + ".archive.org/" + count + "/index.html");
90             assertFalse("already contained "+u.toString(),filter.bloom.contains(u.toString()));
91             logger.fine("adding "+u.toString());
92             filter.add(u.toString(), new CandidateURI(u));
93             assertTrue("not in bloom",filter.bloom.contains(u.toString()));
94             if (count > 0 && ((count % 100) == 0)) {
95                 list.add(u);
96             }
97         }
98         logger.fine("Added " + count + " in " +
99                 (System.currentTimeMillis() - start));
100
101         start = System.currentTimeMillis();
102         for (Iterator JavaDoc i = list.iterator(); i.hasNext();) {
103             UURI uuri = (UURI)i.next();
104             filter.add(uuri.toString(), new CandidateURI(uuri));
105         }
106         logger.fine("Readded subset " + list.size() + " in " +
107                 (System.currentTimeMillis() - start));
108
109         assertTrue("Count is off: " + filter.count(),
110             filter.count() == MAX_COUNT);
111     }
112
113     public void testNote() {
114         filter.note(this.getUri());
115         assertFalse("Receiver was called", this.received);
116     }
117
118 // FORGET CURRENTLY UNSUPPORTED IN BloomUriUniqFilter
119
// public void testForget() throws URIException {
120
// this.filter.forget(this.getUri(),
121
// new CandidateURI(UURIFactory.getInstance(this.getUri())));
122
// assertTrue("Didn't forget", this.filter.count() == 0);
123
// }
124

125     public void receive(CandidateURI item) {
126         this.received = true;
127     }
128
129     public String JavaDoc getUri() {
130         return "http://www.archive.org";
131     }
132 }
133
Popular Tags