KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > util > FPUriUniqFilterTest


/* FPUriUniqFilterTest
 *
 * $Id: FPUriUniqFilterTest.java,v 1.7.16.1 2007/01/13 01:31:29 stack-sf Exp $
 *
 * Created on Sep 15, 2004.
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.logging.Logger;

import junit.framework.TestCase;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.datamodel.CandidateURI;
import org.archive.crawler.datamodel.UriUniqFilter;
import org.archive.net.UURI;
import org.archive.net.UURIFactory;
import org.archive.util.fingerprint.MemLongFPSet;

43 /**
44  * Test FPUriUniqFilter.
45  * @author stack
46  */

47 public class FPUriUniqFilterTest extends TestCase
48 implements UriUniqFilter.HasUriReceiver {
49     private Logger JavaDoc logger =
50         Logger.getLogger(FPUriUniqFilterTest.class.getName());
51
52     private UriUniqFilter filter = null;
53     
54     /**
55      * Set to true if we visited received.
56      */

57     private boolean received = false;
58     
59     protected void setUp() throws Exception JavaDoc {
60         super.setUp();
61         // 17 makes a MemLongFPSet of one meg of longs (64megs).
62
this.filter = new FPUriUniqFilter(new MemLongFPSet(10, 0.75f));
63         this.filter.setDestination(this);
64     }
65     
66     public void testAdding() throws URIException {
67         this.filter.add(this.getUri(),
68             new CandidateURI(UURIFactory.getInstance(this.getUri())));
69         this.filter.addNow(this.getUri(),
70             new CandidateURI(UURIFactory.getInstance(this.getUri())));
71         this.filter.addForce(this.getUri(),
72             new CandidateURI(UURIFactory.getInstance(this.getUri())));
73         // Should only have add 'this' once.
74
assertTrue("Count is off", this.filter.count() == 1);
75     }
76     
77     /**
78      * Test inserting and removing.
79      * @throws IOException
80      * @throws FileNotFoundException
81      */

82     public void testWriting() throws FileNotFoundException JavaDoc, IOException JavaDoc {
83         long start = System.currentTimeMillis();
84         ArrayList JavaDoc<UURI> list = new ArrayList JavaDoc<UURI>(1000);
85         int count = 0;
86         final int MAX_COUNT = 1000;
87         for (; count < MAX_COUNT; count++) {
88             UURI u = UURIFactory.getInstance("http://www" +
89                     count + ".archive.org/" + count + "/index.html");
90             this.filter.add(u.toString(), new CandidateURI(u));
91             if (count > 0 && ((count % 100) == 0)) {
92                 list.add(u);
93             }
94         }
95         this.logger.info("Added " + count + " in " +
96                 (System.currentTimeMillis() - start));
97         
98         start = System.currentTimeMillis();
99         for (Iterator JavaDoc i = list.iterator(); i.hasNext();) {
100             UURI uuri = (UURI)i.next();
101             this.filter.add(uuri.toString(), new CandidateURI(uuri));
102         }
103         this.logger.info("Added random " + list.size() + " in " +
104                 (System.currentTimeMillis() - start));
105         
106         start = System.currentTimeMillis();
107         for (Iterator JavaDoc i = list.iterator(); i.hasNext();) {
108             UURI uuri = (UURI)i.next();
109             this.filter.add(uuri.toString(), new CandidateURI(uuri));
110         }
111         this.logger.info("Deleted random " + list.size() + " in " +
112             (System.currentTimeMillis() - start));
113         // Looks like delete doesn't work.
114
assertTrue("Count is off: " + this.filter.count(),
115             this.filter.count() == MAX_COUNT);
116     }
117     
118     public void testNote() {
119         this.filter.note(this.getUri());
120         assertFalse("Receiver was called", this.received);
121     }
122     
123     public void testForget() throws URIException {
124         this.filter.forget(this.getUri(),
125                 new CandidateURI(UURIFactory.getInstance(this.getUri())));
126         assertTrue("Didn't forget", this.filter.count() == 0);
127     }
128     
129     public void receive(CandidateURI item) {
130         this.received = true;
131     }
132
133     public String JavaDoc getUri() {
134         return "http://www.archive.org";
135     }
136 }
137
Popular Tags