KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > util > BenchmarkUriUniqFilters


1 /* BenchmarkUriUniqFilters
2  *
3  * $Id: BenchmarkUriUniqFilters.java,v 1.3.16.1 2007/01/13 01:31:29 stack-sf Exp $
4  *
5  * Created on Jun 22, 2005.
6  *
7  * Copyright (C) 2005 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.util;
26 import java.io.BufferedReader JavaDoc;
27 import java.io.BufferedWriter JavaDoc;
28 import java.io.File JavaDoc;
29 import java.io.FileReader JavaDoc;
30 import java.io.FileWriter JavaDoc;
31 import java.io.IOException JavaDoc;
32
33 import org.archive.crawler.datamodel.CandidateURI;
34 import org.archive.crawler.datamodel.UriUniqFilter;
35 import org.archive.util.fingerprint.MemLongFPSet;
36
37
38 /**
39  * BenchmarkUriUniqFilters
40  *
41  * @author gojomo
42  */

43 public class BenchmarkUriUniqFilters implements UriUniqFilter.HasUriReceiver {
44 // private Logger LOGGER =
45
// Logger.getLogger(BenchmarkUriUniqFilters.class.getName());
46

47     private BufferedWriter JavaDoc out; // optional to dump uniq items
48
String JavaDoc current; // current line/URI being checked
49

50     /**
51      * Test the UriUniqFilter implementation (MemUriUniqFilter,
52      * BloomUriUniqFilter, or BdbUriUniqFilter) named in first
53      * argument against the file of one-per-line URIs named
54      * in the second argument.
55      *
56      * @param args from cmd-line
57      * @throws IOException
58      */

59     public static void main(String JavaDoc[] args) throws IOException JavaDoc {
60         (new BenchmarkUriUniqFilters()).instanceMain(args);
61     }
62     
63     public void instanceMain(String JavaDoc[] args) throws IOException JavaDoc {
64         String JavaDoc testClass = args[0];
65         String JavaDoc inputFilename = args[1];
66         long start = System.currentTimeMillis();
67         UriUniqFilter uniq = createUriUniqFilter(testClass);
68         long created = System.currentTimeMillis();
69         BufferedReader JavaDoc br = new BufferedReader JavaDoc(new FileReader JavaDoc(inputFilename));
70         if(args.length>2) {
71             String JavaDoc outputFilename = args[2];
72             out = new BufferedWriter JavaDoc(new FileWriter JavaDoc(outputFilename));
73         }
74         int added = 0;
75         while((current=br.readLine())!=null) {
76             added++;
77             uniq.add(current,null);
78         }
79         uniq.close();
80         long finished = System.currentTimeMillis();
81         if(out!=null) {
82             out.close();
83         }
84         System.out.println(added+" adds");
85         System.out.println(uniq.count()+" retained");
86         System.out.println((created-start)+"ms to setup UUF");
87         System.out.println((finished-created)+"ms to perform all adds");
88     }
89     
90     private UriUniqFilter createUriUniqFilter(String JavaDoc testClass) throws IOException JavaDoc {
91         UriUniqFilter uniq = null;
92         if(BdbUriUniqFilter.class.getName().endsWith(testClass)) {;
93             // BDB setup
94
File JavaDoc tmpDir = File.createTempFile("uuf","benchmark");
95             tmpDir.delete();
96             tmpDir.mkdir();
97             uniq = new BdbUriUniqFilter(tmpDir, 50);
98         } else if(BloomUriUniqFilter.class.getName().endsWith(testClass)) {
99             // bloom setup
100
uniq = new BloomUriUniqFilter();
101         } else if(MemUriUniqFilter.class.getName().endsWith(testClass)) {
102             // mem hashset
103
uniq = new MemUriUniqFilter();
104         } else if (FPUriUniqFilter.class.getName().endsWith(testClass)) {
105             // mem fp set (open-addressing) setup
106
uniq = new FPUriUniqFilter(new MemLongFPSet(21,0.75f));
107         }
108         uniq.setDestination(this);
109         return uniq;
110     }
111
112     /* (non-Javadoc)
113      * @see org.archive.crawler.datamodel.UriUniqFilter.HasUriReceiver#receive(org.archive.crawler.datamodel.CandidateURI)
114      */

115     public void receive(CandidateURI item) {
116         if(out!=null) {
117             try {
118                 // we assume all tested filters are immediate passthrough so
119
// we can use 'current'; a buffering filter would change this
120
// assumption
121
out.write(current);
122                 out.write("\n");
123             } catch (IOException JavaDoc e) {
124                 // TODO Auto-generated catch block
125
e.printStackTrace();
126             }
127         }
128     }
129 }
Popular Tags