1 25 package org.archive.crawler.util; 26 import java.io.BufferedReader ; 27 import java.io.BufferedWriter ; 28 import java.io.File ; 29 import java.io.FileReader ; 30 import java.io.FileWriter ; 31 import java.io.IOException ; 32 33 import org.archive.crawler.datamodel.CandidateURI; 34 import org.archive.crawler.datamodel.UriUniqFilter; 35 import org.archive.util.fingerprint.MemLongFPSet; 36 37 38 43 public class BenchmarkUriUniqFilters implements UriUniqFilter.HasUriReceiver { 44 47 private BufferedWriter out; String current; 50 59 public static void main(String [] args) throws IOException { 60 (new BenchmarkUriUniqFilters()).instanceMain(args); 61 } 62 63 public void instanceMain(String [] args) throws IOException { 64 String testClass = args[0]; 65 String inputFilename = args[1]; 66 long start = System.currentTimeMillis(); 67 UriUniqFilter uniq = createUriUniqFilter(testClass); 68 long created = System.currentTimeMillis(); 69 BufferedReader br = new BufferedReader (new FileReader (inputFilename)); 70 if(args.length>2) { 71 String outputFilename = args[2]; 72 out = new BufferedWriter (new FileWriter (outputFilename)); 73 } 74 int added = 0; 75 while((current=br.readLine())!=null) { 76 added++; 77 uniq.add(current,null); 78 } 79 uniq.close(); 80 long finished = System.currentTimeMillis(); 81 if(out!=null) { 82 out.close(); 83 } 84 System.out.println(added+" adds"); 85 System.out.println(uniq.count()+" retained"); 86 System.out.println((created-start)+"ms to setup UUF"); 87 System.out.println((finished-created)+"ms to perform all adds"); 88 } 89 90 private UriUniqFilter createUriUniqFilter(String testClass) throws IOException { 91 UriUniqFilter uniq = null; 92 if(BdbUriUniqFilter.class.getName().endsWith(testClass)) {; 93 File tmpDir = File.createTempFile("uuf","benchmark"); 95 tmpDir.delete(); 96 tmpDir.mkdir(); 97 uniq = new BdbUriUniqFilter(tmpDir, 50); 98 } else if(BloomUriUniqFilter.class.getName().endsWith(testClass)) { 99 uniq = new BloomUriUniqFilter(); 101 } else if(MemUriUniqFilter.class.getName().endsWith(testClass)) { 102 uniq = new MemUriUniqFilter(); 104 } else if (FPUriUniqFilter.class.getName().endsWith(testClass)) { 105 uniq = new FPUriUniqFilter(new MemLongFPSet(21,0.75f)); 107 } 108 uniq.setDestination(this); 109 return uniq; 110 } 111 112 115 public void receive(CandidateURI item) { 116 if(out!=null) { 117 try { 118 out.write(current); 122 out.write("\n"); 123 } catch (IOException e) { 124 e.printStackTrace(); 126 } 127 } 128 } 129 } | Popular Tags |