1 25 package org.archive.crawler.util; 26 27 import java.io.BufferedOutputStream ; 28 import java.io.File ; 29 import java.io.FileNotFoundException ; 30 import java.io.FileOutputStream ; 31 import java.io.PrintWriter ; 32 import java.util.logging.Level ; 33 import java.util.logging.Logger ; 34 35 import org.archive.crawler.datamodel.CandidateURI; 36 import org.archive.crawler.datamodel.UriUniqFilter; 37 38 43 public abstract class SetBasedUriUniqFilter implements UriUniqFilter { 44 private static Logger LOGGER = 45 Logger.getLogger(SetBasedUriUniqFilter.class.getName()); 46 47 protected HasUriReceiver receiver; 48 protected PrintWriter profileLog; 49 protected long duplicateCount = 0; 50 protected long duplicatesAtLastSample = 0; 51 52 public SetBasedUriUniqFilter() { 53 super(); 54 String profileLogFile = 55 System.getProperty(SetBasedUriUniqFilter.class.getName() 56 + ".profileLogFile"); 57 if (profileLogFile != null) { 58 setProfileLog(new File (profileLogFile)); 59 } 60 } 61 62 protected abstract boolean setAdd(CharSequence key); 63 64 protected abstract boolean setRemove(CharSequence key); 65 66 protected abstract long setCount(); 67 68 public long count() { 69 return setCount(); 70 } 71 72 public long pending() { 73 return 0; 75 } 76 77 public void setDestination(HasUriReceiver receiver) { 78 this.receiver = receiver; 79 } 80 81 protected void profileLog(String key) { 82 if (profileLog != null) { 83 profileLog.println(key); 84 } 85 } 86 87 public void add(String key, CandidateURI value) { 88 profileLog(key); 89 if (setAdd(key)) { 90 this.receiver.receive(value); 91 if (setCount() % 50000 == 0) { 92 LOGGER.log(Level.FINE, "count: " + setCount() + " totalDups: " 93 + duplicateCount + " recentDups: " 94 + (duplicateCount - duplicatesAtLastSample)); 95 duplicatesAtLastSample = duplicateCount; 96 } 97 } else { 98 duplicateCount++; 99 } 100 } 101 102 public void addNow(String key, CandidateURI value) { 103 add(key, value); 104 } 105 106 public void addForce(String key, CandidateURI value) { 107 profileLog(key); 108 setAdd(key); 109 this.receiver.receive(value); 110 } 111 112 public void note(String key) { 113 profileLog(key); 114 setAdd(key); 115 } 116 117 public void forget(String key, CandidateURI value) { 118 setRemove(key); 119 } 120 121 public long requestFlush() { 122 return 0; 124 } 125 126 public void close() { 127 if (profileLog != null) { 128 profileLog.close(); 129 } 130 } 131 132 public void setProfileLog(File logfile) { 133 try { 134 profileLog = new PrintWriter (new BufferedOutputStream ( 135 new FileOutputStream (logfile))); 136 } catch (FileNotFoundException e) { 137 throw new RuntimeException (e); 138 } 139 } 140 } 141 | Popular Tags |