KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > util > SetBasedUriUniqFilter


1 /* UriUniqFilterImpl
2 *
3 * $Id: SetBasedUriUniqFilter.java,v 1.5 2005/12/16 03:10:54 gojomo Exp $
4 *
5 * Created on Sep 29, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.util;
26
27 import java.io.BufferedOutputStream JavaDoc;
28 import java.io.File JavaDoc;
29 import java.io.FileNotFoundException JavaDoc;
30 import java.io.FileOutputStream JavaDoc;
31 import java.io.PrintWriter JavaDoc;
32 import java.util.logging.Level JavaDoc;
33 import java.util.logging.Logger JavaDoc;
34
35 import org.archive.crawler.datamodel.CandidateURI;
36 import org.archive.crawler.datamodel.UriUniqFilter;
37
38 /**
39  * UriUniqFilter based on an underlying UriSet (essentially a Set).
40  *
41  * @author gojomo
42  */

43 public abstract class SetBasedUriUniqFilter implements UriUniqFilter {
44     private static Logger JavaDoc LOGGER =
45         Logger.getLogger(SetBasedUriUniqFilter.class.getName());
46
47     protected HasUriReceiver receiver;
48     protected PrintWriter JavaDoc profileLog;
49     protected long duplicateCount = 0;
50     protected long duplicatesAtLastSample = 0;
51     
52     public SetBasedUriUniqFilter() {
53         super();
54         String JavaDoc profileLogFile =
55             System.getProperty(SetBasedUriUniqFilter.class.getName()
56                 + ".profileLogFile");
57         if (profileLogFile != null) {
58             setProfileLog(new File JavaDoc(profileLogFile));
59         }
60     }
61     
62     protected abstract boolean setAdd(CharSequence JavaDoc key);
63
64     protected abstract boolean setRemove(CharSequence JavaDoc key);
65
66     protected abstract long setCount();
67     
68     public long count() {
69         return setCount();
70     }
71
72     public long pending() {
73         // no items pile up in this implementation
74
return 0;
75     }
76
77     public void setDestination(HasUriReceiver receiver) {
78         this.receiver = receiver;
79     }
80
81     protected void profileLog(String JavaDoc key) {
82         if (profileLog != null) {
83             profileLog.println(key);
84         }
85     }
86     
87     public void add(String JavaDoc key, CandidateURI value) {
88         profileLog(key);
89         if (setAdd(key)) {
90             this.receiver.receive(value);
91             if (setCount() % 50000 == 0) {
92                 LOGGER.log(Level.FINE, "count: " + setCount() + " totalDups: "
93                         + duplicateCount + " recentDups: "
94                         + (duplicateCount - duplicatesAtLastSample));
95                 duplicatesAtLastSample = duplicateCount;
96             }
97         } else {
98             duplicateCount++;
99         }
100     }
101
102     public void addNow(String JavaDoc key, CandidateURI value) {
103         add(key, value);
104     }
105     
106     public void addForce(String JavaDoc key, CandidateURI value) {
107         profileLog(key);
108         setAdd(key);
109         this.receiver.receive(value);
110     }
111
112     public void note(String JavaDoc key) {
113         profileLog(key);
114         setAdd(key);
115     }
116
117     public void forget(String JavaDoc key, CandidateURI value) {
118         setRemove(key);
119     }
120
121     public long requestFlush() {
122         // unnecessary; all actions with set-based uniqfilter are immediate
123
return 0;
124     }
125
126     public void close() {
127         if (profileLog != null) {
128             profileLog.close();
129         }
130     }
131
132     public void setProfileLog(File JavaDoc logfile) {
133         try {
134             profileLog = new PrintWriter JavaDoc(new BufferedOutputStream JavaDoc(
135                     new FileOutputStream JavaDoc(logfile)));
136         } catch (FileNotFoundException JavaDoc e) {
137             throw new RuntimeException JavaDoc(e);
138         }
139     }
140 }
141
Popular Tags