KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > util > AbstractLongFPSet


1 /* AbstractLongFPSet
2  *
3  * $Id: AbstractLongFPSet.java,v 1.11 2005/05/06 02:48:59 stack-sf Exp $
4  *
5  * Created on Oct 20, 2003
6  *
7  * Copyright (C) 2003 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.util;
26
27 import java.io.Serializable JavaDoc;
28 import java.util.logging.Logger JavaDoc;
29
30 import org.archive.util.fingerprint.LongFPSet;
31
32 /**
33  * Shell of functionality for a Set of primitive long fingerprints, held
34  * in an array of possibly-empty slots.
35  *
36  * The implementation of that holding array is delegated to subclasses.
37  *
38  * <p>Capacity is always a power of 2.
39  *
40  * <p>Fingerprints are already assumed to be well-distributed, so the
41  * hashed position for a value is just its high-order bits.
42  *
43  * @author gojomo
44  * @version $Date: 2005/05/06 02:48:59 $, $Revision: 1.11 $
45  */

46 public abstract class AbstractLongFPSet implements LongFPSet, Serializable JavaDoc {
47     private static Logger JavaDoc logger =
48         Logger.getLogger("org.archive.util.AbstractLongFPSet");
49
50     /**
51      * A constant used to indicate that a slot in the set storage is empty.
52      * A zero or positive value means slot is filled
53      */

54     protected static byte EMPTY = -1;
55
56     /** the capacity of this set, specified as the exponent of a power of 2 */
57     protected int capacityPowerOfTwo;
58
59     /** The load factor, as a fraction. This gives the amount of free space
60      * to keep in the Set. */

61     protected float loadFactor;
62
63     /** The current number of elements in the set */
64     protected long count;
65
66     /**
67      * To support serialization
68      * TODO: verify needed?
69      */

70     public AbstractLongFPSet() {
71         super();
72     }
73     
74     /**
75      * Create a new AbstractLongFPSet with a given capacity and load Factor
76      *
77      * @param capacityPowerOfTwo The capacity as the exponent of a power of 2.
78      * e.g if the capacity is <code>4</code> this means <code>2^^4</code>
79      * entries
80      * @param loadFactor The load factor as a fraction. This gives the amount
81      * of free space to keep in the Set.
82      */

83     public AbstractLongFPSet(final int capacityPowerOfTwo, float loadFactor) {
84         this.capacityPowerOfTwo = capacityPowerOfTwo;
85         this.loadFactor = loadFactor;
86         this.count = 0;
87     }
88
89     /**
90      * Does this set contain the given value?
91      *
92      * @see org.archive.util.fingerprint.LongFPSet#contains(long)
93      */

94     public boolean contains(long val) {
95         long i = indexFor(val);
96         if (slotHasData(i)) {
97             noteAccess(i);
98             return true;
99         }
100         return false;
101     }
102
103     /**
104      * Check the state of a slot in the storage.
105      *
106      * @param i the index of the slot to check
107      * @return -1 if slot is filled; nonegative if full.
108      */

109     protected abstract int getSlotState(long i);
110
111     /**
112      * Note access (hook for subclass cache-replacement strategies)
113      *
114      * @param index The index of the slot to check.
115      */

116     private void noteAccess(long index) {
117         // by default do nothing
118
// cache subclasses may use to update access counts, etc.
119
}
120
121     /**
122      * Return the number of entries in this set.
123      *
124      * @see org.archive.util.fingerprint.LongFPSet#count()
125      */

126     public long count() {
127         return count;
128     }
129
130     /**
131      * Add the given value to this set
132      *
133      * @see org.archive.util.fingerprint.LongFPSet#add(long)
134      */

135     public boolean add(long val) {
136         logger.finest("Adding " + val);
137         long i = indexFor(val);
138         if (slotHasData(i)) {
139             // positive index indicates already in set
140
return false;
141         }
142         // we have a possible slot now, which is encoded as a negative number
143

144         // check for space, and grow if needed
145
if ((count + 1) > (loadFactor * (1 << capacityPowerOfTwo))) {
146             makeSpace();
147             // find new i
148
i = indexFor(val);
149             assert i < 0 : "slot should be empty";
150         }
151
152         i = asDataSlot(i); // convert to positive index
153
setAt(i, val);
154         count++;
155         noteAccess(i);
156         return true;
157     }
158
159     /**
160      * Make additional space to keep the load under the target
161      * loadFactor level.
162      *
163      * Subclasses may grow or discard entries to satisfy.
164      */

165     protected abstract void makeSpace();
166
167     /**
168      * Set the stored value at the given slot.
169      *
170      * @param i the slot index
171      * @param l the value to set
172      */

173     protected abstract void setAt(long i, long l);
174
175     /**
176      * Get the stored value at the given slot.
177      *
178      * @param i the slot index
179      * @return The stored value at the given slot.
180      */

181     protected abstract long getAt(long i);
182
183     /**
184      * Given a value, check the store for its existence. If it exists, it
185      * will return the index where the value resides. Otherwise it return
186      * an encoded index, which is a possible storage location for the value.
187      *
188      * <p>Note, if we have a loading factor less than 1.0, there should always
189      * be an empty location where we can store the value
190      *
191      * @param val the fingerprint value to check for
192      * @return The (positive) index where the value already resides,
193      * or an empty index where it could be inserted (encoded as a
194      * negative number).
195      */

196     private long indexFor(long val) {
197         long candidateIndex = startIndexFor(val);
198         while (true) {
199             if (getSlotState(candidateIndex) < 0) {
200                 // slot empty; return negative number encoding index
201
return asEmptySlot(candidateIndex);
202             }
203             if (getAt(candidateIndex) == val) {
204                 // already present; return positive index
205
return candidateIndex;
206             }
207             candidateIndex++;
208             if (candidateIndex == 1 << capacityPowerOfTwo) {
209                 candidateIndex = 0; // wraparound
210
}
211         }
212     }
213
214     /**
215      * Return the recommended storage index for the given value.
216      * Assumes values are already well-distributed; merely uses
217      * high-order bits.
218      *
219      * @param val
220      * @return The recommended storage index for the given value.
221      */

222     private long startIndexFor(long val) {
223         return (val >>> (64 - capacityPowerOfTwo));
224     }
225
226     public boolean remove(long l) {
227         long i = indexFor(l);
228         if (!slotHasData(i)) {
229             // not present, not changed
230
return false;
231         }
232         removeAt(i);
233         return true;
234     }
235
236     /**
237      * Remove the value at the given index, relocating its
238      * successors as necessary.
239      *
240      * @param index
241      */

242     protected void removeAt(long index) {
243         count--;
244         clearAt(index);
245         long probeIndex = index + 1;
246         while (true) {
247             if (probeIndex == 1 << capacityPowerOfTwo) {
248                 probeIndex = 0; //wraparound
249
}
250             if (getSlotState(probeIndex) < 0) {
251                 // vacant
252
break;
253             }
254             long val = getAt(probeIndex);
255             long newIndex = indexFor(val);
256             if (newIndex != probeIndex) {
257                 // value must shift down
258
newIndex = asDataSlot(newIndex); // positivize
259
relocate(val, probeIndex, newIndex);
260             }
261             probeIndex++;
262         }
263     }
264
265     protected abstract void clearAt(long index);
266
267     protected abstract void relocate(long value, long fromIndex, long toIndex);
268
269     /**
270      * Low-cost, non-definitive (except when true) contains
271      * test. Default answer of false is acceptable.
272      *
273      * @see org.archive.util.fingerprint.LongFPSet#quickContains(long)
274      */

275     public boolean quickContains(long fp) {
276         return false;
277     }
278
279     /**
280      * given a slot index, which could or could not be empty, return it as
281      * a slot index indicating an non-empty slot
282      *
283      * @param index the slot index to convert
284      * @return the index, converted to represent an slot with data
285      */

286     private long asDataSlot(final long index) {
287         if (slotHasData(index)) { // slot already has data
288
return index;
289         }
290         return - (index + 1);
291     }
292
293     /**
294      * Given a slot index, which could or could not be empty, return it as
295      * a slot index indicating an empty slot
296      * @param index the slot index to convert
297      * @return the index, converted to represent an empty slot
298      */

299     private long asEmptySlot(final long index) {
300         if (!slotHasData(index)) { // already empty slot
301
return index;
302         }
303         return -index - 1;
304     }
305
306     /**
307      * Does this index represent a slot with data?
308      *
309      * @param index the index to check
310      * @return <code>true</code> if the slot has data
311      */

312     private boolean slotHasData(final long index) {
313         return index >= 0;
314     }
315 }
Popular Tags