1 26 27 package org.archive.util; 28 29 import java.io.Serializable ; 30 import java.security.SecureRandom ; 31 32 71 public class BloomFilter64bit implements Serializable , BloomFilter { 72 73 private static final long serialVersionUID = 2317000663009608403L; 74 75 76 final public static int NUMBER_OF_WEIGHTS = 2083; 78 final public long m; 79 80 final public int d; 81 82 final private long[] bits; 83 84 final private long[][] weight; 85 86 90 private int size; 91 92 93 private final static double NATURAL_LOG_OF_2 = Math.log( 2 ); 94 95 private final static boolean DEBUG = false; 96 97 103 public BloomFilter64bit( final int n, final int d ) { 104 this.d = d; 105 int len = (int)Math.ceil( ( (long)n * (long)d / NATURAL_LOG_OF_2 ) / 64L ); 106 if ( len/64 > Integer.MAX_VALUE ) throw new IllegalArgumentException ( "This filter would require " + len * 64L + " bits" ); 107 bits = new long[ len ]; 108 m = bits.length * 64; 109 110 if ( DEBUG ) System.err.println( "Number of bits: " + m ); 111 112 final SecureRandom random = new SecureRandom (new byte[] {19,96}); 116 weight = new long[ d ][]; 117 for( int i = 0; i < d; i++ ) { 118 weight[ i ] = new long[ NUMBER_OF_WEIGHTS ]; 119 for( int j = 0; j < NUMBER_OF_WEIGHTS; j++ ) 120 weight[ i ][ j ] = random.nextLong(); 121 } 122 } 123 124 128 129 public int size() { 130 return size; 131 } 132 133 140 141 private long hash( final CharSequence s, final int l, final int k ) { 142 final long[] w = weight[ k ]; 143 long h = 0; 144 int i = l; 145 while( i-- != 0 ) h ^= s.charAt( i ) * w[ i % NUMBER_OF_WEIGHTS ]; 146 return ( h & 0x7FFFFFFFFFFFFFFFL ) % m; 147 } 148 149 161 162 public boolean contains( final CharSequence s ) { 163 int i = d, l = s.length(); 164 while( i-- != 0 ) if ( ! getBit( hash( s, l, i ) ) ) return false; 165 return true; 166 } 167 168 173 174 public boolean add( final CharSequence s ) { 175 boolean result = false; 176 int i = d, l = s.length(); 177 long h; 178 while( i-- != 0 ) { 179 h = hash( s, l, i ); 180 if ( ! getBit( h ) ) result = true; 181 setBit( h ); 182 } 183 if ( result ) size++; 184 return result; 185 } 186 187 protected final static long ADDRESS_BITS_PER_UNIT = 6; protected final static long BIT_INDEX_MASK = 63; 190 201 protected boolean getBit(long bitIndex) { 202 return ((bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] & (1L << (bitIndex & BIT_INDEX_MASK))) != 0); 203 } 204 205 212 protected void setBit( long bitIndex) { 213 bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] |= 1L << (bitIndex & BIT_INDEX_MASK); 214 } 215 216 219 public long getSizeBytes() { 220 return bits.length*8; 221 } 222 } 223 | Popular Tags |