1 26 27 package org.archive.util; 28 29 import java.io.Serializable ; 30 import java.security.SecureRandom ; 31 32 71 public class BloomFilter32bit implements Serializable , BloomFilter { 72 73 private static final long serialVersionUID = -1567837798979475689L; 74 75 76 final public static int NUMBER_OF_WEIGHTS = 2083; 78 final public long m; 79 80 final public int d; 81 82 final private int[] bits; 83 84 final private int[][] weight; 85 86 90 private int size; 91 92 93 private final static double NATURAL_LOG_OF_2 = Math.log( 2 ); 94 95 private final static boolean DEBUG = false; 96 97 103 public BloomFilter32bit( final int n, final int d ) { 104 this.d = d; 105 int len = 106 (int)Math.ceil( ( (long)n * (long)d / NATURAL_LOG_OF_2 ) / 32 ); 107 this.m = len*32L; 108 if ( m >= 1L<<32 ) { 109 throw new IllegalArgumentException ( "This filter would require " + m + " bits" ); 110 } 111 bits = new int[ len ]; 112 113 if ( DEBUG ) System.err.println( "Number of bits: " + m ); 114 115 final SecureRandom random = new SecureRandom (new byte[] {19,96}); 119 weight = new int[ d ][]; 120 for( int i = 0; i < d; i++ ) { 121 weight[ i ] = new int[ NUMBER_OF_WEIGHTS ]; 122 for( int j = 0; j < NUMBER_OF_WEIGHTS; j++ ) 123 weight[ i ][ j ] = random.nextInt(); 124 } 125 } 126 127 131 132 public int size() { 133 return size; 134 } 135 136 143 private long hash( final CharSequence s, final int l, final int k ) { 144 final int[] w = weight[ k ]; 145 int h = 0, i = l; 146 while( i-- != 0 ) h ^= s.charAt( i ) * w[ i % NUMBER_OF_WEIGHTS ]; 147 return ((long)h-Integer.MIN_VALUE) % m; 148 } 149 150 162 163 public boolean contains( final CharSequence s ) { 164 int i = d, l = s.length(); 165 while( i-- != 0 ) if ( ! getBit( hash( s, l, i ) ) ) return false; 166 return true; 167 } 168 169 174 175 public boolean add( final CharSequence s ) { 176 boolean result = false; 177 int i = d, l = s.length(); 178 long h; 179 while( i-- != 0 ) { 180 h = hash( s, l, i ); 181 if ( ! getBit( h ) ) result = true; 182 setBit( h ); 183 } 184 if ( result ) size++; 185 return result; 186 } 187 188 protected final static long ADDRESS_BITS_PER_UNIT = 5; protected final static long BIT_INDEX_MASK = 31; 191 202 protected boolean getBit(long bitIndex) { 203 return ((bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] & (1 << (bitIndex & BIT_INDEX_MASK))) != 0); 204 } 205 206 213 protected void setBit(long bitIndex) { 214 bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] |= 1 << (bitIndex & BIT_INDEX_MASK); 215 } 216 217 220 public long getSizeBytes() { 221 return bits.length*4; 222 } 223 } 224 | Popular Tags |