1 26 27 package org.archive.util; 28 29 import java.io.Serializable ; 30 import java.security.SecureRandom ; 31 32 71 public class BloomFilter32bitSplit implements Serializable , BloomFilter { 72 73 private static final long serialVersionUID = -164106965277863971L; 74 75 76 final public static int NUMBER_OF_WEIGHTS = 2083; 78 final public long m; 79 80 final public int d; 81 82 final private int[][] bits; 84 85 final private int[][] weight; 86 87 91 private int size; 92 93 94 private final static double NATURAL_LOG_OF_2 = Math.log( 2 ); 95 96 97 private final static int ONE_MB_INTS = 1 << 18; 99 private final static boolean DEBUG = false; 100 101 107 public BloomFilter32bitSplit( final int n, final int d ) { 108 this.d = d; 109 int len = 110 (int)Math.ceil( ( (long)n * (long)d / NATURAL_LOG_OF_2 ) / 32 ); 111 len = ((len / ONE_MB_INTS)+1)*ONE_MB_INTS; 113 this.m = len*32L; 114 if ( m >= 1L<<32 ) { 115 throw new IllegalArgumentException ( "This filter would require " + m + " bits" ); 116 } 117 bits = new int[ len/ONE_MB_INTS ][ONE_MB_INTS]; 119 120 if ( DEBUG ) System.err.println( "Number of bits: " + m ); 121 122 final SecureRandom random = new SecureRandom (new byte[] {19,96}); 126 weight = new int[ d ][]; 127 for( int i = 0; i < d; i++ ) { 128 weight[ i ] = new int[ NUMBER_OF_WEIGHTS ]; 129 for( int j = 0; j < NUMBER_OF_WEIGHTS; j++ ) 130 weight[ i ][ j ] = random.nextInt(); 131 } 132 } 133 134 138 139 public int size() { 140 return size; 141 } 142 143 150 private long hash( final CharSequence s, final int l, final int k ) { 151 final int[] w = weight[ k ]; 152 int h = 0, i = l; 153 while( i-- != 0 ) h ^= s.charAt( i ) * w[ i % NUMBER_OF_WEIGHTS ]; 154 return ((long)h-Integer.MIN_VALUE) % m; 155 } 156 157 169 170 public boolean contains( final CharSequence s ) { 171 int i = d, l = s.length(); 172 while( i-- != 0 ) if ( ! getBit( hash( s, l, i ) ) ) return false; 173 return true; 174 } 175 176 181 182 public boolean add( final CharSequence s ) { 183 boolean result = false; 184 int i = d, l = s.length(); 185 long h; 186 while( i-- != 0 ) { 187 h = hash( s, l, i ); 188 if ( ! setGetBit( h ) ) result = true; 189 } 190 if ( result ) size++; 191 return result; 192 } 193 194 protected final static long ADDRESS_BITS_PER_UNIT = 5; protected final static long BIT_INDEX_MASK = 31; 197 208 protected boolean getBit(long bitIndex) { 209 int intIndex = (int) (bitIndex >>> ADDRESS_BITS_PER_UNIT); 210 return ((bits[intIndex / ONE_MB_INTS][intIndex % ONE_MB_INTS] & (1 << (bitIndex & BIT_INDEX_MASK))) != 0); 211 } 212 213 220 protected void setBit(long bitIndex) { 221 int intIndex = (int) (bitIndex >>> ADDRESS_BITS_PER_UNIT); 222 bits[intIndex / ONE_MB_INTS][intIndex % ONE_MB_INTS] |= 1 << (bitIndex & BIT_INDEX_MASK); 223 } 224 225 233 protected boolean setGetBit(long bitIndex) { 234 int intIndex = (int) (bitIndex >>> ADDRESS_BITS_PER_UNIT); 235 int a = intIndex / ONE_MB_INTS; 236 int b = intIndex % ONE_MB_INTS; 237 int mask = 1 << (bitIndex & BIT_INDEX_MASK); 238 boolean ret = ((bits[a][b] & (mask)) != 0); 239 bits[a][b] |= mask; 240 return ret; 241 } 242 243 246 public long getSizeBytes() { 247 return bits.length*bits[0].length*4; 248 } 249 } 250 | Popular Tags |