1 26 27 package org.archive.util; 28 29 import java.io.Serializable ; 30 import java.security.SecureRandom ; 31 32 74 public class BloomFilter32bp2 implements Serializable , BloomFilter { 75 76 private static final long serialVersionUID = -2292902803681146635L; 77 78 79 final public static int NUMBER_OF_WEIGHTS = 2083; 81 final public long m; 82 83 final public long power; 85 final public int d; 86 87 final private int[] bits; 88 89 final private int[][] weight; 90 91 95 private int size; 96 97 98 private final static double NATURAL_LOG_OF_2 = Math.log( 2 ); 99 100 private final static boolean DEBUG = false; 101 102 108 public BloomFilter32bp2( final int n, final int d ) { 109 this.d = d; 110 long minBits = (long) ((long)n * (long)d / NATURAL_LOG_OF_2); 111 long pow = 0; 112 while((1L<<pow) < minBits) { 113 pow++; 114 } 115 this.power = pow; 116 this.m = 1L<<pow; 117 int len = (int) (m / 32); 118 if ( m > 1L<<32 ) { 119 throw new IllegalArgumentException ( "This filter would require " + m + " bits" ); 120 } 121 System.out.println("power "+power+" bits "+m+" len "+len); 122 123 bits = new int[ len ]; 124 125 if ( DEBUG ) System.err.println( "Number of bits: " + m ); 126 127 final SecureRandom random = new SecureRandom (new byte[] {19,96}); 131 weight = new int[ d ][]; 132 for( int i = 0; i < d; i++ ) { 133 weight[ i ] = new int[ NUMBER_OF_WEIGHTS ]; 134 for( int j = 0; j < NUMBER_OF_WEIGHTS; j++ ) 135 weight[ i ][ j ] = random.nextInt(); 136 } 137 } 138 139 143 144 public int size() { 145 return size; 146 } 147 148 155 private int hash( final CharSequence s, final int l, final int k ) { 156 final int[] w = weight[ k ]; 157 int h = 0, i = l; 158 while( i-- != 0 ) h ^= s.charAt( i ) * w[ i % NUMBER_OF_WEIGHTS ]; 159 return h >>> (32-power); 160 } 161 162 174 175 public boolean contains( final CharSequence s ) { 176 int i = d, l = s.length(); 177 while( i-- != 0 ) if ( ! getBit( hash( s, l, i ) ) ) return false; 178 return true; 179 } 180 181 186 187 public boolean add( final CharSequence s ) { 188 boolean result = false; 189 int i = d, l = s.length(); 190 int h; 191 while( i-- != 0 ) { 192 h = hash( s, l, i ); 193 if ( ! getBit( h ) ) result = true; 194 setBit( h ); 195 } 196 if ( result ) size++; 197 return result; 198 } 199 200 protected final static int ADDRESS_BITS_PER_UNIT = 5; protected final static int BIT_INDEX_MASK = 31; 203 214 protected boolean getBit(int bitIndex) { 215 return ((bits[(int)(bitIndex >>> ADDRESS_BITS_PER_UNIT)] & (1 << (bitIndex & BIT_INDEX_MASK))) != 0); 216 } 217 218 225 protected void setBit(int bitIndex) { 226 bits[(int)(bitIndex >>> ADDRESS_BITS_PER_UNIT)] |= 1 << (bitIndex & BIT_INDEX_MASK); 227 } 228 229 232 public long getSizeBytes() { 233 return bits.length*4; 234 } 235 } 236 | Popular Tags |