1 26 27 package org.archive.util; 28 29 import java.io.Serializable ; 30 import java.security.SecureRandom ; 31 32 74 public class BloomFilter32bp2Split implements Serializable , BloomFilter { 75 76 private static final long serialVersionUID = -1504889954381695129L; 77 78 79 final public static int NUMBER_OF_WEIGHTS = 2083; 81 final public long m; 82 83 final public long power; 85 final public int d; 86 87 final private int[][] bits; 88 89 final private int aShift; 90 91 final private int bMask; 92 93 final private int[][] weight; 94 95 99 private int size; 100 101 102 private final static double NATURAL_LOG_OF_2 = Math.log( 2 ); 103 104 private final static boolean DEBUG = false; 105 106 112 public BloomFilter32bp2Split( final int n, final int d ) { 113 this.d = d; 114 long minBits = (long) ((long)n * (long)d / NATURAL_LOG_OF_2); 115 long pow = 0; 116 while((1L<<pow) < minBits) { 117 pow++; 118 } 119 this.power = pow; 120 this.m = 1L<<pow; 121 int len = (int) (m / 32); 122 if ( m > 1L<<32 ) { 123 throw new IllegalArgumentException ( "This filter would require " + m + " bits" ); 124 } 125 126 aShift = (int) (pow - ADDRESS_BITS_PER_UNIT - 8); 127 bMask = (1<<aShift) - 1; 128 bits = new int[256][ 1<<aShift ]; 129 130 System.out.println("power "+power+" bits "+m+" len "+len); 131 System.out.println("aShift "+aShift+" bMask "+bMask); 132 133 if ( DEBUG ) System.err.println( "Number of bits: " + m ); 134 135 final SecureRandom random = new SecureRandom (new byte[] {19,96}); 139 weight = new int[ d ][]; 140 for( int i = 0; i < d; i++ ) { 141 weight[ i ] = new int[ NUMBER_OF_WEIGHTS ]; 142 for( int j = 0; j < NUMBER_OF_WEIGHTS; j++ ) 143 weight[ i ][ j ] = random.nextInt(); 144 } 145 } 146 147 151 152 public int size() { 153 return size; 154 } 155 156 163 private int hash( final CharSequence s, final int l, final int k ) { 164 final int[] w = weight[ k ]; 165 int h = 0, i = l; 166 while( i-- != 0 ) h ^= s.charAt( i ) * w[ i % NUMBER_OF_WEIGHTS ]; 167 return h >>> (32-power); 168 } 169 170 182 183 public boolean contains( final CharSequence s ) { 184 int i = d, l = s.length(); 185 while( i-- != 0 ) if ( ! getBit( hash( s, l, i ) ) ) return false; 186 return true; 187 } 188 189 194 195 public boolean add( final CharSequence s ) { 196 boolean result = false; 197 int i = d, l = s.length(); 198 int h; 199 while( i-- != 0 ) { 200 h = hash( s, l, i ); 201 if ( ! setGetBit( h ) ) result = true; 202 } 203 if ( result ) size++; 204 return result; 205 } 206 207 protected final static int ADDRESS_BITS_PER_UNIT = 5; protected final static int BIT_INDEX_MASK = 31; 210 221 protected boolean getBit(int bitIndex) { 222 int intIndex = (int)(bitIndex >>> ADDRESS_BITS_PER_UNIT); 223 return ((bits[intIndex>>>aShift][intIndex&bMask] & (1 << (bitIndex & BIT_INDEX_MASK))) != 0); 224 } 225 226 233 protected void setBit(int bitIndex) { 234 int intIndex = (int)(bitIndex >>> ADDRESS_BITS_PER_UNIT); 235 bits[intIndex>>>aShift][intIndex&bMask] |= 1 << (bitIndex & BIT_INDEX_MASK); 236 } 237 238 246 protected boolean setGetBit(int bitIndex) { 247 int intIndex = (int)(bitIndex >>> ADDRESS_BITS_PER_UNIT); 248 int a = intIndex>>>aShift; 249 int b = intIndex&bMask; 250 int mask = 1 << (bitIndex & BIT_INDEX_MASK); 251 boolean ret = ((bits[a][b] & (mask)) != 0); 252 bits[a][b] |= mask; 253 return ret; 254 } 255 256 259 public long getSizeBytes() { 260 return bits.length*bits[0].length*4; 261 } 262 } 263 | Popular Tags |