KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > gov > nasa > jpf > util > Md5Set


1 //
2
// Copyright (C) 2005 United States Government as represented by the
3
// Administrator of the National Aeronautics and Space Administration
4
// (NASA). All Rights Reserved.
5
//
6
// This software is distributed under the NASA Open Source Agreement
7
// (NOSA), version 1.3. The NOSA has been approved by the Open Source
8
// Initiative. See the file NOSA-1.3-JPF at the top of the distribution
9
// directory tree for the complete NOSA document.
10
//
11
// THE SUBJECT SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY OF ANY
12
// KIND, EITHER EXPRESSED, IMPLIED, OR STATUTORY, INCLUDING, BUT NOT
13
// LIMITED TO, ANY WARRANTY THAT THE SUBJECT SOFTWARE WILL CONFORM TO
14
// SPECIFICATIONS, ANY IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
15
// A PARTICULAR PURPOSE, OR FREEDOM FROM INFRINGEMENT, ANY WARRANTY THAT
16
// THE SUBJECT SOFTWARE WILL BE ERROR FREE, OR ANY WARRANTY THAT
17
// DOCUMENTATION, IF PROVIDED, WILL CONFORM TO THE SUBJECT SOFTWARE.
18
//
19

20 package gov.nasa.jpf.util;
21
22 /**
23  * A specialized set that can store Md5 checksums efficiently. It will also
24  * assign each element of the set a unique and sequential integer Id (0 to the
25  * size of the set - 1).
26  *
27  * This implementation does not allocate an individual object for each element
28  * of the set.
29  *
30  * We balance the tree using red-black balancing after each add operation.
31  * The tree will stay roughly balanced.
32  *
33  * @author Owen O'Malley
34  */

35 public class Md5Set
36 {
37   public static final int NULL = -1;
38   private static final int DEFAULT_INITIAL_CAPACITY = 10000;
39
40   // Various values controlling the offset of the data within the array.
41
private static final int VALUE_OFFSET = 0;
42   private static final int VALUE_SIZE = 4;
43   private static final int LEFT_OFFSET = VALUE_OFFSET + VALUE_SIZE;
44   private static final int RIGHT_OFFSET = LEFT_OFFSET + 1;
45   private static final int ELEMENT_SIZE = RIGHT_OFFSET + 1;
46
47   private static final int BYTES_PER_INT = 4;
48
49   private int size = 0;
50   private DynamicIntArray data;
51   private int root = 0;
52
53   /**
54    * Create a set with a default initial capacity.
55    */

56   public Md5Set() {
57     data = new DynamicIntArray(DEFAULT_INITIAL_CAPACITY * ELEMENT_SIZE);
58   }
59
60   /**
61    * Create a set with the given initial capacity.
62    */

63   public Md5Set(int initial_capacity) {
64     data = new DynamicIntArray(initial_capacity * ELEMENT_SIZE);
65   }
66
67   /**
68    * Insert a new node into the data array, growing the array as necessary.
69    *
70    * @return Returns the position of the new node.
71    */

72   private int insert(int[] val, int left, int right) {
73     int position = size;
74     size += 1;
75     int offset = position * ELEMENT_SIZE + VALUE_OFFSET;
76     data.grow((position + 1) * ELEMENT_SIZE);
77     for(int i=0; i < VALUE_SIZE; ++i) {
78       data.set(offset + i, val[i]);
79     }
80     setLeft(position, left);
81     setRight(position, right);
82     setRed(position, true);
83     return position;
84   }
85
86   /**
87    * Convert bytes into a single integer.
88    */

89   private int formInt(byte[] val, int offset) {
90     int result = 0;
91     for(int i=0; i < VALUE_SIZE; ++i) {
92       result = (result << 8) | (val[offset + i] & 255);
93     }
94     return result;
95   }
96
97   /**
98    * I keep this array around so that I can reuse it with each addition.
99    * The only access should be through convertValueToInts.
100    */

101   private int[] new_val = new int[VALUE_SIZE];
102
103   /**
104    * Convert the array of bytes to an array of ints.
105    */

106   private int[] convertValueToInts(byte[] val) {
107     assert val.length == VALUE_SIZE * BYTES_PER_INT;
108     for(int i=0; i < VALUE_SIZE; ++i) {
109       new_val[i] = formInt(val, i * BYTES_PER_INT);
110     }
111     return new_val;
112   }
113
114   /**
115    * Compare the value at the given position to the new value.
116    * @return 0 if the values are the same, -1 if the new value is smaller and
117    * 1 if the new value is larger.
118    */

119   private int compareValue(int[] val, int position) {
120     int offset = position * ELEMENT_SIZE + VALUE_OFFSET;
121     for(int i=0; i < VALUE_SIZE; ++i) {
122       int array_val = data.get(offset + i);
123       if (array_val > val[i]) {
124         return -1;
125       } else if (array_val < val[i]) {
126         return 1;
127       }
128     }
129     return 0;
130   }
131
132   /**
133    * Is the given node red as opposed to black? To prevent having an extra word
134    * in the data array, we just the low bit on the left child index.
135    */

136   private boolean isRed(int position) {
137     if (position == NULL) {
138       return false;
139     }
140     return (data.get(position * ELEMENT_SIZE + LEFT_OFFSET) & 1) == 1;
141   }
142
143   /**
144    * Set the red bit true or false.
145    */

146   private void setRed(int position, boolean is_red) {
147     int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
148     if (is_red) {
149       data.set(offset, data.get(offset) | 1);
150     } else {
151       data.set(offset, data.get(offset) & ~1);
152     }
153   }
154
155   /**
156    * Get the left field of the given position.
157    */

158   private int getLeft(int position) {
159     return data.get(position * ELEMENT_SIZE + LEFT_OFFSET) >> 1;
160   }
161
162   /**
163    * Get the right field of the given position.
164    */

165   private int getRight(int position) {
166     return data.get(position * ELEMENT_SIZE + RIGHT_OFFSET);
167   }
168
169   /**
170    * Set the left field of the given position.
171    * Note that we are storing the node color in the low bit of the left pointer.
172    */

173   private void setLeft(int position, int left) {
174     int offset = position * ELEMENT_SIZE + LEFT_OFFSET;
175     data.set(offset, (left << 1) | (data.get(offset) & 1));
176   }
177
178   /**
179    * Set the right field of the given position.
180    */

181   private void setRight(int position, int right) {
182     data.set(position * ELEMENT_SIZE + RIGHT_OFFSET, right);
183   }
184
185   /**
186    * Rebalance the red-black aspect of the tree to restore the invariants.
187    * Invariants:
188    * 1. If a node is red, both of its children are black.
189    * 2. Each child of a node has the same black height (the number of black
190    * nodes between it and the leaves of the tree).
191    *
192    * Inserted nodes are at the leaves and are red, therefore there is at most a
193    * violation of rule 1 at the node we just put in. Instead of always keeping
194    * the parents, this routine passing down the context.
195    *
196    * The fix is broken down into 6 cases (1.{1,2,3} and 2.{1,2,3} that are
197    * left-right mirror images of each other). See Algorighms by Cormen,
198    * Leiserson, and Rivest for the explaination of the subcases.
199    *
200    * @param goal The value of the node that we added.
201    * @param node The node that we are fixing right now.
202    * @param parent Nodes' parent
203    * @param grandparent Parent's parent
204    * @param great_grandparent Grandparent's parent
205    * @return Does parent also need to be checked and/or fixed?
206    */

207   private boolean fixAfterInsert(int[] goal, int node, int parent,
208                                  int grandparent, int great_grandparent) {
209     int compare = compareValue(goal, node);
210     boolean keep_going = true;
211
212     // Recurse down to find the added node. We need to recurse down to
213
// build the context that lets us fix the tree.
214
if (compare < 0) {
215       keep_going = fixAfterInsert(goal, getLeft(node), node, parent,
216                                   grandparent);
217     } else if (compare > 0) {
218       keep_going = fixAfterInsert(goal, getRight(node), node, parent,
219                                   grandparent);
220     }
221
222     // we don't need to fix the root (because it is always set to black)
223
if (node == root) {
224       keep_going = false;
225     }
226
227     // Do we need to fix this node? Only if there are two reds right under each
228
// other.
229
if (keep_going && isRed(node) && isRed(parent)) {
230       if (parent == getLeft(grandparent)) {
231         int uncle = getRight(grandparent);
232         if (isRed(uncle)) {
233           // case 1.1
234
setRed(parent, false);
235           setRed(uncle, false);
236           setRed(grandparent, true);
237           keep_going = true;
238         } else {
239           if (node == getRight(parent)) {
240             // case 1.2
241
// swap node and parent
242
int tmp = node;
243             node = parent;
244             parent = tmp;
245             // left-rotate on node
246
if (node == getLeft(grandparent)) {
247               setLeft(grandparent, parent);
248             } else {
249               setRight(grandparent, parent);
250             }
251             setRight(node, getLeft(parent));
252             setLeft(parent, node);
253           }
254
255           // case 1.2 and 1.3
256
setRed(parent, false);
257           setRed(grandparent, true);
258           // right-rotate on grandparent
259
if (great_grandparent == NULL) {
260             root = parent;
261           } else if (getLeft(great_grandparent) == grandparent) {
262             setLeft(great_grandparent, parent);
263           } else {
264             setRight(great_grandparent, parent);
265           }
266           setLeft(grandparent, getRight(parent));
267           setRight(parent, grandparent);
268           keep_going = false;
269         }
270       } else {
271         int uncle = getLeft(grandparent);
272         if (isRed(uncle)) {
273           // case 2.1
274
setRed(parent, false);
275           setRed(uncle, false);
276           setRed(grandparent, true);
277           keep_going = true;
278         } else {
279           if (node == getLeft(parent)) {
280             // case 2.2
281
// swap node and parent
282
int tmp = node;
283             node = parent;
284             parent = tmp;
285             // right-rotate on node
286
if (node == getLeft(grandparent)) {
287               setLeft(grandparent, parent);
288             } else {
289               setRight(grandparent, parent);
290             }
291             setLeft(node, getRight(parent));
292             setRight(parent, node);
293           }
294           // case 2.2 and 2.3
295
setRed(parent, false);
296           setRed(grandparent, true);
297           // left-rotate on grandparent
298
if (great_grandparent == NULL) {
299             root = parent;
300           } else if (getRight(great_grandparent) == grandparent) {
301             setRight(great_grandparent, parent);
302           } else {
303             setLeft(great_grandparent, parent);
304           }
305           setRight(grandparent, getLeft(parent));
306           setLeft(parent, grandparent);
307           keep_going = false;
308         }
309       }
310     }
311     return keep_going;
312   }
313
314   /**
315    * Add the given value to the set, if it is not already present.
316    *
317    * @return true if the value was not already in the set.
318    */

319   public boolean add(byte[] val) {
320     int[] int_val = convertValueToInts(val);
321     if (size == 0) {
322       insert(int_val, NULL, NULL);
323       setRed(root, false);
324       return true;
325     }
326     int current = root;
327     int prev = NULL;
328     boolean on_left = true;
329     while (current != NULL) {
330       int compare = compareValue(int_val, current);
331       if (compare == 0) {
332         return false;
333       }
334       prev = current;
335       if (compare < 0) {
336         current = getLeft(current);
337         on_left = true;
338       } else {
339         current = getRight(current);
340         on_left = false;
341       }
342     }
343     int position = insert(int_val, NULL, NULL);
344     if (on_left) {
345       setLeft(prev, position);
346     } else {
347       setRight(prev, position);
348     }
349     fixAfterInsert(int_val, root, NULL, NULL, NULL);
350     setRed(root, false);
351     return true;
352   }
353
354   public int getId(byte[] val) {
355     if (size == 0) {
356       return NULL;
357     }
358     int[] int_val = convertValueToInts(val);
359
360     // Check the one that was just added, because most of the time that will be
361
// the one that they want.
362
if (compareValue(int_val, size - 1) == 0) {
363       return size - 1;
364     }
365
366     // Check the "hard" case by traversing the tree.
367
int current = root;
368     while (current != NULL) {
369       int compare = compareValue(int_val, current);
370       if (compare < 0) {
371         current = getLeft(current);
372       } else if (compare > 0) {
373         current = getRight(current);
374       } else {
375         return current;
376       }
377     }
378     return NULL;
379   }
380
381   /**
382    * Get the number of elements in the set.
383    */

384   public int size() {
385     return size;
386   }
387
388   /**
389    * A counter for testing that counts how many times checkSubtree was called.
390    */

391   private int check_count = 0;
392
393   /**
394    * Checks the red-black tree rules to make sure that we have correctly built
395    * a valid tree.
396    *
397    * Properties:
398    * 1. Red nodes must have black children
399    * 2. Each node must have the same black height on both sides.
400    *
401    * @param node The id of the root of the subtree to check for the red-black
402    * tree properties.
403    * @return The black-height of the subtree.
404    */

405   private int checkSubtree(int node) {
406     if (node == NULL) {
407       return 1;
408     }
409     check_count += 1;
410     boolean is_red = isRed(node);
411     int left = getLeft(node);
412     int right = getRight(node);
413     if (is_red) {
414       assert !isRed(left) : "Left node of " + node + " is " + left +
415         " and both are red.";
416       assert !isRed(right) : "Right node of " + node + " is " + right +
417         " and both are red.";
418     }
419     int left_depth = checkSubtree(left);
420     int right_depth = checkSubtree(right);
421     assert left_depth == right_depth: "Lopsided tree at node " + node +
422       " with depths " + left_depth + " and " + right_depth;
423     if (is_red) {
424       return left_depth;
425     } else {
426       return left_depth + 1;
427     }
428   }
429
430   /**
431    * Checks the validity of the entire tree. Also ensures that the number of
432    * nodes visited is the same as the size of the set.
433    */

434   private void checkTree() {
435     check_count = 0;
436     checkSubtree(root);
437     assert check_count == size : "Traverse count " + check_count +
438       " disagrees with size " + size;
439   }
440
441   /**
442    * Build up a test md5 value from a string. The data is filled with the
443    * character values and then 0 filled.
444    */

445   private static void buildTestData(byte[] data, String JavaDoc val) {
446     int val_len = val.length();
447     for(int i = 0; i < 16; i++) {
448       if (i < val_len) {
449         data[i] = (byte) val.charAt(i);
450       } else {
451         data[i] = 0;
452       }
453     }
454   }
455
456   /**
457    * A test driver that takes each command line argument and puts it into the
458    * set. The checkTree function, which checks for a valid red-black tree, is
459    * called after each addition.
460    */

461   public static void main(String JavaDoc[] arg) {
462     Md5Set my_set = new Md5Set(2);
463     byte[] bytes = new byte[16];
464     for(int i = 0; i < arg.length ; ++i) {
465       buildTestData(bytes, arg[i]);
466       System.out.println("add(" + arg[i] + ")= " + my_set.add(bytes));
467       my_set.checkTree();
468     }
469     System.out.println("size = " + my_set.size());
470     for(int i = 0; i < arg.length ; ++i) {
471       buildTestData(bytes, arg[i]);
472       int id = my_set.getId(bytes);
473       assert id != -1 : "Can not find element " + arg[i];
474       System.out.println("getId(" + arg[i] + ")= " + id);
475     }
476   }
477 }
478
479
Popular Tags