KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > mckoi > database > RIDList


1 /**
2  * com.mckoi.database.RIDList 01 Dec 2000
3  *
4  * Mckoi SQL Database ( http://www.mckoi.com/database )
5  * Copyright (C) 2000, 2001, 2002 Diehl and Associates, Inc.
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * Version 2 as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License Version 2 for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * Version 2 along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19  *
20  * Change Log:
21  *
22  *
23  */

24
25 package com.mckoi.database;
26
27 import java.io.*;
28 import java.util.ArrayList JavaDoc;
29 import com.mckoi.util.IntegerVector;
30 import com.mckoi.util.IntegerIterator;
31 import com.mckoi.util.IndexComparator;
32 import com.mckoi.util.BlockIntegerList;
33 import com.mckoi.debug.*;
34
35 /**
36  * This is an optimization to help sorting over a column in a table. It is
37  * an aid for sorting rows in a query without having to resort to cell
38  * lookup. It uses memory to speed up sorting.
39  * <p>
40  * Sorting data is a central part of any database system. This object
41  * maintains a list of values that represent each cell in a column
42  * relationally.
43  * <p>
44  * For example, consider the following data in a column:<p>
45  * { 'a', 'g', 'i', 'b', 'a' }
46  * <p>
47  * A RID list is a set of integer values that represents a column relationally.
48  * So the above column data could be represented in a RID list as:<p>
49  * { 1, 3, 4, 2, 1 }
50  * <p>
51  * If 'c' is inserted into the above list, there is not an integer value that
52  * we can use to represent this cell. In this case, the RID list is
53  * renumbered to make room for the insertion.
54  *
55  * @author Tobias Downer
56  */

57
58 final class RIDList {
59
60   /**
61    * The TransactionSystem that we are in.
62    */

63   private TransactionSystem system;
64
65   /**
66    * The master table for the column this is in.
67    */

68   private MasterTableDataSource master_table;
69
70   /**
71    * The TableName of the table.
72    */

73   private TableName table_name;
74
75   /**
76    * The name of the column of this rid list.
77    */

78   private String JavaDoc column_name;
79
80   /**
81    * The column in the master table.
82    */

83   private int column;
84
85   /**
86    * The sorted list of rows in this set. This is sorted from min to max
87    * (not sorted by row number - sorted by entity row value).
88    */

89   private BlockIntegerList set_list;
90
91   /**
92    * The contents of our list.
93    */

94   private IntegerVector rid_list;
95
96   /**
97    * The difference between each hash when the uid_list was last created or
98    * rehashed.
99    */

100   private int hash_rid_difference;
101
102   /**
103    * The IndexComparator that we use to refer elements in the set to actual
104    * data objects.
105    */

106   private IndexComparator set_comparator;
107
108   /**
109    * Set to true if this list has been fully built.
110    */

111   private boolean is_built;
112
113   /**
114    * The RID list build state.
115    * 0 - list not built.
116    * 1 - stage 1 (set_list being built).
117    * 2 - state 2 (rid_list being built).
118    * 3 - pending modifications.
119    * 4 - finished
120    */

121   private int build_state = 0;
122
123   /**
124    * A list of modifications made to the index while it is being built.
125    */

126   private IntegerVector concurrent_modification_info;
127   private ArrayList JavaDoc concurrent_modification_data;
128   private Object JavaDoc modification_lock = new Object JavaDoc();
129
130   /**
131    * Set to true if a request to build the rid list is on the event dispatcher.
132    */

133   private boolean request_processing = false;
134
135   /**
136    * Constructs the object.
137    */

138   RIDList(MasterTableDataSource master_table, int column) {
139 // rid_list = new IntegerVector();
140
this.master_table = master_table;
141     this.system = master_table.getSystem();
142     this.column = column;
143
144     DataTableDef table_def = master_table.getDataTableDef();
145     table_name = table_def.getTableName();
146     column_name = table_def.columnAt(column).getName();
147
148     is_built = false;
149     setupComparator();
150   }
151
152   /**
153    * Returns a DebugLogger object that we can use to log debug messages.
154    */

155   public final DebugLogger Debug() {
156     return master_table.Debug();
157   }
158
159   /**
160    * Sets the internal comparator that enables us to sort and lookup on the
161    * data in this column.
162    */

163   private void setupComparator() {
164     set_comparator = new IndexComparator() {
165
166       private int internalCompare(int index, TObject cell2) {
167         TObject cell1 = getCellContents(index);
168         return cell1.compareTo(cell2);
169       }
170
171       public int compare(int index, Object JavaDoc val) {
172         return internalCompare(index, (TObject) val);
173       }
174       public int compare(int index1, int index2) {
175         TObject cell = getCellContents(index2);
176         return internalCompare(index1, cell);
177       }
178     };
179   }
180
181   /**
182    * Gets the cell at the given row in the column of the master table.
183    */

184   private TObject getCellContents(int row) {
185     return master_table.getCellContents(column, row);
186   }
187
188   /**
189    * Calculates the 'hash_rid_difference' variable. This dictates the
190    * difference between hashing entries.
191    */

192   private void calcHashRIDDifference(int size) {
193     if (size == 0) {
194       hash_rid_difference = 32;
195     }
196     else {
197       hash_rid_difference = (65536 * 4096) / size;
198       if (hash_rid_difference > 16384) {
199         hash_rid_difference = 16384;
200       }
201       else if (hash_rid_difference < 8) {
202         hash_rid_difference = 8;
203       }
204     }
205
206 // hash_rid_difference = 2;
207
// System.out.println(hash_rid_difference);
208
}
209
210
211   /**
212    * Rehashes the entire rid list. This goes through the entire list from
213    * first sorted entry to last and spaces out each rid so that there's 16
214    * numbers between each entry.
215    */

216   private int rehashRIDList(int old_rid_place) {
217     calcHashRIDDifference(set_list.size());
218
219     int new_rid_place = -1;
220
221     int cur_rid = 0;
222     int old_rid = 0;
223     IntegerIterator iterator = set_list.iterator();
224
225     while (iterator.hasNext()) {
226       int row_index = iterator.next();
227       if (row_index >= 0 && row_index < rid_list.size()) {
228         int old_value = rid_list.intAt(row_index);
229         int new_value;
230
231         if (old_value == 0) {
232           cur_rid += hash_rid_difference;
233           new_rid_place = cur_rid;
234         }
235         else {
236           if (old_value != old_rid) {
237             old_rid = old_value;
238             cur_rid += hash_rid_difference;
239             new_value = cur_rid;
240           }
241           else {
242             new_value = cur_rid;
243           }
244           rid_list.placeIntAt(new_value, row_index);
245         }
246       }
247     }
248
249     if (new_rid_place == -1) {
250       throw new Error JavaDoc(
251                 "Post condition not correct - new_rid_place shouldn't be -1");
252     }
253
254     system.stats().increment("RIDList.rehash_rid_table");
255
256     return new_rid_place;
257   }
258
259
260   /**
261    * Algorithm for inserting a new row into the rid table. For most cases
262    * this should be a very fast method.
263    * <p>
264    * NOTE: This must never be called from anywhere except inside
265    * MasterTableDataStore.
266    *
267    * @param cell the cell to insert into the list.
268    * @param row the row number.
269    */

270   void insertRID(TObject cell, int row) {
271     // NOTE: We are guarenteed to be synchronized on master_table when this
272
// is called.
273

274     synchronized (modification_lock) {
275
276       // If state isn't pre-build or finished, then note this modification.
277
if (build_state > 0 && build_state < 4) {
278         concurrent_modification_info.addInt(1);
279         concurrent_modification_info.addInt(row);
280         concurrent_modification_data.add(cell);
281         return;
282       }
283
284       // Only register if this list has been created.
285
if (rid_list == null) {
286         return;
287       }
288
289     }
290
291     // Place a zero to mark the new row
292
rid_list.placeIntAt(0, row);
293
294     // Insert this into the set_list.
295
set_list.insertSort(cell, row, set_comparator);
296
297     int given_rid = -1;
298     TObject previous_cell;
299
300     // The index of this cell in the list
301
int set_index = set_list.searchLast(cell, set_comparator);
302
303     if (set_list.get(set_index) != row) {
304       throw new Error JavaDoc(
305                  "set_list.searchLast(cell) didn't turn up expected row.");
306     }
307
308     int next_set_index = set_index + 1;
309     if (next_set_index >= set_list.size()) {
310       next_set_index = -1;
311     }
312     int previous_set_index = set_index - 1;
313
314     int next_rid;
315     if (next_set_index > -1) {
316       next_rid = rid_list.intAt(set_list.get(next_set_index));
317     }
318     else {
319       if (previous_set_index > -1) {
320         // If at end and there's a previous set then use that as the next
321
// rid.
322
next_rid = rid_list.intAt(set_list.get(previous_set_index)) +
323                   (hash_rid_difference * 2);
324       }
325       else {
326         next_rid = (hash_rid_difference * 2);
327       }
328     }
329     int previous_rid;
330     if (previous_set_index > -1) {
331       previous_rid = rid_list.intAt(set_list.get(previous_set_index));
332     }
333     else {
334       previous_rid = 0;
335     }
336
337     // Are we the same as the previous or next cell in the list?
338
if (previous_set_index > -1) {
339       previous_cell = getCellContents(set_list.get(previous_set_index));
340       if (previous_cell.compareTo(cell) == 0) {
341         given_rid = previous_rid;
342       }
343     }
344
345     // If not given a rid yet,
346
if (given_rid == -1) {
347       if (previous_rid + 1 == next_rid) {
348         // There's no room so we have to rehash the rid list.
349
given_rid = rehashRIDList(next_rid);
350       }
351       else {
352         given_rid = ((next_rid + 1) + (previous_rid - 1)) / 2;
353       }
354     }
355
356     // Finally (!!) - set the rid for this row.
357
rid_list.placeIntAt(given_rid, row);
358
359   }
360
361   /**
362    * Removes a RID entry from the given row. This <b>MUST</b> only be
363    * called when the row is perminantly removed from the table (eg. by the
364    * row garbage collector).
365    * <p>
366    * NOTE: This must never be called from anywhere except inside
367    * MasterTableDataStore.
368    */

369   void removeRID(int row) {
370     // NOTE: We are guarenteed to be synchronized on master_table when this
371
// is called.
372

373     synchronized (modification_lock) {
374
375       // If state isn't pre-build or finished, then note this modification.
376
if (build_state > 0 && build_state < 4) {
377         concurrent_modification_info.addInt(2);
378         concurrent_modification_info.addInt(row);
379         return;
380       }
381
382       // Only register if this list has been created.
383
if (rid_list == null) {
384         return;
385       }
386
387     }
388
389     try {
390       // Remove from the set_list index.
391
TObject cell = getCellContents(row);
392       int removed = set_list.removeSort(cell, row, set_comparator);
393     }
394     catch (Error JavaDoc e) {
395       System.err.println("RIDList: " + table_name + "." + column_name);
396       throw e;
397     }
398
399   }
400
401   /**
402    * Requests that a rid_list should be built for this column. The list will
403    * be built on the database dispatcher thread.
404    */

405   void requestBuildRIDList() {
406     if (!isBuilt()) {
407       if (!request_processing) {
408         request_processing = true;
409         // Wait 10 seconds to build rid list.
410
system.postEvent(10000, system.createEvent(new Runnable JavaDoc() {
411           public void run() {
412             createRIDCache();
413           }
414         }));
415       }
416     }
417   }
418
419   /**
420    * If rid_list is null then create it now.
421    * <p>
422    * NOTE: This must never be called from anywhere except inside
423    * MasterTableDataStore.
424    */

425   private void createRIDCache() {
426
427     try {
428
429       // If the master table is closed then return
430
// ISSUE: What if this happens while we are constructing the list?
431
if (master_table.isClosed()) {
432         return;
433       }
434
435       long time_start = System.currentTimeMillis();
436       long time_took;
437       int rid_list_size;
438
439       int set_size;
440
441       synchronized (master_table) {
442         synchronized (modification_lock) {
443
444           if (is_built) {
445             return;
446           }
447
448           // Set the build state
449
build_state = 1;
450           concurrent_modification_info = new IntegerVector();
451           concurrent_modification_data = new ArrayList JavaDoc();
452
453           // The set_list (complete index of the master table).
454
set_size = master_table.rawRowCount();
455           set_list = new BlockIntegerList();
456           // Go through the master table and build set_list.
457
for (int r = 0; r < set_size; ++r) {
458             if (!master_table.recordDeleted(r)) {
459               TObject cell = getCellContents(r);
460               set_list.insertSort(cell, r, set_comparator);
461             }
462           }
463           // Now we have a complete/current index, including uncommitted,
464
// and committed added and removed rows, of the given column
465

466           // Add a root lock to the table
467
master_table.addRootLock();
468
469         } // synchronized (modification_lock)
470
} // synchronized master_table
471

472       try {
473
474         // Go through and work out the rid values for the list. We know
475
// that 'set_list' is correct and no entries can be deleted from it
476
// until we relinquish the root lock.
477

478         calcHashRIDDifference(set_size);
479
480         rid_list = new IntegerVector(set_size + 128);
481
482         // Go through 'set_list'. All entries that are equal are given the
483
// same rid.
484
if (set_list.size() > 0) { //set_size > 0) {
485
int cur_rid = hash_rid_difference;
486           IntegerIterator iterator = set_list.iterator();
487           int row_index = iterator.next();
488           TObject last_cell = getCellContents(row_index);
489           rid_list.placeIntAt(cur_rid, row_index);
490
491           while (iterator.hasNext()) {
492             row_index = iterator.next();
493             TObject cur_cell = getCellContents(row_index);
494             int cmp = cur_cell.compareTo(last_cell);
495             if (cmp > 0) {
496               cur_rid += hash_rid_difference;
497             }
498             else if (cmp < 0) { // ASSERTION
499
// If current cell is less than last cell then the list ain't
500
// sorted!
501
throw new Error JavaDoc("Internal Database Error: Index is corrupt " +
502                               " - InsertSearch list is not sorted.");
503             }
504             rid_list.placeIntAt(cur_rid, row_index);
505
506             last_cell = cur_cell;
507           }
508         }
509
510         // Final stage, insert final changes,
511
// We lock the master_table so we are guarenteed no changes to the
512
// table can happen during the final stage.
513
synchronized (master_table) {
514           synchronized (modification_lock) {
515             build_state = 4;
516
517             // Make any modifications to the list that occured during the time
518
// we were building the RID list.
519
int mod_size = concurrent_modification_info.size();
520             int i = 0;
521             int m_data = 0;
522             int insert_count = 0;
523             int remove_count = 0;
524             while (i < mod_size) {
525               int type = concurrent_modification_info.intAt(i);
526               int row = concurrent_modification_info.intAt(i + 1);
527               // An insert
528
if (type == 1) {
529                 TObject cell =
530                             (TObject) concurrent_modification_data.get(m_data);
531                 insertRID(cell, row);
532                 ++m_data;
533                 ++insert_count;
534               }
535               // A remove
536
else if (type == 2) {
537                 removeRID(row);
538                 ++remove_count;
539               }
540               else {
541                 throw new Error JavaDoc("Unknown modification type.");
542               }
543
544               i += 2;
545             }
546
547             if (remove_count > 0) {
548               Debug().write(Lvl.ERROR, this,
549                   "Assertion failed: It should not be possible to remove " +
550                   "rows during a root lock when building a RID list.");
551             }
552
553             concurrent_modification_info = null;
554             concurrent_modification_data = null;
555
556             // Log the time it took
557
time_took = System.currentTimeMillis() - time_start;
558             rid_list_size = rid_list.size();
559
560             is_built = true;
561
562           }
563         } // synchronized (modification_lock)
564

565       }
566       finally {
567         // Must guarentee we remove the root lock from the master table
568
master_table.removeRootLock();
569       }
570
571       Debug().write(Lvl.MESSAGE, this,
572                "RID List " + table_name.toString() + "." + column_name +
573                " Initial Size = " + rid_list_size);
574       Debug().write(Lvl.MESSAGE, this,
575                "RID list built in " + time_took + "ms.");
576
577       // The number of rid caches created.
578
system.stats().increment(
579                            "{session} RIDList.rid_caches_created");
580       // The total size of all rid indices that we have created.
581
system.stats().add(rid_list_size,
582                            "{session} RIDList.rid_indices");
583
584     }
585     catch (IOException e) {
586       throw new Error JavaDoc("IO Error: " + e.getMessage());
587     }
588
589   }
590
591   /**
592    * Quick way of determining if the RID list has been built.
593    */

594   boolean isBuilt() {
595     synchronized (modification_lock) {
596       return is_built;
597     }
598   }
599
600   /**
601    * Given an unsorted set of rows in this table, this will return the row
602    * list sorted in descending order. This only uses the information from
603    * within this list to make up the sorted result, and does not reference any
604    * data in the master table.
605    * <p>
606    * SYNCHRONIZATION: This does not lock the master_table because it doesn't
607    * use any information in it.
608    */

609   BlockIntegerList sortedSet(final IntegerVector row_set) {
610
611     // The length of the set to order
612
int row_set_length = row_set.size();
613
614     // This will be 'row_set' sorted by its entry lookup. This must only
615
// contain indices to row_set entries.
616
BlockIntegerList new_set = new BlockIntegerList();
617
618     // The comparator we use to sort
619
IndexComparator comparator = new IndexComparator() {
620       public int compare(int index, Object JavaDoc val) {
621         int rid_val = rid_list.intAt(row_set.intAt(index));
622         int rid_val2 = ((Integer JavaDoc) val).intValue();
623         return rid_val - rid_val2;
624       }
625       public int compare(int index1, int index2) {
626         throw new Error JavaDoc("Shouldn't be called!");
627       }
628     };
629
630     // This synchronized statement is required because a RID list may be
631
// altered when a row is deleted from the dispatcher thread. Inserts and
632
// deletes on a table at this level do not necessarily only happen when
633
// the table is under a lock. For this reason this block of code must
634
// be synchronized.
635
synchronized (master_table) {
636
637       // Fill new_set with the set { 0, 1, 2, .... , row_set_length }
638
for (int i = 0; i < row_set_length; ++i) {
639         Integer JavaDoc rid_val =
640                           new Integer JavaDoc(rid_list.intAt(row_set.intAt(i)));
641         new_set.insertSort(rid_val, i, comparator);
642       }
643
644       return new_set;
645
646     } // synchronized (master_table)
647

648   }
649
650
651 }
652
653
Popular Tags