KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > derby > impl > store > access > btree > BTreeCostController


1 /*
2
3    Derby - Class org.apache.derby.impl.store.access.btree.BTreeCostController
4
5    Licensed to the Apache Software Foundation (ASF) under one or more
6    contributor license agreements. See the NOTICE file distributed with
7    this work for additional information regarding copyright ownership.
8    The ASF licenses this file to you under the Apache License, Version 2.0
9    (the "License"); you may not use this file except in compliance with
10    the License. You may obtain a copy of the License at
11
12       http://www.apache.org/licenses/LICENSE-2.0
13
14    Unless required by applicable law or agreed to in writing, software
15    distributed under the License is distributed on an "AS IS" BASIS,
16    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17    See the License for the specific language governing permissions and
18    limitations under the License.
19
20  */

21
22 package org.apache.derby.impl.store.access.btree;
23
24 import org.apache.derby.iapi.reference.Property;
25 import org.apache.derby.iapi.reference.SQLState;
26
27 import org.apache.derby.iapi.services.sanity.SanityManager;
28
29 import org.apache.derby.iapi.error.StandardException;
30
31 import org.apache.derby.iapi.store.access.conglomerate.Conglomerate;
32 import org.apache.derby.iapi.store.access.conglomerate.LogicalUndo;
33 import org.apache.derby.iapi.store.access.conglomerate.TransactionManager;
34
35 import org.apache.derby.iapi.store.access.DynamicCompiledOpenConglomInfo;
36 import org.apache.derby.iapi.store.access.RowUtil;
37 import org.apache.derby.iapi.store.access.ScanController;
38 import org.apache.derby.iapi.store.access.StoreCostController;
39 import org.apache.derby.iapi.store.access.StoreCostResult;
40 import org.apache.derby.iapi.store.access.TransactionController;
41
42 import org.apache.derby.iapi.store.raw.ContainerHandle;
43 import org.apache.derby.iapi.store.raw.LockingPolicy;
44 import org.apache.derby.iapi.store.raw.RawStoreFactory;
45 import org.apache.derby.iapi.store.raw.Transaction;
46
47 import org.apache.derby.iapi.types.DataValueDescriptor;
48
49 import org.apache.derby.iapi.types.RowLocation;
50
51
52 import org.apache.derby.iapi.services.io.FormatableBitSet;
53 import java.util.Properties JavaDoc;
54
55 /**
56
57 The StoreCostController interface provides methods that an access client
58 (most likely the system optimizer) can use to get store's estimated cost of
59 various operations on the conglomerate the StoreCostController was opened
60 for.
61 <p>
62 It is likely that the implementation of StoreCostController will open
63 the conglomerate and will leave the conglomerate open until the
64 StoreCostController is closed. This represents a significant amount of
65 work, so the caller if possible should attempt to open the StoreCostController
66 once per unit of work and rather than close and reopen the controller. For
67 instance if the optimizer needs to cost 2 different scans against a single
68 conglomerate, it should use one instance of the StoreCostController.
69 <p>
70 The locking behavior of the implementation of a StoreCostController is
71 undefined, it may or may not get locks on the underlying conglomerate. It
72 may or may not hold locks until end of transaction.
73 An optimal implementation will not get any locks on the underlying
74 conglomerate, thus allowing concurrent access to the table by a executing
75 query while another query is optimizing.
76 <p>
77 @see TransactionController#openStoreCost
78
79 **/

80
81 public class BTreeCostController extends OpenBTree
82     implements StoreCostController
83 {
84
85     // 1.5 numbers on mikem old machine:
86
//
87
// The magic numbers are based on the following benchmark results:
88
//
89
// no col one int col all cols
90
// ------ ----------- --------
91
//100 byte heap fetch by row loc, cached 0.3625 0.5098 0.6629
92
//100 byte heap fetch by row loc, uncached 1.3605769 1.5168269 1.5769231
93
//4 byte heap fetch by row loc, cached 0.3745 0.4016 0.3766
94
//4 byte heap fetch by row loc, uncached 4.1938777 3.5714285 4.4897957
95
//
96
// no col one int col all cols
97
// ------ ----------- --------
98
//Int col one level btree
99
// fetch by exact key, cached 0.781 1.012 0.42
100
// fetch by exact key, sort merge 1.081 1.221 0.851
101
// fetch by exact key, uncached 0.0 0.0 0.0
102
//Int col two level btree
103
// fetch by exact key, cached 1.062 1.342 0.871
104
// fetch by exact key, sort merge 1.893 2.273 1.633
105
// fetch by exact key, uncached 5.7238097 5.3428574 4.7714286
106
//String key one level btree
107
// fetch by exact key, cached 1.082 0.811 0.781
108
// fetch by exact key, sort merge 1.572 1.683 1.141
109
// fetch by exact key, uncached 0.0 0.0 0.0
110
//String key two level btree
111
// fetch by exact key, cached 2.143 2.664 1.953
112
// fetch by exact key, sort merge 3.775 4.116 3.505
113
// fetch by exact key, uncached 4.639474 5.0052633 4.4289474
114

115     // mikem new machine - insane, codeline, non-jit 1.1.7 numbers
116
//
117
// no col one int col all cols
118
// ------ ----------- --------
119
//100 byte heap fetch by row loc, cached 0.1662 0.4597 0.5618
120
//100 byte heap fetch by row loc, uncached 0.7565947 1.2601918 1.6690648
121
//4 byte heap fetch by row loc, cached 0.1702 0.1983 0.1903
122
//4 byte heap fetch by row loc, uncached 1.5068493 1.3013699 1.6438357
123
//
124
// no col one int col all cols
125
// ------ ----------- --------
126
// Int col one level btree
127
// fetch by exact key, cached 0.271 0.511 0.33
128
// fetch by exact key, sort merge 0.691 0.921 0.771
129
// fetch by exact key, uncached 0.0 0.0 0.0
130
// Int col two level btree
131
// fetch by exact key, cached 0.541 0.711 0.561
132
// fetch by exact key, sort merge 1.432 1.682 1.533
133
// fetch by exact key, uncached 3.142857 3.6285715 3.2380953
134
// String key one level btree
135
// fetch by exact key, cached 0.611 0.851 0.701
136
// fetch by exact key, sort merge 1.051 1.272 1.122
137
// fetch by exact key, uncached 0.0 0.0 0.0
138
// String key two level btree
139
// fetch by exact key, cached 1.532 1.843 1.622
140
// fetch by exact key, sort merge 2.844 3.155 2.984
141
// fetch by exact key, uncached 3.4 3.636842 3.531579
142
//
143

144
145     // The following costs are search costs to find a row on a leaf, use
146
// the heap costs to determine scan costs, for now ignore qualifier
147
// application and stop comparisons.
148
// I used the int key, 2 level numbers divided by 2 to get per level.
149

150     private static final double
151         BTREE_CACHED_FETCH_BY_KEY_PER_LEVEL = (0.541 / 2);
152
153     private static final double
154         BTREE_SORTMERGE_FETCH_BY_KEY_PER_LEVEL = (1.432 / 2);
155
156     private static final double
157         BTREE_UNCACHED_FETCH_BY_KEY_PER_LEVEL = (3.143 / 2);
158
159     // saved values passed to init().
160
TransactionManager init_xact_manager;
161     Transaction init_rawtran;
162     Conglomerate init_conglomerate;
163
164     /**
165      * Only lookup these estimates from raw store once.
166      **/

167     long num_pages;
168     long num_rows;
169     long page_size;
170     int tree_height;
171
172     /* Constructors for This class: */
173
174     public BTreeCostController()
175     {
176     }
177
178     /* Private/Protected methods of This class: */
179
180     /**
181      * Initialize the cost controller.
182      * <p>
183      * Save initialize parameters away, and open the underlying container.
184      * <p>
185      *
186      * @param xact_manager access manager transaction.
187      * @param rawtran Raw store transaction.
188      *
189      * @exception StandardException Standard exception policy.
190      **/

191     public void init(
192     TransactionManager xact_manager,
193     BTree conglomerate,
194     Transaction rawtran)
195         throws StandardException
196     {
197         super.init(
198             xact_manager,
199             xact_manager,
200             (ContainerHandle) null, // open the btree.
201
rawtran,
202             false,
203             ContainerHandle.MODE_READONLY,
204             TransactionManager.MODE_NONE,
205             (BTreeLockingPolicy) null, // RESOLVE (mikem) - this means
206
// no locks during costing - will
207
// that work?????
208
conglomerate,
209             (LogicalUndo) null, // read only, so no undo necessary
210
(DynamicCompiledOpenConglomInfo) null);
211
212         // look up costs from raw store. For btrees these numbers are out
213
// of whack as they want to be leaf specific numbers but they include
214
// every page branch and leafs.
215
num_pages = this.container.getEstimatedPageCount(/* unused flag */ 0);
216
217         // subtract one row for every page to account for internal control row
218
// which exists on every page.
219
num_rows =
220             this.container.getEstimatedRowCount(/*unused flag*/ 0) - num_pages;
221
222         Properties JavaDoc prop = new Properties JavaDoc();
223         prop.put(Property.PAGE_SIZE_PARAMETER, "");
224         this.container.getContainerProperties(prop);
225         page_size =
226             Integer.parseInt(prop.getProperty(Property.PAGE_SIZE_PARAMETER));
227
228         tree_height = getHeight();
229
230         return;
231     }
232
233     /* Public Methods of This class: */
234
235     /**
236      * Close the controller.
237      * <p>
238      * Close the open controller. This method always succeeds, and never
239      * throws any exceptions. Callers must not use the StoreCostController
240      * Cost controller after closing it; they are strongly advised to clear
241      * out the scan controller reference after closing.
242      * <p>
243      **/

244     public void close()
245         throws StandardException
246     {
247         super.close();
248     }
249
250     /**
251      * Return the cost of calling ConglomerateController.fetch().
252      * <p>
253      * Return the estimated cost of calling ConglomerateController.fetch()
254      * on the current conglomerate. This gives the cost of finding a record
255      * in the conglomerate given the exact RowLocation of the record in
256      * question.
257      * <p>
258      * The validColumns parameter describes what kind of row
259      * is being fetched, ie. it may be cheaper to fetch a partial row than a
260      * complete row.
261      * <p>
262      *
263      *
264      * @param validColumns A description of which columns to return from
265      * row on the page into "templateRow." templateRow,
266      * and validColumns work together to
267      * describe the row to be returned by the fetch -
268      * see RowUtil for description of how these three
269      * parameters work together to describe a fetched
270      * "row".
271      *
272      * @param access_type Describe the type of access the query will be
273      * performing to the ConglomerateController.
274      *
275      * STORECOST_CLUSTERED - The location of one fetch
276      * is likely clustered "close" to the next
277      * fetch. For instance if the query plan were
278      * to sort the RowLocations of a heap and then
279      * use those RowLocations sequentially to
280      * probe into the heap, then this flag should
281      * be specified. If this flag is not set then
282      * access to the table is assumed to be
283      * random - ie. the type of access one gets
284      * if you scan an index and probe each row
285      * in turn into the base table is "random".
286      *
287      *
288      * @return The cost of the fetch.
289      *
290      * @exception StandardException Standard exception policy.
291      *
292      * @see RowUtil
293      **/

294     public double getFetchFromRowLocationCost(
295     FormatableBitSet validColumns,
296     int access_type)
297         throws StandardException
298     {
299         throw StandardException.newException(
300                 SQLState.BTREE_UNIMPLEMENTED_FEATURE);
301     }
302
303     /**
304      * Return the cost of exact key lookup.
305      * <p>
306      * Return the estimated cost of calling ScanController.fetch()
307      * on the current conglomerate, with start and stop positions set such
308      * that an exact match is expected.
309      * <p>
310      * This call returns the cost of a fetchNext() performed on a scan which
311      * has been positioned with a start position which specifies exact match
312      * on all keys in the row.
313      * <p>
314      * Example:
315      * <p>
316      * In the case of a btree this call can be used to determine the cost of
317      * doing an exact probe into btree, giving all key columns. This cost
318      * can be used if the client knows it will be doing an exact key probe
319      * but does not have the key's at optimize time to use to make a call to
320      * getScanCost()
321      * <p>
322      *
323      *
324      * @param validColumns A description of which columns to return from
325      * row on the page into "templateRow." templateRow,
326      * and validColumns work together to
327      * describe the row to be returned by the fetch -
328      * see RowUtil for description of how these three
329      * parameters work together to describe a fetched
330      * "row".
331      *
332      * @param access_type Describe the type of access the query will be
333      * performing to the ScanController.
334      *
335      * STORECOST_CLUSTERED - The location of one scan
336      * is likely clustered "close" to the previous
337      * scan. For instance if the query plan were
338      * to used repeated "reopenScan()'s" to probe
339      * for the next key in an index, then this flag
340      * should be be specified. If this flag is not
341      * set then each scan will be costed independant
342      * of any other predicted scan access.
343      *
344      * @return The cost of the fetch.
345      *
346      * @exception StandardException Standard exception policy.
347      *
348      * @see RowUtil
349      **/

350     public double getFetchFromFullKeyCost(
351     FormatableBitSet validColumns,
352     int access_type)
353         throws StandardException
354     {
355         double ret_cost;
356
357         if ((access_type & StoreCostController.STORECOST_CLUSTERED) == 0)
358         {
359             // uncached fetch
360
ret_cost = BTREE_UNCACHED_FETCH_BY_KEY_PER_LEVEL;
361         }
362         else
363         {
364             ret_cost = BTREE_SORTMERGE_FETCH_BY_KEY_PER_LEVEL;
365         }
366         ret_cost *= tree_height;
367
368         return(ret_cost);
369     }
370
371
372     /**
373      * Calculate the cost of a scan.
374      * <p>
375      * Cause this object to calculate the cost of performing the described
376      * scan. The interface is setup such that first a call is made to
377      * calcualteScanCost(), and then subsequent calls to accessor routines
378      * are made to get various pieces of information about the cost of
379      * the scan.
380      * <p>
381      * For the purposes of costing this routine is going to assume that
382      * a page will remain in cache between the time one next()/fetchNext()
383      * call and a subsequent next()/fetchNext() call is made within a scan.
384      * <p>
385      * The result of costing the scan is placed in the "cost_result".
386      * The cost of the scan is stored by calling
387      * cost_result.setEstimatedCost(cost).
388      * The estimated row count is stored by calling
389      * cost_result.setEstimatedRowCount(row_count).
390      * <p>
391      * The estimated cost of the scan assumes the caller will
392      * execute a fetchNext() loop for every row that qualifies between
393      * start and stop position. Note that this cost is different than
394      * execution a next(),fetch() loop; or if the scan is going to be
395      * terminated by client prior to reaching the stop condition.
396      * <p>
397      * The estimated number of rows returned from the scan
398      * assumes the caller will execute a fetchNext() loop for every
399      * row that qualifies between start and stop position.
400      * <p>
401      *
402      *
403      * @param scan_type The type of scan that will be executed. There
404      * are currently 2 types:
405      * STORECOST_SCAN_NORMAL - scans will be executed
406      * using the standard next/fetch, where each fetch
407      * can retrieve 1 or many rows (if fetchNextGroup()
408      * interface is used).
409      *
410      * STORECOST_SCAN_SET - The entire result set will
411      * be retrieved using the the fetchSet() interface.
412      *
413      * @param row_count Estimated total row count of the table. The
414      * current system tracks row counts in heaps better
415      * than btree's (btree's have "rows" which are not
416      * user rows - branch rows, control rows), so
417      * if available the client should
418      * pass in the base table's row count into this
419      * routine to be used as the index's row count.
420      * If the caller has no idea, pass in -1.
421      *
422      * @param group_size The number of rows to be returned by a single
423      * fetch call for STORECOST_SCAN_NORMAL scans.
424      *
425      * @param forUpdate Should be true if the caller intends to update
426      * through the scan.
427      *
428      * @param scanColumnList A description of which columns to return from
429      * every fetch in the scan. template,
430      * and scanColumnList work together
431      * to describe the row to be returned by the scan -
432      * see RowUtil for description of how these three
433      * parameters work together to describe a "row".
434      *
435      * @param template A prototypical row which the scan may use to
436      * maintain its position in the conglomerate. Not
437      * all access method scan types will require this,
438      * if they don't it's ok to pass in null.
439      * In order to scan a conglomerate one must
440      * allocate 2 separate "row" templates. The "row"
441      * template passed into openScan is for the private
442      * use of the scan itself, and no access to it
443      * should be made by the caller while the scan is
444      * still open. Because of this the scanner must
445      * allocate another "row" template to hold the
446      * values returned from fetch(). Note that this
447      * template must be for the full row, whether a
448      * partial row scan is being executed or not.
449      *
450      * @param startKeyValue An indexable row which holds a (partial) key
451      * value which, in combination with the
452      * startSearchOperator, defines the starting
453      * position of the scan. If null, the starting
454      * position of the scan is the first row of the
455      * conglomerate. The startKeyValue must only
456      * reference columns included in the scanColumnList.
457      *
458      * @param startSearchOperator
459      * an operator which defines how the startKeyValue
460      * is to be searched for. If startSearchOperation
461      * is ScanController.GE, the scan starts on the
462      * first row which is greater than or equal to the
463      * startKeyValue. If startSearchOperation is
464      * ScanController.GT, the scan starts on the first
465      * row whose key is greater than startKeyValue. The
466      * startSearchOperation parameter is ignored if the
467      * startKeyValue parameter is null.
468      *
469      * @param stopKeyValue An indexable row which holds a (partial) key
470      * value which, in combination with the
471      * stopSearchOperator, defines the ending position
472      * of the scan. If null, the ending position of the
473      * scan is the last row of the conglomerate. The
474      * stopKeyValue must only reference columns included
475      * in the scanColumnList.
476      *
477      * @param stopSearchOperator
478      * an operator which defines how the stopKeyValue
479      * is used to determine the scan stopping position.
480      * If stopSearchOperation is ScanController.GE, the
481      * scan stops just before the first row which is
482      * greater than or equal to the stopKeyValue. If
483      * stopSearchOperation is ScanController.GT, the
484      * scan stops just before the first row whose key
485      * is greater than startKeyValue. The
486      * stopSearchOperation parameter is ignored if the
487      * stopKeyValue parameter is null.
488      *
489      *
490      * @param access_type Describe the type of access the query will be
491      * performing to the ScanController.
492      *
493      * STORECOST_CLUSTERED - The location of one scan
494      * is likely clustered "close" to the previous
495      * scan. For instance if the query plan were
496      * to used repeated "reopenScan()'s" to probe
497      * for the next key in an index, then this flag
498      * should be be specified. If this flag is not
499      * set then each scan will be costed independant
500      * of any other predicted scan access.
501      *
502      *
503      * @exception StandardException Standard exception policy.
504      *
505      * @see RowUtil
506      **/

507     public void getScanCost(
508     int scan_type,
509     long row_count,
510     int group_size,
511     boolean forUpdate,
512     FormatableBitSet scanColumnList,
513     DataValueDescriptor[] template,
514     DataValueDescriptor[] startKeyValue,
515     int startSearchOperator,
516     DataValueDescriptor[] stopKeyValue,
517     int stopSearchOperator,
518     boolean reopen_scan,
519     int access_type,
520     StoreCostResult cost_result)
521         throws StandardException
522     {
523         float left_of_start;
524         float left_of_stop;
525         ControlRow control_row = null;
526         long input_row_count = (row_count < 0 ? num_rows : row_count);
527
528         try
529         {
530             // Find the starting page and row slot.
531
if (startKeyValue == null)
532             {
533                 left_of_start = 0;
534             }
535             else
536             {
537                 // Search for the starting row.
538

539                 SearchParameters sp = new SearchParameters(
540                     startKeyValue,
541                     ((startSearchOperator == ScanController.GE) ?
542                         SearchParameters.POSITION_LEFT_OF_PARTIAL_KEY_MATCH :
543                         SearchParameters.POSITION_RIGHT_OF_PARTIAL_KEY_MATCH),
544                     template, this, true);
545
546                 control_row =
547                     ControlRow.Get(this, BTree.ROOTPAGEID).search(sp);
548
549                 control_row.release();
550                 control_row = null;
551
552                 left_of_start = sp.left_fraction;
553             }
554
555             if (stopKeyValue == null)
556             {
557                 left_of_stop = 1;
558             }
559             else
560             {
561                 // Search for the stopping row.
562

563                 SearchParameters sp =
564                     new SearchParameters(
565                         stopKeyValue,
566                         ((stopSearchOperator == ScanController.GE) ?
567                           SearchParameters.POSITION_LEFT_OF_PARTIAL_KEY_MATCH :
568                           SearchParameters.POSITION_RIGHT_OF_PARTIAL_KEY_MATCH),
569                         template, this, true);
570
571                 control_row =
572                     ControlRow.Get(this, BTree.ROOTPAGEID).search(sp);
573
574                 control_row.release();
575                 control_row = null;
576
577                 left_of_stop = sp.left_fraction;
578             }
579
580             // System.out.println(
581
// "\n\tleft_of_start = " + left_of_start +
582
// "\n\tleft_of_stop = " + left_of_stop);
583

584             // what percentage of rows are between start and stop?
585

586             float ret_fraction = left_of_stop - left_of_start;
587
588             // If for some reason the stop position comes before the start
589
// position, assume 0 rows will return from query.
590
if (ret_fraction < 0)
591                 ret_fraction = 0;
592
593             // Never return estimate of more rows than exist, sometimes
594
// the recursive estimation through the btree may return a number
595
// like 1.00001.
596
if (ret_fraction > 1)
597                 ret_fraction = 1;
598
599             float estimated_row_count = input_row_count * ret_fraction;
600
601             // first the base cost of positioning on the first row in the scan.
602
double cost =
603                 getFetchFromFullKeyCost(scanColumnList, access_type);
604
605             // add the base cost of bringing each page for the first time into
606
// the cache. This is basically the cost of bringing each leaf
607
// uncached into the cache and reading the control row off of it.:
608
cost +=
609                 (num_pages * ret_fraction) * BASE_UNCACHED_ROW_FETCH_COST;
610
611             // Now some magic to try and figure out the cost of doing a
612
// scan along the leaf level of the tree. Mostly just assume
613
// the costs are the same as the heap, and ignore qualifier
614
// processing and stop row comparisons for now.
615

616             // the base cost of getting each of the rows from a page assumed
617
// to already be cached (by the scan fetch) - this is only for all
618
// rows after the initial row on the page has been accounted for
619
// under the BASE_UNCACHED_ROW_FETCH_COST cost.:
620
long cached_row_count = ((long) estimated_row_count) - num_pages;
621             if (cached_row_count < 0)
622                 cached_row_count = 0;
623
624             if (scan_type == StoreCostController.STORECOST_SCAN_NORMAL)
625                 cost += cached_row_count * BASE_GROUPSCAN_ROW_COST;
626             else
627                 cost += cached_row_count * BASE_HASHSCAN_ROW_FETCH_COST;
628
629             // finally add the cost associated with the number of bytes in row:
630
long row_size =
631                 (input_row_count == 0) ?
632                     4 : (num_pages * page_size) / input_row_count;
633
634             cost +=
635                 (estimated_row_count * row_size) * BASE_ROW_PER_BYTECOST;
636
637             if (SanityManager.DEBUG)
638             {
639                 if (cost < 0)
640                     SanityManager.THROWASSERT("cost " + cost);
641
642                 if (estimated_row_count < 0)
643                     SanityManager.THROWASSERT(
644                         "estimated_row_count = " + estimated_row_count);
645             }
646
647             // return the cost
648
cost_result.setEstimatedCost(cost);
649
650             // RESOLVE - should we make sure this number is > 0?
651
cost_result.setEstimatedRowCount(Math.round(estimated_row_count));
652         }
653         finally
654         {
655             if (control_row != null)
656                 control_row.release();
657         }
658
659         // System.out.println("BTreeCostController.getScanCost():" +
660
// "\n\t cost = " + cost_result.getEstimatedCost() +
661
// "\n\t rows = " + cost_result.getEstimatedRowCount());
662

663         return;
664     }
665
666     /**
667      * Return an "empty" row location object of the correct type.
668      * <p>
669      *
670      * @return The empty Rowlocation.
671      *
672      * @exception StandardException Standard exception policy.
673      **/

674     public RowLocation newRowLocationTemplate()
675         throws StandardException
676     {
677         throw StandardException.newException(
678                 SQLState.BTREE_UNIMPLEMENTED_FEATURE);
679     }
680 }
681
Popular Tags