HeapCostController


1   /*
2   
3      Derby - Class org.apache.derby.impl.store.access.heap.HeapCostController
4   
5      Licensed to the Apache Software Foundation (ASF) under one or more
6      contributor license agreements.  See the NOTICE file distributed with
7      this work for additional information regarding copyright ownership.
8      The ASF licenses this file to you under the Apache License, Version 2.0
9      (the "License"); you may not use this file except in compliance with
10     the License.  You may obtain a copy of the License at
11  
12        http://www.apache.org/licenses/LICENSE-2.0
13  
14     Unless required by applicable law or agreed to in writing, software
15     distributed under the License is distributed on an "AS IS" BASIS,
16     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17     See the License for the specific language governing permissions and
18     limitations under the License.
19  
20   */
21  
22  package org.apache.derby.impl.store.access.heap;
23  
24  import org.apache.derby.iapi.reference.SQLState;
25  import org.apache.derby.iapi.reference.Property;
26  
27  import org.apache.derby.iapi.services.sanity.SanityManager;
28  
29  import org.apache.derby.iapi.error.StandardException; 
30  
31  import org.apache.derby.iapi.store.access.conglomerate.TransactionManager;
32  
33  import org.apache.derby.iapi.types.RowLocation;
34  import org.apache.derby.iapi.store.access.StoreCostController;
35  import org.apache.derby.iapi.store.access.StoreCostResult;
36  
37  import org.apache.derby.iapi.store.raw.ContainerHandle;
38  import org.apache.derby.iapi.store.raw.LockingPolicy;
39  import org.apache.derby.iapi.store.raw.RawStoreFactory;
40  import org.apache.derby.iapi.store.raw.Transaction;
41  
42  import org.apache.derby.impl.store.access.conglomerate.GenericCostController;
43  import org.apache.derby.impl.store.access.conglomerate.OpenConglomerate;
44  
45  import org.apache.derby.iapi.store.access.RowUtil;
46  
47  import org.apache.derby.iapi.types.DataValueDescriptor;
48  
49  import org.apache.derby.iapi.services.io.FormatableBitSet;
50  import java.util.Properties  ;
51  
52  
53  /**
54  
55  The StoreCostController interface provides methods that an access client
56  (most likely the system optimizer) can use to get store's estimated cost of
57  various operations on the conglomerate the StoreCostController was opened
58  for.
59  <p>
60  It is likely that the implementation of StoreCostController will open 
61  the conglomerate and will leave the conglomerate open until the
62  StoreCostController is closed.  This represents a significant amount of
63  work, so the caller if possible should attempt to open the StoreCostController
64  once per unit of work and rather than close and reopen the controller.  For
65  instance if the optimizer needs to cost 2 different scans against a single
66  conglomerate, it should use one instance of the StoreCostController.
67  <p>
68  The locking behavior of the implementation of a StoreCostController is
69  undefined, it may or may not get locks on the underlying conglomerate.  It
70  may or may not hold locks until end of transaction.  
71  An optimal implementation will not get any locks on the underlying 
72  conglomerate, thus allowing concurrent access to the table by a executing
73  query while another query is optimizing.
74  <p>
75  The StoreCostController gives 2 kinds of cost information
76  
77  **/
78  
79  public class HeapCostController 
80      extends GenericCostController implements StoreCostController
81  {
82      /**
83       * Only lookup these estimates from raw store once.
84       **/
85      long    num_pages;
86      long    num_rows;
87      long    page_size;
88      long    row_size;
89  
90      /* Private/Protected methods of This class: */
91  
92      /**
93       * Initialize the cost controller.
94       * <p>
95       * Let super.init() do it's work and then get the initial stats about the
96       * table from raw store.
97       *
98       * @exception  StandardException  Standard exception policy.
99       **/
100     public void init(
101     OpenConglomerate    open_conglom)
102         throws StandardException
103     {
104         super.init(open_conglom);
105 
106         ContainerHandle container = open_conglom.getContainer();
107 
108         // look up costs from raw store.
109         num_rows  = container.getEstimatedRowCount(/*unused flag*/ 0);
110 
111         // Don't use 0 rows (use 1 instead), as 0 rows often leads the 
112         // optimizer to produce plans which don't use indexes because of the 0 
113         // row edge case.
114         //
115         // Eventually the plan is recompiled when rows are added, but we
116         // have seen multiple customer cases of deadlocks and timeouts 
117         // because of these 0 row based plans.  
118         if (num_rows == 0)
119             num_rows = 1;
120 
121         // eliminate the allocation page from the count.
122         num_pages = container.getEstimatedPageCount(/* unused flag */ 0);
123 
124         Properties   prop = new Properties  ();
125         prop.put(Property.PAGE_SIZE_PARAMETER, "");
126         container.getContainerProperties(prop);
127         page_size = 
128             Integer.parseInt(prop.getProperty(Property.PAGE_SIZE_PARAMETER));
129 
130         row_size = (num_pages * page_size / num_rows);
131 
132         return;
133     }
134 
135     /* Public Methods of This class: */
136     /* Public Methods of XXXX class: */
137 
138 
139     /**
140      * Return the cost of calling ConglomerateController.fetch().
141      * <p>
142      * Return the estimated cost of calling ConglomerateController.fetch()
143      * on the current conglomerate.  This gives the cost of finding a record
144      * in the conglomerate given the exact RowLocation of the record in
145      * question. 
146      * <p>
147      * The validColumns describes what kind of row is being fetched, 
148      * ie. it may be cheaper to fetch a partial row than a complete row.
149      * <p>
150      *
151      *
152      * @param validColumns    A description of which columns to return from
153      *                        row on the page into "templateRow."  templateRow,
154      *                        and validColumns work together to
155      *                        describe the row to be returned by the fetch - 
156      *                        see RowUtil for description of how these three 
157      *                        parameters work together to describe a fetched 
158      *                        "row".
159      *
160      * @param access_type     Describe the type of access the query will be
161      *                        performing to the ConglomerateController.  
162      *
163      *                        STORECOST_CLUSTERED - The location of one fetch
164      *                            is likely clustered "close" to the next 
165      *                            fetch.  For instance if the query plan were
166      *                            to sort the RowLocations of a heap and then
167      *                            use those RowLocations sequentially to 
168      *                            probe into the heap, then this flag should
169      *                            be specified.  If this flag is not set then
170      *                            access to the table is assumed to be
171      *                            random - ie. the type of access one gets 
172      *                            if you scan an index and probe each row
173      *                            in turn into the base table is "random".
174      *
175      *
176      * @return The cost of the fetch.
177      *
178      * @exception  StandardException  Standard exception policy.
179      *
180      * @see RowUtil
181      **/
182     public double getFetchFromRowLocationCost(
183     FormatableBitSet      validColumns,
184     int         access_type)
185         throws StandardException
186     {
187         double ret_cost;
188 
189         // get "per-byte" cost of fetching a row from the page.
190         ret_cost = row_size * BASE_ROW_PER_BYTECOST;
191 
192         long num_pages_per_row = (row_size / page_size) + 1;
193 
194         if ((access_type & StoreCostController.STORECOST_CLUSTERED) == 0)
195         {
196             // this is the "base" unit case.
197             ret_cost += (BASE_UNCACHED_ROW_FETCH_COST * num_pages_per_row);
198         }
199         else
200         {
201             ret_cost += (BASE_CACHED_ROW_FETCH_COST * num_pages_per_row);
202         }
203 
204         return(ret_cost);
205     }
206 
207     /**
208      * Calculate the cost of a scan.
209      * <p>
210      * Cause this object to calculate the cost of performing the described
211      * scan.  The interface is setup such that first a call is made to
212      * calcualteScanCost(), and then subsequent calls to accessor routines
213      * are made to get various pieces of information about the cost of
214      * the scan.
215      * <p>
216      * For the purposes of costing this routine is going to assume that 
217      * a page will remain in cache between the time one next()/fetchNext()
218      * call and a subsequent next()/fetchNext() call is made within a scan.
219      * <p>
220      * The result of costing the scan is placed in the "cost_result".  
221      * The cost of the scan is stored by calling 
222      * cost_result.setEstimatedCost(cost).
223      * The estimated row count is stored by calling 
224      * cost_result.setEstimatedRowCount(row_count).
225      * <p>
226      * The estimated cost of the scan assumes the caller will 
227      * execute a fetchNext() loop for every row that qualifies between
228      * start and stop position.  Note that this cost is different than
229      * execution a next(),fetch() loop; or if the scan is going to be
230      * terminated by client prior to reaching the stop condition.
231      * <p>
232      * The estimated number of rows returned from the scan 
233      * assumes the caller will execute a fetchNext() loop for every 
234      * row that qualifies between start and stop position.
235      * <p>
236      *
237      *
238      * @param scan_type       The type of scan that will be executed.  There
239      *                        are currently 2 types:
240      *                        STORECOST_SCAN_NORMAL - scans will be executed
241      *                        using the standard next/fetch, where each fetch
242      *                        can retrieve 1 or many rows (if fetchNextGroup()
243      *                        interface is used).
244      *
245      *                        STORECOST_SCAN_SET - The entire result set will
246      *                        be retrieved using the the fetchSet() interface.
247      *
248      * @param row_count       Estimated total row count of the table.  The 
249      *                        current system tracks row counts in heaps better
250      *                        than btree's (btree's have "rows" which are not
251      *                        user rows - branch rows, control rows), so 
252      *                        if available the client should
253      *                        pass in the base table's row count into this
254      *                        routine to be used as the index's row count.
255      *                        If the caller has no idea, pass in -1.
256      *
257      * @param group_size      The number of rows to be returned by a single
258      *                        fetch call for STORECOST_SCAN_NORMAL scans.
259      *
260      * @param forUpdate       Should be true if the caller intends to update 
261      *                        through the scan.
262      * 
263      * @param scanColumnList  A description of which columns to return from 
264      *                        every fetch in the scan.  template, 
265      *                        and scanColumnList work together
266      *                        to describe the row to be returned by the scan - 
267      *                        see RowUtil for description of how these three 
268      *                        parameters work together to describe a "row".
269      * 
270      * @param template        A prototypical row which the scan may use to
271      *                        maintain its position in the conglomerate.  Not 
272      *                        all access method scan types will require this, 
273      *                        if they don't it's ok to pass in null.
274      *                        In order to scan a conglomerate one must 
275      *                        allocate 2 separate "row" templates.  The "row" 
276      *                        template passed into openScan is for the private
277      *                        use of the scan itself, and no access to it
278      *                        should be made by the caller while the scan is 
279      *                        still open.  Because of this the scanner must 
280      *                        allocate another "row" template to hold the 
281      *                        values returned from fetch().  Note that this 
282      *                        template must be for the full row, whether a 
283      *                        partial row scan is being executed or not.
284      *
285      * @param startKeyValue   An indexable row which holds a (partial) key 
286      *                        value which, in combination with the 
287      *                        startSearchOperator, defines the starting 
288      *                        position of the scan.  If null, the starting
289      *                        position of the scan is the first row of the 
290      *                        conglomerate.  The startKeyValue must only
291      *                        reference columns included in the scanColumnList.
292      *
293      * @param startSearchOperator 
294      *                        an operator which defines how the startKeyValue
295      *                        is to be searched for.  If startSearchOperation 
296      *                        is ScanController.GE, the scan starts on the 
297      *                        first row which is greater than or equal to the 
298      *                        startKeyValue.  If startSearchOperation is 
299      *                        ScanController.GT, the scan starts on the first
300      *                        row whose key is greater than startKeyValue.  The
301      *                        startSearchOperation parameter is ignored if the
302      *                        startKeyValue parameter is null.
303      *
304      * @param stopKeyValue    An indexable row which holds a (partial) key 
305      *                        value which, in combination with the 
306      *                        stopSearchOperator, defines the ending position
307      *                        of the scan.  If null, the ending position of the
308      *                        scan is the last row of the conglomerate.  The
309      *                        stopKeyValue must only reference columns included
310      *                        in the scanColumnList.
311      *
312      * @param stopSearchOperator
313      *                        an operator which defines how the stopKeyValue
314      *                        is used to determine the scan stopping position. 
315      *                        If stopSearchOperation is ScanController.GE, the
316      *                        scan stops just before the first row which is
317      *                        greater than or equal to the stopKeyValue.  If 
318      *                        stopSearchOperation is ScanController.GT, the 
319      *                        scan stops just before the first row whose key 
320      *                        is greater than startKeyValue.  The
321      *                        stopSearchOperation parameter is ignored if the
322      *                        stopKeyValue parameter is null.
323      *
324      *                        
325      * @param access_type     Describe the type of access the query will be
326      *                        performing to the ScanController.  
327      *
328      *                        STORECOST_CLUSTERED - The location of one scan
329      *                            is likely clustered "close" to the previous 
330      *                            scan.  For instance if the query plan were
331      *                            to used repeated "reopenScan()'s" to probe
332      *                            for the next key in an index, then this flag
333      *                            should be be specified.  If this flag is not 
334      *                            set then each scan will be costed independant
335      *                            of any other predicted scan access.
336      *
337      *
338      * @exception  StandardException  Standard exception policy.
339      *
340      * @see RowUtil
341      **/
342     public void getScanCost(
343     int                     scan_type,
344     long                    row_count,
345     int                     group_size,
346     boolean                 forUpdate,
347     FormatableBitSet        scanColumnList,
348     DataValueDescriptor[]   template,
349     DataValueDescriptor[]   startKeyValue,
350     int                     startSearchOperator,
351     DataValueDescriptor[]   stopKeyValue,
352     int                     stopSearchOperator,
353     boolean                 reopen_scan,
354     int                     access_type,
355     StoreCostResult         cost_result)
356         throws StandardException
357     {
358         if (SanityManager.DEBUG)
359         {
360             SanityManager.ASSERT(
361                 scan_type == StoreCostController.STORECOST_SCAN_NORMAL ||
362                 scan_type == StoreCostController.STORECOST_SCAN_SET);
363         }
364 
365         long estimated_row_count = ((row_count < 0) ?  num_rows : row_count);
366 
367         // This cost is if the caller has to go in and out of access for
368         // every row in the table.  The cost will be significantly less if
369         // group fetch is used, or if qualifiers
370 
371         // first the base cost of bringing each page in from cache:
372         double cost = (num_pages * BASE_UNCACHED_ROW_FETCH_COST);
373 
374         // the cost associated with the number of bytes in each row:
375         cost += (estimated_row_count * row_size) * BASE_ROW_PER_BYTECOST;
376 
377         // the base cost of getting each of the rows from a page assumed
378         // to already be cached (by the scan fetch) - this is only for all
379         // rows after the initial row on the page has been accounted for
380         // under the BASE_UNCACHED_ROW_FETCH_COST cost.:
381         long cached_row_count = estimated_row_count - num_pages;
382         if (cached_row_count < 0)
383             cached_row_count = 0;
384 
385         if (scan_type == StoreCostController.STORECOST_SCAN_NORMAL)
386             cost += cached_row_count * BASE_GROUPSCAN_ROW_COST;
387         else
388             cost += cached_row_count * BASE_HASHSCAN_ROW_FETCH_COST;
389 
390         if (SanityManager.DEBUG)
391         {
392             SanityManager.ASSERT(cost >= 0);
393             SanityManager.ASSERT(estimated_row_count >= 0);
394         }
395 
396         cost_result.setEstimatedCost(cost);
397 
398         // return that all rows will be scanned.
399         cost_result.setEstimatedRowCount(estimated_row_count);
400 
401         return;
402     }
403 }
404
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags