KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > derby > impl > store > access > sort > ExternalSortFactory


1 /*
2
3    Derby - Class org.apache.derby.impl.store.access.sort.ExternalSortFactory
4
5    Licensed to the Apache Software Foundation (ASF) under one or more
6    contributor license agreements. See the NOTICE file distributed with
7    this work for additional information regarding copyright ownership.
8    The ASF licenses this file to you under the Apache License, Version 2.0
9    (the "License"); you may not use this file except in compliance with
10    the License. You may obtain a copy of the License at
11
12       http://www.apache.org/licenses/LICENSE-2.0
13
14    Unless required by applicable law or agreed to in writing, software
15    distributed under the License is distributed on an "AS IS" BASIS,
16    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17    See the License for the specific language governing permissions and
18    limitations under the License.
19
20  */

21
22 package org.apache.derby.impl.store.access.sort;
23
24 import java.util.Properties JavaDoc;
25
26 import org.apache.derby.iapi.services.monitor.ModuleControl;
27 import org.apache.derby.iapi.services.monitor.ModuleSupportable;
28 import org.apache.derby.iapi.services.monitor.Monitor;
29 import org.apache.derby.iapi.services.property.PropertyUtil;
30 import org.apache.derby.iapi.services.sanity.SanityManager;
31
32 import org.apache.derby.iapi.error.StandardException;
33
34 import org.apache.derby.iapi.store.access.conglomerate.MethodFactory;
35 import org.apache.derby.iapi.store.access.conglomerate.Sort;
36 import org.apache.derby.iapi.store.access.conglomerate.SortFactory;
37
38 import org.apache.derby.iapi.store.access.SortObserver;
39 import org.apache.derby.iapi.store.access.SortCostController;
40 import org.apache.derby.iapi.store.access.ColumnOrdering;
41 import org.apache.derby.iapi.store.access.TransactionController;
42
43 import org.apache.derby.iapi.types.DataValueDescriptor;
44
45 import org.apache.derby.iapi.services.uuid.UUIDFactory;
46
47 import org.apache.derby.catalog.UUID;
48
49 /**
50
51 **/

52
53 public class ExternalSortFactory implements
54     SortFactory, ModuleControl, ModuleSupportable, SortCostController
55 {
56
57     private boolean userSpecified; // did the user specify sortBufferMax
58
private int defaultSortBufferMax;
59     private int sortBufferMax;
60
61     private static final String JavaDoc IMPLEMENTATIONID = "sort external";
62     private static final String JavaDoc FORMATUUIDSTRING = "D2976090-D9F5-11d0-B54D-00A024BF8879";
63     private UUID formatUUID = null;
64     private static final int DEFAULT_SORTBUFFERMAX = 1024;
65     private static final int MINIMUM_SORTBUFFERMAX = 4;
66
67     protected static final int DEFAULT_MEM_USE = 1024*1024; // aim for about 1Meg
68
// how many sort runs to combined into a larger sort run
69
// (DERBY-1661)
70
protected static final int DEFAULT_MAX_MERGE_RUN = 512;
71
72     // sizeof Node + reference to Node + 12 bytes tax
73
private static final int SORT_ROW_OVERHEAD = 8*4+12;
74
75
76     /*
77     ** Methods of MethodFactory
78     */

79
80     /**
81     There are no default properties for the external sort..
82     @see MethodFactory#defaultProperties
83     **/

84     public Properties JavaDoc defaultProperties()
85     {
86         return new Properties JavaDoc();
87     }
88
89     /**
90     @see MethodFactory#supportsImplementation
91     **/

92     public boolean supportsImplementation(String JavaDoc implementationId)
93     {
94         return implementationId.equals(IMPLEMENTATIONID);
95     }
96
97     /**
98     @see MethodFactory#primaryImplementationType
99     **/

100     public String JavaDoc primaryImplementationType()
101     {
102         return IMPLEMENTATIONID;
103     }
104
105     /**
106     @see MethodFactory#supportsFormat
107     **/

108     public boolean supportsFormat(UUID formatid)
109     {
110         return formatid.equals(formatUUID);
111     }
112
113     /**
114     @see MethodFactory#primaryFormat
115     **/

116     public UUID primaryFormat()
117     {
118         return formatUUID;
119     }
120
121     /*
122     ** Methods of SortFactory
123     */

124
125     /**
126     Create a sort.
127     This method could choose among different sort options,
128     depending on the properties etc., but currently it always
129     returns a merge sort.
130     @see SortFactory#createSort
131     **/

132     public Sort createSort(
133     TransactionController tran,
134     int segment,
135     Properties JavaDoc implParameters,
136     DataValueDescriptor[] template,
137     ColumnOrdering columnOrdering[],
138     SortObserver sortObserver,
139     boolean alreadyInOrder,
140     long estimatedRows,
141     int estimatedRowSize)
142         throws StandardException
143     {
144         MergeSort sort = new MergeSort();
145
146         // RESOLVE - mikem change this to use estimatedRows and
147
// estimatedRowSize to come up with a smarter number for sortBufferMax
148
// than a fixed number of rows. At least 2 possibilities:
149
// 1) add sortBufferMaxMem which would be the amount of memory
150
// the sorter could use, and then just pick the number of
151
// rows as (sortBufferMaxMem / (estimatedRows * estimatedRowSize)
152
// 2) add sortBufferUsePercentFree. This would be how much of
153
// the current free memory can the current sort use.
154
//
155

156         if (!userSpecified)
157         {
158             // derby.storage.sortBufferMax is not specified by the
159
// user, use default or try to figure out a reasonable sort
160
// size.
161

162             // if we have some idea on row size, set sort approx 1 meg of
163
// memory sort.
164
if (estimatedRowSize > 0)
165             {
166                 //
167
// for each column, there is a reference from the key array and
168
// the 4 bytes reference to the column object plus 12 bytes
169
// tax on the column object
170
// for each row, SORT_ROW_OVERHEAD is the Node and 4 bytes to
171
// point to the column array and 4 for alignment
172
//
173
estimatedRowSize += SORT_ROW_OVERHEAD +
174                     (template.length*(4+12)) + 8;
175                 sortBufferMax = DEFAULT_MEM_USE/estimatedRowSize;
176             }
177             else
178             {
179                 sortBufferMax = defaultSortBufferMax;
180             }
181             
182             // if there are barely more rows than sortBufferMax, use 2
183
// smaller runs of similar size instead of one larger run
184
//
185
// 10% slush is added to estimated Rows to catch the case where
186
// estimated rows underestimate the actual number of rows by 10%.
187
//
188
if (estimatedRows > sortBufferMax &&
189                 (estimatedRows*1.1) < sortBufferMax*2)
190                 sortBufferMax = (int)(estimatedRows/2 + estimatedRows/10);
191
192             // Make sure it is at least the minimum sort buffer size
193
if (sortBufferMax < MINIMUM_SORTBUFFERMAX)
194                 sortBufferMax = MINIMUM_SORTBUFFERMAX;
195         }
196         else
197         {
198             // if user specified derby.storage.sortBufferMax, use it.
199
sortBufferMax = defaultSortBufferMax;
200         }
201
202         if (SanityManager.DEBUG)
203         {
204             if (SanityManager.DEBUG_ON("SortTuning"))
205             {
206                 SanityManager.DEBUG("SortTuning",
207                     "sortBufferMax = " + sortBufferMax +
208                     " estimatedRows = " + estimatedRows +
209                     " estimatedRowSize = " + estimatedRowSize +
210                     " defaultSortBufferMax = " + defaultSortBufferMax);
211             }
212         }
213
214         sort.initialize(
215             template, columnOrdering, sortObserver,
216             alreadyInOrder, estimatedRows, sortBufferMax);
217         return sort;
218     }
219
220     /**
221      * Return an open SortCostController.
222      * <p>
223      * Return an open SortCostController which can be used to ask about
224      * the estimated costs of SortController() operations.
225      * <p>
226      *
227      * @return The open SortCostController.
228      *
229      * @exception StandardException Standard exception policy.
230      *
231      * @see SortCostController
232      **/

233     public SortCostController openSortCostController()
234         throws StandardException
235     {
236         return(this);
237     }
238
239     /*
240     ** Methods of SortCostController
241     */

242
243     public void close()
244     {
245         // nothing to do.
246
}
247
248     /**
249      * Short one line description of routine.
250      * <p>
251      * The sort algorithm is a N * log(N) algorithm. The following numbers
252      * on a PII, 400 MHZ machine, jdk117 with jit, insane.zip. This test
253      * is a simple "select * from table order by first_int_column. I then
254      * subtracted the time it takes to do "select * from table" from the
255      * result.
256      *
257      * number of rows elaspsed time in seconds
258      * -------------- -----------------------------
259      * 1000 0.20
260      * 10000 10.5
261      * 100000 80.0
262      *
263      * We assume that the formula for sort performance is of the form:
264      * performance = K * N * log(N). Solving the equation for the 1000
265      * and 100000 case we come up with:
266      *
267      * performance = 1 + 0.08 N ln(n)
268      *
269      * NOTE: Apparently, these measurements were done on a faster machine
270      * than was used for other performance measurements used by the optimizer.
271      * Experiments show that the 0.8 multiplier is off by a factor of 4
272      * with respect to other measurements (such as the time it takes to
273      * scan a conglomerate). I am correcting the formula to use 0.32
274      * rather than 0.08.
275      *
276      * - Jeff
277      *
278      * <p>
279      * RESOLVE (mikem) - this formula is very crude at the moment and will be
280      * refined later. known problems:
281      * 1) internal vs. external sort - we know that the performance of sort
282      * is discontinuous when we go from an internal to an external sort.
283      * A better model is probably a different set of contants for internal
284      * vs. external sort and some way to guess when this is going to happen.
285      * 2) current row size is never considered but is critical to performance.
286      * 3) estimatedExportRows is not used. This is a critical number to know
287      * if an internal vs. an external sort will happen.
288      *
289      * <p>
290      *
291      * @return The identifier to be used to open the conglomerate later.
292      *
293      *
294      * @exception StandardException Standard exception policy.
295      **/

296     public double getSortCost(
297     DataValueDescriptor[] template,
298     ColumnOrdering columnOrdering[],
299     boolean alreadyInOrder,
300     long estimatedInputRows,
301     long estimatedExportRows,
302     int estimatedRowSize)
303         throws StandardException
304     {
305         /* Avoid taking the log of 0 */
306         if (estimatedInputRows == 0)
307             return 0.0;
308
309         // RESOLVE - come up with some real benchmark. For now the cost
310
// of sort is 3 times the cost of scanning the data.
311

312         if (SanityManager.DEBUG)
313         {
314             SanityManager.ASSERT(estimatedInputRows >= 0);
315             SanityManager.ASSERT(estimatedExportRows >= 0);
316         }
317
318         double ret_val =
319             1 +
320             ((0.32) * (estimatedInputRows) * Math.log(estimatedInputRows));
321
322         return(ret_val);
323     }
324
325     /*
326     ** Methods of ModuleControl.
327     */

328
329     public boolean canSupport(Properties JavaDoc startParams) {
330
331         if (startParams == null)
332             return false;
333
334         String JavaDoc impl = startParams.getProperty("derby.access.Conglomerate.type");
335         if (impl == null)
336             return false;
337
338         return supportsImplementation(impl);
339     }
340
341
342     public void boot(boolean create, Properties JavaDoc startParams)
343         throws StandardException
344     {
345         // Find the UUID factory.
346
UUIDFactory uuidFactory = Monitor.getMonitor().getUUIDFactory();
347
348         // Make a UUID that identifies this sort's format.
349
formatUUID = uuidFactory.recreateUUID(FORMATUUIDSTRING);
350
351         // See if there's a new maximum sort buffer size.
352
defaultSortBufferMax = PropertyUtil.getSystemInt("derby.storage.sortBufferMax",
353                                 0, Integer.MAX_VALUE, 0);
354
355         // if defaultSortBufferMax is 0, the user did not specify
356
// sortBufferMax, then just set it to DEFAULT_SORTBUFFERMAX.
357
// if defaultSortBufferMax is not 0, the user specified sortBufferMax,
358
// do not override it.
359
if (defaultSortBufferMax == 0)
360         {
361             userSpecified = false;
362             defaultSortBufferMax = DEFAULT_SORTBUFFERMAX;
363         }
364         else
365         {
366             userSpecified = true;
367             if (defaultSortBufferMax < MINIMUM_SORTBUFFERMAX)
368                 defaultSortBufferMax = MINIMUM_SORTBUFFERMAX;
369         }
370
371     }
372
373     public void stop()
374     {
375     }
376
377 }
378
Popular Tags