KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > hsqldb > lib > ArrayCounter


1 /* Copyright (c) 2001-2005, The HSQL Development Group
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *
7  * Redistributions of source code must retain the above copyright notice, this
8  * list of conditions and the following disclaimer.
9  *
10  * Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  *
14  * Neither the name of the HSQL Development Group nor the names of its
15  * contributors may be used to endorse or promote products derived from this
16  * software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG,
22  * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
26  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */

30
31
32 package org.hsqldb.lib;
33
/**
 * Collection of routines for counting the distribution of the values
 * in an int[] array.
 *
 * All range arithmetic is carried out in long so that ranges wider than
 * Integer.MAX_VALUE (for example Integer.MIN_VALUE to Integer.MAX_VALUE)
 * are handled without overflow.
 *
 * @author fredt@users
 * @version 1.7.2
 * @since 1.7.2
 */
public class ArrayCounter {

    /**
     * Returns an int[] array of length segments containing the distribution
     * count of the first elements entries of an unsorted int[] array whose
     * values lie between start (inclusive) and limit (exclusive). Values
     * outside that range are ignored.<p>
     *
     * A usage example is determining the count of people of each age group
     * in a large int[] array containing the age of each person. Called with
     * (array, array.length, 16, 0, 80), it will return an int[16] with the
     * first element the count of people aged 0-4, the second element the
     * count of those aged 5-9, and so on. People aged 80 or above are
     * excluded. If the range is not a multiple of segments, the last segment
     * will cover a smaller sub-range than the rest.
     *
     * @param array the values to count
     * @param elements the number of leading entries of array to examine
     * @param segments the number of equal-width sub-ranges to count into
     * @param start the lowest value counted (inclusive)
     * @param limit the upper bound of the counted values (exclusive)
     * @return an array of length segments holding the count for each
     *   sub-range; all zeros when limit is not greater than start
     */
    public static int[] countSegments(int[] array, int elements,
                                      int segments, int start, int limit) {

        int[] counts   = new int[segments];
        long  interval = calcInterval(segments, start, limit);

        // empty or inverted range: nothing can be counted
        if (interval <= 0) {
            return counts;
        }

        for (int i = 0; i < elements; i++) {
            int element = array[i];

            if (element < start || element >= limit) {
                continue;
            }

            // widen before subtracting: element - start can exceed the int
            // range when start is a large negative value
            int index = (int) (((long) element - start) / interval);

            counts[index]++;
        }

        return counts;
    }

    /**
     * With an unsorted int[] array and with target a positive integer in the
     * range (1,array.length), finds the value in the range (start,limit) of
     * the largest element (rank) where the count of all smaller elements in
     * that range is less than or equal to target. Parameter margin indicates
     * the margin of error in target.<p>
     *
     * In statistics, this can be used to calculate a median or quartile
     * value. A usage example applied to an array of age values is to
     * determine the maximum age of a given number of people. With the example
     * array given in countSegments, rank(array, c, 6000, 18, 65, 0) will
     * return an age value between 18 and 64 (inclusive) and the count of all
     * people aged between 18 and the returned value (exclusive) will be less
     * than or equal to 6000.<p>
     *
     * If fewer than target elements lie in the range, limit is returned. If
     * limit is not greater than start, start is returned.
     *
     * @param array the values to examine
     * @param elements the number of leading entries of array to examine
     * @param target the desired count of elements below the returned value
     * @param start the lowest value considered (inclusive)
     * @param limit the upper bound of the values considered (exclusive)
     * @param margin the shortfall from target that is still acceptable
     * @return the rank value as described above
     */
    public static int rank(int[] array, int elements, int target, int start,
                           int limit, int margin) {

        final int segments     = 256;
        int       elementCount = 0;
        int       currentLimit = limit;

        for (;;) {
            long interval = calcInterval(segments, start, currentLimit);

            // empty or inverted range: there is nothing left to refine
            if (interval <= 0) {
                return start;
            }

            int[] counts = countSegments(array, elements, segments, start,
                                         currentLimit);

            // advance in long and never past the current limit, so the
            // result can neither overflow int nor leave the requested range
            long position = start;

            for (int i = 0; i < counts.length && position < currentLimit;
                    i++) {
                if (elementCount + counts[i] >= target) {
                    break;
                }

                elementCount += counts[i];
                position     = Math.min(position + interval, currentLimit);
            }

            start = (int) position;

            if (elementCount + margin >= target) {
                return start;
            }

            // segments are already one value wide: no finer pass is possible
            if (interval <= 1) {
                return start;
            }

            // next pass subdivides only the segment that crosses the target
            currentLimit = (int) Math.min(position + interval, limit);
        }
    }

    /**
     * Helper method to calculate the span of the sub-interval. Simply returns
     * the ceiling of ((limit - start) / segments), computed in long
     * arithmetic, and returns 0 when limit is not greater than start.
     *
     * @param segments the number of sub-intervals
     * @param start the lower bound of the range (inclusive)
     * @param limit the upper bound of the range (exclusive)
     * @return the width of each sub-interval, or 0 for an empty or inverted
     *   range
     */
    static long calcInterval(int segments, int start, int limit) {

        // widen before subtracting so that wide ranges do not wrap negative
        long range = (long) limit - start;

        if (range < 0) {
            return 0;
        }

        long partSegment = (range % segments) == 0 ? 0
                                                   : 1;

        return (range / segments) + partSegment;
    }
}
154
Popular Tags