KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > prefuse > util > DataLib


1 /**
2  * Copyright (c) 2004-2006 Regents of the University of California.
3  * See "license-prefuse.txt" for licensing terms.
4  */

5 package prefuse.util;
6
7 import java.util.Arrays JavaDoc;
8 import java.util.Comparator JavaDoc;
9 import java.util.HashMap JavaDoc;
10 import java.util.HashSet JavaDoc;
11 import java.util.Iterator JavaDoc;
12 import java.util.Map JavaDoc;
13
14 import prefuse.data.Table;
15 import prefuse.data.Tuple;
16 import prefuse.data.column.ColumnMetadata;
17 import prefuse.data.tuple.TupleSet;
18 import prefuse.util.collections.DefaultLiteralComparator;
19
20 /**
21  * Functions for processing an iterator of tuples, including the creation
22  * of arrays of particular tuple data values and summary
23  * statistics (min, max, median, mean, standard deviation).
24  *
25  * @author <a HREF="http://jheer.org">jeffrey heer</a>
26  */

27 public class DataLib {
28
29     /**
30      * Get an array containing all data values for a given tuple iteration
31      * and field.
32      * @param tuples an iterator over tuples
33      * @param field the column / data field name
34      * @return an array containing the data values
35      */

36     public static Object JavaDoc[] toArray(Iterator JavaDoc tuples, String JavaDoc field) {
37         Object JavaDoc[] array = new Object JavaDoc[100];
38         int i=0;
39         for ( ; tuples.hasNext(); ++i ) {
40             if ( i >= array.length )
41                 array = ArrayLib.resize(array, 3*array.length/2);
42             array[i] = ((Tuple)tuples.next()).get(field);
43         }
44         return ArrayLib.trim(array, i);
45     }
46
47     /**
48      * Get an array of doubles containing all column values for a given table
49      * and field. The {@link Table#canGetDouble(String)} method must return
50      * true for the given column name, otherwise an exception will be thrown.
51      * @param tuples an iterator over tuples
52      * @param field the column / data field name
53      * @return an array of doubles containing the column values
54      */

55     public static double[] toDoubleArray(Iterator JavaDoc tuples, String JavaDoc field) {
56         double[] array = new double[100];
57         int i=0;
58         for ( ; tuples.hasNext(); ++i ) {
59             if ( i >= array.length )
60                 array = ArrayLib.resize(array, 3*array.length/2);
61             array[i] = ((Tuple)tuples.next()).getDouble(field);
62         }
63         return ArrayLib.trim(array, i);
64     }
65
66     // ------------------------------------------------------------------------
67

68     /**
69      * Get a sorted array containing all column values for a given tuple
70      * iterator and field.
71      * @param tuples an iterator over tuples
72      * @param field the column / data field name
73      * @return an array containing the column values sorted
74      */

75     public static Object JavaDoc[] ordinalArray(Iterator JavaDoc tuples, String JavaDoc field) {
76         return DataLib.ordinalArray(tuples, field,
77                             DefaultLiteralComparator.getInstance());
78     }
79
80     /**
81      * Get a sorted array containing all column values for a given table and
82      * field.
83      * @param tuples an iterator over tuples
84      * @param field the column / data field name
85      * @param cmp a comparator for sorting the column contents
86      * @return an array containing the column values sorted
87      */

88     public static Object JavaDoc[] ordinalArray(Iterator JavaDoc tuples, String JavaDoc field,
89                                         Comparator JavaDoc cmp)
90     {
91         // get set of all unique values
92
HashSet JavaDoc set = new HashSet JavaDoc();
93         while ( tuples.hasNext() )
94             set.add(((Tuple)tuples.next()).get(field));
95         
96         // sort the unique values
97
Object JavaDoc[] o = set.toArray();
98         Arrays.sort(o, cmp);
99         return o;
100     }
101     
102     /**
103      * Get a sorted array containing all column values for a given tuple
104      * iterator and field.
105      * @param tuples a TupleSet
106      * @param field the column / data field name
107      * @return an array containing the column values sorted
108      */

109     public static Object JavaDoc[] ordinalArray(TupleSet tuples, String JavaDoc field) {
110         return ordinalArray(tuples, field,
111                             DefaultLiteralComparator.getInstance());
112     }
113
114     /**
115      * Get a sorted array containing all column values for a given table and
116      * field.
117      * @param tuples a TupleSet
118      * @param field the column / data field name
119      * @param cmp a comparator for sorting the column contents
120      * @return an array containing the column values sorted
121      */

122     public static Object JavaDoc[] ordinalArray(TupleSet tuples, String JavaDoc field,
123                                         Comparator JavaDoc cmp)
124     {
125         if ( tuples instanceof Table ) {
126             ColumnMetadata md = ((Table)tuples).getMetadata(field);
127             return md.getOrdinalArray();
128         } else {
129             return ordinalArray(tuples.tuples(), field, cmp);
130         }
131     }
132
133     // ------------------------------------------------------------------------
134

135     /**
136      * Get map mapping from column values (as Object instances) to their
137      * ordinal index in a sorted array.
138      * @param tuples an iterator over tuples
139      * @param field the column / data field name
140      * @return a map mapping column values to their position in a sorted
141      * order of values
142      */

143     public static Map JavaDoc ordinalMap(Iterator JavaDoc tuples, String JavaDoc field) {
144         return ordinalMap(tuples, field,
145                 DefaultLiteralComparator.getInstance());
146     }
147
148     /**
149      * Get map mapping from column values (as Object instances) to their
150      * ordinal index in a sorted array.
151      * @param tuples an iterator over tuples
152      * @param field the column / data field name
153      * @param cmp a comparator for sorting the column contents
154      * @return a map mapping column values to their position in a sorted
155      * order of values
156      */

157     public static Map JavaDoc ordinalMap(Iterator JavaDoc tuples, String JavaDoc field, Comparator JavaDoc cmp)
158     {
159         Object JavaDoc[] o = ordinalArray(tuples, field, cmp);
160         
161         // map the values to the non-negative numbers
162
HashMap JavaDoc map = new HashMap JavaDoc();
163         for ( int i=0; i<o.length; ++i )
164             map.put(o[i], new Integer JavaDoc(i));
165         return map;
166     }
167     
168     /**
169      * Get map mapping from column values (as Object instances) to their
170      * ordinal index in a sorted array.
171      * @param tuples a TupleSet
172      * @param field the column / data field name
173      * @return a map mapping column values to their position in a sorted
174      * order of values
175      */

176     public static Map JavaDoc ordinalMap(TupleSet tuples, String JavaDoc field) {
177         return ordinalMap(tuples, field,
178                           DefaultLiteralComparator.getInstance());
179     }
180
181     /**
182      * Get map mapping from column values (as Object instances) to their
183      * ordinal index in a sorted array.
184      * @param tuples a TupleSet
185      * @param field the column / data field name
186      * @param cmp a comparator for sorting the column contents
187      * @return a map mapping column values to their position in a sorted
188      * order of values
189      */

190     public static Map JavaDoc ordinalMap(TupleSet tuples, String JavaDoc field, Comparator JavaDoc cmp)
191     {
192         if ( tuples instanceof Table ) {
193             ColumnMetadata md = ((Table)tuples).getMetadata(field);
194             return md.getOrdinalMap();
195         } else {
196             return ordinalMap(tuples.tuples(), field, cmp);
197         }
198     }
199
200     // ------------------------------------------------------------------------
201

202     /**
203      * Get the number of values in a data column. Duplicates will be counted.
204      * @param tuples an iterator over tuples
205      * @param field the column / data field name
206      * @return the number of values
207      */

208     public static int count(Iterator JavaDoc tuples, String JavaDoc field) {
209         int i = 0;
210         for ( ; tuples.hasNext(); ++i, tuples.next() );
211         return i;
212     }
213
214     /**
215      * Get the number of distinct values in a data column.
216      * @param tuples an iterator over tuples
217      * @param field the column / data field name
218      * @return the number of distinct values
219      */

220     public static int uniqueCount(Iterator JavaDoc tuples, String JavaDoc field) {
221         HashSet JavaDoc set = new HashSet JavaDoc();
222         while ( tuples.hasNext() )
223             set.add(((Tuple)tuples.next()).get(field));
224         return set.size();
225     }
226
227     // ------------------------------------------------------------------------
228

229     /**
230      * Get the Tuple with the minimum data field value.
231      * @param tuples an iterator over tuples
232      * @param field the column / data field name
233      * @return the Tuple with the minimum data field value
234      */

235     public static Tuple min(Iterator JavaDoc tuples, String JavaDoc field) {
236         return min(tuples, field, DefaultLiteralComparator.getInstance());
237     }
238
239     /**
240      * Get the Tuple with the minimum data field value.
241      * @param tuples an iterator over tuples
242      * @param field the column / data field name
243      * @param cmp a comparator for sorting the column contents
244      * @return the Tuple with the minimum data field value
245      */

246     public static Tuple min(Iterator JavaDoc tuples, String JavaDoc field, Comparator JavaDoc cmp) {
247         Tuple t = null, tmp;
248         Object JavaDoc min = null;
249         if ( tuples.hasNext() ) {
250             t = (Tuple)tuples.next();
251             min = t.get(field);
252         }
253         while ( tuples.hasNext() ) {
254             tmp = (Tuple)tuples.next();
255             Object JavaDoc obj = tmp.get(field);
256             if ( cmp.compare(obj,min) < 0 ) {
257                 t = tmp;
258                 min = obj;
259             }
260         }
261         return t;
262     }
263
264     /**
265      * Get the Tuple with the minimum data field value.
266      * @param tuples a TupleSet
267      * @param field the column / data field name
268      * @return the Tuple with the minimum data field value
269      */

270     public static Tuple min(TupleSet tuples, String JavaDoc field, Comparator JavaDoc cmp) {
271         if ( tuples instanceof Table ) {
272             Table table = (Table)tuples;
273             ColumnMetadata md = table.getMetadata(field);
274             return table.getTuple(md.getMinimumRow());
275         } else {
276             return min(tuples.tuples(), field, cmp);
277         }
278     }
279     
280     /**
281      * Get the Tuple with the minimum data field value.
282      * @param tuples a TupleSet
283      * @param field the column / data field name
284      * @return the Tuple with the minimum data field value
285      */

286     public static Tuple min(TupleSet tuples, String JavaDoc field) {
287         return min(tuples, field, DefaultLiteralComparator.getInstance());
288     }
289     
290     // ------------------------------------------------------------------------
291

292     /**
293      * Get the Tuple with the maximum data field value.
294      * @param tuples an iterator over tuples
295      * @param field the column / data field name
296      * @return the Tuple with the maximum data field value
297      */

298     public static Tuple max(Iterator JavaDoc tuples, String JavaDoc field) {
299         return max(tuples, field, DefaultLiteralComparator.getInstance());
300     }
301
302     /**
303      * Get the Tuple with the maximum data field value.
304      * @param tuples an iterator over tuples
305      * @param field the column / data field name
306      * @param cmp a comparator for sorting the column contents
307      * @return the Tuple with the maximum data field value
308      */

309     public static Tuple max(Iterator JavaDoc tuples, String JavaDoc field, Comparator JavaDoc cmp) {
310         Tuple t = null, tmp;
311         Object JavaDoc min = null;
312         if ( tuples.hasNext() ) {
313             t = (Tuple)tuples.next();
314             min = t.get(field);
315         }
316         while ( tuples.hasNext() ) {
317             tmp = (Tuple)tuples.next();
318             Object JavaDoc obj = tmp.get(field);
319             if ( cmp.compare(obj,min) > 0 ) {
320                 t = tmp;
321                 min = obj;
322             }
323         }
324         return t;
325     }
326
327     /**
328      * Get the Tuple with the maximum data field value.
329      * @param tuples a TupleSet
330      * @param field the column / data field name
331      * @return the Tuple with the maximum data field value
332      */

333     public static Tuple max(TupleSet tuples, String JavaDoc field, Comparator JavaDoc cmp) {
334         if ( tuples instanceof Table ) {
335             Table table = (Table)tuples;
336             ColumnMetadata md = table.getMetadata(field);
337             return table.getTuple(md.getMaximumRow());
338         } else {
339             return max(tuples.tuples(), field, cmp);
340         }
341     }
342     
343     /**
344      * Get the Tuple with the maximum data field value.
345      * @param tuples a TupleSet
346      * @param field the column / data field name
347      * @return the Tuple with the maximum data field value
348      */

349     public static Tuple max(TupleSet tuples, String JavaDoc field) {
350         return max(tuples, field, DefaultLiteralComparator.getInstance());
351     }
352     
353     // ------------------------------------------------------------------------
354

355     /**
356      * Get the Tuple with the median data field value.
357      * @param tuples an iterator over tuples
358      * @param field the column / data field name
359      * @return the Tuple with the median data field value
360      */

361     public static Tuple median(Iterator JavaDoc tuples, String JavaDoc field) {
362         return median(tuples, field, DefaultLiteralComparator.getInstance());
363     }
364
365     /**
366      * Get the Tuple with the median data field value.
367      * @param tuples an iterator over tuples
368      * @param field the column / data field name
369      * @param cmp a comparator for sorting the column contents
370      * @return the Tuple with the median data field value
371      */

372     public static Tuple median(Iterator JavaDoc tuples, String JavaDoc field, Comparator JavaDoc cmp) {
373         Object JavaDoc[] t = new Tuple[100];
374         int i=0;
375         for ( ; tuples.hasNext(); ++i ) {
376             if ( i >= t.length )
377                 t = ArrayLib.resize(t, 3*t.length/2);
378             t[i] = (Tuple)tuples.next();
379         }
380         ArrayLib.trim(t, i);
381         
382         Object JavaDoc[] v = new Object JavaDoc[t.length];
383         int[] idx = new int[t.length];
384         for ( i=0; i<t.length; ++i ) {
385             idx[i] = i;
386             v[i] = ((Tuple)t[i]).get(field);
387         }
388     
389         ArrayLib.sort(v, idx, cmp);
390         return (Tuple)t[idx[idx.length/2]];
391     }
392
393     /**
394      * Get the Tuple with the median data field value.
395      * @param tuples a TupleSet
396      * @param field the column / data field name
397      * @return the Tuple with the median data field value
398      */

399     public static Tuple median(TupleSet tuples, String JavaDoc field, Comparator JavaDoc cmp) {
400         if ( tuples instanceof Table ) {
401             Table table = (Table)tuples;
402             ColumnMetadata md = table.getMetadata(field);
403             return table.getTuple(md.getMedianRow());
404         } else {
405             return median(tuples.tuples(), field, cmp);
406         }
407     }
408     
409     /**
410      * Get the Tuple with the median data field value.
411      * @param tuples a TupleSet
412      * @param field the column / data field name
413      * @return the Tuple with the median data field value
414      */

415     public static Tuple median(TupleSet tuples, String JavaDoc field) {
416         return median(tuples, field, DefaultLiteralComparator.getInstance());
417     }
418     
419     // ------------------------------------------------------------------------
420

421     /**
422      * Get the mean value of a tuple data value. If any tuple does not have the
423      * named field or the field is not a numeric data type, NaN will be returned.
424      * @param tuples an iterator over tuples
425      * @param field the column / data field name
426      * @return the mean value, or NaN if a non-numeric data type is encountered
427      */

428     public static double mean(Iterator JavaDoc tuples, String JavaDoc field) {
429         try {
430             int count = 0;
431             double sum = 0;
432             
433             while ( tuples.hasNext() ) {
434                 sum += ((Tuple)tuples.next()).getDouble(field);
435                 ++count;
436             }
437             return sum/count;
438         } catch ( Exception JavaDoc e ) {
439             return Double.NaN;
440         }
441     }
442     
443     /**
444      * Get the standard deviation of a tuple data value. If any tuple does not
445      * have the named field or the field is not a numeric data type, NaN will be
446      * returned.
447      * @param tuples an iterator over tuples
448      * @param field the column / data field name
449      * @return the standard deviation value, or NaN if a non-numeric data type
450      * is encountered
451      */

452     public static double deviation(Iterator JavaDoc tuples, String JavaDoc field) {
453         return deviation(tuples, field, DataLib.mean(tuples, field));
454     }
455     
456     /**
457      * Get the standard deviation of a tuple data value. If any tuple does not
458      * have the named field or the field is not a numeric data type, NaN will be
459      * returned.
460      * @param tuples an iterator over tuples
461      * @param field the column / data field name
462      * @param mean the mean of the column, used to speed up accurate
463      * deviation calculation
464      * @return the standard deviation value, or NaN if a non-numeric data type
465      * is encountered
466      */

467     public static double deviation(Iterator JavaDoc tuples, String JavaDoc field, double mean) {
468         try {
469             int count = 0;
470             double sumsq = 0;
471             double x;
472             
473             while ( tuples.hasNext() ) {
474                 x = ((Tuple)tuples.next()).getDouble(field) - mean;
475                 sumsq += x*x;
476                 ++count;
477             }
478             return Math.sqrt(sumsq/count);
479         } catch ( Exception JavaDoc e ) {
480             return Double.NaN;
481         }
482     }
483
484     /**
485      * Get the sum of a tuple data value. If any tuple does not have the named
486      * field or the field is not a numeric data type, NaN will be returned.
487      * @param tuples an iterator over tuples
488      * @param field the column / data field name
489      * @return the sum, or NaN if a non-numeric data type is encountered
490      */

491     public static double sum(Iterator JavaDoc tuples, String JavaDoc field) {
492         try {
493             double sum = 0;
494             
495             while ( tuples.hasNext() ) {
496                 sum += ((Tuple)tuples.next()).getDouble(field);
497             }
498             return sum;
499         } catch ( Exception JavaDoc e ) {
500             return Double.NaN;
501         }
502     }
503
504     // ------------------------------------------------------------------------
505

506     /**
507      * Infer the data field type across all tuples in a TupleSet.
508      * @param tuples the TupleSet to analyze
509      * @param field the data field to type check
510      * @return the inferred data type
511      * @throws IllegalArgumentException if incompatible types are used
512      */

513     public static Class JavaDoc inferType(TupleSet tuples, String JavaDoc field) {
514         if ( tuples instanceof Table ) {
515             return ((Table)tuples).getColumnType(field);
516         } else {
517             Class JavaDoc type = null, type2 = null;
518             Iterator JavaDoc iter = tuples.tuples();
519             while ( iter.hasNext() ) {
520                 Tuple t = (Tuple)iter.next();
521                 if ( type == null ) {
522                     type = t.getColumnType(field);
523                 } else if ( !type.equals(type2=t.getColumnType(field)) ) {
524                     if ( type2.isAssignableFrom(type) ) {
525                         type = type2;
526                     } else if ( !type.isAssignableFrom(type2) ) {
527                         throw new IllegalArgumentException JavaDoc(
528                            "The data field ["+field+"] does not have " +
529                            "a consistent type across provided Tuples");
530                     }
531                 }
532             }
533             return type;
534         }
535     }
536     
537 } // end of class DataLib
538
Popular Tags