KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > math > random > EmpiricalDistributionImpl


1 /*
2  * Copyright 2003-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.commons.math.random;
18
19 import java.io.Serializable JavaDoc;
20 import java.io.BufferedReader JavaDoc;
21 import java.io.FileReader JavaDoc;
22 import java.io.File JavaDoc;
23 import java.io.IOException JavaDoc;
24 import java.io.InputStreamReader JavaDoc;
25 import java.net.URL JavaDoc;
26 import java.util.ArrayList JavaDoc;
27 import java.util.List JavaDoc;
28
29 import org.apache.commons.math.stat.descriptive.SummaryStatistics;
30 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
31
32 /**
33  * Implements <code>EmpiricalDistribution</code> interface. This implementation
34  * uses what amounts to the
35  * <a HREF="http://nedwww.ipac.caltech.edu/level5/March02/Silverman/Silver2_6.html">
36  * Variable Kernel Method</a> with Gaussian smoothing:<p>
37  * <strong>Digesting the input file</strong>
38  * <ol><li>Pass the file once to compute min and max.</li>
39  * <li>Divide the range from min-max into <code>binCount</code> "bins."</li>
40  * <li>Pass the data file again, computing bin counts and univariate
41  * statistics (mean, std dev.) for each of the bins </li>
42  * <li>Divide the interval (0,1) into subintervals associated with the bins,
43  * with the length of a bin's subinterval proportional to its count.</li></ol>
44  * <strong>Generating random values from the distribution</strong><ol>
45  * <li>Generate a uniformly distributed value in (0,1) </li>
46  * <li>Select the subinterval to which the value belongs.
47  * <li>Generate a random Gaussian value with mean = mean of the associated
48  * bin and std dev = std dev of associated bin.</li></ol></p><p>
49  *<strong>USAGE NOTES:</strong><ul>
50  *<li>The <code>binCount</code> is set by default to 1000. A good rule of thumb
51  * is to set the bin count to approximately the length of the input file divided
52  * by 10. </li>
53  *<li>The input file <i>must</i> be a plain text file containing one valid numeric
54  * entry per line.</li>
55  * </ul></p>
56  *
57  * @version $Revision$ $Date: 2005-02-26 05:11:52 -0800 (Sat, 26 Feb 2005) $
58  */

59 public class EmpiricalDistributionImpl implements Serializable JavaDoc, EmpiricalDistribution {
60
61     /** Serializable version identifier */
62     static final long serialVersionUID = -6773236347582113490L;
63
64     /** List of SummaryStatistics objects characterizing the bins */
65     private ArrayList JavaDoc binStats = null;
66
67     /** Sample statistics */
68     SummaryStatistics sampleStats = null;
69
70     /** number of bins */
71     private int binCount = 1000;
72
73     /** is the distribution loaded? */
74     private boolean loaded = false;
75
76     /** upper bounds of subintervals in (0,1) "belonging" to the bins */
77     private double[] upperBounds = null;
78
79     /** RandomData instance to use in repeated calls to getNext() */
80     private RandomData randomData = new RandomDataImpl();
81
82     /**
83      * Creates a new EmpiricalDistribution with the default bin count.
84      */

85     public EmpiricalDistributionImpl() {
86         binStats = new ArrayList JavaDoc();
87     }
88
89     /**
90      * Creates a new EmpiricalDistribution with the specified bin count.
91      *
92      * @param binCount number of bins
93      */

94     public EmpiricalDistributionImpl(int binCount) {
95         this.binCount = binCount;
96         binStats = new ArrayList JavaDoc();
97     }
98
99      /**
100      * Computes the empirical distribution from the provided
101      * array of numbers.
102      *
103      * @param in the input data array
104      */

105     public void load(double[] in) {
106         DataAdapter da = new ArrayDataAdapter(in);
107         try {
108             da.computeStats();
109             fillBinStats(in);
110         } catch (Exception JavaDoc e) {
111             throw new RuntimeException JavaDoc(e.getMessage());
112         }
113         loaded = true;
114
115     }
116
117     /**
118      * Computes the empirical distribution using data read from a URL.
119      * @param url url of the input file
120      *
121      * @throws IOException if an IO error occurs
122      */

123     public void load(URL JavaDoc url) throws IOException JavaDoc {
124         BufferedReader JavaDoc in =
125             new BufferedReader JavaDoc(new InputStreamReader JavaDoc(url.openStream()));
126         try {
127             DataAdapter da = new StreamDataAdapter(in);
128             try {
129                 da.computeStats();
130             } catch (Exception JavaDoc e) {
131                 throw new IOException JavaDoc(e.getMessage());
132             }
133             in = new BufferedReader JavaDoc(new InputStreamReader JavaDoc(url.openStream()));
134             fillBinStats(in);
135             loaded = true;
136         } finally {
137            if (in != null) {
138                try {
139                    in.close();
140                } catch (Exception JavaDoc ex) {
141                    // ignore
142
}
143            }
144         }
145     }
146
147     /**
148      * Computes the empirical distribution from the input file.
149      *
150      * @param file the input file
151      * @throws IOException if an IO error occurs
152      */

153     public void load(File JavaDoc file) throws IOException JavaDoc {
154         BufferedReader JavaDoc in = new BufferedReader JavaDoc(new FileReader JavaDoc(file));
155         try {
156             DataAdapter da = new StreamDataAdapter(in);
157             try {
158                 da.computeStats();
159             } catch (Exception JavaDoc e) {
160                 throw new IOException JavaDoc(e.getMessage());
161             }
162             in = new BufferedReader JavaDoc(new FileReader JavaDoc(file));
163             fillBinStats(in);
164             loaded = true;
165         } finally {
166             if (in != null) {
167                 try {
168                     in.close();
169                 } catch (Exception JavaDoc ex) {
170                     // ignore
171
}
172             }
173         }
174     }
175
176     /**
177      * Provides methods for computing <code>sampleStats</code> and
178      * <code>beanStats</code> abstracting the source of data.
179      */

180     private abstract class DataAdapter{
181         /**
182          * Compute bin stats.
183          *
184          * @param min minimum value
185          * @param delta grid size
186          * @throws Exception if an error occurs computing bin stats
187          */

188         public abstract void computeBinStats(double min, double delta)
189                 throws Exception JavaDoc;
190         /**
191          * Compute sample statistics.
192          *
193          * @throws Exception if an error occurs computing sample stats
194          */

195         public abstract void computeStats() throws Exception JavaDoc;
196     }
197     /**
198      * Factory of <code>DataAdapter</code> objects. For every supported source
199      * of data (array of doubles, file, etc.) an instance of the proper object
200      * is returned.
201      */

202     private class DataAdapterFactory{
203         /**
204          * Creates a DataAdapter from a data object
205          *
206          * @param in object providing access to the data
207          * @return DataAdapter instance
208          */

209         public DataAdapter getAdapter(Object JavaDoc in) {
210             if (in instanceof BufferedReader JavaDoc) {
211                 BufferedReader JavaDoc inputStream = (BufferedReader JavaDoc) in;
212                 return new StreamDataAdapter(inputStream);
213             } else if (in instanceof double[]) {
214                 double[] inputArray = (double[]) in;
215                 return new ArrayDataAdapter(inputArray);
216             } else {
217                 throw new IllegalArgumentException JavaDoc(
218                     "Input data comes from the" + " unsupported source");
219             }
220         }
221     }
222     /**
223      * <code>DataAdapter</code> for data provided through some input stream
224      */

225     private class StreamDataAdapter extends DataAdapter{
226         
227         /** Input stream providng access to the data */
228         BufferedReader JavaDoc inputStream;
229         
230         /**
231          * Create a StreamDataAdapter from a BufferedReader
232          *
233          * @param in BufferedReader input stream
234          */

235         public StreamDataAdapter(BufferedReader JavaDoc in){
236             super();
237             inputStream = in;
238         }
239         /**
240          * Computes binStats
241          *
242          * @param min minimum value
243          * @param delta grid size
244          * @throws IOException if an IO error occurs
245          */

246         public void computeBinStats(double min, double delta)
247                 throws IOException JavaDoc {
248             String JavaDoc str = null;
249             double val = 0.0d;
250             while ((str = inputStream.readLine()) != null) {
251                 val = Double.parseDouble(str);
252                 SummaryStatistics stats =
253                     (SummaryStatistics) binStats.get(
254                         Math.max((int) Math.ceil((val - min) / delta) - 1, 0));
255                 stats.addValue(val);
256             }
257
258             inputStream.close();
259             inputStream = null;
260         }
261         /**
262          * Computes sampleStats
263          *
264          * @throws IOException if an IOError occurs
265          */

266         public void computeStats() throws IOException JavaDoc {
267             String JavaDoc str = null;
268             double val = 0.0;
269             sampleStats = SummaryStatistics.newInstance();
270             while ((str = inputStream.readLine()) != null) {
271                 val = new Double JavaDoc(str).doubleValue();
272                 sampleStats.addValue(val);
273             }
274             inputStream.close();
275             inputStream = null;
276         }
277     }
278
279     /**
280      * <code>DataAdapter</code> for data provided as array of doubles.
281      */

282     private class ArrayDataAdapter extends DataAdapter{
283         
284         /** Array of input data values */
285         private double[] inputArray;
286         
287         /**
288          * Construct an ArrayDataAdapter from a double[] array
289          *
290          * @param in double[] array holding the data
291          */

292         public ArrayDataAdapter(double[] in){
293             super();
294             inputArray = in;
295         }
296         /**
297          * Computes sampleStats
298          *
299          * @throws IOException if an IO error occurs
300          */

301         public void computeStats() throws IOException JavaDoc {
302             sampleStats = SummaryStatistics.newInstance();
303             for (int i = 0; i < inputArray.length; i++) {
304                 sampleStats.addValue(inputArray[i]);
305             }
306         }
307         /**
308          * Computes binStats
309          *
310          * @param min minimum value
311          * @param delta grid size
312          * @throws IOException if an IO error occurs
313          */

314         public void computeBinStats(double min, double delta)
315             throws IOException JavaDoc {
316             for (int i = 0; i < inputArray.length; i++) {
317                 SummaryStatistics stats =
318                     (SummaryStatistics) binStats.get(
319                         Math.max((int) Math.ceil(
320                                 (inputArray[i] - min) / delta)- 1, 0));
321                 stats.addValue(inputArray[i]);
322             }
323         }
324     }
325
326     /**
327      * Fills binStats array (second pass through data file).
328      *
329      * @param in object providing access to the data
330      * @throws IOException if an IO error occurs
331      */

332     private void fillBinStats(Object JavaDoc in) throws IOException JavaDoc {
333         // Load array of bin upper bounds -- evenly spaced from min - max
334
double min = sampleStats.getMin();
335         double max = sampleStats.getMax();
336         double delta = (max - min)/(new Double JavaDoc(binCount)).doubleValue();
337         double[] binUpperBounds = new double[binCount];
338         binUpperBounds[0] = min + delta;
339         for (int i = 1; i< binCount - 1; i++) {
340             binUpperBounds[i] = binUpperBounds[i-1] + delta;
341         }
342         binUpperBounds[binCount -1] = max;
343
344         // Initialize binStats ArrayList
345
if (!binStats.isEmpty()) {
346             binStats.clear();
347         }
348         for (int i = 0; i < binCount; i++) {
349             SummaryStatistics stats = SummaryStatistics.newInstance();
350             binStats.add(i,stats);
351         }
352
353         // Filling data in binStats Array
354
DataAdapterFactory aFactory = new DataAdapterFactory();
355         DataAdapter da = aFactory.getAdapter(in);
356         try {
357             da.computeBinStats(min, delta);
358         } catch (Exception JavaDoc e) {
359             if(e instanceof RuntimeException JavaDoc){
360                 throw new RuntimeException JavaDoc(e.getMessage());
361             }else{
362                 throw new IOException JavaDoc(e.getMessage());
363             }
364         }
365
366         // Assign upperBounds based on bin counts
367
upperBounds = new double[binCount];
368         upperBounds[0] =
369         ((double)((SummaryStatistics)binStats.get(0)).getN())/
370         (double)sampleStats.getN();
371         for (int i = 1; i < binCount-1; i++) {
372             upperBounds[i] = upperBounds[i-1] +
373             ((double)((SummaryStatistics)binStats.get(i)).getN())/
374             (double)sampleStats.getN();
375         }
376         upperBounds[binCount-1] = 1.0d;
377     }
378
379     /**
380      * Generates a random value from this distribution.
381      *
382      * @return the random value.
383      * @throws IllegalStateException if the distribution has not been loaded
384      */

385     public double getNextValue() throws IllegalStateException JavaDoc {
386
387         if (!loaded) {
388             throw new IllegalStateException JavaDoc("distribution not loaded");
389         }
390
391         // Start with a uniformly distributed random number in (0,1)
392
double x = Math.random();
393
394         // Use this to select the bin and generate a Gaussian within the bin
395
for (int i = 0; i < binCount; i++) {
396            if (x <= upperBounds[i]) {
397                SummaryStatistics stats = (SummaryStatistics)binStats.get(i);
398                if (stats.getN() > 0) {
399                    if (stats.getStandardDeviation() > 0) { // more than one obs
400
return randomData.nextGaussian
401                             (stats.getMean(),stats.getStandardDeviation());
402                    } else {
403                        return stats.getMean(); // only one obs in bin
404
}
405                }
406            }
407         }
408         throw new RuntimeException JavaDoc("No bin selected");
409     }
410
411     /**
412      * Returns a {@link StatisticalSummary} describing this distribution.
413      * <strong>Preconditions:</strong><ul>
414      * <li>the distribution must be loaded before invoking this method</li></ul>
415      *
416      * @return the sample statistics
417      * @throws IllegalStateException if the distribution has not been loaded
418      */

419     public StatisticalSummary getSampleStats() {
420         return sampleStats;
421     }
422
423     /**
424      * Returns the number of bins.
425      *
426      * @return the number of bins.
427      */

428     public int getBinCount() {
429         return binCount;
430     }
431
432     /**
433      * Returns an ArrayList of {@link SummaryStatistics} instances containing
434      * statistics describing the values in each of the bins. The ArrayList is
435      * indexed on the bin number.
436      *
437      * @return List of bin statistics.
438      */

439     public List JavaDoc getBinStats() {
440         return binStats;
441     }
442
443     /**
444      * Returns the array of upper bounds for the bins. Bins are: <br/>
445      * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],...,
446      * (upperBounds[binCount-1],max]
447      *
448      * @return array of bin upper bounds
449      */

450     public double[] getUpperBounds() {
451         return upperBounds;
452     }
453
454     /**
455      * Property indicating whether or not the distribution has been loaded.
456      *
457      * @return true if the distribution has been loaded
458      */

459     public boolean isLoaded() {
460         return loaded;
461     }
462 }
463
Popular Tags