KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > io > TestSequenceFile


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.io;
5
6 import java.io.*;
7 import java.util.*;
8 import junit.framework.TestCase;
9 import java.util.logging.*;
10
11 import net.nutch.fs.*;
12 import net.nutch.util.*;
13
14 /** Support for flat files of binary key/value pairs. */
15 public class TestSequenceFile extends TestCase {
16   private static Logger LOG = SequenceFile.LOG;
17
18   public TestSequenceFile(String JavaDoc name) { super(name); }
19
20   /** Unit tests for SequenceFile. */
21   public void testSequenceFile() throws Exception JavaDoc {
22     int count = 1024 * 10;
23     int megabytes = 1;
24     int factor = 5;
25     String JavaDoc file = System.getProperty("test.build.data",".") + "/test.seq";
26  
27     int seed = new Random().nextInt();
28
29     NutchFileSystem nfs = new LocalFileSystem();
30     try {
31         //LOG.setLevel(Level.FINE);
32
writeTest(nfs, count, seed, file);
33         readTest(nfs, count, seed, file);
34
35         sortTest(nfs, count, megabytes, factor, false, file);
36         checkSort(nfs, count, seed, file);
37
38         sortTest(nfs, count, megabytes, factor, true, file);
39         checkSort(nfs, count, seed, file);
40
41         mergeTest(nfs, count, seed, file, false, factor, megabytes);
42         checkSort(nfs, count, seed, file);
43
44         mergeTest(nfs, count, seed, file, true, factor, megabytes);
45         checkSort(nfs, count, seed, file);
46     } finally {
47         nfs.close();
48     }
49   }
50
51   private static void writeTest(NutchFileSystem nfs, int count, int seed, String JavaDoc file)
52     throws IOException {
53     new File(file).delete();
54     LOG.fine("creating with " + count + " records");
55     SequenceFile.Writer writer =
56       new SequenceFile.Writer(nfs, file, RandomDatum.class, RandomDatum.class);
57     RandomDatum.Generator generator = new RandomDatum.Generator(seed);
58     for (int i = 0; i < count; i++) {
59       generator.next();
60       RandomDatum key = generator.getKey();
61       RandomDatum value = generator.getValue();
62
63       writer.append(key, value);
64     }
65     writer.close();
66   }
67
68   private static void readTest(NutchFileSystem nfs, int count, int seed, String JavaDoc file)
69     throws IOException {
70     RandomDatum k = new RandomDatum();
71     RandomDatum v = new RandomDatum();
72     LOG.fine("reading " + count + " records");
73     SequenceFile.Reader reader = new SequenceFile.Reader(nfs, file);
74     RandomDatum.Generator generator = new RandomDatum.Generator(seed);
75     for (int i = 0; i < count; i++) {
76       generator.next();
77       RandomDatum key = generator.getKey();
78       RandomDatum value = generator.getValue();
79       
80       reader.next(k, v);
81       
82       if (!k.equals(key))
83         throw new RuntimeException JavaDoc("wrong key at " + i);
84       if (!v.equals(value))
85         throw new RuntimeException JavaDoc("wrong value at " + i);
86     }
87     reader.close();
88   }
89
90
91   private static void sortTest(NutchFileSystem nfs, int count, int megabytes,
92                                int factor, boolean fast, String JavaDoc file)
93     throws IOException {
94     new File(file+".sorted").delete();
95     SequenceFile.Sorter sorter = newSorter(nfs, fast, megabytes, factor);
96     LOG.fine("sorting " + count + " records");
97     sorter.sort(file, file+".sorted");
98     LOG.fine("done sorting " + count + " records");
99   }
100
101   private static void checkSort(NutchFileSystem nfs, int count, int seed, String JavaDoc file)
102     throws IOException {
103     LOG.fine("sorting " + count + " records in memory for check");
104     RandomDatum.Generator generator = new RandomDatum.Generator(seed);
105     SortedMap map = new TreeMap();
106     for (int i = 0; i < count; i++) {
107       generator.next();
108       RandomDatum key = generator.getKey();
109       RandomDatum value = generator.getValue();
110       map.put(key, value);
111     }
112
113     LOG.fine("checking order of " + count + " records");
114     RandomDatum k = new RandomDatum();
115     RandomDatum v = new RandomDatum();
116     Iterator iterator = map.entrySet().iterator();
117     SequenceFile.Reader reader = new SequenceFile.Reader(nfs, file + ".sorted");
118     for (int i = 0; i < count; i++) {
119       Map.Entry entry = (Map.Entry)iterator.next();
120       RandomDatum key = (RandomDatum)entry.getKey();
121       RandomDatum value = (RandomDatum)entry.getValue();
122
123       reader.next(k, v);
124
125       if (!k.equals(key))
126         throw new RuntimeException JavaDoc("wrong key at " + i);
127       if (!v.equals(value))
128         throw new RuntimeException JavaDoc("wrong value at " + i);
129     }
130
131     reader.close();
132     LOG.fine("sucessfully checked " + count + " records");
133   }
134
135   private static void mergeTest(NutchFileSystem nfs, int count, int seed,
136                                 String JavaDoc file, boolean fast, int factor,
137                                 int megabytes)
138     throws IOException {
139
140     LOG.fine("creating "+factor+" files with "+count/factor+" records");
141
142     SequenceFile.Writer[] writers = new SequenceFile.Writer[factor];
143     String JavaDoc[] names = new String JavaDoc[factor];
144     String JavaDoc[] sortedNames = new String JavaDoc[factor];
145     
146     for (int i = 0; i < factor; i++) {
147       names[i] = file+"."+i;
148       sortedNames[i] = names[i] + ".sorted";
149       nfs.delete(new File(names[i]));
150       nfs.delete(new File(sortedNames[i]));
151       writers[i] =
152         new SequenceFile.Writer(nfs, names[i], RandomDatum.class,RandomDatum.class);
153     }
154
155     RandomDatum.Generator generator = new RandomDatum.Generator(seed);
156
157     for (int i = 0; i < count; i++) {
158       generator.next();
159       RandomDatum key = generator.getKey();
160       RandomDatum value = generator.getValue();
161
162       writers[i%factor].append(key, value);
163     }
164
165     for (int i = 0; i < factor; i++)
166       writers[i].close();
167
168     for (int i = 0; i < factor; i++) {
169       LOG.fine("sorting file " + i + " with " + count/factor + " records");
170       newSorter(nfs, fast, megabytes, factor).sort(names[i], sortedNames[i]);
171     }
172
173     LOG.fine("merging " + factor + " files with " + count/factor + " records");
174     nfs.delete(new File(file+".sorted"));
175     newSorter(nfs, fast, megabytes, factor).merge(sortedNames, file+".sorted");
176   }
177
178   private static SequenceFile.Sorter newSorter(NutchFileSystem nfs,
179                                                boolean fast,
180                                                int megabytes, int factor) {
181     SequenceFile.Sorter sorter =
182       fast
183       ? new SequenceFile.Sorter(nfs, new RandomDatum.Comparator(),RandomDatum.class)
184       : new SequenceFile.Sorter(nfs, RandomDatum.class, RandomDatum.class);
185     sorter.setMemory(megabytes * 1024*1024);
186     sorter.setFactor(factor);
187     return sorter;
188   }
189
190
191   /** For debugging and testing. */
192   public static void main(String JavaDoc[] args) throws Exception JavaDoc {
193     int count = 1024 * 1024;
194     int megabytes = 1;
195     int factor = 10;
196     boolean create = true;
197     boolean check = false;
198     boolean fast = false;
199     boolean merge = false;
200     String JavaDoc file = null;
201     String JavaDoc usage = "Usage: SequenceFile (-local | -ndfs <namenode:port>) [-count N] [-megabytes M] [-factor F] [-nocreate] [-check] [-fast] [-merge] file";
202     
203     if (args.length == 0) {
204         System.err.println(usage);
205         System.exit(-1);
206     }
207     int i = 0;
208     NutchFileSystem nfs = NutchFileSystem.parseArgs(args, i);
209     try {
210       for (; i < args.length; i++) { // parse command line
211
if (args[i] == null) {
212               continue;
213           } else if (args[i].equals("-count")) {
214               count = Integer.parseInt(args[++i]);
215           } else if (args[i].equals("-megabytes")) {
216               megabytes = Integer.parseInt(args[++i]);
217           } else if (args[i].equals("-factor")) {
218               factor = Integer.parseInt(args[++i]);
219           } else if (args[i].equals("-nocreate")) {
220               create = false;
221           } else if (args[i].equals("-check")) {
222               check = true;
223           } else if (args[i].equals("-fast")) {
224               fast = true;
225           } else if (args[i].equals("-merge")) {
226               merge = true;
227           } else {
228               // file is required parameter
229
file = args[i];
230           }
231         }
232         LOG.info("count = " + count);
233         LOG.info("megabytes = " + megabytes);
234         LOG.info("factor = " + factor);
235         LOG.info("create = " + create);
236         LOG.info("check = " + check);
237         LOG.info("fast = " + fast);
238         LOG.info("merge = " + merge);
239         LOG.info("file = " + file);
240
241         int seed = 0;
242  
243         LOG.setLevel(Level.FINE);
244
245         if (create && !merge) {
246             writeTest(nfs, count, seed, file);
247             readTest(nfs, count, seed, file);
248         }
249
250         if (merge) {
251             mergeTest(nfs, count, seed, file, fast, factor, megabytes);
252         } else {
253             sortTest(nfs, count, megabytes, factor, fast, file);
254         }
255     
256         if (check) {
257             checkSort(nfs, count, seed, file);
258         }
259       } finally {
260           nfs.close();
261       }
262   }
263 }
264
Popular Tags