1 2 3 4 package net.nutch.io; 5 6 import java.io.*; 7 import java.util.*; 8 import junit.framework.TestCase; 9 import java.util.logging.*; 10 11 import net.nutch.fs.*; 12 import net.nutch.util.*; 13 14 15 public class TestSequenceFile extends TestCase { 16 private static Logger LOG = SequenceFile.LOG; 17 18 public TestSequenceFile(String name) { super(name); } 19 20 21 public void testSequenceFile() throws Exception { 22 int count = 1024 * 10; 23 int megabytes = 1; 24 int factor = 5; 25 String file = System.getProperty("test.build.data",".") + "/test.seq"; 26 27 int seed = new Random().nextInt(); 28 29 NutchFileSystem nfs = new LocalFileSystem(); 30 try { 31 writeTest(nfs, count, seed, file); 33 readTest(nfs, count, seed, file); 34 35 sortTest(nfs, count, megabytes, factor, false, file); 36 checkSort(nfs, count, seed, file); 37 38 sortTest(nfs, count, megabytes, factor, true, file); 39 checkSort(nfs, count, seed, file); 40 41 mergeTest(nfs, count, seed, file, false, factor, megabytes); 42 checkSort(nfs, count, seed, file); 43 44 mergeTest(nfs, count, seed, file, true, factor, megabytes); 45 checkSort(nfs, count, seed, file); 46 } finally { 47 nfs.close(); 48 } 49 } 50 51 private static void writeTest(NutchFileSystem nfs, int count, int seed, String file) 52 throws IOException { 53 new File(file).delete(); 54 LOG.fine("creating with " + count + " records"); 55 SequenceFile.Writer writer = 56 new SequenceFile.Writer(nfs, file, RandomDatum.class, RandomDatum.class); 57 RandomDatum.Generator generator = new RandomDatum.Generator(seed); 58 for (int i = 0; i < count; i++) { 59 generator.next(); 60 RandomDatum key = generator.getKey(); 61 RandomDatum value = generator.getValue(); 62 63 writer.append(key, value); 64 } 65 writer.close(); 66 } 67 68 private static void readTest(NutchFileSystem nfs, int count, int seed, String file) 69 throws IOException { 70 RandomDatum k = new RandomDatum(); 71 RandomDatum v = new RandomDatum(); 72 LOG.fine("reading " + count + " records"); 73 SequenceFile.Reader reader = new SequenceFile.Reader(nfs, file); 74 RandomDatum.Generator generator = new RandomDatum.Generator(seed); 75 for (int i = 0; i < count; i++) { 76 generator.next(); 77 RandomDatum key = generator.getKey(); 78 RandomDatum value = generator.getValue(); 79 80 reader.next(k, v); 81 82 if (!k.equals(key)) 83 throw new RuntimeException ("wrong key at " + i); 84 if (!v.equals(value)) 85 throw new RuntimeException ("wrong value at " + i); 86 } 87 reader.close(); 88 } 89 90 91 private static void sortTest(NutchFileSystem nfs, int count, int megabytes, 92 int factor, boolean fast, String file) 93 throws IOException { 94 new File(file+".sorted").delete(); 95 SequenceFile.Sorter sorter = newSorter(nfs, fast, megabytes, factor); 96 LOG.fine("sorting " + count + " records"); 97 sorter.sort(file, file+".sorted"); 98 LOG.fine("done sorting " + count + " records"); 99 } 100 101 private static void checkSort(NutchFileSystem nfs, int count, int seed, String file) 102 throws IOException { 103 LOG.fine("sorting " + count + " records in memory for check"); 104 RandomDatum.Generator generator = new RandomDatum.Generator(seed); 105 SortedMap map = new TreeMap(); 106 for (int i = 0; i < count; i++) { 107 generator.next(); 108 RandomDatum key = generator.getKey(); 109 RandomDatum value = generator.getValue(); 110 map.put(key, value); 111 } 112 113 LOG.fine("checking order of " + count + " records"); 114 RandomDatum k = new RandomDatum(); 115 RandomDatum v = new RandomDatum(); 116 Iterator iterator = map.entrySet().iterator(); 117 SequenceFile.Reader reader = new SequenceFile.Reader(nfs, file + ".sorted"); 118 for (int i = 0; i < count; i++) { 119 Map.Entry entry = (Map.Entry)iterator.next(); 120 RandomDatum key = (RandomDatum)entry.getKey(); 121 RandomDatum value = (RandomDatum)entry.getValue(); 122 123 reader.next(k, v); 124 125 if (!k.equals(key)) 126 throw new RuntimeException ("wrong key at " + i); 127 if (!v.equals(value)) 128 throw new RuntimeException ("wrong value at " + i); 129 } 130 131 reader.close(); 132 LOG.fine("sucessfully checked " + count + " records"); 133 } 134 135 private static void mergeTest(NutchFileSystem nfs, int count, int seed, 136 String file, boolean fast, int factor, 137 int megabytes) 138 throws IOException { 139 140 LOG.fine("creating "+factor+" files with "+count/factor+" records"); 141 142 SequenceFile.Writer[] writers = new SequenceFile.Writer[factor]; 143 String [] names = new String [factor]; 144 String [] sortedNames = new String [factor]; 145 146 for (int i = 0; i < factor; i++) { 147 names[i] = file+"."+i; 148 sortedNames[i] = names[i] + ".sorted"; 149 nfs.delete(new File(names[i])); 150 nfs.delete(new File(sortedNames[i])); 151 writers[i] = 152 new SequenceFile.Writer(nfs, names[i], RandomDatum.class,RandomDatum.class); 153 } 154 155 RandomDatum.Generator generator = new RandomDatum.Generator(seed); 156 157 for (int i = 0; i < count; i++) { 158 generator.next(); 159 RandomDatum key = generator.getKey(); 160 RandomDatum value = generator.getValue(); 161 162 writers[i%factor].append(key, value); 163 } 164 165 for (int i = 0; i < factor; i++) 166 writers[i].close(); 167 168 for (int i = 0; i < factor; i++) { 169 LOG.fine("sorting file " + i + " with " + count/factor + " records"); 170 newSorter(nfs, fast, megabytes, factor).sort(names[i], sortedNames[i]); 171 } 172 173 LOG.fine("merging " + factor + " files with " + count/factor + " records"); 174 nfs.delete(new File(file+".sorted")); 175 newSorter(nfs, fast, megabytes, factor).merge(sortedNames, file+".sorted"); 176 } 177 178 private static SequenceFile.Sorter newSorter(NutchFileSystem nfs, 179 boolean fast, 180 int megabytes, int factor) { 181 SequenceFile.Sorter sorter = 182 fast 183 ? new SequenceFile.Sorter(nfs, new RandomDatum.Comparator(),RandomDatum.class) 184 : new SequenceFile.Sorter(nfs, RandomDatum.class, RandomDatum.class); 185 sorter.setMemory(megabytes * 1024*1024); 186 sorter.setFactor(factor); 187 return sorter; 188 } 189 190 191 192 public static void main(String [] args) throws Exception { 193 int count = 1024 * 1024; 194 int megabytes = 1; 195 int factor = 10; 196 boolean create = true; 197 boolean check = false; 198 boolean fast = false; 199 boolean merge = false; 200 String file = null; 201 String usage = "Usage: SequenceFile (-local | -ndfs <namenode:port>) [-count N] [-megabytes M] [-factor F] [-nocreate] [-check] [-fast] [-merge] file"; 202 203 if (args.length == 0) { 204 System.err.println(usage); 205 System.exit(-1); 206 } 207 int i = 0; 208 NutchFileSystem nfs = NutchFileSystem.parseArgs(args, i); 209 try { 210 for (; i < args.length; i++) { if (args[i] == null) { 212 continue; 213 } else if (args[i].equals("-count")) { 214 count = Integer.parseInt(args[++i]); 215 } else if (args[i].equals("-megabytes")) { 216 megabytes = Integer.parseInt(args[++i]); 217 } else if (args[i].equals("-factor")) { 218 factor = Integer.parseInt(args[++i]); 219 } else if (args[i].equals("-nocreate")) { 220 create = false; 221 } else if (args[i].equals("-check")) { 222 check = true; 223 } else if (args[i].equals("-fast")) { 224 fast = true; 225 } else if (args[i].equals("-merge")) { 226 merge = true; 227 } else { 228 file = args[i]; 230 } 231 } 232 LOG.info("count = " + count); 233 LOG.info("megabytes = " + megabytes); 234 LOG.info("factor = " + factor); 235 LOG.info("create = " + create); 236 LOG.info("check = " + check); 237 LOG.info("fast = " + fast); 238 LOG.info("merge = " + merge); 239 LOG.info("file = " + file); 240 241 int seed = 0; 242 243 LOG.setLevel(Level.FINE); 244 245 if (create && !merge) { 246 writeTest(nfs, count, seed, file); 247 readTest(nfs, count, seed, file); 248 } 249 250 if (merge) { 251 mergeTest(nfs, count, seed, file, fast, factor, megabytes); 252 } else { 253 sortTest(nfs, count, megabytes, factor, fast, file); 254 } 255 256 if (check) { 257 checkSort(nfs, count, seed, file); 258 } 259 } finally { 260 nfs.close(); 261 } 262 } 263 } 264 | Popular Tags |