1 2 3 4 package net.nutch.indexer; 5 6 import java.io.*; 7 import java.text.*; 8 import java.util.*; 9 import java.util.logging.*; 10 11 import net.nutch.fs.*; 12 import net.nutch.util.*; 13 14 import org.apache.lucene.store.Directory; 15 import org.apache.lucene.store.FSDirectory; 16 import org.apache.lucene.index.IndexWriter; 17 18 25 public class IndexMerger { 26 public static final Logger LOG = 27 LogFormatter.getLogger("net.nutch.indexer.IndexMerger"); 28 29 public static final String DONE_NAME = "merge.done"; 30 31 private NutchFileSystem nfs; 32 private File outputIndex; 33 private File localWorkingDir; 34 private File[] segments; 35 36 39 public IndexMerger(NutchFileSystem nfs, File[] segments, File outputIndex, File localWorkingDir) throws IOException { 40 this.nfs = nfs; 41 this.segments = segments; 42 this.outputIndex = outputIndex; 43 this.localWorkingDir = localWorkingDir; 44 } 45 46 49 private void merge() throws IOException { 50 Directory[] dirs = new Directory[segments.length]; 54 File[] localSegments = new File[segments.length]; 55 for (int i = 0; i < segments.length; i++) { 56 File tmpFile = new File(localWorkingDir, "indexmerge-" + new SimpleDateFormat("yyyMMddHHmmss").format(new Date(System.currentTimeMillis()))); 57 localSegments[i] = nfs.startLocalInput(new File(segments[i], "index"), tmpFile); 58 dirs[i] = FSDirectory.getDirectory(localSegments[i], false); 59 } 60 61 File tmpLocalOutput = new File(localWorkingDir, "merge-output"); 65 File localOutput = nfs.startLocalOutput(outputIndex, tmpLocalOutput); 66 67 IndexWriter writer = new IndexWriter(localOutput, null, true); 71 writer.mergeFactor = 50; 72 writer.infoStream = LogFormatter.getLogStream(LOG, Level.INFO); 73 writer.setUseCompoundFile(false); 74 writer.setSimilarity(new NutchSimilarity()); 75 writer.addIndexes(dirs); 76 writer.close(); 77 78 nfs.completeLocalOutput(outputIndex, tmpLocalOutput); 82 83 for (int i = 0; i < localSegments.length; i++) { 87 nfs.completeLocalInput(localSegments[i]); 88 } 89 localWorkingDir.delete(); 90 } 91 92 95 public static void main(String [] args) throws Exception { 96 String usage = "IndexMerger (-local | -ndfs <nameserver:port>) [-workingdir <workingdir>] outputIndex segments..."; 97 if (args.length < 2) { 98 System.err.println("Usage: " + usage); 99 return; 100 } 101 102 NutchFileSystem nfs = NutchFileSystem.parseArgs(args, 0); 106 try { 107 File workingDir = new File(new File("").getCanonicalPath()); 108 Vector segments = new Vector(); 109 110 int i = 0; 111 if ("-workingdir".equals(args[i])) { 112 i++; 113 workingDir = new File(new File(args[i++]).getCanonicalPath()); 114 } 115 File outputIndex = new File(args[i++]); 116 117 for (; i < args.length; i++) { 118 if (args[i] != null) { 119 segments.add(new File(args[i])); 120 } 121 } 122 workingDir = new File(workingDir, "indexmerger-workingdir"); 123 124 File[] segmentFiles = (File[]) segments.toArray(new File[segments.size()]); 128 LOG.info("merging segment indexes to: " + outputIndex); 129 130 if (workingDir.exists()) { 131 FileUtil.fullyDelete(workingDir); 132 } 133 workingDir.mkdirs(); 134 IndexMerger merger = new IndexMerger(nfs, segmentFiles, outputIndex, workingDir); 135 merger.merge(); 136 LOG.info("done merging"); 137 FileUtil.fullyDelete(workingDir); 138 } finally { 139 nfs.close(); 140 } 141 } 142 } 143 | Popular Tags |