KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > indexer > IndexMerger


1 /* Copyright (c) 2003-2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.indexer;
5
6 import java.io.*;
7 import java.text.*;
8 import java.util.*;
9 import java.util.logging.*;
10
11 import net.nutch.fs.*;
12 import net.nutch.util.*;
13
14 import org.apache.lucene.store.Directory;
15 import org.apache.lucene.store.FSDirectory;
16 import org.apache.lucene.index.IndexWriter;
17
18 /*************************************************************************
19  * IndexMerger creates an index for the output corresponding to a
20  * single fetcher run.
21  *
22  * @author Doug Cutting
23  * @author Mike Cafarella
24  *************************************************************************/

25 public class IndexMerger {
26   public static final Logger LOG =
27     LogFormatter.getLogger("net.nutch.indexer.IndexMerger");
28
29   public static final String JavaDoc DONE_NAME = "merge.done";
30
31   private NutchFileSystem nfs;
32   private File outputIndex;
33   private File localWorkingDir;
34   private File[] segments;
35
36   /**
37    * Merge all of the segments given
38    */

39   public IndexMerger(NutchFileSystem nfs, File[] segments, File outputIndex, File localWorkingDir) throws IOException {
40       this.nfs = nfs;
41       this.segments = segments;
42       this.outputIndex = outputIndex;
43       this.localWorkingDir = localWorkingDir;
44   }
45
46   /**
47    * Load all input segment indices, then add to the single output index
48    */

49   private void merge() throws IOException {
50     //
51
// Open local copies of NFS indices
52
//
53
Directory[] dirs = new Directory[segments.length];
54     File[] localSegments = new File[segments.length];
55     for (int i = 0; i < segments.length; i++) {
56         File tmpFile = new File(localWorkingDir, "indexmerge-" + new SimpleDateFormat("yyyMMddHHmmss").format(new Date(System.currentTimeMillis())));
57         localSegments[i] = nfs.startLocalInput(new File(segments[i], "index"), tmpFile);
58         dirs[i] = FSDirectory.getDirectory(localSegments[i], false);
59     }
60
61     //
62
// Get local output target
63
//
64
File tmpLocalOutput = new File(localWorkingDir, "merge-output");
65     File localOutput = nfs.startLocalOutput(outputIndex, tmpLocalOutput);
66
67     //
68
// Merge indices
69
//
70
IndexWriter writer = new IndexWriter(localOutput, null, true);
71     writer.mergeFactor = 50;
72     writer.infoStream = LogFormatter.getLogStream(LOG, Level.INFO);
73     writer.setUseCompoundFile(false);
74     writer.setSimilarity(new NutchSimilarity());
75     writer.addIndexes(dirs);
76     writer.close();
77
78     //
79
// Put target back
80
//
81
nfs.completeLocalOutput(outputIndex, tmpLocalOutput);
82
83     //
84
// Delete all local inputs, if necessary
85
//
86
for (int i = 0; i < localSegments.length; i++) {
87         nfs.completeLocalInput(localSegments[i]);
88     }
89     localWorkingDir.delete();
90   }
91
92   /**
93    * Create an index for the input files in the named directory.
94    */

95   public static void main(String JavaDoc[] args) throws Exception JavaDoc {
96     String JavaDoc usage = "IndexMerger (-local | -ndfs <nameserver:port>) [-workingdir <workingdir>] outputIndex segments...";
97     if (args.length < 2) {
98       System.err.println("Usage: " + usage);
99       return;
100     }
101
102     //
103
// Parse args, read all segment directories to be processed
104
//
105
NutchFileSystem nfs = NutchFileSystem.parseArgs(args, 0);
106     try {
107         File workingDir = new File(new File("").getCanonicalPath());
108         Vector segments = new Vector();
109
110         int i = 0;
111         if ("-workingdir".equals(args[i])) {
112             i++;
113             workingDir = new File(new File(args[i++]).getCanonicalPath());
114         }
115         File outputIndex = new File(args[i++]);
116
117         for (; i < args.length; i++) {
118             if (args[i] != null) {
119                 segments.add(new File(args[i]));
120             }
121         }
122         workingDir = new File(workingDir, "indexmerger-workingdir");
123
124         //
125
// Merge the indices
126
//
127
File[] segmentFiles = (File[]) segments.toArray(new File[segments.size()]);
128         LOG.info("merging segment indexes to: " + outputIndex);
129
130         if (workingDir.exists()) {
131             FileUtil.fullyDelete(workingDir);
132         }
133         workingDir.mkdirs();
134         IndexMerger merger = new IndexMerger(nfs, segmentFiles, outputIndex, workingDir);
135         merger.merge();
136         LOG.info("done merging");
137         FileUtil.fullyDelete(workingDir);
138     } finally {
139         nfs.close();
140     }
141   }
142 }
143
Popular Tags