KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > tools > LinkAnalysisTool


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.tools;
5
6 import java.io.*;
7 import java.util.*;
8
9 import net.nutch.fs.*;
10 import net.nutch.util.*;
11
12 /***************************************
13  * LinkAnalysisTool performs link-analysis by using the
14  * DistributedAnalysisTool. This single-process all-in-one
15  * tool is a wrapper around the more complicated distributed
16  * one.
17  *
18  * @author Mike Cafarella
19  ***************************************/

20 public class LinkAnalysisTool {
21     NutchFileSystem nfs;
22     File dbDir;
23     DistributedAnalysisTool dat = null;
24
25     /**
26      * We need a DistributedAnalysisTool in order to get
27      * things done!
28      */

29     public LinkAnalysisTool(NutchFileSystem nfs, File dbDir) throws IOException {
30         this.nfs = nfs;
31         this.dbDir = dbDir;
32         this.dat = new DistributedAnalysisTool(nfs, dbDir);
33     }
34
35     /**
36      * Do a single-process iteration over the database. Implemented
37      * by calling the distributed tool's functions.
38      */

39     public void iterate(int numIterations, File scoreFile) throws IOException {
40         for (int i = 0; i < numIterations; i++) {
41             File tmpDir = nfs.createTempFile("tmpdir", "la", dbDir);
42             nfs.delete(tmpDir);
43             nfs.mkdirs(tmpDir);
44
45             dat.initRound(1, tmpDir);
46             dat.computeRound(0, tmpDir);
47             dat.completeRound(tmpDir, scoreFile);
48         }
49     }
50
51     /**
52      * Kick off the link analysis. Submit the location of the db
53      * directory, as well as the cache size.
54      */

55     public static void main(String JavaDoc argv[]) throws IOException {
56         if (argv.length < 2) {
57             System.out.println("usage: java net.nutch.tools.LinkAnalysisTool (-local | -ndfs <namenode:port>) <db_dir> <numIterations>");
58             return;
59         }
60
61         NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, 0);
62         File dbDir = new File(argv[0]);
63         int numIterations = Integer.parseInt(argv[1]);
64
65         System.out.println("Started at " + new Date(System.currentTimeMillis()));
66         try {
67             LinkAnalysisTool lat = new LinkAnalysisTool(nfs, dbDir);
68             lat.iterate(numIterations, new File(dbDir, "linkstats.txt"));
69         } finally {
70             nfs.close();
71             System.out.println("Finished at " + new Date(System.currentTimeMillis()));
72         }
73     }
74 }
75
Popular Tags