KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > fetcher > FetcherOutput


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.fetcher;
5
6 import java.io.*;
7 import java.util.Arrays JavaDoc;
8 import java.util.Date JavaDoc;
9
10 import net.nutch.io.*;
11 import net.nutch.fs.*;
12 import net.nutch.util.*;
13 import net.nutch.pagedb.FetchListEntry;
14 import net.nutch.tools.UpdateDatabaseTool;
15 import net.nutch.parse.Outlink;
16
17 /*********************************************
18  * An entry in the fetcher's output. This includes all of the fetcher output
19  * except the raw and stripped versions of the content, which are placed in
20  * separate files.
21  *
22  * <p>
23  * Note by John Xing: As of 20041022, option -noParsing is introduced
24  * in Fetcher.java. This changes fetcher behavior. Accordingly
25  * there are necessary modifications in this class.
26  * Check Fetcher.java and ParseSegment.java for details.
27  *
28  * @author Doug Cutting
29  *********************************************/

30 public final class FetcherOutput implements Writable {
31   public static final String JavaDoc DIR_NAME = "fetcher";
32   // 20041024, xing,
33
// When fetcher is run with option -noParsing, DIR_NAME_NP is created
34
// instead of DIR_NAME. In separate pass, ParseSegment.java looks for
35
// DIR_NAME_NP and generates DIR_NAME. Check ParseSegment.java for more info.
36
public static final String JavaDoc DIR_NAME_NP = DIR_NAME+"_output";
37   public static final String JavaDoc DONE_NAME = "fetcher.done";
38   public static final String JavaDoc ERROR_NAME = "fetcher.error";
39
40   private final static byte VERSION = 4;
41
42   public final static byte RETRY = 0;
43   public final static byte SUCCESS = 1;
44   public final static byte NOT_FOUND = 2;
45   public final static byte CANT_PARSE = 4; // fetched, but can't be parsed
46

47   private FetchListEntry fetchListEntry;
48   private MD5Hash md5Hash;
49   private int status;
50   private long fetchDate;
51
52   public FetcherOutput() {}
53
54   public FetcherOutput(FetchListEntry fetchListEntry,
55                        MD5Hash md5Hash, int status) {
56     this.fetchListEntry = fetchListEntry;
57     this.md5Hash = md5Hash;
58     this.status = status;
59     this.fetchDate = System.currentTimeMillis();
60   }
61
62   public byte getVersion() { return VERSION; }
63
64   public final void readFields(DataInput in) throws IOException {
65     byte version = in.readByte(); // read version
66
fetchListEntry = FetchListEntry.read(in);
67     md5Hash = MD5Hash.read(in);
68     status = in.readByte();
69
70     if (version < 4) {
71       UTF8.readString(in); // read & ignore title
72
int totalOutlinks = in.readInt(); // read & ignore outlinks
73
for (int i = 0; i < totalOutlinks; i++) {
74         Outlink.skip(in);
75       }
76     }
77
78     fetchDate = (version > 1) ? in.readLong() : 0; // added in version=2
79
}
80
81   public final void write(DataOutput out) throws IOException {
82     out.writeByte(VERSION); // store current version
83
fetchListEntry.write(out);
84     md5Hash.write(out);
85     out.writeByte(status);
86     out.writeLong(fetchDate);
87   }
88
89   public static FetcherOutput read(DataInput in) throws IOException {
90     FetcherOutput fetcherOutput = new FetcherOutput();
91     fetcherOutput.readFields(in);
92     return fetcherOutput;
93   }
94
95   //
96
// Accessor methods
97
//
98
public FetchListEntry getFetchListEntry() { return fetchListEntry; }
99   public MD5Hash getMD5Hash() { return md5Hash; }
100   public int getStatus() { return status; }
101   public void setStatus(int status) { this.status = status; }
102   public long getFetchDate() { return fetchDate; }
103   public void setFetchDate(long fetchDate) { this.fetchDate = fetchDate; }
104
105   // convenience methods
106
public UTF8 getUrl() { return getFetchListEntry().getUrl(); }
107   public String JavaDoc[] getAnchors() { return getFetchListEntry().getAnchors(); }
108
109   public boolean equals(Object JavaDoc o) {
110     if (!(o instanceof FetcherOutput))
111       return false;
112     FetcherOutput other = (FetcherOutput)o;
113     return
114       this.fetchListEntry.equals(other.fetchListEntry) &&
115       this.md5Hash.equals(other.md5Hash) &&
116       (this.status == other.status);
117   }
118
119
120   public String JavaDoc toString() {
121     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
122     buffer.append("FetchListEntry: " + fetchListEntry + "Fetch Result:\n" );
123     buffer.append("MD5Hash: " + md5Hash + "\n" );
124     buffer.append("Status: " + status + "\n" );
125     buffer.append("FetchDate: " + new Date JavaDoc(fetchDate) + "\n" );
126     return buffer.toString();
127   }
128
129   public static void main(String JavaDoc argv[]) throws Exception JavaDoc {
130     String JavaDoc usage = "FetcherOutput (-local <path> | -ndfs <path> <namenode:port>) (-recno <recno> | -dumpall) [-filename <filename>]";
131     if (argv.length == 0 || argv.length > 4) {
132       System.out.println("usage:" + usage);
133       return;
134     }
135
136     // Process the args
137
String JavaDoc filename = FetcherOutput.DIR_NAME;
138     boolean dumpall = false;
139     int recno = -1;
140     int i = 0;
141     NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, i);
142     for (; i < argv.length; i++) {
143         if ("-recno".equals(argv[i])) {
144             recno = Integer.parseInt(argv[i+1]);
145             i++;
146         } else if ("-dumpall".equals(argv[i])) {
147             dumpall = true;
148         } else if ("-filename".equals(argv[i])) {
149             filename = argv[i+1];
150             i++;
151         }
152     }
153
154     // Now carry out the command
155
ArrayFile.Reader fetcher = new ArrayFile.Reader(nfs, filename);
156     try {
157       FetcherOutput fo = new FetcherOutput();
158
159       if (dumpall) {
160         while ((fo = (FetcherOutput) fetcher.next(fo)) != null) {
161           recno++;
162           System.out.println("Retrieved " + recno + " from file " + filename);
163           System.out.println(fo);
164         }
165       } else if (recno >= 0) {
166         fetcher.get(recno, fo);
167         System.out.println("Retrieved " + recno + " from file " + filename);
168         System.out.println(fo);
169       }
170     } finally {
171       fetcher.close();
172     }
173   }
174 }
175
Popular Tags