package net.nutch.fetcher;

import java.io.*;
import java.util.Arrays;
import java.util.Date;

import net.nutch.io.*;
import net.nutch.fs.*;
import net.nutch.util.*;
import net.nutch.pagedb.FetchListEntry;
import net.nutch.tools.UpdateDatabaseTool;
import net.nutch.parse.Outlink;

/**
 * A serializable record pairing a {@link FetchListEntry} with the MD5 hash,
 * status code and timestamp recorded for it.
 *
 * <p>Instances are written with {@link #write(DataOutput)} and read back with
 * {@link #readFields(DataInput)} or {@link #read(DataInput)}.  The reader also
 * accepts older serialized layouts (see {@link #readFields(DataInput)}).
 */
public final class FetcherOutput implements Writable {
  public static final String DIR_NAME = "fetcher";
  public static final String DIR_NAME_NP = DIR_NAME + "_output";
  public static final String DONE_NAME = "fetcher.done";
  public static final String ERROR_NAME = "fetcher.error";

  /** Version byte emitted by {@link #write(DataOutput)}. */
  private final static byte VERSION = 4;

  // Status codes.  NOTE(review): presumably the values carried in the
  // status field; the value 3 is not defined in this file.
  public final static byte RETRY = 0;
  public final static byte SUCCESS = 1;
  public final static byte NOT_FOUND = 2;
  public final static byte CANT_PARSE = 4;

  private FetchListEntry fetchListEntry;
  private MD5Hash md5Hash;
  private int status;
  private long fetchDate;

  /** Creates an empty instance, to be populated by {@link #readFields(DataInput)}. */
  public FetcherOutput() {}

  /**
   * Creates a record for the given entry and stamps it with the current time.
   *
   * @param fetchListEntry the fetch-list entry this record belongs to
   * @param md5Hash        the MD5 hash stored with the record
   * @param status         the status code (serialized as a single byte)
   */
  public FetcherOutput(FetchListEntry fetchListEntry,
                       MD5Hash md5Hash, int status) {
    this.fetchListEntry = fetchListEntry;
    this.md5Hash = md5Hash;
    this.status = status;
    this.fetchDate = System.currentTimeMillis();
  }

  /** Returns the serialization version written by this class. */
  public byte getVersion() { return VERSION; }

  /**
   * Populates this instance from {@code in}.
   *
   * <p>Layout: version byte, fetch-list entry, MD5 hash, status byte, then
   * (for versions before 4 only) a UTF8 string and a count-prefixed list of
   * outlinks, which are read and discarded, and finally (for versions after 1)
   * the fetch date as a long.  For version 1 and below the fetch date is 0.
   *
   * @param in the stream to read from
   * @throws IOException if the underlying stream fails
   */
  public final void readFields(DataInput in) throws IOException {
    byte version = in.readByte();
    fetchListEntry = FetchListEntry.read(in);
    md5Hash = MD5Hash.read(in);
    status = in.readByte();

    if (version < 4) {
      // Older layouts carried extra fields here; consume and drop them so
      // the stream is positioned correctly for what follows.
      UTF8.readString(in);
      int totalOutlinks = in.readInt();
      for (int i = 0; i < totalOutlinks; i++) {
        Outlink.skip(in);
      }
    }

    fetchDate = (version > 1) ? in.readLong() : 0;
  }

  /**
   * Serializes this instance in the current ({@link #VERSION}) layout:
   * version byte, fetch-list entry, MD5 hash, status byte, fetch date.
   *
   * @param out the stream to write to
   * @throws IOException if the underlying stream fails
   */
  public final void write(DataOutput out) throws IOException {
    out.writeByte(VERSION);
    fetchListEntry.write(out);
    md5Hash.write(out);
    out.writeByte(status);          // only the low-order byte is written
    out.writeLong(fetchDate);
  }

  /**
   * Reads a new instance from {@code in}.
   *
   * @param in the stream to read from
   * @return a freshly created, populated instance
   * @throws IOException if the underlying stream fails
   */
  public static FetcherOutput read(DataInput in) throws IOException {
    FetcherOutput fetcherOutput = new FetcherOutput();
    fetcherOutput.readFields(in);
    return fetcherOutput;
  }

  public FetchListEntry getFetchListEntry() { return fetchListEntry; }
  public MD5Hash getMD5Hash() { return md5Hash; }
  public int getStatus() { return status; }
  public void setStatus(int status) { this.status = status; }
  public long getFetchDate() { return fetchDate; }
  public void setFetchDate(long fetchDate) { this.fetchDate = fetchDate; }

  /** Convenience accessor delegating to the fetch-list entry. */
  public UTF8 getUrl() { return getFetchListEntry().getUrl(); }

  /** Convenience accessor delegating to the fetch-list entry. */
  public String[] getAnchors() { return getFetchListEntry().getAnchors(); }

  /**
   * Two instances are equal when their fetch-list entries, MD5 hashes and
   * status codes are equal.  The fetch date is not part of the comparison.
   */
  public boolean equals(Object o) {
    if (!(o instanceof FetcherOutput))
      return false;
    FetcherOutput other = (FetcherOutput) o;
    return
      this.fetchListEntry.equals(other.fetchListEntry) &&
      this.md5Hash.equals(other.md5Hash) &&
      (this.status == other.status);
  }

  /**
   * Hash code matching {@link #equals(Object)}: derived only from fields that
   * {@code equals} compares.
   *
   * <p>The fetch-list entry is left out on purpose: this file does not show
   * whether {@code FetchListEntry} overrides {@code hashCode()}, and omitting
   * a compared field is always safe.  NOTE(review): this relies on
   * {@code MD5Hash.hashCode()} agreeing with {@code MD5Hash.equals()} --
   * confirm.
   */
  public int hashCode() {
    int result = (md5Hash == null) ? 0 : md5Hash.hashCode();
    return 31 * result + status;
  }

  /** Returns a multi-line, human-readable dump of this record. */
  public String toString() {
    StringBuffer buffer = new StringBuffer();
    buffer.append("FetchListEntry: " + fetchListEntry + "Fetch Result:\n");
    buffer.append("MD5Hash: " + md5Hash + "\n");
    buffer.append("Status: " + status + "\n");
    buffer.append("FetchDate: " + new Date(fetchDate) + "\n");
    return buffer.toString();
  }

  /**
   * Reports whether the option at {@code argv[i]} is followed by a usable value.
   * A null slot is rejected as well as a missing one, because the argument
   * array has already been handed to {@code NutchFileSystem.parseArgs}, whose
   * effect on the array is not visible from this file.
   */
  private static boolean hasValue(String[] argv, int i) {
    return i + 1 < argv.length && argv[i + 1] != null;
  }

  /**
   * Command-line dumper: prints either one record ({@code -recno}) or every
   * record ({@code -dumpall}) of a fetcher output file.
   *
   * @param argv command-line arguments; see the usage string below
   * @throws Exception on any I/O or argument-format failure
   */
  public static void main(String argv[]) throws Exception {
    String usage = "FetcherOutput (-local <path> | -ndfs <path> <namenode:port>) (-recno <recno> | -dumpall) [-filename <filename>]";
    // NOTE(review): the usage string describes invocations longer than four
    // arguments, which this guard rejects -- confirm the intended limit.
    if (argv.length == 0 || argv.length > 4) {
      System.out.println("usage:" + usage);
      return;
    }

    String filename = FetcherOutput.DIR_NAME;
    boolean dumpall = false;
    int recno = -1;
    int i = 0;
    NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, i);
    for (; i < argv.length; i++) {
      if ("-recno".equals(argv[i])) {
        if (!hasValue(argv, i)) {
          // Previously this indexed past the end of argv (or parsed null).
          System.out.println("usage:" + usage);
          return;
        }
        recno = Integer.parseInt(argv[i + 1]);
        i++;
      } else if ("-dumpall".equals(argv[i])) {
        dumpall = true;
      } else if ("-filename".equals(argv[i])) {
        if (!hasValue(argv, i)) {
          System.out.println("usage:" + usage);
          return;
        }
        filename = argv[i + 1];
        i++;
      }
    }

    ArrayFile.Reader fetcher = new ArrayFile.Reader(nfs, filename);
    try {
      FetcherOutput fo = new FetcherOutput();

      if (dumpall) {
        // recno starts at -1, so the first record is reported as 0.
        while ((fo = (FetcherOutput) fetcher.next(fo)) != null) {
          recno++;
          System.out.println("Retrieved " + recno + " from file " + filename);
          System.out.println(fo);
        }
      } else if (recno >= 0) {
        fetcher.get(recno, fo);
        System.out.println("Retrieved " + recno + " from file " + filename);
        System.out.println(fo);
      }
    } finally {
      fetcher.close();
    }
  }
}