1 2 3 4 package net.nutch.pagedb; 5 6 import java.io.*; 7 import java.util.Arrays ; 8 9 import net.nutch.io.*; 10 import net.nutch.db.*; 11 import net.nutch.fs.*; 12 import net.nutch.util.*; 13 14 public final class FetchListEntry implements Writable, Cloneable { 15 public static final String DIR_NAME = "fetchlist"; 16 17 private final static byte CUR_VERSION = 2; 18 19 private boolean fetch; 20 private Page page; 21 private String [] anchors; 22 23 public FetchListEntry() {} 24 25 public FetchListEntry(boolean fetch, Page page, String [] anchors) { 26 this.fetch = fetch; 27 this.page = page; 28 this.anchors = anchors; 29 } 30 31 public final void readFields(DataInput in) throws IOException { 35 byte version = in.readByte(); if (version > CUR_VERSION) throw new VersionMismatchException(CUR_VERSION, version); 38 39 fetch = in.readByte() != 0; 41 page = Page.read(in); 43 if (version > 1) { anchors = new String [in.readInt()]; for (int i = 0; i < anchors.length; i++) { 46 anchors[i] = UTF8.readString(in); 47 } 48 } else { 49 anchors = new String [0]; 50 } 51 } 52 53 public static FetchListEntry read(DataInput in) throws IOException { 54 FetchListEntry result = new FetchListEntry(); 55 result.readFields(in); 56 return result; 57 } 58 59 public final void write(DataOutput out) throws IOException { 60 out.writeByte(CUR_VERSION); out.writeByte((byte)(fetch ? 1 : 0)); page.write(out); out.writeInt(anchors.length); for (int i = 0; i < anchors.length; i++) { 65 UTF8.writeString(out, anchors[i]); 66 } 67 } 68 69 public boolean getFetch() { return fetch; } 73 public Page getPage() { return page; } 74 public String [] getAnchors() { return anchors; } 75 76 public UTF8 getUrl() { return getPage().getURL(); } 78 79 public boolean equals(Object o) { 80 if (!(o instanceof FetchListEntry)) 81 return false; 82 FetchListEntry other = (FetchListEntry)o; 83 return 84 this.fetch == other.fetch && 85 this.page.equals(other.page) && 86 Arrays.equals(this.anchors, other.anchors); 87 } 88 89 public Object clone() { 90 try { 91 FetchListEntry clone = (FetchListEntry)super.clone(); 92 clone.page = (Page)clone.page.clone(); 93 clone.anchors = new String [this.anchors.length]; 94 System.arraycopy(this.anchors, 0, clone.anchors, 0, this.anchors.length); 95 return clone; 96 } catch (CloneNotSupportedException e) { 97 throw new RuntimeException (e); 98 } 99 } 100 101 public String toString() { 102 StringBuffer buffer = new StringBuffer (); 103 buffer.append("version: " + CUR_VERSION + "\n"); 104 buffer.append("fetch: " + fetch + "\n"); 105 buffer.append("page: " + page + "\n"); 106 buffer.append("anchors: " + anchors.length + "\n" ); 107 for (int i = 0; i < anchors.length; i++) { 108 buffer.append(" anchor: " + anchors[i] + "\n"); 109 } 110 return buffer.toString(); 111 } 112 113 public static void main(String argv[]) throws Exception { 114 String usage = "FetchListEntry (-local | -ndfs <namenode:port>) [ -recno N | -dumpurls ] segmentDir"; 115 if (argv.length < 1) { 116 System.out.println("Usage: " + usage); 117 System.exit(-1); 118 } 119 String segment = null; 120 boolean dumpUrls = false; 121 int recno = -1; 122 int i = 0; 123 NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, i); 124 for (; i < argv.length; i++) { 125 if ("-dumpurls".equals(argv[i])) { 126 dumpUrls = true; 127 } else if ("-recno".equals(argv[i])) { 128 recno = Integer.parseInt(argv[++i]); 129 } else if (argv[i] != null) { 130 segment = argv[i]; 131 } 132 } 133 FetchListEntry fle = new FetchListEntry(); 134 ArrayFile.Reader fetchlist = 135 new ArrayFile.Reader(nfs, new File(segment, FetchListEntry.DIR_NAME).getPath()); 136 137 if (dumpUrls) { 138 int count = 0; 139 while (fetchlist.next(fle) != null) { 140 System.out.println("Recno " + count + ": " + fle.getPage().getURL()); 141 count++; 142 } 143 } 144 145 if (recno != -1) { 146 fetchlist.get(recno, fle); 147 System.out.println(fle); 148 } 149 150 fetchlist.close(); 151 } 152 153 } 154 | Popular Tags |