1 2 3 4 package net.nutch.parse; 5 6 import java.io.*; 7 import net.nutch.io.*; 8 import net.nutch.fs.*; 9 import net.nutch.util.*; 10 11 14 public final class ParseText extends VersionedWritable { 15 public static final String DIR_NAME = "parse_text"; 16 17 private final static byte VERSION = 1; 18 19 public ParseText() {} 20 private String text; 21 22 public ParseText(String text){ 23 this.text = text; 24 } 25 26 public byte getVersion() { return VERSION; } 27 28 public void readFields(DataInput in) throws IOException { 29 super.readFields(in); text = WritableUtils.readCompressedString(in); 31 return; 32 } 33 34 public final void write(DataOutput out) throws IOException { 35 super.write(out); WritableUtils.writeCompressedString(out, text); 37 return; 38 } 39 40 public final static ParseText read(DataInput in) throws IOException { 41 ParseText parseText = new ParseText(); 42 parseText.readFields(in); 43 return parseText; 44 } 45 46 public String getText() { return text; } 50 51 public boolean equals(Object o) { 52 if (!(o instanceof ParseText)) 53 return false; 54 ParseText other = (ParseText)o; 55 return this.text.equals(other.text); 56 } 57 58 public String toString() { 59 return text; 60 } 61 62 public static void main(String argv[]) throws Exception { 63 String usage = "ParseText (-local | -ndfs <namenode:port>) recno segment"; 64 65 if (argv.length < 3) { 66 System.out.println("usage:" + usage); 67 return; 68 } 69 70 NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, 0); 71 try { 72 int recno = Integer.parseInt(argv[0]); 73 String segment = argv[1]; 74 String filename = new File(segment, ParseText.DIR_NAME).getPath(); 75 76 ParseText parseText = new ParseText(); 77 ArrayFile.Reader parseTexts = new ArrayFile.Reader(nfs, filename); 78 79 parseTexts.get(recno, parseText); 80 System.out.println("Retrieved " + recno + " from file " + filename); 81 System.out.println(parseText); 82 parseTexts.close(); 83 } finally { 84 nfs.close(); 85 } 86 } 87 } 88 | Popular Tags |