KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > parse > ParseText


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.parse;
5
6 import java.io.*;
7 import net.nutch.io.*;
8 import net.nutch.fs.*;
9 import net.nutch.util.*;
10
11 /* The text conversion of page's content, stored using gzip compression.
12  * @see Parse#getText()
13  */

14 public final class ParseText extends VersionedWritable {
15   public static final String JavaDoc DIR_NAME = "parse_text";
16
17   private final static byte VERSION = 1;
18
19   public ParseText() {}
20   private String JavaDoc text;
21     
22   public ParseText(String JavaDoc text){
23     this.text = text;
24   }
25
26   public byte getVersion() { return VERSION; }
27
28   public void readFields(DataInput in) throws IOException {
29     super.readFields(in); // check version
30
text = WritableUtils.readCompressedString(in);
31     return;
32   }
33
34   public final void write(DataOutput out) throws IOException {
35     super.write(out); // write version
36
WritableUtils.writeCompressedString(out, text);
37     return;
38   }
39
40   public final static ParseText read(DataInput in) throws IOException {
41     ParseText parseText = new ParseText();
42     parseText.readFields(in);
43     return parseText;
44   }
45
46   //
47
// Accessor methods
48
//
49
public String JavaDoc getText() { return text; }
50
51   public boolean equals(Object JavaDoc o) {
52     if (!(o instanceof ParseText))
53       return false;
54     ParseText other = (ParseText)o;
55     return this.text.equals(other.text);
56   }
57
58   public String JavaDoc toString() {
59     return text;
60   }
61
62   public static void main(String JavaDoc argv[]) throws Exception JavaDoc {
63     String JavaDoc usage = "ParseText (-local | -ndfs <namenode:port>) recno segment";
64
65     if (argv.length < 3) {
66       System.out.println("usage:" + usage);
67       return;
68     }
69
70     NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, 0);
71     try {
72       int recno = Integer.parseInt(argv[0]);
73       String JavaDoc segment = argv[1];
74       String JavaDoc filename = new File(segment, ParseText.DIR_NAME).getPath();
75
76       ParseText parseText = new ParseText();
77       ArrayFile.Reader parseTexts = new ArrayFile.Reader(nfs, filename);
78
79       parseTexts.get(recno, parseText);
80       System.out.println("Retrieved " + recno + " from file " + filename);
81       System.out.println(parseText);
82       parseTexts.close();
83     } finally {
84       nfs.close();
85     }
86   }
87 }
88
Popular Tags