KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > protocol > Content


/* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.protocol;
5
6 import java.util.*;
7 import java.io.*;
8
9 import net.nutch.io.*;
10 import net.nutch.fs.*;
11 import net.nutch.util.*;
12
13 public final class Content extends VersionedWritable {
14
15   public static final String JavaDoc DIR_NAME = "content";
16
17   private final static byte VERSION = 1;
18
19   private String JavaDoc url;
20   private String JavaDoc base;
21   private byte[] content;
22   private String JavaDoc contentType;
23   private Properties metadata;
24
25   public Content() {}
26     
27   public Content(String JavaDoc url, String JavaDoc base, byte[] content, String JavaDoc contentType,
28                  Properties metadata){
29
30     if (url == null) throw new IllegalArgumentException JavaDoc("null url");
31     if (base == null) throw new IllegalArgumentException JavaDoc("null base");
32     if (content == null) throw new IllegalArgumentException JavaDoc("null content");
33     if (contentType == null) throw new IllegalArgumentException JavaDoc("null type");
34     if (metadata == null) throw new IllegalArgumentException JavaDoc("null metadata");
35
36     this.url = url;
37     this.base = base;
38     this.content = content;
39     this.contentType = contentType;
40     this.metadata = metadata;
41   }
42
43   public byte getVersion() { return VERSION; }
44
45   public final void readFields(DataInput in) throws IOException {
46     super.readFields(in); // check version
47

48     url = UTF8.readString(in); // read url
49
base = UTF8.readString(in); // read base
50

51     content = WritableUtils.readCompressedByteArray(in);
52
53     contentType = UTF8.readString(in); // read contentType
54

55     int propertyCount = in.readInt(); // read metadata
56
metadata = new Properties();
57     for (int i = 0; i < propertyCount; i++) {
58       metadata.put(UTF8.readString(in), UTF8.readString(in));
59     }
60   }
61
62   public final void write(DataOutput out) throws IOException {
63     super.write(out); // write version
64

65     UTF8.writeString(out, url); // write url
66
UTF8.writeString(out, base); // write base
67

68     WritableUtils.writeCompressedByteArray(out, content); // write content
69

70     UTF8.writeString(out, contentType); // write contentType
71

72     out.writeInt(metadata.size()); // write metadata
73
Iterator i = metadata.entrySet().iterator();
74     while (i.hasNext()) {
75       Map.Entry e = (Map.Entry)i.next();
76       UTF8.writeString(out, (String JavaDoc)e.getKey());
77       UTF8.writeString(out, (String JavaDoc)e.getValue());
78     }
79   }
80
81   public static Content read(DataInput in) throws IOException {
82     Content content = new Content();
83     content.readFields(in);
84     return content;
85   }
86
87   //
88
// Accessor methods
89
//
90

91   /** The url fetched. */
92   public String JavaDoc getUrl() { return url; }
93
94   /** The base url for relative links contained in the content.
95    * Maybe be different from url if the request redirected.
96    */

97   public String JavaDoc getBaseUrl() { return base; }
98
99   /** The binary content retrieved. */
100   public byte[] getContent() { return content; }
101   public void setContent(byte[] content) { this.content = content; }
102
103   /** The media type of the retrieved content.
104    * @see http://www.iana.org/assignments/media-types/
105    */

106   public String JavaDoc getContentType() { return contentType; }
107   public void setContentType(String JavaDoc contentType) {
108     this.contentType = contentType;
109   }
110
111   /** Other protocol-specific data. */
112   public Properties getMetadata() { return metadata; }
113
114   /** Return the value of a metadata property. */
115   public String JavaDoc get(String JavaDoc name) { return getMetadata().getProperty(name); }
116
117   public boolean equals(Object JavaDoc o) {
118     if (!(o instanceof Content)){
119       return false;
120     }
121     Content that = (Content)o;
122     return
123       this.url.equals(that.url) &&
124       this.base.equals(that.base) &&
125       Arrays.equals(this.getContent(), that.getContent()) &&
126       this.contentType.equals(that.contentType) &&
127       this.metadata.equals(that.metadata);
128   }
129
130   public String JavaDoc toString() {
131     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
132
133     buffer.append("url: " + url + "\n" );
134     buffer.append("base: " + base + "\n" );
135     buffer.append("contentType: " + contentType + "\n" );
136     buffer.append("metadata: " + metadata + "\n" );
137     buffer.append("Content:\n");
138     buffer.append(new String JavaDoc(content)); // try default encoding
139

140     return buffer.toString();
141
142   }
143
144   public static void main(String JavaDoc argv[]) throws Exception JavaDoc {
145
146     String JavaDoc usage = "Content (-local | -ndfs <namenode:port>) recno segment";
147     
148     if (argv.length < 3) {
149       System.out.println("usage:" + usage);
150       return;
151     }
152
153     NutchFileSystem nfs = NutchFileSystem.parseArgs(argv, 0);
154     try {
155       int recno = Integer.parseInt(argv[0]);
156       String JavaDoc segment = argv[1];
157
158       File file = new File(segment, DIR_NAME);
159       System.out.println("Reading from file: " + file);
160
161       ArrayFile.Reader contents = new ArrayFile.Reader(nfs, file.toString());
162
163       Content content = new Content();
164       contents.get(recno, content);
165       System.out.println("Retrieved " + recno + " from file " + file);
166
167       System.out.println(content);
168
169       contents.close();
170     } finally {
171       nfs.close();
172     }
173   }
174 }
175
Popular Tags