KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > searcher > FetchedSegments


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.searcher;
5
6 import java.io.IOException JavaDoc;
7 import java.io.File JavaDoc;
8
9 import java.util.HashMap JavaDoc;
10
11 import net.nutch.io.*;
12 import net.nutch.fs.*;
13 import net.nutch.db.*;
14 import net.nutch.util.*;
15 import net.nutch.fetcher.*;
16 import net.nutch.protocol.*;
17 import net.nutch.parse.*;
18 import net.nutch.pagedb.*;
19 import net.nutch.indexer.*;
20
21 /** Implements {@link HitSummarizer} and {@link HitContent} for a set of
22  * fetched segments. */

23 public class FetchedSegments implements HitSummarizer, HitContent {
24
25   private static class Segment {
26     private NutchFileSystem nfs;
27     private File JavaDoc segmentDir;
28
29     private ArrayFile.Reader fetcher;
30     private ArrayFile.Reader content;
31     private ArrayFile.Reader text;
32     private ArrayFile.Reader parsedata;
33
34     public Segment(NutchFileSystem nfs, File JavaDoc segmentDir) throws IOException JavaDoc {
35       this.nfs = nfs;
36       this.segmentDir = segmentDir;
37     }
38
39     public FetcherOutput getFetcherOutput(int docNo) throws IOException JavaDoc {
40       if (fetcher == null) {
41         this.fetcher = new ArrayFile.Reader
42           (nfs, new File JavaDoc(segmentDir, FetcherOutput.DIR_NAME).toString());
43       }
44
45       FetcherOutput entry = new FetcherOutput();
46       fetcher.get(docNo, entry);
47       return entry;
48     }
49
50     public byte[] getContent(int docNo) throws IOException JavaDoc {
51       if (content == null) {
52         this.content = new ArrayFile.Reader
53           (nfs, new File JavaDoc(segmentDir, Content.DIR_NAME).toString());
54       }
55
56       Content entry = new Content();
57       content.get(docNo, entry);
58       return entry.getContent();
59     }
60
61     public ParseData getParseData(int docNo) throws IOException JavaDoc {
62       if (parsedata == null) {
63         this.parsedata = new ArrayFile.Reader
64           (nfs, new File JavaDoc(segmentDir, ParseData.DIR_NAME).toString());
65       }
66       
67       ParseData entry = new ParseData();
68       parsedata.get(docNo, entry);
69       return entry;
70     }
71
72     public ParseText getParseText(int docNo) throws IOException JavaDoc {
73       if (text == null) {
74         this.text = new ArrayFile.Reader
75           (nfs, new File JavaDoc(segmentDir, ParseText.DIR_NAME).toString());
76       }
77
78       ParseText entry = new ParseText();
79       text.get(docNo, entry);
80       return entry;
81     }
82     
83   }
84
85   private HashMap JavaDoc segments = new HashMap JavaDoc();
86
87   /** Construct given a directory containing fetcher output. */
88   public FetchedSegments(NutchFileSystem nfs, String JavaDoc segmentsDir) throws IOException JavaDoc {
89     File JavaDoc[] segmentDirs = nfs.listFiles(new File JavaDoc(segmentsDir));
90
91     if (segmentDirs != null) {
92         for (int i = 0; i < segmentDirs.length; i++) {
93             File JavaDoc segmentDir = segmentDirs[i];
94             File JavaDoc indexdone = new File JavaDoc(segmentDir, IndexSegment.DONE_NAME);
95             if (nfs.exists(indexdone) && nfs.isFile(indexdone)) {
96                 segments.put(segmentDir.getName(), new Segment(nfs, segmentDir));
97             }
98         }
99     }
100   }
101
102   public String JavaDoc[] getSegmentNames() {
103     return (String JavaDoc[])segments.keySet().toArray(new String JavaDoc[segments.size()]);
104   }
105
106   public byte[] getContent(HitDetails details) throws IOException JavaDoc {
107     return getSegment(details).getContent(getDocNo(details));
108   }
109
110   public ParseData getParseData(HitDetails details) throws IOException JavaDoc {
111     return getSegment(details).getParseData(getDocNo(details));
112   }
113
114   public String JavaDoc[] getAnchors(HitDetails details) throws IOException JavaDoc {
115     return getSegment(details).getFetcherOutput(getDocNo(details))
116       .getFetchListEntry().getAnchors();
117   }
118
119   public long getFetchDate(HitDetails details) throws IOException JavaDoc {
120     return getSegment(details).getFetcherOutput(getDocNo(details))
121       .getFetchDate();
122   }
123
124   public ParseText getParseText(HitDetails details) throws IOException JavaDoc {
125     return getSegment(details).getParseText(getDocNo(details));
126   }
127
128   public String JavaDoc getSummary(HitDetails details, Query query)
129     throws IOException JavaDoc {
130
131     String JavaDoc text = getSegment(details).getParseText(getDocNo(details)).getText();
132
133     return new Summarizer().getSummary(text, query).toString();
134   }
135     
136   public String JavaDoc[] getSummary(HitDetails[] details, Query query)
137     throws IOException JavaDoc {
138     String JavaDoc[] results = new String JavaDoc[details.length];
139     for (int i = 0; i < details.length; i++)
140       results[i] = getSummary(details[i], query);
141     return results;
142   }
143
144
145   private Segment getSegment(HitDetails details) {
146     return (Segment)segments.get(details.getValue("segment"));
147   }
148
149   private int getDocNo(HitDetails details) {
150     return Integer.parseInt(details.getValue("docNo"), 16);
151   }
152
153
154 }
155
Popular Tags