1 2 3 4 package net.nutch.searcher; 5 6 import java.io.IOException ; 7 import java.io.File ; 8 9 import java.util.HashMap ; 10 11 import net.nutch.io.*; 12 import net.nutch.fs.*; 13 import net.nutch.db.*; 14 import net.nutch.util.*; 15 import net.nutch.fetcher.*; 16 import net.nutch.protocol.*; 17 import net.nutch.parse.*; 18 import net.nutch.pagedb.*; 19 import net.nutch.indexer.*; 20 21 23 public class FetchedSegments implements HitSummarizer, HitContent { 24 25 private static class Segment { 26 private NutchFileSystem nfs; 27 private File segmentDir; 28 29 private ArrayFile.Reader fetcher; 30 private ArrayFile.Reader content; 31 private ArrayFile.Reader text; 32 private ArrayFile.Reader parsedata; 33 34 public Segment(NutchFileSystem nfs, File segmentDir) throws IOException { 35 this.nfs = nfs; 36 this.segmentDir = segmentDir; 37 } 38 39 public FetcherOutput getFetcherOutput(int docNo) throws IOException { 40 if (fetcher == null) { 41 this.fetcher = new ArrayFile.Reader 42 (nfs, new File (segmentDir, FetcherOutput.DIR_NAME).toString()); 43 } 44 45 FetcherOutput entry = new FetcherOutput(); 46 fetcher.get(docNo, entry); 47 return entry; 48 } 49 50 public byte[] getContent(int docNo) throws IOException { 51 if (content == null) { 52 this.content = new ArrayFile.Reader 53 (nfs, new File (segmentDir, Content.DIR_NAME).toString()); 54 } 55 56 Content entry = new Content(); 57 content.get(docNo, entry); 58 return entry.getContent(); 59 } 60 61 public ParseData getParseData(int docNo) throws IOException { 62 if (parsedata == null) { 63 this.parsedata = new ArrayFile.Reader 64 (nfs, new File (segmentDir, ParseData.DIR_NAME).toString()); 65 } 66 67 ParseData entry = new ParseData(); 68 parsedata.get(docNo, entry); 69 return entry; 70 } 71 72 public ParseText getParseText(int docNo) throws IOException { 73 if (text == null) { 74 this.text = new ArrayFile.Reader 75 (nfs, new File (segmentDir, ParseText.DIR_NAME).toString()); 76 } 77 78 ParseText entry = new ParseText(); 79 text.get(docNo, entry); 80 return entry; 81 } 82 83 } 84 85 private HashMap segments = new HashMap (); 86 87 88 public FetchedSegments(NutchFileSystem nfs, String segmentsDir) throws IOException { 89 File [] segmentDirs = nfs.listFiles(new File (segmentsDir)); 90 91 if (segmentDirs != null) { 92 for (int i = 0; i < segmentDirs.length; i++) { 93 File segmentDir = segmentDirs[i]; 94 File indexdone = new File (segmentDir, IndexSegment.DONE_NAME); 95 if (nfs.exists(indexdone) && nfs.isFile(indexdone)) { 96 segments.put(segmentDir.getName(), new Segment(nfs, segmentDir)); 97 } 98 } 99 } 100 } 101 102 public String [] getSegmentNames() { 103 return (String [])segments.keySet().toArray(new String [segments.size()]); 104 } 105 106 public byte[] getContent(HitDetails details) throws IOException { 107 return getSegment(details).getContent(getDocNo(details)); 108 } 109 110 public ParseData getParseData(HitDetails details) throws IOException { 111 return getSegment(details).getParseData(getDocNo(details)); 112 } 113 114 public String [] getAnchors(HitDetails details) throws IOException { 115 return getSegment(details).getFetcherOutput(getDocNo(details)) 116 .getFetchListEntry().getAnchors(); 117 } 118 119 public long getFetchDate(HitDetails details) throws IOException { 120 return getSegment(details).getFetcherOutput(getDocNo(details)) 121 .getFetchDate(); 122 } 123 124 public ParseText getParseText(HitDetails details) throws IOException { 125 return getSegment(details).getParseText(getDocNo(details)); 126 } 127 128 public String getSummary(HitDetails details, Query query) 129 throws IOException { 130 131 String text = getSegment(details).getParseText(getDocNo(details)).getText(); 132 133 return new Summarizer().getSummary(text, query).toString(); 134 } 135 136 public String [] getSummary(HitDetails[] details, Query query) 137 throws IOException { 138 String [] results = new String [details.length]; 139 for (int i = 0; i < details.length; i++) 140 results[i] = getSummary(details[i], query); 141 return results; 142 } 143 144 145 private Segment getSegment(HitDetails details) { 146 return (Segment)segments.get(details.getValue("segment")); 147 } 148 149 private int getDocNo(HitDetails details) { 150 return Integer.parseInt(details.getValue("docNo"), 16); 151 } 152 153 154 } 155 | Popular Tags |