KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > searcher > IndexSearcher


/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.searcher;

import java.io.IOException;
import java.io.File;

import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;

import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.FieldCache;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

import net.nutch.io.*;
import net.nutch.util.*;
import net.nutch.db.*;
import net.nutch.fetcher.*;
import net.nutch.linkdb.*;
import net.nutch.indexer.*;
import net.nutch.analysis.NutchDocumentAnalyzer;

31 /** Implements {@link Searcher} and {@link HitDetailer} for either a single
32  * merged index, or for a set of individual segment indexes. */

33 public class IndexSearcher implements Searcher, HitDetailer {
34
35   private org.apache.lucene.search.Searcher luceneSearcher;
36
37   private String JavaDoc[] sites;
38   
39   private LuceneQueryOptimizer optimizer = new LuceneQueryOptimizer
40     (NutchConf.getInt("searcher.filter.cache.size", 16),
41      NutchConf.getFloat("searcher.filter.cache.threshold", 0.05f));
42
43   /** Construct given a number of indexed segments. */
44   public IndexSearcher(File JavaDoc[] segmentDirs) throws IOException JavaDoc {
45     IndexReader[] readers = new IndexReader[segmentDirs.length];
46     for (int i = 0; i < segmentDirs.length; i++) {
47       readers[i] = IndexReader.open(new File JavaDoc(segmentDirs[i], "index"));
48     }
49     init(new MultiReader(readers));
50   }
51
52   /** Construct given a directory containing fetched segments, and a separate
53    * directory naming their merged index. */

54   public IndexSearcher(String JavaDoc index)
55     throws IOException JavaDoc {
56     init(IndexReader.open(index));
57   }
58
59   private void init(IndexReader reader) throws IOException JavaDoc {
60     this.sites = FieldCache.DEFAULT.getStrings(reader, "site");
61     this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader);
62     this.luceneSearcher.setSimilarity(new NutchSimilarity());
63   }
64
65   public Hits search(Query query, int numHits) throws IOException JavaDoc {
66
67     org.apache.lucene.search.BooleanQuery luceneQuery =
68       QueryFilters.filter(query);
69     
70     return translateHits
71       (optimizer.optimize(luceneQuery, luceneSearcher, numHits));
72   }
73
74   public String JavaDoc getExplanation(Query query, Hit hit) throws IOException JavaDoc {
75     return luceneSearcher.explain(QueryFilters.filter(query),
76                                   hit.getIndexDocNo()).toHtml();
77   }
78
79   public HitDetails getDetails(Hit hit) throws IOException JavaDoc {
80     ArrayList JavaDoc fields = new ArrayList JavaDoc();
81     ArrayList JavaDoc values = new ArrayList JavaDoc();
82
83     Document doc = luceneSearcher.doc(hit.getIndexDocNo());
84
85     Enumeration JavaDoc e = doc.fields();
86     while (e.hasMoreElements()) {
87       Field field = (Field)e.nextElement();
88       fields.add(field.name());
89       values.add(field.stringValue());
90     }
91
92     return new HitDetails((String JavaDoc[])fields.toArray(new String JavaDoc[fields.size()]),
93                           (String JavaDoc[])values.toArray(new String JavaDoc[values.size()]));
94   }
95
96   public HitDetails[] getDetails(Hit[] hits) throws IOException JavaDoc {
97     HitDetails[] results = new HitDetails[hits.length];
98     for (int i = 0; i < hits.length; i++)
99       results[i] = getDetails(hits[i]);
100     return results;
101   }
102
103   private Hits translateHits(TopDocs topDocs) throws IOException JavaDoc {
104     ScoreDoc[] scoreDocs = topDocs.scoreDocs;
105     int length = scoreDocs.length;
106     Hit[] hits = new Hit[length];
107     for (int i = 0; i < length; i++) {
108       int doc = scoreDocs[i].doc;
109       hits[i] = new Hit(doc, scoreDocs[i].score, sites[doc]);
110     }
111     return new Hits(topDocs.totalHits, hits);
112   }
113
114 }
115
Popular Tags