1 2 3 4 package net.nutch.searcher; 5 6 import java.io.IOException ; 7 import java.io.File ; 8 9 import java.util.ArrayList ; 10 import java.util.Enumeration ; 11 12 import org.apache.lucene.index.IndexReader; 13 import org.apache.lucene.index.MultiReader; 14 15 import org.apache.lucene.search.MultiSearcher; 16 import org.apache.lucene.search.TopDocs; 17 import org.apache.lucene.search.ScoreDoc; 18 import org.apache.lucene.search.FieldCache; 19 20 import org.apache.lucene.document.Document; 21 import org.apache.lucene.document.Field; 22 23 import net.nutch.io.*; 24 import net.nutch.util.*; 25 import net.nutch.db.*; 26 import net.nutch.fetcher.*; 27 import net.nutch.linkdb.*; 28 import net.nutch.indexer.*; 29 import net.nutch.analysis.NutchDocumentAnalyzer; 30 31 33 public class IndexSearcher implements Searcher, HitDetailer { 34 35 private org.apache.lucene.search.Searcher luceneSearcher; 36 37 private String [] sites; 38 39 private LuceneQueryOptimizer optimizer = new LuceneQueryOptimizer 40 (NutchConf.getInt("searcher.filter.cache.size", 16), 41 NutchConf.getFloat("searcher.filter.cache.threshold", 0.05f)); 42 43 44 public IndexSearcher(File [] segmentDirs) throws IOException { 45 IndexReader[] readers = new IndexReader[segmentDirs.length]; 46 for (int i = 0; i < segmentDirs.length; i++) { 47 readers[i] = IndexReader.open(new File (segmentDirs[i], "index")); 48 } 49 init(new MultiReader(readers)); 50 } 51 52 54 public IndexSearcher(String index) 55 throws IOException { 56 init(IndexReader.open(index)); 57 } 58 59 private void init(IndexReader reader) throws IOException { 60 this.sites = FieldCache.DEFAULT.getStrings(reader, "site"); 61 this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader); 62 this.luceneSearcher.setSimilarity(new NutchSimilarity()); 63 } 64 65 public Hits search(Query query, int numHits) throws IOException { 66 67 org.apache.lucene.search.BooleanQuery luceneQuery = 68 QueryFilters.filter(query); 69 70 return translateHits 71 (optimizer.optimize(luceneQuery, luceneSearcher, numHits)); 72 } 73 74 public String getExplanation(Query query, Hit hit) throws IOException { 75 return luceneSearcher.explain(QueryFilters.filter(query), 76 hit.getIndexDocNo()).toHtml(); 77 } 78 79 public HitDetails getDetails(Hit hit) throws IOException { 80 ArrayList fields = new ArrayList (); 81 ArrayList values = new ArrayList (); 82 83 Document doc = luceneSearcher.doc(hit.getIndexDocNo()); 84 85 Enumeration e = doc.fields(); 86 while (e.hasMoreElements()) { 87 Field field = (Field)e.nextElement(); 88 fields.add(field.name()); 89 values.add(field.stringValue()); 90 } 91 92 return new HitDetails((String [])fields.toArray(new String [fields.size()]), 93 (String [])values.toArray(new String [values.size()])); 94 } 95 96 public HitDetails[] getDetails(Hit[] hits) throws IOException { 97 HitDetails[] results = new HitDetails[hits.length]; 98 for (int i = 0; i < hits.length; i++) 99 results[i] = getDetails(hits[i]); 100 return results; 101 } 102 103 private Hits translateHits(TopDocs topDocs) throws IOException { 104 ScoreDoc[] scoreDocs = topDocs.scoreDocs; 105 int length = scoreDocs.length; 106 Hit[] hits = new Hit[length]; 107 for (int i = 0; i < length; i++) { 108 int doc = scoreDocs[i].doc; 109 hits[i] = new Hit(doc, scoreDocs[i].score, sites[doc]); 110 } 111 return new Hits(topDocs.totalHits, hits); 112 } 113 114 } 115 | Popular Tags |