IndexSearcher


1   /* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
2   /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3   
4   package net.nutch.searcher;
5   
6   import java.io.IOException  ;
7   import java.io.File  ;
8   
9   import java.util.ArrayList  ;
10  import java.util.Enumeration  ;
11  
12  import org.apache.lucene.index.IndexReader;
13  import org.apache.lucene.index.MultiReader;
14  
15  import org.apache.lucene.search.MultiSearcher;
16  import org.apache.lucene.search.TopDocs;
17  import org.apache.lucene.search.ScoreDoc;
18  import org.apache.lucene.search.FieldCache;
19  
20  import org.apache.lucene.document.Document;
21  import org.apache.lucene.document.Field;
22  
23  import net.nutch.io.*;
24  import net.nutch.util.*;
25  import net.nutch.db.*;
26  import net.nutch.fetcher.*;
27  import net.nutch.linkdb.*;
28  import net.nutch.indexer.*;
29  import net.nutch.analysis.NutchDocumentAnalyzer;
30  
31  /** Implements {@link Searcher} and {@link HitDetailer} for either a single
32   * merged index, or for a set of individual segment indexes. */
33  public class IndexSearcher implements Searcher, HitDetailer {
34  
35    private org.apache.lucene.search.Searcher luceneSearcher;
36  
37    private String  [] sites;
38    
39    private LuceneQueryOptimizer optimizer = new LuceneQueryOptimizer
40      (NutchConf.getInt("searcher.filter.cache.size", 16),
41       NutchConf.getFloat("searcher.filter.cache.threshold", 0.05f));
42  
43    /** Construct given a number of indexed segments. */
44    public IndexSearcher(File  [] segmentDirs) throws IOException   {
45      IndexReader[] readers = new IndexReader[segmentDirs.length];
46      for (int i = 0; i < segmentDirs.length; i++) {
47        readers[i] = IndexReader.open(new File  (segmentDirs[i], "index"));
48      }
49      init(new MultiReader(readers));
50    }
51  
52    /** Construct given a directory containing fetched segments, and a separate
53     * directory naming their merged index. */
54    public IndexSearcher(String   index)
55      throws IOException   {
56      init(IndexReader.open(index));
57    }
58  
59    private void init(IndexReader reader) throws IOException   {
60      this.sites = FieldCache.DEFAULT.getStrings(reader, "site");
61      this.luceneSearcher = new org.apache.lucene.search.IndexSearcher(reader);
62      this.luceneSearcher.setSimilarity(new NutchSimilarity());
63    }
64  
65    public Hits search(Query query, int numHits) throws IOException   {
66  
67      org.apache.lucene.search.BooleanQuery luceneQuery =
68        QueryFilters.filter(query);
69      
70      return translateHits
71        (optimizer.optimize(luceneQuery, luceneSearcher, numHits));
72    }
73  
74    public String   getExplanation(Query query, Hit hit) throws IOException   {
75      return luceneSearcher.explain(QueryFilters.filter(query),
76                                    hit.getIndexDocNo()).toHtml();
77    }
78  
79    public HitDetails getDetails(Hit hit) throws IOException   {
80      ArrayList   fields = new ArrayList  ();
81      ArrayList   values = new ArrayList  ();
82  
83      Document doc = luceneSearcher.doc(hit.getIndexDocNo());
84  
85      Enumeration   e = doc.fields();
86      while (e.hasMoreElements()) {
87        Field field = (Field)e.nextElement();
88        fields.add(field.name());
89        values.add(field.stringValue());
90      }
91  
92      return new HitDetails((String  [])fields.toArray(new String  [fields.size()]),
93                            (String  [])values.toArray(new String  [values.size()]));
94    }
95  
96    public HitDetails[] getDetails(Hit[] hits) throws IOException   {
97      HitDetails[] results = new HitDetails[hits.length];
98      for (int i = 0; i < hits.length; i++)
99        results[i] = getDetails(hits[i]);
100     return results;
101   }
102 
103   private Hits translateHits(TopDocs topDocs) throws IOException   {
104     ScoreDoc[] scoreDocs = topDocs.scoreDocs;
105     int length = scoreDocs.length;
106     Hit[] hits = new Hit[length];
107     for (int i = 0; i < length; i++) {
108       int doc = scoreDocs[i].doc;
109       hits[i] = new Hit(doc, scoreDocs[i].score, sites[doc]);
110     }
111     return new Hits(topDocs.totalHits, hits);
112   }
113 
114 }
115
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags