KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > contineo > searchengine > search > Search


1 /*
2  * Searcher.java
3  *
4  * Created on 6. November 2003, 16:45
5  */

6
7 package org.contineo.searchengine.search;
8
9 import java.io.File JavaDoc;
10 import java.io.StringReader JavaDoc;
11 import java.util.ArrayList JavaDoc;
12 import java.util.Collection JavaDoc;
13 import java.util.Date JavaDoc;
14 import java.util.Vector JavaDoc;
15 import org.apache.log4j.Level;
16 import org.apache.log4j.Logger;
17 import org.apache.lucene.analysis.Analyzer;
18 import org.apache.lucene.analysis.TokenStream;
19 import org.apache.lucene.analysis.de.GermanAnalyzer;
20 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
21 import org.apache.lucene.analysis.standard.StandardAnalyzer;
22 import org.apache.lucene.document.Document;
23 import org.apache.lucene.queryParser.MultiFieldQueryParser;
24 import org.apache.lucene.search.Hits;
25 import org.apache.lucene.search.IndexSearcher;
26 import org.apache.lucene.search.MultiSearcher;
27 import org.apache.lucene.search.Query;
28 import org.apache.lucene.search.Searcher;
29 import org.apache.lucene.search.highlight.Highlighter;
30 import org.apache.lucene.search.highlight.QueryScorer;
31 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
32 import org.contineo.admin.dao.MenuDAO;
33 import org.contineo.core.LoggingManager;
34 import org.contineo.core.config.SearchSettings;
35 import org.contineo.core.config.SettingConfigurator;
36 import org.contineo.core.config.dao.SearchSettingsDAO;
37 import org.contineo.core.text.analyze.Stopwords;
38 import org.contineo.searchengine.util.SquareSimilarity;
39 /**
40  * @author Michael Scholz
41  */

42 public class Search {
43
44     /**
45      * @uml.property name="logger"
46      * @uml.associationEnd
47      */

48     private Logger logger;
49     
50     private int hitCount;
51     
52     private Vector JavaDoc<Integer JavaDoc> pageVect;
53
54     /** Creates a new instance of Searcher */
55     public Search() {
56         logger = LoggingManager.getLogger(this.getClass());
57         hitCount = 0;
58         pageVect = new Vector JavaDoc<Integer JavaDoc>();
59     }
60     
61     /**
62      * @return Returns the hitCount.
63      * @uml.property name="hitCount"
64      */

65     public int getHitCount() {
66         return hitCount;
67     }
68     
69     /**
70      * @return Returns the pageVect.
71      * @uml.property name="pageVect"
72      */

73     public Vector JavaDoc getPageVect() {
74         return pageVect;
75     }
76
77     public Collection JavaDoc search(SearchOptions opt, String JavaDoc language) {
78         Collection JavaDoc<Result> coll = new ArrayList JavaDoc<Result>();
79         SettingConfigurator conf = new SettingConfigurator();
80         try {
81             String JavaDoc[] languages = opt.getLanguages();
82             if (languages == null || languages.length == 0) {
83                 languages = new String JavaDoc[]{"de","en","fr"};
84                 opt.setLanguages(languages);
85             }
86             Searcher[] searcher = new Searcher[languages.length];
87             String JavaDoc indexPath = conf.getValue("indexdir");
88             if (!indexPath.endsWith(File.pathSeparator))
89                 indexPath += "/";
90             for (int i=0; i<languages.length; i++) {
91                 String JavaDoc lang = languages[i];
92                 String JavaDoc dir = "english";
93                 if (lang.equals("de"))
94                     dir = "german";
95                 else
96                     if (lang.equals("fr"))
97                         dir = "french";
98                 Searcher s = new IndexSearcher(indexPath + dir + "/");
99                 searcher[i] = s;
100             }
101             MultiSearcher multiSearcher = new MultiSearcher(searcher);
102             Analyzer analyzer = new StandardAnalyzer(Stopwords.getStopwords("en"));
103             if (language.equals("de")) {
104                 analyzer = new GermanAnalyzer(Stopwords.getStopwords("de"));
105             } else {
106                 if (language.equals("fr")) {
107                     analyzer = new FrenchAnalyzer(Stopwords.getStopwords("fr"));
108                 }
109             }
110             if (opt.getFields() == null) {
111                 String JavaDoc[] fields = new String JavaDoc[]{"content","keywords"};
112                 opt.setFields(fields);
113             }
114             multiSearcher.setSimilarity(new SquareSimilarity());
115             //IndexReader enreader = IndexReader.open(indexPath + "english/");
116
Query query = MultiFieldQueryParser.parse(opt.getQueryStr(), opt.getFields(), analyzer);
117             //query = query.rewrite(reader);
118
Hits hits = multiSearcher.search(query);
119             MenuDAO mdao = new MenuDAO();
120             Collection JavaDoc coll2 = mdao.findMenuIdByUserName(opt.getUsername());
121             //TermHighlighter highlighter = new TermHighlighter(false, false, "FFFF00");
122
int maxNumFragmentsRequired = 4;
123             SearchSettingsDAO settingsDao = new SearchSettingsDAO();
124             SearchSettings settings = settingsDao.findByPrimaryKey(opt.getUsername());
125             maxNumFragmentsRequired = settings.getMaxFragments();
126             String JavaDoc fragmentSeparator = "&nbsp;...&nbsp;";
127             //QueryHighlightExtractor highlighter = new QueryHighlightExtractor(query, analyzer, "<font style='background-color:#FFFF00'>", "</font>");
128
Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter("<font style='background-color:#FFFF00'>", "</font>"), new QueryScorer(query));
129             String JavaDoc path = conf.getValue("indexdir");
130             if (!path.endsWith(File.pathSeparator))
131                 path += "/";
132             //pageVect.add(new Integer(0));
133
for (int j=0; j<hits.length(); j++) {
134                 Document doc = hits.doc(j);
135                 String JavaDoc menuid = doc.get("menuid");
136                 if (coll2.contains(menuid)) {
137                     String JavaDoc size = doc.get("size");
138                     if (size.equals("0"))
139                         size = "1";
140                     Result result = new Result();
141                     result.setMenuid(doc.get("menuid"));
142                     result.setName(doc.get("name"));
143                     result.setPath(doc.get("path"));
144                     result.setSize(size);
145                     result.setLength(new Integer JavaDoc(doc.get("length")));
146                     result.setDocid(new Integer JavaDoc(doc.get("docid")));
147                     result.setType(doc.get("type"));
148                     result.setDate(new Date JavaDoc(Long.parseLong(doc.get("date"))));
149                     if (result.isRelevant(opt, doc.get("date"))) {
150                         if (hitCount % opt.getHitsPerPage() == 0)
151                             pageVect.add(new Integer JavaDoc(j));
152                         hitCount++;
153                     }
154                 }
155             }
156             for (int resultStart=opt.getResultStart(), k=opt.getStart(); resultStart<hits.length() && k<(opt.getStart() + opt.getHitsPerPage()); resultStart++, k++) {
157                 Document doc = hits.doc(resultStart);
158                 String JavaDoc menuid = doc.get("menuid");
159                 // When user can see document with menuid then put it into result-collection.
160
if (coll2.contains(menuid)) {
161                     String JavaDoc size = doc.get("size");
162                     if (size.equals("0"))
163                         size = "1";
164                     //summary = LuceneTools.highlightTerms(summary,highlighter,query,analyzer);
165
String JavaDoc content = doc.get("content");
166                     //String summary = highlighter.getBestFragments(content,highlightFragmentSizeInBytes,maxNumFragmentsRequired,fragmentSeparator);
167
TokenStream stream = analyzer.tokenStream("content", new StringReader JavaDoc(content));
168                     String JavaDoc summary = highlighter.getBestFragments(stream, content, maxNumFragmentsRequired, fragmentSeparator);
169                     if (summary == null || summary.equals(""))
170                         summary = doc.get("summary");
171                     Result result = new Result();
172                     result.setMenuid(doc.get("menuid"));
173                     result.setName(doc.get("name"));
174                     result.setPath(doc.get("path"));
175                     result.setSize(size);
176                     result.setLength(new Integer JavaDoc(doc.get("length")));
177                     result.setDocid(new Integer JavaDoc(doc.get("docid")));
178                     result.setType(doc.get("type"));
179                     result.setDate(new Date JavaDoc(Long.parseLong(doc.get("date"))));
180                     result.setSummary(summary);
181                     result.createScore(hits.score(resultStart));
182                     if (result.isRelevant(opt, doc.get("date")))
183                         coll.add(result);
184                 } else {
185                     k--;
186                 }
187             }
188         }
189         catch (Exception JavaDoc e) {
190             if (logger.isEnabledFor(Level.ERROR))
191                 logger.error(e.getMessage());
192         }
193         return coll;
194     }
195 }
Popular Tags