KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > wrapper > lucene > LuceneSearcher


1 package org.enhydra.snapper.wrapper.lucene;
2
3 import org.enhydra.snapper.api.Searcher;
4
5 import java.text.SimpleDateFormat JavaDoc;
6 import java.text.ParseException JavaDoc;
7 import java.util.Vector JavaDoc;
8 import java.util.Date JavaDoc;
9 import java.util.Properties JavaDoc;
10 import java.util.Enumeration JavaDoc;
11 import java.io.IOException JavaDoc;
12 import java.io.StringReader JavaDoc;
13 import org.apache.lucene.analysis.Analyzer;
14 import org.apache.lucene.analysis.standard.StandardAnalyzer;
15 import org.apache.lucene.document.Document;
16 import org.apache.lucene.queryParser.QueryParser;
17 import org.apache.lucene.search.Hits;
18 import org.apache.lucene.search.Sort;
19 import org.apache.lucene.search.IndexSearcher;
20 import org.apache.lucene.search.MultiSearcher;
21 import org.apache.lucene.search.TermQuery;
22 import org.apache.lucene.search.Query;
23 import org.apache.lucene.search.highlight.QueryScorer;
24 import org.apache.lucene.search.highlight.Highlighter;
25 import org.apache.lucene.search.highlight.Scorer;
26 import org.apache.lucene.analysis.TokenStream;
27 import org.apache.lucene.index.IndexReader;
28 import org.apache.lucene.search.Explanation;
29
30
31 import org.apache.lucene.search.highlight.SimpleFragmenter;
/**
 * Lucene-backed implementation of the {@link Searcher} service. It searches one
 * or more Lucene indexes and renders the matching documents as an XML document.
 *
 * @author Milin Radivoj
 */

37
38 public class LuceneSearcher implements Searcher{
39
40     String JavaDoc searchedTerm="";
41     String JavaDoc highlighterTerm;
42     String JavaDoc highlighterPropertiesTerm;
43     String JavaDoc highlighterMetaDataTerm;
44     
45     Query query=null;
46     Hits hits=null;
47     
48     long searchedTime=0;
49     int docsNumber=0;
50     
51     IndexReader[] listReader;
52     IndexSearcher[] list;
53     MultiSearcher searcher;
54     Vector JavaDoc siteNames = new Vector JavaDoc();
55     
56     int contentLength = 150;
57     int metaDataLength = 150;
58     int wordLength = 150;
59
60     public LuceneSearcher() {}
61
62     public void setUpSearcher(Properties JavaDoc sites) {
63
64         try {
65             list=new IndexSearcher[sites.size()];
66             listReader=new IndexReader[sites.size()];
67             int i = 0;
68             for (Enumeration JavaDoc e = sites.propertyNames() ; e.hasMoreElements() ;)
69              {
70              String JavaDoc siteName = (String JavaDoc)e.nextElement();
71              String JavaDoc indexDir = (String JavaDoc)sites.getProperty(siteName);
72            
73               listReader[i]=IndexReader.open(indexDir);
74               list[i]=new IndexSearcher(listReader[i]);
75               siteNames.add(siteName);
76               i++;
77              }
78             searcher=new MultiSearcher(list);
79
80             }catch (IOException JavaDoc e) {
81                  try{
82                     LuceneSearcherFactory.logger.error("Could not initialize Searcher \n caught a"+e.getClass()+
83                                "\n with message: " + e.getMessage());
84                 } catch (Exception JavaDoc ex) {
85                     System.out.println(" caught a " + e.getClass() +
86                        "\n with message: " + e.getMessage());
87                 }
88
89             }
90   }
91
92     public void setUpHighLighter(Properties JavaDoc sites) {
93
94         for (Enumeration JavaDoc e = sites.propertyNames() ; e.hasMoreElements() ;)
95          {
96            String JavaDoc name = (String JavaDoc)e.nextElement();
97            String JavaDoc value = (String JavaDoc)sites.getProperty(name);
98            int len=(new Integer JavaDoc(value)).intValue();
99           
100          if(name.equals("contentLength"))
101           {
102           if(len>300)
103             contentLength=300;
104           else
105             contentLength =len;
106           }
107
108          if(name.equals("metaDataLength"))
109           {
110           if(len>300)
111             metaDataLength=300;
112           else
113             metaDataLength =len;
114           }
115
116          if(name.equals("wordLength"))
117           {
118           if(len>300)
119             wordLength = 300;
120           else
121             wordLength =len;
122           }
123
124
125          }
126       }
127   public void close()
128     {
129      try{
130         
131           for(int i=0;i<listReader.length;i++)
132            {
133             listReader[i].close();
134            }
135          } catch (IOException JavaDoc e) {
136              //e.printStackTrace();
137
try{
138                 LuceneSearcherFactory.logger.error("Could not close Searcher \n caught a "+e.getClass()+
139                            "\n with message: " + e.getMessage());
140             } catch (Exception JavaDoc ex) {
141                 System.out.println(" caught a " + e.getClass() +
142                    "\n with message: " + e.getMessage());
143             }
144          }
145         searchedTerm= null;
146         highlighterTerm= null;
147         highlighterPropertiesTerm= null;
148         highlighterMetaDataTerm= null;
149         listReader = null;
150         list = null;
151         searcher = null;
152         query = null;
153         hits=null;
154         siteNames.removeAllElements();
155         siteNames=null;
156     }
157
158
159
160   public void searchDocs(String JavaDoc luceneQuery, String JavaDoc language) throws IOException JavaDoc
161     {
162     searchDocs(luceneQuery, null, null, null, null, language);
163     }
164
165   public void searchDocs(String JavaDoc luceneQuery, String JavaDoc highlighterContentQuery, String JavaDoc language) throws IOException JavaDoc
166    {
167     searchDocs(luceneQuery, highlighterContentQuery, null, null, null, language);
168    }
169
170   public void searchDocs(String JavaDoc luceneQuery, String JavaDoc highlighterContentQuery, String JavaDoc highlighterPropertiesQuery, String JavaDoc language) throws IOException JavaDoc
171   {
172     searchDocs(luceneQuery, highlighterContentQuery, highlighterPropertiesQuery, null, null, language);
173   }
174
175   public void searchDocs(String JavaDoc luceneQuery,String JavaDoc highlighterContentQuery, String JavaDoc highlighterPropertiesQuery, String JavaDoc sortType, String JavaDoc language) throws IOException JavaDoc
176    {
177     searchDocs(luceneQuery,highlighterContentQuery, highlighterPropertiesQuery, null, sortType, language);
178    }
179
180
181
182   public void searchDocs(String JavaDoc luceneQuery,String JavaDoc highlighterContentQuery, String JavaDoc highlighterPropertiesQuery, String JavaDoc highlighterMetaDataQuery, String JavaDoc sortType, String JavaDoc language) throws IOException JavaDoc
183    {
184    try
185      {
186        Date JavaDoc start = new Date JavaDoc();
187        
188         searchedTerm=luceneQuery;
189         highlighterTerm=highlighterContentQuery;
190         highlighterPropertiesTerm=highlighterPropertiesQuery;
191         highlighterMetaDataTerm = highlighterMetaDataQuery;
192         Analyzer analyzer = new StandardAnalyzer();
193
194         try{
195             query = QueryParser.parse(searchedTerm, "contents", analyzer);
196             
197         }catch(org.apache.lucene.queryParser.ParseException e){
198             searchedTerm = filter(searchedTerm);
199             query=null;
200             hits=null;
201             searchedTime=0;
202             docsNumber=0;
203             return;
204         }
205              
206         if(sortType!=null&&sortType.equals("newest"))
207          {
208           hits = searcher.search(query,new Sort("modified",true));
209          }
210         else if(sortType!=null&&sortType.equals("oldest"))
211          {
212           hits = searcher.search(query,new Sort("modified",false));
213          }
214    /* else if(sortType!=null&&sortType.equals("alphabet"))
215         {
216           hits = searcher.search(query,new Sort("type",false));
217         }
218         else if(sortType!=null&&sortType.equals("alphabetReverse"))
219         {
220           hits = searcher.search(query,new Sort("type",true));
221         }
222      */
else
223          {
224           hits = searcher.search(query);
225           }
226      Date JavaDoc end = new Date JavaDoc();
227
228      searchedTime=end.getTime() - start.getTime();
229      docsNumber=hits.length();
230         
231
232     } catch (IOException JavaDoc e) {
233        try{
234           LuceneSearcherFactory.logger.error("Could not Search Docs \n caught a "+e.getClass()+
235                "\n with message: " + e.getMessage());
236            } catch (Exception JavaDoc ex) {
237              System.out.println(" caught a " + e.getClass() +
238                 "\n with message: " + e.getMessage());
239             }
240         searchedTerm = filter(searchedTerm);
241         query=null;
242         hits=null;
243         searchedTime=0;
244         docsNumber=0;
245     }
246    }
247
248
249 public int getSearchedDocsNumber()
250   {
251     return docsNumber;
252   }
253
254 public long getSearchedTime()
255   {
256     return searchedTime;
257   }
258
259
260 public String JavaDoc getXmlSearchedResult()
261  {
262       
263    return getXmlSearchedResult(1,docsNumber);
264  }
265
266 public String JavaDoc getXmlSearchedResult(int begin,int end)
267 {
268
269     int beginIndex;
270     
271     if(docsNumber==0||begin>docsNumber||begin<=0)
272         beginIndex = begin = 0;
273     else
274         beginIndex = begin-1;
275     
276     if(end>docsNumber)
277         end=docsNumber;
278     
279     
280     StringBuffer JavaDoc result=new StringBuffer JavaDoc();
281
282     result.append(addStartTag());
283     result.append(addSummary(searchedTerm, begin, end, docsNumber, searchedTime));
284
285
286     if(docsNumber==0){
287          result.append(addEndTag());
288          return result.toString();
289     }
290
291       
292     for(int i=beginIndex;i<end;i++)
293     {
294         Document doc=null;
295         try{
296             doc = hits.doc(i);
297         }catch(IOException JavaDoc e){
298          try{
299            LuceneSearcherFactory.logger.error("Could not get Searched Result for "+i+" document \n caught a " + e.getClass() +
300                 "\n with message: " + e.getMessage());
301             } catch (Exception JavaDoc ex) {
302             System.out.println(" caught a " + e.getClass() +
303                 "\n with message: " + e.getMessage());
304            }
305           break;
306         }
307
308
309     String JavaDoc site="";
310     float score = 0.0f;
311     try{
312       int indexer=searcher.subSearcher(hits.id(i));
313       score = hits.score(i);
314       site=(String JavaDoc)siteNames.elementAt(indexer);
315     }catch(Exception JavaDoc e){
316         score = 0.0f;
317     }
318       
319       String JavaDoc type=doc.get("type");
320       String JavaDoc title=doc.get("title");
321       String JavaDoc path=doc.get("path");
322       String JavaDoc modified=doc.get("modified");
323       String JavaDoc properties=doc.get("properties");
324       String JavaDoc fullcontents=doc.get("fullcontents");
325       String JavaDoc metadata=doc.get("metadata");
326       String JavaDoc fileName=doc.get("fileName");
327   
328       fullcontents = replaceAll(fullcontents,"<"," ");
329       fullcontents = replaceAll(fullcontents,">"," ");
330        
331       modified = transformTime(modified);
332
333    String JavaDoc highlightedContent=null;
334    String JavaDoc highlightedProperties=null;
335    String JavaDoc highlightedMetaData=null;
336
337   if(highlighterTerm!=null&&contentLength>0)
338    {
339     highlightedContent = hightlight( highlighterTerm, "fullcontents", fullcontents);
340    }
341   else if(highlighterTerm!=null)
342    {
343     highlightedContent = "";
344    }
345   else
346    {
347     highlightedContent = getContent(fullcontents,"fullcontents");
348    }
349
350
351
352   if(highlighterPropertiesTerm!=null && type.equals("doc")&&wordLength>0)
353    {
354      highlightedProperties = hightlight( highlighterPropertiesTerm, "properties", properties);
355    }
356   else if(highlighterPropertiesTerm!=null&&type.equals("doc"))
357    {
358      highlightedProperties = "";
359    }
360
361
362   if(highlighterMetaDataTerm!=null&&metaDataLength>0)
363    {
364     highlightedMetaData = hightlight( highlighterMetaDataTerm, "metadata", metadata);
365    }
366   else if(highlighterMetaDataTerm!=null)
367    {
368     highlightedMetaData = "";
369    }
370
371     result.append(addResult(type, title, path, fileName, score, site, modified, highlightedContent, highlightedProperties, highlightedMetaData));
372
373   }
374     
375     result.append(addEndTag());
376     return result.toString();
377 }
378
379      public static String JavaDoc replaceAll(
380             String JavaDoc input,
381             String JavaDoc forReplace,
382             String JavaDoc replaceWith) {
383             if( input == null )
384               return null;
385             StringBuffer JavaDoc result = new StringBuffer JavaDoc();
386             boolean hasMore = true;
387             while (hasMore) {
388               int start = input.indexOf(forReplace);
389               int end = start + forReplace.length();
390               if (start != -1) {
391                 result.append(input.substring(0, start) + replaceWith);
392                 input = input.substring(end);
393               }
394               else {
395                 hasMore = false;
396                 result.append(input);
397               }
398             }
399             if (result.toString().equals(""))
400               return input; //nothing is changed
401
else
402               return result.toString();
403           }
404
405      private String JavaDoc filter(String JavaDoc stringToFilter){
406          //potrebno zbog xml-a
407
stringToFilter = replaceAll(stringToFilter,"&","&#38;#38;");
408           stringToFilter = replaceAll(stringToFilter,"<","&#60;");
409           stringToFilter = replaceAll(stringToFilter,">","&#62;");
410          
411  
412           //stringToFilter = replaceAll(stringToFilter,"\\r"," ");
413
//stringToFilter = replaceAll(stringToFilter,"\\n"," ");
414

415           stringToFilter = stringToFilter.replaceAll("\u0000"," ");
416           stringToFilter = stringToFilter.replaceAll("\u0001"," ");
417           stringToFilter = stringToFilter.replaceAll("\u0002"," ");
418           stringToFilter = stringToFilter.replaceAll("\u0003"," ");
419           stringToFilter = stringToFilter.replaceAll("\u0004"," ");
420           stringToFilter = stringToFilter.replaceAll("\u0005"," ");
421           stringToFilter = stringToFilter.replaceAll("\u0006"," ");
422           stringToFilter = stringToFilter.replaceAll("\u0007"," ");
423           stringToFilter = stringToFilter.replaceAll("\u0008"," ");
424           stringToFilter = stringToFilter.replaceAll("\u0009"," ");
425           stringToFilter = stringToFilter.replaceAll("\u0010"," ");
426           stringToFilter = stringToFilter.replaceAll("\u0011"," ");
427           stringToFilter = stringToFilter.replaceAll("\u0012"," ");
428           stringToFilter = stringToFilter.replaceAll("\u0013"," ");
429           stringToFilter = stringToFilter.replaceAll("\u0014"," ");
430           stringToFilter = stringToFilter.replaceAll("\u0015"," ");
431           stringToFilter = stringToFilter.replaceAll("\u0016"," ");
432           stringToFilter = stringToFilter.replaceAll("\u0017"," ");
433           stringToFilter = stringToFilter.replaceAll("\u0018"," ");
434           stringToFilter = stringToFilter.replaceAll("\u0019"," ");
435
436           stringToFilter = stringToFilter.replaceAll("\uffff"," ");
437           stringToFilter = stringToFilter.replaceAll("\ufffe"," ");
438
439           stringToFilter = stringToFilter.replaceAll("\ud800"," ");
440           stringToFilter = stringToFilter.replaceAll("\udc00"," ");
441           stringToFilter = stringToFilter.replaceAll("\u1c0000"," ");
442
443           stringToFilter = stringToFilter.replaceAll("\u000b"," ");
444           stringToFilter = stringToFilter.replaceAll("\u000c"," ");
445           stringToFilter = stringToFilter.replaceAll("\u000e"," ");
446           stringToFilter = stringToFilter.replaceAll("\u000f"," ");
447
448           stringToFilter = stringToFilter.replaceAll("\u001a"," ");
449           stringToFilter = stringToFilter.replaceAll("\u001b"," ");
450           stringToFilter = stringToFilter.replaceAll("\u001c"," ");
451           stringToFilter = stringToFilter.replaceAll("\u001d"," ");
452           stringToFilter = stringToFilter.replaceAll("\u001e"," ");
453           stringToFilter = stringToFilter.replaceAll("\u001f"," ");
454
455          return stringToFilter;
456      }
457
458
459      private String JavaDoc addStartTag()
460       {
461          return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Search>\n";
462       }
463
464      private String JavaDoc addEndTag()
465      {
466         return "</SearchResult>\n</Search>\n";
467      }
468
469
470      private String JavaDoc addSummary(String JavaDoc searchedTerm,int beginIndex,int endIndex,int docsNumber,long searchedTime )
471        {
472          StringBuffer JavaDoc result=new StringBuffer JavaDoc();
473          result.append("<SearchSummary>\n");
474          result.append("<SearchedTerm>"+filter(searchedTerm)+"</SearchedTerm>\n");
475          result.append("<BeginIndex>"+beginIndex+"</BeginIndex>\n");
476          result.append("<EndIndex>"+endIndex+"</EndIndex>\n");
477          result.append("<DocsNumber>"+docsNumber+"</DocsNumber>\n");
478          result.append("<SearchedTime>"+searchedTime+"</SearchedTime>\n");
479          result.append("</SearchSummary>\n");
480          result.append("<SearchResult>\n");
481          return result.toString();
482      }
483
484      private String JavaDoc addResult(String JavaDoc type, String JavaDoc title, String JavaDoc path,String JavaDoc fileName,float score, String JavaDoc site, String JavaDoc modified, String JavaDoc content, String JavaDoc properties ,String JavaDoc metadata)
485        {
486          StringBuffer JavaDoc result=new StringBuffer JavaDoc();
487          result.append("<Result>\n");
488          result.append("<FileType>"+type+"</FileType>\n");
489          result.append("<Title>"+filter(title)+"</Title>\n");
490          result.append("<AbsolutFilePath>"+filter(path)+"</AbsolutFilePath>\n");
491          result.append("<AbsolutFileName>"+filter(fileName)+"</AbsolutFileName>\n");
492          result.append("<SiteName>"+site+"</SiteName>\n");
493          result.append("<Score>"+(new Float JavaDoc(score)).toString()+"</Score>\n");
494          result.append("<Modified>"+modified+"</Modified>\n");
495      
496          if(content.equals(""))
497            result.append("<Content></Content>\n");
498          else
499             result.append("<Content>..."+content+"...</Content>\n");
500
501          if(properties!=null)
502           {
503             if(properties.equals(""))
504               result.append("<Properties></Properties>\n");
505             else
506               result.append("<Properties>..."+properties+"..</Properties>\n");
507           }
508
509          if(metadata!=null)
510           {
511             if(metadata.equals(""))
512               result.append("<MetaData></MetaData>\n");
513             else
514               result.append("<MetaData>..."+metadata+"..</MetaData>\n");
515           }
516          result.append("</Result>\n");
517          return result.toString();
518      }
519
520      private String JavaDoc hightlight(String JavaDoc term, String JavaDoc filed, String JavaDoc stringToHightLight)
521        {
522          String JavaDoc result=null;
523          Analyzer analyzer = new StandardAnalyzer();
524          Query queryHighlighter=null;
525
526          try{
527                  queryHighlighter = QueryParser.parse(term, filed, analyzer);
528            if(!(queryHighlighter instanceof TermQuery))
529              {
530                 Query aquery[] = new Query[list.length];
531                    for(int j = 0;j<list.length;j++)
532                    {
533                     Query tempQuery=queryHighlighter;
534                     aquery[j]=tempQuery.rewrite(listReader[j]);
535                    }
536                    queryHighlighter=Query.mergeBooleanQueries(aquery);
537              }
538            } catch (Exception JavaDoc e) {
539          }
540
541         if(queryHighlighter!=null)
542          {
543           try{
544             Scorer scorer = new QueryScorer(queryHighlighter);
545             Highlighter highlighter = new Highlighter(scorer);
546
547             if(filed.equals("properties"))
548                 highlighter.setTextFragmenter(new SimpleFragmenter(metaDataLength));
549             if(filed.equals("metadata"))
550                 highlighter.setTextFragmenter(new SimpleFragmenter(wordLength));
551             if(filed.equals("fullcontents"))
552                 highlighter.setTextFragmenter(new SimpleFragmenter(contentLength));
553
554             TokenStream tokenStream =new StandardAnalyzer().tokenStream(filed,new StringReader JavaDoc(stringToHightLight));
555             result = highlighter.getBestFragment(tokenStream, stringToHightLight);
556
557           }catch(Exception JavaDoc e){
558                 result=null;
559             }
560           }
561
562          if(result==null)
563            {
564              result = getContent(stringToHightLight,filed);
565                }
566            else
567            {
568              result=filter(result);
569            }
570             return result;
571         }
572
573
574     private String JavaDoc getContent(String JavaDoc content,String JavaDoc fieldName)
575      {
576       if(content==null)
577         return "";
578
579        String JavaDoc result=null;
580
581        int len = 150;
582
583
584        if(fieldName.equals("fullcontents"))
585           {
586         len = contentLength;
587           }
588
589      if(fieldName.equals("metadata"))
590         {
591         len = metaDataLength;
592         }
593
594       if(fieldName.equals("properties"))
595           {
596         len = wordLength;
597           }
598
599
600
601       int contentLen=content.length();
602
603       if(contentLen<len)
604         {
605            if(contentLen>0)
606                result=content;
607            else
608                result="";
609         }
610       else
611           result=content.substring(0,len);
612
613       result=filter(result);
614       return result;
615      }
616
617     private String JavaDoc transformTime(String JavaDoc origin)
618      {
619       String JavaDoc result ;
620       SimpleDateFormat JavaDoc informatter = new SimpleDateFormat JavaDoc("yyyyMMdd");
621       SimpleDateFormat JavaDoc outformatter = new SimpleDateFormat JavaDoc("yyyy.MM.dd");
622
623       try{
624       Date JavaDoc d = informatter.parse(origin);
625       result = outformatter.format(d);
626       }catch(ParseException JavaDoc e){
627         return origin;
628       }
629       return result;
630      }
631
632 }
Popular Tags