|                                                                                                              1   package org.enhydra.snapper.wrapper.lucene;
 2
 3   import org.enhydra.snapper.api.Searcher;
 4
 5   import java.text.SimpleDateFormat
  ; 6   import java.text.ParseException
  ; 7   import java.util.Vector
  ; 8   import java.util.Date
  ; 9   import java.util.Properties
  ; 10  import java.util.Enumeration
  ; 11  import java.io.IOException
  ; 12  import java.io.StringReader
  ; 13  import org.apache.lucene.analysis.Analyzer;
 14  import org.apache.lucene.analysis.standard.StandardAnalyzer;
 15  import org.apache.lucene.document.Document;
 16  import org.apache.lucene.queryParser.QueryParser;
 17  import org.apache.lucene.search.Hits;
 18  import org.apache.lucene.search.Sort;
 19  import org.apache.lucene.search.IndexSearcher;
 20  import org.apache.lucene.search.MultiSearcher;
 21  import org.apache.lucene.search.TermQuery;
 22  import org.apache.lucene.search.Query;
 23  import org.apache.lucene.search.highlight.QueryScorer;
 24  import org.apache.lucene.search.highlight.Highlighter;
 25  import org.apache.lucene.search.highlight.Scorer;
 26  import org.apache.lucene.analysis.TokenStream;
 27  import org.apache.lucene.index.IndexReader;
 28  import org.apache.lucene.search.Explanation;
 29
 30
 31  import org.apache.lucene.search.highlight.SimpleFragmenter;
 32
 37
 38  public class LuceneSearcher implements Searcher{
 39
 40      String
  searchedTerm=""; 41      String
  highlighterTerm; 42      String
  highlighterPropertiesTerm; 43      String
  highlighterMetaDataTerm; 44
 45      Query query=null;
 46      Hits hits=null;
 47
 48      long searchedTime=0;
 49      int docsNumber=0;
 50
 51      IndexReader[] listReader;
 52      IndexSearcher[] list;
 53      MultiSearcher searcher;
 54      Vector
  siteNames = new Vector  (); 55
 56      int contentLength = 150;
 57      int metaDataLength = 150;
 58      int wordLength = 150;
 59
 60      public LuceneSearcher() {}
 61
 62      public void setUpSearcher(Properties
  sites) { 63
 64          try {
 65              list=new IndexSearcher[sites.size()];
 66              listReader=new IndexReader[sites.size()];
 67              int i = 0;
 68              for (Enumeration
  e = sites.propertyNames() ; e.hasMoreElements() ;) 69               {
 70               String
  siteName = (String  )e.nextElement(); 71               String
  indexDir = (String  )sites.getProperty(siteName); 72
 73                listReader[i]=IndexReader.open(indexDir);
 74                list[i]=new IndexSearcher(listReader[i]);
 75                siteNames.add(siteName);
 76                i++;
 77               }
 78              searcher=new MultiSearcher(list);
 79
 80              }catch (IOException
  e) { 81                   try{
 82                      LuceneSearcherFactory.logger.error("Could not initialize Searcher \n caught a"+e.getClass()+
 83                                 "\n with message: " + e.getMessage());
 84                  } catch (Exception
  ex) { 85                      System.out.println(" caught a " + e.getClass() +
 86                         "\n with message: " + e.getMessage());
 87                  }
 88
 89              }
 90    }
 91
 92      public void setUpHighLighter(Properties
  sites) { 93
 94          for (Enumeration
  e = sites.propertyNames() ; e.hasMoreElements() ;) 95           {
 96             String
  name = (String  )e.nextElement(); 97             String
  value = (String  )sites.getProperty(name); 98             int len=(new Integer
  (value)).intValue(); 99
 100          if(name.equals("contentLength"))
 101           {
 102           if(len>300)
 103             contentLength=300;
 104           else
 105             contentLength =len;
 106           }
 107
 108          if(name.equals("metaDataLength"))
 109           {
 110           if(len>300)
 111             metaDataLength=300;
 112           else
 113             metaDataLength =len;
 114           }
 115
 116          if(name.equals("wordLength"))
 117           {
 118           if(len>300)
 119             wordLength = 300;
 120           else
 121             wordLength =len;
 122           }
 123
 124
 125          }
 126       }
 127   public void close()
 128     {
 129      try{
 130
 131           for(int i=0;i<listReader.length;i++)
 132            {
 133             listReader[i].close();
 134            }
 135          } catch (IOException
  e) { 136                           try{
 138                 LuceneSearcherFactory.logger.error("Could not close Searcher \n caught a "+e.getClass()+
 139                            "\n with message: " + e.getMessage());
 140             } catch (Exception
  ex) { 141                 System.out.println(" caught a " + e.getClass() +
 142                    "\n with message: " + e.getMessage());
 143             }
 144          }
 145         searchedTerm= null;
 146         highlighterTerm= null;
 147         highlighterPropertiesTerm= null;
 148         highlighterMetaDataTerm= null;
 149         listReader = null;
 150         list = null;
 151         searcher = null;
 152         query = null;
 153         hits=null;
 154         siteNames.removeAllElements();
 155         siteNames=null;
 156     }
 157
 158
 159
 160   public void searchDocs(String
  luceneQuery, String  language) throws IOException  161     {
 162     searchDocs(luceneQuery, null, null, null, null, language);
 163     }
 164
 165   public void searchDocs(String
  luceneQuery, String  highlighterContentQuery, String  language) throws IOException  166    {
 167     searchDocs(luceneQuery, highlighterContentQuery, null, null, null, language);
 168    }
 169
 170   public void searchDocs(String
  luceneQuery, String  highlighterContentQuery, String  highlighterPropertiesQuery, String  language) throws IOException  171   {
 172     searchDocs(luceneQuery, highlighterContentQuery, highlighterPropertiesQuery, null, null, language);
 173   }
 174
 175   public void searchDocs(String
  luceneQuery,String  highlighterContentQuery, String  highlighterPropertiesQuery, String  sortType, String  language) throws IOException  176    {
 177     searchDocs(luceneQuery,highlighterContentQuery, highlighterPropertiesQuery, null, sortType, language);
 178    }
 179
 180
 181
 182   public void searchDocs(String
  luceneQuery,String  highlighterContentQuery, String  highlighterPropertiesQuery, String  highlighterMetaDataQuery, String  sortType, String  language) throws IOException  183    {
 184    try
 185      {
 186        Date
  start = new Date  (); 187
 188         searchedTerm=luceneQuery;
 189         highlighterTerm=highlighterContentQuery;
 190         highlighterPropertiesTerm=highlighterPropertiesQuery;
 191         highlighterMetaDataTerm = highlighterMetaDataQuery;
 192         Analyzer analyzer = new StandardAnalyzer();
 193
 194         try{
 195             query = QueryParser.parse(searchedTerm, "contents", analyzer);
 196
 197         }catch(org.apache.lucene.queryParser.ParseException e){
 198             searchedTerm = filter(searchedTerm);
 199             query=null;
 200             hits=null;
 201             searchedTime=0;
 202             docsNumber=0;
 203             return;
 204         }
 205
 206         if(sortType!=null&&sortType.equals("newest"))
 207          {
 208           hits = searcher.search(query,new Sort("modified",true));
 209          }
 210         else if(sortType!=null&&sortType.equals("oldest"))
 211          {
 212           hits = searcher.search(query,new Sort("modified",false));
 213          }
 214     else
 223          {
 224           hits = searcher.search(query);
 225           }
 226      Date
  end = new Date  (); 227
 228      searchedTime=end.getTime() - start.getTime();
 229      docsNumber=hits.length();
 230
 231
 232     } catch (IOException
  e) { 233        try{
 234           LuceneSearcherFactory.logger.error("Could not Search Docs  \n caught a "+e.getClass()+
 235                "\n with message: " + e.getMessage());
 236            } catch (Exception
  ex) { 237              System.out.println(" caught a " + e.getClass() +
 238                 "\n with message: " + e.getMessage());
 239             }
 240         searchedTerm = filter(searchedTerm);
 241         query=null;
 242         hits=null;
 243         searchedTime=0;
 244         docsNumber=0;
 245     }
 246    }
 247
 248
 249 public int getSearchedDocsNumber()
 250   {
 251     return docsNumber;
 252   }
 253
 254 public long getSearchedTime()
 255   {
 256     return searchedTime;
 257   }
 258
 259
 260 public String
  getXmlSearchedResult() 261  {
 262
 263    return getXmlSearchedResult(1,docsNumber);
 264  }
 265
 266 public String
  getXmlSearchedResult(int begin,int end) 267 {
 268
 269     int beginIndex;
 270
 271     if(docsNumber==0||begin>docsNumber||begin<=0)
 272         beginIndex = begin = 0;
 273     else
 274         beginIndex = begin-1;
 275
 276     if(end>docsNumber)
 277         end=docsNumber;
 278
 279
 280     StringBuffer
  result=new StringBuffer  (); 281
 282     result.append(addStartTag());
 283     result.append(addSummary(searchedTerm, begin, end, docsNumber, searchedTime));
 284
 285
 286     if(docsNumber==0){
 287          result.append(addEndTag());
 288          return result.toString();
 289     }
 290
 291
 292     for(int i=beginIndex;i<end;i++)
 293     {
 294         Document doc=null;
 295         try{
 296             doc = hits.doc(i);
 297         }catch(IOException
  e){ 298          try{
 299            LuceneSearcherFactory.logger.error("Could not get Searched Result for "+i+" document \n  caught a " + e.getClass() +
 300                 "\n with message: " + e.getMessage());
 301             } catch (Exception
  ex) { 302             System.out.println(" caught a " + e.getClass() +
 303                 "\n with message: " + e.getMessage());
 304            }
 305           break;
 306         }
 307
 308
 309     String
  site=""; 310     float score = 0.0f;
 311     try{
 312       int indexer=searcher.subSearcher(hits.id(i));
 313       score = hits.score(i);
 314       site=(String
  )siteNames.elementAt(indexer); 315     }catch(Exception
  e){ 316         score = 0.0f;
 317     }
 318
 319       String
  type=doc.get("type"); 320       String
  title=doc.get("title"); 321       String
  path=doc.get("path"); 322       String
  modified=doc.get("modified"); 323       String
  properties=doc.get("properties"); 324       String
  fullcontents=doc.get("fullcontents"); 325       String
  metadata=doc.get("metadata"); 326       String
  fileName=doc.get("fileName"); 327
 328       fullcontents = replaceAll(fullcontents,"<"," ");
 329       fullcontents = replaceAll(fullcontents,">"," ");
 330
 331       modified = transformTime(modified);
 332
 333    String
  highlightedContent=null; 334    String
  highlightedProperties=null; 335    String
  highlightedMetaData=null; 336
 337   if(highlighterTerm!=null&&contentLength>0)
 338    {
 339     highlightedContent = hightlight( highlighterTerm, "fullcontents", fullcontents);
 340    }
 341   else if(highlighterTerm!=null)
 342    {
 343     highlightedContent = "";
 344    }
 345   else
 346    {
 347     highlightedContent = getContent(fullcontents,"fullcontents");
 348    }
 349
 350
 351
 352   if(highlighterPropertiesTerm!=null && type.equals("doc")&&wordLength>0)
 353    {
 354      highlightedProperties = hightlight( highlighterPropertiesTerm, "properties", properties);
 355    }
 356   else if(highlighterPropertiesTerm!=null&&type.equals("doc"))
 357    {
 358      highlightedProperties = "";
 359    }
 360
 361
 362   if(highlighterMetaDataTerm!=null&&metaDataLength>0)
 363    {
 364     highlightedMetaData = hightlight( highlighterMetaDataTerm, "metadata", metadata);
 365    }
 366   else if(highlighterMetaDataTerm!=null)
 367    {
 368     highlightedMetaData = "";
 369    }
 370
 371     result.append(addResult(type, title, path, fileName, score, site, modified, highlightedContent, highlightedProperties, highlightedMetaData));
 372
 373   }
 374
 375     result.append(addEndTag());
 376     return result.toString();
 377 }
 378
 379      public static String
  replaceAll( 380             String
  input, 381             String
  forReplace, 382             String
  replaceWith) { 383             if( input == null )
 384               return null;
 385             StringBuffer
  result = new StringBuffer  (); 386             boolean hasMore = true;
 387             while (hasMore) {
 388               int start = input.indexOf(forReplace);
 389               int end = start + forReplace.length();
 390               if (start != -1) {
 391                 result.append(input.substring(0, start) + replaceWith);
 392                 input = input.substring(end);
 393               }
 394               else {
 395                 hasMore = false;
 396                 result.append(input);
 397               }
 398             }
 399             if (result.toString().equals(""))
 400               return input;             else
 402               return result.toString();
 403           }
 404
 405      private String
  filter(String  stringToFilter){ 406                    stringToFilter = replaceAll(stringToFilter,"&","&#38;");
 408           stringToFilter = replaceAll(stringToFilter,"<","<");
 409           stringToFilter = replaceAll(stringToFilter,">",">");
 410
 411
 412
 415           stringToFilter = stringToFilter.replaceAll("\u0000"," ");
 416           stringToFilter = stringToFilter.replaceAll("\u0001"," ");
 417           stringToFilter = stringToFilter.replaceAll("\u0002"," ");
 418           stringToFilter = stringToFilter.replaceAll("\u0003"," ");
 419           stringToFilter = stringToFilter.replaceAll("\u0004"," ");
 420           stringToFilter = stringToFilter.replaceAll("\u0005"," ");
 421           stringToFilter = stringToFilter.replaceAll("\u0006"," ");
 422           stringToFilter = stringToFilter.replaceAll("\u0007"," ");
 423           stringToFilter = stringToFilter.replaceAll("\u0008"," ");
 424           stringToFilter = stringToFilter.replaceAll("\u0009"," ");
 425           stringToFilter = stringToFilter.replaceAll("\u0010"," ");
 426           stringToFilter = stringToFilter.replaceAll("\u0011"," ");
 427           stringToFilter = stringToFilter.replaceAll("\u0012"," ");
 428           stringToFilter = stringToFilter.replaceAll("\u0013"," ");
 429           stringToFilter = stringToFilter.replaceAll("\u0014"," ");
 430           stringToFilter = stringToFilter.replaceAll("\u0015"," ");
 431           stringToFilter = stringToFilter.replaceAll("\u0016"," ");
 432           stringToFilter = stringToFilter.replaceAll("\u0017"," ");
 433           stringToFilter = stringToFilter.replaceAll("\u0018"," ");
 434           stringToFilter = stringToFilter.replaceAll("\u0019"," ");
 435
 436           stringToFilter = stringToFilter.replaceAll("\uffff"," ");
 437           stringToFilter = stringToFilter.replaceAll("\ufffe"," ");
 438
 439           stringToFilter = stringToFilter.replaceAll("\ud800"," ");
 440           stringToFilter = stringToFilter.replaceAll("\udc00"," ");
 441           stringToFilter = stringToFilter.replaceAll("\u1c0000"," ");
 442
 443           stringToFilter = stringToFilter.replaceAll("\u000b"," ");
 444           stringToFilter = stringToFilter.replaceAll("\u000c"," ");
 445           stringToFilter = stringToFilter.replaceAll("\u000e"," ");
 446           stringToFilter = stringToFilter.replaceAll("\u000f"," ");
 447
 448           stringToFilter = stringToFilter.replaceAll("\u001a"," ");
 449           stringToFilter = stringToFilter.replaceAll("\u001b"," ");
 450           stringToFilter = stringToFilter.replaceAll("\u001c"," ");
 451           stringToFilter = stringToFilter.replaceAll("\u001d"," ");
 452           stringToFilter = stringToFilter.replaceAll("\u001e"," ");
 453           stringToFilter = stringToFilter.replaceAll("\u001f"," ");
 454
 455          return stringToFilter;
 456      }
 457
 458
 459      private String
  addStartTag() 460       {
 461          return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Search>\n";
 462       }
 463
 464      private String
  addEndTag() 465      {
 466         return "</SearchResult>\n</Search>\n";
 467      }
 468
 469
 470      private String
  addSummary(String  searchedTerm,int beginIndex,int endIndex,int docsNumber,long searchedTime ) 471        {
 472          StringBuffer
  result=new StringBuffer  (); 473          result.append("<SearchSummary>\n");
 474          result.append("<SearchedTerm>"+filter(searchedTerm)+"</SearchedTerm>\n");
 475          result.append("<BeginIndex>"+beginIndex+"</BeginIndex>\n");
 476          result.append("<EndIndex>"+endIndex+"</EndIndex>\n");
 477          result.append("<DocsNumber>"+docsNumber+"</DocsNumber>\n");
 478          result.append("<SearchedTime>"+searchedTime+"</SearchedTime>\n");
 479          result.append("</SearchSummary>\n");
 480          result.append("<SearchResult>\n");
 481          return result.toString();
 482      }
 483
 484      private String
  addResult(String  type, String  title, String  path,String  fileName,float score, String  site, String  modified, String  content, String  properties ,String  metadata) 485        {
 486          StringBuffer
  result=new StringBuffer  (); 487          result.append("<Result>\n");
 488          result.append("<FileType>"+type+"</FileType>\n");
 489          result.append("<Title>"+filter(title)+"</Title>\n");
 490          result.append("<AbsolutFilePath>"+filter(path)+"</AbsolutFilePath>\n");
 491          result.append("<AbsolutFileName>"+filter(fileName)+"</AbsolutFileName>\n");
 492          result.append("<SiteName>"+site+"</SiteName>\n");
 493          result.append("<Score>"+(new Float
  (score)).toString()+"</Score>\n"); 494          result.append("<Modified>"+modified+"</Modified>\n");
 495
 496          if(content.equals(""))
 497            result.append("<Content></Content>\n");
 498          else
 499             result.append("<Content>..."+content+"...</Content>\n");
 500
 501          if(properties!=null)
 502           {
 503             if(properties.equals(""))
 504               result.append("<Properties></Properties>\n");
 505             else
 506               result.append("<Properties>..."+properties+"..</Properties>\n");
 507           }
 508
 509          if(metadata!=null)
 510           {
 511             if(metadata.equals(""))
 512               result.append("<MetaData></MetaData>\n");
 513             else
 514               result.append("<MetaData>..."+metadata+"..</MetaData>\n");
 515           }
 516          result.append("</Result>\n");
 517          return result.toString();
 518      }
 519
 520      private String
  hightlight(String  term, String  filed, String  stringToHightLight) 521        {
 522          String
  result=null; 523          Analyzer analyzer = new StandardAnalyzer();
 524          Query queryHighlighter=null;
 525
 526          try{
 527                  queryHighlighter = QueryParser.parse(term, filed, analyzer);
 528            if(!(queryHighlighter instanceof TermQuery))
 529              {
 530                 Query aquery[] = new Query[list.length];
 531                    for(int j = 0;j<list.length;j++)
 532                    {
 533                     Query tempQuery=queryHighlighter;
 534                     aquery[j]=tempQuery.rewrite(listReader[j]);
 535                    }
 536                    queryHighlighter=Query.mergeBooleanQueries(aquery);
 537              }
 538            } catch (Exception
  e) { 539          }
 540
 541         if(queryHighlighter!=null)
 542          {
 543           try{
 544             Scorer scorer = new QueryScorer(queryHighlighter);
 545             Highlighter highlighter = new Highlighter(scorer);
 546
 547             if(filed.equals("properties"))
 548                 highlighter.setTextFragmenter(new SimpleFragmenter(metaDataLength));
 549             if(filed.equals("metadata"))
 550                 highlighter.setTextFragmenter(new SimpleFragmenter(wordLength));
 551             if(filed.equals("fullcontents"))
 552                 highlighter.setTextFragmenter(new SimpleFragmenter(contentLength));
 553
 554             TokenStream tokenStream =new StandardAnalyzer().tokenStream(filed,new StringReader
  (stringToHightLight)); 555             result = highlighter.getBestFragment(tokenStream, stringToHightLight);
 556
 557           }catch(Exception
  e){ 558                 result=null;
 559             }
 560           }
 561
 562          if(result==null)
 563            {
 564              result = getContent(stringToHightLight,filed);
 565                }
 566            else
 567            {
 568              result=filter(result);
 569            }
 570             return result;
 571         }
 572
 573
 574     private String
  getContent(String  content,String  fieldName) 575      {
 576       if(content==null)
 577         return "";
 578
 579        String
  result=null; 580
 581        int len = 150;
 582
 583
 584        if(fieldName.equals("fullcontents"))
 585           {
 586         len = contentLength;
 587           }
 588
 589      if(fieldName.equals("metadata"))
 590         {
 591         len = metaDataLength;
 592         }
 593
 594       if(fieldName.equals("properties"))
 595           {
 596         len = wordLength;
 597           }
 598
 599
 600
 601       int contentLen=content.length();
 602
 603       if(contentLen<len)
 604         {
 605            if(contentLen>0)
 606                result=content;
 607            else
 608                result="";
 609         }
 610       else
 611           result=content.substring(0,len);
 612
 613       result=filter(result);
 614       return result;
 615      }
 616
 617     private String
  transformTime(String  origin) 618      {
 619       String
  result ; 620       SimpleDateFormat
  informatter = new SimpleDateFormat  ("yyyyMMdd"); 621       SimpleDateFormat
  outformatter = new SimpleDateFormat  ("yyyy.MM.dd"); 622
 623       try{
 624       Date
  d =   informatter.parse(origin); 625       result = outformatter.format(d);
 626       }catch(ParseException
  e){ 627         return origin;
 628       }
 629       return result;
 630      }
 631
 632 }
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |