package org.enhydra.snapper.wrapper.lucene;

import org.enhydra.snapper.api.Searcher;

import java.text.SimpleDateFormat;
import java.text.ParseException;
import java.util.Vector;
import java.util.Date;
import java.util.Properties;
import java.util.Enumeration;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation;

import org.apache.lucene.search.highlight.SimpleFragmenter;

/**
 * {@link Searcher} implementation that runs a query against one Lucene index
 * per configured site and renders the hits as an XML string.
 *
 * <p>Typical call order, as far as this class shows: {@link #setUpSearcher(Properties)},
 * optionally {@link #setUpHighLighter(Properties)}, one of the {@code searchDocs}
 * overloads, {@link #getXmlSearchedResult()} and finally {@link #close()}.
 *
 * <p>The instance keeps the state of the last search in plain fields and does no
 * synchronization of its own.
 */
public class LuceneSearcher implements Searcher {

    /** Largest fragment length accepted by {@link #setUpHighLighter(Properties)}. */
    private static final int MAX_FRAGMENT_LENGTH = 300;

    /** Query string of the last search, reported (escaped) in the XML summary. */
    String searchedTerm = "";
    /** Query used to highlight the "fullcontents" field; null disables highlighting. */
    String highlighterTerm;
    /** Query used to highlight the "properties" field; null omits the element. */
    String highlighterPropertiesTerm;
    /** Query used to highlight the "metadata" field; null omits the element. */
    String highlighterMetaDataTerm;

    /** Parsed form of {@link #searchedTerm}; null when the last search failed. */
    Query query = null;
    /** Hits of the last search; null when no search succeeded or after {@link #close()}. */
    Hits hits = null;

    /** Duration of the last search in milliseconds. */
    long searchedTime = 0;
    /** Number of hits of the last search. */
    int docsNumber = 0;

    /** One reader per site index, parallel to {@link #list} and {@link #siteNames}. */
    IndexReader[] listReader;
    /** One searcher per site index. */
    IndexSearcher[] list;
    /** Searcher spanning all entries of {@link #list}. */
    MultiSearcher searcher;
    /** Site names in the same order as {@link #list}. */
    Vector siteNames = new Vector();

    /** Fragment length used for the "fullcontents" field. */
    int contentLength = 150;
    /** Fragment length used for the "metadata" field. */
    int metaDataLength = 150;
    /** Fragment length used for the "properties" field. */
    int wordLength = 150;

    public LuceneSearcher() {
    }

    /**
     * Opens one index per entry of {@code sites} and builds the combined searcher.
     *
     * <p>The instance fields are only replaced when every index could be opened.
     * On failure the readers opened so far are closed again, the error is logged
     * and the previous state is left untouched.
     *
     * @param sites maps a site name (key) to the directory of its index (value)
     */
    public void setUpSearcher(Properties sites) {
        // Collected in growable vectors: propertyNames() also yields keys from a
        // Properties' defaults, so sites.size() is not a safe array size.
        Vector names = new Vector();
        Vector readers = new Vector();

        try {
            for (Enumeration e = sites.propertyNames(); e.hasMoreElements();) {
                String siteName = (String) e.nextElement();
                String indexDir = sites.getProperty(siteName);

                readers.add(IndexReader.open(indexDir));
                names.add(siteName);
            }

            IndexReader[] openedReaders = new IndexReader[readers.size()];
            IndexSearcher[] searchers = new IndexSearcher[readers.size()];
            for (int i = 0; i < openedReaders.length; i++) {
                openedReaders[i] = (IndexReader) readers.elementAt(i);
                searchers[i] = new IndexSearcher(openedReaders[i]);
            }
            MultiSearcher combined = new MultiSearcher(searchers);

            listReader = openedReaders;
            list = searchers;
            searcher = combined;
            siteNames = names;
        } catch (IOException e) {
            // Do not leak the readers that were opened before the failure.
            for (int i = 0; i < readers.size(); i++) {
                try {
                    ((IndexReader) readers.elementAt(i)).close();
                } catch (IOException closeFailure) {
                    logError("Could not close Searcher", closeFailure);
                }
            }
            logError("Could not initialize Searcher", e);
        }
    }

    /**
     * Reads the fragment lengths from {@code sites}.
     *
     * <p>Recognized keys are {@code contentLength}, {@code metaDataLength} and
     * {@code wordLength}; all other keys are ignored. Values are clamped to the
     * range 0..300. A value that is not an integer is logged and leaves the
     * current setting unchanged.
     *
     * @param sites highlighter settings
     */
    public void setUpHighLighter(Properties sites) {
        for (Enumeration e = sites.propertyNames(); e.hasMoreElements();) {
            String name = (String) e.nextElement();
            String value = sites.getProperty(name);

            if ("contentLength".equals(name)) {
                contentLength = parseLength(value, contentLength);
            } else if ("metaDataLength".equals(name)) {
                metaDataLength = parseLength(value, metaDataLength);
            } else if ("wordLength".equals(name)) {
                wordLength = parseLength(value, wordLength);
            }
        }
    }

    /**
     * Closes every index reader and drops all references held by this instance.
     *
     * <p>A reader that fails to close is logged and does not prevent the
     * remaining readers from being closed. {@code docsNumber} and
     * {@code searchedTime} keep their last values.
     */
    public void close() {
        if (listReader != null) {
            for (int i = 0; i < listReader.length; i++) {
                if (listReader[i] == null) {
                    continue;
                }
                try {
                    listReader[i].close();
                } catch (IOException e) {
                    logError("Could not close Searcher", e);
                }
            }
        }

        searchedTerm = null;
        highlighterTerm = null;
        highlighterPropertiesTerm = null;
        highlighterMetaDataTerm = null;
        listReader = null;
        list = null;
        searcher = null;
        query = null;
        hits = null;
        if (siteNames != null) {
            siteNames.removeAllElements();
        }
        siteNames = null;
    }

    /** Searches without any highlighting query and with the default ordering. */
    public void searchDocs(String luceneQuery, String language) throws IOException {
        searchDocs(luceneQuery, null, null, null, null, language);
    }

    /** Searches with a content highlighting query and the default ordering. */
    public void searchDocs(String luceneQuery, String highlighterContentQuery, String language)
            throws IOException {
        searchDocs(luceneQuery, highlighterContentQuery, null, null, null, language);
    }

    /** Searches with content and properties highlighting queries and the default ordering. */
    public void searchDocs(String luceneQuery, String highlighterContentQuery,
            String highlighterPropertiesQuery, String language) throws IOException {
        searchDocs(luceneQuery, highlighterContentQuery, highlighterPropertiesQuery, null, null, language);
    }

    /** Searches with content and properties highlighting queries and an explicit ordering. */
    public void searchDocs(String luceneQuery, String highlighterContentQuery,
            String highlighterPropertiesQuery, String sortType, String language) throws IOException {
        searchDocs(luceneQuery, highlighterContentQuery, highlighterPropertiesQuery, null, sortType, language);
    }

    /**
     * Parses {@code luceneQuery} against the "contents" field, runs it on all
     * site indexes and stores hits, hit count and elapsed time in this instance.
     *
     * <p>When the query cannot be parsed, the searcher has not been set up, or
     * the search fails with an {@link IOException}, the stored result is reset
     * to "no hits"; I/O failures are logged rather than propagated.
     *
     * @param luceneQuery                query string in Lucene query syntax
     * @param highlighterContentQuery    query for highlighting "fullcontents", or null
     * @param highlighterPropertiesQuery query for highlighting "properties", or null
     * @param highlighterMetaDataQuery   query for highlighting "metadata", or null
     * @param sortType                   "newest" or "oldest" to sort on the "modified"
     *                                   field; anything else uses the default ordering
     * @param language                   not used by this implementation
     */
    public void searchDocs(String luceneQuery, String highlighterContentQuery,
            String highlighterPropertiesQuery, String highlighterMetaDataQuery, String sortType,
            String language) throws IOException {
        long start = System.currentTimeMillis();

        // The raw term is stored; it is escaped exactly once, when the summary is written.
        searchedTerm = luceneQuery;
        highlighterTerm = highlighterContentQuery;
        highlighterPropertiesTerm = highlighterPropertiesQuery;
        highlighterMetaDataTerm = highlighterMetaDataQuery;

        if (searcher == null) {
            logError("Could not Search Docs", new IOException("Searcher is not initialized"));
            resetResults();
            return;
        }

        try {
            query = QueryParser.parse(searchedTerm, "contents", new StandardAnalyzer());
        } catch (org.apache.lucene.queryParser.ParseException e) {
            // An unparsable query is reported to the caller as an empty result.
            resetResults();
            return;
        }

        try {
            if ("newest".equals(sortType)) {
                hits = searcher.search(query, new Sort("modified", true));
            } else if ("oldest".equals(sortType)) {
                hits = searcher.search(query, new Sort("modified", false));
            } else {
                hits = searcher.search(query);
            }

            searchedTime = System.currentTimeMillis() - start;
            docsNumber = hits.length();
        } catch (IOException e) {
            logError("Could not Search Docs", e);
            resetResults();
        }
    }

    /** Returns the number of hits of the last search. */
    public int getSearchedDocsNumber() {
        return docsNumber;
    }

    /** Returns the duration of the last search in milliseconds. */
    public long getSearchedTime() {
        return searchedTime;
    }

    /** Returns the XML for all hits of the last search. */
    public String getXmlSearchedResult() {
        return getXmlSearchedResult(1, docsNumber);
    }

    /**
     * Returns the XML for the hits numbered {@code begin} to {@code end}.
     *
     * <p>An out-of-range {@code begin} (not in 1..hit count) is reported as 0 in
     * the summary and the listing starts at the first hit. {@code end} is capped
     * at the hit count. The listing stops at the first document that cannot be
     * loaded.
     *
     * @param begin 1-based number of the first hit to include
     * @param end   1-based number of the last hit to include
     * @return the XML document as a string
     */
    public String getXmlSearchedResult(int begin, int end) {
        int beginIndex;
        if (docsNumber == 0 || begin > docsNumber || begin <= 0) {
            begin = 0;
            beginIndex = 0;
        } else {
            beginIndex = begin - 1;
        }

        if (end > docsNumber) {
            end = docsNumber;
        }

        StringBuffer result = new StringBuffer();
        result.append(addStartTag());
        result.append(addSummary(searchedTerm, begin, end, docsNumber, searchedTime));

        // hits is null after close() even though docsNumber keeps its last value.
        if (docsNumber != 0 && hits != null) {
            for (int i = beginIndex; i < end; i++) {
                Document doc;
                try {
                    doc = hits.doc(i);
                } catch (IOException e) {
                    logError("Could not get Searched Result for " + i + " document", e);
                    break;
                }
                result.append(describeHit(doc, i));
            }
        }

        result.append(addEndTag());
        return result.toString();
    }

    /**
     * Replaces every occurrence of {@code forReplace} in {@code input} by
     * {@code replaceWith}, treating both as literal text.
     *
     * @param input       text to process; null yields null
     * @param forReplace  literal text to look for; an empty string leaves
     *                    {@code input} unchanged
     * @param replaceWith replacement text
     * @return the text with all occurrences replaced
     */
    public static String replaceAll(String input, String forReplace, String replaceWith) {
        if (input == null) {
            return null;
        }
        // An empty search string matches everywhere and would never terminate.
        if (forReplace.length() == 0) {
            return input;
        }

        int match = input.indexOf(forReplace);
        if (match == -1) {
            return input;
        }

        StringBuffer result = new StringBuffer(input.length());
        int from = 0;
        while (match != -1) {
            result.append(input.substring(from, match)).append(replaceWith);
            from = match + forReplace.length();
            match = input.indexOf(forReplace, from);
        }
        result.append(input.substring(from));
        return result.toString();
    }

    /** Builds the {@code <Result>} element for the hit at position {@code index}. */
    private String describeHit(Document doc, int index) {
        String site = "";
        float score = 0.0f;
        try {
            int indexer = searcher.subSearcher(hits.id(index));
            score = hits.score(index);
            site = (String) siteNames.elementAt(indexer);
        } catch (Exception e) {
            // Best effort: a hit whose site or score cannot be resolved is still
            // listed, with score 0.
            score = 0.0f;
        }

        String type = doc.get("type");
        String title = doc.get("title");
        String path = doc.get("path");
        String modified = transformTime(doc.get("modified"));
        String properties = doc.get("properties");
        String metadata = doc.get("metadata");
        String fileName = doc.get("fileName");

        // Angle brackets in the stored text are blanked out before highlighting.
        String fullcontents = doc.get("fullcontents");
        fullcontents = replaceAll(fullcontents, "<", " ");
        fullcontents = replaceAll(fullcontents, ">", " ");

        String highlightedContent;
        if (highlighterTerm == null) {
            highlightedContent = getContent(fullcontents, "fullcontents");
        } else if (contentLength > 0) {
            highlightedContent = hightlight(highlighterTerm, "fullcontents", fullcontents);
        } else {
            highlightedContent = "";
        }

        // Properties are only reported for documents whose type is "doc".
        String highlightedProperties = null;
        if (highlighterPropertiesTerm != null && "doc".equals(type)) {
            highlightedProperties = wordLength > 0
                    ? hightlight(highlighterPropertiesTerm, "properties", properties)
                    : "";
        }

        String highlightedMetaData = null;
        if (highlighterMetaDataTerm != null) {
            highlightedMetaData = metaDataLength > 0
                    ? hightlight(highlighterMetaDataTerm, "metadata", metadata)
                    : "";
        }

        return addResult(type, title, path, fileName, score, site, modified,
                highlightedContent, highlightedProperties, highlightedMetaData);
    }

    /**
     * Makes {@code stringToFilter} safe to embed as XML character data.
     *
     * <p>{@code &}, {@code <} and {@code >} are replaced by entities. Control
     * characters below U+0020 other than line feed and carriage return, the
     * non-characters U+FFFE and U+FFFF, and unpaired surrogates are replaced by
     * a space.
     *
     * @param stringToFilter raw text, may be null
     * @return the escaped text; an empty string for null input
     */
    private String filter(String stringToFilter) {
        if (stringToFilter == null) {
            return "";
        }

        int length = stringToFilter.length();
        StringBuffer out = new StringBuffer(length + 16);
        for (int i = 0; i < length; i++) {
            char c = stringToFilter.charAt(i);
            if (c == '&') {
                out.append("&#38;");
            } else if (c == '<') {
                out.append("&lt;");
            } else if (c == '>') {
                out.append("&gt;");
            } else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < length
                    && stringToFilter.charAt(i + 1) >= 0xDC00 && stringToFilter.charAt(i + 1) <= 0xDFFF) {
                // A well-formed surrogate pair is a legal character: copy both halves.
                out.append(c).append(stringToFilter.charAt(i + 1));
                i++;
            } else if (isUnsafeForXml(c)) {
                out.append(' ');
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }

    /** Tells whether {@code c}, seen outside a surrogate pair, must be blanked out. */
    private static boolean isUnsafeForXml(char c) {
        if (c < 0x20) {
            return c != '\n' && c != '\r';
        }
        return (c >= 0xD800 && c <= 0xDFFF) || c == 0xFFFE || c == 0xFFFF;
    }

    /** Returns the XML declaration and the opening root tag. */
    private String addStartTag() {
        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<Search>\n";
    }

    /** Returns the tags closing the result list and the root element. */
    private String addEndTag() {
        return "</SearchResult>\n</Search>\n";
    }

    /**
     * Builds the {@code <SearchSummary>} element followed by the opening
     * {@code <SearchResult>} tag.
     */
    private String addSummary(String searchedTerm, int beginIndex, int endIndex, int docsNumber,
            long searchedTime) {
        StringBuffer result = new StringBuffer();
        result.append("<SearchSummary>\n");
        result.append("<SearchedTerm>").append(filter(searchedTerm)).append("</SearchedTerm>\n");
        result.append("<BeginIndex>").append(beginIndex).append("</BeginIndex>\n");
        result.append("<EndIndex>").append(endIndex).append("</EndIndex>\n");
        result.append("<DocsNumber>").append(docsNumber).append("</DocsNumber>\n");
        result.append("<SearchedTime>").append(searchedTime).append("</SearchedTime>\n");
        result.append("</SearchSummary>\n");
        result.append("<SearchResult>\n");
        return result.toString();
    }

    /**
     * Builds one {@code <Result>} element.
     *
     * @param content    already escaped content fragment
     * @param properties already escaped properties fragment, or null to omit the element
     * @param metadata   already escaped metadata fragment, or null to omit the element
     */
    private String addResult(String type, String title, String path, String fileName, float score,
            String site, String modified, String content, String properties, String metadata) {
        StringBuffer result = new StringBuffer();
        result.append("<Result>\n");
        result.append("<FileType>").append(filter(type)).append("</FileType>\n");
        result.append("<Title>").append(filter(title)).append("</Title>\n");
        result.append("<AbsolutFilePath>").append(filter(path)).append("</AbsolutFilePath>\n");
        result.append("<AbsolutFileName>").append(filter(fileName)).append("</AbsolutFileName>\n");
        result.append("<SiteName>").append(filter(site)).append("</SiteName>\n");
        result.append("<Score>").append(Float.toString(score)).append("</Score>\n");
        result.append("<Modified>").append(filter(modified)).append("</Modified>\n");

        if (content == null || content.equals("")) {
            result.append("<Content></Content>\n");
        } else {
            result.append("<Content>...").append(content).append("...</Content>\n");
        }

        if (properties != null) {
            if (properties.equals("")) {
                result.append("<Properties></Properties>\n");
            } else {
                result.append("<Properties>...").append(properties).append("..</Properties>\n");
            }
        }

        if (metadata != null) {
            if (metadata.equals("")) {
                result.append("<MetaData></MetaData>\n");
            } else {
                result.append("<MetaData>...").append(metadata).append("..</MetaData>\n");
            }
        }

        result.append("</Result>\n");
        return result.toString();
    }

    /**
     * Returns the best fragment of {@code stringToHightLight} for {@code term},
     * escaped for XML, or the leading part of the text when no fragment can be
     * produced.
     *
     * @param term               highlighting query in Lucene query syntax
     * @param filed              field name: "fullcontents", "properties" or "metadata"
     * @param stringToHightLight stored field text, may be null
     * @return escaped fragment, never null
     */
    private String hightlight(String term, String filed, String stringToHightLight) {
        Query queryHighlighter = null;

        try {
            queryHighlighter = QueryParser.parse(term, filed, new StandardAnalyzer());
            if (!(queryHighlighter instanceof TermQuery)) {
                // Rewrite against every index and merge the rewritten queries.
                Query[] rewritten = new Query[list.length];
                for (int j = 0; j < list.length; j++) {
                    rewritten[j] = queryHighlighter.rewrite(listReader[j]);
                }
                queryHighlighter = Query.mergeBooleanQueries(rewritten);
            }
        } catch (Exception e) {
            // Deliberate best effort: if parsing failed there is no query and the
            // plain excerpt is used below; if only rewriting failed, the parsed
            // query is used as it is.
        }

        String result = null;
        if (queryHighlighter != null) {
            try {
                Scorer scorer = new QueryScorer(queryHighlighter);
                Highlighter highlighter = new Highlighter(scorer);

                // Same field-to-length mapping as getContent().
                if (filed.equals("properties")) {
                    highlighter.setTextFragmenter(new SimpleFragmenter(wordLength));
                }
                if (filed.equals("metadata")) {
                    highlighter.setTextFragmenter(new SimpleFragmenter(metaDataLength));
                }
                if (filed.equals("fullcontents")) {
                    highlighter.setTextFragmenter(new SimpleFragmenter(contentLength));
                }

                TokenStream tokenStream = new StandardAnalyzer().tokenStream(filed,
                        new StringReader(stringToHightLight));
                result = highlighter.getBestFragment(tokenStream, stringToHightLight);
            } catch (Exception e) {
                // Includes a null text; fall back to the plain excerpt below.
                result = null;
            }
        }

        if (result == null) {
            return getContent(stringToHightLight, filed);
        }
        // NOTE(review): any markup the Highlighter inserts around matches is
        // escaped together with the text - confirm the consumer expects that.
        return filter(result);
    }

    /**
     * Returns the leading part of {@code content}, escaped for XML. The length
     * depends on the field: {@code contentLength} for "fullcontents",
     * {@code metaDataLength} for "metadata", {@code wordLength} for
     * "properties" and 150 for any other field.
     *
     * @param content   stored field text, may be null
     * @param fieldName name of the field the text comes from
     * @return escaped excerpt; an empty string for null content
     */
    private String getContent(String content, String fieldName) {
        if (content == null) {
            return "";
        }

        int len = 150;
        if (fieldName.equals("fullcontents")) {
            len = contentLength;
        }
        if (fieldName.equals("metadata")) {
            len = metaDataLength;
        }
        if (fieldName.equals("properties")) {
            len = wordLength;
        }
        // The length fields are not private; guard against a negative value.
        if (len < 0) {
            len = 0;
        }

        String excerpt = content.length() < len ? content : content.substring(0, len);
        return filter(excerpt);
    }

    /**
     * Converts a date from {@code yyyyMMdd} to {@code yyyy.MM.dd}.
     *
     * @param origin stored date, may be null
     * @return the reformatted date, {@code origin} itself when it cannot be
     *         parsed, or an empty string when it is null
     */
    private String transformTime(String origin) {
        if (origin == null) {
            return "";
        }

        // Created per call: SimpleDateFormat instances must not be shared between threads.
        SimpleDateFormat informatter = new SimpleDateFormat("yyyyMMdd");
        SimpleDateFormat outformatter = new SimpleDateFormat("yyyy.MM.dd");
        try {
            Date parsed = informatter.parse(origin);
            return outformatter.format(parsed);
        } catch (ParseException e) {
            return origin;
        }
    }

    /** Resets the stored search result to "no hits". */
    private void resetResults() {
        query = null;
        hits = null;
        searchedTime = 0;
        docsNumber = 0;
    }

    /**
     * Parses a fragment length and clamps it to 0..{@link #MAX_FRAGMENT_LENGTH}.
     *
     * @param value   configured value, may be null
     * @param current value to keep when {@code value} is missing or not an integer
     */
    private static int parseLength(String value, int current) {
        if (value == null) {
            return current;
        }
        try {
            int len = Integer.parseInt(value.trim());
            return Math.max(0, Math.min(len, MAX_FRAGMENT_LENGTH));
        } catch (NumberFormatException e) {
            logError("Could not read highlighter length", e);
            return current;
        }
    }

    /**
     * Reports {@code e} through the factory's logger, or on standard output when
     * the logger itself fails (for example because it has not been created).
     */
    private static void logError(String what, Exception e) {
        String detail = " caught a " + e.getClass() + "\n with message: " + e.getMessage();
        try {
            LuceneSearcherFactory.logger.error(what + " \n" + detail);
        } catch (Exception ex) {
            System.out.println(detail);
        }
    }

}