1 16 package com.blandware.atleap.search; 17 18 import java.io.ByteArrayInputStream ; 19 import java.io.File ; 20 import java.io.IOException ; 21 import java.io.StringReader ; 22 import java.util.ArrayList ; 23 import java.util.Calendar ; 24 import java.util.Collection ; 25 import java.util.Collections ; 26 import java.util.Date ; 27 import java.util.HashMap ; 28 import java.util.Iterator ; 29 import java.util.List ; 30 import java.util.Locale ; 31 import java.util.Map ; 32 33 import javax.servlet.ServletContext ; 34 import javax.servlet.http.HttpServletRequest ; 35 36 import org.apache.commons.logging.Log; 37 import org.apache.commons.logging.LogFactory; 38 import org.apache.lucene.analysis.Analyzer; 39 import org.apache.lucene.analysis.StopAnalyzer; 40 import org.apache.lucene.analysis.TokenStream; 41 import org.apache.lucene.analysis.cjk.CJKAnalyzer; 42 import org.apache.lucene.analysis.cz.CzechAnalyzer; 43 import org.apache.lucene.analysis.de.GermanAnalyzer; 44 import org.apache.lucene.analysis.el.GreekAnalyzer; 45 import org.apache.lucene.analysis.fr.FrenchAnalyzer; 46 import org.apache.lucene.analysis.nl.DutchAnalyzer; 47 import org.apache.lucene.analysis.ru.RussianAnalyzer; 48 import org.apache.lucene.analysis.snowball.SnowballAnalyzer; 49 import org.apache.lucene.analysis.standard.StandardAnalyzer; 50 import org.apache.lucene.document.DateTools; 51 import org.apache.lucene.document.Document; 52 import org.apache.lucene.document.Field; 53 import org.apache.lucene.index.IndexReader; 54 import org.apache.lucene.index.IndexWriter; 55 import org.apache.lucene.index.Term; 56 import org.apache.lucene.misc.ChainedFilter; 57 import org.apache.lucene.queryParser.ParseException; 58 import org.apache.lucene.queryParser.QueryParser; 59 import org.apache.lucene.search.CachingWrapperFilter; 60 import org.apache.lucene.search.Filter; 61 import org.apache.lucene.search.Hits; 62 import org.apache.lucene.search.IndexSearcher; 63 import org.apache.lucene.search.Query; 64 import org.apache.lucene.search.QueryFilter; 65 import org.apache.lucene.search.RangeFilter; 66 import org.apache.lucene.search.TermQuery; 67 import org.apache.lucene.search.highlight.Highlighter; 68 import org.apache.lucene.search.highlight.QueryScorer; 69 import org.apache.lucene.search.highlight.SimpleFragmenter; 70 import org.apache.lucene.store.Directory; 71 import org.apache.lucene.store.FSDirectory; 72 import org.apache.struts.tiles.TilesUtil; 73 import org.springframework.context.ApplicationContext; 74 import org.springframework.web.context.support.WebApplicationContextUtils; 75 76 import com.blandware.atleap.common.Constants; 77 import com.blandware.atleap.common.parsers.PlainTextExtractor; 78 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException; 79 import com.blandware.atleap.common.parsers.exception.UnsupportedMimeTypeException; 80 import com.blandware.atleap.common.util.ConvertUtil; 81 import com.blandware.atleap.common.util.DateUtil; 82 import com.blandware.atleap.common.util.StringUtil; 83 import com.blandware.atleap.model.core.ActionPage; 84 import com.blandware.atleap.model.core.ContentDocument; 85 import com.blandware.atleap.model.core.ContentField; 86 import com.blandware.atleap.model.core.ContentFieldValue; 87 import com.blandware.atleap.model.core.ContentLocale; 88 import com.blandware.atleap.model.core.ContentPage; 89 import com.blandware.atleap.model.core.Layout; 90 import com.blandware.atleap.model.core.Page; 91 import com.blandware.atleap.search.analysis.NorwegianAnalyzer; 92 import com.blandware.atleap.search.analysis.PortugueseAnalyzer; 93 import com.blandware.atleap.search.analysis.SpanishAnalyzer; 94 import com.blandware.atleap.search.filters.RolesFilter; 95 import com.blandware.atleap.service.core.ContentResourceManager; 96 import com.blandware.atleap.service.core.LayoutManager; 97 import com.blandware.atleap.service.core.PageManager; 98 import com.blandware.atleap.webapp.struts.HeritableComponentDefinition; 99 import com.blandware.atleap.webapp.util.core.GlobalProperties; 100 import com.blandware.atleap.webapp.util.core.WebappConstants; 101 import com.blandware.atleap.webapp.util.core.WebappUtil; 102 103 111 public class SearchManager { 112 113 public static final String URI_FIELD = "uri"; 114 public static final String BODY_FIELD = "body"; 115 public static final String TITLE_FIELD = "title"; 116 public static final String SUMMARY_FIELD = "summary"; 117 public static final String MODIFIED_FIELD = "modified"; 118 public static final String LOCALE_FIELD = "locale"; 119 public static final String ROLES_FIELD = "roles"; 120 public static final String TYPE_FIELD = "type"; 121 122 public static final String PAGE_TYPE = "page"; 123 public static final String DOCUMENT_TYPE = "document"; 124 125 protected static final String SEARCH_DIR_KEY = "search.index.dir"; 126 127 protected static final String HTML_MIMETYPE = "text/html"; 128 129 protected static final int RESULT_FRAGMENT_SIZE = 60; 130 protected static final int RESULT_FRAGMENT_NUMBER = 3; 131 protected static final String RESULT_FRAGMENT_DELIMITER = "..."; 132 133 protected static final String INSTANCE_KEY = "com.blandware.atleap.search.SearchManager.INSTANCE"; 134 135 protected transient final Log log = LogFactory.getLog(SearchManager.class); 136 137 protected ServletContext servletContext = null; 138 protected File indexDir = null; 139 140 143 protected boolean initialized = false; 144 145 148 protected IndexReader searchIndexReader = null; 149 150 153 protected IndexReader unindexIndexReader = null; 154 155 158 protected IndexSearcher indexSearcher = null; 159 160 163 protected Map analyzers = null; 164 165 168 protected ApplicationContext applicationCtx = null; 169 170 173 protected HashMap localeFilters = new HashMap (); 174 175 178 protected HashMap rolesFilters = new HashMap (); 179 180 181 187 public static SearchManager getInstance(ServletContext servletContext) { 188 SearchManager ourInstance = (SearchManager) servletContext.getAttribute(INSTANCE_KEY); 189 if ( ourInstance == null ) { 190 ourInstance = new SearchManager(servletContext); 191 servletContext.setAttribute(INSTANCE_KEY, ourInstance); 192 } 193 return ourInstance; 194 } 195 196 201 protected SearchManager(ServletContext servletContext) { 202 applicationCtx = WebApplicationContextUtils.getRequiredWebApplicationContext(servletContext); 203 204 analyzers = new HashMap (); 205 206 String indexDirPath = GlobalProperties.getInstance(servletContext).getString(SEARCH_DIR_KEY); 207 if ( indexDirPath == null || indexDirPath.length() == 0 ) { 208 String error = "The property '" + SEARCH_DIR_KEY + "' must be specified"; 209 if ( log.isErrorEnabled() ) { 210 log.error(error); 211 } 212 throw new RuntimeException (error); 213 } 214 215 File lockDir = new File (FSDirectory.LOCK_DIR); 217 if (!lockDir.exists()) { 218 boolean created = true; 219 try { 220 created = lockDir.mkdirs(); 221 } catch ( Exception ex ) { 222 created = false; 223 } 224 if ( !created ) { 225 if ( log.isErrorEnabled() ) { 226 log.error("Cannot create dir " + lockDir.getAbsolutePath() + " for search index lock files"); 227 } 228 } 229 } else { 230 if (!lockDir.canWrite() || !lockDir.canRead()) { 231 if ( log.isErrorEnabled() ) { 232 log.error("The system has not permissions to write into " + lockDir.getAbsolutePath()); 233 } 234 } 235 } 236 237 indexDir = new File (indexDirPath); 239 this.servletContext = servletContext; 240 if ( indexDir.exists() ) { 241 try { 242 Directory index = FSDirectory.getDirectory(indexDir, false); 243 if ( IndexReader.isLocked(indexDir.getAbsolutePath()) ) { 244 IndexReader.unlock(index); 245 } 246 } catch ( IOException ex ) { 247 if ( log.isErrorEnabled() ) { 248 log.error("Cannot remove lock from search index " + indexDir.getAbsolutePath()); 249 } 250 } 251 } else { 252 boolean created = true; 253 try { 254 created = indexDir.mkdirs(); 255 } catch ( Exception ex ) { 256 created = false; 257 } 258 if ( !created ) { 259 if ( log.isErrorEnabled() ) { 260 log.error("Cannot create dir " + indexDir.getAbsolutePath() + " for search index"); 261 } 262 } 263 264 if ( log.isInfoEnabled() ) { 265 log.info("Search manager initialized"); 266 } 267 } 268 269 IndexWriter indexWriter = null; 271 try { 272 indexWriter = getIndexWriter("", true); 273 } finally { 274 closeIndexWriter(indexWriter); 275 } 276 } 277 278 280 286 public synchronized void indexPage(Page page, HttpServletRequest request) { 287 initialize(request); 288 if ( page instanceof ContentPage ) { 289 indexContentPage((ContentPage) page, request); 290 } else if (page instanceof ActionPage) { 291 indexActionPage((ActionPage) page, request); 292 } else { 293 String roles = WebappUtil.rolesToString(page.getRoles()); 294 indexPageFields(page.getContentFields(), page, roles); 295 } 296 } 297 298 304 public synchronized void reIndexPage(Page page, HttpServletRequest request) { 305 unIndexPage(page.getUri(), request); 306 indexPage(page, request); 307 } 308 309 315 public synchronized void unIndexPage(String uri, HttpServletRequest request) { 316 initialize(request); 317 unIndex(uri); 318 } 319 320 326 protected synchronized void indexContentPage(ContentPage contentPage, HttpServletRequest request) { 327 Collection allFields = null; 328 List layouts = new ArrayList (); 329 LayoutManager layoutManager = (LayoutManager) applicationCtx.getBean(Constants.LAYOUT_MANAGER_BEAN); 330 String tmpDefinition = contentPage.getLayout().getDefinition(); 331 try { 332 do { 333 Layout layout = layoutManager.findLayoutByDefinition(tmpDefinition); 334 if ( layout != null ) { 335 layouts.add(layout); 336 } 337 tmpDefinition = ((HeritableComponentDefinition) TilesUtil.getDefinition(tmpDefinition, request, servletContext)).getExtends(); 338 } while ( tmpDefinition != null ); 339 340 } catch ( Exception ex ) { 341 if ( log.isErrorEnabled() ) { 342 log.error("Cannot traverse definitions", ex); 343 } 344 } 345 Collections.reverse(layouts); 346 for ( int i = 0; i < layouts.size(); i++ ) { 347 Layout layout = (Layout) layouts.get(i); 348 allFields = WebappUtil.joinFields(allFields, layout.getContentFields()); 349 } 350 allFields = WebappUtil.joinFields(allFields, contentPage.getContentFields()); 351 352 indexPageFields(allFields, contentPage, WebappUtil.rolesToString(contentPage.getRoles())); 353 } 354 355 360 protected synchronized void indexActionPage(ActionPage actionPage, HttpServletRequest request) { 361 String roles = ConvertUtil.convertListToString(WebappUtil.getAPRoleNamesAsList(actionPage.getUri(), request), ","); 362 indexPageFields(actionPage.getContentFields(), actionPage, roles); 363 } 364 365 367 373 public synchronized void indexDocument(ContentDocument resourceDocument, HttpServletRequest request) { 374 initialize(request); 375 byte[] resourceData = resourceDocument.getResourceData().getData(); 376 String language = resourceDocument.getContentLocale().getIdentifier(); 377 378 String plainText = null; 379 try { 380 plainText = new PlainTextExtractor().extract(new ByteArrayInputStream (resourceData), resourceDocument.getMimeType(), resourceDocument.getCharset()); 381 } catch ( UnsupportedMimeTypeException ex ) { 382 if ( log.isErrorEnabled() ) { 383 log.error("Unsupported mime type " + resourceDocument.getMimeType(), ex); 384 } 385 } catch ( PlainTextExtractorException ex ) { 386 if ( log.isErrorEnabled() ) { 387 log.error("Cannot parse resource document with mimetype " + resourceDocument.getMimeType(), ex); 388 } 389 } 390 391 if ( plainText != null ) { 392 Integer summarySize = GlobalProperties.getInstance(request.getSession().getServletContext()).getInteger(WebappConstants.DOCUMENT_SUMMARY_SIZE_KEY, new Integer (400)); 393 String summary = plainText.substring(0, Math.min(plainText.length(), summarySize.intValue())); 394 395 Document document = new Document(); 396 document.add(new Field(URI_FIELD, resourceDocument.getUri(), Field.Store.YES, Field.Index.UN_TOKENIZED)); 397 document.add(new Field(BODY_FIELD, plainText, Field.Store.NO, Field.Index.TOKENIZED)); 398 document.add(new Field(MODIFIED_FIELD, DateTools.dateToString(resourceDocument.getLastUpdatedDatetime(), DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED)); 399 document.add(new Field(TYPE_FIELD, DOCUMENT_TYPE, Field.Store.YES, Field.Index.UN_TOKENIZED)); 400 document.add(new Field(LOCALE_FIELD, language, Field.Store.YES, Field.Index.UN_TOKENIZED)); 401 document.add(new Field(SUMMARY_FIELD, summary, Field.Store.YES, Field.Index.NO)); 402 String roles = WebappUtil.rolesToString(resourceDocument.getRoles()); 403 if ( roles != null ) { 404 document.add(new Field(ROLES_FIELD, roles, Field.Store.YES, Field.Index.NO)); 405 } 406 407 String description = resourceDocument.getDescription(); 408 if ( description != null && description.trim().length() > 0 ) { 409 document.add(new Field(TITLE_FIELD, description, Field.Store.YES, Field.Index.TOKENIZED)); 410 } 411 412 IndexWriter indexWriter = null; 413 try { 414 indexWriter = getIndexWriter(language); 415 416 if ( log.isDebugEnabled() ) { 417 log.debug("Adding into index resource document: " + document + " for locale " + language); 418 } 419 indexWriter.addDocument(document); 421 } catch ( IOException ex ) { 422 if ( log.isErrorEnabled() ) { 423 log.error("Cannot add resource document into index ", ex); 424 } 425 } finally { 426 closeIndexWriter(indexWriter); 427 } 428 } 429 } 430 431 437 public synchronized void reIndexDocument(ContentDocument document, HttpServletRequest request) { 438 unIndexDocument(document.getUri(), request); 439 indexDocument(document, request); 440 } 441 442 448 public synchronized void unIndexDocument(String uri, HttpServletRequest request) { 449 initialize(request); 450 unIndex(uri); 451 } 452 453 455 462 protected synchronized void indexPageFields(Collection fields, Page page, String roles) { 463 Map documents = new HashMap (); 464 Map documentModifications = new HashMap (); 465 466 if ( log.isDebugEnabled() ) { 467 log.debug("Indexing " + fields.size() + " fields for uri '" + page.getUri() + "'"); 468 } 469 470 for ( Iterator iterator = fields.iterator(); iterator.hasNext(); ) { 472 ContentField contentField = (ContentField) iterator.next(); 473 List contents = contentField.getContentFieldValues(); 474 String identifier = contentField.getIdentifier(); 475 476 for ( int j = 0; j < contents.size(); j++ ) { 478 ContentFieldValue contentFieldValue = (ContentFieldValue) contents.get(j); 479 ContentLocale contentLocale = contentFieldValue.getContentLocale(); 480 String locale = contentLocale.getIdentifier(); 481 482 String value = null; 484 if ( contentField.getType() == ContentField.LINE_TYPE ) { 485 value = contentFieldValue.getSimpleValue(); 486 } else { 487 value = ConvertUtil.convertToString(contentFieldValue.getValue(), Constants.DEFAULT_ENCODING); 488 } 489 if ( value == null ) { 490 continue; 491 } 492 493 Document document = (Document) documents.get(locale); 495 if ( document == null ) { 497 document = new Document(); 498 documents.put(locale, document); 499 document.add(new Field(URI_FIELD, page.getUri(), Field.Store.YES, Field.Index.UN_TOKENIZED)); 500 501 document.add(new Field(TYPE_FIELD, PAGE_TYPE, Field.Store.YES, Field.Index.UN_TOKENIZED)); 502 document.add(new Field(ROLES_FIELD, roles, Field.Store.YES, Field.Index.NO)); 503 } 504 505 Date modificationDate = (Date ) documentModifications.get(locale); 507 if ( modificationDate == null ) { 508 documentModifications.put(locale, (Date ) contentFieldValue.getLastUpdatedDatetime()); 509 if ( log.isDebugEnabled() ) { 510 log.debug("Add into documentModification locale-" + locale + ", date-" + contentFieldValue.getLastUpdatedDatetime()); 511 } 512 } else if ( modificationDate.before((Date ) contentFieldValue.getLastUpdatedDatetime()) ) { 513 documentModifications.put(locale, (Date ) contentFieldValue.getLastUpdatedDatetime()); 514 if ( log.isDebugEnabled() ) { 515 log.debug("Update in documentModification locale-" + locale + ", date-" + contentFieldValue.getLastUpdatedDatetime()); 516 } 517 } 518 519 if ( identifier.equalsIgnoreCase(ContentField.TITLE_IDENTIFIER) ) { 521 document.add(new Field(BODY_FIELD, value, Field.Store.NO, Field.Index.TOKENIZED)); 522 document.add(new Field(TITLE_FIELD, value, Field.Store.YES, Field.Index.TOKENIZED)); 523 } else if ( identifier.equalsIgnoreCase(ContentField.KEYWORDS_IDENTIFIER) ) { 524 String [] keywords = value.split(","); 525 for ( int k = 0; k < keywords.length; k++ ) { 526 document.add(new Field(BODY_FIELD, keywords[k], Field.Store.NO, Field.Index.UN_TOKENIZED)); 528 } 529 } else if ( identifier.equalsIgnoreCase(ContentField.DESCRIPTION_IDENTIFIER) ) { 530 document.add(new Field(BODY_FIELD, value, Field.Store.NO, Field.Index.TOKENIZED)); 531 } else { 532 if ( ContentField.HTML_TYPE == contentField.getType() ) { 533 try { 534 String plainText = new PlainTextExtractor().extract(value, HTML_MIMETYPE); 535 document.add(new Field(BODY_FIELD, plainText, Field.Store.NO, Field.Index.TOKENIZED)); 536 } catch ( Exception ex ) { 537 if ( log.isErrorEnabled() ) { 538 log.error("The content cannot be parsed ", ex); 539 } 540 } 541 } else { 542 document.add(new Field(BODY_FIELD, value, Field.Store.NO, Field.Index.TOKENIZED)); 543 } 544 } 545 } } 548 Iterator localesIterator = documents.keySet().iterator(); 550 while ( localesIterator.hasNext() ) { 551 String locale = (String ) localesIterator.next(); 552 Document document = (Document) documents.get(locale); 553 IndexWriter indexWriter = null; 554 try { 555 indexWriter = getIndexWriter(locale); 556 Date modificationDate = (Date ) documentModifications.get(locale); 558 if ( modificationDate != null ) { 559 document.add(new Field(MODIFIED_FIELD, DateTools.dateToString(modificationDate, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED)); 560 } 561 562 document.add(new Field(LOCALE_FIELD, locale, Field.Store.YES, Field.Index.UN_TOKENIZED)); 564 565 if ( log.isDebugEnabled() ) { 566 log.debug("Adding into index document: " + document + " for locale " + locale); 567 } 568 569 indexWriter.addDocument(document); 571 } catch ( IOException ex ) { 572 if ( log.isErrorEnabled() ) { 573 log.error("Cannot add document into index ", ex); 574 } 575 } finally { 576 closeIndexWriter(indexWriter); 577 } 578 } } 580 581 586 protected synchronized void unIndex(String uri) { 587 loadUnindexIndexReader(); 588 Term term = new Term(URI_FIELD, uri); 589 if ( log.isDebugEnabled() ) { 590 log.debug("Try to unindex documents using term: " + term.toString()); 591 } 592 try { 593 int number = unindexIndexReader.deleteDocuments(term); 594 if ( log.isDebugEnabled() ) { 595 log.debug("Unindexed " + number + " documents using uri: " + uri); 596 } 597 } catch ( IOException ex ) { 598 if ( log.isErrorEnabled() ) { 599 log.error("Cannot remove documents from index ", ex); 600 } 601 } 602 } 603 604 610 public synchronized void unIndexByLocale(String locale, HttpServletRequest request) { 611 initialize(request); 612 loadUnindexIndexReader(); 613 Term term = new Term(LOCALE_FIELD, locale); 614 if ( log.isDebugEnabled() ) { 615 log.debug("Try to unindex documents using term: " + term.toString()); 616 } 617 try { 618 int number = unindexIndexReader.deleteDocuments(term); 619 if ( log.isDebugEnabled() ) { 620 log.debug("Unindexed " + number + " documents using locale: " + locale); 621 } 622 } catch ( IOException ex ) { 623 if ( log.isErrorEnabled() ) { 624 log.error("Cannot remove documents from index ", ex); 625 } 626 } 627 } 628 629 630 647 public synchronized List search(List hits, String criteria, String locale, boolean allLanguages, int days, String roles, int offset, int pageSize, HttpServletRequest request) 648 throws ParseException, IOException , java.text.ParseException { 649 initialize(request); 650 651 loadIndexSearcher(); 652 653 QueryParser queryParser = new QueryParser(BODY_FIELD, getAnalyzer(locale)); 654 Query query = queryParser.parse(criteria); 655 656 if ( hits == null ) { 657 if ( log.isDebugEnabled() ) { 658 log.debug("Searching for: " + query.toString(BODY_FIELD)); 659 } 660 661 List filters = new ArrayList (); 662 if (!allLanguages) { 664 QueryFilter localeFilter = (QueryFilter)localeFilters.get(locale); 665 if (localeFilter == null) { 666 localeFilter = new QueryFilter(new TermQuery(new Term(LOCALE_FIELD, locale))); 667 localeFilters.put(locale, localeFilter); 668 } 669 filters.add(localeFilter); 670 } 671 672 filters.add(new CachingWrapperFilter(new RolesFilter(roles))); 674 675 if ( days > 0 ) { 677 Calendar calendar = Calendar.getInstance(); 678 calendar.add(Calendar.DATE, -days); 679 filters.add(new CachingWrapperFilter(RangeFilter.More(SearchManager.MODIFIED_FIELD, DateTools.dateToString(calendar.getTime(), DateTools.Resolution.SECOND)))); 680 } 681 682 ChainedFilter chainedFilter = new ChainedFilter((Filter[])filters.toArray(new Filter[filters.size()]), ChainedFilter.AND); 683 684 hits = prepareHitsList(indexSearcher.search(query, chainedFilter), locale, request); 685 } 686 687 int limit = (offset + pageSize) > hits.size() ? hits.size() : (offset + pageSize); 688 689 for ( int i = offset; i < limit; i++ ) { 691 Hit hit = (Hit) hits.get(i); 692 693 if (hit.getHighlightedText() != null) 695 continue; 696 697 String type = hit.getType(); 698 query = query.rewrite(searchIndexReader); 700 Highlighter highlighter = new Highlighter(new QueryScorer(query)); 701 highlighter.setTextFragmenter(new SimpleFragmenter(RESULT_FRAGMENT_SIZE)); 702 703 String documentLocale = hit.getLocale(); 704 Analyzer documentAnalyzer = getAnalyzer(documentLocale); 705 706 if ( !DOCUMENT_TYPE.equalsIgnoreCase(type) ) { 707 708 PageManager pageManager = (PageManager) applicationCtx.getBean(Constants.PAGE_MANAGER_BEAN); 709 Page page = pageManager.findPageByUri(hit.getOriginalUri()); 710 if (page == null) { 711 if(log.isWarnEnabled()) { 712 log.warn("Search index is not synchronized with database. Page with uri=" + hit.getOriginalUri() + " was not found"); 713 } 714 continue; 715 } 716 Collection fields = page.getContentFields(); 717 718 if ( fields != null && fields.size() > 0 ) { 719 StringBuffer highlightedText = new StringBuffer (""); 720 for ( Iterator iterator = fields.iterator(); iterator.hasNext(); ) { 721 ContentField contentField = (ContentField) iterator.next(); 722 if ( !contentField.getIdentifier().equalsIgnoreCase(ContentField.KEYWORDS_IDENTIFIER) ) { 724 List fieldValues = contentField.getContentFieldValues(); 725 for ( int j = 0; j < fieldValues.size(); j++ ) { 726 ContentFieldValue contentFieldValue = (ContentFieldValue) fieldValues.get(j); 727 if ( contentFieldValue.getContentLocale().getIdentifier().equalsIgnoreCase(documentLocale) ) { 728 String content = null; 729 if ( contentField.getType() == ContentField.LINE_TYPE ) { 730 content = contentFieldValue.getSimpleValue(); 731 } else { 732 content = ConvertUtil.convertToString(contentFieldValue.getValue(), Constants.DEFAULT_ENCODING); 733 } 734 if (content == null || content.trim().length() == 0) 735 continue; 736 if ( contentField.getIdentifier().equalsIgnoreCase(ContentField.TITLE_IDENTIFIER) ) { 737 content = StringUtil.htmlEncode(content); 738 TokenStream tokenStream = documentAnalyzer.tokenStream(null, new StringReader (content)); 739 String fragment = highlighter.getBestFragment(tokenStream, content); 740 if ( fragment != null && fragment.length() > 0 ) { 741 hit.setTitle(fragment); 742 } 743 } else { 744 if ( ContentField.HTML_TYPE == contentField.getType() ) { 745 try { 746 content = new PlainTextExtractor().extract(content, HTML_MIMETYPE); 747 } catch ( Exception ex ) { 748 if ( log.isErrorEnabled() ) { 749 log.error("The content cannot be parsed ", ex); 750 } 751 } 752 } else { 753 content = StringUtil.htmlEncode(content); 754 } 755 TokenStream tokenStream = documentAnalyzer.tokenStream(null, new StringReader (content)); 756 String fragment = highlighter.getBestFragments(tokenStream, content, RESULT_FRAGMENT_NUMBER, RESULT_FRAGMENT_DELIMITER); 757 if ( fragment != null && fragment.length() > 0 ) { 758 highlightedText.append(fragment).append(RESULT_FRAGMENT_DELIMITER); 759 } 760 } 761 } 762 } 763 } 764 } 765 766 String highlightedTextString = highlightedText.toString(); 767 if ( highlightedTextString != null && highlightedTextString.length() > 0 ) { 768 if ( log.isDebugEnabled() ) { 769 log.debug("Highlighted text: " + highlightedTextString); 770 } 771 hit.setHighlightedText(highlightedTextString); 772 } 773 } 774 } else { 775 776 String summary = hit.getSummary(); 777 if ( summary != null ) { 778 summary = StringUtil.htmlEncode(summary); 779 TokenStream tokenStream = documentAnalyzer.tokenStream(null, new StringReader (summary)); 780 String fragment = highlighter.getBestFragments(tokenStream, summary, RESULT_FRAGMENT_NUMBER, RESULT_FRAGMENT_DELIMITER); 781 if ( fragment != null && fragment.length() > 0 ) { 782 if ( log.isDebugEnabled() ) { 783 log.debug("Highlighted text: " + fragment); 784 } 785 hit.setHighlightedText(fragment); 786 } 787 } 788 789 } 790 } 791 792 return hits; 793 } 794 795 802 protected synchronized List prepareHitsList(Hits hits, String locale, HttpServletRequest request) throws IOException , java.text.ParseException { 803 if ( log.isDebugEnabled() ) { 804 log.debug("Found " + hits.length() + " hits"); 805 } 806 807 List result = new ArrayList (hits.length()); 808 for ( int i = 0; i < hits.length(); i++ ) { 809 Document doc = hits.doc(i); 810 String type = doc.get(TYPE_FIELD); 811 String uri = doc.get(SearchManager.URI_FIELD); 812 String documentLocale = doc.get(LOCALE_FIELD); 813 if ( log.isDebugEnabled() ) { 814 log.debug("Checking for roles hit with uri=" + uri + ", type=" + type); 815 } 816 817 Hit hit = new Hit(); 818 819 hit.setOriginalUri(uri); 821 822 hit.setLocale(documentLocale); 824 825 String fullUri; 827 if ( DOCUMENT_TYPE.equalsIgnoreCase(type) ) { 828 fullUri = request.getContextPath() + uri; 829 } else { 830 fullUri = WebappUtil.getActionMappingURL(uri, null, request, WebappConstants.URL_TYPE_DOMAIN_RELATIVE, documentLocale); 831 } 832 hit.setUri(fullUri); 833 834 hit.setType(type); 836 837 String title = doc.get(SearchManager.TITLE_FIELD); 839 if ( title == null || title.trim().length() == 0 ) { 840 title = fullUri; 841 } 842 hit.setTitle(title); 843 844 Date modifiedDate = DateTools.stringToDate(doc.get(SearchManager.MODIFIED_FIELD)); 846 String date = DateUtil.formatDateTime(modifiedDate, new Locale (locale)); 847 hit.setModified(date); 848 849 int score = Math.round(hits.score(i) * 100.0F); 851 hit.setScore(score); 852 853 hit.setSummary(doc.get(SUMMARY_FIELD)); 855 856 result.add(hit); 857 } 858 return result; 859 } 860 861 863 866 protected synchronized void loadIndexSearcher() { 867 loadSearchIndexReader(); 868 indexSearcher = new IndexSearcher(searchIndexReader); 869 } 870 871 872 875 protected synchronized void closeIndexSearcher() { 876 try { 877 if ( indexSearcher != null ) { 878 indexSearcher.close(); 879 indexSearcher = null; 880 } 881 } catch ( IOException ex ) { 882 } 884 } 885 886 887 890 protected synchronized void loadSearchIndexReader() { 891 closeUnindexIndexReader(); 892 if (searchIndexReader == null) 893 searchIndexReader = getIndexReader(); 894 } 895 896 897 900 protected synchronized void closeSearchIndexReader() { 901 closeIndexSearcher(); 902 try { 903 if ( searchIndexReader != null ) { 904 searchIndexReader.close(); 905 searchIndexReader = null; 906 } 907 } catch ( IOException ex ) { 908 } 910 } 911 912 915 protected synchronized void loadUnindexIndexReader() { 916 closeSearchIndexReader(); 917 if (unindexIndexReader == null) 918 unindexIndexReader = getIndexReader(); 919 } 920 921 924 protected synchronized void closeUnindexIndexReader() { 925 try { 926 if ( unindexIndexReader != null ) { 927 unindexIndexReader.close(); 928 unindexIndexReader = null; 929 optimizeIndex(); 930 } 931 } catch ( IOException ex ) { 932 } 934 } 935 936 939 protected synchronized IndexReader getIndexReader() { 940 IndexReader indexReader = null; 941 try { 942 indexReader = IndexReader.open(indexDir); 943 } catch ( IOException ex ) { 944 String error = "Cannot open index for read in " + indexDir.getAbsolutePath() 945 + File.pathSeparatorChar 946 + indexDir.getName(); 947 if ( log.isErrorEnabled() ) { 948 log.error(error, ex); 949 } 950 throw new RuntimeException (error, ex); 951 } 952 return indexReader; 953 } 954 955 956 962 protected synchronized IndexWriter getIndexWriter(String locale, boolean create) { 963 if (!create) { 964 closeSearchIndexReader(); 965 closeUnindexIndexReader(); 966 } 967 968 IndexWriter indexWriter = null; 969 try { 970 indexWriter = new IndexWriter(indexDir, getAnalyzer(locale), create); 971 } catch ( IOException ex ) { 972 String error = "Cannot open index for write in " + indexDir.getAbsolutePath(); 973 if ( log.isErrorEnabled() ) { 974 log.error(error, ex); 975 } 976 throw new RuntimeException (error, ex); 977 } 978 return indexWriter; 979 } 980 981 986 protected synchronized IndexWriter getIndexWriter(String locale) { 987 return getIndexWriter(locale, false); 988 } 989 990 991 994 protected synchronized void closeIndexWriter(IndexWriter indexWriter) { 995 try { 996 if ( indexWriter != null ) { 997 indexWriter.optimize(); 998 indexWriter.close(); 999 indexWriter = null; 1000 } 1001 } catch ( IOException ex ) { 1002 } 1004 } 1005 1006 1009 protected synchronized void optimizeIndex() { 1010 IndexWriter indexWriter = null; 1011 try { 1012 indexWriter = getIndexWriter(""); 1013 } finally { 1014 closeIndexWriter(indexWriter); 1015 } 1016 } 1017 1018 1024 protected Analyzer getAnalyzer(String locale) { 1025 Analyzer analyzer = (Analyzer)analyzers.get(locale); 1026 if (analyzer != null) 1027 return analyzer; 1028 1029 if ( "en".equalsIgnoreCase(locale) ) { 1031 analyzer = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS); 1032 } else if ( "ru".equalsIgnoreCase(locale) ) { 1033 analyzer = new RussianAnalyzer(); 1034 } else if ( "es".equalsIgnoreCase(locale) ) { 1035 analyzer = new SpanishAnalyzer(); 1036 } else if ( "de".equalsIgnoreCase(locale) ) { 1037 analyzer = new GermanAnalyzer(); 1038 } else if ( "pt".equalsIgnoreCase(locale) ) { 1039 analyzer = new PortugueseAnalyzer(); 1040 } else if ( "da".equalsIgnoreCase(locale) ) { 1041 analyzer = new SnowballAnalyzer("Danish"); 1042 } else if ( "fi".equalsIgnoreCase(locale) ) { 1043 analyzer = new SnowballAnalyzer("Finnish"); 1044 } else if ( "fr".equalsIgnoreCase(locale) ) { 1045 analyzer = new FrenchAnalyzer(); 1046 } else if ( "it".equalsIgnoreCase(locale) ) { 1047 analyzer = new SnowballAnalyzer("Italian"); 1048 } else if ( "no".equalsIgnoreCase(locale) ) { 1049 analyzer = new NorwegianAnalyzer(); 1051 } else if ( "sv".equalsIgnoreCase(locale) ) { 1052 analyzer = new SnowballAnalyzer("Swedish"); 1053 } else if ( "nl".equalsIgnoreCase(locale) ) { 1054 analyzer = new DutchAnalyzer(); 1055 } else if ( "ja".equalsIgnoreCase(locale) ) { 1056 analyzer = new CJKAnalyzer(); 1057 } else if ( "ko".equalsIgnoreCase(locale) ) { 1058 analyzer = new CJKAnalyzer(); 1059 } else if ( "zh".equalsIgnoreCase(locale) ) { 1062 analyzer = new CJKAnalyzer(); 1063 } else if ( "cs".equalsIgnoreCase(locale) ) { 1064 analyzer = new CzechAnalyzer(); 1065 } else if ( "el".equalsIgnoreCase(locale) ) { 1066 analyzer = new GreekAnalyzer(); 1067 } else { 1068 analyzer = new StandardAnalyzer(); 1069 } 1070 1071 analyzers.put(locale, analyzer); 1072 1073 return analyzer; 1074 } 1075 1076 1077 1081 protected synchronized void initialize(HttpServletRequest request) { 1082 if (!initialized) { 1083 initialized = true; 1084 indexAll(request); 1085 } 1086 } 1087 1088 1093 protected void finalize() throws Throwable { 1094 closeSearchIndexReader(); 1095 closeUnindexIndexReader(); 1096 super.finalize(); 1097 } 1098 1099 1100 1103 public synchronized void reIndexAll(HttpServletRequest request) { 1104 PageManager pageManager = (PageManager) applicationCtx.getBean(Constants.PAGE_MANAGER_BEAN); 1105 Collection pages = pageManager.listPagesFetching(); 1106 1107 ContentResourceManager resourceManager = (ContentResourceManager) applicationCtx.getBean(Constants.CONTENT_RESOURCE_MANAGER_BEAN); 1108 Collection resources = resourceManager.listContentDocuments(null); 1109 1110 for ( Iterator iterator = pages.iterator(); iterator.hasNext(); ) { 1112 Page page = (Page) iterator.next(); 1113 unIndexPage(page.getUri(), request); 1114 } 1115 for ( Iterator iterator = resources.iterator(); iterator.hasNext(); ) { 1116 ContentDocument document = (ContentDocument) iterator.next(); 1117 unIndexDocument(document.getUri(), request); 1118 } 1119 1120 for ( Iterator iterator = pages.iterator(); iterator.hasNext(); ) { 1122 Page page = (Page) iterator.next(); 1123 indexPage(page, request); 1124 } 1125 for ( Iterator iterator = resources.iterator(); iterator.hasNext(); ) { 1126 ContentDocument document = (ContentDocument) iterator.next(); 1127 indexDocument(document, request); 1128 } 1129 1130 if (log.isInfoEnabled()) { 1131 log.info("All pages and resources index rebuilt"); 1132 } 1133 } 1134 1135 1140 public synchronized void indexAll(HttpServletRequest request) { 1141 PageManager pageManager = (PageManager) applicationCtx.getBean(Constants.PAGE_MANAGER_BEAN); 1142 Collection pages = pageManager.listPagesFetching(); 1144 for ( Iterator iterator = pages.iterator(); iterator.hasNext(); ) { 1145 Page page = (Page) iterator.next(); 1146 indexPage(page, request); 1147 } 1148 1149 ContentResourceManager resourceManager = (ContentResourceManager) applicationCtx.getBean(Constants.CONTENT_RESOURCE_MANAGER_BEAN); 1151 Collection resources = resourceManager.listContentDocuments(null); 1152 for ( Iterator iterator = resources.iterator(); iterator.hasNext(); ) { 1153 ContentDocument document = (ContentDocument) iterator.next(); 1154 indexDocument(document, request); 1155 } 1156 } 1157 1158 1159} 1160 | Popular Tags |