1 31 32 package org.opencms.search; 33 34 import org.opencms.configuration.I_CmsConfigurationParameterHandler; 35 import org.opencms.file.CmsObject; 36 import org.opencms.file.CmsProject; 37 import org.opencms.file.CmsRequestContext; 38 import org.opencms.main.CmsException; 39 import org.opencms.main.CmsIllegalArgumentException; 40 import org.opencms.main.CmsLog; 41 import org.opencms.main.OpenCms; 42 import org.opencms.search.documents.CmsHighlightFinder; 43 import org.opencms.search.documents.I_CmsDocumentFactory; 44 import org.opencms.util.CmsStringUtil; 45 46 import java.io.File ; 47 import java.io.IOException ; 48 import java.util.ArrayList ; 49 import java.util.HashMap ; 50 import java.util.Iterator ; 51 import java.util.List ; 52 import java.util.Map ; 53 import java.util.TreeMap ; 54 55 import org.apache.commons.logging.Log; 56 import org.apache.lucene.analysis.Analyzer; 57 import org.apache.lucene.document.Document; 58 import org.apache.lucene.document.Field; 59 import org.apache.lucene.index.IndexWriter; 60 import org.apache.lucene.index.Term; 61 import org.apache.lucene.queryParser.QueryParser; 62 import org.apache.lucene.search.BooleanClause; 63 import org.apache.lucene.search.BooleanQuery; 64 import org.apache.lucene.search.Hits; 65 import org.apache.lucene.search.IndexSearcher; 66 import org.apache.lucene.search.PhraseQuery; 67 import org.apache.lucene.search.Query; 68 import org.apache.lucene.search.TermQuery; 69 70 81 public class CmsSearchIndex implements I_CmsConfigurationParameterHandler { 82 83 84 public static final String [] DOC_META_FIELDS = new String [] { 85 I_CmsDocumentFactory.DOC_META, 86 I_CmsDocumentFactory.DOC_CONTENT}; 87 88 89 public static final String EXCERPT = CmsSearchIndex.class.getName() + ".createExcerpt"; 90 91 92 public static final String PERMISSIONS = CmsSearchIndex.class.getName() + ".checkPermissions"; 93 94 95 public static final String PRIORITY = CmsSearchIndex.class.getName() + ".priority"; 96 97 98 public static final String REBUILD_MODE_AUTO = "auto"; 99 100 101 public static final String REBUILD_MODE_MANUAL = "manual"; 102 103 104 public static final String ROOT_PATH_SUFFIX = "@o.c"; 105 106 107 public static final String ROOT_PATH_TOKEN = "root" + ROOT_PATH_SUFFIX; 108 109 110 private static final String EXCERPT_FRAGMENT_SEPARATOR = " ... "; 111 112 113 private static final int EXCERPT_FRAGMENT_SIZE = 60; 114 115 116 private static final int EXCERPT_REQUIRED_FRAGMENTS = 5; 117 118 119 private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class); 120 121 122 List m_sources; 123 124 125 private boolean m_createExcerpt; 126 127 128 private Map m_documenttypes; 129 130 131 private boolean m_dontCheckPermissions; 132 133 134 private boolean m_enabled; 135 136 137 private String m_locale; 138 139 140 private String m_name; 141 142 143 private String m_path; 144 145 146 private int m_priority; 147 148 149 private String m_project; 150 151 152 private String m_rebuild; 153 154 155 private List m_sourceNames; 156 157 164 public CmsSearchIndex() { 165 166 m_sourceNames = new ArrayList (); 167 m_documenttypes = new HashMap (); 168 m_createExcerpt = true; 169 m_enabled = true; 170 m_priority = -1; 171 } 172 173 183 public CmsSearchIndex(String name) 184 throws CmsIllegalArgumentException { 185 186 this(); 187 setName(name); 188 } 189 190 212 public static String rootPathRewrite(String path) { 213 214 StringBuffer result = new StringBuffer (256); 215 String [] elements = rootPathSplit(path); 216 for (int i = 0; i < elements.length; i++) { 217 result.append(elements[i]); 218 if ((i + 1) < elements.length) { 219 result.append(' '); 220 } 221 } 222 return result.toString(); 223 } 224 225 235 public static String [] rootPathSplit(String path) { 236 237 if (CmsStringUtil.isEmpty(path)) { 238 return new String [] {ROOT_PATH_TOKEN}; 239 } 240 241 String [] elements = CmsStringUtil.splitAsArray(path, '/'); 243 int length = elements.length + 1; 244 String [] result = new String [length]; 245 result[0] = ROOT_PATH_TOKEN; 246 for (int i = 1; i < length; i++) { 247 result[i] = elements[i - 1] + ROOT_PATH_SUFFIX; 249 result[i] = result[i].replace('_', '0'); 251 } 252 return result; 253 } 254 255 261 public void addConfigurationParameter(String key, String value) { 262 263 if (PERMISSIONS.equals(key)) { 264 m_dontCheckPermissions = !Boolean.valueOf(value).booleanValue(); 265 } else if (EXCERPT.equals(key)) { 266 m_createExcerpt = Boolean.valueOf(value).booleanValue(); 267 } else if (PRIORITY.equals(key)) { 268 m_priority = Integer.parseInt(value); 269 if (m_priority < Thread.MIN_PRIORITY) { 270 m_priority = Thread.MIN_PRIORITY; 271 LOG.error(Messages.get().getBundle().key( 272 Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2, 273 value, 274 new Integer (Thread.MIN_PRIORITY))); 275 276 } else if (m_priority > Thread.MAX_PRIORITY) { 277 m_priority = Thread.MAX_PRIORITY; 278 LOG.debug(Messages.get().getBundle().key( 279 Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2, 280 value, 281 new Integer (Thread.MAX_PRIORITY))); 282 283 } 284 } 285 } 286 287 292 public void addSourceName(String sourceName) { 293 294 m_sourceNames.add(sourceName); 295 } 296 297 309 public boolean checkConfiguration(CmsObject cms) { 310 311 if (isEnabled()) { 312 try { 314 cms.readProject(getProject()); 315 setEnabled(true); 316 } catch (CmsException e) { 317 setEnabled(false); 319 if (LOG.isErrorEnabled()) { 320 LOG.error(Messages.get().getBundle().key( 321 Messages.LOG_SEARCHINDEX_CREATE_BAD_PROJECT_2, 322 getProject(), 323 getName())); 324 } 325 } 326 } else { 327 if (LOG.isInfoEnabled()) { 328 LOG.info(Messages.get().getBundle().key(Messages.LOG_SEARCHINDEX_DISABLED_1, getName())); 329 } 330 } 331 332 return isEnabled(); 333 } 334 335 338 public boolean equals(Object obj) { 339 340 if (obj == this) { 341 return true; 342 } 343 if (obj instanceof CmsSearchIndex) { 344 return ((CmsSearchIndex)obj).m_name.equals(m_name); 345 } 346 return false; 347 } 348 349 352 public Map getConfiguration() { 353 354 Map result = new TreeMap (); 355 if (m_priority > 0) { 356 result.put(PRIORITY, new Integer (m_priority)); 357 } 358 if (!m_createExcerpt) { 359 result.put(EXCERPT, new Boolean (m_createExcerpt)); 360 } 361 if (m_dontCheckPermissions) { 362 result.put(PERMISSIONS, new Boolean (!m_dontCheckPermissions)); 363 } 364 return result; 365 } 366 367 376 public List getDocumenttypes(String path) { 377 378 List documenttypes = null; 379 if (m_documenttypes != null) { 380 for (Iterator i = m_documenttypes.keySet().iterator(); i.hasNext();) { 381 String key = (String )i.next(); 382 if (path.startsWith(key)) { 384 documenttypes = (List )m_documenttypes.get(key); 385 break; 386 } 387 } 388 } 389 if (documenttypes == null) { 390 documenttypes = OpenCms.getSearchManager().getDocumentTypes(); 391 } 392 return documenttypes; 393 } 394 395 403 public IndexWriter getIndexWriter(boolean create) throws CmsIndexException { 404 405 IndexWriter indexWriter; 406 Analyzer analyzer = OpenCms.getSearchManager().getAnalyzer(m_locale); 407 408 try { 409 File f = new File (m_path); 410 if (f.exists()) { 411 indexWriter = new IndexWriter(m_path, analyzer, create); 413 } else { 414 f = f.getParentFile(); 416 if (f != null && !f.exists()) { 417 f.mkdirs(); 419 } 420 indexWriter = new IndexWriter(m_path, analyzer, true); 421 } 422 423 } catch (Exception e) { 424 throw new CmsIndexException( 425 Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name), 426 e); 427 } 428 429 return indexWriter; 430 } 431 432 437 public String getLocale() { 438 439 return m_locale; 440 } 441 442 447 public String getName() { 448 449 return m_name; 450 } 451 452 457 public String getPath() { 458 459 return m_path; 460 } 461 462 467 public String getProject() { 468 469 return m_project; 470 } 471 472 477 public String getRebuildMode() { 478 479 return m_rebuild; 480 } 481 482 487 public List getSourceNames() { 488 489 return m_sourceNames; 490 } 491 492 497 public List getSources() { 498 499 return m_sources; 500 } 501 502 505 public int hashCode() { 506 507 return m_name != null ? m_name.hashCode() : 0; 508 } 509 510 513 public void initConfiguration() { 514 515 } 517 518 523 public void initialize() throws CmsSearchException { 524 525 if (!isEnabled()) { 526 return; 528 } 529 530 String sourceName = null; 531 CmsSearchIndexSource indexSource = null; 532 List searchIndexSourceDocumentTypes = null; 533 List resourceNames = null; 534 String resourceName = null; 535 m_sources = new ArrayList (); 536 537 m_path = OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf( 538 OpenCms.getSearchManager().getDirectory() + "/" + m_name); 539 540 for (int i = 0, n = m_sourceNames.size(); i < n; i++) { 541 542 try { 543 sourceName = (String )m_sourceNames.get(i); 544 indexSource = OpenCms.getSearchManager().getIndexSource(sourceName); 545 m_sources.add(indexSource); 546 547 resourceNames = indexSource.getResourcesNames(); 548 searchIndexSourceDocumentTypes = indexSource.getDocumentTypes(); 549 for (int j = 0, m = resourceNames.size(); j < m; j++) { 550 551 resourceName = (String )resourceNames.get(j); 552 m_documenttypes.put(resourceName, searchIndexSourceDocumentTypes); 553 } 554 } catch (Exception e) { 555 setEnabled(false); 557 throw new CmsSearchException(Messages.get().container( 558 Messages.ERR_INDEX_SOURCE_ASSOCIATION_1, 559 sourceName), e); 560 } 561 } 562 } 563 564 569 public boolean isEnabled() { 570 571 return m_enabled; 572 } 573 574 579 public void removeSourceName(String sourceName) { 580 581 m_sourceNames.remove(sourceName); 582 } 583 584 594 public synchronized CmsSearchResultList search(CmsObject cms, CmsSearchParameters params, int matchesPerPage) 595 throws CmsSearchException { 596 597 long timeTotal = -System.currentTimeMillis(); 598 long timeLucene; 599 long timeResultProcessing; 600 601 if (LOG.isDebugEnabled()) { 602 LOG.debug(Messages.get().getBundle().key(Messages.LOG_SEARCH_PARAMS_2, params, m_name)); 603 } 604 605 CmsRequestContext context = cms.getRequestContext(); 606 CmsProject currentProject = context.currentProject(); 607 608 IndexSearcher searcher = null; 610 611 Hits hits; 613 614 CmsSearchResultList searchResults = new CmsSearchResultList(); 616 617 int previousPriority = Thread.currentThread().getPriority(); 618 619 try { 620 621 if (m_priority > 0) { 622 Thread.currentThread().setPriority(m_priority); 624 } 625 626 context.setCurrentProject(cms.readProject(m_project)); 628 629 String [] roots; 631 if ((params.getRoots() != null) && (params.getRoots().size() > 0)) { 632 roots = new String [params.getRoots().size()]; 634 for (int i = 0; i < params.getRoots().size(); i++) { 635 roots[i] = cms.getRequestContext().addSiteRoot((String )params.getRoots().get(i)); 636 } 637 } else { 638 roots = new String [] {cms.getRequestContext().getSiteRoot()}; 641 } 642 643 timeLucene = -System.currentTimeMillis(); 644 645 Analyzer languageAnalyzer = OpenCms.getSearchManager().getAnalyzer(m_locale); 647 648 BooleanQuery query = new BooleanQuery(); 650 651 BooleanQuery pathQuery = new BooleanQuery(); 657 for (int i = 0; i < roots.length; i++) { 658 String [] paths = rootPathSplit(roots[i]); 659 PhraseQuery phrase = new PhraseQuery(); 660 for (int j = 0; j < paths.length; j++) { 661 Term term = new Term(I_CmsDocumentFactory.DOC_ROOT, paths[j].toLowerCase()); 662 phrase.add(term); 663 } 664 pathQuery.add(phrase, BooleanClause.Occur.SHOULD); 665 } 666 query.add(pathQuery, BooleanClause.Occur.MUST); 668 669 if ((params.getCategories() != null) && (params.getCategories().size() > 0)) { 670 BooleanQuery categoryQuery = new BooleanQuery(); 672 for (int i = 0; i < params.getCategories().size(); i++) { 673 Term term = new Term(I_CmsDocumentFactory.DOC_CATEGORY, (String )params.getCategories().get(i)); 674 TermQuery termQuery = new TermQuery(term); 675 categoryQuery.add(termQuery, BooleanClause.Occur.SHOULD); 676 } 677 query.add(categoryQuery, BooleanClause.Occur.MUST); 678 } 679 680 if ((params.getFields() != null) && (params.getFields().size() > 0)) { 681 BooleanQuery fieldsQuery = new BooleanQuery(); 683 for (int i = 0; i < params.getFields().size(); i++) { 685 QueryParser p = new QueryParser((String )params.getFields().get(i), languageAnalyzer); 686 fieldsQuery.add(p.parse(params.getQuery()), BooleanClause.Occur.SHOULD); 687 } 688 query.add(fieldsQuery, BooleanClause.Occur.MUST); 690 } else { 691 QueryParser p = new QueryParser(I_CmsDocumentFactory.DOC_CONTENT, languageAnalyzer); 693 query.add(p.parse(params.getQuery()), BooleanClause.Occur.MUST); 694 } 695 696 searcher = new IndexSearcher(m_path); 698 Query finalQuery; 699 700 if (m_createExcerpt || LOG.isDebugEnabled()) { 701 finalQuery = searcher.rewrite(query); 703 } else { 704 finalQuery = query; 705 } 706 if (LOG.isDebugEnabled()) { 707 LOG.debug(Messages.get().getBundle().key(Messages.LOG_BASE_QUERY_1, query)); 708 LOG.debug(Messages.get().getBundle().key(Messages.LOG_REWRITTEN_QUERY_1, finalQuery)); 709 710 } 711 712 CmsSearchCategoryCollector categoryCollector; 714 if (params.isCalculateCategories()) { 715 categoryCollector = new CmsSearchCategoryCollector(searcher); 718 searcher.search(finalQuery, categoryCollector); 720 searchResults.setCategories(categoryCollector.getCategoryCountResult()); 722 } 723 724 hits = searcher.search(finalQuery, params.getSort()); 726 727 int hitCount = hits.length(); 728 729 timeLucene += System.currentTimeMillis(); 730 timeResultProcessing = -System.currentTimeMillis(); 731 732 Document doc; 733 CmsSearchResult searchResult; 734 String excerpt = null; 735 736 if (hits != null) { 737 int page = params.getSearchPage(); 738 int start = -1, end = -1; 739 if (matchesPerPage > 0 && page > 0 && hitCount > 0) { 740 start = matchesPerPage * (page - 1); 742 end = start + matchesPerPage; 743 start = (start > hitCount) ? hitCount : start; 745 end = (end > hitCount) ? hitCount : end; 746 } else { 747 start = 0; 749 end = hitCount; 750 } 751 752 int visibleHitCount = hitCount; 753 for (int i = 0, cnt = 0; i < hitCount && cnt < end; i++) { 754 try { 755 doc = hits.doc(i); 756 if (hasReadPermission(cms, doc)) { 757 if (cnt >= start) { 759 if (m_createExcerpt && doc.getField(I_CmsDocumentFactory.DOC_CONTENT) != null) { 762 excerpt = getExcerpt( 763 doc.getField(I_CmsDocumentFactory.DOC_CONTENT).stringValue(), 764 finalQuery, 765 languageAnalyzer); 766 } 767 searchResult = new CmsSearchResult(Math.round(hits.score(i) * 100f), doc, excerpt); 768 searchResults.add(searchResult); 769 } 770 cnt++; 771 } else { 772 visibleHitCount--; 773 } 774 } catch (Exception e) { 775 if (LOG.isWarnEnabled()) { 777 LOG.warn(Messages.get().getBundle().key(Messages.LOG_RESULT_ITERATION_FAILED_0), e); 778 } 779 } 780 } 781 782 searchResults.setHitCount(visibleHitCount); 784 } else { 785 searchResults.setHitCount(0); 786 } 787 788 timeResultProcessing += System.currentTimeMillis(); 789 790 } catch (Exception exc) { 791 throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), exc); 792 } finally { 793 794 Thread.currentThread().setPriority(previousPriority); 796 797 if (searcher != null) { 798 try { 799 searcher.close(); 800 } catch (IOException exc) { 801 } 803 } 804 805 context.setCurrentProject(currentProject); 807 } 808 809 timeTotal += System.currentTimeMillis(); 810 811 Object [] logParams = new Object [] { 812 new Integer (hits.length()), 813 new Long (timeTotal), 814 new Long (timeLucene), 815 new Long (timeResultProcessing)}; 816 if (LOG.isDebugEnabled()) { 817 LOG.debug(Messages.get().getBundle().key(Messages.LOG_STAT_RESULTS_TIME_4, logParams)); 818 } 819 820 return searchResults; 821 } 822 823 828 public void setEnabled(boolean enabled) { 829 830 m_enabled = enabled; 831 } 832 833 838 public void setLocale(String locale) { 839 840 m_locale = locale; 841 } 842 843 852 public void setName(String name) throws CmsIllegalArgumentException { 853 854 if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { 855 throw new CmsIllegalArgumentException(Messages.get().container( 856 Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); 857 } else { 858 859 if (!name.equals(m_name)) { 862 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { 864 Iterator itIdxNames = OpenCms.getSearchManager().getIndexNames().iterator(); 866 while (itIdxNames.hasNext()) { 867 if (itIdxNames.next().equals(name)) { 868 throw new CmsIllegalArgumentException(Messages.get().container( 869 Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, 870 name)); 871 } 872 } 873 } 874 } 875 } 876 877 m_name = name; 878 879 } 880 881 890 public void setProject(String projectName) { 891 892 setProjectName(projectName); 893 } 894 895 900 public void setProjectName(String projectName) { 901 902 m_project = projectName; 903 } 904 905 910 public void setRebuildMode(String rebuildMode) { 911 912 m_rebuild = rebuildMode; 913 } 914 915 922 public String toString() { 923 924 return getName(); 925 } 926 927 938 protected String getExcerpt(String content, Query searchQuery, Analyzer analyzer) throws IOException { 939 940 if (content == null) { 941 return null; 942 } 943 944 CmsHighlightFinder highlighter = new CmsHighlightFinder( 945 OpenCms.getSearchManager().getHighlighter(), 946 searchQuery, 947 analyzer); 948 949 String excerpt = highlighter.getBestFragments( 950 content, 951 EXCERPT_FRAGMENT_SIZE, 952 EXCERPT_REQUIRED_FRAGMENTS, 953 EXCERPT_FRAGMENT_SEPARATOR); 954 955 excerpt = excerpt.replace('\t', ' '); 957 excerpt = excerpt.replace('\n', ' '); 958 excerpt = excerpt.replace('\r', ' '); 959 excerpt = excerpt.replace('\f', ' '); 960 961 int maxLength = OpenCms.getSearchManager().getMaxExcerptLength(); 962 if (excerpt != null && excerpt.length() > maxLength) { 963 excerpt = excerpt.substring(0, maxLength); 964 } 965 966 return excerpt; 967 } 968 969 977 protected boolean hasReadPermission(CmsObject cms, Document doc) { 978 979 if (m_dontCheckPermissions) { 980 return true; 982 } 983 984 Field typeField = doc.getField(I_CmsDocumentFactory.DOC_TYPE); 985 Field pathField = doc.getField(I_CmsDocumentFactory.DOC_PATH); 986 if ((typeField == null) || (pathField == null)) { 987 return true; 989 } 990 991 String rootPath = cms.getRequestContext().removeSiteRoot(pathField.stringValue()); 992 993 return cms.existsResource(rootPath); 995 } 996 }
| Popular Tags
|