CmsSearchIndex


1   /*
2    * File   : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearchIndex.java,v $
3    * Date   : $Date: 2006/10/26 10:22:11 $
4    * Version: $Revision: 1.61 $
5    *
6    * This library is part of OpenCms -
7    * the Open Source Content Management System
8    *
9    * Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
10   *
11   * This library is free software; you can redistribute it and/or
12   * modify it under the terms of the GNU Lesser General Public
13   * License as published by the Free Software Foundation; either
14   * version 2.1 of the License, or (at your option) any later version.
15   *
16   * This library is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19   * Lesser General Public License for more details.
20   *
21   * For further information about Alkacon Software GmbH, please see the
22   * company website: http://www.alkacon.com
23   *
24   * For further information about OpenCms, please see the
25   * project website: http://www.opencms.org
26   * 
27   * You should have received a copy of the GNU Lesser General Public
28   * License along with this library; if not, write to the Free Software
29   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30   */
31  
32  package org.opencms.search;
33  
34  import org.opencms.configuration.I_CmsConfigurationParameterHandler;
35  import org.opencms.file.CmsObject;
36  import org.opencms.file.CmsProject;
37  import org.opencms.file.CmsRequestContext;
38  import org.opencms.main.CmsException;
39  import org.opencms.main.CmsIllegalArgumentException;
40  import org.opencms.main.CmsLog;
41  import org.opencms.main.OpenCms;
42  import org.opencms.search.documents.CmsHighlightFinder;
43  import org.opencms.search.documents.I_CmsDocumentFactory;
44  import org.opencms.util.CmsStringUtil;
45  
46  import java.io.File  ;
47  import java.io.IOException  ;
48  import java.util.ArrayList  ;
49  import java.util.HashMap  ;
50  import java.util.Iterator  ;
51  import java.util.List  ;
52  import java.util.Map  ;
53  import java.util.TreeMap  ;
54  
55  import org.apache.commons.logging.Log;
56  import org.apache.lucene.analysis.Analyzer;
57  import org.apache.lucene.document.Document;
58  import org.apache.lucene.document.Field;
59  import org.apache.lucene.index.IndexWriter;
60  import org.apache.lucene.index.Term;
61  import org.apache.lucene.queryParser.QueryParser;
62  import org.apache.lucene.search.BooleanClause;
63  import org.apache.lucene.search.BooleanQuery;
64  import org.apache.lucene.search.Hits;
65  import org.apache.lucene.search.IndexSearcher;
66  import org.apache.lucene.search.PhraseQuery;
67  import org.apache.lucene.search.Query;
68  import org.apache.lucene.search.TermQuery;
69  
70  /**
71   * Implements the search within an index and the management of the index configuration.<p>
72   * 
73   * @author Carsten Weinholz 
74   * @author Thomas Weckert  
75   * @author Alexander Kandzior 
76   * 
77   * @version $Revision: 1.61 $ 
78   * 
79   * @since 6.0.0 
80   */
81  public class CmsSearchIndex implements I_CmsConfigurationParameterHandler {
82  
83      /** Field list containing the "meta" field as well as the "content" document field. */
84      public static final String  [] DOC_META_FIELDS = new String  [] {
85          I_CmsDocumentFactory.DOC_META,
86          I_CmsDocumentFactory.DOC_CONTENT};
87  
88      /** Name of the additional configuration parameter that enables excerpt creation (default: true). */
89      public static final String   EXCERPT = CmsSearchIndex.class.getName() + ".createExcerpt";
90  
91      /** Name of the additional configuration parameter that enables permission checks (default: true). */
92      public static final String   PERMISSIONS = CmsSearchIndex.class.getName() + ".checkPermissions";
93  
94      /** Name of the additional configuration parameter that sets the thread priority used during a search. */
95      public static final String   PRIORITY = CmsSearchIndex.class.getName() + ".priority";
96  
97      /** Automatic ("auto") index rebuild mode. */
98      public static final String   REBUILD_MODE_AUTO = "auto";
99  
100     /** Manual ("manual") index rebuild mode. */
101     public static final String   REBUILD_MODE_MANUAL = "manual";
102 
103     /** Suffix appended to every root path element for optimized path queries. */
104     public static final String   ROOT_PATH_SUFFIX = "@o.c";
105 
106     /** Start token that precedes the path elements in a rewritten root path for optimized path queries. */
107     public static final String   ROOT_PATH_TOKEN = "root" + ROOT_PATH_SUFFIX;
108 
109     /** Separator placed between the search excerpt fragments. */
110     private static final String   EXCERPT_FRAGMENT_SEPARATOR = " ... ";
111 
112     /** Size of a single excerpt fragment. */
113     private static final int EXCERPT_FRAGMENT_SIZE = 60;
114 
115     /** Number of fragments required in an excerpt. */
116     private static final int EXCERPT_REQUIRED_FRAGMENTS = 5;
117 
118     /** The log object for this class. */
119     private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class);
120 
121     /** The index sources resolved from the configured source names, assigned in initialize(). */
122     List   m_sources;
123 
124     /** If true, an excerpt is created for search results (set through the EXCERPT parameter). */
125     private boolean m_createExcerpt;
126 
127     /** Maps a configured resource path to the list of document types of its index source. */
128     private Map   m_documenttypes;
129 
130     /** If true, permission checks are switched off (inverse of the PERMISSIONS parameter value). */
131     private boolean m_dontCheckPermissions;
132 
133     /** An internal enabled flag, used to disable the index if for instance the configured project does not exist. */
134     private boolean m_enabled;
135 
136     /** The language filter of this index. */
137     private String   m_locale;
138 
139     /** The name of this index. */
140     private String   m_name;
141 
142     /** The path where this index stores its data in the "real" file system. */
143     private String   m_path;
144 
145     /** The thread priority for a search, only applied if greater than 0 (the default constructor sets -1). */
146     private int m_priority;
147 
148     /** The name of the project of this index. */
149     private String   m_project;
150 
151     /** The rebuild mode for this index. */
152     private String   m_rebuild;
153 
154     /** The names of the configured index sources for this index. */
155     private List   m_sourceNames;
156 
/**
 * Creates an unnamed search index with default settings, intended for the xml configuration only.<p>
 *
 * Prefer the constructor <code>{@link #CmsSearchIndex(String)}</code>,
 * as it enforces the mandatory name argument.<p>
 *
 * Defaults applied here: the index is enabled, excerpt creation is switched on,
 * no search thread priority is set (<code>-1</code>), and the source name list
 * as well as the document type map start out empty.<p>
 */
public CmsSearchIndex() {

    // flags and scalar defaults
    m_enabled = true;
    m_createExcerpt = true;
    m_priority = -1;

    // empty containers that are filled by the configuration later
    m_documenttypes = new HashMap();
    m_sourceNames = new ArrayList();
}
172 
/**
 * Creates a new search index with the default settings and the given name.<p>
 *
 * All defaults of <code>{@link #CmsSearchIndex()}</code> are applied first,
 * then the name is assigned through <code>setName(String)</code>.<p>
 *
 * @param name the system-wide unique name for the search index
 *
 * @throws CmsIllegalArgumentException if the name is rejected while it is set
 *   (documented as: the name is null, empty or already taken by another search index)
 */
public CmsSearchIndex(String name) throws CmsIllegalArgumentException {

    this();
    setName(name);
}
189 
190     /**
191      * Rewrites the a resource path for use in the {@link I_CmsDocumentFactory#DOC_ROOT} field.<p>
192      * 
193      * All "/" chars in the path are replaced with the {@link #ROOT_PATH_SUFFIX} token.
194      * This is required in order to use a Lucene "phrase query" on the resource path.
195      * Using a phrase query is much, much better for the search performance then using a straightforward 
196      * "prefix query". With a "prefix query", Lucene would interally generate a huge list of boolean sub-queries,
197      * exactly one for every document in the VFS subtree of the query. So if you query on "/sites/default/*" on 
198      * a large OpenCms installation, this means thousands of sub-queries.
199      * Using the "phrase query", only one (or very few) queries are internally generated, and the result 
200      * is just the same.<p>  
201      * 
202      * This implementation basically replaces the "/" of a path with "@o.c ". 
203      * This is a trick so that the Lucene analyzer leaves the
204      * directory names untouched, since it treats them like literal email addresses. 
205      * Otherwise the language analyzer might modify the directory names, leading to potential
206      * duplicates (e.g. <code>members/</code> and <code>member/</code> may both be trimmed to <code>member</code>),
207      * so that the prefix search returns more results then expected.<p>
208      * @param path the path to rewrite
209      * 
210      * @return the re-written path
211      */
212     public static String   rootPathRewrite(String   path) {
213 
214         StringBuffer   result = new StringBuffer  (256);
215         String  [] elements = rootPathSplit(path);
216         for (int i = 0; i < elements.length; i++) {
217             result.append(elements[i]);
218             if ((i + 1) < elements.length) {
219                 result.append(' ');
220             }
221         }
222         return result.toString();
223     }
224 
225     /**
226      * Spits the a resource path into tokens for use in the <code>{@link I_CmsDocumentFactory#DOC_ROOT}</code> field
227      * and with the <code>{@link #rootPathRewrite(String)}</code> method.<p>
228      * 
229      * @param path the path to split
230      * 
231      * @return the splitted path
232      * 
233      * @see #rootPathRewrite(String)
234      */
235     public static String  [] rootPathSplit(String   path) {
236 
237         if (CmsStringUtil.isEmpty(path)) {
238             return new String  [] {ROOT_PATH_TOKEN};
239         }
240 
241         // split the path
242         String  [] elements = CmsStringUtil.splitAsArray(path, '/');
243         int length = elements.length + 1;
244         String  [] result = new String  [length];
245         result[0] = ROOT_PATH_TOKEN;
246         for (int i = 1; i < length; i++) {
247             // append suffix to all path elements
248             result[i] = elements[i - 1] + ROOT_PATH_SUFFIX;
249             // underscore '_' is a word separator for the Lucene analyzer, must replace this
250             result[i] = result[i].replace('_', '0');
251         }
252         return result;
253     }
254 
255     /**
256      * Adds a parameter.<p>
257      * 
258      * @param key the key/name of the parameter
259      * @param value the value of the parameter
260      */
261     public void addConfigurationParameter(String   key, String   value) {
262 
263         if (PERMISSIONS.equals(key)) {
264             m_dontCheckPermissions = !Boolean.valueOf(value).booleanValue();
265         } else if (EXCERPT.equals(key)) {
266             m_createExcerpt = Boolean.valueOf(value).booleanValue();
267         } else if (PRIORITY.equals(key)) {
268             m_priority = Integer.parseInt(value);
269             if (m_priority < Thread.MIN_PRIORITY) {
270                 m_priority = Thread.MIN_PRIORITY;
271                 LOG.error(Messages.get().getBundle().key(
272                     Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2,
273                     value,
274                     new Integer  (Thread.MIN_PRIORITY)));
275 
276             } else if (m_priority > Thread.MAX_PRIORITY) {
277                 m_priority = Thread.MAX_PRIORITY;
278                 LOG.debug(Messages.get().getBundle().key(
279                     Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2,
280                     value,
281                     new Integer  (Thread.MAX_PRIORITY)));
282 
283             }
284         }
285     }
286 
287     /**
288      * Adds am index source to this search index.<p>
289      * 
290      * @param sourceName the index source name to add
291      */
292     public void addSourceName(String   sourceName) {
293 
294         m_sourceNames.add(sourceName);
295     }
296 
297     /**
298      * Checks is this index has been configured correctly.<p>
299      * 
300      * In case the check fails, the <code>enabled</code> property
301      * is set to <code>false</code>
302      * 
303      * @param cms a OpenCms user context to perform the checks with (should have "Administrator" permissions)
304      *
305      * @return <code>true</code> in case the index is correctly configured and enabled after the check
306      * 
307      * @see #isEnabled()
308      */
309     public boolean checkConfiguration(CmsObject cms) {
310 
311         if (isEnabled()) {
312             // check if the project for the index exists        
313             try {
314                 cms.readProject(getProject());
315                 setEnabled(true);
316             } catch (CmsException e) {
317                 // the project does not exist, disable the index
318                 setEnabled(false);
319                 if (LOG.isErrorEnabled()) {
320                     LOG.error(Messages.get().getBundle().key(
321                         Messages.LOG_SEARCHINDEX_CREATE_BAD_PROJECT_2,
322                         getProject(),
323                         getName()));
324                 }
325             }
326         } else {
327             if (LOG.isInfoEnabled()) {
328                 LOG.info(Messages.get().getBundle().key(Messages.LOG_SEARCHINDEX_DISABLED_1, getName()));
329             }
330         }
331 
332         return isEnabled();
333     }
334 
335     /**
336      * @see java.lang.Object#equals(java.lang.Object)
337      */
338     public boolean equals(Object   obj) {
339 
340         if (obj == this) {
341             return true;
342         }
343         if (obj instanceof CmsSearchIndex) {
344             return ((CmsSearchIndex)obj).m_name.equals(m_name);
345         }
346         return false;
347     }
348 
349     /**
350      * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration()
351      */
352     public Map   getConfiguration() {
353 
354         Map   result = new TreeMap  ();
355         if (m_priority > 0) {
356             result.put(PRIORITY, new Integer  (m_priority));
357         }
358         if (!m_createExcerpt) {
359             result.put(EXCERPT, new Boolean  (m_createExcerpt));
360         }
361         if (m_dontCheckPermissions) {
362             result.put(PERMISSIONS, new Boolean  (!m_dontCheckPermissions));
363         }
364         return result;
365     }
366 
367     /**
368      * Returns the configured document types of this index for the given resource path.<p>
369      * 
370      * The result List contains Strings with the names of the document types.<p>
371      * 
372      * @param path path of the folder 
373      * 
374      * @return the configured document types of this index for the given resource path
375      */
376     public List   getDocumenttypes(String   path) {
377 
378         List   documenttypes = null;
379         if (m_documenttypes != null) {
380             for (Iterator   i = m_documenttypes.keySet().iterator(); i.hasNext();) {
381                 String   key = (String  )i.next();
382                 // NOTE: assumed that configured resource paths do not overlap, otherwise result is undefined
383                 if (path.startsWith(key)) {
384                     documenttypes = (List  )m_documenttypes.get(key);
385                     break;
386                 }
387             }
388         }
389         if (documenttypes == null) {
390             documenttypes = OpenCms.getSearchManager().getDocumentTypes();
391         }
392         return documenttypes;
393     }
394 
395     /**
396      * Returns a new index writer for this index.<p>
397      * 
398      * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
399      * 
400      * @return a new instance of IndexWriter
401      * @throws CmsIndexException if the index can not be opened
402      */
403     public IndexWriter getIndexWriter(boolean create) throws CmsIndexException {
404 
405         IndexWriter indexWriter;
406         Analyzer analyzer = OpenCms.getSearchManager().getAnalyzer(m_locale);
407 
408         try {
409             File   f = new File  (m_path);
410             if (f.exists()) {
411                 // index already exists
412                 indexWriter = new IndexWriter(m_path, analyzer, create);
413             } else {
414                 // index does not exist yet
415                 f = f.getParentFile();
416                 if (f != null && !f.exists()) {
417                     // create the parent folders if required
418                     f.mkdirs();
419                 }
420                 indexWriter = new IndexWriter(m_path, analyzer, true);
421             }
422 
423         } catch (Exception   e) {
424             throw new CmsIndexException(
425                 Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name),
426                 e);
427         }
428 
429         return indexWriter;
430     }
431 
432     /**
433      * Gets the langauge of this index.<p>
434      * 
435      * @return the language of the index, i.e. de
436      */
437     public String   getLocale() {
438 
439         return m_locale;
440     }
441 
442     /**
443      * Gets the name of this index.<p>
444      * 
445      * @return the name of the index
446      */
447     public String   getName() {
448 
449         return m_name;
450     }
451 
452     /**
453      * Returns the path where this index stores it's data in the "real" file system.<p>
454      * 
455      * @return the path where this index stores it's data in the "real" file system
456      */
457     public String   getPath() {
458 
459         return m_path;
460     }
461 
462     /**
463      * Gets the project of this index.<p>
464      * 
465      * @return the project of the index, i.e. "online"
466      */
467     public String   getProject() {
468 
469         return m_project;
470     }
471 
472     /**
473      * Get the rebuild mode of this index.<p>
474      * 
475      * @return the current rebuild mode
476      */
477     public String   getRebuildMode() {
478 
479         return m_rebuild;
480     }
481 
482     /**
483      * Returns all configured sources names of this search index.<p>
484      * 
485      * @return a list with all configured sources names of this search index
486      */
487     public List   getSourceNames() {
488 
489         return m_sourceNames;
490     }
491 
492     /**
493      * Returns all configured index sources of this search index.<p>
494      * 
495      * @return all configured index sources of this search index
496      */
497     public List   getSources() {
498 
499         return m_sources;
500     }
501 
502     /**
503      * @see java.lang.Object#hashCode()
504      */
505     public int hashCode() {
506 
507         return m_name != null ? m_name.hashCode() : 0;
508     }
509 
510     /**
511      * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#initConfiguration()
512      */
513     public void initConfiguration() {
514 
515         // noting to do here
516     }
517 
518     /**
519      * Initializes the search index.<p>
520      * 
521      * @throws CmsSearchException if the index source association failed
522      */
523     public void initialize() throws CmsSearchException {
524 
525         if (!isEnabled()) {
526             // index is disabled, no initialization is required
527             return;
528         }
529 
530         String   sourceName = null;
531         CmsSearchIndexSource indexSource = null;
532         List   searchIndexSourceDocumentTypes = null;
533         List   resourceNames = null;
534         String   resourceName = null;
535         m_sources = new ArrayList  ();
536 
537         m_path = OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(
538             OpenCms.getSearchManager().getDirectory() + "/" + m_name);
539 
540         for (int i = 0, n = m_sourceNames.size(); i < n; i++) {
541 
542             try {
543                 sourceName = (String  )m_sourceNames.get(i);
544                 indexSource = OpenCms.getSearchManager().getIndexSource(sourceName);
545                 m_sources.add(indexSource);
546 
547                 resourceNames = indexSource.getResourcesNames();
548                 searchIndexSourceDocumentTypes = indexSource.getDocumentTypes();
549                 for (int j = 0, m = resourceNames.size(); j < m; j++) {
550 
551                     resourceName = (String  )resourceNames.get(j);
552                     m_documenttypes.put(resourceName, searchIndexSourceDocumentTypes);
553                 }
554             } catch (Exception   e) {
555                 // mark this index as disabled
556                 setEnabled(false);
557                 throw new CmsSearchException(Messages.get().container(
558                     Messages.ERR_INDEX_SOURCE_ASSOCIATION_1,
559                     sourceName), e);
560             }
561         }
562     }
563 
564     /**
565      * Returns <code>true</code> if this index is currently disabled.<p>
566      * 
567      * @return <code>true</code> if this index is currently disabled
568      */
569     public boolean isEnabled() {
570 
571         return m_enabled;
572     }
573 
574     /**
575      * Removes an index source from this search index.<p>
576      * 
577      * @param sourceName the index source name to remove
578      */
579     public void removeSourceName(String   sourceName) {
580 
581         m_sourceNames.remove(sourceName);
582     }
583 
584     /**
585      * Performs a search on the index within the given fields.<p>
586      * 
587      * The result is returned as List with entries of type I_CmsSearchResult.<p>
588      * @param cms the current user's Cms object
589      * @param params the parameters to use for the search
590      * @param matchesPerPage the number of search results per page, or -1 to return all found documents in the search result
591      * @return the List of results found or an empty list
592      * @throws CmsSearchException if something goes wrong
593      */
594     public synchronized CmsSearchResultList search(CmsObject cms, CmsSearchParameters params, int matchesPerPage)
595     throws CmsSearchException {
596 
597         long timeTotal = -System.currentTimeMillis();
598         long timeLucene;
599         long timeResultProcessing;
600 
601         if (LOG.isDebugEnabled()) {
602             LOG.debug(Messages.get().getBundle().key(Messages.LOG_SEARCH_PARAMS_2, params, m_name));
603         }
604 
605         CmsRequestContext context = cms.getRequestContext();
606         CmsProject currentProject = context.currentProject();
607 
608         // the searcher to perform the operation in
609         IndexSearcher searcher = null;
610 
611         // the hits found during the search
612         Hits hits;
613 
614         // storage for the results found
615         CmsSearchResultList searchResults = new CmsSearchResultList();
616 
617         int previousPriority = Thread.currentThread().getPriority();
618 
619         try {
620 
621             if (m_priority > 0) {
622                 // change thread priority in order to reduce search impact on overall system performance
623                 Thread.currentThread().setPriority(m_priority);
624             }
625 
626             // change the project     
627             context.setCurrentProject(cms.readProject(m_project));
628 
629             // complete the search root
630             String  [] roots;
631             if ((params.getRoots() != null) && (params.getRoots().size() > 0)) {
632                 // add the site root to all the search root
633                 roots = new String  [params.getRoots().size()];
634                 for (int i = 0; i < params.getRoots().size(); i++) {
635                     roots[i] = cms.getRequestContext().addSiteRoot((String  )params.getRoots().get(i));
636                 }
637             } else {
638                 // just use the site root as the search root
639                 // this permits searching in indexes that contain content of other sites than the current selected one?!?!
640                 roots = new String  [] {cms.getRequestContext().getSiteRoot()};
641             }
642 
643             timeLucene = -System.currentTimeMillis();
644 
645             // the language analyzer to use for creating the queries
646             Analyzer languageAnalyzer = OpenCms.getSearchManager().getAnalyzer(m_locale);
647 
648             // the main query to use, will be constructed in the next lines 
649             BooleanQuery query = new BooleanQuery();
650 
651             // implementation note: 
652             // initially this was a simple PrefixQuery based on the DOC_PATH
653             // however, internally Lucene rewrote that to literally hundreds of BooleanQuery parts
654             // the following implementation will lead to just one Lucene PhraseQuery per directory and is thus much better    
655             // cw/261006 - paths elements should not contain uppercase letters, otherwise searcher does not find the appropriate results
656             BooleanQuery pathQuery = new BooleanQuery();
657             for (int i = 0; i < roots.length; i++) {
658                 String  [] paths = rootPathSplit(roots[i]);
659                 PhraseQuery phrase = new PhraseQuery();
660                 for (int j = 0; j < paths.length; j++) {
661                     Term term = new Term(I_CmsDocumentFactory.DOC_ROOT, paths[j].toLowerCase());
662                     phrase.add(term);
663                 }
664                 pathQuery.add(phrase, BooleanClause.Occur.SHOULD);
665             }
666             // add the calculated phrase query for the root path
667             query.add(pathQuery, BooleanClause.Occur.MUST);
668 
669             if ((params.getCategories() != null) && (params.getCategories().size() > 0)) {
670                 // add query categories (if required)
671                 BooleanQuery categoryQuery = new BooleanQuery();
672                 for (int i = 0; i < params.getCategories().size(); i++) {
673                     Term term = new Term(I_CmsDocumentFactory.DOC_CATEGORY, (String  )params.getCategories().get(i));
674                     TermQuery termQuery = new TermQuery(term);
675                     categoryQuery.add(termQuery, BooleanClause.Occur.SHOULD);
676                 }
677                 query.add(categoryQuery, BooleanClause.Occur.MUST);
678             }
679 
680             if ((params.getFields() != null) && (params.getFields().size() > 0)) {
681                 // this is a "regular" query over one or more fields
682                 BooleanQuery fieldsQuery = new BooleanQuery();
683                 // add one sub-query for each of the selected fields, e.g. "content", "title" etc.
684                 for (int i = 0; i < params.getFields().size(); i++) {
685                     QueryParser p = new QueryParser((String  )params.getFields().get(i), languageAnalyzer);
686                     fieldsQuery.add(p.parse(params.getQuery()), BooleanClause.Occur.SHOULD);
687                 }
688                 // finally add the field queries to the main query
689                 query.add(fieldsQuery, BooleanClause.Occur.MUST);
690             } else {
691                 // if no fields are provided, just use the "content" field by default
692                 QueryParser p = new QueryParser(I_CmsDocumentFactory.DOC_CONTENT, languageAnalyzer);
693                 query.add(p.parse(params.getQuery()), BooleanClause.Occur.MUST);
694             }
695 
696             // create the index searcher
697             searcher = new IndexSearcher(m_path);
698             Query finalQuery;
699 
700             if (m_createExcerpt || LOG.isDebugEnabled()) {
701                 // we re-write the query because this enables highlighting of wildcard terms in excerpts 
702                 finalQuery = searcher.rewrite(query);
703             } else {
704                 finalQuery = query;
705             }
706             if (LOG.isDebugEnabled()) {
707                 LOG.debug(Messages.get().getBundle().key(Messages.LOG_BASE_QUERY_1, query));
708                 LOG.debug(Messages.get().getBundle().key(Messages.LOG_REWRITTEN_QUERY_1, finalQuery));
709 
710             }
711 
712             // collect the categories
713             CmsSearchCategoryCollector categoryCollector;
714             if (params.isCalculateCategories()) {
715                 // USE THIS OPTION WITH CAUTION
716                 // this may slow down searched by an order of magnitude
717                 categoryCollector = new CmsSearchCategoryCollector(searcher);
718                 // perform a first search to collect the categories
719                 searcher.search(finalQuery, categoryCollector);
720                 // store the result
721                 searchResults.setCategories(categoryCollector.getCategoryCountResult());
722             }
723 
724             // perform the search operation          
725             hits = searcher.search(finalQuery, params.getSort());
726 
727             int hitCount = hits.length();
728 
729             timeLucene += System.currentTimeMillis();
730             timeResultProcessing = -System.currentTimeMillis();
731 
732             Document doc;
733             CmsSearchResult searchResult;
734             String   excerpt = null;
735 
736             if (hits != null) {
737                 int page = params.getSearchPage();
738                 int start = -1, end = -1;
739                 if (matchesPerPage > 0 && page > 0 && hitCount > 0) {
740                     // calculate the final size of the search result
741                     start = matchesPerPage * (page - 1);
742                     end = start + matchesPerPage;
743                     // ensure that both i and n are inside the range of foundDocuments.size()
744                     start = (start > hitCount) ? hitCount : start;
745                     end = (end > hitCount) ? hitCount : end;
746                 } else {
747                     // return all found documents in the search result
748                     start = 0;
749                     end = hitCount;
750                 }
751 
752                 int visibleHitCount = hitCount;
753                 for (int i = 0, cnt = 0; i < hitCount && cnt < end; i++) {
754                     try {
755                         doc = hits.doc(i);
756                         if (hasReadPermission(cms, doc)) {
757                             // user has read permission
758                             if (cnt >= start) {
759                                 // do not use the resource to obtain the raw content, read it from the lucene document !
760                                 // documents must not have content (i.e. images), so check if the content field exists
761                                 if (m_createExcerpt && doc.getField(I_CmsDocumentFactory.DOC_CONTENT) != null) {
762                                     excerpt = getExcerpt(
763                                         doc.getField(I_CmsDocumentFactory.DOC_CONTENT).stringValue(),
764                                         finalQuery,
765                                         languageAnalyzer);
766                                 }
767                                 searchResult = new CmsSearchResult(Math.round(hits.score(i) * 100f), doc, excerpt);
768                                 searchResults.add(searchResult);
769                             }
770                             cnt++;
771                         } else {
772                             visibleHitCount--;
773                         }
774                     } catch (Exception   e) {
775                         // should not happen, but if it does we want to go on with the next result nevertheless                        
776                         if (LOG.isWarnEnabled()) {
777                             LOG.warn(Messages.get().getBundle().key(Messages.LOG_RESULT_ITERATION_FAILED_0), e);
778                         }
779                     }
780                 }
781 
782                 // save the total count of search results at the last index of the search result 
783                 searchResults.setHitCount(visibleHitCount);
784             } else {
785                 searchResults.setHitCount(0);
786             }
787 
788             timeResultProcessing += System.currentTimeMillis();
789 
790         } catch (Exception   exc) {
791             throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), exc);
792         } finally {
793 
794             // re-set thread to previous priority
795             Thread.currentThread().setPriority(previousPriority);
796 
797             if (searcher != null) {
798                 try {
799                     searcher.close();
800                 } catch (IOException   exc) {
801                     // noop
802                 }
803             }
804 
805             // switch back to the original project
806             context.setCurrentProject(currentProject);
807         }
808 
809         timeTotal += System.currentTimeMillis();
810 
811         Object  [] logParams = new Object  [] {
812             new Integer  (hits.length()),
813             new Long  (timeTotal),
814             new Long  (timeLucene),
815             new Long  (timeResultProcessing)};
816         if (LOG.isDebugEnabled()) {
817             LOG.debug(Messages.get().getBundle().key(Messages.LOG_STAT_RESULTS_TIME_4, logParams));
818         }
819 
820         return searchResults;
821     }
822 
823     /**
824      * Can be used to enable / disable this index.<p>
825      * 
826      * @param enabled the state of the index to set
827      */
828     public void setEnabled(boolean enabled) {
829 
830         m_enabled = enabled;
831     }
832 
833     /**
834      * Sets the locale to index resources.<p>
835      * 
836      * @param locale the locale to index resources
837      */
838     public void setLocale(String   locale) {
839 
840         m_locale = locale;
841     }
842 
843     /**
844      * Sets the logical key/name of this search index.<p>
845      * 
846      * @param name the logical key/name of this search index 
847      * 
848      * @throws org.opencms.main.CmsIllegalArgumentException 
849      *   if the given name is null, empty or already taken 
850      *   by another search index. 
851      */
852     public void setName(String   name) throws CmsIllegalArgumentException {
853 
854         if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
855             throw new CmsIllegalArgumentException(Messages.get().container(
856                 Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
857         } else {
858 
859             // check if already used, but only if the name was modified: 
860             // this is important as unmodifiable DisplayWidgets will also invoke this...
861             if (!name.equals(m_name)) {
862                 // don't mess with xml-configuration
863                 if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
864                     // Not needed at startup and additionally getSearchManager may return null
865                     Iterator   itIdxNames = OpenCms.getSearchManager().getIndexNames().iterator();
866                     while (itIdxNames.hasNext()) {
867                         if (itIdxNames.next().equals(name)) {
868                             throw new CmsIllegalArgumentException(Messages.get().container(
869                                 Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1,
870                                 name));
871                         }
872                     }
873                 }
874             }
875         }
876 
877         m_name = name;
878 
879     }
880 
881     /**
882      * Sets the name of the project used to index resources.<p>
883      * 
884      * A duplicate method of <code>{@link #setProjectName(String)}</code> that allows 
885      * to use instances of this class as a widget object (bean convention, 
886      * cp.: <code>{@link #getProject()}</code>.<p> 
887      * 
888      * @param projectName the name of the project used to index resources
889      */
890     public void setProject(String   projectName) {
891 
892         setProjectName(projectName);
893     }
894 
895     /**
896      * Sets the name of the project used to index resources.<p>
897      * 
898      * @param projectName the name of the project used to index resources
899      */
900     public void setProjectName(String   projectName) {
901 
902         m_project = projectName;
903     }
904 
905     /**
906      * Sets the rebuild mode of this search index.<p>
907      * 
908      * @param rebuildMode the rebuild mode of this search index {auto|manual}
909      */
910     public void setRebuildMode(String   rebuildMode) {
911 
912         m_rebuild = rebuildMode;
913     }
914 
915     /**
916      * Returns the name (<code>{@link #getName()}</code>) of this search index.<p>
917      *  
918      * @return the name (<code>{@link #getName()}</code>) of this search index
919      * 
920      * @see java.lang.Object#toString()
921      */
922     public String   toString() {
923 
924         return getName();
925     }
926 
927     /**
928      * Returns an excerpt of the given content related to the given search query.<p>
929      * 
930      * @param content the content
931      * @param searchQuery the search query
932      * @param analyzer the analyzer used 
933      * 
934      * @return an excerpt of the content
935      * 
936      * @throws IOException if something goes wrong
937      */
938     protected String   getExcerpt(String   content, Query searchQuery, Analyzer analyzer) throws IOException   {
939 
940         if (content == null) {
941             return null;
942         }
943 
944         CmsHighlightFinder highlighter = new CmsHighlightFinder(
945             OpenCms.getSearchManager().getHighlighter(),
946             searchQuery,
947             analyzer);
948 
949         String   excerpt = highlighter.getBestFragments(
950             content,
951             EXCERPT_FRAGMENT_SIZE,
952             EXCERPT_REQUIRED_FRAGMENTS,
953             EXCERPT_FRAGMENT_SEPARATOR);
954 
955         // kill all unwanted chars in the excerpt
956         excerpt = excerpt.replace('\t', ' ');
957         excerpt = excerpt.replace('\n', ' ');
958         excerpt = excerpt.replace('\r', ' ');
959         excerpt = excerpt.replace('\f', ' ');
960 
961         int maxLength = OpenCms.getSearchManager().getMaxExcerptLength();
962         if (excerpt != null && excerpt.length() > maxLength) {
963             excerpt = excerpt.substring(0, maxLength);
964         }
965 
966         return excerpt;
967     }
968 
969     /**
970      * Checks if the OpenCms resource referenced by the result document can be read 
971      * be the user of the given OpenCms context.<p>
972      * 
973      * @param cms the OpenCms user context to use for permission testing
974      * @param doc the search result document to check
975      * @return <code>true</code> if the user has read permissions to the resource
976      */
977     protected boolean hasReadPermission(CmsObject cms, Document doc) {
978 
979         if (m_dontCheckPermissions) {
980             // no permission check is performed at all
981             return true;
982         }
983 
984         Field typeField = doc.getField(I_CmsDocumentFactory.DOC_TYPE);
985         Field pathField = doc.getField(I_CmsDocumentFactory.DOC_PATH);
986         if ((typeField == null) || (pathField == null)) {
987             // permission check needs only to be performed for VFS documents that contain both fields
988             return true;
989         }
990 
991         String   rootPath = cms.getRequestContext().removeSiteRoot(pathField.stringValue());
992 
993         // check if the resource "exits", this will implicitly check read permission and if the resource was deleted
994         return cms.existsResource(rootPath);
995     }
996 }
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Free Books Free Magazines
Popular Tags