KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > opencms > search > CmsSearchIndex


1 /*
2  * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearchIndex.java,v $
3  * Date : $Date: 2006/10/26 10:22:11 $
4  * Version: $Revision: 1.61 $
5  *
6  * This library is part of OpenCms -
7  * the Open Source Content Management System
8  *
9  * Copyright (c) 2005 Alkacon Software GmbH (http://www.alkacon.com)
10  *
11  * This library is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * This library is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * For further information about Alkacon Software GmbH, please see the
22  * company website: http://www.alkacon.com
23  *
24  * For further information about OpenCms, please see the
25  * project website: http://www.opencms.org
26  *
27  * You should have received a copy of the GNU Lesser General Public
28  * License along with this library; if not, write to the Free Software
29  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30  */

31
32 package org.opencms.search;
33
34 import org.opencms.configuration.I_CmsConfigurationParameterHandler;
35 import org.opencms.file.CmsObject;
36 import org.opencms.file.CmsProject;
37 import org.opencms.file.CmsRequestContext;
38 import org.opencms.main.CmsException;
39 import org.opencms.main.CmsIllegalArgumentException;
40 import org.opencms.main.CmsLog;
41 import org.opencms.main.OpenCms;
42 import org.opencms.search.documents.CmsHighlightFinder;
43 import org.opencms.search.documents.I_CmsDocumentFactory;
44 import org.opencms.util.CmsStringUtil;
45
46 import java.io.File JavaDoc;
47 import java.io.IOException JavaDoc;
48 import java.util.ArrayList JavaDoc;
49 import java.util.HashMap JavaDoc;
50 import java.util.Iterator JavaDoc;
51 import java.util.List JavaDoc;
52 import java.util.Map JavaDoc;
53 import java.util.TreeMap JavaDoc;
54
55 import org.apache.commons.logging.Log;
56 import org.apache.lucene.analysis.Analyzer;
57 import org.apache.lucene.document.Document;
58 import org.apache.lucene.document.Field;
59 import org.apache.lucene.index.IndexWriter;
60 import org.apache.lucene.index.Term;
61 import org.apache.lucene.queryParser.QueryParser;
62 import org.apache.lucene.search.BooleanClause;
63 import org.apache.lucene.search.BooleanQuery;
64 import org.apache.lucene.search.Hits;
65 import org.apache.lucene.search.IndexSearcher;
66 import org.apache.lucene.search.PhraseQuery;
67 import org.apache.lucene.search.Query;
68 import org.apache.lucene.search.TermQuery;
69
70 /**
71  * Implements the search within an index and the management of the index configuration.<p>
72  *
73  * @author Carsten Weinholz
74  * @author Thomas Weckert
75  * @author Alexander Kandzior
76  *
77  * @version $Revision: 1.61 $
78  *
79  * @since 6.0.0
80  */

81 public class CmsSearchIndex implements I_CmsConfigurationParameterHandler {
82
83     /** Constant for a field list that contains the "meta" field as well as the "content" field. */
84     public static final String JavaDoc[] DOC_META_FIELDS = new String JavaDoc[] {
85         I_CmsDocumentFactory.DOC_META,
86         I_CmsDocumentFactory.DOC_CONTENT};
87
88     /** Constant for additional param to enable excerpt creation (default: true). */
89     public static final String JavaDoc EXCERPT = CmsSearchIndex.class.getName() + ".createExcerpt";
90
91     /** Constant for additional param to enable permission checks (default: true). */
92     public static final String JavaDoc PERMISSIONS = CmsSearchIndex.class.getName() + ".checkPermissions";
93
94     /** Constant for additional param to set the thread priority during search. */
95     public static final String JavaDoc PRIORITY = CmsSearchIndex.class.getName() + ".priority";
96
97     /** Automatic ("auto") index rebuild mode. */
98     public static final String JavaDoc REBUILD_MODE_AUTO = "auto";
99
100     /** Manual ("manual") index rebuild mode. */
101     public static final String JavaDoc REBUILD_MODE_MANUAL = "manual";
102
103     /** Special root path append token for optimized path queries. */
104     public static final String JavaDoc ROOT_PATH_SUFFIX = "@o.c";
105
106     /** Special root path start token for optimized path queries. */
107     public static final String JavaDoc ROOT_PATH_TOKEN = "root" + ROOT_PATH_SUFFIX;
108
109     /** Separator for the search excerpt fragments. */
110     private static final String JavaDoc EXCERPT_FRAGMENT_SEPARATOR = " ... ";
111
112     /** Size of the excerpt fragments in byte. */
113     private static final int EXCERPT_FRAGMENT_SIZE = 60;
114
115     /** Fragments required in excerpt. */
116     private static final int EXCERPT_REQUIRED_FRAGMENTS = 5;
117
118     /** The log object for this class. */
119     private static final Log LOG = CmsLog.getLog(CmsSearchIndex.class);
120
121     /** The list of configured index sources. */
122     List JavaDoc m_sources;
123
124     /** The excerpt mode for this index. */
125     private boolean m_createExcerpt;
126
127     /** Documenttypes of folders/channels. */
128     private Map JavaDoc m_documenttypes;
129
130     /** The permission check mode for this index. */
131     private boolean m_dontCheckPermissions;
132
133     /** An internal enabled flag, used to disable the index if for instance the configured project does not exist. */
134     private boolean m_enabled;
135
136     /** The language filter of this index. */
137     private String JavaDoc m_locale;
138
139     /** The name of this index. */
140     private String JavaDoc m_name;
141
142     /** The path where this index stores it's data in the "real" file system. */
143     private String JavaDoc m_path;
144
145     /** The thread priority for a search. */
146     private int m_priority;
147
148     /** The project of this index. */
149     private String JavaDoc m_project;
150
151     /** The rebuild mode for this index. */
152     private String JavaDoc m_rebuild;
153
154     /** The configured sources for this index. */
155     private List JavaDoc m_sourceNames;
156
/**
 * Creates an unnamed search index with default settings, intended to be used by the XML configuration only.<p>
 *
 * The defaults are: no source names, no document type mappings, excerpt creation switched on,
 * index enabled and no dedicated search thread priority (<code>-1</code>).<p>
 *
 * Other code should prefer <code>{@link #CmsSearchIndex(String)}</code>,
 * because that constructor enforces the mandatory name argument.<p>
 */
public CmsSearchIndex() {

    // scalar defaults
    m_priority = -1;
    m_enabled = true;
    m_createExcerpt = true;
    // containers that are filled by the configuration later on
    m_documenttypes = new HashMap();
    m_sourceNames = new ArrayList();
}
172
/**
 * Creates a new search index with default settings and the given name.<p>
 *
 * The defaults are applied first, afterwards the name is set with <code>setName(String)</code>.<p>
 *
 * @param name the system-wide unique name for the search index
 *
 * @throws org.opencms.main.CmsIllegalArgumentException if the given name is null, empty
 *         or already taken by another search index
 */
public CmsSearchIndex(String name) throws CmsIllegalArgumentException {

    // apply the defaults before the name is validated and stored
    this();
    setName(name);
}
189
190     /**
191      * Rewrites the a resource path for use in the {@link I_CmsDocumentFactory#DOC_ROOT} field.<p>
192      *
193      * All "/" chars in the path are replaced with the {@link #ROOT_PATH_SUFFIX} token.
194      * This is required in order to use a Lucene "phrase query" on the resource path.
195      * Using a phrase query is much, much better for the search performance then using a straightforward
196      * "prefix query". With a "prefix query", Lucene would interally generate a huge list of boolean sub-queries,
197      * exactly one for every document in the VFS subtree of the query. So if you query on "/sites/default/*" on
198      * a large OpenCms installation, this means thousands of sub-queries.
199      * Using the "phrase query", only one (or very few) queries are internally generated, and the result
200      * is just the same.<p>
201      *
202      * This implementation basically replaces the "/" of a path with "@o.c ".
203      * This is a trick so that the Lucene analyzer leaves the
204      * directory names untouched, since it treats them like literal email addresses.
205      * Otherwise the language analyzer might modify the directory names, leading to potential
206      * duplicates (e.g. <code>members/</code> and <code>member/</code> may both be trimmed to <code>member</code>),
207      * so that the prefix search returns more results then expected.<p>
208      * @param path the path to rewrite
209      *
210      * @return the re-written path
211      */

212     public static String JavaDoc rootPathRewrite(String JavaDoc path) {
213
214         StringBuffer JavaDoc result = new StringBuffer JavaDoc(256);
215         String JavaDoc[] elements = rootPathSplit(path);
216         for (int i = 0; i < elements.length; i++) {
217             result.append(elements[i]);
218             if ((i + 1) < elements.length) {
219                 result.append(' ');
220             }
221         }
222         return result.toString();
223     }
224
225     /**
226      * Spits the a resource path into tokens for use in the <code>{@link I_CmsDocumentFactory#DOC_ROOT}</code> field
227      * and with the <code>{@link #rootPathRewrite(String)}</code> method.<p>
228      *
229      * @param path the path to split
230      *
231      * @return the splitted path
232      *
233      * @see #rootPathRewrite(String)
234      */

235     public static String JavaDoc[] rootPathSplit(String JavaDoc path) {
236
237         if (CmsStringUtil.isEmpty(path)) {
238             return new String JavaDoc[] {ROOT_PATH_TOKEN};
239         }
240
241         // split the path
242
String JavaDoc[] elements = CmsStringUtil.splitAsArray(path, '/');
243         int length = elements.length + 1;
244         String JavaDoc[] result = new String JavaDoc[length];
245         result[0] = ROOT_PATH_TOKEN;
246         for (int i = 1; i < length; i++) {
247             // append suffix to all path elements
248
result[i] = elements[i - 1] + ROOT_PATH_SUFFIX;
249             // underscore '_' is a word separator for the Lucene analyzer, must replace this
250
result[i] = result[i].replace('_', '0');
251         }
252         return result;
253     }
254
255     /**
256      * Adds a parameter.<p>
257      *
258      * @param key the key/name of the parameter
259      * @param value the value of the parameter
260      */

261     public void addConfigurationParameter(String JavaDoc key, String JavaDoc value) {
262
263         if (PERMISSIONS.equals(key)) {
264             m_dontCheckPermissions = !Boolean.valueOf(value).booleanValue();
265         } else if (EXCERPT.equals(key)) {
266             m_createExcerpt = Boolean.valueOf(value).booleanValue();
267         } else if (PRIORITY.equals(key)) {
268             m_priority = Integer.parseInt(value);
269             if (m_priority < Thread.MIN_PRIORITY) {
270                 m_priority = Thread.MIN_PRIORITY;
271                 LOG.error(Messages.get().getBundle().key(
272                     Messages.LOG_SEARCH_PRIORITY_TOO_LOW_2,
273                     value,
274                     new Integer JavaDoc(Thread.MIN_PRIORITY)));
275
276             } else if (m_priority > Thread.MAX_PRIORITY) {
277                 m_priority = Thread.MAX_PRIORITY;
278                 LOG.debug(Messages.get().getBundle().key(
279                     Messages.LOG_SEARCH_PRIORITY_TOO_HIGH_2,
280                     value,
281                     new Integer JavaDoc(Thread.MAX_PRIORITY)));
282
283             }
284         }
285     }
286
287     /**
288      * Adds am index source to this search index.<p>
289      *
290      * @param sourceName the index source name to add
291      */

292     public void addSourceName(String JavaDoc sourceName) {
293
294         m_sourceNames.add(sourceName);
295     }
296
297     /**
298      * Checks is this index has been configured correctly.<p>
299      *
300      * In case the check fails, the <code>enabled</code> property
301      * is set to <code>false</code>
302      *
303      * @param cms a OpenCms user context to perform the checks with (should have "Administrator" permissions)
304      *
305      * @return <code>true</code> in case the index is correctly configured and enabled after the check
306      *
307      * @see #isEnabled()
308      */

309     public boolean checkConfiguration(CmsObject cms) {
310
311         if (isEnabled()) {
312             // check if the project for the index exists
313
try {
314                 cms.readProject(getProject());
315                 setEnabled(true);
316             } catch (CmsException e) {
317                 // the project does not exist, disable the index
318
setEnabled(false);
319                 if (LOG.isErrorEnabled()) {
320                     LOG.error(Messages.get().getBundle().key(
321                         Messages.LOG_SEARCHINDEX_CREATE_BAD_PROJECT_2,
322                         getProject(),
323                         getName()));
324                 }
325             }
326         } else {
327             if (LOG.isInfoEnabled()) {
328                 LOG.info(Messages.get().getBundle().key(Messages.LOG_SEARCHINDEX_DISABLED_1, getName()));
329             }
330         }
331
332         return isEnabled();
333     }
334
335     /**
336      * @see java.lang.Object#equals(java.lang.Object)
337      */

338     public boolean equals(Object JavaDoc obj) {
339
340         if (obj == this) {
341             return true;
342         }
343         if (obj instanceof CmsSearchIndex) {
344             return ((CmsSearchIndex)obj).m_name.equals(m_name);
345         }
346         return false;
347     }
348
349     /**
350      * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#getConfiguration()
351      */

352     public Map JavaDoc getConfiguration() {
353
354         Map JavaDoc result = new TreeMap JavaDoc();
355         if (m_priority > 0) {
356             result.put(PRIORITY, new Integer JavaDoc(m_priority));
357         }
358         if (!m_createExcerpt) {
359             result.put(EXCERPT, new Boolean JavaDoc(m_createExcerpt));
360         }
361         if (m_dontCheckPermissions) {
362             result.put(PERMISSIONS, new Boolean JavaDoc(!m_dontCheckPermissions));
363         }
364         return result;
365     }
366
367     /**
368      * Returns the configured document types of this index for the given resource path.<p>
369      *
370      * The result List contains Strings with the names of the document types.<p>
371      *
372      * @param path path of the folder
373      *
374      * @return the configured document types of this index for the given resource path
375      */

376     public List JavaDoc getDocumenttypes(String JavaDoc path) {
377
378         List JavaDoc documenttypes = null;
379         if (m_documenttypes != null) {
380             for (Iterator JavaDoc i = m_documenttypes.keySet().iterator(); i.hasNext();) {
381                 String JavaDoc key = (String JavaDoc)i.next();
382                 // NOTE: assumed that configured resource paths do not overlap, otherwise result is undefined
383
if (path.startsWith(key)) {
384                     documenttypes = (List JavaDoc)m_documenttypes.get(key);
385                     break;
386                 }
387             }
388         }
389         if (documenttypes == null) {
390             documenttypes = OpenCms.getSearchManager().getDocumentTypes();
391         }
392         return documenttypes;
393     }
394
395     /**
396      * Returns a new index writer for this index.<p>
397      *
398      * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
399      *
400      * @return a new instance of IndexWriter
401      * @throws CmsIndexException if the index can not be opened
402      */

403     public IndexWriter getIndexWriter(boolean create) throws CmsIndexException {
404
405         IndexWriter indexWriter;
406         Analyzer analyzer = OpenCms.getSearchManager().getAnalyzer(m_locale);
407
408         try {
409             File JavaDoc f = new File JavaDoc(m_path);
410             if (f.exists()) {
411                 // index already exists
412
indexWriter = new IndexWriter(m_path, analyzer, create);
413             } else {
414                 // index does not exist yet
415
f = f.getParentFile();
416                 if (f != null && !f.exists()) {
417                     // create the parent folders if required
418
f.mkdirs();
419                 }
420                 indexWriter = new IndexWriter(m_path, analyzer, true);
421             }
422
423         } catch (Exception JavaDoc e) {
424             throw new CmsIndexException(
425                 Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name),
426                 e);
427         }
428
429         return indexWriter;
430     }
431
432     /**
433      * Gets the langauge of this index.<p>
434      *
435      * @return the language of the index, i.e. de
436      */

437     public String JavaDoc getLocale() {
438
439         return m_locale;
440     }
441
442     /**
443      * Gets the name of this index.<p>
444      *
445      * @return the name of the index
446      */

447     public String JavaDoc getName() {
448
449         return m_name;
450     }
451
452     /**
453      * Returns the path where this index stores it's data in the "real" file system.<p>
454      *
455      * @return the path where this index stores it's data in the "real" file system
456      */

457     public String JavaDoc getPath() {
458
459         return m_path;
460     }
461
462     /**
463      * Gets the project of this index.<p>
464      *
465      * @return the project of the index, i.e. "online"
466      */

467     public String JavaDoc getProject() {
468
469         return m_project;
470     }
471
472     /**
473      * Get the rebuild mode of this index.<p>
474      *
475      * @return the current rebuild mode
476      */

477     public String JavaDoc getRebuildMode() {
478
479         return m_rebuild;
480     }
481
482     /**
483      * Returns all configured sources names of this search index.<p>
484      *
485      * @return a list with all configured sources names of this search index
486      */

487     public List JavaDoc getSourceNames() {
488
489         return m_sourceNames;
490     }
491
492     /**
493      * Returns all configured index sources of this search index.<p>
494      *
495      * @return all configured index sources of this search index
496      */

497     public List JavaDoc getSources() {
498
499         return m_sources;
500     }
501
502     /**
503      * @see java.lang.Object#hashCode()
504      */

505     public int hashCode() {
506
507         return m_name != null ? m_name.hashCode() : 0;
508     }
509
510     /**
511      * @see org.opencms.configuration.I_CmsConfigurationParameterHandler#initConfiguration()
512      */

513     public void initConfiguration() {
514
515         // noting to do here
516
}
517
518     /**
519      * Initializes the search index.<p>
520      *
521      * @throws CmsSearchException if the index source association failed
522      */

523     public void initialize() throws CmsSearchException {
524
525         if (!isEnabled()) {
526             // index is disabled, no initialization is required
527
return;
528         }
529
530         String JavaDoc sourceName = null;
531         CmsSearchIndexSource indexSource = null;
532         List JavaDoc searchIndexSourceDocumentTypes = null;
533         List JavaDoc resourceNames = null;
534         String JavaDoc resourceName = null;
535         m_sources = new ArrayList JavaDoc();
536
537         m_path = OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(
538             OpenCms.getSearchManager().getDirectory() + "/" + m_name);
539
540         for (int i = 0, n = m_sourceNames.size(); i < n; i++) {
541
542             try {
543                 sourceName = (String JavaDoc)m_sourceNames.get(i);
544                 indexSource = OpenCms.getSearchManager().getIndexSource(sourceName);
545                 m_sources.add(indexSource);
546
547                 resourceNames = indexSource.getResourcesNames();
548                 searchIndexSourceDocumentTypes = indexSource.getDocumentTypes();
549                 for (int j = 0, m = resourceNames.size(); j < m; j++) {
550
551                     resourceName = (String JavaDoc)resourceNames.get(j);
552                     m_documenttypes.put(resourceName, searchIndexSourceDocumentTypes);
553                 }
554             } catch (Exception JavaDoc e) {
555                 // mark this index as disabled
556
setEnabled(false);
557                 throw new CmsSearchException(Messages.get().container(
558                     Messages.ERR_INDEX_SOURCE_ASSOCIATION_1,
559                     sourceName), e);
560             }
561         }
562     }
563
564     /**
565      * Returns <code>true</code> if this index is currently disabled.<p>
566      *
567      * @return <code>true</code> if this index is currently disabled
568      */

569     public boolean isEnabled() {
570
571         return m_enabled;
572     }
573
574     /**
575      * Removes an index source from this search index.<p>
576      *
577      * @param sourceName the index source name to remove
578      */

579     public void removeSourceName(String JavaDoc sourceName) {
580
581         m_sourceNames.remove(sourceName);
582     }
583
584     /**
585      * Performs a search on the index within the given fields.<p>
586      *
587      * The result is returned as List with entries of type I_CmsSearchResult.<p>
588      * @param cms the current user's Cms object
589      * @param params the parameters to use for the search
590      * @param matchesPerPage the number of search results per page, or -1 to return all found documents in the search result
591      * @return the List of results found or an empty list
592      * @throws CmsSearchException if something goes wrong
593      */

594     public synchronized CmsSearchResultList search(CmsObject cms, CmsSearchParameters params, int matchesPerPage)
595     throws CmsSearchException {
596
597         long timeTotal = -System.currentTimeMillis();
598         long timeLucene;
599         long timeResultProcessing;
600
601         if (LOG.isDebugEnabled()) {
602             LOG.debug(Messages.get().getBundle().key(Messages.LOG_SEARCH_PARAMS_2, params, m_name));
603         }
604
605         CmsRequestContext context = cms.getRequestContext();
606         CmsProject currentProject = context.currentProject();
607
608         // the searcher to perform the operation in
609
IndexSearcher searcher = null;
610
611         // the hits found during the search
612
Hits hits;
613
614         // storage for the results found
615
CmsSearchResultList searchResults = new CmsSearchResultList();
616
617         int previousPriority = Thread.currentThread().getPriority();
618
619         try {
620
621             if (m_priority > 0) {
622                 // change thread priority in order to reduce search impact on overall system performance
623
Thread.currentThread().setPriority(m_priority);
624             }
625
626             // change the project
627
context.setCurrentProject(cms.readProject(m_project));
628
629             // complete the search root
630
String JavaDoc[] roots;
631             if ((params.getRoots() != null) && (params.getRoots().size() > 0)) {
632                 // add the site root to all the search root
633
roots = new String JavaDoc[params.getRoots().size()];
634                 for (int i = 0; i < params.getRoots().size(); i++) {
635                     roots[i] = cms.getRequestContext().addSiteRoot((String JavaDoc)params.getRoots().get(i));
636                 }
637             } else {
638                 // just use the site root as the search root
639
// this permits searching in indexes that contain content of other sites than the current selected one?!?!
640
roots = new String JavaDoc[] {cms.getRequestContext().getSiteRoot()};
641             }
642
643             timeLucene = -System.currentTimeMillis();
644
645             // the language analyzer to use for creating the queries
646
Analyzer languageAnalyzer = OpenCms.getSearchManager().getAnalyzer(m_locale);
647
648             // the main query to use, will be constructed in the next lines
649
BooleanQuery query = new BooleanQuery();
650
651             // implementation note:
652
// initially this was a simple PrefixQuery based on the DOC_PATH
653
// however, internally Lucene rewrote that to literally hundreds of BooleanQuery parts
654
// the following implementation will lead to just one Lucene PhraseQuery per directory and is thus much better
655
// cw/261006 - paths elements should not contain uppercase letters, otherwise searcher does not find the appropriate results
656
BooleanQuery pathQuery = new BooleanQuery();
657             for (int i = 0; i < roots.length; i++) {
658                 String JavaDoc[] paths = rootPathSplit(roots[i]);
659                 PhraseQuery phrase = new PhraseQuery();
660                 for (int j = 0; j < paths.length; j++) {
661                     Term term = new Term(I_CmsDocumentFactory.DOC_ROOT, paths[j].toLowerCase());
662                     phrase.add(term);
663                 }
664                 pathQuery.add(phrase, BooleanClause.Occur.SHOULD);
665             }
666             // add the calculated phrase query for the root path
667
query.add(pathQuery, BooleanClause.Occur.MUST);
668
669             if ((params.getCategories() != null) && (params.getCategories().size() > 0)) {
670                 // add query categories (if required)
671
BooleanQuery categoryQuery = new BooleanQuery();
672                 for (int i = 0; i < params.getCategories().size(); i++) {
673                     Term term = new Term(I_CmsDocumentFactory.DOC_CATEGORY, (String JavaDoc)params.getCategories().get(i));
674                     TermQuery termQuery = new TermQuery(term);
675                     categoryQuery.add(termQuery, BooleanClause.Occur.SHOULD);
676                 }
677                 query.add(categoryQuery, BooleanClause.Occur.MUST);
678             }
679
680             if ((params.getFields() != null) && (params.getFields().size() > 0)) {
681                 // this is a "regular" query over one or more fields
682
BooleanQuery fieldsQuery = new BooleanQuery();
683                 // add one sub-query for each of the selected fields, e.g. "content", "title" etc.
684
for (int i = 0; i < params.getFields().size(); i++) {
685                     QueryParser p = new QueryParser((String JavaDoc)params.getFields().get(i), languageAnalyzer);
686                     fieldsQuery.add(p.parse(params.getQuery()), BooleanClause.Occur.SHOULD);
687                 }
688                 // finally add the field queries to the main query
689
query.add(fieldsQuery, BooleanClause.Occur.MUST);
690             } else {
691                 // if no fields are provided, just use the "content" field by default
692
QueryParser p = new QueryParser(I_CmsDocumentFactory.DOC_CONTENT, languageAnalyzer);
693                 query.add(p.parse(params.getQuery()), BooleanClause.Occur.MUST);
694             }
695
696             // create the index searcher
697
searcher = new IndexSearcher(m_path);
698             Query finalQuery;
699
700             if (m_createExcerpt || LOG.isDebugEnabled()) {
701                 // we re-write the query because this enables highlighting of wildcard terms in excerpts
702
finalQuery = searcher.rewrite(query);
703             } else {
704                 finalQuery = query;
705             }
706             if (LOG.isDebugEnabled()) {
707                 LOG.debug(Messages.get().getBundle().key(Messages.LOG_BASE_QUERY_1, query));
708                 LOG.debug(Messages.get().getBundle().key(Messages.LOG_REWRITTEN_QUERY_1, finalQuery));
709
710             }
711
712             // collect the categories
713
CmsSearchCategoryCollector categoryCollector;
714             if (params.isCalculateCategories()) {
715                 // USE THIS OPTION WITH CAUTION
716
// this may slow down searched by an order of magnitude
717
categoryCollector = new CmsSearchCategoryCollector(searcher);
718                 // perform a first search to collect the categories
719
searcher.search(finalQuery, categoryCollector);
720                 // store the result
721
searchResults.setCategories(categoryCollector.getCategoryCountResult());
722             }
723
724             // perform the search operation
725
hits = searcher.search(finalQuery, params.getSort());
726
727             int hitCount = hits.length();
728
729             timeLucene += System.currentTimeMillis();
730             timeResultProcessing = -System.currentTimeMillis();
731
732             Document doc;
733             CmsSearchResult searchResult;
734             String JavaDoc excerpt = null;
735
736             if (hits != null) {
737                 int page = params.getSearchPage();
738                 int start = -1, end = -1;
739                 if (matchesPerPage > 0 && page > 0 && hitCount > 0) {
740                     // calculate the final size of the search result
741
start = matchesPerPage * (page - 1);
742                     end = start + matchesPerPage;
743                     // ensure that both i and n are inside the range of foundDocuments.size()
744
start = (start > hitCount) ? hitCount : start;
745                     end = (end > hitCount) ? hitCount : end;
746                 } else {
747                     // return all found documents in the search result
748
start = 0;
749                     end = hitCount;
750                 }
751
752                 int visibleHitCount = hitCount;
753                 for (int i = 0, cnt = 0; i < hitCount && cnt < end; i++) {
754                     try {
755                         doc = hits.doc(i);
756                         if (hasReadPermission(cms, doc)) {
757                             // user has read permission
758
if (cnt >= start) {
759                                 // do not use the resource to obtain the raw content, read it from the lucene document !
760
// documents must not have content (i.e. images), so check if the content field exists
761
if (m_createExcerpt && doc.getField(I_CmsDocumentFactory.DOC_CONTENT) != null) {
762                                     excerpt = getExcerpt(
763                                         doc.getField(I_CmsDocumentFactory.DOC_CONTENT).stringValue(),
764                                         finalQuery,
765                                         languageAnalyzer);
766                                 }
767                                 searchResult = new CmsSearchResult(Math.round(hits.score(i) * 100f), doc, excerpt);
768                                 searchResults.add(searchResult);
769                             }
770                             cnt++;
771                         } else {
772                             visibleHitCount--;
773                         }
774                     } catch (Exception JavaDoc e) {
775                         // should not happen, but if it does we want to go on with the next result nevertheless
776
if (LOG.isWarnEnabled()) {
777                             LOG.warn(Messages.get().getBundle().key(Messages.LOG_RESULT_ITERATION_FAILED_0), e);
778                         }
779                     }
780                 }
781
782                 // save the total count of search results at the last index of the search result
783
searchResults.setHitCount(visibleHitCount);
784             } else {
785                 searchResults.setHitCount(0);
786             }
787
788             timeResultProcessing += System.currentTimeMillis();
789
790         } catch (Exception JavaDoc exc) {
791             throw new CmsSearchException(Messages.get().container(Messages.ERR_SEARCH_PARAMS_1, params), exc);
792         } finally {
793
794             // re-set thread to previous priority
795
Thread.currentThread().setPriority(previousPriority);
796
797             if (searcher != null) {
798                 try {
799                     searcher.close();
800                 } catch (IOException JavaDoc exc) {
801                     // noop
802
}
803             }
804
805             // switch back to the original project
806
context.setCurrentProject(currentProject);
807         }
808
809         timeTotal += System.currentTimeMillis();
810
811         Object JavaDoc[] logParams = new Object JavaDoc[] {
812             new Integer JavaDoc(hits.length()),
813             new Long JavaDoc(timeTotal),
814             new Long JavaDoc(timeLucene),
815             new Long JavaDoc(timeResultProcessing)};
816         if (LOG.isDebugEnabled()) {
817             LOG.debug(Messages.get().getBundle().key(Messages.LOG_STAT_RESULTS_TIME_4, logParams));
818         }
819
820         return searchResults;
821     }
822
823     /**
824      * Can be used to enable / disable this index.<p>
825      *
826      * @param enabled the state of the index to set
827      */

828     public void setEnabled(boolean enabled) {
829
830         m_enabled = enabled;
831     }
832
833     /**
834      * Sets the locale to index resources.<p>
835      *
836      * @param locale the locale to index resources
837      */

838     public void setLocale(String JavaDoc locale) {
839
840         m_locale = locale;
841     }
842
843     /**
844      * Sets the logical key/name of this search index.<p>
845      *
846      * @param name the logical key/name of this search index
847      *
848      * @throws org.opencms.main.CmsIllegalArgumentException
849      * if the given name is null, empty or already taken
850      * by another search index.
851      */

852     public void setName(String JavaDoc name) throws CmsIllegalArgumentException {
853
854         if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
855             throw new CmsIllegalArgumentException(Messages.get().container(
856                 Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
857         } else {
858
859             // check if already used, but only if the name was modified:
860
// this is important as unmodifiable DisplayWidgets will also invoke this...
861
if (!name.equals(m_name)) {
862                 // don't mess with xml-configuration
863
if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
864                     // Not needed at startup and additionally getSearchManager may return null
865
Iterator JavaDoc itIdxNames = OpenCms.getSearchManager().getIndexNames().iterator();
866                     while (itIdxNames.hasNext()) {
867                         if (itIdxNames.next().equals(name)) {
868                             throw new CmsIllegalArgumentException(Messages.get().container(
869                                 Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1,
870                                 name));
871                         }
872                     }
873                 }
874             }
875         }
876
877         m_name = name;
878
879     }
880
881     /**
882      * Sets the name of the project used to index resources.<p>
883      *
884      * A duplicate method of <code>{@link #setProjectName(String)}</code> that allows
885      * to use instances of this class as a widget object (bean convention,
886      * cp.: <code>{@link #getProject()}</code>.<p>
887      *
888      * @param projectName the name of the project used to index resources
889      */

890     public void setProject(String JavaDoc projectName) {
891
892         setProjectName(projectName);
893     }
894
895     /**
896      * Sets the name of the project used to index resources.<p>
897      *
898      * @param projectName the name of the project used to index resources
899      */

900     public void setProjectName(String JavaDoc projectName) {
901
902         m_project = projectName;
903     }
904
905     /**
906      * Sets the rebuild mode of this search index.<p>
907      *
908      * @param rebuildMode the rebuild mode of this search index {auto|manual}
909      */

910     public void setRebuildMode(String JavaDoc rebuildMode) {
911
912         m_rebuild = rebuildMode;
913     }
914
915     /**
916      * Returns the name (<code>{@link #getName()}</code>) of this search index.<p>
917      *
918      * @return the name (<code>{@link #getName()}</code>) of this search index
919      *
920      * @see java.lang.Object#toString()
921      */

922     public String JavaDoc toString() {
923
924         return getName();
925     }
926
927     /**
928      * Returns an excerpt of the given content related to the given search query.<p>
929      *
930      * @param content the content
931      * @param searchQuery the search query
932      * @param analyzer the analyzer used
933      *
934      * @return an excerpt of the content
935      *
936      * @throws IOException if something goes wrong
937      */

938     protected String JavaDoc getExcerpt(String JavaDoc content, Query searchQuery, Analyzer analyzer) throws IOException JavaDoc {
939
940         if (content == null) {
941             return null;
942         }
943
944         CmsHighlightFinder highlighter = new CmsHighlightFinder(
945             OpenCms.getSearchManager().getHighlighter(),
946             searchQuery,
947             analyzer);
948
949         String JavaDoc excerpt = highlighter.getBestFragments(
950             content,
951             EXCERPT_FRAGMENT_SIZE,
952             EXCERPT_REQUIRED_FRAGMENTS,
953             EXCERPT_FRAGMENT_SEPARATOR);
954
955         // kill all unwanted chars in the excerpt
956
excerpt = excerpt.replace('\t', ' ');
957         excerpt = excerpt.replace('\n', ' ');
958         excerpt = excerpt.replace('\r', ' ');
959         excerpt = excerpt.replace('\f', ' ');
960
961         int maxLength = OpenCms.getSearchManager().getMaxExcerptLength();
962         if (excerpt != null && excerpt.length() > maxLength) {
963             excerpt = excerpt.substring(0, maxLength);
964         }
965
966         return excerpt;
967     }
968
969     /**
970      * Checks if the OpenCms resource referenced by the result document can be read
971      * be the user of the given OpenCms context.<p>
972      *
973      * @param cms the OpenCms user context to use for permission testing
974      * @param doc the search result document to check
975      * @return <code>true</code> if the user has read permissions to the resource
976      */

977     protected boolean hasReadPermission(CmsObject cms, Document doc) {
978
979         if (m_dontCheckPermissions) {
980             // no permission check is performed at all
981
return true;
982         }
983
984         Field typeField = doc.getField(I_CmsDocumentFactory.DOC_TYPE);
985         Field pathField = doc.getField(I_CmsDocumentFactory.DOC_PATH);
986         if ((typeField == null) || (pathField == null)) {
987             // permission check needs only to be performed for VFS documents that contain both fields
988
return true;
989         }
990
991         String JavaDoc rootPath = cms.getRequestContext().removeSiteRoot(pathField.stringValue());
992
993         // check if the resource "exits", this will implicitly check read permission and if the resource was deleted
994
return cms.existsResource(rootPath);
995     }
996 }
Popular Tags