KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > search > SearchManager


1 /*
2  * Copyright 2004 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.search;
17
18 import java.io.ByteArrayInputStream JavaDoc;
19 import java.io.File JavaDoc;
20 import java.io.IOException JavaDoc;
21 import java.io.StringReader JavaDoc;
22 import java.util.ArrayList JavaDoc;
23 import java.util.Calendar JavaDoc;
24 import java.util.Collection JavaDoc;
25 import java.util.Collections JavaDoc;
26 import java.util.Date JavaDoc;
27 import java.util.HashMap JavaDoc;
28 import java.util.Iterator JavaDoc;
29 import java.util.List JavaDoc;
30 import java.util.Locale JavaDoc;
31 import java.util.Map JavaDoc;
32
33 import javax.servlet.ServletContext JavaDoc;
34 import javax.servlet.http.HttpServletRequest JavaDoc;
35
36 import org.apache.commons.logging.Log;
37 import org.apache.commons.logging.LogFactory;
38 import org.apache.lucene.analysis.Analyzer;
39 import org.apache.lucene.analysis.StopAnalyzer;
40 import org.apache.lucene.analysis.TokenStream;
41 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
42 import org.apache.lucene.analysis.cz.CzechAnalyzer;
43 import org.apache.lucene.analysis.de.GermanAnalyzer;
44 import org.apache.lucene.analysis.el.GreekAnalyzer;
45 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
46 import org.apache.lucene.analysis.nl.DutchAnalyzer;
47 import org.apache.lucene.analysis.ru.RussianAnalyzer;
48 import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
49 import org.apache.lucene.analysis.standard.StandardAnalyzer;
50 import org.apache.lucene.document.DateTools;
51 import org.apache.lucene.document.Document;
52 import org.apache.lucene.document.Field;
53 import org.apache.lucene.index.IndexReader;
54 import org.apache.lucene.index.IndexWriter;
55 import org.apache.lucene.index.Term;
56 import org.apache.lucene.misc.ChainedFilter;
57 import org.apache.lucene.queryParser.ParseException;
58 import org.apache.lucene.queryParser.QueryParser;
59 import org.apache.lucene.search.CachingWrapperFilter;
60 import org.apache.lucene.search.Filter;
61 import org.apache.lucene.search.Hits;
62 import org.apache.lucene.search.IndexSearcher;
63 import org.apache.lucene.search.Query;
64 import org.apache.lucene.search.QueryFilter;
65 import org.apache.lucene.search.RangeFilter;
66 import org.apache.lucene.search.TermQuery;
67 import org.apache.lucene.search.highlight.Highlighter;
68 import org.apache.lucene.search.highlight.QueryScorer;
69 import org.apache.lucene.search.highlight.SimpleFragmenter;
70 import org.apache.lucene.store.Directory;
71 import org.apache.lucene.store.FSDirectory;
72 import org.apache.struts.tiles.TilesUtil;
73 import org.springframework.context.ApplicationContext;
74 import org.springframework.web.context.support.WebApplicationContextUtils;
75
76 import com.blandware.atleap.common.Constants;
77 import com.blandware.atleap.common.parsers.PlainTextExtractor;
78 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
79 import com.blandware.atleap.common.parsers.exception.UnsupportedMimeTypeException;
80 import com.blandware.atleap.common.util.ConvertUtil;
81 import com.blandware.atleap.common.util.DateUtil;
82 import com.blandware.atleap.common.util.StringUtil;
83 import com.blandware.atleap.model.core.ActionPage;
84 import com.blandware.atleap.model.core.ContentDocument;
85 import com.blandware.atleap.model.core.ContentField;
86 import com.blandware.atleap.model.core.ContentFieldValue;
87 import com.blandware.atleap.model.core.ContentLocale;
88 import com.blandware.atleap.model.core.ContentPage;
89 import com.blandware.atleap.model.core.Layout;
90 import com.blandware.atleap.model.core.Page;
91 import com.blandware.atleap.search.analysis.NorwegianAnalyzer;
92 import com.blandware.atleap.search.analysis.PortugueseAnalyzer;
93 import com.blandware.atleap.search.analysis.SpanishAnalyzer;
94 import com.blandware.atleap.search.filters.RolesFilter;
95 import com.blandware.atleap.service.core.ContentResourceManager;
96 import com.blandware.atleap.service.core.LayoutManager;
97 import com.blandware.atleap.service.core.PageManager;
98 import com.blandware.atleap.webapp.struts.HeritableComponentDefinition;
99 import com.blandware.atleap.webapp.util.core.GlobalProperties;
100 import com.blandware.atleap.webapp.util.core.WebappConstants;
101 import com.blandware.atleap.webapp.util.core.WebappUtil;
102
103 /**
104  * <p>Manager to perform search operations</p>
105  * <p><a HREF="SearchManager.java.htm"><i>View Source</i></a></p>
106  * <p/>
107  *
108  * @author Andrey Grebnev <a HREF="mailto:andrey.grebnev@blandware.com">&lt;andrey.grebnev@blandware.com&gt;</a>
109  * @version $Revision: 1.40 $ $Date: 2006/03/16 11:09:39 $
110  */

111 public class SearchManager {
112
    // Names of the Lucene index fields written by this manager.
    // URI_FIELD: uri of the indexed page/document; stored, untokenized; also the term used by unIndex().
113     public static final String JavaDoc URI_FIELD = "uri";
    // BODY_FIELD: searchable text; not stored; the default field handed to the QueryParser in search().
114     public static final String JavaDoc BODY_FIELD = "body";
    // TITLE_FIELD: page title or document description; stored and tokenized.
115     public static final String JavaDoc TITLE_FIELD = "title";
    // SUMMARY_FIELD: leading part of extracted document text; stored but not indexed.
116     public static final String JavaDoc SUMMARY_FIELD = "summary";
    // MODIFIED_FIELD: last modification time as a DateTools string with SECOND resolution.
117     public static final String JavaDoc MODIFIED_FIELD = "modified";
    // LOCALE_FIELD: content locale identifier; also the term used by unIndexByLocale() and the locale filter.
118     public static final String JavaDoc LOCALE_FIELD = "locale";
    // ROLES_FIELD: comma separated roles string; stored but not indexed.
119     public static final String JavaDoc ROLES_FIELD = "roles";
    // TYPE_FIELD: holds either PAGE_TYPE or DOCUMENT_TYPE.
120     public static final String JavaDoc TYPE_FIELD = "type";
121
    // Values written into TYPE_FIELD.
122     public static final String JavaDoc PAGE_TYPE = "page";
123     public static final String JavaDoc DOCUMENT_TYPE = "document";
124
    // Key of the global property that holds the path of the index directory (mandatory, see constructor).
125     protected static final String JavaDoc SEARCH_DIR_KEY = "search.index.dir";
126
    // Mime type passed to PlainTextExtractor when stripping markup from HTML content fields.
127     protected static final String JavaDoc HTML_MIMETYPE = "text/html";
128
    // Fragment size handed to the highlighter's SimpleFragmenter in search().
129     protected static final int RESULT_FRAGMENT_SIZE = 60;
    // NOTE(review): not used in the visible part of the file; presumably the maximum number of
    // highlighted fragments per hit and the string used to join them - confirm against search().
130     protected static final int RESULT_FRAGMENT_NUMBER = 3;
131     protected static final String JavaDoc RESULT_FRAGMENT_DELIMITER = "...";
132
    // Servlet context attribute name under which getInstance() stores the manager.
133     protected static final String JavaDoc INSTANCE_KEY = "com.blandware.atleap.search.SearchManager.INSTANCE";
134
    // Logger of this class.
135     protected transient final Log log = LogFactory.getLog(SearchManager.class);
136
    // Servlet context this manager was created for and the directory that holds the index files;
    // both are assigned in the constructor.
137     protected ServletContext JavaDoc servletContext = null;
138     protected File JavaDoc indexDir = null;
139
140     /**
141      * Flag to indicate whether the initialize function was called for this SearchManager instance or not
142      */

143     protected boolean initialized = false;
144
145     /**
146      * Index reader used for search facilities (search() rewrites queries against it)
147      */

148     protected IndexReader searchIndexReader = null;
149
150     /**
151      * Index reader used for unindex facilities (documents are deleted through it)
152      */

153     protected IndexReader unindexIndexReader = null;
154
155     /**
156      * Index searcher
157      */

158     protected IndexSearcher indexSearcher = null;
159
160     /**
161      * Map of analyzers where key is locale
162      */

163     protected Map JavaDoc analyzers = null;
164
165     /**
166      * Application context, resolved from the servlet context in the constructor
167      */

168     protected ApplicationContext applicationCtx = null;
169
170     /**
171      * This map contains Filters by locale key; search() fills it lazily with one QueryFilter per locale
172      */

173     protected HashMap JavaDoc localeFilters = new HashMap JavaDoc();
174
175     /**
176      * This map contains Filters by comma separated roles string
177      * (NOTE(review): not read or written in the visible part of the file - confirm it is still used)
178      */

178     protected HashMap JavaDoc rolesFilters = new HashMap JavaDoc();
179
180
181     /**
182      * Returns instance of SearchManager
183      *
184      * @param servletContext servlet context
185      * @return Instance of SearchManager
186      */

187     public static SearchManager getInstance(ServletContext JavaDoc servletContext) {
188         SearchManager ourInstance = (SearchManager) servletContext.getAttribute(INSTANCE_KEY);
189         if ( ourInstance == null ) {
190             ourInstance = new SearchManager(servletContext);
191             servletContext.setAttribute(INSTANCE_KEY, ourInstance);
192         }
193         return ourInstance;
194     }
195
196     /**
197      * Private constructor
198      *
199      * @param servletContext servlet context
200      */

201     protected SearchManager(ServletContext JavaDoc servletContext) {
202         applicationCtx = WebApplicationContextUtils.getRequiredWebApplicationContext(servletContext);
203
204         analyzers = new HashMap JavaDoc();
205
206         String JavaDoc indexDirPath = GlobalProperties.getInstance(servletContext).getString(SEARCH_DIR_KEY);
207         if ( indexDirPath == null || indexDirPath.length() == 0 ) {
208             String JavaDoc error = "The property '" + SEARCH_DIR_KEY + "' must be specified";
209             if ( log.isErrorEnabled() ) {
210                 log.error(error);
211             }
212             throw new RuntimeException JavaDoc(error);
213         }
214
215         //make lock dir if it does not exist
216
File JavaDoc lockDir = new File JavaDoc(FSDirectory.LOCK_DIR);
217         if (!lockDir.exists()) {
218             boolean created = true;
219             try {
220                 created = lockDir.mkdirs();
221             } catch ( Exception JavaDoc ex ) {
222                 created = false;
223             }
224             if ( !created ) {
225                 if ( log.isErrorEnabled() ) {
226                     log.error("Cannot create dir " + lockDir.getAbsolutePath() + " for search index lock files");
227                 }
228             }
229         } else {
230             if (!lockDir.canWrite() || !lockDir.canRead()) {
231                 if ( log.isErrorEnabled() ) {
232                     log.error("The system has not permissions to write into " + lockDir.getAbsolutePath());
233                 }
234             }
235         }
236
237         //create or unlock index dir
238
indexDir = new File JavaDoc(indexDirPath);
239         this.servletContext = servletContext;
240         if ( indexDir.exists() ) {
241             try {
242                 Directory index = FSDirectory.getDirectory(indexDir, false);
243                 if ( IndexReader.isLocked(indexDir.getAbsolutePath()) ) {
244                     IndexReader.unlock(index);
245                 }
246             } catch ( IOException JavaDoc ex ) {
247                 if ( log.isErrorEnabled() ) {
248                     log.error("Cannot remove lock from search index " + indexDir.getAbsolutePath());
249                 }
250             }
251         } else {
252             boolean created = true;
253             try {
254                 created = indexDir.mkdirs();
255             } catch ( Exception JavaDoc ex ) {
256                 created = false;
257             }
258             if ( !created ) {
259                 if ( log.isErrorEnabled() ) {
260                     log.error("Cannot create dir " + indexDir.getAbsolutePath() + " for search index");
261                 }
262             }
263
264             if ( log.isInfoEnabled() ) {
265                 log.info("Search manager initialized");
266             }
267         }
268
269         //recreate index
270
IndexWriter indexWriter = null;
271         try {
272             indexWriter = getIndexWriter("", true);
273         } finally {
274             closeIndexWriter(indexWriter);
275         }
276     }
277
278     // P A G E S
279

280     /**
281      * Indexes any child of <code>Page</code> class
282      *
283      * @param page page to index
284      * @param request request
285      */

286     public synchronized void indexPage(Page page, HttpServletRequest JavaDoc request) {
287         initialize(request);
288         if ( page instanceof ContentPage ) {
289             indexContentPage((ContentPage) page, request);
290         } else if (page instanceof ActionPage) {
291             indexActionPage((ActionPage) page, request);
292         } else {
293             String JavaDoc roles = WebappUtil.rolesToString(page.getRoles());
294             indexPageFields(page.getContentFields(), page, roles);
295         }
296     }
297
298     /**
299      * Reindexes any child of <code>Page</code> class
300      *
301      * @param page page to reindex
302      * @param request request
303      */

304     public synchronized void reIndexPage(Page page, HttpServletRequest JavaDoc request) {
305         unIndexPage(page.getUri(), request);
306         indexPage(page, request);
307     }
308
309     /**
310      * Unindexes page by specified uri
311      *
312      * @param uri page uri by which to remove from index
313      * @param request request
314      */

315     public synchronized void unIndexPage(String JavaDoc uri, HttpServletRequest JavaDoc request) {
316         initialize(request);
317         unIndex(uri);
318     }
319
320     /**
321      * Indexes content page including fields from its layout and its parent layouts
322      *
323      * @param contentPage content page to index
324      * @param request request
325      */

326     protected synchronized void indexContentPage(ContentPage contentPage, HttpServletRequest JavaDoc request) {
327         Collection JavaDoc allFields = null;
328         List JavaDoc layouts = new ArrayList JavaDoc();
329         LayoutManager layoutManager = (LayoutManager) applicationCtx.getBean(Constants.LAYOUT_MANAGER_BEAN);
330         String JavaDoc tmpDefinition = contentPage.getLayout().getDefinition();
331         try {
332             do {
333                 Layout layout = layoutManager.findLayoutByDefinition(tmpDefinition);
334                 if ( layout != null ) {
335                     layouts.add(layout);
336                 }
337                 tmpDefinition = ((HeritableComponentDefinition) TilesUtil.getDefinition(tmpDefinition, request, servletContext)).getExtends();
338             } while ( tmpDefinition != null );
339
340         } catch ( Exception JavaDoc ex ) {
341             if ( log.isErrorEnabled() ) {
342                 log.error("Cannot traverse definitions", ex);
343             }
344         }
345         Collections.reverse(layouts);
346         for ( int i = 0; i < layouts.size(); i++ ) {
347             Layout layout = (Layout) layouts.get(i);
348             allFields = WebappUtil.joinFields(allFields, layout.getContentFields());
349         }
350         allFields = WebappUtil.joinFields(allFields, contentPage.getContentFields());
351
352         indexPageFields(allFields, contentPage, WebappUtil.rolesToString(contentPage.getRoles()));
353     }
354
355     /**
356      * Indexes action page
357      * @param actionPage action page to index
358      * @param request request
359      */

360     protected synchronized void indexActionPage(ActionPage actionPage, HttpServletRequest JavaDoc request) {
361         String JavaDoc roles = ConvertUtil.convertListToString(WebappUtil.getAPRoleNamesAsList(actionPage.getUri(), request), ",");
362         indexPageFields(actionPage.getContentFields(), actionPage, roles);
363     }
364
365     // D O C U M E N T R E S O U R C E
366

367     /**
368      * Indexes document resource
369      *
370      * @param resourceDocument document to index
371      * @param request request
372      */

373     public synchronized void indexDocument(ContentDocument resourceDocument, HttpServletRequest JavaDoc request) {
374         initialize(request);
375         byte[] resourceData = resourceDocument.getResourceData().getData();
376         String JavaDoc language = resourceDocument.getContentLocale().getIdentifier();
377
378         String JavaDoc plainText = null;
379         try {
380             plainText = new PlainTextExtractor().extract(new ByteArrayInputStream JavaDoc(resourceData), resourceDocument.getMimeType(), resourceDocument.getCharset());
381         } catch ( UnsupportedMimeTypeException ex ) {
382             if ( log.isErrorEnabled() ) {
383                 log.error("Unsupported mime type " + resourceDocument.getMimeType(), ex);
384             }
385         } catch ( PlainTextExtractorException ex ) {
386             if ( log.isErrorEnabled() ) {
387                 log.error("Cannot parse resource document with mimetype " + resourceDocument.getMimeType(), ex);
388             }
389         }
390
391         if ( plainText != null ) {
392             Integer JavaDoc summarySize = GlobalProperties.getInstance(request.getSession().getServletContext()).getInteger(WebappConstants.DOCUMENT_SUMMARY_SIZE_KEY, new Integer JavaDoc(400));
393             String JavaDoc summary = plainText.substring(0, Math.min(plainText.length(), summarySize.intValue()));
394
395             Document document = new Document();
396             document.add(new Field(URI_FIELD, resourceDocument.getUri(), Field.Store.YES, Field.Index.UN_TOKENIZED));
397             document.add(new Field(BODY_FIELD, plainText, Field.Store.NO, Field.Index.TOKENIZED));
398             document.add(new Field(MODIFIED_FIELD, DateTools.dateToString(resourceDocument.getLastUpdatedDatetime(), DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));
399             document.add(new Field(TYPE_FIELD, DOCUMENT_TYPE, Field.Store.YES, Field.Index.UN_TOKENIZED));
400             document.add(new Field(LOCALE_FIELD, language, Field.Store.YES, Field.Index.UN_TOKENIZED));
401             document.add(new Field(SUMMARY_FIELD, summary, Field.Store.YES, Field.Index.NO));
402             String JavaDoc roles = WebappUtil.rolesToString(resourceDocument.getRoles());
403             if ( roles != null ) {
404                 document.add(new Field(ROLES_FIELD, roles, Field.Store.YES, Field.Index.NO));
405             }
406
407             String JavaDoc description = resourceDocument.getDescription();
408             if ( description != null && description.trim().length() > 0 ) {
409                 document.add(new Field(TITLE_FIELD, description, Field.Store.YES, Field.Index.TOKENIZED));
410             }
411
412             IndexWriter indexWriter = null;
413             try {
414                 indexWriter = getIndexWriter(language);
415
416                 if ( log.isDebugEnabled() ) {
417                     log.debug("Adding into index resource document: " + document + " for locale " + language);
418                 }
419                 //add document
420
indexWriter.addDocument(document);
421             } catch ( IOException JavaDoc ex ) {
422                 if ( log.isErrorEnabled() ) {
423                     log.error("Cannot add resource document into index ", ex);
424                 }
425             } finally {
426                 closeIndexWriter(indexWriter);
427             }
428         }
429     }
430
431     /**
432      * Reindexes document resource
433      *
434      * @param document document to reindex
435      * @param request request
436      */

437     public synchronized void reIndexDocument(ContentDocument document, HttpServletRequest JavaDoc request) {
438         unIndexDocument(document.getUri(), request);
439         indexDocument(document, request);
440     }
441
442     /**
443      * Unindexes document resource by specified uri
444      *
445      * @param uri resource URI by which to remove from index
446      * @param request request
447      */

448     public synchronized void unIndexDocument(String JavaDoc uri, HttpServletRequest JavaDoc request) {
449         initialize(request);
450         unIndex(uri);
451     }
452
453     // C O M M O N M E T H O D S
454

455     /**
456      * Indexes content of the fields, after it they will be available to search
457      *
458      * @param fields array of the <code>ContentField</code> objects
459      * @param page Page object
460      * @param roles comma separated list of roles
461      */

462     protected synchronized void indexPageFields(Collection JavaDoc fields, Page page, String JavaDoc roles) {
463         Map JavaDoc documents = new HashMap JavaDoc();
464         Map JavaDoc documentModifications = new HashMap JavaDoc();
465
466         if ( log.isDebugEnabled() ) {
467             log.debug("Indexing " + fields.size() + " fields for uri '" + page.getUri() + "'");
468         }
469
470         //iterate list of ContentFields
471
for ( Iterator JavaDoc iterator = fields.iterator(); iterator.hasNext(); ) {
472             ContentField contentField = (ContentField) iterator.next();
473             List JavaDoc contents = contentField.getContentFieldValues();
474             String JavaDoc identifier = contentField.getIdentifier();
475
476             //iterate list of ContentFieldValue
477
for ( int j = 0; j < contents.size(); j++ ) {
478                 ContentFieldValue contentFieldValue = (ContentFieldValue) contents.get(j);
479                 ContentLocale contentLocale = contentFieldValue.getContentLocale();
480                 String JavaDoc locale = contentLocale.getIdentifier();
481
482                 //convert to string
483
String JavaDoc value = null;
484                 if ( contentField.getType() == ContentField.LINE_TYPE ) {
485                     value = contentFieldValue.getSimpleValue();
486                 } else {
487                     value = ConvertUtil.convertToString(contentFieldValue.getValue(), Constants.DEFAULT_ENCODING);
488                 }
489                 if ( value == null ) {
490                     continue;
491                 }
492
493                 //looking up document for this locale in the map
494
Document document = (Document) documents.get(locale);
495                 //create if does not exist and add to the map
496
if ( document == null ) {
497                     document = new Document();
498                     documents.put(locale, document);
499                     document.add(new Field(URI_FIELD, page.getUri(), Field.Store.YES, Field.Index.UN_TOKENIZED));
500
501                     document.add(new Field(TYPE_FIELD, PAGE_TYPE, Field.Store.YES, Field.Index.UN_TOKENIZED));
502                     document.add(new Field(ROLES_FIELD, roles, Field.Store.YES, Field.Index.NO));
503                 }
504
505                 //put into map maximum date of modification
506
Date JavaDoc modificationDate = (Date JavaDoc) documentModifications.get(locale);
507                 if ( modificationDate == null ) {
508                     documentModifications.put(locale, (Date JavaDoc) contentFieldValue.getLastUpdatedDatetime());
509                     if ( log.isDebugEnabled() ) {
510                         log.debug("Add into documentModification locale-" + locale + ", date-" + contentFieldValue.getLastUpdatedDatetime());
511                     }
512                 } else if ( modificationDate.before((Date JavaDoc) contentFieldValue.getLastUpdatedDatetime()) ) {
513                     documentModifications.put(locale, (Date JavaDoc) contentFieldValue.getLastUpdatedDatetime());
514                     if ( log.isDebugEnabled() ) {
515                         log.debug("Update in documentModification locale-" + locale + ", date-" + contentFieldValue.getLastUpdatedDatetime());
516                     }
517                 }
518
519                 //add fields to document
520
if ( identifier.equalsIgnoreCase(ContentField.TITLE_IDENTIFIER) ) {
521                     document.add(new Field(BODY_FIELD, value, Field.Store.NO, Field.Index.TOKENIZED));
522                     document.add(new Field(TITLE_FIELD, value, Field.Store.YES, Field.Index.TOKENIZED));
523                 } else if ( identifier.equalsIgnoreCase(ContentField.KEYWORDS_IDENTIFIER) ) {
524                     String JavaDoc[] keywords = value.split(",");
525                     for ( int k = 0; k < keywords.length; k++ ) {
526                         //neither stored not tokenized, but indexed
527
document.add(new Field(BODY_FIELD, keywords[k], Field.Store.NO, Field.Index.UN_TOKENIZED));
528                     }
529                 } else if ( identifier.equalsIgnoreCase(ContentField.DESCRIPTION_IDENTIFIER) ) {
530                     document.add(new Field(BODY_FIELD, value, Field.Store.NO, Field.Index.TOKENIZED));
531                 } else {
532                     if ( ContentField.HTML_TYPE == contentField.getType() ) {
533                         try {
534                             String JavaDoc plainText = new PlainTextExtractor().extract(value, HTML_MIMETYPE);
535                             document.add(new Field(BODY_FIELD, plainText, Field.Store.NO, Field.Index.TOKENIZED));
536                         } catch ( Exception JavaDoc ex ) {
537                             if ( log.isErrorEnabled() ) {
538                                 log.error("The content cannot be parsed ", ex);
539                             }
540                         }
541                     } else {
542                         document.add(new Field(BODY_FIELD, value, Field.Store.NO, Field.Index.TOKENIZED));
543                     }
544                 }
545             } //end of FeildContent iteration
546
} //end of Feild iteration
547

548         //iterate locales and index document for each locale
549
Iterator JavaDoc localesIterator = documents.keySet().iterator();
550         while ( localesIterator.hasNext() ) {
551             String JavaDoc locale = (String JavaDoc) localesIterator.next();
552             Document document = (Document) documents.get(locale);
553             IndexWriter indexWriter = null;
554             try {
555                 indexWriter = getIndexWriter(locale);
556                 //add modification
557
Date JavaDoc modificationDate = (Date JavaDoc) documentModifications.get(locale);
558                 if ( modificationDate != null ) {
559                     document.add(new Field(MODIFIED_FIELD, DateTools.dateToString(modificationDate, DateTools.Resolution.SECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));
560                 }
561
562                 //add locale
563
document.add(new Field(LOCALE_FIELD, locale, Field.Store.YES, Field.Index.UN_TOKENIZED));
564
565                 if ( log.isDebugEnabled() ) {
566                     log.debug("Adding into index document: " + document + " for locale " + locale);
567                 }
568
569                 //add document
570
indexWriter.addDocument(document);
571             } catch ( IOException JavaDoc ex ) {
572                 if ( log.isErrorEnabled() ) {
573                     log.error("Cannot add document into index ", ex);
574                 }
575             } finally {
576                 closeIndexWriter(indexWriter);
577             }
578         } //end locale iterator
579
}
580
581     /**
582      * Removes documents with specified uri from index
583      *
584      * @param uri module-relative uri
585      */

586     protected synchronized void unIndex(String JavaDoc uri) {
587         loadUnindexIndexReader();
588         Term term = new Term(URI_FIELD, uri);
589         if ( log.isDebugEnabled() ) {
590             log.debug("Try to unindex documents using term: " + term.toString());
591         }
592         try {
593             int number = unindexIndexReader.deleteDocuments(term);
594             if ( log.isDebugEnabled() ) {
595                 log.debug("Unindexed " + number + " documents using uri: " + uri);
596             }
597         } catch ( IOException JavaDoc ex ) {
598             if ( log.isErrorEnabled() ) {
599                 log.error("Cannot remove documents from index ", ex);
600             }
601         }
602     }
603
604     /**
605      * Unindexes all documents by locale
606      *
607      * @param locale locale for which documents must be unindexed
608      * @param request request
609      */

610     public synchronized void unIndexByLocale(String JavaDoc locale, HttpServletRequest JavaDoc request) {
611         initialize(request);
612         loadUnindexIndexReader();
613         Term term = new Term(LOCALE_FIELD, locale);
614         if ( log.isDebugEnabled() ) {
615             log.debug("Try to unindex documents using term: " + term.toString());
616         }
617         try {
618             int number = unindexIndexReader.deleteDocuments(term);
619             if ( log.isDebugEnabled() ) {
620                 log.debug("Unindexed " + number + " documents using locale: " + locale);
621             }
622         } catch ( IOException JavaDoc ex ) {
623             if ( log.isErrorEnabled() ) {
624                 log.error("Cannot remove documents from index ", ex);
625             }
626         }
627     }
628
629
630     /**
631      * Searches by query string and highlights results only for the current page
632      *
633      * @param hits hits list from previous page or null
634      * @param criteria query string
635      * @param locale current language
636      * @param allLanguages if <code>true</code>, the search is performed on all languages but using the analyzer of the specified locale;
637      * otherwise the search is performed only on the specified locale
638      * @param days if greater than zero, show results only for the last N days, otherwise all
639      * @param roles comma separated list of roles of the current user, used to restrict results by roles
640      * @param offset offset for pagination
641      * @param pageSize pageSize for pagination
642      * @param request http request
643      * @return list of <code>Hit</code>
644      * @throws ParseException if there is an error during parsing of the criteria
645      * @throws IOException if there is a problem with writing or reading index
646      */

647     public synchronized List JavaDoc search(List JavaDoc hits, String JavaDoc criteria, String JavaDoc locale, boolean allLanguages, int days, String JavaDoc roles, int offset, int pageSize, HttpServletRequest JavaDoc request)
648             throws ParseException, IOException JavaDoc, java.text.ParseException JavaDoc {
649         initialize(request);
650
651         loadIndexSearcher();
652
653         QueryParser queryParser = new QueryParser(BODY_FIELD, getAnalyzer(locale));
654         Query query = queryParser.parse(criteria);
655
656         if ( hits == null ) {
657             if ( log.isDebugEnabled() ) {
658                 log.debug("Searching for: " + query.toString(BODY_FIELD));
659             }
660
661             List JavaDoc filters = new ArrayList JavaDoc();
662             //locale
663
if (!allLanguages) {
664                 QueryFilter localeFilter = (QueryFilter)localeFilters.get(locale);
665                 if (localeFilter == null) {
666                     localeFilter = new QueryFilter(new TermQuery(new Term(LOCALE_FIELD, locale)));
667                     localeFilters.put(locale, localeFilter);
668                 }
669                 filters.add(localeFilter);
670             }
671
672             //roles
673
filters.add(new CachingWrapperFilter(new RolesFilter(roles)));
674
675             //time period in days
676
if ( days > 0 ) {
677                 Calendar JavaDoc calendar = Calendar.getInstance();
678                 calendar.add(Calendar.DATE, -days);
679                 filters.add(new CachingWrapperFilter(RangeFilter.More(SearchManager.MODIFIED_FIELD, DateTools.dateToString(calendar.getTime(), DateTools.Resolution.SECOND))));
680             }
681
682             ChainedFilter chainedFilter = new ChainedFilter((Filter[])filters.toArray(new Filter[filters.size()]), ChainedFilter.AND);
683
684             hits = prepareHitsList(indexSearcher.search(query, chainedFilter), locale, request);
685         }
686
687         int limit = (offset + pageSize) > hits.size() ? hits.size() : (offset + pageSize);
688
689         //preparing search results only for requested page
690
for ( int i = offset; i < limit; i++ ) {
691             Hit hit = (Hit) hits.get(i);
692
693             //in order to do not do double work
694
if (hit.getHighlightedText() != null)
695                 continue;
696
697             String JavaDoc type = hit.getType();
698             //for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query!
699
query = query.rewrite(searchIndexReader);
700             Highlighter highlighter = new Highlighter(new QueryScorer(query));
701             highlighter.setTextFragmenter(new SimpleFragmenter(RESULT_FRAGMENT_SIZE));
702
703             String JavaDoc documentLocale = hit.getLocale();
704             Analyzer documentAnalyzer = getAnalyzer(documentLocale);
705
706             if ( !DOCUMENT_TYPE.equalsIgnoreCase(type) ) {
707
708                 PageManager pageManager = (PageManager) applicationCtx.getBean(Constants.PAGE_MANAGER_BEAN);
709                 Page page = pageManager.findPageByUri(hit.getOriginalUri());
710                 if (page == null) {
711                     if(log.isWarnEnabled()) {
712                         log.warn("Search index is not synchronized with database. Page with uri=" + hit.getOriginalUri() + " was not found");
713                     }
714                     continue;
715                 }
716                 Collection JavaDoc fields = page.getContentFields();
717
718                 if ( fields != null && fields.size() > 0 ) {
719                     StringBuffer JavaDoc highlightedText = new StringBuffer JavaDoc("");
720                     for ( Iterator JavaDoc iterator = fields.iterator(); iterator.hasNext(); ) {
721                         ContentField contentField = (ContentField) iterator.next();
722                         //keywords cannot be shown in the result
723
if ( !contentField.getIdentifier().equalsIgnoreCase(ContentField.KEYWORDS_IDENTIFIER) ) {
724                             List JavaDoc fieldValues = contentField.getContentFieldValues();
725                             for ( int j = 0; j < fieldValues.size(); j++ ) {
726                                 ContentFieldValue contentFieldValue = (ContentFieldValue) fieldValues.get(j);
727                                 if ( contentFieldValue.getContentLocale().getIdentifier().equalsIgnoreCase(documentLocale) ) {
728                                     String JavaDoc content = null;
729                                     if ( contentField.getType() == ContentField.LINE_TYPE ) {
730                                         content = contentFieldValue.getSimpleValue();
731                                     } else {
732                                         content = ConvertUtil.convertToString(contentFieldValue.getValue(), Constants.DEFAULT_ENCODING);
733                                     }
734                                     if (content == null || content.trim().length() == 0)
735                                         continue;
736                                     if ( contentField.getIdentifier().equalsIgnoreCase(ContentField.TITLE_IDENTIFIER) ) {
737                                         content = StringUtil.htmlEncode(content);
738                                         TokenStream tokenStream = documentAnalyzer.tokenStream(null, new StringReader JavaDoc(content));
739                                         String JavaDoc fragment = highlighter.getBestFragment(tokenStream, content);
740                                         if ( fragment != null && fragment.length() > 0 ) {
741                                             hit.setTitle(fragment);
742                                         }
743                                     } else {
744                                         if ( ContentField.HTML_TYPE == contentField.getType() ) {
745                                             try {
746                                                 content = new PlainTextExtractor().extract(content, HTML_MIMETYPE);
747                                             } catch ( Exception JavaDoc ex ) {
748                                                 if ( log.isErrorEnabled() ) {
749                                                     log.error("The content cannot be parsed ", ex);
750                                                 }
751                                             }
752                                         } else {
753                                             content = StringUtil.htmlEncode(content);
754                                         }
755                                         TokenStream tokenStream = documentAnalyzer.tokenStream(null, new StringReader JavaDoc(content));
756                                         String JavaDoc fragment = highlighter.getBestFragments(tokenStream, content, RESULT_FRAGMENT_NUMBER, RESULT_FRAGMENT_DELIMITER);
757                                         if ( fragment != null && fragment.length() > 0 ) {
758                                             highlightedText.append(fragment).append(RESULT_FRAGMENT_DELIMITER);
759                                         }
760                                     }
761                                 }
762                             }
763                         }
764                     }
765
766                     String JavaDoc highlightedTextString = highlightedText.toString();
767                     if ( highlightedTextString != null && highlightedTextString.length() > 0 ) {
768                         if ( log.isDebugEnabled() ) {
769                             log.debug("Highlighted text: " + highlightedTextString);
770                         }
771                         hit.setHighlightedText(highlightedTextString);
772                     }
773                 }
774             } else /* DOCUMENT type */ {
775
776                 String JavaDoc summary = hit.getSummary();
777                 if ( summary != null ) {
778                     summary = StringUtil.htmlEncode(summary);
779                     TokenStream tokenStream = documentAnalyzer.tokenStream(null, new StringReader JavaDoc(summary));
780                     String JavaDoc fragment = highlighter.getBestFragments(tokenStream, summary, RESULT_FRAGMENT_NUMBER, RESULT_FRAGMENT_DELIMITER);
781                     if ( fragment != null && fragment.length() > 0 ) {
782                         if ( log.isDebugEnabled() ) {
783                             log.debug("Highlighted text: " + fragment);
784                         }
785                         hit.setHighlightedText(fragment);
786                     }
787                 }
788
789             }
790         }
791
792         return hits;
793     }
794
795     /**
796      * This method prepares list of <code>Hit</code> objects. It filters list by roles and setting up general properties
797      *
798      * @param hits hit list to prepare
799      * @param request request
800      * @return prepared and filtered list of <code>Hit</code> objects
801      */

802     protected synchronized List JavaDoc prepareHitsList(Hits hits, String JavaDoc locale, HttpServletRequest JavaDoc request) throws IOException JavaDoc, java.text.ParseException JavaDoc {
803         if ( log.isDebugEnabled() ) {
804             log.debug("Found " + hits.length() + " hits");
805         }
806
807         List JavaDoc result = new ArrayList JavaDoc(hits.length());
808         for ( int i = 0; i < hits.length(); i++ ) {
809             Document doc = hits.doc(i);
810             String JavaDoc type = doc.get(TYPE_FIELD);
811             String JavaDoc uri = doc.get(SearchManager.URI_FIELD);
812             String JavaDoc documentLocale = doc.get(LOCALE_FIELD);
813             if ( log.isDebugEnabled() ) {
814                 log.debug("Checking for roles hit with uri=" + uri + ", type=" + type);
815             }
816
817             Hit hit = new Hit();
818
819             //original uri
820
hit.setOriginalUri(uri);
821
822             //set document locale
823
hit.setLocale(documentLocale);
824
825             //uri
826
String JavaDoc fullUri;
827             if ( DOCUMENT_TYPE.equalsIgnoreCase(type) ) {
828                 fullUri = request.getContextPath() + uri;
829             } else {
830                 fullUri = WebappUtil.getActionMappingURL(uri, null, request, WebappConstants.URL_TYPE_DOMAIN_RELATIVE, documentLocale);
831             }
832             hit.setUri(fullUri);
833
834             //type
835
hit.setType(type);
836
837             //title
838
String JavaDoc title = doc.get(SearchManager.TITLE_FIELD);
839             if ( title == null || title.trim().length() == 0 ) {
840                 title = fullUri;
841             }
842             hit.setTitle(title);
843
844             //modified date
845
Date JavaDoc modifiedDate = DateTools.stringToDate(doc.get(SearchManager.MODIFIED_FIELD));
846             String JavaDoc date = DateUtil.formatDateTime(modifiedDate, new Locale JavaDoc(locale));
847             hit.setModified(date);
848
849             //setting score
850
int score = Math.round(hits.score(i) * 100.0F);
851             hit.setScore(score);
852
853             //summary (used only by document now)
854
hit.setSummary(doc.get(SUMMARY_FIELD));
855
856             result.add(hit);
857         }
858         return result;
859     }
860
861     // U T I L M E T H O D S
862

863     /**
864      * Loads index searcher
865      */

866     protected synchronized void loadIndexSearcher() {
867         loadSearchIndexReader();
868         indexSearcher = new IndexSearcher(searchIndexReader);
869     }
870
871
872     /**
873      * Closes indexSearcher
874      */

875     protected synchronized void closeIndexSearcher() {
876         try {
877             if ( indexSearcher != null ) {
878                 indexSearcher.close();
879                 indexSearcher = null;
880             }
881         } catch ( IOException JavaDoc ex ) {
882             //do nothing
883
}
884     }
885
886
887     /**
888      * Loads index reader for search
889      */

890     protected synchronized void loadSearchIndexReader() {
891         closeUnindexIndexReader();
892         if (searchIndexReader == null)
893             searchIndexReader = getIndexReader();
894     }
895
896
897     /**
898      * Closes indexReader for search
899      */

900     protected synchronized void closeSearchIndexReader() {
901         closeIndexSearcher();
902         try {
903             if ( searchIndexReader != null ) {
904                 searchIndexReader.close();
905                 searchIndexReader = null;
906             }
907         } catch ( IOException JavaDoc ex ) {
908             //do nothing
909
}
910     }
911
912     /**
913      * Loads index reader for unindex
914      */

915     protected synchronized void loadUnindexIndexReader() {
916         closeSearchIndexReader();
917         if (unindexIndexReader == null)
918             unindexIndexReader = getIndexReader();
919     }
920
921     /**
922      * Closes indexReader for unindex
923      */

924     protected synchronized void closeUnindexIndexReader() {
925         try {
926             if ( unindexIndexReader != null ) {
927                 unindexIndexReader.close();
928                 unindexIndexReader = null;
929                 optimizeIndex();
930             }
931         } catch ( IOException JavaDoc ex ) {
932             //do nothing
933
}
934     }
935
936     /**
937      * Gets index reader
938      */

939     protected synchronized IndexReader getIndexReader() {
940         IndexReader indexReader = null;
941         try {
942             indexReader = IndexReader.open(indexDir);
943         } catch ( IOException JavaDoc ex ) {
944             String JavaDoc error = "Cannot open index for read in " + indexDir.getAbsolutePath()
945                     + File.pathSeparatorChar
946                     + indexDir.getName();
947             if ( log.isErrorEnabled() ) {
948                 log.error(error, ex);
949             }
950             throw new RuntimeException JavaDoc(error, ex);
951         }
952         return indexReader;
953     }
954
955
956     /**
957      * Loads index writer
958      *
959      * @param locale language code
960      * @param create true to create the index or overwrite the existing one; false to append to the existing index
961      */

962     protected synchronized IndexWriter getIndexWriter(String JavaDoc locale, boolean create) {
963         if (!create) {
964             closeSearchIndexReader();
965             closeUnindexIndexReader();
966         }
967
968         IndexWriter indexWriter = null;
969         try {
970             indexWriter = new IndexWriter(indexDir, getAnalyzer(locale), create);
971         } catch ( IOException JavaDoc ex ) {
972             String JavaDoc error = "Cannot open index for write in " + indexDir.getAbsolutePath();
973             if ( log.isErrorEnabled() ) {
974                 log.error(error, ex);
975             }
976             throw new RuntimeException JavaDoc(error, ex);
977         }
978         return indexWriter;
979     }
980
981     /**
982      * Loads index writer, does not create/overwrite index
983      *
984      * @param locale language code
985      */

986     protected synchronized IndexWriter getIndexWriter(String JavaDoc locale) {
987         return getIndexWriter(locale, false);
988     }
989
990
991     /**
992      * Finalizes indexWriter
993      */

994     protected synchronized void closeIndexWriter(IndexWriter indexWriter) {
995         try {
996             if ( indexWriter != null ) {
997                 indexWriter.optimize();
998                 indexWriter.close();
999                 indexWriter = null;
1000            }
1001        } catch ( IOException JavaDoc ex ) {
1002            //do nothing
1003
}
1004    }
1005
1006    /**
1007     * Optimizes index
1008     */

1009    protected synchronized void optimizeIndex() {
1010        IndexWriter indexWriter = null;
1011        try {
1012            indexWriter = getIndexWriter("");
1013        } finally {
1014            closeIndexWriter(indexWriter);
1015        }
1016    }
1017
1018    /**
1019     * Gets Analyzer for particular locale
1020     *
1021     * @param locale language code
1022     * @return Analyzer
1023     */

1024    protected Analyzer getAnalyzer(String JavaDoc locale) {
1025        Analyzer analyzer = (Analyzer)analyzers.get(locale);
1026        if (analyzer != null)
1027            return analyzer;
1028
1029        //TODO Need to add stop words for some languages and maybe some other languages
1030
if ( "en".equalsIgnoreCase(locale) ) {
1031            analyzer = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS);
1032        } else if ( "ru".equalsIgnoreCase(locale) ) {
1033            analyzer = new RussianAnalyzer();
1034        } else if ( "es".equalsIgnoreCase(locale) ) {
1035            analyzer = new SpanishAnalyzer();
1036        } else if ( "de".equalsIgnoreCase(locale) ) {
1037            analyzer = new GermanAnalyzer();
1038        } else if ( "pt".equalsIgnoreCase(locale) ) {
1039            analyzer = new PortugueseAnalyzer();
1040        } else if ( "da".equalsIgnoreCase(locale) ) {
1041            analyzer = new SnowballAnalyzer("Danish");
1042        } else if ( "fi".equalsIgnoreCase(locale) ) {
1043            analyzer = new SnowballAnalyzer("Finnish");
1044        } else if ( "fr".equalsIgnoreCase(locale) ) {
1045            analyzer = new FrenchAnalyzer();
1046        } else if ( "it".equalsIgnoreCase(locale) ) {
1047            analyzer = new SnowballAnalyzer("Italian");
1048        } else if ( "no".equalsIgnoreCase(locale) ) {
1049// analyzer = new SnowballAnalyzer("Norwegian");
1050
analyzer = new NorwegianAnalyzer();
1051        } else if ( "sv".equalsIgnoreCase(locale) ) {
1052            analyzer = new SnowballAnalyzer("Swedish");
1053        } else if ( "nl".equalsIgnoreCase(locale) ) {
1054            analyzer = new DutchAnalyzer();
1055        } else if ( "ja".equalsIgnoreCase(locale) ) {
1056            analyzer = new CJKAnalyzer();
1057        } else if ( "ko".equalsIgnoreCase(locale) ) {
1058            analyzer = new CJKAnalyzer();
1059// } else if ( "zh".equalsIgnoreCase(locale) ) {
1060
// analyzer = new ChineseAnalyzer();
1061
} else if ( "zh".equalsIgnoreCase(locale) ) {
1062            analyzer = new CJKAnalyzer();
1063        } else if ( "cs".equalsIgnoreCase(locale) ) {
1064            analyzer = new CzechAnalyzer();
1065        } else if ( "el".equalsIgnoreCase(locale) ) {
1066            analyzer = new GreekAnalyzer();
1067        } else {
1068            analyzer = new StandardAnalyzer();
1069        }
1070
1071        analyzers.put(locale, analyzer);
1072
1073        return analyzer;
1074    }
1075
1076
1077    /**
1078     * Initializes empty index, in our case indexes all initial pages and resources
1079     * @param request
1080     */

1081    protected synchronized void initialize(HttpServletRequest JavaDoc request) {
1082        if (!initialized) {
1083            initialized = true;
1084            indexAll(request);
1085        }
1086    }
1087
1088    /**
1089     * Closes searcher and reader
1090     *
1091     * @throws Throwable
1092     */

1093    protected void finalize() throws Throwable JavaDoc {
1094        closeSearchIndexReader();
1095        closeUnindexIndexReader();
1096        super.finalize();
1097    }
1098
1099
1100    /**
1101     * Reindexes all entities.
1102     */

1103    public synchronized void reIndexAll(HttpServletRequest JavaDoc request) {
1104        PageManager pageManager = (PageManager) applicationCtx.getBean(Constants.PAGE_MANAGER_BEAN);
1105        Collection JavaDoc pages = pageManager.listPagesFetching();
1106
1107        ContentResourceManager resourceManager = (ContentResourceManager) applicationCtx.getBean(Constants.CONTENT_RESOURCE_MANAGER_BEAN);
1108        Collection JavaDoc resources = resourceManager.listContentDocuments(null);
1109
1110        //unindex
1111
for ( Iterator JavaDoc iterator = pages.iterator(); iterator.hasNext(); ) {
1112            Page page = (Page) iterator.next();
1113            unIndexPage(page.getUri(), request);
1114        }
1115        for ( Iterator JavaDoc iterator = resources.iterator(); iterator.hasNext(); ) {
1116            ContentDocument document = (ContentDocument) iterator.next();
1117            unIndexDocument(document.getUri(), request);
1118        }
1119
1120        //index
1121
for ( Iterator JavaDoc iterator = pages.iterator(); iterator.hasNext(); ) {
1122            Page page = (Page) iterator.next();
1123            indexPage(page, request);
1124        }
1125        for ( Iterator JavaDoc iterator = resources.iterator(); iterator.hasNext(); ) {
1126            ContentDocument document = (ContentDocument) iterator.next();
1127            indexDocument(document, request);
1128        }
1129
1130        if (log.isInfoEnabled()) {
1131            log.info("All pages and resources index rebuilt");
1132        }
1133    }
1134
1135    /**
1136     * Indexes all entities.
1137     *
1138     * @param request request which is being processed
1139     */

1140    public synchronized void indexAll(HttpServletRequest JavaDoc request) {
1141        PageManager pageManager = (PageManager) applicationCtx.getBean(Constants.PAGE_MANAGER_BEAN);
1142        //reindex pages
1143
Collection JavaDoc pages = pageManager.listPagesFetching();
1144        for ( Iterator JavaDoc iterator = pages.iterator(); iterator.hasNext(); ) {
1145            Page page = (Page) iterator.next();
1146            indexPage(page, request);
1147        }
1148
1149        // documents
1150
ContentResourceManager resourceManager = (ContentResourceManager) applicationCtx.getBean(Constants.CONTENT_RESOURCE_MANAGER_BEAN);
1151        Collection JavaDoc resources = resourceManager.listContentDocuments(null);
1152        for ( Iterator JavaDoc iterator = resources.iterator(); iterator.hasNext(); ) {
1153            ContentDocument document = (ContentDocument) iterator.next();
1154            indexDocument(document, request);
1155        }
1156    }
1157
1158
1159}
1160
Popular Tags