KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > search > JahiaSearchBaseService


1 //
2
// ____.
3
// __/\ ______| |__/\. _______
4
// __ .____| | \ | +----+ \
5
// _______| /--| | | - \ _ | : - \_________
6
// \\______: :---| : : | : | \________>
7
// |__\---\_____________:______: :____|____:_____\
8
// /_____|
9
//
10
// . . . i n j a h i a w e t r u s t . . .
11
//
12
//
13
// JahiaSearchService
14
// NK 25.01.2002 Implementation based on Lucene engine.
15
//
16
//
17
package org.jahia.services.search;
18
19
20 import java.io.*;
21 import java.lang.reflect.Method JavaDoc;
22 import java.util.*;
23
24 import org.apache.log4j.Logger;
25 import org.apache.lucene.analysis.*;
26 import org.apache.lucene.document.*;
27 import org.apache.lucene.index.*;
28 import org.apache.lucene.store.*;
29 import org.htmlparser.Parser;
30 import org.htmlparser.lexer.Lexer;
31 import org.jahia.content.*;
32 import org.jahia.data.fields.*;
33 import org.jahia.data.search.*;
34 import org.jahia.exceptions.*;
35 import org.jahia.params.*;
36 import org.jahia.registries.*;
37 import org.jahia.services.cache.*;
38 import org.jahia.services.fields.*;
39 import org.jahia.services.search.analyzer.*;
40 import org.jahia.services.sites.*;
41 import org.jahia.services.version.*;
42 import org.jahia.settings.*;
43 import org.jahia.utils.*;
44 import org.jahia.utils.fileparsers.FileExtractor;
45 import org.springframework.beans.factory.xml.XmlBeanFactory;
46 import org.springframework.beans.factory.BeanFactory;
47 import org.jahia.utils.fileparsers.FileExtractorsConfig;
48 import org.jahia.bin.Jahia;
49 import org.quartz.Scheduler;
50 import org.quartz.JobDataMap;
51 import org.quartz.SimpleTrigger;
52
53 /**
54  * Search Service based on Lucene engine.
55  * This service is a little unique, since the indexing is done by a background
56  * thread that executes the indexing orders. This is done because indexing is
57  * a slow task and there is no reason that the user should wait on it.
58  *
59  * @author Khue Nguyen <a HREF="mailto:khue@jahia.org">khue@jahia.org</a>
60  * @version 1.0
61  */

62 public class JahiaSearchBaseService extends JahiaSearchService implements
63         CacheListener, Runnable JavaDoc {
64
65     public static final String JavaDoc SEARCH_INDEX_ORDERS_CACHE = "SEARCH_INDEX_ORDER_CACHE";
66
67     public static final String JavaDoc FULL_SITE_INDEXATION_STATUS_CACHE = "FULL_SITE_INDEXATION_STATUS_CACHE";
68
69     public static final String JavaDoc RAM_INDEXER_DOCUMENT = "RAM_INDEXER_DOCUMENT";
70     
71     public static final String JavaDoc[] EMPTY_STOP_WORDS = new String JavaDoc[]{};
72
73     protected static Logger logger = Logger.getLogger (JahiaSearchBaseService.class);
74     
75     private static Map searchMethodMap = new HashMap(2);
76
77     /** The unique instance of this service * */
78     private static JahiaSearchBaseService theObject;
79
80     /** Constants * */
81     private static final String JavaDoc searchIndexesDir = "search_indexes";
82
83     /** The indexes root path * */
84     private String JavaDoc searchIndexesDiskPath = "";
85
86     /** The Lucene search analyzer * */
87     private Analyzer analyzer;
88
89     /** The Lucene analyzer used for indexing */
90     private Analyzer indexAnalyzer;
91
92     /** search handlers * */
93     private Properties searchHandlers;
94
95     private byte[] lock = new byte[0];
96
97     // Cache used as a synchronized hub in a load-balanced environment
98
private Cache indexOrdersCache;
99
100     // list of all removed/added fields to add/remove from the index in background
101
private Vector indexOrders; // the pending add/remove orders
102

103     private IndexationJobDetail scheduledIndexationJob;
104     private long scheduledIndexationInterval = 1000;
105     private long optimizationInterval = 120000;
106
107     private Thread JavaDoc backgroundIndexingThread;
108     private boolean indexingThreadActivated = true;
109
110     private boolean localIndexing = true;
111
112     private HashMap FileExtractors = new HashMap();
113
114     private String JavaDoc fileExtractorsConfigFilePath = "";
115
116     private BeanFactory fileExtractorsFactory = null;
117
118     private Properties searchSettings = null;
119
120     private Cache fullSiteIndexationStatusCache;
121
122     private RAMIndexer ramIndexer;
123     
124     private boolean autoAppendWildcards;
125
126     /**
127      * Constructor
128      * Client should always call getInstance() method
129      */

130     protected JahiaSearchBaseService () {
131         logger.debug ("***** Starting the Jahia Search Service *****");
132         String JavaDoc[] stopWord = getStopWords();
133         analyzer = new StandardAnalyzer (stopWord);
134         indexAnalyzer = new StandardAnalyzer (stopWord,true);
135         searchHandlers = new Properties ();
136         searchHandlers.setProperty (JahiaSearcher.PAGE_SEARCHER, "doPageSearch");
137         searchHandlers.setProperty (JahiaSearcher.CONTAINER_SEARCHER,
138                 "doContainerSearch");
139     }
140
141     //--------------------------------------------------------------------------
142
/**
143      * Returns the unique instance of this service.
144      */

145     public static JahiaSearchBaseService getInstance () {
146         if (theObject == null) {
147           synchronized (JahiaSearchBaseService.class) {
148             if (theObject == null) {
149               theObject = new JahiaSearchBaseService ();
150             }
151           }
152         }
153         return theObject;
154     }
155
156     //--------------------------------------------------------------------------
157
/**
158      * Initialization
159      *
160      * @param jSettings
161      */

162     public synchronized void init (SettingsBean jSettings)
163         throws JahiaInitializationException {
164
165         logger.debug("Jahia Main Thread name=" + Thread.currentThread().getName());
166
167         if ( !this.isInitialized() ){
168
169             indexOrders = new Vector();
170
171             loadSearchSettings(jSettings);
172
173             indexOrdersCache = CacheFactory.createCache(
174                 SEARCH_INDEX_ORDERS_CACHE);
175             indexOrdersCache.registerListener(this);
176
177             fullSiteIndexationStatusCache = CacheFactory.createCache(
178                 FULL_SITE_INDEXATION_STATUS_CACHE);
179
180             fullSiteIndexationStatusCache.registerListener(this);
181
182             File f = new File(searchIndexesDiskPath);
183             // Create lucene search index repository if not exists.
184
if (!f.isDirectory()) {
185                 f.mkdir();
186             }
187
188             // now let's remove any stale lock files if there were some.
189
removeStaleLockFiles(searchIndexesDiskPath);
190
191             // fileExtractors
192
StringBuffer JavaDoc buff = new StringBuffer JavaDoc(jSettings.getJahiaEtcDiskPath());
193             buff.append(File.separator);
194             buff.append("config");
195             buff.append(File.separator);
196             buff.append("fileextractor.xml");
197             this.fileExtractorsConfigFilePath = buff.toString();
198             try {
199                 loadFileExtractors();
200             } catch ( Throwable JavaDoc t ){
201                 throw new JahiaInitializationException("Error loading config file :"
202                                                         + this.fileExtractorsConfigFilePath, t);
203             }
204
205             mIsServiceInitialized = true;
206
207             // launch indexation Job
208
this.startScheduledIndexationJob();
209
210             if ( this.indexingThreadActivated ){
211                 backgroundIndexingThread = new Thread JavaDoc(theObject,
212                     "Background content indexing");
213                 int priority = Thread.NORM_PRIORITY;
214                 String JavaDoc val = searchSettings.getProperty("org.apache.lucene.backgroundIndexing.priority");
215                 if ( val != null){
216                     try {
217                         priority = Integer.parseInt(val);
218                     } catch ( NumberFormatException JavaDoc ex ){
219                       logger.warn("Invalid value for thread priority. Using Thread.NORM_PRIORITY", ex);
220                     }
221                 }
222                 backgroundIndexingThread.setPriority(priority);
223                 backgroundIndexingThread.setDaemon(true);
224                 backgroundIndexingThread.start(); // start background thread
225
logger.debug("backgroundIndexingThread name=" + backgroundIndexingThread.getName());
226             }
227             logger.debug("Initialized");
228         }
229     }
230
231     private void loadSearchSettings(SettingsBean jSettings) throws JahiaInitializationException{
232         searchSettings = (Properties)Jahia.getConfigBeanFactory().getBean("searchSettings");
233         if ( searchSettings == null ){
234             throw new JahiaInitializationException("searchSettings bean not found in config.xml");
235         }
236         String JavaDoc val = searchSettings.getProperty("inMemoryIndex.allowed","true");
237         if ( "true".equalsIgnoreCase(val) ){
238             int i = 1000;
239             try {
240                 i = Integer.parseInt(searchSettings
241                                      .getProperty("inMemoryIndex.maxDocs","1000"));
242             } catch ( Throwable JavaDoc t ){
243             }
244             this.ramIndexer = new RAMIndexer(indexAnalyzer,i);
245         }
246         try {
247             val = searchSettings.getProperty("optimizationInterval");
248             this.optimizationInterval = Integer.parseInt(val);
249         } catch ( NumberFormatException JavaDoc ex ){
250           logger.warn("Invalid integer value for optimizationInterval", ex);
251         }
252         if ( this.optimizationInterval < 10000 ){
253             this.optimizationInterval = 10000;
254         }
255
256         String JavaDoc key = null;
257         String JavaDoc value = null;
258         int mergeFactor = -1;
259         int minMergeDocs = -1;
260         Enumeration names = searchSettings.propertyNames();
261         while ( names.hasMoreElements() ) {
262             key = (String JavaDoc)names.nextElement();
263             if ( key.startsWith("org.apache.lucene") ){
264                 value = searchSettings.getProperty(key);
265                 if ( value != null && !"".equals(value.trim()) ){
266                     System.setProperty(key, value);
267                     if ("org.apache.lucene.mergeFactor".equals(key) ){
268                         try {
269                             mergeFactor = Integer.parseInt(value);
270                         } catch ( NumberFormatException JavaDoc ex ){
271                           logger.warn("Invalid integer value for org.apache.lucene.mergeFactor", ex);
272                         }
273                     }
274                     if ("org.apache.lucene.minMergeDocs".equals(key) ){
275                         try {
276                             minMergeDocs = Integer.parseInt(value);
277                         } catch ( NumberFormatException JavaDoc ex ){
278                           logger.warn("Invalid integer value for org.apache.lucene.minMergeDocs", ex);
279                         }
280                     }
281                 }
282             }
283         }
284         if ( mergeFactor == -1 ){
285             System.setProperty("org.apache.lucene.mergeFactor", String.valueOf(50));
286         }
287         if ( minMergeDocs == -1 ){
288             System.setProperty("org.apache.lucene.minMergeDocs", String.valueOf(1000));
289         }
290
291         val = searchSettings.getProperty("scheduledJobInterval");
292         if ( val != null ){
293             try {
294                 this.scheduledIndexationInterval = Long.parseLong(val);
295             } catch ( NumberFormatException JavaDoc ex) {
296               logger.warn("Invalid long value for scheduledJobInterval", ex);
297             }
298         }
299
300         val = searchSettings.getProperty("org.apache.lucene.searchIndexRootDir");
301         if ( val != null ){
302             val = JahiaTools.convertContexted (val,
303                     Jahia.getSettings().getPathResolver());
304             File f = new File(val);
305             if ( f.isAbsolute() ){
306                 searchIndexesDiskPath = val;
307             } else {
308                 if (val.startsWith("/")) {
309                     searchIndexesDiskPath = org.jahia.bin.Jahia.
310                         getStaticServletConfig()
311                         .getServletContext().getRealPath(val);
312                 } else {
313                     searchIndexesDiskPath = val;
314                 }
315             }
316         } else {
317             // the default
318
searchIndexesDiskPath = jSettings.getJahiaVarDiskPath()
319                 + File.separator
320                 + searchIndexesDir;
321         }
322         logger.debug("Search index Root Dir: " + searchIndexesDiskPath);
323
324         val = searchSettings.getProperty("org.apache.lucene.localIndexing");
325         if ( val != null && "0".equals(val)){
326             this.localIndexing = false;
327             this.indexingThreadActivated = false;
328             logger.debug("Local indexing disabled !!!!");
329         }
330         
331         autoAppendWildcards = doAutoAppendWildcards();
332     }
333
334     private void startScheduledIndexationJob(){
335
336         this.scheduledIndexationJob = new IndexationJobDetail("scheduledIndexation" + "_Job", Scheduler.DEFAULT_GROUP,
337                                   ScheduledIndexationJob.class);
338         JobDataMap jobDataMap = new JobDataMap();
339         this.scheduledIndexationJob.setJobDataMap(jobDataMap);
340         this.scheduledIndexationJob.setSleepInterval(this.scheduledIndexationInterval);
341
342         SimpleTrigger trigger = new SimpleTrigger("scheduledIndexation" + "_Trigger",
343                                     Scheduler.DEFAULT_GROUP);
344
345         try {
346             ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(trigger.getName(),
347                     Scheduler.DEFAULT_GROUP);
348             ServicesRegistry.getInstance().getSchedulerService().scheduleJob(
349                 this.scheduledIndexationJob, trigger);
350         } catch (JahiaException je) {
351             logger.error("Error while scheduling search indexation", je);
352         }
353
354     }
355
356     private void loadFileExtractors() throws JahiaException {
357         java.io.InputStream JavaDoc is = null;
358         try {
359             is = new java.io.FileInputStream JavaDoc(this.fileExtractorsConfigFilePath);
360             this.fileExtractorsFactory = new XmlBeanFactory(is);
361             FileExtractorsConfig fec = (FileExtractorsConfig)
362                 this.fileExtractorsFactory.getBean("fileExtractorsConfig");
363             this.FileExtractors = (HashMap)fec.getFileExtractors().clone();
364         } catch ( Throwable JavaDoc t ){
365             throw new JahiaException("Error loading File Extractors config file",
366                                       "Error loading File Extractors config file",
367                                       JahiaException.CONFIG_ERROR, JahiaException.SERVICE_ERROR, t);
368         }
369     }
370
371     /**
372      * Returns the number of pending files indexing queue
373      * -1 on error
374      * @return the number of pending files indexing queue
375      * -1 on error
376      */

377     public int getNbOfFilesInQueue(){
378         try {
379             return this.scheduledIndexationJob != null ?
380               this.scheduledIndexationJob.getNbDocs() : 0;
381         } catch ( Throwable JavaDoc t ){
382             logger.warn(t);
383             return -1;
384         }
385     }
386
387     /**
388      * Returns the number of pending documents in the indexing queue
389      * -1 on error
390      * @return the number of pending documents in the indexing queue
391      * -1 on error
392      */

393     public int getNbDocumentsInQueue(){
394         try {
395             return this.indexOrders.size();
396         } catch ( Throwable JavaDoc t) {
397             logger.warn(t);
398             return -1;
399         }
400     }
401
402     /**
403      * Wake up the main index thread
404      */

405     public void notifyMainIndexThread(){
406         synchronized(lock){
407             lock.notifyAll();
408         }
409     }
410
411     //--------------------------------------------------------------------------
412
/**
413      * index afield
414      *
415      * @param indObj
416      */

417     public void addToSearchEngine (JahiaIndexableDocument indObj) {
418         if (indObj == null) {
419             return;
420         }
421         indObj.setBeAdded(true);
422         if (this.localIndexing) {
423              addToIndexOrder(indObj);
424         }
425         indexOrdersCache.put(getCacheKey(indObj), indObj);
426         addToRAMIndexer(indObj,indObj.getSiteId());
427         synchronized(lock){
428             lock.notifyAll();
429         }
430     }
431
432     private void addToRAMIndexer(JahiaIndexableDocument doc, int siteId){
433         if ( ramIndexer == null || doc == null ){
434             return;
435         }
436         if ( doc.isCacheableWithRAMIndexer() ){
437             Document luceneDoc = getLuceneDocument(doc);
438             if ( luceneDoc == null ){
439                 return;
440             }
441             luceneDoc.add(new Field(RAM_INDEXER_DOCUMENT,"1",Field.Store.YES, Field.Index.UN_TOKENIZED));
442             ramIndexer.addDoc(luceneDoc, siteId);
443         }
444     }
445
446     /**
447      * this method is should only be used by the ScheduledIndexationJob to return back a scheduled document in the indexation queue without sending it to the other clusters
448      *
449      * @param indObj
450      */

451     public void addScheduledDocumentToSearchEngine(JahiaIndexableDocument indObj) {
452         if (indObj == null) {
453             return;
454         }
455         indObj.setBeAdded(true);
456         if (this.localIndexing) {
457              addToIndexOrder(indObj);
458         }
459         synchronized(lock){
460             lock.notifyAll();
461         }
462     }
463
464     private String JavaDoc getCacheKey(JahiaIndexableDocument indObj){
465         return indObj.getKeyFieldName() + "_" + indObj.getKey() + "_" + (indObj.toBeAdded()?"1":"0");
466     }
467
468     private void addToIndexOrder(JahiaIndexableDocument indObj){
469         if ( indObj == null ){
470             return;
471         }
472
473        JahiaIndexableDocument doc = null;
474        int size = this.indexOrders.size();
475        Vector result = new Vector();
476        for ( int i=0; i<size; i++ ){
477            doc = (JahiaIndexableDocument)this.indexOrders.get(i);
478            if ( (indObj.toBeAdded() && doc.toBeAdded())
479                && indObj.getKeyFieldName().equals(doc.getKeyFieldName())
480                && indObj.getKey().equals(doc.getKey()) ){
481               // same doc, ignore old doc
482
} else if ( (indObj.toBeRemoved() && doc.toBeRemoved())
483               && indObj.getKeyFieldName().equals(doc.getKeyFieldName())
484               && indObj.getKey().equals(doc.getKey()) ){
485               // same doc, ignore old doc
486
} else {
487                result.add(doc);
488            }
489        }
490        result.add(indObj);
491        this.indexOrders = result;
492     }
493
494     //--------------------------------------------------------------------------
495
/**
496      * Remove a field from search engine.Remove all entry ( all lang,workflow...)
497      *
498      * @param JahiaField aField, the field to remove.
499      */

500     public synchronized void removeFromSearchEngine (JahiaIndexableDocument indObj) {
501         if (indObj == null)
502             return;
503         indObj.setBeAdded (false);
504         if (indObj.beforeRemovingFromSearchEngine ()) {
505             if (this.localIndexing) {
506                 addToIndexOrder(indObj);
507             }
508             indexOrdersCache.put(getCacheKey(indObj), indObj);
509             synchronized(lock){
510                 lock.notifyAll();
511             }
512         }
513     }
514
515     //--------------------------------------------------------------------------
516
/**
517      * Remove any document matching a field key from search index.
518      * The key field is by default JahiaSearchConstant.OBJECT_KEY
519      *
520      * @param siteId
521      * @param keyFieldValue, the key field value
522      */

523     public void removeFromSearchEngine (int siteId, String JavaDoc keyFieldValue) {
524
525         removeFromSearchEngine (siteId, JahiaSearchConstant.OBJECT_KEY,
526                 keyFieldValue);
527     }
528
529     //--------------------------------------------------------------------------
530
/**
531      * Remove any document matching a field key from search index
532      *
533      * @param siteId
534      * @param keyFieldName, name of key field for which to remove all documents from index
535      * @param keyFieldValue, the key field value
536      */

537     public synchronized void removeFromSearchEngine (int siteId,
538                                                      String JavaDoc keyFieldName,
539                                                      String JavaDoc keyFieldValue) {
540         if (keyFieldName == null || keyFieldValue == null)
541             return;
542
543         JahiaIndexableDocumentImpl doc =
544                 new JahiaIndexableDocumentImpl (siteId, keyFieldName,
545                         keyFieldValue, new Hashtable ());
546         doc.setBeAdded (false);
547         removeFromSearchEngine (doc);
548     }
549
550     public RAMIndexer getRAMIndexer(){
551         return ramIndexer;
552     }
553
554     //--------------------------------------------------------------------------
555
/**
556      * Perform a search for a given JahiaSearcher object.
557      *
558      * @param ParamBean jParams, to check read access.
559      *
560      * @return JahiaSearchResult, containing a vector of matching objects.
561      */

562     public JahiaSearchResult search (JahiaSearcher jSearcher, ParamBean jParams)
563             throws JahiaException {
564         logger.debug ("Started");
565         JahiaSearchResult result = null;
566
567         try {
568             Method JavaDoc theMethod = getSearchMethod(searchHandlers.getProperty (jSearcher.getName ()));
569
570             // the parameter values
571
Object JavaDoc args[] = {jSearcher, jParams};
572
573             result = (JahiaSearchResult) theMethod.invoke (new SearchEngine (this.
574                     analyzer, null), args);
575
576         } catch (Throwable JavaDoc t) {
577             logger.error ("Error in search :", t);
578         }
579
580         return result;
581     }
582
583     //--------------------------------------------------------------------------
584
/**
585      * Return a vector of matching pages.
586      * Perform a search for a given query ( valid lucene query ).
587      * The search is limited to the site returned by the param bean.
588      *
589      * @param String queryString, a valid lucene query string.
590      * @param ParamBean jParams, to check read access.
591      * @param LanguageCodes, in which language to search.
592      *
593      * @return JahiaSearchResult, containing a vector of matching page.
594      *
595      * @Deprecated Should use search method with PageSearcher instead.
596      */

597     public JahiaSearchResult doSearch (String JavaDoc queryString,
598                                        ParamBean jParams,
599                                        ArrayList languageCodes)
600             throws JahiaException {
601
602         PageSearcher pageSearcher = new PageSearcher (new int[]{jParams.getJahiaID()}, languageCodes);
603         JahiaSearchResult result = pageSearcher.search (
604                 queryString, jParams);
605         return result;
606     }
607
608     //--------------------------------------------------------------------------
609
/**
610      * Re-index a field.
611      *
612      * @param fieldID int
613      * @param stagingOnly boolean, if false, skip re-index active version
614      * @param jParams ParamBean
615      */

616     public void indexField (int fieldID,
617                                          boolean stagingOnly,
618                                          ParamBean jParams) {
619         indexField(fieldID,stagingOnly,jParams,false);
620    }
621
622    //--------------------------------------------------------------------------
623
/**
624     * Re-index a field.
625     *
626     * @param fieldID int
627     * @param stagingOnly boolean, if false, skip re-index active version
628     * @param jParams ParamBean
629     * @param allowInMemoryIndex boolean if true, index in in memory index ( temporary and limited momory-size index )
630     */

631    public synchronized void indexField (int fieldID,
632                                         boolean stagingOnly,
633                                         ParamBean jParams,
634                                         boolean allowInMemoryIndex ) {
635        try {
636            // init the JahiaContentFieldFacade
637

638            ArrayList localeList = new ArrayList ();
639            Vector siteLanguageSettings = jParams.getSite ().getLanguageSettings ();
640            if (siteLanguageSettings != null) {
641                for (int i = 0; i < siteLanguageSettings.size (); i++) {
642                    SiteLanguageSettings curSetting = (SiteLanguageSettings)
643                            siteLanguageSettings.elementAt (i);
644                    if (curSetting.isActivated ()) {
645                        Locale tempLocale = LanguageCodeConverters.
646                                languageCodeToLocale (curSetting.
647                                getCode ());
648                        localeList.add (tempLocale);
649                    }
650                }
651            }
652
653            JahiaContentFieldFacade jahiaContentFieldFacade =
654                    new JahiaContentFieldFacade (fieldID, LoadFlags.TEXTS, jParams,
655                            localeList, false);
656            Enumeration enumeration = jahiaContentFieldFacade.getFields ();
657            JahiaField aField = null;
658            while (enumeration.hasMoreElements ()) {
659                aField = (JahiaField) enumeration.nextElement ();
660                if ( stagingOnly && aField.getWorkflowState()
661                     == EntryLoadRequest.ACTIVE_WORKFLOW_STATE ){
662                    continue;
663                }
664
665                this.addFieldToSearchEngine (aField,
666                                             aField.getWorkflowState (),
667                                             allowInMemoryIndex);
668            }
669        } catch (Throwable JavaDoc t) {
670          logger.warn(t);
671        }
672    }
673
674    //--------------------------------------------------------------------------
675
/**
676     * Add a field to search engine.
677     * With updated field, you should remove it first from search engine before
678     * adding it again.
679     *
680     * @param aField JahiaField, the field to index.
681     * @param workflowState effective workflow state to use
682     */

683     public void addFieldToSearchEngine (JahiaField aField,
684                                                      int workflowState) {
685         addFieldToSearchEngine(aField, workflowState, false);
686     }
687
688     //--------------------------------------------------------------------------
689
/**
690      * Add a field to search engine.
691      * With updated field, you should remove it first from search engine before
692      * adding it again.
693      *
694      * @param aField JahiaField, the field to index
695      * @param workflowState effective workflow state to use
696      * @param allowInMemoryIndex boolean if true, index in in memory index ( temporary and limited momory-size index )
697      */

698      public synchronized void addFieldToSearchEngine (JahiaField aField,
699                                                       int workflowState,
700                                                       boolean allowInMemoryIndex) {
701          try {
702          if (aField == null)
703              return;
704
705          int wfState = workflowState;
706          if (wfState > EntryLoadRequest.ACTIVE_WORKFLOW_STATE) {
707              wfState = EntryLoadRequest.STAGING_WORKFLOW_STATE;
708          }
709
710             HashMap values = aField.getValuesForSearch();
711             if (values == null || values.size() == 0) {
712                 return;
713             }
714             Iterator iterator = values.keySet().iterator();
715             while (iterator.hasNext()) {
716                 String JavaDoc languageCode = (String JavaDoc) iterator.next();
717
718                 StringBuffer JavaDoc key = new StringBuffer JavaDoc(String.valueOf(aField
719                         .getID()));
720                 key.append("_");
721                 key.append(wfState);
722                 key.append("_");
723                 key.append(languageCode);
724
725                 AddedField addedField = new AddedField(aField, wfState,
726                         JahiaSearchConstant.FIELD_KEY, key.toString());
727                 addedField.setLanguageCode(languageCode);
728                 Object JavaDoc[] value = (Object JavaDoc[]) values.get(languageCode);
729                 if (FieldTypes.BIGTEXT == aField.getType()) {
730                     // SCSE: extract only text from HTML - EDE0167
731
BigTextFieldExtractor extractor = new BigTextFieldExtractor();
732                     for (int i = 0; i < value.length; i++) {
733                         if (i > 0)
734                             extractor.reset();
735                         new Parser(new Lexer((String JavaDoc) value[i]))
736                                 .visitAllNodesWith(extractor);
737                         value[i] = extractor.getExtractedText();
738                     }
739                 }
740                 addedField.setValues(value);
741                 if (addedField.beforeAddingToSearchEngine()) {
742                      if ( this.localIndexing ){
743                           addToIndexOrder(addedField);
744                      }
745                      addedField.setCacheableWithRAMIndexer(allowInMemoryIndex);
746                      addToRAMIndexer(addedField, addedField.getSiteId());
747                      indexOrdersCache.put(getCacheKey(addedField),addedField);
748                  }
749              }
750          } catch (Throwable JavaDoc t) {
751              logger.warn(t);
752          } finally {
753              synchronized(lock){
754                  lock.notifyAll();
755              }
756          }
757      }
758
759     // --------------------------------------------------------------------------
760
/**
761      * Remove a field from search engine.Remove all entry ( all
762      * lang,workflow...)
763      *
764      * @param JahiaField
765      * aField, the field to remove.
766      */

767     public synchronized void removeFieldFromSearchEngine (JahiaField aField) {
768         if (aField == null)
769             return;
770         RemovedField rField = new RemovedField (aField);
771         rField.setBeAdded (false);
772         if (rField.beforeRemovingFromSearchEngine ()) {
773             if ( this.localIndexing ){
774                  addToIndexOrder(rField);
775             }
776             indexOrdersCache.put (getCacheKey(rField),rField);
777             synchronized(lock){
778                 lock.notifyAll();
779             }
780         }
781     }
782
783     //--------------------------------------------------------------------------
784
/**
785      * Remove a field from search engine.Remove all entry ( all lang,workflow...)
786      *
787      * @param JahiaField aField, the field to remove.
788      */

789     public synchronized void removeFieldFromSearchEngine (ContentField
790             contentField) {
791         if (contentField == null)
792             return;
793
794         RemovedField rField = new RemovedField (contentField);
795         rField.setBeAdded (false);
796         if (rField.beforeRemovingFromSearchEngine ()) {
797             if ( this.localIndexing ){
798                  addToIndexOrder(rField);
799             }
800             indexOrdersCache.put (getCacheKey(rField),rField);
801             synchronized(lock){
802                 lock.notifyAll();
803             }
804         }
805     }
806
807     //--------------------------------------------------------------------------
808
/**
809      * Remove a field by it's wfs and language code.
810      * If the languageCode is ContentObject.SHARED_LANGUAGE, remove the field
811      * in the given workflow state for all languages
812      *
813      * @param siteId
814      * @param fieldId
815      * @param workflowState
816      * @param languageCode
817      */

818     public synchronized void removeFieldFromSearchEngine (int siteId,
819                                                           int fieldId,
820                                                           int workflowState,
821                                                           String JavaDoc languageCode) {
822
823         int wfState = workflowState;
824         if (wfState > EntryLoadRequest.ACTIVE_WORKFLOW_STATE) {
825             wfState = EntryLoadRequest.STAGING_WORKFLOW_STATE;
826         }
827         if (!ContentObject.SHARED_LANGUAGE.equals (languageCode)) {
828             removeFromSearchEngine (siteId, JahiaSearchConstant.FIELD_KEY,
829                     fieldId + "_" + wfState + "_" + languageCode);
830         } else {
831
832             JahiaSite site = null;
833             try {
834                 site = ServicesRegistry.getInstance().getJahiaSitesService().getSite(siteId);
835
836                 Vector siteLanguageSettings = site.getLanguageSettings ();
837                 if (siteLanguageSettings != null) {
838                     for (int i = 0; i < siteLanguageSettings.size (); i++) {
839                         SiteLanguageSettings curSetting = (SiteLanguageSettings)
840                                 siteLanguageSettings.elementAt (i);
841                             removeFieldFromSearchEngine(siteId,fieldId,workflowState,curSetting.getCode());
842                     }
843                 }
844             } catch (JahiaException je ){
845                 logger.debug("Error removing field fieldId=" + fieldId, je);
846             }
847         }
848     }
849
850     //--------------------------------------------------------------------------
851
/**
852      * Return the current site's indexation job detail if any.
853      *
854      * @param siteID the site ID
855      * @return the current site's indexation job detail if any
856      */

857     public SiteIndexationStatus getSiteIndexationStatus (int siteID){
858         SiteIndexationStatus job = (SiteIndexationStatus)fullSiteIndexationStatusCache.get(new Integer JavaDoc(siteID));
859         return job;
860     }
861
862     /**
863      * This method is used to reflect amy changes to all clusters.
864      *
865      * @param status
866      */

867     public void updateSiteIndexationStatus(SiteIndexationStatus status){
868         if ( status != null ){
869             fullSiteIndexationStatusCache.put(new Integer JavaDoc(status.getSiteId()),status);
870         }
871     }
872
873     //--------------------------------------------------------------------------
874
/**
875      * Abort site indexation.
876      *
877      * @param siteID
878      * @return <code>true</code>
879      */

880     public boolean abortSiteIndexation (int siteID) {
881
882         SiteIndexationStatus status = getSiteIndexationStatus(siteID);
883         if ( status != null ){
884             status.setShutdown(true);
885             this.updateSiteIndexationStatus(status);
886         }
887
888         boolean result = true;
889         try {
890             String JavaDoc triggerName = "siteScheduledIndexation_" + String.valueOf(siteID) + "_Trigger";
891             ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(triggerName,
892                     Scheduler.DEFAULT_GROUP);
893         } catch (JahiaException je) {
894             logger.error("Error aborting site indexation job", je);
895             result = false;
896         }
897         return result;
898     }
899
900     //--------------------------------------------------------------------------
901
/**
902      * Re-index a full site.
903      *
904      * @param siteID
905      * @param jParams
906      * @param deleteIndexFirst
907      * @param abortPreviousJob
908      * @return result of the operation
909      */

910     public boolean indexSite (int siteID,
911                               ParamBean jParams,
912                               boolean deleteIndexFirst,
913                               boolean abortPreviousJob) {
914
915         logger.debug("calling Thread name=" + Thread.currentThread().getName());
916
917         if ( !this.localIndexing ){
918             logger.info("Ignore re-indexing the site because local indexing is disabled !");
919             return false;
920         }
921
922         SiteIndexationStatus status = (SiteIndexationStatus)
923                 fullSiteIndexationStatusCache.get(new Integer JavaDoc(siteID));
924         String JavaDoc triggerName = "siteScheduledIndexation_" + String.valueOf(siteID) + "_Trigger";
925
926         if ( abortPreviousJob ){
927             if ( status != null ){
928                 status.setShutdown(true);
929                 this.updateSiteIndexationStatus(status);
930             }
931             try {
932                 ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(triggerName,
933                         Scheduler.DEFAULT_GROUP);
934             } catch (JahiaException je) {
935                 logger.error("Error aborting site indexation job", je);
936             }
937         } else {
938             if ( status != null && !status.isDone() && !status.isShutdown() ){
939                 // job still running
940
return true;
941             }
942         }
943
944         status = new SiteIndexationStatus(siteID);
945         this.updateSiteIndexationStatus(status);
946
947         ParamBean serJParams = null;
948         try {
949             serJParams = SerializableParamBean.getInstance(Jahia.getThreadParamBean());
950         } catch ( Throwable JavaDoc t) {
951             logger.debug("Exception creating serializable ParamBean",t);
952             return false;
953         }
954
955         SiteIndexationJobDetail job =
956                 new SiteIndexationJobDetail("siteScheduledIndexation_" + String.valueOf(siteID) + "_Trigger"
957                                             , Scheduler.DEFAULT_GROUP, ScheduledSiteIndexationJob.class);
958         job.setSiteId(siteID);
959         JobDataMap jobDataMap = new JobDataMap();
960         jobDataMap.put("jParams",serJParams);
961         job.setJobDataMap(jobDataMap);
962
963         try {
964             ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(triggerName,
965                     Scheduler.DEFAULT_GROUP);
966         } catch (JahiaException je) {
967             logger.error("Error while scheduling site indexation siteId=" + siteID, je);
968         }
969         try {
970             SimpleTrigger trigger = new SimpleTrigger(triggerName,
971                                         Scheduler.DEFAULT_GROUP);
972             ServicesRegistry.getInstance().getSchedulerService().scheduleJob(
973                 job, trigger);
974         } catch (JahiaException je) {
975             logger.error("Error while scheduling site indexation siteId=" + siteID, je);
976         }
977
978         return true;
979     }
980
981     //--------------------------------------------------------------------------
982
/**
983      * Perform an index optimization for a given site.
984      *
985      * @param siteID
986      *
987      * @return boolean false on error.
988      */

989     public boolean optimizeIndex (int siteID) {
990
991         if ( !this.localIndexing ){
992             logger.info("Ignore optimizing search index because local indexing is disabled !");
993             return false;
994         }
995
996         IndexWriter writer = null;
997         boolean result = false;
998
999         try {
1000            JahiaSite site = ServicesRegistry.getInstance().getJahiaSitesService().getSite(siteID);
1001            String JavaDoc indexDirectory = composeSiteIndexDir (site);
1002            if ( !IndexReader.indexExists(indexDirectory) ){
1003                writer = getIndexWriter (siteID, this.indexAnalyzer, true);
1004            } else {
1005                writer = getIndexWriter (siteID, this.indexAnalyzer, false);
1006            }
1007            if (writer == null)
1008                return false;
1009            writer.setUseCompoundFile(true);
1010            writer.optimize ();
1011            result = true;
1012        } catch (Throwable JavaDoc t) {
1013            logger.error ("Error while optimizing index", t);
1014        } finally {
1015            closeIndexWriter (writer);
1016        }
1017        return result;
1018    }
1019
1020    //--------------------------------------------------------------------------
1021
/**
1022     * Return the full path to the directory containing the index for a given site.
1023     *
1024     * @param siteID the site ID
1025     *
1026     * @return String the site's index path, null if not exist.
1027     */

1028    public String JavaDoc getSiteIndex(int siteID) throws JahiaException
1029    {
1030      String JavaDoc path = null;
1031      JahiaSite site = ServicesRegistry.getInstance().getJahiaSitesService()
1032        .getSite(siteID);
1033      if (site != null)
1034      {
1035        String JavaDoc indexDir = composeSiteIndexDir(site);
1036        if (IndexReader.indexExists(indexDir))
1037          path = indexDir;
1038      }
1039      return path;
1040    }
1041
1042    // --------------------------------------------------------------------------
1043
/**
1044     * Return the full path to search indexes root directory.
1045     *
1046     * @return String the site's index path, null if not exist.
1047     */

1048    public String JavaDoc getSearchIndexRootDir ()
1049            throws JahiaException {
1050        return searchIndexesDiskPath;
1051    }
1052
1053    //--------------------------------------------------------------------------
1054
/**
 * Background task that handles the remove/add fields to search engine from
 * the queue.
 *
 * On start-up all indexes are optimized once (only if the service is
 * initialized and local indexing is enabled). The method then loops while
 * the service is initialized and <code>indexingThreadActivated</code> is
 * true. Each iteration:
 * <ol>
 *   <li>takes the content of <code>indexOrders</code> under the service
 *       monitor and replaces the queue with an empty Vector;</li>
 *   <li>hands each retained document to a per-site {@link SiteIndexer} and
 *       removes it from <code>indexOrdersCache</code>;</li>
 *   <li>calls <code>storeInPersistance()</code> on every site indexer;</li>
 *   <li>optimizes all indexes when more than <code>optimizationInterval</code>
 *       has elapsed since the last optimization;</li>
 *   <li>waits on <code>lock</code> if the queue is empty.</li>
 * </ol>
 */

1058    public void run () {
1059
1060        logger.debug("search service is running in Thread name=" + Thread.currentThread().getName());
1061
1062        if ( this.isInitialized() && this.localIndexing ){
1063            this.optimizeAllIndexes();
1064        }
1065
1066        SiteIndexer siteIndexer = null;
1067        Integer JavaDoc siteId = null;
                // one SiteIndexer per site id, kept for the whole life of this method
1068        HashMap siteIndexers = new HashMap();
1069        long lastOptimizationTime = System.currentTimeMillis();
1070        Vector v = null;
1071        Vector validDocs = null;
1072        JahiaIndexableDocument doc = null;
1073        JahiaIndexableDocument doc2 = null;
1074        Iterator iterator = null;
1075        RemovedField remField = null;
1076        AddedField addField = null;
1077        long now = 0;
1078        while (this.isInitialized() && indexingThreadActivated) {
                    // 'now' is sampled at the start of the iteration and is the value
                    // compared against lastOptimizationTime further down
1079            now = System.currentTimeMillis();
1080            v = new Vector();
1081            doc = null;
1082
                    // Take the pending orders while holding the service monitor.
1083            synchronized (this) {
1084                for ( int i=0; i<indexOrders.size(); i++ ){
1085                    doc = (JahiaIndexableDocument)indexOrders.get(i);
                            // NOTE(review): a document skipped here is not copied to 'v' and
                            // indexOrders is replaced below, so it is not re-queued by this
                            // method; presumably the scheduled job handles it -- confirm.
1086                    if (this.scheduledIndexationJob != null
1087                      && doc.scheduled(this.scheduledIndexationJob)) {
1088                      continue;
1089                    }
1090                    if ( doc instanceof RemovedField ) {
1091                        remField = (RemovedField)doc;
                                // cancel every AddedField already collected in 'v' that has the
                                // same ID as this removal (slots are nulled, not removed)
1092                        for ( int j=0; j<v.size() ; j++ ){
1093                            doc2 = (JahiaIndexableDocument)v.get(j);
1094                            if ( doc2 != null && doc2 instanceof AddedField ){
1095                                addField = (AddedField)doc2;
1096                                if ( remField.getID() == addField.getID() ){
1097                                    // we are going to remove the same field, so we don't need to add it at all.
1098
v.setElementAt(null,j);
1099                                }
1100                            }
1101                        }
1102                        // process remove first
                                // (inserted at index 0, so a later removal ends up before an earlier one)
1103
v.insertElementAt(doc,0);
1104                    } else {
1105                        // process add at last
1106
v.add(doc);
1107                    }
1108                }
1109
                        // keep only the slots that were not nulled above
1110                validDocs = new Vector();
1111                for ( int i=0; i<v.size() ; i++ ){
1112                    doc = (JahiaIndexableDocument)v.get(i);
1113                    if ( doc != null){
1114                        validDocs.add(doc);
1115                    }
1116                }
1117                v = null;
1118
1119                /*
1120                // 1.Separate docs that are going to be added or removed
1121                for ( int i=0; i<indexOrders.size(); i++ ){
1122                    doc = (JahiaIndexableDocument)indexOrders.get(i);
1123                    if ( doc.scheduled((IndexationJobDetail)this.scheduledIndexationJob) ){
1124                        continue;
1125                    }
1126                    if ( doc.toBeAdded() ){
1127                        toBeAdded.put(doc.getKeyFieldName() + "_" + doc.getKey(),new Integer(i));
1128                    } else {
1129                        toBeRemoved.put(doc.getKeyFieldName() + "_" + doc.getKey(),new Integer(i));
1130                    }
1131                    v.add(indexOrders.get(i));
1132                }
1133
1134                // 2. remove "to added" docs, if they are going to be removed
1135                iterator = toBeRemoved.keySet().iterator();
1136                key = null;
1137                addOrder = null;
1138                removeOrder = null;
1139                while ( iterator.hasNext() ){
1140                    key = (String)iterator.next();
1141                    addOrder = (Integer)toBeAdded.get(key);
1142                    if ( addOrder != null ){
1143                        removeOrder = (Integer)toBeRemoved.get(key);
1144                        if ( removeOrder.intValue() > addOrder.intValue() ){
1145                            // it would be great to use weight with objects to be removed or added
1146                            // ( removing a staging entry of a field is not as weight as removing the whole field )
1147                            v.setElementAt(null,addOrder.intValue());
1148                        }
1149                    }
1150                }
1151                */

1152
                        // the queue is replaced by a fresh, empty one
1153                indexOrders = new Vector();
1154            }
1155
                    // Process the retained orders outside the monitor.
1156            while (validDocs.size() > 0) {
1157                doc = null;
1158                if (validDocs.size() != 0) {
1159                    doc = (JahiaIndexableDocument)
1160                        validDocs.elementAt(0);
1161                    validDocs.remove(0);
1162                }
1163
1164                // okay now we have the next added/removed field, we process it!
1165
if (doc != null) {
1166
1167                    siteId = new Integer JavaDoc(doc.getSiteId());
1168
                            // lazily create the indexer of the document's site
1169                    siteIndexer = (SiteIndexer)siteIndexers.get(siteId);
1170                    try {
1171                        if ( siteIndexer == null ){
1172                            siteIndexer = new SiteIndexer(siteId.intValue(),500);
1173                            siteIndexers.put(siteId,siteIndexer);
1174                        }
1175                    } catch ( Throwable JavaDoc t ){
1176                        logger.debug("Error occured indexing in background", t);
1177                    }
1178                    if ( siteIndexer != null ) {
1179                        try {
1180                            siteIndexer.addDocument(doc);
1181                        } catch ( Throwable JavaDoc t ){
1182                            logger.debug("Error addind document to SiteIndexer siteId="
1183                                    + siteId.intValue(), t);
1184                        }
1185                    }
                            // the cache entry is removed whether or not addDocument succeeded
1186                    this.indexOrdersCache.remove(getCacheKey(doc));
1187                }
1188            }
1189
                    // ask every site indexer to persist what it holds; a failure of one
                    // indexer is logged and does not prevent the others from being called
1190            iterator = siteIndexers.values().iterator();
1191            while (iterator.hasNext()) {
1192                siteIndexer = (SiteIndexer) iterator.next();
1193                try {
1194                    siteIndexer.storeInPersistance();
1195                } catch (Throwable JavaDoc t) {
1196                    logger.debug(
1197                        "Error calling storeInPersistance on siteIndexer",
1198                        t);
1199                }
1200            }
1201
1202            //SCSE: this timing means nothing, because indexing is done in background now
1203
/*
1204            long indexingElapsedTime = now - indexingStartTime;
1205            if (logger.isInfoEnabled() && indexOrderCount>0 ) {
1206                logger.info(
1207                    "Finished processing " + indexOrderCount +
1208                    " indexing orders in " + indexingElapsedTime + "ms.");
1209            }
1210            */

1211            
                    // periodic optimization, evaluated only when the loop iterates
1212            if ( ( now - lastOptimizationTime) > this.optimizationInterval ) {
1213                optimizeAllIndexes();
1214                lastOptimizationTime = now;
1215            }
1216
1217            // FIXME : oops, As we are in a separate thread, this would may have sence
1218

// to terminate Connection ?
1219

//org.jahia.services.database.ConnectionDispenser.
1220

// terminateConnection ();
1221

1222            int size = 0;
1223            synchronized (this) {
1224                size = indexOrders.size();
1225            }
                    // NOTE(review): the size check and the wait below use different
                    // monitors ('this' vs 'lock'); a notifyAll() issued between the two
                    // would not be seen by this wait -- confirm whether that matters.
1226            if (size == 0) {
1227                synchronized (lock) {
1228                    try {
1229                        lock.wait(); // wait for next notify
1230
}
1231                    catch (InterruptedException JavaDoc ie) {
                                // interruption is ignored; the loop condition is re-evaluated
1232                    }
1233                }
1234            }
1235        }
1236    }
1237
1238    //**************************************************************************
1239
//
1240
// Private section
1241
//
1242
//**************************************************************************
1243

1244    private void optimizeAllIndexes() {
1245        try {
1246            Enumeration sites = ServicesRegistry.getInstance().getJahiaSitesService().getSites();
1247            JahiaSite site = null;
1248            while ( sites.hasMoreElements() ){
1249                site = (JahiaSite)sites.nextElement();
1250                this.optimizeIndex(site.getID());
1251            }
1252        } catch ( Throwable JavaDoc t) {
1253            logger.debug("Exception occured when trying to optimize all indexes ",t);
1254        }
1255    }
1256
1257    //--------------------------------------------------------------------------
1258
/**
1259     * index a JahiaIndexable object
1260     *
1261     * @param indObj
1262     * @param writer, if null, a new ramWriter will be opened.
1263     */

1264    private void index (JahiaIndexableDocument indObj, IndexWriter writer, IndexReader reader) {
1265
1266        if (indObj == null)
1267            return;
1268
1269        long startTime = System.currentTimeMillis();
1270
1271        Document doc = this.getLuceneDocument(indObj);
1272        if ( doc == null ){
1273            return;
1274        }
1275
1276        // first remove previous entry
1277
Term term = new Term (indObj.getKeyFieldName (), indObj.getKey ());
1278        removeDoc (indObj.getSiteId (), term, reader);
1279
1280        startTime = System.currentTimeMillis();
1281
1282        IndexWriter ramWriter = null;
1283        RAMDirectory ramDir = null;
1284        boolean closeWriter = false;
1285        try {
1286
1287            startTime = System.currentTimeMillis();
1288
1289            ramDir = new RAMDirectory();
1290            ramWriter = new IndexWriter(ramDir, this.indexAnalyzer, true);
1291
1292            // Try to get the site's index if any.
1293
if (writer == null) {
1294                writer = getIndexWriter (indObj.getSiteId (),
1295                        this.indexAnalyzer, false);
1296
1297                // If the site's index doesn't exist. Create it.
1298
if (writer == null)
1299                    writer = getIndexWriter (indObj.getSiteId (),
1300                            this.indexAnalyzer, true);
1301
1302                if (writer == null) {
1303                    logger.warn ("The index ramWriter is null, abort indexing the object");
1304                    return;
1305                }
1306                closeWriter = true;
1307            }
1308
1309            ramWriter.addDocument(doc);
1310
1311            ramWriter.close();
1312
1313            writer.addIndexes(new Directory[] { ramDir });
1314        } catch (Throwable JavaDoc t) {
1315            logger.error ("Error while indexing object " + indObj.getKey () + ":", t);
1316        } finally {
1317            if (closeWriter) {
1318                closeIndexWriter (writer);
1319            }
1320            ramDir = null;
1321        }
1322
1323        if (logger.isInfoEnabled()) {
1324            logger.debug(
1325                "Indexing document : " + indObj.getKey() + " in "
1326                + (System.currentTimeMillis() - startTime) + " ms.");
1327        }
1328    }
1329
1330
1331    //--------------------------------------------------------------------------
1332
/**
1333     * Return a ready for indexation lucene Document
1334     *
1335     * @param indObj
1336     */

1337    protected Document getLuceneDocument (JahiaIndexableDocument indObj)
1338    {
1339
1340        if (indObj == null)
1341            return null;
1342
1343        StringBuffer JavaDoc freeText = new StringBuffer JavaDoc();
1344
1345        Document doc = new Document ();
1346        Locale locale = Locale.getDefault();
1347        String JavaDoc[] langCodes = (String JavaDoc[])indObj.getFields()
1348            .get(JahiaSearchConstant.FIELD_LANGUAGE_CODE);
1349        String JavaDoc langCode = null;
1350        if ( langCodes != null && langCodes.length>0 ){
1351            langCode = langCodes[0];
1352        }
1353        if ( langCodes != null && !ContentObject.SHARED_LANGUAGE.equals(langCode) ){
1354            locale = org.jahia.utils.LanguageCodeConverters.languageCodeToLocale(langCode);
1355        } else {
1356            locale = null;
1357        }
1358        Hashtable attributes = indObj.getFields ();
1359        if (attributes != null && attributes.size () > 0) {
1360            Enumeration keys = attributes.keys ();
1361            String JavaDoc[] vals = null;
1362            int count = 0;
1363            while (keys.hasMoreElements ()) {
1364                String JavaDoc key = (String JavaDoc) keys.nextElement ();
1365                vals = (String JavaDoc[]) attributes.get (key);
1366                count = vals.length;
1367                for (int i = 0; i < count; i++) {
1368                    String JavaDoc val = vals[i];
1369                    freeText.append(val);
1370                    freeText.append(" ");
1371                    if ( locale != null && val != null ){
1372                        val = val.toLowerCase(locale);
1373                    }
1374                    if (!indObj.isFieldUnStored (key)) {
1375                        doc.add (new Field(key, val, Field.Store.YES, Field.Index.UN_TOKENIZED));
1376                    } else {
1377                        doc.add (new Field(key, val, Field.Store.NO, Field.Index.TOKENIZED));
1378                    }
1379                }
1380            }
1381        }
1382        doc.add (new Field(indObj.getKeyFieldName (),
1383                indObj.getKey (), Field.Store.YES, Field.Index.UN_TOKENIZED));
1384        doc.add(new Field(JahiaSearchConstant.FIELD_TEXT, freeText.toString(), Field.Store.NO, Field.Index.TOKENIZED));
1385
1386        return doc;
1387    }
1388
1389    //--------------------------------------------------------------------------
1390
/**
1391     * Indexes a field with a given IndexWriter
1392     * Don't forget to close the index ramWriter to flush change to the index file!
1393     *
1394     * @param JahiaField aField, the field to index.
1395     * @param workflowState
1396     * @param IndexWriter ramWriter, the index ramWriter to use.
1397     */

1398    private void indexField (JahiaField aField, int workflowState,
1399                             IndexWriter writer, IndexReader reader)
1400            throws JahiaException {
1401
1402        if (aField == null || aField.getValuesForSearch () == null)
1403            return;
1404
1405        int wfState = workflowState;
1406        if (wfState > EntryLoadRequest.ACTIVE_WORKFLOW_STATE) {
1407            wfState = EntryLoadRequest.STAGING_WORKFLOW_STATE;
1408        }
1409
1410        try {
1411            HashMap values = aField.getValuesForSearch ();
1412            if (values == null || values.size () == 0) {
1413                return;
1414            }
1415            Iterator iterator = values.keySet ().iterator ();
1416            while (iterator.hasNext ()) {
1417                String JavaDoc languageCode = (String JavaDoc) iterator.next ();
1418
1419                StringBuffer JavaDoc key = new StringBuffer JavaDoc (String.valueOf (aField.getID()));
1420                key.append ("_");
1421                key.append (wfState);
1422                key.append ("_");
1423                key.append (languageCode);
1424
1425                AddedField addedField = new AddedField (aField, wfState,
1426                        JahiaSearchConstant.FIELD_KEY, key.toString());
1427                addedField.setLanguageCode (languageCode);
1428                Object JavaDoc[] value = (Object JavaDoc[]) values.get (languageCode);
1429                addedField.setValues (value);
1430                index (addedField, writer, reader);
1431            }
1432        } catch (Throwable JavaDoc t) {
1433            logger.error ("Error while indexing field " + aField.getID () + ":", t);
1434        }
1435    }
1436
1437    //**************************************************************************
1438
//
1439
// Private section
1440
//
1441
//**************************************************************************
1442

1443
1444    //--------------------------------------------------------------------------
1445
/**
1446     * Add an object into the search engine index in background
1447     */

1448    private void backgroundAddObjectToSearchEngine (JahiaIndexableDocument indObj,
1449                                                    IndexWriter writer,
1450                                                    IndexReader reader) {
1451        if (indObj == null)
1452            return;
1453
1454        index (indObj, writer, reader);
1455    }
1456
1457    //--------------------------------------------------------------------------
1458
/**
1459     * Remove an object from search engine in background
1460     */

1461    private void backgroundRemoveObjectFromSearchEngine (JahiaIndexableDocument indObj,
1462        IndexReader reader) {
1463
1464        // Create a term with the object key unique identifier.
1465
Term term = new Term (indObj.getKeyFieldName (),
1466                String.valueOf (indObj.getKey ()));
1467
1468        removeDoc (indObj.getSiteId (), term, reader);
1469        //this.ramIndexer.removeDoc(term,indObj.getSiteId());
1470
}
1471
1472    //--------------------------------------------------------------------------
1473
/**
1474     * Remove a doc containing a given term
1475     *
1476     *
1477     * @param siteID
1478     * @param term
1479     * @param reader optional, use it if not null
1480     */

1481    private void removeDoc (int siteID, Term term, IndexReader reader) {
1482
1483        boolean closeReader = false;
1484        try {
1485
1486            // Try to get the site's index if any.
1487
if ( reader == null ){
1488                closeReader = true;
1489                reader = getIndexReader(siteID);
1490            }
1491
1492            if (reader == null)
1493                return;
1494
1495            // Remove all documents containing the term.
1496
reader.deleteDocuments (term);
1497
1498            // logger.debug( "Field removed :" + aField.getID() + " , "
1499
// + String.valueOf(nbDeleted) + " Doc deleted ");
1500
} catch (Throwable JavaDoc t) {
1501            logger.error ("Error while removing doc for site " + siteID, t);
1502        } finally {
1503            if ( closeReader ){
1504                closeIndexReader(reader);
1505            }
1506        }
1507    }
1508
1509    //--------------------------------------------------------------------------
1510
/**
1511     * Returns the IndexWriter for a given site.
1512     * Don't forget to close the returned index ramWriter to flush change to the index file !
1513     *
1514     * @param int siteID, the site id.
1515     * @param Analyzer the analyzer to use.
1516     * @param boolean if true, create a new index and replace existing one.
1517     *
1518     * @return IndexWriter ramWriter, the IndexWriter, null on error.
1519     */

1520    protected IndexWriter getIndexWriter (int siteID,
1521                                        Analyzer analyzer_,
1522                                        boolean create) throws JahiaException {
1523
1524        ServicesRegistry sReg = ServicesRegistry.getInstance ();
1525        JahiaSite site = null;
1526        IndexWriter writer = null;
1527        site = sReg.getJahiaSitesService ().getSite (siteID);
1528        if (site != null) {
1529            String JavaDoc indexDir = composeSiteIndexDir (site);
1530            try {
1531                writer = new IndexWriter (indexDir, analyzer_,
1532                        create);
1533                writer.setUseCompoundFile(false);
1534                site = null;
1535            } catch (Throwable JavaDoc t) {
1536                logger.error (
1537                        "An IO Exception occured when retrieving the index ramWriter for directory :" + indexDir,
1538                        t);
1539            }
1540        }
1541        return writer;
1542    }
1543
1544    //--------------------------------------------------------------------------
1545
/**
1546     * Returns the IndexReader for a given site.
1547     * Don't forget to close the returned index reader to flush change to the index file !
1548     *
1549     * @param int siteID, the site id.
1550     *
1551     * @return IndexReader reader, the IndexReader, null if not found.
1552     */

1553    public IndexReader getIndexReader (int siteID)
1554            throws IOException, JahiaException {
1555
1556        ServicesRegistry sReg = ServicesRegistry.getInstance ();
1557        JahiaSite site = null;
1558        IndexReader reader = null;
1559        site = sReg.getJahiaSitesService ().getSite (siteID);
1560        if (site != null) {
1561            String JavaDoc indexDir = composeSiteIndexDir (site);
1562            File indexDirFile = new File (indexDir);
1563            if (indexDirFile.exists ()) {
1564                reader = IndexReader.open (indexDirFile);
1565                site = null;
1566            } else {
1567                logger.warn (
1568                        "Cannot read index because directory " + indexDir + " does not exist, will be created upon first full site indexing...");
1569                return null;
1570            }
1571        }
1572        return reader;
1573    }
1574
1575    //--------------------------------------------------------------------------
1576
/**
1577     * Compose a index directory full path for a given site.
1578     *
1579     * @param JahiaSite site, the site.
1580     *
1581     * @return String the path
1582     */

1583    private String JavaDoc composeSiteIndexDir (JahiaSite site) {
1584        if (site == null)
1585            return null;
1586
1587        StringBuffer JavaDoc buff = new StringBuffer JavaDoc (searchIndexesDiskPath);
1588        buff.append (File.separator);
1589        buff.append (site.getSiteKey ());
1590        return buff.toString ();
1591    }
1592
1593    //--------------------------------------------------------------------------
1594
/**
1595     * Close a IndexWriter
1596     *
1597     * @param IndexWriter ramWriter, the index ramWriter
1598     */

1599    protected void closeIndexWriter (IndexWriter writer) {
1600        if (writer == null)
1601            return;
1602
1603        try {
1604            writer.close ();
1605        } catch (Throwable JavaDoc t) {
1606            logger.error ("Error while closing index ramWriter:", t);
1607        }
1608    }
1609
1610    //--------------------------------------------------------------------------
1611
/**
1612     * Close a IndexReader
1613     *
1614     * @param IndexReader reader, the index reader
1615     */

1616    protected void closeIndexReader (IndexReader reader) {
1617        if (reader == null)
1618            return;
1619
1620        try {
1621            reader.close ();
1622        } catch (Throwable JavaDoc t) {
1623            logger.error ("Error while closing index reader:", t);
1624        }
1625    }
1626
1627    /**
1628     * Removes all *.lock files in the specified directory, going down
1629     * recursively. Make sure you call this ONLY on lucene managed directories.
1630     *
1631     * @param startingDirectory directory in which to remove the *.lock files
1632     */

1633    private synchronized void removeStaleLockFiles (String JavaDoc startingDirectory) {
1634
1635        // With lucene > 1.3, the locks are created in "java.io.tmpdir", not in the search index folder
1636
String JavaDoc tmpDir = System.getProperty("java.io.tmpdir");
1637        File f = new File (tmpDir);
1638
1639        if (!f.isDirectory ()) {
1640            logger.warn ("Called on a file name, exiting...");
1641            return;
1642        }
1643
1644        File[] files = f.listFiles();
1645
1646        for (int i = 0; i < files.length; i++) {
1647            File curFile = files[i];
1648            if (curFile.isFile() && curFile.getName().toLowerCase().startsWith("lucene")
1649                && curFile.getName().toLowerCase().endsWith (".lock")) {
1650                logger.debug ("Removing stale lock file : [" + curFile.toString () + "]");
1651                curFile.delete();
1652            }
1653        }
1654
1655    }
1656
1657    /**
1658     * Cache Listener implementation
1659     *
1660     * @param cacheName String
1661     */

1662    public void onCacheFlush (String JavaDoc cacheName){
1663
1664    }
1665
1666    /**
1667     * Cache Listener implementation
1668     *
1669     * On cache put, add the new cache entry in the local indexOrders vector.
1670     *
1671     * @param cacheName String
1672     * @param entryKey Object
1673     */

1674    /* PAP: synchronized moved down to the code */
1675    public void onCachePut (String JavaDoc cacheName, Object JavaDoc entryKey){
1676        if (entryKey == null)
1677            return;
1678        
1679        if (FULL_SITE_INDEXATION_STATUS_CACHE.equals(cacheName)) {
1680            CacheEntry cacheEntry = fullSiteIndexationStatusCache.getCacheEntry(entryKey);
1681            if (cacheEntry == null)
1682                return;
1683           
1684            SiteIndexationStatus status = (SiteIndexationStatus)cacheEntry.getObject();
1685            if ( status != null && status.isShutdown() ) {
1686                String JavaDoc triggerName = "siteScheduledIndexation_" + String.valueOf(status.getSiteId()) + "_Trigger";
1687                try {
1688                    ServicesRegistry.getInstance().getSchedulerService().unscheduleJob(triggerName,
1689                            Scheduler.DEFAULT_GROUP);
1690                } catch (JahiaException je) {
1691                    logger.error("Error aborting site indexation job", je);
1692                }
1693            }
1694            return;
1695        }
1696
1697        if ( !this.localIndexing ){
1698            return;
1699        }
1700
1701        //logger.debug("Search Cache listener :" + cacheName + entryKey.toString());
1702
if ( SEARCH_INDEX_ORDERS_CACHE.equals(cacheName)){
1703            CacheEntry cacheEntry = null;
1704            Cache cache = CacheFactory.getCache(CacheFactory.getInstance(), cacheName);
1705            if (cache != null) {
1706                cacheEntry = cache.getCacheEntry(entryKey);
1707            }
1708            if (cacheEntry == null)
1709                return;
1710            JahiaIndexableDocument doc = (JahiaIndexableDocument)cacheEntry.getObject();
1711            synchronized (this) {
1712                addToIndexOrder(doc);
1713                if (doc.toBeAdded())
1714                  addToRAMIndexer(doc, doc.getSiteId());
1715            }
1716            logger.debug("Search Cache listener : added value in indexOrders");
1717            synchronized(lock){
1718                lock.notifyAll();
1719            }
1720            //indexQueue();
1721
}
1722    }
1723
1724    /**
1725     * Return a file extractor for a given content type
1726     *
1727     * @param contentType String
1728     * @throws JahiaException
1729     * @return FileExtractor
1730     */

1731    public FileExtractor getFileExtractor(String JavaDoc contentType)
1732        throws JahiaException {
1733        if ( contentType != null ){
1734            String JavaDoc className = (String JavaDoc)FileExtractors.get(contentType);
1735            if ( className != null ){
1736                try {
1737                    Class JavaDoc c = Class.forName(className);
1738                    return (FileExtractor)c.newInstance();
1739                } catch ( ClassNotFoundException JavaDoc cnfe ){
1740                    logger.debug(cnfe);
1741                } catch ( InstantiationException JavaDoc ie ){
1742                    logger.debug(ie);
1743                } catch ( IllegalAccessException JavaDoc iae ){
1744                    logger.debug(iae);
1745                }
1746            }
1747        }
1748        return null;
1749    }
1750
1751    public synchronized void shutdown() throws JahiaException {
1752        super.shutdown();
1753        indexingThreadActivated = false;
1754        mIsServiceInitialized = false;
1755        synchronized(this){
1756            notifyAll();
1757        }
1758        try {
1759            if ( this.ramIndexer != null ){
1760                this.ramIndexer.shutDown();
1761            }
1762        } catch ( Throwable JavaDoc t ){
1763          logger.warn(t);
1764        }
1765        try {
1766            if ( this.scheduledIndexationJob != null ) {
1767                this.scheduledIndexationJob.setShutdown(true);
1768            }
1769        } catch ( Throwable JavaDoc t ){
1770          logger.warn(t);
1771        }
1772
1773        this.shutdowSiteIndexationJobs();
1774
1775        Thread.yield();
1776    }
1777
1778    private synchronized void shutdowSiteIndexationJobs(){
1779        Object JavaDoc[] keys = this.fullSiteIndexationStatusCache.keys();
1780        SiteIndexationStatus status = null;
1781        for ( int i=0; i<keys.length; i++ ){
1782            try {
1783                status = (SiteIndexationStatus)
1784                        this.fullSiteIndexationStatusCache.get(keys[i]);
1785                this.abortSiteIndexation(status.getSiteId());
1786            } catch ( Throwable JavaDoc t ){
1787              logger.warn(t);
1788            }
1789        }
1790    }
1791
1792    protected class SiteIndexer {
1793
1794        private int UNDEFINED = 0;
1795        private int ADD = 1;
1796        private int REMOVE = 2;
1797
1798        private int siteId;
1799
1800        private int maxDocs = 1000;
1801        private Vector docs;
1802        private int lastOperation = 0;
1803
1804        public SiteIndexer(int siteId_, int maxDocs_)
1805        throws IOException, JahiaException {
1806            this.siteId = siteId_;
1807            this.maxDocs = maxDocs_;
1808            this.docs = new Vector();
1809        }
1810
1811        public synchronized void addDocument(JahiaIndexableDocument doc)
1812        throws IOException, JahiaException {
1813            if ( doc == null ){
1814                return;
1815            }
1816            int requestOp = doc.toBeAdded()?ADD:REMOVE;
1817            if ( this.getLastOperation() == UNDEFINED ) {
1818                this.setLastOperation(requestOp);
1819                docs.add(doc);
1820            } else if ( this.getLastOperation() != requestOp ){
1821                storeInPersistance();
1822                this.setLastOperation(requestOp);
1823                docs.add(doc);
1824            } else {
1825                docs.add(doc);
1826            }
1827
1828            if ( docs.size() > maxDocs ){
1829                storeInPersistance();
1830            }
1831        }
1832
1833        public synchronized void storeInPersistance()
1834        throws IOException, JahiaException {
1835            if ( docs.size() ==0 ) {
1836                this.setLastOperation(UNDEFINED);
1837                return;
1838            }
1839            Vector luceneDocs = new Vector();
1840            Document luceneDoc = null;
1841            int size = docs.size();
1842            JahiaIndexableDocument doc = null;
1843            for ( int i=0; i<size; i++ ){
1844                doc = (JahiaIndexableDocument)docs.get(i);
1845                luceneDoc = getLuceneDocument(doc);
1846                if ( luceneDoc != null ){
1847                    luceneDocs.add(luceneDoc);
1848                }
1849            }
1850
1851            if ( this.getLastOperation() == ADD ) {
1852                IndexReader reader = null;
1853                try {
1854                    reader = getIndexReader(siteId);
1855                    size = luceneDocs.size();
1856                    doc = null;
1857                    for ( int i=0; i<size; i++ ){
1858                        doc = (JahiaIndexableDocument)docs.get(i);
1859                        try {
1860                            Term term = new Term(doc.getKeyFieldName(),
1861                                                 doc.getKey());
1862                            reader.deleteDocuments(term);
1863                        } catch ( Throwable JavaDoc t ){
1864                            logger.debug("Error removing document from index",t);
1865                        }
1866                    }
1867                } catch ( Throwable JavaDoc t ) {
1868                    logger.debug("Error removing doc from index", t);
1869                } finally {
1870                    closeIndexReader(reader);
1871                }
1872
1873                RAMDirectory ramDir = new RAMDirectory();
1874                IndexWriter ramWriter = new IndexWriter(ramDir,indexAnalyzer,true);
1875                ramWriter.setMergeFactor(50);
1876                ramWriter.setMaxBufferedDocs(1000);
1877                luceneDoc = null;
1878                size = luceneDocs.size();
1879                for ( int i=0; i<size; i++ ){
1880                    luceneDoc = (Document)luceneDocs.get(i);
1881                    if (luceneDoc != null){
1882                        ramWriter.addDocument(luceneDoc);
1883                    }
1884                }
1885
1886                luceneDocs = null;
1887                docs = null;
1888
1889                ramWriter.close();
1890
1891                IndexWriter fsWriter = null;
1892                try {
1893                    fsWriter = getIndexWriter(siteId,indexAnalyzer,false);
1894                    if ( fsWriter == null ){
1895                        fsWriter = getIndexWriter(siteId,indexAnalyzer,true);
1896                    }
1897                    if ( fsWriter != null ){
1898                        SiteIndexationStatus status = getSiteIndexationStatus(siteId);
1899                        if ( status != null && !status.isDone() ){
1900                            if ( fsWriter.getMaxBufferedDocs() < 1000 ){
1901                                fsWriter.setMaxBufferedDocs(1000);
1902                            }
1903                            if ( fsWriter.getMergeFactor() < 30 ){
1904                                fsWriter.setMergeFactor(30);
1905                            }
1906                        }
1907                        /*
1908                        luceneDoc = null;
1909                        size = luceneDocs.size();
1910                        for ( int i=0; i<size; i++ ){
1911                            luceneDoc = (Document)luceneDocs.get(i);
1912                            if (luceneDoc != null){
1913                                fsWriter.addDocument(luceneDoc);
1914                            }
1915                        }*/

1916                        fsWriter.addIndexes(new Directory[]{ramDir});
1917                    }
1918                } catch ( Throwable JavaDoc t ) {
1919                    logger.debug("Error adding doc from index", t);
1920                } finally {
1921                    closeIndexWriter(fsWriter);
1922                    ramDir = null;
1923                    ramWriter = null;
1924                    fsWriter = null;
1925                }
1926            } else if ( this.getLastOperation() == REMOVE ) {
1927                IndexReader reader = getIndexReader(siteId);
1928                size = docs.size();
1929                doc = null;
1930                for ( int i=0; i<size; i++ ){
1931                    doc = (JahiaIndexableDocument)docs.get(i);
1932                    backgroundRemoveObjectFromSearchEngine(doc,reader);
1933                    doc = null;
1934                }
1935                closeIndexReader(reader);
1936                reader = null;
1937            }
1938            this.docs = new Vector();
1939            this.setLastOperation(UNDEFINED);
1940        }
1941
1942        public int getSiteId() {
1943            return siteId;
1944        }
1945
1946        public void setSiteId(int siteId_) {
1947            this.siteId = siteId_;
1948        }
1949
1950        public int getMaxDocs() {
1951            return maxDocs;
1952        }
1953
1954        public void setMaxDocs(int maxDocs_) {
1955            this.maxDocs = maxDocs_;
1956        }
1957
1958        public int getNbDocs() {
1959            return docs.size();
1960        }
1961
1962        public synchronized Vector getDocs() {
1963            return docs;
1964        }
1965
1966        public synchronized void setDocs(Vector docs_) {
1967            this.docs = docs_;
1968        }
1969
1970        public int getLastOperation() {
1971            return lastOperation;
1972        }
1973
1974        public void setLastOperation(int lastOperation_) {
1975            this.lastOperation = lastOperation_;
1976        }
1977
1978    }
1979
1980    private static String JavaDoc[] getStopWords()
1981    {
1982      return EMPTY_STOP_WORDS;
1983    }
1984    
1985    private Method JavaDoc getSearchMethod(String JavaDoc methodName)
1986    {
1987      if (!searchMethodMap.containsKey(methodName))
1988      {
1989        synchronized (searchMethodMap)
1990        {
1991          if (!searchMethodMap.containsKey(methodName))
1992          {
1993            try
1994            {
1995              Class JavaDoc[] params = {JahiaSearcher.class, ParamBean.class};
1996              Method JavaDoc theMethod = SearchEngine.class.getMethod(methodName, params);
1997              searchMethodMap.put(methodName, theMethod);
1998            }
1999            catch (Exception JavaDoc ex)
2000            {
2001              logger.fatal("Unable to retrieve search method for name "
2002                + methodName, ex);
2003              throw new RuntimeException JavaDoc(
2004                "Unable to retrieve search method for name " + methodName, ex);
2005            }
2006          }
2007        }
2008      }
2009  
2010      return (Method JavaDoc)searchMethodMap.get(methodName);
2011    }
2012    
2013    private static boolean doAutoAppendWildcards()
2014    {
2015      return false;
2016// TODO: make it configuratble AppConfigService.getBoolean("gen.search.autoAppendWildcards",
2017
// AppContextManager.getSystemAppContext());
2018
}
2019}
2020
Popular Tags