KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jetspeed > services > search > lucene > LuceneSearchService


1 /*
2  * Copyright 2000-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.jetspeed.services.search.lucene;
18
19 // Java imports
20
import java.io.File JavaDoc;
21 import java.io.IOException JavaDoc;
22 import java.net.URL JavaDoc;
23 import javax.servlet.ServletConfig JavaDoc;
24 import java.util.Collection JavaDoc;
25 import java.util.ArrayList JavaDoc;
26 import java.util.HashMap JavaDoc;
27 import java.util.Iterator JavaDoc;
28 import java.util.Map JavaDoc;
29 import java.util.Set JavaDoc;
30
31 // Jetspeed imports
32
import org.apache.commons.collections.MultiHashMap;
33 import org.apache.commons.collections.MultiMap;
34 import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
35 import org.apache.jetspeed.services.logging.JetspeedLogger;
36 import org.apache.jetspeed.services.search.HandlerFactory;
37 import org.apache.jetspeed.services.search.ObjectHandler;
38 import org.apache.jetspeed.services.search.ParsedObject;
39 import org.apache.jetspeed.services.search.BaseParsedObject;
40 import org.apache.jetspeed.services.search.SearchResults;
41 import org.apache.jetspeed.services.search.SearchService;
42
43 // Turbine imports
44
import org.apache.turbine.services.InitializationException;
45 import org.apache.turbine.services.resources.ResourceService;
46 import org.apache.turbine.services.servlet.TurbineServlet;
47 import org.apache.turbine.services.TurbineBaseService;
48 import org.apache.turbine.services.TurbineServices;
49
50 // Lucene imports
51
import org.apache.lucene.analysis.Analyzer;
52 import org.apache.lucene.analysis.standard.StandardAnalyzer;
53 import org.apache.lucene.document.Document;
54 import org.apache.lucene.document.Field;
55 import org.apache.lucene.index.Term;
56 import org.apache.lucene.index.IndexWriter;
57 import org.apache.lucene.index.IndexReader;
58 import org.apache.lucene.queryParser.MultiFieldQueryParser;
59 import org.apache.lucene.queryParser.ParseException;
60 import org.apache.lucene.queryParser.QueryParser;
61 import org.apache.lucene.search.IndexSearcher;
62 import org.apache.lucene.search.Hits;
63 import org.apache.lucene.search.Query;
64 import org.apache.lucene.search.Searcher;
65
66 /**
67  * Lucene implementation of search service.
68  *
69  * @author <a HREF="mailto:taylor@apache.org">David Sean Taylor</a>
70  * @author <a HREF="mailto:caius1440@hotmail.com">Jeremy Ford</a>
71  * @author <a HREF="mailto:morciuch@apache.org">Mark Orciuch</a>
72  * @version $Id: LuceneSearchService.java,v 1.10 2004/03/05 03:49:15 jford Exp $
73  */

74 public class LuceneSearchService extends TurbineBaseService implements SearchService
75 {
76     /**
77      * Static initialization of the logger for this class
78      */

79     private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(LuceneSearchService.class.getName());
80     
81     private static final int KEYWORD = 0;
82     private static final int TEXT = 1;
83     
84     private static final String JavaDoc CONFIG_DIRECTORY = "directory";
85     private File JavaDoc rootDir = null;
86     private String JavaDoc indexRoot = null;
87
88     /**
89      * This is the early initialization method called by the
90      * Turbine <code>Service</code> framework
91      * @param conf The <code>ServletConfig</code>
92      * @exception throws a <code>InitializationException</code> if the service
93      * fails to initialize
94      */

95     public synchronized void init(ServletConfig JavaDoc conf) throws InitializationException
96     {
97
98         // already initialized
99
if (getInit())
100         {
101             return;
102         }
103
104         initConfiguration(conf);
105
106         // initialization done
107
setInit(true);
108
109     }
110
111     /**
112      * This is the lateinitialization method called by the
113      * Turbine <code>Service</code> framework
114      *
115      * @exception throws a <code>InitializationException</code> if the service
116      * fails to initialize
117      */

118     public void init() throws InitializationException
119     {
120         logger.info("Late init for " + SearchService.SERVICE_NAME + " called");
121         while (!getInit())
122         {
123             //Not yet...
124
try
125             {
126                 Thread.sleep(100);
127                 logger.info("Waiting for init of " + SearchService.SERVICE_NAME + "...");
128             }
129             catch (InterruptedException JavaDoc ie)
130             {
131                 logger.error("Exception", ie);
132             }
133         }
134     }
135
136     /**
137      * This is the shutdown method called by the
138      * Turbine <code>Service</code> framework
139      */

140     public void shutdown()
141     {
142     }
143
144     /**
145      * Loads the configuration parameters for this service from the
146      * JetspeedResources.properties file.
147      *
148      * @exception throws a <code>InitializationException</code> if the service
149      * fails to initialize
150      */

151     private void initConfiguration(ServletConfig JavaDoc conf) throws InitializationException
152     {
153         if (getInit())
154         {
155             return;
156         }
157
158         // get configuration parameters from Jetspeed Resources
159
ResourceService serviceConf = ((TurbineServices) TurbineServices.getInstance())
160                                       .getResources(SearchService.SERVICE_NAME);
161
162         // Get config properties
163
indexRoot = serviceConf.getString(CONFIG_DIRECTORY);
164         //
165
// The following section opens or creates the search index
166
//
167
//
168
rootDir = new File JavaDoc(indexRoot);
169
170         //If the rootDir does not exist, treat it as context relative
171
if (!rootDir.exists())
172         {
173             if (indexRoot != null)
174             {
175                 String JavaDoc rootDirPath = TurbineServlet.getRealPath("") + indexRoot;
176                 rootDir = new File JavaDoc(rootDirPath);
177                 if (!rootDir.exists())
178                 {
179                     rootDir.mkdir();
180                     logger.info("Created index directory '" + rootDir.getPath() + "'");
181                 }
182             }
183         }
184
185         try
186         {
187             Searcher searcher = null;
188             searcher = new IndexSearcher(rootDir.getPath());
189             searcher.close();
190         }
191         catch (Exception JavaDoc e)
192         {
193             try
194             {
195                 IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), true);
196                 indexWriter.close();
197                 indexWriter = null;
198                 logger.info("Created Lucene Index in " + rootDir.getPath());
199             }
200             catch (Exception JavaDoc e1)
201             {
202                 logger.error(this.getClass().getName() + ".initConfiguration - Getting or creating IndexSearcher", e);
203                 throw new InitializationException("Getting or creating Index Searcher");
204             }
205         }
206
207         //Mark that we are done
208
setInit(true);
209     }
210
211     /**
212      * Search
213      *
214      * @task Parse content into title and description fields
215      * @param searchString
216      * is the what is being searched for
217      * @return Hits, if no hits then null.
218      */

219     public SearchResults search(String JavaDoc searchString)
220     {
221         Searcher searcher = null;
222         Hits hits = null;
223         
224         try
225         {
226             searcher = new IndexSearcher(rootDir.getPath());
227         }
228         catch (IOException JavaDoc e)
229         {
230             logger.error("Failed to create index search using path " + rootDir.getPath());
231             return null;
232         }
233         
234         Analyzer analyzer = new StandardAnalyzer();
235         
236         String JavaDoc[] searchFields = {ParsedObject.FIELDNAME_CONTENT, ParsedObject.FIELDNAME_DESCRIPTION, ParsedObject.FIELDNAME_FIELDS,
237                            ParsedObject.FIELDNAME_KEY, ParsedObject.FIELDNAME_KEYWORDS, ParsedObject.FIELDNAME_LANGUAGE,
238                            ParsedObject.FIELDNAME_SCORE, ParsedObject.FIELDNAME_TITLE, ParsedObject.FIELDNAME_TYPE,
239                            ParsedObject.FIELDNAME_URL, ParsedObject.FIELDNAME_CLASSNAME};
240                             
241         Query query= null;
242         try
243         {
244             query = MultiFieldQueryParser.parse(searchString, searchFields, analyzer);
245 // Query query = QueryParser.parse(searchString, ParsedObject.FIELDNAME_CONTENT, analyzer);
246
}
247         catch (ParseException e)
248         {
249             logger.info("Failed to parse query " + searchString);
250             return null;
251         }
252         
253         try
254         {
255             hits = searcher.search(query);
256         }
257         catch (IOException JavaDoc e)
258         {
259            logger.error("Error while peforming search.", e);
260            return null;
261         }
262
263         // Copy hits to the result list
264
int hitCount = hits.length();
265         Document doc = null;
266         SearchResults results = new SearchResults(hitCount);
267         for (int counter = 0; counter < hitCount; counter++)
268         {
269             ParsedObject result = new BaseParsedObject();
270             try
271             {
272                 doc = hits.doc(counter);
273                 addFieldsToParsedObject(doc, result);
274                 
275                 result.setScore(hits.score(counter));
276                 result.setType(doc.getField(ParsedObject.FIELDNAME_TYPE).stringValue());
277                 result.setKey(doc.getField(ParsedObject.FIELDNAME_KEY).stringValue());
278                 result.setDescription(doc.getField(ParsedObject.FIELDNAME_DESCRIPTION).stringValue());
279                 result.setTitle(doc.getField(ParsedObject.FIELDNAME_TITLE).stringValue());
280                 result.setContent(doc.getField(ParsedObject.FIELDNAME_CLASSNAME).stringValue());
281                 Field language = doc.getField(ParsedObject.FIELDNAME_LANGUAGE);
282                 if (language != null)
283                 {
284                     result.setLanguage(language.stringValue());
285                 }
286                 Field classname = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
287                 if (classname != null)
288                 {
289                     result.setClassName(classname.stringValue());
290                 }
291                 Field url = doc.getField(ParsedObject.FIELDNAME_URL);
292                 if (url != null)
293                 {
294                     result.setURL(new URL JavaDoc(url.stringValue()));
295                 }
296                 
297                 results.add(counter, result);
298             }
299             catch (Exception JavaDoc ioe)
300             {
301                 logger.error("Exception", ioe);
302             }
303         }
304
305         if (searcher != null)
306         {
307             try
308             {
309                 searcher.close();
310             }
311             catch (IOException JavaDoc ioe)
312             {
313                 logger.error("Closing Searcher", ioe);
314             }
315         }
316         return results;
317     }
318     
319     private void addFieldsToParsedObject(Document doc, ParsedObject o)
320     {
321         try
322         {
323             MultiMap multiKeywords = new MultiHashMap();
324             MultiMap multiFields = new MultiHashMap();
325             HashMap JavaDoc fieldMap = new HashMap JavaDoc();
326             
327             Field classNameField = doc.getField(ParsedObject.FIELDNAME_CLASSNAME);
328             if(classNameField != null)
329             {
330                 String JavaDoc className = classNameField.stringValue();
331                 o.setClassName(className);
332                 ObjectHandler handler = HandlerFactory.getHandler(className);
333                 
334                 Set JavaDoc fields = handler.getFields();
335                 addFieldsToMap(doc, fields, multiFields);
336                 addFieldsToMap(doc, fields, fieldMap);
337                 
338                 Set JavaDoc keywords = handler.getKeywords();
339                 addFieldsToMap(doc, keywords, multiKeywords);
340             }
341             
342             o.setMultiKeywords(multiKeywords);
343             o.setMultiFields(multiFields);
344             o.setFields(fieldMap);
345         }
346         catch(Exception JavaDoc e)
347         {
348             logger.error("Error trying to add fields to parsed object.", e);
349         }
350     }
351     
352     private void addFieldsToMap(Document doc, Set JavaDoc fieldNames, Map JavaDoc fields)
353     {
354         Iterator JavaDoc fieldIter = fieldNames.iterator();
355         while(fieldIter.hasNext())
356         {
357             String JavaDoc fieldName = (String JavaDoc)fieldIter.next();
358             Field[] docFields = doc.getFields(fieldName);
359             if(fields != null)
360             {
361                 for(int i=0; i<docFields.length; i++)
362                 {
363                     Field field = docFields[i];
364                     if(field != null)
365                     {
366                         String JavaDoc value = field.stringValue();
367                         fields.put(fieldName, value);
368                     }
369                 }
370             }
371         }
372     }
373
374     /**
375      *
376      * @return
377      */

378     public String JavaDoc[] getSearchSets()
379     {
380         return null;
381     }
382
383     /**
384      *
385      * @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Object)
386      * @param o
387      * @return
388      */

389     public boolean add(Object JavaDoc o)
390     {
391         Collection JavaDoc c = new ArrayList JavaDoc(1);
392         c.add(o);
393
394         return add(c);
395     }
396
397     /**
398      *
399      * @see org.apache.jetspeed.services.search.SearchService#add(java.lang.Collection)
400      * @param c
401      * @return
402      */

403     public boolean add(Collection JavaDoc c)
404     {
405         boolean result = false;
406
407         IndexWriter indexWriter;
408         try
409         {
410             indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
411         }
412         catch (IOException JavaDoc e)
413         {
414             logger.error("Error while creating index writer. Skipping add...", e);
415             return result;
416         }
417
418         Iterator JavaDoc it = c.iterator();
419         while (it.hasNext())
420         {
421             Object JavaDoc o = it.next();
422             // Look up appropriate handler
423
ObjectHandler handler = null;
424             try
425             {
426                 handler = HandlerFactory.getHandler(o);
427             }
428             catch (Exception JavaDoc e)
429             {
430                 logger.error("Failed to create hanlder for object " + o.getClass().getName());
431                 continue;
432             }
433
434             // Parse the object
435
ParsedObject parsedObject = handler.parseObject(o);
436
437             // Create document
438
Document doc = new Document();
439
440             // Populate document from the parsed object
441
if (parsedObject.getKey() != null)
442             {
443                 doc.add(Field.Keyword(ParsedObject.FIELDNAME_KEY, parsedObject.getKey()));
444             }
445             if (parsedObject.getType() != null)
446             {
447                 doc.add(Field.Text(ParsedObject.FIELDNAME_TYPE, parsedObject.getType()));
448             }
449             if (parsedObject.getTitle() != null)
450             {
451                 doc.add(Field.Text(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle()));
452             }
453             if (parsedObject.getDescription() != null)
454             {
455                 doc.add(Field.Text(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription()));
456             }
457             if (parsedObject.getContent() != null)
458             {
459                 doc.add(Field.Text(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent()));
460             }
461             if (parsedObject.getLanguage() != null)
462             {
463                 doc.add(Field.Text(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage()));
464             }
465             if (parsedObject.getURL() != null)
466             {
467                 doc.add(Field.Text(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString()));
468             }
469             if(parsedObject.getClassName() != null)
470             {
471                 doc.add(Field.Text(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName()));
472             }
473
474             MultiMap multiKeywords = parsedObject.getMultiKeywords();
475             addFieldsToDocument(doc, multiKeywords, KEYWORD);
476             
477             MultiMap multiFields = parsedObject.getMultiFields();
478             addFieldsToDocument(doc, multiFields, TEXT);
479             
480             Map JavaDoc fields = parsedObject.getFields();
481             addFieldsToDocument(doc, fields, TEXT);
482
483             // Add the document to search index
484
try
485             {
486                 indexWriter.addDocument(doc);
487             }
488             catch (IOException JavaDoc e)
489             {
490                logger.error("Error adding document to index.", e);
491             }
492             logger.debug("Index Document Count = " + indexWriter.docCount());
493             logger.info("Added '" + parsedObject.getTitle() + "' to index");
494             result = true;
495         }
496
497         try
498         {
499             indexWriter.optimize();
500         }
501         catch (IOException JavaDoc e)
502         {
503             logger.error("Error while trying to optimize index.");
504         }
505         finally
506         {
507             try
508             {
509                 indexWriter.close();
510             }
511             catch (IOException JavaDoc e)
512             {
513                logger.error("Error while closing index writer.", e);
514             }
515         }
516
517         return result;
518     }
519     
520     private void addFieldsToDocument(Document doc, Map JavaDoc fields, int type)
521     {
522         if(fields != null)
523         {
524             Iterator JavaDoc keyIter = fields.keySet().iterator();
525             while(keyIter.hasNext())
526             {
527                 Object JavaDoc key = keyIter.next();
528                 if(key != null)
529                 {
530                     Object JavaDoc values = fields.get(key);
531                     if(values != null)
532                     {
533                         if(values instanceof Collection JavaDoc)
534                         {
535                             Iterator JavaDoc valueIter = ((Collection JavaDoc)values).iterator();
536                             while(valueIter.hasNext())
537                             {
538                                 Object JavaDoc value = valueIter.next();
539                                 if(value != null)
540                                 {
541                                     if(type == TEXT)
542                                     {
543                                         doc.add(Field.Text(key.toString(), value.toString()));
544                                     }
545                                     else
546                                     {
547                                         doc.add(Field.Keyword(key.toString(), value.toString()));
548                                     }
549                                 }
550                             }
551                         }
552                         else
553                         {
554                             if(type == TEXT)
555                             {
556                                 doc.add(Field.Text(key.toString(), values.toString()));
557                             }
558                             else
559                             {
560                                 doc.add(Field.Keyword(key.toString(), values.toString()));
561                             }
562                         }
563                     }
564                 }
565             }
566         }
567     }
568
569     /**
570      *
571      * @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Object)
572      * @param o
573      * @return
574      */

575     public boolean remove(Object JavaDoc o)
576     {
577         Collection JavaDoc c = new ArrayList JavaDoc(1);
578         c.add(o);
579
580         return remove(c);
581     }
582
583     /**
584      *
585      * @see org.apache.jetspeed.services.search.SearchService#remove(java.lang.Collection)
586      * @param c
587      * @return
588      */

589     public boolean remove(Collection JavaDoc c)
590     {
591         boolean result = false;
592
593         try
594         {
595             IndexReader indexReader = IndexReader.open(this.rootDir);
596
597             Iterator JavaDoc it = c.iterator();
598             while (it.hasNext())
599             {
600                 Object JavaDoc o = it.next();
601                 // Look up appropriate handler
602
ObjectHandler handler = HandlerFactory.getHandler(o);
603
604                 // Parse the object
605
ParsedObject parsedObject = handler.parseObject(o);
606
607                 // Create term
608
Term term = null;
609
610                 if (parsedObject.getKey() != null)
611                 {
612                     term = new Term(ParsedObject.FIELDNAME_KEY, parsedObject.getKey());
613                     // Remove the document from search index
614
int rc = indexReader.delete(term);
615                     logger.info("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
616                     //System.out.println("Attempted to delete '" + term.toString() + "' from index, documents deleted = " + rc);
617
result = rc > 0;
618                 }
619             }
620
621             indexReader.close();
622
623             IndexWriter indexWriter = new IndexWriter(rootDir, new StandardAnalyzer(), false);
624             indexWriter.optimize();
625             indexWriter.close();
626
627         }
628         catch (Exception JavaDoc e)
629         {
630             logger.error("Exception", e);
631             result = false;
632         }
633
634         return result;
635     }
636
637     /**
638      *
639      * @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Object)
640      * @param o
641      * @return
642      */

643     public boolean update(Object JavaDoc o)
644     {
645         Collection JavaDoc c = new ArrayList JavaDoc(1);
646         c.add(o);
647
648         return update(c);
649     }
650     /**
651      * Updates an index entry. For now, it's a remove and add.
652      *
653      * @param c
654      * @return
655      * @see org.apache.jetspeed.services.search.SearchService#update(java.lang.Collection)
656      */

657     public boolean update(Collection JavaDoc c)
658     {
659         boolean result = false;
660
661         try
662         {
663             // Delete entries from index
664
remove(c);
665             result = true;
666         }
667         catch (Throwable JavaDoc e)
668         {
669             logger.error("Exception", e);
670         }
671
672         try
673         {
674             // Add entries to index
675
add(c);
676             result = true;
677         }
678         catch (Throwable JavaDoc e)
679         {
680             logger.error("Exception", e);
681         }
682
683         return false;
684     }
685
686 }
687
Popular Tags