KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > columba > mail > folder > search > LuceneQueryEngine


//The contents of this file are subject to the Mozilla Public License Version 1.1
//(the "License"); you may not use this file except in compliance with the
//License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
//
//Software distributed under the License is distributed on an "AS IS" basis,
//WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
//for the specific language governing rights and
//limitations under the License.
//
//The Original Code is "The Columba Project"
//
//The Initial Developers of the Original Code are Frederik Dietz and Timo Stich.
//Portions created by Frederik Dietz and Timo Stich are Copyright (C) 2003.
//
//All Rights Reserved.
package org.columba.mail.folder.search;
17
18 import java.io.File JavaDoc;
19 import java.io.IOException JavaDoc;
20 import java.io.StringReader JavaDoc;
21 import java.util.Arrays JavaDoc;
22 import java.util.LinkedList JavaDoc;
23 import java.util.List JavaDoc;
24 import java.util.ListIterator JavaDoc;
25 import java.util.logging.Logger JavaDoc;
26
27 import javax.swing.JOptionPane JavaDoc;
28
29 import org.apache.lucene.analysis.Analyzer;
30 import org.apache.lucene.analysis.Token;
31 import org.apache.lucene.analysis.TokenStream;
32 import org.apache.lucene.analysis.standard.StandardAnalyzer;
33 import org.apache.lucene.document.Document;
34 import org.apache.lucene.document.Field;
35 import org.apache.lucene.index.IndexReader;
36 import org.apache.lucene.index.IndexWriter;
37 import org.apache.lucene.index.Term;
38 import org.apache.lucene.search.BooleanQuery;
39 import org.apache.lucene.search.Hits;
40 import org.apache.lucene.search.IndexSearcher;
41 import org.apache.lucene.search.Query;
42 import org.apache.lucene.search.WildcardQuery;
43 import org.apache.lucene.store.Directory;
44 import org.apache.lucene.store.FSDirectory;
45 import org.apache.lucene.store.RAMDirectory;
46 import org.columba.api.command.IStatusObservable;
47 import org.columba.core.base.ListTools;
48 import org.columba.core.filter.FilterCriteria;
49 import org.columba.core.filter.FilterRule;
50 import org.columba.core.filter.IFilterCriteria;
51 import org.columba.core.filter.IFilterRule;
52 import org.columba.core.io.DiskIO;
53 import org.columba.core.io.StreamUtils;
54 import org.columba.mail.folder.IMailbox;
55 import org.columba.mail.folder.event.IFolderEvent;
56 import org.columba.mail.message.ICloseableIterator;
57 import org.columba.mail.message.IHeaderList;
58 import org.columba.mail.util.MailResourceLoader;
59 import org.columba.ristretto.message.MimePart;
60 import org.columba.ristretto.message.MimeTree;
61
62 /**
63  * @author timo
64  */

65 public class LuceneQueryEngine implements QueryEngine {
66
67     /** JDK 1.4+ logging framework logger, used for logging. */
68     private static final Logger JavaDoc LOG = Logger
69             .getLogger("org.columba.mail.folder.search");
70
71     private static final int OPTIMIZE_AFTER_N_OPERATIONS = 30;
72
73     private static final String JavaDoc[] CAPS = { "Body" };
74
75     private File JavaDoc indexDir;
76
77     private IndexReader fileIndexReader;
78
79     private IndexReader ramIndexReader;
80
81     private Directory luceneIndexDir;
82
83     private Directory ramIndexDir;
84
85     private long ramLastModified;
86
87     private long luceneLastModified;
88
89     private LinkedList JavaDoc deleted;
90
91     private int operationCounter;
92
93     private Analyzer analyzer;
94
95     private IMailbox folder;
96
97     /**
98      * Constructor for LuceneQueryEngine.
99      */

100     public LuceneQueryEngine(IMailbox folder) {
101         this.folder = folder;
102
103         analyzer = new StandardAnalyzer();
104
105         try {
106             initRAMDir();
107         } catch (IOException JavaDoc e) {
108             e.printStackTrace();
109         }
110
111         luceneLastModified = -1;
112         ramLastModified = -1;
113
114         deleted = new LinkedList JavaDoc();
115         operationCounter = 0;
116
117         File JavaDoc folderInDir = folder.getDirectoryFile();
118         indexDir = new File JavaDoc(folderInDir, ".index");
119
120         try {
121             if (!indexDir.exists()) {
122                 createIndex();
123             }
124
125             luceneIndexDir = FSDirectory.getDirectory(indexDir, false);
126         } catch (IOException JavaDoc e) {
127             JOptionPane.showMessageDialog(null, e.getLocalizedMessage(),
128                     "Error while creating Lucene Index",
129                     JOptionPane.ERROR_MESSAGE);
130         }
131
132         try {
133             // If there is an existing lock then it must be from a
134
// previous crash -> remove it!
135
if (IndexReader.isLocked(luceneIndexDir)) {
136                 IndexReader.unlock(luceneIndexDir);
137             }
138         } catch (IOException JavaDoc e) {
139             // Remove of lock didn't work -> delete by hand
140
File JavaDoc commitLock = new File JavaDoc(indexDir, "commit.lock");
141
142             if (commitLock.exists()) {
143                 commitLock.delete();
144             }
145
146             File JavaDoc writeLock = new File JavaDoc(indexDir, "write.lock");
147
148             if (writeLock.exists()) {
149                 writeLock.delete();
150             }
151         }
152
153         // Check if index is consitent with mailbox
154
try {
155             if( getFileReader().numDocs() != folder.getHeaderList().count() ) {
156                 LOG.warning("Lucene Index includes " + getFileReader().numDocs() + " messages, but mailbox has " + folder.getHeaderList().count());
157                 sync();
158             }
159         } catch (Exception JavaDoc e) {
160             LOG.severe(e.getMessage());
161             e.printStackTrace();
162         }
163     }
164
165     protected void createIndex() throws IOException JavaDoc {
166         DiskIO.ensureDirectory(indexDir);
167
168         IndexWriter indexWriter = new IndexWriter(indexDir, null, true);
169         indexWriter.close();
170     }
171
172     protected IndexReader getFileReader() {
173         try {
174             // @TODO dont use deprecated method
175
if (IndexReader.getCurrentVersion(luceneIndexDir) != luceneLastModified) {
176                 fileIndexReader = IndexReader.open(luceneIndexDir);
177                 // @TODO dont use deprecated method
178
luceneLastModified = IndexReader
179                         .getCurrentVersion(luceneIndexDir);
180             }
181         } catch (IOException JavaDoc e) {
182             LOG.severe(e.getLocalizedMessage());
183             try {
184                 reset();
185             } catch (Exception JavaDoc e1) {
186                 LOG.severe(e.getLocalizedMessage());
187             }
188         }
189
190         return fileIndexReader;
191     }
192
193     protected IndexReader getRAMReader() {
194         try {
195             if (IndexReader.getCurrentVersion(ramIndexDir) != ramLastModified) {
196                 ramIndexReader = IndexReader.open(ramIndexDir);
197                 ramLastModified = IndexReader.getCurrentVersion(ramIndexDir);
198             }
199         } catch (IOException JavaDoc e) {
200             e.printStackTrace();
201         }
202
203         return ramIndexReader;
204     }
205
206     private Query getLuceneQuery(IFilterRule filterRule, Analyzer analyzer) {
207         IFilterCriteria criteria;
208         String JavaDoc field;
209         int mode;
210
211         Query result = new BooleanQuery();
212         Query subresult = null;
213
214         int condition = filterRule.getConditionInt();
215         boolean prohibited;
216         boolean required;
217
218         if (condition == FilterRule.MATCH_ALL) {
219             prohibited = false;
220             required = true;
221         } else {
222             prohibited = false;
223             required = false;
224         }
225
226         BooleanQuery termQuery = null;
227
228         for (int i = 0; i < filterRule.count(); i++) {
229             criteria = filterRule.get(i);
230             mode = criteria.getCriteria();
231
232             field = "Body";
233
234             TokenStream tokenStream = analyzer.tokenStream(field,
235                     new StringReader JavaDoc(criteria.getPatternString()));
236
237             termQuery = new BooleanQuery();
238
239             try {
240                 Token token = tokenStream.next();
241
242                 while (token != null) {
243                     String JavaDoc pattern = "*" + token.termText() + "*";
244                     LOG.info("Field = \"" + field + "\" Text = \"" + pattern
245                             + "\"");
246                     termQuery.add(new WildcardQuery(new Term(field, pattern)),
247                             true, false);
248
249                     token = tokenStream.next();
250                 }
251             } catch (IOException JavaDoc e) {
252                 e.printStackTrace();
253             }
254
255             switch (mode) {
256             case FilterCriteria.CONTAINS: {
257                 subresult = new BooleanQuery();
258                 ((BooleanQuery) subresult).add(termQuery, true, false);
259
260                 break;
261             }
262
263             case FilterCriteria.CONTAINS_NOT: {
264                 subresult = new BooleanQuery();
265                 ((BooleanQuery) subresult).add(new WildcardQuery(new Term(
266                         "uid", "*")), true, false);
267                 ((BooleanQuery) subresult).add(termQuery, false, true);
268
269                 break;
270             }
271             }
272
273             ((BooleanQuery) result).add(subresult, required, prohibited);
274         }
275
276         return result;
277     }
278
279     public List JavaDoc queryEngine(IFilterRule filter) throws Exception JavaDoc {
280         Query query = getLuceneQuery(filter, analyzer);
281
282         List JavaDoc result = search(query);
283
284         ListTools.substract(result, deleted);
285
286         if (!checkResult(result)) {
287             // Search again
288
result = search(query);
289             ListTools.substract(result, deleted);
290         }
291
292         return result;
293     }
294
295     protected List JavaDoc search(Query query) throws IOException JavaDoc {
296         LinkedList JavaDoc result = new LinkedList JavaDoc();
297
298         if (getFileReader().numDocs() > 0) {
299             Hits hitsFile = new IndexSearcher(getFileReader()).search(query);
300
301             for (int i = 0; i < hitsFile.length(); i++) {
302                 result.add(new Integer JavaDoc(hitsFile.doc(i).getField("uid")
303                         .stringValue()));
304             }
305         }
306
307         if (getRAMReader().numDocs() > 0) {
308             Hits hitsRAM = new IndexSearcher(getRAMReader()).search(query);
309
310             for (int i = 0; i < hitsRAM.length(); i++) {
311                 result.add(new Integer JavaDoc(hitsRAM.doc(i).getField("uid")
312                         .stringValue()));
313             }
314         }
315
316         return result;
317     }
318
319     public List JavaDoc queryEngine(IFilterRule filter, Object JavaDoc[] uids) throws Exception JavaDoc {
320         List JavaDoc result = queryEngine(filter);
321
322         ListTools.intersect(result, Arrays.asList(uids));
323
324         return result;
325     }
326
327     /**
328      * @see org.columba.mail.folder.SearchEngineInterface#messageAdded(IFolderEvent)
329      */

330     public void messageAdded(Object JavaDoc uid) throws Exception JavaDoc {
331         Document messageDoc = getDocument(uid);
332
333         IndexWriter writer = new IndexWriter(ramIndexDir, analyzer, false);
334         writer.addDocument(messageDoc);
335         writer.close();
336         incOperationCounter();
337     }
338
339     private Document getDocument(Object JavaDoc uid) {
340         Document messageDoc = new Document();
341
342         messageDoc.add(Field.Keyword("uid", uid.toString()));
343
344         // Find the body text part
345
try {
346             MimeTree mimeTree = folder.getMimePartTree(uid);
347             MimePart bodyPart = mimeTree.getFirstTextPart("plain");
348
349             if (bodyPart != null) {
350                 messageDoc.add(Field.UnStored("Body", StreamUtils
351                         .readCharacterStream(
352                                 folder.getMimePartBodyStream(uid, bodyPart
353                                         .getAddress())).toString()));
354             }
355         } catch (IOException JavaDoc e) {
356             e.printStackTrace();
357             LOG.severe(e.getMessage());
358         } catch (Exception JavaDoc e) {
359             e.printStackTrace();
360             LOG.severe(e.getMessage());
361         }
362
363         return messageDoc;
364     }
365
366     /**
367      * @see org.columba.mail.folder.SearchEngineInterface#messageRemoved(IFolderEvent)
368      */

369     public void messageRemoved(Object JavaDoc uid) throws Exception JavaDoc {
370         deleted.add(uid);
371
372         /*
373          * try { indexLock.tryToGetLock(null); getReader().delete(new
374          * Term("uid", uid.toString())); indexLock.release(); } catch
375          * (IOException e) { JOptionPane.showMessageDialog( null,
376          * e.getMessage(), "Error while removing Message from Lucene Index",
377          * JOptionPane.ERROR_MESSAGE); }
378          */

379     }
380
381     protected void mergeRAMtoIndex() throws IOException JavaDoc {
382         IndexReader ramReader = getRAMReader();
383         IndexReader fileReader = getFileReader();
384
385         LOG.fine("Lucene: Merging RAMIndex to FileIndex");
386
387         /*
388          * Document doc; for( int i=0; i<ramReader.numDocs(); i++) { doc =
389          * ramReader.document(i); if( !deleted.contains(new
390          * Integer(ramReader.document(i).getField("uid").stringValue())) ) {
391          * fileIndex.addDocument(doc); } }
392          */

393         ListIterator JavaDoc it = deleted.listIterator();
394
395         while (it.hasNext()) {
396             String JavaDoc uid = it.next().toString();
397
398             if (ramReader.delete(new Term("uid", uid)) == 0) {
399                 fileReader.delete(new Term("uid", uid));
400             }
401         }
402
403         fileReader.close();
404         ramReader.close();
405
406         IndexWriter fileIndex = new IndexWriter(luceneIndexDir, analyzer, false);
407
408         fileIndex.addIndexes(new Directory[] { ramIndexDir });
409
410         fileIndex.optimize();
411         fileIndex.close();
412
413         initRAMDir();
414
415         deleted.clear();
416     }
417
418     private void initRAMDir() throws IOException JavaDoc {
419         ramIndexDir = new RAMDirectory();
420
421         IndexWriter writer = new IndexWriter(ramIndexDir, analyzer, true);
422         writer.close();
423         ramLastModified = -1;
424     }
425
426     private void incOperationCounter() throws IOException JavaDoc {
427         operationCounter++;
428
429         if (operationCounter > OPTIMIZE_AFTER_N_OPERATIONS) {
430             mergeRAMtoIndex();
431             operationCounter = 0;
432         }
433     }
434
435     /**
436      * Returns the caps.
437      *
438      * @return String[]
439      */

440     public String JavaDoc[] getCaps() {
441         return CAPS;
442     }
443
444     private boolean checkResult(List JavaDoc result) {
445         ListIterator JavaDoc it = result.listIterator();
446
447         try {
448             while (it.hasNext()) {
449                 if (!folder.exists(it.next())) {
450                     result.clear();
451                     sync();
452
453                     return false;
454                 }
455             }
456         } catch (Exception JavaDoc e) {
457             e.printStackTrace();
458         }
459
460         return true;
461     }
462
463     /**
464      * @see org.columba.mail.folder.DefaultSearchEngine#reset()
465      */

466     public void reset() throws Exception JavaDoc {
467         createIndex();
468     }
469
470     /** {@inheritDoc} */
471     public void sync() throws Exception JavaDoc {
472         LOG.severe("Lucene Index inconsistent - recreation forced");
473         IHeaderList hl = folder.getHeaderList();
474
475         if (getObservable() != null) {
476             getObservable().setMessage(
477                     MailResourceLoader.getString("statusbar", "message",
478                             "lucene_sync"));
479         }
480
481         getObservable().setCurrent(0);
482
483         try {
484             createIndex();
485
486             IndexWriter writer = new IndexWriter(luceneIndexDir, analyzer,
487                     false);
488
489             int count = hl.count();
490             getObservable().setCurrent(count);
491
492             Object JavaDoc uid;
493             int i = 0;
494             ICloseableIterator it;
495             for (it = hl.keyIterator(); it.hasNext();) {
496                 uid = it.next();
497                 
498                 writer.addDocument(getDocument(uid));
499
500                 getObservable().setCurrent(i);
501             }
502             it.close();
503
504             getObservable().setCurrent(count);
505
506             writer.optimize();
507             writer.close();
508         } catch (Exception JavaDoc e) {
509             LOG.severe("Creation of Lucene Index failed :"
510                     + e.getLocalizedMessage());
511
512             // show neat error dialog here
513
}
514     }
515
516     public IStatusObservable getObservable() {
517         return folder.getObservable();
518     }
519
520     public void save() {
521         try {
522             mergeRAMtoIndex();
523         } catch (IOException JavaDoc e) {
524             LOG.severe(e.getMessage());
525         }
526         
527     }
528
529 }
530
Popular Tags