KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > Yasna > forum > database > DbSearchIndexer


1 /**
2  * Copyright (C) 2001 Yasna.com. All rights reserved.
3  *
4  * ===================================================================
5  * The Apache Software License, Version 1.1
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in
16  * the documentation and/or other materials provided with the
17  * distribution.
18  *
19  * 3. The end-user documentation included with the redistribution,
20  * if any, must include the following acknowledgment:
21  * "This product includes software developed by
22  * Yasna.com (http://www.yasna.com)."
23  * Alternately, this acknowledgment may appear in the software itself,
24  * if and wherever such third-party acknowledgments normally appear.
25  *
26  * 4. The names "Yazd" and "Yasna.com" must not be used to
27  * endorse or promote products derived from this software without
28  * prior written permission. For written permission, please
29  * contact yazd@yasna.com.
30  *
31  * 5. Products derived from this software may not be called "Yazd",
32  * nor may "Yazd" appear in their name, without prior written
33  * permission of Yasna.com.
34  *
35  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38  * DISCLAIMED. IN NO EVENT SHALL YASNA.COM OR
39  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46  * SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of Yasna.com. For more information
51  * on Yasna.com, please see <http://www.yasna.com>.
52  */

53
54 /**
55  * Copyright (C) 2000 CoolServlets.com. All rights reserved.
56  *
57  * ===================================================================
58  * The Apache Software License, Version 1.1
59  *
60  * Redistribution and use in source and binary forms, with or without
61  * modification, are permitted provided that the following conditions
62  * are met:
63  *
64  * 1. Redistributions of source code must retain the above copyright
65  * notice, this list of conditions and the following disclaimer.
66  *
67  * 2. Redistributions in binary form must reproduce the above copyright
68  * notice, this list of conditions and the following disclaimer in
69  * the documentation and/or other materials provided with the
70  * distribution.
71  *
72  * 3. The end-user documentation included with the redistribution,
73  * if any, must include the following acknowledgment:
74  * "This product includes software developed by
75  * CoolServlets.com (http://www.coolservlets.com)."
76  * Alternately, this acknowledgment may appear in the software itself,
77  * if and wherever such third-party acknowledgments normally appear.
78  *
79  * 4. The names "Jive" and "CoolServlets.com" must not be used to
80  * endorse or promote products derived from this software without
81  * prior written permission. For written permission, please
82  * contact webmaster@coolservlets.com.
83  *
84  * 5. Products derived from this software may not be called "Jive",
85  * nor may "Jive" appear in their name, without prior written
86  * permission of CoolServlets.com.
87  *
88  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
89  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
90  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
91  * DISCLAIMED. IN NO EVENT SHALL COOLSERVLETS.COM OR
92  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
93  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
94  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
95  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
96  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
97  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
98  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99  * SUCH DAMAGE.
100  * ====================================================================
101  *
102  * This software consists of voluntary contributions made by many
103  * individuals on behalf of CoolServlets.com. For more information
104  * on CoolServlets.com, please see <http://www.coolservlets.com>.
105  */

106
107 package com.Yasna.forum.database;
108
109 import java.sql.*;
110 import java.util.*;
111 import java.io.*;
112
113 import org.apache.lucene.document.*;
114 import org.apache.lucene.analysis.*;
115 import org.apache.lucene.analysis.standard.*;
116 import org.apache.lucene.index.*;
117 import org.apache.lucene.store.*;
118
119 import com.Yasna.forum.*;
120 import com.Yasna.util.*;
121
122 /**
123  * Database implementation of SearchIndexer using the Lucene search package.
124  *
125  * Search indexes are stored in the "search" subdirectory of directory pointed to
126  * by the Yazd property "yazdHome".
127  */

128 public class DbSearchIndexer extends Thread JavaDoc implements SearchIndexer{
129
130     /** DATABASE QUERIES **/
131     //private static final String MESSAGES_BEFORE_DATE =
132
// "SELECT messageID FROM yazdMessage WHERE modifiedDate < ?";
133
private static final String JavaDoc MESSAGES_BEFORE_DATE =
134         "SELECT messageID, userID, yazdMessage.threadID, forumID, " +
135         "subject, body, yazdMessage.creationDate " +
136         "FROM yazdMessage, yazdThread WHERE yazdMessage.threadID=yazdThread.threadID " +
137         "AND yazdMessage.modifiedDate < ?";
138     private static final String JavaDoc MESSAGES_BEFORE_DATE_COUNT =
139         "SELECT count(messageID) FROM yazdMessage WHERE modifiedDate < ?";
140     private static final String JavaDoc MESSAGES_SINCE_DATE =
141         "SELECT messageID FROM yazdMessage WHERE modifiedDate > ? " +
142         "AND modifiedDate < ?";
143     private static final String JavaDoc MESSAGES_SINCE_DATE_COUNT =
144         "SELECT count(messageID) FROM yazdMessage WHERE modifiedDate > ? " +
145         "AND modifiedDate < ?";
146     private static final String JavaDoc LOAD_MESSAGE =
147         "SELECT subject, body, userID, yazdMessage.threadID, forumID, " +
148         "yazdMessage.creationDate FROM yazdMessage, yazdThread WHERE " +
149         "yazdMessage.threadID=yazdThread.threadID AND yazdMessage.messageID=?";
150
151     /**
152      * Path to where index is stored.
153      */

154     private static String JavaDoc indexPath = null;
155
156     /**
157      * Time constants (in milleseconds)
158      */

159     private static final long MINUTE = 1000 * 60;
160     private static final long HOUR = MINUTE * 60;
161
162     /**
163      * Maintains the amount of time that should elapse until the next index.
164      */

165     private long updateInterval;
166
167     /**
168      * Maintains the time that the last index took place.
169      */

170     private long lastIndexed;
171
172     /**
173      * Indicates whether auto-indexing should be on or off. When on, an update
174      * will be run at the "updateInterval".
175      */

176     private boolean autoIndex = true;
177
178     /**
179      * ForumFactory so that we can load message objects based on their ID.
180      */

181     private DbForumFactory factory;
182
183     /**
184      * Lock so that only one indexing function can be executed at once. Not
185      * locking could impact the database integrity. Therefore, in a cluster of
186      * Yazd servers all pointed at the same db, only one indexer should be
187      * running once.
188      */

189     private Object JavaDoc indexLock = new Object JavaDoc();
190
191     private static Analyzer analyzer = new StopAnalyzer();
192
193     /**
194      * Creates a new DbSearchIndexer. It attempts to load properties for
195      * the update interval and when the last index occured from the Yazd
196      * properties then starts the indexing thread.
197      */

198     public DbSearchIndexer(DbForumFactory factory) {
199         this.factory = factory;
200
201         //Default to performing updates ever 10 minutes.
202
updateInterval = 80 * MINUTE;
203         //If the update interval property exists, use that
204
String JavaDoc updInterval = PropertyManager.getProperty("DbSearchIndexer.updateInterval");
205         try {
206             updateInterval = Long.parseLong(updInterval);
207         }
208         catch (Exception JavaDoc e) { /* ignore */ }
209
210         //Attempt to get the last updated time from the Yazd properties
211
String JavaDoc lastInd = PropertyManager.getProperty("DbSearchIndexer.lastIndexed");
212         try {
213             lastIndexed = Long.parseLong(lastInd);
214         }
215         catch (Exception JavaDoc e) {
216             //Something went wrong. Therefore, set lastIndexed far into the past
217
//so that we'll do a full index.
218
lastIndexed = 0;
219         }
220         //Make this a daemon thread.
221
this.setDaemon(true);
222         //Start the indexing thread.
223
start();
224     }
225
226     public int getHoursUpdateInterval() {
227         return (int)(updateInterval / HOUR);
228     }
229
230     public int getMinutesUpdateInterval() {
231         return (int)((updateInterval - getHoursUpdateInterval()*HOUR) / MINUTE);
232     }
233
234     public void setUpdateInterval(int minutes, int hours) {
235         updateInterval = (minutes * MINUTE) + (hours * HOUR);
236         //Save it to the properties
237
PropertyManager.setProperty("DbSearchIndexer.updateInterval", ""+updateInterval);
238     }
239
240     public java.util.Date JavaDoc getLastIndexedDate() {
241         return new java.util.Date JavaDoc(lastIndexed);
242     }
243
244     public boolean isAutoIndexEnabled() {
245         return autoIndex;
246     }
247
248     public void setAutoIndexEnabled(boolean value) {
249         autoIndex = value;
250     }
251
252     public void addToIndex(ForumMessage message) {
253         //acquire the index lock so that no other indexing operations
254
//are performed.
255
synchronized (indexLock) {
256             IndexWriter writer = null;
257             try {
258                 writer = getWriter(false);
259                 addMessageToIndex(writer, message.getID(),
260                     message.getUnfilteredSubject(), message.getUnfilteredBody(),
261                     message.getUser().getID(), message.getForumThread().getID(),
262                     message.getForumThread().getForum().getID(),
263                     message.getCreationDate()
264                 );
265             }
266             catch (IOException ioe) {
267                 ioe.printStackTrace();
268             }
269             finally{
270                 try { writer.close(); }
271                 catch (Exception JavaDoc e) { }
272             }
273         }
274     }
275
276     public void removeFromIndex(ForumMessage message) {
277         //acquire the index lock so that no other indexing operations
278
//are performed.
279
synchronized (indexLock) {
280             try {
281                 int [] toDelete = new int [] { message.getID() };
282                 deleteMessagesFromIndex(toDelete);
283             }
284             catch (IOException ioe) {
285                 ioe.printStackTrace();
286             }
287         }
288     }
289
290     public void updateIndex() {
291         //acquire the index lock so that no other indexing operations
292
//are performed.
293
synchronized (indexLock) {
294             long now = System.currentTimeMillis();
295             updateIndex(lastIndexed, now);
296             lastIndexed = now;
297             //Save the time as a Yazd property.
298
PropertyManager.setProperty("DbSearchIndexer.lastIndexed",
299                 "" + lastIndexed);
300         }
301     }
302
303     public void rebuildIndex() {
304         //acquire the index lock so that no other indexing operations
305
//are performed.
306
synchronized (indexLock) {
307             long now = System.currentTimeMillis();
308             rebuildIndex(now);
309             lastIndexed = now;
310             //Save the time as a Yazd property.
311
PropertyManager.setProperty("DbSearchIndexer.lastIndexed",
312                 "" + lastIndexed);
313         }
314     }
315
316     /**
317      * Indexing thread logic. It wakes up once a minute to see if any threaded
318      * action should take place.
319      */

320     public void run() {
321         while (true){
322             //If auto indexing is on
323
if (autoIndex) {
324                 long now = System.currentTimeMillis();
325                 //If we want to re-index everything.
326
if (lastIndexed == 0) {
327                     synchronized(indexLock) {
328                         rebuildIndex(now);
329                         lastIndexed = now;
330                         //Save the time as a Yazd property.
331
PropertyManager.setProperty("DbSearchIndexer.lastIndexed",
332                             "" + lastIndexed);
333                     }
334                 }
335                 //We only want to do an update.
336
else {
337                     long nextIndex = lastIndexed + updateInterval;
338                     if (now > nextIndex) {
339                         synchronized(indexLock) {
340                             updateIndex(lastIndexed, now);
341                             lastIndexed = now;
342                             //Save the time as a Yazd property.
343
PropertyManager.setProperty("DbSearchIndexer.lastIndexed",
344                                 "" + lastIndexed);
345                         }
346                     }
347                 }
348             }
349             //sleep for 1 minute and then check again.
350
try {
351                 this.sleep(60000);
352             }
353             catch (Exception JavaDoc e) {
354                 e.printStackTrace();
355             }
356         }
357     }
358
359    /**
360      * Indexes an indivual message. The connection is assumed to be open when
361      * passed in and will remain open after the method is done executing.
362      */

363     protected final void addMessageToIndex(IndexWriter writer, int messageID,
364             String JavaDoc subject, String JavaDoc body, int userID, int threadID, int forumID,
365             java.util.Date JavaDoc creationDate) throws IOException
366     {
367         if (writer == null) {
368             return;
369         }
370         //Ignore messages with a null subject or body.
371
if (subject == null || body == null) {
372             return;
373         }
374
375         Document doc = new Document();
376         doc.add(Field.Keyword("messageID", Integer.toString(messageID)));
377         doc.add(new Field("userID", Integer.toString(userID), false, true, false));
378         doc.add(new Field("threadID", Integer.toString(threadID), false, true, false));
379         doc.add(new Field("forumID", Integer.toString(forumID), false, true, false));
380         doc.add(new Field("Indexer", "FORUMS", false, true, false));
381         doc.add(Field.UnStored("subject", subject));
382         doc.add(Field.UnStored("body", body));
383         doc.add(new Field("creationDate", DateField.dateToString(creationDate), false, true, false));
384
385         writer.addDocument(doc);
386     }
387
388     /**
389      * Deletes a message from the index.
390      */

391     protected final void deleteMessagesFromIndex(int [] messages) throws IOException {
392         if (messages == null) {
393             return;
394         }
395         IndexReader reader = getReader();
396         if (reader == null) {
397             //Reader will be null if the search index doesn't exist.
398
return;
399         }
400         Term messageIDTerm;
401         for (int i=0; i<messages.length; i++) {
402             messageIDTerm = new Term("messageID", Integer.toString(messages[i]));
403             try {
404                 reader.delete(messageIDTerm);
405             }
406             catch (Exception JavaDoc e) { }
407         }
408         try {
409             reader.close();
410         }
411         catch (Exception JavaDoc e) { }
412     }
413
414     /**
415      * Rebuilds the search index from scratch. It deletes the entire index
416      * and word tables and then indexes every message up to the end time.
417      */

418     protected final void rebuildIndex(long end) {
419         System.err.println("Rebuilding index...");
420
421         IndexWriter writer = null;
422         Connection con = null;
423         try {
424             writer = getWriter(true);
425             con = DbConnectionManager.getConnection();
426             PreparedStatement pstmt = con.prepareStatement(MESSAGES_BEFORE_DATE);
427             pstmt.setString(1, Long.toString(end));
428             ResultSet rs = pstmt.executeQuery();
429             while (rs.next()) {
430                 int messageID = rs.getInt(1);
431                 int userID = rs.getInt(2);
432                 int threadID = rs.getInt(3);
433                 int forumID = rs.getInt(4);
434                 String JavaDoc subject = rs.getString(5);
435                 String JavaDoc body = rs.getString(6);
436                 java.util.Date JavaDoc creationDate =
437                     new java.util.Date JavaDoc(Long.parseLong(rs.getString(7).trim()));
438                 //ForumMessage message = new DbForumMessage(messageID, factory);// factory.getMessage(messageID);
439
addMessageToIndex(writer, messageID, subject, body, userID, threadID, forumID, creationDate);
440             }
441             pstmt.close();
442         }
443         catch( Exception JavaDoc sqle ) {
444             sqle.printStackTrace();
445         }
446         finally {
447             try { con.close(); }
448             catch (Exception JavaDoc e) { e.printStackTrace(); }
449             try {
450                 //A rebuild of the index warrants calling optimize.
451
writer.optimize();
452             }
453             catch (Exception JavaDoc e) { }
454             try {
455                 writer.close();
456             }
457             catch (Exception JavaDoc e) {
458                 e.printStackTrace();
459             }
460         }
461         System.err.println("Done rebuilding index.");
462     }
463
464     /**
465      * Updates the index. It first deletes any messages in the index between
466      * the start and end times, and then adds all messages to the index that
467      * are between the start and end times.
468      */

469     protected final void updateIndex(long start, long end) {
470         Connection con = null;
471         PreparedStatement pstmt = null;
472         IndexWriter writer = null;
473         int [] messages = null;
474
475         try {
476             con = DbConnectionManager.getConnection();
477             //For a clean update, we need to make sure that we first delete
478
//any index entries that were made since we last updated. This
479
//might happen if a process was calling indexMessage() between runs
480
//of this method. For this reason, the two types of indexing (manual
481
//and automatic) should not be intermixed. However, we still perform
482
//this deletion to be safe.
483
pstmt = con.prepareStatement(MESSAGES_SINCE_DATE_COUNT);
484             pstmt.setString(1, Long.toString(start));
485             pstmt.setString(2, Long.toString(end));
486             ResultSet rs = pstmt.executeQuery();
487             rs.next();
488             int messageCount = rs.getInt(1);
489             messages = new int[messageCount];
490             pstmt.close();
491             pstmt = con.prepareStatement(MESSAGES_SINCE_DATE);
492             pstmt.setString(1, Long.toString(start));
493             pstmt.setString(2, Long.toString(end));
494             rs = pstmt.executeQuery();
495             for (int i=0; i<messages.length; i++) {
496                 rs.next();
497                 messages[i] = rs.getInt("messageID");
498             }
499         }
500         catch (Exception JavaDoc e) {
501             e.printStackTrace();
502         }
503         finally {
504             try { pstmt.close(); }
505             catch (Exception JavaDoc e) { e.printStackTrace(); }
506             try { con.close(); }
507             catch (Exception JavaDoc e) { e.printStackTrace(); }
508         }
509
510         try {
511             deleteMessagesFromIndex(messages);
512
513             //Finally, index all new messages;
514
writer = getWriter(false);
515             for (int i=0; i<messages.length; i++) {
516                 ForumMessage message = factory.getMessage(messages[i]);
517                 addMessageToIndex(writer, message.getID(),
518                     message.getUnfilteredSubject(), message.getUnfilteredBody(),
519                     message.getUser().getID(), message.getForumThread().getID(),
520                     message.getForumThread().getForum().getID(),
521                     message.getCreationDate()
522                 );
523             }
524         }
525         catch( Exception JavaDoc e ) {
526             e.printStackTrace();
527         }
528         finally {
529             try { writer.close(); }
530             catch (Exception JavaDoc e) { e.printStackTrace(); }
531         }
532     }
533
534     /**
535      * Returns a Lucene IndexWriter.
536      */

537     private static IndexWriter getWriter(boolean create) throws IOException {
538         if (indexPath == null) {
539             //Get path of where search index should be. It should be
540
//the search subdirectory of [yazdHome].
541
String JavaDoc yazdHome = PropertyManager.getProperty("yazdHome");
542             if (yazdHome == null) {
543                 System.err.println("ERROR: the yazdHome property is not set.");
544                 throw new IOException("Unable to open index for searching " +
545                         "because yazdHome was not set.");
546             }
547             indexPath = yazdHome + File.separator + "search";
548         }
549
550         IndexWriter writer = null;
551
552         //If create is true, we always want to create a new index writer.
553
if (create) {
554             try {
555                 writer = new IndexWriter(indexPath, analyzer, true);
556             }
557             catch (Exception JavaDoc e) {
558                 System.err.println("ERROR: Failed to create a new index writer.");
559                 e.printStackTrace();
560             }
561         }
562         //Otherwise, use an existing index if it exists.
563
else {
564             if (indexExists(indexPath)) {
565                 try {
566                     writer = new IndexWriter(indexPath, analyzer, false);
567                 }
568                 catch (Exception JavaDoc e) {
569                     System.err.println("ERROR: Failed to open an index writer.");
570                     e.printStackTrace();
571                 }
572             }
573             else {
574                 try {
575                     writer = new IndexWriter(indexPath, analyzer, true);
576                 }
577                 catch (Exception JavaDoc e) {
578                     System.err.println("ERROR: Failed to create a new index writer.");
579                     e.printStackTrace();
580                 }
581             }
582         }
583
584         return writer;
585     }
586
587     /**
588      * Returns a Lucene IndexReader.
589      */

590     private static IndexReader getReader() throws IOException {
591         if (indexPath == null) {
592             //Get path of where search index should be. It should be
593
//the search subdirectory of [yazdHome].
594
String JavaDoc yazdHome = PropertyManager.getProperty("yazdHome");
595             if (yazdHome == null) {
596                 System.err.println("ERROR: the yazdHome property is not set.");
597                 throw new IOException("Unable to open index for searching " +
598                         "because yazdHome was not set.");
599             }
600             indexPath = yazdHome + File.separator + "search";
601         }
602
603         if (indexExists(indexPath)) {
604             IndexReader reader = IndexReader.open(indexPath);
605             return reader;
606         }
607         else {
608             return null;
609         }
610     }
611
612     /**
613      * Returns true if the search index exists at the specified path.
614      *
615      * @param indexPath the path to check for the search index at.
616      */

617     private static boolean indexExists(String JavaDoc indexPath) {
618         //Lucene always creates a file called "segments" -- if it exists, we
619
//assume that the search index exists.
620
File segments = new File(indexPath + File.separator + "segments");
621         return segments.exists();
622     }
623 }
624
Popular Tags