KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > knowgate > lucene > MailIndexer


1 package com.knowgate.lucene;
2
3 import java.io.IOException JavaDoc;
4 import java.io.InputStream JavaDoc;
5 import java.io.File JavaDoc;
6
7 import java.sql.SQLException JavaDoc;
8 import java.sql.Connection JavaDoc;
9 import java.sql.DriverManager JavaDoc;
10 import java.sql.Statement JavaDoc;
11 import java.sql.ResultSet JavaDoc;
12 import java.sql.PreparedStatement JavaDoc;
13
14 import java.util.Properties JavaDoc;
15 import java.util.Date JavaDoc;
16 import java.util.LinkedList JavaDoc;
17 import java.util.ListIterator JavaDoc;
18
19 import java.math.BigDecimal JavaDoc;
20
21 import java.text.SimpleDateFormat JavaDoc;
22
23 import javax.mail.MessagingException JavaDoc;
24 import javax.mail.internet.MimeBodyPart JavaDoc;
25
26 import org.htmlparser.beans.StringBean;
27 import org.apache.lucene.document.Document;
28 import org.apache.lucene.document.Field;
29 import org.apache.lucene.index.IndexWriter;
30 import org.apache.lucene.index.IndexReader;
31 import org.apache.lucene.index.Term;
32 import org.apache.lucene.analysis.Analyzer;
33
34 import com.knowgate.debug.DebugFile;
35 import com.knowgate.misc.Gadgets;
36 import com.knowgate.hipermail.DBMimePart;
37 import com.knowgate.dfs.FileSystem;
38 import org.htmlparser.Parser;
39 import org.htmlparser.util.ParserException;
40
41 /**
42  * Indexer subclass for e-mail messages
43  * @author Sergio Montoro Ten
44  * @version 3.0
45  */

46 public class MailIndexer extends Indexer {
47
48   private static SimpleDateFormat JavaDoc oFmt = new SimpleDateFormat JavaDoc("yyyy-MM-dd HH:mm:ss");
49
50   public MailIndexer() { }
51
52   /**
53    * Add a single mail message to the index
54    * @param oIWrt IndexWriter
55    * @param sGuid String GUID of mime message to be indexed (from gu_mimemsg field of table k_mime_msgs)
56    * @param dNumber BigDecimal mime message number (from pg_message field of table k_mime_msgs)
57    * @param sWorkArea String GUID of WorkArea (from gu_workarea field of table k_mime_msgs)
58    * @param sContainer String Name of Category (Folder) where message is stored.
59    * This is nm_category field at k_categories table record corresponding to gu_category from k_mime_msgs
60    * @param sSubject String Subject
61    * @param sAuthor String Display name of message sender
62    * @param sRecipients String Recipients list (both display name and e-mails)
63    * @param dtSent Date
64    * @param sComments String
65    * @param oStrm InputStream Full mime message body as an InputStream (from by_content field of table k_mime_msgs)
66    * @throws ClassNotFoundException
67    * @throws IOException
68    * @throws IllegalArgumentException
69    * @throws NoSuchFieldException
70    * @throws IllegalAccessException
71    * @throws InstantiationException
72    * @throws NullPointerException
73    */

74   public static void addMail(IndexWriter oIWrt,
75                              String JavaDoc sGuid, BigDecimal JavaDoc dNumber, String JavaDoc sWorkArea,
76                              String JavaDoc sContainer, String JavaDoc sSubject,
77                              String JavaDoc sAuthor, String JavaDoc sRecipients, Date JavaDoc dtSent,
78                              String JavaDoc sComments, InputStream JavaDoc oStrm, int iSize)
79       throws ClassNotFoundException JavaDoc, IOException JavaDoc, IllegalArgumentException JavaDoc,
80              NoSuchFieldException JavaDoc, IllegalAccessException JavaDoc, InstantiationException JavaDoc,
81              NullPointerException JavaDoc {
82
83     String JavaDoc sText;
84     String JavaDoc sAbstract = sGuid+"¨"+sSubject+"¨"+sAuthor+"¨"+oFmt.format(dtSent)+"¨"+String.valueOf(iSize)+"¨"+dNumber.toString();
85     sSubject = Gadgets.ASCIIEncode(sSubject);
86     sAuthor = Gadgets.ASCIIEncode(sAuthor);
87
88     if (null != oStrm) {
89       StringBuffer JavaDoc oStrBuff = new StringBuffer JavaDoc();
90       try {
91         MimeBodyPart JavaDoc oMsgText = new MimeBodyPart JavaDoc(oStrm);
92         DBMimePart.parseMimePart(oStrBuff, null, sContainer, "", oMsgText, 0);
93       } catch (MessagingException JavaDoc xcpt) {
94         if (DebugFile.trace)
95           DebugFile.writeln(xcpt.getClass().getName()+" "+xcpt.getMessage()+" indexing message "+sGuid+" - "+sSubject);
96       }
97       if (oStrBuff.length()>0) {
98         if (Gadgets.indexOfIgnoreCase(oStrBuff.toString(), "<html>")>=0) {
99           Parser oPrsr = Parser.createParser(oStrBuff.toString(), null);
100           StringBean oStrs = new StringBean();
101           try {
102             oPrsr.visitAllNodesWith (oStrs);
103           } catch (ParserException pe) {
104             throw new IOException JavaDoc(pe.getMessage());
105           }
106
107           if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBean.getStrings())");
108           sText = Gadgets.ASCIIEncode(oStrs.getStrings());
109           if (DebugFile.trace) DebugFile.writeln("StringBean.getStrings() done");
110         } // fi (oStrBuff contains <html>)
111
else {
112           if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBuffer.toString())");
113           sText = Gadgets.ASCIIEncode(oStrBuff.toString());
114           if (DebugFile.trace) DebugFile.writeln("StringBuffer.toString() done");
115         }
116       } else {
117         sText = "";
118       }
119     } // fi (oStrm)
120
else {
121       sText = "";
122     }
123
124     Document oDoc = new Document();
125     oDoc.add (Field.Keyword ("workarea" , sWorkArea));
126     oDoc.add (Field.Keyword ("container", sContainer));
127     oDoc.add (Field.Keyword ("guid" , sGuid));
128     oDoc.add (Field.Keyword ("number" , dNumber.toString()));
129     oDoc.add (Field.Keyword ("created" , dtSent));
130     oDoc.add (Field.Keyword ("size" , Gadgets.leftPad(String.valueOf(iSize),'0',10)));
131     oDoc.add (Field.Text ("title" , sSubject));
132     oDoc.add (Field.Text ("author" , sAuthor));
133     oDoc.add (Field.Text ("abstract" , sAbstract));
134     oDoc.add (Field.Text ("recipients", sRecipients.trim()));
135     oDoc.add (Field.UnStored("comments" , sComments));
136     oDoc.add (Field.UnStored("text" , sText));
137
138     if (DebugFile.trace) DebugFile.writeln("IndexWriter.addDocument([Document])");
139
140     oIWrt.addDocument(oDoc);
141   } // addMail
142

143   /**
144    * <p>Re-build full text index for a given mail folder</p>
145    * All previously indexed messages for given folder are removed from index and written back
146    * @param oProps Properties containing: luceneindex, driver, dburl, dbuser, dbpassword
147    * @param sWorkArea String GUID of WorkArea to which folder belongs
148    * @param sFolder String Folder name as in field nm_category of table k_categories
149    * @throws SQLException
150    * @throws IOException
151    * @throws ClassNotFoundException
152    * @throws IllegalArgumentException
153    * @throws NoSuchFieldException
154    * @throws IllegalAccessException
155    * @throws InstantiationException
156    */

157   public static void rebuildFolder(Properties JavaDoc oProps, String JavaDoc sWorkArea, String JavaDoc sFolder)
158     throws SQLException JavaDoc, IOException JavaDoc, ClassNotFoundException JavaDoc,
159            IllegalArgumentException JavaDoc, NoSuchFieldException JavaDoc,
160            IllegalAccessException JavaDoc, InstantiationException JavaDoc {
161
162     String JavaDoc sGuid, sContainer, sTitle, sAuthor, sComments;
163     Date JavaDoc dtCreated;
164     BigDecimal JavaDoc dNumber;
165     int iSize;
166
167     final BigDecimal JavaDoc dZero = new BigDecimal JavaDoc(0);
168
169     if (DebugFile.trace) {
170       DebugFile.writeln("Begin MailIndexer.rebuildFolder([Properties]" + sWorkArea + "," + sFolder + ")");
171       DebugFile.incIdent();
172     }
173
174     // Get physical base path to index files from luceneindex property
175
String JavaDoc sDirectory = oProps.getProperty("luceneindex");
176
177     if (null==sDirectory) throw new NoSuchFieldException JavaDoc ("Cannot find luceneindex property");
178
179     // Append WorkArea and table name to luceneindex base path
180
sDirectory = Gadgets.chomp(sDirectory, File.separator) + "k_mime_msgs";
181     if (null!=sWorkArea) sDirectory += File.separator + sWorkArea;
182
183     if (DebugFile.trace) DebugFile.writeln("index directory is " + sDirectory);
184
185     if (null==oProps.getProperty("driver"))
186       throw new NoSuchFieldException JavaDoc ("Cannot find driver property");
187
188     if (null==oProps.getProperty("dburl"))
189       throw new NoSuchFieldException JavaDoc ("Cannot find dburl property");
190
191     if (DebugFile.trace) DebugFile.writeln("Class.forName(" + oProps.getProperty("analyzer" , DEFAULT_ANALYZER) + ")");
192
193     Class JavaDoc oAnalyzer = Class.forName(oProps.getProperty("analyzer" , DEFAULT_ANALYZER));
194
195     if (DebugFile.trace) DebugFile.writeln("Class.forName(" + oProps.getProperty("driver") + ")");
196
197     Class JavaDoc oDriver = Class.forName(oProps.getProperty("driver"));
198
199     if (DebugFile.trace) DebugFile.writeln("IndexReader.open("+sDirectory+")");
200
201     // *********************************************************************
202
// Delete every document from this folder before re-indexing
203
File JavaDoc oDir = new File JavaDoc(sDirectory);
204     if (oDir.exists()) {
205       IndexReader oReader = IndexReader.open(sDirectory);
206       int iDeleted = oReader.delete(new Term("container", sFolder));
207       oReader.close();
208     } else {
209       FileSystem oFS = new FileSystem();
210       try { oFS.mkdirs(sDirectory); } catch (Exception JavaDoc e) { throw new IOException JavaDoc(e.getClass().getName()+" "+e.getMessage()); }
211     }
212     // *********************************************************************
213

214     if (DebugFile.trace) DebugFile.writeln("new IndexWriter("+sDirectory+",[Analyzer], true)");
215
216     IndexWriter oIWrt = new IndexWriter(sDirectory, (Analyzer) oAnalyzer.newInstance(), true);
217
218     if (DebugFile.trace)
219       DebugFile.writeln("DriverManager.getConnection(" + oProps.getProperty("dburl") + ", ...)");
220
221     Connection JavaDoc oConn = DriverManager.getConnection(oProps.getProperty("dburl"), oProps.getProperty("dbuser"),oProps.getProperty("dbpassword"));
222
223     Statement JavaDoc oStmt = oConn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
224     ResultSet JavaDoc oRSet;
225
226       PreparedStatement JavaDoc oRecp = oConn.prepareStatement("SELECT tx_personal,tx_email FROM k_inet_addrs WHERE tp_recipient<>'to' AND gu_mimemsg=?", ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
227
228       if (DebugFile.trace)
229         DebugFile.writeln("Statement.executeQuery(SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"+sWorkArea+"' AND c.nm_category='"+sFolder+"')");
230
231       oRSet = oStmt.executeQuery("SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"+sWorkArea+"' AND c.nm_category='"+sFolder+"'");
232
233       while (oRSet.next()) {
234
235         sWorkArea = oRSet.getString(1);
236         sContainer = oRSet.getString(2);
237         sGuid = oRSet.getString(3);
238         sTitle = oRSet.getString(4);
239         sAuthor = oRSet.getString(5);
240         if (oRSet.wasNull()) sAuthor = "";
241         sAuthor += " " + oRSet.getString(6);
242         dNumber = oRSet.getBigDecimal(7);
243         if (oRSet.wasNull()) dNumber = dZero;
244         sComments = oRSet.getString(8);
245         dtCreated = oRSet.getDate(9);
246         iSize = oRSet.getInt(10);
247
248         if (DebugFile.trace) DebugFile.writeln("Indexing message "+sGuid+" - "+sTitle);
249
250         InputStream JavaDoc oStrm = oRSet.getBinaryStream(11);
251
252         String JavaDoc sRecipients = "";
253         oRecp.setString(1, sGuid);
254         ResultSet JavaDoc oRecs = oRecp.executeQuery();
255         while (oRecs.next()) {
256           sRecipients += oRecs.getString(1)+" "+oRecs.getString(2)+" ";
257         } // wend
258
oRecs.close();
259
260         MailIndexer.addMail(oIWrt, sGuid, dNumber, sWorkArea, sContainer, sTitle,
261                             sAuthor, sRecipients, dtCreated, sComments, oStrm, iSize);
262
263       } // wend
264
oRSet.close();
265       oRecp.close();
266
267     oStmt.executeUpdate("UPDATE k_mime_msgs SET bo_indexed=1 WHERE gu_workarea='"+sWorkArea+"' AND gu_category IN (SELECT gu_category FROM k_categories WHERE nm_category='"+sFolder+"')");
268
269     oStmt.close();
270     oConn.close();
271
272     if (DebugFile.trace) DebugFile.writeln("IndexWriter.optimize()");
273
274     oIWrt.optimize();
275
276     if (DebugFile.trace) DebugFile.writeln("IndexWriter.close()");
277
278     oIWrt.close();
279
280     if (DebugFile.trace) {
281       DebugFile.decIdent();
282       DebugFile.writeln("End Indexer.rebuild()");
283     }
284   } // rebuildFolder
285

286 }
287
Popular Tags