package com.knowgate.lucene;

import java.io.IOException;
import java.io.InputStream;
import java.io.File;

import java.sql.SQLException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.sql.ResultSet;
import java.sql.PreparedStatement;

import java.util.Properties;
import java.util.Date;
import java.util.LinkedList;
import java.util.ListIterator;

import java.math.BigDecimal;

import java.text.SimpleDateFormat;

import javax.mail.MessagingException;
import javax.mail.internet.MimeBodyPart;

import org.htmlparser.beans.StringBean;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.Analyzer;

import com.knowgate.debug.DebugFile;
import com.knowgate.misc.Gadgets;
import com.knowgate.hipermail.DBMimePart;
import com.knowgate.dfs.FileSystem;
import org.htmlparser.Parser;
import org.htmlparser.util.ParserException;

/**
 * Adds mail messages to a Lucene index and re-indexes the messages of a
 * single mail folder from the k_mime_msgs table.
 */
public class MailIndexer extends Indexer {

  // SimpleDateFormat is not thread-safe; every use must synchronize on this instance.
  private static final SimpleDateFormat oFmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

  /** SELECT listing the messages of one work area and folder (two parameters). */
  // NOTE(review): the filter keeps rows where bo_deleted<>0 AND bo_draft<>0, which
  // reads as "deleted drafts only" -- confirm against the schema whether =0 was intended.
  private static final String SQL_MESSAGES =
    "SELECT m.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea=? AND c.nm_category=?";

  /** SELECT listing the non-"to" addresses of one message (one parameter). */
  private static final String SQL_RECIPIENTS =
    "SELECT tx_personal,tx_email FROM k_inet_addrs WHERE tp_recipient<>'to' AND gu_mimemsg=?";

  /** UPDATE flagging the messages of one work area and folder as indexed (two parameters). */
  private static final String SQL_MARK_INDEXED =
    "UPDATE k_mime_msgs SET bo_indexed=1 WHERE gu_workarea=? AND gu_category IN (SELECT gu_category FROM k_categories WHERE nm_category=?)";

  public MailIndexer() { }

  /**
   * Add one mail message to an already opened index.
   *
   * The document gets the fields workarea, container, guid, number, created,
   * size (left padded with '0' to 10 characters), title, author, abstract,
   * recipients, comments and text.
   *
   * @param oIWrt Open IndexWriter the document is added to
   * @param sGuid Message GUID
   * @param dNumber Message number, must not be <b>null</b>
   * @param sWorkArea GUID of the work area the message belongs to
   * @param sContainer Name of the folder holding the message
   * @param sSubject Message subject
   * @param sAuthor Display name and/or address of the sender
   * @param sRecipients Recipients list, <b>null</b> is treated as an empty string
   * @param dtSent Date sent, must not be <b>null</b>
   * @param sComments Comments, <b>null</b> is treated as an empty string
   * @param oStrm Stream with the MIME body of the message, may be <b>null</b>
   *        in which case no body text is indexed
   * @param iSize Message size
   * @throws IOException if the HTML body cannot be parsed or the index cannot be written
   * @throws NullPointerException if dNumber or dtSent is <b>null</b>
   */
  public static void addMail(IndexWriter oIWrt,
                             String sGuid, BigDecimal dNumber, String sWorkArea,
                             String sContainer, String sSubject,
                             String sAuthor, String sRecipients, Date dtSent,
                             String sComments, InputStream oStrm, int iSize)
    throws ClassNotFoundException, IOException, IllegalArgumentException,
           NoSuchFieldException, IllegalAccessException, InstantiationException,
           NullPointerException {

    String sSent;
    synchronized (oFmt) {
      sSent = oFmt.format(dtSent);
    }

    // The abstract is built from the raw subject and author, before they are ASCII encoded
    String sAbstract = sGuid+"¨"+sSubject+"¨"+sAuthor+"¨"+sSent+"¨"+String.valueOf(iSize)+"¨"+dNumber.toString();
    sSubject = Gadgets.ASCIIEncode(sSubject);
    sAuthor = Gadgets.ASCIIEncode(sAuthor);

    String sText = extractText(sGuid, sSubject, sContainer, oStrm);

    // A null field value would abort indexing of the whole message
    String sRecipientList = (null == sRecipients ? "" : sRecipients.trim());
    String sCommentText = (null == sComments ? "" : sComments);

    Document oDoc = new Document();
    oDoc.add (Field.Keyword ("workarea" , sWorkArea));
    oDoc.add (Field.Keyword ("container", sContainer));
    oDoc.add (Field.Keyword ("guid" , sGuid));
    oDoc.add (Field.Keyword ("number" , dNumber.toString()));
    oDoc.add (Field.Keyword ("created" , dtSent));
    oDoc.add (Field.Keyword ("size" , Gadgets.leftPad(String.valueOf(iSize),'0',10)));
    oDoc.add (Field.Text ("title" , sSubject));
    oDoc.add (Field.Text ("author" , sAuthor));
    oDoc.add (Field.Text ("abstract" , sAbstract));
    oDoc.add (Field.Text ("recipients", sRecipientList));
    oDoc.add (Field.UnStored("comments" , sCommentText));
    oDoc.add (Field.UnStored("text" , sText));

    if (DebugFile.trace) DebugFile.writeln("IndexWriter.addDocument([Document])");

    oIWrt.addDocument(oDoc);
  }

  /**
   * Get the ASCII encoded text of a message body.
   *
   * A body containing an &lt;html&gt; tag is passed through the HTML parser
   * and only its strings are kept. A MessagingException raised whilst reading
   * the MIME part is written to the debug trace and whatever was collected up
   * to that point is used.
   *
   * @param sGuid Message GUID, used for tracing only
   * @param sSubject Message subject, used for tracing only
   * @param sContainer Folder name handed over to DBMimePart.parseMimePart()
   * @param oStrm Stream with the MIME body, may be <b>null</b>
   * @return Encoded text or an empty string if there is no stream or no text
   * @throws IOException if the HTML parser fails
   */
  private static String extractText(String sGuid, String sSubject,
                                    String sContainer, InputStream oStrm)
    throws ClassNotFoundException, IOException, IllegalArgumentException,
           NoSuchFieldException, IllegalAccessException, InstantiationException,
           NullPointerException {

    if (null == oStrm) return "";

    StringBuffer oStrBuff = new StringBuffer();
    try {
      MimeBodyPart oMsgText = new MimeBodyPart(oStrm);
      DBMimePart.parseMimePart(oStrBuff, null, sContainer, "", oMsgText, 0);
    } catch (MessagingException xcpt) {
      // Best effort: an unreadable body must not prevent the headers from being indexed
      if (DebugFile.trace)
        DebugFile.writeln(xcpt.getClass().getName()+" "+xcpt.getMessage()+" indexing message "+sGuid+" - "+sSubject);
    }

    if (oStrBuff.length() == 0) return "";

    String sRaw = oStrBuff.toString();
    String sText;

    if (Gadgets.indexOfIgnoreCase(sRaw, "<html>") >= 0) {
      Parser oPrsr = Parser.createParser(sRaw, null);
      StringBean oStrs = new StringBean();
      try {
        oPrsr.visitAllNodesWith(oStrs);
      } catch (ParserException pe) {
        IOException oIOXcpt = new IOException(pe.getMessage());
        oIOXcpt.initCause(pe);
        throw oIOXcpt;
      }
      if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBean.getStrings())");
      sText = Gadgets.ASCIIEncode(oStrs.getStrings());
      if (DebugFile.trace) DebugFile.writeln("StringBean.getStrings() done");
    } else {
      if (DebugFile.trace) DebugFile.writeln("Gadgets.ASCIIEncode(StringBuffer.toString())");
      sText = Gadgets.ASCIIEncode(sRaw);
      if (DebugFile.trace) DebugFile.writeln("StringBuffer.toString() done");
    }
    return sText;
  }

  /**
   * Re-index the messages of one mail folder.
   *
   * Documents whose container field equals sFolder are removed from the
   * index, the messages of the folder are read from the database and added
   * again, the rows are flagged with bo_indexed=1 and the index is optimized.
   * Documents of other folders kept in the same index are preserved.
   *
   * @param oProps Properties read by this method: luceneindex (base directory),
   *        driver, dburl, dbuser, dbpassword and, optionally, analyzer
   * @param sWorkArea GUID of the work area; if not <b>null</b> it is appended
   *        to the index directory as a subdirectory
   * @param sFolder Name of the folder (k_categories.nm_category) to re-index
   * @throws NoSuchFieldException if luceneindex, driver or dburl is not set
   * @throws ClassNotFoundException if the analyzer or driver class is not found
   * @throws SQLException if a database operation fails
   * @throws IOException if the index cannot be read or written
   */
  public static void rebuildFolder(Properties oProps, String sWorkArea, String sFolder)
    throws SQLException, IOException, ClassNotFoundException,
           IllegalArgumentException, NoSuchFieldException,
           IllegalAccessException, InstantiationException {

    if (DebugFile.trace) {
      DebugFile.writeln("Begin MailIndexer.rebuildFolder([Properties]" + sWorkArea + "," + sFolder + ")");
      DebugFile.incIdent();
    }

    String sDirectory = oProps.getProperty("luceneindex");

    if (null==sDirectory) throw new NoSuchFieldException("Cannot find luceneindex property");

    // presumably Gadgets.chomp() leaves the path ending with the separator -- TODO confirm
    sDirectory = Gadgets.chomp(sDirectory, File.separator) + "k_mime_msgs";
    if (null!=sWorkArea) sDirectory += File.separator + sWorkArea;

    if (DebugFile.trace) DebugFile.writeln("index directory is " + sDirectory);

    if (null==oProps.getProperty("driver"))
      throw new NoSuchFieldException("Cannot find driver property");

    if (null==oProps.getProperty("dburl"))
      throw new NoSuchFieldException("Cannot find dburl property");

    if (DebugFile.trace) DebugFile.writeln("Class.forName(" + oProps.getProperty("analyzer" , DEFAULT_ANALYZER) + ")");

    Class oAnalyzer = Class.forName(oProps.getProperty("analyzer" , DEFAULT_ANALYZER));

    if (DebugFile.trace) DebugFile.writeln("Class.forName(" + oProps.getProperty("driver") + ")");

    // Loaded only for its side effect of registering the JDBC driver
    Class.forName(oProps.getProperty("driver"));

    boolean bIndexExists = false;
    File oDir = new File(sDirectory);

    if (oDir.exists()) {
      bIndexExists = IndexReader.indexExists(sDirectory);
      if (bIndexExists) {
        if (DebugFile.trace) DebugFile.writeln("IndexReader.open("+sDirectory+")");
        IndexReader oReader = IndexReader.open(sDirectory);
        try {
          oReader.delete(new Term("container", sFolder));
        } finally {
          oReader.close();
        }
      }
    } else {
      FileSystem oFS = new FileSystem();
      try {
        oFS.mkdirs(sDirectory);
      } catch (Exception e) {
        IOException oIOXcpt = new IOException(e.getClass().getName()+" "+e.getMessage());
        oIOXcpt.initCause(e);
        throw oIOXcpt;
      }
    }

    // Create the index only when there is none yet: passing true unconditionally
    // would wipe the documents of every other folder stored in this directory.
    if (DebugFile.trace) DebugFile.writeln("new IndexWriter("+sDirectory+",[Analyzer], "+String.valueOf(!bIndexExists)+")");

    IndexWriter oIWrt = new IndexWriter(sDirectory, (Analyzer) oAnalyzer.newInstance(), !bIndexExists);

    try {
      indexFolderMessages(oIWrt, oProps, sWorkArea, sFolder);

      if (DebugFile.trace) DebugFile.writeln("IndexWriter.optimize()");

      oIWrt.optimize();
    } finally {
      if (DebugFile.trace) DebugFile.writeln("IndexWriter.close()");
      oIWrt.close();
    }

    if (DebugFile.trace) {
      DebugFile.decIdent();
      DebugFile.writeln("End MailIndexer.rebuildFolder()");
    }
  }

  /**
   * Read the messages of a folder from the database, add each one to the
   * index and flag the rows of the folder with bo_indexed=1.
   *
   * @param oIWrt Open IndexWriter
   * @param oProps Properties holding dburl, dbuser and dbpassword
   * @param sWorkArea GUID of the work area
   * @param sFolder Name of the folder
   */
  private static void indexFolderMessages(IndexWriter oIWrt, Properties oProps,
                                          String sWorkArea, String sFolder)
    throws SQLException, IOException, ClassNotFoundException,
           IllegalArgumentException, NoSuchFieldException,
           IllegalAccessException, InstantiationException {

    final BigDecimal dZero = new BigDecimal(0);

    if (DebugFile.trace)
      DebugFile.writeln("DriverManager.getConnection(" + oProps.getProperty("dburl") + ", ...)");

    Connection oConn = DriverManager.getConnection(oProps.getProperty("dburl"), oProps.getProperty("dbuser"), oProps.getProperty("dbpassword"));

    try {
      PreparedStatement oRecp = oConn.prepareStatement(SQL_RECIPIENTS, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
      try {
        PreparedStatement oMsgs = oConn.prepareStatement(SQL_MESSAGES, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
        try {
          if (DebugFile.trace)
            DebugFile.writeln("PreparedStatement.executeQuery(" + SQL_MESSAGES + ") with " + sWorkArea + "," + sFolder);

          oMsgs.setString(1, sWorkArea);
          oMsgs.setString(2, sFolder);
          ResultSet oRSet = oMsgs.executeQuery();
          try {
            while (oRSet.next()) {
              // Columns are read strictly in SELECT order
              String sRowWorkArea = oRSet.getString(1);
              String sContainer = oRSet.getString(2);
              String sGuid = oRSet.getString(3);
              String sTitle = oRSet.getString(4);
              String sFromName = oRSet.getString(5);
              if (oRSet.wasNull()) sFromName = "";
              String sFromAddr = oRSet.getString(6);
              if (oRSet.wasNull()) sFromAddr = "";
              String sAuthor = sFromName + " " + sFromAddr;
              BigDecimal dNumber = oRSet.getBigDecimal(7);
              if (oRSet.wasNull()) dNumber = dZero;
              String sComments = oRSet.getString(8);
              // getTimestamp() keeps the time of day, which getDate() would drop
              Date dtCreated = oRSet.getTimestamp(9);
              int iSize = oRSet.getInt(10);

              if (DebugFile.trace) DebugFile.writeln("Indexing message "+sGuid+" - "+sTitle);

              InputStream oStrm = oRSet.getBinaryStream(11);

              String sRecipients = readRecipients(oRecp, sGuid);

              MailIndexer.addMail(oIWrt, sGuid, dNumber, sRowWorkArea, sContainer, sTitle,
                                  sAuthor, sRecipients, dtCreated, sComments, oStrm, iSize);
            }
          } finally {
            oRSet.close();
          }
        } finally {
          oMsgs.close();
        }
      } finally {
        oRecp.close();
      }

      PreparedStatement oUpdt = oConn.prepareStatement(SQL_MARK_INDEXED);
      try {
        oUpdt.setString(1, sWorkArea);
        oUpdt.setString(2, sFolder);
        oUpdt.executeUpdate();
      } finally {
        oUpdt.close();
      }
    } finally {
      oConn.close();
    }
  }

  /**
   * Get the personal names and e-mail addresses returned by the recipients
   * query for a message, each one followed by a single space.
   *
   * @param oRecp Prepared recipients query taking the message GUID as its only parameter
   * @param sGuid Message GUID
   * @return Recipients list, empty if the query returns no rows
   */
  private static String readRecipients(PreparedStatement oRecp, String sGuid)
    throws SQLException {

    StringBuffer oList = new StringBuffer();
    oRecp.setString(1, sGuid);
    ResultSet oRecs = oRecp.executeQuery();
    try {
      while (oRecs.next()) {
        String sPersonal = oRecs.getString(1);
        String sEMail = oRecs.getString(2);
        // Skip NULL columns so that the literal text "null" is never indexed
        if (null != sPersonal) oList.append(sPersonal).append(' ');
        if (null != sEMail) oList.append(sEMail).append(' ');
      }
    } finally {
      oRecs.close();
    }
    return oList.toString();
  }
}