1 32 33 package com.knowgate.lucene; 34 35 import java.math.BigDecimal ; 36 37 import java.util.Date ; 38 import java.util.Map ; 39 import java.util.HashMap ; 40 import java.util.Iterator ; 41 import java.util.LinkedList ; 42 import java.util.ListIterator ; 43 import java.text.SimpleDateFormat ; 44 45 import java.sql.DriverManager ; 46 import java.sql.Connection ; 47 import java.sql.Statement ; 48 import java.sql.PreparedStatement ; 49 import java.sql.ResultSet ; 50 import java.sql.SQLException ; 51 52 import java.util.Properties ; 53 import java.io.IOException ; 54 import java.io.FileNotFoundException ; 55 import java.io.InputStream ; 56 import java.io.FileInputStream ; 57 import java.io.File ; 58 59 import javax.mail.MessagingException ; 60 import javax.mail.internet.MimeBodyPart ; 61 62 import org.apache.lucene.analysis.Analyzer; 63 import org.apache.lucene.index.*; 64 import org.apache.lucene.document.*; 65 66 import org.htmlparser.beans.StringBean; 67 68 import com.knowgate.debug.DebugFile; 69 import com.knowgate.misc.Gadgets; 70 import com.knowgate.dfs.FileSystem; 71 72 import com.knowgate.hipermail.DBMimePart; 73 74 80 81 public class Indexer { 82 83 public final static String DEFAULT_ANALYZER = "org.apache.lucene.analysis.StopAnalyzer"; 84 85 87 private static String IfNull(Connection oConn) throws SQLException { 88 String sDBMS = oConn.getMetaData().getDatabaseProductName(); 89 90 if (sDBMS.equals("PostgreSQL")) 91 return "COALESCE"; 92 else if (sDBMS.equals("Microsoft SQL Server")) 93 return "ISNULL"; 94 else if (sDBMS.equals("Oracle")) 95 return "NVL"; 96 else if (sDBMS.equals("MySQL")) 97 return "COALESCE"; 98 else 99 return null; 100 } 101 102 104 private static boolean allowedTable(String sTableName) { 105 return sTableName.equalsIgnoreCase("k_bugs") || sTableName.equalsIgnoreCase("k_newsmsgs") || sTableName.equalsIgnoreCase("k_mime_msgs"); 106 } 107 108 110 123 public static void optimize(Properties oProps, String sTableName, String sWorkArea) 124 throws NoSuchFieldException ,IllegalArgumentException , ClassNotFoundException , 125 FileNotFoundException ,IOException ,InstantiationException ,IllegalAccessException { 126 127 if (!allowedTable(sTableName)) 128 throw new IllegalArgumentException ("Table name must be k_bugs or k_newsmsgs or k_mime_msgs"); 129 130 if (DebugFile.trace) { 131 DebugFile.writeln("Begin Indexer.rebuild([Properties]" + sTableName); 132 DebugFile.incIdent(); 133 } 134 135 String sDirectory = oProps.getProperty("luceneindex"); 136 137 if (null==sDirectory) { 138 if (DebugFile.trace) DebugFile.decIdent(); 139 throw new NoSuchFieldException ("Cannot find luceneindex property"); 140 } 141 142 sDirectory = Gadgets.chomp(sDirectory, File.separator) + sTableName.toLowerCase(); 143 if (null!=sWorkArea) sDirectory += File.separator + sWorkArea; 144 145 if (DebugFile.trace) DebugFile.writeln("index directory is " + sDirectory); 146 147 File oDir = new File (sDirectory); 148 if (!oDir.exists()) { 149 if (DebugFile.trace) DebugFile.decIdent(); 150 throw new FileNotFoundException ("Directory " + sDirectory + " does not exist"); 151 } 152 153 if (DebugFile.trace) 154 DebugFile.writeln("Class.forName(" + oProps.getProperty("analyzer" , DEFAULT_ANALYZER) + ")"); 155 156 Class oAnalyzer = Class.forName(oProps.getProperty("analyzer" , DEFAULT_ANALYZER)); 157 158 if (DebugFile.trace) 159 DebugFile.writeln("new IndexWriter(...)"); 160 161 IndexWriter oIWrt = new IndexWriter(sDirectory, (Analyzer) oAnalyzer.newInstance(), true); 162 163 if (DebugFile.trace) DebugFile.writeln("IndexWriter.optimize()"); 164 165 oIWrt.optimize(); 166 167 if (DebugFile.trace) DebugFile.writeln("IndexWriter.close()"); 168 169 oIWrt.close(); 170 171 if (DebugFile.trace) { 172 DebugFile.decIdent(); 173 DebugFile.writeln("End Indexer.optimize()"); 174 } 175 } 177 179 211 public static void rebuild(Properties oProps, String sTableName, String sWorkArea) 212 throws SQLException , IOException , ClassNotFoundException , 213 IllegalArgumentException , NoSuchFieldException , 214 IllegalAccessException , InstantiationException { 215 216 String sGuid, sContainer, sTitle, sAuthor, sComments, sText; 217 Date dtCreated; 218 BigDecimal dNumber; 219 int iNumber, iSize; 220 221 final BigDecimal dZero = new BigDecimal (0); 222 223 if (!allowedTable(sTableName)) 225 throw new IllegalArgumentException ("Table name must be k_bugs or k_newsmsgs or k_mime_msgs"); 226 227 if (DebugFile.trace) { 228 DebugFile.writeln("Begin Indexer.rebuild([Properties]," + sTableName + "," + sWorkArea + ")"); 229 DebugFile.incIdent(); 230 } 231 232 String sDirectory = oProps.getProperty("luceneindex"); 234 235 if (null==sDirectory) { 236 if (DebugFile.trace) DebugFile.decIdent(); 237 throw new NoSuchFieldException ("Cannot find luceneindex property"); 238 } 239 240 sDirectory = Gadgets.chomp(sDirectory, File.separator) + sTableName.toLowerCase(); 242 if (null!=sWorkArea) sDirectory += File.separator + sWorkArea; 243 244 if (DebugFile.trace) DebugFile.writeln("index directory is " + sDirectory); 245 246 if (null==oProps.getProperty("driver")) { 247 if (DebugFile.trace) DebugFile.decIdent(); 248 throw new NoSuchFieldException ("Cannot find driver property"); 249 } 250 251 if (null==oProps.getProperty("dburl")) { 252 if (DebugFile.trace) DebugFile.decIdent(); 253 throw new NoSuchFieldException ("Cannot find dburl property"); 254 } 255 256 if (DebugFile.trace) DebugFile.writeln("Class.forName(" + oProps.getProperty("analyzer" , DEFAULT_ANALYZER) + ")"); 257 258 Class oAnalyzer = Class.forName(oProps.getProperty("analyzer" , DEFAULT_ANALYZER)); 259 260 if (DebugFile.trace) DebugFile.writeln("Class.forName(" + oProps.getProperty("driver") + ")"); 261 262 Class oDriver = Class.forName(oProps.getProperty("driver")); 263 264 if (DebugFile.trace) DebugFile.writeln("IndexReader.open("+sDirectory+")"); 265 266 File oDir = new File (sDirectory); 269 if (oDir.exists()) { 270 IndexReader oReader = IndexReader.open(sDirectory); 271 int iDeleted = oReader.delete(new Term("workarea", sWorkArea)); 272 oReader.close(); 273 } else { 274 FileSystem oFS = new FileSystem(); 275 try { oFS.mkdirs(sDirectory); } catch (Exception e) { throw new IOException (e.getClass().getName()+" "+e.getMessage()); } 276 } 277 279 if (DebugFile.trace) DebugFile.writeln("new IndexWriter("+sDirectory+",[Analyzer], true)"); 280 281 IndexWriter oIWrt = new IndexWriter(sDirectory, (Analyzer) oAnalyzer.newInstance(), true); 282 283 if (DebugFile.trace) 284 DebugFile.writeln("DriverManager.getConnection(" + oProps.getProperty("dburl") + ", ...)"); 285 286 Connection oConn = DriverManager.getConnection(oProps.getProperty("dburl"), oProps.getProperty("dbuser"),oProps.getProperty("dbpassword")); 287 oConn.setAutoCommit(true); 288 289 Statement oStmt = oConn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); 290 ResultSet oRSet; 291 292 if (sTableName.equalsIgnoreCase("k_bugs")) { 293 294 if (DebugFile.trace) 295 DebugFile.writeln("Statement.executeQuery(SELECT p.gu_workarea,p.nm_project,b.gu_bug,b.tl_bug,b.dt_modified," + IfNull(oConn) + "(b.nm_reporter,'')," + IfNull(oConn) + "(b.tx_bug_brief,'')," + IfNull(oConn) + "(b.tx_comments,'') FROM k_bugs b, k_projects p WHERE b.gu_project=p.gu_project AND p.gu_owner='"+sWorkArea+"')"); 296 297 oRSet = oStmt.executeQuery("SELECT p.gu_owner,p.nm_project,b.gu_bug,b.pg_bug,b.tl_bug,b.dt_modified," + IfNull(oConn) + "(b.nm_reporter,'')," + IfNull(oConn) + "(b.tx_comments,'')," + IfNull(oConn) + "(b.tx_bug_brief,'') FROM k_bugs b, k_projects p WHERE b.gu_project=p.gu_project AND p.gu_owner='"+sWorkArea+"'"); 298 299 while (oRSet.next()) { 300 sWorkArea = oRSet.getString(1); 301 sContainer = oRSet.getString(2); 302 sGuid = oRSet.getString(3); 303 iNumber = oRSet.getInt(4); 304 sTitle = oRSet.getString(5); 305 dtCreated = oRSet.getDate(6); 306 sAuthor = oRSet.getString(7); 307 sComments = oRSet.getString(8); 308 if (null==sComments) sComments = ""; 309 sText = oRSet.getString(9); 310 if (null==sText) sText = ""; 311 BugIndexer.addBug(oIWrt, sGuid, iNumber, sWorkArea, sContainer, sTitle, sAuthor, dtCreated, sComments, sText); 312 } oRSet.close(); 314 } 315 316 else if (sTableName.equalsIgnoreCase("k_newsmsgs")) { 317 318 if (DebugFile.trace) 319 DebugFile.writeln("Statement.executeQuery(SELECT g.gu_workarea,c.nm_category,m.gu_msg,m.tx_subject,m.dt_published," + IfNull(oConn) + "(b.nm_author,'')," + IfNull(oConn) + "(b.tx_msg,'') FROM k_newsmsgs m, k_categories c, k_newsgroups g, k_x_cat_objs x WHERE m.id_status=0 AND m.gu_msg=x.gu_object AND x.gu_category=g.gu_newsgrp AND c.gu_category=g.gu_newsgrp AND g.gu_workarea='"+sWorkArea+"')"); 320 321 oRSet = oStmt.executeQuery("SELECT g.gu_workarea,c.nm_category,m.gu_msg,m.tx_subject,m.dt_published," + IfNull(oConn) + "(m.nm_author,'')," + IfNull(oConn) + "(m.tx_msg,'') FROM k_newsmsgs m, k_categories c, k_newsgroups g, k_x_cat_objs x WHERE m.id_status=0 AND m.gu_msg=x.gu_object AND x.gu_category=g.gu_newsgrp AND c.gu_category=g.gu_newsgrp AND g.gu_workarea='"+sWorkArea+"'"); 322 323 while (oRSet.next()) { 324 sWorkArea = oRSet.getString(1); 325 sContainer = oRSet.getString(2); 326 sGuid = oRSet.getString(3); 327 sTitle = oRSet.getString(4); 328 dtCreated = oRSet.getDate(5); 329 sAuthor = oRSet.getString(6); 330 sText = oRSet.getString(7); 331 NewsMessageIndexer.addNewsMessage(oIWrt, sGuid, sWorkArea, sContainer, sTitle, sAuthor, dtCreated, sText); 332 } oRSet.close(); 334 } 335 else if (sTableName.equalsIgnoreCase("k_mime_msgs")) { 336 337 LinkedList oIndexedGuids = new LinkedList (); 338 339 PreparedStatement oRecp = oConn.prepareStatement("SELECT tx_personal,tx_email FROM k_inet_addrs WHERE tp_recipient<>'to' AND gu_mimemsg=?", ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); 340 341 if (DebugFile.trace) 342 DebugFile.writeln("Statement.executeQuery(SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"+sWorkArea+"')"); 343 344 oRSet = oStmt.executeQuery("SELECT g.gu_workarea,c.nm_category,m.gu_mimemsg,m.tx_subject,m.nm_from,m.tx_mail_from,m.pg_mimemsg,m.de_mimemsg,m.dt_sent,m.len_mimemsg,m.by_content FROM k_mime_msgs m, k_categories c WHERE m.bo_deleted<>0 AND m.bo_draft<>0 AND m.gu_category=c.gu_category AND m.gu_workarea='"+sWorkArea+"'"); 345 346 while (oRSet.next()) { 347 348 sWorkArea = oRSet.getString(1); 349 sContainer = oRSet.getString(2); 350 sGuid = oRSet.getString(3); 351 sTitle = oRSet.getString(4); 352 sAuthor = oRSet.getString(5); 353 if (oRSet.wasNull()) sAuthor = ""; 354 sAuthor += " " + oRSet.getString(6); 355 dNumber = oRSet.getBigDecimal(7); 356 if (oRSet.wasNull()) dNumber = dZero; 357 sComments = oRSet.getString(8); 358 dtCreated = oRSet.getDate(9); 359 iSize = oRSet.getInt(10); 360 361 if (DebugFile.trace) DebugFile.writeln("Indexing message "+sGuid+" - "+sTitle); 362 363 InputStream oStrm = oRSet.getBinaryStream(11); 364 365 String sRecipients = ""; 366 oRecp.setString(1, sGuid); 367 ResultSet oRecs = oRecp.executeQuery(); 368 while (oRecs.next()) { 369 sRecipients += oRecs.getString(1)+" "+oRecs.getString(2)+" "; 370 } oRecs.close(); 372 373 MailIndexer.addMail(oIWrt, sGuid, dNumber, sWorkArea, sContainer, sTitle, 374 sAuthor, sRecipients, dtCreated, sComments, oStrm, iSize); 375 376 oIndexedGuids.add(sGuid); 377 } oRSet.close(); 379 oRecp.close(); 380 381 PreparedStatement oUpdt = oConn.prepareStatement("UPDATE k_mime_msgs SET bo_indexed=1 WHERE gu_mimemsg=?"); 382 ListIterator oIter = oIndexedGuids.listIterator(); 383 while (oIter.hasNext()) { 384 oUpdt.setObject(1, oIter.next(), java.sql.Types.CHAR); 385 oUpdt.executeUpdate(); 386 } oUpdt.close(); 388 } 390 oStmt.close(); 391 oConn.close(); 392 393 if (DebugFile.trace) DebugFile.writeln("IndexWriter.optimize()"); 394 395 oIWrt.optimize(); 396 397 if (DebugFile.trace) DebugFile.writeln("IndexWriter.close()"); 398 399 oIWrt.close(); 400 401 if (DebugFile.trace) { 402 DebugFile.decIdent(); 403 DebugFile.writeln("End Indexer.rebuild()"); 404 } 405 } 407 419 public static void rebuild(Properties oProps, String sTableName) 420 throws SQLException , IOException , ClassNotFoundException , 421 IllegalArgumentException , NoSuchFieldException , 422 IllegalAccessException , InstantiationException { 423 } 424 425 public static void add(IndexWriter oIWrt, 426 Map oKeywords, Map oTexts, Map oUnStored) 427 throws ClassNotFoundException , IOException , 428 IllegalArgumentException , NoSuchFieldException , 429 IllegalAccessException , InstantiationException , 430 NullPointerException { 431 432 String sFieldName; 433 Object oFieldValue; 434 Document oDoc = new Document(); 435 Iterator oKeys = oKeywords.keySet().iterator(); 436 while (oKeys.hasNext()) { 437 sFieldName = (String ) oKeys.next(); 438 oFieldValue = oKeywords.get(sFieldName); 439 if (null==oFieldValue) oFieldValue = ""; 440 if (oFieldValue.getClass().getName().equals("java.util.Date")) 441 oDoc.add (Field.Keyword(sFieldName, (Date ) oFieldValue)); 442 else 443 oDoc.add (Field.Keyword(sFieldName, (String ) oFieldValue)); 444 } Iterator oTxts = oTexts.keySet().iterator(); 446 while (oTxts.hasNext()) { 447 sFieldName = (String ) oTxts.next(); 448 oFieldValue = oTexts.get(sFieldName); 449 if (null==oFieldValue) oFieldValue = ""; 450 oDoc.add (Field.Text(sFieldName, (String ) oFieldValue)); 451 } Iterator oUnStor = oUnStored.keySet().iterator(); 453 while (oUnStor.hasNext()) { 454 sFieldName = (String ) oUnStor.next(); 455 oFieldValue = oUnStored.get(sFieldName); 456 if (null==oFieldValue) oFieldValue = ""; 457 oDoc.add (Field.Text(sFieldName, (String ) oFieldValue)); 458 } oIWrt.addDocument(oDoc); 460 } 462 public static void add(String sTableName, String sDirectory, String sAnalyzer, 463 Map oKeywords, Map oTexts, Map oUnStored) 464 throws ClassNotFoundException , IOException , 465 IllegalArgumentException , NoSuchFieldException , 466 IllegalAccessException , InstantiationException , 467 NullPointerException { 468 469 if (!allowedTable(sTableName)) 470 throw new IllegalArgumentException ("Table name must be k_bugs or k_newsmsgs or k_mime_msgs"); 471 472 if (null==sDirectory) 473 throw new NoSuchFieldException ("Cannot find luceneindex property"); 474 475 File oDir = new File (sDirectory); 476 if (!oDir.exists()) { 477 FileSystem oFS = new FileSystem(); 478 try { oFS.mkdirs(sDirectory); } catch (Exception e) { throw new IOException (e.getClass().getName()+" "+e.getMessage()); } 479 } 480 481 Class oAnalyzer = Class.forName((sAnalyzer==null) ? DEFAULT_ANALYZER : sAnalyzer); 482 483 IndexWriter oIWrt = new IndexWriter(sDirectory, (Analyzer) oAnalyzer.newInstance(), true); 484 485 add (oIWrt, oKeywords, oTexts, oUnStored); 486 487 oIWrt.close(); 488 } 490 512 513 public static void add(String sTableName, Properties oProps, 514 String sGUID, int iNumber, String sWorkArea, 515 String sContainer, String sTitle, 516 String sText, String sAuthor, 517 String sAbstract, String sComments) 518 throws ClassNotFoundException , IOException , 519 IllegalArgumentException , NoSuchFieldException , 520 IllegalAccessException , InstantiationException , 521 NullPointerException { 522 523 if (null==sGUID) 524 throw new NullPointerException ("Document GUID may not be null"); 525 526 if (!sTableName.equalsIgnoreCase("k_bugs") && !sTableName.equalsIgnoreCase("k_newsmsgs") && !sTableName.equalsIgnoreCase("k_mime_msgs")) 527 throw new IllegalArgumentException ("Table name must be k_bugs or k_newsmsgs or k_mime_msgs"); 528 529 String sDirectory = oProps.getProperty("luceneindex"); 530 531 if (null==sDirectory) 532 throw new NoSuchFieldException ("Cannot find luceneindex property"); 533 534 sDirectory = Gadgets.chomp(sDirectory, File.separator) + sTableName.toLowerCase() + File.separator + sWorkArea; 535 File oDir = new File (sDirectory); 536 if (!oDir.exists()) { 537 FileSystem oFS = new FileSystem(); 538 try { oFS.mkdirs(sDirectory); } catch (Exception e) { throw new IOException (e.getClass().getName()+" "+e.getMessage()); } 539 } 540 541 Class oAnalyzer = Class.forName(oProps.getProperty("analyzer" , DEFAULT_ANALYZER)); 542 543 HashMap oKeys = new HashMap (11); 544 oKeys.put("workarea" , sWorkArea==null ? "" : sWorkArea); 545 oKeys.put("container", sContainer==null ? "" : sContainer); 546 oKeys.put("guid", sGUID); 547 oKeys.put("number", String.valueOf(iNumber)); 548 HashMap oTexts = new HashMap (11); 549 oTexts.put("title", sTitle==null ? "" : sTitle); 550 oTexts.put("author", sAuthor==null ? "" : sAuthor); 551 oTexts.put("abstract", sAbstract==null ? "" : Gadgets.left(sAbstract, 80)); 552 HashMap oUnstor = new HashMap (11); 553 oUnstor.put("comments", sComments==null ? "" : sComments); 554 oUnstor.put("text", sText==null ? "" : sText); 555 556 IndexWriter oIWrt = new IndexWriter(sDirectory, (Analyzer) oAnalyzer.newInstance(), true); 557 add(oIWrt, oKeys, oTexts, oUnstor); 558 oIWrt.close(); 559 } 561 563 575 public static int delete(String sTableName, String sWorkArea, Properties oProps, String sGuid) 576 throws IllegalArgumentException , NoSuchFieldException , 577 IllegalAccessException , IOException , NullPointerException { 578 579 if (null==sGuid) 580 throw new NullPointerException ("Document GUID may not be null"); 581 582 if (!allowedTable(sTableName)) 583 throw new IllegalArgumentException ("Table name must be k_bugs or k_newsmsgs or k_mime_msgs"); 584 585 String sDirectory = oProps.getProperty("luceneindex"); 586 587 if (null==sDirectory) 588 throw new NoSuchFieldException ("Cannot find luceneindex property"); 589 590 sDirectory = Gadgets.chomp(sDirectory, File.separator) + sTableName.toLowerCase() + File.separator + sWorkArea; 591 File oDir = new File (sDirectory); 592 if (!oDir.exists()) { 593 FileSystem oFS = new FileSystem(); 594 try { oFS.mkdirs(sDirectory); } catch (Exception e) { throw new IOException (e.getClass().getName()+" "+e.getMessage()); } 595 } 596 597 IndexReader oReader = IndexReader.open(sDirectory); 598 599 int iDeleted = oReader.delete(new Term("guid", sGuid)); 600 601 oReader.close(); 602 603 return iDeleted; 604 } 606 608 private static void printUsage() { 609 System.out.println(""); 610 System.out.println("Usage:"); 611 System.out.println("Indexer cnf_path rebuild {k_bugs|k_newsmsgs|k_mime_msgs}"); 612 System.out.println("cnf_path : Full path to hipergate.cnf file"); 613 } 614 615 617 632 public static void main(String [] argv) 633 throws SQLException , IOException , ClassNotFoundException , 634 IllegalArgumentException , NoSuchFieldException , 635 IllegalAccessException , InstantiationException { 636 637 if (argv.length!=3) 638 printUsage(); 639 else if (!argv[1].equals("rebuild")) { 640 printUsage(); 641 } else if (!allowedTable(argv[2])) { 642 printUsage(); 643 } 644 else { 645 Properties oProps = new Properties (); 646 FileInputStream oCNF = new FileInputStream (argv[0]); 647 oProps.load(oCNF); 648 oCNF.close(); 649 rebuild (oProps, argv[2]); 650 } 651 } } 653 | Popular Tags |