package org.tigris.scarab.util.word;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.avalon.framework.activity.Initializable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.context.Context;
import org.apache.avalon.framework.context.ContextException;
import org.apache.avalon.framework.context.Contextualizable;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.torque.util.Criteria;
import org.tigris.scarab.om.Attachment;
import org.tigris.scarab.om.AttachmentPeer;
import org.tigris.scarab.om.AttributeValue;
import org.tigris.scarab.om.AttributeValuePeer;
import org.tigris.scarab.om.IssuePeer;
import org.tigris.scarab.tools.localization.L10NKeySet;
import org.tigris.scarab.tools.localization.L10NMessage;
import org.tigris.scarab.tools.localization.Localizable;
import org.tigris.scarab.util.Log;
import org.tigris.scarab.util.ScarabException;

import com.workingdogs.village.Record;

/**
 * A {@link SearchIndex} implementation that stores attribute values and
 * attachments in a Lucene index on disk and searches it for matching issues.
 *
 * <p>Typical use: register one or more queries with
 * {@link #addQuery(Integer[], String)} and/or
 * {@link #addAttachmentQuery(Integer[], String)}, call
 * {@link #getRelatedIssues()}, then {@link #clear()} before reuse.
 *
 * <p>The query lists are only created by {@link #clear()}, which
 * {@link #initialize()} calls; the component must therefore be initialized
 * before queries are added.
 *
 * <p>All index modifications in this class are serialized by synchronizing
 * on {@code getClass()}.
 * NOTE(review): field-name constants such as ISSUE_ID, TEXT, EMPTY_LIST and
 * INDEX_PATH are not declared here; presumably they are inherited from
 * SearchIndex -- confirm.
 */
public class LuceneSearchIndex
    implements SearchIndex, Configurable, Contextualizable, Initializable
{
    /** Number of rows fetched per database round trip in updateIndex(). */
    private static final long BATCH_SIZE = 100L;

    /** The index is optimized every time this many documents were added. */
    private static final int OPTIMIZE_INTERVAL = 100;

    /** Application home directory, used to resolve a relative index path. */
    private String applicationRoot;

    /** Counts documents added through addDoc(); drives periodic optimize. */
    private static int counter = 0;

    /** Location of the Lucene index directory. */
    private String path;

    /** Attribute id arrays, one entry per registered attribute query. */
    private List attributeIds;

    /** Query strings, parallel to {@link #attributeIds}. */
    private List queryText;

    /** Attachment type id arrays, one entry per attachment query. */
    private List attachmentIds;

    /** Query strings, parallel to {@link #attachmentIds}. */
    private List attachmentQueryText;

    /**
     * Creates an unconfigured instance. The index location is set by
     * {@link #configure(Configuration)} and the instance becomes usable
     * after {@link #initialize()}.
     *
     * @throws IOException declared for interface compatibility; this
     *         constructor itself performs no I/O
     */
    public LuceneSearchIndex()
        throws IOException
    {
    }

    /**
     * Registers a text query against attribute values.
     *
     * @param ids attribute ids the text must belong to; {@code null} or
     *        empty means the query is not restricted to specific ids
     * @param text the query text in Lucene query syntax
     */
    public void addQuery(Integer[] ids, String text)
    {
        attributeIds.add(ids);
        queryText.add(text);
    }

    /**
     * Registers a text query against attachments.
     *
     * @param ids attachment type ids the text must belong to; {@code null}
     *        or empty means the query is not restricted to specific ids
     * @param text the query text in Lucene query syntax
     */
    public void addAttachmentQuery(Integer[] ids, String text)
    {
        attachmentIds.add(ids);
        attachmentQueryText.add(text);
    }

    /**
     * Returns the ids of the issues that match every registered query.
     * Equivalent to {@code getRelatedIssues(false)}.
     *
     * @return the matching issue ids, or EMPTY_LIST if no query was added
     * @throws Exception if a query cannot be parsed or the index cannot be
     *         read
     */
    public Long[] getRelatedIssues()
        throws Exception
    {
        return getRelatedIssues(false);
    }

    /**
     * Runs all registered queries and combines their results.
     *
     * @param mergeResults if {@code true} the result is the union of the
     *        issue ids found by the individual queries; if {@code false}
     *        only issue ids found by every query are returned
     * @return the combined issue ids, or EMPTY_LIST if no query was added
     * @throws Exception if a query cannot be parsed or the index cannot be
     *         read
     */
    public Long[] getRelatedIssues(boolean mergeResults)
        throws Exception
    {
        if (queryText.size() == 0 && attachmentQueryText.size() == 0)
        {
            return EMPTY_LIST;
        }

        List issueIds = null;

        // Queries are evaluated last-added first, as in the original code.
        for (int j = attributeIds.size() - 1; j >= 0; j--)
        {
            Integer[] ids = (Integer[]) attributeIds.get(j);
            String query = (String) queryText.get(j);
            issueIds = performPartialQuery(ATTRIBUTE_ID, ids, query,
                                           issueIds, mergeResults);
        }

        for (int j = attachmentIds.size() - 1; j >= 0; j--)
        {
            Integer[] ids = (Integer[]) attachmentIds.get(j);
            String query = (String) attachmentQueryText.get(j);
            issueIds = performPartialQuery(ATTACHMENT_TYPE_ID, ids, query,
                                           issueIds, mergeResults);
        }

        // Every element was added as a Long by performPartialQuery or
        // mergeResults, so the typed array copy is safe.
        return (Long[]) issueIds.toArray(new Long[issueIds.size()]);
    }

    /**
     * Executes one query and folds its hits into the running result.
     *
     * @param key name of the index field the ids refer to
     * @param ids ids to restrict the query to, may be {@code null} or empty
     * @param query the query text
     * @param issueIds result accumulated so far, or {@code null} for the
     *        first query
     * @param mergeResults {@code true} to union, {@code false} to intersect
     *        with the accumulated result
     * @return the updated list of issue ids (as {@code Long})
     * @throws ScarabException if the query text cannot be parsed
     * @throws IOException if the index cannot be read
     */
    private List performPartialQuery(String key, Integer[] ids,
                                     String query, List issueIds,
                                     boolean mergeResults)
        throws ScarabException, IOException
    {
        // String.trim() returns a new string; the original code discarded
        // the result, so the query was never actually trimmed.
        String trimmedQuery = query.trim();

        StringBuffer fullQuery = new StringBuffer(trimmedQuery.length() + 100);
        if (ids != null && ids.length != 0)
        {
            fullQuery.append("+((");
            for (int i = ids.length - 1; i >= 0; i--)
            {
                fullQuery.append(key)
                    .append(':')
                    .append(ids[i].toString());
                if (i != 0)
                {
                    fullQuery.append(" OR ");
                }
            }
            fullQuery.append(") AND (")
                .append(trimmedQuery)
                .append("))");
        }
        else
        {
            fullQuery.append("+(")
                .append(trimmedQuery)
                .append(')');
        }

        Query q = null;
        try
        {
            Log.get().debug("Querybefore=" + fullQuery);
            q = QueryParser.parse(fullQuery.toString(), TEXT,
                                  new PorterStemAnalyzer());
            Log.get().debug("Queryafter=" + q.toString("text"));
        }
        catch (Throwable t)
        {
            // NOTE(review): Throwable is caught deliberately in the original;
            // presumably because the Lucene query parser can raise Errors
            // on malformed input -- confirm before narrowing.
            throw new ScarabException(
                L10NKeySet.ExceptionParseError,
                fullQuery,
                t);
        }

        // The map is used as a set of issue id strings (values are unused).
        Map deduper;
        IndexSearcher searcher = new IndexSearcher(path);
        try
        {
            Hits hits = searcher.search(q);
            deduper = new HashMap((int) (1.25 * hits.length() + 1));
            for (int i = 0; i < hits.length(); i++)
            {
                // Load the stored document once per hit.
                String hitIssueId = hits.doc(i).get(ISSUE_ID);
                deduper.put(hitIssueId, null);
                Log.get().debug("Possible issueId from search: " +
                                hitIssueId);
            }
        }
        finally
        {
            // Release the index files even when the search fails.
            searcher.close();
        }

        if (issueIds == null)
        {
            issueIds = new ArrayList(deduper.size());
            Iterator iter = deduper.keySet().iterator();
            while (iter.hasNext())
            {
                issueIds.add(Long.valueOf((String) iter.next()));
                Log.get().debug("Adding issueId from search: " +
                                issueIds.get(issueIds.size() - 1));
            }
        }
        else if (mergeResults)
        {
            mergeResults(issueIds, deduper);
        }
        else
        {
            removeUniqueElements(issueIds, deduper);
        }
        return issueIds;
    }

    /**
     * Removes from {@code list} every element whose string form is not a
     * key of {@code map}, i.e. keeps the intersection.
     *
     * @param list issue ids accumulated so far; modified in place
     * @param map issue id strings found by the latest query
     */
    private void removeUniqueElements(List list, Map map)
    {
        // Iterate backwards so removals do not shift unvisited elements.
        for (int i = list.size() - 1; i >= 0; i--)
        {
            Object obj = list.get(i);
            if (!map.containsKey(obj.toString()))
            {
                Log.get().debug("removing issueId from search: " + obj);
                list.remove(i);
            }
        }
    }

    /**
     * Adds to {@code list} every issue id from {@code map} that the list
     * does not already contain, i.e. builds the union. Ids already present
     * in the list are removed from {@code map} as a side effect.
     *
     * @param list issue ids (as {@code Long}) accumulated so far; modified
     *        in place
     * @param map issue id strings found by the latest query; modified in
     *        place
     */
    private void mergeResults(List list, Map map)
    {
        for (int i = list.size() - 1; i >= 0; i--)
        {
            String id = ((Long) list.get(i)).toString();
            if (map.containsKey(id))
            {
                map.remove(id);
                Log.get().debug("removed duplicate issueId from map: " + id);
            }
        }
        Iterator iter = map.keySet().iterator();
        while (iter.hasNext())
        {
            String id = (String) iter.next();
            list.add(Long.valueOf(id));
            Log.get().debug("Add issueId from map to List: " + id);
        }
    }

    /**
     * Replaces the index entry for an attribute value. Any existing
     * document with the same value id is deleted first; a new document is
     * added unless the value is {@code null}.
     *
     * @param attributeValue the attribute value to index
     * @throws Exception if more than one document existed for the value
     *         id, or if the index cannot be read or written
     */
    public void index(AttributeValue attributeValue)
        throws Exception
    {
        String valId = attributeValue.getValueId().toString();

        int deletedDocs = deleteDocuments(VALUE_ID, valId);
        if (deletedDocs > 1)
        {
            throw new ScarabException(L10NKeySet.ExceptionMultipleAttValues,
                                      valId);
        }

        if (attributeValue.getValue() == null)
        {
            Log.get().warn("Attribute value pk=" + valId +
                           " has a null value.");
        }
        else
        {
            Document doc = new Document();
            doc.add(Field.Keyword(VALUE_ID, valId));
            doc.add(Field.UnIndexed(ISSUE_ID,
                attributeValue.getIssueId().toString()));
            doc.add(Field.Keyword(ATTRIBUTE_ID,
                attributeValue.getAttributeId().toString()));
            doc.add(Field.UnStored(TEXT, attributeValue.getValue()));
            addDoc(doc);
        }
    }

    /**
     * Deletes all documents whose {@code idField} equals {@code id}.
     *
     * <p>If the delete fails with a NullPointerException the index is
     * searched for the id instead; the failure is only reported when a
     * matching document actually exists.
     * NOTE(review): the comment explaining this NPE workaround was not
     * visible in the reviewed source -- confirm the Lucene behaviour it
     * guards against.
     *
     * @param idField name of the keyword field holding the id
     * @param id the id value to delete
     * @return the number of documents deleted (0 if the NPE path was taken)
     * @throws Exception if the delete failed although a document with the
     *         id exists, or if the index cannot be accessed
     */
    private int deleteDocuments(String idField, String id)
        throws Exception
    {
        Term term = new Term(idField, id);
        int deletedDocs = 0;
        try
        {
            synchronized (getClass())
            {
                IndexReader reader = null;
                try
                {
                    reader = IndexReader.open(path);
                    deletedDocs = reader.delete(term);
                }
                finally
                {
                    if (reader != null)
                    {
                        reader.close();
                    }
                }
            }
        }
        catch (NullPointerException npe)
        {
            IndexSearcher searcher = new IndexSearcher(path);
            try
            {
                Query q = QueryParser.parse("+" + idField + ":" + id, TEXT,
                                            new PorterStemAnalyzer());
                Hits hits = searcher.search(q);
                if (hits.length() > 0)
                {
                    Localizable l10nInstance = new L10NMessage(
                        L10NKeySet.ExceptionLucene, id, npe);
                    Log.get().debug(l10nInstance.getMessage());
                    throw new ScarabException(l10nInstance);
                }
            }
            finally
            {
                // The original code never closed this searcher.
                searcher.close();
            }
        }
        return deletedDocs;
    }

    /**
     * Appends a document to the index and optimizes the index after every
     * {@link #OPTIMIZE_INTERVAL} additions.
     *
     * @param doc the document to add
     * @throws IOException if the index cannot be written
     */
    private void addDoc(Document doc)
        throws IOException
    {
        synchronized (getClass())
        {
            IndexWriter indexer = null;
            try
            {
                indexer = new IndexWriter(path,
                    new PorterStemAnalyzer(), false);
                indexer.addDocument(doc);

                if (++counter % OPTIMIZE_INTERVAL == 0)
                {
                    indexer.optimize();
                }
            }
            finally
            {
                if (indexer != null)
                {
                    indexer.close();
                }
            }
        }
    }

    /**
     * Replaces the index entry for an attachment. Any existing document
     * with the same attachment id is deleted first; a new document is added
     * unless the attachment data is {@code null}.
     *
     * @param attachment the attachment to index
     * @throws Exception if more than one document existed for the
     *         attachment id, or if the index cannot be read or written
     */
    public void index(Attachment attachment)
        throws Exception
    {
        String attId = attachment.getAttachmentId().toString();

        int deletedDocs = deleteDocuments(ATTACHMENT_ID, attId);
        if (deletedDocs > 1)
        {
            throw new ScarabException(
                L10NKeySet.ExceptionMultipleAttachements,
                attId);
        }

        if (attachment.getData() == null)
        {
            Log.get().warn("Attachment pk=" + attId + " has a null data.");
        }
        else
        {
            Document doc = new Document();
            doc.add(Field.Keyword(ATTACHMENT_ID, attId));
            doc.add(Field.UnIndexed(ISSUE_ID,
                attachment.getIssueId().toString()));
            doc.add(Field.Keyword(ATTACHMENT_TYPE_ID,
                attachment.getTypeId().toString()));
            doc.add(Field.UnStored(TEXT, attachment.getData()));
            addDoc(doc);
        }
    }

    /**
     * Re-indexes every non-deleted attribute value and attachment that
     * belongs to a non-deleted issue, then optimizes the index.
     *
     * @throws Exception if the database or the index cannot be accessed
     */
    public void updateIndex()
        throws Exception
    {
        reindexAttributeValues();
        reindexAttachments();
        optimizeIndex();
    }

    /**
     * Indexes all attribute values, reading them from the database in id
     * ranges of {@link #BATCH_SIZE}.
     *
     * @throws Exception if the database or the index cannot be accessed
     */
    private void reindexAttributeValues()
        throws Exception
    {
        Criteria crit = new Criteria();
        crit.addSelectColumn("max(" + AttributeValuePeer.VALUE_ID + ")");
        List records = AttributeValuePeer.doSelectVillageRecords(crit);
        long max = ((Record) records.get(0)).getValue(1).asLong();

        long i = 0L;
        List avs = null;
        do
        {
            // Select ids in the half-open range (i, i + BATCH_SIZE].
            crit = new Criteria();
            Criteria.Criterion low = crit.getNewCriterion(
                AttributeValuePeer.VALUE_ID,
                new Long(i), Criteria.GREATER_THAN);
            i += BATCH_SIZE;
            Criteria.Criterion high = crit.getNewCriterion(
                AttributeValuePeer.VALUE_ID,
                new Long(i), Criteria.LESS_EQUAL);
            crit.add(low.and(high));
            crit.add(AttributeValuePeer.DELETED, false);
            crit.addJoin(AttributeValuePeer.ISSUE_ID, IssuePeer.ISSUE_ID);
            crit.add(IssuePeer.DELETED, false);
            avs = AttributeValuePeer.doSelect(crit);
            if (!avs.isEmpty())
            {
                Iterator avi = avs.iterator();
                while (avi.hasNext())
                {
                    index((AttributeValue) avi.next());
                }
                if (Log.get().isDebugEnabled())
                {
                    Log.get().debug("Updated index for attribute values (" +
                                    (i - BATCH_SIZE) + "-" + i + "]");
                    Log.debugMemory();
                }
            }
        }
        while (i < max || !avs.isEmpty());
    }

    /**
     * Indexes all attachments that have non-empty data, an issue id and a
     * type id, reading them from the database in id ranges of
     * {@link #BATCH_SIZE}.
     *
     * @throws Exception if the database or the index cannot be accessed
     */
    private void reindexAttachments()
        throws Exception
    {
        Criteria crit = new Criteria();
        crit.addSelectColumn("max(" + AttachmentPeer.ATTACHMENT_ID + ")");
        List records = AttachmentPeer.doSelectVillageRecords(crit);
        long max = ((Record) records.get(0)).getValue(1).asLong();

        long i = 0L;
        List atts = null;
        do
        {
            // Select ids in the half-open range (i, i + BATCH_SIZE].
            crit = new Criteria();
            Criteria.Criterion low = crit.getNewCriterion(
                AttachmentPeer.ATTACHMENT_ID,
                new Long(i), Criteria.GREATER_THAN);
            i += BATCH_SIZE;
            Criteria.Criterion high = crit.getNewCriterion(
                AttachmentPeer.ATTACHMENT_ID,
                new Long(i), Criteria.LESS_EQUAL);
            crit.add(low.and(high));
            crit.add(AttachmentPeer.DELETED, false);
            crit.addJoin(AttachmentPeer.ISSUE_ID, IssuePeer.ISSUE_ID);
            crit.add(IssuePeer.DELETED, false);
            atts = AttachmentPeer.doSelect(crit);
            if (!atts.isEmpty())
            {
                Iterator atti = atts.iterator();
                while (atti.hasNext())
                {
                    Attachment att = (Attachment) atti.next();
                    if (att.getData() != null && att.getData().length() > 0 &&
                        att.getIssueId() != null && att.getTypeId() != null)
                    {
                        index(att);
                    }
                }

                if (Log.get().isDebugEnabled())
                {
                    Log.get().debug("Updated index for attachments (" +
                                    (i - BATCH_SIZE) + "-" + i + "]");
                    Log.debugMemory();
                }
            }
        }
        while (i < max || !atts.isEmpty());
    }

    /**
     * Opens the existing index and optimizes it.
     *
     * @throws IOException if the index cannot be written
     */
    private void optimizeIndex()
        throws IOException
    {
        synchronized (getClass())
        {
            IndexWriter indexer = null;
            try
            {
                indexer = new IndexWriter(path,
                    new PorterStemAnalyzer(), false);
                indexer.optimize();
            }
            finally
            {
                if (indexer != null)
                {
                    indexer.close();
                }
            }
        }
    }

    /**
     * Reads the index location from the component configuration.
     *
     * @param conf the configuration; its INDEX_PATH attribute holds the
     *        index directory (left {@code null} if the attribute is absent)
     */
    public void configure(Configuration conf)
    {
        path = conf.getAttribute(INDEX_PATH, null);
    }

    /**
     * Stores the application home directory taken from the context entry
     * {@code urn:avalon:home}.
     *
     * @param context the component context
     * @throws ContextException if the entry cannot be retrieved
     */
    public void contextualize(Context context) throws ContextException
    {
        this.applicationRoot = context.get("urn:avalon:home").toString();
    }

    /**
     * Resolves the index path, creates an empty index if the directory is
     * missing or empty, and resets the query lists.
     *
     * @throws IllegalStateException if no index path was configured
     * @throws IOException if the index directory cannot be created or
     *         listed, or the index cannot be created
     * @throws Exception declared by the lifecycle interface
     */
    public void initialize() throws Exception
    {
        if (path == null)
        {
            // Previously this surfaced as a bare NPE from new File(null).
            throw new IllegalStateException(
                "Lucene index path is not configured");
        }

        File indexDir = new File(path);
        if (!indexDir.isAbsolute())
        {
            path = getRealPath(path);
            indexDir = new File(path);
        }

        boolean createIndex = false;
        if (indexDir.exists())
        {
            File[] contents = indexDir.listFiles();
            if (contents == null)
            {
                // listFiles() returns null for a non-directory or on an
                // I/O error. Fail instead of dereferencing null, and do
                // not treat this as "empty" so an existing index is never
                // overwritten by mistake.
                throw new IOException("Cannot list contents of index"
                    + " directory '" + path + '\'');
            }
            createIndex = (contents.length == 0);
        }
        else
        {
            if (!indexDir.mkdirs() && !indexDir.isDirectory())
            {
                throw new IOException("Could not create index directory '"
                    + path + '\'');
            }
            createIndex = true;
        }

        if (createIndex)
        {
            Log.get().info("Creating index at '" + path + '\'');
            synchronized (getClass())
            {
                IndexWriter indexer = null;
                try
                {
                    indexer =
                        new IndexWriter(path, new PorterStemAnalyzer(), true);
                }
                finally
                {
                    if (indexer != null)
                    {
                        indexer.close();
                    }
                }
            }
        }

        clear();
    }

    /**
     * Converts a relative path into an absolute one, resolving it against
     * the application root when that is known and against the current
     * working directory otherwise.
     *
     * @param path the relative path
     * @return the absolute path
     */
    private String getRealPath(String path)
    {
        File resolved = (applicationRoot == null)
            ? new File(path)
            : new File(applicationRoot, path);
        return resolved.getAbsolutePath();
    }

    /**
     * Discards all registered queries so the instance can be reused for a
     * new search.
     */
    public void clear()
    {
        attributeIds = new ArrayList(5);
        queryText = new ArrayList(5);
        attachmentIds = new ArrayList(2);
        attachmentQueryText = new ArrayList(2);
    }
}