package org.dspace.search;

import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Bitstream;
import org.dspace.content.Bundle;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.ItemIterator;
import org.dspace.content.MetadataSchema;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
import org.dspace.handle.HandleManager;

/**
 * Builds and maintains the Lucene search index for communities, collections
 * and items.
 * <p>
 * The index lives in the directory named by the <code>search.dir</code>
 * configuration property. Every indexed object becomes one Lucene document
 * carrying its type, its handle, a set of named text fields, and a
 * catch-all <code>default</code> field holding all of the text.
 */
public class DSIndexer
{
    private static final Logger log = Logger.getLogger(DSIndexer.class);

    /**
     * Analyzer shared by all index writers; created on first use by
     * {@link #getAnalyzer()}.
     */
    private static Analyzer analyzer = null;

    /**
     * Adds a single object to the existing index.
     * <p>
     * Items, collections and communities are indexed; an object of any other
     * type is ignored (the index is still opened and closed).
     *
     * @param c   current context
     * @param dso object to index
     * @throws SQLException if reading the object from the database fails
     * @throws IOException  if the index cannot be opened, written or closed
     */
    public static void indexContent(Context c, DSpaceObject dso)
            throws SQLException, IOException
    {
        IndexWriter writer = openIndex(c, false);

        try
        {
            switch (dso.getType())
            {
            case Constants.ITEM:
                writeItemIndex(c, writer, (Item) dso);
                break;

            case Constants.COLLECTION:
                writeCollectionIndex(c, writer, (Collection) dso);
                break;

            case Constants.COMMUNITY:
                writeCommunityIndex(c, writer, (Community) dso);
                break;
            }
        }
        finally
        {
            closeIndex(c, writer);
        }
    }

    /**
     * Removes an object from the index, looking it up by its handle.
     *
     * @param c   current context
     * @param dso object to remove
     * @throws SQLException if the handle lookup fails
     * @throws IOException  if the index cannot be opened or modified
     */
    public static void unIndexContent(Context c, DSpaceObject dso)
            throws SQLException, IOException
    {
        String h = HandleManager.findHandle(c, dso);

        unIndexContent(c, h);
    }

    /**
     * Removes every document whose <code>handle</code> field equals the given
     * handle.
     * <p>
     * A <code>null</code> handle is logged as a warning and nothing is
     * removed; the index is not opened in that case.
     *
     * @param c        current context (not used by this method)
     * @param myhandle handle of the object to remove, may be <code>null</code>
     * @throws SQLException declared for interface compatibility
     * @throws IOException  if the index cannot be opened or modified
     */
    public static void unIndexContent(Context c, String myhandle)
            throws SQLException, IOException
    {
        if (myhandle == null)
        {
            // Nothing to delete, so there is no reason to open the index.
            log.warn("unindex of content with null handle attempted");
            return;
        }

        String index_directory = ConfigurationManager.getProperty("search.dir");
        IndexReader ir = IndexReader.open(index_directory);

        try
        {
            ir.deleteDocuments(new Term("handle", myhandle));
        }
        finally
        {
            ir.close();
        }
    }

    /**
     * Re-indexes an object by removing it from the index and adding it again.
     *
     * @param c   current context
     * @param dso object to re-index
     * @throws SQLException if reading from the database fails
     * @throws IOException  if the index cannot be opened or modified
     */
    public static void reIndexContent(Context c, DSpaceObject dso)
            throws SQLException, IOException
    {
        unIndexContent(c, dso);
        indexContent(c, dso);
    }

    /**
     * Creates a brand new index, discarding any existing one, and fills it
     * with all communities, collections and items; the index is optimized
     * before it is closed.
     *
     * @param c current context
     * @throws SQLException if reading from the database fails
     * @throws IOException  if the index cannot be created or written
     */
    public static void createIndex(Context c) throws SQLException, IOException
    {
        IndexWriter writer = openIndex(c, true);

        try
        {
            indexAllCommunities(c, writer);
            indexAllCollections(c, writer);
            indexAllItems(c, writer);

            writer.optimize();
        }
        finally
        {
            closeIndex(c, writer);
        }
    }

    /**
     * Command-line entry point.
     * <p>
     * With the two arguments <code>remove &lt;handle&gt;</code> the object
     * with that handle is removed from the index; with any other arguments
     * the whole index is rebuilt with authorization checks switched off.
     *
     * @param args command-line arguments
     * @throws Exception on any failure
     */
    public static void main(String[] args) throws Exception
    {
        Context c = new Context();

        if ((args.length == 2) && (args[0].equals("remove")))
        {
            unIndexContent(c, args[1]);
        }
        else
        {
            c.setIgnoreAuthorization(true);

            createIndex(c);

            System.out.println("Done with indexing");
        }
    }

    /**
     * Returns the analyzer used for indexing, creating it on first use.
     * <p>
     * The class is named by the <code>search.analyzer</code> configuration
     * property and defaults to <code>org.dspace.search.DSAnalyzer</code>. It
     * is instantiated through its no-argument constructor.
     * <p>
     * NOTE(review): the lazy initialisation is not synchronized.
     *
     * @return the shared analyzer instance
     * @throws IllegalStateException if the analyzer class cannot be loaded or
     *                               instantiated; the original failure is
     *                               attached as the cause
     */
    static Analyzer getAnalyzer() throws IllegalStateException
    {
        if (analyzer == null)
        {
            String analyzerClassName = ConfigurationManager
                    .getProperty("search.analyzer");

            if (analyzerClassName == null)
            {
                analyzerClassName = "org.dspace.search.DSAnalyzer";
            }

            try
            {
                Class analyzerClass = Class.forName(analyzerClassName);
                analyzer = (Analyzer) analyzerClass.newInstance();
            }
            catch (Exception e)
            {
                log.fatal(LogManager.getHeader(null, "no_search_analyzer",
                        "search.analyzer=" + analyzerClassName), e);

                // Keep the original message text but preserve the cause so
                // the stack trace of the real failure is not lost.
                IllegalStateException failure = new IllegalStateException(
                        e.toString());
                failure.initCause(e);
                throw failure;
            }
        }

        return analyzer;
    }

    /**
     * Opens a writer on the index directory named by <code>search.dir</code>.
     * <p>
     * If <code>search.maxfieldlength</code> is configured it is applied to
     * the writer; the value <code>-1</code> is translated to
     * {@link Integer#MAX_VALUE}.
     *
     * @param c             current context (not used by this method)
     * @param wipe_existing <code>true</code> to create a fresh index,
     *                      <code>false</code> to append to the existing one
     * @return an open index writer; the caller must close it
     * @throws IOException if the index cannot be opened
     */
    private static IndexWriter openIndex(Context c, boolean wipe_existing)
            throws IOException
    {
        String index_directory = ConfigurationManager.getProperty("search.dir");

        IndexWriter writer = new IndexWriter(index_directory, getAnalyzer(),
                wipe_existing);

        if (ConfigurationManager.getProperty("search.maxfieldlength") != null)
        {
            int maxfieldlength = ConfigurationManager
                    .getIntProperty("search.maxfieldlength");

            if (maxfieldlength == -1)
            {
                writer.setMaxFieldLength(Integer.MAX_VALUE);
            }
            else
            {
                writer.setMaxFieldLength(maxfieldlength);
            }
        }

        return writer;
    }

    /**
     * Closes an index writer; a <code>null</code> writer is ignored.
     *
     * @param c      current context (not used by this method)
     * @param writer writer to close, may be <code>null</code>
     * @throws IOException if closing fails
     */
    private static void closeIndex(Context c, IndexWriter writer)
            throws IOException
    {
        if (writer != null)
        {
            writer.close();
        }
    }

    /**
     * Builds the location text of an item: <code>" m" + id</code> for every
     * community it belongs to, followed by <code>" l" + id</code> for every
     * collection.
     *
     * @param c      current context (not used by this method)
     * @param myitem item to describe
     * @return the location text, empty if the item has no owners
     * @throws SQLException if the owners cannot be read
     */
    private static String buildItemLocationString(Context c, Item myitem)
            throws SQLException
    {
        Community[] communities = myitem.getCommunities();
        Collection[] collections = myitem.getCollections();

        StringBuffer location = new StringBuffer();

        for (int i = 0; i < communities.length; i++)
        {
            location.append(" m").append(communities[i].getID());
        }

        for (int i = 0; i < collections.length; i++)
        {
            location.append(" l").append(collections[i].getID());
        }

        return location.toString();
    }

    /**
     * Builds the location text of a collection: <code>" m" + id</code> for
     * every community it belongs to.
     *
     * @param c      current context (not used by this method)
     * @param target collection to describe
     * @return the location text, empty if the collection has no communities
     * @throws SQLException if the communities cannot be read
     */
    private static String buildCollectionLocationString(Context c,
            Collection target) throws SQLException
    {
        Community[] communities = target.getCommunities();

        StringBuffer location = new StringBuffer();

        for (int i = 0; i < communities.length; i++)
        {
            location.append(" m").append(communities[i].getID());
        }

        return location.toString();
    }

    /**
     * Writes an index record for every community returned by
     * <code>Community.findAll</code>.
     */
    private static void indexAllCommunities(Context c, IndexWriter writer)
            throws SQLException, IOException
    {
        Community[] targets = Community.findAll(c);

        for (int i = 0; i < targets.length; i++)
        {
            writeCommunityIndex(c, writer, targets[i]);
        }
    }

    /**
     * Writes an index record for every collection returned by
     * <code>Collection.findAll</code>.
     */
    private static void indexAllCollections(Context c, IndexWriter writer)
            throws SQLException, IOException
    {
        Collection[] targets = Collection.findAll(c);

        for (int i = 0; i < targets.length; i++)
        {
            writeCollectionIndex(c, writer, targets[i]);
        }
    }

    /**
     * Writes an index record for every item returned by
     * <code>Item.findAll</code>.
     */
    private static void indexAllItems(Context c, IndexWriter writer)
            throws SQLException, IOException
    {
        ItemIterator items = Item.findAll(c);

        while (items.hasNext())
        {
            Item target = (Item) items.next();

            writeItemIndex(c, writer, target);
        }
    }

    /**
     * Writes the index record of a community: its name and its handle.
     */
    private static void writeCommunityIndex(Context c, IndexWriter writer,
            Community target) throws SQLException, IOException
    {
        HashMap textvalues = new HashMap();

        String myhandle = HandleManager.findHandle(c, target);
        String name = target.getMetadata("name");

        textvalues.put("name", name);
        textvalues.put("handletext", myhandle);

        writeIndexRecord(writer, Constants.COMMUNITY, myhandle, textvalues, "");
    }

    /**
     * Writes the index record of a collection: its name, its location text
     * and its handle.
     */
    private static void writeCollectionIndex(Context c, IndexWriter writer,
            Collection target) throws SQLException, IOException
    {
        String location_text = buildCollectionLocationString(c, target);
        String myhandle = HandleManager.findHandle(c, target);

        HashMap textvalues = new HashMap();

        String name = target.getMetadata("name");

        textvalues.put("name", name);
        textvalues.put("location", location_text);
        textvalues.put("handletext", myhandle);

        writeIndexRecord(writer, Constants.COLLECTION, myhandle, textvalues, "");
    }

    /**
     * Writes the index record of an item.
     * <p>
     * The text fields come from the <code>search.index.N</code> configuration
     * entries when at least one is present, and from a built-in set of Dublin
     * Core fields otherwise. The content of every bitstream in bundles named
     * <code>TEXT</code> is added to the catch-all field.
     *
     * @param c      current context
     * @param writer open index writer
     * @param myitem item to index
     * @throws SQLException     if reading from the database fails
     * @throws IOException      if reading a bitstream or writing the index
     *                          fails
     * @throws RuntimeException if a <code>search.index.N</code> entry is
     *                          malformed
     */
    private static void writeItemIndex(Context c, IndexWriter writer,
            Item myitem) throws SQLException, IOException
    {
        String location_text = buildItemLocationString(c, myitem);

        ArrayList indexes = loadConfiguredIndexes();
        HashMap textvalues = new HashMap();

        if (indexes.size() > 0)
        {
            addConfiguredFields(myitem, indexes, location_text, textvalues);
        }
        else
        {
            addDefaultFields(myitem, location_text, textvalues);
        }

        String extractedText = readExtractedText(myitem);

        String itemhandle = HandleManager.findHandle(c, myitem);
        textvalues.put("handletext", itemhandle);

        if (log.isDebugEnabled())
        {
            log.debug(LogManager.getHeader(c, "write_index", "handle="
                    + itemhandle));
            log.debug(textvalues.toString());
        }

        writeIndexRecord(writer, Constants.ITEM, itemhandle, textvalues,
                extractedText);
    }

    /**
     * Reads the consecutive <code>search.index.1</code>,
     * <code>search.index.2</code>, ... configuration values, stopping at the
     * first missing number.
     *
     * @return the configured values in order; element <code>n</code> holds
     *         the value of <code>search.index.(n + 1)</code>
     */
    private static ArrayList loadConfiguredIndexes()
    {
        ArrayList indexes = new ArrayList();

        for (int n = 1;; n++)
        {
            String value = ConfigurationManager.getProperty("search.index." + n);

            if (value == null)
            {
                break;
            }

            indexes.add(value);
        }

        return indexes;
    }

    /**
     * Fills the text fields of an item from the configured index definitions.
     * <p>
     * Each definition has the form
     * <code>indexName:schema.element[.qualifier]</code>; a qualifier of
     * <code>*</code> matches any qualifier. Several definitions may feed the
     * same index name, in which case their text is combined. The
     * <code>location</code> field is set last.
     */
    private static void addConfiguredFields(Item myitem, ArrayList indexes,
            String location_text, HashMap textvalues)
    {
        for (int i = 0; i < indexes.size(); i++)
        {
            String index = (String) indexes.get(i);
            String[] configLine = index.split(":");

            if (configLine.length < 2)
            {
                // No "indexName:" prefix at all.
                throw malformedIndexLine(i);
            }

            String indexName = configLine[0];

            String schema;
            String element;
            String qualifier = null;

            String[] parts = configLine[1].split("\\.");

            switch (parts.length)
            {
            case 3:
                qualifier = parts[2];
                // deliberate fall-through: schema and element are read the
                // same way with or without a qualifier
            case 2:
                schema = parts[0];
                element = parts[1];
                break;
            default:
                throw malformedIndexLine(i);
            }

            DCValue[] mydc;

            if (qualifier != null && qualifier.equals("*"))
            {
                mydc = myitem.getMetadata(schema, element, Item.ANY, Item.ANY);
            }
            else
            {
                mydc = myitem.getMetadata(schema, element, qualifier, Item.ANY);
            }

            StringBuffer content = new StringBuffer();
            appendValues(content, mydc);

            String existing = (String) textvalues.get(indexName);

            if (existing == null)
            {
                textvalues.put(indexName, content.toString());
            }
            else
            {
                // New text goes in front of what earlier definitions
                // contributed to the same index name.
                textvalues.put(indexName, content.append(existing).append(" ")
                        .toString());
            }
        }

        textvalues.put("location", location_text);
    }

    /**
     * Logs and builds the exception for a malformed
     * <code>search.index.N</code> entry.
     *
     * @param listPosition zero-based position in the list of configured
     *                     indexes; the configuration keys are numbered from
     *                     one, so the reported key is
     *                     <code>listPosition + 1</code>
     */
    private static RuntimeException malformedIndexLine(int listPosition)
    {
        String message = "Malformed configuration line: search.index."
                + (listPosition + 1);

        log.warn(message);

        return new RuntimeException(message);
    }

    /**
     * Fills the text fields of an item from the built-in set of Dublin Core
     * fields, used when no <code>search.index.N</code> entries are
     * configured.
     */
    private static void addDefaultFields(Item myitem, String location_text,
            HashMap textvalues)
    {
        DCValue[] authors = myitem.getDC("contributor", Item.ANY, Item.ANY);
        DCValue[] creators = myitem.getDC("creator", Item.ANY, Item.ANY);
        DCValue[] titles = myitem.getDC("title", Item.ANY, Item.ANY);
        DCValue[] keywords = myitem.getDC("subject", Item.ANY, Item.ANY);
        DCValue[] abstracts = myitem.getDC("description", "abstract",
                Item.ANY);
        DCValue[] sors = myitem.getDC("description",
                "statementofresponsibility", Item.ANY);
        DCValue[] series = myitem.getDC("relation", "ispartofseries",
                Item.ANY);
        DCValue[] tocs = myitem.getDC("description", "tableofcontents",
                Item.ANY);
        DCValue[] mimetypes = myitem.getDC("format", "mimetype", Item.ANY);
        DCValue[] sponsors = myitem.getDC("description", "sponsorship",
                Item.ANY);
        DCValue[] identifiers = myitem.getDC("identifier", Item.ANY,
                Item.ANY);

        // Contributors, creators and statements of responsibility are all
        // searched as "author".
        StringBuffer author_text = new StringBuffer();
        appendValues(author_text, authors);
        appendValues(author_text, creators);
        appendValues(author_text, sors);

        StringBuffer title_text = new StringBuffer();
        appendValues(title_text, titles);

        StringBuffer keyword_text = new StringBuffer();
        appendValues(keyword_text, keywords);

        // Tables of contents are searched together with abstracts.
        StringBuffer abstract_text = new StringBuffer();
        appendValues(abstract_text, abstracts);
        appendValues(abstract_text, tocs);

        StringBuffer series_text = new StringBuffer();
        appendValues(series_text, series);

        StringBuffer mime_text = new StringBuffer();
        appendValues(mime_text, mimetypes);

        StringBuffer sponsor_text = new StringBuffer();
        appendValues(sponsor_text, sponsors);

        StringBuffer id_text = new StringBuffer();
        appendValues(id_text, identifiers);

        textvalues.put("author", author_text.toString());
        textvalues.put("title", title_text.toString());
        textvalues.put("keyword", keyword_text.toString());
        textvalues.put("location", location_text);
        textvalues.put("abstract", abstract_text.toString());
        textvalues.put("series", series_text.toString());
        textvalues.put("mimetype", mime_text.toString());
        textvalues.put("sponsor", sponsor_text.toString());
        textvalues.put("identifier", id_text.toString());
    }

    /**
     * Appends every metadata value, each followed by a single space, to the
     * given buffer.
     */
    private static void appendValues(StringBuffer target, DCValue[] values)
    {
        for (int i = 0; i < values.length; i++)
        {
            target.append(values[i].value).append(" ");
        }
    }

    /**
     * Reads the content of every bitstream in the item's bundles named
     * <code>TEXT</code> and returns it as one string.
     * <p>
     * A bitstream that may not be read is logged and skipped. Every reader is
     * closed once its bitstream has been consumed.
     * <p>
     * NOTE(review): the reader uses the platform default character set, as
     * the code always has - confirm that this matches how the text
     * bitstreams are written.
     *
     * @param myitem item whose extracted text is wanted
     * @return the concatenated text, empty if there is none
     * @throws SQLException if reading from the database fails
     * @throws IOException  if reading a bitstream fails
     */
    private static String readExtractedText(Item myitem) throws SQLException,
            IOException
    {
        StringBuffer extracted = new StringBuffer();
        char[] charBuffer = new char[1024];

        Bundle[] myBundles = myitem.getBundles();

        for (int i = 0; i < myBundles.length; i++)
        {
            String bundleName = myBundles[i].getName();

            if (bundleName == null || !bundleName.equals("TEXT"))
            {
                continue;
            }

            Bitstream[] myBitstreams = myBundles[i].getBitstreams();

            for (int j = 0; j < myBitstreams.length; j++)
            {
                try
                {
                    InputStreamReader is = new InputStreamReader(
                            myBitstreams[j].retrieve());

                    try
                    {
                        int charsIn;

                        while ((charsIn = is.read(charBuffer)) != -1)
                        {
                            extracted.append(charBuffer, 0, charsIn);
                        }
                    }
                    finally
                    {
                        is.close();
                    }
                }
                catch (AuthorizeException e)
                {
                    // Best effort: the item is still indexed, just without
                    // the text of this bitstream.
                    log.warn("Not authorized to read extracted-text "
                            + "bitstream; skipping it", e);
                }
            }
        }

        return extracted.toString();
    }

    /**
     * Builds one Lucene document and adds it to the index.
     * <p>
     * The document gets a stored, un-indexed <code>type</code> field, an
     * un-tokenized <code>handle</code> field (when a handle is given), one
     * tokenized field per non-null text value, and a tokenized
     * <code>default</code> field holding all non-null text values followed by
     * the extracted text. Null text values are left out entirely.
     *
     * @param iw            open index writer
     * @param type          object type constant, stored as text
     * @param handle        handle of the object, may be <code>null</code>
     * @param textvalues    field name to text (both <code>String</code>);
     *                      values may be <code>null</code>
     * @param extractedText full text to add to the <code>default</code>
     *                      field, may be <code>null</code>
     * @throws IOException if the document cannot be written
     */
    private static void writeIndexRecord(IndexWriter iw, int type,
            String handle, HashMap textvalues, String extractedText)
            throws IOException
    {
        Document doc = new Document();
        StringBuffer fulltext = new StringBuffer();

        doc.add(new Field("type", Integer.toString(type), Field.Store.YES,
                Field.Index.NO));

        if (handle != null)
        {
            doc.add(new Field("handle", handle, Field.Store.YES,
                    Field.Index.UN_TOKENIZED));
        }

        Iterator keys = textvalues.keySet().iterator();

        while (keys.hasNext())
        {
            String key = (String) keys.next();
            String value = (String) textvalues.get(key);

            if (value == null)
            {
                // Skip: concatenating it would index the word "null".
                continue;
            }

            fulltext.append(" ").append(value);

            doc.add(new Field(key, value, Field.Store.YES,
                    Field.Index.TOKENIZED));
        }

        if (extractedText != null)
        {
            fulltext.append(extractedText);
        }

        doc.add(new Field("default", fulltext.toString(), Field.Store.YES,
                Field.Index.TOKENIZED));

        iw.addDocument(doc);
    }
}