KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > search > DSIndexer


1 /*
2  * DSIndexer.java
3  *
4  * Version: $Revision: 1.41 $
5  *
6  * Date: $Date: 2006/11/03 05:01:31 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40 package org.dspace.search;
41
42 import java.io.IOException JavaDoc;
43 import java.io.InputStreamReader JavaDoc;
44 import java.sql.SQLException JavaDoc;
45 import java.util.ArrayList JavaDoc;
46 import java.util.HashMap JavaDoc;
47 import java.util.Iterator JavaDoc;
48
49 import org.apache.log4j.Logger;
50 import org.apache.lucene.analysis.Analyzer;
51 import org.apache.lucene.document.Document;
52 import org.apache.lucene.document.Field;
53 import org.apache.lucene.index.IndexReader;
54 import org.apache.lucene.index.IndexWriter;
55 import org.apache.lucene.index.Term;
56 import org.dspace.authorize.AuthorizeException;
57 import org.dspace.content.Bitstream;
58 import org.dspace.content.Bundle;
59 import org.dspace.content.Collection;
60 import org.dspace.content.Community;
61 import org.dspace.content.DCValue;
62 import org.dspace.content.DSpaceObject;
63 import org.dspace.content.Item;
64 import org.dspace.content.ItemIterator;
65 import org.dspace.content.MetadataSchema;
66 import org.dspace.core.ConfigurationManager;
67 import org.dspace.core.Constants;
68 import org.dspace.core.Context;
69 import org.dspace.core.LogManager;
70 import org.dspace.handle.HandleManager;
71
72 /**
73  * DSIndexer contains the methods that index Items and their metadata,
74  * collections, communities, etc. It is meant to either be invoked from the
75  * command line (see dspace/bin/index-all) or via the indexContent() methods
76  * within DSpace.
77  */

78 public class DSIndexer
79 {
80     private static final Logger log = Logger.getLogger(DSIndexer.class);
81
82     // TODO: Support for analyzers per language, or multiple indices
83
/** The analyzer for this DSpace instance */
84     private static Analyzer analyzer = null;
85     
86     /**
87      * IndexItem() adds a single item to the index
88      */

89     public static void indexContent(Context c, DSpaceObject dso)
90             throws SQLException JavaDoc, IOException JavaDoc
91     {
92         IndexWriter writer = openIndex(c, false);
93
94         try
95         {
96             switch (dso.getType())
97             {
98             case Constants.ITEM:
99                 writeItemIndex(c, writer, (Item) dso);
100
101                 break;
102
103             case Constants.COLLECTION:
104                 writeCollectionIndex(c, writer, (Collection) dso);
105
106                 break;
107
108             case Constants.COMMUNITY:
109                 writeCommunityIndex(c, writer, (Community) dso);
110
111                 break;
112
113             // FIXME: should probably default unknown type exception
114
}
115         }
116         finally
117         {
118             closeIndex(c, writer);
119         }
120     }
121
122     /**
123      * unIndex removes an Item, Collection, or Community only works if the
124      * DSpaceObject has a handle (uses the handle for its unique ID)
125      *
126      * @param dso
127      * DSpace Object, can be Community, Item, or Collection
128      */

129     public static void unIndexContent(Context c, DSpaceObject dso)
130             throws SQLException JavaDoc, IOException JavaDoc
131     {
132         String JavaDoc h = HandleManager.findHandle(c, dso);
133
134         unIndexContent(c, h);
135     }
136
137     public static void unIndexContent(Context c, String JavaDoc myhandle)
138             throws SQLException JavaDoc, IOException JavaDoc
139     {
140         String JavaDoc index_directory = ConfigurationManager.getProperty("search.dir");
141         IndexReader ir = IndexReader.open(index_directory);
142
143         try
144         {
145             if (myhandle != null)
146             {
147                 // we have a handle (our unique ID, so remove)
148
Term t = new Term("handle", myhandle);
149                 ir.deleteDocuments(t);
150             }
151             else
152             {
153                 log.warn("unindex of content with null handle attempted");
154
155                 // FIXME: no handle, fail quietly - should log failure
156
//System.out.println("Error in unIndexContent: Object had no
157
// handle!");
158
}
159         }
160         finally
161         {
162             ir.close();
163         }
164     }
165
166     /**
167      * reIndexContent removes something from the index, then re-indexes it
168      *
169      * @param c context object
170      * @param dso object to re-index
171      */

172     public static void reIndexContent(Context c, DSpaceObject dso)
173             throws SQLException JavaDoc, IOException JavaDoc
174     {
175         unIndexContent(c, dso);
176         indexContent(c, dso);
177     }
178
179     /**
180      * create full index - wiping old index
181      *
182      * @param c context to use
183      */

184     public static void createIndex(Context c) throws SQLException JavaDoc, IOException JavaDoc
185     {
186         IndexWriter writer = openIndex(c, true);
187
188         try
189         {
190             indexAllCommunities(c, writer);
191             indexAllCollections(c, writer);
192             indexAllItems(c, writer);
193
194             // optimize the index - important to do regularly to reduce
195
// filehandle
196
// usage
197
// and keep performance fast!
198
writer.optimize();
199         }
200         finally
201         {
202             closeIndex(c, writer);
203         }
204     }
205
206     /**
207      * When invoked as a command-line tool, (re)-builds the whole index
208      *
209      * @param args
210      * the command-line arguments, none used
211      */

212     public static void main(String JavaDoc[] args) throws Exception JavaDoc
213     {
214         Context c = new Context();
215
216         // for testing, pass in a handle of something to remove...
217
if ((args.length == 2) && (args[0].equals("remove")))
218         {
219             unIndexContent(c, args[1]);
220         }
221         else
222         {
223             c.setIgnoreAuthorization(true);
224
225             createIndex(c);
226
227             System.out.println("Done with indexing");
228         }
229     }
230
231     /**
232      * Get the Lucene analyzer to use according to current configuration (or
233      * default). TODO: Should have multiple analyzers (and maybe indices?) for
234      * multi-lingual DSpaces.
235      *
236      * @return <code>Analyzer</code> to use
237      * @throws IllegalStateException
238      * if the configured analyzer can't be instantiated
239      */

240     static Analyzer getAnalyzer() throws IllegalStateException JavaDoc
241     {
242         if (analyzer == null)
243         {
244             // We need to find the analyzer class from the configuration
245
String JavaDoc analyzerClassName = ConfigurationManager
246                     .getProperty("search.analyzer");
247
248             if (analyzerClassName == null)
249             {
250                 // Use default
251
analyzerClassName = "org.dspace.search.DSAnalyzer";
252             }
253
254             try
255             {
256                 Class JavaDoc analyzerClass = Class.forName(analyzerClassName);
257                 analyzer = (Analyzer) analyzerClass.newInstance();
258             }
259             catch (Exception JavaDoc e)
260             {
261                 log.fatal(LogManager.getHeader(null, "no_search_analyzer",
262                         "search.analyzer=" + analyzerClassName), e);
263
264                 throw new IllegalStateException JavaDoc(e.toString());
265             }
266         }
267
268         return analyzer;
269     }
270     
271     
272     ////////////////////////////////////
273
// Private
274
////////////////////////////////////
275

276     /**
277      * prepare index, opening writer, and wiping out existing index if necessary
278      */

279     private static IndexWriter openIndex(Context c, boolean wipe_existing)
280             throws IOException JavaDoc
281     {
282         IndexWriter writer;
283
284         String JavaDoc index_directory = ConfigurationManager.getProperty("search.dir");
285
286         writer = new IndexWriter(index_directory, getAnalyzer(),
287                 wipe_existing);
288
289         /* Set maximum number of terms to index if present in dspace.cfg */
290         if (ConfigurationManager.getProperty("search.maxfieldlength") != null)
291         {
292             int maxfieldlength = ConfigurationManager
293                     .getIntProperty("search.maxfieldlength");
294             if (maxfieldlength == -1)
295             {
296                 writer.setMaxFieldLength(Integer.MAX_VALUE);
297             }
298             else
299             {
300                 writer.setMaxFieldLength(maxfieldlength);
301             }
302         }
303
304         return writer;
305     }
306
307     /**
308      * close up the indexing engine
309      */

310     private static void closeIndex(Context c, IndexWriter writer)
311             throws IOException JavaDoc
312     {
313         if (writer != null)
314         {
315             writer.close();
316         }
317     }
318
319     private static String JavaDoc buildItemLocationString(Context c, Item myitem)
320             throws SQLException JavaDoc
321     {
322         // build list of community ids
323
Community[] communities = myitem.getCommunities();
324
325         // build list of collection ids
326
Collection[] collections = myitem.getCollections();
327
328         // now put those into strings
329
String JavaDoc location = "";
330         int i = 0;
331
332         for (i = 0; i < communities.length; i++)
333             location = new String JavaDoc(location + " m" + communities[i].getID());
334
335         for (i = 0; i < collections.length; i++)
336             location = new String JavaDoc(location + " l" + collections[i].getID());
337
338         return location;
339     }
340
341     private static String JavaDoc buildCollectionLocationString(Context c,
342             Collection target) throws SQLException JavaDoc
343     {
344         // build list of community ids
345
Community[] communities = target.getCommunities();
346
347         // now put those into strings
348
String JavaDoc location = "";
349         int i = 0;
350
351         for (i = 0; i < communities.length; i++)
352             location = new String JavaDoc(location + " m" + communities[i].getID());
353
354         return location;
355     }
356
357     /**
358      * iterate through the communities, and index each one
359      */

360     private static void indexAllCommunities(Context c, IndexWriter writer)
361             throws SQLException JavaDoc, IOException JavaDoc
362     {
363         Community[] targets = Community.findAll(c);
364
365         int i;
366
367         for (i = 0; i < targets.length; i++)
368             writeCommunityIndex(c, writer, targets[i]);
369     }
370
371     /**
372      * iterate through collections, indexing each one
373      */

374     private static void indexAllCollections(Context c, IndexWriter writer)
375             throws SQLException JavaDoc, IOException JavaDoc
376     {
377         Collection[] targets = Collection.findAll(c);
378
379         int i;
380
381         for (i = 0; i < targets.length; i++)
382             writeCollectionIndex(c, writer, targets[i]);
383     }
384
385     /**
386      * iterate through all items, indexing each one
387      */

388     private static void indexAllItems(Context c, IndexWriter writer)
389             throws SQLException JavaDoc, IOException JavaDoc
390     {
391         ItemIterator i = Item.findAll(c);
392
393         while (i.hasNext())
394         {
395             Item target = (Item) i.next();
396
397             writeItemIndex(c, writer, target);
398         }
399     }
400
401     /**
402      * write index record for a community
403      */

404     private static void writeCommunityIndex(Context c, IndexWriter writer,
405             Community target) throws SQLException JavaDoc, IOException JavaDoc
406     {
407         // build a hash for the metadata
408
HashMap JavaDoc textvalues = new HashMap JavaDoc();
409
410         // get the handle
411
String JavaDoc myhandle = HandleManager.findHandle(c, target);
412
413         // and populate it
414
String JavaDoc name = target.getMetadata("name");
415
416         // String description = target.getMetadata("short_description");
417
// String intro_text = target.getMetadata("introductory_text");
418
textvalues.put("name", name);
419
420         // textvalues.put("description", description);
421
// textvalues.put("intro_text", intro_text );
422
textvalues.put("handletext", myhandle);
423
424         writeIndexRecord(writer, Constants.COMMUNITY, myhandle, textvalues, "");
425     }
426
427     /**
428      * write an index record for a collection
429      */

430     private static void writeCollectionIndex(Context c, IndexWriter writer,
431             Collection target) throws SQLException JavaDoc, IOException JavaDoc
432     {
433         String JavaDoc location_text = buildCollectionLocationString(c, target);
434
435         // get the handle
436
String JavaDoc myhandle = HandleManager.findHandle(c, target);
437
438         // build a hash for the metadata
439
HashMap JavaDoc textvalues = new HashMap JavaDoc();
440
441         // and populate it
442
String JavaDoc name = target.getMetadata("name");
443
444         // String description = target.getMetadata("short_description");
445
// String intro_text = target.getMetadata("introductory_text");
446
textvalues.put("name", name);
447
448         // textvalues.put("description",description );
449
// textvalues.put("intro_text", intro_text );
450
textvalues.put("location", location_text);
451         textvalues.put("handletext", myhandle);
452
453         writeIndexRecord(writer, Constants.COLLECTION, myhandle, textvalues, "");
454     }
455
456     /**
457      * writes an index record - the index record is a set of name/value hashes,
458      * which are sent to Lucene.
459      */

460     private static void writeItemIndex(Context c, IndexWriter writer,
461             Item myitem) throws SQLException JavaDoc, IOException JavaDoc
462     {
463         // FIXME: config reading should happen just once & be cached?
464

465         // get the location string (for searching by collection & community)
466
String JavaDoc location_text = buildItemLocationString(c, myitem);
467
468         // read in indexes from the config
469
ArrayList JavaDoc indexes = new ArrayList JavaDoc();
470
471         // read in search.index.1, search.index.2....
472
for (int i = 1; ConfigurationManager.getProperty("search.index." + i) != null; i++)
473         {
474             indexes.add(ConfigurationManager.getProperty("search.index." + i));
475         }
476
477         int j;
478         int k = 0;
479
480         // initialize hash to be built
481
HashMap JavaDoc textvalues = new HashMap JavaDoc();
482
483         if (indexes.size() > 0)
484         {
485             ArrayList JavaDoc fields = new ArrayList JavaDoc();
486             ArrayList JavaDoc content = new ArrayList JavaDoc();
487             DCValue[] mydc;
488
489             for (int i = 0; i < indexes.size(); i++)
490             {
491                 String JavaDoc index = (String JavaDoc) indexes.get(i);
492
493                 String JavaDoc[] configLine = index.split(":");
494                 String JavaDoc indexName = configLine[0];
495
496                 String JavaDoc schema;
497                 String JavaDoc element;
498                 String JavaDoc qualifier = null;
499
500                 // Get the schema, element and qualifier for the index
501
// TODO: Should check valid schema, element, qualifier?
502
String JavaDoc[] parts = configLine[1].split("\\.");
503                 
504                 switch (parts.length)
505                 {
506                 case 3:
507                     qualifier = parts[2];
508                 case 2:
509                     schema = parts[0];
510                     element = parts[1];
511                     break;
512                 default:
513                     log.warn("Malformed configuration line: search.index." + i);
514                     // FIXME: Can't proceed here, no suitable exception to throw
515
throw new RuntimeException JavaDoc(
516                             "Malformed configuration line: search.index." + i);
517                 }
518                 
519                 // extract metadata (ANY is wildcard from Item class)
520
if (qualifier!= null && qualifier.equals("*"))
521                 {
522                     mydc = myitem.getMetadata(schema, element, Item.ANY, Item.ANY);
523                 }
524                 else
525                 {
526                     mydc = myitem.getMetadata(schema, element, qualifier, Item.ANY);
527                 }
528
529                 // put them all from an array of strings to one string for
530
// writing out pack all of the arrays of DCValues into plain
531
// text strings for the indexer
532
String JavaDoc content_text = "";
533
534                 for (j = 0; j < mydc.length; j++)
535                 {
536                     content_text = new String JavaDoc(content_text + mydc[j].value
537                             + " ");
538                 }
539
540                 // arranges content with fields in ArrayLists with same index to
541
// put
542
// into hash later
543
k = fields.indexOf(indexName);
544
545                 if (k < 0)
546                 {
547                     fields.add(indexName);
548                     content.add(content_text);
549                 }
550                 else
551                 {
552                     content_text = new String JavaDoc(content_text
553                             + (String JavaDoc) content.get(k) + " ");
554                     content.set(k, content_text);
555                 }
556             }
557
558             // build the hash
559
for (int i = 0; i < fields.size(); i++)
560             {
561                 textvalues.put((String JavaDoc) fields.get(i), (String JavaDoc) content.get(i));
562             }
563
564             textvalues.put("location", location_text);
565         }
566         else
567         // if no search indexes found in cfg file, for backward compatibility
568
{
569             // extract metadata (ANY is wildcard from Item class)
570
DCValue[] authors = myitem.getDC("contributor", Item.ANY, Item.ANY);
571             DCValue[] creators = myitem.getDC("creator", Item.ANY, Item.ANY);
572             DCValue[] titles = myitem.getDC("title", Item.ANY, Item.ANY);
573             DCValue[] keywords = myitem.getDC("subject", Item.ANY, Item.ANY);
574
575             DCValue[] abstracts = myitem.getDC("description", "abstract",
576                     Item.ANY);
577             DCValue[] sors = myitem.getDC("description",
578                     "statementofresponsibility", Item.ANY);
579             DCValue[] series = myitem.getDC("relation", "ispartofseries",
580                     Item.ANY);
581             DCValue[] tocs = myitem.getDC("description", "tableofcontents",
582                     Item.ANY);
583             DCValue[] mimetypes = myitem.getDC("format", "mimetype", Item.ANY);
584             DCValue[] sponsors = myitem.getDC("description", "sponsorship",
585                     Item.ANY);
586             DCValue[] identifiers = myitem.getDC("identifier", Item.ANY,
587                     Item.ANY);
588
589             // put them all from an array of strings to one string for writing
590
// out
591
String JavaDoc author_text = "";
592             String JavaDoc title_text = "";
593             String JavaDoc keyword_text = "";
594
595             String JavaDoc abstract_text = "";
596             String JavaDoc series_text = "";
597             String JavaDoc mime_text = "";
598             String JavaDoc sponsor_text = "";
599             String JavaDoc id_text = "";
600
601             // pack all of the arrays of DCValues into plain text strings for
602
// the
603
// indexer
604
for (j = 0; j < authors.length; j++)
605             {
606                 author_text = new String JavaDoc(author_text + authors[j].value + " ");
607             }
608
609             for (j = 0; j < creators.length; j++) //also authors
610
{
611                 author_text = new String JavaDoc(author_text + creators[j].value + " ");
612             }
613
614             for (j = 0; j < sors.length; j++) //also authors
615
{
616                 author_text = new String JavaDoc(author_text + sors[j].value + " ");
617             }
618
619             for (j = 0; j < titles.length; j++)
620             {
621                 title_text = new String JavaDoc(title_text + titles[j].value + " ");
622             }
623
624             for (j = 0; j < keywords.length; j++)
625             {
626                 keyword_text = new String JavaDoc(keyword_text + keywords[j].value
627                         + " ");
628             }
629
630             for (j = 0; j < abstracts.length; j++)
631             {
632                 abstract_text = new String JavaDoc(abstract_text + abstracts[j].value
633                         + " ");
634             }
635
636             for (j = 0; j < tocs.length; j++)
637             {
638                 abstract_text = new String JavaDoc(abstract_text + tocs[j].value + " ");
639             }
640
641             for (j = 0; j < series.length; j++)
642             {
643                 series_text = new String JavaDoc(series_text + series[j].value + " ");
644             }
645
646             for (j = 0; j < mimetypes.length; j++)
647             {
648                 mime_text = new String JavaDoc(mime_text + mimetypes[j].value + " ");
649             }
650
651             for (j = 0; j < sponsors.length; j++)
652             {
653                 sponsor_text = new String JavaDoc(sponsor_text + sponsors[j].value
654                         + " ");
655             }
656
657             for (j = 0; j < identifiers.length; j++)
658             {
659                 id_text = new String JavaDoc(id_text + identifiers[j].value + " ");
660             }
661
662             // build the hash
663
textvalues.put("author", author_text);
664             textvalues.put("title", title_text);
665             textvalues.put("keyword", keyword_text);
666             textvalues.put("location", location_text);
667             textvalues.put("abstract", abstract_text);
668
669             textvalues.put("series", series_text);
670             textvalues.put("mimetype", mime_text);
671             textvalues.put("sponsor", sponsor_text);
672             textvalues.put("identifier", id_text);
673         }
674
675         // now get full text of any bitstreams in the TEXT bundle
676
String JavaDoc extractedText = "";
677
678         // trundle through the bundles
679
Bundle[] myBundles = myitem.getBundles();
680
681         for (int i = 0; i < myBundles.length; i++)
682         {
683             if ((myBundles[i].getName() != null)
684                     && myBundles[i].getName().equals("TEXT"))
685             {
686                 // a-ha! grab the text out of the bitstreams
687
Bitstream[] myBitstreams = myBundles[i].getBitstreams();
688
689                 for (j = 0; j < myBitstreams.length; j++)
690                 {
691                     try
692                     {
693                         InputStreamReader JavaDoc is = new InputStreamReader JavaDoc(
694                                 myBitstreams[j].retrieve()); // get input
695
// stream
696
StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
697                         char[] charBuffer = new char[1024];
698
699                         while (true)
700                         {
701                             int bytesIn = is.read(charBuffer);
702
703                             if (bytesIn == -1)
704                             {
705                                 break;
706                             }
707
708                             if (bytesIn > 0)
709                             {
710                                 sb.append(charBuffer, 0, bytesIn);
711                             }
712                         }
713
714                         // now sb has the full text - tack on to fullText string
715
extractedText = extractedText.concat(new String JavaDoc(sb));
716
717                         // System.out.println("Found extracted text!\n" + new
718
// String(sb));
719
}
720                     catch (AuthorizeException e)
721                     {
722                         // this will never happen, but compiler is now happy.
723
}
724                 }
725             }
726         }
727
728         // lastly, get the handle
729
String JavaDoc itemhandle = HandleManager.findHandle(c, myitem);
730         textvalues.put("handletext", itemhandle);
731
732         if (log.isDebugEnabled())
733         {
734             log.debug(LogManager.getHeader(c, "write_index", "handle=" +itemhandle));
735             log.debug(textvalues.toString());
736         }
737
738         // write out the metatdata (for scalability, using hash instead of
739
// individual strings)
740
writeIndexRecord(writer, Constants.ITEM, itemhandle, textvalues,
741                 extractedText);
742     }
743
744     /**
745      * writeIndexRecord() creates a document from its args and writes it out to
746      * the index that is opened
747      */

748     private static void writeIndexRecord(IndexWriter iw, int type,
749             String JavaDoc handle, HashMap JavaDoc textvalues, String JavaDoc extractedText)
750             throws IOException JavaDoc
751     {
752         Document doc = new Document();
753         Integer JavaDoc ty = new Integer JavaDoc(type);
754         String JavaDoc fulltext = "";
755
756         // do id, type, handle first
757
doc.add(new Field("type", ty.toString(), Field.Store.YES, Field.Index.NO));
758
759         // want to be able to search for handle, so use keyword
760
// (not tokenized, but it is indexed)
761
if (handle != null)
762         {
763             doc.add(new Field("handle", handle, Field.Store.YES, Field.Index.UN_TOKENIZED));
764         }
765
766         // now iterate through the hash, building full text string
767
// and index all values
768
Iterator JavaDoc i = textvalues.keySet().iterator();
769
770         while (i.hasNext())
771         {
772             String JavaDoc key = (String JavaDoc) i.next();
773             String JavaDoc value = (String JavaDoc) textvalues.get(key);
774
775             fulltext = fulltext + " " + value;
776
777             if (value != null)
778             {
779                 doc.add(new Field(key, value, Field.Store.YES, Field.Index.TOKENIZED));
780             }
781         }
782
783         fulltext = fulltext.concat(extractedText);
784
785         // System.out.println("Full Text:\n" + fulltext + "------------\n\n");
786
// add the full text
787
doc.add(new Field("default", fulltext, Field.Store.YES, Field.Index.TOKENIZED));
788
789         // index the document
790
iw.addDocument(doc);
791     }
792 }
793
Popular Tags