LuceneIndex


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.modules.retouche.source.usages;
21  
22  import java.io.File  ;
23  import java.io.IOException  ;
24  import java.net.URL  ;
25  import java.text.ParseException  ;
26  import java.util.ArrayList  ;
27  import java.util.Collection  ;
28  import java.util.Comparator  ;
29  import java.util.EnumSet  ;
30  import java.util.Enumeration  ;
31  import java.util.HashMap  ;
32  import java.util.HashSet  ;
33  import java.util.Iterator  ;
34  import java.util.LinkedList  ;
35  import java.util.List  ;
36  import java.util.Map  ;
37  import java.util.Map.Entry;
38  import java.util.Set  ;
39  import java.util.TreeSet  ;
40  import java.util.concurrent.atomic.AtomicBoolean  ;
41  import java.util.logging.Logger  ;
42  import java.util.regex.Matcher  ;
43  import java.util.regex.Pattern  ;
44  import org.apache.lucene.analysis.KeywordAnalyzer;
45  import org.apache.lucene.document.DateTools;
46  import org.apache.lucene.document.Document;
47  import org.apache.lucene.document.Field;
48  import org.apache.lucene.index.IndexReader;
49  import org.apache.lucene.index.IndexWriter;
50  import org.apache.lucene.index.Term;
51  import org.apache.lucene.index.TermDocs;
52  import org.apache.lucene.index.TermEnum;
53  import org.apache.lucene.search.BooleanClause;
54  import org.apache.lucene.search.BooleanClause.Occur;
55  import org.apache.lucene.search.BooleanQuery;
56  import org.apache.lucene.search.BooleanQuery;
57  import org.apache.lucene.search.Hit;
58  import org.apache.lucene.search.Hits;
59  import org.apache.lucene.search.IndexSearcher;
60  import org.apache.lucene.search.Searcher;
61  import org.apache.lucene.search.Query;
62  import org.apache.lucene.search.TermQuery;
63  import org.apache.lucene.search.WildcardQuery;
64  import org.apache.lucene.store.Directory;
65  import org.apache.lucene.store.FSDirectory;
66  import org.apache.lucene.store.RAMDirectory;
67  import org.netbeans.api.gsf.Index.SearchResult;
68  import org.netbeans.api.retouche.source.ClassIndex;
69  import org.netbeans.modules.retouche.source.usages.LuceneIndexMBeanImpl;
70  import org.netbeans.modules.retouche.source.util.LowMemoryEvent;
71  import org.netbeans.modules.retouche.source.util.LowMemoryListener;
72  import org.netbeans.modules.retouche.source.util.LowMemoryNotifier;
73  import org.openide.util.Exceptions;
74  
75  /**
76   * Lucene interface - Responsible for storing and querying at the lowest level.
77   * This file is originally from Retouche, the Java Support 
78   * infrastructure in NetBeans. I have modified the file as little
79   * as possible to make merging Retouche fixes back as simple as
80   * possible. 
81   *
82   * 
83   * Rip out the old query stuff.
84   * 
85   * @todo Find a faster or more efficient "batch" operation for storing tons of documents
86   *   at startup (When scanning the boot path).
87   * @todo Can deletion be better?
88   * 
89   * @author Tomas Zezula
90   * @author Tor Norbye
91   */
92  class LuceneIndex extends Index {
93      
94      private static final boolean debugIndexMerging = Boolean.getBoolean("LuceneIndex.debugIndexMerge");     // NOI18N - when set, gsfStore sends the writer's info stream to System.err
95      static final String   REFERENCES = "gsf";    // NOI18N - name of the sub-folder of the cache root that holds the index
96      
97      private static final Logger   LOGGER = Logger.getLogger(LuceneIndex.class.getName());
98      
99      private final Directory directory; // Lucene directory the reader and writer below are opened on
100     private Long   rootTimeStamp; // Cached root time stamp; null until isUpToDate reads it or gsfStore writes it; accessed under synchronized (this)
101     
102     private IndexReader reader; //Cache, do not use this directly, use getReader
103     private IndexWriter writer; //Cache, do not use this directly, use getWriter
104     private Set  <String  > rootPkgCache;   //Cache, do not use this directly; within this file it is only reset to null by gsfStore
105     
106     // Both fields are assigned by create(). classIndex is for debugging purposes only (lucene browser)
107     private ClassIndexImpl classIndex;
108     private File   cacheRoot; // also read by isUpToDate (pre-indexed check) and returned by search results as their segment
109     
110     public static Index create (final File   cacheRoot, ClassIndexImpl classIndex) throws IOException   {        
111         assert cacheRoot != null && cacheRoot.exists() && cacheRoot.canRead() && cacheRoot.canWrite();
112         LuceneIndex index = new LuceneIndex (getReferencesCacheFolder(cacheRoot));
113         
114         // For debugging (lucene browser) only
115         index.classIndex = classIndex;
116         index.cacheRoot = cacheRoot;
117         
118         return index;
119     }
120     
/**
 * Creates a new instance of LuceneIndex backed by a file-system directory.
 *
 * @param refCacheRoot folder holding the index files; must not be null (assertion only)
 * @throws IOException if {@code FSDirectory.getDirectory} fails
 */
private LuceneIndex (final File refCacheRoot) throws IOException {
    assert refCacheRoot != null;
    // Second argument is false - NOTE(review): presumably the "create" flag of the bundled Lucene version; confirm
    directory = FSDirectory.getDirectory(refCacheRoot, false);
}
126 
127     private void regExpSearch (final Pattern   pattern, final Term startTerm, final IndexReader in, final Set  <Term> toSearch) throws IOException   {        
128         final String   startText = startTerm.text();
129         final StringBuilder   startBuilder = new StringBuilder   ();
130         startBuilder.append(startText.charAt(0));
131         for (int i=1; i<startText.length(); i++) {
132             char c = startText.charAt(i);
133             if (!Character.isJavaIdentifierPart(c)) {
134                 break;
135             }
136             startBuilder.append(c);
137         }
138         final String   startPrefix = startBuilder.toString();
139         final String   camelField = startTerm.field();
140         final TermEnum en = in.terms(startTerm);
141         try {
142             do {
143                 Term term = en.term();                
144                 if (term != null && camelField == term.field() && term.text().startsWith(startPrefix)) {
145                     final Matcher   m = pattern.matcher(term.text());
146                     if (m.matches()) {
147                         toSearch.add (term);
148                     }
149                 }
150                 else {
151                     break;
152                 }
153             } while (en.next());
154         } finally {
155             en.close();
156         }
157     }
158     
159     private void prefixSearch (Term nameTerm, final IndexReader in, final Set  <Term> toSearch) throws IOException   {
160         final String   prefixField = nameTerm.field();
161         final String   name = nameTerm.text();
162         final TermEnum en = in.terms(nameTerm);
163         try {
164             do {
165                 Term term = en.term();                
166                 if (term != null && prefixField == term.field() && term.text().startsWith(name)) {
167                     toSearch.add (term);
168                 }
169                 else {
170                     break;
171                 }
172             } while (en.next());
173         } finally {
174             en.close();
175         }
176     }    
177 
178     public boolean isUpToDate(String   resourceName, long timeStamp) throws IOException   {
179         // Don't do anything for preindexed filesystems
180         if (Index.isPreindexed(cacheRoot)) {
181             return true;
182         }
183         
184         if (!isValid(false)) {
185             return false;
186         }        
187         Searcher searcher = new IndexSearcher (this.getReader());
188         try {
189             Hits hits;
190             if (resourceName == null) {
191                 synchronized (this) {
192                     if (this.rootTimeStamp != null) {
193                         return rootTimeStamp.longValue() >= timeStamp;
194                     }
195                 }
196                 hits = searcher.search(new TermQuery(DocumentUtil.rootDocumentTerm()));
197             }
198             else {
199                 hits = searcher.search(DocumentUtil.binaryNameQuery(resourceName));
200             }
201 
202             assert hits.length() <= 1;
203             if (hits.length() == 0) {
204                 return false;
205             }
206             else {                    
207                 try { 
208                     Hit hit = (Hit) hits.iterator().next();
209                     long cacheTime = DocumentUtil.getTimeStamp(hit.getDocument());
210                     if (resourceName == null) {
211                         synchronized (this) {
212                             this.rootTimeStamp = new Long   (cacheTime);
213                         }
214                     }
215                     return cacheTime >= timeStamp;
216                 } catch (ParseException   pe) {
217                     throw new IOException   ();
218                 }
219             }
220         } catch (java.io.FileNotFoundException   fnf) {
221             this.clear();
222             return false;
223         } finally {
224             searcher.close();
225         }
226     }
227         
228     public boolean isValid (boolean tryOpen) throws IOException   {  
229         boolean res = IndexReader.indexExists(this.directory);
230         if (res && tryOpen) {
231             try {
232                 getReader();
233             } catch (java.io.FileNotFoundException   e) {
234                 res = false;
235                 clear();
236             }
237         }
238         return res;
239      }    
240 
241     public synchronized void clear () throws IOException   {
242         this.close ();
243         final String  [] content = this.directory.list();
244         for (String   file : content) {
245             directory.deleteFile(file);
246         }
247     }
248     
249     public synchronized void close () throws IOException   {
250         if (this.reader != null) {
251             this.reader.close();
252             this.reader = null;
253         }
254         if (this.writer != null) {
255             this.writer.close();
256             this.writer = null;
257         }
258     }
259     
260     public @Override   String   toString () {
261         return this.directory.toString();
262     }
263     
264     private synchronized IndexReader getReader () throws IOException   {
265         if (this.reader == null) {
266             if (this.writer != null) {
267                 this.writer.close();
268                 this.writer = null;
269             }
270             this.reader = IndexReader.open(this.directory);
271         }        
272         return this.reader;
273     }
274     
275     private synchronized IndexWriter getWriter (final boolean create) throws IOException   {
276         if (this.writer == null) {
277             if (this.reader != null) {
278                 this.reader.close();
279                 this.reader = null;
280             }
281             this.writer = new IndexWriter (this.directory,new KeywordAnalyzer(), create);
282         }        
283         return this.writer;
284     }
285     
286     
287     private static File   getReferencesCacheFolder (final File   cacheRoot) throws IOException   {
288         File   refRoot = new File   (cacheRoot,REFERENCES);
289         if (!refRoot.exists()) {
290             refRoot.mkdir();
291         }
292         return refRoot;
293     }
294     
295     private static class LMListener implements LowMemoryListener {        
296         
297         private AtomicBoolean   lowMemory = new AtomicBoolean   (false);
298         
299         public void lowMemory(LowMemoryEvent event) {
300             lowMemory.set(true);
301         }        
302     }
303     
304 
305     // BEGIN TOR MODIFICATIONS
306     public void gsfStore(final Set  <Map  <String  ,String  >> fieldsSet,  Set  <Map  <String  ,String  >> noIndexFields, final Map  <String  ,String  > toDelete) throws IOException   {
307         this.rootPkgCache = null;
308         boolean create = !isValid(false);
309         if (!create) {
310             IndexReader in = getReader();
311             
312             if (toDelete.size() > 0) {
313                 final Searcher searcher = new IndexSearcher (in);
314                 try {
315                     for (String   key : toDelete.keySet()) {
316                         BooleanQuery query = new BooleanQuery ();
317                         String   value = toDelete.get(key);
318                         query.add (new TermQuery (new Term (key, value)),BooleanClause.Occur.MUST);
319 
320                         Hits hits = searcher.search(query);
321     //                    Disabled assertion until the signature file links are integrated
322     //                    assert hits.length() <=1 : "Multiple index entries for binaryName: " + toDeleteItem;    //NOI18N
323                         if (hits.length()>1) {
324                             java.util.logging.Logger.getLogger("global").warning("Multiple index entries for binaryName: " + key); //NOI18N
325                         }
326                         for (int i=0; i<hits.length(); i++) {
327                             in.deleteDocument (hits.id(i));
328                         }
329                     }
330                     in.deleteDocuments (DocumentUtil.rootDocumentTerm());
331                 } finally {
332                     searcher.close();
333                 }
334             } else {
335                 in.deleteDocuments (DocumentUtil.rootDocumentTerm());
336             }
337         }
338         long timeStamp = System.currentTimeMillis();
339         gsfStore(fieldsSet, noIndexFields, create, timeStamp);
340     }    
341 
342     private void gsfStore (final Set  <Map  <String  ,String  >> fieldsSet, Set  <Map  <String  ,String  >> noIndexFields, final boolean create, final long timeStamp) throws IOException   {        
343         IndexWriter out = getWriter(create);
344         if (debugIndexMerging) {
345             out.setInfoStream (System.err);
346         }
347         final LuceneIndexMBean indexSettings = LuceneIndexMBeanImpl.getDefault();
348         if (indexSettings != null) {
349             out.setMergeFactor(indexSettings.getMergeFactor());
350             out.setMaxMergeDocs(indexSettings.getMaxMergeDocs());
351             out.setMaxBufferedDocs(indexSettings.getMaxBufferedDocs());
352         }        
353         LowMemoryNotifier lm = LowMemoryNotifier.getDefault();
354         LMListener lmListener = new LMListener ();
355         lm.addLowMemoryListener (lmListener);        
356         Directory memDir = null;
357         IndexWriter activeOut = null;        
358         if (lmListener.lowMemory.getAndSet(false)) {
359             activeOut = out;
360         }
361         else {
362             memDir = new RAMDirectory ();
363             activeOut = new IndexWriter (memDir,new KeywordAnalyzer(), true);
364         }        
365         try {
366             activeOut.addDocument (DocumentUtil.createRootTimeStampDocument (timeStamp));
367 
368             Document newDoc = new Document ();
369             newDoc.add(new Field (DocumentUtil.FIELD_TIME_STAMP,DateTools.timeToString(timeStamp,DateTools.Resolution.MILLISECOND),Field.Store.YES,Field.Index.NO));
370             
371             for (Map  <String  ,String  > fields : fieldsSet) {
372                 for (Iterator  <Map.Entry  <String  ,String  >> it = fields.entrySet().iterator(); it.hasNext();) {
373                     Map.Entry  <String  ,String  > fieldEntry = it.next();
374                     it.remove();
375                     String   key = fieldEntry.getKey();
376                     String   value = fieldEntry.getValue();
377                     Field field = new Field(key, value, Field.Store.YES, Field.Index.UN_TOKENIZED);
378                     newDoc.add(field);
379                 }
380             }
381 
382             for (Map  <String  ,String  > fields : noIndexFields) {
383                 for (Iterator  <Map.Entry  <String  ,String  >> it = fields.entrySet().iterator(); it.hasNext();) {
384                     Map.Entry  <String  ,String  > fieldEntry = it.next();
385                     it.remove();
386                     String   key = fieldEntry.getKey();
387                     String   value = fieldEntry.getValue();
388                     Field field = new Field(key, value, Field.Store.YES, Field.Index.NO);
389                     newDoc.add(field);
390                 }
391             }
392 
393             activeOut.addDocument(newDoc);
394             if (memDir != null && lmListener.lowMemory.getAndSet(false)) {                       
395                 activeOut.close();
396                 out.addIndexes(new Directory[] {memDir});                        
397                 memDir = new RAMDirectory ();        
398                 activeOut = new IndexWriter (memDir,new KeywordAnalyzer(), true);
399             }
400             if (memDir != null) {
401                 activeOut.close();
402                 out.addIndexes(new Directory[] {memDir});   
403                 activeOut = null;
404                 memDir = null;
405             }
406             synchronized (this) {
407                 this.rootTimeStamp = new Long   (timeStamp);
408             }
409         } finally {
410             lm.removeLowMemoryListener (lmListener);  
411         }
412     }
413     
414     
415     @SuppressWarnings   ("unchecked") // NOI18N, unchecked - lucene has source 1.4
416     public void gsfSearch(final String   primaryField, final String   name, final ClassIndex.NameKind kind, final Set  <ClassIndex.SearchScope> scope, 
417             final Set  <SearchResult> result) throws IOException   {
418         if (!isValid(false)) {
419             LOGGER.fine(String.format("LuceneIndex[%s] is invalid!\n", this.toString()));
420             return;
421         }
422 
423         assert name != null;                
424         final Set  <Term> toSearch = new TreeSet  <Term> (new Comparator  <Term>(){
425             public int compare (Term t1, Term t2) {
426                 int ret = t1.field().compareTo(t2.field());
427                 if (ret == 0) {
428                     ret = t1.text().compareTo(t2.text());
429                 }
430                 return ret;
431             }
432         });
433                 
434         final IndexReader in = getReader();
435         switch (kind) {
436             case EXACT_NAME:
437                 {
438                     toSearch.add(new Term (primaryField, name));
439                     break;
440                 }
441             case PREFIX:
442                 if (name.length() == 0) {
443                     //Special case (all) handle in different way
444                     gsfEmptyPrefixSearch(in, result, primaryField);
445                     return;
446                 }
447                 else {
448                     final Term nameTerm = new Term (primaryField, name);
449                     prefixSearch(nameTerm, in, toSearch);
450                     break;
451                 }
452             case CASE_INSENSITIVE_PREFIX:
453                 if (name.length() == 0) {
454                     //Special case (all) handle in different way
455                     gsfEmptyPrefixSearch(in, result, primaryField);
456                     return;
457                 }
458                 else {                    
459                     final Term nameTerm = new Term (primaryField, name);
460                     prefixSearch(nameTerm, in, toSearch);
461                     break;
462                 }
463             case CAMEL_CASE:
464                 if (name.length() == 0) {
465                     throw new IllegalArgumentException   ();
466                 }        
467                 {
468                 final StringBuilder   patternString = new StringBuilder   ();                        
469                 char startChar = 0;
470                 for (int i=0; i<name.length(); i++) {
471                     char c = name.charAt(i);
472                     //todo: maybe check for upper case, I18N????
473                     if (i == 0) {
474                         startChar = c;
475                     }
476                     patternString.append(c);
477                     if (i == name.length()-1) {
478                         patternString.append("\\w*");  // NOI18N
479                     }
480                     else {
481                         patternString.append("[\\p{Lower}\\p{Digit}]*");  // NOI18N
482                     }
483                 }
484                 final Pattern   pattern = Pattern.compile(patternString.toString());
485                 Term t = new Term (primaryField, Character.toString(startChar));
486                 regExpSearch(pattern, t, in, toSearch);
487                 break;
488                 }
489             case CASE_INSENSITIVE_REGEXP:
490                 if (name.length() == 0 || !Character.isJavaIdentifierStart(name.charAt(0))) {
491                     throw new IllegalArgumentException   ();
492                 }
493                 {   
494                     final Pattern   pattern = Pattern.compile(name,Pattern.CASE_INSENSITIVE);
495                     final Term nameTerm = new Term (primaryField, name.toLowerCase());
496                     regExpSearch(pattern, nameTerm, in, toSearch);      //XXX: Locale
497                     break;
498                 }
499             case REGEXP:
500                 if (name.length() == 0 || !Character.isJavaIdentifierStart(name.charAt(0))) {
501                     throw new IllegalArgumentException   ();
502                 }                
503                 {   
504                     final Pattern   pattern = Pattern.compile(name);                    
505                     final Term nameTerm = new Term (primaryField, name);
506                     regExpSearch(pattern, nameTerm, in, toSearch);
507                     break;
508                 }
509             default:
510                 throw new UnsupportedOperationException   (kind.toString());
511         }           
512         TermDocs tds = in.termDocs();
513         LOGGER.fine(String.format("LuceneIndex.getDeclaredTypes[%s] returned %d elements\n",this.toString(), toSearch.size()));
514         final Iterator  <Term> it = toSearch.iterator();        
515         Set  <Integer  > docNums = new TreeSet  <Integer  >();
516         Map  <Integer  ,List  <String  >> matches = new HashMap  <Integer  ,List  <String  >>();
517         while (it.hasNext()) {
518             Term next = it.next();
519             tds.seek(next);
520             while (tds.next()) {
521                 Integer   docNum = Integer.valueOf(tds.doc());
522                 List  <String  > matchTerms = matches.get(docNum);
523                 if (matchTerms == null) {
524                     matchTerms = new ArrayList  <String  >();
525                     matches.put(docNum, matchTerms);
526                 }
527                 matchTerms.add(next.text());
528                 docNums.add(docNum);
529             }
530         }
531         for (Integer   docNum : docNums) {
532             final Document doc = in.document(docNum);
533             
534             List  <String  > matchList = matches.get(docNum);
535             FilteredDocumentSearchResult map = new FilteredDocumentSearchResult(doc, primaryField, matchList, docNum);
536             result.add(map);
537         }
538     }
539     
540     // TODO: Create a filtered DocumentSearchResult here which
541     // contains matches for a given document.
542     
543     private class DocumentSearchResult implements SearchResult {
544         private Document doc;
545         private int docId;
546         
547         private DocumentSearchResult(Document doc, int docId) {
548             this.doc = doc;
549             this.docId = docId;
550         }
551 
552         public String   getValue(String   key) {
553             return doc.get((String  )key);
554         }
555 
556         public String  [] getValues(String   key) {
557             return doc.getValues((String  )key);
558         }
559         
560         public String   toString() {
561             StringBuilder   sb = new StringBuilder  ();
562             Enumeration   en = doc.fields();
563             while (en.hasMoreElements()) {
564                 Field f = (Field)en.nextElement();
565                 sb.append(f.name());
566                 sb.append(":");
567                 sb.append(f.stringValue());
568                 sb.append("\n");
569             }
570             
571             return sb.toString();
572         }
573     
574         public int getDocumentNumber() {
575             return docId;
576         }
577 
578         public Object   getDocument() {
579             return doc;
580         }
581 
582         public Object   getIndexReader() {
583             try {
584                 return getReader();
585             } catch (IOException   ioe) {
586                 Exceptions.printStackTrace(ioe);
587                 return null;
588             }
589         }
590 
591         public Object   getIndex() {
592             return LuceneIndex.this.classIndex;
593         }
594 
595         public File   getSegment() {
596             return LuceneIndex.this.cacheRoot;
597         }
598     }
599     
600     private class FilteredDocumentSearchResult implements SearchResult {
601         private Document doc;
602         private int docId;
603         private String   primaryKey;
604         private List  <String  > primaryValues;
605         
606         private FilteredDocumentSearchResult(Document doc, String   primaryKey, List  <String  > primaryValues, int docId) {
607             this.doc = doc;
608             this.primaryKey = primaryKey;
609             this.primaryValues = primaryValues;
610             this.docId = docId;
611         }
612 
613         public String   getValue(String   key) {
614             if (key.equals(primaryKey)) {
615                 if (primaryValues.size() > 0) {
616                     return primaryValues.get(0);
617                 } else {
618                     return null;
619                 }
620             }
621             return doc.get((String  )key);
622         }
623 
624         public String  [] getValues(String   key) {
625             if (key.equals(primaryKey)) {
626                 return primaryValues.toArray(new String  [primaryValues.size()]);
627             }
628             return doc.getValues((String  )key);
629         }
630         
631         public String   toString() {
632             StringBuilder   sb = new StringBuilder  ();
633             Enumeration   en = doc.fields();
634             while (en.hasMoreElements()) {
635                 Field f = (Field)en.nextElement();
636                 if (f.name().equals(primaryKey)) {
637                     sb.append(primaryKey);
638                     sb.append(":");
639                     sb.append(primaryValues.toString());
640                 } else {
641                     sb.append(f.name());
642                     sb.append(":");
643                     sb.append(f.stringValue());
644                 }
645                 sb.append("\n");
646             }
647             
648             return sb.toString();
649         }
650     
651         public int getDocumentNumber() {
652             return docId;
653         }
654 
655         public Object   getDocument() {
656             return doc;
657         }
658         
659         public Object   getIndexReader() {
660             try {
661                 return getReader();
662             } catch (IOException   ioe) {
663                 Exceptions.printStackTrace(ioe);
664                 return null;
665             }
666         }
667 
668         public Object   getIndex() {
669             return LuceneIndex.this.classIndex;
670         }
671 
672         public File   getSegment() {
673             return LuceneIndex.this.cacheRoot;
674         }
675     }
676     
677     private <T> void gsfEmptyPrefixSearch (final IndexReader in, final Set  <SearchResult> result, 
678                                         final String   primaryField) throws IOException   {        
679         final int bound = in.maxDoc();        
680         for (int i=0; i<bound; i++) {
681             if (!in.isDeleted(i)) {
682                 final Document doc = in.document(i);
683                 if (doc != null) {
684                     SearchResult map = new DocumentSearchResult(doc, i);
685                     result.add(map);
686                 }
687             }
688         }
689     }
690 }
691
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags