// Listing taken from KickJava ("Java API By Example, From Geeks To Geeks").
//
// Java > Open Source Codes > org > alfresco > repo > search > impl > lucene > query > LeafScorer


/*
 * Copyright (C) 2005 Alfresco, Inc.
 *
 * Licensed under the Mozilla Public License version 1.1
 * with a permitted attribution clause. You may obtain a
 * copy of the License at
 *
 * http://www.alfresco.org/legal/license.txt
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the
 * License.
 */

package org.alfresco.repo.search.impl.lucene.query;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.alfresco.model.ContentModel;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.namespace.QName;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;

44
45 public class LeafScorer extends Scorer
46 {
47     static class Counter
48     {
49         int count = 0;
50
51         public String JavaDoc toString()
52         {
53             return "count = " + count;
54         }
55     }
56
57     private int counter;
58
59     private int countInCounter;
60
61     int min = 0;
62
63     int max = 0;
64
65     boolean more = true;
66
67     Scorer containerScorer;
68
69     StructuredFieldPosition[] sfps;
70
71     float freq = 0.0f;
72
73     HashMap JavaDoc<String JavaDoc, Counter> parentIds = new HashMap JavaDoc<String JavaDoc, Counter>();
74
75     HashMap JavaDoc<String JavaDoc, List JavaDoc<String JavaDoc>> categories = new HashMap JavaDoc<String JavaDoc, List JavaDoc<String JavaDoc>>();
76
77     HashMap JavaDoc<String JavaDoc, Counter> selfIds = null;
78
79     boolean hasSelfScorer;
80
81     IndexReader reader;
82
83     private TermPositions allNodes;
84
85     TermPositions level0;
86
87     HashSet JavaDoc<String JavaDoc> selfLinks = new HashSet JavaDoc<String JavaDoc>();
88
89     BitSet JavaDoc selfDocs = new BitSet JavaDoc();
90
91     private TermPositions root;
92
93     private int rootDoc;
94
95     private boolean repeat;
96
97     private DictionaryService dictionaryService;
98
99     private int[] parents;
100
101     private int[] self;
102
103     private int[] cats;
104
105     private TermPositions tp;
106
107     public LeafScorer(Weight weight, TermPositions root, TermPositions level0, ContainerScorer containerScorer,
108             StructuredFieldPosition[] sfps, TermPositions allNodes, HashMap JavaDoc<String JavaDoc, Counter> selfIds,
109             IndexReader reader, Similarity similarity, byte[] norms, DictionaryService dictionaryService,
110             boolean repeat, TermPositions tp)
111     {
112         super(similarity);
113         this.root = root;
114         this.containerScorer = containerScorer;
115         this.sfps = sfps;
116         this.allNodes = allNodes;
117         this.tp = tp;
118         if (selfIds == null)
119         {
120             this.selfIds = new HashMap JavaDoc<String JavaDoc, Counter>();
121             hasSelfScorer = false;
122         }
123         else
124         {
125             this.selfIds = selfIds;
126             hasSelfScorer = true;
127         }
128         this.reader = reader;
129         this.level0 = level0;
130         this.dictionaryService = dictionaryService;
131         this.repeat = repeat;
132         try
133         {
134             initialise();
135         }
136         catch (IOException JavaDoc e)
137         {
138             throw new SearcherException(e);
139         }
140
141     }
142
143     private void initialise() throws IOException JavaDoc
144     {
145         if (containerScorer != null)
146         {
147             parentIds.clear();
148             while (containerScorer.next())
149             {
150                 int doc = containerScorer.doc();
151                 Document document = reader.document(doc);
152                 Field id = document.getField("ID");
153                 Counter counter = parentIds.get(id.stringValue());
154                 if (counter == null)
155                 {
156                     counter = new Counter();
157                     parentIds.put(id.stringValue(), counter);
158                 }
159                 counter.count++;
160
161                 if (!hasSelfScorer)
162                 {
163                     counter = selfIds.get(id.stringValue());
164                     if (counter == null)
165                     {
166                         counter = new Counter();
167                         selfIds.put(id.stringValue(), counter);
168                     }
169                     counter.count++;
170                 }
171
172                 Field isCategory = document.getField("ISCATEGORY");
173                 if (isCategory != null)
174                 {
175                     Field path = document.getField("PATH");
176                     String JavaDoc pathString = path.stringValue();
177                     if ((pathString.length() > 0) && (pathString.charAt(0) == '/'))
178                     {
179                         pathString = pathString.substring(1);
180                     }
181                     List JavaDoc<String JavaDoc> list = categories.get(id.stringValue());
182                     if (list == null)
183                     {
184                         list = new ArrayList JavaDoc<String JavaDoc>();
185                         categories.put(id.stringValue(), list);
186                     }
187                     list.add(pathString);
188                 }
189             }
190         }
191         else if (level0 != null)
192         {
193             parentIds.clear();
194             while (level0.next())
195             {
196                 int doc = level0.doc();
197                 Document document = reader.document(doc);
198                 Field id = document.getField("ID");
199                 if (id != null)
200                 {
201                     Counter counter = parentIds.get(id.stringValue());
202                     if (counter == null)
203                     {
204                         counter = new Counter();
205                         parentIds.put(id.stringValue(), counter);
206                     }
207                     counter.count++;
208
209                     counter = selfIds.get(id.stringValue());
210                     if (counter == null)
211                     {
212                         counter = new Counter();
213                         selfIds.put(id.stringValue(), counter);
214                     }
215                     counter.count++;
216                 }
217             }
218             if (parentIds.size() > 1)
219             {
220                 throw new SearcherException("More than one root node in index: " + parentIds.size());
221             }
222             else if (parentIds.size() == 0)
223             {
224                 throw new SearcherException("Index has no root node. Check that the correct index locations are being used.");
225             }
226         }
227
228         if (allNodes())
229         {
230             int position = 0;
231             parents = new int[10000];
232             for (String JavaDoc parent : parentIds.keySet())
233             {
234                 Counter counter = parentIds.get(parent);
235                 tp.seek(new Term("PARENT", parent));
236                 while (tp.next())
237                 {
238                     for (int i = 0, l = tp.freq(); i < l; i++)
239                     {
240                         for(int j = 0; j < counter.count; j++)
241                         {
242                            parents[position++] = tp.doc();
243                            if (position == parents.length)
244                            {
245                                int[] old = parents;
246                                parents = new int[old.length * 2];
247                                System.arraycopy(old, 0, parents, 0, old.length);
248                            }
249                         }
250                        
251                     }
252                 }
253                 
254             }
255             int[] old = parents;
256             parents = new int[position];
257             System.arraycopy(old, 0, parents, 0, position);
258             Arrays.sort(parents);
259
260             position = 0;
261             self = new int[10000];
262             for (String JavaDoc id : selfIds.keySet())
263             {
264                 tp.seek(new Term("ID", id));
265                 while (tp.next())
266                 {
267                     Counter counter = selfIds.get(id);
268                     for(int i = 0; i < counter.count; i++)
269                     {
270                        self[position++] = tp.doc();
271                        if (position == self.length)
272                        {
273                            old = self;
274                            self = new int[old.length * 2];
275                            System.arraycopy(old, 0, self, 0, old.length);
276                        }
277                     }
278                 }
279                 
280             }
281             old = self;
282             self = new int[position];
283             System.arraycopy(old, 0, self, 0, position);
284             Arrays.sort(self);
285
286             position = 0;
287             cats = new int[10000];
288             for (String JavaDoc catid : categories.keySet())
289             {
290                 for (QName apsectQName : dictionaryService.getAllAspects())
291                 {
292                     AspectDefinition aspDef = dictionaryService.getAspect(apsectQName);
293                     if (isCategorised(aspDef))
294                     {
295                         for (PropertyDefinition propDef : aspDef.getProperties().values())
296                         {
297                             if (propDef.getDataType().getName().equals(DataTypeDefinition.CATEGORY))
298                             {
299                                 tp.seek(new Term("@" + propDef.getName().toString(), catid));
300                                 while (tp.next())
301                                 {
302                                     for (int i = 0, l = tp.freq(); i < l; i++)
303                                     {
304                                         cats[position++] = tp.doc();
305                                         if (position == cats.length)
306                                         {
307                                             old = cats;
308                                             cats = new int[old.length * 2];
309                                             System.arraycopy(old, 0, cats, 0, old.length);
310                                         }
311                                     }
312                                 }
313                                
314                             }
315                         }
316                     }
317                 }
318
319             }
320             old = cats;
321             cats = new int[position];
322             System.arraycopy(old, 0, cats, 0, position);
323             Arrays.sort(cats);
324         }
325     }
326
327     public boolean next() throws IOException JavaDoc
328     {
329
330         if (repeat && (countInCounter < counter))
331         {
332             countInCounter++;
333             return true;
334         }
335         else
336         {
337             countInCounter = 1;
338             counter = 0;
339         }
340
341         if (allNodes())
342         {
343             while (more)
344             {
345                 if (allNodes.next() && root.next())
346                 {
347                     if (check())
348                     {
349                         return true;
350                     }
351                 }
352                 else
353                 {
354                     more = false;
355                     return false;
356                 }
357             }
358         }
359
360         if (!more)
361         {
362             // One of the search terms has no more docuements
363
return false;
364         }
365
366         if (max == 0)
367         {
368             // We need to initialise
369
// Just do a next on all terms and check if the first doc matches
370
doNextOnAll();
371             if (found())
372             {
373                 return true;
374             }
375         }
376
377         return findNext();
378     }
379
380     private boolean allNodes()
381     {
382         if (sfps.length == 0)
383         {
384             return true;
385         }
386         for (StructuredFieldPosition sfp : sfps)
387         {
388             if (sfp.getCachingTermPositions() != null)
389             {
390                 return false;
391             }
392         }
393         return true;
394     }
395
396     private boolean findNext() throws IOException JavaDoc
397     {
398         // Move to the next document
399

400         while (more)
401         {
402             move(); // may set more to false
403
if (found())
404             {
405                 return true;
406             }
407         }
408
409         // If we get here we must have no more documents
410
return false;
411     }
412
413     private void skipToMax() throws IOException JavaDoc
414     {
415         // Do the terms
416
int current;
417         for (int i = 0, l = sfps.length; i < l; i++)
418         {
419             if (i == 0)
420             {
421                 min = max;
422             }
423             if (sfps[i].getCachingTermPositions() != null)
424             {
425                 if (sfps[i].getCachingTermPositions().doc() < max)
426                 {
427                     if (sfps[i].getCachingTermPositions().skipTo(max))
428                     {
429                         current = sfps[i].getCachingTermPositions().doc();
430                         adjustMinMax(current, false);
431                     }
432                     else
433                     {
434                         more = false;
435                         return;
436                     }
437                 }
438             }
439         }
440
441         // Do the root
442
if (root.doc() < max)
443         {
444             if (root.skipTo(max))
445             {
446                 rootDoc = root.doc();
447             }
448             else
449             {
450                 more = false;
451                 return;
452             }
453         }
454     }
455
456     private void move() throws IOException JavaDoc
457     {
458         if (min == max)
459         {
460             // If we were at a match just do next on all terms
461
doNextOnAll();
462         }
463         else
464         {
465             // We are in a range - try and skip to the max position on all terms
466
skipToMax();
467         }
468     }
469
470     private void doNextOnAll() throws IOException JavaDoc
471     {
472         // Do the terms
473
int current;
474         boolean first = true;
475         for (int i = 0, l = sfps.length; i < l; i++)
476         {
477             if (sfps[i].getCachingTermPositions() != null)
478             {
479                 if (sfps[i].getCachingTermPositions().next())
480                 {
481                     current = sfps[i].getCachingTermPositions().doc();
482                     adjustMinMax(current, first);
483                     first = false;
484                 }
485                 else
486                 {
487                     more = false;
488                     return;
489                 }
490             }
491         }
492
493         // Do the root term
494
if (root.next())
495         {
496             rootDoc = root.doc();
497         }
498         else
499         {
500             more = false;
501             return;
502         }
503         if (root.doc() < max)
504         {
505             if (root.skipTo(max))
506             {
507                 rootDoc = root.doc();
508             }
509             else
510             {
511                 more = false;
512                 return;
513             }
514         }
515     }
516
517     private void adjustMinMax(int doc, boolean setMin)
518     {
519
520         if (max < doc)
521         {
522             max = doc;
523         }
524
525         if (setMin)
526         {
527             min = doc;
528         }
529         else if (min > doc)
530         {
531             min = doc;
532         }
533     }
534
535     private boolean found() throws IOException JavaDoc
536     {
537         if (sfps.length == 0)
538         {
539             return true;
540         }
541
542         // no more documents - no match
543
if (!more)
544         {
545             return false;
546         }
547
548         // min and max must point to the same document
549
if (min != max)
550         {
551             return false;
552         }
553
554         if (rootDoc != max)
555         {
556             return false;
557         }
558
559         return check();
560     }
561
562     private boolean check() throws IOException JavaDoc
563     {
564         if (allNodes())
565         {
566             this.counter = 0;
567             int position;
568
569             StructuredFieldPosition last = sfps[sfps.length - 1];
570
571             if (last.linkSelf())
572             {
573                 if ((self != null) && sfps[1].linkSelf() && ((position = Arrays.binarySearch(self, doc())) >= 0))
574                 {
575                     if (!selfDocs.get(doc()))
576                     {
577                         selfDocs.set(doc());
578                         while (position > -1 && self[position] == doc())
579                         {
580                             position--;
581                         }
582                         for (int i = position + 1, l = self.length; ((i < l) && (self[i] == doc())); i++)
583                         {
584                             this.counter++;
585                         }
586                     }
587                 }
588             }
589             if (!selfDocs.get(doc()) && last.linkParent())
590             {
591                 if ((parents != null) && ((position = Arrays.binarySearch(parents, doc())) >= 0))
592                 {
593                     while (position > -1 && parents[position] == doc())
594                     {
595                         position--;
596                     }
597                     for (int i = position + 1, l = parents.length; ((i < l) && (parents[i] == doc())); i++)
598                     {
599                         this.counter++;
600                     }
601                 }
602
603                 if ((cats != null) && ((position = Arrays.binarySearch(cats, doc())) >= 0))
604                 {
605                     while (position > -1 && cats[position] == doc())
606                     {
607                         position--;
608                     }
609                     for (int i = position + 1, l = cats.length; ((i < l) && (cats[i] == doc())); i++)
610                     {
611                         this.counter++;
612                     }
613                 }
614             }
615             return counter > 0;
616         }
617
618         // String name = reader.document(doc()).getField("QNAME").stringValue();
619
// We have duplicate entries
620
// The match must be in a known term range
621
int count = root.freq();
622         int start = 0;
623         int end = -1;
624         for (int i = 0; i < count; i++)
625         {
626             if (i == 0)
627             {
628                 // First starts at zero
629
start = 0;
630                 end = root.nextPosition();
631             }
632             else
633             {
634                 start = end + 1;
635                 end = root.nextPosition();
636             }
637
638             check(start, end, i);
639
640         }
641         // We had checks to do and they all failed.
642
return this.counter > 0;
643     }
644
645     private void check(int start, int end, int position) throws IOException JavaDoc
646     {
647         int offset = 0;
648         for (int i = 0, l = sfps.length; i < l; i++)
649         {
650             offset = sfps[i].matches(start, end, offset);
651             if (offset == -1)
652             {
653                 return;
654             }
655         }
656         // Last match may fail
657
if (offset == -1)
658         {
659             return;
660         }
661         else
662         {
663             if ((sfps[sfps.length - 1].isTerminal()) && (offset != 2))
664             {
665                 return;
666             }
667         }
668
669         Document doc = reader.document(doc());
670         Field[] parentFields = doc.getFields("PARENT");
671         Field[] linkFields = doc.getFields("LINKASPECT");
672
673         String JavaDoc parentID = null;
674         String JavaDoc linkAspect = null;
675         if ((parentFields != null) && (parentFields.length > position) && (parentFields[position] != null))
676         {
677             parentID = parentFields[position].stringValue();
678         }
679         if ((linkFields != null) && (linkFields.length > position) && (linkFields[position] != null))
680         {
681             linkAspect = linkFields[position].stringValue();
682         }
683
684         containersIncludeCurrent(doc, parentID, linkAspect);
685
686     }
687
688     private void containersIncludeCurrent(Document document, String JavaDoc parentID, String JavaDoc aspectQName) throws IOException JavaDoc
689     {
690         if ((containerScorer != null) || (level0 != null))
691         {
692             if (sfps.length == 0)
693             {
694                 return;
695             }
696             String JavaDoc id = document.getField("ID").stringValue();
697             StructuredFieldPosition last = sfps[sfps.length - 1];
698             if ((last.linkSelf() && selfIds.containsKey(id)))
699             {
700                 Counter counter = selfIds.get(id);
701                 if (counter != null)
702                 {
703                     if (!selfLinks.contains(id))
704                     {
705                         this.counter += counter.count;
706                         selfLinks.add(id);
707                         return;
708                     }
709                 }
710             }
711             if ((parentID != null) && (parentID.length() > 0) && last.linkParent())
712             {
713                 if (!selfLinks.contains(id))
714                 {
715                     if (categories.containsKey(parentID))
716                     {
717                         Field typeField = document.getField("TYPE");
718                         if ((typeField != null) && (typeField.stringValue() != null))
719                         {
720                             QName typeRef = QName.createQName(typeField.stringValue());
721                             if (isCategory(typeRef))
722                             {
723                                 Counter counter = parentIds.get(parentID);
724                                 if (counter != null)
725                                 {
726                                     this.counter += counter.count;
727                                     return;
728                                 }
729                             }
730                         }
731
732                         if (aspectQName != null)
733                         {
734                             QName classRef = QName.createQName(aspectQName);
735                             AspectDefinition aspDef = dictionaryService.getAspect(classRef);
736                             if (isCategorised(aspDef))
737                             {
738                                 for (PropertyDefinition propDef : aspDef.getProperties().values())
739                                 {
740                                     if (propDef.getDataType().getName().equals(DataTypeDefinition.CATEGORY))
741                                     {
742                                         // get field and compare to ID
743
// Check in path as QName
744
// somewhere
745
Field[] categoryFields = document.getFields("@" + propDef.getName());
746                                         if (categoryFields != null)
747                                         {
748                                             for (Field categoryField : categoryFields)
749                                             {
750                                                 if ((categoryField != null) && (categoryField.stringValue() != null))
751                                                 {
752                                                     if (categoryField.stringValue().endsWith(parentID))
753                                                     {
754                                                         int count = 0;
755                                                         List JavaDoc<String JavaDoc> paths = categories.get(parentID);
756                                                         if (paths != null)
757                                                         {
758                                                             for (String JavaDoc path : paths)
759                                                             {
760                                                                 if (path.indexOf(aspectQName) != -1)
761                                                                 {
762                                                                     count++;
763                                                                 }
764                                                             }
765                                                         }
766                                                         this.counter += count;
767                                                         return;
768                                                     }
769                                                 }
770                                             }
771                                         }
772                                     }
773                                 }
774                             }
775
776                         }
777                     }
778                     else
779                     {
780                         Counter counter = parentIds.get(parentID);
781                         if (counter != null)
782                         {
783                             this.counter += counter.count;
784                             return;
785                         }
786                     }
787
788                 }
789             }
790
791             return;
792         }
793         else
794         {
795             return;
796         }
797     }
798
799     private boolean isCategory(QName classRef)
800     {
801         if (classRef == null)
802         {
803             return false;
804         }
805         TypeDefinition current = dictionaryService.getType(classRef);
806         while (current != null)
807         {
808             if (current.getName().equals(ContentModel.TYPE_CATEGORY))
809             {
810                 return true;
811             }
812             else
813             {
814                 QName parentName = current.getParentName();
815                 if (parentName == null)
816                 {
817                     break;
818                 }
819                 current = dictionaryService.getType(parentName);
820             }
821         }
822         return false;
823     }
824
825     private boolean isCategorised(AspectDefinition aspDef)
826     {
827         AspectDefinition current = aspDef;
828         while (current != null)
829         {
830             if (current.getName().equals(ContentModel.ASPECT_CLASSIFIABLE))
831             {
832                 return true;
833             }
834             else
835             {
836                 QName parentName = current.getParentName();
837                 if (parentName == null)
838                 {
839                     break;
840                 }
841                 current = dictionaryService.getAspect(parentName);
842             }
843         }
844         return false;
845     }
846
847     public int doc()
848     {
849         if (allNodes())
850         {
851             return allNodes.doc();
852         }
853         return max;
854     }
855
856     public float score() throws IOException JavaDoc
857     {
858         return repeat ? 1.0f : counter;
859     }
860
861     public boolean skipTo(int target) throws IOException JavaDoc
862     {
863
864         countInCounter = 1;
865         counter = 0;
866
867         if (allNodes())
868         {
869             allNodes.skipTo(target);
870             root.skipTo(allNodes.doc()); // must match
871
if (check())
872             {
873                 return true;
874             }
875             while (more)
876             {
877                 if (allNodes.next() && root.next())
878                 {
879                     if (check())
880                     {
881                         return true;
882                     }
883                 }
884                 else
885                 {
886                     more = false;
887                     return false;
888                 }
889             }
890         }
891
892         max = target;
893         return findNext();
894     }
895
896     public Explanation explain(int doc) throws IOException JavaDoc
897     {
898         Explanation tfExplanation = new Explanation();
899
900         while (next() && doc() < doc)
901         {
902         }
903
904         float phraseFreq = (doc() == doc) ? freq : 0.0f;
905         tfExplanation.setValue(getSimilarity().tf(phraseFreq));
906         tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
907
908         return tfExplanation;
909     }
910
911 }
912
// Popular Tags