KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > alfresco > repo > search > impl > lucene > query > ContainerScorer


1 /*
2  * Copyright (C) 2005 Alfresco, Inc.
3  *
4  * Licensed under the Mozilla Public License version 1.1
5  * with a permitted attribution clause. You may obtain a
6  * copy of the License at
7  *
8  * http://www.alfresco.org/legal/license.txt
9  *
10  * Unless required by applicable law or agreed to in writing,
11  * software distributed under the License is distributed on an
12  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13  * either express or implied. See the License for the specific
14  * language governing permissions and limitations under the
15  * License.
16  */

17 package org.alfresco.repo.search.impl.lucene.query;
18
19 import java.io.IOException JavaDoc;
20
21 import org.apache.lucene.index.TermPositions;
22 import org.apache.lucene.search.Explanation;
23 import org.apache.lucene.search.Scorer;
24 import org.apache.lucene.search.Similarity;
25 import org.apache.lucene.search.Weight;
26
27 /**
28  * The scorer for structured field queries.
29  *
30  * A document either matches or it does not, there for the frequency is reported
31  * as 0.0f or 1.0.
32  *
33  *
34  *
35  * @author andyh
36  */

37 public class ContainerScorer extends Scorer
38 {
39     // Unused
40
Weight weight;
41
42     // Positions of documents with multiple structure elements
43
// e.g have mutiple paths, multiple categories or multiples entries in the
44
// same category
45
TermPositions root;
46
47     // The Field positions that describe the structure we are trying to match
48
StructuredFieldPosition[] positions;
49
50     // Unused at the moment
51
byte[] norms;
52
53     // The minium document found so far
54
int min = 0;
55
56     // The max document found so far
57
int max = 0;
58
59     // The next root doc
60
// -1 and it has gone off the end
61
int rootDoc = 0;
62
63     // Are there potentially more documents
64
boolean more = true;
65
66     // The frequency of the terms in the doc (0.0f or 1.0f)
67
float freq = 0.0f;
68
69     // A term position to find all container entries (there is no better way of finding the set of rquired containers)
70
private TermPositions containers;
71
72     /**
73      * The arguments here follow the same pattern as used by the PhraseQuery.
74      * (It has the same unused arguments)
75      *
76      * @param weight -
77      * curently unsued
78      * @param tps -
79      * the term positions for the terms we are trying to find
80      * @param root -
81      * the term positions for documents with multiple entries - this
82      * may be null, or contain no matches - it specifies those things
83      * that appear under multiple categories etc.
84      * @param positions -
85      * the structured field positions - where terms should appear
86      * @param similarity -
87      * used in the abstract scorer implementation
88      * @param norms -
89      * unused
90      */

91     public ContainerScorer(Weight weight, TermPositions root, StructuredFieldPosition[] positions, TermPositions containers, Similarity similarity, byte[] norms)
92     {
93         super(similarity);
94         this.weight = weight;
95         this.positions = positions;
96         this.norms = norms;
97         this.root = root;
98         this.containers = containers;
99     }
100
101     /*
102      * (non-Javadoc)
103      *
104      * @see org.apache.lucene.search.Scorer#next()
105      */

106     public boolean next() throws IOException JavaDoc
107     {
108         // If there is no filtering
109
if (allContainers())
110         {
111             // containers and roots must be in sync or the index is broken
112
while (more)
113             {
114                 if (containers.next() && root.next())
115                 {
116                     if (check(0, root.nextPosition()))
117                     {
118                         return true;
119                     }
120                 }
121                 else
122                 {
123                     more = false;
124                     return false;
125                 }
126             }
127         }
128
129         if (!more)
130         {
131             // One of the search terms has no more docuements
132
return false;
133         }
134
135         if (max == 0)
136         {
137             // We need to initialise
138
// Just do a next on all terms and check if the first doc matches
139
doNextOnAll();
140             if (found())
141             {
142                 return true;
143             }
144             // drop through to the normal find sequence
145
}
146
147         return findNext();
148     }
149
150     /**
151      * Are we looking for all containers?
152      * If there are no positions we must have a better filter
153      *
154      * @return
155      */

156     private boolean allContainers()
157     {
158         if (positions.length == 0)
159         {
160             return true;
161         }
162         for (StructuredFieldPosition sfp : positions)
163         {
164             if (sfp.getCachingTermPositions() != null)
165             {
166                 return false;
167             }
168         }
169         return true;
170     }
171
172     /**
173      * @return
174      * @throws IOException
175      */

176     private boolean findNext() throws IOException JavaDoc
177     {
178         // Move to the next document
179

180         while (more)
181         {
182             move(); // may set more to false
183
if (found())
184             {
185                 return true;
186             }
187         }
188
189         // If we get here we must have no more documents
190
return false;
191     }
192
193     /**
194      * Check if we have found a match
195      *
196      * @return
197      * @throws IOException
198      */

199
200     private boolean found() throws IOException JavaDoc
201     {
202         // No predicate test if there are no positions
203
if (positions.length == 0)
204         {
205             return true;
206         }
207
208         // no more documents - no match
209
if (!more)
210         {
211             return false;
212         }
213
214         // min and max must point to the same document
215
if (min != max)
216         {
217             return false;
218         }
219
220         if (rootDoc != max)
221         {
222             return false;
223         }
224
225         // We have duplicate entries - suport should be improved but it is not used at the moment
226
// This shuld work akin to the leaf scorer
227
// It would compact the index
228
// The match must be in a known term range
229
int count = root.freq();
230         int start = 0;
231         int end = -1;
232         for (int i = 0; i < count; i++)
233         {
234             if (i == 0)
235             {
236                 // First starts at zero
237
start = 0;
238                 end = root.nextPosition() ;
239             }
240             else
241             {
242                 start = end + 1;
243                 end = root.nextPosition() ;
244             }
245
246             if (check(start, end))
247             {
248                 return true;
249             }
250         }
251
252         // We had checks to do and they all failed.
253
return false;
254     }
255
256     /*
257      * We have all documents at the same state. Now we check the positions of
258      * the terms.
259      */

260
261     private boolean check(int start, int end) throws IOException JavaDoc
262     {
263         int offset = checkTail(start, end, 0, 0);
264         // Last match may fail
265
if (offset == -1)
266         {
267             return false;
268         }
269         else
270         {
271             // Check non // ending patterns end at the end of the available pattern
272
if (positions[positions.length - 1].isTerminal())
273             {
274                 return ((offset+1) == end);
275             }
276             else
277             {
278                 return true;
279             }
280         }
281     }
282
283     /**
284      * For // type pattern matches we need to test patterns of variable greedyness.
285      *
286      *
287      * @param start
288      * @param end
289      * @param currentPosition
290      * @param currentOffset
291      * @return
292      * @throws IOException
293      */

294     private int checkTail(int start, int end, int currentPosition, int currentOffset) throws IOException JavaDoc
295     {
296         int offset = currentOffset;
297         for (int i = currentPosition, l = positions.length; i < l; i++)
298         {
299             offset = positions[i].matches(start, end, offset);
300             if (offset == -1)
301             {
302                 return -1;
303             }
304             if (positions[i].isDescendant())
305             {
306                 for (int j = offset; j < end; j++)
307                 {
308                     int newOffset = checkTail(start, end, i + 1, j);
309                     if (newOffset != -1)
310                     {
311                         return newOffset;
312                     }
313                 }
314                 return -1;
315             }
316         }
317         return offset;
318     }
319
320     /*
321      * Move to the next position to consider for a match test
322      */

323
324     private void move() throws IOException JavaDoc
325     {
326         if (min == max)
327         {
328             // If we were at a match just do next on all terms
329
// They all must move on
330
doNextOnAll();
331         }
332         else
333         {
334             // We are in a range - try and skip to the max position on all terms
335
// Only some need to move on - some may move past the current max and set a new target
336
skipToMax();
337         }
338     }
339
340     /*
341      * Go through all the term positions and try and move to next document. Any
342      * failure measn we have no more.
343      *
344      * This can be used at initialisation and when moving away from an existing
345      * match.
346      *
347      * This will set min, max, more and rootDoc
348      *
349      */

350     private void doNextOnAll() throws IOException JavaDoc
351     {
352         // Do the terms
353
int current;
354         boolean first = true;
355         for (int i = 0, l = positions.length; i < l; i++)
356         {
357             if (positions[i].getCachingTermPositions() != null)
358             {
359                 if (positions[i].getCachingTermPositions().next())
360
361                 {
362                     current = positions[i].getCachingTermPositions().doc();
363                     adjustMinMax(current, first);
364                     first = false;
365                 }
366                 else
367                 {
368                     more = false;
369                     return;
370                 }
371             }
372         }
373
374         // Do the root term - it must always exists as the path could well have mutiple entries
375
// If an entry in the index does not have a root terminal it is broken
376
if (root.next())
377         {
378             rootDoc = root.doc();
379         }
380         else
381         {
382             more = false;
383             return;
384         }
385         if (root.doc() < max)
386         {
387             if (root.skipTo(max))
388             {
389                 rootDoc = root.doc();
390             }
391             else
392             {
393                 more = false;
394                 return;
395             }
396         }
397     }
398
399     /*
400      * Try and skip all those term positions at documents less than the current
401      * max up to value. This is quite likely to fail and leave us with (min !=
402      * max) but that is OK, we try again.
403      *
404      * It is possible that max increases as we process terms, this is OK. We
405      * just failed to skip to a given value of max and start doing the next.
406      */

407     private void skipToMax() throws IOException JavaDoc
408     {
409         // Do the terms
410
int current;
411         for (int i = 0, l = positions.length; i < l; i++)
412         {
413             if (i == 0)
414             {
415                 min = max;
416             }
417             if (positions[i].getCachingTermPositions() != null)
418             {
419                 if (positions[i].getCachingTermPositions().doc() < max)
420                 {
421                     if (positions[i].getCachingTermPositions().skipTo(max))
422                     {
423                         current = positions[i].getCachingTermPositions().doc();
424                         adjustMinMax(current, false);
425                     }
426                     else
427                     {
428                         more = false;
429                         return;
430                     }
431                 }
432             }
433         }
434
435         // Do the root
436
if (root.doc() < max)
437         {
438             if (root.skipTo(max))
439             {
440                 rootDoc = root.doc();
441             }
442             else
443             {
444                 more = false;
445                 return;
446             }
447         }
448     }
449
450     /*
451      * Adjust the min and max values Convenience boolean to set or adjust the
452      * minimum.
453      */

454     private void adjustMinMax(int doc, boolean setMin)
455     {
456
457         if (max < doc)
458         {
459             max = doc;
460         }
461
462         if (setMin)
463         {
464             min = doc;
465         }
466         else if (min > doc)
467         {
468             min = doc;
469         }
470     }
471
472     /*
473      * (non-Javadoc)
474      *
475      * @see org.apache.lucene.search.Scorer#doc()
476      */

477     public int doc()
478     {
479         if (allContainers())
480         {
481             return containers.doc();
482         }
483         return max;
484     }
485
486     /*
487      * (non-Javadoc)
488      *
489      * @see org.apache.lucene.search.Scorer#score()
490      */

491     public float score() throws IOException JavaDoc
492     {
493         return 1.0f;
494     }
495
496     /*
497      * (non-Javadoc)
498      *
499      * @see org.apache.lucene.search.Scorer#skipTo(int)
500      */

501     public boolean skipTo(int target) throws IOException JavaDoc
502     {
503         if (allContainers())
504         {
505             containers.skipTo(target);
506             root.skipTo(containers.doc()); // must match
507
if (check(0, root.nextPosition()))
508             {
509                 return true;
510             }
511             while (more)
512             {
513                 if (containers.next() && root.next())
514                 {
515                     if (check(0, root.nextPosition()))
516                     {
517                         return true;
518                     }
519                 }
520                 else
521                 {
522                     more = false;
523                     return false;
524                 }
525             }
526         }
527
528         max = target;
529         return findNext();
530     }
531
532     /*
533      * (non-Javadoc)
534      *
535      * @see org.apache.lucene.search.Scorer#explain(int)
536      */

537     public Explanation explain(int doc) throws IOException JavaDoc
538     {
539         // TODO: Work out what a proper explanation would be here?
540
Explanation tfExplanation = new Explanation();
541
542         while (next() && doc() < doc)
543         {
544         }
545
546         float phraseFreq = (doc() == doc) ? freq : 0.0f;
547         tfExplanation.setValue(getSimilarity().tf(phraseFreq));
548         tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
549
550         return tfExplanation;
551     }
552
553 }
Popular Tags