package org.alfresco.repo.search.impl.lucene.query;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;

import org.alfresco.model.ContentModel;
import org.alfresco.repo.search.SearcherException;
import org.alfresco.service.cmr.dictionary.AspectDefinition;
import org.alfresco.service.cmr.dictionary.DataTypeDefinition;
import org.alfresco.service.cmr.dictionary.DictionaryService;
import org.alfresco.service.cmr.dictionary.PropertyDefinition;
import org.alfresco.service.cmr.dictionary.TypeDefinition;
import org.alfresco.service.namespace.QName;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;

/**
 * Scorer that reports documents which are linked (through their <code>PARENT</code>, <code>ID</code> or
 * category property fields) to the documents produced by a container scorer or, when there is no container
 * scorer, by the <code>level0</code> term positions.
 * <p>
 * The scorer works in one of two modes, selected by {@link #allNodes()}:
 * <ul>
 * <li><b>all nodes</b> - none of the structured field positions has caching term positions. Candidate
 * documents come from the <code>allNodes</code> enumeration and are tested against sorted arrays of document
 * numbers that are pre-computed in {@link #initialise()}.</li>
 * <li><b>positional</b> - at least one structured field position has caching term positions. All of those
 * enumerations and <code>root</code> are advanced until they agree on a document, which is then tested
 * position by position.</li>
 * </ul>
 * When <code>repeat</code> is set, a matching document is returned from {@link #next()} once per counted
 * link and scores 1.0; otherwise it is returned once and scores the link count.
 */
public class LeafScorer extends Scorer
{
    /**
     * Mutable occurrence count, used as a map value.
     */
    static class Counter
    {
        int count = 0;

        @Override
        public String toString()
        {
            return "count = " + count;
        }
    }

    /**
     * Growable list of document numbers that can be turned into an exact-length, sorted array.
     */
    private static final class DocIdBuffer
    {
        /** Initial capacity; the backing array doubles whenever it fills up. */
        private static final int INITIAL_CAPACITY = 10000;

        private int[] docs = new int[INITIAL_CAPACITY];

        private int size = 0;

        /** Appends a document number, growing the backing array when required. */
        void add(int doc)
        {
            if (size == docs.length)
            {
                int[] grown = new int[docs.length * 2];
                System.arraycopy(docs, 0, grown, 0, docs.length);
                docs = grown;
            }
            docs[size++] = doc;
        }

        /** Returns the collected document numbers (duplicates kept) in ascending order. */
        int[] toSortedArray()
        {
            int[] result = new int[size];
            System.arraycopy(docs, 0, result, 0, size);
            Arrays.sort(result);
            return result;
        }
    }

    /** Number of links counted for the current document. */
    private int counter;

    /** How many times the current document has been returned so far (only relevant when repeating). */
    private int countInCounter;

    /** Lowest document number currently seen across the positional enumerations. */
    int min = 0;

    /** Highest document number currently seen across the positional enumerations. */
    int max = 0;

    /** Cleared as soon as any underlying enumeration is exhausted. */
    boolean more = true;

    Scorer containerScorer;

    StructuredFieldPosition[] sfps;

    // NOTE(review): never assigned anywhere in this class, so explain() always reports a frequency of 0.
    float freq = 0.0f;

    /** Container (or root) document IDs mapped to the number of times each was produced. */
    HashMap<String, Counter> parentIds = new HashMap<String, Counter>();

    /** For container documents flagged ISCATEGORY: document ID mapped to its PATH values (leading '/' removed). */
    HashMap<String, List<String>> categories = new HashMap<String, List<String>>();

    HashMap<String, Counter> selfIds = null;

    /** True when the caller supplied its own map of self IDs. */
    boolean hasSelfScorer;

    IndexReader reader;

    private TermPositions allNodes;

    TermPositions level0;

    /** IDs that have already been counted through a self link (positional mode). */
    HashSet<String> selfLinks = new HashSet<String>();

    /** Document numbers that have already been counted through a self link (all-nodes mode). */
    BitSet selfDocs = new BitSet();

    private TermPositions root;

    private int rootDoc;

    private boolean repeat;

    private DictionaryService dictionaryService;

    /** Sorted document numbers found under a PARENT term of a container ID; built in all-nodes mode only. */
    private int[] parents;

    /** Sorted document numbers found under an ID term of a self ID; built in all-nodes mode only. */
    private int[] self;

    /** Sorted document numbers found under a category property term; built in all-nodes mode only. */
    private int[] cats;

    /** General purpose enumeration that is re-seeked for each lookup in {@link #initialise()}. */
    private TermPositions tp;

    /**
     * Creates the scorer and eagerly consumes the container scorer (or <code>level0</code>) to build the
     * lookup structures.
     *
     * @param weight not used by this class
     * @param root enumeration advanced in step with the candidate documents; its positions delimit the
     *            ranges tested in positional mode
     * @param level0 used to find the root document(s) when there is no container scorer; may be null
     * @param containerScorer produces the container documents; may be null
     * @param sfps the structured field positions to match
     * @param allNodes enumeration supplying candidate documents in all-nodes mode
     * @param selfIds pre-computed self IDs, or null to have them collected here
     * @param reader used to load stored documents
     * @param similarity passed to the {@link Scorer} constructor
     * @param norms not used by this class
     * @param dictionaryService used to resolve category types and categorised aspects
     * @param repeat whether a match is returned once per counted link
     * @param tp scratch enumeration used for term lookups during initialisation
     * @throws SearcherException wrapping any IOException raised while initialising
     */
    public LeafScorer(Weight weight, TermPositions root, TermPositions level0, ContainerScorer containerScorer,
            StructuredFieldPosition[] sfps, TermPositions allNodes, HashMap<String, Counter> selfIds,
            IndexReader reader, Similarity similarity, byte[] norms, DictionaryService dictionaryService,
            boolean repeat, TermPositions tp)
    {
        super(similarity);
        this.root = root;
        this.containerScorer = containerScorer;
        this.sfps = sfps;
        this.allNodes = allNodes;
        this.tp = tp;
        this.hasSelfScorer = (selfIds != null);
        this.selfIds = hasSelfScorer ? selfIds : new HashMap<String, Counter>();
        this.reader = reader;
        this.level0 = level0;
        this.dictionaryService = dictionaryService;
        this.repeat = repeat;
        try
        {
            initialise();
        }
        catch (IOException e)
        {
            throw new SearcherException(e);
        }
    }

    /**
     * Collects the container/root IDs and, in all-nodes mode, pre-computes the sorted document number arrays
     * used by {@link #check()}.
     *
     * @throws IOException if the index cannot be read
     * @throws SearcherException if <code>level0</code> is used and does not yield exactly one distinct ID
     */
    private void initialise() throws IOException
    {
        if (containerScorer != null)
        {
            collectContainers();
        }
        else if (level0 != null)
        {
            collectRoot();
        }

        if (allNodes())
        {
            parents = findParentLinkedDocs();
            self = findSelfLinkedDocs();
            cats = findCategoryLinkedDocs();
        }
    }

    /**
     * Drains the container scorer, counting each container ID and recording the paths of category
     * containers.
     */
    private void collectContainers() throws IOException
    {
        parentIds.clear();
        while (containerScorer.next())
        {
            Document document = reader.document(containerScorer.doc());
            // NOTE(review): unlike collectRoot(), a document without an ID field is not tolerated here and
            // fails with a NullPointerException, as it always has.
            String id = document.getField("ID").stringValue();

            increment(parentIds, id);
            if (!hasSelfScorer)
            {
                increment(selfIds, id);
            }

            if (document.getField("ISCATEGORY") != null)
            {
                // NOTE(review): assumes every category document also stores a PATH field.
                String pathString = document.getField("PATH").stringValue();
                if ((pathString.length() > 0) && (pathString.charAt(0) == '/'))
                {
                    pathString = pathString.substring(1);
                }
                List<String> paths = categories.get(id);
                if (paths == null)
                {
                    paths = new ArrayList<String>();
                    categories.put(id, paths);
                }
                paths.add(pathString);
            }
        }
    }

    /**
     * Drains <code>level0</code>, counting each ID as both a parent and a self ID, and insists on exactly
     * one distinct ID.
     */
    private void collectRoot() throws IOException
    {
        parentIds.clear();
        while (level0.next())
        {
            Document document = reader.document(level0.doc());
            Field idField = document.getField("ID");
            if (idField != null)
            {
                String id = idField.stringValue();
                increment(parentIds, id);
                increment(selfIds, id);
            }
        }
        if (parentIds.size() > 1)
        {
            throw new SearcherException("More than one root node in index: " + parentIds.size());
        }
        else if (parentIds.size() == 0)
        {
            throw new SearcherException(
                    "Index has no root node. Check that the correct index locations are being used.");
        }
    }

    /** Adds one to the counter stored under <code>key</code>, creating the counter on first use. */
    private static void increment(HashMap<String, Counter> counts, String key)
    {
        Counter counter = counts.get(key);
        if (counter == null)
        {
            counter = new Counter();
            counts.put(key, counter);
        }
        counter.count++;
    }

    /**
     * For every container ID, records each document found under the matching PARENT term once per term
     * occurrence in the document and per time the container was produced.
     */
    private int[] findParentLinkedDocs() throws IOException
    {
        DocIdBuffer buffer = new DocIdBuffer();
        for (String parent : parentIds.keySet())
        {
            int containerCount = parentIds.get(parent).count;
            tp.seek(new Term("PARENT", parent));
            while (tp.next())
            {
                int doc = tp.doc();
                for (int i = 0, l = tp.freq(); i < l; i++)
                {
                    for (int j = 0; j < containerCount; j++)
                    {
                        buffer.add(doc);
                    }
                }
            }
        }
        return buffer.toSortedArray();
    }

    /**
     * For every self ID, records each document found under the matching ID term once per time the ID was
     * counted.
     */
    private int[] findSelfLinkedDocs() throws IOException
    {
        DocIdBuffer buffer = new DocIdBuffer();
        for (String id : selfIds.keySet())
        {
            int selfCount = selfIds.get(id).count;
            tp.seek(new Term("ID", id));
            while (tp.next())
            {
                int doc = tp.doc();
                for (int i = 0; i < selfCount; i++)
                {
                    buffer.add(doc);
                }
            }
        }
        return buffer.toSortedArray();
    }

    /**
     * For every category container, records each document that holds the container ID in one of the
     * category-typed properties of a categorised aspect, once per term occurrence in the document.
     */
    private int[] findCategoryLinkedDocs() throws IOException
    {
        DocIdBuffer buffer = new DocIdBuffer();
        if (!categories.isEmpty())
        {
            // The set of category property fields does not depend on the category, so work it out once
            // instead of re-scanning every aspect for every category ID.
            List<String> fieldNames = categoryPropertyFieldNames();
            for (String catid : categories.keySet())
            {
                for (String fieldName : fieldNames)
                {
                    tp.seek(new Term(fieldName, catid));
                    while (tp.next())
                    {
                        int doc = tp.doc();
                        for (int i = 0, l = tp.freq(); i < l; i++)
                        {
                            buffer.add(doc);
                        }
                    }
                }
            }
        }
        return buffer.toSortedArray();
    }

    /**
     * Lists the index field name ("@" + property QName) of every category-typed property declared on an
     * aspect that is, or inherits from, the classifiable aspect.
     */
    private List<String> categoryPropertyFieldNames()
    {
        List<String> fieldNames = new ArrayList<String>();
        for (QName aspectQName : dictionaryService.getAllAspects())
        {
            AspectDefinition aspDef = dictionaryService.getAspect(aspectQName);
            if (isCategorised(aspDef))
            {
                for (PropertyDefinition propDef : aspDef.getProperties().values())
                {
                    if (propDef.getDataType().getName().equals(DataTypeDefinition.CATEGORY))
                    {
                        fieldNames.add("@" + propDef.getName().toString());
                    }
                }
            }
        }
        return fieldNames;
    }

    /**
     * Moves to the next match. When repeating, the current document is returned again until it has been
     * returned once per counted link.
     *
     * @return true if there is a current match
     * @throws IOException if the index cannot be read
     */
    public boolean next() throws IOException
    {
        if (repeat && (countInCounter < counter))
        {
            countInCounter++;
            return true;
        }
        countInCounter = 1;
        counter = 0;

        if (allNodes() && advanceAllNodes())
        {
            return true;
        }

        if (!more)
        {
            return false;
        }

        if (max == 0)
        {
            // Nothing has been read yet: prime every enumeration before looking for agreement.
            doNextOnAll();
            if (found())
            {
                return true;
            }
        }

        return findNext();
    }

    /**
     * All-nodes mode only: steps <code>allNodes</code> and <code>root</code> forward together until a
     * document passes {@link #check()} or either enumeration runs out.
     *
     * @return true if a matching document was found; false once <code>more</code> is false
     */
    private boolean advanceAllNodes() throws IOException
    {
        while (more)
        {
            if (allNodes.next() && root.next())
            {
                if (check())
                {
                    return true;
                }
            }
            else
            {
                more = false;
                return false;
            }
        }
        return false;
    }

    /**
     * @return true when no structured field position has caching term positions (which includes the case
     *         of there being no structured field positions at all)
     */
    private boolean allNodes()
    {
        for (StructuredFieldPosition sfp : sfps)
        {
            if (sfp.getCachingTermPositions() != null)
            {
                return false;
            }
        }
        return true;
    }

    /**
     * Keeps advancing the enumerations until they agree on a matching document or one of them runs out.
     */
    private boolean findNext() throws IOException
    {
        while (more)
        {
            move();
            if (found())
            {
                return true;
            }
        }
        return false;
    }

    /**
     * Skips every lagging positional enumeration, and then <code>root</code>, forward to at least
     * <code>max</code>. Clears <code>more</code> if any of them runs out.
     */
    private void skipToMax() throws IOException
    {
        for (int i = 0, l = sfps.length; i < l; i++)
        {
            if (i == 0)
            {
                // Restart the minimum from the current maximum; enumerations at or beyond max are not
                // re-read below, so they cannot pull the minimum back down.
                min = max;
            }
            if (sfps[i].getCachingTermPositions() != null)
            {
                if (sfps[i].getCachingTermPositions().doc() < max)
                {
                    if (sfps[i].getCachingTermPositions().skipTo(max))
                    {
                        adjustMinMax(sfps[i].getCachingTermPositions().doc(), false);
                    }
                    else
                    {
                        more = false;
                        return;
                    }
                }
            }
        }

        advanceRootToMax();
    }

    /**
     * Advances the enumerations: one step each when they currently agree, otherwise a skip of the laggards
     * up to the current maximum.
     */
    private void move() throws IOException
    {
        if (min == max)
        {
            doNextOnAll();
        }
        else
        {
            skipToMax();
        }
    }

    /**
     * Steps every positional enumeration and <code>root</code> forward by one entry, recomputing
     * <code>min</code> and <code>max</code>, then brings <code>root</code> up to <code>max</code>. Clears
     * <code>more</code> if any of them runs out.
     */
    private void doNextOnAll() throws IOException
    {
        boolean first = true;
        for (int i = 0, l = sfps.length; i < l; i++)
        {
            if (sfps[i].getCachingTermPositions() != null)
            {
                if (sfps[i].getCachingTermPositions().next())
                {
                    // The first enumeration read resets min; the others can only lower it.
                    adjustMinMax(sfps[i].getCachingTermPositions().doc(), first);
                    first = false;
                }
                else
                {
                    more = false;
                    return;
                }
            }
        }

        if (root.next())
        {
            rootDoc = root.doc();
        }
        else
        {
            more = false;
            return;
        }

        advanceRootToMax();
    }

    /**
     * Skips <code>root</code> forward to at least <code>max</code> when it is behind, recording the document
     * it lands on, or clears <code>more</code> when it runs out.
     */
    private void advanceRootToMax() throws IOException
    {
        if (root.doc() < max)
        {
            if (root.skipTo(max))
            {
                rootDoc = root.doc();
            }
            else
            {
                more = false;
            }
        }
    }

    /**
     * Folds a document number into the running <code>min</code>/<code>max</code>.
     *
     * @param doc the document number just read
     * @param setMin true to overwrite <code>min</code> unconditionally rather than only lowering it
     */
    private void adjustMinMax(int doc, boolean setMin)
    {
        if (max < doc)
        {
            max = doc;
        }

        if (setMin || (min > doc))
        {
            min = doc;
        }
    }

    /**
     * @return true when there are no structured field positions, or when all enumerations agree on the same
     *         document and that document passes {@link #check()}
     */
    private boolean found() throws IOException
    {
        if (sfps.length == 0)
        {
            return true;
        }

        if (!more || (min != max) || (rootDoc != max))
        {
            return false;
        }

        return check();
    }

    /**
     * Counts the links for the current document.
     *
     * @return true if at least one link was counted
     */
    private boolean check() throws IOException
    {
        if (allNodes())
        {
            return checkAllNodes();
        }

        // Positional mode: each position reported by root closes one range, and the next range starts
        // immediately after it.
        int count = root.freq();
        int end = -1;
        for (int i = 0; i < count; i++)
        {
            int start = end + 1;
            end = root.nextPosition();
            check(start, end, i);
        }
        return this.counter > 0;
    }

    /**
     * All-nodes mode: resets the link counter and counts the current document's occurrences in the
     * pre-computed arrays. A document counted through a self link is counted only once, and is not
     * additionally counted through parent or category links.
     */
    private boolean checkAllNodes()
    {
        this.counter = 0;
        int current = doc();

        StructuredFieldPosition last = sfps[sfps.length - 1];

        // NOTE(review): the hard-coded sfps[1] is inherited from the original code and requires at least
        // two structured field positions whenever the last one links to self - confirm this is intended.
        if (last.linkSelf() && (self != null) && sfps[1].linkSelf())
        {
            int selfMatches = countOccurrences(self, current);
            if ((selfMatches > 0) && !selfDocs.get(current))
            {
                selfDocs.set(current);
                this.counter += selfMatches;
            }
        }
        if (!selfDocs.get(current) && last.linkParent())
        {
            this.counter += countOccurrences(parents, current);
            this.counter += countOccurrences(cats, current);
        }
        return counter > 0;
    }

    /**
     * Counts how many times a value occurs in an ascending array.
     *
     * @param sorted the array to search; may be null
     * @param value the value to look for
     * @return the number of occurrences, or 0 if the array is null or does not contain the value
     */
    private static int countOccurrences(int[] sorted, int value)
    {
        if (sorted == null)
        {
            return 0;
        }
        int hit = Arrays.binarySearch(sorted, value);
        if (hit < 0)
        {
            return 0;
        }
        // binarySearch may land anywhere inside a run of equal values, so widen in both directions.
        int first = hit;
        while ((first > 0) && (sorted[first - 1] == value))
        {
            first--;
        }
        int last = hit;
        while ((last < sorted.length - 1) && (sorted[last + 1] == value))
        {
            last++;
        }
        return last - first + 1;
    }

    /**
     * Positional mode: tests one position range of the current document against every structured field
     * position and, if they all match, counts the links for the parent stored at the same index.
     *
     * @param start first position of the range
     * @param end last position of the range
     * @param position index of the range, also used to pick the matching PARENT and LINKASPECT values
     */
    private void check(int start, int end, int position) throws IOException
    {
        int offset = 0;
        for (StructuredFieldPosition sfp : sfps)
        {
            offset = sfp.matches(start, end, offset);
            if (offset == -1)
            {
                return;
            }
        }

        // NOTE(review): the significance of the offset 2 for a terminal position is defined by
        // StructuredFieldPosition.matches(), which is not visible from here.
        if ((sfps[sfps.length - 1].isTerminal()) && (offset != 2))
        {
            return;
        }

        Document doc = reader.document(doc());
        Field[] parentFields = doc.getFields("PARENT");
        Field[] linkFields = doc.getFields("LINKASPECT");

        String parentID = null;
        String linkAspect = null;
        if ((parentFields != null) && (parentFields.length > position) && (parentFields[position] != null))
        {
            parentID = parentFields[position].stringValue();
        }
        if ((linkFields != null) && (linkFields.length > position) && (linkFields[position] != null))
        {
            linkAspect = linkFields[position].stringValue();
        }

        containersIncludeCurrent(doc, parentID, linkAspect);
    }

    /**
     * Adds to the link counter if the given document is linked to one of the containers, either as itself
     * or through the given parent. At most one of the alternatives contributes per call.
     *
     * @param document the stored document being tested
     * @param parentID the parent ID at the position being tested; may be null
     * @param aspectQName the link aspect at the position being tested; may be null
     */
    private void containersIncludeCurrent(Document document, String parentID, String aspectQName) throws IOException
    {
        if ((containerScorer == null) && (level0 == null))
        {
            return;
        }
        if (sfps.length == 0)
        {
            return;
        }

        String id = document.getField("ID").stringValue();
        StructuredFieldPosition last = sfps[sfps.length - 1];

        if (last.linkSelf())
        {
            Counter selfCounter = selfIds.get(id);
            if ((selfCounter != null) && !selfLinks.contains(id))
            {
                this.counter += selfCounter.count;
                selfLinks.add(id);
                return;
            }
        }

        if ((parentID == null) || (parentID.length() == 0) || !last.linkParent())
        {
            return;
        }
        if (selfLinks.contains(id))
        {
            // Already counted through a self link.
            return;
        }

        if (!categories.containsKey(parentID))
        {
            Counter parentCounter = parentIds.get(parentID);
            if (parentCounter != null)
            {
                this.counter += parentCounter.count;
            }
            return;
        }

        // The parent is a category: the document either is itself a category beneath it ...
        Field typeField = document.getField("TYPE");
        if ((typeField != null) && (typeField.stringValue() != null))
        {
            QName typeRef = QName.createQName(typeField.stringValue());
            if (isCategory(typeRef))
            {
                Counter parentCounter = parentIds.get(parentID);
                if (parentCounter != null)
                {
                    this.counter += parentCounter.count;
                    return;
                }
            }
        }

        // ... or refers to it from a category property of the linking aspect.
        if (aspectQName == null)
        {
            return;
        }
        QName classRef = QName.createQName(aspectQName);
        AspectDefinition aspDef = dictionaryService.getAspect(classRef);
        if (!isCategorised(aspDef))
        {
            return;
        }
        for (PropertyDefinition propDef : aspDef.getProperties().values())
        {
            if (!propDef.getDataType().getName().equals(DataTypeDefinition.CATEGORY))
            {
                continue;
            }
            Field[] categoryFields = document.getFields("@" + propDef.getName());
            if (categoryFields == null)
            {
                continue;
            }
            for (Field categoryField : categoryFields)
            {
                if ((categoryField != null) && (categoryField.stringValue() != null)
                        && categoryField.stringValue().endsWith(parentID))
                {
                    // Count one link per path of the category that mentions the linking aspect.
                    int count = 0;
                    List<String> paths = categories.get(parentID);
                    if (paths != null)
                    {
                        for (String path : paths)
                        {
                            if (path.indexOf(aspectQName) != -1)
                            {
                                count++;
                            }
                        }
                    }
                    this.counter += count;
                    return;
                }
            }
        }
    }

    /**
     * @param classRef the type to test; may be null
     * @return true if the type is the category type or has it among its ancestors
     */
    private boolean isCategory(QName classRef)
    {
        if (classRef == null)
        {
            return false;
        }
        TypeDefinition current = dictionaryService.getType(classRef);
        while (current != null)
        {
            if (current.getName().equals(ContentModel.TYPE_CATEGORY))
            {
                return true;
            }
            QName parentName = current.getParentName();
            if (parentName == null)
            {
                break;
            }
            current = dictionaryService.getType(parentName);
        }
        return false;
    }

    /**
     * @param aspDef the aspect to test; may be null
     * @return true if the aspect is the classifiable aspect or has it among its ancestors
     */
    private boolean isCategorised(AspectDefinition aspDef)
    {
        AspectDefinition current = aspDef;
        while (current != null)
        {
            if (current.getName().equals(ContentModel.ASPECT_CLASSIFIABLE))
            {
                return true;
            }
            QName parentName = current.getParentName();
            if (parentName == null)
            {
                break;
            }
            current = dictionaryService.getAspect(parentName);
        }
        return false;
    }

    /**
     * @return the current document number: that of <code>allNodes</code> in all-nodes mode, otherwise
     *         <code>max</code>
     */
    public int doc()
    {
        if (allNodes())
        {
            return allNodes.doc();
        }
        return max;
    }

    /**
     * @return 1.0 when repeating (each link is returned as its own hit), otherwise the number of links
     *         counted for the current document
     */
    public float score() throws IOException
    {
        return repeat ? 1.0f : counter;
    }

    /**
     * Moves to the first match whose document number is at least <code>target</code>.
     *
     * @param target the lowest acceptable document number
     * @return true if there is such a match
     * @throws IOException if the index cannot be read
     */
    public boolean skipTo(int target) throws IOException
    {
        countInCounter = 1;
        counter = 0;

        if (allNodes())
        {
            // A failed skip means the enumeration is exhausted and its doc() is no longer meaningful, so
            // stop here rather than testing whatever doc() happens to return. This mirrors next(), which
            // also gives up as soon as either enumeration cannot advance.
            if (!allNodes.skipTo(target) || !root.skipTo(allNodes.doc()))
            {
                more = false;
                return false;
            }
            if (check())
            {
                return true;
            }
            if (advanceAllNodes())
            {
                return true;
            }
        }

        max = target;
        return findNext();
    }

    /**
     * Advances this scorer to the given document and describes the term frequency used for it.
     *
     * @param doc the document to explain
     * @return the explanation
     * @throws IOException if the index cannot be read
     */
    public Explanation explain(int doc) throws IOException
    {
        Explanation tfExplanation = new Explanation();

        while (next() && doc() < doc)
        {
            // Advance until the requested document is reached or passed.
        }

        float phraseFreq = (doc() == doc) ? freq : 0.0f;
        tfExplanation.setValue(getSimilarity().tf(phraseFreq));
        tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");

        return tfExplanation;
    }

}