KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > hp > hpl > jena > graph > query > SimpleTripleSorter


1 /*
2   (c) Copyright 2003, 2004, 2005 Hewlett-Packard Development Company, LP
3   [See end of file]
4   $Id: SimpleTripleSorter.java,v 1.9 2005/02/21 11:52:25 andy_seaborne Exp $
5 */

6
7 package com.hp.hpl.jena.graph.query;
8
9 import com.hp.hpl.jena.graph.*;
10
11 import java.util.*;
12
13 /**
14     A TripleSorter for "optimising" queries. The triples of the query are permuted by
15     moving the "lightest" triples to earlier positions. Within each region of the same
16     lightness, triples the bind the most variables to their right are preferred. Otherwise
17     the order is preserved.
18 <p>
19     The notion of "lightness" makes more concrete triples lighter than less concrete ones,
20     and variables lighter than ANY. Variables that have been bound by the time their
21     containing triple is processed weigh just a little.
22 <p>
23     The notion of "bind the most" is just the sum of occurances of the variables in the
24     triple in the other triples.
25 <p>
26     No weighting is applied to predicate position, and no knowledge about the graph
27     being queried is required.
28     
29     @author kers
30 */

31 public class SimpleTripleSorter implements TripleSorter
32     {
33     private Triple [] result;
34     private int putIndex;
35     private Set bound;
36     private List remaining;
37         
38     /**
39         A public SimpleTripleSorter needs no arguments (we imagine more sophisticated
40         ones might).
41     */

42     public SimpleTripleSorter()
43         {}
44         
45     /**
46         Sort the triple array so that more-bound triples come before less-bound triples.
47         Preserve the order of the elements unless they <i>have<i> to move. Return
48         a new permuted copy of the original array. The work is done by a new instance
49         of SimpleTripleSorter specialised to this triple array (and with helpful state).
50     */

51     public Triple [] sort( Triple[] ts )
52         { return new SimpleTripleSorter( ts ) .sort(); }
53         
54     /**
55         Initialise a working SimpleTripleSorter from the triple array to sort. The working
56         copy has an empty set of bound variables and a mutable (and mutated) list of the
57         original triple array, in the same order.
58     */

59     protected SimpleTripleSorter( Triple [] triples )
60         {
61         this();
62         this.bound = new HashSet();
63         this.result = new Triple[triples.length];
64         this.remaining = new ArrayList( Arrays.asList( triples ) );
65         }
66
67     /**
68         Sort the triple array so that more-bound triples come before less-bound triples.
69         Preserve the order of the elements unless they <i>have<i> to move.
70     <p>
71         The algorithm just repeatedly looks for a lightest triple, moves it into the result
72         array, and re-weighs triples in the light of the new bindings that makes. Of several
73         lightest triples, the first is picked [mostly so that it's easier to write the tests].
74     */

75     protected Triple [] sort()
76         {
77         while (remaining.size() > 0) accept( findMostBinding( findLightest( remaining ) ) );
78         return result;
79         }
80         
81     /**
82         Accept a triple as the next element in the result array, note that all its variables are
83         now bound, and remove it from the list of remaining triples.
84     */

85     protected void accept( Triple t )
86         {
87         result[putIndex++] = t;
88         bind( t );
89         remaining.remove( t );
90         }
91                 
92     /**
93         Answer a list of the lightest triples in the candidate list; takes one pass over the
94         candidates.
95         
96         @param candidates the list of triples to select from
97         @return the light of lightest triples [by <code>weight</code>], preserving order
98     */

99     protected List findLightest( List candidates )
100         {
101         List lightest = new ArrayList();
102         int minWeight = 100;
103         for (int i = 0; i < candidates.size(); i +=1)
104             {
105             Triple t = (Triple) candidates.get(i);
106             int w = weight( t );
107             if (w < minWeight)
108                 {
109                 lightest.clear();
110                 lightest.add( t );
111                 minWeight = w;
112                 }
113             else if (w == minWeight)
114                 lightest.add( t );
115             }
116         return lightest;
117         }
118         
119     /**
120         Answer the first most-binding triple in the list of candidates.
121     */

122     protected Triple findMostBinding( List candidates )
123         {
124         int maxBinding = -1;
125         Triple mostBinding = null;
126         for (int i = 0; i < candidates.size(); i += 1)
127             {
128             Triple t = (Triple) candidates.get(i);
129             int count = bindingCount( t );
130             if (count > maxBinding) { mostBinding = t; maxBinding = count; }
131             }
132         return mostBinding;
133         }
134         
135     /**
136         The binding count of a triple is the number of instances of variables in other triples
137         it would capture if it were to be bound.
138         
139         @param t the triple to compute the binding count for
140         @return the total binding count of t with respect to all the triples in remaining
141     */

142     protected int bindingCount( Triple t )
143         {
144         int count = 0;
145         for (int i = 0; i < remaining.size(); i += 1)
146             {
147             Triple other = (Triple) remaining.get(i);
148             if (other != t) count += bindingCount( t, other );
149             }
150         return count;
151         }
152         
153     /**
154         Answer the binding count of t with respect to some other triple
155     */

156     protected int bindingCount( Triple t, Triple other )
157         {
158         return
159             bindingCount( t.getSubject(), other )
160             + bindingCount( t.getPredicate(), other )
161             + bindingCount( t.getObject(), other ) ;
162         }
163         
164     protected int bindingCount( Node n, Triple o )
165         {
166         return n.isVariable()
167             ? bc( n, o.getSubject() ) + bc( n, o.getPredicate() ) + bc( n, o.getObject() )
168             : 0;
169         }
170         
171     /**
172         Answer 1 if nodes are .equals, 0 otherwise.
173     */

174     protected int bc( Node n, Node other )
175         { return n.equals( other ) ? 1 : 0; }
176
177     /**
178         Bind a triple by binding each of its nodes.
179     */

180     protected void bind( Triple t )
181         {
182         bind( t.getSubject() );
183         bind( t.getPredicate() );
184         bind( t.getObject() );
185         }
186         
187     protected void bind( Node n )
188         { if (n.isVariable()) bound.add( n ); }
189         
190     /**
191         In this simple sorter, the weight of a triple is the sum of the weights of its nodes.
192         None of the positions get weighted differently. One might choose to weigh
193         positions that were more search-intensive more heavily.
194         
195         @param t the triple to be weighed [with respect to the bound variables]
196         @return the weight of the triple, rising as the triple is more variable
197     */

198     protected int weight( Triple t )
199         {
200         return weight( t.getSubject() ) + weight( t.getPredicate() ) + weight( t.getObject() );
201         }
202         
203     /**
204         In this simple sorter, concrete nodes weigh nothing. [This is, after all, computing
205         rather than building.] ANYs cost the most, because they cannot be bound, and
206         variable nodes cost a little if they are bound and a lot if they are not.
207     <p>
208         The rules are
209     <ul>
210         <li>any concrete node weighs nothing
211         <li>a bound variable node weighs something, but a triple which is three bound
212             variables must weigh less than a triple with an unbound variable
213         <li>an ANY node weighs more than an unbound variable node but less than
214             two unbound variable nodes
215     </ul>
216     
217         @param n the node to be weighed [with respect to the bound variables]
218         @return the weight of the node
219     */

220     protected int weight( Node n )
221         { return n.isConcrete() ? 0 : n.equals( Node.ANY ) ? 5 : bound.contains( n ) ? 1 : 4; }
222     }
223
224
225 /*
226     (c) Copyright 2003, 2004, 2005 Hewlett-Packard Development Company, LP
227     All rights reserved.
228
229     Redistribution and use in source and binary forms, with or without
230     modification, are permitted provided that the following conditions
231     are met:
232
233     1. Redistributions of source code must retain the above copyright
234        notice, this list of conditions and the following disclaimer.
235
236     2. Redistributions in binary form must reproduce the above copyright
237        notice, this list of conditions and the following disclaimer in the
238        documentation and/or other materials provided with the distribution.
239
240     3. The name of the author may not be used to endorse or promote products
241        derived from this software without specific prior written permission.
242
243     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
244     IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
245     OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
246     IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
247     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
248     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
249     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
250     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
251     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
252     THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
253 */
Popular Tags