KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > search > DSQuery


/*
 * DSQuery.java
 *
 * Version: $Revision: 1.26 $
 *
 * Date: $Date: 2006/11/03 05:01:31 $
 *
 * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
 * Institute of Technology. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the Hewlett-Packard Company nor the name of the
 * Massachusetts Institute of Technology nor the names of their
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

package org.dspace.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.TokenMgrError;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.oro.text.perl.Perl5Util;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.core.LogManager;
65
// Known issues:
// - the query string needs to be filtered for security
// - the command-line query needs to process its arguments correctly
//   (they seem to get split up)
/**
70  * DSIndexer contains various static methods for performing queries on indices,
71  * for collections and communities.
72  *
73  */

74 public class DSQuery
75 {
76     // Result types
77
static final String JavaDoc ALL = "999";
78
79     static final String JavaDoc ITEM = "" + Constants.ITEM;
80
81     static final String JavaDoc COLLECTION = "" + Constants.COLLECTION;
82
83     static final String JavaDoc COMMUNITY = "" + Constants.COMMUNITY;
84
85     // cache a Lucene IndexSearcher for more efficient searches
86
private static Searcher searcher;
87
88     private static long lastModified;
89     
90     /** log4j logger */
91     private static Logger log = Logger.getLogger(DSQuery.class);
92
93     static
94     {
95         String JavaDoc maxClauses = ConfigurationManager
96                 .getProperty("search.max-clauses");
97         if (maxClauses != null)
98         {
99             BooleanQuery.setMaxClauseCount(Integer.parseInt(maxClauses));
100         }
101     }
102
103     /**
104      * Do a query, returning a QueryResults object
105      *
106      * @param c context
107      * @param args query arguments in QueryArgs object
108      *
109      * @return query results QueryResults
110      */

111     public static QueryResults doQuery(Context c, QueryArgs args)
112             throws IOException JavaDoc
113     {
114         String JavaDoc querystring = args.getQuery();
115         QueryResults qr = new QueryResults();
116         List JavaDoc hitHandles = new ArrayList JavaDoc();
117         List JavaDoc hitTypes = new ArrayList JavaDoc();
118
119         // set up the QueryResults object
120
qr.setHitHandles(hitHandles);
121         qr.setHitTypes(hitTypes);
122         qr.setStart(args.getStart());
123         qr.setPageSize(args.getPageSize());
124
125         // massage the query string a bit
126
querystring = checkEmptyQuery(querystring); // change nulls to an empty
127
// string
128
querystring = workAroundLuceneBug(querystring); // logicals changed to
129
// && ||, etc.
130
querystring = stripHandles(querystring); // remove handles from query
131
// string
132
querystring = stripAsterisk(querystring); // remove asterisk from
133
// beginning of string
134

135         try
136         {
137             // grab a searcher, and do the search
138
Searcher searcher = getSearcher(ConfigurationManager
139                     .getProperty("search.dir"));
140
141             QueryParser qp = new QueryParser("default", DSIndexer.getAnalyzer());
142             log.info("Final query string: " + querystring);
143             
144             String JavaDoc operator = ConfigurationManager.getProperty("search.operator");
145             if (operator == null || operator.equals("OR"))
146             {
147                 qp.setDefaultOperator(QueryParser.OR_OPERATOR);
148             }
149             else
150             {
151                 qp.setDefaultOperator(QueryParser.AND_OPERATOR);
152             }
153             
154             Query myquery = qp.parse(querystring);
155             Hits hits = searcher.search(myquery);
156
157             // set total number of hits
158
qr.setHitCount(hits.length());
159
160             // We now have a bunch of hits - snip out a 'window'
161
// defined in start, count and return the handles
162
// from that window
163
// first, are there enough hits?
164
if (args.getStart() < hits.length())
165             {
166                 // get as many as we can, up to the window size
167
// how many are available after snipping off at offset 'start'?
168
int hitsRemaining = hits.length() - args.getStart();
169
170                 int hitsToProcess = (hitsRemaining < args.getPageSize()) ? hitsRemaining
171                         : args.getPageSize();
172
173                 for (int i = args.getStart(); i < (args.getStart() + hitsToProcess); i++)
174                 {
175                     Document d = hits.doc(i);
176
177                     String JavaDoc handleText = d.get("handle");
178                     String JavaDoc handletype = d.get("type");
179
180                     hitHandles.add(handleText);
181
182                     if (handletype.equals("" + Constants.ITEM))
183                     {
184                         hitTypes.add(new Integer JavaDoc(Constants.ITEM));
185                     }
186                     else if (handletype.equals("" + Constants.COLLECTION))
187                     {
188                         hitTypes.add(new Integer JavaDoc(Constants.COLLECTION));
189                     }
190                     else if (handletype.equals("" + Constants.COMMUNITY))
191                     {
192                         hitTypes.add(new Integer JavaDoc(Constants.COMMUNITY));
193                     }
194                     else
195                     {
196                         // error! unknown type!
197
}
198                 }
199             }
200         }
201         catch (NumberFormatException JavaDoc e)
202         {
203             log
204                     .warn(LogManager.getHeader(c, "Number format exception", ""
205                             + e));
206
207             qr.setErrorMsg("Number format exception");
208         }
209         catch (ParseException e)
210         {
211             // a parse exception - log and return null results
212
log.warn(LogManager.getHeader(c, "Invalid search string", "" + e));
213
214             qr.setErrorMsg("Invalid search string");
215         }
216         catch (TokenMgrError tme)
217         {
218             // Similar to parse exception
219
log
220                     .warn(LogManager.getHeader(c, "Invalid search string", ""
221                             + tme));
222
223             qr.setErrorMsg("Invalid search string");
224         }
225         catch(BooleanQuery.TooManyClauses e)
226         {
227             log.warn(LogManager.getHeader(c, "Query too broad", e.toString()));
228             qr.setErrorMsg("Your query was too broad. Try a narrower query.");
229         }
230
231         return qr;
232     }
233
234     static String JavaDoc checkEmptyQuery(String JavaDoc myquery)
235     {
236         if (myquery.equals(""))
237         {
238             myquery = "empty_query_string";
239         }
240
241         return myquery;
242     }
243
244     static String JavaDoc workAroundLuceneBug(String JavaDoc myquery)
245     {
246         // Lucene currently has a bug which breaks wildcard
247
// searching when you have uppercase characters.
248
// Here we substitute the boolean operators -- which
249
// have to be uppercase -- before tranforming the
250
// query string to lowercase.
251
Perl5Util util = new Perl5Util();
252
253         myquery = util.substitute("s/ AND / && /g", myquery);
254         myquery = util.substitute("s/ OR / || /g", myquery);
255         myquery = util.substitute("s/ NOT / ! /g", myquery);
256
257         myquery = myquery.toLowerCase();
258
259         return myquery;
260     }
261
262     static String JavaDoc stripHandles(String JavaDoc myquery)
263     {
264         // Drop beginning pieces of full handle strings
265
Perl5Util util = new Perl5Util();
266
267         myquery = util.substitute("s|^(\\s+)?http://hdl\\.handle\\.net/||",
268                 myquery);
269         myquery = util.substitute("s|^(\\s+)?hdl:||", myquery);
270
271         return myquery;
272     }
273
274     static String JavaDoc stripAsterisk(String JavaDoc myquery)
275     {
276         // query strings (or words) begining with "*" cause a null pointer error
277
Perl5Util util = new Perl5Util();
278
279         myquery = util.substitute("s/^\\*//", myquery);
280         myquery = util.substitute("s| \\*| |", myquery);
281         myquery = util.substitute("s|\\(\\*|\\(|", myquery);
282         myquery = util.substitute("s|:\\*|:|", myquery);
283
284         return myquery;
285     }
286
287     /**
288      * Do a query, restricted to a collection
289      *
290      * @param c
291      * context
292      * @param args
293      * query args
294      * @param coll
295      * collection to restrict to
296      *
297      * @return QueryResults same results as doQuery, restricted to a collection
298      */

299     public static QueryResults doQuery(Context c, QueryArgs args,
300             Collection coll) throws IOException JavaDoc
301     {
302         String JavaDoc querystring = args.getQuery();
303
304         querystring = checkEmptyQuery(querystring);
305
306         String JavaDoc location = "l" + (coll.getID());
307
308         String JavaDoc newquery = new String JavaDoc("+(" + querystring + ") +location:\""
309                 + location + "\"");
310
311         args.setQuery(newquery);
312
313         return doQuery(c, args);
314     }
315
316     /**
317      * Do a query, restricted to a community
318      *
319      * @param c
320      * context
321      * @param args
322      * query args
323      * @param comm
324      * community to restrict to
325      *
326      * @return QueryResults same results as doQuery, restricted to a collection
327      */

328     public static QueryResults doQuery(Context c, QueryArgs args, Community comm)
329             throws IOException JavaDoc
330     {
331         String JavaDoc querystring = args.getQuery();
332
333         querystring = checkEmptyQuery(querystring);
334
335         String JavaDoc location = "m" + (comm.getID());
336
337         String JavaDoc newquery = new String JavaDoc("+(" + querystring + ") +location:\""
338                 + location + "\"");
339
340         args.setQuery(newquery);
341
342         return doQuery(c, args);
343     }
344
345
346     /**
347      * Do a query, printing results to stdout largely for testing, but it is
348      * useful
349      */

350     public static void doCMDLineQuery(String JavaDoc query)
351     {
352         System.out.println("Command line query: " + query);
353         System.out.println("Only reporting default-sized results list");
354
355         try
356         {
357             Context c = new Context();
358
359             QueryArgs args = new QueryArgs();
360             args.setQuery(query);
361
362             QueryResults results = doQuery(c, args);
363
364             Iterator JavaDoc i = results.getHitHandles().iterator();
365             Iterator JavaDoc j = results.getHitTypes().iterator();
366
367             while (i.hasNext())
368             {
369                 String JavaDoc thisHandle = (String JavaDoc) i.next();
370                 Integer JavaDoc thisType = (Integer JavaDoc) j.next();
371                 String JavaDoc type = Constants.typeText[thisType.intValue()];
372
373                 // also look up type
374
System.out.println(type + "\t" + thisHandle);
375             }
376         }
377         catch (Exception JavaDoc e)
378         {
379             System.out.println("Exception caught: " + e);
380         }
381     }
382
383     public static void main(String JavaDoc[] args)
384     {
385         if (args.length > 0)
386         {
387             DSQuery.doCMDLineQuery(args[0]);
388         }
389     }
390
391     /*--------- private methods ----------*/
392
393     /**
394      * get an IndexSearcher, hopefully a cached one (gives much better
395      * performance.) checks to see if the index has been modified - if so, it
396      * creates a new IndexSearcher
397      */

398     private static synchronized Searcher getSearcher(String JavaDoc indexDir)
399             throws IOException JavaDoc
400     {
401         if (lastModified != IndexReader.getCurrentVersion(indexDir))
402         {
403             // there's a new index, open it
404
lastModified = IndexReader.getCurrentVersion(indexDir);
405             searcher = new IndexSearcher(indexDir);
406         }
407
408         return searcher;
409     }
410 }
411
// It is now up to the display page to do the right thing when displaying
// items, communities and collections.
414
Popular Tags