1 40 package org.dspace.search; 41 42 import java.io.IOException ; 43 import java.util.ArrayList ; 44 import java.util.Iterator ; 45 import java.util.List ; 46 47 import org.apache.log4j.Logger; 48 import org.apache.lucene.document.Document; 49 import org.apache.lucene.index.IndexReader; 50 import org.apache.lucene.queryParser.ParseException; 51 import org.apache.lucene.queryParser.QueryParser; 52 import org.apache.lucene.queryParser.TokenMgrError; 53 import org.apache.lucene.search.BooleanQuery; 54 import org.apache.lucene.search.Hits; 55 import org.apache.lucene.search.IndexSearcher; 56 import org.apache.lucene.search.Query; 57 import org.apache.lucene.search.Searcher; 58 import org.apache.oro.text.perl.Perl5Util; 59 import org.dspace.content.Collection; 60 import org.dspace.content.Community; 61 import org.dspace.core.ConfigurationManager; 62 import org.dspace.core.Constants; 63 import org.dspace.core.Context; 64 import org.dspace.core.LogManager; 65 66 74 public class DSQuery 75 { 76 static final String ALL = "999"; 78 79 static final String ITEM = "" + Constants.ITEM; 80 81 static final String COLLECTION = "" + Constants.COLLECTION; 82 83 static final String COMMUNITY = "" + Constants.COMMUNITY; 84 85 private static Searcher searcher; 87 88 private static long lastModified; 89 90 91 private static Logger log = Logger.getLogger(DSQuery.class); 92 93 static 94 { 95 String maxClauses = ConfigurationManager 96 .getProperty("search.max-clauses"); 97 if (maxClauses != null) 98 { 99 BooleanQuery.setMaxClauseCount(Integer.parseInt(maxClauses)); 100 } 101 } 102 103 111 public static QueryResults doQuery(Context c, QueryArgs args) 112 throws IOException 113 { 114 String querystring = args.getQuery(); 115 QueryResults qr = new QueryResults(); 116 List hitHandles = new ArrayList (); 117 List hitTypes = new ArrayList (); 118 119 qr.setHitHandles(hitHandles); 121 qr.setHitTypes(hitTypes); 122 qr.setStart(args.getStart()); 123 qr.setPageSize(args.getPageSize()); 124 125 querystring = checkEmptyQuery(querystring); querystring = workAroundLuceneBug(querystring); querystring = stripHandles(querystring); querystring = stripAsterisk(querystring); 135 try 136 { 137 Searcher searcher = getSearcher(ConfigurationManager 139 .getProperty("search.dir")); 140 141 QueryParser qp = new QueryParser("default", DSIndexer.getAnalyzer()); 142 log.info("Final query string: " + querystring); 143 144 String operator = ConfigurationManager.getProperty("search.operator"); 145 if (operator == null || operator.equals("OR")) 146 { 147 qp.setDefaultOperator(QueryParser.OR_OPERATOR); 148 } 149 else 150 { 151 qp.setDefaultOperator(QueryParser.AND_OPERATOR); 152 } 153 154 Query myquery = qp.parse(querystring); 155 Hits hits = searcher.search(myquery); 156 157 qr.setHitCount(hits.length()); 159 160 if (args.getStart() < hits.length()) 165 { 166 int hitsRemaining = hits.length() - args.getStart(); 169 170 int hitsToProcess = (hitsRemaining < args.getPageSize()) ? hitsRemaining 171 : args.getPageSize(); 172 173 for (int i = args.getStart(); i < (args.getStart() + hitsToProcess); i++) 174 { 175 Document d = hits.doc(i); 176 177 String handleText = d.get("handle"); 178 String handletype = d.get("type"); 179 180 hitHandles.add(handleText); 181 182 if (handletype.equals("" + Constants.ITEM)) 183 { 184 hitTypes.add(new Integer (Constants.ITEM)); 185 } 186 else if (handletype.equals("" + Constants.COLLECTION)) 187 { 188 hitTypes.add(new Integer (Constants.COLLECTION)); 189 } 190 else if (handletype.equals("" + Constants.COMMUNITY)) 191 { 192 hitTypes.add(new Integer (Constants.COMMUNITY)); 193 } 194 else 195 { 196 } 198 } 199 } 200 } 201 catch (NumberFormatException e) 202 { 203 log 204 .warn(LogManager.getHeader(c, "Number format exception", "" 205 + e)); 206 207 qr.setErrorMsg("Number format exception"); 208 } 209 catch (ParseException e) 210 { 211 log.warn(LogManager.getHeader(c, "Invalid search string", "" + e)); 213 214 qr.setErrorMsg("Invalid search string"); 215 } 216 catch (TokenMgrError tme) 217 { 218 log 220 .warn(LogManager.getHeader(c, "Invalid search string", "" 221 + tme)); 222 223 qr.setErrorMsg("Invalid search string"); 224 } 225 catch(BooleanQuery.TooManyClauses e) 226 { 227 log.warn(LogManager.getHeader(c, "Query too broad", e.toString())); 228 qr.setErrorMsg("Your query was too broad. Try a narrower query."); 229 } 230 231 return qr; 232 } 233 234 static String checkEmptyQuery(String myquery) 235 { 236 if (myquery.equals("")) 237 { 238 myquery = "empty_query_string"; 239 } 240 241 return myquery; 242 } 243 244 static String workAroundLuceneBug(String myquery) 245 { 246 Perl5Util util = new Perl5Util(); 252 253 myquery = util.substitute("s/ AND / && /g", myquery); 254 myquery = util.substitute("s/ OR / || /g", myquery); 255 myquery = util.substitute("s/ NOT / ! /g", myquery); 256 257 myquery = myquery.toLowerCase(); 258 259 return myquery; 260 } 261 262 static String stripHandles(String myquery) 263 { 264 Perl5Util util = new Perl5Util(); 266 267 myquery = util.substitute("s|^(\\s+)?http://hdl\\.handle\\.net/||", 268 myquery); 269 myquery = util.substitute("s|^(\\s+)?hdl:||", myquery); 270 271 return myquery; 272 } 273 274 static String stripAsterisk(String myquery) 275 { 276 Perl5Util util = new Perl5Util(); 278 279 myquery = util.substitute("s/^\\*//", myquery); 280 myquery = util.substitute("s| \\*| |", myquery); 281 myquery = util.substitute("s|\\(\\*|\\(|", myquery); 282 myquery = util.substitute("s|:\\*|:|", myquery); 283 284 return myquery; 285 } 286 287 299 public static QueryResults doQuery(Context c, QueryArgs args, 300 Collection coll) throws IOException 301 { 302 String querystring = args.getQuery(); 303 304 querystring = checkEmptyQuery(querystring); 305 306 String location = "l" + (coll.getID()); 307 308 String newquery = new String ("+(" + querystring + ") +location:\"" 309 + location + "\""); 310 311 args.setQuery(newquery); 312 313 return doQuery(c, args); 314 } 315 316 328 public static QueryResults doQuery(Context c, QueryArgs args, Community comm) 329 throws IOException 330 { 331 String querystring = args.getQuery(); 332 333 querystring = checkEmptyQuery(querystring); 334 335 String location = "m" + (comm.getID()); 336 337 String newquery = new String ("+(" + querystring + ") +location:\"" 338 + location + "\""); 339 340 args.setQuery(newquery); 341 342 return doQuery(c, args); 343 } 344 345 346 350 public static void doCMDLineQuery(String query) 351 { 352 System.out.println("Command line query: " + query); 353 System.out.println("Only reporting default-sized results list"); 354 355 try 356 { 357 Context c = new Context(); 358 359 QueryArgs args = new QueryArgs(); 360 args.setQuery(query); 361 362 QueryResults results = doQuery(c, args); 363 364 Iterator i = results.getHitHandles().iterator(); 365 Iterator j = results.getHitTypes().iterator(); 366 367 while (i.hasNext()) 368 { 369 String thisHandle = (String ) i.next(); 370 Integer thisType = (Integer ) j.next(); 371 String type = Constants.typeText[thisType.intValue()]; 372 373 System.out.println(type + "\t" + thisHandle); 375 } 376 } 377 catch (Exception e) 378 { 379 System.out.println("Exception caught: " + e); 380 } 381 } 382 383 public static void main(String [] args) 384 { 385 if (args.length > 0) 386 { 387 DSQuery.doCMDLineQuery(args[0]); 388 } 389 } 390 391 392 393 398 private static synchronized Searcher getSearcher(String indexDir) 399 throws IOException 400 { 401 if (lastModified != IndexReader.getCurrentVersion(indexDir)) 402 { 403 lastModified = IndexReader.getCurrentVersion(indexDir); 405 searcher = new IndexSearcher(indexDir); 406 } 407 408 return searcher; 409 } 410 } 411 412 | Popular Tags |