1 23 24 package org.apache.slide.index; 25 26 import org.apache.slide.search.IndexException; 27 import org.apache.slide.search.basic.IBasicExpressionFactory; 28 import org.apache.slide.util.logger.Logger; 29 import org.apache.slide.common.*; 30 import org.apache.slide.content.NodeRevisionNumber; 31 import org.apache.slide.content.NodeRevisionDescriptor; 32 import org.apache.slide.content.NodeRevisionContent; 33 import org.apache.slide.store.IndexStore; 34 import org.apache.slide.extractor.ExtractorManager; 35 import org.apache.slide.extractor.ExtractorException; 36 import org.apache.slide.extractor.ContentExtractor; 37 import org.apache.lucene.index.IndexWriter; 38 import org.apache.lucene.index.IndexReader; 39 import org.apache.lucene.index.Term; 40 import org.apache.lucene.analysis.standard.StandardAnalyzer; 41 import org.apache.lucene.analysis.Analyzer; 42 import org.apache.lucene.document.Document; 43 import org.apache.lucene.document.Field; 44 45 import java.io.IOException ; 46 import java.io.CharArrayReader ; 47 import java.io.ByteArrayInputStream ; 48 import java.io.Reader ; 49 import java.util.ArrayList ; 50 import java.util.Collection ; 51 import java.util.Hashtable ; 52 import java.util.Iterator ; 53 import java.util.StringTokenizer ; 54 55 60 public class TextContentIndexer extends XAServiceBase implements IndexStore { 61 62 private static final String INDEX_PATH = "indexpath"; 63 private static final String INCLUDES = "includes"; 64 private static final String ANALYZER = "analyzer"; 65 66 public static final String URI_FIELD = "uri"; 67 public static final String CONTENT_TEXT = "content"; 68 69 private String indexpath = ""; 70 private Collection includes; 71 private String analyzerClassName; 72 private Analyzer analyzer; 73 private boolean started = false; 74 75 83 public void initialize(NamespaceAccessToken token) 84 throws ServiceInitializationFailedException 85 { 86 initAnalyzer(); 87 88 IndexWriter indexWriter = null; 89 try 90 { 91 indexWriter = new IndexWriter(indexpath, analyzer, false); 92 } 93 catch (IOException e) 95 { 96 try 97 { 98 indexWriter = new IndexWriter(indexpath, analyzer, true); 100 } 101 catch (IOException ex) 102 { 103 getLogger().log("Error while initializing the Lucene index " + e.getMessage(), LOG_CHANNEL, Logger.ERROR); 104 throw new ServiceInitializationFailedException(this, ex); 105 } 106 } 107 108 try 109 { 110 indexWriter.close(); 111 } 112 catch (IOException e) 113 { 114 getLogger().log("Error while initializing the Lucene index " + e.getMessage(), LOG_CHANNEL, Logger.ERROR); 115 throw new ServiceInitializationFailedException (this, e); 116 117 } 118 getLogger().log("Lucene is correctly initialized", LOG_CHANNEL, Logger.INFO); 119 } 120 121 127 synchronized public void createIndex (Uri uri, 128 NodeRevisionDescriptor revisionDescriptor, 129 NodeRevisionContent revisionContent) 130 throws IndexException 131 { 132 if (!isIncluded(uri.toString())) return; 133 IndexWriter indexWriter = null; 134 try 135 { 136 indexWriter = new IndexWriter(indexpath, analyzer, false); 137 138 Document doc = new Document(); 140 141 doc.add(Field.Keyword(URI_FIELD, uri.toString())); 142 doc.add(Field.Text(CONTENT_TEXT, readContent(revisionDescriptor, revisionContent))); 143 144 if ( revisionContent != null && revisionDescriptor != null ) { 145 ContentExtractor[] extractor = ExtractorManager.getInstance().getContentExtractors(uri.getNamespace().getName(), null, revisionDescriptor); 146 for ( int i = 0; i < extractor.length; i++ ) { 147 Reader reader = extractor[i].extract(new ByteArrayInputStream (revisionContent.getContentBytes())); 148 doc.add(Field.Text(CONTENT_TEXT, reader)); 149 } 150 } 151 152 indexWriter.addDocument(doc); 153 indexWriter.optimize(); 154 155 getLogger().log( 156 "Added '" + uri.toString() + " - " + revisionDescriptor.getRevisionNumber().toString() + "' to index", 157 LOG_CHANNEL, 158 Logger.INFO); 159 } 160 catch (IOException e) 161 { 162 getLogger().log( 163 "Error creating an index with " + uri.toString() + " - " + revisionDescriptor.getRevisionNumber(), 164 LOG_CHANNEL, 165 Logger.ERROR); 166 } 167 catch( ExtractorException e) 168 { 169 getLogger().log( 170 "Error extracting content from " + uri.toString() + " - " + revisionDescriptor.getRevisionNumber(), 171 LOG_CHANNEL, 172 Logger.ERROR); 173 } 174 finally 175 { 176 try 177 { 178 if(indexWriter != null) 179 indexWriter.close(); 180 } 181 catch(IOException ioe ) {} 182 } 183 } 184 185 195 synchronized public void updateIndex(Uri uri, 196 NodeRevisionDescriptor revisionDescriptor, 197 NodeRevisionContent revisionContent) 198 throws IndexException 199 { 200 if (!isIncluded(uri.toString())) return; 201 IndexWriter indexWriter = null; 202 try 203 { 204 IndexReader indexReader = IndexReader.open(indexpath); 206 Term term = new Term(URI_FIELD, uri.toString()); 207 208 indexReader.delete(term); 209 indexReader.close(); 210 211 indexWriter = new IndexWriter(indexpath, analyzer, false); 212 213 Document doc = new Document(); 215 216 doc.add(Field.Keyword(URI_FIELD, uri.toString())); 217 doc.add(Field.Text(CONTENT_TEXT, readContent(revisionDescriptor, revisionContent))); 218 219 if ( revisionContent != null && revisionDescriptor != null ) { 220 ContentExtractor[] extractor = ExtractorManager.getInstance().getContentExtractors(uri.getNamespace().getName(), null, revisionDescriptor); 221 for ( int i = 0; i < extractor.length; i++ ) { 222 Reader reader = extractor[i].extract(new ByteArrayInputStream (revisionContent.getContentBytes())); 223 doc.add(Field.Text(CONTENT_TEXT, reader)); 224 } 225 } 226 227 indexWriter.addDocument(doc); 228 indexWriter.optimize(); 229 230 if (getLogger().isEnabled(Logger.DEBUG)) { 231 getLogger().log( 232 "Updated '" + uri + " - " + revisionDescriptor.getRevisionNumber() + "' to index", 233 LOG_CHANNEL, 234 Logger.DEBUG); 235 } 236 } 237 catch (IOException e) 238 { 239 getLogger().log( 240 "Error updating the index with " + uri + " - " + revisionDescriptor.getRevisionNumber(), 241 LOG_CHANNEL, 242 Logger.ERROR); 243 } 244 catch( ExtractorException e) 245 { 246 getLogger().log( 247 "Error extracting content from " + uri + " - " + revisionDescriptor.getRevisionNumber(), 248 LOG_CHANNEL, 249 Logger.ERROR); 250 } 251 finally 252 { 253 try 254 { 255 if(indexWriter != null) 256 indexWriter.close(); 257 } 258 catch(IOException ioe ) {} 259 } 260 } 261 262 268 synchronized public void dropIndex(Uri uri, NodeRevisionNumber number) 269 throws IndexException 270 { 271 if (!isIncluded(uri.toString())) return; 272 if (number == NodeRevisionNumber.HIDDEN_0_0) return; 273 274 IndexWriter indexWriter = null; 275 try 276 { 277 IndexReader indexReader = IndexReader.open(indexpath); 278 Term term = new Term(URI_FIELD, uri.toString()); 279 280 indexReader.delete(term); 281 indexReader.close(); 282 283 indexWriter = new IndexWriter(indexpath, analyzer, false); 284 indexWriter.optimize(); 285 286 if (getLogger().isEnabled(Logger.DEBUG)) { 287 getLogger().log( 288 "Deleted '" + uri + "' from the index", 289 LOG_CHANNEL, 290 Logger.DEBUG); 291 } 292 } 293 catch (IOException e) 294 { 295 getLogger().log("Impossible to delete " + uri + " - " + number + " from the Lucene index"); 296 } 297 finally 298 { 299 try 300 { 301 if(indexWriter != null) 302 indexWriter.close(); 303 } 304 catch(IOException ioe ) {} 305 } 306 } 307 308 309 315 public IBasicExpressionFactory getBasicExpressionFactory() 316 { 317 return new TextContainsExpressionFactory(indexpath, analyzer); 318 } 319 320 321 326 public void connect() throws ServiceConnectionFailedException 327 { 328 getLogger().log( 329 "TextContentIndexer: connect", 330 LOG_CHANNEL, 331 Logger.INFO); 332 started = true; 333 } 334 335 341 public boolean isConnected() throws ServiceAccessException 342 { 343 return started; 344 } 345 346 359 public void setParameters (Hashtable parameters) throws ServiceParameterErrorException, ServiceParameterMissingException 360 { 361 indexpath = (String )parameters.get (INDEX_PATH); 362 if (indexpath == null || indexpath.length() == 0) { 363 throw new ServiceParameterMissingException (this, INDEX_PATH); 364 } 365 String includes = (String ) parameters.get(INCLUDES); 366 if (includes != null && includes.length() > 0) { 367 StringTokenizer tokenizer = new StringTokenizer (includes, ","); 368 this.includes = new ArrayList (tokenizer.countTokens()); 369 while (tokenizer.hasMoreTokens()) { 370 this.includes.add(tokenizer.nextToken()); 371 } 372 } 373 analyzerClassName = (String )parameters.get (ANALYZER); 374 } 375 376 381 public void disconnect() throws ServiceDisconnectionFailedException 382 { 383 getLogger().log( 384 "TextContentIndexer: disconnect", 385 LOG_CHANNEL, 386 Logger.INFO); 387 started = false; 388 } 389 390 395 public void reset() throws ServiceResetFailedException 396 { 397 getLogger().log( 398 "TextContentIndexer: reset", 399 LOG_CHANNEL, 400 Logger.INFO); 401 } 402 403 protected Reader readContent(NodeRevisionDescriptor revisionDescriptor, 404 NodeRevisionContent revisionContent) throws IOException { 405 return new CharArrayReader (revisionContent.getContent()); 406 } 407 408 protected boolean isIncluded(String uri) { 409 if (includes == null) return true; 410 Iterator iter = includes.iterator(); 411 while (iter.hasNext()) { 412 if (uri.startsWith((String ) iter.next())) { 413 return true; 414 } 415 } 416 return false; 417 } 418 419 420 protected void initAnalyzer() throws ServiceInitializationFailedException { 421 422 if (analyzerClassName == null || analyzerClassName.length() == 0) { 423 getLogger().log("using Lucene StandardAnalyzer", LOG_CHANNEL, Logger.INFO); 424 analyzer = new StandardAnalyzer(); 425 426 } else { 427 getLogger().log("loading Lucene analyzer: " + analyzerClassName, LOG_CHANNEL, Logger.INFO); 428 429 try { 430 Class analyzerClazz = Class.forName(analyzerClassName); 431 analyzer = (Analyzer)analyzerClazz.newInstance(); 432 433 } catch (ClassNotFoundException cnfe) { 434 getLogger().log("Error while instantiating analyzer " + 435 analyzerClassName + cnfe.getMessage(), LOG_CHANNEL, Logger.ERROR); 436 throw new ServiceInitializationFailedException(this, cnfe); 437 438 } catch (InstantiationException ie) { 439 getLogger().log("Error while instantiating analyzer " + 440 analyzerClassName + ie.getMessage(), LOG_CHANNEL, Logger.ERROR); 441 throw new ServiceInitializationFailedException(this, ie); 442 443 } catch (IllegalAccessException iae) { 444 getLogger().log("Error while instantiating analyzer " + 445 analyzerClassName + iae.getMessage(), LOG_CHANNEL, Logger.ERROR); 446 throw new ServiceInitializationFailedException(this, iae); 447 } 448 } 449 } 450 451 } | Popular Tags |