package org.contineo.searchengine.crawler;

import java.io.File;
import java.io.IOException;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;
import org.contineo.core.LoggingManager;
import org.contineo.core.config.SettingConfigurator;
import org.contineo.core.text.AnalyzeText;
import org.contineo.core.text.analyze.Stopwords;
import org.contineo.core.text.lili.LanguageIdentifier;
import org.contineo.core.text.parser.Parser;
import org.contineo.core.text.parser.ParserFactory;
import org.contineo.searchengine.util.SquareSimilarity;

/**
 * Maintains the Lucene indexes used by the search engine.
 * <p>
 * Three separate indexes are kept below the directory configured under the
 * <code>indexdir</code> setting: <code>english/</code>, <code>french/</code>
 * and <code>german/</code>. A document is written to the index matching its
 * language code (<code>"de"</code>, <code>"fr"</code>, anything else is
 * treated as English).
 * <p>
 * Only {@link #addFile} is synchronized; the other methods perform no
 * locking of their own.
 */
public class Indexer {

    /** Sub-directory of the English index. */
    private static final String ENGLISH_DIR = "english";

    /** Sub-directory of the French index. */
    private static final String FRENCH_DIR = "french";

    /** Sub-directory of the German index. */
    private static final String GERMAN_DIR = "german";

    /**
     * All index sub-directories. The order (english, french, german) fixes
     * the sub-reader order of the combined reader used by
     * {@link #getDocument(int)}.
     */
    private static final String[] INDEX_DIRS = { ENGLISH_DIR, FRENCH_DIR, GERMAN_DIR };

    /** Logger of this class. */
    private final Logger logger;

    /** Source of the <code>indexdir</code> setting. */
    private final SettingConfigurator conf;

    /**
     * Creates an indexer that reads its index location from a fresh
     * {@link SettingConfigurator}.
     */
    public Indexer() {
        logger = LoggingManager.getLogger(this.getClass());
        conf = new SettingConfigurator();
    }

    /**
     * Indexes a single file and stores its terms.
     * <p>
     * Files whose extension is an integer (for example <code>report.3</code>)
     * are skipped. NOTE(review): this presumably excludes archived document
     * versions - confirm against the code that writes such files.
     * Failures while indexing or while storing terms are logged and do not
     * propagate.
     *
     * @param file     the file on disk
     * @param d        the document record the file belongs to
     * @param content  the extracted text of the file
     * @param language language code of the content
     * @return the value returned by {@link #addDocument(Document, String)},
     *         or -1 if the file was skipped or indexing failed
     * @throws Exception if the Lucene document wrapper cannot be created
     */
    public synchronized int addFile(File file, org.contineo.documan.Document d, StringBuffer content,
            String language) throws Exception {
        int result = -1;
        if (hasNumericExtension(file)) {
            return result;
        }

        LuceneDocument lDoc = new LuceneDocument(d);
        try {
            Document doc = lDoc.getDocument(file, content);
            result = addDocument(doc, language);
        } catch (Exception e) {
            logError(e);
        }

        // Term storage is attempted even if indexing failed.
        try {
            AnalyzeText aText = new AnalyzeText();
            aText.storeTerms(d.getMenuId(), content.toString(), language);
        } catch (Exception e) {
            logError(e);
        }
        return result;
    }

    /**
     * Adds a Lucene document to the index of the given language and
     * optimizes that index. The index is created if opening it for append
     * fails.
     * <p>
     * NOTE(review): the returned number is relative to the single language
     * index, whereas {@link #getDocument(int)} resolves ids against all
     * three indexes combined - confirm that callers account for this.
     *
     * @param doc      the document to add
     * @param language <code>"de"</code> or <code>"fr"</code>; any other
     *                 value, including <code>null</code>, selects English
     * @return the document count of the language index minus one after the
     *         add, or -1 if the document could not be added
     */
    public int addDocument(Document doc, String language) {
        IndexWriter writer = null;
        try {
            Analyzer analyzer;
            String dir;
            if ("de".equals(language)) {
                analyzer = new GermanAnalyzer(Stopwords.getStopwords("de"));
                dir = GERMAN_DIR;
            } else if ("fr".equals(language)) {
                analyzer = new FrenchAnalyzer(Stopwords.getStopwords("fr"));
                dir = FRENCH_DIR;
            } else {
                analyzer = new StandardAnalyzer(Stopwords.getStopwords("en"));
                dir = ENGLISH_DIR;
            }

            writer = openWriter(getIndexRoot() + dir + "/", analyzer);
            writer.setSimilarity(new SquareSimilarity());
            writer.addDocument(doc);
            writer.optimize();
            // Read the count while the writer is still open.
            int lastId = writer.docCount() - 1;
            writer.close();
            writer = null;
            return lastId;
        } catch (Exception e) {
            logError(e);
            return -1;
        } finally {
            closeQuietly(writer);
        }
    }

    /**
     * Recursively indexes a file or directory tree. For every regular file
     * a parser is looked up; files without a parser are skipped. The
     * language is detected from the parsed content and defaults to
     * <code>"en"</code> when detection yields nothing. Failures on an
     * individual file are logged and do not stop the traversal.
     *
     * @param file a file or directory
     * @param doc  the document record passed on to {@link #addFile}
     * @throws Exception declared for backward compatibility
     */
    public void addDirectory(File file, org.contineo.documan.Document doc) throws Exception {
        if (file.isDirectory()) {
            String[] subitems = file.list();
            if (subitems == null) {
                // File.list() returns null when the directory cannot be read.
                logger.warn("Unable to list directory " + file);
                return;
            }
            for (int i = 0; i < subitems.length; i++) {
                addDirectory(new File(file, subitems[i]), doc);
            }
            return;
        }

        try {
            Parser parser = ParserFactory.getParser(file);
            if (parser == null) {
                return;
            }
            StringBuffer content = parser.getContent();
            LanguageIdentifier lili = new LanguageIdentifier();
            String language = lili.identify(content.toString());
            if (language == null || language.equals("")) {
                language = "en";
            }
            addFile(file, doc, content, language);
        } catch (Exception e) {
            logError(e);
        }
    }

    /**
     * Optimizes the three language indexes. Each index is handled on its
     * own, so a failure on one (which is logged) does not prevent the
     * others from being optimized.
     */
    protected void optimize() {
        String root = getIndexRoot();
        optimizeIndex(root + ENGLISH_DIR + "/", new StandardAnalyzer());
        optimizeIndex(root + FRENCH_DIR + "/", new FrenchAnalyzer());
        optimizeIndex(root + GERMAN_DIR + "/", new GermanAnalyzer());
    }

    /**
     * Deletes every indexed document whose <code>menuid</code> field equals
     * the given value from all language indexes, then optimizes the
     * indexes. An index that cannot be opened is logged and skipped.
     *
     * @param menuid value of the <code>menuid</code> field to delete
     */
    public void deleteFile(String menuid) {
        String root = getIndexRoot();
        Term term = new Term("menuid", menuid);
        boolean anyProcessed = false;
        for (int i = 0; i < INDEX_DIRS.length; i++) {
            if (deleteFromIndex(root + INDEX_DIRS[i] + "/", term)) {
                anyProcessed = true;
            }
        }
        if (anyProcessed) {
            optimize();
        }
    }

    /**
     * Fetches a document by its id within the combined view of the
     * english, french and german indexes (in that order).
     *
     * @param luceneid document id relative to the combined reader
     * @return the document, or <code>null</code> if it could not be read
     */
    public Document getDocument(int luceneid) {
        String root = getIndexRoot();
        IndexReader reader = null;
        try {
            reader = openMultiReader(root);
            return reader.document(luceneid);
        } catch (Exception e) {
            logError(e);
            return null;
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Forcibly removes the Lucene locks of all three indexes. Failures are
     * logged per index.
     */
    public void unlock() {
        String root = getIndexRoot();
        for (int i = 0; i < INDEX_DIRS.length; i++) {
            unlockIndex(root + INDEX_DIRS[i] + "/");
        }
    }

    /**
     * Tells whether at least one of the three indexes is currently locked.
     * An index that cannot be inspected is logged and counted as not
     * locked.
     *
     * @return <code>true</code> if any index reports a lock
     */
    public boolean isLocked() {
        boolean result = false;
        String root = getIndexRoot();
        for (int i = 0; i < INDEX_DIRS.length; i++) {
            if (isIndexLocked(root + INDEX_DIRS[i] + "/")) {
                result = true;
            }
        }
        return result;
    }

    /**
     * Counts the documents in all language indexes. An index that cannot
     * be read is logged and contributes zero.
     *
     * @return the summed <code>numDocs()</code> of the readable indexes
     */
    public int getCount() {
        int count = 0;
        String root = getIndexRoot();
        for (int i = 0; i < INDEX_DIRS.length; i++) {
            count += countDocuments(root + INDEX_DIRS[i] + "/");
        }
        return count;
    }

    /** Returns the configured index root, guaranteed to end with a directory separator. */
    private String getIndexRoot() {
        String path = conf.getValue("indexdir");
        // File.pathSeparator (":" / ";") separates entries of a path list;
        // the directory separator is what has to be tested here.
        if (!path.endsWith("/") && !path.endsWith(File.separator)) {
            path += "/";
        }
        return path;
    }

    /** Tells whether the text after the last dot of the file name is an integer other than -1. */
    private boolean hasNumericExtension(File file) {
        String name = file.getName();
        String extension = name.substring(name.lastIndexOf(".") + 1);
        try {
            // -1 is the historical "not a version" marker and is kept as such.
            return Integer.parseInt(extension) != -1;
        } catch (NumberFormatException ignored) {
            // A non-numeric extension is the normal case.
            return false;
        }
    }

    /** Opens a writer on an existing index, creating the index if opening for append fails. */
    private IndexWriter openWriter(String path, Analyzer analyzer) throws IOException {
        try {
            return new IndexWriter(path, analyzer, false);
        } catch (IOException ioe) {
            // Presumably the index does not exist yet; fall back to creating it.
            return new IndexWriter(path, analyzer, true);
        }
    }

    /** Opens one reader over all three indexes; readers opened so far are closed if a later step fails. */
    private IndexReader openMultiReader(String root) throws IOException {
        IndexReader[] readers = new IndexReader[INDEX_DIRS.length];
        boolean opened = false;
        try {
            for (int i = 0; i < INDEX_DIRS.length; i++) {
                readers[i] = IndexReader.open(root + INDEX_DIRS[i] + "/");
            }
            IndexReader multi = new MultiReader(readers);
            opened = true;
            return multi;
        } finally {
            if (!opened) {
                for (int i = 0; i < readers.length; i++) {
                    closeQuietly(readers[i]);
                }
            }
        }
    }

    /** Optimizes a single existing index; failures are logged. */
    private void optimizeIndex(String path, Analyzer analyzer) {
        IndexWriter writer = null;
        try {
            writer = new IndexWriter(path, analyzer, false);
            writer.optimize();
            writer.close();
            writer = null;
        } catch (Exception e) {
            logError(e);
        } finally {
            closeQuietly(writer);
        }
    }

    /** Deletes all documents matching the term from one index; returns false if an I/O error occurred. */
    private boolean deleteFromIndex(String path, Term term) {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(path);
            reader.delete(term);
            reader.close();
            reader = null;
            return true;
        } catch (IOException ioe) {
            logError(ioe);
            return false;
        } finally {
            closeQuietly(reader);
        }
    }

    /** Opens one index and removes its lock; failures are logged. */
    private void unlockIndex(String path) {
        IndexReader reader = null;
        try {
            FSDirectory fsdir = FSDirectory.getDirectory(path, false);
            reader = IndexReader.open(fsdir);
            IndexReader.unlock(fsdir);
        } catch (Exception e) {
            logError(e);
        } finally {
            closeQuietly(reader);
        }
    }

    /** Tells whether one index is locked; an index that cannot be opened counts as not locked. */
    private boolean isIndexLocked(String path) {
        IndexReader reader = null;
        try {
            FSDirectory fsdir = FSDirectory.getDirectory(path, false);
            reader = IndexReader.open(fsdir);
            return IndexReader.isLocked(fsdir);
        } catch (Exception e) {
            logError(e);
            return false;
        } finally {
            closeQuietly(reader);
        }
    }

    /** Returns numDocs() of one index, or 0 if the index cannot be read. */
    private int countDocuments(String path) {
        IndexReader reader = null;
        try {
            reader = IndexReader.open(path);
            return reader.numDocs();
        } catch (Exception e) {
            logError(e);
            return 0;
        } finally {
            closeQuietly(reader);
        }
    }

    /** Closes a writer if it is non-null, logging instead of propagating any failure. */
    private void closeQuietly(IndexWriter writer) {
        if (writer == null) {
            return;
        }
        try {
            writer.close();
        } catch (Exception e) {
            logError(e);
        }
    }

    /** Closes a reader if it is non-null, logging instead of propagating any failure. */
    private void closeQuietly(IndexReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (Exception e) {
            logError(e);
        }
    }

    /** Logs an exception at ERROR level together with its stack trace. */
    private void logError(Exception e) {
        if (logger.isEnabledFor(Level.ERROR)) {
            logger.error(e.getMessage(), e);
        }
    }
}
// | Popular Tags
// |