package org.contineo.searchengine.search;

import java.io.File;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Vector;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.contineo.admin.dao.MenuDAO;
import org.contineo.core.LoggingManager;
import org.contineo.core.config.SearchSettings;
import org.contineo.core.config.SettingConfigurator;
import org.contineo.core.config.dao.SearchSettingsDAO;
import org.contineo.core.text.analyze.Stopwords;
import org.contineo.searchengine.util.SquareSimilarity;

/**
 * Runs a query against the per-language Lucene indexes found below the
 * configured {@code indexdir} and turns the matching documents into
 * {@link Result} objects.
 *
 * <p>Besides the result collection returned by
 * {@link #search(SearchOptions, String)}, an instance records the number of
 * hits that passed the menu-id and relevance filters ({@link #getHitCount()})
 * and the hit index at which each result page starts
 * ({@link #getPageVect()}). Both values describe the most recent call to
 * {@code search}.
 */
public class Search {

    /** Number of highlight fragments used when the user has no search settings. */
    private static final int DEFAULT_MAX_FRAGMENTS = 4;

    /** Text placed between two highlighted fragments of a summary. */
    private static final String FRAGMENT_SEPARATOR = " ... ";

    private Logger logger;

    /** Number of hits of the last search that passed both filters. */
    private int hitCount;

    /** Hit indexes at which the pages of the last search start. */
    private Vector<Integer> pageVect;

    /** Creates a searcher with an empty hit count and page list. */
    public Search() {
        logger = LoggingManager.getLogger(this.getClass());
        hitCount = 0;
        pageVect = new Vector<Integer>();
    }

    /**
     * @return the number of hits of the last search whose menu id was in the
     *         user's menu-id collection and for which
     *         {@code Result.isRelevant} returned true
     */
    public int getHitCount() {
        return hitCount;
    }

    /**
     * @return the hit indexes (as {@code Integer}) at which each page of the
     *         last search starts
     */
    public Vector getPageVect() {
        return pageVect;
    }

    /**
     * Executes the query described by {@code opt} and returns one page of
     * results.
     *
     * <p>If {@code opt} carries no languages, the indexes for "de", "en" and
     * "fr" are searched; if it carries no fields, "content" and "keywords"
     * are searched. In both cases the defaults are written back into
     * {@code opt}. Any exception raised while searching is logged and the
     * results collected so far are returned.
     *
     * @param opt      query string, fields, languages, user name and paging
     *                 information of the request
     * @param language language code selecting the analyzer used for query
     *                 parsing and highlighting; "de" and "fr" select the
     *                 German and French analyzers, anything else (including
     *                 {@code null}) the standard analyzer with English
     *                 stop words
     * @return the {@link Result} objects of the requested page, possibly empty
     */
    public Collection search(SearchOptions opt, String language) {
        Collection<Result> coll = new ArrayList<Result>();
        SettingConfigurator conf = new SettingConfigurator();

        // Start from a clean state so that reusing the instance does not
        // accumulate counts and page offsets of earlier searches.
        hitCount = 0;
        pageVect.clear();

        Searcher[] searchers = null;
        try {
            String[] languages = opt.getLanguages();
            if (languages == null || languages.length == 0) {
                languages = new String[] {"de", "en", "fr"};
                opt.setLanguages(languages);
            }

            // File.separator is the directory separator; File.pathSeparator
            // (":" or ";") separates entries of a path list and never
            // terminates a directory name.
            String indexPath = conf.getValue("indexdir");
            if (!indexPath.endsWith("/") && !indexPath.endsWith(File.separator)) {
                indexPath += "/";
            }

            searchers = new Searcher[languages.length];
            for (int i = 0; i < languages.length; i++) {
                searchers[i] = new IndexSearcher(indexPath + indexDirFor(languages[i]) + "/");
            }
            MultiSearcher multiSearcher = new MultiSearcher(searchers);
            multiSearcher.setSimilarity(new SquareSimilarity());

            Analyzer analyzer = analyzerFor(language);

            if (opt.getFields() == null) {
                opt.setFields(new String[] {"content", "keywords"});
            }

            Query query = MultiFieldQueryParser.parse(opt.getQueryStr(), opt.getFields(), analyzer);
            Hits hits = multiSearcher.search(query);

            MenuDAO mdao = new MenuDAO();
            Collection accessibleMenuIds = mdao.findMenuIdByUserName(opt.getUsername());

            // Keep the default when the user has no stored search settings.
            int maxFragments = DEFAULT_MAX_FRAGMENTS;
            SearchSettingsDAO settingsDao = new SearchSettingsDAO();
            SearchSettings settings = settingsDao.findByPrimaryKey(opt.getUsername());
            if (settings != null) {
                maxFragments = settings.getMaxFragments();
            }

            Highlighter highlighter = new Highlighter(
                    new SimpleHTMLFormatter("<font style='background-color:#FFFF00'>", "</font>"),
                    new QueryScorer(query));

            int hitsPerPage = opt.getHitsPerPage();

            // Pass 1: count the hits that survive both filters and remember
            // the hit index at which every page begins.
            for (int j = 0; j < hits.length(); j++) {
                Document doc = hits.doc(j);
                if (!accessibleMenuIds.contains(doc.get("menuid"))) {
                    continue;
                }
                Result result = buildResult(doc);
                if (result.isRelevant(opt, doc.get("date"))) {
                    // hitsPerPage <= 0 would make the modulo throw or be meaningless.
                    if (hitsPerPage > 0 && hitCount % hitsPerPage == 0) {
                        pageVect.add(Integer.valueOf(j));
                    }
                    hitCount++;
                }
            }

            // Pass 2: collect the requested page. Only results that are
            // actually added count towards the page size, which matches the
            // way pass 1 computes the page start offsets.
            int collected = 0;
            for (int idx = opt.getResultStart();
                    idx < hits.length() && collected < hitsPerPage;
                    idx++) {
                Document doc = hits.doc(idx);
                if (!accessibleMenuIds.contains(doc.get("menuid"))) {
                    continue;
                }
                Result result = buildResult(doc);
                if (!result.isRelevant(opt, doc.get("date"))) {
                    continue;
                }

                String content = doc.get("content");
                String summary = null;
                if (content != null) {
                    TokenStream stream = analyzer.tokenStream("content", new StringReader(content));
                    summary = highlighter.getBestFragments(
                            stream, content, maxFragments, FRAGMENT_SEPARATOR);
                }
                // Fall back to the stored summary when nothing could be highlighted.
                if (summary == null || summary.equals("")) {
                    summary = doc.get("summary");
                }
                result.setSummary(summary);
                result.createScore(hits.score(idx));

                coll.add(result);
                collected++;
            }
        } catch (Exception e) {
            if (logger.isEnabledFor(Level.ERROR)) {
                // Pass the throwable so the stack trace is not lost.
                logger.error(e.getMessage(), e);
            }
        } finally {
            closeAll(searchers);
        }
        return coll;
    }

    /** Maps a language code to the name of its index sub-directory. */
    private static String indexDirFor(String lang) {
        if ("de".equals(lang)) {
            return "german";
        }
        if ("fr".equals(lang)) {
            return "french";
        }
        return "english";
    }

    /** Chooses the analyzer matching the language code; defaults to the standard analyzer. */
    private static Analyzer analyzerFor(String language) {
        if ("de".equals(language)) {
            return new GermanAnalyzer(Stopwords.getStopwords("de"));
        }
        if ("fr".equals(language)) {
            return new FrenchAnalyzer(Stopwords.getStopwords("fr"));
        }
        return new StandardAnalyzer(Stopwords.getStopwords("en"));
    }

    /** Copies the stored fields of an index document into a new Result (without summary and score). */
    private static Result buildResult(Document doc) {
        String size = doc.get("size");
        if ("0".equals(size)) {
            // A stored size of "0" is reported as "1".
            size = "1";
        }
        Result result = new Result();
        result.setMenuid(doc.get("menuid"));
        result.setName(doc.get("name"));
        result.setPath(doc.get("path"));
        result.setSize(size);
        result.setLength(Integer.valueOf(doc.get("length")));
        result.setDocid(Integer.valueOf(doc.get("docid")));
        result.setType(doc.get("type"));
        result.setDate(new Date(Long.parseLong(doc.get("date"))));
        return result;
    }

    /** Closes every searcher that was opened; a failure to close one does not stop the others. */
    private void closeAll(Searcher[] searchers) {
        if (searchers == null) {
            return;
        }
        for (int i = 0; i < searchers.length; i++) {
            if (searchers[i] == null) {
                continue;
            }
            try {
                searchers[i].close();
            } catch (java.io.IOException e) {
                logger.warn("Could not close index searcher", e);
            }
        }
    }
}