KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > openedit > search > SearchModule


1 /*
2  * Created on May 12, 2005
3  */

4 package com.openedit.search;
5
6 import java.io.File JavaDoc;
7 import java.io.IOException JavaDoc;
8 import java.util.ArrayList JavaDoc;
9 import java.util.List JavaDoc;
10
11 import org.apache.commons.logging.Log;
12 import org.apache.commons.logging.LogFactory;
13 import org.apache.hadoop.conf.Configuration;
14 import org.apache.nutch.searcher.Hit;
15 import org.apache.nutch.searcher.HitDetails;
16 import org.apache.nutch.searcher.Hits;
17 import org.apache.nutch.searcher.NutchBean;
18 import org.apache.nutch.searcher.Query;
19 import org.apache.nutch.searcher.Summary;
20 import org.apache.nutch.util.NutchConfiguration;
21
22 import com.openedit.OpenEditRuntimeException;
23 import com.openedit.WebPageRequest;
24 import com.openedit.modules.BaseModule;
25 import com.openedit.util.Exec;
26 import com.openedit.util.FileUtils;
27
28 /**
29  * @author cburkey
30  *
31  */

32 public class SearchModule extends BaseModule
33 {
34     protected NutchBean fieldNutchBean;
35     protected Configuration fieldConfiguration;
36     protected boolean fieldCrawling;
37     private static final Log log = LogFactory.getLog(SearchModule.class);
38     public synchronized boolean crawlWebSite(WebPageRequest inReq) throws Exception JavaDoc
39     {
40         fieldCrawling = true;
41         try
42         {
43             fieldNutchBean = null;
44             
45             File JavaDoc cvs = new File JavaDoc(getRoot(), "/search/urls/CVS");
46             new FileUtils().deleteAll(cvs);
47
48             File JavaDoc tmp = File.createTempFile("nutch", "index");
49             tmp.delete();
50             
51             //CrawlTool.main(args);
52
List JavaDoc command = new ArrayList JavaDoc();
53             command.add("java");
54             command.add("-cp");
55             
56             String JavaDoc home = new File JavaDoc(getRoot(),"WEB-INF/").getAbsolutePath();
57             StringBuffer JavaDoc cp = new StringBuffer JavaDoc();
58             cp.append(new File JavaDoc( home, "classes").getAbsolutePath() );
59             cp.append(File.pathSeparatorChar);
60             cp.append(new File JavaDoc( home, "/lib/nutch-0.8.1.jar").getAbsolutePath() );
61             cp.append(File.pathSeparatorChar);
62             cp.append(new File JavaDoc( home, "/lib/jakarta-oro-2.0.7.jar").getAbsolutePath() );
63             cp.append(File.pathSeparatorChar);
64             cp.append(new File JavaDoc( home, "/lib/lucene-core-1.9.1.jar").getAbsolutePath() );
65             cp.append(File.pathSeparatorChar);
66             cp.append(new File JavaDoc( home, "/lib/lucene-misc-1.9.1.jar").getAbsolutePath() );
67             cp.append(File.pathSeparatorChar);
68             cp.append(new File JavaDoc( home, "/lib/hadoop-0.4.0-patched.jar").getAbsolutePath() );
69             cp.append(File.pathSeparatorChar);
70             cp.append(new File JavaDoc( home, "/lib/log4j-1.2.13.jar").getAbsolutePath() );
71             cp.append(File.pathSeparatorChar);
72             cp.append(new File JavaDoc( home, "/lib/commons-logging-1.0.4.jar").getAbsolutePath() );
73             cp.append(File.pathSeparatorChar);
74             cp.append(new File JavaDoc( home, "/lib/commons-cli-2.0-SNAPSHOT.jar").getAbsolutePath() );
75             
76             command.add(cp.toString());
77             command.add("org.apache.nutch.crawl.Crawl");
78             command.add( new File JavaDoc( getRoot() , "/search/urls/").getAbsolutePath() );
79             command.add("-dir");
80             command.add(tmp.getAbsolutePath());
81             Exec exec = new Exec();
82             exec.setTrackOutput(true);
83             if ( exec.runExec(command) )
84             {
85                 log.info("Completed ok");
86                 FileUtils util = new FileUtils();
87                 util.deleteAll(new File JavaDoc(getIndexDir()));
88                 util.copyFiles(tmp,new File JavaDoc(getIndexDir()));
89                 return true;
90             }
91             log.error("Crawl failed");
92             return false;
93
94         }
95         finally
96         {
97             fieldCrawling = false;
98         }
99     }
100     
101     public Configuration getConfiguration()
102     {
103         if ( fieldConfiguration == null)
104         {
105             fieldConfiguration = NutchConfiguration.create();
106             fieldConfiguration.set( "searcher.dir", getIndexDir() );
107         }
108         return fieldConfiguration;
109     }
110     
111     protected NutchBean getNutchBean()
112     {
113         if ( fieldNutchBean == null)
114         {
115             try
116             {
117                 fieldNutchBean = new NutchBean(getConfiguration());
118             }
119             catch (IOException JavaDoc ex)
120             {
121                 throw new OpenEditRuntimeException(ex);
122             }
123         }
124         return fieldNutchBean;
125     }
126     
127     public void search(WebPageRequest inReq) throws Exception JavaDoc
128     {
129         if ( fieldCrawling )
130         {
131             return;
132         }
133         // get query from inReq
134
String JavaDoc queryString = inReq.getRequestParameter("query");
135         if (queryString == null)
136         {
137             queryString = "";
138         }
139
140         int start = 0; // first hit to display
141
String JavaDoc startString = inReq.getRequestParameter("start");
142         if (startString != null && startString.length() > 0)
143         {
144             start = Integer.parseInt(startString);
145         }
146
147         int hitsPerPage = 10; // number of hits to display
148
String JavaDoc hitsString = inReq.getRequestParameter("hitsPerPage");
149         if (hitsString != null && hitsString.length() > 0)
150         {
151             hitsPerPage = Integer.parseInt(hitsString);
152         }
153
154 /* int hitsPerSite = 2; // max hits per site
155         String hitsPerSiteString = inReq.getRequestParameter("hitsPerSite");
156         if (hitsPerSiteString != null && hitsPerSiteString.length() > 0)
157         {
158             hitsPerSite = Integer.parseInt(hitsPerSiteString);
159         }
160 */

161         Query query = Query.parse(queryString, getConfiguration());
162         log.info("query: " + queryString);
163
164         NutchBean bean = getNutchBean();
165         Hits hits = bean.search(query, start + hitsPerPage);
166         int end = (int)Math.min(hits.getLength(), start + hitsPerPage);
167         int length = end-start;
168         Hit[] show = hits.getHits(start, length);
169
170         log.info("total hits: " + hits.getTotal());
171         inReq.putPageValue("hits",makeHitRows(show, query));
172         inReq.putPageValue("query",queryString);
173         inReq.putPageValue("totalHits", new Long JavaDoc(hits.getTotal()));
174         inReq.putPageValue("firstHitIndex", new Integer JavaDoc(start));
175         inReq.putPageValue("lastHitIndex", new Integer JavaDoc(end));
176     }
177     
178     /**
179      * @param inShow
180      * @return
181      */

182     private Object JavaDoc makeHitRows(Hit[] inShow, Query inQuery) throws Exception JavaDoc
183     {
184         HitDetails[] details = getNutchBean().getDetails(inShow);
185         Summary[] summaries = getNutchBean().getSummary(details, inQuery);
186         List JavaDoc hits = new ArrayList JavaDoc(inShow.length);
187         for (int i = 0; i < inShow.length; i++)
188         {
189             HitRow row = new HitRow();
190             row.setHit(inShow[i]);
191             row.setDetail(details[i]);
192             row.setSummary(summaries[i].toString());
193             hits.add(row);
194         }
195         return hits;
196     }
197     
198     protected String JavaDoc getIndexDir()
199     {
200         return new File JavaDoc( getRoot(), "search/index" ).getAbsolutePath();
201     }
202 }
203
Popular Tags