KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > websphinx > searchengine > Search


1 /*
2  * WebSphinx web-crawling toolkit
3  *
4  * Copyright (c) 1998-2002 Carnegie Mellon University. All rights
5  * reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  * notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  * notice, this list of conditions and the following disclaimer in
16  * the documentation and/or other materials provided with the
17  * distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  */

32
33 package websphinx.searchengine;
34
35 import websphinx.*;
36 import java.util.Vector JavaDoc;
37 import java.util.Enumeration JavaDoc;
38 import java.util.NoSuchElementException JavaDoc;
39
40 public class Search extends Crawler implements Enumeration JavaDoc {
41
42     int maxResults;
43     int walkedResults; // approximate number of results walked to
44

45     Vector JavaDoc results = new Vector JavaDoc(); // vector of SearchEngineResults
46
int nextResult = 0; // next result to be returned by the enumeration
47
int approxCount = -1; // (approximate) total number of results
48
boolean crawling = false;
49
50     public Search () {
51         this (Integer.MAX_VALUE);
52     }
53
54     public Search (int maxResults) {
55         this.maxResults = maxResults;
56         setDepthFirst (false);
57         setMaxDepth (Integer.MAX_VALUE);
58         EventLog.monitor (this); // FIX: debugging only
59
}
60
61     public Search (SearchEngine engine, String JavaDoc keywords, int maxResults) {
62         this (maxResults);
63         addQuery (engine, keywords);
64         search ();
65     }
66
67     public Search (SearchEngine engine, String JavaDoc keywords) {
68         this (engine, keywords, Integer.MAX_VALUE);
69     }
70
71     public void addQuery (SearchEngine engine, String JavaDoc keywords) {
72         addRoot (new Link (engine.makeQuery (keywords)));
73         addClassifier (engine);
74         walkedResults += engine.getResultsPerPage ();
75     }
76
77     public void search () {
78         crawling = true;
79         Thread JavaDoc thread = new Thread JavaDoc (this, "Search");
80         thread.setDaemon (true);
81         thread.start ();
82     }
83
84     public int count () {
85         synchronized (results) {
86             // block until count is ready
87
try {
88                 while (approxCount == -1 && crawling)
89                     results.wait ();
90             } catch (InterruptedException JavaDoc e) {}
91             return approxCount;
92         }
93     }
94     
95     public boolean hasMoreElements () {
96         synchronized (results) {
97             try {
98                 while (nextResult >= results.size() && crawling)
99                     results.wait ();
100             } catch (InterruptedException JavaDoc e) {}
101
102             return nextResult < results.size ();
103         }
104     }
105
106     public Object JavaDoc nextElement () {
107         return nextResult ();
108     }
109
110     public SearchEngineResult nextResult () {
111         if (!hasMoreElements ())
112             throw new NoSuchElementException JavaDoc ();
113         synchronized (results) {
114             SearchEngineResult result = (SearchEngineResult)results.elementAt (nextResult++);
115             if (result.rank == 0)
116                result.rank = nextResult;
117             return result;
118         }
119     }
120
121     public void run () {
122         super.run ();
123         synchronized (results) {
124             if (approxCount == -1)
125                 approxCount = 0;
126             crawling = false;
127             results.notify ();
128         }
129     }
130
131     public void visit (Page page) {
132         synchronized (results) {
133             if (approxCount == -1)
134                 approxCount = page.getNumericLabel ("searchengine.count", new Integer JavaDoc(0)).intValue();
135             
136             Region[] ser = page.getFields ("searchengine.results");
137             for (int i=0; i<ser.length; ++i) {
138                 if (results.size() == maxResults) {
139                     stop ();
140                     return;
141                 }
142                 results.addElement (ser[i]);
143             }
144             results.notify ();
145         }
146     }
147     
148     public boolean shouldVisit (Link link) {
149         if (walkedResults >= maxResults
150             || !link.hasLabel ("searchengine.more-results"))
151             return false;
152         SearchEngine engine = (SearchEngine)link.getSource().getObjectLabel("searchengine.source");
153         walkedResults += engine.getResultsPerPage ();
154         return true;
155     }
156     
157
158     public static void main (String JavaDoc[] args) throws Exception JavaDoc {
159         if (args.length == 0) {
160             System.err.println ("Search <search engine classname> [-max n] <keywords>*");
161             return;
162         }
163
164         SearchEngine engine = (SearchEngine) Class.forName (args[0]).newInstance ();
165
166         int max = Integer.MAX_VALUE;
167         int firstKeyword = 1;
168         if (args[1].equals ("-max")) {
169             max = Integer.parseInt (args[2]);
170             firstKeyword = 3;
171         }
172
173         Search ms = new Search (max);
174         ms.addQuery (engine, concat (args, firstKeyword));
175         ms.search ();
176         while (ms.hasMoreElements ())
177             System.out.println (ms.nextResult ());
178     }
179
180     static String JavaDoc concat (String JavaDoc[] args, int start) {
181         StringBuffer JavaDoc buf = new StringBuffer JavaDoc ();
182         for (int i=start; i<args.length; ++i) {
183             if (buf.length() > 0)
184                 buf.append (' ');
185             buf.append (args[i]);
186         }
187         return buf.toString ();
188     }
189     
190 }
191
Popular Tags