1 32 33 package websphinx.searchengine; 34 35 import websphinx.*; 36 import java.util.Vector ; 37 import java.util.Enumeration ; 38 import java.util.NoSuchElementException ; 39 40 public class Search extends Crawler implements Enumeration { 41 42 int maxResults; 43 int walkedResults; 45 Vector results = new Vector (); int nextResult = 0; int approxCount = -1; boolean crawling = false; 49 50 public Search () { 51 this (Integer.MAX_VALUE); 52 } 53 54 public Search (int maxResults) { 55 this.maxResults = maxResults; 56 setDepthFirst (false); 57 setMaxDepth (Integer.MAX_VALUE); 58 EventLog.monitor (this); } 60 61 public Search (SearchEngine engine, String keywords, int maxResults) { 62 this (maxResults); 63 addQuery (engine, keywords); 64 search (); 65 } 66 67 public Search (SearchEngine engine, String keywords) { 68 this (engine, keywords, Integer.MAX_VALUE); 69 } 70 71 public void addQuery (SearchEngine engine, String keywords) { 72 addRoot (new Link (engine.makeQuery (keywords))); 73 addClassifier (engine); 74 walkedResults += engine.getResultsPerPage (); 75 } 76 77 public void search () { 78 crawling = true; 79 Thread thread = new Thread (this, "Search"); 80 thread.setDaemon (true); 81 thread.start (); 82 } 83 84 public int count () { 85 synchronized (results) { 86 try { 88 while (approxCount == -1 && crawling) 89 results.wait (); 90 } catch (InterruptedException e) {} 91 return approxCount; 92 } 93 } 94 95 public boolean hasMoreElements () { 96 synchronized (results) { 97 try { 98 while (nextResult >= results.size() && crawling) 99 results.wait (); 100 } catch (InterruptedException e) {} 101 102 return nextResult < results.size (); 103 } 104 } 105 106 public Object nextElement () { 107 return nextResult (); 108 } 109 110 public SearchEngineResult nextResult () { 111 if (!hasMoreElements ()) 112 throw new NoSuchElementException (); 113 synchronized (results) { 114 SearchEngineResult result = (SearchEngineResult)results.elementAt (nextResult++); 115 if (result.rank == 0) 116 result.rank = nextResult; 117 return result; 118 } 119 } 120 121 public void run () { 122 super.run (); 123 synchronized (results) { 124 if (approxCount == -1) 125 approxCount = 0; 126 crawling = false; 127 results.notify (); 128 } 129 } 130 131 public void visit (Page page) { 132 synchronized (results) { 133 if (approxCount == -1) 134 approxCount = page.getNumericLabel ("searchengine.count", new Integer (0)).intValue(); 135 136 Region[] ser = page.getFields ("searchengine.results"); 137 for (int i=0; i<ser.length; ++i) { 138 if (results.size() == maxResults) { 139 stop (); 140 return; 141 } 142 results.addElement (ser[i]); 143 } 144 results.notify (); 145 } 146 } 147 148 public boolean shouldVisit (Link link) { 149 if (walkedResults >= maxResults 150 || !link.hasLabel ("searchengine.more-results")) 151 return false; 152 SearchEngine engine = (SearchEngine)link.getSource().getObjectLabel("searchengine.source"); 153 walkedResults += engine.getResultsPerPage (); 154 return true; 155 } 156 157 158 public static void main (String [] args) throws Exception { 159 if (args.length == 0) { 160 System.err.println ("Search <search engine classname> [-max n] <keywords>*"); 161 return; 162 } 163 164 SearchEngine engine = (SearchEngine) Class.forName (args[0]).newInstance (); 165 166 int max = Integer.MAX_VALUE; 167 int firstKeyword = 1; 168 if (args[1].equals ("-max")) { 169 max = Integer.parseInt (args[2]); 170 firstKeyword = 3; 171 } 172 173 Search ms = new Search (max); 174 ms.addQuery (engine, concat (args, firstKeyword)); 175 ms.search (); 176 while (ms.hasMoreElements ()) 177 System.out.println (ms.nextResult ()); 178 } 179 180 static String concat (String [] args, int start) { 181 StringBuffer buf = new StringBuffer (); 182 for (int i=start; i<args.length; ++i) { 183 if (buf.length() > 0) 184 buf.append (' '); 185 buf.append (args[i]); 186 } 187 return buf.toString (); 188 } 189 190 } 191
| Popular Tags
|