1 32 33 package websphinx.searchengine; 34 35 import websphinx.*; 36 import java.net.URL ; 37 import java.net.URLEncoder ; 38 import java.net.MalformedURLException ; 39 40 43 public class NewsBot implements SearchEngine { 44 45 static Pattern patTitle = new Regexp ("^"); 46 47 static Pattern patCount = new Regexp ( 48 "Returned <B>(\\d+)</b> results" 49 ); 50 static Pattern patNoHits = new Regexp ( 51 "Sorry -- your search yielded no results" 52 ); 53 54 static Pattern patResult = new Tagexp ( 56 "<font>" 57 +"(?{link}<A>(?{title})</A>)" 58 +"</font>" 59 +"<br>" 60 +"<font></font>(?{description})<br>" 61 +"<font><b></b></font><p>" 62 ); 63 64 static Pattern patMoreLink = new Tagexp ( 65 "<input type=image name=act.next>" 66 ); 67 68 79 public void classify (Page page) { 80 String title = page.getTitle (); 81 if (title != null && title.startsWith ("HotBot results:")) { 82 page.setObjectLabel ("searchengine.source", this); 83 84 Region count = patCount.oneMatch (page); 85 if (count != null) 86 page.setField ("searchengine.count", count.getField ("0")); 87 88 Region[] results = patResult.allMatches (page); 89 SearchEngineResult[] ser = new SearchEngineResult[results.length]; 90 for (int i=0; i<results.length; ++i) { 91 ser[i] = new SearchEngineResult (results[i]); 92 } 94 page.setFields ("searchengine.results", ser); 95 96 PatternMatcher m = patMoreLink.match (page); 97 while (m.hasMoreElements ()) { 98 Link link = (Link)m.nextMatch (); 99 link.setLabel ("searchengine.more-results"); 100 link.setLabel ("hyperlink"); 101 } 102 } 103 else System.err.println ("not a NewsBot page"); 104 105 } 106 107 110 public static final float priority = 0.0F; 111 112 116 public float getPriority () { 117 return priority; 118 } 119 120 125 public URL makeQuery (String keywords) { 126 try { 127 java.util.StringTokenizer tok = new java.util.StringTokenizer (keywords); 128 StringBuffer output = new StringBuffer (); 129 while (tok.hasMoreElements ()) { 130 String kw = tok.nextToken (); 131 
if (output.length () > 0) 132 output.append (" or "); 133 output.append (kw); 134 } 135 136 return new URL ( 137 "http://engine.newbot.com/newbot/server/query.fpl?client_id=0sQaJNoAahXc&output=hotbot4&logad=1&client_sw=html&client_vr=0.9&client_last_updated=ignore&T0=hotbot&S0=date&P0=&F0=24&Q0=" 138 + URLEncoder.encode(output.toString()) 139 + "&max_results=50&S0=rank&Search.x=55&Search.y=4" 140 ); 141 } catch (MalformedURLException e) { 142 throw new RuntimeException ("internal error"); 143 } 144 } 145 146 150 public int getResultsPerPage () { 151 return 10; 152 } 153 154 159 public static Search search (String keywords) { 160 return new Search (new NewsBot(), keywords); 161 } 162 163 170 public static Search search (String keywords, int maxResults) { 171 return new Search (new NewsBot(), keywords, maxResults); 172 } 173 } 174
// Page-footer residue from the source listing: "Popular Tags"