package websphinx.searchengine;

import websphinx.*;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.net.MalformedURLException;

/**
 * {@link SearchEngine} implementation for Google.
 *
 * <p>Builds query URLs against {@code http://www.google.com/search} and
 * annotates result pages (recognized by their title) with the
 * {@code searchengine.*} labels and fields set in {@link #classify(Page)}.
 * The patterns below are tied to one specific layout of Google's result HTML.
 */
public class Google implements SearchEngine {

    /** Matches the "of approximately N for" fragment that carries the hit count. */
    static Pattern patCount = new Regexp (
        "</b> of approximately <b>\\d+,?(\\d+)</b> for <b>"
    );

    /**
     * Matches the message shown when a search has no hits.
     * Not referenced by any method of this class.
     */
    static Pattern patNoHits = new Regexp (
        "Your search did not produce any results"
    );

    /**
     * Matches one result entry; the pattern names the fields
     * {@code link}, {@code title} and {@code description}.
     */
    static Pattern patResult = new Tagexp (
        "<p>(?{link}<a>(?{title})</a>)<font>" + "<BR>(?{description}.*?)<font color=green>"
    );

    /** Matches the anchor that leads to a further page of results. */
    static Pattern patMoreLink = new Tagexp (
        "<A HREF=/search?q=*><img><br><font>.*?</a>"
    );

    /**
     * Annotates a page if it is a Google result page.
     *
     * <p>A page whose title is {@code null} or does not start with
     * {@code "Google Search:"} is left untouched. Otherwise the page is
     * given:
     * <ul>
     *   <li>the object label {@code searchengine.source}, set to this engine;</li>
     *   <li>the field {@code searchengine.count}, when {@link #patCount} matches;</li>
     *   <li>the fields {@code searchengine.results}, one
     *       {@link SearchEngineResult} per {@link #patResult} match;</li>
     *   <li>the labels {@code searchengine.more-results} and {@code hyperlink}
     *       on every link matched by {@link #patMoreLink}.</li>
     * </ul>
     *
     * @param page the page to inspect and annotate
     */
    public void classify (Page page) {
        String title = page.getTitle ();
        if (title != null && title.startsWith ("Google Search:")) {
            page.setObjectLabel ("searchengine.source", this);

            Region count = patCount.oneMatch (page);
            if (count != null) {
                page.setField ("searchengine.count", count.getField ("0"));
            }

            Region[] results = patResult.allMatches (page);
            SearchEngineResult[] ser = new SearchEngineResult[results.length];
            for (int i = 0; i < results.length; ++i) {
                ser[i] = new SearchEngineResult (results[i]);
            }
            page.setFields ("searchengine.results", ser);

            PatternMatcher m = patMoreLink.match (page);
            while (m.hasMoreElements ()) {
                // NOTE(review): assumes every patMoreLink match is a Link,
                // as the original cast did -- confirm against Tagexp.
                Link link = (Link) m.nextMatch ();
                link.setLabel ("searchengine.more-results");
                link.setLabel ("hyperlink");
            }
        }
    }

    /** Priority value reported by {@link #getPriority()}. */
    public static final float priority = 0.0F;

    /**
     * Returns the priority of this search engine.
     *
     * @return the constant {@link #priority}
     */
    public float getPriority () {
        return priority;
    }

    /**
     * Builds the URL that submits a keyword query to Google.
     *
     * <p>The keywords are form-encoded as UTF-8 so the resulting URL does
     * not depend on the platform's default charset.
     *
     * @param keywords the search keywords, unencoded
     * @return the query URL
     * @throws RuntimeException if the URL cannot be constructed or UTF-8 is
     *         unavailable; the underlying exception is attached as the cause
     */
    public URL makeQuery (String keywords) {
        try {
            return new URL ("http://www.google.com/search?q="
                            + URLEncoder.encode (keywords, "UTF-8")
                           );
        } catch (MalformedURLException e) {
            // Keep the cause so the failure can be diagnosed.
            throw new RuntimeException ("internal error", e);
        } catch (UnsupportedEncodingException e) {
            // UTF-8 is a charset every Java platform must support.
            throw new RuntimeException ("internal error", e);
        }
    }

    /**
     * Returns the number of results this engine is taken to list per page.
     *
     * @return always 10
     */
    public int getResultsPerPage () {
        return 10;
    }

    /**
     * Creates a search for the given keywords using a new Google engine.
     *
     * @param keywords the search keywords
     * @return a new {@link Search} over a fresh {@code Google} instance
     */
    public static Search search (String keywords) {
        return new Search (new Google (), keywords);
    }

    /**
     * Creates a search for the given keywords using a new Google engine,
     * passing a result limit through to {@link Search}.
     *
     * @param keywords the search keywords
     * @param maxResults the limit handed to the {@link Search} constructor
     * @return a new {@link Search} over a fresh {@code Google} instance
     */
    public static Search search (String keywords, int maxResults) {
        return new Search (new Google (), keywords, maxResults);
    }
}
| Popular Tags
|