1 32 33 package websphinx.searchengine; 34 35 import websphinx.*; 36 import java.net.URL ; 37 import java.net.URLEncoder ; 38 import java.net.MalformedURLException ; 39 40 43 public class MetaCrawler implements SearchEngine { 44 45 static Pattern patCount = new Regexp ( 46 "Collated Results: 1 to \\d+ of (\\d+) references" 47 ); 48 static Pattern patNoHits = new Regexp ( 49 "Your search did not produce any results" 50 ); 51 52 static Pattern patResult = new Tagexp ( 53 "<dt><font color=#000000><b>(?{relevance})</b></font>" + "(?{link}(?{title}<a>.*?</a>))" + "(?{description}<dt>.*?<font>)" ); 57 58 static Pattern patMoreLink = new Tagexp ( 62 "<a HREF=http://*.metacrawler.com/crawler\\?general*></a>" 63 ); 64 65 76 public void classify (Page page) { 77 String title = page.getTitle (); 78 if (title != null && title.startsWith ("Metacrawler query:")) { 79 page.setObjectLabel ("searchengine.source", this); 80 81 Region count = patCount.oneMatch (page); 82 if (count != null) 83 page.setField ("searchengine.count", count.getField ("0")); 84 85 Region[] results = patResult.allMatches (page); 86 SearchEngineResult[] ser = new SearchEngineResult[results.length]; 87 for (int i=0; i<results.length; ++i) 88 ser[i] = new SearchEngineResult (results[i]); 89 page.setFields ("searchengine.results", ser); 90 91 PatternMatcher m = patMoreLink.match (page); 92 while (m.hasMoreElements ()) { 93 Link link = (Link)m.nextMatch (); 94 link.setLabel ("searchengine.more-results"); 95 link.setLabel ("hyperlink"); 96 } 97 } 98 } 99 100 103 public static final float priority = 0.0F; 104 105 109 public float getPriority () { 110 return priority; 111 } 112 113 118 public URL makeQuery (String keywords) { 119 try { 120 return new URL ("http://www.metacrawler.com/crawler?general=" 121 + URLEncoder.encode(keywords) 122 + "&method=1&format=1®ion=&rpp=20&timeout=15&hpe=10"); 123 } catch (MalformedURLException e) { 124 throw new RuntimeException ("internal error"); 125 } 126 } 127 128 132 public int getResultsPerPage () { 133 return 20; 134 } 135 136 141 public static Search search (String keywords) { 142 return new Search (new MetaCrawler(), keywords); 143 } 144 145 152 public static Search search (String keywords, int maxResults) { 153 return new Search (new MetaCrawler(), keywords, maxResults); 154 } 155 } 156 | Popular Tags |