1 32 33 package websphinx.searchengine; 34 import websphinx.*; 35 import java.net.URL ; 36 import java.net.URLEncoder ; 37 import java.net.MalformedURLException ; 38 39 42 public class AltaVista implements SearchEngine { 43 44 static Pattern patCount = new Regexp ( 45 "<font size=-1 face=\"arial, helvetica\">(?:About )?<b>(\\d+)</b> documents? match your query."); 46 static Pattern patNoHits = new Regexp ( 47 "No documents match the query." 48 ); 49 50 static Pattern patResult = new Tagexp ( 51 "<dt><b>(?{rank})</b>" + "(?{link}<a><b>(?{title})</b></a>)" + "<dd>(?{description})<br>" + "(?:<i>(?:<a></a>)?</i><br>)+" + "<p>" ); 57 58 static Pattern patMoreLink = new Tagexp ( 59 "<input type=image name=navig* value=nav.gif>" 60 ); 61 62 73 public void classify (Page page) { 74 String title = page.getTitle (); 75 if (title != null && 76 (title.startsWith ("AltaVista: Simple Query") 77 || title.startsWith ("AltaVista: Advanced Query"))) { 78 page.setObjectLabel ("searchengine.source", this); 79 80 Region count = patCount.oneMatch (page); 81 if (count != null) 82 page.setField ("searchengine.count", count.getField ("0")); 83 84 Region[] results = patResult.allMatches (page); 85 SearchEngineResult[] ser = new SearchEngineResult[results.length]; 86 for (int i=0; i<results.length; ++i) 87 ser[i] = new SearchEngineResult (results[i]); 88 page.setFields ("searchengine.results", ser); 89 90 PatternMatcher m = patMoreLink.match (page); 91 while (m.hasMoreElements ()) { 92 Link link = (Link)m.nextMatch (); 93 link.setLabel ("searchengine.more-results"); 94 link.setLabel ("hyperlink"); 95 } 96 } 97 } 98 99 102 public static final float priority = 0.0F; 103 104 108 public float getPriority () { 109 return priority; 110 } 111 112 117 public URL makeQuery (String keywords) { 118 try { 119 return new URL ("http://altavista.digital.com/cgi-bin/query?pg=q&what=web&kl=XX&q=" 120 + URLEncoder.encode(keywords)); 121 } catch (MalformedURLException e) { 122 throw new RuntimeException ("internal error"); 123 } 124 } 125 126 130 public int getResultsPerPage () { 131 return 10; 132 } 133 134 139 public static Search search (String keywords) { 140 return new Search (new AltaVista(), keywords); 141 } 142 143 150 public static Search search (String keywords, int maxResults) { 151 return new Search (new AltaVista(), keywords, maxResults); 152 } 153 } 154 | Popular Tags |