1 32 33 package websphinx.searchengine; 34 35 import websphinx.*; 36 import java.net.URL ; 37 import java.net.URLEncoder ; 38 import java.net.MalformedURLException ; 39 40 43 public class NewsIndex implements SearchEngine { 44 45 static Pattern patCount = new Regexp ( 46 "<center>Headlines\\s+\\d+\\s+to\\s+\\d+\\s+of\\s+(\\d+)</center>" 47 ); 48 static Pattern patNoHits = new Regexp ( 49 "No articles were found matching your search criteria" 50 ); 51 52 static Pattern patResult = new Tagexp ( 53 "<dd>(?{link}(?{title}<a>.*?</a>))" + "<blockquote><b></b>" + "(?{description})</blockquote>" ); 57 58 69 public void classify (Page page) { 70 String title = page.getTitle (); 71 if (title != null && title.equals ("News Index - Results")) { 72 page.setObjectLabel ("searchengine.source", this); 73 74 Region count = patCount.oneMatch (page); 75 if (count != null) 76 page.setField ("searchengine.count", count.getField ("0")); 77 78 Region[] results = patResult.allMatches (page); 79 SearchEngineResult[] ser = new SearchEngineResult[results.length]; 80 for (int i=0; i<results.length; ++i) 81 ser[i] = new SearchEngineResult (results[i]); 82 page.setFields ("searchengine.results", ser); 83 84 Link[] links = page.getLinks (); 86 for (int i=0; i<links.length; ++i) { 87 if (links[i].toText().equals ("Next 10 Headlines")) { 88 links[i].setLabel ("searchengine.more-results"); 89 links[i].setLabel ("hyperlink"); 90 break; 91 } 92 } 93 } 94 } 95 96 99 public static final float priority = 0.0F; 100 101 105 public float getPriority () { 106 return priority; 107 } 108 109 114 public URL makeQuery (String keywords) { 115 try { 116 return new URL ("http://www.newsindex.com/cgi-bin/process.cgi?mode=any&query=" 117 + URLEncoder.encode(keywords)); 118 } catch (MalformedURLException e) { 119 throw new RuntimeException ("internal error"); 120 } 121 } 122 123 127 public int getResultsPerPage () { 128 return 10; 129 } 130 131 136 public static Search search (String keywords) { 137 return new Search (new NewsIndex(), keywords); 138 } 139 140 147 public static Search search (String keywords, int maxResults) { 148 return new Search (new NewsIndex(), keywords, maxResults); 149 } 150 } 151 | Popular Tags |