KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > thauvin > google > GoogleSearchBean


1 /*
2  * @(#)GoogleSearchBean.java
3  *
4  * Copyright (c) 2002-2003, Erik C. Thauvin (erik@thauvin.net)
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * Redistributions of source code must retain the above copyright notice,
12  * this list of conditions and the following disclaimer.
13  *
14  * Redistributions in binary form must reproduce the above copyright notice,
15  * this list of conditions and the following disclaimer in the documentation
16  * and/or other materials provided with the distribution.
17  *
18  * Neither the name of the author nor the names of its contributors may be
19  * used to endorse or promote products derived from this software without
20  * specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
23  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
26  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
27  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
28  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * $Id: GoogleSearchBean.java,v 1.8 2003/10/20 14:14:05 ethauvin Exp $
35  *
36  */

37 package net.thauvin.google;
38
import com.google.soap.search.GoogleSearch;
import com.google.soap.search.GoogleSearchFault;
import com.google.soap.search.GoogleSearchResult;
import com.google.soap.search.GoogleSearchResultElement;

import java.io.IOException;
import java.net.URL;
import java.net.URLEncoder;
46
47
48 /**
49  * Implements methods used to perform searches on Google.
50  *
51  * @author Erik C. Thauvin
52  * @created April 25, 2002
53  * @version $Revision: 1.8 $, $Date: 2003/10/20 14:14:05 $
54  * @since 1.0
55  */

56 public class GoogleSearchBean
57 {
58     /**
59      * The default cache flag.
60      */

61     public static final boolean DEFAULT_CACHE = true;
62
63     /**
64      * The default related-queries filter.
65      */

66     public static final boolean DEFAULT_FILTER = true;
67
68     /**
69      * The default language restricts.
70      */

71     public static final String JavaDoc DEFAULT_LR = "";
72
73     /**
74      * The default maximum number of results to be returned.
75      */

76     public static final int DEFAULT_MAX_RESULTS = 10;
77
78     /**
79      * The default document store restrict.
80      */

81     public static final String JavaDoc DEFAULT_RESTRICT = "";
82
83     /**
84      * The default SafeSearch.
85      */

86     public static final boolean DEFAULT_SAFE_SEARCH = false;
87
88     /**
89      * The default site.
90      */

91     public static final String JavaDoc DEFAULT_SITE = "";
92
93     /**
94      * The default index of the result to be returned.
95      */

96     public static final int DEFAULT_START = 0;
97
98     /**
99      * The default filetype.
100      *
101      * @since 1.0.1
102      */

103     public static final String JavaDoc DEFAULT_TYPE = "";
104
105     /**
106      * The <em>next</em> keyword.
107      */

108     public static final String JavaDoc NEXT_KEYWORD = "next";
109
110     /**
111      * The <em>previous</em> keyword.
112      */

113     public static final String JavaDoc PREVIOUS_KEYWORD = "previous";
114
115     // Invalid key error message.
116
private static final String JavaDoc INVALID_KEY_ERROR =
117         "The authorization key has not been specified.";
118     private GoogleSearch service = null;
119     private GoogleSearchResult result = null;
120     private String JavaDoc keywords = null;
121     private GoogleSearchResultElement elements[] = null;
122     private boolean keySet = false;
123     private int maxResults = 10;
124
/**
 * Creates a bean backed by a new {@link GoogleSearch} service instance.
 * No authorization key is set by this constructor.
 */
public GoogleSearchBean()
{
    this.service = new GoogleSearch();
}
132
/**
 * Creates a bean and registers the given authorization key with it.
 *
 * @param key The authorization key.
 */
public GoogleSearchBean(String key)
{
    this();
    this.setKey(key);
}
144
145     /**
146      * Sets the authorization key.
147      *
148      * @param key The key string.
149      */

150     public final void setKey(String JavaDoc key)
151     {
152         service.setKey(key);
153
154         if (isValidString(key))
155         {
156             keySet = true;
157         }
158         else
159         {
160             keySet = false;
161         }
162     }
163
164     /**
165      * Returns true if the authorization key is set.
166      *
167      * @return true or false.
168      */

169     public final boolean isKeySet()
170     {
171         return keySet;
172     }
173
174     /**
175      * Sets the search keywords.
176      *
177      * @param keywords The keywords
178      * @see #getKeywords()
179      * @since 1.0.1
180      */

181     public final void setKeywords(String JavaDoc keywords)
182     {
183         this.keywords = keywords;
184     }
185
186     /**
187      * Returns the keywords.
188      *
189      * @return The keywords value.
190      * @see #setKeywords(String)
191      * @since 1.0.1
192      */

193     public final String JavaDoc getKeywords()
194     {
195         return this.keywords;
196     }
197
198     /**
199      * Sets the HTTP proxy host, port, user name and password.
200      *
201      * @param proxyHost The host to use for the HTTP proxy.
202      * @param proxyPort The port to use for the HTTP proxy.
203      * @param proxyUserName The user name to use for the HTTP proxy.
204      * @param proxyPassword The password to use for the HTTP proxy.
205      */

206     public void setProxyServer(String JavaDoc proxyHost, String JavaDoc proxyPort,
207                                String JavaDoc proxyUserName, String JavaDoc proxyPassword)
208     {
209         int port = -1;
210
211         if (isValidString(proxyPort))
212         {
213             try
214             {
215                 port = Integer.valueOf(proxyPort).intValue();
216             }
217             catch (NumberFormatException JavaDoc e)
218             {
219                 ; // Do nothing.
220
}
221         }
222
223         setProxyServer(proxyHost, port, proxyUserName, proxyPassword);
224     }
225
226     /**
227      * Sets the HTTP proxy host, port, user name and password.
228      *
229      * @param proxyHost The host to use for the HTTP proxy.
230      * @param proxyPort The port to use for the HTTP proxy.
231      * @param proxyUserName The user name to use for the HTTP proxy.
232      * @param proxyPassword The password to use for the HTTP proxy.
233      */

234     public void setProxyServer(String JavaDoc proxyHost, int proxyPort,
235                                String JavaDoc proxyUserName, String JavaDoc proxyPassword)
236     {
237         if (isValidString(proxyHost))
238         {
239             service.setProxyHost(proxyHost);
240
241             if (proxyPort > 0)
242             {
243                 service.setProxyPort(proxyPort);
244             }
245
246             if (isValidString(proxyUserName))
247             {
248                 service.setProxyUserName(proxyUserName);
249             }
250
251             if (isValidString(proxyPassword))
252             {
253                 service.setProxyPassword(proxyPassword);
254             }
255         }
256     }
257
258     /**
259      * Returns the results of the search.
260      *
261      * @return The GoogleSearchResult object.
262      */

263     public final GoogleSearchResult getResult()
264     {
265         return result;
266     }
267
268     /**
269      * Returns an array of result elements that corresponds to the actual list
270      * of search results.
271      *
272      * @return The array of result elements.
273      */

274     public final GoogleSearchResultElement[] getResultElements()
275     {
276         return elements;
277     }
278
279     /**
280      * Returns the count of result elements.
281      *
282      * @return The result elements count.
283      * @see #getResultElements()
284      */

285     public final int getResultElementsCount()
286     {
287         if (elements != null)
288         {
289             return elements.length;
290         }
291
292         return 0;
293     }
294
295     /**
296      * Returns true whenever the result set is valid, indicating that a search
297      * was performed.
298      *
299      * @return true or false.
300      */

301     public final boolean isValidResult()
302     {
303         if (result != null)
304         {
305             return true;
306         }
307
308         return false;
309     }
310
311     /**
312      * Returns a cached web page from Google.
313      *
314      * @param url The page's URL.
315      * @return The HTML code of the cached page.
316      * @exception GoogleSearchFault
317      */

318     public String JavaDoc getCachedPage(String JavaDoc url)
319                          throws GoogleSearchFault
320     {
321         if (isKeySet())
322         {
323             reset();
324
325             return new String JavaDoc(service.doGetCachedPage(url));
326         }
327
328         throw new GoogleSearchFault(INVALID_KEY_ERROR);
329     }
330
331     /**
332      * Invokes a Google search.
333      *
334      * @param q The Google query.
335      * @param start The index of the result to be returned.
336      * @param maxResults The maximum number of results to be returned.
337      * @param filter The related-queries filter.
338      * @param restrict The document store restrict value (e.g.: "linux").
339      * @param safeSearch Enable or disable SafeSearch.
340      * @param lr The language restricts for the search.
341      * @return The results of the search.
342      * @exception GoogleSearchFault
343      */

344     public GoogleSearchResult getGoogleSearch(String JavaDoc q, int start,
345                                               int maxResults, boolean filter,
346                                               String JavaDoc restrict,
347                                               boolean safeSearch, String JavaDoc lr)
348                                        throws GoogleSearchFault
349     {
350         if (isKeySet())
351         {
352             reset();
353
354             service.setQueryString(q);
355             service.setStartResult(start);
356
357             this.maxResults = maxResults;
358             service.setMaxResults(maxResults);
359
360             service.setFilter(filter);
361             service.setRestrict(restrict);
362             service.setSafeSearch(safeSearch);
363             service.setLanguageRestricts(lr);
364
365             result = service.doSearch();
366
367             if (result != null)
368             {
369                 elements = result.getResultElements();
370             }
371
372             return result;
373         }
374
375         throw new GoogleSearchFault(INVALID_KEY_ERROR);
376     }
377
378     /**
379      * Returns the GoogleSearch attribute of the GoogleSearchBean object.
380      *
381      * @param key The authorization key.
382      * @param q The Google query.
383      * @param start The index of the result to be returned.
384      * @param maxResults The maximum number of results to be returned.
385      * @param filter The related-queries filter.
386      * @param restrict The document store restrict value (e.g.: "linux").
387      * @param safeSearch Enable or disable SafeSearch.
388      * @param lr The language restricts for the search.
389      * @return The results of the search.
390      * @exception GoogleSearchFault
391      * @see #getGoogleSearch(String, int, int, boolean, String, boolean, String)
392      */

393     public GoogleSearchResult getGoogleSearch(String JavaDoc key, String JavaDoc q, int start,
394                                               int maxResults, boolean filter,
395                                               String JavaDoc restrict,
396                                               boolean safeSearch, String JavaDoc lr)
397                                        throws GoogleSearchFault
398     {
399         setKey(key);
400
401         return getGoogleSearch(q, start, maxResults, filter, restrict,
402                                safeSearch, lr);
403     }
404
405     /**
406      * Returns the GoogleSearch attribute of the GoogleSearchBean object.
407      *
408      * @param q The Google query.
409      * @return The results of the search.
410      * @exception GoogleSearchFault
411      * @see #getGoogleSearch(String, int, int, boolean, String, boolean, String)
412      */

413     public GoogleSearchResult getGoogleSearch(String JavaDoc q)
414                                        throws GoogleSearchFault
415     {
416         return getGoogleSearch(q, DEFAULT_START, DEFAULT_MAX_RESULTS,
417                                DEFAULT_FILTER, DEFAULT_RESTRICT,
418                                DEFAULT_SAFE_SEARCH, DEFAULT_LR);
419     }
420
421     /**
422      * Returns the specified property of the given element index in the current
423      * result set.
424      * <p>The properties are:</p>
425      * <table border="3">
426      * <tr><td><code>"summary"</code></td><td>Returns the ODP summary text
427      * string.</td></tr>
428      * <tr><td><code>"url"</code></td><td>Returns the absolute URL path of the
429      * search.</td></tr>
430      * <tr><td><code>"snippet"</code></td><td>Returns a text snippet of the
431      * query in context.</td></tr>
432      * <tr><td><code>"title"</code></td><td>Returns the title (HTML) of the
433      * search result.</td></tr>
434      * <tr><td><code>"cachedSize"</code></td><td>Returns the size of (size +
435      * <code>k</code>) the cached version of the URL, in kilobytes.</td></tr>
436      * <tr><td><code>"relatedInformationPresent"</code></td><td>Returns
437      * <code>true</code> when the <em>related:</em> query term is supported for
438      * this URL; <code>false</code>, otherwise.</td></tr>
439      * <tr><td><code>"hostName"</code></td><td>Returns the host name.</td></tr>
440      * <tr><td><code>"directoryTitle"</code></td><td>Returns the ODP directory
441      * title.</td></tr>
442      * <tr><td><code>"directoryCategoryName"</code></td><td>Returns the ODP
443      * directory name of the current ODP category.</td></tr>
444      * <tr><td><code>"directoryCategoryEncoding"</code></td><td>Returns the
445      * encoding scheme of the current ODP category.</td></tr>
446      * <tr><td><code>"relatedQuery"</code></td><td>Returns the related query
447      * string, suitable for use as a {@link #getGoogleSearch(String) search}
448      * query string.<br>For example:
449      * <code>related:www.example.com/search?q=vacation%20hawaii</code></td></tr>
450      * <tr><td><code>"cachedQuery"</code></td><td>Returns the cached query
451      * string, suitable for use as a {@link #getCachedPage(String) cached} query
452      * string.<br>For example: <code>www.example.com/search?q=vacation%20hawaii</code>
453      * </td></tr>
454      * <tr><td><code>"staticQuery"</code></td><td>The static query, suitable for
455      * display.<br>For example: <code>www.example.com/search?q=vacation hawaii</code>
456      * </td></tr>
457      * </table>
458      *
459      * @param index The element index.
460      * @param property The property name.
461      * @return The property value.
462      */

463     public String JavaDoc getResultElementProperty(int index, String JavaDoc property)
464     {
465         if (elements != null)
466         {
467             if ((index >= 0) && (index < elements.length))
468             {
469                 if (property.equalsIgnoreCase("url"))
470                 {
471                     return elements[index].getURL();
472                 }
473                 else if (property.equalsIgnoreCase("summary"))
474                 {
475                     return elements[index].getSummary();
476                 }
477                 else if (property.equalsIgnoreCase("snippet"))
478                 {
479                     return elements[index].getSnippet();
480                 }
481                 else if (property.equalsIgnoreCase("title"))
482                 {
483                     return elements[index].getTitle();
484                 }
485                 else if (property.equalsIgnoreCase("cachedSize"))
486                 {
487                     return elements[index].getCachedSize();
488                 }
489                 else if (property.equalsIgnoreCase("hostName"))
490                 {
491                     return elements[index].getHostName();
492                 }
493                 else if (property.equalsIgnoreCase("relatedInformationPresent"))
494                 {
495                     return String.valueOf(elements[index]
496                                           .getRelatedInformationPresent());
497                 }
498                 else if (property.equalsIgnoreCase("directoryTitle"))
499                 {
500                     return elements[index].getDirectoryTitle();
501                 }
502                 else if (property.equalsIgnoreCase("directoryCategoryName"))
503                 {
504                     return elements[index].getDirectoryCategory()
505                                           .getFullViewableName();
506                 }
507                 else if (property.equalsIgnoreCase("directoryCategoryEncoding"))
508                 {
509                     return elements[index].getDirectoryCategory()
510                                           .getSpecialEncoding();
511                 }
512                 else if (property.toLowerCase().endsWith("query"))
513                 {
514                     try
515                     {
516                         URL JavaDoc url = new URL JavaDoc(elements[index].getURL());
517                         String JavaDoc urlString = url.toString();
518                         String JavaDoc staticQuery =
519                             urlString.substring(urlString.indexOf(url.getHost()));
520
521                         if (property.equalsIgnoreCase("relatedQuery"))
522                         {
523                             return ("related:" + URLEncoder.encode(staticQuery));
524                         }
525                         else if (property.equalsIgnoreCase("cachedQuery"))
526                         {
527                             return (URLEncoder.encode(staticQuery));
528                         }
529                         else if (property.equalsIgnoreCase("staticQuery"))
530                         {
531                             return staticQuery;
532                         }
533                     }
534                     catch (Exception JavaDoc e)
535                     {
536                         ; // Do nothing
537
}
538                 }
539             }
540         }
541
542         return "";
543     }
544
545     /**
546      * Returns the given property of the result set.
547      * <p>The properties are:</p>
548      * <table border="3">
549      * <tr><td><code>"estimatedTotalResultsCount"</code></td><td>Returns the
550      * estimated total number of results returned for the query.</td></tr>
551      * <tr><td><code>"startIndex"</code></td><td>Returns the index (1-based) of
552      * the first search result in the result elements.</td></tr>
553      * <tr><td><code>"endIndex"</code></td><td>Returns the index (1-based) of
554      * the last search result in the result elements.</td></tr>
555      * <tr><td><code>"searchTime"</code></td><td>Returns the total server time
556      * to process the query, in seconds.</td></tr>
557      * <tr><td><code>"searchTips"</code></td><td>Returns a string providing
558      * instructive suggestions on how to use Google.</td></tr>
559      * <tr><td><code>"searchComments"</code></td><td>Returns a string intended
560      * for display to the end user. (e.g.: list of removed <em>stop words</em>,
561      * etc.)</td></tr>
562      * <tr><td><code>"documentFiltering"</code></td><td>Returns
563      * <code>true</code> if filtering was performed on the search results;
564      * <code>false</code> otherwise.</td></tr>
565      * <tr><td><code>"searchQuery"</code></td><td>Returns the query string that
566      * generated this result.</td></tr>
567      * <tr><td><code>{@link #NEXT_KEYWORD next}</code></td><td>Returns the start
568      * index of the next set of results.</td></tr>
569      * <tr><td><code>{@link #PREVIOUS_KEYWORD previous}</code></td><td>Returns
570      * the start index of the previous set of results.</td></tr>
571      * </table>
572      *
573      * @param property The property name.
574      * @return The property value.
575      */

576     public String JavaDoc getResultProperty(String JavaDoc property)
577     {
578         if (result != null)
579         {
580             if (property.equalsIgnoreCase("estimatedTotalResultsCount"))
581             {
582                 return String.valueOf(result.getEstimatedTotalResultsCount());
583             }
584             else if (property.equalsIgnoreCase("startIndex"))
585             {
586                 return String.valueOf(result.getStartIndex());
587             }
588             else if (property.equalsIgnoreCase("endIndex"))
589             {
590                 return String.valueOf(result.getEndIndex());
591             }
592             else if (property.equalsIgnoreCase("searchTime"))
593             {
594                 return String.valueOf(result.getSearchTime());
595             }
596             else if (property.equalsIgnoreCase("searchTips"))
597             {
598                 return result.getSearchTips();
599             }
600             else if (property.equalsIgnoreCase("searchComments"))
601             {
602                 return result.getSearchComments();
603             }
604             else if (property.equalsIgnoreCase("documentFiltering"))
605             {
606                 return String.valueOf(result.getDocumentFiltering());
607             }
608             else if (property.equalsIgnoreCase("searchKeywords"))
609             {
610                 return getKeywords();
611             }
612             else if (property.equalsIgnoreCase("searchQuery"))
613             {
614                 return result.getSearchQuery();
615             }
616             else if (property.equalsIgnoreCase(NEXT_KEYWORD))
617             {
618                 if (result.getEndIndex() < result.getEstimatedTotalResultsCount())
619                 {
620                     if (maxResults == (result.getEndIndex()
621                             - result.getStartIndex() + 1))
622                     {
623                         return String.valueOf(result.getEndIndex());
624                     }
625                 }
626             }
627             else if (property.equalsIgnoreCase(PREVIOUS_KEYWORD))
628             {
629                 if (result.getStartIndex() > 1)
630                 {
631                     return String.valueOf(result.getStartIndex() - maxResults
632                                           - 1);
633                 }
634             }
635         }
636
637         return "";
638     }
639
640     /**
641      * Asks Google to return a spelling suggestion for a word or phrase.
642      *
643      * @param phrase The word or phrase to correct the spelling for.
644      * @return The suggested correct spelling, or null if none.
645      * @exception GoogleSearchFault
646      */

647     public String JavaDoc getSpellingSuggestion(String JavaDoc phrase)
648                                  throws GoogleSearchFault
649     {
650         if (isKeySet())
651         {
652             reset();
653
654             return service.doSpellingSuggestion(phrase);
655         }
656
657         throw new GoogleSearchFault(INVALID_KEY_ERROR);
658     }
659
660     /**
661      * Demonstration program to perform various Google searches.
662      * <p>The arguments are:</p>
663      * <code>&lt;client-key&gt; (search &lt;query&gt; | cached &lt;URL&gt; |
664      * spell &lt;phrase&gt;)</code>
665      *
666      * @param args The command line arguments.
667      */

668     public static final void main(String JavaDoc args[])
669     {
670         GoogleSearchBean bean = new GoogleSearchBean();
671
672         if (args.length == 3)
673         {
674             String JavaDoc action = args[1];
675
676             try
677             {
678                 bean.setKey(args[0]);
679
680                 if (action.equalsIgnoreCase("search"))
681                 {
682                     bean.getGoogleSearch(args[2]);
683
684                     for (int i = 0; i < bean.getResultElementsCount(); i++)
685                     {
686                         System.out.println(bean.getResultElementProperty(i,
687                                                                          "title")
688                                            + " ("
689                                            + bean.getResultElementProperty(i,
690                                                                            "url")
691                                            + ')');
692                     }
693                 }
694                 else if (action.equalsIgnoreCase("spell"))
695                 {
696                     System.out.println(bean.getSpellingSuggestion(args[2]));
697                 }
698                 else if (action.equalsIgnoreCase("cached"))
699                 {
700                     System.out.println(bean.getCachedPage(args[2]));
701                 }
702                 else
703                 {
704                     usage();
705                 }
706             }
707             catch (GoogleSearchFault googleSearchFault)
708             {
709                 googleSearchFault.printStackTrace();
710             }
711         }
712         else
713         {
714             usage();
715         }
716     }
717
718     /**
719      * Reset the bean properties.
720      */

721     public void reset()
722     {
723         // Reset the result and elements
724
result = null;
725         elements = null;
726     }
727
728     /**
729      * Validates a string value by insuring it is not null or empty.
730      *
731      * @param stringValue The String value.
732      * @return true if valid, false if not.
733      */

734     private boolean isValidString(String JavaDoc stringValue)
735     {
736         if ((stringValue != null) && (stringValue.trim().length() > 0))
737         {
738             return true;
739         }
740
741         return false;
742     }
743
744     /**
745      * Prints the usage and exits.
746      */

747     private static void usage()
748     {
749         System.err.println("Usage: java " + GoogleSearchBean.class.getName()
750                            + " <client-key> (search <query> | cached <URL> | spell <phrase>)");
751         System.exit(1);
752     }
753 }
754
Popular Tags