KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > parserapplications > LinkExtractor


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2003 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/LinkExtractor.java,v $
// $Author: derrickoswald $
// $Date: 2004/01/04 03:23:09 $
// $Revision: 1.51 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.parserapplications;

import javax.swing.JOptionPane;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

39 /**
40  * LinkExtractor extracts all the links from the given webpage
41  * and prints them on standard output.
42  */

43 public class LinkExtractor
44 {
45     /**
46      */

47     public static void main (String JavaDoc[] args)
48     {
49         String JavaDoc url;
50         Parser parser;
51         NodeFilter filter;
52         NodeList list;
53
54         if (0 >= args.length)
55         {
56             url = (String JavaDoc)JOptionPane.showInputDialog (
57                 null,
58                 "Enter the URL to extract links from:",
59                 "Web Site",
60                 JOptionPane.PLAIN_MESSAGE,
61                 null,
62                 null,
63                 "http://htmlparser.sourceforge.net/wiki/");
64             if (null == url)
65                 System.exit (1);
66         }
67         else
68             url = args[0];
69         filter = new NodeClassFilter (LinkTag.class);
70         if ((1 < args.length) && args[1].equalsIgnoreCase ("-maillinks"))
71             filter = new AndFilter (
72                 filter,
73                 new NodeFilter ()
74                 {
75                     public boolean accept (Node node)
76                     {
77                         return (((LinkTag)node).isMailLink ());
78                     }
79                 }
80             );
81         try
82         {
83             parser = new Parser (url);
84             list = parser.extractAllNodesThatMatch (filter);
85             for (int i = 0; i < list.size (); i++)
86                 System.out.println (list.elementAt (i).toHtml ());
87         }
88         catch (ParserException e)
89         {
90             e.printStackTrace ();
91         }
92         System.exit (0);
93     }
94 }
95
96
Popular Tags