KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > parserapplications > MailRipper


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/parserapplications/MailRipper.java,v 1.2 2004/02/10 13:41:07 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33
34 package org.htmlparser.parserapplications;
35 import java.util.Enumeration JavaDoc;
36 import java.util.Vector JavaDoc;
37
38 import org.htmlparser.Node;
39 import org.htmlparser.Parser;
40 import org.htmlparser.tags.LinkTag;
41 import org.htmlparser.util.DefaultParserFeedback;
42 import org.htmlparser.util.NodeIterator;
43 import org.htmlparser.util.ParserException;
44
45
46 /**
47  * MailRipper will rip out all the mail addresses from a given web page
48  * Pass a web site (or html file on your local disk) as an argument.
49  */

50 public class MailRipper
51 {
52     private org.htmlparser.Parser parser;
53     /**
54      * MailRipper c'tor takes the url to be ripped
55      * @param resourceLocation url to be ripped
56      */

57     public MailRipper(String JavaDoc resourceLocation)
58     {
59         try
60         {
61             parser = new Parser(resourceLocation, new DefaultParserFeedback());
62             parser.registerScanners();
63         }
64         catch (ParserException e)
65         {
66             System.err.println("Could not create parser object");
67             e.printStackTrace();
68         }
69     }
70     public static void main(String JavaDoc[] args)
71     {
72         System.out.println("Mail Ripper v" + Parser.getVersion());
73         if (args.length < 1 || args[0].equals("-help"))
74         {
75             System.out.println();
76             System.out.println(
77                 "Syntax : java -classpath htmlparser.jar org.htmlparser.parserapplications.MailRipper <resourceLocn/website>");
78             System.out.println();
79             System.out.println(
80                 " <resourceLocn> the name of the file to be parsed (with complete path ");
81             System.out.println(
82                 " if not in current directory)");
83             System.out.println(" -help This screen");
84             System.out.println();
85             System.out.println(
86                 "HTML Parser home page : http://htmlparser.sourceforge.net");
87             System.out.println();
88             System.out.println(
89                 "Example : java -classpath htmlparser.jar com.kizna.parserapplications.MailRipper http://htmlparser.sourceforge.net");
90             System.out.println();
91             System.out.println(
92                 "If you have any doubts, please join the HTMLParser mailing list (user/developer) from the HTML Parser home page instead of mailing any of the contributors directly. You will be surprised with the quality of open source support. ");
93             System.exit(-1);
94         }
95         String JavaDoc resourceLocation = "http://htmlparser.sourceforge.net";
96         if (args.length != 0)
97             resourceLocation = args[0];
98
99         MailRipper ripper = new MailRipper(resourceLocation);
100         System.out.println("Ripping Site " + resourceLocation);
101         try
102         {
103             for (Enumeration JavaDoc e = ripper.rip(); e.hasMoreElements();)
104             {
105                 LinkTag tag = (LinkTag) e.nextElement();
106                 System.out.println("Ripped mail address : " + tag.getLink());
107             }
108         }
109         catch (ParserException e)
110         {
111             e.printStackTrace();
112         }
113     }
114     /**
115      * Rip all mail addresses from the given url, and return an enumeration of such mail addresses.
116      * @return Enumeration of mail addresses (a vector of LinkTag)
117      */

118     public Enumeration JavaDoc rip() throws ParserException
119     {
120         Node node;
121         Vector JavaDoc mailAddresses = new Vector JavaDoc();
122         for (NodeIterator e = parser.elements(); e.hasMoreNodes();)
123         {
124             node = e.nextNode();
125             if (node instanceof LinkTag)
126             {
127                 LinkTag linkTag = (LinkTag) node;
128                 if (linkTag.isMailLink())
129                     mailAddresses.addElement(linkTag);
130             }
131         }
132         return mailAddresses.elements();
133     }
134 }
135
Popular Tags