KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lenya > util > HTML


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17
18 /* $Id: HTML.java 42598 2004-03-01 16:18:28Z gregor $ */
19
20 package org.apache.lenya.util;
21
22 import java.io.FileReader JavaDoc;
23 import java.io.IOException JavaDoc;
24 import java.io.InputStreamReader JavaDoc;
25 import java.io.Reader JavaDoc;
26 import java.net.URL JavaDoc;
27 import java.net.URLConnection JavaDoc;
28 import java.util.Iterator JavaDoc;
29 import java.util.List JavaDoc;
30
31 import javax.swing.text.html.parser.ParserDelegator JavaDoc;
32
33
34 /**
35  * http://developer.java.sun/developer/TechTips/1999/tt0923.html
36  */

37 public class HTML {
38     HTMLHandler htmlHandler;
39
40     /**
41      * Creates a new HTML object.
42      *
43      * @param uri DOCUMENT ME!
44      *
45      * @throws IOException DOCUMENT ME!
46      */

47     public HTML(String JavaDoc uri) throws IOException JavaDoc {
48         ParserDelegator JavaDoc pd = new ParserDelegator JavaDoc();
49         htmlHandler = new HTMLHandler();
50         pd.parse(getReader(uri), htmlHandler, true);
51     }
52
53     /**
54      * DOCUMENT ME!
55      *
56      * @param args DOCUMENT ME!
57      */

58     public static void main(String JavaDoc[] args) {
59         if (args.length != 1) {
60             System.err.println("Usage: HTML uri (file or url)");
61
62             return;
63         }
64
65         try {
66             HTML html = new HTML(args[0]);
67
68             List JavaDoc img_src_list = html.getImageSrcs(false);
69             System.out.println("<im src");
70
71             Iterator JavaDoc img_src_iterator = img_src_list.iterator();
72
73             while (img_src_iterator.hasNext()) {
74                 System.out.println((String JavaDoc) img_src_iterator.next());
75             }
76
77             List JavaDoc a_href_list = html.getAnchorHRefs(false);
78             System.out.println("<a href");
79
80             Iterator JavaDoc a_href_iterator = a_href_list.iterator();
81
82             while (a_href_iterator.hasNext()) {
83                 System.out.println((String JavaDoc) a_href_iterator.next());
84             }
85
86             List JavaDoc link_href_list = html.getLinkHRefs(false);
87             System.out.println("<link href");
88
89             Iterator JavaDoc link_href_iterator = link_href_list.iterator();
90
91             while (link_href_iterator.hasNext()) {
92                 System.out.println((String JavaDoc) link_href_iterator.next());
93             }
94         } catch (Exception JavaDoc e) {
95             System.err.println(".main(): " + e);
96         }
97     }
98
99     /**
100      * DOCUMENT ME!
101      *
102      * @param duplicate DOCUMENT ME!
103      *
104      * @return DOCUMENT ME!
105      */

106     public List JavaDoc getAnchorHRefs(boolean duplicate) {
107         if (duplicate) {
108             return htmlHandler.getAllAHRefs();
109         } else {
110             return htmlHandler.getAHRefs();
111         }
112     }
113
114     /**
115      * DOCUMENT ME!
116      *
117      * @param duplicate DOCUMENT ME!
118      *
119      * @return DOCUMENT ME!
120      */

121     public List JavaDoc getLinkHRefs(boolean duplicate) {
122         if (duplicate) {
123             return htmlHandler.getAllLinkHRefs();
124         } else {
125             return htmlHandler.getLinkHRefs();
126         }
127     }
128
129     /**
130      * DOCUMENT ME!
131      *
132      * @param duplicate DOCUMENT ME!
133      *
134      * @return DOCUMENT ME!
135      */

136     public List JavaDoc getImageSrcs(boolean duplicate) {
137         if (duplicate) {
138             return htmlHandler.getAllImageSrcs();
139         } else {
140             return htmlHandler.getImageSrcs();
141         }
142     }
143
144     private Reader JavaDoc getReader(String JavaDoc uri) throws IOException JavaDoc {
145         if (uri.startsWith("http:")) {
146             // uri is url
147
URLConnection JavaDoc connection = new URL JavaDoc(uri).openConnection();
148
149             return new InputStreamReader JavaDoc(connection.getInputStream());
150         } else {
151             // uri is file
152
return new FileReader JavaDoc(uri);
153         }
154     }
155 }
156
Popular Tags