KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > nu > xom > samples > XLinkSpider


/* Copyright 2002-2004 Elliotte Rusty Harold
   
   This library is free software; you can redistribute it and/or modify
   it under the terms of version 2.1 of the GNU Lesser General Public
   License as published by the Free Software Foundation.
   
   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.
   
   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the
   Free Software Foundation, Inc., 59 Temple Place, Suite 330,
   Boston, MA 02111-1307 USA
   
   You can contact Elliotte Rusty Harold by sending e-mail to
   elharo@metalab.unc.edu. Please include the word "XOM" in the
   subject line. The XOM home page is located at http://www.xom.nu/
*/

21
22 package nu.xom.samples;
23
24 import java.net.MalformedURLException JavaDoc;
25 import java.net.URL JavaDoc;
26 import java.util.HashSet JavaDoc;
27 import java.util.LinkedList JavaDoc;
28 import java.util.List JavaDoc;
29 import java.util.Set JavaDoc;
30
31 import nu.xom.Attribute;
32 import nu.xom.Builder;
33 import nu.xom.Document;
34 import nu.xom.Element;
35 import nu.xom.Elements;
36
37
38 /**
39  *
40  * <p>
41  * Demonstrates the reading of attributes in namespaces,
42  * as well as maintaining a stack of hierarchy-based state
43  * during document traversal.
44  * </p>
45  *
46  * @author Elliotte Rusty Harold
47  * @version 1.0
48  *
49  */

50 public class XLinkSpider {
51
52     private Set JavaDoc spidered = new HashSet JavaDoc();
53     private Builder parser = new Builder();
54     private List JavaDoc queue = new LinkedList JavaDoc();
55     
56     public static final String JavaDoc XLINK_NS
57       = "http://www.w3.org/1999/xlink";
58     public static final String JavaDoc XML_NS
59       = "http://www.w3.org/XML/1998/namespace";
60     
61     public void search(URL JavaDoc url) {
62         
63         try {
64             String JavaDoc systemID = url.toExternalForm();
65             Document doc = parser.build(systemID);
66             System.out.println(url);
67             search(doc.getRootElement(), url);
68         }
69         catch (Exception JavaDoc ex) {
70             // just skip this document
71
}
72         
73         if (queue.isEmpty()) return;
74         
75         URL JavaDoc discovered = (URL JavaDoc) queue.remove(0);
76         spidered.add(discovered);
77         search(discovered);
78         
79     }
80
81     private void search(Element element, URL JavaDoc base) {
82
83         Attribute href = element.getAttribute("href", XLINK_NS);
84         Attribute xmlbase = element.getAttribute("base", XML_NS);
85         try {
86             if (xmlbase != null) {
87                 base = new URL JavaDoc(base, xmlbase.getValue());
88             }
89         }
90         catch (MalformedURLException JavaDoc ex) {
91             // Probably just no protocol handler for the
92
// kind of URLs used inside this element
93
return;
94         }
95         if (href != null) {
96             String JavaDoc uri = href.getValue();
97             // absolutize URL
98
try {
99                 URL JavaDoc discovered = new URL JavaDoc(base, uri);
100                 // remove fragment identifier if any
101
discovered = new URL JavaDoc(
102                   discovered.getProtocol(),
103                   discovered.getHost(),
104                   discovered.getFile()
105                 );
106                 
107                 if (!spidered.contains(discovered)
108                   && !queue.contains(discovered)) {
109                     queue.add(discovered);
110                 }
111             }
112             catch (MalformedURLException JavaDoc ex) {
113                 // skip this one
114
}
115         }
116         Elements children = element.getChildElements();
117         for (int i = 0; i < children.size(); i++) {
118             search(children.get(i), base);
119         }
120         
121     }
122
123     public static void main(String JavaDoc[] args) {
124       
125         XLinkSpider spider = new XLinkSpider();
126         for (int i = 0; i < args.length; i++) {
127             try {
128                 spider.search(new URL JavaDoc(args[i]));
129             }
130             catch (MalformedURLException JavaDoc ex) {
131                 System.err.println(ex);
132             }
133         }
134       
135     } // end main()
136

137 }
Popular Tags