KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > parserapplications > WikiCapturer


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2003 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/parserapplications/WikiCapturer.java,v $
// $Author: derrickoswald $
// $Date: 2004/05/30 01:43:54 $
// $Revision: 1.2 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.parserapplications;
28
29 import java.io.File JavaDoc;
30 import java.io.IOException JavaDoc;
31 import java.net.MalformedURLException JavaDoc;
32 import java.net.URL JavaDoc;
33 import javax.swing.JFileChooser JavaDoc;
34 import javax.swing.JOptionPane JavaDoc;
35 import org.htmlparser.filters.AndFilter;
36 import org.htmlparser.filters.HasAttributeFilter;
37 import org.htmlparser.filters.NotFilter;
38 import org.htmlparser.filters.OrFilter;
39 import org.htmlparser.filters.TagNameFilter;
40
41 /**
42  * Save a wikiwikiweb locally.
43  * Illustrative program to save a wiki locally.
44  */

45 public class WikiCapturer
46     extends
47         SiteCapturer
48 {
49     /**
50      * Create a wikicapturer.
51      */

52     public WikiCapturer ()
53     {
54     }
55
56     /**
57      * Returns <code>true</code> if the link is one we are interested in.
58      * @param link The link to be checked.
59      * @return <code>true</code> if the link has the source URL as a prefix
60      * and doesn't contain '?' or '#'; the former because we won't be able to
61      * handle server side queries in the static target directory structure and
62      * the latter because presumably the full page with that reference has
63      * already been captured previously. This performs a case insensitive
64      * comparison, which is cheating really, but it's cheap.
65      */

66     protected boolean isToBeCaptured (String JavaDoc link)
67     {
68         boolean ret;
69         
70         ret = super.isToBeCaptured (link);
71
72         // eliminate PhpWiki specific pages
73
if (ret)
74             if (link.endsWith ("PhpWikiAdministration"))
75                 ret = false;
76             else if (link.endsWith ("PhpWikiDocumentation"))
77                 ret = false;
78             else if (link.endsWith ("TextFormattingRules"))
79                 ret = false;
80             else if (link.endsWith ("NewMarkupTestPage"))
81                 ret = false;
82             else if (link.endsWith ("OldMarkupTestPage"))
83                 ret = false;
84             else if (link.endsWith ("OldTextFormattingRules"))
85                 ret = false;
86             else if (link.endsWith ("PgsrcTranslation"))
87                 ret = false;
88             else if (link.endsWith ("HowToUseWiki"))
89                 ret = false;
90             else if (link.endsWith ("MoreAboutMechanics"))
91                 ret = false;
92             else if (link.endsWith ("AddingPages"))
93                 ret = false;
94             else if (link.endsWith ("WikiWikiWeb"))
95                 ret = false;
96             else if (link.endsWith ("UserPreferences"))
97                 ret = false;
98             else if (link.endsWith ("PhpWiki"))
99                 ret = false;
100             else if (link.endsWith ("WabiSabi"))
101                 ret = false;
102             else if (link.endsWith ("EditText"))
103                 ret = false;
104             else if (link.endsWith ("FindPage"))
105                 ret = false;
106             else if (link.endsWith ("RecentChanges"))
107                 ret = false;
108             else if (link.endsWith ("RecentEdits"))
109                 ret = false;
110             else if (link.endsWith ("RecentVisitors"))
111                 ret = false;
112             else if (link.endsWith ("SteveWainstead"))
113                 ret = false;
114
115         return (ret);
116     }
117
118     /**
119      * Mainline to capture a web site locally.
120      * @param args The command line arguments.
121      * There are three arguments the web site to capture, the local directory
122      * to save it to, and a flag (true or false) to indicate whether resources
123      * such as images and video are to be captured as well.
124      * These are requested via dialog boxes if not supplied.
125      */

126     public static void main (String JavaDoc[] args)
127         throws
128             MalformedURLException JavaDoc,
129             IOException JavaDoc
130     {
131         WikiCapturer worker;
132         String JavaDoc url;
133         JFileChooser JavaDoc chooser;
134         URL JavaDoc source;
135         String JavaDoc path;
136         File JavaDoc target;
137         Boolean JavaDoc capture;
138         int ret;
139         
140         worker = new WikiCapturer ();
141         if (0 >= args.length)
142         {
143             url = (String JavaDoc)JOptionPane.showInputDialog (
144                 null,
145                 "Enter the URL to capture:",
146                 "Web Site",
147                 JOptionPane.PLAIN_MESSAGE,
148                 null,
149                 null,
150                 "http://htmlparser.sourceforge.net/wiki");
151             if (null != url)
152                 worker.setSource (url);
153             else
154                 System.exit (1);
155         }
156         else
157             worker.setSource (args[0]);
158         if (1 >= args.length)
159         {
160             url = worker.getSource ();
161             source = new URL JavaDoc (url);
162             path = new File JavaDoc (new File JavaDoc ("." + File.separator), source.getHost () + File.separator).getCanonicalPath ();
163             target = new File JavaDoc (path);
164             chooser = new JFileChooser JavaDoc (target);
165             chooser.setDialogType (JFileChooser.SAVE_DIALOG);
166             chooser.setFileSelectionMode (JFileChooser.DIRECTORIES_ONLY);
167             chooser.setSelectedFile (target); // this doesn't frickin' work
168
chooser.setMultiSelectionEnabled (false);
169             chooser.setDialogTitle ("Target Directory");
170             ret = chooser.showSaveDialog (null);
171             if (ret == JFileChooser.APPROVE_OPTION)
172                 worker.setTarget (chooser.getSelectedFile ().getAbsolutePath ());
173             else
174                 System.exit (1);
175         }
176         else
177             worker.setTarget (args[1]);
178         if (2 >= args.length)
179         {
180             capture = (Boolean JavaDoc)JOptionPane.showInputDialog (
181                 null,
182                 "Should resources be captured:",
183                 "Capture Resources",
184                 JOptionPane.PLAIN_MESSAGE,
185                 null,
186                 new Object JavaDoc[] { Boolean.TRUE, Boolean.FALSE},
187                 Boolean.TRUE);
188             if (null != capture)
189                 worker.setCaptureResources (capture.booleanValue ());
190             else
191                 System.exit (1);
192         }
193         else
194             worker.setCaptureResources ((Boolean.valueOf (args[2]).booleanValue ()));
195         worker.setFilter (
196             new NotFilter (
197                 new OrFilter (
198                     new AndFilter (
199                         new TagNameFilter ("DIV"),
200                         new HasAttributeFilter ("id", "navbar")),
201                     new OrFilter (
202                         new AndFilter (
203                             new TagNameFilter ("DIV"),
204                             new HasAttributeFilter ("id", "actionbar")),
205                         new AndFilter (
206                             new TagNameFilter ("DIV"),
207                             new HasAttributeFilter ("id", "xhtml-validator"))))));
208         worker.capture ();
209         
210         System.exit (0);
211     }
212 }
213
Popular Tags