KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > matuschek > jobo > JoBo


package net.matuschek.jobo;

/************************************************
    Copyright (c) 2001/2002 by Daniel Matuschek
*************************************************/

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;

import net.matuschek.getopt.GetOpt;
import net.matuschek.http.HttpDocToFile;
import net.matuschek.http.SystemOutHttpToolCallback;
import net.matuschek.spider.WebRobot;

import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.Category;
import org.apache.log4j.PropertyConfigurator;

19 /**
20  * This is the JoBo command line interface.
21  *
22  * @author Daniel Matuschek
23  * @version $Id $
24  */

25 public class JoBo {
26
27   protected static Category log = Category.getInstance("");
28
29   public static void printUsage() {
30     final String JavaDoc usageInfo =
31       "command line options: \n"+
32       " [-r http://...] set start referer (default \"-\")\n"+
33       " [-d maxdepth] set maximal search depth (default 0)\n"+
34       " [-o] allow walk to other hosts (default no)\n"+
35       " [-s directory] directory to store retrieved documents\n"+
36       " (default \".\")\n"+
37       " [-m minsize] store only files larger then this size in bytes\n"+
38       " (default 0)\n"+
39       " [-a agentName] set user agent name\n"+
40       " (default \"JoBo\")\n"+
41       " [-i] ignore robots.txt\n"+
42       " [-w seconds] wait n seconds after retrieving a file to limit\n"+
43       " load on the remote server (default 60)\n"+
44       " [-v] verbose mode, useful, if something is wrong\n"+
45       " with the XML configuration\n"+
46       " url start URL";
47
48     System.out.println(usageInfo+"\n\n");
49   }
50
51
52   /**
53      initialize log4j logging subsystem
54    **/

55   public static void initializeLogging() {
56     final String JavaDoc configfile = "logging.conf";
57
58     
59     File JavaDoc f=new File JavaDoc(configfile);
60     if (f.exists()) {
61       // read the logging properties from configuration file
62
PropertyConfigurator.configure(configfile);
63     } else {
64       BasicConfigurator.configure();
65     }
66   }
67   
68
69
70   public static void main(String JavaDoc[] argv)
71     throws Exception JavaDoc
72   {
73     String JavaDoc basedir=".";
74     int minSize=0;
75
76     initializeLogging();
77    
78     if (argv.length<1) {
79       printUsage();
80       return;
81     }
82
83     // get command line options
84
GetOpt opt = new GetOpt(argv);
85     String JavaDoc option = null;
86
87     JoBoBase jobobase = JoBoBase.createFromXML();
88     WebRobot robby = jobobase.getRobot();
89
90     // referer
91
option=opt.getOptionString("r");
92     if (option != null) {
93       robby.setStartReferer(option);
94     }
95     
96     // maximal depth
97
option=opt.getOptionString("d");
98     if (option != null) {
99     try {
100       int maxDepth=Integer.parseInt(option);
101       robby.setMaxDepth(maxDepth);
102     } catch (NumberFormatException JavaDoc e) {
103       System.out.println("Wrong number for maxDepth: "+option);
104     }
105     }
106     
107     // walk to other hosts ?
108
if (opt.getOptionBoolean("o")) {
109       robby.setWalkToOtherHosts(true);
110     }
111     
112     // store directory
113
option=opt.getOptionString("s");
114     if (option != null) {
115       basedir=option;
116     }
117     
118     // minimal file size
119
option=opt.getOptionString("m");
120     if (option != null) {
121       try {
122     minSize=Integer.parseInt(option);
123       } catch (NumberFormatException JavaDoc e) {}
124     }
125     
126     // agent name
127
option=opt.getOptionString("a");
128     if (option != null) {
129       robby.setAgentName(option);
130     }
131     
132     // ignore robots.txt
133
if (opt.getOptionBoolean("i")) {
134       robby.setIgnoreRobotsTxt(true);
135     }
136     
137     // wait time
138
option=opt.getOptionString("w");
139     if (option != null) {
140       try {
141     int waitTime=Integer.parseInt(option);
142     robby.setSleepTime(waitTime*1000);
143       } catch (NumberFormatException JavaDoc e) {}
144     }
145     
146     // print usage
147
if (opt.getOptionBoolean("?")) {
148       printUsage();
149       return;
150     }
151     
152     URL JavaDoc u = new URL JavaDoc(argv[argv.length-1]);
153
154     HttpDocToFile docStore=new HttpDocToFile(basedir);
155     docStore.setMinFileSize(minSize);
156
157     SystemOutHttpToolCallback statusInfo = new SystemOutHttpToolCallback();
158
159     robby.setStartURL(u);
160     robby.setDocManager(docStore);
161     robby.setHttpToolCallback(statusInfo);
162     
163     robby.run();
164     
165   }
166 }
167
Popular Tags