KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > Heritrix


1 /* Heritrix
2  *
3  * $Id: Heritrix.java,v 1.142.2.2 2007/01/13 01:31:06 stack-sf Exp $
4  *
5  * Created on May 15, 2003
6  *
7  * Copyright (C) 2003 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler;
26
27 import java.io.File JavaDoc;
28 import java.io.FileInputStream JavaDoc;
29 import java.io.FileNotFoundException JavaDoc;
30 import java.io.FileOutputStream JavaDoc;
31 import java.io.IOException JavaDoc;
32 import java.io.InputStream JavaDoc;
33 import java.io.PrintStream JavaDoc;
34 import java.io.PrintWriter JavaDoc;
35 import java.net.HttpURLConnection JavaDoc;
36 import java.net.InetAddress JavaDoc;
37 import java.net.URL JavaDoc;
38 import java.net.URLConnection JavaDoc;
39 import java.net.UnknownHostException JavaDoc;
40 import java.util.ArrayList JavaDoc;
41 import java.util.Arrays JavaDoc;
42 import java.util.Collection JavaDoc;
43 import java.util.Collections JavaDoc;
44 import java.util.Enumeration JavaDoc;
45 import java.util.Hashtable JavaDoc;
46 import java.util.Iterator JavaDoc;
47 import java.util.List JavaDoc;
48 import java.util.Map JavaDoc;
49 import java.util.Properties JavaDoc;
50 import java.util.StringTokenizer JavaDoc;
51 import java.util.TimeZone JavaDoc;
52 import java.util.Vector JavaDoc;
53 import java.util.logging.Level JavaDoc;
54 import java.util.logging.LogManager JavaDoc;
55 import java.util.logging.Logger JavaDoc;
56
57 import javax.management.Attribute JavaDoc;
58 import javax.management.AttributeList JavaDoc;
59 import javax.management.AttributeNotFoundException JavaDoc;
60 import javax.management.DynamicMBean JavaDoc;
61 import javax.management.InstanceAlreadyExistsException JavaDoc;
62 import javax.management.InstanceNotFoundException JavaDoc;
63 import javax.management.InvalidAttributeValueException JavaDoc;
64 import javax.management.MBeanInfo JavaDoc;
65 import javax.management.MBeanNotificationInfo JavaDoc;
66 import javax.management.MBeanOperationInfo JavaDoc;
67 import javax.management.MBeanRegistration JavaDoc;
68 import javax.management.MBeanRegistrationException JavaDoc;
69 import javax.management.MBeanServer JavaDoc;
70 import javax.management.MBeanServerFactory JavaDoc;
71 import javax.management.MalformedObjectNameException JavaDoc;
72 import javax.management.NotCompliantMBeanException JavaDoc;
73 import javax.management.ObjectName JavaDoc;
74 import javax.management.ReflectionException JavaDoc;
75 import javax.management.RuntimeOperationsException JavaDoc;
76 import javax.management.openmbean.CompositeData JavaDoc;
77 import javax.management.openmbean.CompositeDataSupport JavaDoc;
78 import javax.management.openmbean.CompositeType JavaDoc;
79 import javax.management.openmbean.OpenDataException JavaDoc;
80 import javax.management.openmbean.OpenMBeanAttributeInfoSupport JavaDoc;
81 import javax.management.openmbean.OpenMBeanConstructorInfoSupport JavaDoc;
82 import javax.management.openmbean.OpenMBeanInfoSupport JavaDoc;
83 import javax.management.openmbean.OpenMBeanOperationInfoSupport JavaDoc;
84 import javax.management.openmbean.OpenMBeanParameterInfo JavaDoc;
85 import javax.management.openmbean.OpenMBeanParameterInfoSupport JavaDoc;
86 import javax.management.openmbean.OpenType JavaDoc;
87 import javax.management.openmbean.SimpleType JavaDoc;
88 import javax.management.openmbean.TabularData JavaDoc;
89 import javax.management.openmbean.TabularDataSupport JavaDoc;
90 import javax.management.openmbean.TabularType JavaDoc;
91 import javax.naming.CompoundName JavaDoc;
92 import javax.naming.Context JavaDoc;
93 import javax.naming.NameNotFoundException JavaDoc;
94 import javax.naming.NamingException JavaDoc;
95 import javax.naming.NoInitialContextException JavaDoc;
96
97 import org.apache.commons.cli.Option;
98 import org.archive.crawler.admin.CrawlJob;
99 import org.archive.crawler.admin.CrawlJobErrorHandler;
100 import org.archive.crawler.admin.CrawlJobHandler;
101 import org.archive.crawler.datamodel.CredentialStore;
102 import org.archive.crawler.datamodel.credential.Credential;
103 import org.archive.crawler.event.CrawlStatusListener;
104 import org.archive.crawler.framework.AlertManager;
105 import org.archive.crawler.framework.CrawlController;
106 import org.archive.crawler.framework.exceptions.FatalConfigurationException;
107 import org.archive.crawler.framework.exceptions.InitializationException;
108 import org.archive.crawler.selftest.SelfTestCrawlJobHandler;
109 import org.archive.crawler.settings.XMLSettingsHandler;
110 import org.archive.io.SinkHandler;
111 import org.archive.io.SinkHandlerLogRecord;
112 import org.archive.net.UURI;
113 import org.archive.util.FileUtils;
114 import org.archive.util.IoUtils;
115 import org.archive.util.JmxUtils;
116 import org.archive.util.JndiUtils;
117 import org.archive.util.PropertyUtils;
118 import org.archive.util.TextUtils;
119
120 import sun.net.www.protocol.file.FileURLConnection;
121
122
123 /**
124  * Main class for Heritrix crawler.
125  *
126  * Heritrix is usually launched by a shell script that backgrounds heritrix
127  * and redirects all stdout and stderr emitted by heritrix to a log file. So
128  * that startup messages emitted subsequent to the redirection of stdout and
129  * stderr show on the console, this class prints usage or startup output
130  * such as where the web UI can be found, etc., to a STARTLOG that the shell
131  * script is waiting on. As soon as the shell script sees output in this file,
132  * it prints its content and breaks out of its wait.
133  * See ${HERITRIX_HOME}/bin/heritrix.
134  *
135  * <p>Heritrix can also be embedded or launched by webapp initialization or
136  * by JMX bootstrapping. So far I count 4 methods of instantiation:
137  * <ol>
138  * <li>From this class's main -- the method usually used;</li>
139  * <li>From the Heritrix UI (The local-instances.jsp) page;</li>
140  * <li>A creation by a JMX agent at the behest of a remote JMX client; and</li>
141  * <li>A container such as tomcat or jboss.</li>
142  * </ol>
143  *
144  * @author gojomo
145  * @author Kristinn Sigurdsson
146  * @author Stack
147  */

148 public class Heritrix implements DynamicMBean JavaDoc, MBeanRegistration JavaDoc {
149     /**
150      * Heritrix logging instance.
151      */

152     private static final Logger JavaDoc logger =
153         Logger.getLogger(Heritrix.class.getName());
154     
155     private static final File JavaDoc TMPDIR =
156         new File JavaDoc(System.getProperty("java.io.tmpdir", "/tmp"));
157
158     /**
159      * Name of the heritrix properties file.
160      */

161     private static final String JavaDoc PROPERTIES = "heritrix.properties";
162
163     /**
164      * Name of the key to use specifying alternate heritrix properties on
165      * command line.
166      */

167     private static final String JavaDoc PROPERTIES_KEY = PROPERTIES;
168     
169     /**
170      * Prefix used on properties we'll add to the System.properties list.
171      */

172     private static final String JavaDoc HERITRIX_PROPERTIES_PREFIX = "heritrix.";
173
174     /**
175      * Instance of web server if one was started.
176      */

177     private static SimpleHttpServer httpServer = null;
178
179     /**
180      * CrawlJob handler. Manages multiple crawl jobs at runtime.
181      */

182     private CrawlJobHandler jobHandler = null;
183
184     /**
185      * Heritrix start log file.
186      *
187      * This file contains standard out produced by this main class for startup
188      * only. Used by heritrix shell script. Name here MUST match that in the
189      * <code>bin/heritrix</code> shell script. This is a DEPENDENCY the shell
190      * wrapper has on this here java heritrix.
191      */

192     private static final String JavaDoc STARTLOG = "heritrix_dmesg.log";
193
194     /**
195      * Default encoding.
196      *
197      * Used for content when fetching if none specified.
198      */

199     public static final String JavaDoc DEFAULT_ENCODING = "ISO-8859-1";
200
201     /**
202      * Heritrix stderr/stdout log file.
203      *
204      * This file should have nothing in it except messages over which we have
205      * no control (JVM stacktrace, 3rd-party lib emissions). The wrapper
206      * startup script directs stderr/stdout here. This is an INTERDEPENDENCY
207      * this program has with the wrapper shell script. Shell can actually
208      * pass us an alternate to use for this file.
209      */

210     private static String JavaDoc DEFAULT_HERITRIX_OUT = "heritrix_out.log";
211
212     /**
213      * Where to write this class's startup output.
214      *
215      * This out should only be used if Heritrix is being run from the
216      * command-line.
217      */

218     private static PrintWriter JavaDoc out = null;
219
220     /**
221      * The org.archive package
222      */

223     private static final String JavaDoc ARCHIVE_PACKAGE = "org.archive.";
224
225     /**
226      * The crawler package.
227      */

228     private static final String JavaDoc CRAWLER_PACKAGE = Heritrix.class.getName().
229         substring(0, Heritrix.class.getName().lastIndexOf('.'));
230     
231     /**
232      * The root context for a webapp.
233      */

234     private static final String JavaDoc ROOT_CONTEXT = "/";
235
236     /**
237      * Set to true if application is started from command line.
238      */

239     private static boolean commandLine = false;
240     
241     /**
242      * True if container initialization has been run.
243      */

244     private static boolean containerInitialized = false;
245     
246     /**
247      * True if properties have been loaded.
248      */

249     private static boolean propertiesLoaded = false;
250     
251     private static final String JavaDoc JAR_SUFFIX = ".jar";
252     
253     private AlertManager alertManager;
254
255     /**
256      * The context of the GUI webapp. Default is root.
257      */

258     private static String JavaDoc adminContext = ROOT_CONTEXT;
259     
260     /**
261      * True if we're to put up a GUI.
262      * Cmdline processing can override.
263      */

264     private static boolean gui =
265         !PropertyUtils.getBooleanProperty("heritrix.cmdline.nowui");
266     
267     /**
268      * Port to put the GUI up on.
269      * Cmdline processing can override.
270      */

271     private static int guiPort = SimpleHttpServer.DEFAULT_PORT;
272
273     
274     /**
275      * A collection containing only localhost. Used as default value
276      * for guiHosts, and passed to SimpleHttpServer when doing selftest.
277      */

278     final private static Collection JavaDoc<String JavaDoc> LOCALHOST_ONLY =
279      Collections.unmodifiableList(Arrays.asList(new String JavaDoc[] { "127.0.0.1" }));
280
281     
282     /**
283      * Hosts to bind the GUI webserver to.
284      * By default, only contains localhost.
285      * Set to an empty collection to indicate that all available network
286      * interfaces should be used for the webserver.
287      */

288     private static Collection JavaDoc<String JavaDoc> guiHosts = LOCALHOST_ONLY;
289     
290     
291     /**
292      * Web UI server, realm, context name.
293      */

294     private static String JavaDoc ADMIN = "admin";
295     
296     // OpenMBean support.
297
/**
298      * The MBean server we're registered with (May be null).
299      */

300     private MBeanServer JavaDoc mbeanServer = null;
301     
302     /**
303      * MBean name we were registered as.
304      */

305     private ObjectName JavaDoc mbeanName = null;
306     
307     /**
308      * Keep reference to all instances of Heritrix.
309      * Used by the UI to figure which of the local Heritrice it should
310      * be going against and to figure what to shutdown on the way out (If
311      * there was always a JMX Agent, we wouldn't need to keep this list. We
312      * could always ask the JMX Agent for all instances. UPDATE: True we could
313      * always ask the JMX Agent but we might keep around this local reference
314      * because it will allow faster, less awkward -- think of marshalling the args
315      * for JMX invoke operation -- access to local Heritrix instances. A new
316      * usage for this instances Map is in CrawlJob#preRegister to find the hosting
317      * Heritrix instance).
318      */

319     private static Map JavaDoc<String JavaDoc,Heritrix> instances
320      = new Hashtable JavaDoc<String JavaDoc,Heritrix>();
321     
/**
 * MBean metadata for this instance. Assigned from buildMBeanInfo() in the
 * constructor and set back to null by destroy().
 */
322     private OpenMBeanInfoSupport JavaDoc openMBeanInfo;
/**
 * Attribute name constants. Both are collected into ATTRIBUTE_LIST below.
 * NOTE(review): presumably these are the attribute names exposed through
 * the DynamicMBean interface -- confirm against buildMBeanInfo().
 */
323     private final static String JavaDoc STATUS_ATTR = "Status";
324     private final static String JavaDoc VERSION_ATTR = "Version";
/**
 * Fixed-size list view (Arrays.asList) over the attribute name constants.
 */
325     private final static List JavaDoc ATTRIBUTE_LIST;
326     static {
327         ATTRIBUTE_LIST = Arrays.asList(new String JavaDoc [] {STATUS_ATTR,
328             VERSION_ATTR});
329     }
330     
/**
 * Operation name constants. Every one of them is collected into
 * OPERATION_LIST below.
 * NOTE(review): presumably these are the operation names accepted by the
 * DynamicMBean invoke implementation -- confirm against that method.
 */
331     private final static String JavaDoc START_OPER = "start";
332     private final static String JavaDoc STOP_OPER = "stop";
333     private final static String JavaDoc DESTROY_OPER = "destroy";
334     private final static String JavaDoc INTERRUPT_OPER = "interrupt";
335     private final static String JavaDoc START_CRAWLING_OPER = "startCrawling";
336     private final static String JavaDoc STOP_CRAWLING_OPER = "stopCrawling";
337     private final static String JavaDoc ADD_CRAWL_JOB_OPER = "addJob";
338     private final static String JavaDoc TERMINATE_CRAWL_JOB_OPER =
339         "terminateCurrentJob";
340     private final static String JavaDoc DELETE_CRAWL_JOB_OPER = "deleteJob";
341     private final static String JavaDoc ALERT_OPER = "alert";
342     private final static String JavaDoc ADD_CRAWL_JOB_BASEDON_OPER = "addJobBasedon";
343     private final static String JavaDoc PENDING_JOBS_OPER = "pendingJobs";
344     private final static String JavaDoc COMPLETED_JOBS_OPER = "completedJobs";
345     private final static String JavaDoc CRAWLEND_REPORT_OPER = "crawlendReport";
346     private final static String JavaDoc SHUTDOWN_OPER = "shutdown";
347     private final static String JavaDoc LOG_OPER = "log";
348     private final static String JavaDoc REBIND_JNDI_OPER = "rebindJNDI";
/**
 * Fixed-size list view (Arrays.asList) over the operation name constants.
 */
349     private final static List JavaDoc OPERATION_LIST;
350     static {
351         OPERATION_LIST = Arrays.asList(new String JavaDoc [] {START_OPER, STOP_OPER,
352             INTERRUPT_OPER, START_CRAWLING_OPER, STOP_CRAWLING_OPER,
353             ADD_CRAWL_JOB_OPER, ADD_CRAWL_JOB_BASEDON_OPER,
354             DELETE_CRAWL_JOB_OPER, ALERT_OPER, PENDING_JOBS_OPER,
355             COMPLETED_JOBS_OPER, CRAWLEND_REPORT_OPER, SHUTDOWN_OPER,
356             LOG_OPER, DESTROY_OPER, TERMINATE_CRAWL_JOB_OPER,
357             REBIND_JNDI_OPER});
358     }
/**
 * Open-type descriptors, initially null. Where they get built is not
 * visible in this part of the file.
 */
359     private CompositeType JavaDoc jobCompositeType = null;
360     private TabularType JavaDoc jobsTabularType = null;
/**
 * Key names "uid", "name" and "status".
 * NOTE(review): presumably the item names of jobCompositeType -- confirm
 * where the composite type is created.
 */
361     private static final String JavaDoc [] JOB_KEYS =
362         new String JavaDoc [] {"uid", "name", "status"};
363
/**
 * NOTE(review): presumably the web UI administrator login name; it is not
 * assigned anywhere in the visible part of the file -- confirm.
 */
364     private static String JavaDoc adminUsername;
365
/**
 * NOTE(review): presumably the web UI administrator password; it is not
 * assigned anywhere in the visible part of the file -- confirm.
 */
366     private static String JavaDoc adminPassword;
367     
368     /**
369      * No-argument constructor.
370      * Delegates with a null name and with JMX registration switched off, so
371      * the created instance is not asked to register itself with JMX. Assumed
372      * this constructor is used by something like a JMX agent creating an
373      * instance of Heritrix at the command of a remote client (in which case
     * Heritrix will be registered by the invoking agent).
374      * @throws IOException Propagated from the constructor delegated to.
375      */

376     public Heritrix() throws IOException JavaDoc {
377         this(null, false);
378     }
379     
/**
 * Constructor that passes a null name, which the delegated-to constructor
 * documents as meaning the default Heritrix instance.
 * @param jmxregister Passed through unchanged; true if we are to register
 * this instance with JMX agent.
 * @throws IOException Propagated from the constructor delegated to.
 */
380     public Heritrix(final boolean jmxregister) throws IOException JavaDoc {
381         this(null, jmxregister);
382     }
383     
384     /**
385      * Constructor.
     * Creates a new CrawlJobHandler on the directory returned by
     * getJobsdir() and delegates to the three-argument constructor.
386      * @param name If null, we bring up the default Heritrix instance.
387      * @param jmxregister True if we are to register this instance with JMX
388      * agent.
389      * @throws IOException Propagated from the constructor delegated to.
390      */

391     public Heritrix(final String JavaDoc name, final boolean jmxregister)
392     throws IOException JavaDoc {
393         this(name, jmxregister, new CrawlJobHandler(getJobsdir()));
394     }
395     
396     /**
397      * Constructor.
398      * @param name If null, we bring up the default Heritrix instance.
399      * @param jmxregister True if we are to register this instance with JMX
400      * agent.
401      * @param cjh CrawlJobHandler to use.
402      * @throws IOException
403      */

404     public Heritrix(final String JavaDoc name, final boolean jmxregister,
405             final CrawlJobHandler cjh)
406     throws IOException JavaDoc {
407         super();
408         containerInitialization();
409         this.jobHandler = cjh;
410         this.openMBeanInfo = buildMBeanInfo();
411         // Set up the alerting system. SinkHandler is also a global so will
412
// catch alerts for all running Heritrix instances. Will need to
413
// address (Add name of instance that threw the alert to SinkRecord?).
414
final SinkHandler sinkHandler = SinkHandler.getInstance();
415         if (sinkHandler == null) {
416             throw new NullPointerException JavaDoc("SinkHandler not found.");
417         }
418         // Adapt the alerting system to use SinkHandler.
419
this.alertManager = new AlertManager() {
420             public void add(SinkHandlerLogRecord record) {
421                 sinkHandler.publish(record);
422             }
423
424             public Vector JavaDoc getAll() {
425                 return sinkHandler.getAll();
426             }
427
428             public Vector JavaDoc getNewAll() {
429                 return sinkHandler.getAllUnread();
430             }
431
432             public SinkHandlerLogRecord get(String JavaDoc alertID) {
433                 return sinkHandler.get(Long.parseLong(alertID));
434             }
435             
436             public int getCount() {
437                 return sinkHandler.getCount();
438             }
439
440             public int getNewCount() {
441                 return sinkHandler.getUnreadCount();
442             }
443
444             public void remove(String JavaDoc alertID) {
445                 sinkHandler.remove(Long.parseLong(alertID));
446             }
447
448             public void read(String JavaDoc alertID) {
449                 sinkHandler.read(Long.parseLong(alertID));
450             }
451         };
452         
453         try {
454             Heritrix.registerHeritrix(this, name, jmxregister);
455         } catch (InstanceAlreadyExistsException JavaDoc e) {
456             throw new RuntimeException JavaDoc(e);
457         } catch (MBeanRegistrationException JavaDoc e) {
458             throw new RuntimeException JavaDoc(e);
459         } catch (NotCompliantMBeanException JavaDoc e) {
460             throw new RuntimeException JavaDoc(e);
461         } catch (MalformedObjectNameException JavaDoc e) {
462             throw new RuntimeException JavaDoc(e);
463         }
464     }
465     
466     /**
467      * Run setup tasks for this 'container'. Idempotent.
468      *
469      * @throws IOException
470      */

471     protected static void containerInitialization() throws IOException JavaDoc {
472         if (Heritrix.containerInitialized) {
473             return;
474         }
475         Heritrix.containerInitialized = true;
476         // Load up the properties. This invocation adds heritrix properties
477
// to system properties so all available via System.getProperty.
478
// Note, loadProperties and patchLogging have global effects. May be an
479
// issue if we're running inside a container such as tomcat or jboss.
480
Heritrix.loadProperties();
481         Heritrix.patchLogging();
482         Heritrix.configureTrustStore();
483         // Will run on SIGTERM but not on SIGKILL, unfortunately.
484
// Otherwise, ensures we cleanup after ourselves (Deregister from
485
// JMX and JNDI).
486
Runtime.getRuntime().addShutdownHook(
487             Heritrix.getShutdownThread(false, 0, "Heritrix shutdown hook"));
488         // Register this heritrix 'container' though we may be inside another
489
// tomcat or jboss container.
490
try {
491             registerContainerJndi();
492         } catch (Exception JavaDoc e) {
493             logger.log(Level.WARNING, "Failed jndi container registration.", e);
494         }
495     }
496     
497     /**
498      * Do inverse of construction. Used by anyone who does a 'new Heritrix' when
499      * they want to cleanup the instance.
500      * Of note, there may be Heritrix threads still hanging around after the
501      * call to destroy completes. They'll eventually go down after they've
502      * finished their cleanup routines. In particular, if you are watching
503      * Heritrix via JMX, you can see the Heritrix instance JMX bean unregister
504      * ahead of the CrawlJob JMX bean that its hosting.
505      */

506     public void destroy() {
507         stop();
508         try {
509             Heritrix.unregisterHeritrix(this);
510         } catch (InstanceNotFoundException JavaDoc e) {
511             e.printStackTrace();
512         } catch (MBeanRegistrationException JavaDoc e) {
513             e.printStackTrace();
514         } catch (NullPointerException JavaDoc e) {
515             e.printStackTrace();
516         }
517         this.jobHandler = null;
518         this.openMBeanInfo = null;
519     }
520     
521     /**
522      * Launch program.
523      * Optionally will launch a web server to host UI. Will also register
524      * Heritrix MBean with first found JMX Agent (Usually the 1.5.0 JVM
525      * Agent).
526      *
527      * @param args Command line arguments.
528      * @throws Exception
529      */

530     public static void main(String JavaDoc[] args)
531     throws Exception JavaDoc {
532         Heritrix.commandLine = true;
533         
534         // Set timezone here. Would be problematic doing it if we're running
535
// inside in a container.
536
TimeZone.setDefault(TimeZone.getTimeZone("GMT"));
537         
538         File JavaDoc startLog = new File JavaDoc(getHeritrixHome(), STARTLOG);
539         Heritrix.out = new PrintWriter JavaDoc(isDevelopment()?
540             System.out: new PrintStream JavaDoc(new FileOutputStream JavaDoc(startLog)));
541         
542         try {
543             containerInitialization();
544             String JavaDoc status = doCmdLineArgs(args);
545             if (status != null) {
546                 Heritrix.out.println(status);
547             }
548         }
549
550         catch(Exception JavaDoc e) {
551             // Show any exceptions in STARTLOG.
552
e.printStackTrace(Heritrix.out);
553             throw e;
554         }
555
556         finally {
557             // If not development, close the file that signals the wrapper
558
// script that we've started. Otherwise, just flush it; if in
559
// development, the output is probably a console.
560
if (!isDevelopment()) {
561                 if (Heritrix.out != null) {
562                     Heritrix.out.close();
563                 }
564                 System.out.println("Heritrix version: " +
565                         Heritrix.getVersion());
566             } else {
567                 if (Heritrix.out != null) {
568                     Heritrix.out.flush();
569                 }
570             }
571         }
572     }
573     
574     protected static String JavaDoc doCmdLineArgs(final String JavaDoc [] args)
575     throws Exception JavaDoc {
576         // Get defaults for commandline arguments from the properties file.
577
String JavaDoc tmpStr = PropertyUtils.
578             getPropertyOrNull("heritrix.context");
579         if (tmpStr != null) {
580             Heritrix.adminContext = tmpStr;
581         }
582         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.port");
583         if (tmpStr != null) {
584             Heritrix.guiPort = Integer.parseInt(tmpStr);
585         }
586         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.admin");
587         String JavaDoc adminLoginPassword = (tmpStr == null)? "": tmpStr;
588         String JavaDoc crawlOrderFile =
589             PropertyUtils.getPropertyOrNull("heritrix.cmdline.order");
590         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.run");
591         boolean runMode =
592             PropertyUtils.getBooleanProperty("heritrix.cmdline.run");
593         boolean selfTest = false;
594         String JavaDoc selfTestName = null;
595         CommandLineParser clp = new CommandLineParser(args, Heritrix.out,
596             Heritrix.getVersion());
597         List JavaDoc arguments = clp.getCommandLineArguments();
598         Option [] options = clp.getCommandLineOptions();
599
600         // Check passed argument. Only one argument, the ORDER_FILE is allowed.
601
// If one argument, make sure exists and xml suffix.
602
if (arguments.size() > 1) {
603             clp.usage(1);
604         } else if (arguments.size() == 1) {
605             crawlOrderFile = (String JavaDoc)arguments.get(0);
606             if (!(new File JavaDoc(crawlOrderFile).exists())) {
607                 clp.usage("ORDER.XML <" + crawlOrderFile +
608                     "> specified does not exist.", 1);
609             }
610             // Must end with '.xml'
611
if (crawlOrderFile.length() > 4 &&
612                     !crawlOrderFile.substring(crawlOrderFile.length() - 4).
613                         equalsIgnoreCase(".xml")) {
614                 clp.usage("ORDER.XML <" + crawlOrderFile +
615                     "> does not have required '.xml' suffix.", 1);
616             }
617         }
618
619         // Now look at options passed.
620
for (int i = 0; i < options.length; i++) {
621             switch(options[i].getId()) {
622                 case 'h':
623                     clp.usage();
624                     break;
625
626                 case 'a':
627                     adminLoginPassword = options[i].getValue();
628                     break;
629
630                 case 'n':
631                     if (crawlOrderFile == null) {
632                         clp.usage("You must specify an ORDER_FILE with" +
633                             " '--nowui' option.", 1);
634                     }
635                     Heritrix.gui = false;
636                     break;
637                 
638                 case 'b':
639                     Heritrix.guiHosts = parseHosts(options[i].getValue());
640                     break;
641
642                 case 'p':
643                     try {
644                         Heritrix.guiPort =
645                             Integer.parseInt(options[i].getValue());
646                     } catch (NumberFormatException JavaDoc e) {
647                         clp.usage("Failed parse of port number: " +
648                             options[i].getValue(), 1);
649                     }
650                     if (Heritrix.guiPort <= 0) {
651                         clp.usage("Nonsensical port number: " +
652                             options[i].getValue(), 1);
653                     }
654                     break;
655
656                 case 'r':
657                     runMode = true;
658                     break;
659
660                 case 's':
661                     selfTestName = options[i].getValue();
662                     selfTest = true;
663                     break;
664
665                 default:
666                     assert false: options[i].getId();
667             }
668         }
669
670         // Ok, we should now have everything to launch the program.
671
String JavaDoc status = null;
672         if (selfTest) {
673             // If more than just '--selftest' and '--port' passed, then
674
// there is confusion on what is being asked of us. Print usage
675
// rather than proceed.
676
for (int i = 0; i < options.length; i++) {
677                 if (options[i].getId() != 'p' && options[i].getId() != 's') {
678                     clp.usage(1);
679                 }
680             }
681
682             if (arguments.size() > 0) {
683                 // No arguments accepted by selftest.
684
clp.usage(1);
685             }
686             status = selftest(selfTestName, Heritrix.guiPort);
687         } else {
688             if (!isValidLoginPasswordString(adminLoginPassword)) {
689                 clp.usage("Invalid admin login:password value, or none "
690                         + "specified. ", 1);
691             }
692             
693             if (!Heritrix.gui) {
694                 if (options.length > 1) {
695                     // If more than just '--nowui' passed, then there is
696
// confusion on what is being asked of us. Print usage
697
// rather than proceed.
698
clp.usage(1);
699                 }
700                 Heritrix h = new Heritrix(true);
701                 status = h.doOneCrawl(crawlOrderFile);
702             } else {
703                 status = startEmbeddedWebserver(
704                         Heritrix.guiHosts, Heritrix.guiPort,
705                         adminLoginPassword);
706                 Heritrix h = new Heritrix(true);
707
708                 String JavaDoc tmp = h.launch(crawlOrderFile, runMode);
709                 if (tmp != null) {
710                     status += ('\n' + tmp);
711                 }
712             }
713         }
714         return status;
715     }
716     
717     /**
718      * @return The file we dump stdout and stderr into.
719      */

720     public static String JavaDoc getHeritrixOut() {
721         String JavaDoc tmp = System.getProperty("heritrix.out");
722         if (tmp == null || tmp.length() == 0) {
723             tmp = Heritrix.DEFAULT_HERITRIX_OUT;
724         }
725         return tmp;
726     }
727
728     /**
729      * Exploit <code>-Dheritrix.home</code> if available to us.
730      * Is current working dir if no heritrix.home property supplied.
731      * @return Heritrix home directory.
732      * @throws IOException
733      */

734     protected static File JavaDoc getHeritrixHome()
735     throws IOException JavaDoc {
736         File JavaDoc heritrixHome = null;
737         String JavaDoc home = System.getProperty("heritrix.home");
738         if (home != null && home.length() > 0) {
739             heritrixHome = new File JavaDoc(home);
740             if (!heritrixHome.exists()) {
741                 throw new IOException JavaDoc("HERITRIX_HOME <" + home +
742                     "> does not exist.");
743             }
744         } else {
745             heritrixHome = new File JavaDoc(new File JavaDoc("").getAbsolutePath());
746         }
747         return heritrixHome;
748     }
749     
750     /**
751      * @return The directory into which we put jobs. If the system property
752      * 'heritrix.jobsdir' is set, we will use its value in place of the default
753      * 'jobs' directory in the current working directory.
754      * @throws IOException
755      */

756     public static File JavaDoc getJobsdir() throws IOException JavaDoc {
757         Heritrix.loadProperties(); // if called in constructor
758
String JavaDoc jobsdirStr = System.getProperty("heritrix.jobsdir", "jobs");
759         File JavaDoc jobsdir = new File JavaDoc(jobsdirStr);
760         return (jobsdir.isAbsolute())?
761             jobsdir:
762             new File JavaDoc(getHeritrixHome(), jobsdirStr);
763     }
764     
765     /**
766      * Get and check for existence of expected subdir.
767      *
768      * If development flag set, then look for dir under src dir.
769      *
770      * @param subdirName Dir to look for.
771      * @return The extant subdir. Otherwise null if we're running
772      * in a webapp context where there is no conf directory available.
773      * @throws IOException if unable to find expected subdir.
774      */

775     protected static File JavaDoc getSubDir(String JavaDoc subdirName)
776     throws IOException JavaDoc {
777         return getSubDir(subdirName, true);
778     }
779     
780     /**
781      * Get and optionally check for existence of subdir.
782      *
783      * If development flag set, then look for dir under src dir.
784      *
785      * @param subdirName Dir to look for.
786      * @param fail True if we are to fail if directory does not
787      * exist; false if we are to return false if the directory does not exist.
788      * @return The extant subdir. Otherwise null if we're running
789      * in a webapp context where there is no subdir directory available.
790      * @throws IOException if unable to find expected subdir.
791      */

792     protected static File JavaDoc getSubDir(String JavaDoc subdirName, boolean fail)
793     throws IOException JavaDoc {
794         String JavaDoc path = isDevelopment()?
795             "src" + File.separator + subdirName:
796             subdirName;
797         File JavaDoc dir = new File JavaDoc(getHeritrixHome(), path);
798         if (!dir.exists()) {
799             if (fail) {
800                 throw new IOException JavaDoc("Cannot find subdir: " + subdirName);
801             }
802             dir = null;
803         }
804         return dir;
805     }
806     
807     /**
808      * Test string is valid login/password string.
809      *
810      * A valid login/password string has the login and password compounded
811      * w/ a ':' delimiter.
812      *
813      * @param str String to test.
814      * @return True if valid password/login string.
815      */

816     protected static boolean isValidLoginPasswordString(String JavaDoc str) {
817         boolean isValid = false;
818         StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(str, ":");
819         if (tokenizer.countTokens() == 2) {
820             String JavaDoc login = ((String JavaDoc)tokenizer.nextElement()).trim();
821             String JavaDoc password = ((String JavaDoc)tokenizer.nextElement()).trim();
822             if (login.length() > 0 && password.length() > 0) {
823                 isValid = true;
824             }
825         }
826         return isValid;
827     }
828
829     protected static boolean isDevelopment() {
830         return System.getProperty("heritrix.development") != null;
831     }
832
833     /**
834      * Load the heritrix.properties file.
835      *
836      * Adds any property that starts with
837      * <code>HERITRIX_PROPERTIES_PREFIX</code>
838      * or <code>ARCHIVE_PACKAGE</code>
839      * into system properties (except logging '.level' directives).
840      * @return Loaded properties.
841      * @throws IOException
842      */

843     protected static Properties JavaDoc loadProperties()
844     throws IOException JavaDoc {
845         if (Heritrix.propertiesLoaded) {
846             return System.getProperties();
847         }
848         Heritrix.propertiesLoaded = true;
849             
850         Properties JavaDoc properties = new Properties JavaDoc();
851         properties.load(getPropertiesInputStream());
852         
853         // Any property that begins with ARCHIVE_PACKAGE, make it
854
// into a system property. While iterating, check to see if anything
855
// defined on command-line, and if so, it overrules whats in
856
// heritrix.properties.
857
for (Enumeration JavaDoc e = properties.keys(); e.hasMoreElements();) {
858             String JavaDoc key = ((String JavaDoc)e.nextElement()).trim();
859             if (key.startsWith(ARCHIVE_PACKAGE) ||
860                     key.startsWith(HERITRIX_PROPERTIES_PREFIX)) {
861                 // Don't add the heritrix.properties entries that are
862
// changing the logging level of particular classes.
863
if (key.indexOf(".level") < 0) {
864                     if (System.getProperty(key) == null ||
865                         System.getProperty(key).length() == 0) {
866                         System.setProperty(key,
867                             properties.getProperty(key).trim());
868                     }
869                 }
870             }
871         }
872         return properties;
873     }
874
875     protected static InputStream JavaDoc getPropertiesInputStream()
876     throws IOException JavaDoc {
877         File JavaDoc file = null;
878         // Look to see if properties have been passed on the cmd-line.
879
String JavaDoc alternateProperties = System.getProperty(PROPERTIES_KEY);
880         if (alternateProperties != null && alternateProperties.length() > 0) {
881             file = new File JavaDoc(alternateProperties);
882         }
883         // Get properties from conf directory if one available.
884
if ((file == null || !file.exists()) && getConfdir(false) != null) {
885             file = new File JavaDoc(getConfdir(), PROPERTIES);
886             if (!file.exists()) {
887                 // If no properties file in the conf dir, set file back to
888
// null so we go looking for heritrix.properties on classpath.
889
file = null;
890             }
891         }
892         // If not on the command-line, there is no conf dir. Then get the
893
// properties from the CLASSPATH (Classpath file separator is always
894
// '/', whatever the platform.
895
InputStream JavaDoc is = (file != null)?
896             new FileInputStream JavaDoc(file):
897             Heritrix.class.getResourceAsStream("/" + PROPERTIES_KEY);
898         if (is == null) {
899             throw new IOException JavaDoc("Failed to load properties file from" +
900                 " filesystem or from classpath.");
901         }
902         return is;
903     }
904
905     /**
906      * If the user hasn't altered the default logging parameters, tighten them
907      * up somewhat: some of our libraries are way too verbose at the INFO or
908      * WARNING levels.
909      *
910      * This might be a problem running inside in someone else's
911      * container. Container's seem to prefer commons logging so we
912      * ain't messing them doing the below.
913      *
914      * @throws IOException
915      * @throws SecurityException
916      */

917     protected static void patchLogging()
918     throws SecurityException JavaDoc, IOException JavaDoc {
919         if (System.getProperty("java.util.logging.config.class") != null) {
920             return;
921         }
922
923         if (System.getProperty("java.util.logging.config.file") != null) {
924             return;
925         }
926
927         // No user-set logging properties established; use defaults
928
// from distribution-packaged 'heritrix.properties'.
929
LogManager.getLogManager().
930             readConfiguration(getPropertiesInputStream());
931     }
932
933     /**
934      * Configure our trust store.
935      *
936      * If system property is defined, then use it for our truststore. Otherwise
937      * use the heritrix truststore under conf directory if it exists.
938      *
939      * <p>If we're not launched from the command-line, we will not be able
940      * to find our truststore. The truststore is nor normally used so rare
941      * should this be a problem (In case where we don't use find our trust
942      * store, we'll use the 'default' -- either the JVMs or the containers).
943      */

944     protected static void configureTrustStore() {
945         // Below must be defined in jsse somewhere but can' find it.
946
final String JavaDoc TRUSTSTORE_KEY = "javax.net.ssl.trustStore";
947         String JavaDoc value = System.getProperty(TRUSTSTORE_KEY);
948         File JavaDoc confdir = null;
949         try {
950             confdir = getConfdir(false);
951         } catch (IOException JavaDoc e) {
952             logger.log(Level.WARNING, "Failed to get confdir.", e);
953         }
954         if ((value == null || value.length() <= 0) && confdir != null) {
955             // Use the heritrix store if it exists on disk.
956
File JavaDoc heritrixStore = new File JavaDoc(confdir, "heritrix.cacerts");
957             if(heritrixStore.exists()) {
958                 value = heritrixStore.getAbsolutePath();
959             }
960         }
961
962         if (value != null && value.length() > 0) {
963             System.setProperty(TRUSTSTORE_KEY, value);
964         }
965     }
966
967     /**
968      * Run the selftest
969      *
970      * @param oneSelfTestName Name of a test if we are to run one only rather
971      * than the default running all tests.
972      * @param port Port number to use for web UI.
973      *
974      * @exception Exception
975      * @return Status of how selftest startup went.
976      */

977     protected static String JavaDoc selftest(final String JavaDoc oneSelfTestName,
978             final int port)
979         throws Exception JavaDoc {
980         // Put up the webserver w/ the root and selftest webapps only.
981
final String JavaDoc SELFTEST = "selftest";
982         Heritrix.httpServer = new SimpleHttpServer(SELFTEST,
983             Heritrix.adminContext, LOCALHOST_ONLY, port, true);
984         // Set up digest auth for a section of the server so selftest can run
985
// auth tests. Looks like can only set one login realm going by the
986
// web.xml dtd. Otherwise, would be nice to selftest basic and digest.
987
// Have login, password and role all be SELFTEST. Must match what is
988
// in the selftest order.xml file.
989
Heritrix.httpServer.setAuthentication(SELFTEST, Heritrix.adminContext,
990             SELFTEST, SELFTEST, SELFTEST);
991         Heritrix.httpServer.startServer();
992         // Get the order file from the CLASSPATH unless we're running in dev
993
// environment.
994
File JavaDoc selftestDir = (isDevelopment())?
995             new File JavaDoc(getConfdir(), SELFTEST):
996             new File JavaDoc(File.separator + SELFTEST);
997         File JavaDoc crawlOrderFile = new File JavaDoc(selftestDir, "order.xml");
998         // Create a job based off the selftest order file. Then use this as
999
// a template to pass jobHandler.newJob(). Doing this gets our
1000
// selftest output to show under the jobs directory.
1001
// Pass as a seed a pointer to the webserver we just put up.
1002
final String JavaDoc ROOTURI = "127.0.0.1:" + Integer.toString(port);
1003        String JavaDoc selfTestUrl = "http://" + ROOTURI + '/';
1004        if (oneSelfTestName != null && oneSelfTestName.length() > 0) {
1005            selfTestUrl += (oneSelfTestName + '/');
1006        }
1007        CrawlJobHandler cjh = new SelfTestCrawlJobHandler(getJobsdir(),
1008                oneSelfTestName, selfTestUrl);
1009        Heritrix h = new Heritrix("Selftest", true, cjh);
1010        CrawlJob job = createCrawlJob(cjh, crawlOrderFile, "Template");
1011        job = h.getJobHandler().newJob(job, null, SELFTEST,
1012            "Integration self test", selfTestUrl, CrawlJob.PRIORITY_CRITICAL);
1013        h.getJobHandler().addJob(job);
1014        // Before we start, need to change some items in the settings file.
1015
CredentialStore cs = (CredentialStore)job.getSettingsHandler().
1016            getOrder().getAttribute(CredentialStore.ATTR_NAME);
1017        for (Iterator JavaDoc i = cs.iterator(null); i.hasNext();) {
1018            ((Credential)i.next()).setCredentialDomain(null, ROOTURI);
1019        }
1020        h.getJobHandler().startCrawler();
1021        StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
1022        buffer.append("Heritrix " + Heritrix.getVersion() +
1023                " selftest started.");
1024        buffer.append("\nSelftest first crawls " + selfTestUrl +
1025            " and then runs an analysis.");
1026        buffer.append("\nResult of analysis printed to " +
1027            getHeritrixOut() + " when done.");
1028        buffer.append("\nSelftest job directory for logs and arcs:\n" +
1029            job.getDirectory().getAbsolutePath());
1030        return buffer.toString();
1031    }
1032
1033    /**
1034     * Launch the crawler without a web UI and run the passed crawl only.
1035     *
1036     * Specialized version of {@link #launch()}.
1037     *
1038     * @param crawlOrderFile The crawl order to crawl.
1039     * @throws InitializationException
1040     * @throws InvalidAttributeValueException
1041     * @return Status string.
1042     */

1043    protected String JavaDoc doOneCrawl(String JavaDoc crawlOrderFile)
1044    throws InitializationException, InvalidAttributeValueException JavaDoc {
1045        return doOneCrawl(crawlOrderFile, null);
1046    }
1047    
1048    /**
1049     * Launch the crawler without a web UI and run passed crawl only.
1050     *
1051     * Specialized version of {@link #launch()}.
1052     *
1053     * @param crawlOrderFile The crawl order to crawl.
1054     * @param listener Register this crawl status listener before starting
1055     * crawl (You can use this listener to notice end-of-crawl).
1056     * @throws InitializationException
1057     * @throws InvalidAttributeValueException
1058     * @return Status string.
1059     */

1060    protected String JavaDoc doOneCrawl(String JavaDoc crawlOrderFile,
1061        CrawlStatusListener listener)
1062    throws InitializationException, InvalidAttributeValueException JavaDoc {
1063        XMLSettingsHandler handler =
1064            new XMLSettingsHandler(new File JavaDoc(crawlOrderFile));
1065        handler.initialize();
1066        CrawlController controller = new CrawlController();
1067        controller.initialize(handler);
1068        if (listener != null) {
1069            controller.addCrawlStatusListener(listener);
1070        }
1071        controller.requestCrawlStart();
1072        return "Crawl started using " + crawlOrderFile + ".";
1073    }
1074    
1075    /**
1076     * Launch the crawler for a web UI.
1077     *
1078     * Crawler hangs around waiting on jobs.
1079     *
1080     * @exception Exception
1081     * @return A status string describing how the launch went.
1082     * @throws Exception
1083     */

1084    public String JavaDoc launch() throws Exception JavaDoc {
1085        return launch(null, false);
1086    }
1087
1088    /**
1089     * Launch the crawler for a web UI.
1090     *
1091     * Crawler hangs around waiting on jobs.
1092     *
1093     * @param crawlOrderFile File to crawl. May be null.
1094     * @param runMode Whether crawler should be set to run mode.
1095     *
1096     * @exception Exception
1097     * @return A status string describing how the launch went.
1098     */

1099    public String JavaDoc launch(String JavaDoc crawlOrderFile, boolean runMode)
1100    throws Exception JavaDoc {
1101        String JavaDoc status = null;
1102        if (crawlOrderFile != null) {
1103            addCrawlJob(crawlOrderFile, "Autolaunched", "", "");
1104            if(runMode) {
1105                this.jobHandler.startCrawler();
1106                status = "Job being crawled: " + crawlOrderFile;
1107            } else {
1108                status = "Crawl job ready and pending: " + crawlOrderFile;
1109            }
1110        } else if(runMode) {
1111            // The use case is that jobs are to be run on a schedule and that
1112
// if the crawler is in run mode, then the scheduled job will be
1113
// run at appropriate time. Otherwise, not.
1114
this.jobHandler.startCrawler();
1115            status = "Crawler set to run mode.";
1116        }
1117        return status;
1118    }
1119    
1120    /**
1121     * Start up the embedded Jetty webserver instance.
1122     * This is done when we're run from the command-line.
1123     * @param port Port number to use for web UI.
1124     * @param adminLoginPassword Compound of login and password.
1125     * @throws Exception
1126     * @return Status on webserver startup.
1127     * @deprecated Use startEmbeddedWebserver(hosts, port, adminLoginPassword)
1128     */

1129    protected static String JavaDoc startEmbeddedWebserver(final int port,
1130        final boolean lho, final String JavaDoc adminLoginPassword)
1131    throws Exception JavaDoc {
1132        ArrayList JavaDoc<String JavaDoc> hosts = new ArrayList JavaDoc<String JavaDoc>();
1133        if (lho) {
1134            hosts.add("127.0.0.1");
1135        }
1136        return startEmbeddedWebserver(hosts, port, adminLoginPassword);
1137    }
1138
1139    
1140    /**
1141     * Parses a list of host names.
1142     *
1143     * <p>If the given string is <code>/</code>, then an empty
1144     * collection is returned. This indicates that all available network
1145     * interfaces should be used.
1146     *
1147     * <p>Otherwise, the string must contain a comma-separated list of
1148     * IP addresses or host names. The parsed list is then returned.
1149     *
1150     * @param hosts the string to parse
1151     * @return the parsed collection of hosts
1152     */

1153    private static Collection JavaDoc<String JavaDoc> parseHosts(String JavaDoc hosts) {
1154        hosts = hosts.trim();
1155        if (hosts.equals("/")) {
1156            return new ArrayList JavaDoc<String JavaDoc>(1);
1157        }
1158        String JavaDoc[] hostArray = hosts.split(",");
1159        for (int i = 0; i < hostArray.length; i++) {
1160            hostArray[i] = hostArray[i].trim();
1161        }
1162        return Arrays.asList(hostArray);
1163    }
1164    
1165    /**
1166     * Start up the embedded Jetty webserver instance.
1167     * This is done when we're run from the command-line.
1168     *
1169     * @param hosts a list of IP addresses or hostnames to bind to, or an
1170     * empty collection to bind to all available network
1171     * interfaces
1172     * @param port Port number to use for web UI.
1173     * @param adminLoginPassword Compound of login and password.
1174     * @throws Exception
1175     * @return Status on webserver startup.
1176     */

1177    protected static String JavaDoc startEmbeddedWebserver(Collection JavaDoc<String JavaDoc> hosts,
1178        int port, String JavaDoc adminLoginPassword)
1179    throws Exception JavaDoc {
1180        adminUsername = adminLoginPassword.
1181            substring(0, adminLoginPassword.indexOf(":"));
1182        adminPassword = adminLoginPassword.
1183            substring(adminLoginPassword.indexOf(":") + 1);
1184        Heritrix.httpServer = new SimpleHttpServer("admin",
1185            Heritrix.adminContext, hosts, port, false);
1186        
1187        final String JavaDoc DOTWAR = ".war";
1188        final String JavaDoc SELFTEST = "selftest";
1189        
1190        // Look for additional WAR files beyond 'selftest' and 'admin'.
1191
File JavaDoc[] wars = getWarsdir().listFiles();
1192        for(int i = 0; i < wars.length; i++) {
1193            if(wars[i].isFile()) {
1194                final String JavaDoc warName = wars[i].getName();
1195                final String JavaDoc warNameNC = warName.toLowerCase();
1196                if(warNameNC.endsWith(DOTWAR) &&
1197                        !warNameNC.equals(ADMIN + DOTWAR) &&
1198                        !warNameNC.equals(SELFTEST + DOTWAR)) {
1199                    int dot = warName.indexOf('.');
1200                    Heritrix.httpServer.addWebapp(warName.substring(0, dot),
1201                            null, true);
1202                }
1203            }
1204        }
1205        
1206        // Name of passed 'realm' must match what is in configured in web.xml.
1207
// We'll use ROLE for 'realm' and 'role'.
1208
final String JavaDoc ROLE = ADMIN;
1209        Heritrix.httpServer.setAuthentication(ROLE, Heritrix.adminContext,
1210            adminUsername, adminPassword, ROLE);
1211        Heritrix.httpServer.startServer();
1212        StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
1213        buffer.append("Heritrix " + Heritrix.getVersion() + " is running.");
1214        for (String JavaDoc host: httpServer.getHosts()) {
1215            buffer.append("\nWeb console is at: http://");
1216            buffer.append(host).append(':').append(port);
1217        }
1218        buffer.append("\nWeb console login and password: " +
1219            adminUsername + "/" + adminPassword);
1220        return buffer.toString();
1221    }
1222    
1223    /**
1224     * Replace existing administrator login info with new info.
1225     *
1226     * @param newUsername new administrator login username
1227     * @param newPassword new administrator login password
1228     */

1229    public static void resetAuthentication(String JavaDoc newUsername,
1230            String JavaDoc newPassword) {
1231        Heritrix.httpServer.resetAuthentication(ADMIN, adminUsername,
1232                newUsername, newPassword);
1233        adminUsername = newUsername;
1234        adminPassword = newPassword;
1235        logger.info("administrative login changed to "
1236                +newUsername+":"+newPassword);
1237    }
1238
1239    protected static CrawlJob createCrawlJob(CrawlJobHandler handler,
1240            File JavaDoc crawlOrderFile, String JavaDoc name)
1241    throws InvalidAttributeValueException JavaDoc {
1242        XMLSettingsHandler settings = new XMLSettingsHandler(crawlOrderFile);
1243        settings.initialize();
1244        return new CrawlJob(handler.getNextJobUID(), name, settings,
1245            new CrawlJobErrorHandler(Level.SEVERE),
1246            CrawlJob.PRIORITY_HIGH,
1247            crawlOrderFile.getAbsoluteFile().getParentFile());
1248    }
1249    
1250    /**
1251     * This method is called when we have an order file to hand that we want
1252     * to base a job on. It leaves the order file in place and just starts up
1253     * a job that uses all the order points to for locations for logs, etc.
1254     * @param orderPathOrUrl Path to an order file or to a seeds file.
1255     * @param name Name to use for this job.
1256     * @param description
1257     * @param seeds
1258     * @return A status string.
1259     * @throws IOException
1260     * @throws FatalConfigurationException
1261     */

1262    public String JavaDoc addCrawlJob(String JavaDoc orderPathOrUrl, String JavaDoc name,
1263            String JavaDoc description, String JavaDoc seeds)
1264    throws IOException JavaDoc, FatalConfigurationException {
1265        if (!UURI.hasScheme(orderPathOrUrl)) {
1266            // Assume its a file path.
1267
return addCrawlJob(new File JavaDoc(orderPathOrUrl), name, description,
1268                    seeds);
1269        }
1270
1271        // Otherwise, must be an URL.
1272
URL JavaDoc url = new URL JavaDoc(orderPathOrUrl);
1273
1274        // Handle http and file only for now (Tried to handle JarUrlConnection
1275
// but too awkward undoing jar stream. Rather just look for URLs that
1276
// end in '.jar').
1277
String JavaDoc result = null;
1278        URLConnection JavaDoc connection = url.openConnection();
1279        if (connection instanceof HttpURLConnection JavaDoc) {
1280            result = addCrawlJob(url, (HttpURLConnection JavaDoc)connection, name,
1281                description, seeds);
1282        } else if (connection instanceof FileURLConnection) {
1283            result = addCrawlJob(new File JavaDoc(url.getPath()), name, description,
1284                seeds);
1285        } else {
1286            throw new UnsupportedOperationException JavaDoc("No support for "
1287                + connection);
1288        }
1289
1290        return result;
1291    }
1292    
1293    protected String JavaDoc addCrawlJob(final URL JavaDoc url,
1294            final HttpURLConnection JavaDoc connection,
1295            final String JavaDoc name, final String JavaDoc description, final String JavaDoc seeds)
1296    throws IOException JavaDoc, FatalConfigurationException {
1297        // Look see if its a jar file. If it is undo it.
1298
boolean isJar = url.getPath() != null &&
1299            url.getPath().toLowerCase().endsWith(JAR_SUFFIX);
1300        // If http url connection, bring down the resource local.
1301
File JavaDoc localFile = File.createTempFile(Heritrix.class.getName(),
1302           isJar? JAR_SUFFIX: null, TMPDIR);
1303        connection.connect();
1304        String JavaDoc result = null;
1305        try {
1306            IoUtils.readFullyToFile(connection.getInputStream(), localFile);
1307            result = addCrawlJob(localFile, name, description, seeds);
1308        } catch (IOException JavaDoc ioe) {
1309            // Cleanup if an Exception.
1310
localFile.delete();
1311            localFile = null;
1312        } finally {
1313             connection.disconnect();
1314             // If its a jar file, then we made a job based on the jar contents.
1315
// Its no longer needed. Remove it. If not a jar file, then leave
1316
// the file around because the job depends on it.
1317
if (isJar && localFile != null && localFile.exists()) {
1318                 localFile.delete();
1319             }
1320        }
1321        return result;
1322    }
1323    
1324    protected String JavaDoc addCrawlJob(final File JavaDoc order, final String JavaDoc name,
1325            final String JavaDoc description, final String JavaDoc seeds)
1326    throws FatalConfigurationException, IOException JavaDoc {
1327        CrawlJob addedJob = null;
1328        if (this.jobHandler == null) {
1329            throw new NullPointerException JavaDoc("Heritrix jobhandler is null.");
1330        }
1331        try {
1332            if (order.getName().toLowerCase().endsWith(JAR_SUFFIX)) {
1333                return addCrawlJobBasedonJar(order, name, description, seeds);
1334            }
1335            addedJob = this.jobHandler.
1336                addJob(createCrawlJob(this.jobHandler, order, name));
1337        } catch (InvalidAttributeValueException JavaDoc e) {
1338            FatalConfigurationException fce = new FatalConfigurationException(
1339                "Converted InvalidAttributeValueException on " +
1340                order.getAbsolutePath() + ": " + e.getMessage());
1341            fce.setStackTrace(e.getStackTrace());
1342        }
1343        return addedJob != null? addedJob.getUID(): null;
1344    }
1345    
1346    /**
1347     * Undo jar file and use as basis for a new job.
1348     * @param jarFile Pointer to file that holds jar.
1349     * @param name Name to use for new job.
1350     * @param description
1351     * @param seeds
1352     * @return Message.
1353     * @throws IOException
1354     * @throws FatalConfigurationException
1355     */

1356    protected String JavaDoc addCrawlJobBasedonJar(final File JavaDoc jarFile,
1357            final String JavaDoc name, final String JavaDoc description, final String JavaDoc seeds)
1358    throws IOException JavaDoc, FatalConfigurationException {
1359        if (jarFile == null || !jarFile.exists()) {
1360            throw new FileNotFoundException JavaDoc(jarFile.getAbsolutePath());
1361        }
1362        // Create a directory with a tmp name. Do it by first creating file,
1363
// removing it, then creating the directory. There is a hole during
1364
// which the OS may put a file of same exact name in our way but
1365
// unlikely.
1366
File JavaDoc dir = File.createTempFile(Heritrix.class.getName(), ".expandedjar",
1367            TMPDIR);
1368        dir.delete();
1369        dir.mkdir();
1370        try {
1371            org.archive.crawler.util.IoUtils.unzip(jarFile, dir);
1372            // Expect to find an order file at least.
1373
File JavaDoc orderFile = new File JavaDoc(dir, "order.xml");
1374            if (!orderFile.exists()) {
1375                throw new IOException JavaDoc("Missing order: " +
1376                    orderFile.getAbsolutePath());
1377            }
1378            CrawlJob job =
1379                createCrawlJobBasedOn(orderFile, name, description, seeds);
1380            // Copy into place any seeds and settings directories before we
1381
// add job to Heritrix to crawl.
1382
File JavaDoc seedsFile = new File JavaDoc(dir, "seeds.txt");
1383            if (seedsFile.exists()) {
1384                FileUtils.copyFiles(seedsFile, new File JavaDoc(job.getDirectory(),
1385                    seedsFile.getName()));
1386            }
1387            File JavaDoc settingsDir = new File JavaDoc(dir, "settings");
1388            if (settingsDir.exists()) {
1389                FileUtils.copyFiles(settingsDir, job.getDirectory());
1390            }
1391            addCrawlJob(job);
1392            return job.getUID();
1393         } finally {
1394             // After job has been added, no more need of expanded content.
1395
// (Let the caller be responsible for cleanup of jar. Sometimes
1396
// its should be deleted -- when its a local copy of a jar pulled
1397
// across the net -- wherease other times, if its a jar passed
1398
// in w/ a 'file' scheme, it shouldn't be deleted.
1399
org.archive.util.FileUtils.deleteDir(dir);
1400         }
1401    }
1402    
1403    public String JavaDoc addCrawlJobBasedOn(String JavaDoc jobUidOrProfile,
1404            String JavaDoc name, String JavaDoc description, String JavaDoc seeds) {
1405        try {
1406            CrawlJob cj = getJobHandler().getJob(jobUidOrProfile);
1407            if (cj == null) {
1408                throw new InvalidAttributeValueException JavaDoc(jobUidOrProfile +
1409                    " is not a job UID or profile name (Job UIDs are " +
1410                    " usually the 14 digit date portion of job name).");
1411            }
1412            CrawlJob job = addCrawlJobBasedOn(
1413                cj.getSettingsHandler().getOrderFile(), name, description,
1414                    seeds);
1415            return job.getUID();
1416        } catch (Exception JavaDoc e) {
1417            e.printStackTrace();
1418            return "Exception on " + jobUidOrProfile + ": " + e.getMessage();
1419        }
1420    }
1421    
1422    protected CrawlJob addCrawlJobBasedOn(final File JavaDoc orderFile,
1423        final String JavaDoc name, final String JavaDoc description, final String JavaDoc seeds)
1424    throws FatalConfigurationException {
1425        return addCrawlJob(createCrawlJobBasedOn(orderFile, name, description,
1426                seeds));
1427    }
1428    
1429    protected CrawlJob createCrawlJobBasedOn(final File JavaDoc orderFile,
1430            final String JavaDoc name, final String JavaDoc description, final String JavaDoc seeds)
1431    throws FatalConfigurationException {
1432        CrawlJob job = getJobHandler().newJob(orderFile, name, description,
1433                seeds);
1434        return CrawlJobHandler.ensureNewJobWritten(job, name, description);
1435    }
1436    
1437    protected CrawlJob addCrawlJob(final CrawlJob job) {
1438        return getJobHandler().addJob(job);
1439    }
1440    
1441    public void startCrawling() {
1442        if (getJobHandler() == null) {
1443            throw new NullPointerException JavaDoc("Heritrix jobhandler is null.");
1444        }
1445        getJobHandler().startCrawler();
1446    }
1447
1448    public void stopCrawling() {
1449        if (getJobHandler() == null) {
1450            throw new NullPointerException JavaDoc("Heritrix jobhandler is null.");
1451        }
1452        getJobHandler().stopCrawler();
1453    }
1454    
1455    /**
1456     * Get the heritrix version.
1457     *
1458     * @return The heritrix version. May be null.
1459     */

1460    public static String JavaDoc getVersion() {
1461        return System.getProperty("heritrix.version");
1462    }
1463
1464    /**
1465     * Get the job handler
1466     *
1467     * @return The CrawlJobHandler being used.
1468     */

1469    public CrawlJobHandler getJobHandler() {
1470        return this.jobHandler;
1471    }
1472
1473    /**
1474     * Get the configuration directory.
1475     * @return The conf directory under HERITRIX_HOME or null if none can
1476     * be found.
1477     * @throws IOException
1478     */

1479    public static File JavaDoc getConfdir()
1480    throws IOException JavaDoc {
1481        return getConfdir(true);
1482    }
1483
1484    /**
1485     * Get the configuration directory.
1486     * @param fail Throw IOE if can't find directory if true, else just
1487     * return null.
1488     * @return The conf directory under HERITRIX_HOME or null (or an IOE) if
1489     * can't be found.
1490     * @throws IOException
1491     */

1492    public static File JavaDoc getConfdir(final boolean fail)
1493    throws IOException JavaDoc {
1494        final String JavaDoc key = "heritrix.conf";
1495        // Look to see if heritrix.conf property passed on the cmd-line.
1496
String JavaDoc tmp = System.getProperty(key);
1497        // if not fall back to default $HERITIX_HOME/conf
1498
if (tmp == null || tmp.length() == 0) {
1499            return getSubDir("conf", fail);
1500        }
1501        File JavaDoc dir = new File JavaDoc(tmp);
1502        if (!dir.exists()) {
1503            if (fail) {
1504                throw new IOException JavaDoc("Cannot find conf dir: " + tmp);
1505            } else {
1506                logger.log(Level.WARNING, "Specified " + key +
1507                    " dir does not exist. Falling back on default");
1508            }
1509            dir = getSubDir("conf", fail);
1510        }
1511        return dir;
1512    }
1513
1514    /**
1515     * @return Returns the httpServer. May be null if one was not started.
1516     */

1517    public static SimpleHttpServer getHttpServer() {
1518        return Heritrix.httpServer;
1519    }
1520
1521    /**
1522     * @throws IOException
1523     * @return Returns the directory under which reside the WAR files
1524     * we're to load into the servlet container.
1525     */

1526    public static File JavaDoc getWarsdir()
1527    throws IOException JavaDoc {
1528        return getSubDir("webapps");
1529    }
1530
1531    /**
1532     * Prepars for program shutdown. This method does it's best to prepare the
1533     * program so that it can exit normally. It will kill the httpServer and
1534     * terminate any running job.<br>
1535     * It is advisible to wait a few (~1000) millisec after calling this method
1536     * and before calling performHeritrixShutDown() to allow as many threads as
1537     * possible to finish what they are doing.
1538     */

1539    public static void prepareHeritrixShutDown() {
1540        // Stop and destroy all running Heritrix instances.
1541
// Get array of the key set to avoid CCEs for case where call to
1542
// destroy does a remove of an instance from Heritrix.instances.
1543
final Object JavaDoc [] keys = Heritrix.instances.keySet().toArray();
1544        for (int i = 0; i < keys.length; i++) {
1545            ((Heritrix)Heritrix.instances.get(keys[i])).destroy();
1546        }
1547        
1548        try {
1549            deregisterJndi(getJndiContainerName());
1550        } catch (NameNotFoundException JavaDoc e) {
1551            // We were probably unbound already. Ignore.
1552
logger.log(Level.WARNING, "deregistration of jndi", e);
1553        } catch (Exception JavaDoc e) {
1554            e.printStackTrace();
1555        }
1556        
1557        if(Heritrix.httpServer != null) {
1558            // Shut down the web access.
1559
try {
1560                Heritrix.httpServer.stopServer();
1561            } catch (InterruptedException JavaDoc e) {
1562                // Generally this can be ignored, but we'll print a stack trace
1563
// just in case.
1564
e.printStackTrace();
1565            } finally {
1566                Heritrix.httpServer = null;
1567            }
1568        }
1569    }
1570
1571    /**
1572     * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1573     * prior to this method.
1574     */

1575    public static void performHeritrixShutDown() {
1576        performHeritrixShutDown(0);
1577    }
1578
1579    /**
1580     * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1581     * prior to this method.
1582     *
1583     * @param exitCode Code to pass System.exit.
1584     *
1585     */

1586    public static void performHeritrixShutDown(int exitCode) {
1587        System.exit(exitCode);
1588    }
1589
1590    /**
1591     * Shutdown all running heritrix instances and the JVM.
1592     * Assumes stop has already been called.
1593     * @param exitCode Exit code to pass system exit.
1594     */

1595    public static void shutdown(final int exitCode) {
1596        getShutdownThread(true, exitCode, "Heritrix shutdown").start();
1597    }
1598    
1599    protected static Thread JavaDoc getShutdownThread(final boolean sysexit,
1600            final int exitCode, final String JavaDoc name) {
1601        Thread JavaDoc t = new Thread JavaDoc(name) {
1602            public void run() {
1603                Heritrix.prepareHeritrixShutDown();
1604                if (sysexit) {
1605                    Heritrix.performHeritrixShutDown(exitCode);
1606                }
1607            }
1608        };
1609        t.setDaemon(true);
1610        return t;
1611    }
1612    
1613    public static void shutdown() {
1614        shutdown(0);
1615    }
1616    
1617    /**
1618     * Register Heritrix with JNDI, JMX, and with the static hashtable of all
1619     * Heritrix instances known to this JVM.
1620     *
1621     * If launched from cmdline, register Heritrix MBean if an agent to register
1622     * ourselves with. Usually this method will only have effect if we're
1623     * running in a 1.5.0 JDK and command line options such as
1624     * '-Dcom.sun.management.jmxremote.port=8082
1625     * -Dcom.sun.management.jmxremote.authenticate=false
1626     * -Dcom.sun.management.jmxremote.ssl=false' are supplied.
1627     * See <a HREF="http://java.sun.com/j2se/1.5.0/docs/guide/management/agent.html">Monitoring
1628     * and Management Using JMX</a>
1629     * for more on the command line options and how to connect to the
1630     * Heritrix bean using the JDK 1.5.0 jconsole tool. We register currently
1631     * with first server we find (TODO: Make configurable).
1632     *
1633     * <p>If we register successfully with a JMX agent, then part of the
1634     * registration will include our registering ourselves with JNDI.
1635     *
1636     * <p>Finally, add the heritrix instance to the hashtable of all the
1637     * Heritrix instances floating in the current VM. This latter registeration
1638     * happens whether or no there is a JMX agent to register with. This is
1639     * a list we keep out of convenience so its easy iterating over all
1640     * all instances calling stop when main application is going down.
1641     *
1642     * @param h Instance of heritrix to register.
1643     * @param name Name to use for this Heritrix instance.
1644     * @param jmxregister True if we are to register this instance with JMX.
1645     * @throws NullPointerException
1646     * @throws MalformedObjectNameException
1647     * @throws NotCompliantMBeanException
1648     * @throws MBeanRegistrationException
1649     * @throws InstanceAlreadyExistsException
1650     */

1651    protected static void registerHeritrix(final Heritrix h,
1652            final String JavaDoc name, final boolean jmxregister)
1653    throws MalformedObjectNameException JavaDoc, InstanceAlreadyExistsException JavaDoc,
1654    MBeanRegistrationException JavaDoc, NotCompliantMBeanException JavaDoc {
1655        MBeanServer JavaDoc server = getMBeanServer();
1656        if (server != null) {
1657            // Are we to manage the jmx registration? Or is it being done for
1658
// us by an external process: e.g. This instance was created by
1659
// MBeanAgent.
1660
if (jmxregister) {
1661                ObjectName JavaDoc objName = (name == null || name.length() <= 0)?
1662                    getJmxObjectName(): getJmxObjectName(name);
1663                registerMBean(server, h, objName);
1664            }
1665        } else {
1666            // JMX ain't available. Put this instance into the list of Heritrix
1667
// instances so findable by the UI (Normally this is done in the
1668
// JMX postRegister routine below). When no JMX, can only have
1669
// one instance of Heritrix so no need to do the deregisteration.
1670
Heritrix.instances.put(h.getNoJmxName(), h);
1671        }
1672    }
1673    
1674    protected static void unregisterHeritrix(final Heritrix h)
1675    throws InstanceNotFoundException JavaDoc, MBeanRegistrationException JavaDoc,
1676            NullPointerException JavaDoc {
1677        MBeanServer JavaDoc server = getMBeanServer();
1678        if (server != null) {
1679            server.unregisterMBean(h.mbeanName);
1680        } else {
1681            // JMX ain't available. Remove from list of Heritrix instances.
1682
// Usually this is done by the JMX postDeregister below.
1683
Heritrix.instances.remove(h.getNoJmxName());
1684        }
1685    }
1686    
1687    /**
1688     * Get MBeanServer.
1689     * Currently uses first MBeanServer found. This will definetly not be whats
1690     * always wanted. TODO: Make which server settable. Also, if none, put up
1691     * our own MBeanServer.
1692     * @return An MBeanServer to register with or null.
1693     */

1694    public static MBeanServer JavaDoc getMBeanServer() {
1695        MBeanServer JavaDoc result = null;
1696        List JavaDoc servers = MBeanServerFactory.findMBeanServer(null);
1697        if (servers == null) {
1698            return result;
1699        }
1700        for (Iterator JavaDoc i = servers.iterator(); i.hasNext();) {
1701            MBeanServer JavaDoc server = (MBeanServer JavaDoc)i.next();
1702            if (server == null) {
1703                continue;
1704            }
1705            result = server;
1706            break;
1707        }
1708        return result;
1709    }
1710    
1711    public static MBeanServer JavaDoc registerMBean(final Object JavaDoc objToRegister,
1712            final String JavaDoc name, final String JavaDoc type)
1713    throws InstanceAlreadyExistsException JavaDoc, MBeanRegistrationException JavaDoc,
1714    NotCompliantMBeanException JavaDoc {
1715        MBeanServer JavaDoc server = getMBeanServer();
1716        if (server != null) {
1717            server = registerMBean(server, objToRegister, name, type);
1718        }
1719        return server;
1720    }
1721    
1722    public static MBeanServer JavaDoc registerMBean(final MBeanServer JavaDoc server,
1723            final Object JavaDoc objToRegister, final String JavaDoc name, final String JavaDoc type)
1724    throws InstanceAlreadyExistsException JavaDoc, MBeanRegistrationException JavaDoc,
1725    NotCompliantMBeanException JavaDoc {
1726        try {
1727            Hashtable JavaDoc<String JavaDoc,String JavaDoc> ht = new Hashtable JavaDoc<String JavaDoc,String JavaDoc>();
1728            ht.put(JmxUtils.NAME, name);
1729            ht.put(JmxUtils.TYPE, type);
1730            registerMBean(server, objToRegister,
1731                new ObjectName JavaDoc(CRAWLER_PACKAGE, ht));
1732        } catch (MalformedObjectNameException JavaDoc e) {
1733            e.printStackTrace();
1734        }
1735        return server;
1736    }
1737        
1738    public static MBeanServer JavaDoc registerMBean(final MBeanServer JavaDoc server,
1739                final Object JavaDoc objToRegister, final ObjectName JavaDoc objName)
1740    throws InstanceAlreadyExistsException JavaDoc, MBeanRegistrationException JavaDoc,
1741    NotCompliantMBeanException JavaDoc {
1742        server.registerMBean(objToRegister, objName);
1743        return server;
1744    }
1745    
1746    public static void unregisterMBean(final MBeanServer JavaDoc server,
1747            final String JavaDoc name, final String JavaDoc type) {
1748        if (server == null) {
1749            return;
1750        }
1751        try {
1752            unregisterMBean(server, getJmxObjectName(name, type));
1753        } catch (MalformedObjectNameException JavaDoc e) {
1754            e.printStackTrace();
1755        }
1756    }
1757            
1758    public static void unregisterMBean(final MBeanServer JavaDoc server,
1759            final ObjectName JavaDoc name) {
1760        try {
1761            server.unregisterMBean(name);
1762            logger.info("Unregistered bean " + name.getCanonicalName());
1763        } catch (InstanceNotFoundException JavaDoc e) {
1764            e.printStackTrace();
1765        } catch (MBeanRegistrationException JavaDoc e) {
1766            e.printStackTrace();
1767        } catch (NullPointerException JavaDoc e) {
1768            e.printStackTrace();
1769        }
1770    }
1771    
1772    /**
1773     * @return Name to use when no JMX agent available.
1774     */

1775    protected String JavaDoc getNoJmxName() {
1776        return this.getClass().getName();
1777    }
1778    
1779    public static ObjectName JavaDoc getJmxObjectName()
1780    throws MalformedObjectNameException JavaDoc, NullPointerException JavaDoc {
1781        return getJmxObjectName("Heritrix", JmxUtils.SERVICE);
1782    }
1783    
1784    public static ObjectName JavaDoc getJmxObjectName(final String JavaDoc name)
1785    throws MalformedObjectNameException JavaDoc, NullPointerException JavaDoc {
1786        return getJmxObjectName(name, JmxUtils.SERVICE);
1787    }
1788    
1789    public static ObjectName JavaDoc getJmxObjectName(final String JavaDoc name,
1790            final String JavaDoc type)
1791    throws MalformedObjectNameException JavaDoc, NullPointerException JavaDoc {
1792        Hashtable JavaDoc<String JavaDoc,String JavaDoc> ht = new Hashtable JavaDoc<String JavaDoc,String JavaDoc>();
1793        ht.put(JmxUtils.NAME, name);
1794        ht.put(JmxUtils.TYPE, type);
1795        return new ObjectName JavaDoc(CRAWLER_PACKAGE, ht);
1796    }
1797    
1798    /**
1799     * @return Returns true if Heritrix was launched from the command line.
1800     * (When launched from command line, we do stuff like put up a web server
1801     * to manage our web interface and we register ourselves with the first
1802     * available jmx agent).
1803     */

1804    public static boolean isCommandLine() {
1805        return Heritrix.commandLine;
1806    }
1807    
1808    /**
1809     * @return True if heritrix has been started.
1810     */

1811    public boolean isStarted() {
1812        return this.jobHandler != null;
1813    }
1814    
1815    public String JavaDoc getStatus() {
1816        StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
1817        if (this.getJobHandler() != null) {
1818            buffer.append("isRunning=");
1819            buffer.append(this.getJobHandler().isRunning());
1820            buffer.append(" isCrawling=");
1821            buffer.append(this.getJobHandler().isCrawling());
1822            buffer.append(" alertCount=");
1823            buffer.append(getAlertsCount());
1824            buffer.append(" newAlertCount=");
1825            buffer.append(getNewAlertsCount());
1826            if (this.getJobHandler().isCrawling()) {
1827                buffer.append(" currentJob=");
1828                buffer.append(this.getJobHandler().getCurrentJob().
1829                    getJmxJobName());
1830            }
1831        }
1832        return buffer.toString();
1833    }
1834    
1835    // Alert methods.
1836
public int getAlertsCount() {
1837        return this.alertManager.getCount();
1838    }
1839    
1840    public int getNewAlertsCount() {
1841        return this.alertManager.getNewCount();
1842    }
1843    
1844    public Vector JavaDoc getAlerts() {
1845        return this.alertManager.getAll();
1846    }
1847    
1848    public Vector JavaDoc getNewAlerts() {
1849        return this.alertManager.getNewAll();
1850    }
1851    
1852    public SinkHandlerLogRecord getAlert(final String JavaDoc id) {
1853        return this.alertManager.get(id);
1854    }
1855    
1856    public void readAlert(final String JavaDoc id) {
1857        this.alertManager.read(id);
1858    }
1859    
1860    public void removeAlert(final String JavaDoc id) {
1861        this.alertManager.remove(id);
1862    }
1863    
1864    /**
1865     * Start Heritrix.
1866     *
1867     * Used by JMX and webapp initialization for starting Heritrix.
1868     * Not by the cmdline launched Heritrix. Idempotent.
1869     * If start is called by JMX, then new instance of Heritrix is automatically
1870     * registered w/ JMX Agent. If started by webapp, need to register the new
1871     * Heritrix instance.
1872     */

1873    public void start() {
1874        // Don't start if we've been launched from the command line.
1875
// Don't start if already started.
1876
if (!Heritrix.isCommandLine() && !isStarted()) {
1877            try {
1878                logger.info(launch());
1879            } catch (Exception JavaDoc e) {
1880                e.printStackTrace();
1881            }
1882        }
1883    }
1884    
1885    /**
1886     * Stop Heritrix.
1887     *
1888     * Used by JMX and webapp initialization for stopping Heritrix.
1889     */

1890    public void stop() {
1891        if (this.jobHandler != null) {
1892            this.jobHandler.stop();
1893        }
1894    }
1895
1896    public String JavaDoc interrupt(String JavaDoc threadName) {
1897        String JavaDoc result = "Thread " + threadName + " not found";
1898        ThreadGroup JavaDoc group = Thread.currentThread().getThreadGroup();
1899        if (group == null) {
1900            return result;
1901        }
1902        // Back up to the root threadgroup before starting
1903
// to iterate over threads.
1904
ThreadGroup JavaDoc parent = null;
1905        while((parent = group.getParent()) != null) {
1906            group = parent;
1907        }
1908        // Do an array that is twice the size of active
1909
// thread count. That should be big enough.
1910
final int max = group.activeCount() * 2;
1911        Thread JavaDoc [] threads = new Thread JavaDoc[max];
1912        int threadCount = group.enumerate(threads, true);
1913        if (threadCount >= max) {
1914            logger.info("Some threads not found...array too small: " +
1915                max);
1916        }
1917        for (int j = 0; j < threadCount; j++) {
1918            if (threads[j].getName().equals(threadName)) {
1919                threads[j].interrupt();
1920                result = "Interrupt sent to " + threadName;
1921                break;
1922            }
1923        }
1924        return result;
1925    }
1926
1927    // OpenMBean implementation.
1928

1929    /**
1930     * Build up the MBean info for Heritrix main.
1931     * @return Return created mbean info instance.
1932     */

1933    protected OpenMBeanInfoSupport JavaDoc buildMBeanInfo() {
1934        OpenMBeanAttributeInfoSupport JavaDoc[] attributes =
1935            new OpenMBeanAttributeInfoSupport JavaDoc[Heritrix.ATTRIBUTE_LIST.size()];
1936        OpenMBeanConstructorInfoSupport JavaDoc[] constructors =
1937            new OpenMBeanConstructorInfoSupport JavaDoc[1];
1938        OpenMBeanOperationInfoSupport JavaDoc[] operations =
1939            new OpenMBeanOperationInfoSupport JavaDoc[Heritrix.OPERATION_LIST.size()];
1940        MBeanNotificationInfo JavaDoc[] notifications =
1941            new MBeanNotificationInfo JavaDoc[0];
1942
1943        // Attributes.
1944
attributes[0] =
1945            new OpenMBeanAttributeInfoSupport JavaDoc(Heritrix.STATUS_ATTR,
1946                "Short basic status message", SimpleType.STRING, true,
1947                false, false);
1948        // Attributes.
1949
attributes[1] =
1950            new OpenMBeanAttributeInfoSupport JavaDoc(Heritrix.VERSION_ATTR,
1951                "Heritrix version", SimpleType.STRING, true, false, false);
1952
1953        // Constructors.
1954
constructors[0] = new OpenMBeanConstructorInfoSupport JavaDoc(
1955            "HeritrixOpenMBean", "Constructs Heritrix OpenMBean instance ",
1956            new OpenMBeanParameterInfoSupport JavaDoc[0]);
1957
1958        // Operations.
1959
operations[0] = new OpenMBeanOperationInfoSupport JavaDoc(
1960            Heritrix.START_OPER, "Start Heritrix instance", null,
1961                SimpleType.VOID, MBeanOperationInfo.ACTION);
1962        
1963        operations[1] = new OpenMBeanOperationInfoSupport JavaDoc(
1964            Heritrix.STOP_OPER, "Stop Heritrix instance", null,
1965                SimpleType.VOID, MBeanOperationInfo.ACTION);
1966        
1967        OpenMBeanParameterInfo JavaDoc[] args = new OpenMBeanParameterInfoSupport JavaDoc[1];
1968        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("threadName",
1969            "Name of thread to send interrupt", SimpleType.STRING);
1970        operations[2] = new OpenMBeanOperationInfoSupport JavaDoc(
1971            Heritrix.INTERRUPT_OPER, "Send thread an interrupt " +
1972                "(Used debugging)", args, SimpleType.STRING,
1973                MBeanOperationInfo.ACTION_INFO);
1974        
1975        operations[3] = new OpenMBeanOperationInfoSupport JavaDoc(
1976            Heritrix.START_CRAWLING_OPER, "Set Heritrix instance " +
1977                "into crawling mode", null, SimpleType.VOID,
1978                MBeanOperationInfo.ACTION);
1979        
1980        operations[4] = new OpenMBeanOperationInfoSupport JavaDoc(
1981            Heritrix.STOP_CRAWLING_OPER, "Unset Heritrix instance " +
1982                " crawling mode", null, SimpleType.VOID,
1983                MBeanOperationInfo.ACTION);
1984        
1985        args = new OpenMBeanParameterInfoSupport JavaDoc[4];
1986        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("pathOrURL",
1987            "Path/URL to order or jar of order+seed",
1988            SimpleType.STRING);
1989        args[1] = new OpenMBeanParameterInfoSupport JavaDoc("name",
1990            "Basename for new job", SimpleType.STRING);
1991        args[2] = new OpenMBeanParameterInfoSupport JavaDoc("description",
1992            "Description to save with new job", SimpleType.STRING);
1993        args[3] = new OpenMBeanParameterInfoSupport JavaDoc("seeds",
1994            "Initial seed(s)", SimpleType.STRING);
1995        operations[5] = new OpenMBeanOperationInfoSupport JavaDoc(
1996            Heritrix.ADD_CRAWL_JOB_OPER, "Add new crawl job", args,
1997                SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
1998        
1999        args = new OpenMBeanParameterInfoSupport JavaDoc[4];
2000        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("uidOrName",
2001            "Job UID or profile name", SimpleType.STRING);
2002        args[1] = new OpenMBeanParameterInfoSupport JavaDoc("name",
2003            "Basename for new job", SimpleType.STRING);
2004        args[2] = new OpenMBeanParameterInfoSupport JavaDoc("description",
2005            "Description to save with new job", SimpleType.STRING);
2006        args[3] = new OpenMBeanParameterInfoSupport JavaDoc("seeds",
2007            "Initial seed(s)", SimpleType.STRING);
2008        operations[6] = new OpenMBeanOperationInfoSupport JavaDoc(
2009            Heritrix.ADD_CRAWL_JOB_BASEDON_OPER,
2010            "Add a new crawl job based on passed Job UID or profile",
2011            args, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2012        
2013        args = new OpenMBeanParameterInfoSupport JavaDoc[1];
2014        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("UID",
2015            "Job UID", SimpleType.STRING);
2016        operations[7] = new OpenMBeanOperationInfoSupport JavaDoc(DELETE_CRAWL_JOB_OPER,
2017            "Delete/stop this crawl job", args, SimpleType.VOID,
2018            MBeanOperationInfo.ACTION);
2019        
2020        args = new OpenMBeanParameterInfoSupport JavaDoc[1];
2021        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("index",
2022            "Zero-based index into array of alerts", SimpleType.INTEGER);
2023        operations[8] = new OpenMBeanOperationInfoSupport JavaDoc(
2024            Heritrix.ALERT_OPER, "Return alert at passed index", args,
2025                SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2026        
2027        try {
2028            this.jobCompositeType = new CompositeType JavaDoc("job",
2029                    "Job attributes", JOB_KEYS,
2030                    new String JavaDoc [] {"Job unique ID", "Job name", "Job status"},
2031                    new OpenType JavaDoc [] {SimpleType.STRING, SimpleType.STRING,
2032                        SimpleType.STRING});
2033            this.jobsTabularType = new TabularType JavaDoc("jobs", "List of jobs",
2034                    this.jobCompositeType, new String JavaDoc [] {"uid"});
2035        } catch (OpenDataException JavaDoc e) {
2036            // This should never happen.
2037
throw new RuntimeException JavaDoc(e);
2038        }
2039        operations[9] = new OpenMBeanOperationInfoSupport JavaDoc(
2040            Heritrix.PENDING_JOBS_OPER,
2041                "List of pending jobs (or null if none)", null,
2042                this.jobsTabularType, MBeanOperationInfo.INFO);
2043        operations[10] = new OpenMBeanOperationInfoSupport JavaDoc(
2044                Heritrix.COMPLETED_JOBS_OPER,
2045                    "List of completed jobs (or null if none)", null,
2046                    this.jobsTabularType, MBeanOperationInfo.INFO);
2047        
2048        args = new OpenMBeanParameterInfoSupport JavaDoc[2];
2049        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("uid",
2050            "Job unique ID", SimpleType.STRING);
2051        args[1] = new OpenMBeanParameterInfoSupport JavaDoc("name",
2052                "Report name (e.g. crawl-report, etc.)",
2053                SimpleType.STRING);
2054        operations[11] = new OpenMBeanOperationInfoSupport JavaDoc(
2055            Heritrix.CRAWLEND_REPORT_OPER, "Return crawl-end report", args,
2056                SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2057        
2058        operations[12] = new OpenMBeanOperationInfoSupport JavaDoc(
2059            Heritrix.SHUTDOWN_OPER, "Shutdown container", null,
2060                SimpleType.VOID, MBeanOperationInfo.ACTION);
2061        
2062        args = new OpenMBeanParameterInfoSupport JavaDoc[2];
2063        args[0] = new OpenMBeanParameterInfoSupport JavaDoc("level",
2064            "Log level: e.g. SEVERE, WARNING, etc.", SimpleType.STRING);
2065        args[1] = new OpenMBeanParameterInfoSupport JavaDoc("message",
2066            "Log message", SimpleType.STRING);
2067        operations[13] = new OpenMBeanOperationInfoSupport JavaDoc(Heritrix.LOG_OPER,
2068            "Add a log message", args, SimpleType.VOID,
2069            MBeanOperationInfo.ACTION);
2070        
2071        operations[14] = new OpenMBeanOperationInfoSupport JavaDoc(
2072            Heritrix.DESTROY_OPER, "Destroy Heritrix instance", null,
2073                SimpleType.VOID, MBeanOperationInfo.ACTION);
2074        
2075        operations[15] = new OpenMBeanOperationInfoSupport JavaDoc(
2076            Heritrix.TERMINATE_CRAWL_JOB_OPER,
2077            "Returns false if no current job", null, SimpleType.BOOLEAN,
2078            MBeanOperationInfo.ACTION);
2079        
2080        operations[16] = new OpenMBeanOperationInfoSupport JavaDoc(
2081            Heritrix.REBIND_JNDI_OPER,
2082            "Rebinds this Heritrix with JNDI.", null,
2083            SimpleType.VOID, MBeanOperationInfo.ACTION);
2084
2085        // Build the info object.
2086
return new OpenMBeanInfoSupport JavaDoc(this.getClass().getName(),
2087            "Heritrix Main OpenMBean", attributes, constructors, operations,
2088            notifications);
2089    }
2090    
2091    public Object JavaDoc getAttribute(String JavaDoc attribute_name)
2092    throws AttributeNotFoundException JavaDoc {
2093        if (attribute_name == null) {
2094            throw new RuntimeOperationsException JavaDoc(
2095                 new IllegalArgumentException JavaDoc("Attribute name cannot be null"),
2096                 "Cannot call getAttribute with null attribute name");
2097        }
2098        if (!Heritrix.ATTRIBUTE_LIST.contains(attribute_name)) {
2099            throw new AttributeNotFoundException JavaDoc("Attribute " +
2100                 attribute_name + " is unimplemented.");
2101        }
2102        // The pattern in the below is to match an attribute and when found
2103
// do a return out of if clause. Doing it this way, I can fall
2104
// on to the AttributeNotFoundException for case where we've an
2105
// attribute but no handler.
2106
if (attribute_name.equals(STATUS_ATTR)) {
2107            return getStatus();
2108        }
2109        if (attribute_name.equals(VERSION_ATTR)) {
2110            return getVersion();
2111        }
2112        throw new AttributeNotFoundException JavaDoc("Attribute " +
2113            attribute_name + " not found.");
2114    }
2115
2116    public void setAttribute(Attribute JavaDoc attribute)
2117    throws AttributeNotFoundException JavaDoc {
2118        throw new AttributeNotFoundException JavaDoc("No attribute can be set in " +
2119            "this MBean");
2120    }
2121
2122    public AttributeList JavaDoc getAttributes(String JavaDoc [] attributeNames) {
2123        if (attributeNames == null) {
2124            throw new RuntimeOperationsException JavaDoc(
2125                new IllegalArgumentException JavaDoc("attributeNames[] cannot be " +
2126                "null"), "Cannot call getAttributes with null attribute " +
2127                "names");
2128        }
2129        AttributeList JavaDoc resultList = new AttributeList JavaDoc();
2130        if (attributeNames.length == 0) {
2131            return resultList;
2132        }
2133        for (int i = 0; i < attributeNames.length; i++) {
2134            try {
2135                Object JavaDoc value = getAttribute(attributeNames[i]);
2136                resultList.add(new Attribute JavaDoc(attributeNames[i], value));
2137            } catch (Exception JavaDoc e) {
2138                e.printStackTrace();
2139            }
2140        }
2141        return(resultList);
2142    }
2143
2144    public AttributeList JavaDoc setAttributes(AttributeList JavaDoc attributes) {
2145        return new AttributeList JavaDoc(); // always empty
2146
}
2147
2148    public Object JavaDoc invoke(final String JavaDoc operationName, final Object JavaDoc[] params,
2149        final String JavaDoc[] signature)
2150    throws ReflectionException JavaDoc {
2151        if (operationName == null) {
2152            throw new RuntimeOperationsException JavaDoc(
2153                new IllegalArgumentException JavaDoc("Operation name cannot be null"),
2154                "Cannot call invoke with null operation name");
2155        }
2156        // The pattern in the below is to match an operation and when found
2157
// do a return out of if clause. Doing it this way, I can fall
2158
// on to the MethodNotFoundException for case where we've an
2159
// attribute but no handler.
2160
if (operationName.equals(START_OPER)) {
2161            JmxUtils.checkParamsCount(START_OPER, params, 0);
2162            start();
2163            return null;
2164        }
2165        if (operationName.equals(STOP_OPER)) {
2166            JmxUtils.checkParamsCount(STOP_OPER, params, 0);
2167            stop();
2168            return null;
2169        }
2170        if (operationName.equals(DESTROY_OPER)) {
2171            JmxUtils.checkParamsCount(DESTROY_OPER, params, 0);
2172            destroy();
2173            return null;
2174        }
2175        if (operationName.equals(TERMINATE_CRAWL_JOB_OPER)) {
2176            JmxUtils.checkParamsCount(TERMINATE_CRAWL_JOB_OPER, params, 0);
2177            return new Boolean JavaDoc(this.jobHandler.terminateCurrentJob());
2178        }
2179        if (operationName.equals(REBIND_JNDI_OPER)) {
2180            JmxUtils.checkParamsCount(REBIND_JNDI_OPER, params, 0);
2181            try {
2182                registerContainerJndi();
2183            } catch (MalformedObjectNameException JavaDoc e) {
2184                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2185            } catch (UnknownHostException JavaDoc e) {
2186                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2187            } catch (NamingException JavaDoc e) {
2188                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2189            }
2190            return null;
2191        }
2192        if (operationName.equals(SHUTDOWN_OPER)) {
2193            JmxUtils.checkParamsCount(SHUTDOWN_OPER, params, 0);
2194            Heritrix.shutdown();
2195            return null;
2196        }
2197        if (operationName.equals(LOG_OPER)) {
2198            JmxUtils.checkParamsCount(LOG_OPER, params, 2);
2199            logger.log(Level.parse((String JavaDoc)params[0]), (String JavaDoc)params[1]);
2200            return null;
2201        }
2202        if (operationName.equals(INTERRUPT_OPER)) {
2203            JmxUtils.checkParamsCount(INTERRUPT_OPER, params, 1);
2204            return interrupt((String JavaDoc)params[0]);
2205        }
2206        if (operationName.equals(START_CRAWLING_OPER)) {
2207            JmxUtils.checkParamsCount(START_CRAWLING_OPER, params, 0);
2208            startCrawling();
2209            return null;
2210        }
2211        if (operationName.equals(STOP_CRAWLING_OPER)) {
2212            JmxUtils.checkParamsCount(STOP_CRAWLING_OPER, params, 0);
2213            stopCrawling();
2214            return null;
2215        }
2216        if (operationName.equals(ADD_CRAWL_JOB_OPER)) {
2217            JmxUtils.checkParamsCount(ADD_CRAWL_JOB_OPER, params, 4);
2218            try {
2219                return addCrawlJob((String JavaDoc)params[0], (String JavaDoc)params[1],
2220                    checkForEmptyPlaceHolder((String JavaDoc)params[2]),
2221                    checkForEmptyPlaceHolder((String JavaDoc)params[3]));
2222            } catch (IOException JavaDoc e) {
2223                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2224            } catch (FatalConfigurationException e) {
2225                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2226            }
2227        }
2228        if (operationName.equals(DELETE_CRAWL_JOB_OPER)) {
2229            JmxUtils.checkParamsCount(DELETE_CRAWL_JOB_OPER, params, 1);
2230            this.jobHandler.deleteJob((String JavaDoc)params[0]);
2231            return null;
2232        }
2233        
2234        if (operationName.equals(ADD_CRAWL_JOB_BASEDON_OPER)) {
2235            JmxUtils.checkParamsCount(ADD_CRAWL_JOB_BASEDON_OPER, params, 4);
2236            return addCrawlJobBasedOn((String JavaDoc)params[0], (String JavaDoc)params[1],
2237                    checkForEmptyPlaceHolder((String JavaDoc)params[2]),
2238                    checkForEmptyPlaceHolder((String JavaDoc)params[3]));
2239        }
2240        if (operationName.equals(ALERT_OPER)) {
2241            JmxUtils.checkParamsCount(ALERT_OPER, params, 1);
2242            SinkHandlerLogRecord slr = null;
2243            if (this.alertManager.getCount() > 0) {
2244                // This is creating a vector of all alerts just so I can then
2245
// use passed index into resultant vector -- needs to be
2246
// improved.
2247
slr = (SinkHandlerLogRecord)this.alertManager.getAll().
2248                    get(((Integer JavaDoc)params[0]).intValue());
2249            }
2250            return (slr != null)? slr.toString(): null;
2251        }
2252        
2253        if (operationName.equals(PENDING_JOBS_OPER)) {
2254                JmxUtils.checkParamsCount(PENDING_JOBS_OPER, params, 0);
2255            try {
2256                return makeJobsTabularData(getJobHandler().getPendingJobs());
2257            } catch (OpenDataException JavaDoc e) {
2258                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2259            }
2260        }
2261        
2262        if (operationName.equals(COMPLETED_JOBS_OPER)) {
2263                JmxUtils.checkParamsCount(COMPLETED_JOBS_OPER, params, 0);
2264            try {
2265                return makeJobsTabularData(getJobHandler().getCompletedJobs());
2266            } catch (OpenDataException JavaDoc e) {
2267                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2268            }
2269        }
2270        
2271        if (operationName.equals(CRAWLEND_REPORT_OPER)) {
2272            JmxUtils.checkParamsCount(CRAWLEND_REPORT_OPER, params, 2);
2273            try {
2274                return getCrawlendReport((String JavaDoc)params[0], (String JavaDoc) params[1]);
2275            } catch (IOException JavaDoc e) {
2276                throw new RuntimeOperationsException JavaDoc(new RuntimeException JavaDoc(e));
2277            }
2278        }
2279        
2280        throw new ReflectionException JavaDoc(
2281            new NoSuchMethodException JavaDoc(operationName),
2282                "Cannot find the operation " + operationName);
2283    }
2284    
2285    /**
2286     * Return named crawl end report for job with passed uid.
2287     * Crawler makes reports when its finished its crawl. Use this method
2288     * to get a String version of one of these files.
2289     * @param jobUid The unique ID for the job whose reports you want to see
2290     * (Must be a completed job).
2291     * @param reportName Name of report minus '.txt' (e.g. crawl-report).
2292     * @return String version of the on-disk report.
2293     * @throws IOException
2294     */

2295    protected String JavaDoc getCrawlendReport(String JavaDoc jobUid, String JavaDoc reportName)
2296    throws IOException JavaDoc {
2297        CrawlJob job = getJobHandler().getJob(jobUid);
2298        if (job == null) {
2299            throw new IOException JavaDoc("No such job: " + jobUid);
2300        }
2301        File JavaDoc report = new File JavaDoc(job.getDirectory(), reportName + ".txt");
2302        if (!report.exists()) {
2303            throw new FileNotFoundException JavaDoc(report.getAbsolutePath());
2304        }
2305        return FileUtils.readFileAsString(report);
2306    }
2307    
2308    protected TabularData JavaDoc makeJobsTabularData(List JavaDoc jobs)
2309    throws OpenDataException JavaDoc {
2310        if (jobs == null || jobs.size() == 0) {
2311            return null;
2312        }
2313        TabularData JavaDoc td = new TabularDataSupport JavaDoc(this.jobsTabularType);
2314        for (Iterator JavaDoc i = jobs.iterator(); i.hasNext();) {
2315            CrawlJob job = (CrawlJob)i.next();
2316            CompositeData JavaDoc cd = new CompositeDataSupport JavaDoc(this.jobCompositeType,
2317                JOB_KEYS,
2318                new String JavaDoc [] {job.getUID(), job.getJobName(), job.getStatus()});
2319            td.put(cd);
2320        }
2321        return td;
2322    }
2323    
2324    /**
2325     * If passed str has placeholder for the empty string, return the empty
2326     * string else return orginal.
2327     * Dumb jmx clients can't pass empty string so they'll pass a representation
2328     * of empty string such as ' ' or '-'. Convert such strings to empty
2329     * string.
2330     * @param str String to check.
2331     * @return Original <code>str</code> or empty string if <code>str</code>
2332     * contains a placeholder for the empty-string (e.g. '-', or ' ').
2333     */

2334    protected String JavaDoc checkForEmptyPlaceHolder(String JavaDoc str) {
2335        return TextUtils.matches("-| +", str)? "": str;
2336    }
2337
2338    public MBeanInfo JavaDoc getMBeanInfo() {
2339        return this.openMBeanInfo;
2340    }
2341    
2342    /**
2343     * @return Name this instance registered in JMX (Only available after JMX
2344     * registration).
2345     */

2346    public ObjectName JavaDoc getMBeanName() {
2347        return this.mbeanName;
2348    }
2349
2350    public ObjectName JavaDoc preRegister(MBeanServer JavaDoc server, ObjectName JavaDoc name)
2351    throws Exception JavaDoc {
2352        this.mbeanServer = server;
2353        @SuppressWarnings JavaDoc("unchecked")
2354        Hashtable JavaDoc<String JavaDoc,String JavaDoc> ht = name.getKeyPropertyList();
2355        if (!ht.containsKey(JmxUtils.NAME)) {
2356            throw new IllegalArgumentException JavaDoc("Name property required" +
2357                name.getCanonicalName());
2358        }
2359        if (!ht.containsKey(JmxUtils.TYPE)) {
2360            ht.put(JmxUtils.TYPE, JmxUtils.SERVICE);
2361            name = new ObjectName JavaDoc(name.getDomain(), ht);
2362        }
2363        this.mbeanName = addGuiPort(addVitals(name));
2364        Heritrix.instances.put(this.mbeanName.
2365            getCanonicalKeyPropertyListString(), this);
2366        return this.mbeanName;
2367    }
2368    
2369    /**
2370     * Add vital stats to passed in ObjectName.
2371     * @param name ObjectName to add to.
2372     * @return name with host, guiport, and jmxport added.
2373     * @throws UnknownHostException
2374     * @throws MalformedObjectNameException
2375     * @throws NullPointerException
2376     */

2377    protected static ObjectName JavaDoc addVitals(ObjectName JavaDoc name)
2378    throws UnknownHostException JavaDoc, MalformedObjectNameException JavaDoc,
2379    NullPointerException JavaDoc {
2380        @SuppressWarnings JavaDoc("unchecked")
2381        Hashtable JavaDoc<String JavaDoc,String JavaDoc> ht = name.getKeyPropertyList();
2382        if (!ht.containsKey(JmxUtils.HOST)) {
2383            ht.put(JmxUtils.HOST, InetAddress.getLocalHost().getHostName());
2384            name = new ObjectName JavaDoc(name.getDomain(), ht);
2385        }
2386        if (!ht.containsKey(JmxUtils.JMX_PORT)) {
2387            // Add jdk jmx-port. This will be present if we've attached
2388
// ourselves to the jdk jmx agent. Otherwise, we've been
2389
// deployed in a j2ee container with its own jmx agent. In
2390
// this case we won't know how to get jmx port.
2391
String JavaDoc p = System.getProperty("com.sun.management.jmxremote.port");
2392            if (p != null && p.length() > 0) {
2393                ht.put(JmxUtils.JMX_PORT, p);
2394                name = new ObjectName JavaDoc(name.getDomain(), ht);
2395            }
2396        }
2397        return name;
2398    }
2399    
2400    protected static ObjectName JavaDoc addGuiPort(ObjectName JavaDoc name)
2401    throws MalformedObjectNameException JavaDoc, NullPointerException JavaDoc {
2402        @SuppressWarnings JavaDoc("unchecked")
2403        Hashtable JavaDoc<String JavaDoc,String JavaDoc> ht = name.getKeyPropertyList();
2404        if (!ht.containsKey(JmxUtils.GUI_PORT)) {
2405            // Add gui port if this instance was started with a gui.
2406
if (Heritrix.gui) {
2407                ht.put(JmxUtils.GUI_PORT, Integer.toString(Heritrix.guiPort));
2408                name = new ObjectName JavaDoc(name.getDomain(), ht);
2409            }
2410        }
2411        return name;
2412    }
2413
2414    public void postRegister(Boolean JavaDoc registrationDone) {
2415        if (logger.isLoggable(Level.INFO)) {
2416            logger.info(
2417                JmxUtils.getLogRegistrationMsg(this.mbeanName.getCanonicalName(),
2418                this.mbeanServer, registrationDone.booleanValue()));
2419        }
2420        try {
2421            registerJndi(this.mbeanName);
2422        } catch (Exception JavaDoc e) {
2423            logger.log(Level.SEVERE, "Failed jndi registration", e);
2424        }
2425    }
2426
2427    public void preDeregister() throws Exception JavaDoc {
2428        deregisterJndi(this.mbeanName);
2429    }
2430
2431    public void postDeregister() {
2432        Heritrix.instances.
2433            remove(this.mbeanName.getCanonicalKeyPropertyListString());
2434        if (logger.isLoggable(Level.INFO)) {
2435            logger.info(JmxUtils.getLogUnregistrationMsg(
2436                    this.mbeanName.getCanonicalName(), this.mbeanServer));
2437        }
2438    }
2439    
2440    protected static void registerContainerJndi()
2441    throws MalformedObjectNameException JavaDoc, NullPointerException JavaDoc,
2442            UnknownHostException JavaDoc, NamingException JavaDoc {
2443        registerJndi(getJndiContainerName());
2444    }
2445
2446    protected static void registerJndi(final ObjectName JavaDoc name)
2447    throws NullPointerException JavaDoc, NamingException JavaDoc {
2448        Context JavaDoc c = getJndiContext();
2449        if (c == null) {
2450            return;
2451        }
2452        CompoundName JavaDoc key = JndiUtils.bindObjectName(c, name);
2453        if (logger.isLoggable(Level.FINE)) {
2454            logger.fine("Bound '" + key + "' to '" + JndiUtils.
2455               getCompoundName(c.getNameInNamespace()).toString()
2456               + "' jndi context");
2457        }
2458    }
2459    
2460    protected static void deregisterJndi(final ObjectName JavaDoc name)
2461    throws NullPointerException JavaDoc, NamingException JavaDoc {
2462        Context JavaDoc c = getJndiContext();
2463        if (c == null) {
2464            return;
2465        }
2466        CompoundName JavaDoc key = JndiUtils.unbindObjectName(c, name);
2467        if (logger.isLoggable(Level.FINE)) {
2468            logger.fine("Unbound '" + key + "' from '" +
2469                JndiUtils.getCompoundName(c.getNameInNamespace()).toString() +
2470                    "' jndi context");
2471        }
2472    }
2473    
2474    /**
2475     * @return Jndi context for the crawler or null if none found.
2476     * @throws NamingException
2477     */

2478    protected static Context JavaDoc getJndiContext() throws NamingException JavaDoc {
2479        Context JavaDoc c = null;
2480        try {
2481            c = JndiUtils.getSubContext(CRAWLER_PACKAGE);
2482        } catch (NoInitialContextException JavaDoc e) {
2483            logger.fine("No JNDI Context: " + e.toString());
2484        }
2485        return c;
2486    }
2487    
2488    /**
2489     * @return Jndi container name -- the name to use for the 'container' that
2490     * can host zero or more heritrix instances (Return a JMX ObjectName. We
2491     * use ObjectName because then we're sync'd with JMX naming and ObjectName
2492     * has nice parsing).
2493     * @throws NullPointerException
2494     * @throws MalformedObjectNameException
2495     * @throws UnknownHostException
2496     */

2497    protected static ObjectName JavaDoc getJndiContainerName()
2498    throws MalformedObjectNameException JavaDoc, NullPointerException JavaDoc,
2499    UnknownHostException JavaDoc {
2500        ObjectName JavaDoc objName = new ObjectName JavaDoc(CRAWLER_PACKAGE, "type",
2501            "container");
2502        return addVitals(objName);
2503    }
2504    
2505    /**
2506     * @return Return all registered instances of Heritrix (Rare are there
2507     * more than one).
2508     */

2509    public static Map JavaDoc getInstances() {
2510        return Heritrix.instances;
2511    }
2512    
2513    /**
2514     * @return True if only one instance of Heritrix.
2515     */

2516    public static boolean isSingleInstance() {
2517        return Heritrix.instances != null && Heritrix.instances.size() == 1;
2518    }
2519    
2520    /**
2521     * @return Returns single instance or null if no instance or multiple.
2522     */

2523    public static Heritrix getSingleInstance() {
2524        return !isSingleInstance()?
2525            null:
2526            (Heritrix)Heritrix.instances.
2527                get(Heritrix.instances.keySet().iterator().next());
2528    }
2529}
2530
Popular Tags