package org.archive.crawler.admin;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanException;
import javax.management.ReflectionException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.Heritrix;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.framework.FrontierMarker;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.framework.exceptions.InitializationException;
import org.archive.crawler.framework.exceptions.InvalidFrontierMarkerException;
import org.archive.crawler.frontier.FrontierJournal;
import org.archive.crawler.frontier.RecoveryJournal;
import org.archive.crawler.settings.ComplexType;
import org.archive.crawler.settings.CrawlerSettings;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;

/**
 * Manages the crawler's jobs and profiles: keeps ordered sets of pending,
 * completed and profile jobs, tracks the job currently being crawled, and
 * starts the next pending job when a crawl ends.
 */
public class CrawlJobHandler implements CrawlStatusListener {
    private static final Logger logger =
        Logger.getLogger(CrawlJobHandler.class.getName());

    /** Name of the system property that may designate an alternate default
     * profile. */
    public static final String DEFAULT_PROFILE_NAME
        = "heritrix.default.profile";

    /** Name of the profile used as default when none other is designated. */
    public static final String DEFAULT_PROFILE = "default";

    /** Name of the directory where profiles are stored. */
    public static final String PROFILES_DIR_NAME = "profiles";

    public static final String ORDER_FILE_NAME = "order.xml";

    /** Job currently being crawled, if any. */
    private CrawlJob currentJob = null;

    /** A job being configured that has not yet been submitted. */
    private CrawlJob newJob = null;

    /** Thread used to start the next job asynchronously. */
    private Thread startingNextJob = null;

    /** Jobs waiting to be crawled, ordered by priority then UID. */
    private TreeSet<CrawlJob> pendingCrawlJobs;

    /** Jobs that have finished crawling, in the same ordering. */
    private TreeSet<CrawlJob> completedCrawlJobs;

    /** Available profiles. */
    private TreeSet<CrawlJob> profileJobs;

    /** Name of the current default profile. */
    private String defaultProfile = null;

    /** True while the handler is accepting and running jobs. */
    private boolean running = false;

    /** Token used to request recovery from the frontier's recover log. */
    public static final String RECOVER_LOG = "recover";

    /** Directory under which job directories are created. */
    private final File jobsDir;

    public CrawlJobHandler(final File jobsDir) {
        this(jobsDir, true, true);
    }

    public CrawlJobHandler(final File jobsDir,
            final boolean loadJobs, final boolean loadProfiles) {
        this.jobsDir = jobsDir;
        // Order jobs by priority; fall back on UID so that equal-priority
        // jobs stay distinct and sort by creation time.
        Comparator<CrawlJob> comp = new Comparator<CrawlJob>() {
            public int compare(CrawlJob job1, CrawlJob job2) {
                if (job1.getJobPriority() < job2.getJobPriority()) {
                    return -1;
                } else if (job1.getJobPriority() > job2.getJobPriority()) {
                    return 1;
                } else {
                    return job1.getUID().compareTo(job2.getUID());
                }
            }
        };
        this.pendingCrawlJobs = new TreeSet<CrawlJob>(comp);
        this.completedCrawlJobs = new TreeSet<CrawlJob>(comp);
        this.profileJobs = new TreeSet<CrawlJob>(comp);
        if (loadProfiles) {
            loadProfiles();
        }
        if (loadJobs) {
            loadJobs();
        }
    }
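    /*
     * Illustrative usage only (hypothetical values, not from the original
     * source): a handler is built against a jobs directory, started, and
     * then fed jobs derived from a profile.
     *
     *   CrawlJobHandler handler = new CrawlJobHandler(new File("jobs"));
     *   handler.startCrawler();
     *   CrawlJob job = handler.newJob(handler.getDefaultProfile(), null,
     *       "my-crawl", "example crawl", "http://example.com/",
     *       CrawlJob.PRIORITY_AVERAGE);
     *   handler.addJob(job);   // queued; starts when the handler is idle
     */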
    /**
     * Returns the single readable '.job' file in the given job directory,
     * or null if there is not exactly one.
     */
    protected File getStateJobFile(final File jobDir) {
        File[] jobFiles = jobDir.listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith(".job") &&
                    (new File(dir, name)).canRead();
            }
        });
        return (jobFiles.length == 1)? jobFiles[0]: null;
    }

    /**
     * Scan the jobs directory and load any job found in its subdirectories.
     */
    private void loadJobs() {
        this.jobsDir.mkdirs();
        File[] jobs = this.jobsDir.listFiles();
        for (int i = 0; i < jobs.length; i++) {
            if (jobs[i].isDirectory()) {
                File jobFile = getStateJobFile(jobs[i]);
                if (jobFile != null) {
                    loadJob(jobFile);
                }
            }
        }
    }

    /**
     * Load a single job from its '.job' state file and file it into the
     * pending or completed set according to its recorded status.
     */
    protected void loadJob(final File job) {
        CrawlJob cjob = null;
        try {
            cjob = new CrawlJob(job, new CrawlJobErrorHandler());
        } catch (InvalidJobFileException e) {
            logger.log(Level.INFO,
                "Invalid job file for " + job.getAbsolutePath(), e);
            return;
        } catch (IOException e) {
            logger.log(Level.INFO, "IOException for " + job.getName() +
                ", " + job.getAbsolutePath(), e);
            return;
        }

        // A job recorded as in progress must have been interrupted by a
        // crash or shutdown; mark it as having ended abnormally.
        if (cjob.getStatus().equals(CrawlJob.STATUS_RUNNING)
                || cjob.getStatus().equals(CrawlJob.STATUS_PAUSED)
                || cjob.getStatus().equals(CrawlJob.STATUS_CHECKPOINTING)
                || cjob.getStatus().equals(CrawlJob.STATUS_WAITING_FOR_PAUSE)) {
            cjob.setStatus(CrawlJob.STATUS_FINISHED_ABNORMAL);
            this.completedCrawlJobs.add(cjob);
        } else if (cjob.getStatus().equals(CrawlJob.STATUS_PENDING)) {
            this.pendingCrawlJobs.add(cjob);
        } else if (cjob.getStatus().equals(CrawlJob.STATUS_CREATED)
                || cjob.getStatus().equals(CrawlJob.STATUS_DELETED)) {
            // Never-submitted or deleted jobs are not filed anywhere.
        } else {
            this.completedCrawlJobs.add(cjob);
        }
    }

    /**
     * Returns the directory holding profiles: the webapp's bundled
     * profiles directory if present, otherwise 'profiles' under the
     * configuration directory.
     */
    private File getProfilesDirectory() throws IOException {
        URL webappProfilePath = Heritrix.class.getResource("/" +
            PROFILES_DIR_NAME);
        if (webappProfilePath != null) {
            try {
                return new File(new URI(webappProfilePath.toString()));
            } catch (java.lang.IllegalArgumentException e) {
                // Resource is not a file: URL (e.g. inside a jar); fall
                // through to the configuration directory below.
            } catch (java.net.URISyntaxException e) {
                e.printStackTrace();
            }
        }
        return (Heritrix.getConfdir(false) == null)? null:
            new File(Heritrix.getConfdir().getAbsolutePath(),
                PROFILES_DIR_NAME);
    }

    /**
     * Load all profiles found in the profiles directory, falling back on
     * the bundled default profile if none of them was the default.
     */
    private void loadProfiles() {
        boolean loadedDefault = false;
        File profileDir = null;
        try {
            profileDir = getProfilesDirectory();
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (profileDir != null) {
            File[] ps = profileDir.listFiles();
            if (ps != null && ps.length > 0) {
                for (int i = 0; i < ps.length; i++) {
                    File f = ps[i];
                    if (f.isDirectory()) {
                        // A profile directory holds an order file.
                        File profile = new File(f, ORDER_FILE_NAME);
                        if (profile.canRead()) {
                            if (loadProfile(profile)) {
                                loadedDefault = true;
                            }
                        }
                    }
                }
            }
        }
        String parent = File.separator + PROFILES_DIR_NAME + File.separator;
        if (!loadedDefault) {
            loadProfile(new File(parent + DEFAULT_PROFILE, ORDER_FILE_NAME));
        }
        defaultProfile = DEFAULT_PROFILE;
    }
    /**
     * Load one profile from its order file.
     *
     * @return true if the loaded profile is the default profile.
     */
    protected boolean loadProfile(File profile) {
        boolean loadedDefault = false;
        try {
            XMLSettingsHandler newSettingsHandler =
                new XMLSettingsHandler(profile);
            CrawlJobErrorHandler cjseh =
                new CrawlJobErrorHandler(Level.SEVERE);
            newSettingsHandler.setErrorReportingLevel(cjseh.getLevel());
            newSettingsHandler.initialize();
            addProfile(new CrawlJob(profile.getParentFile().getName(),
                newSettingsHandler, cjseh));
            loadedDefault = profile.getParentFile().getName().
                equals(DEFAULT_PROFILE);
        } catch (InvalidAttributeValueException e) {
            System.err.println("Failed to load profile '" +
                profile.getParentFile().getName() +
                "'. InvalidAttributeValueException.");
        }
        return loadedDefault;
    }

    /**
     * Add a profile to the set of known profiles.
     */
    public synchronized void addProfile(CrawlJob profile) {
        profileJobs.add(profile);
    }

    /**
     * Delete a profile's directory and remove it from the known profiles.
     */
    public synchronized void deleteProfile(CrawlJob cj) throws IOException {
        File d = getProfilesDirectory();
        File p = new File(d, cj.getJobName());
        if (!p.exists()) {
            throw new IOException("No profile named " + cj.getJobName() +
                " at " + d.getAbsolutePath());
        }
        FileUtils.deleteDir(p);
        this.profileJobs.remove(cj);
    }

    /**
     * Returns a snapshot list of the known profiles.
     */
    public synchronized List<CrawlJob> getProfiles() {
        ArrayList<CrawlJob> tmp = new ArrayList<CrawlJob>(profileJobs.size());
        tmp.addAll(profileJobs);
        return tmp;
    }

    /**
     * Submit a job to the pending queue. If the crawler is running and
     * idle, the job is started at once. Profiles cannot be submitted.
     */
    public CrawlJob addJob(CrawlJob job) {
        if (job.isProfile()) {
            return null;
        }
        job.setStatus(CrawlJob.STATUS_PENDING);
        if (job.isNew()) {
            this.newJob = null;
            job.setNew(false);
        }
        this.pendingCrawlJobs.add(job);
        if (!isCrawling() && isRunning()) {
            startNextJob();
        }
        return job;
    }

    /**
     * Returns the default profile, or the first known profile if the
     * named default cannot be found, or null if there are no profiles.
     */
    public synchronized CrawlJob getDefaultProfile() {
        if (defaultProfile != null) {
            for (Iterator<CrawlJob> it = profileJobs.iterator(); it.hasNext();) {
                CrawlJob item = it.next();
                if (item.getJobName().equals(defaultProfile)) {
                    return item;
                }
            }
        }
        if (profileJobs.size() > 0) {
            return profileJobs.first();
        }
        return null;
    }

    public void setDefaultProfile(CrawlJob profile) {
        defaultProfile = profile.getJobName();
    }

    /**
     * Returns a snapshot list of the pending jobs.
     */
    public List<CrawlJob> getPendingJobs() {
        ArrayList<CrawlJob> tmp
            = new ArrayList<CrawlJob>(pendingCrawlJobs.size());
        tmp.addAll(pendingCrawlJobs);
        return tmp;
    }

    public CrawlJob getCurrentJob() {
        return currentJob;
    }

    /**
     * Returns a snapshot list of the completed jobs.
     */
    public List<CrawlJob> getCompletedJobs() {
        ArrayList<CrawlJob> tmp
            = new ArrayList<CrawlJob>(completedCrawlJobs.size());
        tmp.addAll(completedCrawlJobs);
        return tmp;
    }

    /**
     * Find a job by UID, searching the current job, the job being
     * configured, then the pending, completed and profile sets.
     */
    public CrawlJob getJob(String jobUID) {
        if (jobUID == null) {
            return null;
        }
        if (currentJob != null && currentJob.getUID().equals(jobUID)) {
            return currentJob;
        } else if (newJob != null && newJob.getUID().equals(jobUID)) {
            return newJob;
        } else {
            for (Iterator<CrawlJob> it = pendingCrawlJobs.iterator();
                    it.hasNext();) {
                CrawlJob cj = it.next();
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }
            for (Iterator<CrawlJob> it = completedCrawlJobs.iterator();
                    it.hasNext();) {
                CrawlJob cj = it.next();
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }
            for (Iterator<CrawlJob> it = getProfiles().iterator();
                    it.hasNext();) {
                CrawlJob cj = it.next();
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }
        }
        return null;
    }
    /**
     * Request that the current job stop crawling, waiting up to three
     * seconds for the termination to be noticed.
     *
     * @return false if there was no current job.
     */
    public boolean terminateCurrentJob() {
        if (this.currentJob == null) {
            return false;
        }
        // This eventually causes crawlEnding to be invoked, which clears
        // currentJob and notifies waiters.
        this.currentJob.stopCrawling();
        synchronized (this) {
            try {
                // Wait up to 3 seconds for the crawl to report ending.
                wait(3000);
            } catch (InterruptedException e) {
                // Ignore: returning early is harmless here.
            }
        }
        return true;
    }

    /**
     * Delete a job by UID. A currently-running job is terminated instead;
     * pending and completed jobs are marked deleted and removed.
     */
    public void deleteJob(String jobUID) {
        if (currentJob != null && jobUID.equals(currentJob.getUID())) {
            terminateCurrentJob();
            return;
        }

        for (Iterator<CrawlJob> it = pendingCrawlJobs.iterator();
                it.hasNext();) {
            CrawlJob cj = it.next();
            if (cj.getUID().equals(jobUID)) {
                cj.setStatus(CrawlJob.STATUS_DELETED);
                it.remove();
                return;
            }
        }

        for (Iterator<CrawlJob> it = completedCrawlJobs.iterator();
                it.hasNext();) {
            CrawlJob cj = it.next();
            if (cj.getUID().equals(jobUID)) {
                cj.setStatus(CrawlJob.STATUS_DELETED);
                it.remove();
                return;
            }
        }
    }

    public void pauseJob() {
        if (this.currentJob != null) {
            this.currentJob.pause();
        }
    }

    public void resumeJob() {
        if (this.currentJob != null) {
            this.currentJob.resume();
        }
    }

    public void checkpointJob() throws IllegalStateException {
        if (this.currentJob != null) {
            this.currentJob.checkpoint();
        }
    }

    /**
     * Returns a unique job identifier: a 17-digit timestamp.
     */
    public String getNextJobUID() {
        return ArchiveUtils.TIMESTAMP17.format(new Date());
    }
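    /*
     * Note on the recovery parameter of newJob below (summarizing the
     * code that follows): passing RECOVER_LOG selects the gzipped
     * frontier recover log in the base job's logs directory, while any
     * other non-empty value is taken as the name of a checkpoint under
     * the base job's checkpoints directory.
     */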
    /**
     * Create a new job based on an existing job or profile, optionally
     * configured to recover from a recover log or a checkpoint.
     */
    public CrawlJob newJob(CrawlJob baseOn, String recovery, String name,
            String description, String seeds, int priority)
    throws FatalConfigurationException {
        File recover = null;
        try {
            if (recovery != null && recovery.length() > 0
                    && recovery.equals(RECOVER_LOG)) {
                // Recover from the gzipped recover log in the base job's
                // logs directory.
                File dir = baseOn.getSettingsHandler().getOrder()
                    .getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
                recover = new File(dir, FrontierJournal.LOGNAME_RECOVER +
                    RecoveryJournal.GZIP_SUFFIX);
            } else if (recovery != null && recovery.length() > 0) {
                // Any other non-empty value names a checkpoint directory.
                recover = new File(baseOn.getSettingsHandler().
                    getOrder().getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH),
                    recovery);
            }
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                "AttributeNotFoundException occurred while setting up" +
                " new job/profile " + name + "\n" + e1.getMessage());
        }

        CrawlJob cj = createNewJob(baseOn.getSettingsHandler().getOrderFile(),
            name, description, seeds, priority);

        updateRecoveryPaths(recover, cj.getSettingsHandler(), name);

        return cj;
    }

    /**
     * Create a new job from an order file, with average priority.
     */
    public CrawlJob newJob(final File orderFile, final String name,
            final String description, final String seeds)
    throws FatalConfigurationException {
        return createNewJob(orderFile, name, description, seeds,
            CrawlJob.PRIORITY_AVERAGE);
    }

    protected void checkDirectory(File dir)
    throws FatalConfigurationException {
        if (dir == null) {
            return;
        }
        // Use '||' so an existing-but-unreadable directory is rejected
        // too, as the message says (the original '&&' missed that case).
        if (!dir.exists() || !dir.canRead()) {
            throw new FatalConfigurationException(dir.getAbsolutePath() +
                " does not exist or is unreadable");
        }
    }

    protected CrawlJob createNewJob(final File orderFile, final String name,
            final String description, final String seeds, final int priority)
    throws FatalConfigurationException {
        if (newJob != null) {
            // Only one job may be under construction at a time.
            discardNewJob();
        }
        String UID = getNextJobUID();
        File jobDir = new File(this.jobsDir, name + "-" + UID);
        CrawlJobErrorHandler errorHandler = new CrawlJobErrorHandler();
        XMLSettingsHandler handler =
            createSettingsHandler(orderFile, name, description,
                seeds, jobDir, errorHandler, "order.xml", "seeds.txt");
        this.newJob = new CrawlJob(UID, name, handler, errorHandler, priority,
            jobDir);
        return this.newJob;
    }

    /**
     * Create a new profile based on an existing job or profile.
     */
    public CrawlJob newProfile(CrawlJob baseOn, String name, String description,
            String seeds)
    throws FatalConfigurationException, IOException {
        File profileDir = new File(getProfilesDirectory().getAbsoluteFile(),
            name);
        CrawlJobErrorHandler cjseh = new CrawlJobErrorHandler(Level.SEVERE);
        CrawlJob newProfile = new CrawlJob(name,
            createSettingsHandler(baseOn.getSettingsHandler().getOrderFile(),
                name, description, seeds, profileDir, cjseh, "order.xml",
                "seeds.txt"), cjseh);
        addProfile(newProfile);
        return newProfile;
    }
    /**
     * Create and initialize an XMLSettingsHandler for a new job or
     * profile: copy the base order file into the new settings directory,
     * point the scope at the new seeds file, set name and description,
     * and write out the seeds.
     */
    protected XMLSettingsHandler createSettingsHandler(
            final File orderFile, final String name, final String description,
            final String seeds, final File newSettingsDir,
            final CrawlJobErrorHandler errorHandler,
            final String filename, final String seedfile)
    throws FatalConfigurationException {
        XMLSettingsHandler newHandler = null;
        try {
            newHandler = new XMLSettingsHandler(orderFile);
            if (errorHandler != null) {
                newHandler.registerValueErrorHandler(errorHandler);
            }
            newHandler.setErrorReportingLevel(errorHandler.getLevel());
            newHandler.initialize();
        } catch (InvalidAttributeValueException e2) {
            throw new FatalConfigurationException(
                "InvalidAttributeValueException occurred while creating" +
                " new settings handler for new job/profile\n" +
                e2.getMessage());
        }

        newSettingsDir.mkdirs();

        try {
            // Point the scope at the new seeds file name.
            ((ComplexType)newHandler.getOrder().getAttribute("scope"))
                .setAttribute(new Attribute("seedsfile", seedfile));
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                "AttributeNotFoundException occurred while setting up" +
                " new job/profile\n" + e1.getMessage());
        } catch (InvalidAttributeValueException e1) {
            throw new FatalConfigurationException(
                "InvalidAttributeValueException occurred while setting" +
                " up new job/profile\n" + e1.getMessage());
        } catch (MBeanException e1) {
            throw new FatalConfigurationException(
                "MBeanException occurred while setting up new" +
                " job/profile\n" + e1.getMessage());
        } catch (ReflectionException e1) {
            throw new FatalConfigurationException(
                "ReflectionException occurred while setting up" +
                " new job/profile\n" + e1.getMessage());
        }

        File newFile = new File(newSettingsDir.getAbsolutePath(), filename);

        try {
            newHandler.copySettings(newFile, (String)newHandler.getOrder()
                .getAttribute(CrawlOrder.ATTR_SETTINGS_DIRECTORY));
        } catch (IOException e3) {
            e3.printStackTrace();
            throw new FatalConfigurationException(
                "IOException occurred while writing new settings files" +
                " for new job/profile\n" + e3.getMessage());
        } catch (AttributeNotFoundException e) {
            throw new FatalConfigurationException(
                "AttributeNotFoundException occurred while writing new" +
                " settings files for new job/profile\n" + e.getMessage());
        } catch (MBeanException e) {
            throw new FatalConfigurationException(
                "MBeanException occurred while writing new settings files" +
                " for new job/profile\n" + e.getMessage());
        } catch (ReflectionException e) {
            throw new FatalConfigurationException(
                "ReflectionException occurred while writing new settings" +
                " files for new job/profile\n" + e.getMessage());
        }
        CrawlerSettings orderfile = newHandler.getSettingsObject(null);

        orderfile.setName(name);
        orderfile.setDescription(description);

        if (seeds != null) {
            BufferedWriter writer = null;
            try {
                writer = new BufferedWriter(new FileWriter(newHandler
                    .getPathRelativeToWorkingDirectory(seedfile)));
                try {
                    writer.write(seeds);
                } finally {
                    writer.close();
                }
            } catch (IOException e) {
                throw new FatalConfigurationException(
                    "IOException occurred while writing seed file for new"
                    + " job/profile\n" + e.getMessage());
            }
        }
        return newHandler;
    }

    /**
     * Validate the recovery source and apply it to the new job's
     * settings, wrapping any settings exception as a
     * FatalConfigurationException.
     */
    protected void updateRecoveryPaths(final File recover,
            final SettingsHandler sh, final String jobName)
    throws FatalConfigurationException {
        if (recover == null) {
            return;
        }
        checkDirectory(recover);
        try {
            updateRecoveryPaths(recover, sh);
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                "AttributeNotFoundException occurred while setting up"
                + " new job/profile " + jobName + "\n"
                + e1.getMessage());
        } catch (InvalidAttributeValueException e1) {
            throw new FatalConfigurationException(
                "InvalidAttributeValueException occurred while setting up"
                + " new job/profile " + jobName + "\n"
                + e1.getMessage());
        } catch (MBeanException e1) {
            throw new FatalConfigurationException(
                "MBeanException occurred while setting up"
                + " new job/profile " + jobName + "\n"
                + e1.getMessage());
        } catch (ReflectionException e1) {
            throw new FatalConfigurationException(
                "ReflectionException occurred while setting up"
                + " new job/profile " + jobName + "\n"
                + e1.getMessage());
        } catch (IOException e) {
            throw new FatalConfigurationException(
                "IOException occurred while setting up new job/profile "
                + jobName + "\n" + e.getMessage());
        }
    }
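    /*
     * The private overload below points the new job's recover-path
     * attribute at the recovery source, then appends "-R" to the logs and
     * state directory names (repeatedly, if necessary) until each names
     * an empty directory, so the recovered crawl cannot clobber the
     * directories of the crawl it is recovering from.
     */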
    private void updateRecoveryPaths(final File recover,
            SettingsHandler newHandler)
    throws AttributeNotFoundException, InvalidAttributeValueException,
            MBeanException, ReflectionException, IOException {
        if (recover == null || !recover.exists()) {
            throw new IOException("Recovery src does not exist: " + recover);
        }
        newHandler.getOrder().setAttribute(
            new Attribute(CrawlOrder.ATTR_RECOVER_PATH,
                recover.getAbsolutePath()));

        // Find an empty logs directory, appending the recovery suffix to
        // the configured path until one is found.
        File newLogsDisk = null;
        final String RECOVERY_SUFFIX = "-R";
        while (true) {
            try {
                newLogsDisk = newHandler.getOrder().
                    getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed to get logs directory", e);
            }
            if (newLogsDisk.list().length > 0) {
                // Directory is in use; try adding the recovery suffix.
                String logsPath = (String)newHandler.getOrder().
                    getAttribute(CrawlOrder.ATTR_LOGS_PATH);
                if (logsPath.endsWith("/")) {
                    logsPath = logsPath.substring(0, logsPath.length() - 1);
                }
                newHandler.getOrder().setAttribute(
                    new Attribute(CrawlOrder.ATTR_LOGS_PATH,
                        logsPath + RECOVERY_SUFFIX));
            } else {
                // The logs directory is empty; use it.
                break;
            }
        }
        // Do the same for the state directory.
        File newStateDisk = null;
        while (true) {
            try {
                newStateDisk = newHandler.getOrder().getSettingsDir(
                    CrawlOrder.ATTR_STATE_PATH);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed to get state directory", e);
            }
            if (newStateDisk.list().length > 0) {
                String statePath = (String)newHandler.getOrder().
                    getAttribute(CrawlOrder.ATTR_STATE_PATH);
                if (statePath.endsWith("/")) {
                    statePath = statePath.substring(0, statePath.length() - 1);
                }
                newHandler.getOrder().setAttribute(
                    new Attribute(CrawlOrder.ATTR_STATE_PATH,
                        statePath + RECOVERY_SUFFIX));
            } else {
                break;
            }
        }
    }

    /**
     * Discard the job currently being configured, deleting its settings
     * directory.
     */
    public void discardNewJob() {
        FileUtils.deleteDir(new File(newJob.getSettingsDirectory()));
    }

    public CrawlJob getNewJob() {
        return newJob;
    }

    /**
     * Returns true if the handler has been started and not stopped.
     */
    public boolean isRunning() {
        return running;
    }

    /**
     * Returns true if a job is currently being crawled.
     */
    public boolean isCrawling() {
        return this.currentJob != null;
    }

    /**
     * Start the handler: pending jobs will be run, beginning at once if
     * none is crawling.
     */
    public void startCrawler() {
        running = true;
        if (pendingCrawlJobs.size() > 0 && !isCrawling()) {
            startNextJob();
        }
    }

    /**
     * Stop the handler: the current job finishes but no new job starts.
     */
    public void stopCrawler() {
        running = false;
    }

    /**
     * Start the next pending job on a separate thread, first waiting for
     * any previous start attempt to complete.
     */
    protected final void startNextJob() {
        synchronized (this) {
            if (startingNextJob != null) {
                try {
                    startingNextJob.join();
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    return;
                }
            }
            startingNextJob = new Thread(new Runnable() {
                public void run() {
                    startNextJobInternal();
                }
            }, "StartNextJob");
            startingNextJob.start();
        }
    }
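    /*
     * startNextJob above serializes start attempts by joining the
     * previous starter thread before spawning a new one; the actual job
     * startup work happens in startNextJobInternal below, which retries
     * with the next pending job if initialization fails.
     */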
    protected void startNextJobInternal() {
        if (pendingCrawlJobs.size() == 0 || isCrawling()) {
            // No job ready or already crawling.
            return;
        }
        this.currentJob = pendingCrawlJobs.first();
        assert pendingCrawlJobs.contains(currentJob) :
            "pendingCrawlJobs is in an illegal state";
        pendingCrawlJobs.remove(currentJob);
        try {
            this.currentJob.setupForCrawlStart();
            // This handler must be registered as a listener before the
            // crawl starts so that crawlEnding/crawlEnded are received.
            this.currentJob.getController().addCrawlStatusListener(this);
            this.currentJob.getController().requestCrawlStart();
        } catch (InitializationException e) {
            // Can't load the job; refile it from disk and try the next one.
            loadJob(getStateJobFile(this.currentJob.getDirectory()));
            this.currentJob = null;
            startNextJobInternal();
        }
    }

    /**
     * Forward a settings-changed notification to the current job.
     */
    public void kickUpdate() {
        if (this.currentJob != null) {
            this.currentJob.kickUpdate();
        }
    }

    /**
     * Load a list of options from 'modules/&lt;file&gt;' resources on the
     * classpath, one option per line; lines containing '#' are treated as
     * comments and skipped.
     */
    public static ArrayList<String> loadOptions(String file)
    throws IOException {
        ArrayList<String> ret = new ArrayList<String>();
        Enumeration<URL> resources =
            CrawlJob.class.getClassLoader().getResources("modules/" + file);

        boolean noFileFound = true;
        while (resources.hasMoreElements()) {
            InputStream is = resources.nextElement().openStream();
            noFileFound = false;

            String line = null;
            BufferedReader bf =
                new BufferedReader(new InputStreamReader(is), 8192);
            try {
                while ((line = bf.readLine()) != null) {
                    line = line.trim();
                    if (line.indexOf('#') < 0 && line.length() > 0) {
                        // A non-empty, uncommented line is an option.
                        ret.add(line);
                    }
                }
            } finally {
                bf.close();
            }
        }

        if (noFileFound) {
            throw new IOException("Failed to get " + file + " from the" +
                " CLASSPATH");
        }

        return ret;
    }

    /**
     * Returns an initial frontier marker for the current job, or null if
     * no job is crawling.
     */
    public FrontierMarker getInitialMarker(String regexpr,
            boolean inCacheOnly) {
        return (this.currentJob != null)?
            this.currentJob.getInitialMarker(regexpr, inCacheOnly): null;
    }

    /**
     * Returns pending URIs of the current job matching the marker, or
     * null if no job is crawling.
     */
    public ArrayList getPendingURIsList(FrontierMarker marker,
            int numberOfMatches, boolean verbose)
    throws InvalidFrontierMarkerException {
        return (this.currentJob != null)?
            this.currentJob.getPendingURIsList(marker, numberOfMatches,
                verbose): null;
    }

    /**
     * Delete pending URIs of the current job matching the expression.
     *
     * @return the number of URIs deleted; 0 if no job is crawling.
     */
    public long deleteURIsFromPending(String regexpr) {
        return (this.currentJob != null)?
            this.currentJob.deleteURIsFromPending(regexpr): 0;
    }

    public String importUris(String file, String style, String force) {
        return importUris(file, style, "true".equals(force));
    }

    /**
     * Import URIs from a file or URL into the current job's frontier.
     */
    public String importUris(final String fileOrUrl, final String style,
            final boolean forceRevisit) {
        return (this.currentJob != null)?
            this.currentJob.importUris(fileOrUrl, style, forceRevisit): null;
    }
    protected int importUris(InputStream is, String style,
            boolean forceRevisit) {
        return (this.currentJob != null)?
            this.currentJob.importUris(is, style, forceRevisit): 0;
    }

    public void importUri(final String uri, final boolean forceFetch,
            final boolean isSeed)
    throws URIException {
        importUri(uri, forceFetch, isSeed, true);
    }

    /**
     * Import a single URI into the current job, optionally flushing it
     * straight to the frontier.
     */
    public void importUri(final String str, final boolean forceFetch,
            final boolean isSeed, final boolean isFlush)
    throws URIException {
        if (this.currentJob != null) {
            this.currentJob.importUri(str, forceFetch, isSeed, isFlush);
        }
    }

    protected void doFlush() {
        if (this.currentJob != null) {
            this.currentJob.flush();
        }
    }

    /**
     * Stop the current job, if any.
     */
    public void stop() {
        if (isCrawling()) {
            deleteJob(getCurrentJob().getUID());
        }
    }

    public void requestCrawlStop() {
        if (this.currentJob != null) {
            this.currentJob.stopCrawling();
        }
    }

    /**
     * Ensure a new job's name and description are written to its order
     * file on disk.
     */
    public static CrawlJob ensureNewJobWritten(CrawlJob newJob, String metaname,
            String description) {
        XMLSettingsHandler settingsHandler = newJob.getSettingsHandler();
        CrawlerSettings orderfile = settingsHandler.getSettingsObject(null);
        orderfile.setName(metaname);
        orderfile.setDescription(description);
        settingsHandler.writeSettingsObject(orderfile);
        return newJob;
    }

    public void crawlStarted(String message) {
        // No action taken.
    }

    public void crawlEnding(String sExitMessage) {
        // Refile the job's on-disk state into the completed set, clear the
        // current job, and wake any thread waiting in terminateCurrentJob.
        loadJob(getStateJobFile(this.currentJob.getDirectory()));
        currentJob = null;
        synchronized (this) {
            notifyAll();
        }
    }

    public void crawlEnded(String sExitMessage) {
        if (this.running) {
            startNextJob();
        }
    }

    public void crawlPausing(String statusMessage) {
        // No action taken.
    }

    public void crawlPaused(String statusMessage) {
        // No action taken.
    }

    public void crawlResuming(String statusMessage) {
        // No action taken.
    }

    public void crawlCheckpoint(File checkpointDir) throws Exception {
        // No action taken.
    }
}