KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > selftest > SelfTestCrawlJobHandler


/* SelfTestCrawlJobHandler
 *
 * Created on Feb 4, 2004
 *
 * Copyright (C) 2004 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

package org.archive.crawler.selftest;

import java.io.File;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import junit.framework.Test;
import junit.framework.TestResult;

import org.archive.crawler.Heritrix;
import org.archive.crawler.admin.CrawlJob;
import org.archive.crawler.admin.CrawlJobHandler;
import org.archive.crawler.datamodel.CrawlURI;
import org.archive.crawler.event.CrawlURIDispositionListener;


41 /**
42  * An override to gain access to end-of-crawljob message.
43  *
44  *
45  * @author stack
46  * @version $Id: SelfTestCrawlJobHandler.java,v 1.17.4.1 2007/01/13 01:31:26 stack-sf Exp $
47  */

48
49 public class SelfTestCrawlJobHandler extends CrawlJobHandler
50 implements CrawlURIDispositionListener {
51     /**
52      * Name of the selftest webapp.
53      */

54     private static final String JavaDoc SELFTEST_WEBAPP = "selftest";
55
56     private static Logger JavaDoc logger =
57         Logger.getLogger("org.archive.crawler.admin.SelftestCrawlJobHandler");
58
59     /**
60      * Name of selftest to run.
61      *
62      * If set, run this test only. Otherwise run them all.
63      */

64     private String JavaDoc selfTestName = null;
65     
66     private String JavaDoc selfTestUrl = null;
67
68
69     private SelfTestCrawlJobHandler() {
70         this(null, null, null);
71     }
72
73     public SelfTestCrawlJobHandler(final File JavaDoc jobsDir,
74             final String JavaDoc selfTestName, final String JavaDoc url) {
75         // No need to load jobs or profiles
76
super(jobsDir, false, false);
77         this.selfTestName = selfTestName;
78         this.selfTestUrl = url;
79     }
80     
81     @Override JavaDoc
82     public void crawlStarted(String JavaDoc message) {
83         super.crawlStarted(message);
84         this.getCurrentJob().getController().
85             addCrawlURIDispositionListener(this);
86     }
87
88     public void crawlEnded(String JavaDoc sExitMessage) {
89         TestResult result = null;
90         try {
91             super.crawlEnded(sExitMessage);
92
93             // At crawlEnded time, there is no current job. Get the selftest
94
// job by pulling from the completedCrawlJobs queue.
95
List JavaDoc completedCrawlJobs = getCompletedJobs();
96             if (completedCrawlJobs == null || completedCrawlJobs.size() <= 0) {
97                 logger.severe("Selftest job did not complete.");
98             } else {
99                 CrawlJob job = (CrawlJob)completedCrawlJobs.
100                     get(completedCrawlJobs.size()-1);
101                 Test test = null;
102                 if (this.selfTestName != null &&
103                         this.selfTestName.length() > 0) {
104                     // Run single selftest only.
105
// Get class for the passed single selftest.
106
// Assume test to run is in this package.
107
String JavaDoc thisClassName = this.getClass().getName();
108                     String JavaDoc pkg = thisClassName.
109                         substring(0, thisClassName.lastIndexOf('.'));
110                     // All selftests end in 'SelfTest'.
111
String JavaDoc selftestClass = pkg + '.' + this.selfTestName +
112                         "SelfTest";
113                     // Need to make a list. Make an array first.
114
List JavaDoc<Class JavaDoc<?>> classList = new ArrayList JavaDoc<Class JavaDoc<?>>();
115                     classList.add(Class.forName(selftestClass));
116                     test = AllSelfTestCases.suite(this.selfTestUrl,
117                         job, job.getDirectory(), Heritrix.getHttpServer().
118                         getWebappPath(SELFTEST_WEBAPP), classList);
119                 } else {
120                     // Run all tests.
121
test = AllSelfTestCases.suite(this.selfTestUrl,
122                         job, job.getDirectory(), Heritrix.getHttpServer().
123                         getWebappPath(SELFTEST_WEBAPP));
124                 }
125                 result = junit.textui.TestRunner.run(test);
126             }
127         } catch (Exception JavaDoc e) {
128             logger.info("Failed running selftest analysis: " + e.getMessage());
129         } finally {
130             // TODO: This technique where I'm calling shutdown directly means
131
// we bypass the running of other crawlended handlers. Means
132
// that such as the statistics tracker have no chance to run so
133
// reports are never generated. Fix -- but preserve 0 or 1 exit
134
// code.
135
logger.info((new Date JavaDoc()).toString() + " Selftest " +
136                 (result != null && result.wasSuccessful()? "PASSED": "FAILED"));
137             stop();
138             Heritrix.shutdown(((result != null) && result.wasSuccessful())?
139                 0: 1);
140         }
141     }
142
143     public void crawledURIDisregard(CrawlURI curi) {
144         // TODO Auto-generated method stub
145
}
146
147     public void crawledURIFailure(CrawlURI curi) {
148         // TODO Auto-generated method stub
149
}
150
151     public void crawledURINeedRetry(CrawlURI curi) {
152         // TODO Auto-generated method stub
153
}
154
155     public void crawledURISuccessful(CrawlURI curi) {
156         // If curi ends in 'Checkpoint/index.html', then run a Checkpoint.
157
if (curi.toString().endsWith("/Checkpoint/")) {
158             this.getCurrentJob().getController().requestCrawlCheckpoint();
159         }
160     }
161 }
Popular Tags