KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > selftest > CheckpointSelfTest


1 /* $Id: CheckpointSelfTest.java,v 1.1 2006/08/16 00:46:46 stack-sf Exp $
2  *
3  * Created Aug 15, 2006
4  *
5  * Copyright (C) 2006 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.selftest;
24
25 import java.io.File JavaDoc;
26 import java.io.FileNotFoundException JavaDoc;
27 import java.io.IOException JavaDoc;
28 import java.util.logging.Logger JavaDoc;
29
30 import javax.management.Attribute JavaDoc;
31 import javax.management.AttributeNotFoundException JavaDoc;
32 import javax.management.InvalidAttributeValueException JavaDoc;
33 import javax.management.MBeanException JavaDoc;
34 import javax.management.ReflectionException JavaDoc;
35
36 import org.archive.crawler.admin.CrawlJob.MBeanCrawlController;
37 import org.archive.crawler.datamodel.Checkpoint;
38 import org.archive.crawler.datamodel.CrawlOrder;
39 import org.archive.crawler.datamodel.CrawlURI;
40 import org.archive.crawler.event.CrawlStatusListener;
41 import org.archive.crawler.event.CrawlURIDispositionListener;
42 import org.archive.crawler.framework.Checkpointer;
43 import org.archive.crawler.framework.CrawlController;
44 import org.archive.crawler.framework.exceptions.InitializationException;
45 import org.archive.crawler.settings.XMLSettingsHandler;
46 import org.archive.crawler.util.CheckpointUtils;
47
48
49 /**
50  * Assumes checkpoint was run during the SelfTest.
51  * @author stack
52  * @version $Date: 2006/08/16 00:46:46 $ $Version$
53  */

54 public class CheckpointSelfTest extends SelfTestCase
55 implements CrawlStatusListener, CrawlURIDispositionListener {
56     private final Logger JavaDoc LOG = Logger.getLogger(this.getClass().getName());
57     private boolean crawlEnded = false;
58
59     public CheckpointSelfTest() {
60         // TODO Auto-generated constructor stub
61
}
62
63     public CheckpointSelfTest(String JavaDoc testName) {
64         super(testName);
65         // TODO Auto-generated constructor stub
66
}
67     
68     /**
69      * Recover from the checkpoint made during selftest.
70      * @throws InitializationException
71      * @throws IOException
72      * @throws InvalidAttributeValueException
73      * @throws ReflectionException
74      * @throws MBeanException
75      * @throws AttributeNotFoundException
76      * @throws ClassNotFoundException
77      * @throws InterruptedException
78      */

79     public void testCheckpointRecover()
80     throws InitializationException, IOException JavaDoc,
81             InvalidAttributeValueException JavaDoc, AttributeNotFoundException JavaDoc,
82             MBeanException JavaDoc, ReflectionException JavaDoc, ClassNotFoundException JavaDoc,
83             InterruptedException JavaDoc {
84         // Check checkpoint dir is in place.
85
File JavaDoc f = getFile(getCrawlJobDir(), "checkpoints");
86         // Use the first checkpoint in the dir.
87
File JavaDoc cpdir = getFile(f, Checkpointer.formatCheckpointName("", 1));
88         // Check valid checkpoint file is in place.
89
getFile(cpdir, Checkpoint.VALIDITY_STAMP_FILENAME);
90         // Get order file from checkpoint dir.
91
File JavaDoc order = getFile(cpdir, "order.xml");
92         XMLSettingsHandler handler =
93             new XMLSettingsHandler(order);
94         handler.initialize();
95         // Set recover-path to be this checkpoint dir.
96
handler.getOrder().setAttribute(
97             new Attribute JavaDoc(CrawlOrder.ATTR_RECOVER_PATH, cpdir.toString()));
98         Checkpoint cp =
99             CrawlController.getCheckpointRecover(handler.getOrder());
100         if (cp == null) {
101             throw new NullPointerException JavaDoc("Failed read of checkpoint object");
102         }
103         CrawlController c = (MBeanCrawlController)CheckpointUtils.
104             readObjectFromFile(MBeanCrawlController.class, cpdir);
105         c.initialize(handler);
106         c.addCrawlStatusListener(this);
107         c.addCrawlURIDispositionListener(this);
108         c.requestCrawlStart();
109         LOG.info("Recover from selftest crawl started using " +
110             order.toString() + ".");
111         // Wait here a while till its up and running?
112
while(!this.crawlEnded) {
113             LOG.info("Waiting on recovered crawl to finish");
114             Thread.sleep(1000);
115         }
116     }
117     
118     private File JavaDoc getFile(final File JavaDoc parent, final String JavaDoc name)
119     throws IOException JavaDoc {
120         File JavaDoc f = new File JavaDoc(parent, name);
121         if (!f.exists()) {
122             throw new FileNotFoundException JavaDoc(f.getAbsolutePath());
123         }
124         if (!f.canRead()) {
125             throw new IOException JavaDoc("Can't read " + f.getAbsolutePath());
126         }
127         return f;
128     }
129
130     public void crawlCheckpoint(File JavaDoc checkpointDir) throws Exception JavaDoc {
131         // TODO Auto-generated method stub
132

133     }
134
135     public void crawlEnded(String JavaDoc sExitMessage) {
136         this.crawlEnded = true;
137     }
138
139     public void crawlEnding(String JavaDoc sExitMessage) {
140         // TODO Auto-generated method stub
141

142     }
143
144     public void crawlPaused(String JavaDoc statusMessage) {
145         // TODO Auto-generated method stub
146

147     }
148
149     public void crawlPausing(String JavaDoc statusMessage) {
150         // TODO Auto-generated method stub
151

152     }
153
154     public void crawlResuming(String JavaDoc statusMessage) {
155         // TODO Auto-generated method stub
156

157     }
158
159     public void crawlStarted(String JavaDoc message) {
160         // TODO Auto-generated method stub
161

162     }
163
164     public void crawledURIDisregard(CrawlURI curi) {
165         // TODO Auto-generated method stub
166

167     }
168
169     public void crawledURIFailure(CrawlURI curi) {
170         // TODO Auto-generated method stub
171

172     }
173
174     public void crawledURINeedRetry(CrawlURI curi) {
175         // TODO Auto-generated method stub
176

177     }
178
179     public void crawledURISuccessful(CrawlURI curi) {
180         LOG.info(curi.toString());
181     }
182 }
Popular Tags