1 19 package org.archive.crawler.framework; 20 21 import java.io.Serializable ; 22 import java.util.Date ; 23 import java.util.EventObject ; 24 import java.util.logging.Level ; 25 26 import javax.management.AttributeNotFoundException ; 27 28 import org.archive.crawler.event.CrawlStatusListener; 29 import org.archive.crawler.framework.exceptions.FatalConfigurationException; 30 import org.archive.crawler.settings.ModuleType; 31 import org.archive.crawler.settings.SimpleType; 32 import org.archive.crawler.settings.Type; 33 import org.archive.util.ArchiveUtils; 34 import org.archive.util.PaddingStringBuffer; 35 import org.xbill.DNS.DClass; 36 import org.xbill.DNS.Lookup; 37 38 57 public abstract class AbstractTracker extends ModuleType 58 implements StatisticsTracking, CrawlStatusListener, Serializable { 59 60 public static final Integer DEFAULT_STATISTICS_REPORT_INTERVAL = 61 new Integer (20); 62 64 public static final String ATTR_STATS_INTERVAL = "interval-seconds"; 65 66 69 protected transient CrawlController controller; 70 71 protected long crawlerStartTime; 73 protected long crawlerEndTime = -1; protected long crawlerPauseStarted = 0; 75 protected long crawlerTotalPausedTime = 0; 76 77 78 protected long lastLogPointTime; 79 80 protected boolean shouldrun = true; 81 82 86 public AbstractTracker(String name, String description) { 87 super(name, description); 88 Type e = addElementToDefinition(new SimpleType(ATTR_STATS_INTERVAL, 89 "The interval between writing progress information to log.", 90 DEFAULT_STATISTICS_REPORT_INTERVAL)); 91 e.setOverrideable(false); 92 } 93 94 104 public void initialize(CrawlController c) 105 throws FatalConfigurationException { 106 this.controller = c; 107 108 this.controller.addCrawlStatusListener(this); 110 } 111 112 117 public void run() { 118 if (this.controller == null) { 120 return; 121 } 122 123 shouldrun = true; 125 this.controller.logProgressStatistics(progressStatisticsLegend()); 127 lastLogPointTime = System.currentTimeMillis(); 129 while (shouldrun) { 131 try { 134 Thread.sleep(getLogWriteInterval() * 1000); 135 } catch (InterruptedException e) { 136 e.printStackTrace(); 137 controller.runtimeErrors.log(Level.INFO, 138 "Periodic stat logger interrupted while sleeping."); 139 } 140 141 if (shouldrun && getCrawlPauseStartedTime() == 0) { 144 progressStatisticsEvent(new EventObject (this)); 145 } 146 } 147 } 148 149 152 public String progressStatisticsLegend() { 153 return " timestamp" + 154 " discovered " + 155 " queued downloaded doc/s(avg) KB/s(avg) " + 156 " dl-failures busy-thread mem-use-KB heap-size-KB " + 157 " congestion max-depth avg-depth"; 158 } 159 160 165 public void noteStart() { 166 if (this.crawlerStartTime == 0) { 167 this.crawlerStartTime = System.currentTimeMillis(); 169 } 170 } 171 172 186 protected synchronized void progressStatisticsEvent(final EventObject e) { 187 this.controller.progressStatisticsEvent(e); 188 Lookup.getDefaultCache(DClass.IN).clearCache(); 192 } 193 194 199 public long getCrawlStartTime() { 200 return this.crawlerStartTime; 201 } 202 203 212 public long getCrawlEndTime() { 213 return (this.crawlerEndTime == -1)? 214 System.currentTimeMillis(): this.crawlerEndTime; 215 } 216 217 223 public long getCrawlTotalPauseTime() { 224 return this.crawlerTotalPausedTime; 225 } 226 227 234 public long getCrawlPauseStartedTime() { 235 return this.crawlerPauseStarted; 236 } 237 238 public long getCrawlerTotalElapsedTime() { 239 if (getCrawlStartTime() == 0) { 240 return 0; 242 } 243 244 return (getCrawlPauseStartedTime() != 0)? 245 (getCrawlPauseStartedTime() - getCrawlTotalPauseTime() - 247 getCrawlStartTime()): 248 (getCrawlEndTime() - getCrawlTotalPauseTime() - getCrawlStartTime()); 250 } 251 252 257 protected int getLogWriteInterval() { 258 int logInterval; 259 try { 260 logInterval = 261 ((Integer ) getAttribute(null, ATTR_STATS_INTERVAL)).intValue(); 262 } catch (AttributeNotFoundException e) { 263 logInterval = 10; 264 } 265 return logInterval; 266 } 267 268 271 public void crawlPausing(String statusMessage) { 272 logNote("CRAWL WAITING - " + statusMessage); 273 } 274 275 protected void logNote(final String note) { 276 this.controller.logProgressStatistics(new PaddingStringBuffer() 277 .append(ArchiveUtils.TIMESTAMP14.format(new Date ())) 278 .append(" ") 279 .append(note) 280 .toString()); 281 } 282 283 public void crawlPaused(String statusMessage) { 284 crawlerPauseStarted = System.currentTimeMillis(); 285 progressStatisticsEvent(new EventObject (this)); 286 logNote("CRAWL PAUSED - " + statusMessage); 287 } 288 289 public void crawlResuming(String statusMessage) { 290 tallyCurrentPause(); 291 logNote("CRAWL RESUMED - " + statusMessage); 292 lastLogPointTime = System.currentTimeMillis(); 293 } 294 295 298 protected void tallyCurrentPause() { 299 if (this.crawlerPauseStarted > 0) { 300 this.crawlerTotalPausedTime 302 += (System.currentTimeMillis() - this.crawlerPauseStarted); 303 } 304 this.crawlerPauseStarted = 0; 305 } 306 307 public void crawlEnding(String sExitMessage) { 308 logNote("CRAWL ENDING - " + sExitMessage); 309 } 310 311 314 public void crawlEnded(String sExitMessage) { 315 crawlerEndTime = System.currentTimeMillis(); 317 progressStatisticsEvent(new EventObject (this)); 318 logNote("CRAWL ENDED - " + sExitMessage); 319 shouldrun = false; 320 dumpReports(); 321 finalCleanup(); 322 } 323 324 public void crawlStarted(String message) { 325 tallyCurrentPause(); 326 noteStart(); 327 } 328 329 332 protected void dumpReports() { 333 } 335 336 339 protected void finalCleanup() { 340 controller = null; } 342 343 346 public long crawlDuration() { 347 return getCrawlerTotalElapsedTime(); 348 } 349 } 350 | Popular Tags |