KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > datamodel > CrawlSubstats


1 /* CrawlSubstats
2 *
3 * $Id: CrawlSubstats.java,v 1.3.4.1 2007/01/13 01:31:09 stack-sf Exp $
4 *
5 * Created on Nov 4, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */

25 package org.archive.crawler.datamodel;
26
27 import java.io.Serializable JavaDoc;
28
29 import org.apache.commons.httpclient.HttpStatus;
30
31 /**
32  * Collector of statististics for a 'subset' of a crawl,
33  * such as a server (host:port), host, or frontier group
34  * (eg queue).
35  *
36  * @author gojomo
37  */

38 public class CrawlSubstats implements Serializable JavaDoc, FetchStatusCodes {
39
40     private static final long serialVersionUID = 8624425657056569036L;
41
42     public interface HasCrawlSubstats {
43         public CrawlSubstats getSubstats();
44     }
45     
46     long fetchSuccesses; // 2XX response codes
47
long fetchResponses; // all positive responses (incl. 3XX, 4XX, 5XX)
48
long successBytes; // total size of all success responses
49
long totalBytes; // total size of all responses
50
long fetchNonResponses; // processing attempts resulting in no response
51
// (both failures and temp deferrals)
52

53     public synchronized void tally(CrawlURI curi) {
54         if(curi.getFetchStatus()<=0) {
55             fetchNonResponses++;
56             return;
57         }
58         fetchResponses++;
59         totalBytes += curi.getContentSize();
60         if(curi.getFetchStatus()>=HttpStatus.SC_OK &&
61                 curi.getFetchStatus()<300) {
62             fetchSuccesses++;
63             successBytes += curi.getContentSize();
64         }
65     }
66     
67     public long getFetchSuccesses() {
68         return fetchSuccesses;
69     }
70     public long getFetchResponses() {
71         return fetchResponses;
72     }
73     public long getSuccessBytes() {
74         return successBytes;
75     }
76     public long getTotalBytes() {
77         return totalBytes;
78     }
79     public long getFetchNonResponses() {
80         return fetchNonResponses;
81     }
82 }
83
Popular Tags