KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > deciderules > ExceedsDocumentLengthTresholdDecideRule


/* $Id: ExceedsDocumentLengthTresholdDecideRule.java,v 1.1.2.1 2007/01/13 01:31:14 stack-sf Exp $
 *
 * Created on 28.8.2006
 *
 * Copyright (C) 2006 Olaf Freyer
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

23 package org.archive.crawler.deciderules;
24
25 import org.archive.crawler.settings.SimpleType;
26
27 public class ExceedsDocumentLengthTresholdDecideRule extends
28 NotExceedsDocumentLengthTresholdDecideRule {
29
30     private static final long serialVersionUID = -3008503096295212224L;
31
32     /**
33      * Usual constructor.
34      * @param name Name of this rule.
35      */

36     public ExceedsDocumentLengthTresholdDecideRule(String JavaDoc name) {
37         super(name);
38         setDescription("ExceedsDocumentLengthTresholdDecideRule. ACCEPTs URIs "+
39              "with content length exceeding a given treshold. "+
40              "Either examines HTTP header content length or " +
41              "actual downloaded content length and returns false " +
42              "for documents exceeding a given length treshold.");
43
44         addElementToDefinition(new SimpleType(ATTR_CONTENT_LENGTH_TRESHOLD,
45             "Min " +
46             "content-length this filter will allow to pass through. If -1, " +
47             "then no limit.", DEFAULT_CONTENT_LENGTH_TRESHOLD)); }
48     
49     /**
50      * @param contentLength content length to check against treshold
51      * @param obj Context object.
52      * @return contentLength exceeding treshold?
53      */

54     protected Boolean JavaDoc makeDecision(int contentLength, Object JavaDoc obj) {
55         return contentLength > getContentLengthTreshold(obj);
56     }
57     
58     /**
59      * @param obj Context object.
60      * @return content length threshold
61      */

62     protected int getContentLengthTreshold(Object JavaDoc obj) {
63         int len = ((Integer JavaDoc)getUncheckedAttribute(obj,
64                 ATTR_CONTENT_LENGTH_TRESHOLD)).intValue();
65         return len == -1? 0: len;
66     }
67 }
Popular Tags