KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > url > canonicalize > BaseRule


1 /* BaseRule
2  *
3  * Created on Oct 5, 2004
4  *
5  * Copyright (C) 2004 Internet Archive.
6  *
7  * This file is part of the Heritrix web crawler (crawler.archive.org).
8  *
9  * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22  */

23 package org.archive.crawler.url.canonicalize;
24
25 import java.util.logging.Logger JavaDoc;
26 import java.util.regex.Matcher JavaDoc;
27
28 import javax.management.AttributeNotFoundException JavaDoc;
29
30 import org.archive.crawler.settings.ModuleType;
31 import org.archive.crawler.settings.SimpleType;
32 import org.archive.crawler.url.CanonicalizationRule;
33
34 /**
35  * Base of all rules applied canonicalizing a URL that are configurable
36  * via the Heritrix settings system.
37  *
38  * This base class is abstract. Subclasses must implement the
39  * {@link CanonicalizationRule#canonicalize(String, Object)} method.
40  *
41  * @author stack
42  * @version $Date: 2005/11/04 23:00:23 $, $Revision: 1.5 $
43  */

44 public abstract class BaseRule
45 extends ModuleType
46 implements CanonicalizationRule {
47     private static Logger JavaDoc logger =
48         Logger.getLogger(BaseRule.class.getName());
49     public static final String JavaDoc ATTR_ENABLED = "enabled";
50     
51     /**
52      * Constructor.
53      * @param name Name of this canonicalization rule.
54      * @param description Description of what this rule does.
55      */

56     public BaseRule(String JavaDoc name, String JavaDoc description) {
57         super(name, description);
58         setExpertSetting(true);
59         setOverrideable(true);
60         Object JavaDoc [] possibleValues = {Boolean.TRUE, Boolean.FALSE};
61         addElementToDefinition(new SimpleType(ATTR_ENABLED,
62             "Rule is enabled.", new Boolean JavaDoc(true), possibleValues));
63     }
64     
65     public boolean isEnabled(Object JavaDoc context) {
66         boolean result = true;
67         try {
68             Boolean JavaDoc b = (Boolean JavaDoc)getAttribute(context, ATTR_ENABLED);
69             if (b != null) {
70                 result = b.booleanValue();
71             }
72         } catch (AttributeNotFoundException JavaDoc e) {
73             logger.warning("Failed get of 'enabled' attribute.");
74         }
75
76         return result;
77     }
78     
79     /**
80      * Run a regex that strips elements of a string.
81      *
82      * Assumes the regex has a form that wants to strip elements of the passed
83      * string. Assumes that if a match, appending group 1
84      * and group 2 yields desired result.
85      * @param url Url to search in.
86      * @param matcher Matcher whose form yields a group 1 and group 2 if a
87      * match (non-null.
88      * @return Original <code>url</code> else concatenization of group 1
89      * and group 2.
90      */

91     protected String JavaDoc doStripRegexMatch(String JavaDoc url, Matcher JavaDoc matcher) {
92         return (matcher != null && matcher.matches())?
93             checkForNull(matcher.group(1)) + checkForNull(matcher.group(2)):
94             url;
95     }
96
97     /**
98      * @param string String to check.
99      * @return <code>string</code> if non-null, else empty string ("").
100      */

101     private String JavaDoc checkForNull(String JavaDoc string) {
102         return (string != null)? string: "";
103     }
104 }
105
Popular Tags