KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > settings > refinements > RegularExpressionCriteria


1 /* RegularExpressionCriteria
2  *
3  * $Id: RegularExpressionCriteria.java,v 1.8 2005/07/18 17:29:58 stack-sf Exp $
4  *
5  * Created on Apr 8, 2004
6  *
7  * Copyright (C) 2004 Internet Archive.
8  *
9  * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24  */

25 package org.archive.crawler.settings.refinements;
26
27 import org.archive.net.UURI;
28 import org.archive.util.TextUtils;
29
30
31 /**
32  * A refinement criteria that test if a URI matches a regular expression.
33  *
34  * @author John Erik Halse
35  */

36 public class RegularExpressionCriteria implements Criteria {
37     private String JavaDoc regexp = "";
38
39     /**
40      * Create a new instance of RegularExpressionCriteria.
41      */

42     public RegularExpressionCriteria() {
43         super();
44     }
45
46     /**
47      * Create a new instance of RegularExpressionCriteria initializing it with
48      * a regular expression.
49      *
50      * @param regexp the regular expression for this criteria.
51      */

52     public RegularExpressionCriteria(String JavaDoc regexp) {
53         setRegexp(regexp);
54     }
55
56     /* (non-Javadoc)
57      * @see org.archive.crawler.settings.refinements.Criteria#isWithinRefinementBounds(org.archive.crawler.datamodel.UURI, int)
58      */

59     public boolean isWithinRefinementBounds(UURI uri) {
60         return (uri == null || uri == null)?
61             false: TextUtils.matches(regexp, uri.toString());
62     }
63
64     /**
65      * Get the regular expression to be matched against a URI.
66      *
67      * @return Returns the regexp.
68      */

69     public String JavaDoc getRegexp() {
70         return regexp;
71     }
72     /**
73      * Set the regular expression to be matched against a URI.
74      *
75      * @param regexp The regexp to set.
76      */

77     public void setRegexp(String JavaDoc regexp) {
78         this.regexp = regexp;
79     }
80
81     /* (non-Javadoc)
82      * @see org.archive.crawler.settings.refinements.Criteria#getName()
83      */

84     public String JavaDoc getName() {
85         return "Regular expression criteria";
86     }
87
88     /* (non-Javadoc)
89      * @see org.archive.crawler.settings.refinements.Criteria#getDescription()
90      */

91     public String JavaDoc getDescription() {
92         return "Accept URIs that match the following regular expression: "
93             + getRegexp();
94     }
95 }
96
Popular Tags