KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > filters > LinkRegexFilter


// HTMLParser Library - A java-based parser for HTML
// http://htmlparser.org
// Copyright (C) 2006 John Derrick
//
// Revision Control Information
//
// $URL: https://svn.sourceforge.net/svnroot/htmlparser/trunk/parser/src/main/java/org/htmlparser/filters/LinkRegexFilter.java $
// $Author: derrickoswald $
// $Date: 2006-09-16 10:44:17 -0400 (Sat, 16 Sep 2006) $
// $Revision: 4 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the Common Public License; either
// version 1.0 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// Common Public License for more details.
//
// You should have received a copy of the Common Public License
// along with this library; if not, the license is available from
// the Open Source Initiative (OSI) website:
// http://opensource.org/licenses/cpl1.0.php

26 package org.htmlparser.filters;
27
28 import java.util.regex.Matcher JavaDoc;
29 import java.util.regex.Pattern JavaDoc;
30
31 import org.htmlparser.Node;
32 import org.htmlparser.NodeFilter;
33 import org.htmlparser.tags.LinkTag;
34
35 /**
36  * This class accepts tags of class LinkTag that contain a link matching a given
37  * regex pattern. Use this filter to extract LinkTag nodes with URLs that match
38  * the desired regex pattern.
39  */

40 public class LinkRegexFilter implements NodeFilter
41 {
42     /**
43      * The regular expression to use on the link.
44      */

45     protected Pattern JavaDoc mRegex;
46
47     /**
48      * Creates a LinkRegexFilter that accepts LinkTag nodes containing
49      * a URL that matches the supplied regex pattern.
50      * The match is case insensitive.
51      * @param regexPattern The pattern to match.
52      */

53     public LinkRegexFilter (String JavaDoc regexPattern)
54     {
55         this (regexPattern, true);
56     }
57
58     /**
59      * Creates a LinkRegexFilter that accepts LinkTag nodes containing
60      * a URL that matches the supplied regex pattern.
61      * @param regexPattern The regex pattern to match.
62      * @param caseSensitive Specifies case sensitivity for the matching process.
63      */

64     public LinkRegexFilter (String JavaDoc regexPattern, boolean caseSensitive)
65     {
66         if (caseSensitive)
67             mRegex = Pattern.compile (regexPattern);
68         else
69             mRegex = Pattern.compile (regexPattern,
70                 Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
71     }
72
73     /**
74      * Accept nodes that are a LinkTag and have a URL
75      * that matches the regex pattern supplied in the constructor.
76      * @param node The node to check.
77      * @return <code>true</code> if the node is a link with the pattern.
78      */

79     public boolean accept (Node node)
80     {
81         boolean ret;
82
83         ret = false;
84         if (LinkTag.class.isAssignableFrom (node.getClass ()))
85         {
86             String JavaDoc link = ((LinkTag)node).getLink ();
87             Matcher JavaDoc matcher = mRegex.matcher (link);
88             ret = matcher.find ();
89         }
90
91         return (ret);
92     }
93 }
94
Popular Tags