KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > susebox > jtopas > spi > PatternHandler


1 /*
2  * PatternHandler.java: Interface for pattern-aware tokenizers.
3  *
4  * Copyright (C) 2002 Heiko Blau
5  *
6  * This file belongs to the JTopas Library.
7  * JTopas is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU Lesser General Public License as published by the
9  * Free Software Foundation; either version 2.1 of the License, or (at your
10  * option) any later version.
11  *
12  * This software is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.
15  * See the GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License along
18  * with JTopas. If not, write to the
19  *
20  * Free Software Foundation, Inc.
21  * 59 Temple Place, Suite 330,
22  * Boston, MA 02111-1307
23  * USA
24  *
25  * or check the Internet: http://www.fsf.org
26  *
27  * Contact:
28  * email: heiko@susebox.de
29  */

30
31 package de.susebox.jtopas.spi;
32
33 //-----------------------------------------------------------------------------
34
// Imports
35
//
36
import de.susebox.jtopas.TokenizerProperty;
37 import de.susebox.jtopas.TokenizerException;
38
39
40 //-----------------------------------------------------------------------------
41
// Interface PatternHandler
42
//
43

44 /**<p>
45  * This interface must be implemented by classes that should be used as a
46  * pattern handler for a {@link de.susebox.jtopas.Tokenizer}. Pattern are usually
47  * regular expressions that are applied on token images to check if that image
48  * matches the pattern.
49  *</p>
50  *
51  * @see de.susebox.jtopas.Tokenizer
52  * @see de.susebox.jtopas.TokenizerProperties
53  * @see de.susebox.jtopas.spi.DataMapper
54  * @author Heiko Blau
55  */

56 public interface PatternHandler {
57   
58   /**
59    * This method can be used by a {@link de.susebox.jtopas.Tokenizer} implementation
60    * for a fast detection if pattern matching must be performed at all. If the method
61    * returns <code>false</code> time-consuming preparations can be skipped.
62    *
63    * @return <code>true</code> if there actually are pattern that can be tested
64    * for a match, <code>false</code> otherwise.
65    */

66   public boolean hasPattern();
67   
68   /**
69    * This method checks if the start of a character range given through the
70    * {@link DataProvider} matches a pattern. An implementation should use
71    * a {@link de.susebox.jtopas.TokenizerException} to report problems.
72    *<br>
73    * The method returns <code>null</code> if the beginning of the character range
74    * doesn't match a pattern known to the <code>PatternHandler</code>. Otherwise
75    * it returns an object with the implemented interface {@link PatternHandler.Result}.
76    *<br>
77    * The pattern check is repeated if the method returns a match that is exactly
78    * as long as the given data range and more data is available. Since it is
79    * probably a rare case, that where are not enough data to find a complete or
80    * no match, the overhead of a repeated check on part of the data is neglected.
81    *<br>
82    * If a pattern handler has more than one pattern that could be applied to the
83    * given data, it should return the longest possible match.
84    *
85    * @param dataProvider the source to get the data from
86    * @param lengthOfMatch if a match is found, the method places the length of
87    * it into the first element of this array
88    * @return a {@link PatternHandler.Result} object or <code>null</code> if no
89    * match was found
90    * @throws TokenizerException generic exception
91    * @throws NullPointerException if no {@link DataProvider} is given
92    */

93   public PatternHandler.Result matches(DataProvider dataProvider)
94     throws TokenizerException, NullPointerException JavaDoc;
95   
96   
97   //---------------------------------------------------------------------------
98
// Inner Interfaces
99
//
100

101   /**
102    * An inner interface for the pattern match result.
103    */

104   public static interface Result {
105     
106     /**
107      * Returns the {@link TokenizerProperty} that describes the pattern that
108      * matches data passed to {@link PatternHandler#matches}. The returned value
109      * is <strong>not</strong> <code>null</code>.
110      *
111      * @return the pattern property of a successful match
112      */

113     public TokenizerProperty getProperty();
114     
115     /**
116      * Returns the number of characters that are part of a match.
117      *
118      * @return length of match
119      */

120     public int getLengthOfMatch();
121     
122     /**
123      * Returns the capturing groups of a match. It is used if the calling tokenizer
124      * needs these groups (e. g. if the flag {@link de.susebox.jtopas.TokenizerProperties#F_RETURN_IMAGE_PARTS}
125      * is set).
126      *<br>
127      * The return value must not be null or empty. The first element (array index 0)
128      * must contain the whole pattern match (as described in the Java 1.4
129      * documentation for {@link java.util.regex.Matcher} or the newer Java 1.5
130      * {@link java.util.regex.MatchResult}).
131      *
132      * @return the capturing groups of the last pattern match in {@link #matches}.
133      */

134     public String JavaDoc[] getGroups() throws TokenizerException;
135   }
136 }
137
138
Popular Tags