KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > de > susebox > jtopas > impl > PatternMatcher


/*
 * PatternMatcher.java: Interface for pattern-aware tokenizers.
 *
 * Copyright (C) 2003 Heiko Blau
 *
 * This file belongs to the JTopas Library.
 * JTopas is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 *
 * This software is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along
 * with JTopas. If not, write to the
 *
 * Free Software Foundation, Inc.
 * 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307
 * USA
 *
 * or check the Internet: http://www.fsf.org
 *
 * Contact:
 * email: heiko@susebox.de
 */

30
31 package de.susebox.jtopas.impl;
32
//-----------------------------------------------------------------------------
// Imports
//
36
import java.util.regex.Pattern JavaDoc;
37 import java.util.regex.Matcher JavaDoc;
38 import java.util.regex.PatternSyntaxException JavaDoc;
39
40 import de.susebox.jtopas.TokenizerProperty;
41 import de.susebox.jtopas.Flags;
42 import de.susebox.jtopas.TokenizerException;
43
44 import de.susebox.jtopas.spi.PatternHandler;
45 import de.susebox.jtopas.spi.DataProvider;
46
47
//-----------------------------------------------------------------------------
// Class PatternMatcher
//
51

52 /**<p>
53  * Implementation of the {@link PatternHandler} interface using the JDK 1.4
54  * package <code>java.util.regex</code>.
55  *</p>
56  *
57  * @author Heiko Blau
58  */

59 public class PatternMatcher implements PatternHandler {
60   
61   //---------------------------------------------------------------------------
62
// Constructors
63
//
64

65   /**
66    * The constructor takes a pattern and the {@link TokenizerProperty} object
67    * associated with this instance of <code>PatternMatcher</code>. The global
68    * flags are passed to control the behaviour for attributes that are not
69    * specified in the property itself (e.g. case-sensitivity).
70    *
71    * @param prop the {@link TokenizerProperty} associated with this object
72    * @param globalFlags flags that are to be used if not set explicitely in the property
73    * @throws NullPointerException if the given parameter is <code>null</code>
74    */

75   public PatternMatcher(TokenizerProperty prop, int globalFlags) throws NullPointerException JavaDoc {
76     _globalFlags = globalFlags;
77     setProperty(prop);
78   }
79   
80
81   //---------------------------------------------------------------------------
82
// Methods of the PatternHandler interface
83
//
84

85   /**
86    * The method is a dummy implementation for the interface {@link PatternHandler}
87    * and always returns <code>true</code>.
88    *
89    * @return always <code>true</code>
90    */

91   public boolean hasPattern() {
92     return true;
93   }
94   
95   /**
96    * This method checks if the start of a character range given through the
97    * {@link DataProvider} matches a pattern. See {@link PatternHandler#matches}
98    * for details.
99    *
100    * @param dataProvider the source to get the data from
101    * @param freePatternOnly if <code>true</code> only unbounded pattern should be
102    * checked (pattern not enclosed in whitespaces, separators etc.)
103    * @return a {@link PatternHandler.Result} object or <code>null</code> if no
104    * match was found
105    * @throws TokenizerException generic exception
106    * @throws NullPointerException if no {@link DataProvider} is given
107    */

108   public PatternHandler.Result matches(DataProvider dataProvider)
109     throws TokenizerException, NullPointerException JavaDoc
110   {
111     // invoke JDK 1.4 or jakarta regexp API
112
try {
113       String JavaDoc[] groups;
114       
115       _matcher.reset(new DataProviderCharSequence(dataProvider));
116       if (_matcher.lookingAt()) {
117         if (_property.isFlagSet(Flags.F_RETURN_IMAGE_PARTS, (_globalFlags & Flags.F_RETURN_IMAGE_PARTS) != 0)) {
118           // get the capturing groups
119
groups = new String JavaDoc[_matcher.groupCount() + 1];
120           for (int index = 0; index < groups.length; ++index) {
121             groups[index] = _matcher.group(index);
122           }
123         } else {
124           groups = new String JavaDoc[] {};
125         }
126         return new LocalResult(_property, _matcher.end(), groups);
127       } else {
128         return null;
129       }
130     } catch (Exception JavaDoc ex) {
131       throw new TokenizerException(ex);
132     }
133   }
134
135   
136   //---------------------------------------------------------------------------
137
// Methods
138
//
139

140   /**
141    * Setting the {@link TokenizerProperty} for this <code>PatternMatcher</code>.
142    * This method will recompile the regular expression pattern.
143    *
144    * @param prop the {@link TokenizerProperty} associated with this object
145    * @throws NullPointerException if the given parameter is <code>null</code>
146    */

147   public void setProperty(TokenizerProperty prop) throws NullPointerException JavaDoc {
148     // no pattern given
149
if (prop == null) {
150       throw new NullPointerException JavaDoc("No property given.");
151     } else if (prop.getImages() == null || prop.getImages().length < 1 || prop.getImages()[0] == null) {
152       throw new NullPointerException JavaDoc("Property contains no pattern image.");
153     }
154     
155     // compile the pattern
156
int flags = Pattern.MULTILINE | Pattern.DOTALL;
157
158     if (prop.isFlagSet(Flags.F_NO_CASE, (_globalFlags & Flags.F_NO_CASE) != 0)) {
159       flags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
160     }
161     _matcher = Pattern.compile(prop.getImages()[0], flags).matcher("");
162
163     // set property
164
_property = prop;
165   }
166   
167   /**
168    * Retrieving the {@link TokenizerProperty} of this <code>PatternMatcher</code>.
169    *
170    * @return the {@link TokenizerProperty} associated with this object
171    */

172   public TokenizerProperty getProperty() {
173     return _property;
174   }
175   
176
177   //---------------------------------------------------------------------------
178
// Inner Classes
179
//
180

181   /**
182    * The result of a match operation.
183    */

184   private final class LocalResult implements PatternHandler.Result {
185     
186     /**
187      * The constructor gets all the nessecary parameters.
188      *
189      * @param prop the pattern property
190      * @param lengthOfMatch the detected number of characters that match the pattern
191      * @param groups array with the capturing groups
192      */

193     protected LocalResult(TokenizerProperty prop, int lengthOfMatch, String JavaDoc[] groups) {
194       _property = prop;
195       _lengthOfMatch = lengthOfMatch;
196       _groups = groups;
197     }
198     
199     /**
200      * Returns the capturing groups of a match.
201      *
202      * @return the capturing groups of the last pattern match in {@link #matches}.
203      */

204     public String JavaDoc[] getGroups() throws TokenizerException {
205       return _groups;
206     }
207     
208     /**
209      * Returns the number of characters that are part of a match.
210      *
211      * @return length of match
212      */

213     public int getLengthOfMatch() {
214       return _lengthOfMatch;
215     }
216     
217     /**
218      * Returns the {@link TokenizerProperty} that describes the pattern that
219      * matches data passed to {@link PatternHandler#matches}.
220      *
221      * @return the pattern property of a successful match
222      */

223     public TokenizerProperty getProperty() {
224       return _property;
225     }
226     
227     // member
228
private TokenizerProperty _property;
229     private int _lengthOfMatch;
230     private String JavaDoc[] _groups;
231   }
232
233   /**
234    * An implementation of the JDK 1.4 {@link java.lang.CharSequence} interface
235    * backed by a {@link DataProvider}.
236    */

237   private final class DataProviderCharSequence implements CharSequence JavaDoc {
238     
239     /**
240      * The constructor takes the reference to the {@link DataProvider}.
241      *
242      * @param dataProvider the backing <code>DataProvider</code>
243      */

244     public DataProviderCharSequence(DataProvider dataProvider) {
245       this(dataProvider, dataProvider.getStartPosition(), dataProvider.getLength());
246     }
247     
248     /**
249      * The constructor takes the reference to the {@link DataProvider}, the
250      * start position and length. It is nessecary for the {@link #subSequence}
251      * method
252      *
253      * @param dataProvider the backing <code>DataProvider</code>
254      */

255     private DataProviderCharSequence(DataProvider dataProvider, int start, int length) {
256       _dataProvider = dataProvider;
257       _start = start;
258       _length = length;
259     }
260     
261     /**
262      * Returns the character at the specified index. An index ranges from zero
263      * to <code>length() - 1</code>. The first character of the sequence is at
264      * index zero, the next at index one, and so on, as for array
265      * indexing. </p>
266      *
267      * @param index the index of the character to be returned
268      * @return the specified character
269      * @throws ArrayIndexOutOfBoundsException
270      * if the <code>index</code> argument is negative or not less than
271      * <code>length()</code>
272      */

273     public char charAt(int index) throws ArrayIndexOutOfBoundsException JavaDoc {
274       return _dataProvider.getCharAt(_start + index - _dataProvider.getStartPosition());
275     }
276     
277     /** Returns the length of this character sequence. The length is the number
278      * of 16-bit Unicode characters in the sequence. </p>
279      *
280      * @return the number of characters in this sequence
281      *
282      */

283     public int length() {
284       return _length;
285     }
286     
287     /**
288      * Returns a new character sequence that is a subsequence of this sequence.
289      * See {@link java.lang.CharSequence#subSequence} for details.
290      *
291      * @param start the start index, inclusive
292      * @param end the end index, exclusive
293      * @return the specified subsequence
294      * @throws IndexOutOfBoundsException
295      * if <code>start</code> or <code>end</code> are negative,
296      * if <code>end</code> is greater than <code>length()</code>,
297      * or if <code>start</code> is greater than <code>end</code>
298      */

299     public CharSequence JavaDoc subSequence(int start, int end) {
300       if (start < 0 || end < 0 || end > length() || start > end) {
301         throw new IndexOutOfBoundsException JavaDoc();
302       }
303       return new DataProviderCharSequence(_dataProvider, _start + start, end - start);
304     }
305     
306     /**
307      * Returns the string representation for the <code>DataProvider</code>.
308      *
309      * @return the string consisting of all available data in the DataProvider.
310      */

311     public String JavaDoc toString() {
312       int realStart = _start - _dataProvider.getStartPosition();
313       
314       return _dataProvider.toString().substring(realStart, realStart + _length);
315     }
316     
317     // members
318
private DataProvider _dataProvider = null;
319     private int _start = 0;
320     private int _length = 0;
321   }
322
323   
324   //---------------------------------------------------------------------------
325
// Members
326
//
327
private TokenizerProperty _property = null;
328   private Matcher JavaDoc _matcher = null;
329   private int _globalFlags = 0;
330 }
331   
332
Popular Tags