KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > oro > text > regex > PatternMatcherInput


1 package org.apache.oro.text.regex;
2
3 /* ====================================================================
4  * The Apache Software License, Version 1.1
5  *
6  * Copyright (c) 2000 The Apache Software Foundation. All rights
7  * reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in
18  * the documentation and/or other materials provided with the
19  * distribution.
20  *
21  * 3. The end-user documentation included with the redistribution,
22  * if any, must include the following acknowledgment:
23  * "This product includes software developed by the
24  * Apache Software Foundation (http://www.apache.org/)."
25  * Alternately, this acknowledgment may appear in the software itself,
26  * if and wherever such third-party acknowledgments normally appear.
27  *
28  * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
29  * must not be used to endorse or promote products derived from this
30  * software without prior written permission. For written
31  * permission, please contact apache@apache.org.
32  *
33  * 5. Products derived from this software may not be called "Apache"
34  * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
35  * name, without prior written permission of the Apache Software Foundation.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  * ====================================================================
50  *
51  * This software consists of voluntary contributions made by many
52  * individuals on behalf of the Apache Software Foundation. For more
53  * information on the Apache Software Foundation, please see
54  * <http://www.apache.org/>.
55  *
56  * Portions of this software are based upon software originally written
57  * by Daniel F. Savarese. We appreciate his contributions.
58  */

59
/**
 * The PatternMatcherInput class is used to preserve state across
 * calls to the <code>contains()</code> methods of PatternMatcher instances.
 * It is also used to specify that only a subregion of a string
 * should be used as input when looking for a pattern match.  All that
 * is meant by preserving state is that the end offset of the last match
 * is remembered, so that the next match is performed from that point
 * where the last match left off.  This offset can be accessed from
 * the {@link #getCurrentOffset()} method and can be set with the
 * {@link #setCurrentOffset(int)} method.
 * <p>
 * You would use a PatternMatcherInput object when you want to search for
 * more than just the first occurrence of a pattern in a string, or when
 * you only want to search a subregion of the string for a match.  An
 * example of its most common use is:
 * <blockquote><pre>
 * PatternMatcher matcher;
 * PatternCompiler compiler;
 * Pattern pattern;
 * PatternMatcherInput input;
 * MatchResult result;
 *
 * compiler = new Perl5Compiler();
 * matcher  = new Perl5Matcher();
 *
 * try {
 *   pattern = compiler.compile(somePatternString);
 * } catch(MalformedPatternException e) {
 *   System.out.println("Bad pattern.");
 *   System.out.println(e.getMessage());
 *   return;
 * }
 *
 * input = new PatternMatcherInput(someStringInput);
 *
 * while(matcher.contains(input, pattern)) {
 *   result = matcher.getMatch();
 *   // Perform whatever processing on the result you want.
 * }
 * // Suppose we want to start searching from the beginning again with
 * // a different pattern.
 * // Just set the current offset to the begin offset.
 * input.setCurrentOffset(input.getBeginOffset());
 *
 * // Second search omitted
 *
 * // Suppose we're done with this input, but want to search another string.
 * // There's no need to create another PatternMatcherInput instance.
 * // We can just use the setInput() method.
 * input.setInput(aNewInputString);
 *
 * </pre></blockquote>
 *
 * @author <a href="mailto:dfs@savarese.org">Daniel F. Savarese</a>
 * @version $Id: PatternMatcherInput.java,v 1.1.1.1 2000/07/23 23:08:51 jon Exp $
 *
 * @see PatternMatcher
 */
public final class PatternMatcherInput {
  // NOTE(review): the fields below are package-private rather than private,
  // presumably so that matcher implementations in this package can reach
  // them directly -- confirm before narrowing their visibility.

  /** The String handed to setInput(), or null when a char[] was supplied. */
  String _originalStringInput;

  /**
   * _originalCharInput is the char[] handed to setInput() (null when a
   * String was supplied).  _originalBuffer is the array every accessor in
   * this class reads from.  _toLowerBuffer is only ever reset to null here;
   * presumably a matcher fills it in lazily -- TODO confirm.
   */
  char[] _originalCharInput, _originalBuffer, _toLowerBuffer;

  /** Absolute offsets into _originalBuffer; the end offset is exclusive. */
  int _beginOffset, _endOffset, _currentOffset;

  /** Absolute offsets of the last recorded match; -1 means "no match". */
  int _matchBeginOffset = -1, _matchEndOffset = -1;

  /**
   * Creates a PatternMatcherInput object, associating a region of a String
   * as input to be used for pattern matching by PatternMatcher objects.
   * The characters of the String are copied into an internal buffer.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region.
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the String to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public PatternMatcherInput(String input, int begin, int length) {
    setInput(input, begin, length);
  }

  /**
   * Like calling
   * <blockquote><pre>
   * PatternMatcherInput(input, 0, input.length());
   * </pre></blockquote>
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public PatternMatcherInput(String input) {
    this(input, 0, input.length());
  }

  /**
   * Creates a PatternMatcherInput object, associating a region of a string
   * (represented as a char[]) as input
   * to be used for pattern matching by PatternMatcher objects.
   * A copy of the array is not made, therefore you should not modify
   * the array unless you know what you are doing.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region.
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the char[] to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public PatternMatcherInput(char[] input, int begin, int length) {
    setInput(input, begin, length);
  }

  /**
   * Like calling:
   * <blockquote><pre>
   * PatternMatcherInput(input, 0, input.length);
   * </pre></blockquote>
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public PatternMatcherInput(char[] input) {
    this(input, 0, input.length);
  }

  /**
   * @return The length of the region to be considered input for pattern
   *         matching purposes.  Essentially this is the end offset minus
   *         the begin offset.
   */
  public int length() {
    return (_endOffset - _beginOffset);
  }

  /**
   * Associates a region of a String as input
   * to be used for pattern matching by PatternMatcher objects.
   * The characters of the String are copied into an internal buffer.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region and any recorded match offsets are reset to -1.
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the String to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public void setInput(String input, int begin, int length) {
    _originalStringInput = input;
    _originalCharInput   = null;
    _toLowerBuffer       = null;
    _originalBuffer      = input.toCharArray();
    // setCurrentOffset() also clears the stored match offsets.
    setCurrentOffset(begin);
    setBeginOffset(begin);
    setEndOffset(_beginOffset + length);
  }

  /**
   * This method is identical to calling:
   * <blockquote><pre>
   * setInput(input, 0, input.length());
   * </pre></blockquote>
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public void setInput(String input) {
    setInput(input, 0, input.length());
  }

  /**
   * Associates a region of a string (represented as a char[]) as input
   * to be used for pattern matching by PatternMatcher objects.
   * A copy of the array is not made, therefore you should not modify
   * the array unless you know what you are doing.
   * The current offset of the PatternMatcherInput is set to the begin
   * offset of the region and any recorded match offsets are reset to -1.
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   * @param begin  The offset into the char[] to use as the beginning of
   *               the input.
   * @param length The length of the region starting from the begin offset
   *               to use as the input for pattern matching purposes.
   */
  public void setInput(char[] input, int begin, int length) {
    _originalStringInput = null;
    _toLowerBuffer       = null;
    // The caller's array is used directly as the working buffer.
    _originalBuffer      = _originalCharInput = input;
    // setCurrentOffset() also clears the stored match offsets.
    setCurrentOffset(begin);
    setBeginOffset(begin);
    setEndOffset(_beginOffset + length);
  }

  /**
   * This method is identical to calling:
   * <blockquote><pre>
   * setInput(input, 0, input.length);
   * </pre></blockquote>
   * <p>
   * @param input  The input to associate with the PatternMatcherInput.
   */
  public void setInput(char[] input) {
    setInput(input, 0, input.length);
  }

  /**
   * Returns the character at a particular offset relative to the begin
   * offset of the input.
   * <p>
   * @param offset  The offset at which to fetch a character (relative to
   *                the beginning offset).
   * @return The character at a particular offset.
   * @exception ArrayIndexOutOfBoundsException If the resulting index lies
   *            outside the underlying buffer.
   */
  public char charAt(int offset) {
    return _originalBuffer[_beginOffset + offset];
  }

  /**
   * Returns a new string that is a substring of the PatternMatcherInput
   * instance.  The substring begins at the specified beginOffset relative
   * to the begin offset and extends to the specified endOffset - 1
   * relative to the begin offset of the PatternMatcherInput instance.
   * <p>
   * @param beginOffset  The offset relative to the begin offset of the
   *        PatternMatcherInput at which to start the substring (inclusive).
   * @param endOffset  The offset relative to the begin offset of the
   *        PatternMatcherInput at which to end the substring (exclusive).
   * @return The specified substring.
   * @exception IndexOutOfBoundsException If the requested range does
   *            not lie within the underlying buffer.
   */
  public String substring(int beginOffset, int endOffset) {
    return new String(_originalBuffer, _beginOffset + beginOffset,
                      endOffset - beginOffset);
  }

  /**
   * Returns a new string that is a substring of the PatternMatcherInput
   * instance.  The substring begins at the specified beginOffset relative
   * to the begin offset and extends to the end offset of the
   * PatternMatcherInput.
   * <p>
   * @param beginOffset  The offset relative to the begin offset of the
   *        PatternMatcherInput at which to start the substring.
   * @return The specified substring.
   * @exception IndexOutOfBoundsException If the requested range does
   *            not lie within the underlying buffer.
   */
  public String substring(int beginOffset) {
    // Convert the region-relative offset into an absolute buffer index.
    beginOffset += _beginOffset;
    return new String(_originalBuffer, beginOffset, _endOffset - beginOffset);
  }

  /**
   * Retrieves the original input used to initialize the PatternMatcherInput
   * instance.  If a String was used, the String instance will be returned.
   * If a char[] was used, the char[] instance will be returned.  This
   * violates data encapsulation and hiding principles, but it is a great
   * convenience for the programmer.
   * <p>
   * @return The String or char[] input used to initialize the
   *         PatternMatcherInput instance.
   */
  public Object getInput() {
    if(_originalStringInput == null)
      return _originalCharInput;
    return _originalStringInput;
  }

  /**
   * Retrieves the char[] buffer to be used as input by PatternMatcher
   * implementations to look for matches.  This array should be treated
   * as read only by the programmer.
   * <p>
   * @return The char[] buffer to be used as input by PatternMatcher
   *         implementations.
   */
  public char[] getBuffer() { return _originalBuffer; }

  /**
   * Returns whether or not the end of the input has been reached.
   * <p>
   * @return True if the current offset is greater than or equal to the
   *         end offset.
   */
  public boolean endOfInput() { return (_currentOffset >= _endOffset); }

  /**
   * @return The offset of the input that should be considered the start
   *         of the region to be considered as input by PatternMatcher
   *         methods.
   */
  public int getBeginOffset() { return _beginOffset; }

  /**
   * @return The offset of the input that should be considered the end
   *         of the region to be considered as input by PatternMatcher
   *         methods.  This offset is actually 1 plus the last offset
   *         that is part of the input region.
   */
  public int getEndOffset() { return _endOffset; }

  /**
   * @return The offset of the input that should be considered the current
   *         offset where PatternMatcher methods should start looking for
   *         matches.
   */
  public int getCurrentOffset() { return _currentOffset; }

  /**
   * Sets the offset of the input that should be considered the start
   * of the region to be considered as input by PatternMatcher
   * methods.  In other words, everything before this offset is ignored
   * by a PatternMatcher.
   * <p>
   * @param offset  The offset to use as the beginning of the input.
   */
  public void setBeginOffset(int offset) { _beginOffset = offset; }

  /**
   * Sets the offset of the input that should be considered the end
   * of the region to be considered as input by PatternMatcher
   * methods.  This offset is actually 1 plus the last offset
   * that is part of the input region.
   * <p>
   * @param offset  The offset to use as the end of the input.
   */
  public void setEndOffset(int offset) { _endOffset = offset; }

  /**
   * Sets the offset of the input that should be considered the current
   * offset where PatternMatcher methods should start looking for
   * matches.  Also resets all match offset information to -1.  By calling
   * this method, you invalidate all previous match information.  Therefore
   * a PatternMatcher implementation must call this method before setting
   * match offset information.
   * <p>
   * @param offset  The offset to use as the current offset.
   */
  public void setCurrentOffset(int offset) {
    _currentOffset = offset;
    setMatchOffsets(-1, -1);
  }

  /**
   * Returns the string representation of the input, where the input is
   * considered to start from the begin offset and end at the end offset.
   * <p>
   * @return The string representation of the input.
   * @exception IndexOutOfBoundsException If the begin/end offsets do not
   *            describe a valid range of the underlying buffer.
   */
  public String toString() {
    return new String(_originalBuffer, _beginOffset, length());
  }

  /**
   * A convenience method returning the part of the input occurring before
   * the last match found by a call to a Perl5Matcher
   * {@link Perl5Matcher#contains contains} method.
   * <p>
   * @return The input preceding a match.
   * @exception IndexOutOfBoundsException If the stored offsets do not
   *            describe a valid range of the buffer; this is typically the
   *            case when no match has been recorded and the match offsets
   *            are still -1.
   */
  public String preMatch() {
    return new String(_originalBuffer, _beginOffset,
                      _matchBeginOffset - _beginOffset);
  }

  /**
   * A convenience method returning the part of the input occurring after
   * the last match found by a call to a Perl5Matcher
   * {@link Perl5Matcher#contains contains} method.
   * <p>
   * @return The input succeeding a contains() match.
   * @exception IndexOutOfBoundsException If the stored offsets do not
   *            describe a valid range of the buffer, e.g. when no match has
   *            been recorded and the match offsets are still -1.
   */
  public String postMatch() {
    return new String(_originalBuffer, _matchEndOffset,
                      _endOffset - _matchEndOffset);
  }

  /**
   * A convenience method returning the part of the input corresponding
   * to the last match found by a call to a Perl5Matcher
   * {@link Perl5Matcher#contains contains} method.
   * The method is not called getMatch() so as not to confuse it
   * with Perl5Matcher's getMatch() which returns a MatchResult instance
   * and also for consistency with preMatch() and postMatch().
   * <p>
   * @return The input consisting of the match found by contains().
   * @exception IndexOutOfBoundsException If the stored offsets do not
   *            describe a valid range of the buffer, e.g. when no match has
   *            been recorded and the match offsets are still -1.
   */
  public String match() {
    return new String(_originalBuffer, _matchBeginOffset,
                      _matchEndOffset - _matchBeginOffset);
  }

  /**
   * This method is intended for use by PatternMatcher implementations.
   * It is necessary to record the location of the previous match so that
   * consecutive contains() matches involving null string matches are
   * properly handled.  If you are not implementing a PatternMatcher, forget
   * this method exists.  If you use it outside of its intended context, you
   * will only disrupt the stored state.
   * <p>
   * As a note, the preMatch(), postMatch(), and match() methods are provided
   * as conveniences because PatternMatcherInput must store match offset
   * information to completely preserve state for consecutive PatternMatcher
   * contains() matches.
   * <p>
   * @param matchBeginOffset  The begin offset of a match found by contains().
   * @param matchEndOffset    The end offset of a match found by contains().
   */
  public void setMatchOffsets(int matchBeginOffset, int matchEndOffset) {
    _matchBeginOffset = matchBeginOffset;
    _matchEndOffset   = matchEndOffset;
  }

  /**
   * Returns the offset marking the beginning of the match found by
   * contains().
   * <p>
   * @return The begin offset of a contains() match, or -1 if none is
   *         currently recorded.
   */
  public int getMatchBeginOffset() { return _matchBeginOffset; }

  /**
   * Returns the offset marking the end of the match found by contains().
   * <p>
   * @return The end offset of a contains() match, or -1 if none is
   *         currently recorded.
   */
  public int getMatchEndOffset() { return _matchEndOffset; }
}
504
Popular Tags