KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > oro > text > regex > Util


1 package org.apache.oro.text.regex;
2
3 /* ====================================================================
4  * The Apache Software License, Version 1.1
5  *
6  * Copyright (c) 2000 The Apache Software Foundation. All rights
7  * reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  *
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in
18  * the documentation and/or other materials provided with the
19  * distribution.
20  *
21  * 3. The end-user documentation included with the redistribution,
22  * if any, must include the following acknowledgment:
23  * "This product includes software developed by the
24  * Apache Software Foundation (http://www.apache.org/)."
25  * Alternately, this acknowledgment may appear in the software itself,
26  * if and wherever such third-party acknowledgments normally appear.
27  *
28  * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro"
29  * must not be used to endorse or promote products derived from this
30  * software without prior written permission. For written
31  * permission, please contact apache@apache.org.
32  *
33  * 5. Products derived from this software may not be called "Apache"
34  * or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their
35  * name, without prior written permission of the Apache Software Foundation.
36  *
37  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48  * SUCH DAMAGE.
49  * ====================================================================
50  *
51  * This software consists of voluntary contributions made by many
52  * individuals on behalf of the Apache Software Foundation. For more
53  * information on the Apache Software Foundation, please see
54  * <http://www.apache.org/>.
55  *
56  * Portions of this software are based upon software originally written
57  * by Daniel F. Savarese. We appreciate his contributions.
58  */

59 import java.util.*;
60
61 /**
62  * The Util class is a holder for useful static utility methods that can
63  * be generically applied to Pattern and PatternMatcher instances.
64  * This class cannot and is not meant to be instantiated.
65  * The Util class currently contains versions of the split() and substitute()
66  * methods inspired by Perl's split function and <b>s</b> operation
67  * respectively, although they are implemented in such a way as not to
68  * rely on the Perl5 implementations of the OROMatcher packages regular
69  * expression interfaces. They may operate on any interface implementations
70  * conforming to the OROMatcher API specification for the PatternMatcher,
71  * Pattern, and MatchResult interfaces. Future versions of the class may
72  * include additional utility methods.
73  * <p>
74  * A grep method is not included for two reasons:
75  * <ol>
76  * <li> The details of reading a line at a time from an input stream
77  * differ in JDK 1.0.2 and JDK 1.1, making it difficult to
78  * retain compatibility across both Java releases.
79  * <li> Grep style processing is trivial for the programmer to implement
80  * in a while loop. Rarely does anyone want to retrieve all
81  * occurrences of a pattern and then process them. More often a
82  * programmer will retrieve pattern matches and process them as they
83  * are retrieved, which is more efficient than storing them all in a
84  * Vector and then accessing them.
85  * </ol>
86
87  @author <a HREF="mailto:dfs@savarese.org">Daniel F. Savarese</a>
88  @version $Id: Util.java,v 1.1.1.1 2000/07/23 23:08:54 jon Exp $
89
90  * @see Pattern
91  * @see PatternMatcher
92  */

93 public final class Util {
94   /**
95    * A constant passed to the {@link #substitute substitute()}
96    * methods indicating that all occurrences of a pattern should be
97    * substituted.
98    */

99   public static final int SUBSTITUTE_ALL = -1;
100
101   /**
102    * A constant passed to the {@link #split split()} methods
103    * indicating that all occurrences of a pattern should be used to
104    * split a string.
105    */

106   public static final int SPLIT_ALL = 0;
107
108   /**
109    * The default destructor for the Util class. It is made private
110    * to prevent the instantiation of the class.
111    */

112   private Util() { }
113
114   /**
115    * Splits up a <code>String</code> instance into strings contained in a
116    * <code>Vector</code> of size not greater than a specified limit. The
117    * string is split with a regular expression as the delimiter.
118    * The <b>limit</b> parameter essentially says to split the
119    * string only on at most the first <b>limit - 1</b> number of pattern
120    * occurences.
121    * <p>
122    * This method is inspired by the Perl split() function and behaves
123    * identically to it when used in conjunction with the Perl5Matcher and
124    * Perl5Pattern classes except for the following difference:
125    * <ul><p>
126    * In Perl, if the split expression contains parentheses, the split()
127    * method creates additional list elements from each of the matching
128    * subgroups in the pattern. In other words:
129    * <ul><p><code>split("/([,-])/", "8-12,15,18")</code></ul>
130    * <p> produces the Vector containing:
131    * <ul><p><code> { "8", "-", "12", ",", "15", ",", "18" } </code> </ul>
132    * <p> The OROMatcher split method does not follow this behavior. The
133    * following Vector would be produced by OROMatcher:
134    * <ul><p><code> { "8", "12", "15", "18" } </code> </ul>
135    * <p> To obtain the Perl behavior, use split method in the PerlTools
136    * package available from
137    * <a HREF="http://www.oroinc.com/"> http://www.oroinc.com/ </a>.
138    * </ul>
139    * <p>
140    * @param matcher The regular expression matcher to execute the split.
141    * @param pattern The regular expression to use as a split delimiter.
142    * @param input The <code>String</code> to split.
143    * @param limit The limit on the size of the returned <code>Vector</code>.
144    * Values <= 0 produce the same behavior as using the
145    * <b>SPLIT_ALL</b> constant which causes the limit to be
146    * ignored and splits to be performed on all occurrences of
147    * the pattern. You should use the <b>SPLIT_ALL</b> constant
148    * to achieve this behavior instead of relying on the default
149    * behavior associated with non-positive limit values.
150    * @return A <code>Vector</code> containing the substrings of the input
151    * that occur between the regular expression delimiter occurences.
152    * The input will not be split into any more substrings than the
153    * specified <code>limit</code>. A way of thinking of this is that
154    * only the first <code>limit - 1</code> matches of the delimiting
155    * regular expression will be used to split the input.
156    */

157   public static Vector split(PatternMatcher matcher, Pattern pattern,
158                  String JavaDoc input, int limit)
159   {
160     int beginOffset;
161     Vector results = new Vector(20);
162     MatchResult currentResult;
163     PatternMatcherInput pinput;
164
165     pinput = new PatternMatcherInput(input);
166     beginOffset = 0;
167
168     while(--limit != 0 && matcher.contains(pinput, pattern)) {
169       currentResult = matcher.getMatch();
170       results.addElement(input.substring(beginOffset,
171                      currentResult.beginOffset(0)));
172       beginOffset = currentResult.endOffset(0);
173     }
174     results.addElement(input.substring(beginOffset, input.length()));
175
176     return results;
177   }
178
179
180   /**
181    * Splits up a <code>String</code> instance into a <code>Vector</code>
182    * of all its substrings using a regular expression as the delimiter.
183    * This method is inspired by the Perl split() function and behaves
184    * identically to it when used in conjunction with the Perl5Matcher and
185    * Perl5Pattern classes except for the following difference:
186    * <p>
187    * <ul>
188    * In Perl, if the split expression contains parentheses, the split()
189    * method creates additional list elements from each of the matching
190    * subgroups in the pattern. In other words:
191    * <ul><p><code>split("/([,-])/", "8-12,15,18")</code></ul>
192    * <p> produces the Vector containing:
193    * <ul><p><code> { "8", "-", "12", ",", "15", ",", "18" } </code> </ul>
194    * <p> The OROMatcher split method does not follow this behavior. The
195    * following Vector would be produced by OROMatcher:
196    * <ul><p><code> { "8", "12", "15", "18" } </code> </ul>
197    * <p> To obtain the Perl behavior, use split method in the PerlTools
198    * package available from
199    * <a HREF="http://www.oroinc.com/"> http://www.oroinc.com/ </a>.
200    * </ul>
201    * <p>
202    * This method is identical to calling:
203    * <blockquote><pre>
204    * split(matcher, pattern, input, Util.SPLIT_ALL);
205    * </pre></blockquote>
206    * <p>
207    * @param matcher The regular expression matcher to execute the split.
208    * @param pattern The regular expression to use as a split delimiter.
209    * @param input The <code>String</code> to split.
210    * @return A <code>Vector</code> containing all the substrings of the input
211    * that occur between the regular expression delimiter occurences.
212    */

213   public static Vector split( PatternMatcher matcher, Pattern pattern,
214                   String JavaDoc input)
215   {
216     return split(matcher, pattern, input, SPLIT_ALL);
217   }
218
219
220   /**
221    * Searches a string for a pattern and replaces the first occurrences
222    * of the pattern with a Substitution up to the number of
223    * substitutions specified by the <b>numSubs</b> parameter. A
224    * <b>numSubs</b> value of <b>SUBSTITUTE_ALL</b> will cause all occurrences
225    * of the pattern to be replaced.
226    * <p>
227    * @param matcher The regular expression matcher to execute the pattern
228    * search.
229    * @param pattern The regular expression to search for and substitute
230    * occurrences of.
231    * @param sub The Substitution used to substitute pattern occurences.
232    * @param input The <code>String</code> on which to perform substitutions.
233    * @param numSubs The number of substitutions to perform. Only the
234    * first <b> numSubs </b> patterns encountered are
235    * substituted. If you want to substitute all occurences
236    * set this parameter to <b> SUBSTITUTE_ALL </b>.
237    * @return A String comprising the input string with the substitutions,
238    * if any, made. If no substitutions are made, the returned String
239    * is the original input String.
240    */

241   public static String JavaDoc substitute(PatternMatcher matcher, Pattern pattern,
242                   Substitution sub, String JavaDoc input, int numSubs)
243   {
244     int beginOffset, subCount;
245     MatchResult currentResult;
246     PatternMatcherInput pinput;
247     StringBuffer JavaDoc buffer = new StringBuffer JavaDoc(input.length());
248
249     pinput = new PatternMatcherInput(input);
250     beginOffset = subCount = 0;
251
252     // Must be != 0 because SUBSTITUTE_ALL is represented by -1.
253
// Do NOT change to numSubs > 0.
254
while(numSubs != 0 && matcher.contains(pinput, pattern)) {
255       --numSubs;
256       ++subCount;
257       currentResult = matcher.getMatch();
258       buffer.append(input.substring(beginOffset,
259                     currentResult.beginOffset(0)));
260       sub.appendSubstitution(buffer, currentResult, subCount,
261                  input, matcher, pattern);
262       beginOffset = currentResult.endOffset(0);
263     }
264
265     // No substitutions performed. There's no point in duplicating
266
// the string as would happen if this check were omitted.
267
if(subCount == 0)
268       return input;
269
270     buffer.append(input.substring(beginOffset, input.length()));
271
272     return buffer.toString();
273   }
274
275   /**
276    * Searches a string for a pattern and substitutes only the first
277    * occurence of the pattern.
278    * <p>
279    * This method is identical to calling:
280    * <blockquote><pre>
281    * substitute(matcher, pattern, sub, input, 1);
282    * </pre></blockquote>
283    * <p>
284    * @param matcher The regular expression matcher to execute the pattern
285    * search.
286    * @param pattern The regular expression to search for and substitute
287    * occurrences of.
288    * @param sub The Substitution used to substitute pattern occurences.
289    * @param input The <code>String</code> on which to perform substitutions.
290    * @return A String comprising the input string with the substitutions,
291    * if any, made. If no substitutions are made, the returned String
292    * is the original input String.
293    */

294   public static String JavaDoc substitute(PatternMatcher matcher, Pattern pattern,
295                   Substitution sub, String JavaDoc input)
296   {
297     return substitute(matcher, pattern, sub, input, 1);
298   }
299
300 }
301
Popular Tags