KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > apache > xerces > utils > regex > BMPattern


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999,2000 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.enhydra.apache.xerces.utils.regex;
59
60 import java.text.CharacterIterator JavaDoc;
61
62 /**
63  * Boyer-Moore searcher.
64  */

65 public class BMPattern {
66     char[] pattern;
67     int[] shiftTable;
68     boolean ignoreCase;
69
70     public BMPattern(String JavaDoc pat, boolean ignoreCase) {
71         this(pat, 256, ignoreCase);
72     }
73
74     public BMPattern(String JavaDoc pat, int tableSize, boolean ignoreCase) {
75         this.pattern = pat.toCharArray();
76         this.shiftTable = new int[tableSize];
77         this.ignoreCase = ignoreCase;
78
79         int length = pattern.length;
80         for (int i = 0; i < this.shiftTable.length; i ++)
81             this.shiftTable[i] = length;
82
83         for (int i = 0; i < length; i ++) {
84             char ch = this.pattern[i];
85             int diff = length-i-1;
86             int index = ch % this.shiftTable.length;
87             if (diff < this.shiftTable[index])
88                 this.shiftTable[index] = diff;
89             if (this.ignoreCase) {
90                 ch = Character.toUpperCase(ch);
91                 index = ch % this.shiftTable.length;
92                 if (diff < this.shiftTable[index])
93                     this.shiftTable[index] = diff;
94                 ch = Character.toLowerCase(ch);
95                 index = ch % this.shiftTable.length;
96                 if (diff < this.shiftTable[index])
97                     this.shiftTable[index] = diff;
98             }
99         }
100     }
101
102     /**
103      *
104      * @return -1 if <var>iterator</var> does not contain this pattern.
105      */

106     public int matches(CharacterIterator JavaDoc iterator, int start, int limit) {
107         if (this.ignoreCase) return this.matchesIgnoreCase(iterator, start, limit);
108         int plength = this.pattern.length;
109         if (plength == 0) return start;
110         int index = start+plength;
111         while (index <= limit) {
112             int pindex = plength;
113             int nindex = index+1;
114             char ch;
115             do {
116                 if ((ch = iterator.setIndex(--index)) != this.pattern[--pindex])
117                     break;
118                 if (pindex == 0)
119                     return index;
120             } while (pindex > 0);
121             index += this.shiftTable[ch % this.shiftTable.length]+1;
122             if (index < nindex) index = nindex;
123         }
124         return -1;
125     }
126
127     /**
128      *
129      * @return -1 if <var>str</var> does not contain this pattern.
130      */

131     public int matches(String JavaDoc str, int start, int limit) {
132         if (this.ignoreCase) return this.matchesIgnoreCase(str, start, limit);
133         int plength = this.pattern.length;
134         if (plength == 0) return start;
135         int index = start+plength;
136         while (index <= limit) {
137             //System.err.println("Starts at "+index);
138
int pindex = plength;
139             int nindex = index+1;
140             char ch;
141             do {
142                 if ((ch = str.charAt(--index)) != this.pattern[--pindex])
143                     break;
144                 if (pindex == 0)
145                     return index;
146             } while (pindex > 0);
147             index += this.shiftTable[ch % this.shiftTable.length]+1;
148             if (index < nindex) index = nindex;
149         }
150         return -1;
151     }
152     /**
153      *
154      * @return -1 if <var>chars</char> does not contain this pattern.
155      */

156     public int matches(char[] chars, int start, int limit) {
157         if (this.ignoreCase) return this.matchesIgnoreCase(chars, start, limit);
158         int plength = this.pattern.length;
159         if (plength == 0) return start;
160         int index = start+plength;
161         while (index <= limit) {
162             //System.err.println("Starts at "+index);
163
int pindex = plength;
164             int nindex = index+1;
165             char ch;
166             do {
167                 if ((ch = chars[--index]) != this.pattern[--pindex])
168                     break;
169                 if (pindex == 0)
170                     return index;
171             } while (pindex > 0);
172             index += this.shiftTable[ch % this.shiftTable.length]+1;
173             if (index < nindex) index = nindex;
174         }
175         return -1;
176     }
177
178     int matchesIgnoreCase(CharacterIterator JavaDoc iterator, int start, int limit) {
179         int plength = this.pattern.length;
180         if (plength == 0) return start;
181         int index = start+plength;
182         while (index <= limit) {
183             int pindex = plength;
184             int nindex = index+1;
185             char ch;
186             do {
187                 char ch1 = ch = iterator.setIndex(--index);
188                 char ch2 = this.pattern[--pindex];
189                 if (ch1 != ch2) {
190                     ch1 = Character.toUpperCase(ch1);
191                     ch2 = Character.toUpperCase(ch2);
192                     if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
193                         break;
194                 }
195                 if (pindex == 0)
196                     return index;
197             } while (pindex > 0);
198             index += this.shiftTable[ch % this.shiftTable.length]+1;
199             if (index < nindex) index = nindex;
200         }
201         return -1;
202     }
203     
204     int matchesIgnoreCase(String JavaDoc text, int start, int limit) {
205         int plength = this.pattern.length;
206         if (plength == 0) return start;
207         int index = start+plength;
208         while (index <= limit) {
209             int pindex = plength;
210             int nindex = index+1;
211             char ch;
212             do {
213                 char ch1 = ch = text.charAt(--index);
214                 char ch2 = this.pattern[--pindex];
215                 if (ch1 != ch2) {
216                     ch1 = Character.toUpperCase(ch1);
217                     ch2 = Character.toUpperCase(ch2);
218                     if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
219                         break;
220                 }
221                 if (pindex == 0)
222                     return index;
223             } while (pindex > 0);
224             index += this.shiftTable[ch % this.shiftTable.length]+1;
225             if (index < nindex) index = nindex;
226         }
227         return -1;
228     }
229     int matchesIgnoreCase(char[] chars, int start, int limit) {
230         int plength = this.pattern.length;
231         if (plength == 0) return start;
232         int index = start+plength;
233         while (index <= limit) {
234             int pindex = plength;
235             int nindex = index+1;
236             char ch;
237             do {
238                 char ch1 = ch = chars[--index];
239                 char ch2 = this.pattern[--pindex];
240                 if (ch1 != ch2) {
241                     ch1 = Character.toUpperCase(ch1);
242                     ch2 = Character.toUpperCase(ch2);
243                     if (ch1 != ch2 && Character.toLowerCase(ch1) != Character.toLowerCase(ch2))
244                         break;
245                 }
246                 if (pindex == 0)
247                     return index;
248             } while (pindex > 0);
249             index += this.shiftTable[ch % this.shiftTable.length]+1;
250             if (index < nindex) index = nindex;
251         }
252         return -1;
253     }
254
255     /*
256     public static void main(String[] argv) {
257         try {
258             int[] shiftTable = new int[256];
259             initializeBoyerMoore(argv[0], shiftTable, true);
260             int o = -1;
261             CharacterIterator ite = new java.text.StringCharacterIterator(argv[1]);
262             long start = System.currentTimeMillis();
263             //for (int i = 0; i < 10000; i ++)
264                 o = searchIgnoreCasesWithBoyerMoore(ite, 0, argv[0], shiftTable);
265             start = System.currentTimeMillis()-start;
266             System.out.println("Result: "+o+", Elapsed: "+start);
267         } catch (Exception ex) {
268             ex.printStackTrace();
269         }
270     }*/

271 }
272
273
Popular Tags