KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lutris > util > BMByteSearch


1
2 /*
3  * Enhydra Java Application Server Project
4  *
5  * The contents of this file are subject to the Enhydra Public License
6  * Version 1.1 (the "License"); you may not use this file except in
7  * compliance with the License. You may obtain a copy of the License on
8  * the Enhydra web site ( http://www.enhydra.org/ ).
9  *
10  * Software distributed under the License is distributed on an "AS IS"
11  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
12  * the License for the specific terms governing rights and limitations
13  * under the License.
14  *
15  * The Initial Developer of the Enhydra Application Server is Lutris
16  * Technologies, Inc. The Enhydra Application Server and portions created
17  * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
18  * All Rights Reserved.
19  *
20  * Contributor(s):
21  *
22  * $Id: BMByteSearch.java,v 1.1 2004/04/21 19:31:58 slobodan Exp $
23  */

24
25
26
27
28 package com.lutris.util;
29
30 /**
31  * Implements the Boyer-Moore pattern matching algorithm for a given
32  * byte pattern. This object implements searches on signed byte arrays.
33  * The algorithm was obtained from "Computer Algorithms - Introduction to
34  * Design and Analysis, Second Edition" by Sara Baase.
35  */

36 public class BMByteSearch
37 {
38     
39     /**
40      * Byte array, beginning at index 1 (for algorithmic convenience),
41      * that contains the intended search pattern data.
42      */

43     private byte[] P;
44     
45     /**
46      * The length of the search pattern.
47      */

48     private int m;
49     
50     /**
51      * Table of jump distances for each mismatched character in the
52      * alphabet for a given search pattern. Must be recomputed for
53      * each new pattern.
54      */

55     private int[] charJump;
56     
57     /**
58      * Table of partial suffix match jump distances for a given pattern.
59      * Must be recomputed for each new pattern.
60      */

61     private int[] matchJump;
62     
63     /**
64      * Creates a precomputed Boyer-Moore byte string search object
65      * from the given pattern. The unicode characters in <code>pattern</code>
66      * are truncated if greater than 255, and converted in twos-complement
67      * fashion, to appropriate negative byte values, if necessary.
68      * This method is provided as a convenience for searching for patterns
69      * within 8 bit byte strings composed of character data.
70      *
71      * @param pattern The pattern create this object for.
72      */

73     public
74     BMByteSearch(String JavaDoc pattern)
75     {
76     genPatternFromCharArray(pattern.toCharArray());
77     computeJumps();
78     computeMatchJumps();
79     }
80     
81     /**
82      * Creates a precomputed Boyer-Moore byte string search object
83      * from the given pattern.
84      *
85      * @param pattern Binary pattern to search for.
86      */

87     public
88     BMByteSearch(byte[] pattern)
89     {
90     genPatternFromByteArray(pattern, 0, pattern.length);
91     computeJumps();
92     computeMatchJumps();
93     }
94     
95     /**
96      * Creates a precomputed Boyer-Moore byte string search object
97      * from a portion of the given pattern array.
98      *
99      * @param pattern Byte array containing a pattern to search for.
100      * @param offset Offset to beginning of search pattern.
101      * @param length Length of the search pattern.
102      */

103     public
104     BMByteSearch(byte[] pattern, int offset, int length)
105     {
106     genPatternFromByteArray(pattern, offset, length);
107     computeJumps();
108     computeMatchJumps();
109     }
110     
111     /**
112      * Compares two integers and returns the lesser value.
113      *
114      * @param i1 First integer to compare.
115      * @param i2 Second integer to compare.
116      * @return The lesser of <code>i1</code> or <code>i2</code>.
117      */

118     private static final
119     int min(int i1, int i2)
120     {
121     return (i1 < i2) ? i1 : i2;
122     }
123
124     /**
125      * Compares two integers and returns the greater value.
126      *
127      * @param i1 First integer to compare.
128      * @param i2 Second integer to compare.
129      * @return The greater of <code>i1</code> or <code>i2</code>.
130      */

131     private static final
132     int max(int i1, int i2)
133     {
134     return (i1 > i2) ? i1 : i2;
135     }
136     
137     /**
138      * Generates the pattern byte string <code>P</code> from a portion
139      * of another byte string.
140      *
141      * @param bytes The byte string from which to extract the pattern.
142      * @param off The array index within <code>bytes</code> from
143      * which to extract the pattern.
144      * @param length The number of characters to extract from
145      * <code>bytes</code> into the pattern.
146      */

147     private final
148     void genPatternFromByteArray(byte[] bytes, int off, int length)
149     {
150     int i,j;
151     m = length;
152 // 31.03.2003. patch
153
// P = new byte[length];
154
P = new byte[length+1];
155     for (i=1, j=off; i <= length; i++, j++) P[i] = bytes[j];
156     }
157     
158     /**
159      * Generates the pattern byte string <code>P</code> from a character
160      * array. The signed unicode characters are truncated to 8 bits, and
161      * converted into signed byte values. Characters between 128 and 255
162      * are converted to their signed negative counterpart in
163      * twos-complement fashion by subtracting 256.
164      *
165      * @param chars Unsigned unicode character array to turn into
166      * a signed byte array.
167      */

168     private final
169     void genPatternFromCharArray(char[] chars)
170     {
171     m = chars.length;
172     P = new byte[m + 1];
173     for (int i=1; i<= m; i++) {
174         if (chars[i-1] > 127) P[i] = (byte) ((chars[i-1] - 256) & 0xff);
175         else P[i] = (byte) (chars[i-1] & 0xff);
176     }
177     }
178     
179     /**
180      * Initializes the per-character jump table <code>charJump</code>
181      * as specified by the Boyer-Moore algorithm.
182      */

183     private final
184     void computeJumps()
185     {
186     charJump = new int[256];
187     for (int i=0; i<255; i++) charJump[i] = m;
188     for (int k=1; k<=m; k++) charJump[P[k] + 128] = m - k;
189     }
190     
191     /**
192      * Computes a partial-match jump table that skips over
193      * partially matching suffixes.
194      */

195     private
196     void computeMatchJumps()
197     {
198     int k, q, qq, mm;
199     int[] back = new int[m + 2];
200
201     matchJump = new int[m + 2];
202     mm = 2 * m;
203
204     for (k=1; k <= m; k++) matchJump[k] = mm - k;
205     k = m; q = m + 1;
206     while (k > 0) {
207         back[k] = q;
208         while ((q <= m) && (P[k] != P[q])) {
209         matchJump[q] = min(matchJump[q], m - k);
210         q = back[q];
211         }
212         k = k - 1; q = q - 1;
213     }
214     for (k = 1; k <= q; k++) {
215         matchJump[k] = min(matchJump[k], m + q - k);
216     }
217     qq = back[q];
218     while (q <= m) {
219         while (q <= qq) {
220         matchJump[q] = min(matchJump[q], qq - q + m);
221         q = q + 1;
222         }
223         qq = back[qq];
224     }
225     }
226     
227     /**
228      * Returns the length of the pattern for this searcher.
229      *
230      * @return The search pattern length.
231      */

232     public
233     int getPatternLength()
234     {
235     return(m);
236     }
237     
238     /**
239      * Search for the previously pre-compiled pattern string in an
240      * array of bytes. This method uses the Boyer-Moore pattern
241      * search algorithm.
242      *
243      * @param byteString Array of bytes in which to search
244      * for the pattern.
245      * @return The array index where the pattern
246      * begins in the string, or <code>-1</code>
247      * if the pattern was not found.
248      */

249     public
250     int search(byte[] byteString)
251     {
252     return(search(byteString, 0, byteString.length));
253     }
254     
255     /**
256      * Search for the previously pre-compiled pattern string in an
257      * array of bytes. This method uses the Boyer-Moore pattern
258      * search algorithm.
259      *
260      * @param byteString Array of bytes in which to search
261      * for the pattern.
262      * @param offset The the index in <code>byteString</code>
263      * where the search is to begin.
264      * @param length The number of bytes to search in
265      * <code>byteString</code>.
266      * @return The array index where the pattern
267      * begins in the string, or <code>-1</code>
268      * if the pattern was not found.
269      */

270     public
271     int search(byte[] byteString, int offset, int length)
272     {
273     int j, k, len;
274     j = m + offset;
275     k = m;
276     len = min(byteString.length, offset + length);
277     while ((j <= len) && (k > 0)) {
278         if ((byteString[j-1]) == P[k]) {
279         j = j - 1; k = k - 1;
280         } else {
281         j = j + max(charJump[byteString[j-1] + 128], matchJump[k]);
282         k = m;
283         }
284     }
285     if (k == 0) return(j);
286     return(-1); // No match.
287
}
288 }
289
290
Popular Tags