KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lutris > util > BMByteSearchStream


1
2 /*
3  * Enhydra Java Application Server Project
4  *
5  * The contents of this file are subject to the Enhydra Public License
6  * Version 1.1 (the "License"); you may not use this file except in
7  * compliance with the License. You may obtain a copy of the License on
8  * the Enhydra web site ( http://www.enhydra.org/ ).
9  *
10  * Software distributed under the License is distributed on an "AS IS"
11  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
12  * the License for the specific terms governing rights and limitations
13  * under the License.
14  *
15  * The Initial Developer of the Enhydra Application Server is Lutris
16  * Technologies, Inc. The Enhydra Application Server and portions created
17  * by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
18  * All Rights Reserved.
19  *
20  * Contributor(s):
21  *
22  * $Id: BMByteSearchStream.java,v 1.3 2005/03/24 10:51:25 slobodan Exp $
23  */

24
25
26
27
28 package com.lutris.util;
29 import java.io.FilterInputStream JavaDoc;
30 import java.io.IOException JavaDoc;
31 import java.io.InputStream JavaDoc;
32
33 /**
34  * Implements the Boyer-Moore pattern matching algorithm for a given
35  * byte pattern. This object implements searches on byte-oriented input
36  * streams.
37  * <P>
38  * The algorithm was obtained from "Computer Algorithms - Introduction to
39  * Design and Analysis, Second Edition" by Sara Baase.
40  */

41 public class BMByteSearchStream extends FilterInputStream JavaDoc
42 {
43     /**
44      * EOF value. Traditionally -1.
45      */

46     public static final int EOF = -1;
47
48     /**
49      * "At Pattern" value. Indicates that the stream position is
50      * currently at the beginning of a detected pattern occurence.
51      */

52     public static final int AT_PATTERN = -2;
53
54     /**
55      * Boyer-Moore byte string searcher for scanning the scan buffer.
56      */

57     private BMByteSearch searcher;
58     
59     /**
60      * Scan buffer -- Should be at least ten times the length of the
61      * search pattern.
62      */

63     private byte[] scanBuf;
64     
65     /**
66      * Scan buffer position.
67      */

68     private int scanBufPos;
69     
70     /**
71      * Number of bytes currently in the scan buffer.
72      */

73     private int scanBufCount;
74     
75     /**
76      * Total size of the scan buffer.
77      */

78     private int scanBufSize;
79     
80     /**
81      * The length of the search pattern.
82      */

83     private int patternLength;
84     
85     /**
86      * Number of bytes to overlap buffer reads by to ensure that
87      * the pattern gets matched even if it crosses a read boundary.
88      * This should normally be the length of the pattern.
89      */

90     private int patternKeep;
91     
92     /**
93      * The current position of the pattern within the current
94      * buffer contents. This may be less than the current buffer
95      * position if the pattern has already been skipped.
96      */

97     private int patternPos;
98     
99     /**
100      * Buffer to hold a single character in order to implement the
101      * single character read() method.
102      */

103     private byte[] readByte = new byte[1];
104     
105     /**
106      * Creates a Boyer-Moore byte stream scanner for a given pattern.
107      * Creates a buffer of length <code>buflen</code> for the scanning
108      * buffer.
109      *
110      * @param inputSource The input source stream to scan.
111      * @param pattern The pattern to scan for. Characters
112      * outside the Latin-1 encoding are truncated
113      * to signed bytes.
114      * @param buflen The length to use for the scanning buffer.
115      */

116     public
117     BMByteSearchStream(InputStream JavaDoc inputSource, String JavaDoc pattern, int buflen)
118     {
119     super(inputSource); // Pass input source stream to parent class.
120
scanBuf = new byte[buflen];
121     scanBufSize = buflen;
122     scanBufPos = 0;
123     scanBufCount = 0;
124     setPattern(pattern);
125     }
126     
127     /**
128      * Reads the next byte of data from this input stream. The value byte
129      * is returned as an <code>int</code> in the range 0 to 255. If no
130      * byte is available because the end of the stream has been reached,
131      * the value -1 is returned. This method blocks until input data is
132      * available, the end of the stream is detected, or an exception is
133      * thrown
134      *
135      * @return The next byte of data, or -1 if the end
136      * of stream is reached.
137      * @exception IOException If an I/O error occurs.
138      */

139     public
140     int read()
141     throws IOException JavaDoc
142     {
143     int len = read(readByte, 0, 1);
144     if (len != 1) return -1;
145     return ((int)readByte[0] + 0x100) & 0xff;
146     }
147     
148     /**
149      * Reads up to <code>buffer.length</code> bytes of data from this input
150      * stream into an array of bytes. This method blocks until some input
151      * is available
152      *
153      * @param buffer The buffer into which data are read.
154      * @return The number of bytes actually read, or -1
155      * if there are no more bytes because the
156      * end of stream has been reached.
157      * @exception IOException If an I/O error occurs.
158      */

159     public
160     int read(byte[] buffer)
161     throws IOException JavaDoc
162     {
163     return(read(buffer, 0, buffer.length));
164     }
165     
166     /**
167      * Reads <code>length</code> bytes of data from this input stream into
168      * an array of bytes. This method blocks until some input is available.
169      *
170      * @param buffer The buffer into which data are read.
171      * @param offset The start offset of the data.
172      * @param length The maximum number of bytes read.
173      * @return The total number of bytes read into the
174      * buffer, or -1 if there are no more bytes
175      * because the end of stream has been reached.
176      * @exception IOException If an I/O error occurs.
177      */

178     public
179     int read(byte[] buffer, int offset, int length)
180     throws IOException JavaDoc
181     {
182     int count, dst, i;
183     while ((count = min(length, scanBufCount - patternKeep)) <= 0) {
184         if (loadBuf() < 0) {
185         //
186
// Underlying stream reached EOF. return remainder of
187
// buffer or EOF.
188
//
189
count = min(length, scanBufCount);
190         if (count <= 0) {
191             patternPos = -1;
192             return EOF;
193         }
194         break;
195         }
196     }
197     for (dst=offset, i=count; i > 0; i--) {
198         buffer[dst++] = scanBuf[scanBufPos++];
199     }
200     scanBufCount -= count;
201     if ((scanBufPos > patternPos) && (patternPos >= 0)) {
202         patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
203     }
204     return count;
205     }
206     
207     /**
208      * Skips over and discards n bytes of data from the input stream.
209      * The <code>skip</code> method may, for a variety of reasons,
210      * end up skipping over some smaller number of bytes, possibly 0.
211      * The actual number of bytes skipped is returned.
212      *
213      * @param n The number of bytes to be skipped.
214      * @return The actual number of bytes skipped.
215      * @exception IOException If an I/O error occurs.
216      */

217     public
218     long skip(long n)
219     throws IOException JavaDoc
220     {
221     byte[] skipBuf = new byte[1024];
222     long bytesLeft = n;
223     long skipped=0;
224     long r, count;
225     while (bytesLeft > 0) {
226         if (bytesLeft < 1024) count = bytesLeft;
227         else count = 1024;
228         if ((r = this.read(skipBuf, 0, (int)count))<0) {
229         patternPos = -1;
230         return(skipped); // At EOF.
231
}
232         skipped += r;
233         bytesLeft -= r;
234     }
235     if ((scanBufPos > patternPos) && (patternPos >= 0)) {
236         patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
237     }
238     return skipped;
239     }
240     
241     /**
242      * Returns the number of bytes that can be read from this input stream
243      * without blocking. This consists of any bytes left in the buffer
244      * plus the result of the underlying stream's <code>available</code>
245      * method.
246      *
247      * @return The number of bytes that can be read
248      * without blocking.
249      * @exception IOException If an I/O error occurs.
250      */

251     public int
252     available()
253     throws IOException JavaDoc
254     {
255     return(scanBufCount + super.available());
256     }
257
258      /**
259       * Returns the number of bytes that can be read from this input stream
260       * without blocking or encountering the search pattern. If the search
261       * pattern has been found in the buffer, this is the number of bytes
262       * in the search buffer prior to the search pattern; otherwise, while
263       * there may be additional data in the input stream buffer that can be
264       * brought into the search buffer without blocking, this data may or
265       * may not contain the search pattern (or complete a partial search
266       * pattern located at the end of the buffer), so this is calculated as
267       * the number of bytes left in the search buffer less the length of
268       * the search pattern.
269       *
270       * @return The number of bytes that can be read
271       * for certain without blocking or encountering
272       * the search pattern.
273       * @exception IOException If an I/O error occurs.
274       */

275      public int
276      availableTo()
277      throws IOException JavaDoc
278      {
279          return (patternPos < 0) ? scanBufCount - patternLength : patternPos - scanBufPos;
280      }
281  
282
283     
284     /**
285      * Reads data into a buffer until the search pattern or the
286      * end of file is detected. Returns <code>-1</code> if at the
287      * search pattern or end-of-file.
288      *
289      * @param buffer Buffer to read into.
290      * @param offset Offset in buffer to read into.
291      * @param length Number of bytes to try to read.
292      * @return The number of bytes actually read, or
293      * <code>-1</code> if at the pattern or eof.
294      * @exception IOException Thrown if an I/O exception occurs.
295      */

296     public
297     int readTo(byte[] buffer, int offset, int length)
298     throws IOException JavaDoc
299     {
300     int count, dst, i;
301     if (patternPos == scanBufPos) return AT_PATTERN;
302     if (patternPos > scanBufPos) {
303         count = min(length, patternPos - scanBufPos);
304         for (dst=offset, i=count; i > 0; i--) {
305         buffer[dst++] = scanBuf[scanBufPos++];
306         }
307         scanBufCount -= count;
308         return count;
309     }
310     while ((count = min(length, scanBufCount - patternKeep)) <= 0) {
311         if (loadBuf() < 0) {
312         //
313
// Underlying stream reached EOF. return remainder of
314
// buffer or EOF.
315
//
316
count = min(length, scanBufCount);
317         if (count <= 0) return EOF;
318         break;
319         }
320         // Bug fixed - must recheck for being at pattern.
321
if (patternPos == scanBufPos) return AT_PATTERN;
322         if (patternPos > scanBufPos) {
323         count = min(length, patternPos - scanBufPos);
324         for (dst=offset, i=count; i > 0; i--) {
325             buffer[dst++] = scanBuf[scanBufPos++];
326         }
327         scanBufCount -= count;
328         return count;
329         }
330     }
331     for (dst=offset, i=count; i > 0; i--) {
332         buffer[dst++] = scanBuf[scanBufPos++];
333     }
334     scanBufCount -= count;
335     return count;
336     }
337     
338     
339     /**
340      * Skips all bytes up to but not including the search pattern or EOF.
341      * Returns the number of bytes skipped. Repeated calls to
342      * this method will leave the stream at the same postion until
343      * another call explicitly reads or skips the pattern data.
344      *
345      * @return The number of bytes skipped.
346      *
347      * @exception IOException Thrown if an I/O exception occurs.
348      */

349     public
350     int skipTo()
351     throws IOException JavaDoc
352     {
353     int count=0, skipCount = 0;
354     while (patternPos < scanBufPos) {
355         count = min(0, scanBufCount - patternKeep);
356         if (count >= 0) {
357         skipCount += count;
358         scanBufCount -= count;
359         scanBufPos += count;
360         }
361         int nread = loadBuf();
362         if (nread < 0) {
363         boolean notFound = (patternPos < scanBufPos);
364         skipCount += scanBufCount;
365         scanBufCount = 0;
366         scanBufPos = 0;
367         patternPos = -1;
368         if (notFound) return -1;
369         return (skipCount <= 0) ? 0 : skipCount;
370         }
371         if (nread == 0) {
372         // Special case. Buffer needs to be discarded.
373
if (scanBufCount == scanBuf.length) {
374             int skip = scanBuf.length - patternKeep;
375             scanBufPos = skip;
376             scanBufCount = patternKeep;
377             skipCount += skip;
378             patternPos = -1; // Pattern not avail.
379
}
380         }
381     }
382     count = patternPos - scanBufPos;
383     skipCount += count;
384     scanBufPos += count;
385     scanBufCount -= count;
386     return skipCount;
387     }
388     
389     /**
390      * Skips all bytes up to and including the search pattern or EOF.
391      *
392      * @return The number of bytes skipped.
393      * @exception IOException Thrown if an I/O exception occurs.
394      */

395     public
396     int skipPattern()
397     throws IOException JavaDoc
398     {
399     int skipCount = skipTo();
400     if (skipCount < 0) return -1; // Pattern not found.
401
skipCount += patternLength;
402     scanBufCount -= patternLength;
403     scanBufPos += patternLength;
404     if (scanBufCount > 0) {
405         patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
406     }
407     return skipCount;
408     }
409     
410     /**
411      * Set the search pattern. After this call, all new scans will be
412      * for the new pattern. Characters outside the values 0-255 are
413      * truncated into signed byte values in the Latin-1 encoding.
414      *
415      * @param pattern The new pattern to search for.
416      */

417     public
418     void setPattern(String JavaDoc pattern)
419     {
420     searcher = new BMByteSearch(pattern);
421     patternLength = searcher.getPatternLength();
422     patternKeep = patternLength + 2; // Potential CRLF parsing. *ugh*
423
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
424     }
425
426     /**
427      * Set the search pattern. After this call, all new scans will be
428      * for the new pattern. Characters outside the values 0-255 are
429      * truncated into signed byte values in the Latin-1 encoding.
430      *
431      * @param search The precomputed Boyer-Moore searcher.
432      */

433     public
434     void setPattern(BMByteSearch search)
435     {
436     searcher = search;
437     patternLength = searcher.getPatternLength();
438     patternKeep = patternLength + 2; // Potential CRLF parsing. *ugh*
439
patternPos = searcher.search(scanBuf, scanBufPos, scanBufCount);
440     }
441     
442     /**
443      * Returns the next <code>length</code> bytes of the input buffer
444      * as a string. If fewer than <code>length</code> bytes remain on
445      * the input stream, then only the remaining bytes are returned. If
446      * the input stream is at EOF and there are no more bytes in the buffer
447      * then an empty string is returned.
448      *
449      * @param length The number of bytes to look ahead.
450      * @return A string containing the lookahead bytes as 8 bit characters.
451      * @exception IOException Thrown if an I/O exception occurs.
452      */

453     public
454     String JavaDoc peekAheadString(int length)
455     throws IOException JavaDoc
456     {
457     while (scanBufCount < length) {
458         if (loadBuf() < 0) break;
459     }
460     int count = min(length, scanBufCount);
461     if (count <= 0) return "";
462     StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
463     for (int i=scanBufPos; count > 0; i++, count--) {
464         sb.append((char)(scanBuf[i] & 0xff));
465     }
466     return sb.toString();
467     }
468
469     /**
470      * Reads more data from the underlying input source, keeping
471      * track of buffering and pattern matching. Before reading
472      * new bytes, any remaining bytes in the buffer are copied
473      * to position 0. The <code>scanBufPos</code>,
474      * <code>scanBufCount</code>, and <code>patternPos</code>
475      * fields are reset to appropriate values.
476      *
477      * @return The number of new bytes added to the
478      * buffer, or <code>-1</code> if the input
479      * stream has reached end of file.
480      * @exception IOException Thrown if an I/O exception occurs.
481      */

482     private
483     int loadBuf()
484     throws IOException JavaDoc
485     {
486     //
487
// First, copy remainder of buffer to beginning.
488
//
489
if (scanBufPos > 0) {
490         for (int i=0; i<scanBufCount; i++) {
491         scanBuf[i] = scanBuf[scanBufPos + i];
492         }
493         scanBufPos = 0;
494     }
495     
496     //
497
// Determine how many bytes are available to read.
498
// If no bytes are available, then read one byte and block.
499
// Then, recheck available.
500
//
501
int n, avail = super.available();
502     if (avail == 0) {
503         // Wait for at least one char.
504
n = super.read(scanBuf, scanBufCount, 1);
505         if (n <= 0) {
506         patternPos = searcher.search(scanBuf, 0, scanBufCount);
507         return EOF;
508         }
509         scanBufCount += n;
510         avail = super.available();
511         if (avail == 0) {
512         patternPos = searcher.search(scanBuf, 0, scanBufCount);
513         return n;
514         }
515     }
516     
517     //
518
// Only read currently available bytes. Don't block until
519
// all bytes are ready. That could cause a hang.
520
//
521
int nread = scanBufSize - scanBufCount;
522     if ((avail >= 0) && (nread > avail)) nread = avail;
523     
524     //
525
// Second, read additional bytes using superclass's read.
526
//
527
n = super.read(scanBuf, scanBufCount, nread);
528
529     if (n < 0) {
530         patternPos = searcher.search(scanBuf, 0, scanBufCount);
531         return EOF;
532     }
533     scanBufCount += n;
534     
535     //
536
// Search for first occurence of pattern.
537
//
538
patternPos = searcher.search(scanBuf, 0, scanBufCount);
539     return n;
540     }
541
542     /**
543      * Compares two integers and returns the lesser value.
544      *
545      * @param i1 First integer to compare.
546      * @param i2 Second integer to compare.
547      * @return The lesser of <code>i1</code> or <code>i2</code>.
548      */

549     private static final
550     int min(int i1, int i2)
551     {
552     return (i1 < i2) ? i1 : i2;
553     }
554 }
555
556
Popular Tags