KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > it > unimi > dsi > fastutil > io > FastBufferedInputStream


1 package it.unimi.dsi.fastutil.io;
2
3 /*
4  * fastutil: Fast & compact type-specific collections for Java
5  *
6  * Copyright (C) 2005, 2006 Sebastiano Vigna
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with this library; if not, write to the Free Software
20  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21  *
22  */

23
24 import it.unimi.dsi.fastutil.bytes.ByteArrays;
25 import it.unimi.dsi.fastutil.io.MeasurableInputStream;
26 import it.unimi.dsi.fastutil.io.RepositionableStream;
27
28 import java.io.IOException JavaDoc;
29 import java.io.InputStream JavaDoc;
30 import java.nio.channels.FileChannel JavaDoc;
31 import java.util.EnumSet JavaDoc;
32
33 /** Lightweight, unsynchronized, aligned input stream buffering class with
34  * {@linkplain MeasurableInputStream measurability},
35  * {@linkplain RepositionableStream repositionability}
36  * and {@linkplain #readLine(byte[], int, int, EnumSet) line reading} support.
37  *
38  * <P>This class provides buffering for input streams, but it does so with
39  * purposes and an internal logic that are radically different from the ones
40  * adopted in {@link java.io.BufferedInputStream}.
41  *
42  * <P>There is no support for marking. All methods are unsynchronized. Moreover,
43  * we try to guarantee that in case of sequential access
44  * <em>all reads performed by this class will be
45  * of the given buffer size</em>. If, for instance, you use the
46  * default buffer size, reads will be performed on the underlying input stream
47  * in multiples of 8192 bytes. This is very important on operating systems
48  * that optimize disk reads on disk block boundaries.
49  *
50  * <P>As an additional feature, this class implements the {@link
51  * RepositionableStream} interface and extends {@link MeasurableInputStream}.
52  * An instance of this class will try to cast
53  * the underlying byte stream to a {@link RepositionableStream} and to fetch by
54  * reflection the {@link java.nio.channels.FileChannel} underlying the given
55  * input stream, in this order. If either reference can be successfully
56  * fetched, you can use {@link #position(long)} to reposition the stream.
57  * Note that in this case we do not guarantee that all reads will
58  * be performed on buffer boundaries.
59  *
60  * <p>This class also keeps track of the number of bytes read so far, so as
61  * to be able to implement {@link MeasurableInputStream#position()}
62  * independently of the underlying input stream.
63  *
64  * <P>If, on the other hand, the underlying byte stream can be cast to a
65  * {@link MeasurableInputStream}, then also
66  * {@link MeasurableInputStream#length()} will work as expected.
67  *
68  * <p>This class has limited support for
69  * {@linkplain #readLine(byte[], int, int, EnumSet) &ldquo;reading a line&rdquo;}
70  * (whatever that means) from the underlying input stream. You can choose the set of
71  * {@linkplain FastBufferedInputStream.LineTerminator line terminators} that
72  * delimit lines.
73  *
74  * @since 4.4
75  */

76
77 public class FastBufferedInputStream extends MeasurableInputStream implements RepositionableStream {
78
79     /** The default size of the internal buffer in bytes (8Ki). */
80     public final static int DEFAULT_BUFFER_SIZE = 8 * 1024;
81
82     /** An enumeration of the supported line terminators. */
83     public static enum LineTerminator {
84         /** A carriage return (CR, ASCII 13). */
85         CR,
86         /** A line feed (LF, ASCII 10). */
87         LF,
88         /** A carriage return followed by a line feed (CR/LF, ASCII 13/10). */
89         CR_LF
90     }
91
92     /** A set containing <em>all available</em> line terminators. */
93     public final static EnumSet JavaDoc<LineTerminator> ALL_TERMINATORS = EnumSet.allOf( LineTerminator.class );
94     
95     /** The underlying input stream. */
96     protected InputStream is;
97
98     /** The internal buffer. */
99     protected byte buffer[];
100
101     /** The current position in the buffer. */
102     protected int pos;
103
104     /** The number of bytes ever read (reset upon a call to {@link #position(long)}).
105      * In particular, this will always represent the index (in the underlying input stream)
106      * of the first available byte in the buffer. */

107     protected long readBytes;
108
109     /** The number of buffer bytes available starting from {@link #pos}. */
110     protected int avail;
111
112     /** The cached file channel underlying {@link #is}, if any. */
113     private FileChannel JavaDoc fileChannel;
114
115     /** {@link #is} cast to a positionable stream, if possible. */
116     private RepositionableStream rs;
117
118     /** {@link #is} cast to a measurable input stream, if possible. */
119     private MeasurableInputStream ms;
120
121     /** Creates a new fast buffered input stream by wrapping a given input stream with a given buffer size.
122      *
123      * @param is an input stream to wrap.
124      * @param bufSize the size in bytes of the internal buffer.
125      */

126
127     public FastBufferedInputStream( final InputStream is, final int bufSize ) {
128         this.is = is;
129         buffer = new byte[ bufSize ];
130
131         if ( is instanceof RepositionableStream ) rs = (RepositionableStream)is;
132         if ( is instanceof MeasurableInputStream ) ms = (MeasurableInputStream)is;
133             
134         if ( rs == null ) {
135                 
136             try {
137                 fileChannel = (FileChannel JavaDoc)( is.getClass().getMethod( "getChannel", new Class JavaDoc[] {} ) ).invoke( is, new Object JavaDoc[] {} );
138             }
139             catch( IllegalAccessException JavaDoc e ) {}
140             catch( IllegalArgumentException JavaDoc e ) {}
141             catch( NoSuchMethodException JavaDoc e ) {}
142             catch( java.lang.reflect.InvocationTargetException JavaDoc e ) {}
143             catch( ClassCastException JavaDoc e ) {}
144         }
145     }
146
147     /** Creates a new fast buffered input stream by wrapping a given input stream with a buffer of {@link #DEFAULT_BUFFER_SIZE} bytes.
148      *
149      * @param is an input stream to wrap.
150      */

151     public FastBufferedInputStream( final InputStream is ) {
152         this( is, DEFAULT_BUFFER_SIZE );
153     }
154
155     /** Checks whether no more bytes will be returned.
156      *
157      * <p>This method will refill the internal buffer.
158      *
159      * @return true if there are no characters in the internal buffer and
160      * the underlying reader is exhausted.
161      */

162     
163     protected boolean noMoreCharacters() throws IOException JavaDoc {
164         if ( avail == 0 ) {
165             avail = is.read( buffer );
166             if ( avail <= 0 ) {
167                 avail = 0;
168                 return true;
169             }
170             pos = 0;
171         }
172         return false;
173     }
174     
175
176     
177     public int read() throws IOException JavaDoc {
178         if ( noMoreCharacters() ) return -1;
179         avail--;
180         readBytes++;
181         return buffer[ pos++ ] & 0xFF;
182     }
183
184
185     public int read( final byte b[], int offset, int length ) throws IOException JavaDoc {
186         if ( length <= avail ) {
187             System.arraycopy( buffer, pos, b, offset, length );
188             pos += length;
189             avail -= length;
190             readBytes += length;
191             return length;
192         }
193     
194         final int head = avail;
195         System.arraycopy( buffer, pos, b, offset, head );
196         offset += head;
197         length -= head;
198         avail = 0;
199
200         final int residual = length % buffer.length;
201         int result;
202
203         if ( ( result = is.read( b, offset, length - residual ) ) < length - residual ) {
204             final int t = result < 0
205                 ? ( head != 0 ? head : -1 )
206                 : result + head;
207             if ( t > 0 ) readBytes += t;
208             return t;
209         }
210
211         avail = is.read( buffer );
212         if ( avail < 0 ) {
213             avail = pos = 0;
214             final int t = result + head > 0 ? result + head : -1;
215             if ( t > 0 ) readBytes += t;
216             return t;
217         }
218         pos = Math.min( avail, residual );
219         System.arraycopy( buffer, 0, b, offset + length - residual, pos );
220         avail -= pos;
221         final int t = result + head + pos;
222         readBytes += t;
223         return t;
224     }
225
226     /** Reads a line into the given byte array using {@linkplain #ALL_TERMINATORS all terminators}.
227      *
228      * @param array byte array where the next line will be stored.
229      * @return the number of bytes actually placed in <code>array</code>, or -1 at end of file.
230      * @see #readLine(byte[], int, int, EnumSet)
231      */

232
233     public int readLine( final byte[] array ) throws IOException JavaDoc {
234         return readLine( array, 0, array.length, ALL_TERMINATORS );
235     }
236
237     /** Reads a line into the given byte array.
238      *
239      * @param array byte array where the next line will be stored.
240      * @param terminators a set containing the line termination sequences that we want
241      * to consider as valid.
242      * @return the number of bytes actually placed in <code>array</code>, or -1 at end of file.
243      * @see #readLine(byte[], int, int, EnumSet)
244      */

245
246     public int readLine( final byte[] array, final EnumSet JavaDoc<LineTerminator> terminators ) throws IOException JavaDoc {
247         return readLine( array, 0, array.length, terminators );
248     }
249
250     /** Reads a line into the given byte-array fragment using {@linkplain #ALL_TERMINATORS all terminators}.
251      *
252      * @param array byte array where the next line will be stored.
253      * @param off the first byte to use in <code>array</code>.
254      * @param len the maximum number of bytes to read.
255      * @return the number of bytes actually placed in <code>array</code>, or -1 at end of file.
256      * @see #readLine(byte[], int, int, EnumSet)
257      */

258     public int readLine( final byte[] array, final int off, final int len ) throws IOException JavaDoc {
259         return readLine( array, off, len, ALL_TERMINATORS );
260     }
261
262     /** Reads a line into the given byte-array fragment.
263      *
264      * <P>Reading lines (i.e., characters) out of a byte stream is not always sensible
265      * (methods available to that purpose in old versions of Java have been mercilessly deprecated).
266      * Nonetheless, in several situations, such as when decoding network protocols or headers
267      * known to be ASCII, it is very useful to be able to read a line from a byte stream.
268      *
269      * <p>This method will attempt to read the next line into <code>array</code> starting at <code>off</code>,
270      * reading at most <code>len</code> bytes. The read, however, will be stopped by the end of file or
271      * when meeting a {@linkplain LineTerminator <em>line terminator</em>}. Of course, for this operation
272      * to be sensible the encoding of the text contained in the stream, if any, must not generate spurious
273      * carriage returns or line feeds. Note that the termination detection uses a maximisation
274      * criterion, so if you specify both {@link LineTerminator#CR} and
275      * {@link LineTerminator#CR_LF} meeting a pair CR/LF will consider the whole pair a terminator.
276      *
277      * <p>Terminators are <em>not</em> copied into <em>array</em> or included in the returned count. The
278      * returned integer can be used to check whether the line is complete: if it is smaller than
279      * <code>len</code>, then more bytes might be available, but note that this method (contrarily
280      * to {@link #read(byte[], int, int)}) can legitimately return zero when <code>len</code>
281      * is nonzero just because a terminator was found as the first character. Thus, the intended
282      * usage of this method is to call it on a given array, check whether <code>len</code> bytes
283      * have been read, and if so try again (possibly extending the array) until a number of read bytes
284      * strictly smaller than <code>len</code> (possibly, -1) is returned.
285      *
286      * <p>If you need to guarantee that a full line is read, use the following idiom:
287      * <pre>
288      * int start = off, len;
289      * while( ( len = readLine( array, start, array.length - start, terminators ) ) == array.length - start ) {
290      * start += len;
291      * array = ByteArrays.grow( array, array.length + 1 );
292      * };
293      * </pre>
294      *
295      * <p>At the end of the loop, the line will be placed in <code>array</code> starting at
296      * <code>off</code> (inclusive) and ending at <code>start + Math.max( len, 0 )</code> (exclusive).
297      *
298      * @param array byte array where the next line will be stored.
299      * @param off the first byte to use in <code>array</code>.
300      * @param len the maximum number of bytes to read.
301      * @param terminators a set containing the line termination sequences that we want
302      * to consider as valid.
303      * @return the number of bytes actually placed in <code>array</code>, or -1 at end of file.
304      * Note that the returned number will be <code>len</code> if no line termination sequence
305      * specified in <code>terminators</code> has been met before scanning <code>len</code> byte,
306      * and if also we did not meet the end of file.
307      */

308
309     public int readLine( final byte[] array, final int off, final int len, final EnumSet JavaDoc<LineTerminator> terminators ) throws IOException JavaDoc {
310         ByteArrays.ensureOffsetLength( array ,off, len );
311         if ( len == 0 ) return 0; // 0-length reads always return 0
312
if ( noMoreCharacters() ) return -1;
313         int i, k = 0, remaining = len, read = 0; // The number of bytes still to be read
314
for(;;) {
315             for( i = 0; i < avail && i < remaining && ( k = buffer[ pos + i ] ) != '\n' && k != '\r' ; i++ );
316             System.arraycopy( buffer, pos, array, off + read, i );
317             pos += i;
318             avail -= i;
319             read += i;
320             remaining -= i;
321             if ( remaining == 0 ) {
322                 readBytes += read;
323                 return read; // We did not stop because of a terminator
324
}
325             
326             if ( avail > 0 ) { // We met a terminator
327
if ( k == '\n' ) { // LF first
328
pos++;
329                     avail--;
330                     if ( terminators.contains( LineTerminator.LF ) ) {
331                         readBytes += read + 1;
332                         return read;
333                     }
334                     else {
335                         array[ off + read++ ] = '\n';
336                         remaining--;
337                     }
338                 }
339                 else if ( k == '\r' ) { // CR first
340
pos++;
341                     avail--;
342                     
343                     if ( terminators.contains( LineTerminator.CR_LF ) ) {
344                         if ( avail > 0 ) {
345                             if ( buffer[ pos ] == '\n' ) { // CR/LF with LF already in the buffer.
346
pos ++;
347                                 avail--;
348                                 readBytes += read + 2;
349                                 return read;
350                             }
351                         }
352                         else { // We must search for the LF.
353
if ( noMoreCharacters() ) {
354                                 // Not found a matching LF because of end of file, will return CR in buffer if not a terminator
355

356                                 if ( ! terminators.contains( LineTerminator.CR ) ) {
357                                     array[ off + read++ ] = '\r';
358                                     remaining--;
359                                     readBytes += read;
360                                 }
361                                 else readBytes += read + 1;
362                                 
363                                 return read;
364                             }
365                             if ( buffer[ 0 ] == '\n' ) {
366                                 // Found matching LF, won't return terminators in the buffer
367
pos++;
368                                 avail--;
369                                 readBytes += read + 2;
370                                 return read;
371                             }
372                         }
373                     }
374                     
375                     if ( terminators.contains( LineTerminator.CR ) ) {
376                         readBytes += read + 1;
377                         return read;
378                     }
379                     
380                     array[ off + read++ ] = '\r';
381                     remaining--;
382                 }
383             }
384             else if ( noMoreCharacters() ) {
385                 readBytes += read;
386                 return read;
387             }
388         }
389     }
390
391     
392
393     public void position( long newPosition ) throws IOException JavaDoc {
394
395         final long position = readBytes;
396
397         /** Note that this check will succeed also in the case of
398          * an empty buffer and position == newPosition. This behaviour is
399          * intentional, as it delays buffering to when it is actually
400          * necessary and avoids useless class the underlying stream. */

401         
402         if ( newPosition <= position + avail && newPosition >= position - pos ) {
403             pos += newPosition - position;
404             avail -= newPosition - position;
405             readBytes = newPosition;
406             return;
407         }
408
409         if ( rs != null ) rs.position( newPosition );
410         else if ( fileChannel != null ) fileChannel.position( newPosition );
411         else throw new UnsupportedOperationException JavaDoc( "position() can only be called if the underlying byte stream implements the RepositionableStream interface or if the getChannel() method of the underlying byte stream exists and returns a FileChannel" );
412         readBytes = newPosition;
413
414         avail = Math.max( 0, is.read( buffer ) );
415         pos = 0;
416     }
417
418     public long position() throws IOException JavaDoc {
419         return readBytes;
420     }
421
422     /** Returns the length of the underlying input stream, if it is {@linkplain MeasurableInputStream measurable}.
423      *
424      * @return the length of the underlying input stream.
425      * @throws UnsupportedOperationException if the underlying input stream is not {@linkplain MeasurableInputStream measurable}.
426      */

427
428     public long length() throws IOException JavaDoc {
429         if ( ms == null ) throw new UnsupportedOperationException JavaDoc();
430         return ms.length();
431     }
432
433
434     public long skip( long n ) throws IOException JavaDoc {
435         if ( n <= avail ) {
436             final int m = (int)n;
437             pos += m;
438             avail -= m;
439             readBytes += n;
440             return n;
441         }
442
443         final int head = avail;
444         n -= head;
445         avail = 0;
446
447         final int residual = (int)( n % buffer.length );
448         long result;
449         if ( ( result = is.skip( n - residual ) ) < n - residual ) {
450             avail = 0;
451             readBytes += result + head;
452             return result + head;
453         }
454
455         avail = Math.max( is.read( buffer ), 0 );
456         pos = Math.min( residual, avail );
457         avail -= pos;
458         final long t = result + head + pos;
459         readBytes += t;
460         return t;
461     }
462
463
464     public int available() throws IOException JavaDoc {
465         return (int)Math.min( is.available() + (long)avail, Integer.MAX_VALUE );
466     }
467
468     public void close() throws IOException JavaDoc {
469         if ( is == null ) return;
470         if ( is != System.in ) is.close();
471         is = null;
472         buffer = null;
473     }
474
475     /** Resets the internal logic of this fast buffered input stream, clearing the buffer.
476      *
477      * <p>All buffering information is discarded, and the number of bytes read so far
478      * (and thus, also the {@linkplain #position() current position})
479      * is adjusted to reflect this fact.
480      *
481      * <p>This method is mainly useful for re-reading
482      * files that have been overwritten externally.
483      */

484
485     public void flush() {
486         if ( is == null ) return;
487         readBytes += avail;
488         avail = pos = 0;
489     }
490
491     /** Resets the internal logic of this fast buffered input stream.
492      *
493      * @deprecated As of <samp>fastutil</samp> 5.0.4, replaced by {@link #flush()}. The old
494      * semantics of this method does not contradict {@link InputStream}'s contract, as
495      * the semantics of {@link #reset()} is undefined if {@link InputStream#markSupported()}
496      * returns false. On the other hand, the name was really a poor choice.
497      */

498     @Deprecated JavaDoc
499     public void reset() {
500         flush();
501     }
502 }
503
Popular Tags