KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > lexer > Stream


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Stream.java,v $
// $Author: derrickoswald $
// $Date: 2004/01/02 16:24:53 $
// $Revision: 1.12 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

27 package org.htmlparser.lexer;
28
29 import java.io.IOException JavaDoc;
30 import java.io.InputStream JavaDoc;
31 import java.lang.Runnable JavaDoc;
32
/**
 * Provides for asynchronous fetching from a stream.
 * <p>Every byte read from the underlying stream is kept in {@link #mBuffer},
 * so {@link #mark(int)} and {@link #reset()} can reposition to any point
 * already read. {@link #run()} drains the underlying stream and is intended
 * to be executed on a background thread while another thread consumes bytes
 * through {@link #read()}; when nothing is buffered, <code>read()</code>
 * falls back to fetching synchronously via {@link #fill(boolean)}.
 */
public class Stream extends InputStream implements Runnable
{
    /**
     * Statistic: the number of calls to {@link #fill(boolean)} that
     * obtained data from the underlying stream.
     */
    public int fills = 0;

    /**
     * Statistic: the number of times the buffer was replaced by a larger one.
     */
    public int reallocations = 0;

    /**
     * Statistic: the number of non-forced fills that actually had to go to
     * the underlying stream because no buffered bytes were available.
     */
    public int synchronous = 0;

    /**
     * An initial buffer size.
     */
    protected static final int BUFFER_SIZE = 4096;

    /**
     * Return value when no more characters are left.
     */
    protected static final int EOF = -1;

    /**
     * The underlying stream.
     * Set to <code>null</code> once it has been drained or closed.
     */
    protected volatile InputStream mIn;

    /**
     * The bytes read so far.
     */
    public volatile byte[] mBuffer;

    /**
     * The number of valid bytes in the buffer.
     */
    public volatile int mLevel;

    /**
     * The offset of the next byte returned by read().
     */
    protected int mOffset;

    /**
     * The content length from the HTTP header.
     * Zero means the length is unknown.
     */
    protected int mContentLength;

    /**
     * The bookmark, or -1 if {@link #mark(int)} has not been called.
     */
    protected int mMark;

    /**
     * Construct a stream with no assumptions about the number of bytes available.
     * @param in The input stream to use.
     */
    public Stream (InputStream in)
    {
        this (in, 0);
    }

    /**
     * Construct a stream to read the given number of bytes.
     * @param in The input stream to use.
     * @param bytes The maximum number of bytes to read.
     * This should be set to the ContentLength from the HTTP header.
     * A negative or zero value indicates an unknown number of bytes.
     */
    public Stream (InputStream in, int bytes)
    {
        mIn = in;
        mBuffer = null;
        mLevel = 0;
        mOffset = 0;
        mContentLength = bytes < 0 ? 0 : bytes;
        mMark = -1;
    }

    /**
     * Detach and close the underlying stream.
     * The reference is cleared <em>before</em> the close is attempted so
     * that this stream is marked as drained even if the close fails.
     * Callers are expected to hold this object's monitor.
     * @exception IOException If the underlying stream close() throws one.
     */
    private void closeUnderlying ()
        throws
            IOException
    {
        InputStream in;

        in = mIn;
        mIn = null;
        if (null != in)
            in.close ();
    }

    /**
     * Fetch more bytes from the underlying stream.
     * Has no effect if the underlying stream has been drained.
     * @param force If <code>true</code>, an attempt is made to read from the
     * underlying stream, even if bytes are available. If <code>false</code>,
     * a read of the underlying stream will not occur if there are already
     * bytes available.
     * @return <code>true</code> if not at the end of the input stream.
     * @exception IOException If the underlying stream read(), available()
     * or close() throws one.
     */
    protected synchronized boolean fill (boolean force)
        throws
            IOException
    {
        int size;
        byte[] buffer;
        int read;
        boolean ret;

        ret = false;

        if (null != mIn) // mIn goes null when it's been sucked dry
        {
            if (!force)
            {   // check for change of state while waiting on the monitor in a synchronous call
                if (0 != available ())
                    return (true);
                synchronous++;
            }

            // get some buffer space
            if (0 == mContentLength)
            {   // unknown content length... keep doubling
                if (null == mBuffer)
                {
                    mBuffer = new byte[Math.max (BUFFER_SIZE, mIn.available ())];
                    buffer = mBuffer;
                }
                else
                {
                    // grow only when less than half an initial buffer of room remains
                    if (mBuffer.length - mLevel < BUFFER_SIZE / 2)
                        buffer = new byte[Math.max (mBuffer.length * 2, mBuffer.length + mIn.available ())];
                    else
                        buffer = mBuffer;
                }
                size = buffer.length - mLevel;
            }
            else
            {   // known content length... allocate once
                size = mContentLength - mLevel;
                if (null == mBuffer)
                    mBuffer = new byte[size];
                buffer = mBuffer;
            }

            // read into the end of the 'new' buffer
            read = mIn.read (buffer, mLevel, size);
            if (-1 == read)
                closeUnderlying ();
            else
            {
                if (mBuffer != buffer)
                {   // copy the bytes previously read, then publish the bigger buffer
                    System.arraycopy (mBuffer, 0, buffer, 0, mLevel);
                    mBuffer = buffer;
                    reallocations++;
                }
                // the level is raised only after the buffer holding the new bytes is in place
                mLevel += read;
                if ((0 != mContentLength) && (mLevel == mContentLength))
                    closeUnderlying (); // everything promised has arrived
                ret = true;
                fills++;
            }
        }

        return (ret);
    }

    //
    // Runnable interface
    //

    /**
     * Continually read the underlying stream until exhausted.
     * Returns when the underlying stream is drained or a read fails.
     * @see java.lang.Thread#run()
     */
    public void run ()
    {
        boolean filled;

        do
        {   // keep hammering the socket with no delay, it's metered upstream
            try
            {
                filled = fill (true);
            }
            catch (IOException ioe)
            {
                ioe.printStackTrace ();
                // exit the thread if there is a problem,
                // let the synchronous reader find out about it
                filled = false;
            }
        }
        while (filled);
    }

    //
    // InputStream overrides
    //

    /**
     * Reads the next byte of data from the input stream. The value byte is
     * returned as an <code>int</code> in the range <code>0</code> to
     * <code>255</code>. If no byte is available because the end of the stream
     * has been reached, the value <code>-1</code> is returned. This method
     * blocks until input data is available, the end of the stream is detected,
     * or an exception is thrown.
     * @return The next byte of data, or <code>-1</code> if the end of the
     * stream is reached.
     * @exception IOException If an I/O error occurs.
     */
    public int read () throws IOException
    {
        int ret;

        // The following is unsynchronized code.
        // Some would argue that unsynchronized access isn't thread safe
        // but I think I can rationalize it in this case...
        // The two volatile members are mLevel and mBuffer (besides mIn).
        // If (mOffset >= mLevel) turns false after the test, fill is
        // superfluously called, but it's synchronized and figures it out.
        // (mOffset < mLevel) only goes more true by the operation of the
        // background thread, it increases the value of mLevel
        // and volatile int access is atomic.
        // If mBuffer changes by the operation of the background thread,
        // the array pointed to can only be bigger than the previous buffer,
        // and hence no array bounds exception can be raised.
        if (0 == available ())
            fill (false);
        if (0 != available ())
            ret = mBuffer[mOffset++] & 0xff;
        else
            ret = EOF;

        return (ret);
    }

    /**
     * Returns the number of bytes that can be read (or skipped over) from
     * this input stream without blocking by the next caller of a method for
     * this input stream. The next caller might be the same thread or
     * another thread.
     * Here this is the number of buffered bytes not yet returned by read().
     * @return The number of bytes that can be read from this input stream
     * without blocking.
     * @exception IOException <em>Never thrown. Just for subclassers.</em>
     */
    public int available () throws IOException
    {
        return (mLevel - mOffset);
    }

    /**
     * Closes this input stream and releases any system resources associated
     * with the stream.
     * The buffer, level, offset, content length and mark are always reset,
     * even when closing the underlying stream fails; in that case the
     * failure is still reported to the caller.
     * @exception IOException If closing the underlying stream fails.
     */
    public synchronized void close () throws IOException
    {
        try
        {
            closeUnderlying ();
        }
        finally
        {
            mBuffer = null;
            mLevel = 0;
            mOffset = 0;
            mContentLength = 0;
            mMark = -1;
        }
    }

    /**
     * Repositions this stream to the position at the time the
     * <code>mark</code> method was last called on this input stream.
     * If <code>mark</code> has not been called since the stream was created
     * (or since it was closed), the stream is repositioned to its start.
     * All bytes read since that position will be resupplied to subsequent
     * callers of the <code>read</code> method, followed by any bytes that
     * otherwise would have been the next input data as of the time of the
     * call to <code>reset</code>.
     *
     * @exception IOException <em>Never thrown. Just for subclassers.</em>
     * @see java.io.InputStream#mark(int)
     * @see java.io.IOException
     */
    public void reset () throws IOException
    {
        if (-1 != mMark)
            mOffset = mMark;
        else
            mOffset = 0;
    }

    /**
     * Tests if this input stream supports the <code>mark</code> and
     * <code>reset</code> methods. Whether or not <code>mark</code> and
     * <code>reset</code> are supported is an invariant property of a
     * particular input stream instance. The <code>markSupported</code> method
     * of <code>InputStream</code> returns <code>false</code>.
     *
     * @return <code>true</code>.
     * @see java.io.InputStream#mark(int)
     * @see java.io.InputStream#reset()
     */
    public boolean markSupported ()
    {
        return (true);
    }

    /**
     * Marks the current position in this input stream. A subsequent call to
     * the <code>reset</code> method repositions this stream at the last marked
     * position so that subsequent reads re-read the same bytes.
     *
     * <p> The read limit is ignored: the mark is simply the current read
     * offset, and it is only discarded by <code>close</code> or replaced by
     * another call to <code>mark</code>.
     *
     * @param readlimit <em>Not used.</em>
     * @see java.io.InputStream#reset()
     */
    public void mark (int readlimit)
    {
        mMark = mOffset;
    }
}
389
Popular Tags