KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > lexer > Source


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/Source.java,v $
// $Author: derrickoswald $
// $Date: 2005/03/13 14:51:44 $
// $Revision: 1.18 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.lexer;

import java.io.IOException;
import java.io.Reader;
import java.io.Serializable;

import org.htmlparser.util.ParserException;

35 /**
36  * A buffered source of characters.
37  * A Source is very similar to a Reader, like:
38  * <pre>
39  * new InputStreamReader (connection.getInputStream (), charset)
40  * </pre>
41  * It differs from the above, in three ways:
42  * <ul>
43  * <li>the fetching of bytes may be asynchronous</li>
44  * <li>the character set may be changed, which resets the input stream</li>
45  * <li>characters may be requested more than once, so in general they will be buffered</li>
46  * </ul>
47  */

48 public abstract class Source
49     extends
50         Reader JavaDoc
51     implements
52         Serializable JavaDoc
53 {
54     /**
55      * Return value when the source is exhausted.
56      * Has a value of {@value}.
57      */

58     public static final int EOF = -1;
59
60     /**
61      * Get the encoding being used to convert characters.
62      * @return The current encoding.
63      */

64     public abstract String JavaDoc getEncoding ();
65
66     /**
67      * Set the encoding to the given character set.
68      * If the current encoding is the same as the requested encoding,
69      * this method is a no-op. Otherwise any subsequent characters read from
70      * this source will have been decoded using the given character set.<p>
71      * If characters have already been consumed from this source, it is expected
72      * that an exception will be thrown if the characters read so far would
73      * be different if the encoding being set was used from the start.
74      * @param character_set The character set to use to convert characters.
75      * @exception ParserException If a character mismatch occurs between
76      * characters already provided and those that would have been returned
77      * had the new character set been in effect from the beginning. An
78      * exception is also thrown if the character set is not recognized.
79      */

80     public abstract void setEncoding (String JavaDoc character_set)
81         throws
82             ParserException;
83
84     //
85
// Reader overrides
86
//
87

88     /**
89      * Does nothing.
90      * It's supposed to close the source, but use {@link #destroy} instead.
91      * @see #destroy
92      */

93     public abstract void close () throws IOException JavaDoc;
94
95     /**
96      * Read a single character.
97      * This method will block until a character is available,
98      * an I/O error occurs, or the source is exhausted.
99      * @return The character read, as an integer in the range 0 to 65535
100      * (<tt>0x00-0xffff</tt>), or {@link #EOF} if the source is exhausted.
101      * @exception IOException If an I/O error occurs.
102      */

103     public abstract int read () throws IOException JavaDoc;
104
105     /**
106      * Read characters into a portion of an array. This method will block
107      * until some input is available, an I/O error occurs, or the source is
108      * exhausted.
109      * @param cbuf Destination buffer
110      * @param off Offset at which to start storing characters
111      * @param len Maximum number of characters to read
112      * @return The number of characters read, or {@link #EOF} if the source is
113      * exhausted.
114      * @exception IOException If an I/O error occurs.
115      */

116     public abstract int read (char[] cbuf, int off, int len) throws IOException JavaDoc;
117
118     /**
119      * Read characters into an array.
120      * This method will block until some input is available, an I/O error occurs,
121      * or the source is exhausted.
122      * @param cbuf Destination buffer.
123      * @return The number of characters read, or {@link #EOF} if the source is
124      * exhausted.
125      * @exception IOException If an I/O error occurs.
126      */

127     public abstract int read (char[] cbuf) throws IOException JavaDoc;
128
129     /**
130      * Tell whether this source is ready to be read.
131      * @return <code>true</code> if the next read() is guaranteed not to block
132      * for input, <code>false</code> otherwise.
133      * Note that returning false does not guarantee that the next read will block.
134      * @exception IOException If an I/O error occurs.
135      */

136     public abstract boolean ready () throws IOException JavaDoc;
137
138     /**
139      * Reset the source.
140      * Repositions the read point to begin at zero.
141      * @exception IllegalStateException If the source has been closed.
142      */

143     public abstract void reset ();
144
145     /**
146      * Tell whether this source supports the mark() operation.
147      * @return <code>true</code> if and only if this source supports the mark
148      * operation.
149      */

150     public abstract boolean markSupported ();
151
152     /**
153      * Mark the present position.
154      * Subsequent calls to {@link #reset}
155      * will attempt to reposition the source to this point. Not all
156      * sources support the mark() operation.
157      * @param readAheadLimit The minimum number of characters that can be read
158      * before this mark becomes invalid.
159      * @exception IOException If an I/O error occurs.
160      */

161     public abstract void mark (int readAheadLimit) throws IOException JavaDoc;
162
163     /**
164      * Skip characters.
165      * This method will block until some characters are available,
166      * an I/O error occurs, or the source is exhausted.
167      * <em>Note: n is treated as an int</em>
168      * @param n The number of characters to skip.
169      * @return The number of characters actually skipped
170      * @exception IllegalArgumentException If <code>n</code> is negative.
171      * @exception IOException If an I/O error occurs.
172      */

173     public abstract long skip (long n) throws IOException JavaDoc;
174
175     //
176
// Methods not in your Daddy's Reader
177
//
178

179     /**
180      * Undo the read of a single character.
181      * @exception IOException If the source is closed or no characters have
182      * been read.
183      */

184     public abstract void unread () throws IOException JavaDoc;
185
186     /**
187      * Retrieve a character again.
188      * @param offset The offset of the character.
189      * @return The character at <code>offset</code>.
190      * @exception IOException If the source is closed or the offset is beyond
191      * {@link #offset()}.
192      */

193     public abstract char getCharacter (int offset) throws IOException JavaDoc;
194
195     /**
196      * Retrieve characters again.
197      * @param array The array of characters.
198      * @param offset The starting position in the array where characters are to be placed.
199      * @param start The starting position, zero based.
200      * @param end The ending position
201      * (exclusive, i.e. the character at the ending position is not included),
202      * zero based.
203      * @exception IOException If the source is closed or the start or end is
204      * beyond {@link #offset()}.
205      */

206     public abstract void getCharacters (char[] array, int offset, int start, int end) throws IOException JavaDoc;
207
208     /**
209      * Retrieve a string comprised of characters already read.
210      * @param offset The offset of the first character.
211      * @param length The number of characters to retrieve.
212      * @return A string containing the <code>length</code> characters at <code>offset</code>.
213      * @exception IOException If the source is closed.
214      */

215     public abstract String JavaDoc getString (int offset, int length) throws IOException JavaDoc;
216
217     /**
218      * Append characters already read into a <code>StringBuffer</code>.
219      * @param buffer The buffer to append to.
220      * @param offset The offset of the first character.
221      * @param length The number of characters to retrieve.
222      * @exception IOException If the source is closed or the offset or
223      * (offset + length) is beyond {@link #offset()}.
224      */

225     public abstract void getCharacters (StringBuffer JavaDoc buffer, int offset, int length) throws IOException JavaDoc;
226
227     /**
228      * Close the source.
229      * Once a source has been closed, further {@link #read() read},
230      * {@link #ready ready}, {@link #mark mark}, {@link #reset reset},
231      * {@link #skip skip}, {@link #unread unread},
232      * {@link #getCharacter getCharacter} or {@link #getString getString}
233      * invocations will throw an IOException.
234      * Closing a previously-closed source, however, has no effect.
235      * @exception IOException If an I/O error occurs.
236      */

237     public abstract void destroy () throws IOException JavaDoc;
238
239     /**
240      * Get the position (in characters).
241      * @return The number of characters that have already been read, or
242      * {@link #EOF} if the source is closed.
243      */

244     public abstract int offset ();
245
246     /**
247      * Get the number of available characters.
248      * @return The number of characters that can be read without blocking.
249      */

250     public abstract int available ();
251 }
252
Popular Tags