KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > lexer > StringSource


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/lexer/StringSource.java,v $
// $Author: derrickoswald $
// $Date: 2005/02/13 22:45:47 $
// $Revision: 1.2 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.lexer;

import java.io.IOException;
import org.htmlparser.util.ParserException;
31
32 /**
33  * A source of characters based on a String.
34  */

35 public class StringSource
36     extends
37         Source
38 {
39     /**
40      * The source of characters.
41      */

42     protected String JavaDoc mString;
43
44     /**
45      * The current offset into the string.
46      */

47     protected int mOffset;
48
49     /**
50      * The encoding to report.
51      * Only used by {@link #getEncoding}.
52      */

53     protected String JavaDoc mEncoding;
54
55     /**
56      * The bookmark.
57      */

58     protected int mMark;
59
60     /**
61      * Construct a source using the provided string.
62      * Until it is set, the encoding will be reported as ISO-8859-1.
63      * @param string The source of characters.
64      */

65     public StringSource (String JavaDoc string)
66     {
67         this (string, "ISO-8859-1");
68     }
69
70     /**
71      * Construct a source using the provided string and encoding.
72      * The encoding is only used by {@link #getEncoding}.
73      * @param string The source of characters.
74      * @param character_set The encoding to report.
75      */

76     public StringSource (String JavaDoc string, String JavaDoc character_set)
77     {
78         mString = (null == string) ? "" : string;
79         mOffset = 0;
80         mEncoding = character_set;
81         mMark = -1;
82     }
83
84     /**
85      * Get the encoding being used to convert characters.
86      * @return The current encoding.
87      */

88     public String JavaDoc getEncoding ()
89     {
90         return (mEncoding);
91     }
92
93     /**
94      * Set the encoding to the given character set.
95      * This simply sets the encoding reported by {@link #getEncoding}.
96      * @param character_set The character set to use to convert characters.
97      * @exception ParserException <em>Not thrown</em>.
98      */

99     public void setEncoding (String JavaDoc character_set)
100         throws
101             ParserException
102     {
103         mEncoding = character_set;
104     }
105
106     //
107
// Reader overrides
108
//
109

110     /**
111      * Does nothing.
112      * It's supposed to close the source, but use destroy() instead.
113      * @see #destroy
114      */

115     public void close () throws IOException JavaDoc
116     {
117     }
118
119     /**
120      * Read a single character.
121      * @return The character read, as an integer in the range 0 to 65535
122      * (<tt>0x00-0xffff</tt>), or {@link #EOF EOF} if the source is exhausted.
123      * @exception IOException If an I/O error occurs.
124      */

125     public int read () throws IOException JavaDoc
126     {
127         int ret;
128
129         if (null == mString)
130             throw new IOException JavaDoc ("source is closed");
131         else if (mOffset >= mString.length ())
132             ret = EOF;
133         else
134         {
135             ret = mString.charAt (mOffset);
136             mOffset++;
137         }
138
139         return (ret);
140     }
141
142     /**
143      * Read characters into a portion of an array.
144      * @param cbuf Destination buffer
145      * @param off Offset at which to start storing characters
146      * @param len Maximum number of characters to read
147      * @return The number of characters read, or {@link #EOF EOF} if the source
148      * is exhausted.
149      * @exception IOException If an I/O error occurs.
150      */

151     public int read (char[] cbuf, int off, int len) throws IOException JavaDoc
152     {
153         int length;
154         int ret;
155
156         if (null == mString)
157             throw new IOException JavaDoc ("source is closed");
158         else
159         {
160             length = mString.length ();
161             if (mOffset >= length)
162                 ret = EOF;
163             else
164             {
165                 if (len > length - mOffset)
166                     len = length - mOffset;
167                 mString.getChars (mOffset, mOffset + len, cbuf, off);
168                 mOffset += len;
169                 ret = len;
170             }
171         }
172
173         return (ret);
174     }
175
176     /**
177      * Read characters into an array.
178      * @param cbuf Destination buffer.
179      * @return The number of characters read, or {@link #EOF EOF} if the source
180      * is exhausted.
181      * @exception IOException If an I/O error occurs.
182      */

183
184     public int read (char[] cbuf) throws IOException JavaDoc
185     {
186         return (read (cbuf, 0, cbuf.length));
187     }
188
189     /**
190      * Tell whether this source is ready to be read.
191      * @return Equivalent to a non-zero {@link #available()}, i.e. there are
192      * still more characters to read.
193      * @exception IOException Thrown if the source is closed.
194      */

195     public boolean ready () throws IOException JavaDoc
196     {
197         if (null == mString)
198             throw new IOException JavaDoc ("source is closed");
199         return (mOffset < mString.length ());
200     }
201
202     /**
203      * Reset the source.
204      * Repositions the read point to begin at zero.
205      * @exception IllegalStateException If the source has been closed.
206      */

207     public void reset ()
208     {
209         if (null == mString)
210             throw new IllegalStateException JavaDoc ("source is closed");
211         else
212             if (-1 != mMark)
213                 mOffset = mMark;
214             else
215                 mOffset = 0;
216     }
217
218     /**
219      * Tell whether this source supports the mark() operation.
220      * @return <code>true</code>.
221      */

222     public boolean markSupported ()
223     {
224         return (true);
225     }
226
227     /**
228      * Mark the present position in the source.
229      * Subsequent calls to {@link #reset()}
230      * will attempt to reposition the source to this point.
231      * @param readAheadLimit <em>Not used.</em>
232      * @exception IOException Thrown if the source is closed.
233      *
234      */

235     public void mark (int readAheadLimit) throws IOException JavaDoc
236     {
237         if (null == mString)
238             throw new IOException JavaDoc ("source is closed");
239         mMark = mOffset;
240     }
241
242     /**
243      * Skip characters.
244      * <em>Note: n is treated as an int</em>
245      * @param n The number of characters to skip.
246      * @return The number of characters actually skipped
247      * @exception IllegalArgumentException If <code>n</code> is negative.
248      * @exception IOException If the source is closed.
249      */

250     public long skip (long n) throws IOException JavaDoc
251     {
252         int length;
253         long ret;
254
255         if (null == mString)
256             throw new IOException JavaDoc ("source is closed");
257         if (n < 0)
258             throw new IllegalArgumentException JavaDoc ("cannot skip backwards");
259         else
260         {
261             length = mString.length ();
262             if (mOffset >= length)
263                 n = 0L;
264             else if (n > length - mOffset)
265                 n = length - mOffset;
266             mOffset += n;
267             ret = n;
268         }
269         
270         return (ret);
271     }
272
273     //
274
// Methods not in your Daddy's Reader
275
//
276

277     /**
278      * Undo the read of a single character.
279      * @exception IOException If no characters have been read or the source is closed.
280      */

281     public void unread () throws IOException JavaDoc
282     {
283         if (null == mString)
284             throw new IOException JavaDoc ("source is closed");
285         else if (mOffset <= 0)
286             throw new IOException JavaDoc ("can't unread no characters");
287         else
288             mOffset--;
289     }
290
291     /**
292      * Retrieve a character again.
293      * @param offset The offset of the character.
294      * @return The character at <code>offset</code>.
295      * @exception IOException If the source is closed or an attempt is made to
296      * read beyond {@link #offset()}.
297      */

298     public char getCharacter (int offset) throws IOException JavaDoc
299     {
300         char ret;
301
302         if (null == mString)
303             throw new IOException JavaDoc ("source is closed");
304         else if (offset >= mOffset)
305             throw new IOException JavaDoc ("read beyond current offset");
306         else
307             ret = mString.charAt (offset);
308
309         return (ret);
310     }
311
312     /**
313      * Retrieve characters again.
314      * @param array The array of characters.
315      * @param offset The starting position in the array where characters are to be placed.
316      * @param start The starting position, zero based.
317      * @param end The ending position
318      * (exclusive, i.e. the character at the ending position is not included),
319      * zero based.
320      * @exception IOException If the source is closed or an attempt is made to
321      * read beyond {@link #offset()}.
322      */

323     public void getCharacters (char[] array, int offset, int start, int end) throws IOException JavaDoc
324     {
325         if (null == mString)
326             throw new IOException JavaDoc ("source is closed");
327         else
328         {
329             if (end > mOffset)
330                 throw new IOException JavaDoc ("read beyond current offset");
331             else
332                 mString.getChars (start, end, array, offset);
333         }
334     }
335
336     /**
337      * Retrieve a string comprised of characters already read.
338      * Asking for characters ahead of {@link #offset()} will throw an exception.
339      * @param offset The offset of the first character.
340      * @param length The number of characters to retrieve.
341      * @return A string containing the <code>length</code> characters at <code>offset</code>.
342      * @exception IOException If the source is closed or an attempt is made to
343      * read beyond {@link #offset()}.
344      */

345     public String JavaDoc getString (int offset, int length) throws IOException JavaDoc
346     {
347         String JavaDoc ret;
348
349         if (null == mString)
350             throw new IOException JavaDoc ("source is closed");
351         else
352         {
353             if (offset + length > mOffset)
354                 throw new IOException JavaDoc ("read beyond end of string");
355             else
356                 ret = mString.substring (offset, offset + length);
357         }
358         
359         return (ret);
360     }
361
362     /**
363      * Append characters already read into a <code>StringBuffer</code>.
364      * Asking for characters ahead of {@link #offset()} will throw an exception.
365      * @param buffer The buffer to append to.
366      * @param offset The offset of the first character.
367      * @param length The number of characters to retrieve.
368      * @exception IOException If the source is closed or an attempt is made to
369      * read beyond {@link #offset()}.
370      */

371     public void getCharacters (StringBuffer JavaDoc buffer, int offset, int length) throws IOException JavaDoc
372     {
373         if (null == mString)
374             throw new IOException JavaDoc ("source is closed");
375         else
376         {
377             if (offset + length > mOffset)
378                 throw new IOException JavaDoc ("read beyond end of string");
379             else
380                 buffer.append (mString.substring (offset, offset + length));
381         }
382     }
383
384     /**
385      * Close the source.
386      * Once a source has been closed, further {@link #read() read},
387      * {@link #ready ready}, {@link #mark mark}, {@link #reset reset},
388      * {@link #skip skip}, {@link #unread unread},
389      * {@link #getCharacter getCharacter} or {@link #getString getString}
390      * invocations will throw an IOException.
391      * Closing a previously-closed source, however, has no effect.
392      * @exception IOException <em>Not thrown</em>
393      */

394     public void destroy () throws IOException JavaDoc
395     {
396         mString = null;
397     }
398
399     /**
400      * Get the position (in characters).
401      * @return The number of characters that have already been read, or
402      * {@link #EOF EOF} if the source is closed.
403      */

404     public int offset ()
405     {
406         int ret;
407
408         if (null == mString)
409             ret = EOF;
410         else
411             ret = mOffset;
412
413         return (ret);
414     }
415
416     /**
417      * Get the number of available characters.
418      * @return The number of characters that can be read or zero if the source
419      * is closed.
420      */

421     public int available ()
422     {
423         int ret;
424
425         if (null == mString)
426             ret = 0;
427         else
428             ret = mString.length () - mOffset;
429
430         return (ret);
431     }
432 }
433
Popular Tags