// KickJava   Java API By Example, From Geeks To Geeks.

// Java > Open Source Codes > org > htmlparser > tests > lexerTests > StreamTests


// HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java,v $
// $Author: derrickoswald $
// $Date: 2004/01/14 02:53:47 $
// $Revision: 1.16 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//


package org.htmlparser.tests.lexerTests;

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;

import org.htmlparser.lexer.Stream;
import org.htmlparser.tests.ParserTestCase;

40 public class StreamTests extends ParserTestCase
41 {
42     static
43     {
44         System.setProperty ("org.htmlparser.tests.lexerTests.StreamTests", "StreamTests");
45     }
46
47     /**
48      * Test the first level stream class.
49      */

50     public StreamTests (String JavaDoc name)
51     {
52         super (name);
53     }
54
55     /**
56      * Test initialization with a null value.
57      */

58     public void testNull () throws IOException JavaDoc
59     {
60         Stream JavaDoc stream;
61
62         stream = new Stream JavaDoc (null);
63         assertTrue ("erroneous character", -1 == stream.read ());
64     }
65
66     /**
67      * Test initialization with an empty input stream.
68      */

69     public void testEmpty () throws IOException JavaDoc
70     {
71         Stream JavaDoc stream;
72
73         stream = new Stream JavaDoc (new ByteArrayInputStream JavaDoc (new byte[0]));
74         assertTrue ("erroneous character", -1 == stream.read ());
75     }
76
77     /**
78      * Test initialization with an input stream having only one byte.
79      */

80     public void testOneByte () throws IOException JavaDoc
81     {
82         Stream JavaDoc stream;
83
84         stream = new Stream JavaDoc (new ByteArrayInputStream JavaDoc (new byte[] { (byte)0x42 }));
85         assertTrue ("erroneous character", 0x42 == stream.read ());
86         assertTrue ("erroneous character", -1 == stream.read ());
87     }
88
89     /**
90      * Test that the same bytes are returned as with a naked input stream.
91      */

92     public void testSameBytes () throws IOException JavaDoc
93     {
94         String JavaDoc link;
95         URL JavaDoc url;
96         URLConnection JavaDoc connection1;
97         URLConnection JavaDoc connection2;
98         BufferedInputStream JavaDoc in;
99         int b1;
100         int b2;
101         Stream JavaDoc stream;
102         int index;
103
104         // pick a big file
105
link = "http://htmlparser.sourceforge.net/HTMLParser_Coverage.html";
106         try
107         {
108             url = new URL JavaDoc (link);
109             connection1 = url.openConnection ();
110             connection1.connect ();
111             in = new BufferedInputStream JavaDoc (connection1.getInputStream ());
112             connection2 = url.openConnection ();
113             connection2.connect ();
114             stream = new Stream JavaDoc (connection2.getInputStream ());
115             index = 0;
116             while (-1 != (b1 = in.read ()))
117             {
118                 b2 = stream.read ();
119                 if (b1 != b2)
120                     fail ("bytes differ at position " + index + ", expected " + b1 + ", actual " + b2);
121                 index++;
122             }
123             b2 = stream.read ();
124             stream.close ();
125             in.close ();
126             assertTrue ("extra bytes", b2 == -1);
127         }
128         catch (MalformedURLException JavaDoc murle)
129         {
130             fail ("bad url " + link);
131         }
132     }
133
134     /**
135      * Test that threading works and is faster than a naked input stream.
136      * This, admittedly contrived, test illustrates the following principles:
137      * <li>the underlying network code is already multi-threaded, so there may
138      * not be a need to use application level threading in most cases</li>
139      * <li>results may vary based on network connection speed, JVM, and
140      * especially application usage pattterns</li>
141      * <li>issues only show up with large files, in my case greater than
142      * about 72,400 bytes, since the underlying network code reads that far
143      * into the socket before throttling back and waiting</li>
144      * <li>this is only applicable to TCP/IP usage, disk access would not
145      * have this problem, since the cost of reading disk is much less than
146      * the round-trip cost of a TCP/IP handshake</li>
147      * So, what does it do? It sets up to read a URL two ways, once with a
148      * naked input stream, and then with the Stream class. In each case, before
149      * reading, it delays about 2 seconds (for me anyway) to allow the java.net
150      * implementation to read ahead and then throttle back. The threaded Stream
151      * though keeps reading while this delay is going on and hence gets a big
152      * chunk of the file in memory. This advantage translates to a faster
153      * spin through the bytes after the delay.
154      */

155     public void testThreaded () throws IOException JavaDoc
156     {
157         String JavaDoc link;
158         URL JavaDoc url;
159         URLConnection JavaDoc connection;
160         BufferedInputStream JavaDoc in;
161         int index;
162         long begin;
163         double bytes_per_second;
164         int delay;
165         Stream JavaDoc stream;
166         long time1;
167         long time2;
168         Thread JavaDoc thread;
169         long available1;
170         long available2;
171
172         // pick a big file
173
link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";
174         try
175         {
176             url = new URL JavaDoc (link);
177
178             // estimate the connection speed
179
System.gc ();
180             index = 0;
181             connection = url.openConnection ();
182             connection.connect ();
183             in = new BufferedInputStream JavaDoc (connection.getInputStream ());
184             begin = System.currentTimeMillis ();
185             while (-1 != in.read ())
186                 index++;
187             bytes_per_second = 1000.0 * index / (System.currentTimeMillis () - begin);
188             in.close ();
189
190             delay = (int)(1.5 * 1000 * bytes_per_second / 72400); // 72400 is the throttle limit on my machine
191

192             // try the naked input stream
193
System.gc ();
194             index = 0;
195             available1 = 0;
196             connection = url.openConnection ();
197             connection.connect ();
198             in = new BufferedInputStream JavaDoc (connection.getInputStream ());
199             try
200             {
201                 Thread.sleep (delay);
202             }
203             catch (Exception JavaDoc e)
204             {
205                 e.printStackTrace ();
206             }
207             begin = System.currentTimeMillis ();
208             do
209             {
210                 index++;
211                 if (0 == index % 1000)
212                     available1 += in.available ();
213             }
214             while (-1 != in.read ());
215             time1 = System.currentTimeMillis () - begin;
216             in.close ();
217
218             // try a threaded stream
219
System.gc ();
220             index = 0;
221             available2 = 0;
222             connection = url.openConnection ();
223             connection.connect ();
224             int length = connection.getContentLength ();
225             stream = new Stream JavaDoc (connection.getInputStream (), length);
226             thread = new Thread JavaDoc (stream);
227             thread.setPriority (Thread.NORM_PRIORITY - 1);
228             thread.start ();
229             try
230             {
231                 Thread.sleep (delay);
232             }
233             catch (Exception JavaDoc e)
234             {
235                 e.printStackTrace ();
236             }
237             begin = System.currentTimeMillis ();
238             do
239             {
240                 index++;
241                 if (0 == index % 1000)
242                     available2 += stream.available ();
243             }
244             while (-1 != stream.read ());
245             time2 = System.currentTimeMillis () - begin;
246
247 // System.out.println ("fills: " + stream.fills);
248
// System.out.println ("reallocations: " + stream.reallocations);
249
// System.out.println ("synchronous: " + stream.synchronous);
250
// System.out.println ("buffer size: " + stream.mBuffer.length);
251
// System.out.println ("bytes: " + stream.mLevel);
252
stream.close ();
253
254 // System.out.println ("time (" + time2 + ") vs. (" + time1 + ") for " + index + " bytes");
255
double samples = index / 1000;
256 // System.out.println ("average available bytes (" + available2/samples + ") vs. (" + available1/samples + ")");
257

258             assertTrue ("slower (" + time2 + ") vs. (" + time1 + ")", time2 < time1);
259             assertTrue ("average available bytes not greater (" + available2/samples + ") vs. (" + available1/samples + ")", available2 > available1);
260         }
261         catch (MalformedURLException JavaDoc murle)
262         {
263             fail ("bad url " + link);
264         }
265     }
266
267     /**
268      * Test that mark and reset work as per the contract.
269      */

270     public void testMarkReset () throws IOException JavaDoc
271     {
272         String JavaDoc link;
273         ArrayList JavaDoc bytes1;
274         ArrayList JavaDoc bytes2;
275         URL JavaDoc url;
276         URLConnection JavaDoc connection;
277         Stream JavaDoc stream;
278         int b;
279         int index;
280
281         // pick a small file > 2000 bytes
282
link = "http://htmlparser.sourceforge.net/javadoc_1_3/overview-summary.html";
283         bytes1 = new ArrayList JavaDoc ();
284         bytes2 = new ArrayList JavaDoc ();
285         try
286         {
287             url = new URL JavaDoc (link);
288             connection = url.openConnection ();
289             connection.connect ();
290             stream = new Stream JavaDoc (connection.getInputStream ());
291             assertTrue ("mark not supported", stream.markSupported ());
292
293             for (int i = 0; i < 1000; i++)
294             {
295                 b = stream.read ();
296                 bytes1.add (new Byte JavaDoc ((byte)b));
297             }
298             stream.reset ();
299             for (int i = 0; i < 1000; i++)
300             {
301                 b = stream.read ();
302                 bytes2.add (new Byte JavaDoc ((byte)b));
303             }
304
305             index = 0;
306             while (index < bytes1.size ())
307             {
308                 assertEquals ("bytes differ at position " + index, bytes1.get (index), bytes2.get (index));
309                 index++;
310             }
311
312             bytes1.clear ();
313             bytes2.clear ();
314
315             stream.mark (1000); // the 1000 is ignored
316
for (int i = 0; i < 1000; i++)
317             {
318                 b = stream.read ();
319                 bytes1.add (new Byte JavaDoc ((byte)b));
320             }
321             stream.reset ();
322             for (int i = 0; i < 1000; i++)
323             {
324                 b = stream.read ();
325                 bytes2.add (new Byte JavaDoc ((byte)b));
326             }
327             stream.close ();
328
329             index = 0;
330             while (index < bytes1.size ())
331             {
332                 assertEquals ("bytes differ at position " + (index + 1000), bytes1.get (index), bytes2.get (index));
333                 index++;
334             }
335         }
336         catch (MalformedURLException JavaDoc murle)
337         {
338             fail ("bad url " + link);
339         }
340     }
341
342     /**
343      * Test that mark and reset work as per the contract when threaded.
344      */

345     public void testMarkResetThreaded () throws IOException JavaDoc
346     {
347         String JavaDoc link;
348         ArrayList JavaDoc bytes1;
349         ArrayList JavaDoc bytes2;
350         URL JavaDoc url;
351         URLConnection JavaDoc connection;
352         Stream JavaDoc stream;
353         int b;
354         int index;
355
356         // pick a small file > 2000 bytes
357
link = "http://htmlparser.sourceforge.net/javadoc_1_3/overview-summary.html";
358         bytes1 = new ArrayList JavaDoc ();
359         bytes2 = new ArrayList JavaDoc ();
360         try
361         {
362             url = new URL JavaDoc (link);
363             connection = url.openConnection ();
364             connection.connect ();
365             stream = new Stream JavaDoc (connection.getInputStream ());
366             (new Thread JavaDoc (stream)).start ();
367             assertTrue ("mark not supported", stream.markSupported ());
368
369             for (int i = 0; i < 1000; i++)
370             {
371                 b = stream.read ();
372                 bytes1.add (new Byte JavaDoc ((byte)b));
373             }
374             stream.reset ();
375             for (int i = 0; i < 1000; i++)
376             {
377                 b = stream.read ();
378                 bytes2.add (new Byte JavaDoc ((byte)b));
379             }
380
381             index = 0;
382             while (index < bytes1.size ())
383             {
384                 assertEquals ("bytes differ at position " + index, bytes1.get (index), bytes2.get (index));
385                 index++;
386             }
387
388             bytes1.clear ();
389             bytes2.clear ();
390
391             stream.mark (1000); // the 1000 is ignored
392
for (int i = 0; i < 1000; i++)
393             {
394                 b = stream.read ();
395                 bytes1.add (new Byte JavaDoc ((byte)b));
396             }
397             stream.reset ();
398             for (int i = 0; i < 1000; i++)
399             {
400                 b = stream.read ();
401                 bytes2.add (new Byte JavaDoc ((byte)b));
402             }
403             stream.close ();
404
405             index = 0;
406             while (index < bytes1.size ())
407             {
408                 assertEquals ("bytes differ at position " + (index + 1000), bytes1.get (index), bytes2.get (index));
409                 index++;
410             }
411         }
412         catch (MalformedURLException JavaDoc murle)
413         {
414             fail ("bad url " + link);
415         }
416     }
417
418     /**
419      * Test close.
420      */

421     public void testClose () throws IOException JavaDoc
422     {
423         Stream JavaDoc stream;
424
425         stream = new Stream JavaDoc (new ByteArrayInputStream JavaDoc (new byte[] { (byte)0x42, (byte)0x78 }));
426         assertTrue ("erroneous character", 0x42 == stream.read ());
427         stream.close ();
428         assertTrue ("not closed", -1 == stream.read ());
429    }
430 }
// Popular Tags