StreamTests


1   // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2   // http://sourceforge.org/projects/htmlparser
3   // Copyright (C) 2004 Derrick Oswald
4   //
5   // Revision Control Information
6   //
7   // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/StreamTests.java,v $
8   // $Author: derrickoswald $
9   // $Date: 2004/01/14 02:53:47 $
10  // $Revision: 1.16 $
11  //
12  // This library is free software; you can redistribute it and/or
13  // modify it under the terms of the GNU Lesser General Public
14  // License as published by the Free Software Foundation; either
15  // version 2.1 of the License, or (at your option) any later version.
16  //
17  // This library is distributed in the hope that it will be useful,
18  // but WITHOUT ANY WARRANTY; without even the implied warranty of
19  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  // Lesser General Public License for more details.
21  //
22  // You should have received a copy of the GNU Lesser General Public
23  // License along with this library; if not, write to the Free Software
24  // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  //
26  
27  package org.htmlparser.tests.lexerTests;
28  
29  import java.io.BufferedInputStream  ;
30  import java.io.ByteArrayInputStream  ;
31  import java.io.IOException  ;
32  import java.net.MalformedURLException  ;
33  import java.net.URL  ;
34  import java.net.URLConnection  ;
35  import java.util.ArrayList  ;
36  
37  import org.htmlparser.lexer.Stream;
38  import org.htmlparser.tests.ParserTestCase;
39  
40  public class StreamTests extends ParserTestCase
41  {
42      static
43      {
44          System.setProperty ("org.htmlparser.tests.lexerTests.StreamTests", "StreamTests");
45      }
46  
47      /**
48       * Test the first level stream class.
49       */
50      public StreamTests (String   name)
51      {
52          super (name);
53      }
54  
55      /**
56       * Test initialization with a null value.
57       */
58      public void testNull () throws IOException  
59      {
60          Stream   stream;
61  
62          stream = new Stream   (null);
63          assertTrue ("erroneous character", -1 == stream.read ());
64      }
65  
66      /**
67       * Test initialization with an empty input stream.
68       */
69      public void testEmpty () throws IOException  
70      {
71          Stream   stream;
72  
73          stream = new Stream   (new ByteArrayInputStream   (new byte[0]));
74          assertTrue ("erroneous character", -1 == stream.read ());
75      }
76  
77      /**
78       * Test initialization with an input stream having only one byte.
79       */
80      public void testOneByte () throws IOException  
81      {
82          Stream   stream;
83  
84          stream = new Stream   (new ByteArrayInputStream   (new byte[] { (byte)0x42 }));
85          assertTrue ("erroneous character", 0x42 == stream.read ());
86          assertTrue ("erroneous character", -1 == stream.read ());
87      }
88  
89      /**
90       * Test that the same bytes are returned as with a naked input stream.
91       */
92      public void testSameBytes () throws IOException  
93      {
94          String   link;
95          URL   url;
96          URLConnection   connection1;
97          URLConnection   connection2;
98          BufferedInputStream   in;
99          int b1;
100         int b2;
101         Stream   stream;
102         int index;
103 
104         // pick a big file
105         link = "http://htmlparser.sourceforge.net/HTMLParser_Coverage.html";
106         try
107         {
108             url = new URL   (link);
109             connection1 = url.openConnection ();
110             connection1.connect ();
111             in = new BufferedInputStream   (connection1.getInputStream ());
112             connection2 = url.openConnection ();
113             connection2.connect ();
114             stream = new Stream   (connection2.getInputStream ());
115             index = 0;
116             while (-1 != (b1 = in.read ()))
117             {
118                 b2 = stream.read ();
119                 if (b1 != b2)
120                     fail ("bytes differ at position " + index + ", expected " + b1 + ", actual " + b2);
121                 index++;
122             }
123             b2 = stream.read ();
124             stream.close ();
125             in.close ();
126             assertTrue ("extra bytes", b2 == -1);
127         }
128         catch (MalformedURLException   murle)
129         {
130             fail ("bad url " + link);
131         }
132     }
133 
134     /**
135      * Test that threading works and is faster than a naked input stream.
136      * This, admittedly contrived, test illustrates the following principles:
137      * <li>the underlying network code is already multi-threaded, so there may
138      * not be a need to use application level threading in most cases</li>
139      * <li>results may vary based on network connection speed, JVM, and
140      * especially application usage pattterns</li>
141      * <li>issues only show up with large files, in my case greater than
142      * about 72,400 bytes, since the underlying network code reads that far
143      * into the socket before throttling back and waiting</li>
144      * <li>this is only applicable to TCP/IP usage, disk access would not
145      * have this problem, since the cost of reading disk is much less than
146      * the round-trip cost of a TCP/IP handshake</li>
147      * So, what does it do? It sets up to read a URL two ways, once with a
148      * naked input stream, and then with the Stream class. In each case, before
149      * reading, it delays about 2 seconds (for me anyway) to allow the java.net
150      * implementation to read ahead and then throttle back. The threaded Stream
151      * though keeps reading while this delay is going on and hence gets a big
152      * chunk of the file in memory. This advantage translates to a faster
153      * spin through the bytes after the delay.
154      */
155     public void testThreaded () throws IOException  
156     {
157         String   link;
158         URL   url;
159         URLConnection   connection;
160         BufferedInputStream   in;
161         int index;
162         long begin;
163         double bytes_per_second;
164         int delay;
165         Stream   stream;
166         long time1;
167         long time2;
168         Thread   thread;
169         long available1;
170         long available2;
171 
172         // pick a big file
173         link = "http://htmlparser.sourceforge.net/javadoc_1_3/index-all.html";
174         try
175         {
176             url = new URL   (link);
177 
178             // estimate the connection speed
179             System.gc ();
180             index = 0;
181             connection = url.openConnection ();
182             connection.connect ();
183             in = new BufferedInputStream   (connection.getInputStream ());
184             begin = System.currentTimeMillis ();
185             while (-1 != in.read ())
186                 index++;
187             bytes_per_second = 1000.0 * index / (System.currentTimeMillis () - begin);
188             in.close ();
189 
190             delay = (int)(1.5 * 1000 * bytes_per_second / 72400); // 72400 is the throttle limit on my machine
191 
192             // try the naked input stream
193             System.gc ();
194             index = 0;
195             available1 = 0;
196             connection = url.openConnection ();
197             connection.connect ();
198             in = new BufferedInputStream   (connection.getInputStream ());
199             try
200             {
201                 Thread.sleep (delay);
202             }
203             catch (Exception   e)
204             {
205                 e.printStackTrace ();
206             }
207             begin = System.currentTimeMillis ();
208             do
209             {
210                 index++;
211                 if (0 == index % 1000)
212                     available1 += in.available ();
213             }
214             while (-1 != in.read ());
215             time1 = System.currentTimeMillis () - begin;
216             in.close ();
217 
218             // try a threaded stream
219             System.gc ();
220             index = 0;
221             available2 = 0;
222             connection = url.openConnection ();
223             connection.connect ();
224             int length = connection.getContentLength ();
225             stream = new Stream   (connection.getInputStream (), length);
226             thread = new Thread   (stream);
227             thread.setPriority (Thread.NORM_PRIORITY - 1);
228             thread.start ();
229             try
230             {
231                 Thread.sleep (delay);
232             }
233             catch (Exception   e)
234             {
235                 e.printStackTrace ();
236             }
237             begin = System.currentTimeMillis ();
238             do
239             {
240                 index++;
241                 if (0 == index % 1000)
242                     available2 += stream.available ();
243             }
244             while (-1 != stream.read ());
245             time2 = System.currentTimeMillis () - begin;
246 
247 //            System.out.println ("fills: " + stream.fills);
248 //            System.out.println ("reallocations: " + stream.reallocations);
249 //            System.out.println ("synchronous: " + stream.synchronous);
250 //            System.out.println ("buffer size: " + stream.mBuffer.length);
251 //            System.out.println ("bytes: " + stream.mLevel);
252             stream.close ();
253 
254 //            System.out.println ("time (" + time2 + ") vs. (" + time1 + ") for " + index + " bytes");
255             double samples = index / 1000;
256 //            System.out.println ("average available bytes (" + available2/samples + ") vs. (" + available1/samples + ")");
257 
258             assertTrue ("slower (" + time2 + ") vs. (" + time1 + ")", time2 < time1);
259             assertTrue ("average available bytes not greater (" + available2/samples + ") vs. (" + available1/samples + ")", available2 > available1);
260         }
261         catch (MalformedURLException   murle)
262         {
263             fail ("bad url " + link);
264         }
265     }
266 
267     /**
268      * Test that mark and reset work as per the contract.
269      */
270     public void testMarkReset () throws IOException  
271     {
272         String   link;
273         ArrayList   bytes1;
274         ArrayList   bytes2;
275         URL   url;
276         URLConnection   connection;
277         Stream   stream;
278         int b;
279         int index;
280 
281         // pick a small file > 2000 bytes
282         link = "http://htmlparser.sourceforge.net/javadoc_1_3/overview-summary.html";
283         bytes1 = new ArrayList   ();
284         bytes2 = new ArrayList   ();
285         try
286         {
287             url = new URL   (link);
288             connection = url.openConnection ();
289             connection.connect ();
290             stream = new Stream   (connection.getInputStream ());
291             assertTrue ("mark not supported", stream.markSupported ());
292 
293             for (int i = 0; i < 1000; i++)
294             {
295                 b = stream.read ();
296                 bytes1.add (new Byte   ((byte)b));
297             }
298             stream.reset ();
299             for (int i = 0; i < 1000; i++)
300             {
301                 b = stream.read ();
302                 bytes2.add (new Byte   ((byte)b));
303             }
304 
305             index = 0;
306             while (index < bytes1.size ())
307             {
308                 assertEquals ("bytes differ at position " + index, bytes1.get (index), bytes2.get (index));
309                 index++;
310             }
311 
312             bytes1.clear ();
313             bytes2.clear ();
314 
315             stream.mark (1000); // the 1000 is ignored
316             for (int i = 0; i < 1000; i++)
317             {
318                 b = stream.read ();
319                 bytes1.add (new Byte   ((byte)b));
320             }
321             stream.reset ();
322             for (int i = 0; i < 1000; i++)
323             {
324                 b = stream.read ();
325                 bytes2.add (new Byte   ((byte)b));
326             }
327             stream.close ();
328 
329             index = 0;
330             while (index < bytes1.size ())
331             {
332                 assertEquals ("bytes differ at position " + (index + 1000), bytes1.get (index), bytes2.get (index));
333                 index++;
334             }
335         }
336         catch (MalformedURLException   murle)
337         {
338             fail ("bad url " + link);
339         }
340     }
341 
342     /**
343      * Test that mark and reset work as per the contract when threaded.
344      */
345     public void testMarkResetThreaded () throws IOException  
346     {
347         String   link;
348         ArrayList   bytes1;
349         ArrayList   bytes2;
350         URL   url;
351         URLConnection   connection;
352         Stream   stream;
353         int b;
354         int index;
355 
356         // pick a small file > 2000 bytes
357         link = "http://htmlparser.sourceforge.net/javadoc_1_3/overview-summary.html";
358         bytes1 = new ArrayList   ();
359         bytes2 = new ArrayList   ();
360         try
361         {
362             url = new URL   (link);
363             connection = url.openConnection ();
364             connection.connect ();
365             stream = new Stream   (connection.getInputStream ());
366             (new Thread   (stream)).start ();
367             assertTrue ("mark not supported", stream.markSupported ());
368 
369             for (int i = 0; i < 1000; i++)
370             {
371                 b = stream.read ();
372                 bytes1.add (new Byte   ((byte)b));
373             }
374             stream.reset ();
375             for (int i = 0; i < 1000; i++)
376             {
377                 b = stream.read ();
378                 bytes2.add (new Byte   ((byte)b));
379             }
380 
381             index = 0;
382             while (index < bytes1.size ())
383             {
384                 assertEquals ("bytes differ at position " + index, bytes1.get (index), bytes2.get (index));
385                 index++;
386             }
387 
388             bytes1.clear ();
389             bytes2.clear ();
390 
391             stream.mark (1000); // the 1000 is ignored
392             for (int i = 0; i < 1000; i++)
393             {
394                 b = stream.read ();
395                 bytes1.add (new Byte   ((byte)b));
396             }
397             stream.reset ();
398             for (int i = 0; i < 1000; i++)
399             {
400                 b = stream.read ();
401                 bytes2.add (new Byte   ((byte)b));
402             }
403             stream.close ();
404 
405             index = 0;
406             while (index < bytes1.size ())
407             {
408                 assertEquals ("bytes differ at position " + (index + 1000), bytes1.get (index), bytes2.get (index));
409                 index++;
410             }
411         }
412         catch (MalformedURLException   murle)
413         {
414             fail ("bad url " + link);
415         }
416     }
417 
418     /**
419      * Test close.
420      */
421     public void testClose () throws IOException  
422     {
423         Stream   stream;
424 
425         stream = new Stream   (new ByteArrayInputStream   (new byte[] { (byte)0x42, (byte)0x78 }));
426         assertTrue ("erroneous character", 0x42 == stream.read ());
427         stream.close ();
428         assertTrue ("not closed", -1 == stream.read ());
429    }
430 }
431
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags