KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > util > TestGZIPUtils


/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.util;

import junit.framework.TestCase;

import java.io.IOException;

10 /** Unit tests for GZIPUtils methods. */
11 public class TestGZIPUtils extends TestCase {
12   public TestGZIPUtils(String JavaDoc name) {
13     super(name);
14   }
15
16   /* a short, highly compressable, string */
17   String JavaDoc SHORT_TEST_STRING=
18     "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
19
20   /* a short, highly compressable, string */
21   String JavaDoc LONGER_TEST_STRING=
22     SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
23     + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
24     + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
25     + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING;
26
27   /* a snapshot of the nutch webpage */
28   String JavaDoc WEBPAGE=
29   "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
30   + "<html>\n"
31   + "<head>\n"
32   + " <meta http-equiv=\"content-type\"\n"
33   + " content=\"text/html; charset=ISO-8859-1\">\n"
34   + " <title>Nutch</title>\n"
35   + "</head>\n"
36   + "<body>\n"
37   + "<h1\n"
38   + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
39   + " HREF=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
40   + "<small>an open source web-search engine</small></h1>\n"
41   + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
42   + "<table\n"
43   + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
44   + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
45   + " <tbody>\n"
46   + " <tr>\n"
47   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
48   + " HREF=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
49   + " </td>\n"
50   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
51   + " HREF=\"tutorial.html\">Tutorial</a><br>\n"
52   + " </td>\n"
53   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
54   + " HREF=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
55   + " </td>\n"
56   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
57   + " HREF=\"api/index.html\">Javadoc</a><br>\n"
58   + " </td>\n"
59   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
60   + " HREF=\"http://sourceforge.net/tracker/?atid=491356&amp;group_id=59548&amp;func=browse\">Bugs</a><br>\n"
61   + " </td>\n"
62   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
63   + " HREF=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
64   + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
65   + " HREF=\"policies.html\">Policies</a><br>\n"
66   + " </td>\n"
67   + " </tr>\n"
68   + " </tbody>\n"
69   + "</table>\n"
70   + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
71   + "<h2>Introduction</h2>\n"
72   + "Nutch is a nascent effort to implement an open-source web search\n"
73   + "engine. Web search is a basic requirement for internet navigation, yet\n"
74   + "the number of web search engines is decreasing. Today's oligopoly could\n"
75   + "soon be a monopoly, with a single company controlling nearly all web\n"
76   + "search for its commercial gain. &nbsp;That would not be good for the\n"
77   + "users of internet. &nbsp;Nutch aims to enable anyone to easily and\n"
78   + "cost-effectively deploy a world-class web search engine.<br>\n"
79   + "<br>\n"
80   + "To succeed, the Nutch software must be able to:<br>\n"
81   + "<ul>\n"
82   + " <li> crawl several billion pages per month</li>\n"
83   + " <li>maintain an index of these pages</li>\n"
84   + " <li>search that index up to 1000 times per second</li>\n"
85   + " <li>provide very high quality search results</li>\n"
86   + " <li>operate at minimal cost</li>\n"
87   + "</ul>\n"
88   + "<h2>Status</h2>\n"
89   + "Currently we're just a handful of developers working part-time to put\n"
90   + "together a demo. &nbsp;The demo is coded entirely in Java. &nbsp;However\n"
91   + "persistent data is written in well-documented formats so that modules\n"
92   + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
93   + "project progresses.<br>\n"
94   + "<br>\n"
95   + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
96   + " HREF=\"http://sourceforge.net\"> </a>\n"
97   + "<div style=\"text-align: center;\"><a HREF=\"http://sourceforge.net\"><img\n"
98   + " SRC=\"http://sourceforge.net/sflogo.php?group_id=59548&amp;type=1\"\n"
99   + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
100   + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
101   + "</body>\n"
102   + "</html>\n";
103
104   // tests
105

106   public void testZipUnzip() {
107     byte[] testBytes= SHORT_TEST_STRING.getBytes();
108     testZipUnzip(testBytes);
109     testBytes= LONGER_TEST_STRING.getBytes();
110     testZipUnzip(testBytes);
111     testBytes= WEBPAGE.getBytes();
112     testZipUnzip(testBytes);
113   }
114
115   public void testZipUnzipBestEffort() {
116     byte[] testBytes= SHORT_TEST_STRING.getBytes();
117     testZipUnzipBestEffort(testBytes);
118     testBytes= LONGER_TEST_STRING.getBytes();
119     testZipUnzipBestEffort(testBytes);
120     testBytes= WEBPAGE.getBytes();
121     testZipUnzipBestEffort(testBytes);
122   }
123   
124   public void testTruncation() {
125     byte[] testBytes= SHORT_TEST_STRING.getBytes();
126     testTruncation(testBytes);
127     testBytes= LONGER_TEST_STRING.getBytes();
128     testTruncation(testBytes);
129     testBytes= WEBPAGE.getBytes();
130     testTruncation(testBytes);
131   }
132
133   public void testLimit() {
134     byte[] testBytes= SHORT_TEST_STRING.getBytes();
135     testLimit(testBytes);
136     testBytes= LONGER_TEST_STRING.getBytes();
137     testLimit(testBytes);
138     testBytes= WEBPAGE.getBytes();
139     testLimit(testBytes);
140   }
141
142   // helpers
143

144   public void testZipUnzip(byte[] origBytes) {
145     byte[] compressedBytes= GZIPUtils.zip(origBytes);
146
147     assertTrue("compressed array is not smaller!",
148            compressedBytes.length < origBytes.length);
149
150     byte[] uncompressedBytes= null;
151     try {
152       uncompressedBytes= GZIPUtils.unzip(compressedBytes);
153     } catch (IOException JavaDoc e) {
154       e.printStackTrace();
155       assertTrue("caught exception '" + e + "' during unzip()",
156          false);
157     }
158     assertTrue("uncompressedBytes is wrong size",
159            uncompressedBytes.length == origBytes.length);
160
161     for (int i= 0; i < origBytes.length; i++)
162       if (origBytes[i] != uncompressedBytes[i])
163     assertTrue("uncompressedBytes does not match origBytes", false);
164   }
165
166   public void testZipUnzipBestEffort(byte[] origBytes) {
167     byte[] compressedBytes= GZIPUtils.zip(origBytes);
168
169     assertTrue("compressed array is not smaller!",
170            compressedBytes.length < origBytes.length);
171
172     byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes);
173     assertTrue("uncompressedBytes is wrong size",
174            uncompressedBytes.length == origBytes.length);
175
176     for (int i= 0; i < origBytes.length; i++)
177       if (origBytes[i] != uncompressedBytes[i])
178     assertTrue("uncompressedBytes does not match origBytes", false);
179   }
180
181   public void testTruncation(byte[] origBytes) {
182     byte[] compressedBytes= GZIPUtils.zip(origBytes);
183
184     System.out.println("original data has len " + origBytes.length);
185     System.out.println("compressed data has len "
186                + compressedBytes.length);
187
188     for (int i= compressedBytes.length; i >= 0; i--) {
189
190       byte[] truncCompressed= new byte[i];
191
192       for (int j= 0; j < i; j++)
193     truncCompressed[j]= compressedBytes[j];
194
195       byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed);
196
197       if (trunc == null) {
198     System.out.println("truncated to len "
199                + i + ", trunc is null");
200       } else {
201     System.out.println("truncated to len "
202                + i + ", trunc.length= "
203                + trunc.length);
204
205     for (int j= 0; j < trunc.length; j++)
206       if (trunc[j] != origBytes[j])
207         assertTrue("truncated/uncompressed array differs at pos "
208                + j + " (compressed data had been truncated to len "
209                + i + ")", false);
210       }
211     }
212   }
213
214   public void testLimit(byte[] origBytes) {
215     byte[] compressedBytes= GZIPUtils.zip(origBytes);
216
217     assertTrue("compressed array is not smaller!",
218                compressedBytes.length < origBytes.length);
219
220     for (int i= 0; i < origBytes.length; i++) {
221
222       byte[] uncompressedBytes=
223         GZIPUtils.unzipBestEffort(compressedBytes, i);
224
225       assertTrue("uncompressedBytes is wrong size",
226                  uncompressedBytes.length == i);
227
228       for (int j= 0; j < i; j++)
229         if (origBytes[j] != uncompressedBytes[j])
230           assertTrue("uncompressedBytes does not match origBytes", false);
231     }
232   }
233
234 }
235
Popular Tags