1 2 3 4 package net.nutch.util; 5 6 import junit.framework.TestCase; 7 8 import java.io.IOException ; 9 10 11 public class TestGZIPUtils extends TestCase { 12 public TestGZIPUtils(String name) { 13 super(name); 14 } 15 16 17 String SHORT_TEST_STRING= 18 "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc"; 19 20 21 String LONGER_TEST_STRING= 22 SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 23 + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 24 + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING 25 + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING; 26 27 28 String WEBPAGE= 29 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n" 30 + "<html>\n" 31 + "<head>\n" 32 + " <meta http-equiv=\"content-type\"\n" 33 + " content=\"text/html; charset=ISO-8859-1\">\n" 34 + " <title>Nutch</title>\n" 35 + "</head>\n" 36 + "<body>\n" 37 + "<h1\n" 38 + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n" 39 + " HREF=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n" 40 + "<small>an open source web-search engine</small></h1>\n" 41 + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n" 42 + "<table\n" 43 + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n" 44 + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n" 45 + " <tbody>\n" 46 + " <tr>\n" 47 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 48 + " HREF=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n" 49 + " </td>\n" 50 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 51 + " HREF=\"tutorial.html\">Tutorial</a><br>\n" 52 + " </td>\n" 53 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 54 + " HREF=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n" 55 + " </td>\n" 56 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 57 + " HREF=\"api/index.html\">Javadoc</a><br>\n" 58 + " </td>\n" 59 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 60 + " HREF=\"http://sourceforge.net/tracker/?atid=491356&group_id=59548&func=browse\">Bugs</a><br>\n" 61 + " </td>\n" 62 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 63 + " HREF=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n" 64 + " <td style=\"vertical-align: top; text-align: center;\"><a\n" 65 + " HREF=\"policies.html\">Policies</a><br>\n" 66 + " </td>\n" 67 + " </tr>\n" 68 + " </tbody>\n" 69 + "</table>\n" 70 + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n" 71 + "<h2>Introduction</h2>\n" 72 + "Nutch is a nascent effort to implement an open-source web search\n" 73 + "engine. Web search is a basic requirement for internet navigation, yet\n" 74 + "the number of web search engines is decreasing. Today's oligopoly could\n" 75 + "soon be a monopoly, with a single company controlling nearly all web\n" 76 + "search for its commercial gain. That would not be good for the\n" 77 + "users of internet. Nutch aims to enable anyone to easily and\n" 78 + "cost-effectively deploy a world-class web search engine.<br>\n" 79 + "<br>\n" 80 + "To succeed, the Nutch software must be able to:<br>\n" 81 + "<ul>\n" 82 + " <li> crawl several billion pages per month</li>\n" 83 + " <li>maintain an index of these pages</li>\n" 84 + " <li>search that index up to 1000 times per second</li>\n" 85 + " <li>provide very high quality search results</li>\n" 86 + " <li>operate at minimal cost</li>\n" 87 + "</ul>\n" 88 + "<h2>Status</h2>\n" 89 + "Currently we're just a handful of developers working part-time to put\n" 90 + "together a demo. The demo is coded entirely in Java. However\n" 91 + "persistent data is written in well-documented formats so that modules\n" 92 + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n" 93 + "project progresses.<br>\n" 94 + "<br>\n" 95 + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n" 96 + " HREF=\"http://sourceforge.net\"> </a>\n" 97 + "<div style=\"text-align: center;\"><a HREF=\"http://sourceforge.net\"><img\n" 98 + " SRC=\"http://sourceforge.net/sflogo.php?group_id=59548&type=1\"\n" 99 + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n" 100 + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n" 101 + "</body>\n" 102 + "</html>\n"; 103 104 106 public void testZipUnzip() { 107 byte[] testBytes= SHORT_TEST_STRING.getBytes(); 108 testZipUnzip(testBytes); 109 testBytes= LONGER_TEST_STRING.getBytes(); 110 testZipUnzip(testBytes); 111 testBytes= WEBPAGE.getBytes(); 112 testZipUnzip(testBytes); 113 } 114 115 public void testZipUnzipBestEffort() { 116 byte[] testBytes= SHORT_TEST_STRING.getBytes(); 117 testZipUnzipBestEffort(testBytes); 118 testBytes= LONGER_TEST_STRING.getBytes(); 119 testZipUnzipBestEffort(testBytes); 120 testBytes= WEBPAGE.getBytes(); 121 testZipUnzipBestEffort(testBytes); 122 } 123 124 public void testTruncation() { 125 byte[] testBytes= SHORT_TEST_STRING.getBytes(); 126 testTruncation(testBytes); 127 testBytes= LONGER_TEST_STRING.getBytes(); 128 testTruncation(testBytes); 129 testBytes= WEBPAGE.getBytes(); 130 testTruncation(testBytes); 131 } 132 133 public void testLimit() { 134 byte[] testBytes= SHORT_TEST_STRING.getBytes(); 135 testLimit(testBytes); 136 testBytes= LONGER_TEST_STRING.getBytes(); 137 testLimit(testBytes); 138 testBytes= WEBPAGE.getBytes(); 139 testLimit(testBytes); 140 } 141 142 144 public void testZipUnzip(byte[] origBytes) { 145 byte[] compressedBytes= GZIPUtils.zip(origBytes); 146 147 assertTrue("compressed array is not smaller!", 148 compressedBytes.length < origBytes.length); 149 150 byte[] uncompressedBytes= null; 151 try { 152 uncompressedBytes= GZIPUtils.unzip(compressedBytes); 153 } catch (IOException e) { 154 e.printStackTrace(); 155 assertTrue("caught exception '" + e + "' during unzip()", 156 false); 157 } 158 assertTrue("uncompressedBytes is wrong size", 159 uncompressedBytes.length == origBytes.length); 160 161 for (int i= 0; i < origBytes.length; i++) 162 if (origBytes[i] != uncompressedBytes[i]) 163 assertTrue("uncompressedBytes does not match origBytes", false); 164 } 165 166 public void testZipUnzipBestEffort(byte[] origBytes) { 167 byte[] compressedBytes= GZIPUtils.zip(origBytes); 168 169 assertTrue("compressed array is not smaller!", 170 compressedBytes.length < origBytes.length); 171 172 byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes); 173 assertTrue("uncompressedBytes is wrong size", 174 uncompressedBytes.length == origBytes.length); 175 176 for (int i= 0; i < origBytes.length; i++) 177 if (origBytes[i] != uncompressedBytes[i]) 178 assertTrue("uncompressedBytes does not match origBytes", false); 179 } 180 181 public void testTruncation(byte[] origBytes) { 182 byte[] compressedBytes= GZIPUtils.zip(origBytes); 183 184 System.out.println("original data has len " + origBytes.length); 185 System.out.println("compressed data has len " 186 + compressedBytes.length); 187 188 for (int i= compressedBytes.length; i >= 0; i--) { 189 190 byte[] truncCompressed= new byte[i]; 191 192 for (int j= 0; j < i; j++) 193 truncCompressed[j]= compressedBytes[j]; 194 195 byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed); 196 197 if (trunc == null) { 198 System.out.println("truncated to len " 199 + i + ", trunc is null"); 200 } else { 201 System.out.println("truncated to len " 202 + i + ", trunc.length= " 203 + trunc.length); 204 205 for (int j= 0; j < trunc.length; j++) 206 if (trunc[j] != origBytes[j]) 207 assertTrue("truncated/uncompressed array differs at pos " 208 + j + " (compressed data had been truncated to len " 209 + i + ")", false); 210 } 211 } 212 } 213 214 public void testLimit(byte[] origBytes) { 215 byte[] compressedBytes= GZIPUtils.zip(origBytes); 216 217 assertTrue("compressed array is not smaller!", 218 compressedBytes.length < origBytes.length); 219 220 for (int i= 0; i < origBytes.length; i++) { 221 222 byte[] uncompressedBytes= 223 GZIPUtils.unzipBestEffort(compressedBytes, i); 224 225 assertTrue("uncompressedBytes is wrong size", 226 uncompressedBytes.length == i); 227 228 for (int j= 0; j < i; j++) 229 if (origBytes[j] != uncompressedBytes[j]) 230 assertTrue("uncompressedBytes does not match origBytes", false); 231 } 232 } 233 234 } 235 | Popular Tags |