KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > creativecommons > nutch > TestCCParseFilter


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package org.creativecommons.nutch;
5
6 import net.nutch.parse.*;
7 import net.nutch.protocol.Content;
8
9 import java.util.Properties JavaDoc;
10 import java.io.*;
11 import java.net.URL JavaDoc;
12
13 import junit.framework.TestCase;
14
15 public class TestCCParseFilter extends TestCase {
16
17   private static final File testDir =
18     new File(System.getProperty("test.input"));
19
20   public void testPages() throws Exception JavaDoc {
21     pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
22              "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
23     pageTest(new File(testDir, "rel.html"), "http://foo.com/",
24              "http://creativecommons.org/licenses/by-nc/2.0", "rel", null);
25     pageTest(new File(testDir, "rdf.html"), "http://foo.com/",
26              "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text");
27   }
28
29   public void pageTest(File file, String JavaDoc url,
30                        String JavaDoc license, String JavaDoc location, String JavaDoc type)
31     throws Exception JavaDoc {
32
33     String JavaDoc contentType = "text/html";
34     InputStream in = new FileInputStream(file);
35     ByteArrayOutputStream out = new ByteArrayOutputStream((int)file.length());
36     byte[] buffer = new byte[1024];
37     int i;
38     while ((i = in.read(buffer)) != -1) {
39       out.write(buffer, 0, i);
40     }
41     in.close();
42     byte[] bytes = out.toByteArray();
43
44     Parser parser = ParserFactory.getParser(contentType, url);
45     Content content =
46       new Content(url, url, bytes, contentType, new Properties JavaDoc());
47     Parse parse = parser.getParse(content);
48
49     Properties JavaDoc metadata = parse.getData().getMetadata();
50     assertEquals(license, metadata.get("License-Url"));
51     assertEquals(location, metadata.get("License-Location"));
52     assertEquals(type, metadata.get("Work-Type"));
53   }
54 }
55
Popular Tags