1 2 3 4 package org.creativecommons.nutch; 5 6 import net.nutch.parse.*; 7 import net.nutch.protocol.Content; 8 9 import java.util.Properties ; 10 import java.io.*; 11 import java.net.URL ; 12 13 import junit.framework.TestCase; 14 15 public class TestCCParseFilter extends TestCase { 16 17 private static final File testDir = 18 new File(System.getProperty("test.input")); 19 20 public void testPages() throws Exception { 21 pageTest(new File(testDir, "anchor.html"), "http://foo.com/", 22 "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null); 23 pageTest(new File(testDir, "rel.html"), "http://foo.com/", 24 "http://creativecommons.org/licenses/by-nc/2.0", "rel", null); 25 pageTest(new File(testDir, "rdf.html"), "http://foo.com/", 26 "http://creativecommons.org/licenses/by-nc/1.0", "rdf", "text"); 27 } 28 29 public void pageTest(File file, String url, 30 String license, String location, String type) 31 throws Exception { 32 33 String contentType = "text/html"; 34 InputStream in = new FileInputStream(file); 35 ByteArrayOutputStream out = new ByteArrayOutputStream((int)file.length()); 36 byte[] buffer = new byte[1024]; 37 int i; 38 while ((i = in.read(buffer)) != -1) { 39 out.write(buffer, 0, i); 40 } 41 in.close(); 42 byte[] bytes = out.toByteArray(); 43 44 Parser parser = ParserFactory.getParser(contentType, url); 45 Content content = 46 new Content(url, url, bytes, contentType, new Properties ()); 47 Parse parse = parser.getParse(content); 48 49 Properties metadata = parse.getData().getMetadata(); 50 assertEquals(license, metadata.get("License-Url")); 51 assertEquals(location, metadata.get("License-Location")); 52 assertEquals(type, metadata.get("Work-Type")); 53 } 54 } 55
| Popular Tags
|