1 2 3 4 package net.nutch.parse.ext; 5 6 import net.nutch.protocol.ProtocolFactory; 7 import net.nutch.protocol.Protocol; 8 import net.nutch.protocol.Content; 9 import net.nutch.protocol.ProtocolException; 10 11 import net.nutch.parse.ParserFactory; 12 import net.nutch.parse.Parser; 13 import net.nutch.parse.Parse; 14 import net.nutch.parse.ParseException; 15 16 import junit.framework.TestCase; 17 18 import java.io.File ; 19 import java.io.FileOutputStream ; 20 import java.io.IOException ; 21 22 33 public class TestExtParser extends TestCase { 34 private File tempFile = null; 35 private String urlString = null; 36 private Content content = null;; 37 private Parser parser = null;; 38 private Parse parse = null; 39 40 private String expectedText = "nutch rocks nutch rocks nutch rocks"; 41 private String expectedMD5sum = "df46711a1a48caafc98b1c3b83aa1526"; 43 44 public TestExtParser(String name) { 45 super(name); 46 } 47 48 protected void setUp() throws ProtocolException, IOException { 49 String path = System.getProperty("test.data"); 52 if (path != null) { 53 File tempDir = new File (path); 54 if (!tempDir.exists()) 55 tempDir.mkdir(); 56 tempFile = File.createTempFile("nutch.test.plugin.ExtParser.","",tempDir); 57 } else { 58 tempFile = File.createTempFile("nutch.test.plugin.ExtParser.",""); 60 } 61 urlString = tempFile.toURL().toString(); 62 63 FileOutputStream fos = new FileOutputStream (tempFile); 64 fos.write(expectedText.getBytes()); 65 fos.close(); 66 67 Protocol protocol = ProtocolFactory.getProtocol(urlString); 69 content = protocol.getContent(urlString); 70 protocol = null; 71 } 72 73 protected void tearDown() { 74 content = null; 76 77 } 81 82 public void testIt() throws ParseException { 83 String contentType; 84 85 if (!System.getProperty("os.name").equalsIgnoreCase("linux")) 87 return; 88 89 for (int i=0; i<10; i++) { 91 contentType = "application/vnd.nutch.example.cat"; 93 content.setContentType(contentType); 94 parser = ParserFactory.getParser(contentType, urlString); 95 parse = parser.getParse(content); 96 assertEquals(expectedText,parse.getText()); 97 98 contentType = "application/vnd.nutch.example.md5sum"; 100 content.setContentType(contentType); 101 parser = ParserFactory.getParser(contentType, urlString); 102 parse = parser.getParse(content); 103 assertTrue(parse.getText().startsWith(expectedMD5sum)); 104 } 105 } 106 107 } 108 | Popular Tags |