1 package net.nutch.parse.rtf; 2 3 import junit.framework.TestCase; 4 import net.nutch.parse.Parse; 5 import net.nutch.parse.ParseException; 6 import net.nutch.parse.Parser; 7 import net.nutch.parse.ParserFactory; 8 import net.nutch.protocol.Content; 9 import net.nutch.protocol.Protocol; 10 import net.nutch.protocol.ProtocolException; 11 import net.nutch.protocol.ProtocolFactory; 12 13 import java.util.Properties ; 14 15 20 public class TestRTFParser extends TestCase { 21 22 private String fileSeparator = System.getProperty("file.separator"); 23 private String sampleDir = System.getProperty("test.data", "."); 25 private String rtfFile = "test.rtf"; 29 30 public TestRTFParser(String name) { 31 super(name); 32 } 33 34 protected void setUp() { 35 } 36 37 protected void tearDown() { 38 } 39 40 public void testIt() throws ProtocolException, ParseException { 41 42 String urlString; 43 Protocol protocol; 44 Content content; 45 Parser parser; 46 Parse parse; 47 48 urlString = "file:" + sampleDir + fileSeparator + rtfFile; 49 protocol = ProtocolFactory.getProtocol(urlString); 50 content = protocol.getContent(urlString); 51 52 parser = ParserFactory.getParser(content.getContentType(), urlString); 53 parse = parser.getParse(content); 54 String text = parse.getText(); 55 assertEquals("The quick brown fox jumps over the lazy dog", text.trim()); 56 57 String title = parse.getData().getTitle(); 58 Properties meta = parse.getData().getMetadata(); 59 assertEquals("test rft document", title); 60 assertEquals("tests", meta.getProperty("subject")); 61 62 63 64 } 65 66 67 } 68 | Popular Tags |