1 2 3 4 package net.nutch.parse.pdf; 5 6 import net.nutch.protocol.ProtocolFactory; 7 import net.nutch.protocol.Protocol; 8 import net.nutch.protocol.Content; 9 import net.nutch.protocol.ProtocolException; 10 11 import net.nutch.parse.ParserFactory; 12 import net.nutch.parse.Parser; 13 import net.nutch.parse.Parse; 14 import net.nutch.parse.ParseException; 15 16 import junit.framework.TestCase; 17 18 23 public class TestPdfParser extends TestCase { 24 25 private String fileSeparator = System.getProperty("file.separator"); 26 private String sampleDir = System.getProperty("test.data","."); 28 private String [] sampleFiles = {"pdftest.pdf"}; 32 33 private String expectedText = "A VERY SMALL PDF FILE"; 34 35 public TestPdfParser(String name) { 36 super(name); 37 } 38 39 protected void setUp() {} 40 41 protected void tearDown() {} 42 43 public void testIt() throws ProtocolException, ParseException { 44 String urlString; 45 Protocol protocol; 46 Content content; 47 Parser parser; 48 Parse parse; 49 50 for (int i=0; i<sampleFiles.length; i++) { 51 urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i]; 52 53 protocol = ProtocolFactory.getProtocol(urlString); 54 content = protocol.getContent(urlString); 55 56 parser = ParserFactory.getParser(content.getContentType(), urlString); 57 parse = parser.getParse(content); 58 59 int index = parse.getText().indexOf(expectedText); 60 assertTrue(index > 0); 61 } 62 } 63 64 } 65 | Popular Tags |