1 31 32 package org.opencms.search.extractors; 33 34 import java.io.InputStream ; 35 36 import junit.framework.TestCase; 37 38 41 public class TestPdfExtraction extends TestCase { 42 43 48 public TestPdfExtraction(String arg0) { 49 super(arg0); 50 } 51 52 57 public void testBasicPdfExtration() throws Exception { 58 59 InputStream in = getClass().getClassLoader().getResourceAsStream("org/opencms/search/extractors/test1.pdf"); 61 62 I_CmsExtractionResult extractionResult = CmsExtractorPdf.getExtractor().extractText(in); 64 String result = extractionResult.getContent(); 65 66 System.out.println("---------------------------------------------------------------"); 67 System.out.println("Extracted from PDF:"); 68 System.out.println(result); 69 70 assertTrue(result.indexOf("Alkacon Software") > -1); 71 assertTrue(result.indexOf("The OpenCms experts") > -1); 72 assertTrue(result.indexOf("Some content here.") > -1); 73 assertTrue(result.indexOf("Some content there.") > -1); 74 assertTrue(result.indexOf("Some content on a second sheet.") > -1); 75 assertTrue(result.indexOf("Some content on the third sheet.") > -1); 76 assertTrue(result.indexOf("\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u20ac") > -1); 77 } 78 } 79 | Popular Tags |