package org.opencms.search.extractors;

import java.io.InputStream;
import java.util.Map;

import junit.framework.TestCase;

/**
 * Tests the text and meta information extraction from MS Word documents
 * performed by {@link CmsExtractorMsWord}.
 */
public class TestMsWordExtraction extends TestCase {

    /** Class path location of the MS Word document used as test input. */
    private static final String TEST_DOCUMENT = "org/opencms/search/extractors/test1.doc";

    /**
     * Default JUnit constructor.
     *
     * @param arg0 the JUnit parameter passed on to the {@link TestCase} constructor
     */
    public TestMsWordExtraction(String arg0) {
        super(arg0);
    }

    /**
     * Extracts the test document with the MS Word extractor and checks both
     * the extracted text content and the extracted meta information.
     *
     * @throws Exception if loading the document or the extraction fails
     */
    public void testBasicWordExtration() throws Exception {

        InputStream in = getClass().getClassLoader().getResourceAsStream(TEST_DOCUMENT);
        // getResourceAsStream returns null for a missing resource; fail with a clear message here
        assertNotNull("Test document not found on class path: " + TEST_DOCUMENT, in);

        I_CmsExtractionResult extractionResult;
        try {
            extractionResult = CmsExtractorMsWord.getExtractor().extractText(in);
        } finally {
            // always release the stream, also if the extraction throws
            in.close();
        }
        String result = extractionResult.getContent();
        assertNotNull("Extracted content must not be null", result);

        System.out.println("---------------------------------------------------------------");
        System.out.println("Extracted from MS Word:");
        System.out.println(result);

        assertContains(result, "Alkacon Software");
        assertContains(result, "The OpenCms experts");
        assertContains(result, "Some content here.");
        assertContains(result, "Some content there.");
        assertContains(result, "Some content on a second sheet.");
        assertContains(result, "Some content on the third sheet.");
        // German umlauts (lower and upper case), sharp s and the Euro sign
        assertContains(result, "\u00e4\u00f6\u00fc\u00c4\u00d6\u00dc\u00df\u20ac");

        Map meta = extractionResult.getMetaInfo();
        assertEquals("Alkacon Software - The OpenCms experts", meta.get(I_CmsExtractionResult.META_TITLE));
        assertEquals("This is the subject", meta.get(I_CmsExtractionResult.META_SUBJECT));
        assertEquals("Alexander Kandzior", meta.get(I_CmsExtractionResult.META_AUTHOR));
        assertEquals("Alkacon Software", meta.get(I_CmsExtractionResult.META_COMPANY));
        assertEquals("This is the comment", meta.get(I_CmsExtractionResult.META_COMMENTS));
        assertEquals("Key1, Key2", meta.get(I_CmsExtractionResult.META_KEYWORDS));
    }

    /**
     * Asserts that the given content contains the expected text fragment,
     * naming the missing fragment in the failure message.
     *
     * @param content the extracted content to search in
     * @param expected the text fragment that must occur in the content
     */
    private static void assertContains(String content, String expected) {
        assertTrue("Extracted content does not contain: " + expected, content.indexOf(expected) > -1);
    }
}