package org.opencms.search.extractors;

import java.io.InputStream;
import java.util.Map;

import org.apache.poi.poifs.eventfilesystem.POIFSReader;

import org.textmining.text.extraction.WordExtractor;

/**
 * Extracts the text content and the document meta information from an
 * MS Word document.<p>
 *
 * The class is a singleton: the constructor is private and the only instance
 * is handed out by {@link #getExtractor()}.<p>
 *
 * NOTE(review): extraction uses helpers inherited from
 * {@link A_CmsTextExtractorMsOfficeBase} and registers <code>this</code> as a
 * POIFS listener, so the shared instance presumably carries per-call state
 * between the individual steps of {@link #extractText(InputStream, String)}.
 * Whether concurrent calls are safe cannot be determined from this file -
 * confirm against the base class.<p>
 */
public final class CmsExtractorMsWord extends A_CmsTextExtractorMsOfficeBase {

    /** The single shared instance of this extractor. */
    private static final CmsExtractorMsWord INSTANCE = new CmsExtractorMsWord();

    /**
     * Hidden constructor, use {@link #getExtractor()} to obtain the instance.<p>
     */
    private CmsExtractorMsWord() {

        // intentionally empty - prevents instantiation from outside
    }

    /**
     * Returns the shared instance of this text extractor.<p>
     *
     * @return the shared instance of this text extractor
     */
    public static I_CmsTextExtractor getExtractor() {

        return INSTANCE;
    }

    /**
     * Extracts the plain text and the meta information from the given
     * MS Word input stream.<p>
     *
     * The text is read with a {@link WordExtractor} and passed through
     * <code>removeControlChars</code>; the meta information is collected by
     * reading the document a second time with a {@link POIFSReader} that has
     * this instance registered as listener.<p>
     *
     * @param in the input stream to read the MS Word document from
     * @param encoding not used by this implementation
     *
     * @return the extraction result holding the extracted text and the meta information
     *
     * @throws Exception if the text or meta information extraction fails
     */
    public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

        String result;
        Map metaInfo;
        try {
            // first pass: extract the text content.
            // getStreamCopy is requested twice for the same input; presumably the base
            // class buffers the stream so both passes see the full document - TODO confirm
            WordExtractor wordExtractor = new WordExtractor();
            result = wordExtractor.extractText(getStreamCopy(in));
            result = removeControlChars(result);

            // second pass: let POI fire the document events at this instance,
            // then collect whatever meta information was gathered
            POIFSReader reader = new POIFSReader();
            reader.registerListener(this);
            reader.read(getStreamCopy(in));
            metaInfo = extractMetaInformation();
        } finally {
            // always clean up, even when one of the extraction steps failed: this is a
            // shared instance, so state left behind by an aborted run must not be
            // carried over into the next extraction
            cleanup();
        }

        // as before, the result object is created only after cleanup has run
        return new CmsExtractionResult(result, metaInfo);
    }
}