1 23 24 package org.apache.slide.extractor; 25 26 31 32 import java.io.*; 33 34 import org.textmining.text.extraction.WordExtractor; 35 36 public class MSWordExtractor extends AbstractContentExtractor { 37 38 public MSWordExtractor(String uri, String contentType, String namespace) { 39 super(uri, contentType, namespace); 40 } 41 42 public Reader extract(InputStream content) throws ExtractorException { 43 try { 44 WordExtractor extractor = 45 new WordExtractor(); 46 String text = extractor.extractText(content); 47 48 StringReader reader = new StringReader(text); 49 return reader; 50 } 51 catch(Exception e) { 52 throw new ExtractorException(e.getMessage()); 53 } 54 } 55 56 public static void main(String [] args) throws Exception 57 { 58 FileInputStream in = new FileInputStream(args[0]); 59 60 MSWordExtractor ex = new MSWordExtractor(null, null, null); 61 62 Reader reader = ex.extract(in); 63 64 int c; 65 do 66 { 67 c = reader.read(); 68 69 System.out.print((char)c); 70 } 71 while( c != -1 ); 72 } 73 } | Popular Tags |