1 40 package org.dspace.app.mediafilter; 41 42 import java.io.ByteArrayInputStream ; 43 import java.io.InputStream ; 44 45 import org.textmining.text.extraction.WordExtractor; 46 47 53 public class WordFilter extends MediaFilter 54 { 55 public String getFilteredName(String oldFilename) 56 { 57 return oldFilename + ".txt"; 58 } 59 60 64 public String getBundleName() 65 { 66 return "TEXT"; 67 } 68 69 72 public String getFormatString() 73 { 74 return "Text"; 75 } 76 77 80 public String getDescription() 81 { 82 return "Extracted text"; 83 } 84 85 91 public InputStream getDestinationStream(InputStream source) 92 throws Exception 93 { 94 WordExtractor e = new WordExtractor(); 97 String extractedText = e.extractText(source); 98 99 if (MediaFilterManager.isVerbose) 102 { 103 System.out.println(extractedText); 104 } 105 106 byte[] textBytes = extractedText.getBytes(); 108 ByteArrayInputStream bais = new ByteArrayInputStream (textBytes); 109 110 return bais; } 112 } 113 | Popular Tags |