1 40 package org.dspace.app.mediafilter; 41 42 import java.io.ByteArrayInputStream ; 43 import java.io.InputStream ; 44 45 import javax.swing.text.Document ; 46 import javax.swing.text.html.HTMLEditorKit ; 47 48 54 public class HTMLFilter extends MediaFilter 55 { 56 57 public String getFilteredName(String oldFilename) 58 { 59 return oldFilename + ".txt"; 60 } 61 62 66 public String getBundleName() 67 { 68 return "TEXT"; 69 } 70 71 74 public String getFormatString() 75 { 76 return "Text"; 77 } 78 79 82 public String getDescription() 83 { 84 return "Extracted text"; 85 } 86 87 93 public InputStream getDestinationStream(InputStream source) 94 throws Exception 95 { 96 HTMLEditorKit kit = new HTMLEditorKit (); 99 Document doc = kit.createDefaultDocument(); 100 101 doc.putProperty("IgnoreCharsetDirective", new Boolean (true)); 102 103 kit.read(source, doc, 0); 104 105 String extractedText = doc.getText(0, doc.getLength()); 106 107 byte[] textBytes = extractedText.getBytes(); 109 ByteArrayInputStream bais = new ByteArrayInputStream (textBytes); 110 111 return bais; } 113 } 114 | Popular Tags |