1 16 package org.outerj.daisy.textextraction.impl; 17 18 import org.apache.avalon.framework.configuration.Configurable; 19 import org.apache.avalon.framework.configuration.Configuration; 20 import org.apache.avalon.framework.configuration.ConfigurationException; 21 import org.apache.avalon.framework.logger.AbstractLogEnabled; 22 import org.outerj.daisy.textextraction.TextExtractor; 23 24 import java.util.Map ; 25 import java.util.HashMap ; 26 import java.io.InputStream ; 27 import java.io.BufferedInputStream ; 28 29 33 public class TextExtractorImpl extends AbstractLogEnabled implements TextExtractor, Configurable { 34 private Map extractorsByMimeType = new HashMap (); 35 36 37 public void configure(Configuration configuration) throws ConfigurationException { 38 Configuration[] extractorConf = configuration.getChild("extractors").getChildren("extractor"); 39 for (int i = 0; i < extractorConf.length; i++) { 40 String mimeType = extractorConf[i].getAttribute("mimeType"); 41 String className = extractorConf[i].getAttribute("class"); 42 try { 43 Class clazz = Class.forName(className); 44 extractorsByMimeType.put(mimeType, clazz); 45 getLogger().debug("Registered class " + className + " to handle mime-type " + mimeType); 46 } catch (ClassNotFoundException e) { 47 throw new ConfigurationException("Class not found: " + className + " specified at: " + extractorConf[i].getLocation()); 48 } 49 } 50 } 51 52 public String getText(String mimeType, InputStream is) throws Exception { 53 try { 54 Class clazz = (Class )extractorsByMimeType.get(mimeType); 55 56 if (clazz != null) { 57 MimetypeTextExtractor extractor = (MimetypeTextExtractor)clazz.newInstance(); 58 BufferedInputStream bis = new BufferedInputStream (is); 59 return extractor.getText(bis); 60 } else { 61 if (getLogger().isDebugEnabled()) 62 getLogger().debug("No textextractor registered for mimetype " + mimeType); 63 } 64 return null; 65 } finally { 66 is.close(); 67 } 68 } 69 70 public boolean supportsMimeType(String mimeType) { 71 return extractorsByMimeType.containsKey(mimeType); 72 } 73 } 74 | Popular Tags |