KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > textextraction > impl > TextExtractorImpl


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.textextraction.impl;
17
18 import org.apache.avalon.framework.configuration.Configurable;
19 import org.apache.avalon.framework.configuration.Configuration;
20 import org.apache.avalon.framework.configuration.ConfigurationException;
21 import org.apache.avalon.framework.logger.AbstractLogEnabled;
22 import org.outerj.daisy.textextraction.TextExtractor;
23
24 import java.util.Map JavaDoc;
25 import java.util.HashMap JavaDoc;
26 import java.io.InputStream JavaDoc;
27 import java.io.BufferedInputStream JavaDoc;
28
29 /**
30  * @avalon.component version="1.0" name="textextractor" lifestyle="singleton"
31  * @avalon.service type="org.outerj.daisy.textextraction.TextExtractor"
32  */

33 public class TextExtractorImpl extends AbstractLogEnabled implements TextExtractor, Configurable {
34     private Map JavaDoc extractorsByMimeType = new HashMap JavaDoc();
35
36
37     public void configure(Configuration configuration) throws ConfigurationException {
38         Configuration[] extractorConf = configuration.getChild("extractors").getChildren("extractor");
39         for (int i = 0; i < extractorConf.length; i++) {
40             String JavaDoc mimeType = extractorConf[i].getAttribute("mimeType");
41             String JavaDoc className = extractorConf[i].getAttribute("class");
42             try {
43                 Class JavaDoc clazz = Class.forName(className);
44                 extractorsByMimeType.put(mimeType, clazz);
45                 getLogger().debug("Registered class " + className + " to handle mime-type " + mimeType);
46             } catch (ClassNotFoundException JavaDoc e) {
47                 throw new ConfigurationException("Class not found: " + className + " specified at: " + extractorConf[i].getLocation());
48             }
49         }
50     }
51
52     public String JavaDoc getText(String JavaDoc mimeType, InputStream JavaDoc is) throws Exception JavaDoc {
53         try {
54             Class JavaDoc clazz = (Class JavaDoc)extractorsByMimeType.get(mimeType);
55
56             if (clazz != null) {
57                 MimetypeTextExtractor extractor = (MimetypeTextExtractor)clazz.newInstance();
58                 BufferedInputStream JavaDoc bis = new BufferedInputStream JavaDoc(is);
59                 return extractor.getText(bis);
60             } else {
61                 if (getLogger().isDebugEnabled())
62                     getLogger().debug("No textextractor registered for mimetype " + mimeType);
63             }
64             return null;
65         } finally {
66             is.close();
67         }
68     }
69
70     public boolean supportsMimeType(String JavaDoc mimeType) {
71         return extractorsByMimeType.containsKey(mimeType);
72     }
73 }
74
Popular Tags