KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > word > WordPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers.word;
17
18 import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
19 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
20 import org.textmining.text.extraction.WordExtractor;
21
22 import java.io.InputStream;
23 import java.io.Writer;
24
25 /**
26  * An extractor that extracts a plain text from MS Word documents.
27  *
28  * @see SpecificPlainTextExtractor
29  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
30  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
31  * @version $Revision: 1.3 $ $Date: 2005/08/14 12:27:56 $
32  */

33 public class WordPlainTextExtractor implements SpecificPlainTextExtractor {
34     public WordPlainTextExtractor() {
35     }
36
37     /**
38      * Extracts a plain text from an MS Word document.
39      *
40      * @param input the input stream that supplies an MS Word document for
41      * extraction
42      * @param output the writer that will accept the extracted text
43      * @param encoding ignored
44      * @throws PlainTextExtractorException throwed on exception raised during
45      * extracting
46      */

47     public void extract(InputStream JavaDoc input, Writer JavaDoc output, String JavaDoc encoding)
48             throws PlainTextExtractorException {
49         try {
50             WordExtractor extractor = new WordExtractor();
51             String JavaDoc str = extractor.extractText(input);
52             output.write(str);
53         } catch (Exception JavaDoc e) {
54             throw new PlainTextExtractorException(e);
55         }
56     }
57
58     /**
59      * @see com.blandware.atleap.common.parsers.SpecificPlainTextExtractor#getUsedEncoding()
60      */

61     public String JavaDoc getUsedEncoding() {
62         return null;
63     }
64 }
65
Popular Tags