KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > txt > TXTPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers.txt;
17
18 import com.blandware.atleap.common.Constants;
19 import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
20 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
21
22 import java.io.*;
23
24
25 /**
26  * An extractor that 'extracts' a plain text from a plain text. Created for
27  * consistency.
28  *
29  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
30  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
31  * @version $Revision: 1.3 $ $Date: 2005/08/14 12:27:55 $
32  */

33 public class TXTPlainTextExtractor implements SpecificPlainTextExtractor {
34     private static final int BUFFER_SIZE = 4096;
35     private char[] buffer = new char[BUFFER_SIZE];
36
37     protected String JavaDoc usedEncoding = null;
38
39     public TXTPlainTextExtractor() {
40     }
41
42     /**
43      * 'Extracts' a plain text from a plain text. Actually just converts bytes to
44      * chars.
45      *
46      * @param input the input stream that supplies a plain text for extraction
47      * @param output the writer that will accept the extracted text
48      * @param encoding If specified, extractor assumes that the input text has
49      * this <code>encoding</code>. If <code>null</code>, the
50      * default encoding is used (currently <code>Constants.DEFAULT_ENCODING</code>).
51      * @throws PlainTextExtractorException throwed on exception raised during
52      * extracting
53      */

54     public void extract(InputStream input, Writer output, String JavaDoc encoding)
55             throws PlainTextExtractorException {
56         int readChars;
57
58         try {
59             if (encoding == null || encoding.trim().length() == 0) {
60                 encoding = Constants.DEFAULT_ENCODING;
61             }
62             usedEncoding = encoding;
63             Reader reader = new BufferedReader(new InputStreamReader(input, encoding));
64             while (true) {
65                 readChars = reader.read(buffer);
66                 if (readChars <= 0) {
67                     break;
68                 }
69                 output.write(buffer, 0, readChars);
70             }
71         } catch (IOException e) {
72             throw new PlainTextExtractorException(e);
73         }
74     }
75
76     /**
77      * @see com.blandware.atleap.common.parsers.SpecificPlainTextExtractor#getUsedEncoding()
78      */

79     public String JavaDoc getUsedEncoding() {
80         return usedEncoding;
81     }
82 }
83
Popular Tags