KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > SpecificPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers;
17
18 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
19
20 import java.io.InputStream JavaDoc;
21 import java.io.Writer JavaDoc;
22
23 /**
24  * An interface for an extractor which will extract a plain text from the
25  * documents of a specific format.
26  *
27  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
28  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
29  * @version $Revision: 1.4 $ $Date: 2005/08/14 12:27:54 $
30  */

31 public interface SpecificPlainTextExtractor {
32     /**
33      * Extracts a plain text from a document.
34      *
35      * @param input the input stream that supplies a document for extraction
36      * @param output the writer that will accept the extracted text
37      * @param encoding the encoding of the document in <code>input</code>.
38      * Extractor may ignore the <code>encoding</code> if it doesn't make sence
39      * in the corresponding format. Otherwise, if it is <code>null</code>, then
40      * the extractor will choose the encoding itself. If the
41      * <code>encoding</code> make sence and is not <code>null</code>, then the
42      * extractor should use it. If the extractor finds out the encoding from
43      * document by itself, it uses it and ignores given (or default) encoding.
44      * @throws com.blandware.atleap.common.parsers.exception.PlainTextExtractorException throwed on exception raised during
45      * extracting
46      */

47     public void extract(InputStream JavaDoc input, Writer JavaDoc output, String JavaDoc encoding)
48             throws PlainTextExtractorException;
49
50     /**
51      * <p>
52      * Returns encoding that was used for extracting. If encoding has no sense
53      * for particular document format or it's unknown for extractor, returns
54      * <code>null</code>.
55      * </p>
56      * <p>
57      * This method should be called after calling <code>extract</code>; before
58      * it this method may return anything.
59      * </p>
60      *
61      * @return encoding used or <code>null</code>
62      */

63     public String JavaDoc getUsedEncoding();
64 }
65
Popular Tags