KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > rtf > RTFPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers.rtf;
17
18 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
19 import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
20 import com.blandware.atleap.common.Constants;
21
22 import java.io.InputStream JavaDoc;
23 import java.io.StringWriter JavaDoc;
24 import java.io.Writer JavaDoc;
25 import java.util.List JavaDoc;
26
27 /**
28  * An extractor that extracts a plain text from RTF documents.
29  *
30  * @see SpecificPlainTextExtractor
31  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
32  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
33  * @version $Revision: 1.4 $ $Date: 2005/08/14 12:27:55 $
34  */

35 public class RTFPlainTextExtractor implements SpecificPlainTextExtractor,
36         RTFParserDelegate {
37     private StringWriter JavaDoc buffer = null;
38     private boolean inIgnorableDestination;
39     private int ignorableDestBraceLevel;
40     private int braceLevel;
41
42     public RTFPlainTextExtractor() {
43     }
44
45     /**
46      * Extracts a plain text from an RTF document.
47      *
48      * @param input the input stream that supplies an MS Excel document for
49      * extraction
50      * @param output the writer that will accept the extracted text
51      * @param encoding ignored
52      * @throws PlainTextExtractorException throwed on exception raised during
53      * extracting
54      */

55     public void extract(InputStream JavaDoc input, Writer JavaDoc output, String JavaDoc encoding)
56             throws PlainTextExtractorException {
57         // TODO: 'Special' symbols like '(c)', '--' and so on
58
braceLevel = 0;
59         inIgnorableDestination = false;
60         buffer = new StringWriter JavaDoc();
61         RTFParser parser = new RTFParser(input);
62
63         parser.setNewLine(Constants.EOL);
64         parser.setDelegate(this);
65         try {
66             parser.parse();
67             output.write(buffer.toString());
68         } catch (Exception JavaDoc e) {
69             throw new PlainTextExtractorException(e);
70         }
71     }
72
73     private void tryToWriteOutput(String JavaDoc str, int context) {
74         if (context == IN_DOCUMENT) {
75             if (!inIgnorableDestination) {
76                 if (buffer != null) {
77                     buffer.write(str);
78                 }
79             }
80         }
81     }
82
83     public void text(String JavaDoc text, String JavaDoc style, int context) {
84         tryToWriteOutput(text, context);
85     }
86
87     public void controlSymbol(String JavaDoc controlSymbol, int context) {
88         if (controlSymbol.equals("\\*")) {
89             // Handle ignorable destination: ignore it
90
if (inIgnorableDestination) {
91                 // Do nothing: just continue to ignore
92
} else {
93                 inIgnorableDestination = true;
94                 ignorableDestBraceLevel = braceLevel;
95             }
96         }
97     }
98
99     public void controlWord(String JavaDoc controlWord, int value, int context) {
100         if (controlWord.equals("\\cell")) {
101             tryToWriteOutput(" ", context);
102         } else if (controlWord.equals("\\row")) {
103             tryToWriteOutput(Constants.EOL, context);
104         } else if (controlWord.equals("\\object") || controlWord.equals("\\pict")) {
105             // Handle object and picture destinations: ignore them
106
if (inIgnorableDestination) {
107                 // Do nothing: just continue to ignore
108
} else {
109                 inIgnorableDestination = true;
110                 ignorableDestBraceLevel = braceLevel;
111             }
112         }
113     }
114
115     public void openGroup(int depth) {
116         braceLevel++;
117     }
118
119     public void closeGroup(int depth) {
120         braceLevel--;
121         if (inIgnorableDestination && braceLevel < ignorableDestBraceLevel) {
122             inIgnorableDestination = false;
123         }
124     }
125
126     public void styleList(List JavaDoc styles) {
127     }
128
129     public void startDocument() {
130     }
131
132     public void endDocument() {
133     }
134
135     /**
136      * @see com.blandware.atleap.common.parsers.SpecificPlainTextExtractor#getUsedEncoding()
137      */

138     public String JavaDoc getUsedEncoding() {
139         return null;
140     }
141 }
142
Popular Tags