KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > xml > XMLPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers.xml;
17
18 import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
19 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
20
21 import javax.xml.parsers.SAXParser JavaDoc;
22 import javax.xml.parsers.SAXParserFactory JavaDoc;
23 import java.io.InputStream JavaDoc;
24 import java.io.Writer JavaDoc;
25
26 /**
27  * An extractor that extracts a plain text from XML documents.
28  *
29  * @see SpecificPlainTextExtractor
30  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
31  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
32  * @version $Revision: 1.3 $ $Date: 2005/08/14 12:27:56 $
33  */

34 public class XMLPlainTextExtractor implements SpecificPlainTextExtractor {
35     public XMLPlainTextExtractor() {
36     }
37
38     /**
39      * Extracts a plain text from an XML document.
40      *
41      * @param input the input stream that supplies an XML document for
42      * extraction
43      * @param output the writer that will accept the extracted text
44      * @param encoding ignored
45      * @throws PlainTextExtractorException throwed on exception raised during
46      * extracting
47      */

48     public void extract(InputStream JavaDoc input, Writer JavaDoc output, String JavaDoc encoding)
49             throws PlainTextExtractorException {
50         try {
51             SAXParserFactory JavaDoc factory = SAXParserFactory.newInstance();
52             SAXParser JavaDoc parser = factory.newSAXParser();
53             parser.parse(input, new TextExtractingContentHandler(output));
54         } catch (Exception JavaDoc e) {
55             throw new PlainTextExtractorException(e);
56         }
57     }
58
59     /**
60      * @see com.blandware.atleap.common.parsers.SpecificPlainTextExtractor#getUsedEncoding()
61      */

62     public String JavaDoc getUsedEncoding() {
63         return null;
64     }
65 }
66
Popular Tags