KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > textextraction > impl > XmlTextExtractor


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.textextraction.impl;
17
import java.io.InputStream;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
24
25 /**
26  * Extracts all text between tags in an XML document. Only works (of course)
27  * for well formed XML documents.
28  *
29  */

30 public class XmlTextExtractor implements MimetypeTextExtractor {
31     public String JavaDoc getText(InputStream JavaDoc is) throws Exception JavaDoc {
32         SAXParserFactory JavaDoc factory = SAXParserFactory.newInstance();
33         SAXParser JavaDoc parser = factory.newSAXParser();
34         MyHandler handler = new MyHandler();
35         parser.parse(is, handler);
36         return handler.getText();
37     }
38
39     private static class MyHandler extends DefaultHandler JavaDoc {
40         StringBuffer JavaDoc text = new StringBuffer JavaDoc();
41
42         public void characters(char ch[], int start, int length) throws SAXException JavaDoc {
43             text.append(ch, start, length);
44         }
45
46         public String JavaDoc getText() {
47             return text.toString();
48         }
49     }
50 }
51
Popular Tags