KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > textextraction > impl > MSWordTextExtractor


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.textextraction.impl;
17
import java.io.InputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
21
22 /**
23  * Text extractor for Microsoft Word files.
24  */

25 public class MSWordTextExtractor implements MimetypeTextExtractor {
26     public String JavaDoc getText(InputStream JavaDoc is) throws Exception JavaDoc {
27         HWPFDocument wordDoc = new HWPFDocument(is);
28         Range range = wordDoc.getRange();
29         return range.text();
30     }
31 }
32
Popular Tags