KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > slide > extractor > MSWordExtractor


1 /*
2  * $Header: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java,v 1.1.2.1 2004/09/29 15:01:26 unico Exp $
3  * $Revision: 1.1.2.1 $
4  * $Date: 2004/09/29 15:01:26 $
5  *
6  * ====================================================================
7  *
8  * Copyright 2004 The Apache Software Foundation
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  * http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  *
22  */

23
24 package org.apache.slide.extractor;
25
26 /**
27  * Author: Ryan Rhodes
28  * Date: Jun 26, 2004
29  * Time: 12:34:29 AM
30  */

31
32 import java.io.*;
33
34 import org.textmining.text.extraction.WordExtractor;
35
36 public class MSWordExtractor extends AbstractContentExtractor {
37
38     public MSWordExtractor(String JavaDoc uri, String JavaDoc contentType, String JavaDoc namespace) {
39         super(uri, contentType, namespace);
40     }
41
42     public Reader extract(InputStream content) throws ExtractorException {
43         try {
44             WordExtractor extractor =
45                     new WordExtractor();
46             String JavaDoc text = extractor.extractText(content);
47
48             StringReader reader = new StringReader(text);
49             return reader;
50         }
51         catch(Exception JavaDoc e) {
52             throw new ExtractorException(e.getMessage());
53         }
54     }
55
56         public static void main(String JavaDoc[] args) throws Exception JavaDoc
57         {
58             FileInputStream in = new FileInputStream(args[0]);
59
60             MSWordExtractor ex = new MSWordExtractor(null, null, null);
61
62             Reader reader = ex.extract(in);
63
64             int c;
65             do
66             {
67                 c = reader.read();
68
69                 System.out.print((char)c);
70             }
71             while( c != -1 );
72         }
73 }
Popular Tags