KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > fileextraction > TextExtractor


1 package org.jahia.services.fileextraction;
2
3 import java.io.IOException JavaDoc;
4 import java.io.InputStream JavaDoc;
5 import java.io.InputStreamReader JavaDoc;
6
7 import org.jahia.utils.FileUtils;
8
9
10 /**
11  * <p>Title: Plain text content extractor</p>
12  * <p>Description: This class extracts the content of plain text files. It
13  * wraps the supplied input stream in a reader, using the requested character
14  * set or the platform default character set when none is given, and hands
15  * that reader to FileUtils to obtain the text.</p>
16  * <p>Copyright: Copyright (c) 2003</p>
17  * <p>Company: Jahia Ltd</p>
18  * @author Serge Huber
19  * @version 1.0
20  */

21
22 public class TextExtractor implements FileExtractor {
23
24     public TextExtractor(){
25     }
26
27     /**
28     *
29     * @param path String
30     * @param lastModified long
31     * @param fileStream InputStream
32     * @throws Exception
33     * @return String
34     */

35    public ExtractedDocument getExtractedDocument(
36                                              String JavaDoc path,
37                                              long lastModified,
38                                              InputStream JavaDoc fileStream)
39    throws Exception JavaDoc{
40         return getExtractedDocument(path,lastModified,fileStream,null);
41    }
42
43     /**
44     *
45     * @param path String
46     * @param lastModified long
47     * @param fileStream InputStream
48     * @param charSet String
49     * @throws Exception
50     * @return String
51     */

52    public ExtractedDocument getExtractedDocument(
53                                              String JavaDoc path,
54                                              long lastModified,
55                                              InputStream JavaDoc fileStream,
56                                              String JavaDoc charSet)
57    throws Exception JavaDoc{
58        ExtractedDocumentImpl extDoc = new ExtractedDocumentImpl();
59        extDoc.setContent(this.getContentAsString(path,lastModified,fileStream,charSet));
60        return extDoc;
61    }
62     
63     /**
64      * This method returns all the text of an HDF file.
65      *
66      * @param path String
67      * @param lastModified long
68      * @param fileStream InputStream
69      * @throws IOException thrown if there was an error while parsing the
70      * file format, notably if the file is an RTF file instead of a HDF file.
71      * @return String
72      */

73     public String JavaDoc getContentAsString(String JavaDoc path, long lastModified,
74                                      InputStream JavaDoc fileStream)
75     throws IOException JavaDoc {
76         return getContentAsString(path, lastModified, fileStream, null);
77     }
78
79     /**
80      * This method returns all the text of an HDF file.
81      *
82      * @param path String
83      * @param lastModified long
84      * @param fileStream InputStream
85      * @param charSet String
86      * @throws IOException thrown if there was an error while parsing the
87      * file format, notably if the file is an RTF file instead of a HDF file.
88      * @return String
89      */

90     public String JavaDoc getContentAsString(String JavaDoc path, long lastModified,
91                                      InputStream JavaDoc fileStream, String JavaDoc charSet)
92     throws IOException JavaDoc {
93         InputStreamReader JavaDoc reader = null;
94         if ( charSet != null ){
95             reader = new InputStreamReader JavaDoc(fileStream, charSet);
96         } else {
97             reader = new InputStreamReader JavaDoc(fileStream);
98         }
99         return FileUtils.readerToString(reader);
100     }
101 }
102
Popular Tags