KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > fileextraction > JahiaMSWordExtractor


1 package org.jahia.services.fileextraction;
2
3 import java.io.IOException JavaDoc;
4 import java.io.InputStream JavaDoc;
5 import java.io.Reader JavaDoc;
6
7 import org.apache.slide.extractor.MSWordExtractor;
8 import org.jahia.utils.FileUtils;
9
10
11 /**
12  * <p>Title: HDF Text content extractor file</p>
13  * <p>Description: This classes uses Jakarta's POI library to extract content
14  * from HDF formatted files. Some of the code in this class is not used
15  * currently but might be needed to process other types of files based on
16  * the POI File system.</p>
17  * <p>Copyright: Copyright (c) 2003</p>
18  * <p>Company: Jahia Ltd</p>
19  * @version 1.0
20  */

21
22 public class JahiaMSWordExtractor extends JahiaOfficeExtractor {
23
24     private static org.apache.log4j.Logger logger =
25         org.apache.log4j.Logger.getLogger (JahiaMSWordExtractor.class);
26
27     public JahiaMSWordExtractor(){
28     }
29    
30     /**
31      * This method returns all the text of an HDF file.
32      *
33      * @param path String
34      * @param lastModified long
35      * @param fileStream InputStream
36      * @throws IOException thrown if there was an error while parsing the
37      * file format, notably if the file is an RTF file instead of a HDF file.
38      * @return String
39      */

40     public String JavaDoc getContentAsString(String JavaDoc path, long lastModified,
41                                      InputStream JavaDoc fileStream)
42     throws IOException JavaDoc {
43         return getContentAsString(path, lastModified, fileStream, null);
44     }
45
46     /**
47      * This method returns all the text of an HDF file.
48      *
49      * @param path String
50      * @param lastModified long
51      * @param fileStream InputStream
52      * @param charSet String
53      * @throws IOException thrown if there was an error while parsing the file
54      * @return String
55      */

56     public String JavaDoc getContentAsString(String JavaDoc path, long lastModified,
57                                      InputStream JavaDoc fileStream, String JavaDoc charSet)
58     throws IOException JavaDoc {
59         MSWordExtractor ex = new MSWordExtractor("","","");
60         try {
61             Reader JavaDoc reader = ex.extract(fileStream);
62             return FileUtils.readerToString(reader);
63         } catch ( Throwable JavaDoc t ){
64             logger.debug("Exception extraction file " + path, t);
65             throw new IOException JavaDoc(t.getMessage());
66         }
67     }
68
69 }
70
Popular Tags