KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > services > fileextraction > JahiaOfficeExtractor


1 package org.jahia.services.fileextraction;
2
3 import java.io.ByteArrayInputStream JavaDoc;
4 import java.io.ByteArrayOutputStream JavaDoc;
5 import java.io.InputStream JavaDoc;
6 import java.util.HashMap JavaDoc;
7 import java.util.Iterator JavaDoc;
8 import java.util.Map JavaDoc;
9
10 import org.apache.commons.io.CopyUtils;
11 import org.apache.slide.common.PropertyName;
12 import org.apache.slide.extractor.OfficeExtractor;
13 import org.jahia.services.sites.JahiaSitesSlideService;
14
15
16 public abstract class JahiaOfficeExtractor implements FileExtractor {
17
18     private static org.apache.log4j.Logger logger =
19         org.apache.log4j.Logger.getLogger (JahiaOfficeExtractor.class);
20         
21
22    /**
23     *
24     * @param path String
25     * @param lastModified long
26     * @param fileStream InputStream
27     * @throws Exception
28     * @return String
29     */

30    public ExtractedDocument getExtractedDocument(
31                                              String JavaDoc path,
32                                              long lastModified,
33                                              InputStream JavaDoc fileStream)
34    throws Exception JavaDoc{
35         return this.getExtractedDocument(path,lastModified,fileStream,null);
36    }
37
38    /**
39    *
40    * @param path String
41    * @param lastModified long
42    * @param fileStream InputStream
43    * @param charSet String
44    * @throws Exception
45    * @return String
46    */

47    public ExtractedDocument getExtractedDocument(
48                                             String JavaDoc path,
49                                             long lastModified,
50                                             InputStream JavaDoc fileStream,
51                                             String JavaDoc charSet)
52    throws Exception JavaDoc{
53
54        //create a tmp output stream with the size of the content.
55
ByteArrayOutputStream JavaDoc out = new ByteArrayOutputStream JavaDoc();
56        CopyUtils.copy(fileStream, out);
57        out.flush();
58        byte[] contents = out.toByteArray();
59        ExtractedDocument extDoc = this.getPropertiesExtractedDocument(new ByteArrayInputStream JavaDoc(contents));
60        extDoc.setContent(this.getContentAsString(path,lastModified,new ByteArrayInputStream JavaDoc(contents),charSet));
61     return extDoc;
62    
63    }
64    
65     /**
66      *
67      * @param path String
68      * @param lastModified long
69      * @param fileStream InputStream
70      * @throws Exception
71      * @return String
72      */

73     public abstract String JavaDoc getContentAsString(String JavaDoc path,
74                                               long lastModified,
75                                               InputStream JavaDoc fileStream)
76     throws Exception JavaDoc;
77
78     /**
79      *
80      * @param path String
81      * @param lastModified long
82      * @param fileStream InputStream
83      * @return String
84      */

85     public abstract String JavaDoc getContentAsString(String JavaDoc path,
86                                               long lastModified,
87                                               InputStream JavaDoc fileStream,
88                                               String JavaDoc charSet) throws Exception JavaDoc;
89
90     public Map JavaDoc extract(InputStream JavaDoc content){
91         Map JavaDoc map = null;
92         try {
93             OfficeExtractor officeExtractor = new OfficeExtractor("","","");
94             officeExtractor.configure(((JahiaSitesSlideService)JahiaSitesSlideService.getInstance()).getConfiguration().getConfiguration("office-property-mapping"));
95             map = officeExtractor.extract(content);
96         } catch ( Throwable JavaDoc t ){
97             logger.debug("Exception extraction Office properties", t);
98         }
99         
100         if ( map == null ){
101             map = new HashMap JavaDoc();
102         }
103         return map;
104     }
105
106     /**
107     *
108     * @param path String
109     * @param lastModified long
110     * @param fileStream InputStream
111     * @throws Exception
112     * @return String
113     */

114     public ExtractedDocument getPropertiesExtractedDocument(InputStream JavaDoc fileStream)
115     throws Exception JavaDoc{
116         ExtractedDocumentImpl extDoc = new ExtractedDocumentImpl();
117         Map JavaDoc properties = this.extract(fileStream);
118         Iterator JavaDoc iterator = properties.keySet().iterator();
119         Object JavaDoc objKey = null;
120         String JavaDoc key = null;
121         PropertyName propName = null;
122         while ( iterator.hasNext() ){
123             key = null;
124             objKey = iterator.next();
125             if ( objKey instanceof String JavaDoc ){
126                 key = (String JavaDoc)objKey;
127                 extDoc.setProperty(key,properties.get(key));
128             } else {
129                 propName = (PropertyName)objKey;
130                 // unless we use all slide extractors( PDF too, we actually ignore the name space prefix and use the Jahia field props prefix )
131
//key = propName.toString();
132
//extDoc.setProperty(key,properties.get(propName));
133
extDoc.setProperty(propName.getName(),properties.get(propName));
134             }
135             //logger.debug("Found Office property : " + key + ", val = " + val);
136
}
137         return extDoc;
138     }
139     
140 }
141
Popular Tags