KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > parsers > ExcelParser


1 package org.enhydra.snapper.parsers;
2
3 import org.enhydra.snapper.api.*;
4
5 import org.apache.poi.poifs.eventfilesystem.POIFSReader;
6 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
7 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
8 import org.apache.poi.poifs.filesystem.*;
9 import org.apache.poi.hpsf.PropertySetFactory;
10 import org.apache.poi.hpsf.SummaryInformation;
11 import org.apache.poi.hssf.usermodel.*;
12 import org.apache.poi.hssf.record.*;
13 import org.apache.poi.hssf.eventusermodel.*;
14
15 import java.io.*;
16
17 public class ExcelParser implements org.enhydra.snapper.api.Parser {
18       private String JavaDoc fileName;
19       private String JavaDoc parsedText;
20       private String JavaDoc title = "";
21       StringBuffer JavaDoc sb = new StringBuffer JavaDoc ();
22       
23       /**
24        * Using HSSF in POI library, this method will parse
25        * the Excel file, and extract all the values for
26        * every sheet, and combine them into a single
27        * String.
28        */

29       
30       public String JavaDoc parse(InputStream is) throws java.io.IOException JavaDoc{
31             // initializing the InputStream from a file using
32
// POIFSFileSystem, before converting the result
33
// into an HSSFWorkbook instance
34
HSSFWorkbook wb = null;
35             POIFSFileSystem fs = null;
36             POIFSReader r = null;
37             MyPOIFSReaderListener mypoi =null;
38             try {
39
40                 fs = new POIFSFileSystem (is);
41
42                 r = new POIFSReader();
43                 mypoi = new MyPOIFSReaderListener();
44                 r.registerListener(mypoi,
45                     "\005SummaryInformation");
46                 is.reset();
47                 r.read(is);
48                 title = mypoi.getTitle();
49                 
50                 // start appending the values
51

52             
53             // get the Workbook (excel part) stream in a InputStream
54
InputStream din = fs.createDocumentInputStream("Workbook");
55                 // construct out HSSFRequest object
56
HSSFRequest req = new HSSFRequest();
57                 // lazy listen for ALL records with the listener shown above
58
req.addListenerForAllRecords(new POIEvent());
59                 // create our event factory
60
HSSFEventFactory factory = new HSSFEventFactory();
61                 // process our events based on the document input stream
62
factory.processEvents(req, din);
63                 // once all the events are processed close our file input stream
64
is.close();
65                 
66                 // and our document input stream (don't want to leak these!)
67
din.close();
68                 parsedText = sb.toString().trim();
69
70
71             parsedText = TextFilter.filterForbiddenCharacters (parsedText, TextFilter.DEFAULT_FORBIDDEN_CHARACTERS);
72             factory = null;
73             req = null;
74             din = null;
75             r = null;
76                 mypoi = null;
77                 fs = null;
78                 wb = null;
79                 is = null;
80                 sb = null;
81                 return parsedText;
82             } catch (Throwable JavaDoc e) {
83                 try{
84                     is.close();
85                     }
86                 catch (Exception JavaDoc ex) {}
87                 sb = null;
88                 r = null;
89                 mypoi = null;
90                 fs = null;
91                 wb = null;
92                 is = null;
93                 try{
94                     ParserManager.logger.debug("***** File could not be parsed: " + fileName);
95                 } catch (Exception JavaDoc ex) {
96                     System.out.println("***** File could not be parsed: " + fileName);
97                     }
98                 return "";
99
100             }
101
102             
103             
104           
105       }
106       
107       public void parse () {
108         // initializing the InputStream from a file using
109
// POIFSFileSystem, before converting the result
110
// into an HSSFWorkbook instance
111
HSSFWorkbook wb = null;
112         InputStream din = null;
113         POIFSReader r = null;
114         POIFSFileSystem fs = null;
115         FileInputStream in = null;
116         FileInputStream in2 = null;
117         MyPOIFSReaderListener mypoi = null;
118         try {
119             in = new FileInputStream(fileName);
120             in2 = new FileInputStream(fileName);
121             fs = new POIFSFileSystem (in);
122             //wb = new HSSFWorkbook (fs);
123
/// summary info
124
r = new POIFSReader();
125             mypoi = new MyPOIFSReaderListener();
126             r.registerListener(mypoi,
127                 "\005SummaryInformation");
128             //in.reset();
129
r.read(in2);
130             title = mypoi.getTitle();
131             
132             // start appending the values
133

134         
135         // get the Workbook (excel part) stream in a InputStream
136
din = fs.createDocumentInputStream("Workbook");
137             // construct out HSSFRequest object
138
HSSFRequest req = new HSSFRequest();
139             // lazy listen for ALL records with the listener shown above
140
req.addListenerForAllRecords(new POIEvent());
141             // create our event factory
142
HSSFEventFactory factory = new HSSFEventFactory();
143             // process our events based on the document input stream
144
factory.processEvents(req, din);
145             // once all the events are processed close our file input stream
146
in.close();
147             in2.close();
148             // and our document input stream (don't want to leak these!)
149
din.close();
150             parsedText = sb.toString().trim();
151
152
153         parsedText = TextFilter.filterForbiddenCharacters (
154           parsedText,
155           TextFilter.DEFAULT_FORBIDDEN_CHARACTERS);
156             r = null;
157             mypoi = null;
158             fs = null;
159             in = null;
160             in2 = null;
161             sb = null;
162         } catch (Throwable JavaDoc e) {
163             r = null;
164             mypoi = null;
165             fs = null;
166             try{
167                 in.close();
168                 in2.close();}
169             catch (Exception JavaDoc ex) {}
170             in = null;
171             in2 = null;
172             sb = null;
173             sb = null;
174             try{
175                 ParserManager.logger.debug("***** File could not be parsed: " + fileName);
176             } catch (Exception JavaDoc ex) {
177                 System.out.println("***** File could not be parsed: " + fileName);
178                 }
179             return;
180
181         }
182
183         
184         }
185       
186       /**
187        * This is a helper method to retrieve the value of a
188        * cell regardles of its type, which will be converted
189        * into a String.
190        *
191        * @param cell
192        * @return
193        */

194       private String JavaDoc getCellValue (HSSFCell cell) {
195         if (cell == null) return null;
196
197         String JavaDoc result = null;
198
199         int cellType = cell.getCellType();
200         switch (cellType) {
201           case HSSFCell.CELL_TYPE_BLANK:
202             result = "";
203             break;
204           case HSSFCell.CELL_TYPE_BOOLEAN:
205             result = cell.getBooleanCellValue() ?
206               "true" : "false";
207             break;
208           case HSSFCell.CELL_TYPE_ERROR:
209             result = "ERROR: " + cell.getErrorCellValue();
210             break;
211           case HSSFCell.CELL_TYPE_FORMULA:
212             result = cell.getCellFormula();
213             break;
214           case HSSFCell.CELL_TYPE_NUMERIC:
215             HSSFCellStyle cellStyle = cell.getCellStyle();
216             short dataFormat = cellStyle.getDataFormat();
217
218             // assumption is made that dataFormat = 15,
219
// when cellType is HSSFCell.CELL_TYPE_NUMERIC
220
// is equal to a DATE format.
221
if (dataFormat == 15) {
222               result = cell.getDateCellValue().toString();
223             } else {
224               result = String.valueOf (
225                 cell.getNumericCellValue());
226             }
227
228             break;
229           case HSSFCell.CELL_TYPE_STRING:
230             result = cell.getStringCellValue();
231             break;
232           default: break;
233         }
234
235         return result;
236       }
237
238       public void setFileName(String JavaDoc fileName) {
239         this.fileName = fileName;
240       }
241
242       public String JavaDoc getParsedText() {
243         return parsedText;
244       }
245       
246       public String JavaDoc getTitle() {
247         return title;
248       }
249       
250       class MyPOIFSReaderListener implements POIFSReaderListener
251         {
252             String JavaDoc title;
253             public void processPOIFSReaderEvent(POIFSReaderEvent event)
254             {
255                 SummaryInformation si = null;
256                 try
257                 {
258                     si = (SummaryInformation)
259                     PropertySetFactory.create(event.getStream());
260                 }
261                 catch (Exception JavaDoc ex)
262                 {
263                     throw new RuntimeException JavaDoc
264                         ("Property set stream \"" + event.getPath() +
265                             event.getName() + "\": " + ex);
266                 }
267
268                 title = si.getTitle();
269
270                 
271             }
272             
273             public String JavaDoc getTitle(){
274                 return title;
275             }
276         }
277         
278         
279 public class POIEvent
280         implements HSSFListener
281 {
282     private SSTRecord sstrec;
283  
284     /**
285      * This method listens for incoming records and handles them as required.
286      * @param record The record that was found while reading.
287      */

288     public void processRecord(Record record)
289     {
290         switch (record.getSid())
291         {
292             // the BOFRecord can represent either the beginning of a sheet or the workbook
293
case BOFRecord.sid:
294                 BOFRecord bof = (BOFRecord) record;
295                 if (bof.getType() == bof.TYPE_WORKBOOK)
296                 {
297                 // System.out.println("Encountered workbook");
298
// assigned to the class level member
299
} else if (bof.getType() == bof.TYPE_WORKSHEET)
300                 {
301                   // System.out.println("Encountered sheet reference");
302
}
303                 break;
304             case BoundSheetRecord.sid:
305                 BoundSheetRecord bsr = (BoundSheetRecord) record;
306               // System.out.println("New sheet named: " + bsr.getSheetname());
307
sb.append(bsr.getSheetname());
308                 break;
309             case RowRecord.sid:
310                 RowRecord rowrec = (RowRecord) record;
311               // System.out.println("Row found, first column at "
312
// + rowrec.getFirstCol() + " last column at " + rowrec.getLastCol());
313
break;
314             case NumberRecord.sid:
315                 NumberRecord numrec = (NumberRecord) record;
316            // System.out.println("Cell found with value " + numrec.getValue()
317
// + " at row " + numrec.getRow() + " and column " + numrec.getColumn());
318
break;
319                 // SSTRecords store a array of unique strings used in Excel.
320
case SSTRecord.sid:
321                 sstrec = (SSTRecord) record;
322                 for (int k = 0; k < sstrec.getNumUniqueStrings(); k++)
323                 {
324                  sb.append(sstrec.getString(k));
325             // System.out.println("String table value " + k + " = " + sstrec.getString(k));
326
}
327                 break;
328             case LabelSSTRecord.sid:
329                 LabelSSTRecord lrec = (LabelSSTRecord) record;
330                 sb.append(sstrec.getString(lrec.getSSTIndex()));
331             // System.out.println("String cell found with value "
332
// + sstrec.getString(lrec.getSSTIndex()));
333
break;
334         }
335     }
336  
337 }
338       
339     }
Popular Tags