KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > textextraction > impl > MSExcelTextExtractor


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.textextraction.impl;
17
18 import java.io.InputStream JavaDoc;
19 import java.util.Iterator JavaDoc;
20
21 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
22 import org.apache.poi.hssf.usermodel.HSSFSheet;
23 import org.apache.poi.hssf.usermodel.HSSFRow;
24 import org.apache.poi.hssf.usermodel.HSSFCell;
25
26 /**
27  * Text extractor for Microsoft Excel files.
28  */

29 public class MSExcelTextExtractor implements MimetypeTextExtractor {
30     public String JavaDoc getText(InputStream JavaDoc is) throws Exception JavaDoc {
31         HSSFWorkbook excelWb = new HSSFWorkbook(is);
32         StringBuffer JavaDoc contentBuffer = new StringBuffer JavaDoc();
33         //contentBuffer.append("starting extraction\n");
34
int numberOfSheets = excelWb.getNumberOfSheets();
35         //contentBuffer.append("Number of sheets: " + numberOfSheets + "\n");
36
for(int i=0; i<numberOfSheets; i++) {
37             HSSFSheet sheet = excelWb.getSheetAt(i);
38             int numberOfRows = sheet.getPhysicalNumberOfRows();
39             if(numberOfRows > 0) {
40                 //contentBuffer.append("Number of rows: " + numberOfRows + "\n");
41
Iterator JavaDoc rowIt = sheet.rowIterator();
42                 while(rowIt.hasNext()) {
43                     HSSFRow row = (HSSFRow)rowIt.next();
44                     if(row != null) {
45                         Iterator JavaDoc it = row.cellIterator();
46                         while(it.hasNext()) {
47                             HSSFCell cell = (HSSFCell)it.next();
48                             switch(cell.getCellType()) {
49                                 case HSSFCell.CELL_TYPE_NUMERIC:
50                                     String JavaDoc num = Double.toString(cell.getNumericCellValue()).trim();
51                                     if(num.length() > 0)
52                                         contentBuffer.append(num + " ");
53                                     break;
54                                 case HSSFCell.CELL_TYPE_STRING:
55                                     try {
56                                         String JavaDoc text = cell.getStringCellValue().trim();
57                                         if(text.length() > 0)
58                                             contentBuffer.append(text + " ");
59                                     }
60                                     catch(Exception JavaDoc e) {
61                                     }
62                                     break;
63                                 default:
64                                     //might cause error !!!
65
try {
66                                         String JavaDoc otext = cell.getStringCellValue().trim();
67                                         if(otext.length() > 0)
68                                             contentBuffer.append(otext + " ");
69                                     }
70                                     catch(Exception JavaDoc e) {
71                                     }
72                                     break;
73                             }
74                         }
75                     }
76                 }
77             }
78         }
79         
80         return contentBuffer.toString();
81     }
82 }
83
Popular Tags