KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > excel > ExcelPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers.excel;
17
18 import com.blandware.atleap.common.Constants;
19 import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
20 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
21 import org.apache.poi.hssf.usermodel.HSSFCell;
22 import org.apache.poi.hssf.usermodel.HSSFRow;
23 import org.apache.poi.hssf.usermodel.HSSFSheet;
24 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
25 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
26
27 import java.io.IOException JavaDoc;
28 import java.io.InputStream JavaDoc;
29 import java.io.Writer JavaDoc;
30
31 /**
32  * An extractor that extracts a plain text from MS Excel workbooks.
33  *
34  * @see SpecificPlainTextExtractor
35  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
36  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
37  * @version $Revision: 1.5 $ $Date: 2006/03/16 11:09:36 $
38  */

39 public class ExcelPlainTextExtractor implements SpecificPlainTextExtractor {
40     /**
41      * Constructs new ExcelPlainTextExtractor instance
42      */

43     public ExcelPlainTextExtractor() {
44     }
45
46     /**
47      * Extracts a plain text from an MS Excel document.
48      *
49      * @param input the input stream that supplies an MS Excel document for
50      * extraction
51      * @param output the writer that will accept the extracted text
52      * @param encoding ignored
53      * @throws com.blandware.atleap.common.parsers.exception.PlainTextExtractorException throwed on exception raised during
54      * extracting
55      */

56     public void extract(InputStream JavaDoc input, Writer JavaDoc output, String JavaDoc encoding)
57             throws PlainTextExtractorException {
58         try {
59             POIFSFileSystem fs = new POIFSFileSystem(input);
60             HSSFWorkbook workbook = new HSSFWorkbook(fs);
61             int numberOfSheets = workbook.getNumberOfSheets();
62
63             // Cycle through sheets
64
for (int i = 0; i < numberOfSheets; i++) {
65                 // Extract a sheet name, if exists
66
String JavaDoc sheetName = workbook.getSheetName(i);
67                 if (sheetName != null) {
68                     output.write(sheetName);
69                     output.write(Constants.EOL);
70                 }
71                 HSSFSheet sheet = workbook.getSheetAt(i);
72                 if (sheet != null) {
73                     int firstRowNumber = sheet.getFirstRowNum();
74                     int lastRowNumber = sheet.getLastRowNum();
75                     // Cycle through rows
76
for (int j = firstRowNumber; j <= lastRowNumber; j++) {
77                         HSSFRow row = sheet.getRow(j);
78                         if (row != null) {
79                             short firstCellNumber = row.getFirstCellNum();
80                             short lastCellNumber = row.getLastCellNum();
81                             // This variable is needed to prevent extra spaces
82
// at beginning of lines
83
boolean cellIsPut = false;
84                             // Cycle throgh cells
85
for (short k = firstCellNumber; k <= lastCellNumber; k++) {
86                                 HSSFCell cell = row.getCell(k);
87                                 if (cell != null) {
88                                     int type = cell.getCellType();
89                                     String JavaDoc value = null;
90                                     switch (type) {
91                                     case HSSFCell.CELL_TYPE_NUMERIC:
92                                         value = "" + cell.getNumericCellValue();
93                                         break;
94                                     case HSSFCell.CELL_TYPE_STRING:
95                                         value = cell.getStringCellValue();
96                                         break;
97                                     case HSSFCell.CELL_TYPE_FORMULA:
98                                         value = "" + cell.getNumericCellValue();
99                                         break;
100                                     case HSSFCell.CELL_TYPE_BLANK:
101                                         value = "";
102                                         break;
103                                     case HSSFCell.CELL_TYPE_BOOLEAN:
104                                         value = cell.getBooleanCellValue()
105                                                 ? "true" : "false";
106                                         break;
107                                     case HSSFCell.CELL_TYPE_ERROR:
108                                         value = "";
109                                         break;
110                                     default:
111                                         value = "";
112                                         break;
113                                     }
114                                     if (cellIsPut) {
115                                         output.write(' ');
116                                     } else {
117                                         cellIsPut = true;
118                                     }
119                                     output.write(value);
120                                 }
121                             } // loop through cells
122
output.write(Constants.EOL);
123                         }
124                     } // loop through rows
125
}
126             } // loop through sheets
127
} catch (IOException JavaDoc e) {
128             throw new PlainTextExtractorException(e);
129         }
130     }
131
132     /**
133      * @see com.blandware.atleap.common.parsers.SpecificPlainTextExtractor#getUsedEncoding()
134      */

135     public String JavaDoc getUsedEncoding() {
136         return null;
137     }
138 }
139
Popular Tags