1 16 package org.outerj.daisy.textextraction.impl; 17 18 import java.io.InputStream ; 19 import java.util.Iterator ; 20 21 import org.apache.poi.hssf.usermodel.HSSFWorkbook; 22 import org.apache.poi.hssf.usermodel.HSSFSheet; 23 import org.apache.poi.hssf.usermodel.HSSFRow; 24 import org.apache.poi.hssf.usermodel.HSSFCell; 25 26 29 public class MSExcelTextExtractor implements MimetypeTextExtractor { 30 public String getText(InputStream is) throws Exception { 31 HSSFWorkbook excelWb = new HSSFWorkbook(is); 32 StringBuffer contentBuffer = new StringBuffer (); 33 int numberOfSheets = excelWb.getNumberOfSheets(); 35 for(int i=0; i<numberOfSheets; i++) { 37 HSSFSheet sheet = excelWb.getSheetAt(i); 38 int numberOfRows = sheet.getPhysicalNumberOfRows(); 39 if(numberOfRows > 0) { 40 Iterator rowIt = sheet.rowIterator(); 42 while(rowIt.hasNext()) { 43 HSSFRow row = (HSSFRow)rowIt.next(); 44 if(row != null) { 45 Iterator it = row.cellIterator(); 46 while(it.hasNext()) { 47 HSSFCell cell = (HSSFCell)it.next(); 48 switch(cell.getCellType()) { 49 case HSSFCell.CELL_TYPE_NUMERIC: 50 String num = Double.toString(cell.getNumericCellValue()).trim(); 51 if(num.length() > 0) 52 contentBuffer.append(num + " "); 53 break; 54 case HSSFCell.CELL_TYPE_STRING: 55 try { 56 String text = cell.getStringCellValue().trim(); 57 if(text.length() > 0) 58 contentBuffer.append(text + " "); 59 } 60 catch(Exception e) { 61 } 62 break; 63 default: 64 try { 66 String otext = cell.getStringCellValue().trim(); 67 if(otext.length() > 0) 68 contentBuffer.append(otext + " "); 69 } 70 catch(Exception e) { 71 } 72 break; 73 } 74 } 75 } 76 } 77 } 78 } 79 80 return contentBuffer.toString(); 81 } 82 } 83 | Popular Tags |