package org.opencms.search.extractors;

import org.opencms.util.CmsStringUtil;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.Map;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.eventfilesystem.POIFSReader;

/**
 * Text extractor for MS Excel (HSSF) workbooks.<p>
 *
 * The cell contents of all sheets are collected into one plain text String,
 * the document meta information is obtained through the MS Office base class.<p>
 *
 * Only a single shared instance exists, use {@link #getExtractor()} to access it.<p>
 */
public final class CmsExtractorMsExcel extends A_CmsTextExtractorMsOfficeBase {

    /** The one and only instance of this extractor. */
    private static final CmsExtractorMsExcel INSTANCE = new CmsExtractorMsExcel();

    /**
     * Hidden constructor, instances are handed out by {@link #getExtractor()} only.<p>
     */
    private CmsExtractorMsExcel() {

        // nothing to initialize
    }

    /**
     * Returns the shared extractor instance.<p>
     *
     * @return the shared extractor instance
     */
    public static I_CmsTextExtractor getExtractor() {

        return INSTANCE;
    }

    /**
     * Extracts the text content and the meta information from an Excel document.<p>
     *
     * The stream is handed to {@link #getStreamCopy(InputStream)} twice: once for
     * reading the table content and once for the POIFS reader this extractor is
     * registered on as listener (presumably the base class buffers the input so
     * that it can be read repeatedly, verify against the base class).<p>
     *
     * @param in the input stream providing the Excel document
     * @param encoding not used by this implementation
     *
     * @return the extraction result built from the cleaned text and the meta information
     *
     * @throws Exception if reading the document fails
     */
    public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

        // first pass: plain text of all sheets, with control chars removed
        String content = removeControlChars(extractTableContent(getStreamCopy(in)));

        // second pass: let the POIFS reader feed this listener, then query the meta information
        POIFSReader poiReader = new POIFSReader();
        poiReader.registerListener(this);
        poiReader.read(getStreamCopy(in));
        Map meta = extractMetaInformation();

        return new CmsExtractionResult(content, meta);
    }

    /**
     * Collects the text of all cells of all sheets in the workbook read from the given stream.<p>
     *
     * For every sheet with at least one physical row, the trimmed sheet name followed by
     * <code>":\n\n"</code> is written first (if the name is not empty), separated from the
     * previous output by <code>"\n\n"</code> for all but the first sheet index.
     * After that, each row with content is written as one line of blank separated cell values.<p>
     *
     * @param in the input stream providing the Excel workbook
     *
     * @return the collected text of the workbook
     *
     * @throws IOException if the workbook can not be read from the stream
     */
    protected String extractTableContent(InputStream in) throws IOException {

        HSSFWorkbook workbook = new HSSFWorkbook(in);
        StringBuffer buffer = new StringBuffer(4096);

        int sheetCount = workbook.getNumberOfSheets();
        for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
            HSSFSheet sheet = workbook.getSheetAt(sheetIndex);
            if (sheet.getPhysicalNumberOfRows() <= 0) {
                // sheets without rows contribute nothing, not even their name
                continue;
            }

            String sheetName = workbook.getSheetName(sheetIndex);
            if (CmsStringUtil.isNotEmpty(sheetName)) {
                if (sheetIndex > 0) {
                    // visually separate this sheet from the previous output
                    buffer.append("\n\n");
                }
                buffer.append(sheetName.trim()).append(":\n\n");
            }

            for (Iterator rows = sheet.rowIterator(); rows.hasNext();) {
                HSSFRow row = (HSSFRow)rows.next();
                if (row == null) {
                    continue;
                }
                appendRowContent(buffer, row);
            }
        }

        return buffer.toString();
    }

    /**
     * Appends the values of all non-empty cells of a row to the buffer, each followed by a blank,
     * and terminates the row with a line break in case at least one value was written.<p>
     *
     * @param buffer the buffer to append the row content to
     * @param row the row to read the cells from
     */
    private static void appendRowContent(StringBuffer buffer, HSSFRow row) {

        int lengthBeforeRow = buffer.length();
        for (Iterator cells = row.cellIterator(); cells.hasNext();) {
            String value = readCellText((HSSFCell)cells.next());
            if (CmsStringUtil.isNotEmpty(value)) {
                buffer.append(value.trim()).append(' ');
            }
        }
        // every written value adds at least the blank, so a grown buffer means the row had content
        if (buffer.length() > lengthBeforeRow) {
            buffer.append('\n');
        }
    }

    /**
     * Returns the content of a cell as text.<p>
     *
     * Blank and error cells yield <code>null</code>, numeric and boolean cells are converted
     * with <code>Double.toString</code> / <code>Boolean.toString</code>, all other cell types
     * are read as String value. Any exception raised while reading the cell is swallowed
     * and also results in <code>null</code>, so that a single unreadable cell does not
     * abort the extraction.<p>
     *
     * @param cell the cell to read
     *
     * @return the cell content as text, or <code>null</code> if there is none or it can not be read
     */
    private static String readCellText(HSSFCell cell) {

        try {
            switch (cell.getCellType()) {
                case HSSFCell.CELL_TYPE_BLANK:
                case HSSFCell.CELL_TYPE_ERROR:
                    return null;
                case HSSFCell.CELL_TYPE_NUMERIC:
                    return Double.toString(cell.getNumericCellValue());
                case HSSFCell.CELL_TYPE_BOOLEAN:
                    return Boolean.toString(cell.getBooleanCellValue());
                case HSSFCell.CELL_TYPE_STRING:
                default:
                    return cell.getStringCellValue();
            }
        } catch (Exception ignored) {
            // deliberately ignored: this cell is skipped
            return null;
        }
    }
}