package org.opencms.search.extractors;

import org.opencms.i18n.CmsEncoder;

import java.io.InputStream;
import java.util.Map;

import org.apache.poi.poifs.eventfilesystem.POIFSReader;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.util.LittleEndian;

/**
 * Extracts the text from an MS PowerPoint document.<p>
 *
 * The document is read with a POI {@link POIFSReader}; this class registers itself as the
 * {@link POIFSReaderListener} and collects the text of every text record it recognizes
 * in the PowerPoint stream.<p>
 *
 * Instances keep the text collected so far in a field, so a single instance must not be
 * used for two extractions at the same time.<p>
 */
public final class CmsExtractorMsPowerPoint extends A_CmsTextExtractorMsOfficeBase implements POIFSReaderListener {

    /** Initial capacity of the buffer that collects the extracted text. */
    private static final int INITIAL_BUFFER_SIZE = 4096;

    /** Collects the text found while the POI reader events are processed. */
    private StringBuffer m_buffer;

    /**
     * Hidden constructor, use {@link #getExtractor()} to obtain an instance.<p>
     */
    private CmsExtractorMsPowerPoint() {

        m_buffer = new StringBuffer(INITIAL_BUFFER_SIZE);
    }

    /**
     * Returns a new instance of this text extractor.<p>
     *
     * @return a new instance of this text extractor
     */
    public static I_CmsTextExtractor getExtractor() {

        return new CmsExtractorMsPowerPoint();
    }

    /**
     * Reads the given stream as a POI file system and returns the text collected from it
     * together with the document meta information.<p>
     *
     * The internal state is reset when this method returns, also if reading the stream fails,
     * so text from a failed extraction can not leak into a later call on the same instance.<p>
     *
     * @param in the stream to read the document from
     * @param encoding not used by this implementation
     *
     * @return the extraction result containing the text and the meta information
     *
     * @throws Exception if reading the stream fails
     */
    public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

        Map metaInfo;
        String result;
        try {
            POIFSReader reader = new POIFSReader();
            reader.registerListener(this);
            // reading triggers processPOIFSReaderEvent(...) which fills m_buffer
            reader.read(in);

            metaInfo = extractMetaInformation();
            result = removeControlChars(m_buffer.toString());
        } finally {
            // always start the next extraction with an empty state
            m_buffer = new StringBuffer(INITIAL_BUFFER_SIZE);
            cleanup();
        }

        return new CmsExtractionResult(result, metaInfo);
    }

    /**
     * Collects the text from a PowerPoint stream reported by the POI reader.<p>
     *
     * The event is first passed to the super implementation (presumably to collect the document
     * meta information - confirm against the base class). Events whose name does not start with
     * <code>POWERPOINT_EVENT_NAME</code> are ignored after that.<p>
     *
     * The stream content is scanned byte by byte: at every offset the 16 bit value at
     * <code>offset + 2</code> is taken as record type and the 32 bit value at <code>offset + 4</code>
     * as record length. Records of type <code>PPT_TEXTBYTE_ATOM</code> are decoded with
     * <code>ENCODING_CP1252</code>, records of type <code>PPT_TEXTCHAR_ATOM</code> with
     * <code>ENCODING_UTF16</code>, all other types are skipped.<p>
     *
     * This is a best effort operation: any exception stops processing of the event silently,
     * text appended before the failure is kept.<p>
     *
     * @param event the event to process
     */
    public void processPOIFSReaderEvent(POIFSReaderEvent event) {

        try {
            super.processPOIFSReaderEvent(event);

            if (!event.getName().startsWith(POWERPOINT_EVENT_NAME)) {
                // not the PowerPoint stream
                return;
            }

            DocumentInputStream input = event.getStream();
            byte[] buffer = new byte[input.available()];
            // a single read() call is not guaranteed to fill the buffer, so read until it is full
            int filled = 0;
            while (filled < buffer.length) {
                int count = input.read(buffer, filled, buffer.length - filled);
                if (count <= 0) {
                    break;
                }
                filled += count;
            }

            for (int i = 0; i < (buffer.length - 20); i++) {
                int type = LittleEndian.getUShort(buffer, i + 2);
                int size = (int)LittleEndian.getUInt(buffer, i + 4) + 3;

                String encoding;
                if (type == PPT_TEXTBYTE_ATOM) {
                    encoding = ENCODING_CP1252;
                } else if (type == PPT_TEXTCHAR_ATOM) {
                    encoding = ENCODING_UTF16;
                } else {
                    // not a text record
                    continue;
                }

                // The copy starts one byte after the start of the 4 byte length field and is
                // 3 bytes longer than the record length, so it ends at (i + 8 + length).
                // NOTE(review): the 3 leading bytes look intentional (they keep little endian
                // 2 byte text aligned for a big endian UTF-16 decoder) - confirm before changing.
                int start = i + 4 + 1;
                int end = start + size;

                // The size comes from the document and may be bogus. Check it before allocating
                // so that a broken length can not trigger a huge allocation; the scan stops here,
                // which is what the exception raised by the copy caused before.
                if ((size < 0) || (size > (buffer.length - start))) {
                    return;
                }

                byte[] buf = new byte[size];
                System.arraycopy(buffer, start, buf, 0, buf.length);

                m_buffer.append(CmsEncoder.createString(buf, encoding));
                // NOTE(review): together with the loop increment the scan continues at (end + 1),
                // so a record starting directly at "end" is not recognized - confirm if intended.
                i = end;
            }
        } catch (Exception ignored) {
            // deliberately ignored: text extraction is best effort, keep what was collected so far
        }
    }
}