1 23 24 package org.apache.slide.extractor; 25 26 import org.apache.poi.util.LittleEndian; 27 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener; 28 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent; 29 import org.apache.poi.poifs.eventfilesystem.POIFSReader; 30 import org.apache.poi.poifs.filesystem.DocumentInputStream; 31 32 import java.io.*; 33 34 39 public class MSPowerPointExtractor extends AbstractContentExtractor implements POIFSReaderListener 40 { 41 private ByteArrayOutputStream writer = new ByteArrayOutputStream(); 42 43 public MSPowerPointExtractor(String uri, String contentType, String namespace) { 44 super(uri, contentType, namespace); 45 } 46 47 public Reader extract(InputStream content) throws ExtractorException { 48 try { 49 POIFSReader reader = new POIFSReader(); 50 reader.registerListener(this); 51 reader.read(content); 52 53 return new InputStreamReader(new ByteArrayInputStream(writer.toByteArray())); 54 } 55 catch(Exception e) { 56 throw new ExtractorException(e.getMessage()); 57 } 58 } 59 60 public void processPOIFSReaderEvent(POIFSReaderEvent event) 61 { 62 try{ 63 if(!event.getName().equalsIgnoreCase("PowerPoint Document")) 64 return; 65 66 DocumentInputStream input = event.getStream(); 67 68 byte[] buffer = new byte[input.available()]; 69 input.read(buffer, 0, input.available()); 70 71 for(int i=0; i<buffer.length-20; i++) 72 { 73 long type = LittleEndian.getUShort(buffer,i+2); 74 long size = LittleEndian.getUInt(buffer,i+4); 75 76 if(type==4008) 77 { 78 writer.write(buffer, i + 4 + 1, (int) size +3); 79 i = i + 4 + 1 + (int) size - 1; 80 81 } 82 } 83 } 84 catch (Exception e) 85 { 86 87 } 88 } 89 90 public static void main(String [] args) throws Exception 91 { 92 FileInputStream in = new FileInputStream(args[0]); 93 94 MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null, null); 95 96 Reader reader = ex.extract(in); 97 98 int c; 99 do 100 { 101 c = reader.read(); 102 103 System.out.print((char)c); 104 } 105 while( c != -1 ); 106 } 107 } | Popular Tags |