KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > outerj > daisy > textextraction > impl > MSPowerPointTextExtractor


1 /*
2  * Copyright 2004 Outerthought bvba and Schaubroeck nv
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.outerj.daisy.textextraction.impl;
17
18 import org.apache.poi.util.LittleEndian;
19 import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
20 import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
21 import org.apache.poi.poifs.eventfilesystem.POIFSReader;
22 import org.apache.poi.poifs.filesystem.DocumentInputStream;
23 import java.io.*;
24
25 /**
26  * Text extractor for Microsoft PowerPoint files.
27  */

28 public class MSPowerPointTextExtractor implements MimetypeTextExtractor, POIFSReaderListener {
29
30     //private StringBuffer contentBuffer = new StringBuffer();
31
private ByteArrayOutputStream writer = new ByteArrayOutputStream();
32
33     public String JavaDoc getText(InputStream is) throws Exception JavaDoc {
34            POIFSReader reader = new POIFSReader();
35            reader.registerListener(this);
36            reader.read(is);
37
38            return writer.toString();
39     }
40     
41     public void processPOIFSReaderEvent(POIFSReaderEvent event) {
42         try {
43             DocumentInputStream input = event.getStream();
44
45             byte[] buffer = new byte[input.available()];
46             input.read(buffer, 0, input.available());
47             
48             for(int i=0; i<buffer.length-20; i++) {
49                 long type = LittleEndian.getUShort(buffer,i+2);
50                 long size = LittleEndian.getUInt(buffer,i+4);
51                 
52                 if(type==4008) {
53                     writer.write(buffer, i + 4 + 1, (int)size +3);
54                     i = i + 4 + 1 + (int)size -1;
55                 }
56             }
57         }
58         catch (Exception JavaDoc e) {
59             
60         }
61     }
62 }
63
Popular Tags