KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > blandware > atleap > common > parsers > ppt > PowerPointPlainTextExtractor


1 /*
2  * Copyright 2005 Blandware (http://www.blandware.com)
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package com.blandware.atleap.common.parsers.ppt;
17
18 import com.blandware.atleap.common.parsers.exception.PlainTextExtractorException;
19 import com.blandware.atleap.common.parsers.SpecificPlainTextExtractor;
20 import com.blandware.atleap.common.Constants;
21 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
22 import org.apache.poi.util.LittleEndian;
23
24 import java.io.IOException JavaDoc;
25 import java.io.InputStream JavaDoc;
26 import java.io.Writer JavaDoc;
27 import java.util.HashMap JavaDoc;
28 import java.util.Map JavaDoc;
29
30 /**
31  * An extractor that extracts a plain text from MS PowerPoint presentations.
32  * Currently PowerPoint'97 and higher supported.
33  *
34  * @see SpecificPlainTextExtractor
35  * @author Roman Puchkovskiy <a HREF="mailto:roman.puchkovskiy@blandware.com">
36  * &lt;roman.puchkovskiy@blandware.com&gt;</a>
37  * @version $Revision: 1.3 $ $Date: 2005/08/14 12:27:55 $
38  */

39 public class PowerPointPlainTextExtractor
40         implements SpecificPlainTextExtractor {
41     /*
42      * PowerPoint'97 presentation is stored in "PowerPoint Document" stream.
43      * It is a sequence of records of the following format:
44      * + unsigned short - code; a record is a container if and
45      * only if ((code & 0xF) == 0xF)
46      * + unsigned short - type of the record
47      * + unsigned int - size of the record
48      * + size bytes - content of the record
49      * If the record is a container then its content is a similar sequence of
50      * records. Thus the presentation data is a tree, which leaves are called
51      * 'atoms'.
52      * This class uses some sort of a finite automation, where 'symbols' are
53      * types of records. When a parser goes down the document tree, it
54      * 'feeds' these types to the 'automation' so its state changes. When a
55      * parser reaches an atom, it triggers some action.
56     */

57
58     private static final int RECORD_HEADER_LEN = 8;
59     private static final int MAGIC = -476987297;
60
61     // Pseudo record type to handle the end of a container
62
private static final int SYMBOL_CONTAINER_END = -1;
63
64     // States of an 'automation'
65
private static final int STATE_NONE = -1;
66     private static final int STATE_SLIDE = 3;
67     private static final int STATE_SLIDE_PPDRAWING = 4;
68     private static final int STATE_SLIDE_PPDRAWING_61442 = 5;
69     private static final int STATE_SLIDE_PPDRAWING_61442_61443 = 6;
70     private static final int STATE_SLIDE_PPDRAWING_61442_61443_61444 = 7;
71     private static final int STATE_SLIDE_PPDRAWING_61442_61443_61444_61453 = 8;
72     private static final int STATE_NOTES = 9;
73     private static final int STATE_NOTES_PPDRAWING = 10;
74     private static final int STATE_NOTES_PPDRAWING_61442 = 11;
75     private static final int STATE_NOTES_PPDRAWING_61442_61443 = 12;
76     private static final int STATE_NOTES_PPDRAWING_61442_61443_61444 = 13;
77     private static final int STATE_NOTES_PPDRAWING_61442_61443_61444_61453 = 14;
78
79     private static final int STATE_DOC_BEGIN = 15;
80     private static final int STATE_HEADER_FOOTER = 16;
81     private static final int STATE_SLIDE_LIST_WITH_TEXT = 17;
82
83     private static final int STATE_MAIN_MASTER = 40;
84
85     // 'Automation' actions - triggered on atoms
86
private static final int ACTION_HANDLE_U16 = 0;
87     private static final int ACTION_HANDLE_ASCII = 1;
88     private static final int ACTION_HANDLE_FORMATTED = 2;
89     private static final int ACTION_HANDLE_HEADER_FOOTER = 3;
90     private static final int ACTION_HANDLE_SLIDE_PERSIST_ATOM = 4;
91     private static final int ACTION_HANDLE_PERSIST_TEXT_ASCII = 5;
92     private static final int ACTION_HANDLE_PERSIST_TEXT_U16 = 6;
93     private static final int ACTION_HANDLE_SLIDE_LIST_END = 7;
94
95     // Record types
96
private static final int RECORD_TYPE_DOCUMENT = 1000;
97     private static final int RECORD_TYPE_SLIDE = 1006;
98     private static final int RECORD_TYPE_NOTES = 1008;
99     private static final int RECORD_TYPE_SLIDE_PERSIST_ATOM = 1011;
100     private static final int RECORD_TYPE_MAIN_MASTER = 1016;
101     private static final int RECORD_TYPE_PPDRAWING = 1036;
102     private static final int RECORD_TYPE_TEXT_CHARS_ATOM = 4000;
103     private static final int RECORD_TYPE_TEXT_BYTES_ATOM = 4008;
104     private static final int RECORD_TYPE_CSTRING = 4026;
105     private static final int RECORD_TYPE_HEADERS_FOOTERS = 4057;
106     private static final int RECORD_TYPE_SLIDE_LIST_WITH_TEXT = 4080;
107
108     private Writer JavaDoc writer;
109     // Stores a transition function for an "automation"
110
private static Map JavaDoc map;
111     // Reference to current slide
112
private int curSlideRef;
113     // Texts left in current slide
114
private int textsLeft;
115     private long[] persistentDirectories;
116     private boolean inMasterSlideList;
117     // If we didn't parse any slide yet in current list
118
private boolean firstSlide;
119
120     static {
121         map = new HashMap JavaDoc();
122
123         // In the beginning of the document
124
map.put(new TransitionFunctionArg(STATE_DOC_BEGIN, RECORD_TYPE_HEADERS_FOOTERS),
125                 new TransitionFunctionValue(STATE_HEADER_FOOTER, 0));
126         map.put(new TransitionFunctionArg(STATE_DOC_BEGIN, RECORD_TYPE_SLIDE_LIST_WITH_TEXT),
127                 new TransitionFunctionValue(STATE_SLIDE_LIST_WITH_TEXT, 0));
128
129         // In slide list with text
130
map.put(new TransitionFunctionArg(STATE_SLIDE_LIST_WITH_TEXT, RECORD_TYPE_SLIDE_PERSIST_ATOM),
131                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_SLIDE_PERSIST_ATOM));
132         map.put(new TransitionFunctionArg(STATE_SLIDE_LIST_WITH_TEXT, RECORD_TYPE_TEXT_BYTES_ATOM),
133                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_PERSIST_TEXT_ASCII));
134         map.put(new TransitionFunctionArg(STATE_SLIDE_LIST_WITH_TEXT, RECORD_TYPE_TEXT_CHARS_ATOM),
135                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_PERSIST_TEXT_U16));
136         map.put(new TransitionFunctionArg(STATE_SLIDE_LIST_WITH_TEXT, SYMBOL_CONTAINER_END),
137                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_SLIDE_LIST_END));
138
139         // In main master
140
map.put(new TransitionFunctionArg(STATE_MAIN_MASTER, RECORD_TYPE_HEADERS_FOOTERS),
141                 new TransitionFunctionValue(STATE_HEADER_FOOTER, 0));
142
143         // In slide
144
map.put(new TransitionFunctionArg(STATE_SLIDE, RECORD_TYPE_HEADERS_FOOTERS),
145                 new TransitionFunctionValue(STATE_HEADER_FOOTER, 0));
146         map.put(new TransitionFunctionArg(STATE_SLIDE, RECORD_TYPE_CSTRING),
147                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_U16));
148         map.put(new TransitionFunctionArg(STATE_SLIDE, RECORD_TYPE_PPDRAWING),
149                 new TransitionFunctionValue(STATE_SLIDE_PPDRAWING, 0));
150         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING, 61442),
151                 new TransitionFunctionValue(STATE_SLIDE_PPDRAWING_61442, 0));
152         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING_61442, 61443),
153                 new TransitionFunctionValue(STATE_SLIDE_PPDRAWING_61442_61443, 0));
154         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING_61442_61443, 61443),
155                 new TransitionFunctionValue(STATE_SLIDE_PPDRAWING_61442_61443, 0));
156         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING_61442_61443, 61444),
157                 new TransitionFunctionValue(STATE_SLIDE_PPDRAWING_61442_61443_61444, 0));
158         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING_61442_61443_61444, 61453),
159                 new TransitionFunctionValue(STATE_SLIDE_PPDRAWING_61442_61443_61444_61453, 0));
160         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING_61442_61443_61444_61453, RECORD_TYPE_TEXT_CHARS_ATOM),
161                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_U16));
162         map.put(new TransitionFunctionArg(STATE_SLIDE_PPDRAWING_61442_61443_61444_61453, RECORD_TYPE_TEXT_BYTES_ATOM),
163                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_ASCII));
164
165         // In notes
166
map.put(new TransitionFunctionArg(STATE_NOTES, RECORD_TYPE_CSTRING),
167                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_U16));
168         map.put(new TransitionFunctionArg(STATE_NOTES, RECORD_TYPE_PPDRAWING),
169                 new TransitionFunctionValue(STATE_NOTES_PPDRAWING, 0));
170         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING, 61442),
171                 new TransitionFunctionValue(STATE_NOTES_PPDRAWING_61442, 0));
172         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING_61442, 61443),
173                 new TransitionFunctionValue(STATE_NOTES_PPDRAWING_61442_61443, 0));
174         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING_61442_61443, 61443),
175                 new TransitionFunctionValue(STATE_NOTES_PPDRAWING_61442_61443, 0));
176         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING_61442_61443, 61444),
177                 new TransitionFunctionValue(STATE_NOTES_PPDRAWING_61442_61443_61444, 0));
178         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING_61442_61443_61444, 61453),
179                 new TransitionFunctionValue(STATE_NOTES_PPDRAWING_61442_61443_61444_61453, 0));
180         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING_61442_61443_61444_61453, RECORD_TYPE_TEXT_CHARS_ATOM),
181                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_U16));
182         map.put(new TransitionFunctionArg(STATE_NOTES_PPDRAWING_61442_61443_61444_61453, RECORD_TYPE_TEXT_BYTES_ATOM),
183                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_ASCII));
184
185         // In header-footer
186
map.put(new TransitionFunctionArg(STATE_HEADER_FOOTER, RECORD_TYPE_CSTRING),
187                 new TransitionFunctionValue(STATE_NONE, ACTION_HANDLE_HEADER_FOOTER));
188     }
189
190     public PowerPointPlainTextExtractor() {
191         writer = null;
192     }
193
194     /**
195      * Extracts a plain text from an MS PowerPoint presentation.
196      *
197      * @param input the input stream that supplies an MS PowerPoint presentation
198      * for extraction
199      * @param output the writer that will accept the extracted text
200      * @param encoding ignored
201      * @throws PlainTextExtractorException throwed on exception raised during
202      * extracting
203      */

204     public void extract(InputStream JavaDoc input, Writer JavaDoc output, String JavaDoc encoding)
205             throws PlainTextExtractorException {
206         writer = output;
207
208         try {
209             POIFSFileSystem fs = new POIFSFileSystem(input);
210             // First obtain a UserEdit record
211
InputStream JavaDoc currentUserStream = fs.createDocumentInputStream("Current User");
212             byte[] currentUserBytes = new byte[currentUserStream.available()];
213             currentUserStream.read(currentUserBytes);
214             currentUserStream.close();
215             int magic = LittleEndian.getInt(currentUserBytes, 12);
216             if (magic != MAGIC) {
217                 throw new PlainTextExtractorException("That's not a supported PowerPoint Document");
218             } else {
219                 int lastEdit = (int) LittleEndian.getUInt(currentUserBytes, 16);
220                 InputStream JavaDoc documentStream = fs.createDocumentInputStream("PowerPoint Document");
221                 byte[] documentBytes = new byte[documentStream.available()];
222                 documentStream.read(documentBytes);
223                 documentStream.close();
224
225                 // Map persistent references to document stream offsets
226
persistentDirectories = buildPersistentDirectories(documentBytes, lastEdit);
227
228                 // Obtain a Document record
229
int refToDoc = (int) LittleEndian.getUInt(documentBytes, lastEdit + RECORD_HEADER_LEN + 16);
230                 if (persistentDirectories[refToDoc] != -1) {
231                     int offsetToDoc = (int) persistentDirectories[refToDoc];
232                     int docLen = (int) LittleEndian.getUInt(documentBytes, offsetToDoc + 4);
233                     int docType = LittleEndian.getUShort(documentBytes, offsetToDoc + 2);
234                     if (docType != RECORD_TYPE_DOCUMENT) {
235                         throw new PlainTextExtractorException("PPT parser: Document container expected");
236                     } else {
237                         inMasterSlideList = false;
238                         firstSlide = true;
239                         // Parse document
240
decode(documentBytes, offsetToDoc + RECORD_HEADER_LEN,
241                                docLen, STATE_DOC_BEGIN);
242                     }
243                 }
244             }
245         } catch (IOException JavaDoc e) {
246             throw new PlainTextExtractorException(e);
247         }
248     }
249
250     /**
251      * Extracts persistent directories which show what records are 'up to date'
252      * -- those, that contain last edits.
253      *
254      * @param documentBytes bytes that are extracted from the
255      * 'Powerpoint Document' stream
256      * @param lastEdit the offset of the last <code>UserEdit</code> record
257      * @return array of long with indices from 0 to
258      * <code>maxPersistWritten</code> where values are -1 if that reference
259      * is not exist, and offset of the corresponding record otherwise.
260      * Reference 0 is invalid so the value at index 0 is always -1.
261      */

262     private long[] buildPersistentDirectories(byte[] documentBytes,
263                                               int lastEdit) {
264         // Allocate an array of sufficient size
265
int maxPersistWritten = (int) LittleEndian.getUInt(documentBytes, lastEdit + RECORD_HEADER_LEN + 20);
266         long[] result = new long[maxPersistWritten + 1];
267         for (int i = 0; i <= maxPersistWritten; i++) {
268             result[i] = -1;
269         }
270
271         do {
272             int curDirs = (int) LittleEndian.getUInt(documentBytes, lastEdit + RECORD_HEADER_LEN + 12);
273             addPersistantDirectories(result, documentBytes, curDirs);
274             // Get previous UserEdit record
275
lastEdit = (int) LittleEndian.getUInt(documentBytes, lastEdit + RECORD_HEADER_LEN + 8);
276         } while (lastEdit != 0);
277
278         return result;
279     }
280
281     /**
282      * Processes directories for a particular 'edit'. This method is called
283      * from last to first 'edits' so it only adds new directories and doesn't
284      * modify the existing ones.
285      *
286      * @param dirs existing directories (extracted from later 'edits')
287      * @param documentBytes bytes that are extracted from the
288      * 'Powerpoint Document' stream
289      * @param curDirs the offset of the <code>PersistPtrIncrementalBlock</code>
290      * that will supply directories
291      */

292     private void addPersistantDirectories(long[] dirs, byte[] documentBytes,
293                                           int curDirs) {
294         long size = LittleEndian.getUInt(documentBytes, curDirs + 4);
295         int pointer = 0;
296         while (pointer < size) {
297             long firstField = LittleEndian.getUInt(documentBytes, curDirs + RECORD_HEADER_LEN + pointer);
298             pointer += 4;
299             // Number of references in the chunk
300
int refNum = (int) (firstField >> 20);
301             // Starting index for the chunk
302
int refStart = (int) (firstField & 0xFFFFF);
303             for (int i = 0; (i < refNum) && (pointer < size); i++) {
304                 if (dirs[refStart + i] == -1) {
305                     dirs[refStart + i] = LittleEndian.getUInt(documentBytes, curDirs + RECORD_HEADER_LEN + pointer);
306                 }
307                 pointer += 4;
308             }
309         }
310     }
311
312     /**
313      * Processes a sequence of records. For each record get its <code>type</code>
314      * and, if 'automation' knows, what to do with that record, process it,
315      * otherwise bypass the record.
316      *
317      * @param buffer byte array which contains bytes representing records
318      * @param begin the beginning of first record in the <code>buffer</code>
319      * @param len length of records to process
320      * @param state current 'automation' state
321      * @throws IOException
322      */

323     private void decode(byte[] buffer, int begin, int len, int state)
324             throws IOException JavaDoc {
325         // TODO: Support for PowerPoint 95 (and older?) presentations
326
if ((state == STATE_SLIDE_PPDRAWING || state == STATE_NOTES_PPDRAWING) && inMasterSlideList) {
327             // Ignore content of masters
328
return;
329         }
330
331         int pointer = 0;
332         TransitionFunctionArg arg;
333         TransitionFunctionValue value;
334
335         while (pointer < len) {
336             int code = LittleEndian.getUShort(buffer, begin + pointer);
337             int type = LittleEndian.getUShort(buffer, begin + pointer + 2);
338             long size = LittleEndian.getUInt(buffer, begin + pointer + 4);
339
340             // Get 'automation' instructions
341
arg = new TransitionFunctionArg(state, type);
342             value = (TransitionFunctionValue) map.get(arg);
343             if (value != null) {
344                 // This record is acceptable in current state, process it
345
boolean recordIsContainer = ((code & 0xF) == 0xF);
346                 if (recordIsContainer) {
347                     if (value.state == STATE_SLIDE_LIST_WITH_TEXT && code == 31) {
348                         // That is a container that contains masters
349
boolean temp = inMasterSlideList;
350                         inMasterSlideList = true;
351                         decode(buffer, begin + pointer + RECORD_HEADER_LEN, (int) size, value.state);
352                         inMasterSlideList = temp;
353                     } else {
354                         decode(buffer, begin + pointer + RECORD_HEADER_LEN, (int) size, value.state);
355                     }
356                 } else {
357                     executeAction(value.action, buffer, begin + pointer + RECORD_HEADER_LEN, size);
358                 }
359             }
360             pointer += (size + RECORD_HEADER_LEN);
361         }
362
363         // Process 'container end' event
364
arg = new TransitionFunctionArg(state, SYMBOL_CONTAINER_END);
365         value = (TransitionFunctionValue) map.get(arg);
366         if (value != null) {
367             executeAction(value.action, buffer, 0, 0);
368         }
369     }
370
371     /**
372      * Executes an actions that correspond to particular atom in particular
373      * state.
374      *
375      * @param action action to execute
376      * @param buffer byte buffer that contains the atom record
377      * @param begin the beginning of the atom in the <code>buffer</code>
378      * @param len length of atom
379      * @throws IOException
380      */

381     private void executeAction(int action, byte[] buffer, int begin, long len)
382             throws IOException JavaDoc {
383         switch (action) {
384         case ACTION_HANDLE_ASCII:
385             handleAscii(buffer, begin, (int) len);
386             break;
387         case ACTION_HANDLE_U16:
388             handleU16(buffer, begin, (int) len);
389             break;
390         case ACTION_HANDLE_FORMATTED:
391             handleFormatted(buffer, begin, (int) len);
392             break;
393         case ACTION_HANDLE_HEADER_FOOTER:
394             handleHeaderFooter(buffer, begin, (int) len);
395             break;
396         case ACTION_HANDLE_SLIDE_PERSIST_ATOM:
397             handleSlidePersistAtom(buffer, begin, (int) len);
398             break;
399         case ACTION_HANDLE_PERSIST_TEXT_ASCII:
400             handlePersistTextAscii(buffer, begin, (int) len);
401             break;
402         case ACTION_HANDLE_PERSIST_TEXT_U16:
403             handlePersistTextU16(buffer, begin, (int) len);
404             break;
405         case ACTION_HANDLE_SLIDE_LIST_END:
406             handleSlideListEnd(buffer, begin, (int) len);
407             break;
408         default:
409             break;
410         }
411     }
412
413     private void handleSlideListEnd(byte[] buffer, int begin, int len)
414             throws IOException JavaDoc {
415         if (!firstSlide) {
416             // Process current slide
417
handleSlideBegin(buffer);
418         }
419         firstSlide = true;
420     }
421
422     private void handlePersistTextAscii(byte[] buffer, int begin, int len)
423             throws IOException JavaDoc {
424         handleAscii(buffer, begin, len);
425         if (textsLeft > 0) {
426             textsLeft--;
427         }
428     }
429
430     private void handlePersistTextU16(byte[] buffer, int begin, int len)
431             throws IOException JavaDoc {
432         handleU16(buffer, begin, len);
433         if (textsLeft > 0) {
434             textsLeft--;
435         }
436     }
437
438     private void handleSlidePersistAtom(byte[] buffer, int begin, int len)
439             throws IOException JavaDoc {
440         // This will begin the next slide
441
if (!firstSlide) {
442             // Process current slide
443
handleSlideBegin(buffer);
444         } else {
445             firstSlide = false;
446         }
447         curSlideRef = (int) LittleEndian.getUInt(buffer, begin);
448         textsLeft = (int) LittleEndian.getUInt(buffer, begin + 8);
449     }
450
451     private void handleSlideBegin(byte[] buffer) throws IOException JavaDoc {
452         int curSlideOffset = (int) persistentDirectories[curSlideRef];
453         int curSlideType = LittleEndian.getUShort(buffer, curSlideOffset + 2);
454         int curSlideSize = (int) LittleEndian.getUInt(buffer, curSlideOffset + 4);
455         switch (curSlideType) {
456         case RECORD_TYPE_SLIDE:
457             decode(buffer, curSlideOffset + RECORD_HEADER_LEN,
458                    curSlideSize, STATE_SLIDE);
459             break;
460         case RECORD_TYPE_NOTES:
461             decode(buffer, curSlideOffset + RECORD_HEADER_LEN,
462                    curSlideSize, STATE_NOTES);
463             break;
464         case RECORD_TYPE_MAIN_MASTER:
465             decode(buffer, curSlideOffset + RECORD_HEADER_LEN,
466                    curSlideSize, STATE_MAIN_MASTER);
467             break;
468         default:
469             break;
470         }
471     }
472
473     private void handleHeaderFooter(byte[] buffer, int begin, int len)
474             throws IOException JavaDoc {
475         writer.write(new String JavaDoc(buffer, begin, len, "UTF-16LE"));
476         writer.write(Constants.EOL);
477     }
478
479     private void handleAscii(byte[] buffer, int begin, int len)
480             throws IOException JavaDoc {
481         writer.write(new String JavaDoc(buffer, begin, len));
482         writer.write(Constants.EOL);
483     }
484
485     private void handleU16(byte[] buffer, int begin, int len)
486             throws IOException JavaDoc {
487         writer.write(new String JavaDoc(buffer, begin, len, "UTF-16LE"));
488         writer.write(Constants.EOL);
489     }
490
491     private void handleFormatted(byte[] buffer, int begin, int len) {
492         // TODO: Extract text from "formatted" records
493
}
494
495     /**
496      * Just a struct with two fields: <code>state</code> and <code>type</code>.
497      * Represents an argument of the transition function.
498      */

499     private static class TransitionFunctionArg {
500         public int state;
501         public int type;
502
503         TransitionFunctionArg(int newState, int newType) {
504             state = newState;
505             type = newType;
506         }
507
508         public boolean equals(Object JavaDoc obj) {
509             TransitionFunctionArg arg = (TransitionFunctionArg) obj;
510             return arg.state == this.state && arg.type == this.type;
511         }
512
513         public int hashCode() {
514             int result;
515             result = state;
516             result = 29 * result + type;
517             return result;
518         }
519     }
520
521     /**
522      * Just a struct with two fields: <code>state</code> and <code>action</code>.
523      * Represents a value of the transition function.
524      */

525     private static class TransitionFunctionValue {
526         public int state;
527         public int action;
528
529         TransitionFunctionValue(int newState, int newAction) {
530             state = newState;
531             action = newAction;
532         }
533     }
534
535     /**
536      * @see com.blandware.atleap.common.parsers.SpecificPlainTextExtractor#getUsedEncoding()
537      */

538     public String JavaDoc getUsedEncoding() {
539         return null;
540     }
541 }
542
Popular Tags