KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > pdfbox > util > PDFStreamEngine


1 /**
2  * Copyright (c) 2003-2006, www.pdfbox.org
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  * 3. Neither the name of pdfbox; nor the names of its
14  * contributors may be used to endorse or promote products derived from this
15  * software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * http://www.pdfbox.org
29  *
30  */

31 package org.pdfbox.util;
32
33 import java.io.IOException JavaDoc;
34
35 import java.util.ArrayList JavaDoc;
36 import java.util.HashMap JavaDoc;
37 import java.util.Iterator JavaDoc;
38 import java.util.List JavaDoc;
39 import java.util.Map JavaDoc;
40 import java.util.Properties JavaDoc;
41 import java.util.Stack JavaDoc;
42
43 import org.pdfbox.cos.COSObject;
44 import org.pdfbox.cos.COSStream;
45 import org.pdfbox.exceptions.WrappedIOException;
46
47 import org.pdfbox.pdmodel.PDPage;
48 import org.pdfbox.pdmodel.PDResources;
49
50 import org.pdfbox.pdmodel.font.PDFont;
51
52 import org.pdfbox.pdmodel.graphics.PDGraphicsState;
53
54 import org.pdfbox.util.operator.OperatorProcessor;
55
56 /**
57  * This class will run through a PDF content stream and execute certain operations
58  * and provide a callback interface for clients that want to do things with the stream.
59  * See the PDFTextStripper class for an example of how to use this class.
60  *
61  * @author <a HREF="mailto:ben@benlitchfield.com">Ben Litchfield</a>
62  * @version $Revision: 1.35 $
63  */

64 public class PDFStreamEngine
65 {
66     private static final byte[] SPACE_BYTES = { (byte)32 };
67
68     private PDGraphicsState graphicsState = null;
69
70     private Matrix textMatrix = null;
71     private Matrix textLineMatrix = null;
72     private Stack JavaDoc graphicsStack = new Stack JavaDoc();
73     //private PDResources resources = null;
74

75     private Map JavaDoc operators = new HashMap JavaDoc();
76     
77     private Stack JavaDoc streamResourcesStack = new Stack JavaDoc();
78     
79     private PDPage page;
80     
81     private Map JavaDoc documentFontCache = new HashMap JavaDoc();
82     
83     /**
84      * This is a simple internal class used by the Stream engine to handle the
85      * resources stack.
86      */

87     private static class StreamResources
88     {
89         private Map JavaDoc fonts;
90         private Map JavaDoc colorSpaces;
91         private Map JavaDoc xobjects;
92         private Map JavaDoc graphicsStates;
93         private PDResources resources;
94     }
95
96     /**
97      * Constructor.
98      */

99     public PDFStreamEngine()
100     {
101         //default constructor
102
}
103     
104     /**
105      * Constructor with engine properties. The property keys are all
106      * PDF operators, the values are class names used to execute those
107      * operators.
108      *
109      * @param properties The engine properties.
110      *
111      * @throws IOException If there is an error setting the engine properties.
112      */

113     public PDFStreamEngine( Properties JavaDoc properties ) throws IOException JavaDoc
114     {
115         try
116         {
117             Iterator JavaDoc keys = properties.keySet().iterator();
118             while( keys.hasNext() )
119             {
120                 String JavaDoc operator = (String JavaDoc)keys.next();
121                 String JavaDoc operatorClass = properties.getProperty( operator );
122                 OperatorProcessor op = (OperatorProcessor)Class.forName( operatorClass ).newInstance();
123                 registerOperatorProcessor(operator, op);
124             }
125         }
126         catch( Exception JavaDoc e )
127         {
128             throw new WrappedIOException( e );
129         }
130     }
131     
132     /**
133      * Register a custom operator processor with the engine.
134      *
135      * @param operator The operator as a string.
136      * @param op Processor instance.
137      */

138     public void registerOperatorProcessor( String JavaDoc operator, OperatorProcessor op )
139     {
140         op.setContext( this );
141         operators.put( operator, op );
142     }
143     
144     /**
145      * This method must be called between processing documents. The
146      * PDFStreamEngine caches information for the document between pages
147      * and this will release the cached information. This only needs
148      * to be called if processing a new document.
149      *
150      */

151     public void resetEngine()
152     {
153         documentFontCache.clear();
154     }
155
156     /**
157      * This will process the contents of the stream.
158      *
159      * @param aPage The page.
160      * @param resources The location to retrieve resources.
161      * @param cosStream the Stream to execute.
162      *
163      *
164      * @throws IOException if there is an error accessing the stream.
165      */

166     public void processStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException JavaDoc
167     {
168         graphicsState = new PDGraphicsState();
169         textMatrix = null;
170         textLineMatrix = null;
171         graphicsStack.clear();
172         streamResourcesStack.clear();
173         
174         processSubStream( aPage, resources, cosStream );
175     }
176     
177     /**
178      * Process a sub stream of the current stream.
179      *
180      * @param aPage The page used for drawing.
181      * @param resources The resources used when processing the stream.
182      * @param cosStream The stream to process.
183      *
184      * @throws IOException If there is an exception while processing the stream.
185      */

186     public void processSubStream( PDPage aPage, PDResources resources, COSStream cosStream ) throws IOException JavaDoc
187     {
188         page = aPage;
189         if( resources != null )
190         {
191             StreamResources sr = new StreamResources();
192             sr.fonts = resources.getFonts( documentFontCache );
193             sr.colorSpaces = resources.getColorSpaces();
194             sr.xobjects = resources.getXObjects();
195             sr.graphicsStates = resources.getGraphicsStates();
196             sr.resources = resources;
197             streamResourcesStack.push(sr);
198         }
199         try
200         {
201             List JavaDoc arguments = new ArrayList JavaDoc();
202             List JavaDoc tokens = cosStream.getStreamTokens();
203             if( tokens != null )
204             {
205                 Iterator JavaDoc iter = tokens.iterator();
206                 while( iter.hasNext() )
207                 {
208                     Object JavaDoc next = iter.next();
209                     if( next instanceof COSObject )
210                     {
211                         arguments.add( ((COSObject)next).getObject() );
212                     }
213                     else if( next instanceof PDFOperator )
214                     {
215                         processOperator( (PDFOperator)next, arguments );
216                         arguments = new ArrayList JavaDoc();
217                     }
218                     else
219                     {
220                         arguments.add( next );
221                     }
222                 }
223             }
224         }
225         finally
226         {
227             if( resources != null )
228             {
229                 streamResourcesStack.pop();
230             }
231         }
232         
233     }
234
235     /**
236      * A method provided as an event interface to allow a subclass to perform
237      * some specific functionality when a character needs to be displayed.
238      *
239      * @param text The character to be displayed.
240      */

241     protected void showCharacter( TextPosition text )
242     {
243         //subclasses can override to provide specific functionality.
244
}
245
246     /**
247      * You should override this method if you want to perform an action when a
248      * string is being shown.
249      *
250      * @param string The string to display.
251      *
252      * @throws IOException If there is an error showing the string
253      */

254     public void showString( byte[] string ) throws IOException JavaDoc
255     {
256         float spaceWidth = 0;
257         float spacing = 0;
258         StringBuffer JavaDoc stringResult = new StringBuffer JavaDoc(string.length);
259         
260         float characterHorizontalDisplacement = 0;
261         float characterVerticalDisplacement = 0;
262         float spaceDisplacement = 0;
263         float fontSize = graphicsState.getTextState().getFontSize();
264         float horizontalScaling = graphicsState.getTextState().getHorizontalScalingPercent()/100f;
265         float verticalScaling = horizontalScaling;//not sure if this is right but what else to do???
266
float rise = graphicsState.getTextState().getRise();
267         final float wordSpacing = graphicsState.getTextState().getWordSpacing();
268         final float characterSpacing = graphicsState.getTextState().getCharacterSpacing();
269         float wordSpacingDisplacement = 0;
270         
271         PDFont font = graphicsState.getTextState().getFont();
272         
273         //This will typically be 1000 but in the case of a type3 font
274
//this might be a different number
275
float glyphSpaceToTextSpaceFactor = 1f/font.getFontMatrix().getValue( 0, 0 );
276         float averageWidth = font.getAverageFontWidth();
277
278         Matrix initialMatrix = new Matrix();
279         initialMatrix.setValue(0,0,1);
280         initialMatrix.setValue(0,1,0);
281         initialMatrix.setValue(0,2,0);
282         initialMatrix.setValue(1,0,0);
283         initialMatrix.setValue(1,1,1);
284         initialMatrix.setValue(1,2,0);
285         initialMatrix.setValue(2,0,0);
286         initialMatrix.setValue(2,1,rise);
287         initialMatrix.setValue(2,2,1);
288
289
290         //this
291
int codeLength = 1;
292         Matrix ctm = graphicsState.getCurrentTransformationMatrix();
293         
294         //lets see what the space displacement should be
295
spaceDisplacement = (font.getFontWidth( SPACE_BYTES, 0, 1 )/glyphSpaceToTextSpaceFactor);
296         if( spaceDisplacement == 0 )
297         {
298             spaceDisplacement = (averageWidth/glyphSpaceToTextSpaceFactor);
299             //The average space width appears to be higher than necessary
300
//so lets make it a little bit smaller.
301
spaceDisplacement *= .80f;
302         }
303         int pageRotation = page.findRotation();
304         Matrix trm = initialMatrix.multiply( textMatrix ).multiply( ctm );
305         float x = trm.getValue(2,0);
306         float y = trm.getValue(2,1);
307         if( pageRotation == 0 )
308         {
309             trm.setValue( 2,1, -y + page.findMediaBox().getHeight() );
310         }
311         else if( pageRotation == 90 )
312         {
313             trm.setValue( 2,0, y );
314             trm.setValue( 2,1, x );
315         }
316         else if( pageRotation == 270 )
317         {
318             trm.setValue( 2,0, -y + page.findMediaBox().getHeight() );
319             trm.setValue( 2,1, x );
320         }
321         for( int i=0; i<string.length; i+=codeLength )
322         {
323             codeLength = 1;
324
325             String JavaDoc c = font.encode( string, i, codeLength );
326             if( c == null && i+1<string.length)
327             {
328                 //maybe a multibyte encoding
329
codeLength++;
330                 c = font.encode( string, i, codeLength );
331             }
332             stringResult.append( c );
333
334             //todo, handle horizontal displacement
335
characterHorizontalDisplacement += (font.getFontWidth( string, i, codeLength )/glyphSpaceToTextSpaceFactor);
336             characterVerticalDisplacement =
337                 Math.max(
338                     characterVerticalDisplacement,
339                     font.getFontHeight( string, i, codeLength)/glyphSpaceToTextSpaceFactor);
340
341
342             // PDF Spec - 5.5.2 Word Spacing
343
//
344
// Word spacing works the same was as character spacing, but applies
345
// only to the space character, code 32.
346
//
347
// Note: Word spacing is applied to every occurrence of the single-byte
348
// character code 32 in a string. This can occur when using a simple
349
// font or a composite font that defines code 32 as a single-byte code.
350
// It does not apply to occurrences of the byte value 32 in multiple-byte
351
// codes.
352
//
353
// RDD - My interpretation of this is that only character code 32's that
354
// encode to spaces should have word spacing applied. Cases have been
355
// observed where a font has a space character with a character code
356
// other than 32, and where word spacing (Tw) was used. In these cases,
357
// applying word spacing to either the non-32 space or to the character
358
// code 32 non-space resulted in errors consistent with this interpretation.
359
//
360
if( (string[i] == 0x20) && c.equals( " " ) )
361             {
362                 spacing += wordSpacing + characterSpacing;
363             }
364             else
365             {
366                 spacing += characterSpacing;
367             }
368             // We want to update the textMatrix using the width, in text space units.
369
//
370

371         }
372         
373         //The adjustment will always be zero. The adjustment as shown in the
374
//TJ operator will be handled separately.
375
float adjustment=0;
376         //todo, need to compute the vertical displacement
377
float ty = 0;
378         float tx = ((characterHorizontalDisplacement-adjustment/glyphSpaceToTextSpaceFactor)*fontSize + spacing)
379                    *horizontalScaling;
380         
381         float xScale = trm.getXScale();
382         float yScale = trm.getYScale();
383         float xPos = trm.getXPosition();
384         float yPos = trm.getYPosition();
385         spaceWidth = spaceDisplacement * xScale * fontSize;
386         wordSpacingDisplacement = wordSpacing*xScale * fontSize;
387         Matrix td = new Matrix();
388         td.setValue( 2, 0, tx );
389         td.setValue( 2, 1, ty );
390         
391         float xPosBefore = textMatrix.getXPosition();
392         float yPosBefore = textMatrix.getYPosition();
393         textMatrix = td.multiply( textMatrix );
394
395         float totalStringWidth = 0;
396         float totalStringHeight = characterVerticalDisplacement * fontSize * yScale;
397         if( pageRotation == 0 )
398         {
399             totalStringWidth = (textMatrix.getXPosition() - xPosBefore);
400         }
401         else if( pageRotation == 90 )
402         {
403             totalStringWidth = (textMatrix.getYPosition() - yPosBefore);
404         }
405         else if( pageRotation == 270 )
406         {
407             totalStringWidth = (yPosBefore - textMatrix.getYPosition());
408         }
409         showCharacter(
410                 new TextPosition(
411                     xPos,
412                     yPos,
413                     xScale,
414                     yScale,
415                     totalStringWidth,
416                     totalStringHeight,
417                     spaceWidth,
418                     stringResult.toString(),
419                     font,
420                     fontSize,
421                     wordSpacingDisplacement ));
422     }
423     
424     /**
425      * This is used to handle an operation.
426      *
427      * @param operation The operation to perform.
428      * @param arguments The list of arguments.
429      *
430      * @throws IOException If there is an error processing the operation.
431      */

432     public void processOperator( String JavaDoc operation, List JavaDoc arguments ) throws IOException JavaDoc
433     {
434         PDFOperator oper = PDFOperator.getOperator( operation );
435         processOperator( oper, arguments );
436     }
437
438     /**
439      * This is used to handle an operation.
440      *
441      * @param operator The operation to perform.
442      * @param arguments The list of arguments.
443      *
444      * @throws IOException If there is an error processing the operation.
445      */

446     protected void processOperator( PDFOperator operator, List JavaDoc arguments ) throws IOException JavaDoc
447     {
448         String JavaDoc operation = operator.getOperation();
449         OperatorProcessor processor = (OperatorProcessor)operators.get( operation );
450         if( processor != null )
451         {
452             processor.process( operator, arguments );
453         }
454     }
455    
456     /**
457      * @return Returns the colorSpaces.
458      */

459     public Map JavaDoc getColorSpaces()
460     {
461         return ((StreamResources) streamResourcesStack.peek()).colorSpaces;
462     }
463     
464     /**
465      * @return Returns the colorSpaces.
466      */

467     public Map JavaDoc getXObjects()
468     {
469         return ((StreamResources) streamResourcesStack.peek()).xobjects;
470     }
471     
472     /**
473      * @param value The colorSpaces to set.
474      */

475     public void setColorSpaces(Map JavaDoc value)
476     {
477         ((StreamResources) streamResourcesStack.peek()).colorSpaces = value;
478     }
479     /**
480      * @return Returns the fonts.
481      */

482     public Map JavaDoc getFonts()
483     {
484         return ((StreamResources) streamResourcesStack.peek()).fonts;
485     }
486     /**
487      * @param value The fonts to set.
488      */

489     public void setFonts(Map JavaDoc value)
490     {
491         ((StreamResources) streamResourcesStack.peek()).fonts = value;
492     }
493     /**
494      * @return Returns the graphicsStack.
495      */

496     public Stack JavaDoc getGraphicsStack()
497     {
498         return graphicsStack;
499     }
500     /**
501      * @param value The graphicsStack to set.
502      */

503     public void setGraphicsStack(Stack JavaDoc value)
504     {
505         graphicsStack = value;
506     }
507     /**
508      * @return Returns the graphicsState.
509      */

510     public PDGraphicsState getGraphicsState()
511     {
512         return graphicsState;
513     }
514     /**
515      * @param value The graphicsState to set.
516      */

517     public void setGraphicsState(PDGraphicsState value)
518     {
519         graphicsState = value;
520     }
521     /**
522      * @return Returns the graphicsStates.
523      */

524     public Map JavaDoc getGraphicsStates()
525     {
526         return ((StreamResources) streamResourcesStack.peek()).graphicsStates;
527     }
528     /**
529      * @param value The graphicsStates to set.
530      */

531     public void setGraphicsStates(Map JavaDoc value)
532     {
533         ((StreamResources) streamResourcesStack.peek()).graphicsStates = value;
534     }
535     /**
536      * @return Returns the textLineMatrix.
537      */

538     public Matrix getTextLineMatrix()
539     {
540         return textLineMatrix;
541     }
542     /**
543      * @param value The textLineMatrix to set.
544      */

545     public void setTextLineMatrix(Matrix value)
546     {
547         textLineMatrix = value;
548     }
549     /**
550      * @return Returns the textMatrix.
551      */

552     public Matrix getTextMatrix()
553     {
554         return textMatrix;
555     }
556     /**
557      * @param value The textMatrix to set.
558      */

559     public void setTextMatrix(Matrix value)
560     {
561         textMatrix = value;
562     }
563     /**
564      * @return Returns the resources.
565      */

566     public PDResources getResources()
567     {
568         return ((StreamResources) streamResourcesStack.peek()).resources;
569     }
570     
571     /**
572      * Get the current page that is being processed.
573      *
574      * @return The page being processed.
575      */

576     public PDPage getCurrentPage()
577     {
578         return page;
579     }
580 }
Popular Tags