KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > pdfbox > examples > util > RemoveAllText


1 /**
2  * Copyright (c) 2006, www.pdfbox.org
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  * 3. Neither the name of pdfbox; nor the names of its
14  * contributors may be used to endorse or promote products derived from this
15  * software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * http://www.pdfbox.org
29  *
30  */

31 package org.pdfbox.examples.util;
32
33 import org.pdfbox.pdfparser.PDFStreamParser;
34 import org.pdfbox.pdfwriter.ContentStreamWriter;
35
36 import org.pdfbox.pdmodel.PDDocument;
37 import org.pdfbox.pdmodel.PDPage;
38 import org.pdfbox.pdmodel.common.PDStream;
39 import org.pdfbox.util.PDFOperator;
40
41 import java.util.ArrayList JavaDoc;
42 import java.util.List JavaDoc;
43
44 /**
45  * This is an example on how to remove all text from PDF document.
46  *
47  * Usage: java org.pdfbox.examples.util.RemoveAllText <input-pdf> <output-pdf>
48  *
49  * @author <a HREF="mailto:ben@benlitchfield.com">Ben Litchfield</a>
50  * @version $Revision: 1.2 $
51  */

52 public class RemoveAllText
53 {
54     /**
55      * Default constructor.
56      */

57     private RemoveAllText()
58     {
59         //example class should not be instantiated
60
}
61     
62     /**
63      * This will remove all text from a PDF document.
64      *
65      * @param args The command line arguments.
66      *
67      * @throws Exception If there is an error parsing the document.
68      */

69     public static void main( String JavaDoc[] args ) throws Exception JavaDoc
70     {
71         if( args.length != 2 )
72         {
73             usage();
74         }
75         else
76         {
77             PDDocument document = null;
78             try
79             {
80                 document = PDDocument.load( args[0] );
81                 if( document.isEncrypted() )
82                 {
83                     System.err.println( "Error: Encrypted documents are not supported for this example." );
84                     System.exit( 1 );
85                 }
86                 List JavaDoc allPages = document.getDocumentCatalog().getAllPages();
87                 for( int i=0; i<allPages.size(); i++ )
88                 {
89                     PDPage page = (PDPage)allPages.get( i );
90                     PDFStreamParser parser = new PDFStreamParser(page.getContents());
91                     parser.parse();
92                     List JavaDoc tokens = parser.getTokens();
93                     List JavaDoc newTokens = new ArrayList JavaDoc();
94                     for( int j=0; j<tokens.size(); j++)
95                     {
96                         Object JavaDoc token = tokens.get( j );
97                         if( token instanceof PDFOperator )
98                         {
99                             PDFOperator op = (PDFOperator)token;
100                             if( op.getOperation().equals( "TJ") || op.getOperation().equals( "Tj" ))
101                             {
102                                 //remove the one argument to this operator
103
newTokens.remove( newTokens.size() -1 );
104                                 continue;
105                             }
106                         }
107                         newTokens.add( token );
108                         
109                     }
110                     PDStream newContents = new PDStream( document );
111                     ContentStreamWriter writer = new ContentStreamWriter( newContents.createOutputStream() );
112                     writer.writeTokens( newTokens );
113                     newContents.addCompression();
114                     page.setContents( newContents );
115                 }
116                 document.save( args[1] );
117             }
118             finally
119             {
120                 if( document != null )
121                 {
122                     document.close();
123                 }
124             }
125         }
126     }
127
128     /**
129      * This will print the usage for this document.
130      */

131     private static void usage()
132     {
133         System.err.println( "Usage: java org.pdfbox.examples.pdmodel.RemoveAllText <input-pdf> <output-pdf>" );
134     }
135
136 }
Popular Tags