KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > pdfbox > ExtractImages


1 /**
2  * Copyright (c) 2003-2004, www.pdfbox.org
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  * 3. Neither the name of pdfbox; nor the names of its
14  * contributors may be used to endorse or promote products derived from this
15  * software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * http://www.pdfbox.org
29  *
30  */

31 package org.pdfbox;
32
33 import java.io.File JavaDoc;
34 import java.io.IOException JavaDoc;
35 import java.util.Iterator JavaDoc;
36 import java.util.List JavaDoc;
37 import java.util.Map JavaDoc;
38
39 import org.pdfbox.pdmodel.PDDocument;
40 import org.pdfbox.pdmodel.PDPage;
41 import org.pdfbox.pdmodel.PDResources;
42 import org.pdfbox.pdmodel.encryption.AccessPermission;
43 import org.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
44 import org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
45
46 /**
47  * This will read a read pdf and extract images. <br/><br/>
48  *
49  * usage: java org.pdfbox.ExtractImages &lt;pdffile&gt; &lt;password&gt; [imageprefix]
50  *
51  * @author <a HREF="mailto:ben@benlitchfield.com">Ben Litchfield</a>
52  * @version $Revision: 1.7 $
53  */

54 public class ExtractImages
55 {
56     private int imageCounter = 1;
57     
58     private static final String JavaDoc PASSWORD = "-password";
59     private static final String JavaDoc PREFIX = "-prefix";
60     
61     /**
62      * This is the entry point for the application.
63      *
64      * @param args The command-line arguments.
65      *
66      * @throws Exception If there is an error decrypting the document.
67      */

68     public static void main( String JavaDoc[] args ) throws Exception JavaDoc
69     {
70         ExtractImages extractor = new ExtractImages();
71         extractor.extractImages( args );
72     }
73
74     private void extractImages( String JavaDoc[] args ) throws Exception JavaDoc
75     {
76         if( args.length < 1 || args.length > 3 )
77         {
78             usage();
79         }
80         else
81         {
82             String JavaDoc pdfFile = null;
83             String JavaDoc password = "";
84             String JavaDoc prefix = null;
85             for( int i=0; i<args.length; i++ )
86             {
87                 if( args[i].equals( PASSWORD ) )
88                 {
89                     i++;
90                     if( i >= args.length )
91                     {
92                         usage();
93                     }
94                     password = args[i];
95                 }
96                 else if( args[i].equals( PREFIX ) )
97                 {
98                     i++;
99                     if( i >= args.length )
100                     {
101                         usage();
102                     }
103                     prefix = args[i];
104                 }
105                 else
106                 {
107                     if( pdfFile == null )
108                     {
109                         pdfFile = args[i];
110                     }
111                 }
112             }
113             if(pdfFile == null)
114             {
115                 usage();
116             }
117             else
118             {
119                 if( prefix == null && pdfFile.length() >4 )
120                 {
121                     prefix = pdfFile.substring( 0, pdfFile.length() -4 );
122                 }
123     
124                 PDDocument document = null;
125     
126                 try
127                 {
128                     document = PDDocument.load( pdfFile );
129     
130                     if( document.isEncrypted() )
131                     {
132                     
133                         StandardDecryptionMaterial spm = new StandardDecryptionMaterial(password);
134                         document.openProtection(spm);
135                         AccessPermission ap = document.getCurrentAccessPermission();
136                             
137                         
138                         if( ! ap.canExtractContent() )
139                         {
140                             throw new IOException JavaDoc(
141                                 "Error: You do not have permission to extract images." );
142                         }
143                     }
144                     
145                     List JavaDoc pages = document.getDocumentCatalog().getAllPages();
146                     Iterator JavaDoc iter = pages.iterator();
147                     while( iter.hasNext() )
148                     {
149                         PDPage page = (PDPage)iter.next();
150                         PDResources resources = page.getResources();
151                         Map JavaDoc images = resources.getImages();
152                         if( images != null )
153                         {
154                             Iterator JavaDoc imageIter = images.keySet().iterator();
155                             while( imageIter.hasNext() )
156                             {
157                                 String JavaDoc key = (String JavaDoc)imageIter.next();
158                                 PDXObjectImage image = (PDXObjectImage)images.get( key );
159                                 String JavaDoc name = getUniqueFileName( key, image.getSuffix() );
160                                 System.out.println( "Writing image:" + name );
161                                 image.write2file( name );
162                             }
163                         }
164                     }
165                 }
166                 finally
167                 {
168                     if( document != null )
169                     {
170                         document.close();
171                     }
172                 }
173             }
174         }
175     }
176     
177     private String JavaDoc getUniqueFileName( String JavaDoc prefix, String JavaDoc suffix )
178     {
179         String JavaDoc uniqueName = null;
180         File JavaDoc f = null;
181         while( f == null || f.exists() )
182         {
183             uniqueName = prefix + "-" + imageCounter;
184             f = new File JavaDoc( uniqueName + "." + suffix );
185             imageCounter++;
186         }
187         return uniqueName;
188     }
189
190     /**
191      * This will print the usage requirements and exit.
192      */

193     private static void usage()
194     {
195         System.err.println( "Usage: java org.pdfbox.ExtractImages [OPTIONS] <PDF file>\n" +
196             " -password <password> Password to decrypt document\n" +
197             " -prefix <image-prefix> Image prefix(default to pdf name)\n" +
198             " <PDF file> The PDF document to use\n"
199             );
200         System.exit( 1 );
201     }
202
203 }
Popular Tags