KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > pdfbox > ant > PDFToTextTask


1 /**
2  * Copyright (c) 2003, www.pdfbox.org
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright notice,
11  * this list of conditions and the following disclaimer in the documentation
12  * and/or other materials provided with the distribution.
13  * 3. Neither the name of pdfbox; nor the names of its
14  * contributors may be used to endorse or promote products derived from this
15  * software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * http://www.pdfbox.org
29  *
30  */

31 package org.pdfbox.ant;
32
33 import java.io.File JavaDoc;
34
35 import java.util.ArrayList JavaDoc;
36 import java.util.Iterator JavaDoc;
37 import java.util.List JavaDoc;
38
39 import org.apache.tools.ant.DirectoryScanner;
40 import org.apache.tools.ant.Task;
41
42 import org.apache.tools.ant.types.FileSet;
43
44 /**
45  * This is an ant task that will allow pdf documents to be converted using an
46  * and task.
47  *
48  * @author <a HREF="ben@benlitchfield.com">Ben Litchfield</a>
49  * @version $Revision: 1.8 $
50  */

51 public class PDFToTextTask extends Task
52 {
53     private List JavaDoc fileSets = new ArrayList JavaDoc();
54
55     /**
56      * Adds a set of files (nested fileset attribute).
57      *
58      * @param set Another fileset to add.
59      */

60     public void addFileset( FileSet set )
61     {
62         fileSets.add( set );
63     }
64
65     /**
66      * This will perform the execution.
67      */

68     public void execute()
69     {
70         log( "PDFToTextTask executing" );
71         Iterator JavaDoc fileSetIter = fileSets.iterator();
72         while( fileSetIter.hasNext() )
73         {
74             FileSet next = (FileSet)fileSetIter.next();
75             DirectoryScanner dirScanner = next.getDirectoryScanner( getProject() );
76             dirScanner.scan();
77             String JavaDoc[] files = dirScanner.getIncludedFiles();
78             for( int i=0; i<files.length; i++ )
79             {
80                 File JavaDoc f = new File JavaDoc( dirScanner.getBasedir(), files[i] );
81                 log( "processing: " + f.getAbsolutePath() );
82                 String JavaDoc pdfFile = f.getAbsolutePath();
83                 if( pdfFile.toUpperCase().endsWith( ".PDF" ) )
84                 {
85                     String JavaDoc textFile = pdfFile.substring( 0, pdfFile.length() -3 );
86                     textFile = textFile + "txt";
87                     try
88                     {
89                         org.pdfbox.ExtractText.main( new String JavaDoc[] { pdfFile, textFile } );
90                     }
91                     catch( Exception JavaDoc e )
92                     {
93                         log( "Error processing " + pdfFile + e.getMessage() );
94                     }
95                 }
96             }
97
98         }
99     }
100 }
Popular Tags