1 31 package org.pdfbox.ant; 32 33 import java.io.File ; 34 35 import java.util.ArrayList ; 36 import java.util.Iterator ; 37 import java.util.List ; 38 39 import org.apache.tools.ant.DirectoryScanner; 40 import org.apache.tools.ant.Task; 41 42 import org.apache.tools.ant.types.FileSet; 43 44 51 public class PDFToTextTask extends Task 52 { 53 private List fileSets = new ArrayList (); 54 55 60 public void addFileset( FileSet set ) 61 { 62 fileSets.add( set ); 63 } 64 65 68 public void execute() 69 { 70 log( "PDFToTextTask executing" ); 71 Iterator fileSetIter = fileSets.iterator(); 72 while( fileSetIter.hasNext() ) 73 { 74 FileSet next = (FileSet)fileSetIter.next(); 75 DirectoryScanner dirScanner = next.getDirectoryScanner( getProject() ); 76 dirScanner.scan(); 77 String [] files = dirScanner.getIncludedFiles(); 78 for( int i=0; i<files.length; i++ ) 79 { 80 File f = new File ( dirScanner.getBasedir(), files[i] ); 81 log( "processing: " + f.getAbsolutePath() ); 82 String pdfFile = f.getAbsolutePath(); 83 if( pdfFile.toUpperCase().endsWith( ".PDF" ) ) 84 { 85 String textFile = pdfFile.substring( 0, pdfFile.length() -3 ); 86 textFile = textFile + "txt"; 87 try 88 { 89 org.pdfbox.ExtractText.main( new String [] { pdfFile, textFile } ); 90 } 91 catch( Exception e ) 92 { 93 log( "Error processing " + pdfFile + e.getMessage() ); 94 } 95 } 96 } 97 98 } 99 } 100 } | Popular Tags |