package org.pdfbox.examples.util;

import org.pdfbox.exceptions.InvalidPasswordException;

import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDPage;
import org.pdfbox.util.PDFTextStripperByArea;

import java.awt.Rectangle;

import java.util.List;

/**
 * Example showing how to extract the text that falls inside a fixed
 * rectangular region of the first page of a PDF document.
 *
 * Usage: java org.pdfbox.examples.util.ExtractTextByArea &lt;input-pdf&gt;
 */
public class ExtractTextByArea
{
    /**
     * Private constructor: this class only exposes a static entry point and
     * is not meant to be instantiated.
     */
    private ExtractTextByArea()
    {
    }

    /**
     * Loads the PDF named by the single command line argument, extracts the
     * text inside a hard-coded region of its first page and prints it to
     * standard output.
     *
     * If the document is encrypted, decryption with the empty password is
     * attempted. When that fails, or when the document has no pages, an error
     * is written to standard error and the JVM exits with status 1. When the
     * argument count is not exactly one, the usage message is printed.
     *
     * @param args The command line arguments; exactly one is expected, the
     *             path of the PDF to read.
     *
     * @throws Exception If there is an error loading or parsing the document.
     */
    public static void main( String[] args ) throws Exception
    {
        if( args.length != 1 )
        {
            usage();
            return;
        }

        // The exit status is recorded here and applied only after the finally
        // block has run. Calling System.exit() inside the try block would
        // terminate the JVM without ever closing the document.
        int exitCode = 0;
        PDDocument document = null;
        try
        {
            document = PDDocument.load( args[0] );

            boolean readable = true;
            if( document.isEncrypted() )
            {
                try
                {
                    // Many encrypted documents can be opened with an empty password.
                    document.decrypt( "" );
                }
                catch( InvalidPasswordException e )
                {
                    System.err.println( "Error: Document is encrypted with a password." );
                    readable = false;
                    exitCode = 1;
                }
            }

            if( readable )
            {
                List allPages = document.getDocumentCatalog().getAllPages();
                if( allPages.isEmpty() )
                {
                    // Guard against get( 0 ) throwing IndexOutOfBoundsException.
                    System.err.println( "Error: Document contains no pages." );
                    exitCode = 1;
                }
                else
                {
                    PDFTextStripperByArea stripper = new PDFTextStripperByArea();
                    stripper.setSortByPosition( true );

                    // Region of interest: x=10, y=280, width=275, height=60.
                    Rectangle rect = new Rectangle( 10, 280, 275, 60 );
                    stripper.addRegion( "class1", rect );

                    PDPage firstPage = (PDPage)allPages.get( 0 );
                    stripper.extractRegions( firstPage );

                    System.out.println( "Text in the area:" + rect );
                    System.out.println( stripper.getTextForRegion( "class1" ) );
                }
            }
        }
        finally
        {
            if( document != null )
            {
                document.close();
            }
        }

        if( exitCode != 0 )
        {
            System.exit( exitCode );
        }
    }

    /**
     * Prints the command line usage message to standard error.
     */
    private static void usage()
    {
        System.err.println( "Usage: java org.pdfbox.examples.util.ExtractTextByArea <input-pdf>" );
    }

}