1 package org.enhydra.snapper.parsers; 2 3 import java.io.File ; 4 import java.io.FileInputStream ; 5 import java.io.IOException ; 6 import java.io.InputStream ; 7 import java.io.StringWriter ; 8 9 import org.enhydra.snapper.api.*; 10 import org.pdfbox.pdmodel.PDDocument; 11 import org.pdfbox.pdmodel.PDDocumentInformation; 12 import org.pdfbox.util.PDFTextStripper; 13 import org.pdfbox.pdfparser.PDFParser; 14 15 public class SnapperPDFParser implements org.enhydra.snapper.api.Parser { 16 String parsedText, fileName, title, name; 17 18 public String parse(InputStream is) throws java.io.IOException { return "";} 19 20 public void parse(){ 21 parsedText = null; 22 title = null; 23 name = null; 24 PDDocument document = null; 25 PDDocumentInformation info = null; 26 PDFTextStripper stripper = null; 27 FileInputStream in = null; 28 try 29 { 30 in = new FileInputStream (new File (fileName)); PDFParser parser = new PDFParser(in); 32 parser.parse(); 33 document = parser.getPDDocument(); 34 35 stripper = new PDFTextStripper(); 36 String pdfFile = null; 37 String textFile = null; 38 39 if( document.isEncrypted() ) 40 { 41 try{ 42 ParserManager.logger.debug("Document "+ fileName + " is encrypted!"); 43 } catch (Exception ex) { 44 System.out.println("Document "+ fileName + " is encrypted!"); 45 } 46 document.decrypt(""); 47 } 48 49 info = document.getDocumentInformation(); 50 51 title = info.getTitle(); 52 StringWriter writer = new StringWriter (); 53 stripper.writeText(document, writer); 54 parsedText = writer.getBuffer().toString(); 55 56 document.close(); 57 58 in.close(); 59 in = null; 60 writer.close(); 61 writer = null; 62 stripper = null; 63 64 document = null; 65 parser = null; 66 info = null; 67 68 } catch (Exception ex){ 69 70 if (document != null) { 71 try{ 72 document.close(); 73 } 74 catch (IOException e){} 75 } 76 document = null; 77 stripper = null; 78 info=null; 79 try{ 80 ParserManager.logger.debug("***** File could not be parsed: " + fileName); 81 } catch (Exception e) { 82 System.out.println("***** File could not be parsed: " + fileName); 83 } 84 } 85 catch (Throwable ex){ 86 87 document = null; 88 stripper = null; 89 info=null; 90 try{ 91 ParserManager.logger.debug("***** File could not be parsed: " + fileName); 92 } catch (Exception e) { 93 System.out.println("***** File could not be parsed: " + fileName); 94 } 95 } 96 } 97 98 99 public void setFileName(String fileName){ 100 this.fileName = fileName; 101 } 102 103 public String getParsedText() { 104 return parsedText; 105 } 106 107 public String getTitle() { 108 return title; 109 } 110 public String getName() { 111 return title; 112 } 113 114 } | Popular Tags |