KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > parsers > SnapperPDFParser


package org.enhydra.snapper.parsers;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;

import org.enhydra.snapper.api.*;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.pdmodel.PDDocumentInformation;
import org.pdfbox.util.PDFTextStripper;
import org.pdfbox.pdfparser.PDFParser;

15 public class SnapperPDFParser implements org.enhydra.snapper.api.Parser {
16     String JavaDoc parsedText, fileName, title, name;
17     
18     public String JavaDoc parse(InputStream JavaDoc is) throws java.io.IOException JavaDoc{ return "";}
19     
20     public void parse(){
21         parsedText = null;
22         title = null;
23         name = null;
24         PDDocument document = null;
25         PDDocumentInformation info = null;
26         PDFTextStripper stripper = null;
27         FileInputStream JavaDoc in = null;
28          try
29             {
30             in = new FileInputStream JavaDoc(new File JavaDoc(fileName));//LucenePDFDocument.getDocument(new File(fileName)).getField("contents").readerValue();
31
PDFParser parser = new PDFParser(in);
32             parser.parse();
33             document = parser.getPDDocument();
34             
35             stripper = new PDFTextStripper();
36             String JavaDoc pdfFile = null;
37             String JavaDoc textFile = null;
38         
39             if( document.isEncrypted() )
40             {
41                try{
42                     ParserManager.logger.debug("Document "+ fileName + " is encrypted!");
43                 } catch (Exception JavaDoc ex) {
44                     System.out.println("Document "+ fileName + " is encrypted!");
45                     }
46                     document.decrypt("");
47                   }
48
49                  info = document.getDocumentInformation();
50
51                  title = info.getTitle();
52                  StringWriter JavaDoc writer = new StringWriter JavaDoc();
53                  stripper.writeText(document, writer);
54                  parsedText = writer.getBuffer().toString();
55
56             document.close();
57             
58             in.close();
59             in = null;
60             writer.close();
61             writer = null;
62             stripper = null;
63             
64             document = null;
65             parser = null;
66             info = null;
67             
68             } catch (Exception JavaDoc ex){
69                 
70                 if (document != null) {
71                     try{
72                         document.close();
73                     }
74                     catch (IOException JavaDoc e){}
75                 }
76                 document = null;
77                 stripper = null;
78                 info=null;
79                 try{
80                     ParserManager.logger.debug("***** File could not be parsed: " + fileName);
81                 } catch (Exception JavaDoc e) {
82                     System.out.println("***** File could not be parsed: " + fileName);
83                     }
84                 }
85             catch (Throwable JavaDoc ex){
86                 
87                 document = null;
88                 stripper = null;
89                 info=null;
90                 try{
91                     ParserManager.logger.debug("***** File could not be parsed: " + fileName);
92                 } catch (Exception JavaDoc e) {
93                     System.out.println("***** File could not be parsed: " + fileName);
94                     }
95             }
96          }
97
98     
99     public void setFileName(String JavaDoc fileName){
100         this.fileName = fileName;
101     }
102     
103     public String JavaDoc getParsedText() {
104         return parsedText;
105       }
106     
107     public String JavaDoc getTitle() {
108         return title;
109       }
110     public String JavaDoc getName() {
111         return title;
112       }
113     
114 }
Popular Tags