PDFParserWrapper


1   /*
2    * Copyright  1999-2004 The Apache Software Foundation
3    *
4    *  Licensed under the Apache License, Version 2.0 (the "License");
5    *  you may not use this file except in compliance with the License.
6    *  You may obtain a copy of the License at
7    *
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    *
10   *  Unless required by applicable law or agreed to in writing, software
11   *  distributed under the License is distributed on an "AS IS" BASIS,
12   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   *  See the License for the specific language governing permissions and
14   *  limitations under the License.
15   *
16   */
17  
18  /* $Id: PDFParserWrapper.java 42598 2004-03-01 16:18:28Z gregor $  */
19  
20  package org.apache.lenya.lucene.parser;
21  
22  import java.io.File  ;
23  import java.io.FileNotFoundException  ;
24  import java.io.IOException  ;
25  import java.io.InputStreamReader  ;
26  import java.io.Reader  ;
27  import java.net.MalformedURLException  ;
28  import java.net.URI  ;
29  import java.net.URLConnection  ;
30  
31  public class PDFParserWrapper extends AbstractHTMLParser {
32      /** Creates a new instance of PDFParserWrapper */
33      public PDFParserWrapper() {
34      }
35  
36      /** Returns a reader that reads the contents of the HTML document.
37       *
38       */
39      public Reader   getReader() throws IOException   {
40          return getParser().getReader();
41      }
42  
43      /** Returns the title of the HTML document.
44       *
45       */
46      public String   getTitle() throws IOException   {
47          try {
48              return getParser().getTitle();
49          } catch (InterruptedException   e) {
50              throw new IOException  (e.getMessage());
51          }
52      }
53  
54      /** Returns the keywords of the HTML document.
55       *
56       */
57      public String   getKeywords() throws IOException   {
58          try {
59              return getParser().getKeywords();
60          } catch (InterruptedException   e) {
61              throw new IOException  (e.getMessage());
62          }
63      }
64  
65      org.apache.lenya.lucene.html.HTMLParser parser;
66  
67      protected org.apache.lenya.lucene.html.HTMLParser getParser() {
68          return parser;
69      }
70  
71      /**
72       * DOCUMENT ME!
73       *
74       * @param file DOCUMENT ME!
75       *
76       * @throws ParseException DOCUMENT ME!
77       */
78      public void parse(File   file) throws ParseException {
79          try {
80              parser = new org.apache.lenya.lucene.html.HTMLParser(file);
81          } catch (FileNotFoundException   e) {
82              throw new ParseException(e);
83          }
84      }
85  
86      /**
87       * DOCUMENT ME!
88       *
89       * @param uri DOCUMENT ME!
90       *
91       * @throws ParseException DOCUMENT ME!
92       */
93      public void parse(URI   uri) throws ParseException {
94          try {
95              URLConnection   connection = uri.toURL().openConnection();
96              Reader   reader = new InputStreamReader  (connection.getInputStream());
97              parser = new org.apache.lenya.lucene.html.HTMLParser(reader);
98          } catch (MalformedURLException   e) {
99              throw new ParseException(e);
100         } catch (IOException   e) {
101             throw new ParseException(e);
102         }
103     }
104 }
105
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags