KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lenya > lucene > parser > PDFParserWrapper


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17
18 /* $Id: PDFParserWrapper.java 42598 2004-03-01 16:18:28Z gregor $ */
19
20 package org.apache.lenya.lucene.parser;
21
22 import java.io.File JavaDoc;
23 import java.io.FileNotFoundException JavaDoc;
24 import java.io.IOException JavaDoc;
25 import java.io.InputStreamReader JavaDoc;
26 import java.io.Reader JavaDoc;
27 import java.net.MalformedURLException JavaDoc;
28 import java.net.URI JavaDoc;
29 import java.net.URLConnection JavaDoc;
30
31 public class PDFParserWrapper extends AbstractHTMLParser {
32     /** Creates a new instance of PDFParserWrapper */
33     public PDFParserWrapper() {
34     }
35
36     /** Returns a reader that reads the contents of the HTML document.
37      *
38      */

39     public Reader JavaDoc getReader() throws IOException JavaDoc {
40         return getParser().getReader();
41     }
42
43     /** Returns the title of the HTML document.
44      *
45      */

46     public String JavaDoc getTitle() throws IOException JavaDoc {
47         try {
48             return getParser().getTitle();
49         } catch (InterruptedException JavaDoc e) {
50             throw new IOException JavaDoc(e.getMessage());
51         }
52     }
53
54     /** Returns the keywords of the HTML document.
55      *
56      */

57     public String JavaDoc getKeywords() throws IOException JavaDoc {
58         try {
59             return getParser().getKeywords();
60         } catch (InterruptedException JavaDoc e) {
61             throw new IOException JavaDoc(e.getMessage());
62         }
63     }
64
65     org.apache.lenya.lucene.html.HTMLParser parser;
66
67     protected org.apache.lenya.lucene.html.HTMLParser getParser() {
68         return parser;
69     }
70
71     /**
72      * DOCUMENT ME!
73      *
74      * @param file DOCUMENT ME!
75      *
76      * @throws ParseException DOCUMENT ME!
77      */

78     public void parse(File JavaDoc file) throws ParseException {
79         try {
80             parser = new org.apache.lenya.lucene.html.HTMLParser(file);
81         } catch (FileNotFoundException JavaDoc e) {
82             throw new ParseException(e);
83         }
84     }
85
86     /**
87      * DOCUMENT ME!
88      *
89      * @param uri DOCUMENT ME!
90      *
91      * @throws ParseException DOCUMENT ME!
92      */

93     public void parse(URI JavaDoc uri) throws ParseException {
94         try {
95             URLConnection JavaDoc connection = uri.toURL().openConnection();
96             Reader JavaDoc reader = new InputStreamReader JavaDoc(connection.getInputStream());
97             parser = new org.apache.lenya.lucene.html.HTMLParser(reader);
98         } catch (MalformedURLException JavaDoc e) {
99             throw new ParseException(e);
100         } catch (IOException JavaDoc e) {
101             throw new ParseException(e);
102         }
103     }
104 }
105
Popular Tags