1 /*2 * Copyright 1999-2004 The Apache Software Foundation3 *4 * Licensed under the Apache License, Version 2.0 (the "License");5 * you may not use this file except in compliance with the License.6 * You may obtain a copy of the License at7 *8 * http://www.apache.org/licenses/LICENSE-2.09 *10 * Unless required by applicable law or agreed to in writing, software11 * distributed under the License is distributed on an "AS IS" BASIS,12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.13 * See the License for the specific language governing permissions and14 * limitations under the License.15 *16 */17 18 /* $Id: PDFParserWrapper.java 42598 2004-03-01 16:18:28Z gregor $ */19 20 package org.apache.lenya.lucene.parser;21 22 import java.io.File ;23 import java.io.FileNotFoundException ;24 import java.io.IOException ;25 import java.io.InputStreamReader ;26 import java.io.Reader ;27 import java.net.MalformedURLException ;28 import java.net.URI ;29 import java.net.URLConnection ;30 31 public class PDFParserWrapper extends AbstractHTMLParser {32 /** Creates a new instance of PDFParserWrapper */33 public PDFParserWrapper() {34 }35 36 /** Returns a reader that reads the contents of the HTML document.37 *38 */39 public Reader getReader() throws IOException {40 return getParser().getReader();41 }42 43 /** Returns the title of the HTML document.44 *45 */46 public String getTitle() throws IOException {47 try {48 return getParser().getTitle();49 } catch (InterruptedException e) {50 throw new IOException (e.getMessage());51 }52 }53 54 /** Returns the keywords of the HTML document.55 *56 */57 public String getKeywords() throws IOException {58 try {59 return getParser().getKeywords();60 } catch (InterruptedException e) {61 throw new IOException (e.getMessage());62 }63 }64 65 org.apache.lenya.lucene.html.HTMLParser parser;66 67 protected org.apache.lenya.lucene.html.HTMLParser getParser() {68 return parser;69 }70 71 /**72 * DOCUMENT ME!73 *74 * @param file DOCUMENT ME!75 *76 * @throws ParseException DOCUMENT ME!77 */78 public void parse(File file) throws ParseException {79 try {80 parser = new org.apache.lenya.lucene.html.HTMLParser(file);81 } catch (FileNotFoundException e) {82 throw new ParseException(e);83 }84 }85 86 /**87 * DOCUMENT ME!88 *89 * @param uri DOCUMENT ME!90 *91 * @throws ParseException DOCUMENT ME!92 */93 public void parse(URI uri) throws ParseException {94 try {95 URLConnection connection = uri.toURL().openConnection();96 Reader reader = new InputStreamReader (connection.getInputStream());97 parser = new org.apache.lenya.lucene.html.HTMLParser(reader);98 } catch (MalformedURLException e) {99 throw new ParseException(e);100 } catch (IOException e) {101 throw new ParseException(e);102 }103 }104 }105