KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > parse > rtf > RTFParseFactory


1 /* Copyright (c) 2004 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.parse.rtf;
5
6 import net.nutch.parse.*;
7 import net.nutch.parse.ParseException;
8 import net.nutch.protocol.Content;
9
10 import java.io.ByteArrayInputStream JavaDoc;
11 import java.io.InputStreamReader JavaDoc;
12 import java.io.Reader JavaDoc;
13 import java.util.Properties JavaDoc;
14
15 import com.etranslate.tm.processing.rtf.RTFParser;
16
17 /**
18  * A parser for RTF documents
19  * @author Andy Hedges
20  */

21 public class RTFParseFactory implements Parser {
22
23   public Parse getParse(Content content) throws ParseException {
24     byte[] raw = content.getContent();
25     Reader JavaDoc reader = new InputStreamReader JavaDoc(new ByteArrayInputStream JavaDoc(raw));
26     RTFParserDelegateImpl delegate = new RTFParserDelegateImpl();
27     RTFParser rtfParser = null;
28     rtfParser = RTFParser.createParser(reader);
29     rtfParser.setNewLine("\n");
30     rtfParser.setDelegate(delegate);
31
32     try {
33       rtfParser.parse();
34     } catch (com.etranslate.tm.processing.rtf.ParseException e) {
35       throw new ParseException("Exception parsing RTF document", e);
36     }
37
38     Properties JavaDoc metadata = new Properties JavaDoc();
39     metadata.putAll(content.getMetadata());
40     metadata.putAll(delegate.getMetaData());
41     String JavaDoc title = metadata.getProperty("title");
42
43     if(title != null){
44       metadata.remove(title);
45     } else {
46       title = "";
47     }
48
49     ParseData parseData = new ParseData(title, new Outlink[0], metadata);
50
51     return new ParseImpl(delegate.getText(), parseData);
52   }
53
54
55 }
56
Popular Tags