KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > app > mediafilter > HTMLFilter


1 /*
2  * HTMLFilter.java
3  *
4  * Version: $Revision: 1.7 $
5  *
6  * Date: $Date: 2005/07/29 15:56:07 $
7  *
8  * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
9  * Institute of Technology. All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are
13  * met:
14  *
15  * - Redistributions of source code must retain the above copyright
16  * notice, this list of conditions and the following disclaimer.
17  *
18  * - Redistributions in binary form must reproduce the above copyright
19  * notice, this list of conditions and the following disclaimer in the
20  * documentation and/or other materials provided with the distribution.
21  *
22  * - Neither the name of the Hewlett-Packard Company nor the name of the
23  * Massachusetts Institute of Technology nor the names of their
24  * contributors may be used to endorse or promote products derived from
25  * this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
34  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
35  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
36  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
37  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38  * DAMAGE.
39  */

40 package org.dspace.app.mediafilter;
41
42 import java.io.ByteArrayInputStream JavaDoc;
43 import java.io.InputStream JavaDoc;
44
45 import javax.swing.text.Document JavaDoc;
46 import javax.swing.text.html.HTMLEditorKit JavaDoc;
47
/*
 * TODO: produce helpful error messages for these failure cases:
 *   - mediafilter.cfg cannot be found
 *   - the filter cannot be instantiated
 *   - the bitstream format does not exist
 */

54 public class HTMLFilter extends MediaFilter
55 {
56
57     public String JavaDoc getFilteredName(String JavaDoc oldFilename)
58     {
59         return oldFilename + ".txt";
60     }
61
62     /**
63      * @return String bundle name
64      *
65      */

66     public String JavaDoc getBundleName()
67     {
68         return "TEXT";
69     }
70
71     /**
72      * @return String bitstreamformat
73      */

74     public String JavaDoc getFormatString()
75     {
76         return "Text";
77     }
78
79     /**
80      * @return String description
81      */

82     public String JavaDoc getDescription()
83     {
84         return "Extracted text";
85     }
86
87     /**
88      * @param source
89      * source input stream
90      *
91      * @return InputStream the resulting input stream
92      */

93     public InputStream JavaDoc getDestinationStream(InputStream JavaDoc source)
94             throws Exception JavaDoc
95     {
96         // try and read the document - set to ignore character set directive,
97
// assuming that the input stream is already set properly (I hope)
98
HTMLEditorKit JavaDoc kit = new HTMLEditorKit JavaDoc();
99         Document JavaDoc doc = kit.createDefaultDocument();
100
101         doc.putProperty("IgnoreCharsetDirective", new Boolean JavaDoc(true));
102
103         kit.read(source, doc, 0);
104
105         String JavaDoc extractedText = doc.getText(0, doc.getLength());
106
107         // generate an input stream with the extracted text
108
byte[] textBytes = extractedText.getBytes();
109         ByteArrayInputStream JavaDoc bais = new ByteArrayInputStream JavaDoc(textBytes);
110
111         return bais; // will this work? or will the byte array be out of scope?
112
}
113 }
114
Popular Tags