KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > dspace > app > mediafilter > WordFilter


/*
 * WordFilter.java
 *
 * Version: $Revision: 1.7 $
 *
 * Date: $Date: 2005/07/29 15:56:07 $
 *
 * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
 * Institute of Technology. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the Hewlett-Packard Company nor the name of the
 * Massachusetts Institute of Technology nor the names of their
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */

package org.dspace.app.mediafilter;

import java.io.ByteArrayInputStream;
import java.io.InputStream;

import org.textmining.text.extraction.WordExtractor;

/*
 * TODO: helpful error messages - can't find mediafilter.cfg - can't
 * instantiate filter - bitstream format doesn't exist
 */

53 public class WordFilter extends MediaFilter
54 {
55     public String JavaDoc getFilteredName(String JavaDoc oldFilename)
56     {
57         return oldFilename + ".txt";
58     }
59
60     /**
61      * @return String bundle name
62      *
63      */

64     public String JavaDoc getBundleName()
65     {
66         return "TEXT";
67     }
68
69     /**
70      * @return String bitstreamformat
71      */

72     public String JavaDoc getFormatString()
73     {
74         return "Text";
75     }
76
77     /**
78      * @return String description
79      */

80     public String JavaDoc getDescription()
81     {
82         return "Extracted text";
83     }
84
85     /**
86      * @param source
87      * source input stream
88      *
89      * @return InputStream the resulting input stream
90      */

91     public InputStream JavaDoc getDestinationStream(InputStream JavaDoc source)
92             throws Exception JavaDoc
93     {
94         // get input stream from bitstream
95
// pass to filter, get string back
96
WordExtractor e = new WordExtractor();
97         String JavaDoc extractedText = e.extractText(source);
98
99         // if verbose flag is set, print out extracted text
100
// to STDOUT
101
if (MediaFilterManager.isVerbose)
102         {
103             System.out.println(extractedText);
104         }
105
106         // generate an input stream with the extracted text
107
byte[] textBytes = extractedText.getBytes();
108         ByteArrayInputStream JavaDoc bais = new ByteArrayInputStream JavaDoc(textBytes);
109
110         return bais; // will this work? or will the byte array be out of scope?
111
}
112 }
113
Popular Tags