KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > alfresco > repo > content > transform > StringExtractingContentTransformer


/*
 * Copyright (C) 2005 Alfresco, Inc.
 *
 * Licensed under the Mozilla Public License version 1.1
 * with a permitted attribution clause. You may obtain a
 * copy of the License at
 *
 * http://www.alfresco.org/legal/license.txt
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the
 * License.
 */

17 package org.alfresco.repo.content.transform;
18
19 import java.io.InputStreamReader JavaDoc;
20 import java.io.OutputStreamWriter JavaDoc;
21 import java.io.Reader JavaDoc;
22 import java.io.Writer JavaDoc;
23 import java.util.Map JavaDoc;
24
25 import org.alfresco.repo.content.MimetypeMap;
26 import org.alfresco.service.cmr.repository.ContentReader;
27 import org.alfresco.service.cmr.repository.ContentWriter;
28 import org.apache.commons.logging.Log;
29 import org.apache.commons.logging.LogFactory;
30
31 /**
32  * Converts any textual format to plain text.
33  * <p>
34  * The transformation is sensitive to the source and target string encodings.
35  *
36  * @author Derek Hulley
37  */

38 public class StringExtractingContentTransformer extends AbstractContentTransformer
39 {
40     public static final String JavaDoc PREFIX_TEXT = "text/";
41     
42     private static final Log logger = LogFactory.getLog(StringExtractingContentTransformer.class);
43     
44     /**
45      * Gives a high reliability for all translations from <i>text/sometype</i> to
46      * <i>text/plain</i>. As the text formats are already text, the characters
47      * are preserved and no actual conversion takes place.
48      * <p>
49      * Extraction of text from binary data is wholly unreliable.
50      */

51     public double getReliability(String JavaDoc sourceMimetype, String JavaDoc targetMimetype)
52     {
53         if (!targetMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
54         {
55             // can only convert to plain text
56
return 0.0;
57         }
58         else if (sourceMimetype.equals(MimetypeMap.MIMETYPE_TEXT_PLAIN))
59         {
60             // conversions from any plain text format are very reliable
61
return 1.0;
62         }
63         else if (sourceMimetype.startsWith(PREFIX_TEXT))
64         {
65             // the source is text, but probably with some kind of markup
66
return 0.1;
67         }
68         else
69         {
70             // extracting text from binary is not useful
71
return 0.0;
72         }
73     }
74
75     /**
76      * Text to text conversions are done directly using the content reader and writer string
77      * manipulation methods.
78      * <p>
79      * Extraction of text from binary content attempts to take the possible character
80      * encoding into account. The text produced from this will, if the encoding was correct,
81      * be unformatted but valid.
82      */

83     @Override JavaDoc
84     public void transformInternal(ContentReader reader, ContentWriter writer, Map JavaDoc<String JavaDoc, Object JavaDoc> options)
85             throws Exception JavaDoc
86     {
87         // is this a straight text-text transformation
88
transformText(reader, writer);
89     }
90     
91     /**
92      * Transformation optimized for text-to-text conversion
93      */

94     private void transformText(ContentReader reader, ContentWriter writer) throws Exception JavaDoc
95     {
96         // get a char reader and writer
97
Reader JavaDoc charReader = null;
98         Writer JavaDoc charWriter = null;
99         try
100         {
101             if (reader.getEncoding() == null)
102             {
103                 charReader = new InputStreamReader JavaDoc(reader.getContentInputStream());
104             }
105             else
106             {
107                 charReader = new InputStreamReader JavaDoc(reader.getContentInputStream(), reader.getEncoding());
108             }
109             if (writer.getEncoding() == null)
110             {
111                 charWriter = new OutputStreamWriter JavaDoc(writer.getContentOutputStream());
112             }
113             else
114             {
115                 charWriter = new OutputStreamWriter JavaDoc(writer.getContentOutputStream(), writer.getEncoding());
116             }
117             // copy from the one to the other
118
char[] buffer = new char[1024];
119             int readCount = 0;
120             while (readCount > -1)
121             {
122                 // write the last read count number of bytes
123
charWriter.write(buffer, 0, readCount);
124                 // fill the buffer again
125
readCount = charReader.read(buffer);
126             }
127         }
128         finally
129         {
130             if (charReader != null)
131             {
132                 try { charReader.close(); } catch (Throwable JavaDoc e) { logger.error(e); }
133             }
134             if (charWriter != null)
135             {
136                 try { charWriter.close(); } catch (Throwable JavaDoc e) { logger.error(e); }
137             }
138         }
139         // done
140
}
141 }
142
Popular Tags