KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > alfresco > repo > content > transform > HtmlParserContentTransformer


/*
 * Copyright (C) 2005 Alfresco, Inc.
 *
 * Licensed under the Mozilla Public License version 1.1
 * with a permitted attribution clause. You may obtain a
 * copy of the License at
 *
 * http://www.alfresco.org/legal/license.txt
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the
 * License.
 */

17 package org.alfresco.repo.content.transform;
18
19 import java.io.File JavaDoc;
20 import java.util.Map JavaDoc;
21
22 import org.alfresco.repo.content.MimetypeMap;
23 import org.alfresco.service.cmr.repository.ContentReader;
24 import org.alfresco.service.cmr.repository.ContentWriter;
25 import org.alfresco.util.TempFileProvider;
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.htmlparser.beans.StringBean;
29
30 /**
31  * @see http://htmlparser.sourceforge.net/
32  * @see org.htmlparser.beans.StringBean
33  *
34  * @author Derek Hulley
35  */

36 public class HtmlParserContentTransformer extends AbstractContentTransformer
37 {
38     private static final Log logger = LogFactory.getLog(HtmlParserContentTransformer.class);
39     
40     /**
41      * Only support HTML to TEXT.
42      */

43     public double getReliability(String JavaDoc sourceMimetype, String JavaDoc targetMimetype)
44     {
45         if (!MimetypeMap.MIMETYPE_HTML.equals(sourceMimetype) ||
46             !MimetypeMap.MIMETYPE_TEXT_PLAIN.equals(targetMimetype))
47         {
48             // only support HTML -> TEXT
49
return 0.0;
50         }
51         else
52         {
53             return 1.0;
54         }
55     }
56
57     public void transformInternal(ContentReader reader, ContentWriter writer, Map JavaDoc<String JavaDoc, Object JavaDoc> options)
58             throws Exception JavaDoc
59     {
60         // we can only work from a file
61
File JavaDoc htmlFile = TempFileProvider.createTempFile("HtmlParserContentTransformer_", ".html");
62         reader.getContent(htmlFile);
63         
64         // create the extractor
65
StringBean extractor = new StringBean();
66         extractor.setCollapse(false);
67         extractor.setLinks(false);
68         extractor.setReplaceNonBreakingSpaces(false);
69         extractor.setURL(htmlFile.getAbsolutePath());
70
71         // get the text
72
String JavaDoc text = extractor.getStrings();
73         // write it to the writer
74
writer.putContent(text);
75     }
76 }
77
Popular Tags