1 31 32 package org.opencms.search.extractors; 33 34 import java.io.StringReader ; 35 import java.util.regex.Pattern ; 36 37 import javax.swing.text.Document ; 38 import javax.swing.text.rtf.RTFEditorKit ; 39 40 49 public final class CmsExtractorRtf extends A_CmsTextExtractor { 50 51 52 private static final CmsExtractorRtf INSTANCE = new CmsExtractorRtf(); 53 54 55 private static final Pattern TS_REMOVE_PATTERN = Pattern.compile("\\{\\\\\\*\\\\ts[^\\}]*\\}", Pattern.DOTALL); 56 57 60 private CmsExtractorRtf() { 61 62 } 64 65 70 public static I_CmsTextExtractor getExtractor() { 71 72 return INSTANCE; 73 } 74 75 78 public I_CmsExtractionResult extractText(byte[] content, String encoding) throws Exception { 79 80 String input = new String (content); 82 83 input = TS_REMOVE_PATTERN.matcher(input).replaceAll(""); 87 88 RTFEditorKit rtfEditor = new RTFEditorKit (); 90 Document doc = rtfEditor.createDefaultDocument(); 91 rtfEditor.read(new StringReader (input), doc, 0); 92 93 String result = doc.getText(0, doc.getLength()); 94 result = removeControlChars(result); 95 96 return new CmsExtractionResult(result); 97 } 98 99 117 } | Popular Tags |