package org.opencms.util;

import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.util.Translate;

/**
 * Converts HTML into a plain text representation.
 *
 * The converter is driven through the visitor callbacks {@link #visitTag(Tag)},
 * {@link #visitEndTag(Tag)} and {@link #visitStringNode(Text)}. Text is word-wrapped at a
 * maximum line length, selected tags trigger line breaks, indentation and marker strings,
 * and the values of the <code>href</code>, <code>src</code>, <code>title</code> and
 * <code>alt</code> attributes are written to the output in brackets / braces.
 *
 * The result buffer <code>m_result</code>, the tag name list <code>TAG_LIST</code> and the
 * methods <code>collapse</code> and <code>process</code> are not declared in this class; they
 * are expected to be inherited from {@link CmsHtmlParser}.
 */
public class CmsHtml2TextConverter extends CmsHtmlParser {

    /** Character code of the non-breaking space. */
    private static final char NBSP = 160;

    /** Character code of the regular space. */
    private static final char SPACE = 32;

    /** If <code>true</code> line breaks are appended directly, otherwise they are only stored. */
    private boolean m_appendBr;

    /** Attribute text collected per tag, written to the output when the end tag is visited. */
    private Map m_attributeMap;

    /** Number of line breaks appended to the result since the last text was written. */
    private int m_brCount;

    /** The current indentation. */
    private int m_indent;

    /** The length of the current output line (starts at the indentation after a line break). */
    private int m_lineLength;

    /** Marker written once in front of the next indented text, <code>null</code> if none is pending. */
    private String m_marker;

    /** The line length after which text is wrapped. */
    private int m_maxLineLength;

    /** Number of line breaks that have been requested but not yet appended. */
    private int m_storedBrCount;

    /**
     * Creates a new converter with an empty result buffer and a maximum line length of 100.
     */
    public CmsHtml2TextConverter() {

        m_result = new StringBuffer(512);
        m_maxLineLength = 100;
        m_attributeMap = new HashMap(16);
    }

    /**
     * Extracts the text from the given HTML content using a fresh converter instance.
     *
     * @param html the HTML content to convert
     * @param encoding the encoding handed to <code>process</code>
     *
     * @return the result of <code>process(html, encoding)</code>
     *
     * @throws Exception if <code>process</code> fails
     */
    public static String html2text(String html, String encoding) throws Exception {

        CmsHtml2TextConverter visitor = new CmsHtml2TextConverter();
        return visitor.process(html, encoding);
    }

    /**
     * Handles an end tag: line breaks caused by the tag are stored (not appended), and any
     * attribute text collected for the tag's parent is written to the output.
     *
     * @param tag the end tag that is visited
     */
    public void visitEndTag(Tag tag) {

        m_appendBr = false;
        appendLinebreaks(tag, false);
        // attribute text was stored under the opening tag in appendAttribute(Tag, String);
        // NOTE(review): this relies on the end tag's parent being that opening tag - confirm
        String attribute = (String)m_attributeMap.remove(tag.getParent());
        if (attribute != null) {
            appendText(attribute);
        }
    }

    /**
     * Appends the plain text of the given text node to the output.
     *
     * @param text the text node that is visited
     */
    public void visitStringNode(Text text) {

        appendText(text.toPlainTextString());
    }

    /**
     * Handles a start tag: appends the line breaks caused by the tag, writes a placeholder
     * for images and collects the <code>href</code>, <code>src</code>, <code>title</code>
     * and <code>alt</code> attribute values.
     *
     * @param tag the tag that is visited
     */
    public void visitTag(Tag tag) {

        m_appendBr = true;
        appendLinebreaks(tag, true);

        if (tag.getTagName().equals("IMG")) {
            appendText("##IMG##");
        }

        // the order is significant: it is the order in which the values appear in the output
        appendAttributeValue(tag, "href", " [", "]");
        appendAttributeValue(tag, "src", " [", "]");
        appendAttributeValue(tag, "title", " {", "}");
        appendAttributeValue(tag, "alt", " {", "}");
    }

    /**
     * Writes the given attribute text directly for <code>IMG</code> tags, for all other tags
     * the text is added to the text already stored for the tag.
     *
     * @param tag the tag the attribute text belongs to
     * @param text the formatted attribute text
     */
    private void appendAttribute(Tag tag, String text) {

        if (tag.getTagName().equals("IMG")) {
            appendText(text);
            return;
        }
        String current = (String)m_attributeMap.get(tag);
        m_attributeMap.put(tag, (current != null) ? current + text : text);
    }

    /**
     * Reads the named attribute from the tag and, if it has a non-whitespace value, passes the
     * trimmed value enclosed in the given prefix and suffix to {@link #appendAttribute(Tag, String)}.
     *
     * @param tag the tag to read the attribute from
     * @param attributeName the name of the attribute to read
     * @param prefix the text written in front of the value
     * @param suffix the text written after the value
     */
    private void appendAttributeValue(Tag tag, String attributeName, String prefix, String suffix) {

        String value = tag.getAttribute(attributeName);
        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(value)) {
            appendAttribute(tag, prefix + value.trim() + suffix);
        }
    }

    /**
     * Appends the indentation (and the pending marker, if any) to the result, but only if the
     * current line does not yet extend beyond the indentation.
     */
    private void appendIndentation() {

        if (m_lineLength > m_indent) {
            return;
        }
        int padding = m_indent;
        if (m_marker != null) {
            // the marker and the blank following it take up part of the indentation
            padding -= m_marker.length() + 1;
        }
        for (int i = 0; i < padding; i++) {
            m_result.append(' ');
        }
        if (m_marker != null) {
            m_result.append(m_marker);
            m_result.append(' ');
            // a marker is written only once
            m_marker = null;
        }
    }

    /**
     * Appends or stores the given number of line breaks without forcing them.
     *
     * @param count the number of line breaks
     */
    private void appendLinebreak(int count) {

        appendLinebreak(count, false);
    }

    /**
     * Appends line breaks to the result, or stores the request if appending is currently
     * disabled.
     *
     * When appending, the larger of the requested and the stored count is used, and only as
     * many line breaks are written as are missing with respect to the breaks already written
     * since the last text. When storing, the stored count is raised to the requested count.
     *
     * @param count the number of line breaks
     * @param force if <code>true</code> the line breaks already written are not counted
     */
    private void appendLinebreak(int count, boolean force) {

        if (!m_appendBr) {
            m_storedBrCount = Math.max(m_storedBrCount, count);
            return;
        }
        int breaks = Math.max(count, m_storedBrCount);
        m_storedBrCount = 0;
        if (force) {
            m_brCount = 0;
        }
        while (m_brCount < breaks) {
            m_result.append("\r\n");
            m_brCount++;
        }
        m_lineLength = m_indent;
    }

    /**
     * Applies the marker, indentation and line breaks configured for the given tag.
     *
     * The tag is identified by the position of its name in <code>TAG_LIST</code>. That list is
     * not declared in this class, so the tag names given in the case comments below are
     * assumptions that must be verified against its declaration.
     *
     * @param tag the tag to handle
     * @param open <code>true</code> for a start tag, <code>false</code> for an end tag
     */
    private void appendLinebreaks(Tag tag, boolean open) {

        String name = tag.getTagName();
        int pos = TAG_LIST.indexOf(name);

        switch (pos) {
            case 0: // presumably H1
                setMarker("=", open);
                setIndentation(2, open);
                appendLinebreak(2);
                break;
            case 1: // presumably H2
                setMarker("==", open);
                setIndentation(3, open);
                appendLinebreak(2);
                break;
            case 2: // presumably H3
                setMarker("===", open);
                setIndentation(4, open);
                appendLinebreak(2);
                break;
            case 3: // presumably H4
                setMarker("====", open);
                setIndentation(5, open);
                appendLinebreak(2);
                break;
            case 4: // presumably H5
                setMarker("=====", open);
                setIndentation(6, open);
                appendLinebreak(2);
                break;
            case 5: // presumably H6
                // NOTE(review): this marker has 7 characters although the indentation is 7,
                // the cases above use a marker one character shorter than the indentation -
                // possibly a typo, kept unchanged because it affects the generated output
                setMarker("=======", open);
                setIndentation(7, open);
                appendLinebreak(2);
                break;
            case 6: // presumably P
            case 7: // presumably DIV
                appendLinebreak(2);
                break;
            case 8: // presumably SPAN, no line break
                break;
            case 9: // presumably BR, always produces a line break
                appendLinebreak(1, true);
                break;
            case 10: // presumably OL
            case 11: // presumably UL
                appendLinebreak(2);
                break;
            case 12: // presumably LI
                setMarker("*", open);
                setIndentation(5, open);
                appendLinebreak(1);
                break;
            case 13: // presumably TABLE, a separator line is written when the tag opens
                setIndentation(5, open);
                appendLinebreak(2);
                if (open) {
                    appendLinebreak(1);
                    appendText("-----");
                    appendLinebreak(1);
                }
                break;
            case 14: // presumably TD
                setMarker("--", open);
                appendLinebreak(2);
                break;
            case 15: // presumably TR, a separator line is written when the tag closes
                if (!open) {
                    appendLinebreak(1);
                    appendText("-----");
                    appendLinebreak(1);
                }
                break;
            case 16: // presumably TH
            case 17: // presumably THEAD
            case 18: // presumably TBODY
            case 19: // presumably TFOOT
                appendLinebreak(1);
                break;
            default: // tag without special handling
        }
    }

    /**
     * Appends the given text to the result, word by word.
     *
     * The text is entity-decoded and collapsed first. Pending line breaks are written before
     * the text, a line is wrapped when the next word would exceed the maximum line length,
     * and words are separated by a single blank unless a (non-breaking) space is already
     * present.
     *
     * @param text the text to append, ignored if <code>null</code>, empty or whitespace only
     */
    private void appendText(String text) {

        if (!CmsStringUtil.isNotEmptyOrWhitespaceOnly(text)) {
            return;
        }
        text = collapse(Translate.decode(text));
        if (!CmsStringUtil.isNotEmptyOrWhitespaceOnly(text)) {
            // nothing printable is left after decoding and collapsing
            return;
        }

        if (m_storedBrCount > 0) {
            // text follows, so the line breaks stored so far must be written now
            m_appendBr = true;
            appendLinebreak(m_storedBrCount);
        }
        appendIndentation();
        m_brCount = 0;

        List wordList = CmsStringUtil.splitAsList(text, ' ');
        Iterator i = wordList.iterator();
        while (i.hasNext()) {
            String word = (String)i.next();
            if (word.length() == 0) {
                // an empty token carries no text, and the charAt calls below would fail on it
                continue;
            }
            boolean hasNbsp = (word.charAt(0) == NBSP) || (word.charAt(word.length() - 1) == NBSP);
            if ((word.length() + 1 + m_lineLength) > m_maxLineLength) {
                // the word does not fit on the current line anymore
                m_appendBr = true;
                appendLinebreak(1);
                appendIndentation();
                m_brCount = 0;
            } else if (!hasNbsp && (m_lineLength > m_indent)) {
                char last = m_result.charAt(m_result.length() - 1);
                if ((last != NBSP) && (last != SPACE)) {
                    // separate the word from the text already on this line
                    m_result.append(' ');
                    m_lineLength++;
                }
            }
            m_result.append(word);
            m_lineLength += word.length();
        }
    }

    /**
     * Increases the indentation for a start tag, decreases it (not below zero) for an end tag.
     *
     * @param length the amount to change the indentation by
     * @param open <code>true</code> for a start tag, <code>false</code> for an end tag
     */
    private void setIndentation(int length, boolean open) {

        if (open) {
            m_indent += length;
        } else {
            m_indent = Math.max(0, m_indent - length);
        }
    }

    /**
     * Sets the marker to write in front of the next indented text, end tags are ignored.
     *
     * @param marker the marker to set
     * @param open <code>true</code> for a start tag, <code>false</code> for an end tag
     */
    private void setMarker(String marker, boolean open) {

        if (open) {
            m_marker = marker;
        }
    }
}