package org.opencms.util;

import org.opencms.jsp.parse.DivTag;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import org.htmlparser.Parser;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.Remark;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.tags.Div;
import org.htmlparser.util.ParserException;
import org.htmlparser.visitors.NodeVisitor;

/**
 * Node visitor that walks an HTML document and collects output in a result buffer.<p>
 *
 * When constructed with <code>echo == true</code>, every visited tag, end tag, remark and
 * text node is appended to the result buffer; otherwise the visit methods append nothing.
 * The <code>visit*</code> methods are public and non-final, so subclasses can replace them.<p>
 */
public class CmsHtmlParser extends NodeVisitor implements I_CmsHtmlNodeVisitor {

    /**
     * Tag names made available to subclasses.<p>
     *
     * Not referenced inside this class itself.
     */
    protected static final String[] TAG_ARRAY = new String[] {
        "H1",
        "H2",
        "H3",
        "H4",
        "H5",
        "H6",
        "P",
        "DIV",
        "SPAN",
        "BR",
        "OL",
        "UL",
        "LI",
        "TABLE",
        "TD",
        "TR",
        "TH",
        "THEAD",
        "TBODY",
        "TFOOT"};

    /** Fixed-size list view of {@link #TAG_ARRAY}. */
    protected static final List TAG_LIST = Arrays.asList(TAG_ARRAY);

    /** State of {@link #collapse(String)}: no non-whitespace character has been copied yet. */
    private static final int COLLAPSE_START = 0;

    /** State of {@link #collapse(String)}: whitespace was seen after already copied text. */
    private static final int COLLAPSE_PENDING_SPACE = 1;

    /** State of {@link #collapse(String)}: the last character seen was not whitespace. */
    private static final int COLLAPSE_TEXT = 2;

    /** Indicates if the visited nodes are echoed to the result buffer. */
    protected boolean m_echo;

    /** The buffer the result is written to. */
    protected StringBuffer m_result;

    /** The configuration String, the empty String if none has been set. */
    private String m_configuration = "";

    /** The upper case names (Strings) of the tags for which the adjusted node factory is used. */
    private final List m_noAutoCloseTags;

    /**
     * Creates a new parser that does not echo the visited nodes to the result.<p>
     */
    public CmsHtmlParser() {

        this(false);
    }

    /**
     * Creates a new parser.<p>
     *
     * @param echo if <code>true</code>, all visited nodes are appended to the result
     */
    public CmsHtmlParser(boolean echo) {

        m_result = new StringBuffer(1024);
        m_echo = echo;
        m_noAutoCloseTags = new ArrayList(32);
    }

    /**
     * Returns the configuration String.<p>
     *
     * @return the configuration String, the empty String if none has been set
     */
    public String getConfiguration() {

        return m_configuration;
    }

    /**
     * Returns the list of upper case tag names set by {@link #setNoAutoCloseTags(List)}.<p>
     *
     * This is the internal list itself, not a copy.
     *
     * @return the list of upper case tag names (Strings)
     */
    public List getNoAutoCloseTags() {

        return m_noAutoCloseTags;
    }

    /**
     * Returns the content collected in the result buffer so far.<p>
     *
     * @return the content of the result buffer
     */
    public String getResult() {

        return m_result.toString();
    }

    /**
     * Returns the text of the given tag enclosed in angle brackets.<p>
     *
     * @param tag the tag to generate the HTML for
     *
     * @return <code>&lt;</code> + the tag text + <code>&gt;</code>
     */
    public String getTagHtml(Tag tag) {

        StringBuffer html = new StringBuffer(32);
        html.append('<').append(tag.getText()).append('>');
        return html.toString();
    }

    /**
     * Parses the given HTML with this instance as visitor and returns the collected result.<p>
     *
     * The result buffer is reset at the beginning of every call.
     *
     * @param html the HTML to process
     * @param encoding the character set used to initialize the page
     *
     * @return the content of the result buffer after all nodes have been visited
     *
     * @throws ParserException if the underlying HTML parser reports an error
     */
    public String process(String html, String encoding) throws ParserException {

        m_result = new StringBuffer();

        Lexer lexer = new Lexer();
        lexer.setPage(new Page(html, encoding));

        Parser parser = new Parser();
        parser.setLexer(lexer);

        if (!m_noAutoCloseTags.isEmpty()) {
            // only swap in the adjusted node factory if at least one tag name has been configured
            lexer.setNodeFactory(configureNoAutoCorrectionTags());
        }

        parser.visitAllNodesWith(this);
        return getResult();
    }

    /**
     * Sets the configuration String.<p>
     *
     * The current configuration is kept if the given value is rejected by
     * <code>CmsStringUtil.isNotEmpty(String)</code>.
     *
     * @param configuration the configuration String to set
     */
    public void setConfiguration(String configuration) {

        if (CmsStringUtil.isNotEmpty(configuration)) {
            m_configuration = configuration;
        }
    }

    /**
     * Replaces the configured tag names with the upper case form of the given names.<p>
     *
     * The names are converted with {@link Locale#ENGLISH} so that the result does not depend on
     * the default locale of the JVM (e.g. "div" must become "DIV" also under a Turkish locale).<p>
     *
     * The given list is read completely before the internal list is modified. Passing the list
     * returned by {@link #getNoAutoCloseTags()} is therefore safe, and the current configuration
     * stays untouched if an element of the given list is not a non-null String.<p>
     *
     * @param noAutoCloseTagList the tag names (Strings) to set, <code>null</code> clears the list
     */
    public void setNoAutoCloseTags(List noAutoCloseTagList) {

        List normalized = new ArrayList();
        if (noAutoCloseTagList != null) {
            Iterator it = noAutoCloseTagList.iterator();
            while (it.hasNext()) {
                normalized.add(((String)it.next()).toUpperCase(Locale.ENGLISH));
            }
        }
        // keep the list instance, references handed out by getNoAutoCloseTags() stay valid
        m_noAutoCloseTags.clear();
        m_noAutoCloseTags.addAll(normalized);
    }

    /**
     * Appends the HTML of the given end tag to the result if echo is enabled.<p>
     *
     * @param tag the end tag visited
     */
    public void visitEndTag(Tag tag) {

        if (m_echo) {
            m_result.append(getTagHtml(tag));
        }
    }

    /**
     * Appends the HTML of the given remark to the result if echo is enabled.<p>
     *
     * @param remark the remark visited
     */
    public void visitRemarkNode(Remark remark) {

        if (m_echo) {
            m_result.append(remark.toHtml());
        }
    }

    /**
     * Appends the text of the given text node to the result if echo is enabled.<p>
     *
     * @param text the text node visited
     */
    public void visitStringNode(Text text) {

        if (m_echo) {
            m_result.append(text.getText());
        }
    }

    /**
     * Appends the HTML of the given tag to the result if echo is enabled.<p>
     *
     * @param tag the tag visited
     */
    public void visitTag(Tag tag) {

        if (m_echo) {
            m_result.append(getTagHtml(tag));
        }
    }

    /**
     * Collapses the whitespace in the given String.<p>
     *
     * Leading and trailing whitespace is removed, every other run of whitespace is replaced by
     * one blank. The characters treated as whitespace are blank, tab, form feed, zero width
     * space (U+200B), carriage return and line feed.<p>
     *
     * @param string the String to collapse
     *
     * @return the collapsed String
     */
    protected String collapse(String string) {

        int len = string.length();
        StringBuffer result = new StringBuffer(len);
        int state = COLLAPSE_START;
        for (int i = 0; i < len; i++) {
            char c = string.charAt(i);
            switch (c) {
                case '\u0020':
                case '\u0009':
                case '\u000C':
                case '\u200B':
                case '\r':
                case '\n':
                    // whitespace before the first copied character is dropped
                    if (state != COLLAPSE_START) {
                        state = COLLAPSE_PENDING_SPACE;
                    }
                    break;
                default:
                    // the blank is written only when more text follows, this trims the end
                    if (state == COLLAPSE_PENDING_SPACE) {
                        result.append(' ');
                    }
                    state = COLLAPSE_TEXT;
                    result.append(c);
            }
        }
        return result.toString();
    }

    /**
     * Creates the node factory to use if tag names have been configured.<p>
     *
     * If one of the configured names is an id of the {@link Div} tag, the {@link Div} prototype
     * is replaced by a {@link DivTag}. Other configured names do not change the factory.<p>
     *
     * @return the node factory to use for parsing
     */
    private PrototypicalNodeFactory configureNoAutoCorrectionTags() {

        PrototypicalNodeFactory factory = new PrototypicalNodeFactory();

        Div div = new Div();
        List divNames = Arrays.asList(div.getIds());
        Iterator it = m_noAutoCloseTags.iterator();
        while (it.hasNext()) {
            if (divNames.contains(it.next())) {
                factory.unregisterTag(div);
                factory.registerTag(new DivTag());
                // the replacement has to be done only once, even if the name is listed twice
                break;
            }
        }
        return factory;
    }
}