1 17 18 package org.ajaxanywhere.parser; 19 20 import javax.swing.text.MutableAttributeSet ; 21 import javax.swing.text.html.HTML ; 22 import javax.swing.text.html.HTMLEditorKit ; 23 import javax.swing.text.html.parser.ParserDelegator ; 24 import java.io.IOException ; 25 import java.io.Reader ; 26 import java.io.StringReader ; 27 import java.util.*; 28 import java.util.regex.Pattern ; 29 30 public class ResponseParserHTML implements ResponseParser { 31 private static ResponseParser ourInstance = new ResponseParserHTML(); 32 public static final Pattern SCRIPT_START_PATTERN = Pattern.compile("<script",Pattern.CASE_INSENSITIVE); 33 public static final Pattern SCRIPT_END_PATTERN = Pattern.compile("</script",Pattern.CASE_INSENSITIVE); 34 35 public static ResponseParser getInstance() { 36 return ourInstance; 37 } 38 39 private ResponseParserHTML() { 40 } 41 42 public ResponseBean parse(String html) { 43 47 String dummy = findDummy(html); 48 html = html.replaceAll("<>", "<" + dummy + ">").replaceAll("</>", "</" + dummy + ">"); 49 50 html = SCRIPT_START_PATTERN.matcher(html).replaceAll("<DIR"); 53 html = SCRIPT_END_PATTERN.matcher(html).replaceAll("</DIR"); 54 55 ResponseBean responseBean = doParse(html); 56 String htmlContent = responseBean.getHtmlContent(); 57 if (htmlContent != null) 58 responseBean.setHtmlContent(htmlContent.replaceAll("<" + dummy + ">", "<>").replaceAll("</" + dummy + ">", "</>")); 59 for (int i = 0; i < responseBean.getScriptContents().size(); i++) { 60 String s = (String ) responseBean.getScriptContents().get(i); 61 responseBean.getScriptContents().set(i, s.replaceAll("<" + dummy + ">", "<>").replaceAll("</" + dummy + ">", "</>")); 62 } 63 return responseBean; 64 } 65 66 private String findDummy(String html) { 67 String dummy; 68 do { 69 dummy = Double.toString(Math.random()); 70 } while (html.indexOf(dummy) != -1); 71 72 return dummy; 73 } 74 75 public ResponseBean doParse(final String html) { 76 77 ResponseBean res = new ResponseBean(); 78 try { 79 final StringBuffer contentHTML = new StringBuffer (); 80 final List scripts = res.getScriptContents(); 81 final Set images = res.getImages(); 82 83 HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback () { 84 private boolean insideScript; 85 private StringBuffer scriptContent = new StringBuffer (); 86 private int lastStop; 87 88 private void append(String str) { 89 if (insideScript) { 90 scriptContent.append(str); 91 } else { 92 contentHTML.append(str); 93 } 94 } 95 96 public void appendSinceLastStop(int newPos) { 97 if (lastStop > newPos) 98 return; 99 append(html.substring(lastStop, newPos)); 100 lastStop = newPos; 101 } 102 103 private void flushScript() { 104 int posScriptEnd = scriptContent.indexOf(">"); 105 if (posScriptEnd == -1) 106 posScriptEnd = 0; 107 int posC1 = scriptContent.indexOf("<!--", posScriptEnd); 108 int posC11 = scriptContent.indexOf("<![CDATA[", posScriptEnd); 109 int posQ1 = scriptContent.indexOf("'", posScriptEnd); 110 int posQ2 = scriptContent.indexOf("\"", posScriptEnd); 111 112 if ((posC1 != -1) && (posQ2 == -1 || posC1 < posQ2) && (posQ1 == -1 || posC1 < posQ1)) 113 scriptContent.delete(posC1, posC1 + 4); 114 115 if ((posC11 != -1) && (posQ2 == -1 || posC11 < posQ2) && (posQ1 == -1 || posC11 < posQ1)) 116 scriptContent.delete(posC11, posC11 + 9); 117 118 119 posQ1 = scriptContent.lastIndexOf("'", posScriptEnd); 120 posQ2 = scriptContent.lastIndexOf("\"", posScriptEnd); 121 int posC2 = scriptContent.indexOf("-->", posScriptEnd); 122 int posC22 = scriptContent.indexOf("//]]>", posScriptEnd); 123 124 if ((posC2 != -1) && (posQ2 == -1 || posC2 > posQ2) && (posQ1 == -1 || posC2 > posQ1)) 125 scriptContent.delete(posC2, posC2 + 3); 126 if ((posC22 != -1) && (posQ2 == -1 || posC22 > posQ2) && (posQ1 == -1 || posC22 > posQ1)) 127 scriptContent.delete(posC22, posC22 + 5); 128 129 int len = scriptContent.length(); 130 if (len >0 && scriptContent.charAt(len -1) =='>'){ 131 int lastEndTagPos = scriptContent.lastIndexOf("</"); 132 if (lastEndTagPos!=-1) 133 scriptContent.setLength(lastEndTagPos); 134 } 135 scripts.add(scriptContent.toString()); 136 scriptContent.setLength(0); 137 } 138 139 public void handleText(char[] data, int pos) { 140 appendSinceLastStop(pos); 141 } 142 143 public void handleComment(char[] data, int pos) { 144 appendSinceLastStop(pos); 145 if (data == null) 146 flushScript(); 147 } 148 149 public void handleError(String errorMsg, int pos) { 150 151 } 152 153 public void handleEndTag(HTML.Tag tag, int pos) { 154 if (pos == -1) 155 return; 156 if (lastStop > pos) 157 return; 158 appendSinceLastStop(pos); 159 160 if (tag == HTML.Tag.DIR && insideScript) { 161 int posScriptEnd = html.indexOf('>', pos); 162 163 if (posScriptEnd != -1) 164 lastStop = posScriptEnd + 1; 165 166 insideScript = false; 167 flushScript(); 168 } 169 } 170 171 public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int pos) { 172 handleStartTag(tag, attributes, pos); 173 } 174 175 public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int pos) { 176 appendSinceLastStop(pos); 177 if (tag == HTML.Tag.DIR) { 178 insideScript = true; 179 } else if (tag == HTML.Tag.IMG) { 180 images.add(attributes.getAttribute(HTML.Attribute.SRC)); 181 } 182 } 183 184 }; 185 186 Reader reader = new StringReader (html); 187 new ParserDelegator ().parse(reader, callback, false); 188 callback.handleComment(null, html.length()); 189 190 res.setHtmlContent(contentHTML.toString()); 191 return res; 192 193 } catch (IOException e) { 194 throw new RuntimeException (e.toString()); } 196 } 197 198 } 199 | Popular Tags |