KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > ajaxanywhere > parser > ResponseParserHTML


1 /*
2 Copyright 2005 Vitaliy Shevchuk (shevit@users.sourceforge.net)
3
4    Licensed under the Apache License, Version 2.0 (the "License");
5    you may not use this file except in compliance with the License.
6    You may obtain a copy of the License at
7
8        http://www.apache.org/licenses/LICENSE-2.0
9
10    Unless required by applicable law or agreed to in writing, software
11    distributed under the License is distributed on an "AS IS" BASIS,
12    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13    See the License for the specific language governing permissions and
14    limitations under the License.
15
16 */

17
18 package org.ajaxanywhere.parser;
19
20 import javax.swing.text.MutableAttributeSet JavaDoc;
21 import javax.swing.text.html.HTML JavaDoc;
22 import javax.swing.text.html.HTMLEditorKit JavaDoc;
23 import javax.swing.text.html.parser.ParserDelegator JavaDoc;
24 import java.io.IOException JavaDoc;
25 import java.io.Reader JavaDoc;
26 import java.io.StringReader JavaDoc;
27 import java.util.*;
28 import java.util.regex.Pattern JavaDoc;
29
30 public class ResponseParserHTML implements ResponseParser {
31     private static ResponseParser ourInstance = new ResponseParserHTML();
32     public static final Pattern JavaDoc SCRIPT_START_PATTERN = Pattern.compile("<script",Pattern.CASE_INSENSITIVE);
33     public static final Pattern JavaDoc SCRIPT_END_PATTERN = Pattern.compile("</script",Pattern.CASE_INSENSITIVE);
34
35     public static ResponseParser getInstance() {
36         return ourInstance;
37     }
38
39     private ResponseParserHTML() {
40     }
41
42     public ResponseBean parse(String JavaDoc html) {
43         // javax.swing.text.html.parser bug workaround.
44
// Swing HTML parser interpretes incorrectly <> and </> substrings
45
// therefore we replace <> and </> with dummy strings and replace them back after parsing.
46

47         String JavaDoc dummy = findDummy(html);
48         html = html.replaceAll("<>", "<" + dummy + ">").replaceAll("</>", "</" + dummy + ">");
49
50         // JSE 6 regression workarpond : http://forum.java.sun.com/thread.jspa?threadID=5118473
51
// let's use a depricated DIR tag instead of SCRIPT
52
html = SCRIPT_START_PATTERN.matcher(html).replaceAll("<DIR");
53         html = SCRIPT_END_PATTERN.matcher(html).replaceAll("</DIR");
54
55         ResponseBean responseBean = doParse(html);
56         String JavaDoc htmlContent = responseBean.getHtmlContent();
57         if (htmlContent != null)
58             responseBean.setHtmlContent(htmlContent.replaceAll("<" + dummy + ">", "<>").replaceAll("</" + dummy + ">", "</>"));
59         for (int i = 0; i < responseBean.getScriptContents().size(); i++) {
60             String JavaDoc s = (String JavaDoc) responseBean.getScriptContents().get(i);
61             responseBean.getScriptContents().set(i, s.replaceAll("<" + dummy + ">", "<>").replaceAll("</" + dummy + ">", "</>"));
62         }
63         return responseBean;
64     }
65
66     private String JavaDoc findDummy(String JavaDoc html) {
67         String JavaDoc dummy;
68         do {
69             dummy = Double.toString(Math.random());
70         } while (html.indexOf(dummy) != -1);
71
72         return dummy;
73     }
74
75     public ResponseBean doParse(final String JavaDoc html) {
76
77         ResponseBean res = new ResponseBean();
78         try {
79             final StringBuffer JavaDoc contentHTML = new StringBuffer JavaDoc();
80             final List scripts = res.getScriptContents();
81             final Set JavaDoc images = res.getImages();
82
83             HTMLEditorKit.ParserCallback JavaDoc callback = new HTMLEditorKit.ParserCallback JavaDoc() {
84                 private boolean insideScript;
85                 private StringBuffer JavaDoc scriptContent = new StringBuffer JavaDoc();
86                 private int lastStop;
87
88                 private void append(String JavaDoc str) {
89                     if (insideScript) {
90                         scriptContent.append(str);
91                     } else {
92                         contentHTML.append(str);
93                     }
94                 }
95
96                 public void appendSinceLastStop(int newPos) {
97                     if (lastStop > newPos)
98                         return;
99                     append(html.substring(lastStop, newPos));
100                     lastStop = newPos;
101                 }
102
103                 private void flushScript() {
104                     int posScriptEnd = scriptContent.indexOf(">");
105                     if (posScriptEnd == -1)
106                         posScriptEnd = 0;
107                     int posC1 = scriptContent.indexOf("<!--", posScriptEnd);
108                     int posC11 = scriptContent.indexOf("<![CDATA[", posScriptEnd);
109                     int posQ1 = scriptContent.indexOf("'", posScriptEnd);
110                     int posQ2 = scriptContent.indexOf("\"", posScriptEnd);
111
112                     if ((posC1 != -1) && (posQ2 == -1 || posC1 < posQ2) && (posQ1 == -1 || posC1 < posQ1))
113                         scriptContent.delete(posC1, posC1 + 4);
114
115                     if ((posC11 != -1) && (posQ2 == -1 || posC11 < posQ2) && (posQ1 == -1 || posC11 < posQ1))
116                         scriptContent.delete(posC11, posC11 + 9);
117
118
119                     posQ1 = scriptContent.lastIndexOf("'", posScriptEnd);
120                     posQ2 = scriptContent.lastIndexOf("\"", posScriptEnd);
121                     int posC2 = scriptContent.indexOf("-->", posScriptEnd);
122                     int posC22 = scriptContent.indexOf("//]]>", posScriptEnd);
123
124                     if ((posC2 != -1) && (posQ2 == -1 || posC2 > posQ2) && (posQ1 == -1 || posC2 > posQ1))
125                         scriptContent.delete(posC2, posC2 + 3);
126                     if ((posC22 != -1) && (posQ2 == -1 || posC22 > posQ2) && (posQ1 == -1 || posC22 > posQ1))
127                         scriptContent.delete(posC22, posC22 + 5);
128
129                     int len = scriptContent.length();
130                     if (len >0 && scriptContent.charAt(len -1) =='>'){
131                         int lastEndTagPos = scriptContent.lastIndexOf("</");
132                         if (lastEndTagPos!=-1)
133                             scriptContent.setLength(lastEndTagPos);
134                     }
135                     scripts.add(scriptContent.toString());
136                     scriptContent.setLength(0);
137                 }
138
139                 public void handleText(char[] data, int pos) {
140                     appendSinceLastStop(pos);
141                 }
142
143                 public void handleComment(char[] data, int pos) {
144                     appendSinceLastStop(pos);
145                     if (data == null)
146                         flushScript();
147                 }
148
149                 public void handleError(String JavaDoc errorMsg, int pos) {
150
151                 }
152
153                 public void handleEndTag(HTML.Tag JavaDoc tag, int pos) {
154                     if (pos == -1)
155                         return;
156                     if (lastStop > pos)
157                         return;
158                     appendSinceLastStop(pos);
159
160                     if (tag == HTML.Tag.DIR && insideScript) { /** DIR is prevously replaced SCRIPT tag to **/
161                         int posScriptEnd = html.indexOf('>', pos);
162
163                         if (posScriptEnd != -1)
164                             lastStop = posScriptEnd + 1;
165
166                         insideScript = false;
167                         flushScript();
168                     }
169                 }
170
171                 public void handleSimpleTag(HTML.Tag JavaDoc tag, MutableAttributeSet JavaDoc attributes, int pos) {
172                     handleStartTag(tag, attributes, pos);
173                 }
174
175                 public void handleStartTag(HTML.Tag JavaDoc tag, MutableAttributeSet JavaDoc attributes, int pos) {
176                     appendSinceLastStop(pos);
177                     if (tag == HTML.Tag.DIR) { /** DIR is prevously replaced SCRIPT tag to **/
178                         insideScript = true;
179                     } else if (tag == HTML.Tag.IMG) {
180                         images.add(attributes.getAttribute(HTML.Attribute.SRC));
181                     }
182                 }
183
184             };
185
186             Reader JavaDoc reader = new StringReader JavaDoc(html);
187             new ParserDelegator JavaDoc().parse(reader, callback, false);
188             callback.handleComment(null, html.length());
189
190             res.setHtmlContent(contentHTML.toString());
191             return res;
192
193         } catch (IOException JavaDoc e) {
194             throw new RuntimeException JavaDoc(e.toString()); // this should never heppen
195
}
196     }
197
198 }
199
Popular Tags