package org.eclipse.jdt.internal.ui.text;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.eclipse.swt.SWT;
import org.eclipse.swt.custom.StyleRange;

import org.eclipse.jface.text.TextPresentation;

import org.eclipse.jdt.internal.ui.JavaUIMessages;


/**
 * Reader that turns a small subset of HTML into plain text while it is being read.
 * <p>
 * Tags listed in {@link #fgTags} are translated into line delimiters, tabs, list item
 * prefixes or bold style ranges; every other tag is dropped. Character entities are
 * decoded. Bold regions are reported to the optional {@link TextPresentation} using
 * offsets into the converted text.
 * </p>
 */
public class HTML2TextReader extends SubstitutionTextReader {

	private static final String EMPTY_STRING= ""; //$NON-NLS-1$

	/** Maps entity names (without <code>&amp;</code> and <code>;</code>) to their replacement text. */
	private static final Map fgEntityLookup;

	/** Lower-case names of the tags that are interpreted; all other tags are removed. */
	private static final Set fgTags;

	static {

		fgTags= new HashSet();
		fgTags.add("b"); //$NON-NLS-1$
		fgTags.add("br"); //$NON-NLS-1$
		fgTags.add("br/"); //$NON-NLS-1$
		fgTags.add("div"); //$NON-NLS-1$
		fgTags.add("h1"); //$NON-NLS-1$
		fgTags.add("h2"); //$NON-NLS-1$
		fgTags.add("h3"); //$NON-NLS-1$
		fgTags.add("h4"); //$NON-NLS-1$
		fgTags.add("h5"); //$NON-NLS-1$
		fgTags.add("p"); //$NON-NLS-1$
		fgTags.add("dl"); //$NON-NLS-1$
		fgTags.add("dt"); //$NON-NLS-1$
		fgTags.add("dd"); //$NON-NLS-1$
		fgTags.add("li"); //$NON-NLS-1$
		fgTags.add("ul"); //$NON-NLS-1$
		fgTags.add("pre"); //$NON-NLS-1$
		fgTags.add("head"); //$NON-NLS-1$

		fgEntityLookup= new HashMap(7);
		fgEntityLookup.put("lt", "<"); //$NON-NLS-1$ //$NON-NLS-2$
		fgEntityLookup.put("gt", ">"); //$NON-NLS-1$ //$NON-NLS-2$
		fgEntityLookup.put("nbsp", " "); //$NON-NLS-1$ //$NON-NLS-2$
		fgEntityLookup.put("amp", "&"); //$NON-NLS-1$ //$NON-NLS-2$
		fgEntityLookup.put("circ", "^"); //$NON-NLS-1$ //$NON-NLS-2$
		fgEntityLookup.put("tilde", "~"); //$NON-NLS-1$ //$NON-NLS-2$
		fgEntityLookup.put("quot", "\""); //$NON-NLS-1$ //$NON-NLS-2$
	}

	/** Number of characters handed out by {@link #read()} so far; used as style range offset. */
	private int fCounter= 0;
	/** Receives the bold style ranges; may be <code>null</code>. */
	private TextPresentation fTextPresentation;
	/** Nesting depth of currently open bold regions. */
	private int fBold= 0;
	/** Value of {@link #fCounter} when the outermost bold region was opened, or -1. */
	private int fStartOffset= -1;
	private boolean fInParagraph= false;
	private boolean fIsPreformattedText= false;
	/** <code>true</code> while inside <code>&lt;head&gt;</code>; non-tag characters are then discarded. */
	private boolean fIgnore= false;

	/**
	 * Creates a reader that converts the HTML delivered by the given reader.
	 *
	 * @param reader the reader supplying the HTML input; it is wrapped in a {@link PushbackReader}
	 * @param presentation the presentation that receives bold style ranges, or
	 *            <code>null</code> if no style information should be recorded
	 */
	public HTML2TextReader(Reader reader, TextPresentation presentation) {
		super(new PushbackReader(reader));
		fTextPresentation= presentation;
	}

	/**
	 * Reads the next character of the converted text and counts it.
	 *
	 * @return the character read, or -1 at the end of the input
	 * @throws IOException if reading from the underlying reader fails
	 */
	public int read() throws IOException {
		int c= super.read();
		if (c != -1)
			++ fCounter;
		return c;
	}

	/**
	 * Opens a bold region. Only the outermost region remembers the start offset.
	 */
	protected void startBold() {
		if (fBold == 0)
			fStartOffset= fCounter;
		++ fBold;
	}

	/**
	 * Enters preformatted mode and switches off whitespace skipping.
	 */
	protected void startPreformattedText() {
		fIsPreformattedText= true;
		setSkipWhitespace(false);
	}

	/**
	 * Leaves preformatted mode and switches whitespace skipping back on.
	 */
	protected void stopPreformattedText() {
		fIsPreformattedText= false;
		setSkipWhitespace(true);
	}

	/**
	 * Closes a bold region. When the outermost region is closed, a bold style range covering
	 * the text read since the matching {@link #startBold()} is added to the presentation,
	 * if there is one.
	 * <p>
	 * A call without a matching {@link #startBold()} is ignored, so that an unbalanced
	 * closing tag cannot push the nesting depth below zero and thereby suppress all
	 * bold regions that follow.
	 * </p>
	 */
	protected void stopBold() {
		if (fBold <= 0)
			return; // unmatched closing tag

		-- fBold;
		if (fBold == 0) {
			if (fTextPresentation != null) {
				fTextPresentation.addStyleRange(new StyleRange(fStartOffset, fCounter - fStartOffset, null, null, SWT.BOLD));
			}
			fStartOffset= -1;
		}
	}

	/**
	 * Computes the replacement for the given input character.
	 *
	 * @param c the character just read from the input
	 * @return the text that replaces the character (possibly empty), or <code>null</code>
	 *         if the character is to be passed through unchanged
	 * @throws IOException if reading further input fails
	 */
	protected String computeSubstitution(int c) throws IOException {

		if (c == '<')
			return processHTMLTag();
		else if (fIgnore)
			return EMPTY_STRING;
		else if (c == '&')
			return processEntity();
		else if (fIsPreformattedText)
			return processPreformattedText(c);

		return null;
	}

	/**
	 * Translates the content of a tag (the text between <code>&lt;</code> and
	 * <code>&gt;</code>) into its plain text replacement and updates the reader state
	 * (bold nesting, paragraph, preformatted and ignore flags) accordingly.
	 *
	 * @param html the tag content, may be <code>null</code> or empty
	 * @return the replacement text; the empty string for tags that produce no output
	 *         or that are not interpreted
	 */
	private String html2Text(String html) {

		if (html == null || html.length() == 0)
			return EMPTY_STRING;

		html= html.toLowerCase();

		String tag= html;
		if ('/' == tag.charAt(0))
			tag= tag.substring(1);

		if (!fgTags.contains(tag))
			return EMPTY_STRING;


		if ("pre".equals(html)) { //$NON-NLS-1$
			startPreformattedText();
			return EMPTY_STRING;
		}

		if ("/pre".equals(html)) { //$NON-NLS-1$
			stopPreformattedText();
			return EMPTY_STRING;
		}

		// inside <pre> all other known tags are swallowed without effect
		if (fIsPreformattedText)
			return EMPTY_STRING;

		if ("b".equals(html)) { //$NON-NLS-1$
			startBold();
			return EMPTY_STRING;
		}

		// headings and definition terms are rendered bold
		if ((html.length() > 1 && html.charAt(0) == 'h' && Character.isDigit(html.charAt(1))) || "dt".equals(html)) { //$NON-NLS-1$
			startBold();
			return EMPTY_STRING;
		}

		if ("dl".equals(html)) //$NON-NLS-1$
			return LINE_DELIM;

		if ("dd".equals(html)) //$NON-NLS-1$
			return "\t"; //$NON-NLS-1$

		if ("li".equals(html)) //$NON-NLS-1$
			return LINE_DELIM + JavaUIMessages.HTML2TextReader_listItemPrefix;

		if ("/b".equals(html)) { //$NON-NLS-1$
			stopBold();
			return EMPTY_STRING;
		}

		if ("p".equals(html)) { //$NON-NLS-1$
			fInParagraph= true;
			return LINE_DELIM;
		}

		if ("br".equals(html) || "br/".equals(html) || "div".equals(html)) //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
			return LINE_DELIM;

		if ("/p".equals(html)) { //$NON-NLS-1$
			// a closing </p> without an opening <p> still produces a line break
			boolean inParagraph= fInParagraph;
			fInParagraph= false;
			return inParagraph ? EMPTY_STRING : LINE_DELIM;
		}

		if ((html.startsWith("/h") && html.length() > 2 && Character.isDigit(html.charAt(2))) || "/dt".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$
			stopBold();
			return LINE_DELIM;
		}

		if ("/dd".equals(html)) //$NON-NLS-1$
			return LINE_DELIM;

		if ("head".equals(html)) { //$NON-NLS-1$
			fIgnore= true;
			return EMPTY_STRING;
		}

		if ("/head".equals(html)) { //$NON-NLS-1$
			fIgnore= false;
			return EMPTY_STRING;
		}

		return EMPTY_STRING;
	}

	/**
	 * Reads the remainder of a tag whose opening <code>&lt;</code> has already been consumed
	 * and returns its replacement text.
	 * <p>
	 * If another <code>&lt;</code> shows up before the tag is closed (outside of a
	 * double-quoted section), the text read so far is not a tag: the second
	 * <code>&lt;</code> is pushed back and the consumed text is returned literally, in its
	 * original case. Comments (<code>&lt;!-- ... --&gt;</code>) are read up to their
	 * terminating <code>--&gt;</code>.
	 * </p>
	 *
	 * @return the replacement text, or <code>null</code> if the input ended before the tag
	 *         was closed
	 * @throws IOException if reading from the underlying reader fails
	 */
	private String processHTMLTag() throws IOException {

		StringBuffer buf= new StringBuffer(); // lower-cased tag text, used for matching
		StringBuffer raw= new StringBuffer(); // same text in original case, used for literal output
		int ch;
		do {

			ch= nextChar();

			while (ch != -1 && ch != '>') {
				appendTagChar(buf, raw, ch);
				ch= nextChar();
				if (ch == '"') {
					// copy a quoted section verbatim; '<' and '>' inside it are not special
					appendTagChar(buf, raw, ch);
					ch= nextChar();
					while (ch != -1 && ch != '"') {
						appendTagChar(buf, raw, ch);
						ch= nextChar();
					}
				}
				if (ch == '<') {
					unread(ch);
					return '<' + raw.toString();
				}
			}

			if (ch == -1)
				return null;

			int tagLen= buf.length();
			// needs special treatment for comments
			if ((tagLen >= 3 && "!--".equals(buf.substring(0, 3))) //$NON-NLS-1$
					&& !(tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)))) { //$NON-NLS-1$
				// unfinished comment: the '>' belongs to the comment text; keep reading
				appendTagChar(buf, raw, ch);
			} else {
				break;
			}
		} while (true);

		return html2Text(buf.toString());
	}

	/**
	 * Appends the given character to both tag buffers.
	 *
	 * @param lower the buffer receiving the lower-cased character
	 * @param raw the buffer receiving the unchanged character
	 * @param ch the character to append; must not be -1
	 */
	private static void appendTagChar(StringBuffer lower, StringBuffer raw, int ch) {
		char c= (char) ch;
		raw.append(c);
		lower.append(Character.toLowerCase(c));
	}

	/**
	 * Handles a character read while in preformatted mode. Carriage return and line feed
	 * increment {@link #fCounter} once more here, in addition to the increment in
	 * {@link #read()}.
	 * NOTE(review): the reason for the additional increment is not visible in this class.
	 *
	 * @param c the character read
	 * @return always <code>null</code>, i.e. the character is passed through unchanged
	 */
	private String processPreformattedText(int c) {
		if (c == '\r' || c == '\n')
			fCounter++;
		return null;
	}


	/**
	 * Pushes a character back onto the underlying {@link PushbackReader}.
	 *
	 * @param ch the character to push back
	 * @throws IOException if the push back fails
	 */
	private void unread(int ch) throws IOException {
		((PushbackReader) getReader()).unread(ch);
	}

	/**
	 * Returns the text for an entity.
	 * <p>
	 * Numeric references (<code>#NNN</code>, <code>#xHHH</code> or <code>#XHHH</code>) are
	 * converted to the corresponding character; code points above U+FFFF yield a surrogate
	 * pair. Named entities are looked up in {@link #fgEntityLookup}.
	 * </p>
	 *
	 * @param symbol the entity name without the leading <code>&amp;</code> and the
	 *            trailing <code>;</code>
	 * @return the replacement text, or <code>"&amp;" + symbol</code> if the entity is
	 *         unknown, malformed or outside the Unicode range
	 */
	protected String entity2Text(String symbol) {
		if (symbol.length() > 1 && symbol.charAt(0) == '#') {
			try {
				int codePoint;
				char radixMarker= symbol.charAt(1);
				if (radixMarker == 'x' || radixMarker == 'X') {
					codePoint= Integer.parseInt(symbol.substring(2), 16);
				} else {
					codePoint= Integer.parseInt(symbol.substring(1), 10);
				}
				String text= codePointToString(codePoint);
				if (text != null)
					return text;
			} catch (NumberFormatException e) {
				// not a valid number: fall through and emit the reference literally
			}
		} else {
			String str= (String) fgEntityLookup.get(symbol);
			if (str != null) {
				return str;
			}
		}
		return "&" + symbol; //$NON-NLS-1$
	}

	/**
	 * Converts a Unicode code point into a string of one or two UTF-16 code units.
	 *
	 * @param codePoint the code point
	 * @return the string, or <code>null</code> if the value is outside 0..0x10FFFF
	 */
	private static String codePointToString(int codePoint) {
		if (codePoint < 0 || codePoint > 0x10FFFF)
			return null;
		if (codePoint < 0x10000)
			return String.valueOf((char) codePoint);

		// supplementary character: encode as high/low surrogate pair
		int offset= codePoint - 0x10000;
		char high= (char) (0xD800 + (offset >> 10));
		char low= (char) (0xDC00 + (offset & 0x3FF));
		return new StringBuffer(2).append(high).append(low).toString();
	}

	/**
	 * Reads an entity whose leading <code>&amp;</code> has already been consumed and
	 * returns its replacement text.
	 * <p>
	 * If the name is not terminated by <code>;</code>, the consumed text is returned
	 * literally. A terminating <code>&lt;</code> or <code>&amp;</code> is pushed back
	 * instead of being returned as text, so that the tag or entity it starts is still
	 * processed.
	 * </p>
	 *
	 * @return the replacement text
	 * @throws IOException if reading from the underlying reader fails
	 */
	private String processEntity() throws IOException {
		StringBuffer buf= new StringBuffer();
		int ch= nextChar();
		while (Character.isLetterOrDigit((char) ch) || ch == '#') {
			buf.append((char) ch);
			ch= nextChar();
		}

		if (ch == ';')
			return entity2Text(buf.toString());

		buf.insert(0, '&');
		if (ch == '<' || ch == '&') {
			// start of markup: hand it back to the reader, same as processHTMLTag() does
			unread(ch);
		} else if (ch != -1) {
			buf.append((char) ch);
		}
		return buf.toString();
	}
}