package org.eclipse.jface.internal.text.link.contentassist;


import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.eclipse.swt.SWT;
import org.eclipse.swt.custom.StyleRange;

import org.eclipse.jface.text.TextPresentation;


/**
 * Reads HTML from an underlying reader and hands out plain text.
 * <p>
 * A fixed set of tags (see {@link #fgTags}) is translated into line
 * delimiters, tabs or list item prefixes; every other tag is dropped. A
 * small set of named entities and numeric character references is resolved.
 * Text enclosed in {@code <b>}, {@code <hN>} or {@code <dt>} is reported as a
 * bold {@link StyleRange} to the optional {@link TextPresentation}.
 * </p>
 * <p>
 * NOTE(review): the reading loop, {@code nextChar()}, {@code getReader()},
 * {@code setSkipWhitespace(boolean)} and {@code LINE_DELIM} are not declared
 * in this file; they are presumably inherited from
 * {@link SubstitutionTextReader} - confirm there.
 * </p>
 */
public class HTML2TextReader extends SubstitutionTextReader {

    private static final String EMPTY_STRING= "";

    /** Maps an entity name (without {@code &} and {@code ;}) to its replacement text. */
    private static final Map fgEntityLookup;

    /** Lower-case names of the tags that {@link #html2Text(String)} reacts to. */
    private static final Set fgTags;

    static {

        fgTags= new HashSet();
        fgTags.add("b");
        fgTags.add("br");
        fgTags.add("br/");
        fgTags.add("div");
        fgTags.add("h1");
        fgTags.add("h2");
        fgTags.add("h3");
        fgTags.add("h4");
        fgTags.add("h5");
        // html2Text() treats any "h<digit>" as a heading; without this entry
        // <h6> was filtered out as an unknown tag before reaching that code.
        fgTags.add("h6");
        fgTags.add("p");
        fgTags.add("dl");
        fgTags.add("dt");
        fgTags.add("dd");
        fgTags.add("li");
        fgTags.add("ul");
        fgTags.add("pre");
        fgTags.add("head");

        fgEntityLookup= new HashMap(7);
        fgEntityLookup.put("lt", "<");
        fgEntityLookup.put("gt", ">");
        fgEntityLookup.put("nbsp", " ");
        fgEntityLookup.put("amp", "&");
        fgEntityLookup.put("circ", "^");
        fgEntityLookup.put("tilde", "~");
        fgEntityLookup.put("quot", "\"");
    }

    /** Running character count; used as the offset for bold style ranges. */
    private int fCounter= 0;

    /** Receives the bold style ranges; may be <code>null</code>. */
    private TextPresentation fTextPresentation;

    /** Nesting depth of currently open bold runs. */
    private int fBold= 0;

    /** Value of {@link #fCounter} when the outermost bold run started, or -1. */
    private int fStartOffset= -1;

    /** Set by {@code <p>}, cleared by {@code </p>}. */
    private boolean fInParagraph= false;

    /** <code>true</code> between {@code <pre>} and {@code </pre>}. */
    private boolean fIsPreformattedText= false;

    /** <code>true</code> between {@code <head>} and {@code </head>}; text is dropped meanwhile. */
    private boolean fIgnore= false;


    /**
     * Creates a reader that converts the HTML delivered by <code>reader</code>.
     * The reader is wrapped in a {@link PushbackReader} so that a single
     * character can be pushed back while parsing tags.
     *
     * @param reader the reader supplying the HTML
     * @param presentation receives a bold style range for each bold run;
     *        may be <code>null</code>, in which case no ranges are recorded
     */
    public HTML2TextReader(Reader reader, TextPresentation presentation) {
        super(new PushbackReader(reader));
        fTextPresentation= presentation;
    }

    /**
     * Reads one character via the superclass and counts it.
     *
     * @return the character read, or -1
     * @throws IOException if the superclass read fails
     */
    public int read() throws IOException {
        int c= super.read();
        if (c != -1) {
            ++fCounter;
        }
        return c;
    }

    /**
     * Opens a bold run. Only the outermost run records its start offset;
     * nested runs just increase the nesting depth.
     */
    protected void startBold() {
        if (fBold == 0) {
            fStartOffset= fCounter;
        }
        ++fBold;
    }

    /** Enters preformatted mode and turns whitespace skipping off. */
    protected void startPreformattedText() {
        fIsPreformattedText= true;
        setSkipWhitespace(false);
    }

    /** Leaves preformatted mode and turns whitespace skipping back on. */
    protected void stopPreformattedText() {
        fIsPreformattedText= false;
        setSkipWhitespace(true);
    }

    /**
     * Closes a bold run. When the outermost run is closed, a bold
     * {@link StyleRange} from the recorded start offset up to the current
     * character count is added to the text presentation (if there is one).
     * <p>
     * A close without a matching open is ignored. Previously it drove the
     * nesting depth negative, after which {@link #startBold()} never saw a
     * depth of zero again and no later bold run was recorded.
     * </p>
     */
    protected void stopBold() {
        if (fBold <= 0) {
            return;
        }
        --fBold;
        if (fBold == 0) {
            if (fTextPresentation != null) {
                fTextPresentation.addStyleRange(new StyleRange(fStartOffset, fCounter - fStartOffset, null, null, SWT.BOLD));
            }
            fStartOffset= -1;
        }
    }

    /**
     * Computes the replacement for the character <code>c</code>.
     * Tags are always processed, even while text is being ignored, so that
     * the closing {@code </head>} can end the ignored section.
     *
     * @param c the character just read
     * @return the replacement text, or <code>null</code> when this class has
     *         no replacement for <code>c</code> (presumably the superclass
     *         then passes the character through - confirm there)
     * @throws IOException if reading further characters fails
     */
    protected String computeSubstitution(int c) throws IOException {

        if (c == '<') {
            return processHTMLTag();
        } else if (fIgnore) {
            return EMPTY_STRING;
        } else if (c == '&') {
            return processEntity();
        } else if (fIsPreformattedText) {
            return processPreformattedText(c);
        }

        return null;
    }

    /**
     * Translates the content of a tag (the text between {@code <} and
     * {@code >}) into its plain text replacement and updates the reader
     * state (bold, paragraph, preformatted, ignore).
     * <p>
     * The order of the checks matters: {@code pre} and {@code /pre} are
     * handled first, and while in preformatted mode every other known tag
     * is dropped without side effects.
     * </p>
     *
     * @param html the tag content, may be <code>null</code> or empty
     * @return the replacement text; the empty string for unknown tags
     */
    private String html2Text(String html) {

        if (html == null || html.length() == 0) {
            return EMPTY_STRING;
        }

        html= html.toLowerCase();

        // Closing tags are looked up by the name of their opening tag.
        String tag= html;
        if ('/' == tag.charAt(0)) {
            tag= tag.substring(1);
        }

        if (!fgTags.contains(tag)) {
            return EMPTY_STRING;
        }

        if ("pre".equals(html)) {
            startPreformattedText();
            return EMPTY_STRING;
        }

        if ("/pre".equals(html)) {
            stopPreformattedText();
            return EMPTY_STRING;
        }

        if (fIsPreformattedText) {
            return EMPTY_STRING;
        }

        if ("b".equals(html)) {
            startBold();
            return EMPTY_STRING;
        }

        // Headings and definition terms are rendered bold.
        if ((html.length() > 1 && html.charAt(0) == 'h' && Character.isDigit(html.charAt(1))) || "dt".equals(html)) {
            startBold();
            return EMPTY_STRING;
        }

        if ("dl".equals(html)) {
            return LINE_DELIM;
        }

        if ("dd".equals(html)) {
            return "\t";
        }

        if ("li".equals(html)) {
            return LINE_DELIM + ContentAssistMessages.getString("HTML2TextReader.listItemPrefix");
        }

        if ("/b".equals(html)) {
            stopBold();
            return EMPTY_STRING;
        }

        if ("p".equals(html)) {
            fInParagraph= true;
            return LINE_DELIM;
        }

        if ("br".equals(html) || "br/".equals(html) || "div".equals(html)) {
            return LINE_DELIM;
        }

        if ("/p".equals(html)) {
            // A </p> that closes an open <p> adds nothing; a stray </p>
            // yields a line delimiter.
            boolean inParagraph= fInParagraph;
            fInParagraph= false;
            return inParagraph ? EMPTY_STRING : LINE_DELIM;
        }

        if ((html.startsWith("/h") && html.length() > 2 && Character.isDigit(html.charAt(2))) || "/dt".equals(html)) {
            stopBold();
            return LINE_DELIM;
        }

        if ("/dd".equals(html)) {
            return LINE_DELIM;
        }

        if ("head".equals(html)) {
            fIgnore= true;
            return EMPTY_STRING;
        }

        if ("/head".equals(html)) {
            fIgnore= false;
            return EMPTY_STRING;
        }

        return EMPTY_STRING;
    }

    /**
     * Reads a tag whose opening {@code <} has already been consumed and
     * returns its replacement text.
     * <ul>
     * <li>Characters are collected in lower case up to the closing
     *     {@code >}; a {@code >} inside a double-quoted section does not
     *     end the tag.</li>
     * <li>If another {@code <} shows up first, that character is pushed
     *     back and the text collected so far is returned literally,
     *     prefixed with {@code <}.</li>
     * <li>For a comment ({@code !--}) collecting continues across
     *     {@code >} characters until the collected text ends with
     *     {@code --}.</li>
     * </ul>
     *
     * @return the replacement text, or <code>null</code> if the input ended
     *         before the tag was closed
     * @throws IOException if reading or pushing back a character fails
     */
    private String processHTMLTag() throws IOException {

        StringBuffer buf= new StringBuffer();
        int ch;
        do {

            ch= nextChar();

            while (ch != -1 && ch != '>') {
                buf.append(Character.toLowerCase((char) ch));
                ch= nextChar();
                if (ch == '"') {
                    buf.append(Character.toLowerCase((char) ch));
                    ch= nextChar();
                    while (ch != -1 && ch != '"') {
                        buf.append(Character.toLowerCase((char) ch));
                        ch= nextChar();
                    }
                }
                if (ch == '<') {
                    unread(ch);
                    return '<' + buf.toString();
                }
            }

            if (ch == -1) {
                return null;
            }

            int tagLen= buf.length();
            // Comments need special treatment: a '>' only ends them when it
            // directly follows "--".
            if ((tagLen >= 3 && "!--".equals(buf.substring(0, 3)))
                    && !(tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)))) {
                // Unfinished comment: keep the '>' as part of the comment.
                // The cast selects append(char); append(int) would add the
                // decimal digits "62" to the buffer instead.
                buf.append((char) ch);
            } else {
                break;
            }
        } while (true);

        return html2Text(buf.toString());
    }

    /**
     * Handles a character read while in preformatted mode. The character
     * itself is never replaced.
     * <p>
     * NOTE(review): for line break characters the character count is
     * incremented here in addition to the increment in {@link #read()}; the
     * reason is not visible in this file - confirm before changing.
     * </p>
     *
     * @param c the character just read
     * @return always <code>null</code>
     */
    private String processPreformattedText(int c) {
        if (c == '\r' || c == '\n') {
            fCounter++;
        }
        return null;
    }


    /**
     * Pushes <code>ch</code> back so that it is read again.
     * Relies on {@code getReader()} returning the {@link PushbackReader}
     * created in the constructor (presumably so - confirm in the superclass).
     *
     * @param ch the character to push back
     * @throws IOException if the push back fails
     */
    private void unread(int ch) throws IOException {
        ((PushbackReader) getReader()).unread(ch);
    }

    /**
     * Resolves an entity to its text.
     * <p>
     * Numeric references ({@code #NNN} decimal, {@code #xHHH} hexadecimal)
     * are converted with a cast to <code>char</code>, so only the low 16
     * bits of the value are kept. Named entities are looked up in
     * {@link #fgEntityLookup}.
     * </p>
     *
     * @param symbol the entity without the leading {@code &} and the
     *        trailing {@code ;}
     * @return the resolved text; for an unknown or malformed entity,
     *         {@code "&"} followed by <code>symbol</code> (the {@code ;}
     *         is not restored)
     */
    protected String entity2Text(String symbol) {
        if (symbol.length() > 1 && symbol.charAt(0) == '#') {
            int ch;
            try {
                if (symbol.charAt(1) == 'x') {
                    ch= Integer.parseInt(symbol.substring(2), 16);
                } else {
                    ch= Integer.parseInt(symbol.substring(1), 10);
                }
                return EMPTY_STRING + (char) ch;
            } catch (NumberFormatException e) {
                // Deliberately ignored: a malformed numeric reference falls
                // through to the literal fallback below.
            }
        } else {
            String str= (String) fgEntityLookup.get(symbol);
            if (str != null) {
                return str;
            }
        }
        return "&" + symbol; // not found
    }

    /**
     * Reads an entity whose leading {@code &} has already been consumed.
     * Letters, digits and {@code #} are collected; if the next character is
     * {@code ;} the entity is resolved via {@link #entity2Text(String)}.
     * Otherwise the text read so far is returned literally, prefixed with
     * {@code &} and followed by the terminating character (which is
     * consumed, not pushed back) unless the input ended.
     *
     * @return the replacement text for the entity
     * @throws IOException if reading fails
     */
    private String processEntity() throws IOException {
        StringBuffer buf= new StringBuffer();
        int ch= nextChar();
        while (Character.isLetterOrDigit((char) ch) || ch == '#') {
            buf.append((char) ch);
            ch= nextChar();
        }

        if (ch == ';') {
            return entity2Text(buf.toString());
        }

        buf.insert(0, '&');
        if (ch != -1) {
            buf.append((char) ch);
        }
        return buf.toString();
    }
}