1 11 package org.eclipse.ant.internal.ui.editor.derived; 12 13 14 import java.io.IOException ; 15 import java.io.PushbackReader ; 16 import java.io.Reader ; 17 import java.util.HashMap ; 18 import java.util.HashSet ; 19 import java.util.Map ; 20 import java.util.Set ; 21 22 import org.eclipse.jface.text.TextPresentation; 23 import org.eclipse.swt.SWT; 24 import org.eclipse.swt.custom.StyleRange; 25 26 32 public class HTML2TextReader extends SubstitutionTextReader { 33 34 private static final String EMPTY_STRING= ""; private static final Map fgEntityLookup; 36 private static final Set fgTags; 37 38 static { 39 40 fgTags= new HashSet (); 41 fgTags.add("b"); fgTags.add("br"); fgTags.add("h5"); fgTags.add("p"); fgTags.add("dl"); fgTags.add("dt"); fgTags.add("dd"); fgTags.add("li"); fgTags.add("ul"); fgTags.add("pre"); 52 fgEntityLookup= new HashMap (7); 53 fgEntityLookup.put("lt", "<"); fgEntityLookup.put("gt", ">"); fgEntityLookup.put("nbsp", " "); fgEntityLookup.put("amp", "&"); fgEntityLookup.put("circ", "^"); fgEntityLookup.put("tilde", "~"); fgEntityLookup.put("quot", "\""); } 61 62 private int fCounter= 0; 63 private TextPresentation fTextPresentation; 64 private int fBold= 0; 65 private int fStartOffset= -1; 66 private boolean fInParagraph= false; 67 private boolean fIsPreformattedText= false; 68 69 74 public HTML2TextReader(Reader reader, TextPresentation presentation) { 75 super(new PushbackReader (reader)); 76 fTextPresentation= presentation; 77 } 78 79 public int read() throws IOException { 80 int c= super.read(); 81 if (c != -1) 82 ++ fCounter; 83 return c; 84 } 85 86 protected void startBold() { 87 if (fBold == 0) 88 fStartOffset= fCounter; 89 ++ fBold; 90 } 91 92 protected void startPreformattedText() { 93 fIsPreformattedText= true; 94 setSkipWhitespace(false); 95 } 96 97 protected void stopPreformattedText() { 98 fIsPreformattedText= false; 99 setSkipWhitespace(true); 100 } 101 102 protected void stopBold() { 103 -- fBold; 104 if (fBold == 0) { 105 if (fTextPresentation != null) { 106 fTextPresentation.addStyleRange(new StyleRange(fStartOffset, fCounter - fStartOffset, null, null, SWT.BOLD)); 107 } 108 fStartOffset= -1; 109 } 110 } 111 112 115 protected String computeSubstitution(int c) throws IOException { 116 117 if (c == '<') 118 return processHTMLTag(); 119 else if (c == '&') 120 return processEntity(); 121 else if (fIsPreformattedText) 122 return processPreformattedText(c); 123 124 return null; 125 } 126 127 private String html2Text(String html) { 128 129 String tag= html; 130 if ('/' == tag.charAt(0)) 131 tag= tag.substring(1); 132 133 if (!fgTags.contains(tag)) 134 return EMPTY_STRING; 135 136 137 if ("pre".equals(html)) { startPreformattedText(); 139 return EMPTY_STRING; 140 } 141 142 if ("/pre".equals(html)) { stopPreformattedText(); 144 return EMPTY_STRING; 145 } 146 147 if (fIsPreformattedText) 148 return EMPTY_STRING; 149 150 if ("b".equals(html)) { startBold(); 152 return EMPTY_STRING; 153 } 154 155 if ("h5".equals(html) || "dt".equals(html)) { startBold(); 157 return EMPTY_STRING; 158 } 159 160 if ("dl".equals(html)) return LINE_DELIM; 162 163 if ("dd".equals(html)) return "\t"; 166 if ("li".equals(html)) return LINE_DELIM + "\t-"; 169 if ("/b".equals(html)) { stopBold(); 171 return EMPTY_STRING; 172 } 173 174 if ("p".equals(html)) { fInParagraph= true; 176 return LINE_DELIM; 177 } 178 179 if ("br".equals(html)) return LINE_DELIM; 181 182 if ("/p".equals(html)) { boolean inParagraph= fInParagraph; 184 fInParagraph= false; 185 return inParagraph ? EMPTY_STRING : LINE_DELIM; 186 } 187 188 if ("/h5".equals(html) || "/dt".equals(html)) { stopBold(); 190 return LINE_DELIM; 191 } 192 193 if ("/dd".equals(html)) return LINE_DELIM; 195 196 return EMPTY_STRING; 197 } 198 199 202 private String processHTMLTag() throws IOException { 203 204 StringBuffer buf= new StringBuffer (); 205 int ch; 206 do { 207 208 ch= nextChar(); 209 210 while (ch != -1 && ch != '>') { 211 buf.append(Character.toLowerCase((char) ch)); 212 ch= nextChar(); 213 if (ch == '"'){ 214 buf.append(Character.toLowerCase((char) ch)); 215 ch= nextChar(); 216 while (ch != -1 && ch != '"'){ 217 buf.append(Character.toLowerCase((char) ch)); 218 ch= nextChar(); 219 } 220 } 221 if (ch == '<'){ 222 unread(ch); 223 return '<' + buf.toString(); 224 } 225 } 226 227 if (ch == -1) 228 return null; 229 230 int tagLen= buf.length(); 231 if ((tagLen >= 3 && "!--".equals(buf.substring(0, 3))) && !(tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)))) { buf.append(ch); 236 } else { 237 break; 238 } 239 } while (true); 240 241 return html2Text(buf.toString()); 242 } 243 244 private String processPreformattedText(int c) { 245 if (c == '\r' || c == '\n') 246 fCounter++; 247 return null; 248 } 249 250 251 private void unread(int ch) throws IOException { 252 ((PushbackReader ) getReader()).unread(ch); 253 } 254 255 protected String entity2Text(String symbol) { 256 if (symbol.length() > 1 && symbol.charAt(0) == '#') { 257 int ch; 258 try { 259 if (symbol.charAt(1) == 'x') { 260 ch= Integer.parseInt(symbol.substring(2), 16); 261 } else { 262 ch= Integer.parseInt(symbol.substring(1), 10); 263 } 264 return EMPTY_STRING + (char)ch; 265 } catch (NumberFormatException e) { 266 } 267 } else { 268 String str= (String ) fgEntityLookup.get(symbol); 269 if (str != null) { 270 return str; 271 } 272 } 273 return "&" + symbol; } 275 276 279 private String processEntity() throws IOException { 280 StringBuffer buf= new StringBuffer (); 281 int ch= nextChar(); 282 while (Character.isLetterOrDigit((char)ch) || ch == '#') { 283 buf.append((char) ch); 284 ch= nextChar(); 285 } 286 287 if (ch == ';') 288 return entity2Text(buf.toString()); 289 290 buf.insert(0, '&'); 291 if (ch != -1) 292 buf.append((char) ch); 293 return buf.toString(); 294 } 295 } 296 | Popular Tags |