KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ant > internal > ui > editor > derived > HTML2TextReader


/*******************************************************************************
 * Copyright (c) 2000, 2005 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/

11 package org.eclipse.ant.internal.ui.editor.derived;
12
13
14 import java.io.IOException JavaDoc;
15 import java.io.PushbackReader JavaDoc;
16 import java.io.Reader JavaDoc;
17 import java.util.HashMap JavaDoc;
18 import java.util.HashSet JavaDoc;
19 import java.util.Map JavaDoc;
20 import java.util.Set JavaDoc;
21
22 import org.eclipse.jface.text.TextPresentation;
23 import org.eclipse.swt.SWT;
24 import org.eclipse.swt.custom.StyleRange;
25
26 /**
27  * Reads the text contents from a reader of HTML contents and translates
28  * the tags or cut them out.
29  *
30  * @see org.eclipse.jdt.internal.ui.text.HTML2TextReader
31  */

32 public class HTML2TextReader extends SubstitutionTextReader {
33     
34     private static final String JavaDoc EMPTY_STRING= ""; //$NON-NLS-1$
35
private static final Map JavaDoc fgEntityLookup;
36     private static final Set JavaDoc fgTags;
37     
38     static {
39         
40         fgTags= new HashSet JavaDoc();
41         fgTags.add("b"); //$NON-NLS-1$
42
fgTags.add("br"); //$NON-NLS-1$
43
fgTags.add("h5"); //$NON-NLS-1$
44
fgTags.add("p"); //$NON-NLS-1$
45
fgTags.add("dl"); //$NON-NLS-1$
46
fgTags.add("dt"); //$NON-NLS-1$
47
fgTags.add("dd"); //$NON-NLS-1$
48
fgTags.add("li"); //$NON-NLS-1$
49
fgTags.add("ul"); //$NON-NLS-1$
50
fgTags.add("pre"); //$NON-NLS-1$
51

52         fgEntityLookup= new HashMap JavaDoc(7);
53         fgEntityLookup.put("lt", "<"); //$NON-NLS-1$ //$NON-NLS-2$
54
fgEntityLookup.put("gt", ">"); //$NON-NLS-1$ //$NON-NLS-2$
55
fgEntityLookup.put("nbsp", " "); //$NON-NLS-1$ //$NON-NLS-2$
56
fgEntityLookup.put("amp", "&"); //$NON-NLS-1$ //$NON-NLS-2$
57
fgEntityLookup.put("circ", "^"); //$NON-NLS-1$ //$NON-NLS-2$
58
fgEntityLookup.put("tilde", "~"); //$NON-NLS-2$ //$NON-NLS-1$
59
fgEntityLookup.put("quot", "\""); //$NON-NLS-1$ //$NON-NLS-2$
60
}
61     
62     private int fCounter= 0;
63     private TextPresentation fTextPresentation;
64     private int fBold= 0;
65     private int fStartOffset= -1;
66     private boolean fInParagraph= false;
67     private boolean fIsPreformattedText= false;
68     
69     /**
70      * Transforms the html text from the reader to formatted text.
71      * @param presentation If not <code>null</code>, formattings will be applied to
72      * the presentation.
73     */

74     public HTML2TextReader(Reader JavaDoc reader, TextPresentation presentation) {
75         super(new PushbackReader JavaDoc(reader));
76         fTextPresentation= presentation;
77     }
78     
79     public int read() throws IOException JavaDoc {
80         int c= super.read();
81         if (c != -1)
82             ++ fCounter;
83         return c;
84     }
85     
86     protected void startBold() {
87         if (fBold == 0)
88             fStartOffset= fCounter;
89         ++ fBold;
90     }
91
92     protected void startPreformattedText() {
93         fIsPreformattedText= true;
94         setSkipWhitespace(false);
95     }
96
97     protected void stopPreformattedText() {
98         fIsPreformattedText= false;
99         setSkipWhitespace(true);
100     }
101     
102     protected void stopBold() {
103         -- fBold;
104         if (fBold == 0) {
105             if (fTextPresentation != null) {
106                 fTextPresentation.addStyleRange(new StyleRange(fStartOffset, fCounter - fStartOffset, null, null, SWT.BOLD));
107             }
108             fStartOffset= -1;
109         }
110     }
111     
112     /**
113      * @see SubstitutionTextReader#computeSubstitution(char)
114      */

115     protected String JavaDoc computeSubstitution(int c) throws IOException JavaDoc {
116         
117         if (c == '<')
118             return processHTMLTag();
119         else if (c == '&')
120             return processEntity();
121         else if (fIsPreformattedText)
122             return processPreformattedText(c);
123         
124         return null;
125     }
126
127     private String JavaDoc html2Text(String JavaDoc html) {
128         
129         String JavaDoc tag= html;
130         if ('/' == tag.charAt(0))
131             tag= tag.substring(1);
132             
133         if (!fgTags.contains(tag))
134             return EMPTY_STRING;
135
136
137         if ("pre".equals(html)) { //$NON-NLS-1$
138
startPreformattedText();
139             return EMPTY_STRING;
140         }
141
142         if ("/pre".equals(html)) { //$NON-NLS-1$
143
stopPreformattedText();
144             return EMPTY_STRING;
145         }
146
147         if (fIsPreformattedText)
148             return EMPTY_STRING;
149
150         if ("b".equals(html)) { //$NON-NLS-1$
151
startBold();
152             return EMPTY_STRING;
153         }
154                 
155         if ("h5".equals(html) || "dt".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$
156
startBold();
157             return EMPTY_STRING;
158         }
159         
160         if ("dl".equals(html)) //$NON-NLS-1$
161
return LINE_DELIM;
162         
163         if ("dd".equals(html)) //$NON-NLS-1$
164
return "\t"; //$NON-NLS-1$
165

166         if ("li".equals(html)) //$NON-NLS-1$
167
return LINE_DELIM + "\t-"; //$NON-NLS-1$
168

169         if ("/b".equals(html)) { //$NON-NLS-1$
170
stopBold();
171             return EMPTY_STRING;
172         }
173
174         if ("p".equals(html)) { //$NON-NLS-1$
175
fInParagraph= true;
176             return LINE_DELIM;
177         }
178
179         if ("br".equals(html)) //$NON-NLS-1$
180
return LINE_DELIM;
181         
182         if ("/p".equals(html)) { //$NON-NLS-1$
183
boolean inParagraph= fInParagraph;
184             fInParagraph= false;
185             return inParagraph ? EMPTY_STRING : LINE_DELIM;
186         }
187             
188         if ("/h5".equals(html) || "/dt".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$
189
stopBold();
190             return LINE_DELIM;
191         }
192         
193         if ("/dd".equals(html)) //$NON-NLS-1$
194
return LINE_DELIM;
195                 
196         return EMPTY_STRING;
197     }
198     
199     /*
200      * A '<' has been read. Process a html tag
201      */

202     private String JavaDoc processHTMLTag() throws IOException JavaDoc {
203         
204         StringBuffer JavaDoc buf= new StringBuffer JavaDoc();
205         int ch;
206         do {
207             
208             ch= nextChar();
209             
210             while (ch != -1 && ch != '>') {
211                 buf.append(Character.toLowerCase((char) ch));
212                 ch= nextChar();
213                 if (ch == '"'){
214                     buf.append(Character.toLowerCase((char) ch));
215                     ch= nextChar();
216                     while (ch != -1 && ch != '"'){
217                         buf.append(Character.toLowerCase((char) ch));
218                         ch= nextChar();
219                     }
220                 }
221                 if (ch == '<'){
222                     unread(ch);
223                     return '<' + buf.toString();
224                 }
225             }
226             
227             if (ch == -1)
228                 return null;
229             
230             int tagLen= buf.length();
231             // needs special treatment for comments
232
if ((tagLen >= 3 && "!--".equals(buf.substring(0, 3))) //$NON-NLS-1$
233
&& !(tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)))) { //$NON-NLS-1$
234
// unfinished comment
235
buf.append(ch);
236             } else {
237                 break;
238             }
239         } while (true);
240          
241         return html2Text(buf.toString());
242     }
243
244     private String JavaDoc processPreformattedText(int c) {
245         if (c == '\r' || c == '\n')
246             fCounter++;
247         return null;
248     }
249
250     
251     private void unread(int ch) throws IOException JavaDoc {
252         ((PushbackReader JavaDoc) getReader()).unread(ch);
253     }
254     
255     protected String JavaDoc entity2Text(String JavaDoc symbol) {
256         if (symbol.length() > 1 && symbol.charAt(0) == '#') {
257             int ch;
258             try {
259                 if (symbol.charAt(1) == 'x') {
260                     ch= Integer.parseInt(symbol.substring(2), 16);
261                 } else {
262                     ch= Integer.parseInt(symbol.substring(1), 10);
263                 }
264                 return EMPTY_STRING + (char)ch;
265             } catch (NumberFormatException JavaDoc e) {
266             }
267         } else {
268             String JavaDoc str= (String JavaDoc) fgEntityLookup.get(symbol);
269             if (str != null) {
270                 return str;
271             }
272         }
273         return "&" + symbol; // not found //$NON-NLS-1$
274
}
275     
276     /*
277      * A '&' has been read. Process a entity
278      */

279     private String JavaDoc processEntity() throws IOException JavaDoc {
280         StringBuffer JavaDoc buf= new StringBuffer JavaDoc();
281         int ch= nextChar();
282         while (Character.isLetterOrDigit((char)ch) || ch == '#') {
283             buf.append((char) ch);
284             ch= nextChar();
285         }
286         
287         if (ch == ';')
288             return entity2Text(buf.toString());
289         
290         buf.insert(0, '&');
291         if (ch != -1)
292             buf.append((char) ch);
293         return buf.toString();
294     }
295 }
296
Popular Tags