KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > netbeans > modules > editor > java > HTMLJavadocParser


/*
 * The contents of this file are subject to the terms of the Common Development
 * and Distribution License (the License). You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
 * or http://www.netbeans.org/cddl.txt.
 *
 * When distributing Covered Code, include this CDDL Header Notice in each file
 * and include the License file at http://www.netbeans.org/cddl.txt.
 * If applicable, add the following below the CDDL Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyrighted [year] [name of copyright owner]"
 *
 * The Original Software is NetBeans. The Initial Developer of the Original
 * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
 * Microsystems, Inc. All Rights Reserved.
 */


package org.netbeans.modules.editor.java;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.text.ChangedCharSetException;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;
import javax.swing.text.html.parser.ParserDelegator;

/**
 * HTML Parser. It retrieves sections of the javadoc HTML file.
 *
 * <p>The file is first run through the Swing HTML parser to find the start and
 * end character offsets of the wanted section (class description, member
 * description or package description). The raw HTML between those offsets is
 * then read from a fresh stream and returned unchanged.
 *
 * @author Martin Roskanin
 */
public class HTMLJavadocParser {

    private static final Logger LOG = Logger.getLogger(HTMLJavadocParser.class.getName());

    /** Name of the anchor that opens the package description section. */
    private static final String PACKAGE_DESCRIPTION_ANCHOR = "package_description"; //NOI18N

    /**
     * Gets the javadoc text from the given URL.
     *
     * <p>If {@code pkg} is true the package description is extracted. Otherwise,
     * when the URL contains a non-empty {@code #fragment}, the description of the
     * member whose anchor name equals the fragment is extracted; without a
     * fragment the class description is extracted.
     *
     * <p>The document is parsed at most twice: once with the platform default
     * encoding and, if the document announces a charset in a {@code META} tag,
     * once more with that charset while ignoring further charset announcements.
     *
     * @param url nbfs protocol URL
     * @param pkg true if URL should be retrieved for a package
     * @return the raw HTML of the requested section, or {@code null} if
     *         {@code url} is null, the section was not found or the document
     *         could not be read
     */
    public static String getJavadocText(URL url, boolean pkg) {
        if (url == null) {
            return null;
        }

        String charset = null;
        // Set once the document has announced its charset. From then on the
        // parser is told to ignore charset declarations, so the loop below can
        // never run more than twice, even when the announced charset could not
        // be extracted and we keep reading with the default encoding.
        boolean ignoreCharset = false;
        for (;;) {
            InputStream is = null;
            try {
                is = url.openStream();
                Reader reader = charset == null
                        ? new InputStreamReader(is)
                        : new InputStreamReader(is, charset);
                int[] offsets = findSection(url, pkg, reader, ignoreCharset);
                if (offsets != null && offsets[0] != -1 && offsets[1] > offsets[0]) {
                    return getTextFromURLStream(url, offsets[0], offsets[1], charset);
                }
                return null;
            } catch (ChangedCharSetException e) {
                if (ignoreCharset) {
                    // Not expected: the parser was asked to ignore charset changes.
                    LOG.log(Level.INFO, "Unexpected charset change in " + url, e); //NOI18N
                    return null;
                }
                // restart, with the announced charset if one could be extracted
                charset = getCharSet(e);
                ignoreCharset = true;
            } catch (IOException ioe) {
                LOG.log(Level.INFO, "Cannot read javadoc from " + url, ioe); //NOI18N
                return null;
            } finally {
                closeQuietly(is);
            }
        }
    }

    /**
     * Runs the parser matching the requested kind of section.
     *
     * @return the {start, end} offsets reported by the chosen parser, or
     *         {@code null} when the URL ends with an empty fragment
     */
    private static int[] findSection(URL url, boolean pkg, Reader reader, boolean ignoreCharset) throws IOException {
        HTMLEditorKit.Parser parser = new ParserDelegator();
        if (pkg) {
            // package description
            return parsePackage(reader, parser, ignoreCharset);
        }
        String urlStr = url.toString();
        int hashIndex = urlStr.indexOf('#');
        if (hashIndex > 0) {
            // member javadoc info
            String memberName = urlStr.substring(hashIndex + 1);
            return memberName.length() > 0
                    ? parseMember(reader, memberName, parser, ignoreCharset)
                    : null;
        }
        // class javadoc info
        return parseClass(reader, parser, ignoreCharset);
    }

    /** Closes the stream, logging instead of propagating a failure. */
    private static void closeQuietly(InputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ioe) {
            LOG.log(Level.FINE, "Cannot close javadoc stream", ioe); //NOI18N
        }
    }

    /**
     * Extracts the charset name from the specification carried by the exception.
     *
     * @return the charset name, or {@code null} if the specification does not
     *         contain one
     */
    private static String getCharSet(ChangedCharSetException e) {
        String spec = e.getCharSetSpec();
        if (spec == null) {
            return null;
        }
        if (e.keyEqualsCharSet()) {
            //charsetspec contains only charset
            String name = spec.trim();
            return name.length() > 0 ? name : null;
        }

        //charsetspec is in form "text/html; charset=UTF-8"
        int index = spec.indexOf(';'); // NOI18N
        if (index != -1) {
            spec = spec.substring(index + 1);
        }

        // Locale-independent lower-casing: with e.g. a Turkish default locale
        // "ISO-8859-1" would otherwise turn into an unknown charset name.
        spec = spec.toLowerCase(Locale.ENGLISH);

        // ';' and quotes are treated as separators so that they never end up
        // glued to the charset name ("charset=utf-8;", charset='utf-8').
        final String separators = " \t;\"'"; //NOI18N
        StringTokenizer st = new StringTokenizer(spec, separators + "=", true); //NOI18N
        boolean foundCharSet = false;
        boolean foundEquals = false;
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if (token.length() == 1 && separators.indexOf(token.charAt(0)) >= 0) {
                continue;
            }
            if (foundCharSet && foundEquals) {
                return token;
            }
            if (foundCharSet && token.equals("=")) { //NOI18N
                foundEquals = true;
            } else {
                foundCharSet = token.equals("charset"); //NOI18N
                foundEquals = false;
            }
        }

        return null;
    }

    /**
     * Reads the characters between the two offsets from the given URL.
     *
     * @param url the document to read; {@code null} yields {@code null}
     * @param startOffset offset of the first character to return
     * @param endOffset offset just behind the last character to return
     * @param charset encoding of the document, or {@code null} for the platform
     *        default
     * @return the text between the offsets; shorter than requested if the
     *         document ends earlier
     * @throws IOException if the offsets are invalid or the document cannot be read
     */
    private static String getTextFromURLStream(URL url, int startOffset, int endOffset, String charset) throws IOException {

        if (url == null) {
            return null;
        }

        if (startOffset < 0 || startOffset > endOffset) {
            throw new IOException("Invalid offsets: " + startOffset + ", " + endOffset); //NOI18N
        }

        InputStream stream = url.openStream();
        try {
            Reader reader = charset == null
                    ? new InputStreamReader(stream)
                    : new InputStreamReader(stream, charset);

            long charsToSkip = startOffset;
            while (charsToSkip > 0) {
                long skipped = reader.skip(charsToSkip);
                if (skipped <= 0) {
                    break;
                }
                charsToSkip -= skipped;
            }

            int len = endOffset - startOffset;
            char[] buffer = new char[len];
            int charsRead = 0;
            while (charsRead < len) {
                int count = reader.read(buffer, charsRead, len - charsRead);
                if (count < 0) {
                    break;
                }
                charsRead += count;
            }
            // only the part really read, never the unused tail of the buffer
            return new String(buffer, 0, charsRead);
        } finally {
            closeQuietly(stream);
        }
    }

    /** Returns the value of the NAME attribute, or {@code null} if there is none. */
    private static String anchorName(MutableAttributeSet attributes) {
        Object value = attributes.getAttribute(HTML.Attribute.NAME);
        return value instanceof String ? (String) value : null;
    }

    /**
     * Retrieves the position (start offset and end offset) of class javadoc info
     * in the raw html file.
     *
     * <p>The text starts at the first text found after the first P tag that
     * follows the 'START OF CLASS DATA' comment. It ends at the next named
     * anchor (or at the last HR seen before it) or at the
     * 'NESTED CLASS SUMMARY' comment.
     *
     * @return two-element array {start, end}; an offset that was not found is -1
     */
    private static int[] parseClass(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
        final int INIT = 0;
        // javadoc HTML comment '======== START OF CLASS DATA ========'
        final int CLASS_DATA_START = 1;
        // start of the text we need. Located just after first P.
        final int TEXT_START = 2;

        final int[] offset = {-1, -1}; // start offset, end offset

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            int state = INIT;
            // last HR seen since the text started
            int nextHRPos = -1;
            // last HR seen at all
            int lastHRPos = -1;

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR) {
                    if (state == TEXT_START) {
                        nextHRPos = pos;
                    }
                    lastHRPos = pos;
                }
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.P && state == CLASS_DATA_START) {
                    state = TEXT_START;
                }
                if (t == HTML.Tag.A && state == TEXT_START) {
                    String attrName = anchorName(a);
                    if (attrName != null && attrName.length() > 0) {
                        offset[1] = (nextHRPos != -1) ? nextHRPos : pos;
                        state = INIT;
                    }
                }
            }

            @Override
            public void handleComment(char[] data, int pos) {
                if (data == null) {
                    return;
                }
                String comment = String.valueOf(data);
                if (comment.indexOf("START OF CLASS DATA") >= 0) { //NOI18N
                    state = CLASS_DATA_START;
                } else if (comment.indexOf("NESTED CLASS SUMMARY") >= 0) { //NOI18N
                    offset[1] = (lastHRPos != -1) ? lastHRPos : pos;
                }
            }

            @Override
            public void handleText(char[] data, int pos) {
                if (state == TEXT_START && offset[0] < 0) {
                    offset[0] = pos;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

    /**
     * Retrieves the position (start offset and end offset) of member javadoc info
     * in the raw html file.
     *
     * <p>The text starts at the first DD tag found after the anchor named
     * {@code name} has been closed and a following PRE element has ended. It
     * ends at the next named anchor, or at the last HR seen before that anchor.
     *
     * @param name anchor name of the member
     * @return two-element array {start, end}; an offset that was not found is -1
     */
    private static int[] parseMember(Reader reader, final String name, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
        final int INIT = 0;
        // 'A' tag with the name we are looking for.
        final int A_OPEN = 1;
        // close tag of 'A'
        final int A_CLOSE = 2;
        // PRE close tag after the A_CLOSE
        final int PRE_CLOSE = 3;

        final int[] offset = {-1, -1}; // start offset, end offset

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            int state = INIT;
            int hrPos = -1;

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR && state == PRE_CLOSE) {
                    hrPos = pos;
                }
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.A) {
                    String attrName = anchorName(a);
                    if (name.equals(attrName)) {
                        // we have found desired javadoc member info anchor
                        state = A_OPEN;
                    } else if (state == PRE_CLOSE && attrName != null) {
                        // reach the end of retrieved javadoc info
                        state = INIT;
                        offset[1] = (hrPos != -1) ? hrPos : pos;
                    }
                } else if (t == HTML.Tag.DD && state == PRE_CLOSE && offset[0] < 0) {
                    offset[0] = pos;
                }
            }

            @Override
            public void handleEndTag(HTML.Tag t, int pos) {
                if (t == HTML.Tag.A && state == A_OPEN) {
                    state = A_CLOSE;
                } else if (t == HTML.Tag.PRE && state == A_CLOSE) {
                    state = PRE_CLOSE;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

    /**
     * Retrieves the position (start offset and end offset) of the package
     * description in the raw html file.
     *
     * <p>The text starts at the anchor named 'package_description'. It ends at
     * the last HR seen before the next named anchor, or at that anchor if no HR
     * was seen; if no further named anchor follows, the last HR seen after the
     * start is used.
     *
     * @return two-element array {start, end}; an offset that was not found is -1
     */
    private static int[] parsePackage(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
        final int INIT = 0;
        // 'A' tag with the name we are looking for.
        final int A_OPEN = 1;

        final int[] offset = {-1, -1}; // start offset, end offset

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            int state = INIT;
            int hrPos = -1;

            @Override
            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR && state == A_OPEN) {
                    hrPos = pos;
                    offset[1] = pos;
                }
            }

            @Override
            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t != HTML.Tag.A) {
                    return;
                }
                String attrName = anchorName(a);
                if (PACKAGE_DESCRIPTION_ANCHOR.equals(attrName)) {
                    // we have found the package description anchor
                    state = A_OPEN;
                    offset[0] = pos;
                } else if (state == A_OPEN && attrName != null) {
                    // reach the end of retrieved javadoc info
                    state = INIT;
                    offset[1] = (hrPos != -1) ? hrPos : pos;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

}
364
Popular Tags