HTMLJavadocParser


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
20  package org.netbeans.modules.editor.java;
21  
22  import java.io.IOException  ;
23  import java.io.InputStream  ;
24  import java.io.InputStreamReader  ;
25  import java.io.Reader  ;
26  import java.net.URL  ;
27  import java.util.StringTokenizer  ;
28  import javax.swing.text.ChangedCharSetException  ;
29  import javax.swing.text.MutableAttributeSet  ;
30  import javax.swing.text.html.HTML  ;
31  import javax.swing.text.html.HTMLEditorKit  ;
32  import javax.swing.text.html.parser.ParserDelegator  ;
33  
/**
 * HTML parser that retrieves sections of a javadoc HTML file.
 *
 * <p>The HTML is scanned with the Swing HTML parser to find the character
 * offsets that delimit the requested section (class description, member
 * description or package description). The text between those offsets is then
 * re-read from the URL and returned as raw HTML.
 *
 * @author  Martin Roskanin
 */
public class HTMLJavadocParser {

    /**
     * Gets the javadoc text from the given URL.
     *
     * <p>If the URL contains a fragment ({@code #name}) the section of that
     * member is returned; otherwise the class description is returned, or the
     * package description when {@code pkg} is {@code true}.
     *
     * <p>The document is first parsed with the platform default encoding. If
     * the parser reports a charset declared inside the document, the document
     * is parsed once more with that charset. At most two passes are made, so
     * an unparseable charset declaration can no longer cause an endless retry.
     *
     * @param url nbfs protocol URL
     * @param pkg true if URL should be retrieved for a package
     * @return the raw HTML of the requested section, or {@code null} if the
     *         URL is {@code null}, the section was not found or an I/O error
     *         occurred (the error is printed to the standard error stream)
     */
    public static String getJavadocText(URL url, boolean pkg) {
        if (url == null) {
            return null;
        }

        String charset = null;
        // Becomes true after the first charset notification; the parser then
        // stops throwing ChangedCharSetException, which bounds the loop.
        boolean ignoreCharset = false;

        for (;;) {
            InputStream is = null;
            try {
                is = url.openStream();
                HTMLEditorKit.Parser parser = new ParserDelegator();
                Reader reader = createReader(is, charset);
                String urlStr = url.toString();
                int[] offsets = new int[2];

                if (pkg) {
                    // package description
                    offsets = parsePackage(reader, parser, ignoreCharset);
                } else if (urlStr.indexOf('#') > 0) {
                    // member javadoc info
                    String memberName = urlStr.substring(urlStr.indexOf('#') + 1);
                    if (memberName.length() > 0) {
                        offsets = parseMember(reader, memberName, parser, ignoreCharset);
                    }
                } else {
                    // class javadoc info
                    offsets = parseClass(reader, parser, ignoreCharset);
                }

                if (offsets != null && offsets[0] != -1 && offsets[1] > offsets[0]) {
                    return getTextFromURLStream(url, offsets[0], offsets[1], charset);
                }
                return null;
            } catch (ChangedCharSetException e) {
                if (ignoreCharset) {
                    // The charset directive was supposed to be ignored on this pass.
                    e.printStackTrace();
                    return null;
                }
                // Restart with the declared charset. If it cannot be extracted
                // (null) the retry falls back to the default encoding but
                // ignores the directive, so the loop always terminates.
                charset = getCharSet(e);
                ignoreCharset = true;
            } catch (IOException ioe) {
                ioe.printStackTrace();
                return null;
            } finally {
                if (is != null) {
                    try {
                        is.close();
                    } catch (IOException ioe) {
                        ioe.printStackTrace();
                    }
                }
            }
        }
    }

    /**
     * Creates a reader over the stream, using the given charset or the
     * platform default encoding when {@code charset} is {@code null}.
     */
    private static Reader createReader(InputStream in, String charset) throws IOException {
        return charset == null ? new InputStreamReader(in) : new InputStreamReader(in, charset);
    }

    /**
     * Extracts the charset name from the parser's charset notification.
     *
     * @param e exception thrown by the parser when it met a charset declaration
     * @return the charset name, or {@code null} if none could be extracted
     */
    private static String getCharSet(ChangedCharSetException e) {
        String spec = e.getCharSetSpec();
        if (spec == null) {
            return null;
        }
        if (e.keyEqualsCharSet()) {
            //charsetspec contains only charset
            return spec;
        }

        //charsetspec is in form "text/html; charset=UTF-8"

        int index = spec.indexOf(";"); // NOI18N
        if (index != -1) {
            spec = spec.substring(index + 1);
        }

        // Locale-independent lowercasing: the default locale may map letters
        // differently (e.g. 'I' in a Turkish locale) and corrupt the name.
        spec = spec.toLowerCase(java.util.Locale.ENGLISH);

        StringTokenizer st = new StringTokenizer(spec, " \t=", true); //NOI18N
        boolean foundCharSet = false;
        boolean foundEquals = false;
        while (st.hasMoreTokens()) {
            String token = st.nextToken();
            if (token.equals(" ") || token.equals("\t")) { //NOI18N
                continue;
            }
            if (!foundCharSet && !foundEquals && token.equals("charset")) { //NOI18N
                foundCharSet = true;
            } else if (!foundEquals && token.equals("=")) { //NOI18N
                foundEquals = true;
            } else if (foundEquals && foundCharSet) {
                return cleanCharsetName(token);
            } else {
                // unexpected token, start looking for "charset" "=" again
                foundCharSet = false;
                foundEquals = false;
            }
        }

        return null;
    }

    /**
     * Strips anything from the first ';' on (further parameters) and a pair of
     * surrounding quotes from a charset value.
     *
     * @return the cleaned name, or {@code null} if nothing is left
     */
    private static String cleanCharsetName(String token) {
        int semicolon = token.indexOf(';');
        String name = semicolon >= 0 ? token.substring(0, semicolon) : token;
        if (name.length() >= 2) {
            char first = name.charAt(0);
            char last = name.charAt(name.length() - 1);
            if (first == last && (first == '"' || first == '\'')) {
                name = name.substring(1, name.length() - 1);
            }
        }
        return name.length() > 0 ? name : null;
    }

    /**
     * Reads the characters in the range {@code [startOffset, endOffset)} from
     * the URL content, decoded with the given charset (platform default when
     * {@code null}).
     *
     * @return the text read; shorter than the requested range if the content
     *         ends early; {@code null} if {@code url} is {@code null}
     * @throws IOException if {@code startOffset > endOffset} or reading fails
     */
    private static String getTextFromURLStream(URL url, int startOffset, int endOffset, String charset) throws IOException {
        if (url == null) {
            return null;
        }
        if (startOffset > endOffset) {
            throw new IOException("Invalid offsets: " + startOffset + " > " + endOffset); //NOI18N
        }

        InputStream stream = url.openStream();
        try {
            Reader reader = createReader(stream, charset);

            // skip() may skip fewer characters than requested, so repeat
            // until the start offset is reached or nothing more is skipped.
            long charsToSkip = startOffset;
            long charsSkipped;
            do {
                charsSkipped = reader.skip(charsToSkip);
                charsToSkip -= charsSkipped;
            } while (charsToSkip > 0 && charsSkipped > 0);

            int len = endOffset - startOffset;
            char[] buffer = new char[len];
            int charsRead = 0;
            do {
                int count = reader.read(buffer, charsRead, len - charsRead);
                if (count < 0) {
                    break;
                }
                charsRead += count;
            } while (charsRead < len);

            // Only the characters actually read; the rest of the buffer would
            // otherwise show up as NUL characters in the result.
            return new String(buffer, 0, charsRead);
        } finally {
            // Closed on every path, including failures while skipping/reading.
            stream.close();
        }
    }

    /**
     * Retrieves the position (start offset and end offset) of class javadoc info
     * in the raw html file.
     *
     * @return two-element array {start, end}; an element is -1 if not found
     */
    private static int[] parseClass(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
        final int INIT = 0;
        // javadoc HTML comment '======== START OF CLASS DATA ========'
        final int CLASS_DATA_START = 1;
        // start of the text we need. Located just after first P.
        final int TEXT_START = 2;

        final int[] state = new int[] {INIT};
        final int[] offset = new int[] {-1, -1}; // {start offset, end offset}

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            // position of the last HR seen while inside the description text
            int nextHRPos = -1;
            // position of the last HR seen anywhere
            int lastHRPos = -1;

            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR) {
                    if (state[0] == TEXT_START) {
                        nextHRPos = pos;
                    }
                    lastHRPos = pos;
                }
            }

            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.P && state[0] == CLASS_DATA_START) {
                    state[0] = TEXT_START;
                }
                if (t == HTML.Tag.A && state[0] == TEXT_START) {
                    String attrName = (String) a.getAttribute(HTML.Attribute.NAME);
                    if (attrName != null && attrName.length() > 0) {
                        // first named anchor after the text ends the description
                        offset[1] = (nextHRPos != -1) ? nextHRPos : pos;
                        state[0] = INIT;
                    }
                }
            }

            public void handleComment(char[] data, int pos) {
                String comment = String.valueOf(data);
                // >= 0 so that a marker at the very start of the comment is found too
                if (comment.indexOf("START OF CLASS DATA") >= 0) { //NOI18N
                    state[0] = CLASS_DATA_START;
                } else if (comment.indexOf("NESTED CLASS SUMMARY") >= 0) { //NOI18N
                    offset[1] = (lastHRPos != -1) ? lastHRPos : pos;
                }
            }

            public void handleText(char[] data, int pos) {
                if (state[0] == TEXT_START && offset[0] < 0) {
                    offset[0] = pos;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

    /**
     * Retrieves the position (start offset and end offset) of member javadoc info
     * in the raw html file.
     *
     * @param name value of the NAME attribute of the member's anchor
     * @return two-element array {start, end}; an element is -1 if not found
     */
    private static int[] parseMember(Reader reader, final String name, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
        final int INIT = 0;
        // 'A' tag with the name we are looking for.
        final int A_OPEN = 1;
        // close tag of 'A'
        final int A_CLOSE = 2;
        // PRE close tag after the A_CLOSE
        final int PRE_CLOSE = 3;

        final int[] state = new int[] {INIT};
        final int[] offset = new int[] {-1, -1}; // {start offset, end offset}

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            // position of the last HR seen after the member's PRE block
            int hrPos = -1;

            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR && state[0] == PRE_CLOSE) {
                    hrPos = pos;
                }
            }

            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.A) {
                    String attrName = (String) a.getAttribute(HTML.Attribute.NAME);
                    if (name.equals(attrName)) {
                        // we have found desired javadoc member info anchor
                        state[0] = A_OPEN;
                    } else if (state[0] == PRE_CLOSE && attrName != null) {
                        // reach the end of retrieved javadoc info
                        state[0] = INIT;
                        offset[1] = (hrPos != -1) ? hrPos : pos;
                    }
                } else if (t == HTML.Tag.DD && state[0] == PRE_CLOSE && offset[0] < 0) {
                    // first DD after the PRE block starts the description
                    offset[0] = pos;
                }
            }

            public void handleEndTag(HTML.Tag t, int pos) {
                if (t == HTML.Tag.A && state[0] == A_OPEN) {
                    state[0] = A_CLOSE;
                } else if (t == HTML.Tag.PRE && state[0] == A_CLOSE) {
                    state[0] = PRE_CLOSE;
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

    /**
     * Retrieves the position (start offset and end offset) of package javadoc info
     * in the raw html file. The section starts at the anchor named
     * "package_description".
     *
     * @return two-element array {start, end}; an element is -1 if not found
     */
    private static int[] parsePackage(Reader reader, final HTMLEditorKit.Parser parser, boolean ignoreCharset) throws IOException {
        final String name = "package_description"; //NOI18N
        final int INIT = 0;
        // 'A' tag with the name we are looking for.
        final int A_OPEN = 1;

        final int[] state = new int[] {INIT};
        final int[] offset = new int[] {-1, -1}; // {start offset, end offset}

        HTMLEditorKit.ParserCallback callback = new HTMLEditorKit.ParserCallback() {

            // position of the last HR seen after the package description anchor
            int hrPos = -1;

            public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.HR && state[0] == A_OPEN) {
                    // every HR moves the tentative end of the section
                    hrPos = pos;
                    offset[1] = pos;
                }
            }

            public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
                if (t == HTML.Tag.A) {
                    String attrName = (String) a.getAttribute(HTML.Attribute.NAME);
                    if (name.equals(attrName)) {
                        // we have found desired package description anchor
                        state[0] = A_OPEN;
                        offset[0] = pos;
                    } else if (state[0] == A_OPEN && attrName != null) {
                        // reach the end of retrieved javadoc info
                        state[0] = INIT;
                        offset[1] = (hrPos != -1) ? hrPos : pos;
                    }
                }
            }
        };

        parser.parse(reader, callback, ignoreCharset);
        return offset;
    }

}
364
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags