|                                                                                                              1
 19
 20  package org.netbeans.modules.editor.java;
 21
 22  import java.io.IOException
  ; 23  import java.io.InputStream
  ; 24  import java.io.InputStreamReader
  ; 25  import java.io.Reader
  ; 26  import java.net.URL
  ; 27  import java.util.StringTokenizer
  ; 28  import javax.swing.text.ChangedCharSetException
  ; 29  import javax.swing.text.MutableAttributeSet
  ; 30  import javax.swing.text.html.HTML
  ; 31  import javax.swing.text.html.HTMLEditorKit
  ; 32  import javax.swing.text.html.parser.ParserDelegator
  ; 33
 34
 39  public class HTMLJavadocParser {
 40
 41
 42
 46      public static String
  getJavadocText(URL  url, boolean pkg) { 47          if (url == null) return null;
 48
 49          HTMLEditorKit.Parser
  parser; 50          InputStream
  is = null; 51
 52          String
  charset = null; 53          for (;;) {
 54              try{
 55                  is = url.openStream();
 56                  parser = new ParserDelegator
  (); 57                  String
  urlStr = url.toString(); 58                  int offsets[] = new int[2];
 59                  Reader
  reader = charset == null?new InputStreamReader  (is): new InputStreamReader  (is, charset); 60
 61                  if (pkg){
 62                                          offsets = parsePackage(reader, parser, charset != null);
 64                  }else if (urlStr.indexOf('#')>0){
 65                                          String
  memberName = urlStr.substring(urlStr.indexOf('#')+1); 67                      if (memberName.length()>0) offsets = parseMember(reader, memberName, parser, charset != null);
 68                  }else{
 69                                          offsets = parseClass(reader, parser, charset != null);
 71                  }
 72
 73                  if (offsets !=null && offsets[0]!=-1 && offsets[1]>offsets[0]){
 74                      return getTextFromURLStream(url, offsets[0], offsets[1], charset);
 75                  }
 76                  break;
 77              } catch (ChangedCharSetException
  e) { 78                  if (charset == null) {
 79                      charset = getCharSet(e);
 80                                      } else {
 82                      e.printStackTrace();
 83                      break;
 84                  }
 85              } catch(IOException
  ioe){ 86                  ioe.printStackTrace();
 87                  break;
 88              }finally{
 89                  parser = null;
 90                  if (is!=null) {
 91                      try{
 92                          is.close();
 93                      }catch(IOException
  ioe){ 94                          ioe.printStackTrace();
 95                      }
 96                  }
 97              }
 98          }
 99          return null;
 100     }
 101
 102     private static String
  getCharSet(ChangedCharSetException  e) { 103         String
  spec = e.getCharSetSpec(); 104         if (e.keyEqualsCharSet()) {
 105                         return spec;
 107         }
 108
 109
 111         int index = spec.indexOf(";");         if (index != -1) {
 113             spec = spec.substring(index + 1);
 114         }
 115
 116         spec = spec.toLowerCase();
 117
 118         StringTokenizer
  st = new StringTokenizer  (spec, " \t=", true);         boolean foundCharSet = false; 120         boolean foundEquals = false;
 121         while (st.hasMoreTokens()) {
 122             String
  token = st.nextToken(); 123             if (token.equals(" ") || token.equals("\t")) {                 continue;
 125             }
 126             if (foundCharSet == false && foundEquals == false
 127                     && token.equals("charset")) {                 foundCharSet = true;
 129                 continue;
 130             } else if (foundEquals == false && token.equals("=")) {                foundEquals = true;
 132                 continue;
 133             } else if (foundEquals == true && foundCharSet == true) {
 134                 return token;
 135             }
 136
 137             foundCharSet = false;
 138             foundEquals = false;
 139         }
 140
 141         return null;
 142     }
 143
 144     private static String
  getTextFromURLStream(URL  url, int startOffset, int endOffset, String  charset) throws IOException  { 145
 146         if (url == null) return null;
 147
 148         if (startOffset>endOffset) throw new IOException
  (); 149         InputStream
  fis = url.openStream(); 150         InputStreamReader
  fisreader = charset == null ? new InputStreamReader  (fis) : new InputStreamReader  (fis, charset); 151         int len = endOffset - startOffset;
 152         int bytesAlreadyRead = 0;
 153         char buffer[] = new char[len];
 154         int bytesToSkip = startOffset;
 155         long bytesSkipped = 0;
 156         do {
 157             bytesSkipped = fisreader.skip(bytesToSkip);
 158             bytesToSkip -= bytesSkipped;
 159         } while ((bytesToSkip > 0) && (bytesSkipped > 0));
 160
 161         do {
 162             int count = fisreader.read(buffer, bytesAlreadyRead, len - bytesAlreadyRead);
 163             if (count < 0){
 164                 break;
 165             }
 166             bytesAlreadyRead += count;
 167         } while (bytesAlreadyRead < len);
 168         fisreader.close();
 169         return new String
  (buffer); 170     }
 171
 172
 173
 175     private static int[] parseClass(Reader
  reader, final HTMLEditorKit.Parser  parser, boolean ignoreCharset) throws IOException  { 176         final int INIT = 0;
 177                 final int CLASS_DATA_START = 1;
 179                 final int TEXT_START = 2;
 181
 182         final int state[] = new int[1];
 183         final int offset[] = new int[2];
 184
 185         offset[0] = -1;         offset[1] = -1;         state[0] = INIT;
 188
 189         HTMLEditorKit.ParserCallback
  callback = new HTMLEditorKit.ParserCallback  () { 190
 191             int nextHRPos = -1;
 192             int lastHRPos = -1;
 193
 194             public void handleSimpleTag(HTML.Tag
  t, MutableAttributeSet  a, int pos) { 195                 if (t == HTML.Tag.HR){
 196                     if (state[0] == TEXT_START){
 197                         nextHRPos = pos;
 198                     }
 199                     lastHRPos = pos;
 200                 }
 201             }
 202
 203             public void handleStartTag(HTML.Tag
  t, MutableAttributeSet  a, int pos) { 204                 if (t == HTML.Tag.P && state[0] == CLASS_DATA_START){
 205                     state[0] = TEXT_START;
 206                 }
 207                 if (t == HTML.Tag.A && state[0] == TEXT_START) {
 208                     String
  attrName = (String  )a.getAttribute(HTML.Attribute.NAME); 209                     if (attrName!=null && attrName.length()>0){
 210                         if (nextHRPos!=-1){
 211                             offset[1] = nextHRPos;
 212                         }else{
 213                             offset[1] = pos;
 214                         }
 215                         state[0] = INIT;
 216                     }
 217                 }
 218             }
 219
 220             public void handleComment(char[] data, int pos){
 221                 String
  comment = String.valueOf(data); 222                 if (comment!=null){
 223                     if (comment.indexOf("START OF CLASS DATA")>0){                         state[0] = CLASS_DATA_START;
 225                     } else if (comment.indexOf("NESTED CLASS SUMMARY")>0){                         if (lastHRPos!=-1){
 227                             offset[1] = lastHRPos;
 228                         }else{
 229                             offset[1] = pos;
 230                         }
 231                     }
 232                 }
 233             }
 234
 235             public void handleText(char[] data, int pos) {
 236                 if (state[0] == TEXT_START && offset[0] < 0)
 237                     offset[0] = pos;
 238             }
 239         };
 240
 241         parser.parse(reader, callback, ignoreCharset);
 242         callback = null;
 243         return offset;
 244     }
 245
 246
 248     private static int[] parseMember(Reader
  reader, final String  name, final HTMLEditorKit.Parser  parser, boolean ignoreCharset) throws IOException  { 249         final int INIT = 0;
 250                 final int A_OPEN = 1;
 252                 final int A_CLOSE = 2;
 254                 final int PRE_CLOSE = 3;
 256
 257         final int state[] = new int[1];
 258         final int offset[] = new int[2];
 259
 260         offset[0] = -1;         offset[1] = -1;         state[0] = INIT;
 263
 264         HTMLEditorKit.ParserCallback
  callback = new HTMLEditorKit.ParserCallback  () { 265
 266             int hrPos = -1;
 267
 268             public void handleSimpleTag(HTML.Tag
  t, MutableAttributeSet  a, int pos) { 269                 if (t == HTML.Tag.HR && state[0]!=INIT){
 270                     if (state[0] == PRE_CLOSE){
 271                         hrPos = pos;
 272                     }
 273                 }
 274             }
 275
 276             public void handleStartTag(HTML.Tag
  t, MutableAttributeSet  a, int pos) { 277
 278                 if (t == HTML.Tag.A) {
 279                     String
  attrName = (String  )a.getAttribute(HTML.Attribute.NAME); 280                     if (name.equals(attrName)){
 281                                                 state[0] = A_OPEN;
 283                     } else {
 284                         if (state[0] == PRE_CLOSE && attrName!=null){
 285                                                         state[0] = INIT;
 287                             offset[1] = (hrPos!=-1) ? hrPos : pos;
 288                         }
 289                     }
 290                 } else if (t == HTML.Tag.DD && state[0] == PRE_CLOSE && offset[0] < 0){
 291                     offset[0] = pos;
 292                 }
 293
 294             }
 295
 296             public void handleEndTag(HTML.Tag
  t, int pos){ 297                 if (t == HTML.Tag.A && state[0] == A_OPEN){
 298                     state[0] = A_CLOSE;
 299                 } else if (t == HTML.Tag.PRE && state[0] == A_CLOSE){
 300                     state[0] = PRE_CLOSE;
 301                 }
 302             }
 303
 304         };
 305
 306         parser.parse(reader, callback, ignoreCharset);
 307         callback = null;
 308         return offset;
 309     }
 310
 311
 313     private static int[] parsePackage(Reader
  reader, final HTMLEditorKit.Parser  parser, boolean ignoreCharset) throws IOException  { 314         final String
  name = "package_description";         final int INIT = 0; 316                 final int A_OPEN = 1;
 318
 319         final int state[] = new int[1];
 320         final int offset[] = new int[2];
 321
 322         offset[0] = -1;         offset[1] = -1;         state[0] = INIT;
 325
 326         HTMLEditorKit.ParserCallback
  callback = new HTMLEditorKit.ParserCallback  () { 327
 328             int hrPos = -1;
 329
 330             public void handleSimpleTag(HTML.Tag
  t, MutableAttributeSet  a, int pos) { 331                 if (t == HTML.Tag.HR && state[0]!=INIT){
 332                     if (state[0] == A_OPEN){
 333                         hrPos = pos;
 334                         offset[1] = pos;
 335                     }
 336                 }
 337             }
 338
 339             public void handleStartTag(HTML.Tag
  t, MutableAttributeSet  a, int pos) { 340
 341                 if (t == HTML.Tag.A) {
 342                     String
  attrName = (String  )a.getAttribute(HTML.Attribute.NAME); 343                     if (name.equals(attrName)){
 344                                                 state[0] = A_OPEN;
 346                         offset[0] = pos;
 347                     } else {
 348                         if (state[0] == A_OPEN && attrName!=null){
 349                                                         state[0] = INIT;
 351                             offset[1] = (hrPos!=-1) ? hrPos : pos;
 352                         }
 353                     }
 354                 }
 355             }
 356         };
 357
 358         parser.parse(reader, callback, ignoreCharset);
 359         callback = null;
 360         return offset;
 361     }
 362
 363 }
 364
                                                                                                                                                                                                             |                                                                       
 
 
 
 
 
                                                                                   Popular Tags                                                                                                                                                                                              |