KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > caucho > doc > javadoc > IndexParser


1 /*
2  * Copyright (c) 1998-2003 Caucho Technology -- all rights reserved
3  *
4  * Caucho Technology permits redistribution, modification and use
5  * of this file in source and binary form ("the Software") under the
6  * Caucho Developer Source License ("the License"). The following
7  * conditions must be met:
8  *
9  * 1. Each copy or derived work of the Software must preserve the copyright
10  * notice and this notice unmodified.
11  *
12  * 2. Redistributions of the Software in source or binary form must include
13  * an unmodified copy of the License, normally in a plain ASCII text
14  *
15  * 3. The names "Resin" or "Caucho" are trademarks of Caucho Technology and
16  * may not be used to endorse products derived from this software.
17  * "Resin" or "Caucho" may not appear in the names of products derived
18  * from this software.
19  *
20  * This Software is provided "AS IS," without a warranty of any kind.
21  * ALL EXPRESS OR IMPLIED REPRESENTATIONS AND WARRANTIES, INCLUDING ANY
22  * IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE
23  * OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED.
24  *
25  * CAUCHO TECHNOLOGY AND ITS LICENSORS SHALL NOT BE LIABLE FOR ANY DAMAGES
26  * SUFFERED BY LICENSEE OR ANY THIRD PARTY AS A RESULT OF USING OR
27  * DISTRIBUTING SOFTWARE. IN NO EVENT WILL CAUCHO OR ITS LICENSORS BE LIABLE
28  * FOR ANY LOST REVENUE, PROFIT OR DATA, OR FOR DIRECT, INDIRECT, SPECIAL,
29  * CONSEQUENTIAL, INCIDENTAL OR PUNITIVE DAMAGES, HOWEVER CAUSED AND
30  * REGARDLESS OF THE THEORY OF LIABILITY, ARISING OUT OF THE USE OF OR
31  * INABILITY TO USE SOFTWARE, EVEN IF HE HAS BEEN ADVISED OF THE POSSIBILITY
32  * OF SUCH DAMAGES.
33  *
34  * @author Sam
35  */

36
37 package com.caucho.doc.javadoc;
38
import com.caucho.log.Log;
import com.caucho.util.CharBuffer;
import com.caucho.util.L10N;
import com.caucho.vfs.IOExceptionWrapper;
import com.caucho.vfs.ReadStream;

import java.io.IOException;

import java.util.logging.Level;
import java.util.logging.Logger;
49
50 /**
51  * Parse a javadoc generated html index file.
52  */

53 public class IndexParser {
54   static protected final Logger JavaDoc log = Log.open(IndexParser.class);
55   static final L10N L = new L10N(IndexParser.class);
56
57   static private final int EOF = -1;
58   static private final int EODL = -2;
59
60   
61   static public final int TYPE_PACKAGE = 0x01;
62   static public final int TYPE_CLASS = 0x02;
63   static public final int TYPE_INTERFACE = 0x04;
64   static public final int TYPE_ENUM = 0x08;
65   static public final int TYPE_ANNOTATION = 0x10;
66   static public final int TYPE_EXCEPTION = 0x20;
67   static public final int TYPE_ERROR = 0x40;
68   static public final int TYPE_CONSTRUCTOR = 0x80;
69   static public final int TYPE_METHOD = 0x100;
70   static public final int TYPE_VARIABLE = 0x200;
71
72   static public final int MODIFIER_STATIC = 0x1000;
73
74   private ReadStream _rs = null;
75   private Callback _callback;
76
77   private int _currLine = 0;
78   private int _lastDTLine = -1;
79
80   public IndexParser(ReadStream rs, Callback callback)
81   {
82     _rs = rs;
83     _callback = callback;
84   }
85
86   public void parse()
87     throws IOException JavaDoc
88   {
89     // find <DL>
90

91     int ch = 0;;
92     while (ch != EOF) {
93       ch = readChar();
94       if (ch == '<') {
95         ch = readChar();
96         if (ch == 'D' || ch == 'd') {
97           ch = readChar();
98           if (ch == 'L' || ch == 'l') {
99             ch = readChar(); // eat '>'
100
ch = parseDL();
101           }
102         }
103       }
104     }
105   }
106
107   public interface Callback {
108     public void item(String JavaDoc path, String JavaDoc anchor, String JavaDoc name, String JavaDoc fullname, int typ, int modifier, String JavaDoc description);
109   }
110
111   /**
112    * A String describing the read source and the current line of parsing.
113    */

114   public String JavaDoc getLineInfo()
115   {
116     CharBuffer cb = CharBuffer.allocate();
117     cb.append(_rs.getPath().toString());
118     cb.append(':');
119     cb.append(_currLine);
120     return cb.close();
121   }
122
123   protected boolean readLine(CharBuffer cb)
124     throws IOException JavaDoc
125   {
126     boolean n = _rs.readLine(cb);
127     if (n) _currLine++;
128     return n;
129   }
130
131   protected int readChar()
132     throws IOException JavaDoc
133   {
134     int n = _rs.readChar();
135     if (n == '\n') {
136       _currLine++;
137     }
138     else if (n == '\r') {
139       if (_rs.readChar() != '\n')
140         _rs.unread();
141       else {
142         _currLine++;
143         n = '\n';
144       }
145     }
146
147     return n;
148   }
149
150   // return EOF or EODL
151
private int parseDL()
152     throws IOException JavaDoc
153   {
154     boolean ignore = true; // ignore the first, it's just junk until the first <DT>
155
int ch = 0;
156
157     while (ch != EOF && ch != EODL) {
158       ch = readChar();
159
160       _lastDTLine = _currLine;
161       ch = parseDT(ignore);
162       ignore = false;
163     }
164     return ch;
165   }
166
167   // return EOF or EODL or last char read
168
// ignore means just ignore, used to get to first DT
169
private int parseDT(boolean ignore)
170     throws IOException JavaDoc
171   {
172     int r = EOF;
173
174     // fill dt until there is another <DT>
175
// or there is a </DL>
176
// once it's full, call readDT()
177

178     CharBuffer dt = ignore ? null : CharBuffer.allocate();
179     CharBuffer cbb = CharBuffer.allocate();
180
181     int ch;
182
183     while ((ch = readChar()) != -1) {
184       if (ch == '<') {
185         cbb.append((char)ch);
186         ch = readChar();
187         if (ch == 'D' || ch == 'd') {
188           cbb.append((char)ch);
189           ch = readChar();
190           if (ch == 'T' || ch == 't') {
191             cbb.clear();
192             if (!ignore) readDT(dt);
193             break;
194           }
195         } else if (ch == '/') {
196           cbb.append((char)ch);
197           ch = readChar();
198           if (ch == 'D' || ch == 'd') {
199             cbb.append((char)ch);
200             ch = readChar();
201             if (ch == 'L' || ch == 'l') {
202               if (!ignore) readDT(dt);
203               ch = EODL;
204               break;
205             }
206           }
207         }
208         if (dt != null) dt.append(cbb);
209         cbb.clear();
210       }
211       if (dt != null) dt.append((char)ch);
212     }
213
214     cbb.free();
215     if (dt != null) dt.free();
216
217     return ch;
218   }
219
220   private void readDT(CharBuffer cb)
221     throws IOException JavaDoc
222   {
223     String JavaDoc parseDescr = ""; // the step being performed
224

225     CharBuffer t = CharBuffer.allocate();
226
227     try {
228       if (log.isLoggable(Level.FINEST))
229         log.finest(L.l("<DT> entry from line {0} is [[{1}]]",String.valueOf(_lastDTLine),cb.toString()));
230
231       String JavaDoc path;
232       String JavaDoc anchor = null;
233       String JavaDoc fullname;
234       String JavaDoc name;
235       int typ;
236       int modifier = 0;
237       String JavaDoc description;
238
239       int i = 0;
240       parseDescr = "parsing href, looking for first \"";
241       i = readToAndEat(cb,i,'\"',null);
242       parseDescr = "parsing href, looking for next \"";
243       i = readToAndEat(cb,i,'\"',t);
244       while (t.startsWith("../"))
245         t.delete(0,3);
246
247       int ai = t.indexOf('#');
248       if (ai > -1) {
249         path = t.substring(0,ai);
250         anchor = t.substring(ai + 1);
251       } else {
252         path = t.toString();
253       }
254       t.clear();
255       if (log.isLoggable(Level.FINEST)) {
256         log.finest(L.l("path: [{0}]",path));
257         log.finest(L.l("anchor: [{0}]",anchor));
258       }
259
260       parseDescr = "using href to determine fullName";
261       t.append(path);
262       t.setLength(t.length() - 5); // drop .hmtl
263
for (int ti = t.length() - 1; ti >= 0; ti--) {
264         if (t.charAt(ti) == '/')
265           t.setCharAt(ti,'.');
266       }
267       if (anchor != null) {
268         t.append('.');
269         t.append(anchor);
270       }
271       if (t.endsWith(".package-summary"))
272           t.setLength(t.length() - 16);
273
274       fullname = t.toString();
275       t.clear();
276       if (log.isLoggable(Level.FINEST)) {
277         log.finest(L.l("fullname: [{0}]",fullname));
278       }
279
280       parseDescr = "parsing name, looking for opening <B>";
281       i = readToAndEat(cb,i,"<B>",null);
282       parseDescr = "parsing name, looking for closing </B>";
283       i = readToAndEat(cb,i,"<",t);
284       name = t.toString();
285       t.clear();
286
287       if (log.isLoggable(Level.FINEST)) {
288         log.finest(L.l("name: [{0}]",name));
289       }
290
291       parseDescr = "parsing description, `-' marks beginning";
292       i = readToAndEat(cb,i,'-',null);
293       parseDescr = "parsing description, removing markup";
294       clean(cb,i);
295
296       parseDescr = "parsing description";
297
298       // < 1.4 has "package ", 1.5 has "Package "
299
if (cb.startsWith("package ")) {
300         typ = TYPE_PACKAGE;
301       }
302       else if (cb.startsWith("Package ")) {
303         typ = TYPE_PACKAGE;
304       }
305       else if (cb.startsWith("class ")) {
306         typ = TYPE_CLASS;
307       }
308       else if (cb.startsWith("Class ")) {
309         typ = TYPE_CLASS;
310       }
311       else if (cb.startsWith("enum ")) {
312         typ = TYPE_ENUM;
313       }
314       else if (cb.startsWith("Enum ")) {
315         typ = TYPE_ENUM;
316       }
317       else if (cb.startsWith("annotation ")) {
318         typ = TYPE_ANNOTATION;
319       }
320       else if (cb.startsWith("Annotation ")) {
321         typ = TYPE_ANNOTATION;
322       }
323       else if (cb.startsWith("interface ")) {
324         typ = TYPE_INTERFACE;
325       }
326       else if (cb.startsWith("Interface ")) {
327         typ = TYPE_INTERFACE;
328       }
329       else if (cb.startsWith("exception ")) {
330         typ = TYPE_EXCEPTION;
331       }
332       else if (cb.startsWith("Exception ")) {
333         typ = TYPE_EXCEPTION;
334       }
335       else if (cb.startsWith("error ")) {
336         typ = TYPE_ERROR;
337       }
338       else if (cb.startsWith("Error ")) {
339         typ = TYPE_ERROR;
340       }
341       else if (cb.startsWith("Constructor")) {
342         typ = TYPE_CONSTRUCTOR;
343       }
344       else if (cb.startsWith("Method")) {
345         typ = TYPE_METHOD;
346       }
347       else if (cb.startsWith("Static method")) {
348         typ = TYPE_METHOD;
349         modifier = MODIFIER_STATIC;
350       }
351       else if (cb.startsWith("Variable")) {
352         typ = TYPE_VARIABLE;
353       }
354       else if (cb.startsWith("Static variable")) {
355         typ = TYPE_VARIABLE;
356         modifier = MODIFIER_STATIC;
357       }
358       else {
359         throw new IndexOutOfBoundsException JavaDoc(L.l("cannot determine type from `{0}'",cb.close()));
360       }
361
362       if (log.isLoggable(Level.FINEST)) {
363         log.finest(L.l("type: [{0}]",typ));
364       }
365
366       parseDescr = "parsing description, remove first sentence";
367       eatSentence(cb);
368       description = cb.toString();
369
370       if (log.isLoggable(Level.FINEST))
371         log.finest(L.l("description: [{0}]",description));
372
373
374       // do the callback
375
_callback.item(path,anchor,name,fullname,typ,modifier,description);
376
377     } catch (IndexOutOfBoundsException JavaDoc ex) {
378       String JavaDoc msg = L.l("parsing error {0}: {1}, {2}",parseDescr, ex.getMessage(),getLineInfo());
379       if (log.isLoggable(Level.FINE)) {
380         log.fine(msg);
381         log.fine(L.l("buffer was [[{0}]]",cb.toString()));
382       }
383       throw new IOExceptionWrapper(msg,ex);
384     } finally {
385       t.free();
386     }
387   }
388
389   private int readToAndEat(CharBuffer in, int i, char after, CharBuffer out)
390   {
391     int l = in.length();
392     char ch;
393     while ( (ch = in.charAt(i)) != after) {
394       if (out != null)
395         out.append(ch);
396       i++;
397       if (i >= l)
398         throw new IndexOutOfBoundsException JavaDoc(L.l("error looking for `{0}'",new Character JavaDoc(after)));
399     }
400     return ++i;
401   }
402
403   private int readToAndEat(CharBuffer in, int i, String JavaDoc after, CharBuffer out)
404   {
405     int al = after.length();
406
407     while (!in.regionMatches(i,after,0,al) ) {
408       if (out != null)
409         out.append(in.charAt(i));
410       i++;
411       if (i >= in.length())
412         throw new IndexOutOfBoundsException JavaDoc(L.l("error looking for `{0}'",after));
413     }
414     return i+al;
415   }
416
417   private void eatSentence(CharBuffer cb)
418   {
419     log.finest("eat sentence [[" + cb.toString() + "]]");
420     int cbl = cb.length();
421     int i = 0;
422     if (cb.startsWith("package ")) {
423       // second " " marks end of first sentence
424
i = cb.indexOf(' ') + 1;
425       if (i < cbl)
426         i = cb.indexOf(' ',i) + 1;
427       if (i <= 0)
428         i = cbl;
429     }
430     else {
431       // ". " marks end of first sentence
432
do {
433         int d = cb.indexOf('.',i);
434         if (d > -1) {
435           i = d + 1;
436           if (i >= cbl || Character.isWhitespace(cb.charAt(i)))
437             break;
438           else {
439             i++;
440           }
441         }
442         else
443           break;
444       } while (i < cbl);
445     }
446
447     // strip whitespace from beginning
448
while (i < cbl && (Character.isWhitespace(cb.charAt(i)) || cb.charAt(i) == '.')) {
449       i++;
450     }
451
452     if (i >= cbl) {
453       cb.clear();
454     }
455     else {
456       cb.delete(0,i);
457     }
458   }
459
460   /**
461    * remove whitespace or '.' at begining and whitespace at end, fix first
462    * sentence (add .), strip out equivalent of regexp match "<.*>", replace
463    * &nbsp; with space, replace newlines with space, and merge multiple spaces
464    * into a single space;
465    */

466   private void clean(CharBuffer cb, int i)
467   {
468     CharBuffer r = CharBuffer.allocate();
469
470     for (;;) {
471       i = eatWhitespace(cb,i);
472       if (i < cb.length() && cb.charAt(i) == '.')
473         i++;
474       else
475         break;
476     }
477
478     boolean lastws = false; // reduce multiple ws to a single space
479
while (i < cb.length()) {
480       char ch = cb.charAt(i);
481       if (ch == '\n')
482         ch = ' ';
483       if (ch == '\r')
484         ch = ' ';
485
486       if (Character.isWhitespace(ch)) {
487         if (lastws) {
488           i++;
489           continue;
490         }
491       }
492
493       if (ch == '<') {
494         if (cb.charAt(i+1) == '/' && cb.charAt(i+2) == 'A')
495           r.append(". ");
496
497         // have to watch for stray < that are not really markup
498
// only something that matches "</?[A-Za-z]>" counts as markup
499

500         int cn = (i + 1 >= cb.length()) ? -1 : cb.charAt(i+1);
501         if (cn == '/')
502           cn = (i + 2 >= cb.length()) ? cn : cb.charAt(i+2);
503         if ((cn >= 'a' && cn <= 'z') || (cn >= 'A' && cn <= 'Z')) {
504           i = eatUntil(cb,++i,'>');
505           if (cn == 'D' || cn == 'd')
506             r.append(' ');
507           i++;
508           continue;
509         }
510       }
511       if (cb.regionMatches(i,"&nbsp;",0,6) ) {
512         r.append(' ');
513         i += 5;
514         lastws = true;
515       }
516       else {
517         r.append(ch);
518         lastws = Character.isWhitespace(ch);
519       }
520       i++;
521     }
522
523     int l = r.length() - 1;
524     while (l > 0 && Character.isWhitespace(r.charAt(l))) {
525       r.setLength(l--);
526     }
527
528     cb.clear();
529     cb.append(r);
530   }
531
532   private int eatWhitespace(CharBuffer cb, int i)
533   {
534     while (i < cb.length() && Character.isWhitespace(cb.charAt(i))) {
535       i++;
536     }
537     return i;
538   }
539
540   private int eatUntil(CharBuffer cb, int i, char until)
541   {
542     int l = cb.length();
543     while (cb.charAt(i) != until) {
544       i++;
545       if (i >= l)
546         throw new IndexOutOfBoundsException JavaDoc(L.l("error looking for `{0}'",new Character JavaDoc(until)));
547     }
548     return i;
549   }
550
551 }
552
Popular Tags