SourceCodeCommentParser


1   /*
2    * The contents of this file are subject to the terms of the Common Development
3    * and Distribution License (the License). You may not use this file except in
4    * compliance with the License.
5    *
6    * You can obtain a copy of the License at http://www.netbeans.org/cddl.html
7    * or http://www.netbeans.org/cddl.txt.
8    *
9    * When distributing Covered Code, include this CDDL Header Notice in each file
10   * and include the License file at http://www.netbeans.org/cddl.txt.
11   * If applicable, add the following below the CDDL Header, with the fields
12   * enclosed by brackets [] replaced by your own identifying information:
13   * "Portions Copyrighted [year] [name of copyright owner]"
14   *
15   * The Original Software is NetBeans. The Initial Developer of the Original
16   * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
17   * Microsystems, Inc. All Rights Reserved.
18   */
19  
// @todo The current version does not correctly handle comment tokens inside
//       "" or '' (remember that such a section may span multiple lines!)
22  
23  package org.netbeans.modules.tasklist.docscan;
24  
25  import org.netbeans.modules.tasklist.providers.SuggestionContext;
26  
27  import java.io.BufferedReader  ;
28  import java.io.File  ;
29  import java.io.FileReader  ;
30  import java.io.IOException  ;
31  import java.io.Reader  ;
32  import java.io.StringReader  ;
33  import java.util.regex.Pattern  ;
34  import java.util.regex.Matcher  ;
35  
36  
37  /**
38   * The Source Code Comment Parser allows you to read the comments in a source
39   * code line by line.
40   *
41   * @author Trond Norbye
42   */
43  final class SourceCodeCommentParser {
44  
45      /**
46       * Default instance, treat all lines as comments!!
47       */
48      public SourceCodeCommentParser() {
49          parser = new SourceParser();
50      }
51      
52      /**
53       * Create a new instance of the SourceCodeCommentParser that only supports
54       * singe line comments
55       * @param lineComment The start tag of a "single line comment"
56       */
57      public SourceCodeCommentParser(String   lineComment) {
58          parser = new CommentParser(lineComment);
59      }
60      
61      /**
62       * Create a new instance of SourceCodeCommentParser that only supports
63       * block comments (like C)
64       * @param blockStart the start tag of a block comment
65       * @param blockEnd the end tag of a block comment
66       */
67      public SourceCodeCommentParser(String   blockStart, String   blockEnd) {
68          parser = new CommentParser(null, blockStart, blockEnd);
69      }
70  
71      /**
72       * Create a new instance of SourceCodeCommentParser that supports single-
73       * line comments, and multiline comments
74       * @param lineComment the start tag for a single-line comment
75       * @param blockStart the start tag of a block comment
76       * @param blockEnd the end tag of a block comment
77       */
78      public SourceCodeCommentParser(String   lineComment, 
79                                     String   blockStart, 
80                                     String   blockEnd) {
81          parser = new CommentParser(lineComment, blockStart, blockEnd);
82      }
83      
84      /**
85       * Set the document to parse
86       * @param doc the document to parse
87       */
88      public void setDocument(SuggestionContext env) {
89          parser.setDocument(env);
90      }
91      
92      /**
93       * get the range for the next comment line...
94       * @param ret Where to store the result
95       * @return false when EOF, true otherwise
96       */
97      public boolean nextRegion(CommentRegion reg) throws IOException   {
98          return parser.nextRegion(reg);
99      }
100 
101     /**
102      * I don't know if this was a smart thing to do, but instead of testing
103      * each time if I should skip comments or not, I decided to create an
104      * an internal parser that I could extend to my needs... The most generic
105      * parser treats everything as comments, and should hence "work" for all
106      * unknown file types ;)
107      */
108     private static class SourceParser {
109         
110         /**
111          * Create a new instance of the SourceParser
112          */
113         public SourceParser() {
114             text = null;
115             curr = 0;
116             matcher = null;
117         }
118         
119         /**
120          * Get the indexes of the next comment region..
121          * @param ret Where to store the result
122          * @return false when EOF, true otherwise
123          * @throws java.io.IOException if a read error occurs on the input
124          *         stream.
125          */
126         public boolean nextRegion(CommentRegion reg) throws IOException   {
127             if (text == null) {
128                 return false;
129             }
130 
131             reg.start = curr;
132             reg.stop = text.length();
133 
134             if (reg.start == reg.stop) {
135                 return false;
136             }
137 
138             curr = reg.stop;
139             return true;
140         }
141                 
142         /**
143          * Set the document to parse
144          * @param doc the document to parse
145          */
146         public void setDocument(SuggestionContext env) {
147             text = env.getCharSequence().toString();
148 
149             if (pattern != null) {
150                 matcher = pattern.matcher(text);
151             }
152         }
153 
154         /**
155          * Append all characters in a string to a stringbuffer as \\unnnn
156          * @param buf destination buffer
157          * @param str the string to append
158          */
159         protected void appendEncodedChars(StringBuffer   buf, String   str) {
160             int len = str.length();
161             
162             for (int ii = 0; ii < len; ++ii) {
163                 String   s = Integer.toHexString((int)str.charAt(ii));
164                 
165                 buf.append("\\u");
166                 for(int i = 0, n = 4 - s.length(); i < n; i++) {
167                     buf.append('0');
168                 }
169                 buf.append(s);
170             }
171         }
172 
173         /**
174          * A StringBuffer that I use towards the source reader to avoid the
175          * creation of a lot of strings...
176          */
177         protected String   text;
178         
179         /** current position in the text*/
180         protected int curr;
181 
182         /** A matcher that may be utilized by a subclass... */
183         protected Matcher   matcher;
184         /** The pattern to search for in the text */
185         protected Pattern   pattern;
186 
187     }
188     
189     /**
190      * The comment parser exstend the source parser with functionality to
191      * create single line comments, and a block of lines that are treated as
192      * a comment.
193      */
194     private static class CommentParser extends SourceParser {
195         /**
196          * Create a new instance of the comment parser that only supports
197          * a "single-line" comments
198          * @param lineComment the token to start a line comment
199          */
200         public CommentParser(String   lineComment) {
201             this(lineComment, null, null);
202         }
203         
204         /**
205          * Create a new instance of the comment parser that supports:
206          * @param lineComment the token for a single line comment
207          * @param blockStart the start token for a multiline comment block
208          * @param blockEnd the end token for a multiline comment block
209          */
210         public CommentParser(String   lineComment,
211                              String   blockStart,
212                              String   blockEnd) {
213             super();
214             this.lineComment = lineComment;
215             this.blockStart = blockStart;
216             this.blockEnd = blockEnd;
217 
218             StringBuffer   sb = new StringBuffer  ();
219             
220             boolean needor = false;
221 
222             if (lineComment != null) {
223                 appendEncodedChars(sb, lineComment);
224                 needor = true;
225             }
226 
227             if (blockStart != null) {
228                 if (needor) {
229                     sb.append('|');
230                 }
231                 appendEncodedChars(sb, blockStart);
232             }
233 
234             pattern = Pattern.compile(sb.toString());
235             matcher = null;
236         }
237 
238         /**
239          * Get the next line of text from the file.
240          * @param reg Where to store the result
241          * @return false when EOF, true otherwise
242          * @throws java.io.IOException if a read error occurs on the input
243          *         stream.
244          */
245         public boolean nextRegion(CommentRegion reg) throws IOException   {
246             boolean ret = false;
247             
248             if (matcher != null && matcher.find(curr)) {
249                 String   token = text.substring(matcher.start(), matcher.end());
250 
251                 reg.start = matcher.start();
252 
253                 if (lineComment != null && lineComment.equals(token)) {
254                     int idx = text.indexOf("\n", reg.start);
255                     if (idx != -1) {
256                         reg.stop = idx;
257                     } else {
258                         reg.stop = text.length();
259                     }
260                 } else if (blockStart != null) {
261                     int idx = text.indexOf(blockEnd, reg.start);
262                     if (idx != -1) {
263                         reg.stop = idx + blockEnd.length();
264                     } else {
265                         reg.stop = text.length();
266                     }
267                 } else {
268                     return false;  // no need to scan for commens if these are not defined at all
269                 }
270 
271                 curr = reg.stop + 1;
272                 ret = true;
273             }
274             return ret;
275         }
276         
277         /** The string that indicates the start of a single line comment */
278         protected String    lineComment;
279         /** The string that indicates the start of a multiline comment */
280         protected String    blockStart;
281         /** The string that indicates the end of a multiline comment */
282         protected String    blockEnd;
283 
284     }
285 
286     /** A little handy struct to pass up to the parent.. */
287     public static class CommentRegion {
288         /** The position in the text where the comment starts */
289         public int start;
290         /** The position in the text where the comment ends */
291         public int stop; 
292         
293         /** Create a new instance */
294         public CommentRegion() {
295             start = stop = 0;
296         }
297     }
298 
299     /** The parser used by this SourceCodeCommentParser */
300     private SourceParser parser;
301 }
302
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags