KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > jdt > internal > ui > text > spelling > SpellCheckIterator


1 /*******************************************************************************
2  * Copyright (c) 2000, 2007 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11 package org.eclipse.jdt.internal.ui.text.spelling;
12
13 import com.ibm.icu.text.BreakIterator;
14
15 import java.util.LinkedList JavaDoc;
16 import java.util.Locale JavaDoc;
17
18 import org.eclipse.jface.text.IDocument;
19 import org.eclipse.jface.text.IRegion;
20 import org.eclipse.jface.text.TextUtilities;
21
22 import org.eclipse.jdt.internal.corext.refactoring.nls.NLSElement;
23
24 import org.eclipse.jdt.internal.ui.text.javadoc.IHtmlTagConstants;
25 import org.eclipse.jdt.internal.ui.text.javadoc.IJavaDocTagConstants;
26 import org.eclipse.jdt.internal.ui.text.spelling.engine.DefaultSpellChecker;
27 import org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator;
28
29
30 /**
31  * Iterator to spell check javadoc comment regions.
32  *
33  * @since 3.0
34  */

35 public class SpellCheckIterator implements ISpellCheckIterator {
36
37     /** The text of the region being iterated; empty if it could not be read or starts with the NLS tag prefix */
38     protected final String JavaDoc fContent;
39
40     /** The default line delimiter of the document; its first character ends the skip after a reference tag */
41     private final String JavaDoc fDelimiter;
42
43     /** The token most recently returned by <code>next()</code>, or <code>null</code> */
44     protected String JavaDoc fLastToken= null;
45
46     /** The next break: exclusive end index of the current token, relative to the region */
47     protected int fNext= 1;
48
49     /** The offset of the region within the document */
50     protected final int fOffset;
51
52     /** The predecessor break: index at which the following token will start */
53     private int fPredecessor;
54
55     /** The previous break: start index of the current token, relative to the region */
56     protected int fPrevious= 0;
57
58     /** The sentence breaks not yet passed, stored as <code>Integer</code> offsets in iteration order */
59     private final LinkedList JavaDoc fSentenceBreaks= new LinkedList JavaDoc();
60
61     /** Does the current word start a sentence? */
62     private boolean fStartsSentence= false;
63
64     /** The successor break: the word break after <code>fNext</code>, or <code>BreakIterator.DONE</code> */
65     protected int fSuccessor;
66
67     /** The word iterator supplying the word breaks of <code>fContent</code> */
68     private final BreakIterator fWordIterator;
69
    /** Whether single letter tokens are excluded from the tokens handed out for spell checking */
70     private boolean fIsIgnoringSingleLetters;
71
72     /**
73      * Creates a new spell check iterator.
74      *
75      * @param document the document containing the specified partition
76      * @param region the region to spell check
77      * @param locale the locale to use for spell checking
78      */

79     public SpellCheckIterator(IDocument document, IRegion region, Locale JavaDoc locale) {
80         this(document, region, locale, BreakIterator.getWordInstance(locale));
81     }
82
83     /**
84      * Creates a new spell check iterator.
85      *
86      * @param document the document containing the specified partition
87      * @param region the region to spell check
88      * @param locale the locale to use for spell checking
89      * @param breakIterator the break-iterator
90      */

91     public SpellCheckIterator(IDocument document, IRegion region, Locale JavaDoc locale, BreakIterator breakIterator) {
92         fOffset= region.getOffset();
93         fWordIterator= breakIterator;
94         fDelimiter= TextUtilities.getDefaultLineDelimiter(document);
95
96         String JavaDoc content;
97         try {
98
99             content= document.get(region.getOffset(), region.getLength());
100             if (content.startsWith(NLSElement.TAG_PREFIX))
101                 content= ""; //$NON-NLS-1$
102

103         } catch (Exception JavaDoc exception) {
104             content= ""; //$NON-NLS-1$
105
}
106         fContent= content;
107
108         fWordIterator.setText(content);
109         fPredecessor= fWordIterator.first();
110         fSuccessor= fWordIterator.next();
111
112         final BreakIterator iterator= BreakIterator.getSentenceInstance(locale);
113         iterator.setText(content);
114
115         int offset= iterator.current();
116         while (offset != BreakIterator.DONE) {
117
118             fSentenceBreaks.add(new Integer JavaDoc(offset));
119             offset= iterator.next();
120         }
121     }
122     
123     /*
124      * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellCheckIterator#setIgnoreSingleLetters(boolean)
125      * @since 3.3
126      */

127     public void setIgnoreSingleLetters(boolean state) {
128         fIsIgnoringSingleLetters= state;
129     }
130
131     /*
132      * @see org.eclipse.spelling.done.ISpellCheckIterator#getBegin()
133      */

134     public final int getBegin() {
135         return fPrevious + fOffset;
136     }
137
138     /*
139      * @see org.eclipse.spelling.done.ISpellCheckIterator#getEnd()
140      */

141     public final int getEnd() {
142         return fNext + fOffset - 1;
143     }
144
145     /*
146      * @see java.util.Iterator#hasNext()
147      */

148     public final boolean hasNext() {
149         return fSuccessor != BreakIterator.DONE;
150     }
151
152     /**
153      * Does the specified token consist of at least one letter and digits
154      * only?
155      *
156      * @param begin the begin index
157      * @param end the end index
158      * @return <code>true</code> iff the token consists of digits and at
159      * least one letter only, <code>false</code> otherwise
160      */

161     protected final boolean isAlphaNumeric(final int begin, final int end) {
162
163         char character= 0;
164
165         boolean letter= false;
166         for (int index= begin; index < end; index++) {
167
168             character= fContent.charAt(index);
169             if (Character.isLetter(character))
170                 letter= true;
171
172             if (!Character.isLetterOrDigit(character))
173                 return false;
174         }
175         return letter;
176     }
177
178     /**
179      * Checks the last token against the given tags?
180      *
181      * @param tags the tags to check
182      * @return <code>true</code> iff the last token is in the given array
183      */

184     protected final boolean isToken(final String JavaDoc[] tags) {
185         return isToken(fLastToken, tags);
186     }
187     
188     /**
189      * Checks the given token against the given tags?
190      *
191      * @param token the token to check
192      * @param tags the tags to check
193      * @return <code>true</code> iff the last token is in the given array
194      * @since 3.3
195      */

196     protected final boolean isToken(final String JavaDoc token, final String JavaDoc[] tags) {
197
198         if (token != null) {
199
200             for (int index= 0; index < tags.length; index++) {
201
202                 if (token.equals(tags[index]))
203                     return true;
204             }
205         }
206         return false;
207     }
208
209     /**
210      * Is the current token a single letter token surrounded by
211      * non-whitespace characters?
212      *
213      * @param begin the begin index
214      * @return <code>true</code> iff the token is a single letter token,
215      * <code>false</code> otherwise
216      */

217     protected final boolean isSingleLetter(final int begin) {
218         if (!Character.isLetter(fContent.charAt(begin)))
219             return false;
220
221         if (begin > 0 && !Character.isWhitespace(fContent.charAt(begin - 1)))
222             return false;
223
224         if (begin < fContent.length() - 1 && !Character.isWhitespace(fContent.charAt(begin + 1)))
225             return false;
226         
227         return true;
228     }
229
230     /**
231      * Does the specified token look like an URL?
232      *
233      * @param begin the begin index
234      * @return <code>true</code> iff this token look like an URL,
235      * <code>false</code> otherwise
236      */

237     protected final boolean isUrlToken(final int begin) {
238
239         for (int index= 0; index < DefaultSpellChecker.URL_PREFIXES.length; index++) {
240
241             if (fContent.startsWith(DefaultSpellChecker.URL_PREFIXES[index], begin))
242                 return true;
243         }
244         return false;
245     }
246
247     /**
248      * Does the specified token consist of whitespace only?
249      *
250      * @param begin the begin index
251      * @param end the end index
252      * @return <code>true</code> iff the token consists of whitespace
253      * only, <code>false</code> otherwise
254      */

255     protected final boolean isWhitespace(final int begin, final int end) {
256
257         for (int index= begin; index < end; index++) {
258
259             if (!Character.isWhitespace(fContent.charAt(index)))
260                 return false;
261         }
262         return true;
263     }
264
265     /*
266      * @see java.util.Iterator#next()
267      */

268     public Object JavaDoc next() {
269
270         String JavaDoc token= nextToken();
271         while (token == null && fSuccessor != BreakIterator.DONE)
272             token= nextToken();
273
274         fLastToken= token;
275
276         return token;
277     }
278
279     /**
280      * Advances the end index to the next word break.
281      */

282     protected final void nextBreak() {
283
284         fNext= fSuccessor;
285         fPredecessor= fSuccessor;
286
287         fSuccessor= fWordIterator.next();
288     }
289
290     /**
291      * Returns the next sentence break.
292      *
293      * @return the next sentence break
294      */

295     protected final int nextSentence() {
296         return ((Integer JavaDoc) fSentenceBreaks.getFirst()).intValue();
297     }
298
299     /**
300      * Determines the next token to be spell checked.
301      *
302      * @return the next token to be spell checked, or <code>null</code>
303      * iff the next token is not a candidate for spell checking.
304      */

305     protected String JavaDoc nextToken() {
306
307         String JavaDoc token= null;
308
309         fPrevious= fPredecessor;
310         fStartsSentence= false;
311
312         nextBreak();
313
314         boolean update= false;
315         if (fNext - fPrevious > 0) {
316
317             if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IJavaDocTagConstants.JAVADOC_TAG_PREFIX) {
318
319                 nextBreak();
320                 if (Character.isLetter(fContent.charAt(fPrevious + 1))) {
321                     update= true;
322                     token= fContent.substring(fPrevious, fNext);
323                 } else
324                     fPredecessor= fNext;
325
326             } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_TAG_PREFIX && (Character.isLetter(fContent.charAt(fNext)) || fContent.charAt(fNext) == '/')) {
327
328                 if (fContent.startsWith(IHtmlTagConstants.HTML_CLOSE_PREFIX, fPrevious))
329                     nextBreak();
330
331                 nextBreak();
332
333                 if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_TAG_POSTFIX) {
334
335                     nextBreak();
336                     if (fSuccessor != BreakIterator.DONE) {
337                         update= true;
338                         token= fContent.substring(fPrevious, fNext);
339                     }
340                 }
341             } else if (fSuccessor != BreakIterator.DONE && fContent.charAt(fPrevious) == IHtmlTagConstants.HTML_ENTITY_START && (Character.isLetter(fContent.charAt(fNext)))) {
342                 nextBreak();
343                 if (fSuccessor != BreakIterator.DONE && fContent.charAt(fNext) == IHtmlTagConstants.HTML_ENTITY_END) {
344                     nextBreak();
345                     if (isToken(fContent.substring(fPrevious, fNext), IHtmlTagConstants.HTML_ENTITY_CODES)) {
346                         skipTokens(fPrevious, IHtmlTagConstants.HTML_ENTITY_END);
347                         update= true;
348                     } else
349                         token= fContent.substring(fPrevious, fNext);
350                 } else
351                     token= fContent.substring(fPrevious, fNext);
352                 
353                 update= true;
354             } else if (!isWhitespace(fPrevious, fNext) && isAlphaNumeric(fPrevious, fNext)) {
355
356                 if (isUrlToken(fPrevious))
357                     skipTokens(fPrevious, ' ');
358                 else if (isToken(IJavaDocTagConstants.JAVADOC_PARAM_TAGS))
359                     fLastToken= null;
360                 else if (isToken(IJavaDocTagConstants.JAVADOC_REFERENCE_TAGS)) {
361                     fLastToken= null;
362                     skipTokens(fPrevious, fDelimiter.charAt(0));
363                 } else if (fNext - fPrevious > 1 || isSingleLetter(fPrevious) && !fIsIgnoringSingleLetters)
364                     token= fContent.substring(fPrevious, fNext);
365
366                 update= true;
367             }
368         }
369
370         if (update && fSentenceBreaks.size() > 0) {
371
372             if (fPrevious >= nextSentence()) {
373
374                 while (fSentenceBreaks.size() > 0 && fPrevious >= nextSentence())
375                     fSentenceBreaks.removeFirst();
376
377                 fStartsSentence= (fLastToken == null) || (token != null);
378             }
379         }
380         return token;
381     }
382
383     /*
384      * @see java.util.Iterator#remove()
385      */

386     public final void remove() {
387         throw new UnsupportedOperationException JavaDoc();
388     }
389
390     /**
391      * Skip the tokens until the stop character is reached.
392      *
393      * @param begin the begin index
394      * @param stop the stop character
395      */

396     protected final void skipTokens(final int begin, final char stop) {
397
398         int end= begin;
399
400         while (end < fContent.length() && fContent.charAt(end) != stop)
401             end++;
402
403         if (end < fContent.length()) {
404
405             fNext= end;
406             fPredecessor= fNext;
407
408             fSuccessor= fWordIterator.following(fNext);
409         } else
410             fSuccessor= BreakIterator.DONE;
411     }
412
413     /*
414      * @see org.eclipse.spelling.done.ISpellCheckIterator#startsSentence()
415      */

416     public final boolean startsSentence() {
417         return fStartsSentence;
418     }
419 }
420
Popular Tags