KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > jdt > internal > ui > text > JavaBreakIterator


/*******************************************************************************
 * Copyright (c) 2000, 2006 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/

package org.eclipse.jdt.internal.ui.text;

import com.ibm.icu.text.BreakIterator;
import java.text.CharacterIterator;

import org.eclipse.core.runtime.Assert;



20 /**
21  * A java break iterator. It returns all breaks, including before and after
22  * whitespace, and it returns all camel case breaks.
23  * <p>
24  * A line break may be any of "\n", "\r", "\r\n", "\n\r".
25  * </p>
26  *
27  * @since 3.0
28  */

29 public class JavaBreakIterator extends BreakIterator {
30
31     /**
32      * A run of common characters.
33      */

34     protected static abstract class Run {
35         /** The length of this run. */
36         protected int length;
37
38         public Run() {
39             init();
40         }
41
42         /**
43          * Returns <code>true</code> if this run consumes <code>ch</code>,
44          * <code>false</code> otherwise. If <code>true</code> is returned,
45          * the length of the receiver is adjusted accordingly.
46          *
47          * @param ch the character to test
48          * @return <code>true</code> if <code>ch</code> was consumed
49          */

50         protected boolean consume(char ch) {
51             if (isValid(ch)) {
52                 length++;
53                 return true;
54             }
55             return false;
56         }
57
58         /**
59          * Whether this run accepts that character; does not update state. Called
60          * from the default implementation of <code>consume</code>.
61          *
62          * @param ch the character to test
63          * @return <code>true</code> if <code>ch</code> is accepted
64          */

65         protected abstract boolean isValid(char ch);
66
67         /**
68          * Resets this run to the initial state.
69          */

70         protected void init() {
71             length= 0;
72         }
73     }
74
75     static final class Whitespace extends Run {
76         protected boolean isValid(char ch) {
77             return Character.isWhitespace(ch) && ch != '\n' && ch != '\r';
78         }
79     }
80
81     static final class LineDelimiter extends Run {
82         /** State: INIT -> delimiter -> EXIT. */
83         private char fState;
84         private static final char INIT= '\0';
85         private static final char EXIT= '\1';
86
87         /*
88          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init()
89          */

90         protected void init() {
91             super.init();
92             fState= INIT;
93         }
94
95         /*
96          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consume(char)
97          */

98         protected boolean consume(char ch) {
99             if (!isValid(ch) || fState == EXIT)
100                 return false;
101
102             if (fState == INIT) {
103                 fState= ch;
104                 length++;
105                 return true;
106             } else if (fState != ch) {
107                 fState= EXIT;
108                 length++;
109                 return true;
110             } else {
111                 return false;
112             }
113         }
114
115         protected boolean isValid(char ch) {
116             return ch == '\n' || ch == '\r';
117         }
118     }
119
120     static final class Identifier extends Run {
121         /*
122          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
123          */

124         protected boolean isValid(char ch) {
125             return Character.isJavaIdentifierPart(ch);
126         }
127     }
128
129     static final class CamelCaseIdentifier extends Run {
130         /* states */
131         private static final int S_INIT= 0;
132         private static final int S_LOWER= 1;
133         private static final int S_ONE_CAP= 2;
134         private static final int S_ALL_CAPS= 3;
135         private static final int S_EXIT= 4;
136         private static final int S_EXIT_MINUS_ONE= 5;
137
138         /* character types */
139         private static final int K_INVALID= 0;
140         private static final int K_LOWER= 1;
141         private static final int K_UPPER= 2;
142         private static final int K_OTHER= 3;
143
144         private int fState;
145
146         private final static int[][] MATRIX= new int[][] {
147                 // K_INVALID, K_LOWER, K_UPPER, K_OTHER
148
{ S_EXIT, S_LOWER, S_ONE_CAP, S_LOWER }, // S_INIT
149
{ S_EXIT, S_LOWER, S_EXIT, S_LOWER }, // S_LOWER
150
{ S_EXIT, S_LOWER, S_ALL_CAPS, S_LOWER }, // S_ONE_CAP
151
{ S_EXIT, S_EXIT_MINUS_ONE, S_ALL_CAPS, S_LOWER }, // S_ALL_CAPS
152
};
153
154         /*
155          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#init()
156          */

157         protected void init() {
158             super.init();
159             fState= S_INIT;
160         }
161
162         /*
163          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#consumes(char)
164          */

165         protected boolean consume(char ch) {
166             int kind= getKind(ch);
167             fState= MATRIX[fState][kind];
168             switch (fState) {
169                 case S_LOWER:
170                 case S_ONE_CAP:
171                 case S_ALL_CAPS:
172                     length++;
173                     return true;
174                 case S_EXIT:
175                     return false;
176                 case S_EXIT_MINUS_ONE:
177                     length--;
178                     return false;
179                 default:
180                     Assert.isTrue(false);
181                     return false;
182             }
183         }
184
185         /**
186          * Determines the kind of a character.
187          *
188          * @param ch the character to test
189          */

190         private int getKind(char ch) {
191             if (Character.isUpperCase(ch))
192                 return K_UPPER;
193             if (Character.isLowerCase(ch))
194                 return K_LOWER;
195             if (Character.isJavaIdentifierPart(ch)) // _, digits...
196
return K_OTHER;
197             return K_INVALID;
198         }
199
200         /*
201          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
202          */

203         protected boolean isValid(char ch) {
204             return Character.isJavaIdentifierPart(ch);
205         }
206     }
207
208     static final class Other extends Run {
209         /*
210          * @see org.eclipse.jdt.internal.ui.text.JavaBreakIterator.Run#isValid(char)
211          */

212         protected boolean isValid(char ch) {
213             return !Character.isWhitespace(ch) && !Character.isJavaIdentifierPart(ch);
214         }
215     }
216
217     private static final Run WHITESPACE= new Whitespace();
218     private static final Run DELIMITER= new LineDelimiter();
219     private static final Run CAMELCASE= new CamelCaseIdentifier(); // new Identifier();
220
private static final Run OTHER= new Other();
221
222     /** The platform break iterator (word instance) used as a base. */
223     protected final BreakIterator fIterator;
224     /** The text we operate on. */
225     protected CharSequence JavaDoc fText;
226     /** our current position for the stateful methods. */
227     private int fIndex;
228
229
230     /**
231      * Creates a new break iterator.
232      */

233     public JavaBreakIterator() {
234         fIterator= BreakIterator.getWordInstance();
235         fIndex= fIterator.current();
236     }
237
238     /*
239      * @see java.text.BreakIterator#current()
240      */

241     public int current() {
242         return fIndex;
243     }
244
245     /*
246      * @see java.text.BreakIterator#first()
247      */

248     public int first() {
249         fIndex= fIterator.first();
250         return fIndex;
251     }
252
253     /*
254      * @see java.text.BreakIterator#following(int)
255      */

256     public int following(int offset) {
257         // work around too eager IAEs in standard implementation
258
if (offset == getText().getEndIndex())
259             return DONE;
260
261         int next= fIterator.following(offset);
262         if (next == DONE)
263             return DONE;
264
265         // TODO deal with complex script word boundaries
266
// Math.min(offset + run.length, next) does not work
267
// since BreakIterator.getWordInstance considers _ as boundaries
268
// seems to work fine, however
269
Run run= consumeRun(offset);
270         return offset + run.length;
271
272     }
273
274     /**
275      * Consumes a run of characters at the limits of which we introduce a break.
276      * @param offset the offset to start at
277      * @return the run that was consumed
278      */

279     private Run consumeRun(int offset) {
280         // assert offset < length
281

282         char ch= fText.charAt(offset);
283         int length= fText.length();
284         Run run= getRun(ch);
285         while (run.consume(ch) && offset < length - 1) {
286             offset++;
287             ch= fText.charAt(offset);
288         }
289
290         return run;
291     }
292
293     /**
294      * Returns a run based on a character.
295      *
296      * @param ch the character to test
297      * @return the correct character given <code>ch</code>
298      */

299     private Run getRun(char ch) {
300         Run run;
301         if (WHITESPACE.isValid(ch))
302             run= WHITESPACE;
303         else if (DELIMITER.isValid(ch))
304             run= DELIMITER;
305         else if (CAMELCASE.isValid(ch))
306             run= CAMELCASE;
307         else if (OTHER.isValid(ch))
308             run= OTHER;
309         else {
310             Assert.isTrue(false);
311             return null;
312         }
313
314         run.init();
315         return run;
316     }
317
318     /*
319      * @see java.text.BreakIterator#getText()
320      */

321     public CharacterIterator JavaDoc getText() {
322         return fIterator.getText();
323     }
324
325     /*
326      * @see java.text.BreakIterator#isBoundary(int)
327      */

328     public boolean isBoundary(int offset) {
329         if (offset == getText().getBeginIndex())
330             return true;
331         else
332             return following(offset - 1) == offset;
333     }
334
335     /*
336      * @see java.text.BreakIterator#last()
337      */

338     public int last() {
339         fIndex= fIterator.last();
340         return fIndex;
341     }
342
343     /*
344      * @see java.text.BreakIterator#next()
345      */

346     public int next() {
347         fIndex= following(fIndex);
348         return fIndex;
349     }
350
351     /*
352      * @see java.text.BreakIterator#next(int)
353      */

354     public int next(int n) {
355         return fIterator.next(n);
356     }
357
358     /*
359      * @see java.text.BreakIterator#preceding(int)
360      */

361     public int preceding(int offset) {
362         if (offset == getText().getBeginIndex())
363             return DONE;
364
365         if (isBoundary(offset - 1))
366             return offset - 1;
367
368         int previous= offset - 1;
369         do {
370             previous= fIterator.preceding(previous);
371         } while (!isBoundary(previous));
372
373         int last= DONE;
374         while (previous < offset) {
375             last= previous;
376             previous= following(previous);
377         }
378
379         return last;
380     }
381
382     /*
383      * @see java.text.BreakIterator#previous()
384      */

385     public int previous() {
386         fIndex= preceding(fIndex);
387         return fIndex;
388     }
389
390     /*
391      * @see java.text.BreakIterator#setText(java.lang.String)
392      */

393     public void setText(String JavaDoc newText) {
394         setText((CharSequence JavaDoc) newText);
395     }
396
397     /**
398      * Creates a break iterator given a char sequence.
399      * @param newText the new text
400      */

401     public void setText(CharSequence JavaDoc newText) {
402         fText= newText;
403         fIterator.setText(new SequenceCharacterIterator(newText));
404         first();
405     }
406
407     /*
408      * @see java.text.BreakIterator#setText(java.text.CharacterIterator)
409      */

410     public void setText(CharacterIterator JavaDoc newText) {
411         if (newText instanceof CharSequence JavaDoc) {
412             fText= (CharSequence JavaDoc) newText;
413             fIterator.setText(newText);
414             first();
415         } else {
416             throw new UnsupportedOperationException JavaDoc("CharacterIterator not supported"); //$NON-NLS-1$
417
}
418     }
419 }
420
Popular Tags