KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > UCharacterIterator


1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2004, International Business Machines Corporation and *
4  * others. All Rights Reserved. *
5  *******************************************************************************
6  */

7 package com.ibm.icu.text;
8
9
10 import java.text.CharacterIterator JavaDoc;
11
12 import com.ibm.icu.impl.CharacterIteratorWrapper;
13 import com.ibm.icu.impl.ReplaceableUCharacterIterator;
14 import com.ibm.icu.impl.UCharArrayIterator;
15 import com.ibm.icu.impl.UCharacterIteratorWrapper;
16 import com.ibm.icu.impl.UCharacterProperty;
17
18
19 /**
20  * Abstract class that defines an API for iteration on text objects.This is an
21  * interface for forward and backward iteration and random access into a text
22  * object. Forward iteration is done with post-increment and backward iteration
23  * is done with pre-decrement semantics, while the
24  * <code>java.text.CharacterIterator</code> interface methods provided forward
25  * iteration with "pre-increment" and backward iteration with pre-decrement
26  * semantics. This API is more efficient for forward iteration over code points.
27  * The other major difference is that this API can do both code unit and code point
28  * iteration, <code>java.text.CharacterIterator</code> can only iterate over
29  * code units and is limited to BMP (0 - 0xFFFF)
30  * @author Ram
31  * @stable ICU 2.4
32  */

33 public abstract class UCharacterIterator
34                       implements Cloneable JavaDoc,UForwardCharacterIterator {
35
36     /**
37      * Protected default constructor for the subclasses
38      * @stable ICU 2.4
39      */

40     protected UCharacterIterator(){
41     }
42     
43     // static final methods ----------------------------------------------------
44

45     /**
46      * Returns a <code>UCharacterIterator</code> object given a
47      * <code>Replaceable</code> object.
48      * @param source a valid source as a <code>Replaceable</code> object
49      * @return UCharacterIterator object
50      * @exception IllegalArgumentException if the argument is null
51      * @stable ICU 2.4
52      */

53     public static final UCharacterIterator getInstance(Replaceable source){
54         return new ReplaceableUCharacterIterator(source);
55     }
56     
57     /**
58      * Returns a <code>UCharacterIterator</code> object given a
59      * source string.
60      * @param source a string
61      * @return UCharacterIterator object
62      * @exception IllegalArgumentException if the argument is null
63      * @stable ICU 2.4
64      */

65     public static final UCharacterIterator getInstance(String JavaDoc source){
66         return new ReplaceableUCharacterIterator(source);
67     }
68     
69     /**
70      * Returns a <code>UCharacterIterator</code> object given a
71      * source character array.
72      * @param source an array of UTF-16 code units
73      * @return UCharacterIterator object
74      * @exception IllegalArgumentException if the argument is null
75      * @stable ICU 2.4
76      */

77     public static final UCharacterIterator getInstance(char[] source){
78         return getInstance(source,0,source.length);
79     }
80     
81     /**
82      * Returns a <code>UCharacterIterator</code> object given a
83      * source character array.
84      * @param source an array of UTF-16 code units
85      * @return UCharacterIterator object
86      * @exception IllegalArgumentException if the argument is null
87      * @stable ICU 2.4
88      */

89     public static final UCharacterIterator getInstance(char[] source, int start, int limit){
90         return new UCharArrayIterator(source,start,limit);
91     }
92     /**
93      * Returns a <code>UCharacterIterator</code> object given a
94      * source StringBuffer.
95      * @param source an string buffer of UTF-16 code units
96      * @return UCharacterIterator object
97      * @exception IllegalArgumentException if the argument is null
98      * @stable ICU 2.4
99      */

100     public static final UCharacterIterator getInstance(StringBuffer JavaDoc source){
101         return new ReplaceableUCharacterIterator(source);
102     }
103
104     /**
105      * Returns a <code>UCharacterIterator</code> object given a
106      * CharacterIterator.
107      * @param source a valid CharacterIterator object.
108      * @return UCharacterIterator object
109      * @exception IllegalArgumentException if the argument is null
110      * @stable ICU 2.4
111      */

112     public static final UCharacterIterator getInstance(CharacterIterator JavaDoc source){
113         return new CharacterIteratorWrapper(source);
114     }
115        
116     // public methods ----------------------------------------------------------
117
/**
118      * Returns a <code>java.text.CharacterIterator</code> object for
119      * the underlying text of this iterator. The returned iterator is
120      * independent of this iterator.
121      * @return java.text.CharacterIterator object
122      * @stable ICU 2.4
123      */

124     public CharacterIterator JavaDoc getCharacterIterator(){
125         return new UCharacterIteratorWrapper(this);
126     }
127    
128     /**
129      * Returns the code unit at the current index. If index is out
130      * of range, returns DONE. Index is not changed.
131      * @return current code unit
132      * @stable ICU 2.4
133      */

134     public abstract int current();
135     
136     /**
137      * Returns the codepoint at the current index.
138      * If the current index is invalid, DONE is returned.
139      * If the current index points to a lead surrogate, and there is a following
140      * trail surrogate, then the code point is returned. Otherwise, the code
141      * unit at index is returned. Index is not changed.
142      * @return current codepoint
143      * @stable ICU 2.4
144      */

145     public int currentCodePoint(){
146         int ch = current();
147         if(UTF16.isLeadSurrogate((char)ch)){
148             // advance the index to get the
149
// next code point
150
next();
151             // due to post increment semantics
152
// current() after next() actually
153
// returns the char we want
154
int ch2 = current();
155             // current should never change
156
// the current index so back off
157
previous();
158             
159             if(UTF16.isTrailSurrogate((char)ch2)){
160                 // we found a surrogate pair
161
// return the codepoint
162
return UCharacterProperty.getRawSupplementary(
163                                                           (char)ch,(char)ch2
164                                                              );
165             }
166         }
167         return ch;
168     }
169     
170     /**
171      * Returns the length of the text
172      * @return length of the text
173      * @stable ICU 2.4
174      */

175     public abstract int getLength();
176
177     
178     /**
179      * Gets the current index in text.
180      * @return current index in text.
181      * @stable ICU 2.4
182      */

183     public abstract int getIndex();
184
185
186     /**
187      * Returns the UTF16 code unit at index, and increments to the next
188      * code unit (post-increment semantics). If index is out of
189      * range, DONE is returned, and the iterator is reset to the limit
190      * of the text.
191      * @return the next UTF16 code unit, or DONE if the index is at the limit
192      * of the text.
193      * @stable ICU 2.4
194      */

195     public abstract int next();
196
197     /**
198      * Returns the code point at index, and increments to the next code
199      * point (post-increment semantics). If index does not point to a
200      * valid surrogate pair, the behavior is the same as
201      * <code>next()<code>. Otherwise the iterator is incremented past
202      * the surrogate pair, and the code point represented by the pair
203      * is returned.
204      * @return the next codepoint in text, or DONE if the index is at
205      * the limit of the text.
206      * @stable ICU 2.4
207      */

208     public int nextCodePoint(){
209         int ch1 = next();
210         if(UTF16.isLeadSurrogate((char)ch1)){
211             int ch2 = next();
212             if(UTF16.isTrailSurrogate((char)ch2)){
213                 return UCharacterProperty.getRawSupplementary((char)ch1,
214                                                               (char)ch2);
215             }else if (ch2 != DONE) {
216                 // unmatched surrogate so back out
217
previous();
218             }
219         }
220         return ch1;
221     }
222
223     /**
224      * Decrement to the position of the previous code unit in the
225      * text, and return it (pre-decrement semantics). If the
226      * resulting index is less than 0, the index is reset to 0 and
227      * DONE is returned.
228      * @return the previous code unit in the text, or DONE if the new
229      * index is before the start of the text.
230      * @stable ICU 2.4
231      */

232     public abstract int previous();
233
234     
235     /**
236      * Retreat to the start of the previous code point in the text,
237      * and return it (pre-decrement semantics). If the index is not
238      * preceeded by a valid surrogate pair, the behavior is the same
239      * as <code>previous()</code>. Otherwise the iterator is
240      * decremented to the start of the surrogate pair, and the code
241      * point represented by the pair is returned.
242      * @return the previous code point in the text, or DONE if the new
243      * index is before the start of the text.
244      * @stable ICU 2.4
245      */

246     public int previousCodePoint(){
247         int ch1 = previous();
248         if(UTF16.isTrailSurrogate((char)ch1)){
249             int ch2 = previous();
250             if(UTF16.isLeadSurrogate((char)ch2)){
251                 return UCharacterProperty.getRawSupplementary((char)ch2,
252                                                               (char)ch1);
253             }else if (ch2 != DONE) {
254                 //unmatched trail surrogate so back out
255
next();
256             }
257         }
258         return ch1;
259     }
260
261     /**
262      * Sets the index to the specified index in the text.
263      * @param index the index within the text.
264      * @exception IndexOutOfBoundsException is thrown if an invalid index is
265      * supplied
266      * @stable ICU 2.4
267      */

268     public abstract void setIndex(int index);
269
270     /**
271      * Sets the current index to the limit.
272      * @stable ICU 2.4
273      */

274     public void setToLimit() {
275         setIndex(getLength());
276     }
277     
278     /**
279      * Sets the current index to the start.
280      * @stable ICU 2.4
281      */

282     public void setToStart() {
283         setIndex(0);
284     }
285
286     /**
287      * Fills the buffer with the underlying text storage of the iterator
288      * If the buffer capacity is not enough a exception is thrown. The capacity
289      * of the fill in buffer should at least be equal to length of text in the
290      * iterator obtained by calling <code>getLength()</code).
291      * <b>Usage:</b>
292      *
293      * <code>
294      * <pre>
295      * UChacterIterator iter = new UCharacterIterator.getInstance(text);
296      * char[] buf = new char[iter.getLength()];
297      * iter.getText(buf);
298      *
299      * OR
300      * char[] buf= new char[1];
301      * int len = 0;
302      * for(;;){
303      * try{
304      * len = iter.getText(buf);
305      * break;
306      * }catch(IndexOutOfBoundsException e){
307      * buf = new char[iter.getLength()];
308      * }
309      * }
310      * </pre>
311      * </code>
312      *
313      * @param fillIn an array of chars to fill with the underlying UTF-16 code
314      * units.
315      * @param offset the position within the array to start putting the data.
316      * @return the number of code units added to fillIn, as a convenience
317      * @exception IndexOutOfBounds exception if there is not enough
318      * room after offset in the array, or if offset < 0.
319      * @stable ICU 2.4
320      */

321     public abstract int getText(char[] fillIn, int offset);
322
323     /**
324      * Convenience override for <code>getText(char[], int)>/code> that provides
325      * an offset of 0.
326      * @param fillIn an array of chars to fill with the underlying UTF-16 code
327      * units.
328      * @return the number of code units added to fillIn, as a convenience
329      * @exception IndexOutOfBounds exception if there is not enough
330      * room in the array.
331      * @stable ICU 2.4
332      */

333     public final int getText(char[] fillIn) {
334         return getText(fillIn, 0);
335     }
336          
337     /**
338      * Convenience method for returning the underlying text storage as as string
339      * @return the underlying text storage in the iterator as a string
340      * @stable ICU 2.4
341      */

342     public String JavaDoc getText() {
343         char[] text = new char[getLength()];
344         getText(text);
345         return new String JavaDoc(text);
346     }
347        
348     /**
349      * Moves the current position by the number of code units
350      * specified, either forward or backward depending on the sign
351      * of delta (positive or negative respectively). If the resulting
352      * index would be less than zero, the index is set to zero, and if
353      * the resulting index would be greater than limit, the index is
354      * set to limit.
355      *
356      * @param delta the number of code units to move the current
357      * index.
358      * @return the new index.
359      * @exception IndexOutOfBoundsException is thrown if an invalid index is
360      * supplied
361      * @stable ICU 2.4
362      *
363      */

364     public int moveIndex(int delta) {
365         int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
366         setIndex(x);
367         return x;
368     }
369
370     /**
371      * Moves the current position by the number of code points
372      * specified, either forward or backward depending on the sign of
373      * delta (positive or negative respectively). If the current index
374      * is at a trail surrogate then the first adjustment is by code
375      * unit, and the remaining adjustments are by code points. If the
376      * resulting index would be less than zero, the index is set to
377      * zero, and if the resulting index would be greater than limit,
378      * the index is set to limit.
379      * @param delta the number of code units to move the current index.
380      * @return the new index
381      * @exception IndexOutOfBoundsException is thrown if an invalid delta is
382      * supplied
383      * @stable ICU 2.4
384      */

385     public int moveCodePointIndex(int delta){
386         if(delta>0){
387             while(delta>0 && nextCodePoint() != DONE){delta--;}
388         }else{
389             while(delta<0 && previousCodePoint() != DONE){delta++;}
390         }
391         if(delta!=0){
392             throw new IndexOutOfBoundsException JavaDoc();
393         }
394           
395         return getIndex();
396     }
397
398     /**
399      * Creates a copy of this iterator, independent from other iterators.
400      * If it is not possible to clone the iterator, returns null.
401      * @return copy of this iterator
402      * @stable ICU 2.4
403      */

404     public Object JavaDoc clone() throws CloneNotSupportedException JavaDoc{
405         return super.clone();
406     }
407     
408 }
409
410
Popular Tags