KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > ComposedCharIter


1 /*
2  *******************************************************************************
3  * Copyright (C) 1996-2004, International Business Machines Corporation and *
4  * others. All Rights Reserved. *
5  *******************************************************************************
6  */

7 package com.ibm.icu.text;
8 import com.ibm.icu.impl.NormalizerImpl;
9
10 /**
11  * <tt>ComposedCharIter</tt> is an iterator class that returns all
12  * of the precomposed characters defined in the Unicode standard, along
13  * with their decomposed forms. This is often useful when building
14  * data tables (<i>e.g.</i> collation tables) which need to treat composed
15  * and decomposed characters equivalently.
16  * <p>
17  * For example, imagine that you have built a collation table with ordering
18  * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all
19  * characters used in a particular language. When you process input text using
20  * this table, the text must first be decomposed so that it matches the form
21  * used in the table. This can impose a performance penalty that may be
22  * unacceptable in some situations.
23  * <p>
24  * You can avoid this problem by ensuring that the collation table contains
25  * rules for both the decomposed <i>and</i> composed versions of each character.
26  * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
27  * composed characters in Unicode. If the decomposition for that character
28  * consists solely of characters that are listed in your ruleset, you can
29  * add a new rule for the composed character that makes it equivalent to
30  * its decomposition sequence.
31  * <p>
32  * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
33  * of the composed characters in Unicode. If you want to iterate over the
34  * composed characters in a particular string, use {@link Normalizer} instead.
35  * <p>
36  * When constructing a <tt>ComposedCharIter</tt> there is one
37  * optional feature that you can enable or disable:
38  * <ul>
39  * <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
40  * characters and their corresponding Jamo decompositions.
41  * This option is off by default (<i>i.e.</i> Hangul processing is enabled)
42  * since the Unicode standard specifies that Hangul to Jamo
43  * is a canonical decomposition.
44  * </ul>
45  * <p>
46  * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
47  * <a HREF="http://www.unicode.org" target="unicode">Unicode Standard</a>.
48  * It will be updated as later versions of Unicode are released.
49  * @deprecated ICU 2.2
50  */

51 ///CLOVER:OFF
52
public final class ComposedCharIter {
53     
54     /**
55      * Constant that indicates the iteration has completed.
56      * {@link #next} returns this value when there are no more composed characters
57      * over which to iterate.
58      * @deprecated ICU 2.2
59      */

60     public static final char DONE = (char) Normalizer.DONE;
61     
62     /**
63      * Construct a new <tt>ComposedCharIter</tt>. The iterator will return
64      * all Unicode characters with canonical decompositions, including Korean
65      * Hangul characters.
66      * @deprecated ICU 2.2
67      */

68     public ComposedCharIter() {
69         compat = false;
70         options =0;
71     }
72     
73     
74     /**
75      * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
76      * <p>
77      * @param compat <tt>false</tt> for canonical decompositions only;
78      * <tt>true</tt> for both canonical and compatibility
79      * decompositions.
80      *
81      * @param options Optional decomposition features. Currently, the only
82      * supported option is {@link Normalizer#IGNORE_HANGUL}, which
83      * causes this <tt>ComposedCharIter</tt> not to iterate
84      * over the Hangul characters and their corresponding
85      * Jamo decompositions.
86      * @deprecated ICU 2.2
87      */

88     public ComposedCharIter(boolean compat, int options) {
89         this.compat = compat;
90         this.options = options;
91     }
92     
93     /**
94      * Determines whether there any precomposed Unicode characters not yet returned
95      * by {@link #next}.
96      * @deprecated ICU 2.2
97      */

98     public boolean hasNext() {
99         if (nextChar == Normalizer.DONE) {
100             findNextChar();
101         }
102         return nextChar != Normalizer.DONE;
103     }
104     
105     /**
106      * Returns the next precomposed Unicode character.
107      * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
108      * by Unicode, in ascending order. After all precomposed characters have
109      * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
110      * to <tt>next</tt> will return {@link #DONE}.
111      * @deprecated ICU 2.2
112      */

113     public char next() {
114         if (nextChar == Normalizer.DONE) {
115             findNextChar();
116         }
117         curChar = nextChar;
118         nextChar = Normalizer.DONE;
119         return (char) curChar;
120     }
121     
122     /**
123      * Returns the Unicode decomposition of the current character.
124      * This method returns the decomposition of the precomposed character most
125      * recently returned by {@link #next}. The resulting decomposition is
126      * affected by the settings of the options passed to the constructor.
127      * @deprecated ICU 2.2
128      */

129     public String JavaDoc decomposition() {
130         // the decomposition buffer contains the decomposition of
131
// current char so just return it
132
return new String JavaDoc(decompBuf,0, bufLen);
133     }
134     
135     private void findNextChar() {
136         int c=curChar+1;
137         for(;;){
138            if(c < 0xFFFF){
139                bufLen = NormalizerImpl.getDecomposition(c,compat,
140                                                         decompBuf,0,
141                                                         decompBuf.length);
142                if(bufLen>0){
143                     // the curChar can be decomposed... so it is a composed char
144
// cache the result
145
break;
146                }
147                c++;
148            }else{
149                c=Normalizer.DONE;
150                break;
151            }
152         }
153         nextChar=c;
154     }
155     
156     private int options;
157     private boolean compat;
158     private char[] decompBuf = new char[100];
159     private int bufLen=0;
160     private int curChar = 0;
161     private int nextChar = Normalizer.DONE;
162     
163
164 };
165
Popular Tags