ComposedCharIter


1   /*
2    *******************************************************************************
3    * Copyright (C) 1996-2004, International Business Machines Corporation and    *
4    * others. All Rights Reserved.                                                *
5    *******************************************************************************
6    */
7   package com.ibm.icu.text;
8   import com.ibm.icu.impl.NormalizerImpl;
9   
10  /**
11   * <tt>ComposedCharIter</tt> is an iterator class that returns all
12   * of the precomposed characters defined in the Unicode standard, along
13   * with their decomposed forms.  This is often useful when building
14   * data tables (<i>e.g.</i> collation tables) which need to treat composed
15   * and decomposed characters equivalently.
16   * <p>
17   * For example, imagine that you have built a collation table with ordering
18   * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all
19   * characters used in a particular language.  When you process input text using
20   * this table, the text must first be decomposed so that it matches the form
21   * used in the table.  This can impose a performance penalty that may be
22   * unacceptable in some situations.
23   * <p>
24   * You can avoid this problem by ensuring that the collation table contains
25   * rules for both the decomposed <i>and</i> composed versions of each character.
26   * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
27   * composed characters in Unicode.  If the decomposition for that character
28   * consists solely of characters that are listed in your ruleset, you can
29   * add a new rule for the composed character that makes it equivalent to
30   * its decomposition sequence.
31   * <p>
32   * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
33   * of the composed characters in Unicode.  If you want to iterate over the
34   * composed characters in a particular string, use {@link Normalizer} instead.
35   * <p>
36   * When constructing a <tt>ComposedCharIter</tt> there is one
37   * optional feature that you can enable or disable:
38   * <ul>
39   *   <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
40   *          characters and their corresponding Jamo decompositions.
41   *          This option is off by default (<i>i.e.</i> Hangul processing is enabled)
42   *          since the Unicode standard specifies that Hangul to Jamo 
43   *          is a canonical decomposition.
44   * </ul>
45   * <p>
46   * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
47   * <a HREF="http://www.unicode.org" target="unicode">Unicode Standard</a>.
48   * It will be updated as later versions of Unicode are released.
49   * @deprecated ICU 2.2
50   */
51  ///CLOVER:OFF
52  public final class ComposedCharIter {
53      
54      /**
55       * Constant that indicates the iteration has completed.
56       * {@link #next} returns this value when there are no more composed characters
57       * over which to iterate.
58       * @deprecated ICU 2.2
59       */
60      public static final  char DONE = (char) Normalizer.DONE;
61      
62      /**
63       * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
64       * all Unicode characters with canonical decompositions, including Korean
65       * Hangul characters.
66       * @deprecated ICU 2.2
67       */
68      public ComposedCharIter() {
69          compat = false;
70          options =0;
71      }
72      
73      
74      /**
75       * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
76       * <p>
77       * @param compat    <tt>false</tt> for canonical decompositions only;
78       *                  <tt>true</tt> for both canonical and compatibility
79       *                  decompositions.
80       *
81       * @param options   Optional decomposition features.  Currently, the only
82       *                  supported option is {@link Normalizer#IGNORE_HANGUL}, which
83       *                  causes this <tt>ComposedCharIter</tt> not to iterate
84       *                  over the Hangul characters and their corresponding
85       *                  Jamo decompositions.
86       * @deprecated ICU 2.2
87       */
88      public ComposedCharIter(boolean compat, int options) {
89          this.compat = compat;
90          this.options = options;
91      }
92      
93      /**
94       * Determines whether there any precomposed Unicode characters not yet returned
95       * by {@link #next}.
96       * @deprecated ICU 2.2
97       */
98      public boolean hasNext() {
99          if (nextChar == Normalizer.DONE)  {
100             findNextChar();
101         }
102         return nextChar != Normalizer.DONE;
103     }
104     
105     /**
106      * Returns the next precomposed Unicode character.
107      * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
108      * by Unicode, in ascending order.  After all precomposed characters have
109      * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
110      * to <tt>next</tt> will return {@link #DONE}.
111      * @deprecated ICU 2.2
112      */
113     public char next() {
114         if (nextChar == Normalizer.DONE)  {
115             findNextChar();
116         }
117         curChar = nextChar;
118         nextChar = Normalizer.DONE;
119         return (char) curChar;
120     }
121     
122     /**
123      * Returns the Unicode decomposition of the current character.
124      * This method returns the decomposition of the precomposed character most
125      * recently returned by {@link #next}.  The resulting decomposition is
126      * affected by the settings of the options passed to the constructor.
127      * @deprecated ICU 2.2
128      */
129     public String   decomposition() {
130         // the decomposition buffer contains the decomposition of 
131         // current char so just return it
132         return new String  (decompBuf,0, bufLen);
133     }
134     
135     private void findNextChar() {
136         int c=curChar+1;
137         for(;;){
138            if(c < 0xFFFF){
139                bufLen = NormalizerImpl.getDecomposition(c,compat,
140                                                         decompBuf,0,
141                                                         decompBuf.length);
142                if(bufLen>0){
143                     // the curChar can be decomposed... so it is a composed char
144                     // cache the result     
145                     break;
146                }
147                c++;
148            }else{
149                c=Normalizer.DONE;
150                break;
151            }
152         }
153         nextChar=c;  
154     }
155     
156     private int options;
157     private boolean compat;
158     private char[] decompBuf = new char[100];
159     private int bufLen=0;
160     private int curChar = 0;
161     private int nextChar = Normalizer.DONE;
162     
163 
164 };
165
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags