Collator


1   /*
2    * @(#)Collator.java    1.39 04/05/05
3    *
4    * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5    * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6    */
7   
8   /*
9    * (C) Copyright Taligent, Inc. 1996-1998 -  All Rights Reserved
10   * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved
11   *
12   *   The original version of this source code and documentation is copyrighted
13   * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
14   * materials are provided under terms of a License Agreement between Taligent
15   * and Sun. This technology is protected by multiple US and International
16   * patents. This notice and attribution to Taligent may not be removed.
17   *   Taligent is a registered trademark of Taligent, Inc.
18   *
19   */
20  
21  package java.text;
22  
23  import java.util.Locale  ;
24  import java.util.MissingResourceException  ;
25  import java.util.ResourceBundle  ;
26  import sun.misc.SoftCache;
27  import sun.text.resources.LocaleData;
28  
29  
30  /**
31   * The <code>Collator</code> class performs locale-sensitive
32   * <code>String</code> comparison. You use this class to build
33   * searching and sorting routines for natural language text.
34   *
35   * <p>
36   * <code>Collator</code> is an abstract base class. Subclasses
37   * implement specific collation strategies. One subclass,
38   * <code>RuleBasedCollator</code>, is currently provided with
39   * the Java 2 platform and is applicable to a wide set of languages. Other
40   * subclasses may be created to handle more specialized needs.
41   *
42   * <p>
43   * Like other locale-sensitive classes, you can use the static
44   * factory method, <code>getInstance</code>, to obtain the appropriate
45   * <code>Collator</code> object for a given locale. You will only need
46   * to look at the subclasses of <code>Collator</code> if you need
47   * to understand the details of a particular collation strategy or
48   * if you need to modify that strategy.
49   *
50   * <p>
51   * The following example shows how to compare two strings using
52   * the <code>Collator</code> for the default locale.
53   * <blockquote>
54   * <pre>
55   * // Compare two strings in the default locale
56   * Collator myCollator = Collator.getInstance();
57   * if( myCollator.compare("abc", "ABC") < 0 )
58   *     System.out.println("abc is less than ABC");
59   * else
60   *     System.out.println("abc is greater than or equal to ABC");
61   * </pre>
62   * </blockquote>
63   *
64   * <p>
65   * You can set a <code>Collator</code>'s <em>strength</em> property
66   * to determine the level of difference considered significant in
67   * comparisons. Four strengths are provided: <code>PRIMARY</code>,
68   * <code>SECONDARY</code>, <code>TERTIARY</code>, and <code>IDENTICAL</code>.
69   * The exact assignment of strengths to language features is
70   * locale dependent.  For example, in Czech, "e" and "f" are considered
71   * primary differences, while "e" and "&#283;" are secondary differences,
72   * "e" and "E" are tertiary differences and "e" and "e" are identical.
73   * The following shows how both case and accents could be ignored for
74   * US English.
75   * <blockquote>
76   * <pre>
77   * //Get the Collator for US English and set its strength to PRIMARY
78   * Collator usCollator = Collator.getInstance(Locale.US);
79   * usCollator.setStrength(Collator.PRIMARY);
80   * if( usCollator.compare("abc", "ABC") == 0 ) {
81   *     System.out.println("Strings are equivalent");
82   * }
83   * </pre>
84   * </blockquote>
85   * <p>
86   * For comparing <code>String</code>s exactly once, the <code>compare</code>
87   * method provides the best performance. When sorting a list of
88   * <code>String</code>s however, it is generally necessary to compare each
89   * <code>String</code> multiple times. In this case, <code>CollationKey</code>s
90   * provide better performance. The <code>CollationKey</code> class converts
91   * a <code>String</code> to a series of bits that can be compared bitwise
92   * against other <code>CollationKey</code>s. A <code>CollationKey</code> is
93   * created by a <code>Collator</code> object for a given <code>String</code>.
94   * <br>
95   * <strong>Note:</strong> <code>CollationKey</code>s from different
96   * <code>Collator</code>s can not be compared. See the class description
97   * for {@link CollationKey}
98   * for an example using <code>CollationKey</code>s.
99   *
100  * @see         RuleBasedCollator
101  * @see         CollationKey
102  * @see         CollationElementIterator
103  * @see         Locale
104  * @version     1.39, 05/05/04
105  * @author      Helena Shih, Laura Werner, Richard Gillam
106  */
107 
108 public abstract class Collator
109     implements java.util.Comparator  <Object  >, Cloneable  
110 {
111     /**
112      * Collator strength value.  When set, only PRIMARY differences are
113      * considered significant during comparison. The assignment of strengths
114      * to language features is locale dependant. A common example is for
115      * different base letters ("a" vs "b") to be considered a PRIMARY difference.
116      * @see java.text.Collator#setStrength
117      * @see java.text.Collator#getStrength
118      */
119     public final static int PRIMARY = 0;
120     /**
121      * Collator strength value.  When set, only SECONDARY and above differences are
122      * considered significant during comparison. The assignment of strengths
123      * to language features is locale dependant. A common example is for
124      * different accented forms of the same base letter ("a" vs "�") to be
125      * considered a SECONDARY difference.
126      * @see java.text.Collator#setStrength
127      * @see java.text.Collator#getStrength
128      */
129     public final static int SECONDARY = 1;
130     /**
131      * Collator strength value.  When set, only TERTIARY and above differences are
132      * considered significant during comparison. The assignment of strengths
133      * to language features is locale dependant. A common example is for
134      * case differences ("a" vs "A") to be considered a TERTIARY difference.
135      * @see java.text.Collator#setStrength
136      * @see java.text.Collator#getStrength
137      */
138     public final static int TERTIARY = 2;
139 
140     /**
141      * Collator strength value.  When set, all differences are
142      * considered significant during comparison. The assignment of strengths
143      * to language features is locale dependant. A common example is for control
144      * characters ("&#092;u0001" vs "&#092;u0002") to be considered equal at the
145      * PRIMARY, SECONDARY, and TERTIARY levels but different at the IDENTICAL
146      * level.  Additionally, differences between pre-composed accents such as
147      * "&#092;u00C0" (A-grave) and combining accents such as "A&#092;u0300"
148      * (A, combining-grave) will be considered significant at the IDENTICAL
149      * level if decomposition is set to NO_DECOMPOSITION.
150      */
151     public final static int IDENTICAL = 3;
152 
153     /**
154      * Decomposition mode value. With NO_DECOMPOSITION
155      * set, accented characters will not be decomposed for collation. This
156      * is the default setting and provides the fastest collation but 
157      * will only produce correct results for languages that do not use accents.
158      * @see java.text.Collator#getDecomposition
159      * @see java.text.Collator#setDecomposition
160      */
161     public final static int NO_DECOMPOSITION = 0;
162 
163     /**
164      * Decomposition mode value. With CANONICAL_DECOMPOSITION
165      * set, characters that are canonical variants according to Unicode 
166      * standard will be decomposed for collation. This should be used to get 
167      * correct collation of accented characters.
168      * <p>
169      * CANONICAL_DECOMPOSITION corresponds to Normalization Form D as
170      * described in 
171      * <a HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode 
172      * Technical Report #15</a>.
173      * @see java.text.Collator#getDecomposition
174      * @see java.text.Collator#setDecomposition
175      */
176     public final static int CANONICAL_DECOMPOSITION = 1;
177 
178     /**
179      * Decomposition mode value. With FULL_DECOMPOSITION
180      * set, both Unicode canonical variants and Unicode compatibility variants
181      * will be decomposed for collation.  This causes not only accented
182      * characters to be collated, but also characters that have special formats
183      * to be collated with their norminal form. For example, the half-width and
184      * full-width ASCII and Katakana characters are then collated together.
185      * FULL_DECOMPOSITION is the most complete and therefore the slowest
186      * decomposition mode.
187      * <p>
188      * FULL_DECOMPOSITION corresponds to Normalization Form KD as
189      * described in 
190      * <a HREF="http://www.unicode.org/unicode/reports/tr15/">Unicode 
191      * Technical Report #15</a>.
192      * @see java.text.Collator#getDecomposition
193      * @see java.text.Collator#setDecomposition
194      */
195     public final static int FULL_DECOMPOSITION = 2;
196 
197     /**
198      * Gets the Collator for the current default locale.
199      * The default locale is determined by java.util.Locale.getDefault.
200      * @return the Collator for the default locale.(for example, en_US)
201      * @see java.util.Locale#getDefault
202      */
203     public static synchronized Collator   getInstance() {
204         return getInstance(Locale.getDefault());
205     }
206 
207     /**
208      * Gets the Collator for the desired locale.
209      * @param desiredLocale the desired locale.
210      * @return the Collator for the desired locale.
211      * @see java.util.Locale
212      * @see java.util.ResourceBundle
213      */
214     public static synchronized
215     Collator   getInstance(Locale   desiredLocale)
216     {
217         RuleBasedCollator   result = null;
218         result = (RuleBasedCollator  ) cache.get(desiredLocale);
219         if (result != null) {
220                  return (Collator  )result.clone();  // make the world safe
221         }
222 
223         // Load the resource of the desired locale from resource
224         // manager.
225         String   colString = "";
226         int decomp = CANONICAL_DECOMPOSITION;
227         
228         try {
229             ResourceBundle   resource = LocaleData.getLocaleElements(desiredLocale);
230 
231             colString = resource.getString("CollationElements");
232             decomp = ((Integer  )resource.getObject("CollationDecomp")).intValue();
233         } catch (MissingResourceException   e) {
234             // Use default values
235         }
236         try
237         {
238             result = new RuleBasedCollator  ( CollationRules.DEFAULTRULES +
239                                             colString,
240                                             decomp );
241         }
242         catch(ParseException   foo)
243         {
244             // predefined tables should contain correct grammar
245             try {
246                 result = new RuleBasedCollator  ( CollationRules.DEFAULTRULES );
247             } catch (ParseException   bar) {
248                 // do nothing
249             }
250         }
251         // Now that RuleBasedCollator adds expansions for pre-composed characters
252         // into their decomposed equivalents, the default collators don't need
253         // to have decomposition turned on.  Laura, 5/5/98, bug 4114077
254         result.setDecomposition(NO_DECOMPOSITION);
255         
256         cache.put(desiredLocale,result);
257         return (Collator  )result.clone();
258     }
259 
260     /**
261      * Compares the source string to the target string according to the
262      * collation rules for this Collator.  Returns an integer less than,
263      * equal to or greater than zero depending on whether the source String is
264      * less than, equal to or greater than the target string.  See the Collator
265      * class description for an example of use.
266      * <p>
267      * For a one time comparison, this method has the best performance. If a
268      * given String will be involved in multiple comparisons, CollationKey.compareTo
269      * has the best performance. See the Collator class description for an example
270      * using CollationKeys.
271      * @param source the source string.
272      * @param target the target string.
273      * @return Returns an integer value. Value is less than zero if source is less than
274      * target, value is zero if source and target are equal, value is greater than zero
275      * if source is greater than target.
276      * @see java.text.CollationKey
277      * @see java.text.Collator#getCollationKey
278      */
279     public abstract int compare(String   source, String   target);
280 
281     /**
282      * Compares its two arguments for order.  Returns a negative integer,
283      * zero, or a positive integer as the first argument is less than, equal
284      * to, or greater than the second.
285      * <p>
286      * This implementation merely returns
287      *  <code> compare((String)o1, (String)o2) </code>.
288      * 
289      * @return a negative integer, zero, or a positive integer as the
290      *         first argument is less than, equal to, or greater than the
291      *         second. 
292      * @exception ClassCastException the arguments cannot be cast to Strings.
293      * @see java.util.Comparator
294      * @since   1.2
295      */
296     public int compare(Object   o1, Object   o2) {
297     return compare((String  )o1, (String  )o2);
298     }
299 
300     /**
301      * Transforms the String into a series of bits that can be compared bitwise
302      * to other CollationKeys. CollationKeys provide better performance than
303      * Collator.compare when Strings are involved in multiple comparisons.
304      * See the Collator class description for an example using CollationKeys.
305      * @param source the string to be transformed into a collation key.
306      * @return the CollationKey for the given String based on this Collator's collation
307      * rules. If the source String is null, a null CollationKey is returned.
308      * @see java.text.CollationKey
309      * @see java.text.Collator#compare
310      */
311     public abstract CollationKey   getCollationKey(String   source);
312 
313     /**
314      * Convenience method for comparing the equality of two strings based on
315      * this Collator's collation rules.
316      * @param source the source string to be compared with.
317      * @param target the target string to be compared with.
318      * @return true if the strings are equal according to the collation
319      * rules.  false, otherwise.
320      * @see java.text.Collator#compare
321      */
322     public boolean equals(String   source, String   target)
323     {
324         return (compare(source, target) == Collator.EQUAL);
325     }
326 
327     /**
328      * Returns this Collator's strength property.  The strength property determines
329      * the minimum level of difference considered significant during comparison.
330      * See the Collator class description for an example of use.
331      * @return this Collator's current strength property.
332      * @see java.text.Collator#setStrength
333      * @see java.text.Collator#PRIMARY
334      * @see java.text.Collator#SECONDARY
335      * @see java.text.Collator#TERTIARY
336      * @see java.text.Collator#IDENTICAL
337      */
338     public synchronized int getStrength()
339     {
340         return strength;
341     }
342 
343     /**
344      * Sets this Collator's strength property.  The strength property determines
345      * the minimum level of difference considered significant during comparison.
346      * See the Collator class description for an example of use.
347      * @param newStrength  the new strength value.
348      * @see java.text.Collator#getStrength
349      * @see java.text.Collator#PRIMARY
350      * @see java.text.Collator#SECONDARY
351      * @see java.text.Collator#TERTIARY
352      * @see java.text.Collator#IDENTICAL
353      * @exception  IllegalArgumentException If the new strength value is not one of
354      * PRIMARY, SECONDARY, TERTIARY or IDENTICAL.
355      */
356     public synchronized void setStrength(int newStrength) {
357         if ((newStrength != PRIMARY) &&
358             (newStrength != SECONDARY) &&
359             (newStrength != TERTIARY) &&
360             (newStrength != IDENTICAL))
361             throw new IllegalArgumentException  ("Incorrect comparison level.");
362         strength = newStrength;
363     }
364 
365     /**
366      * Get the decomposition mode of this Collator. Decomposition mode
367      * determines how Unicode composed characters are handled. Adjusting
368      * decomposition mode allows the user to select between faster and more
369      * complete collation behavior.
370      * <p>The three values for decomposition mode are:
371      * <UL>
372      * <LI>NO_DECOMPOSITION,
373      * <LI>CANONICAL_DECOMPOSITION
374      * <LI>FULL_DECOMPOSITION.
375      * </UL>
376      * See the documentation for these three constants for a description
377      * of their meaning.
378      * @return the decomposition mode
379      * @see java.text.Collator#setDecomposition
380      * @see java.text.Collator#NO_DECOMPOSITION
381      * @see java.text.Collator#CANONICAL_DECOMPOSITION
382      * @see java.text.Collator#FULL_DECOMPOSITION
383      */
384     public synchronized int getDecomposition()
385     {
386         return decmp;
387     }
388     /**
389      * Set the decomposition mode of this Collator. See getDecomposition
390      * for a description of decomposition mode.
391      * @param decompositionMode  the new decomposition mode.
392      * @see java.text.Collator#getDecomposition
393      * @see java.text.Collator#NO_DECOMPOSITION
394      * @see java.text.Collator#CANONICAL_DECOMPOSITION
395      * @see java.text.Collator#FULL_DECOMPOSITION
396      * @exception IllegalArgumentException If the given value is not a valid decomposition
397      * mode.
398      */
399     public synchronized void setDecomposition(int decompositionMode) {
400         if ((decompositionMode != NO_DECOMPOSITION) &&
401             (decompositionMode != CANONICAL_DECOMPOSITION) &&
402             (decompositionMode != FULL_DECOMPOSITION))
403             throw new IllegalArgumentException  ("Wrong decomposition mode.");
404         decmp = decompositionMode;
405     }
406 
407     /**
408      * Returns an array of all locales for which the
409      * <code>getInstance</code> methods of this class can return
410      * localized instances.
411      * The array returned must contain at least a <code>Locale</code>
412      * instance equal to {@link java.util.Locale#US Locale.US}.
413      *
414      * @return An array of locales for which localized
415      *         <code>Collator</code> instances are available.
416      */
417     public static synchronized Locale  [] getAvailableLocales() {
418         return LocaleData.getAvailableLocales("CollationElements");
419     }
420 
421     /**
422      * Overrides Cloneable
423      */
424     public Object   clone()
425     {
426         try {
427             return (Collator  )super.clone();
428         } catch (CloneNotSupportedException   e) {
429             throw new InternalError  ();
430         }
431     }
432 
433     /**
434      * Compares the equality of two Collators.
435      * @param that the Collator to be compared with this.
436      * @return true if this Collator is the same as that Collator;
437      * false otherwise.
438      */
439     public boolean equals(Object   that)
440     {
441         if (this == that) return true;
442         if (that == null) return false;
443         if (getClass() != that.getClass()) return false;
444         Collator   other = (Collator  ) that;
445         return ((strength == other.strength) &&
446                 (decmp == other.decmp));
447     }
448 
449     /**
450      * Generates the hash code for this Collator.
451      */
452     abstract public int hashCode();
453 
/**
 * Default constructor.  It is protected so that subclasses can reach it.
 * Users typically obtain a Collator sub-class by calling the factory
 * method getInstance.  A new Collator starts with TERTIARY strength and
 * CANONICAL_DECOMPOSITION.
 * @see java.text.Collator#getInstance
 */
protected Collator()
{
    this.strength = TERTIARY;
    this.decmp = CANONICAL_DECOMPOSITION;
}
465 
466     private int strength = 0;
467     private int decmp = 0;
468     private static SoftCache cache = new SoftCache();
469 
470     //
471     // FIXME: These three constants should be removed.
472     //
473     /**
474      * LESS is returned if source string is compared to be less than target
475      * string in the compare() method.
476      * @see java.text.Collator#compare
477      */
478     final static int LESS = -1;
479     /**
480      * EQUAL is returned if source string is compared to be equal to target
481      * string in the compare() method.
482      * @see java.text.Collator#compare
483      */
484     final static int EQUAL = 0;
485     /**
486      * GREATER is returned if source string is compared to be greater than
487      * target string in the compare() method.
488      * @see java.text.Collator#compare
489      */
490     final static int GREATER = 1;
491  }
492
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags