KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > CollationKey


1 /**
2 *******************************************************************************
3 * Copyright (C) 1996-2006, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 *******************************************************************************
6 */

7 package com.ibm.icu.text;
8
9 /**
10  * <p>A <code>CollationKey</code> represents a <code>String</code>
11  * under the rules of a specific <code>Collator</code>
12  * object. Comparing two <code>CollationKey</code>s returns the
13  * relative order of the <code>String</code>s they represent.</p>
14  *
15  * <p>Since the rule set of <code>Collator</code>s can differ, the
16  * sort orders of the same string under two different
17  * <code>Collator</code>s might differ. Hence comparing
18  * <code>CollationKey</code>s generated from different
19  * <code>Collator</code>s can give incorrect results.</p>
20  
21  * <p>Both the method
22  * <code>CollationKey.compareTo(CollationKey)</code> and the method
23  * <code>Collator.compare(String, String)</code> compare two strings
24  * and returns their relative order. The performance characterictics
25  * of these two approaches can differ.</p>
26  *
27  * <p>During the construction of a <code>CollationKey</code>, the
28  * entire source string is examined and processed into a series of
29  * bits terminated by a null, that are stored in the <code>CollationKey</code>.
30  * When <code>CollationKey.compareTo(CollationKey)</code> executes, it
31  * performs bitwise comparison on the bit sequences. This can incurs
32  * startup cost when creating the <code>CollationKey</code>, but once
33  * the key is created, binary comparisons are fast. This approach is
34  * recommended when the same strings are to be compared over and over
35  * again.</p>
36  *
37  * <p>On the other hand, implementations of
38  * <code>Collator.compare(String, String)</code> can examine and
39  * process the strings only until the first characters differing in
40  * order. This approach is recommended if the strings are to be
41  * compared only once.</p>
42  *
43  * <p>More information about the composition of the bit sequence can
44  * be found in the
45  * <a HREF="http://icu.sourceforge.net/userguide/Collate_ServiceArchitecture.html">
46  * user guide</a>.</p>
47  *
48  * <p>The following example shows how <code>CollationKey</code>s can be used
49  * to sort a list of <code>String</code>s.</p>
50  * <blockquote>
51  * <pre>
52  * // Create an array of CollationKeys for the Strings to be sorted.
53  * Collator myCollator = Collator.getInstance();
54  * CollationKey[] keys = new CollationKey[3];
55  * keys[0] = myCollator.getCollationKey("Tom");
56  * keys[1] = myCollator.getCollationKey("Dick");
57  * keys[2] = myCollator.getCollationKey("Harry");
58  * sort( keys );
59  * <br>
60  * //...
61  * <br>
62  * // Inside body of sort routine, compare keys this way
63  * if( keys[i].compareTo( keys[j] ) > 0 )
64  * // swap keys[i] and keys[j]
65  * <br>
66  * //...
67  * <br>
68  * // Finally, when we've returned from sort.
69  * System.out.println( keys[0].getSourceString() );
70  * System.out.println( keys[1].getSourceString() );
71  * System.out.println( keys[2].getSourceString() );
72  * </pre>
73  * </blockquote>
74  * </p>
75  * <p>
76  * This class is not subclassable
77  * </p>
78  * @see Collator
79  * @see RuleBasedCollator
80  * @author Syn Wee Quek
81  * @stable ICU 2.8
82  */

83 public final class CollationKey implements Comparable JavaDoc
84 {
85     // public inner classes -------------------------------------------------
86

87     /**
88      * Options that used in the API CollationKey.getBound() for getting a
89      * CollationKey based on the bound mode requested.
90      * @stable ICU 2.6
91      */

92     public static final class BoundMode
93     {
94         /*
95          * do not change the values assigned to the members of this enum.
96          * Underlying code depends on them having these numbers
97          */

98          
99         /**
100          * Lower bound
101          * @stable ICU 2.6
102          */

103         public static final int LOWER = 0;
104
105         /**
106          * Upper bound that will match strings of exact size
107          * @stable ICU 2.6
108          */

109         public static final int UPPER = 1;
110
111         /**
112          * Upper bound that will match all the strings that have the same
113          * initial substring as the given string
114          * @stable ICU 2.6
115          */

116         public static final int UPPER_LONG = 2;
117
118         /**
119          * Number of bound mode
120          * @stable ICU 2.6
121          */

122         public static final int COUNT = 3;
123         
124         /**
125          * Private Constructor
126          */

127         ///CLOVER:OFF
128
private BoundMode(){}
129         ///CLOVER:ON
130
}
131     
132     // public constructor ---------------------------------------------------
133

134     /**
135      * CollationKey constructor.
136      * This constructor is given public access, unlike the JDK version, to
137      * allow access to users extending the Collator class. See
138      * {@link Collator#getCollationKey(String)}.
139      * @param source string this CollationKey is to represent
140      * @param key array of bytes that represent the collation order of argument
141      * source terminated by a null
142      * @see Collator
143      * @stable ICU 2.8
144      */

145     public CollationKey(String JavaDoc source, byte key[])
146     {
147         m_source_ = source;
148         m_key_ = key;
149         m_hashCode_ = 0;
150         m_length_ = -1;
151     }
152     
153     /**
154      * CollationKey constructor that forces key to release its internal byte
155      * array for adoption. key will have a null byte array after this
156      * construction.
157      * @param source string this CollationKey is to represent
158      * @param key RawCollationKey object that represents the collation order of
159      * argument source.
160      * @see Collator
161      * @see RawCollationKey
162      * @stable ICU 2.8
163      */

164     public CollationKey(String JavaDoc source, RawCollationKey key)
165     {
166         m_source_ = source;
167         m_key_ = key.releaseBytes();
168         m_hashCode_ = 0;
169         m_length_ = -1;
170     }
171     
172     // public getters -------------------------------------------------------
173

174     /**
175      * Return the source string that this CollationKey represents.
176      * @return source string that this CollationKey represents
177      * @stable ICU 2.8
178      */

179     public String JavaDoc getSourceString()
180     {
181         return m_source_;
182     }
183
184     /**
185      * <p>Duplicates and returns the value of this CollationKey as a sequence
186      * of big-endian bytes terminated by a null.</p>
187      *
188      * <p>If two CollationKeys can be legitimately compared, then one can
189      * compare the byte arrays of each to obtain the same result, e.g.
190      * <pre>
191      * byte key1[] = collationkey1.toByteArray();
192      * byte key2[] = collationkey2.toByteArray();
193      * int key, targetkey;
194      * int i = 0;
195      * do {
196      * key = key1[i] & 0xFF;
197      * targetkey = key2[i] & 0xFF;
198      * if (key &lt; targetkey) {
199      * System.out.println("String 1 is less than string 2");
200      * return;
201      * }
202      * if (targetkey &lt; key) {
203      * System.out.println("String 1 is more than string 2");
204      * }
205      * i ++;
206      * } while (key != 0 && targetKey != 0);
207      *
208      * System.out.println("Strings are equal.");
209      * </pre>
210      * </p>
211      * @return CollationKey value in a sequence of big-endian byte bytes
212      * terminated by a null.
213      * @stable ICU 2.8
214      */

215     public byte[] toByteArray()
216     {
217         int length = 0;
218         while (true) {
219             if (m_key_[length] == 0) {
220               break;
221             }
222             length ++;
223         }
224         length ++;
225         byte result[] = new byte[length];
226         System.arraycopy(m_key_, 0, result, 0, length);
227         return result;
228     }
229
230     // public other methods -------------------------------------------------
231

232     /**
233      * <p>Compare this CollationKey to another CollationKey. The
234      * collation rules of the Collator that created this key are
235      * applied.</p>
236      *
237      * <p><strong>Note:</strong> Comparison between CollationKeys
238      * created by different Collators might return incorrect
239      * results. See class documentation.</p>
240      *
241      * @param target target CollationKey
242      * @return an integer value. If the value is less than zero this CollationKey
243      * is less than than target, if the value is zero they are equal, and
244      * if the value is greater than zero this CollationKey is greater
245      * than target.
246      * @exception NullPointerException is thrown if argument is null.
247      * @see Collator#compare(String, String)
248      * @stable ICU 2.8
249      */

250     public int compareTo(CollationKey target)
251     {
252     for (int i = 0;; ++i) {
253         int l = m_key_[i]&0xff;
254         int r = target.m_key_[i]&0xff;
255         if (l < r) {
256         return -1;
257         } else if (l > r) {
258         return 1;
259         } else if (l == 0) {
260         return 0;
261         }
262     }
263     }
264
265     /**
266      * <p>Compare this CollationKey with the specified Object. The
267      * collation rules of the Collator that created this key are
268      * applied.</p>
269      *
270      * <p>See note in compareTo(CollationKey) for warnings about possible
271      * incorrect results.</p>
272      *
273      * @param obj the Object to be compared to.
274      * @return Returns a negative integer, zero, or a positive integer
275      * respectively if this CollationKey is less than, equal to, or
276      * greater than the given Object.
277      * @exception ClassCastException is thrown when the argument is not
278      * a CollationKey. NullPointerException is thrown when the argument
279      * is null.
280      * @see #compareTo(CollationKey)
281      * @stable ICU 2.8
282      */

283     public int compareTo(Object JavaDoc obj)
284     {
285        return compareTo((CollationKey)obj);
286     }
287
288     /**
289      * <p>Compare this CollationKey and the specified Object for
290      * equality. The collation rules of the Collator that created
291      * this key are applied.</p>
292      *
293      * <p>See note in compareTo(CollationKey) for warnings about
294      * possible incorrect results.</p>
295      *
296      * @param target the object to compare to.
297      * @return true if the two keys compare as equal, false otherwise.
298      * @see #compareTo(CollationKey)
299      * @exception ClassCastException is thrown when the argument is not
300      * a CollationKey. NullPointerException is thrown when the argument
301      * is null.
302      * @stable ICU 2.8
303      */

304     public boolean equals(Object JavaDoc target)
305     {
306         if (!(target instanceof CollationKey)) {
307             return false;
308         }
309         
310         return equals((CollationKey)target);
311     }
312     
313     /**
314      * <p>
315      * Compare this CollationKey and the argument target CollationKey for
316      * equality.
317      * The collation
318      * rules of the Collator object which created these objects are applied.
319      * </p>
320      * <p>
321      * See note in compareTo(CollationKey) for warnings of incorrect results
322      * </p>
323      * @param target the CollationKey to compare to.
324      * @return true if two objects are equal, false otherwise.
325      * @exception NullPointerException is thrown when the argument is null.
326      * @stable ICU 2.8
327      */

328     public boolean equals(CollationKey target)
329     {
330         if (this == target) {
331             return true;
332         }
333         if (target == null) {
334             return false;
335         }
336         CollationKey other = (CollationKey)target;
337         int i = 0;
338         while (true) {
339             if (m_key_[i] != other.m_key_[i]) {
340                 return false;
341             }
342             if (m_key_[i] == 0) {
343               break;
344             }
345             i ++;
346         }
347         return true;
348     }
349
350     /**
351      * <p>Returns a hash code for this CollationKey. The hash value is calculated
352      * on the key itself, not the String from which the key was created. Thus
353      * if x and y are CollationKeys, then x.hashCode(x) == y.hashCode()
354      * if x.equals(y) is true. This allows language-sensitive comparison in a
355      * hash table.
356      * </p>
357      * @return the hash value.
358      * @stable ICU 2.8
359      */

360     public int hashCode()
361     {
362         if (m_hashCode_ == 0) {
363             if (m_key_ == null) {
364                 m_hashCode_ = 1;
365             }
366             else {
367                 int size = m_key_.length >> 1;
368                 StringBuffer JavaDoc key = new StringBuffer JavaDoc(size);
369                 int i = 0;
370                 while (m_key_[i] != 0 && m_key_[i + 1] != 0) {
371                     key.append((char)((m_key_[i] << 8) | m_key_[i + 1]));
372                     i += 2;
373                 }
374                 if (m_key_[i] != 0) {
375                     key.append((char)(m_key_[i] << 8));
376                 }
377                 m_hashCode_ = key.toString().hashCode();
378             }
379         }
380         return m_hashCode_;
381     }
382     
383     /**
384      * <p>
385      * Produce a bound for the sort order of a given collation key and a
386      * strength level. This API does not attempt to find a bound for the
387      * CollationKey String representation, hence null will be returned in its
388      * place.
389      * </p>
390      * <p>
391      * Resulting bounds can be used to produce a range of strings that are
392      * between upper and lower bounds. For example, if bounds are produced
393      * for a sortkey of string "smith", strings between upper and lower
394      * bounds with primary strength would include "Smith", "SMITH", "sMiTh".
395      * </p>
396      * <p>
397      * There are two upper bounds that can be produced. If BoundMode.UPPER
398      * is produced, strings matched would be as above. However, if a bound
399      * is produced using BoundMode.UPPER_LONG is used, the above example will
400      * also match "Smithsonian" and similar.
401      * </p>
402      * <p>
403      * For more on usage, see example in test procedure
404      * <a HREF="http://dev.icu-project.org/cgi-bin/viewcvs.cgi/~checkout~/icu4j/src/com/ibm/icu/dev/test/collator/CollationAPITest.java">
405      * src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
406      * </a>
407      * </p>
408      * <p>
409      * Collation keys produced may be compared using the <TT>compare</TT> API.
410      * </p>
411      * @param boundType Mode of bound required. It can be BoundMode.LOWER, which
412      * produces a lower inclusive bound, BoundMode.UPPER, that
413      * produces upper bound that matches strings of the same
414      * length or BoundMode.UPPER_LONG that matches strings that
415      * have the same starting substring as the source string.
416      * @param noOfLevels Strength levels required in the resulting bound
417      * (for most uses, the recommended value is PRIMARY). This
418      * strength should be less than the maximum strength of
419      * this CollationKey.
420      * See users guide for explanation on the strength levels a
421      * collation key can have.
422      * @return the result bounded CollationKey with a valid sort order but
423      * a null String representation.
424      * @exception IllegalArgumentException thrown when the strength level
425      * requested is higher than or equal to the strength in this
426      * CollationKey.
427      * In the case of an Exception, information
428      * about the maximum strength to use will be returned in the
429      * Exception. The user can then call getBound() again with the
430      * appropriate strength.
431      * @see CollationKey
432      * @see CollationKey.BoundMode
433      * @see Collator#PRIMARY
434      * @see Collator#SECONDARY
435      * @see Collator#TERTIARY
436      * @see Collator#QUATERNARY
437      * @see Collator#IDENTICAL
438      * @stable ICU 2.6
439      */

440     public CollationKey getBound(int boundType, int noOfLevels)
441     {
442         // Scan the string until we skip enough of the key OR reach the end of
443
// the key
444
int offset = 0;
445         int keystrength = Collator.PRIMARY;
446         
447         if (noOfLevels > Collator.PRIMARY) {
448             while (offset < m_key_.length && m_key_[offset] != 0) {
449                 if (m_key_[offset ++]
450                         == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
451                     keystrength ++;
452                     noOfLevels --;
453                     if (noOfLevels == Collator.PRIMARY
454                         || offset == m_key_.length || m_key_[offset] == 0) {
455                         offset --;
456                         break;
457                     }
458                 }
459             }
460         }
461         
462         if (noOfLevels > 0) {
463             throw new IllegalArgumentException JavaDoc(
464                                   "Source collation key has only "
465                                   + keystrength
466                                   + " strength level. Call getBound() again "
467                                   + " with noOfLevels < " + keystrength);
468         }
469         
470         // READ ME: this code assumes that the values for BoundMode variables
471
// will not changes. They are set so that the enum value corresponds to
472
// the number of extra bytes each bound type needs.
473
byte resultkey[] = new byte[offset + boundType + 1];
474         System.arraycopy(m_key_, 0, resultkey, 0, offset);
475         switch (boundType) {
476             case BoundMode.LOWER: // = 0
477
// Lower bound just gets terminated. No extra bytes
478
break;
479             case BoundMode.UPPER: // = 1
480
// Upper bound needs one extra byte
481
resultkey[offset ++] = 2;
482                 break;
483             case BoundMode.UPPER_LONG: // = 2
484
// Upper long bound needs two extra bytes
485
resultkey[offset ++] = (byte)0xFF;
486                 resultkey[offset ++] = (byte)0xFF;
487                 break;
488             default:
489                 throw new IllegalArgumentException JavaDoc(
490                                                 "Illegal boundType argument");
491         }
492         resultkey[offset ++] = 0;
493         return new CollationKey(null, resultkey);
494     }
495
496
497     
498     /**
499      * <p>
500      * Merges this CollationKey with another. Only the sorting order of the
501      * CollationKeys will be merged. This API does not attempt to merge the
502      * String representations of the CollationKeys, hence null will be returned
503      * as the String representation.
504      * </p>
505      * <p>
506      * The strength levels are merged with their corresponding counterparts
507      * (PRIMARIES with PRIMARIES, SECONDARIES with SECONDARIES etc.).
508      * </p>
509      * <p>
510      * The merged String representation of the result CollationKey will be a
511      * concatenation of the String representations of the 2 source
512      * CollationKeys.
513      * </p>
514      * <p>
515      * Between the values from the same level a separator is inserted.
516      * example (uncompressed):
517      * <pre>
518      * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00
519      * will be merged as
520      * 191B1D 02 1F212301 050505 02 050505 01 910505 02 910505 00
521      * </pre>
522      * </p>
523      * <p>
524      * This allows for concatenating of first and last names for sorting, among
525      * other things.
526      * </p>
527      * </p>
528      * @param source CollationKey to merge with
529      * @return a CollationKey that contains the valid merged sorting order
530      * with a null String representation,
531      * i.e. <tt>new CollationKey(null, merge_sort_order)</tt>
532      * @exception IllegalArgumentException thrown if source CollationKey
533      * argument is null or of 0 length.
534      * @stable ICU 2.6
535      */

536     public CollationKey merge(CollationKey source)
537     {
538         // check arguments
539
if (source == null || source.getLength() == 0) {
540             throw new IllegalArgumentException JavaDoc(
541                       "CollationKey argument can not be null or of 0 length");
542         }
543     
544         getLength(); // gets the length of this sort key
545
int sourcelength = source.getLength();
546         // 1 extra for the last strength that has no seperators
547
byte result[] = new byte[m_length_ + sourcelength + 2];
548     
549         // merge the sort keys with the same number of levels
550
int rindex = 0;
551         int index = 0;
552         int sourceindex = 0;
553         while (true) {
554             // while both have another level
555
// copy level from src1 not including 00 or 01
556
// unsigned issues
557
while (m_key_[index] < 0 || m_key_[index] >= MERGE_SEPERATOR_) {
558                 result[rindex ++] = m_key_[index ++];
559             }
560     
561             // add a 02 merge separator
562
result[rindex ++] = MERGE_SEPERATOR_;
563     
564             // copy level from src2 not including 00 or 01
565
while (source.m_key_[sourceindex] < 0
566                    || source.m_key_[sourceindex] >= MERGE_SEPERATOR_) {
567                 result[rindex ++] = source.m_key_[sourceindex ++];
568             }
569     
570             // if both sort keys have another level, then add a 01 level
571
// separator and continue
572
if (m_key_[index] == RuleBasedCollator.SORT_LEVEL_TERMINATOR_
573                 && source.m_key_[sourceindex]
574                         == RuleBasedCollator.SORT_LEVEL_TERMINATOR_) {
575                 ++ index;
576                 ++ sourceindex;
577                 result[rindex ++] = RuleBasedCollator.SORT_LEVEL_TERMINATOR_;
578             }
579             else {
580                 break;
581             }
582         }
583     
584         // here, at least one sort key is finished now, but the other one
585
// might have some contents left from containing more levels;
586
// that contents is just appended to the result
587
if (m_key_[index] != 0) {
588             System.arraycopy(m_key_, index, result, rindex,
589                              m_length_ - index);
590         }
591         else if (source.m_key_[sourceindex] != 0) {
592             System.arraycopy(source.m_key_, sourceindex, result, rindex,
593                              source.m_length_ - sourceindex);
594         }
595         result[result.length - 1] = 0;
596     
597         // trust that neither sort key contained illegally embedded zero bytes
598
return new CollationKey(null, result);
599     }
600
601     // private data members -------------------------------------------------
602

603     /**
604      * Sequence of bytes that represents the sort key
605      */

606     private byte m_key_[];
607     
608     /**
609      * Source string this CollationKey represents
610      */

611     private String JavaDoc m_source_;
612
613     /**
614      * Hash code for the key
615      */

616     private int m_hashCode_;
617     /**
618      * Gets the length of this CollationKey
619      */

620     private int m_length_;
621     /**
622      * Collation key merge seperator
623      */

624     private static final int MERGE_SEPERATOR_ = 2;
625     
626     // private methods ------------------------------------------------------
627

628     /**
629      * Gets the length of the CollationKey
630      * @return length of the CollationKey
631      */

632     private int getLength()
633     {
634         if (m_length_ >= 0) {
635             return m_length_;
636         }
637         int length = m_key_.length;
638         for (int index = 0; index < length; index ++) {
639             if (m_key_[index] == 0) {
640                 length = index;
641                 break;
642             }
643         }
644         m_length_ = length;
645         return m_length_;
646     }
647 }
648
Popular Tags