KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > text > CollationRules


/*
 * @(#)CollationRules.java 1.26 03/12/19
 *
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */


/*
 * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved
 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
 *
 * The original version of this source code and documentation is copyrighted
 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
 * materials are provided under terms of a License Agreement between Taligent
 * and Sun. This technology is protected by multiple US and International
 * patents. This notice and attribution to Taligent may not be removed.
 * Taligent is a registered trademark of Taligent, Inc.
 *
 */

20
21 package java.text;
/**
 * CollationRules contains the default en_US collation rules as a base
 * for building other collation tables.
 * <p>Note that decompositions are done before these rules are used,
 * so they do not have to contain accented characters, such as A-grave.
 * <p>The rule text uses the {@link RuleBasedCollator} rule syntax. The
 * string is assembled from many small literals so that each entry can
 * carry its own comment; the order of the entries is the collation order
 * and must not be changed.
 * @see RuleBasedCollator
 * @see LocaleElements
 * @version 1.26, 12/19/03
 * @author Helena Shih, Mark Davis
 */
final class CollationRules {
    /**
     * The default rule string.
     * <p>It does not apply French accent ordering; per the original note,
     * that is added in the French delta.
     * <p>NOTE(review): the text is wrapped in {@code new String(...)}, which
     * keeps this field from being a compile-time constant expression.
     * Presumably that is deliberate (a constant would be copied into every
     * class that references it) -- confirm before simplifying to a plain
     * literal.
     */
    static final String DEFAULTRULES = new String(
        "" // no FRENCH accent order by default, add in French Delta

        // ---------------------------------------------------------------
        // IGNORABLES (up to first < character)
        // ---------------------------------------------------------------

        // COMPLETELY IGNORE format characters
        + "='\u200B'=\u200C=\u200D=\u200E=\u200F"

        // Control Characters
        + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" // null, .. eot
        + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" // enq, ...
        + "='\u000b' =\u000e" // vt, so
        + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" // si, dle, dc1, dc2, dc3
        + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" // dc4, nak, syn, etb, can
        + "=\u0019 =\u001a =\u001b =\u001c =\u001d" // em, sub, esc, fs, gs
        + "=\u001e =\u001f =\u007f" // rs, us, del

        // ....then the C1 Latin 1 reserved control codes
        + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085"
        + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b"
        + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091"
        + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097"
        + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d"
        + "=\u009e =\u009f"

        // IGNORE except for secondary, tertiary difference

        // Spaces
        + ";'\u0020';'\u00A0'" // spaces
        + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'" // spaces
        + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'" // spaces
        + ";'\u200A';'\u3000';'\uFEFF'" // spaces
        + ";'\r' ;'\t' ;'\n';'\f';'\u000b'" // whitespace

        // Non-spacing accents
        + ";\u0301" // non-spacing acute accent
        + ";\u0300" // non-spacing grave accent
        + ";\u0306" // non-spacing breve accent
        + ";\u0302" // non-spacing circumflex accent
        + ";\u030c" // non-spacing caron/hacek accent
        + ";\u030a" // non-spacing ring above accent
        + ";\u030d" // non-spacing vertical line above
        + ";\u0308" // non-spacing diaeresis accent
        + ";\u030b" // non-spacing double acute accent
        + ";\u0303" // non-spacing tilde accent
        + ";\u0307" // non-spacing dot above/overdot accent
        + ";\u0304" // non-spacing macron accent
        + ";\u0337" // non-spacing short slash overlay (overstruck diacritic)
        + ";\u0327" // non-spacing cedilla accent
        + ";\u0328" // non-spacing ogonek accent
        + ";\u0323" // non-spacing dot-below/underdot accent
        + ";\u0332" // non-spacing underscore/underline accent

        // with the rest of the general diacritical marks in binary order
        + ";\u0305" // non-spacing overscore/overline
        + ";\u0309" // non-spacing hook above
        + ";\u030e" // non-spacing double vertical line above
        + ";\u030f" // non-spacing double grave
        + ";\u0310" // non-spacing chandrabindu
        + ";\u0311" // non-spacing inverted breve
        + ";\u0312" // non-spacing turned comma above/cedilla above
        + ";\u0313" // non-spacing comma above
        + ";\u0314" // non-spacing reversed comma above
        + ";\u0315" // non-spacing comma above right
        + ";\u0316" // non-spacing grave below
        + ";\u0317" // non-spacing acute below
        + ";\u0318" // non-spacing left tack below
        + ";\u0319" // non-spacing tack below
        + ";\u031a" // non-spacing left angle above
        + ";\u031b" // non-spacing horn
        + ";\u031c" // non-spacing left half ring below
        + ";\u031d" // non-spacing up tack below
        + ";\u031e" // non-spacing down tack below
        + ";\u031f" // non-spacing plus sign below
        + ";\u0320" // non-spacing minus sign below
        + ";\u0321" // non-spacing palatalized hook below
        + ";\u0322" // non-spacing retroflex hook below
        + ";\u0324" // non-spacing double dot below
        + ";\u0325" // non-spacing ring below
        + ";\u0326" // non-spacing comma below
        + ";\u0329" // non-spacing vertical line below
        + ";\u032a" // non-spacing bridge below
        + ";\u032b" // non-spacing inverted double arch below
        + ";\u032c" // non-spacing hacek below
        + ";\u032d" // non-spacing circumflex below
        + ";\u032e" // non-spacing breve below
        + ";\u032f" // non-spacing inverted breve below
        + ";\u0330" // non-spacing tilde below
        + ";\u0331" // non-spacing macron below
        + ";\u0333" // non-spacing double underscore
        + ";\u0334" // non-spacing tilde overlay
        + ";\u0335" // non-spacing short bar overlay
        + ";\u0336" // non-spacing long bar overlay
        + ";\u0338" // non-spacing long slash overlay
        + ";\u0339" // non-spacing right half ring below
        + ";\u033a" // non-spacing inverted bridge below
        + ";\u033b" // non-spacing square below
        + ";\u033c" // non-spacing seagull below
        + ";\u033d" // non-spacing x above
        + ";\u033e" // non-spacing vertical tilde
        + ";\u033f" // non-spacing double overscore
        // U+0340 (non-spacing grave tone mark) is left out: == U+0300
        // U+0341 (non-spacing acute tone mark) is left out: == U+0301
        + ";\u0342;"
        // U+0343 is left out: == U+0313. The trailing ';' above therefore
        // separates U+0342 from U+0344 on the next line.
        + "\u0344;\u0345;\u0360;\u0361" // newer
        + ";\u0483;\u0484;\u0485;\u0486" // Cyrillic accents

        + ";\u20D0;\u20D1;\u20D2" // symbol accents
        + ";\u20D3;\u20D4;\u20D5" // symbol accents
        + ";\u20D6;\u20D7;\u20D8" // symbol accents
        + ";\u20D9;\u20DA;\u20DB" // symbol accents
        + ";\u20DC;\u20DD;\u20DE" // symbol accents
        + ";\u20DF;\u20E0;\u20E1" // symbol accents

        + ",'\u002D';\u00AD" // dashes
        + ";\u2010;\u2011;\u2012" // dashes
        + ";\u2013;\u2014;\u2015" // dashes
        + ";\u2212" // dashes

        // ---------------------------------------------------------------
        // other punctuation
        // ---------------------------------------------------------------
        + "<'\u005f'" // underline/underscore (spacing)
        + "<\u00af" // overline or macron (spacing)
        + "<'\u002c'" // comma (spacing)
        + "<'\u003b'" // semicolon
        + "<'\u003a'" // colon
        + "<'\u0021'" // exclamation point
        + "<\u00a1" // inverted exclamation point
        + "<'\u003f'" // question mark
        + "<\u00bf" // inverted question mark
        + "<'\u002f'" // slash
        + "<'\u002e'" // period/full stop
        + "<\u00b4" // acute accent (spacing)
        + "<'\u0060'" // grave accent (spacing)
        + "<'\u005e'" // circumflex accent (spacing)
        + "<\u00a8" // diaeresis/umlaut accent (spacing)
        + "<'\u007e'" // tilde accent (spacing)
        + "<\u00b7" // middle dot (spacing)
        + "<\u00b8" // cedilla accent (spacing)
        + "<'\u0027'" // apostrophe
        + "<'\"'" // quotation marks
        + "<\u00ab" // left angle quotes
        + "<\u00bb" // right angle quotes
        + "<'\u0028'" // left parenthesis
        + "<'\u0029'" // right parenthesis
        + "<'\u005b'" // left bracket
        + "<'\u005d'" // right bracket
        + "<'\u007b'" // left brace
        + "<'\u007d'" // right brace
        + "<\u00a7" // section symbol
        + "<\u00b6" // paragraph symbol
        + "<\u00a9" // copyright symbol
        + "<\u00ae" // registered trademark symbol
        + "<'\u0040'" // at sign
        + "<\u00a4" // international currency symbol
        + "<\u0e3f" // baht sign
        + "<\u00a2" // cent sign
        + "<\u20a1" // colon sign
        + "<\u20a2" // cruzeiro sign
        + "<'\u0024'" // dollar sign
        + "<\u20ab" // dong sign
        + "<\u20ac" // euro sign
        + "<\u20a3" // franc sign
        + "<\u20a4" // lira sign
        + "<\u20a5" // mill sign
        + "<\u20a6" // naira sign
        + "<\u20a7" // peseta sign
        + "<\u00a3" // pound-sterling sign
        + "<\u20a8" // rupee sign
        + "<\u20aa" // new shekel sign
        + "<\u20a9" // won sign
        + "<\u00a5" // yen sign
        + "<'\u002a'" // asterisk
        + "<'\\'" // backslash
        + "<'\u0026'" // ampersand
        + "<'\u0023'" // number sign
        + "<'\u0025'" // percent sign
        + "<'\u002b'" // plus sign
        + "<\u00b1" // plus-or-minus sign
        + "<\u00f7" // divide sign
        + "<\u00d7" // multiply sign
        + "<'\u003c'" // less-than sign
        + "<'\u003d'" // equal sign
        + "<'\u003e'" // greater-than sign
        + "<\u00ac" // end of line symbol/logical NOT symbol
        + "<'\u007c'" // vertical line/logical OR symbol
        + "<\u00a6" // broken vertical line
        + "<\u00b0" // degree symbol
        + "<\u00b5" // micro symbol

        // ---------------------------------------------------------------
        // NUMERICS
        // ---------------------------------------------------------------
        + "<0<1<2<3<4<5<6<7<8<9"
        + "<\u00bc<\u00bd<\u00be" // 1/4,1/2,3/4 fractions

        // ---------------------------------------------------------------
        // NON-IGNORABLES
        // ---------------------------------------------------------------
        + "<a,A"
        + "<b,B"
        + "<c,C"
        + "<d,D"
        + "<\u00F0,\u00D0" // eth
        + "<e,E"
        + "<f,F"
        + "<g,G"
        + "<h,H"
        + "<i,I"
        + "<j,J"
        + "<k,K"
        + "<l,L"
        + "<m,M"
        + "<n,N"
        + "<o,O"
        + "<p,P"
        + "<q,Q"
        + "<r,R"
        + "<s, S & SS,\u00DF" // s-zet
        + "<t,T"
        + "& TH, \u00DE &TH, \u00FE " // thorn
        + "<u,U"
        + "<v,V"
        + "<w,W"
        + "<x,X"
        + "<y,Y"
        + "<z,Z"
        + "&AE,\u00C6" // ae & AE ligature
        + "&AE,\u00E6"
        + "&OE,\u0152" // oe & OE ligature
        + "&OE,\u0153"
    );
}
258
Popular Tags