KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > RuleBasedNumberFormat


1 //##header 1189099963000 FOUNDATION
2
/*
3  *******************************************************************************
4  * Copyright (C) 1996-2006, International Business Machines Corporation and *
5  * others. All Rights Reserved. *
6  *******************************************************************************
7  */

8
9 package com.ibm.icu.text;
10
11 //import com.ibm.icu.impl.ICULocaleData;
12
import com.ibm.icu.impl.ICUDebug;
13 import com.ibm.icu.impl.ICUResourceBundle;
14 import com.ibm.icu.impl.UCharacterProperty;
15 import com.ibm.icu.impl.Utility;
16 import com.ibm.icu.util.ULocale;
17 import com.ibm.icu.util.UResourceBundle;
18
19 import java.math.BigInteger JavaDoc;
20 import java.text.FieldPosition JavaDoc;
21 import java.text.ParsePosition JavaDoc;
22 import java.util.Arrays JavaDoc;
23 import java.util.HashMap JavaDoc;
24 import java.util.Locale JavaDoc;
25 import java.util.Map JavaDoc;
26 import java.util.MissingResourceException JavaDoc;
27 import java.util.Set JavaDoc;
28
29 //import java.util.ResourceBundle;
30

31
32 /**
33  * <p>A class that formats numbers according to a set of rules. This number formatter is
34  * typically used for spelling out numeric values in words (e.g., 25,376 as
35  * &quot;twenty-five thousand three hundred seventy-six&quot; or &quot;vingt-cinq mille trois
36  * cents soixante-seize&quot; or
37  * &quot;funfundzwanzigtausenddreihundertsechsundsiebzig&quot;), but can also be used for
38  * other complicated formatting tasks, such as formatting a number of seconds as hours,
39  * minutes and seconds (e.g., 3,730 as &quot;1:02:10&quot;).</p>
40  *
41  * <p>The resources contain three predefined formatters for each locale: spellout, which
42  * spells out a value in words (123 is &quot;one hundred twenty-three&quot;); ordinal, which
43  * appends an ordinal suffix to the end of a numeral (123 is &quot;123rd&quot;); and
44  * duration, which shows a duration in seconds as hours, minutes, and seconds (123 is
45  * &quot;2:03&quot;).&nbsp; The client can also define more specialized <tt>RuleBasedNumberFormat</tt>s
46  * by supplying programmer-defined rule sets.</p>
47  *
48  * <p>The behavior of a <tt>RuleBasedNumberFormat</tt> is specified by a textual description
49  * that is either passed to the constructor as a <tt>String</tt> or loaded from a resource
50  * bundle. In its simplest form, the description consists of a semicolon-delimited list of <em>rules.</em>
51  * Each rule has a string of output text and a value or range of values it is applicable to.
52  * In a typical spellout rule set, the first twenty rules are the words for the numbers from
53  * 0 to 19:</p>
54  *
55  * <pre>zero; one; two; three; four; five; six; seven; eight; nine;
56  * ten; eleven; twelve; thirteen; fourteen; fifteen; sixteen; seventeen; eighteen; nineteen;</pre>
57  *
58  * <p>For larger numbers, we can use the preceding set of rules to format the ones place, and
59  * we only have to supply the words for the multiples of 10:</p>
60  *
61  * <pre>20: twenty[-&gt;&gt;];
62  * 30: thirty[-&gt;&gt;];
63  * 40: forty[-&gt;&gt;];
64  * 50: fifty[-&gt;&gt;];
65  * 60: sixty[-&gt;&gt;];
66  * 70: seventy[-&gt;&gt;];
67  * 80: eighty[-&gt;&gt;];
68  * 90: ninety[-&gt;&gt;];</pre>
69  *
70  * <p>In these rules, the <em>base value</em> is spelled out explicitly and set off from the
71  * rule's output text with a colon. The rules are in a sorted list, and a rule is applicable
72  * to all numbers from its own base value to one less than the next rule's base value. The
73  * &quot;&gt;&gt;&quot; token is called a <em>substitution</em> and tells the formatter to
74  * isolate the number's ones digit, format it using this same set of rules, and place the
75  * result at the position of the &quot;&gt;&gt;&quot; token. Text in brackets is omitted if
76  * the number being formatted is an even multiple of 10 (the hyphen is a literal hyphen; 24
77  * is &quot;twenty-four,&quot; not &quot;twenty four&quot;).</p>
78  *
79  * <p>For even larger numbers, we can actually look up several parts of the number in the
80  * list:</p>
81  *
82  * <pre>100: &lt;&lt; hundred[ &gt;&gt;];</pre>
83  *
84  * <p>The &quot;&lt;&lt;&quot; represents a new kind of substitution. The &lt;&lt; isolates
85  * the hundreds digit (and any digits to its left), formats it using this same rule set, and
86  * places the result where the &quot;&lt;&lt;&quot; was. Notice also that the meaning of
87  * &gt;&gt; has changed: it now refers to both the tens and the ones digits. The meaning of
88  * both substitutions depends on the rule's base value. The base value determines the rule's <em>divisor,</em>
89  * which is the highest power of 10 that is less than or equal to the base value (the user
90  * can change this). To fill in the substitutions, the formatter divides the number being
91  * formatted by the divisor. The integral quotient is used to fill in the &lt;&lt;
92  * substitution, and the remainder is used to fill in the &gt;&gt; substitution. The meaning
93  * of the brackets changes similarly: text in brackets is omitted if the value being
94  * formatted is an even multiple of the rule's divisor. The rules are applied recursively, so
95  * if a substitution is filled in with text that includes another substitution, that
96  * substitution is also filled in.</p>
97  *
98  * <p>This rule covers values up to 999, at which point we add another rule:</p>
99  *
100  * <pre>1000: &lt;&lt; thousand[ &gt;&gt;];</pre>
101  *
102  * <p>Again, the meanings of the brackets and substitution tokens shift because the rule's
103  * base value is a higher power of 10, changing the rule's divisor. This rule can actually be
104  * used all the way up to 999,999. This allows us to finish out the rules as follows:</p>
105  *
106  * <pre>1,000,000: &lt;&lt; million[ &gt;&gt;];
107  * 1,000,000,000: &lt;&lt; billion[ &gt;&gt;];
108  * 1,000,000,000,000: &lt;&lt; trillion[ &gt;&gt;];
109  * 1,000,000,000,000,000: OUT OF RANGE!;</pre>
110  *
111  * <p>Commas, periods, and spaces can be used in the base values to improve legibility and
112  * are ignored by the rule parser. The last rule in the list is customarily treated as an
113  * &quot;overflow rule,&quot; applying to everything from its base value on up, and often (as
114  * in this example) being used to print out an error message or default representation.
115  * Notice also that the size of the major groupings in large numbers is controlled by the
116  * spacing of the rules: because in English we group numbers by thousand, the higher rules
117  * are separated from each other by a factor of 1,000.</p>
118  *
119  * <p>To see how these rules actually work in practice, consider the following example:
120  * Formatting 25,340 with this rule set would work like this:</p>
121  *
122  * <table border="0" width="630">
123  * <tr>
124  * <td width="21"></td>
125  * <td width="257" valign="top"><strong>&lt;&lt; thousand &gt;&gt;</strong></td>
126  * <td width="340" valign="top">[the rule whose base value is 1,000 is applicable to 25,340]</td>
127  * </tr>
128  * <tr>
129  * <td width="21"></td>
130  * <td width="257" valign="top"><strong>twenty-&gt;&gt;</strong> thousand &gt;&gt;</td>
131  * <td width="340" valign="top">[25,340 over 1,000 is 25. The rule for 20 applies.]</td>
132  * </tr>
133  * <tr>
134  * <td width="21"></td>
135  * <td width="257" valign="top">twenty-<strong>five</strong> thousand &gt;&gt;</td>
136  * <td width="340" valign="top">[25 mod 10 is 5. The rule for 5 is &quot;five.&quot;]</td>
137  * </tr>
138  * <tr>
139  * <td width="21"></td>
140  * <td width="257" valign="top">twenty-five thousand <strong>&lt;&lt; hundred &gt;&gt;</strong></td>
141  * <td width="340" valign="top">[25,340 mod 1,000 is 340. The rule for 100 applies.]</td>
142  * </tr>
143  * <tr>
144  * <td width="21"></td>
145  * <td width="257" valign="top">twenty-five thousand <strong>three</strong> hundred &gt;&gt;</td>
146  * <td width="340" valign="top">[340 over 100 is 3. The rule for 3 is &quot;three.&quot;]</td>
147  * </tr>
148  * <tr>
149  * <td width="21"></td>
150  * <td width="257" valign="top">twenty-five thousand three hundred <strong>forty</strong></td>
151  * <td width="340" valign="top">[340 mod 100 is 40. The rule for 40 applies. Since 40 divides
152  * evenly by 10, the hyphen and substitution in the brackets are omitted.]</td>
153  * </tr>
154  * </table>
155  *
156  * <p>The above syntax suffices only to format positive integers. To format negative numbers,
157  * we add a special rule:</p>
158  *
159  * <pre>-x: minus &gt;&gt;;</pre>
160  *
161  * <p>This is called a <em>negative-number rule,</em> and is identified by &quot;-x&quot;
162  * where the base value would be. This rule is used to format all negative numbers. The
163  * &gt;&gt; token here means &quot;find the number's absolute value, format it with these
164  * rules, and put the result here.&quot;</p>
165  *
166  * <p>We also add a special rule called a <em>fraction rule </em>for numbers with fractional
167  * parts:</p>
168  *
169  * <pre>x.x: &lt;&lt; point &gt;&gt;;</pre>
170  *
171  * <p>This rule is used for all positive non-integers (negative non-integers pass through the
172  * negative-number rule first and then through this rule). Here, the &lt;&lt; token refers to
173  * the number's integral part, and the &gt;&gt; to the number's fractional part. The
174  * fractional part is formatted as a series of single-digit numbers (e.g., 123.456 would be
175  * formatted as &quot;one hundred twenty-three point four five six&quot;).</p>
176  *
177  * <p>To see how this rule syntax is applied to various languages, examine the resource data.</p>
178  *
179  * <p>There is actually much more flexibility built into the rule language than the
180  * description above shows. A formatter may own multiple rule sets, which can be selected by
181  * the caller, and which can use each other to fill in their substitutions. Substitutions can
182  * also be filled in with digits, using a DecimalFormat object. There is syntax that can be
183  * used to alter a rule's divisor in various ways. And there is provision for much more
184  * flexible fraction handling. A complete description of the rule syntax follows:</p>
185  *
186  * <hr>
187  *
188  * <p>The description of a <tt>RuleBasedNumberFormat</tt>'s behavior consists of one or more <em>rule
189  * sets.</em> Each rule set consists of a name, a colon, and a list of <em>rules.</em> A rule
190  * set name must begin with a % sign. Rule sets with names that begin with a single % sign
191  * are <em>public:</em> the caller can specify that they be used to format and parse numbers.
192  * Rule sets with names that begin with %% are <em>private:</em> they exist only for the use
193  * of other rule sets. If a formatter only has one rule set, the name may be omitted.</p>
194  *
195  * <p>The user can also specify a special &quot;rule set&quot; named <tt>%%lenient-parse</tt>.
196  * The body of <tt>%%lenient-parse</tt> isn't a set of number-formatting rules, but a <tt>RuleBasedCollator</tt>
197  * description which is used to define equivalences for lenient parsing. For more information
198  * on the syntax, see <tt>RuleBasedCollator</tt>. For more information on lenient parsing,
199  * see <tt>setLenientParse()</tt>. <em>Note:</em> symbols that have syntactic meaning
200  * in collation rules, such as '&amp;', have no particular meaning when appearing outside
201  * of the <tt>lenient-parse</tt> rule set.</p>
202  *
203  * <p>The body of a rule set consists of an ordered, semicolon-delimited list of <em>rules.</em>
204  * Internally, every rule has a base value, a divisor, rule text, and zero, one, or two <em>substitutions.</em>
205  * These parameters are controlled by the description syntax, which consists of a <em>rule
206  * descriptor,</em> a colon, and a <em>rule body.</em></p>
207  *
208  * <p>A rule descriptor can take one of the following forms (text in <em>italics</em> is the
209  * name of a token):</p>
210  *
211  * <table border="0" width="100%">
212  * <tr>
213  * <td width="5%" valign="top"></td>
214  * <td width="8%" valign="top"><em>bv</em>:</td>
215  * <td valign="top"><em>bv</em> specifies the rule's base value. <em>bv</em> is a decimal
216  * number expressed using ASCII digits. <em>bv</em> may contain spaces, periods, and commas,
217  * which are ignored. The rule's divisor is the highest power of 10 less than or equal to
218  * the base value.</td>
219  * </tr>
220  * <tr>
221  * <td width="5%" valign="top"></td>
222  * <td width="8%" valign="top"><em>bv</em>/<em>rad</em>:</td>
223  * <td valign="top"><em>bv</em> specifies the rule's base value. The rule's divisor is the
224  * highest power of <em>rad</em> less than or equal to the base value.</td>
225  * </tr>
226  * <tr>
227  * <td width="5%" valign="top"></td>
228  * <td width="8%" valign="top"><em>bv</em>&gt;:</td>
229  * <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
230  * let the radix be 10, and the exponent be the highest exponent of the radix that yields a
231  * result less than or equal to the base value. Every &gt; character after the base value
232  * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
233  * raised to the power of the exponent; otherwise, the divisor is 1.</td>
234  * </tr>
235  * <tr>
236  * <td width="5%" valign="top"></td>
237  * <td width="8%" valign="top"><em>bv</em>/<em>rad</em>&gt;:</td>
238  * <td valign="top"><em>bv</em> specifies the rule's base value. To calculate the divisor,
239  * let the radix be <em>rad</em>, and the exponent be the highest exponent of the radix that
240  * yields a result less than or equal to the base value. Every &gt; character after the radix
241  * decreases the exponent by 1. If the exponent is positive or 0, the divisor is the radix
242  * raised to the power of the exponent; otherwise, the divisor is 1.</td>
243  * </tr>
244  * <tr>
245  * <td width="5%" valign="top"></td>
246  * <td width="8%" valign="top">-x:</td>
247  * <td valign="top">The rule is a negative-number rule.</td>
248  * </tr>
249  * <tr>
250  * <td width="5%" valign="top"></td>
251  * <td width="8%" valign="top">x.x:</td>
252  * <td valign="top">The rule is an <em>improper fraction rule.</em></td>
253  * </tr>
254  * <tr>
255  * <td width="5%" valign="top"></td>
256  * <td width="8%" valign="top">0.x:</td>
257  * <td valign="top">The rule is a <em>proper fraction rule.</em></td>
258  * </tr>
259  * <tr>
260  * <td width="5%" valign="top"></td>
261  * <td width="8%" valign="top">x.0:</td>
262  * <td valign="top">The rule is a <em>master rule.</em></td>
263  * </tr>
264  * <tr>
265  * <td width="5%" valign="top"></td>
266  * <td width="8%" valign="top"><em>nothing</em></td>
267  * <td valign="top">If the rule's rule descriptor is left out, the base value is one plus the
268  * preceding rule's base value (or zero if this is the first rule in the list) in a normal
269  * rule set.&nbsp; In a fraction rule set, the base value is the same as the preceding rule's
270  * base value.</td>
271  * </tr>
272  * </table>
273  *
274  * <p>A rule set may be either a regular rule set or a <em>fraction rule set,</em> depending
275  * on whether it is used to format a number's integral part (or the whole number) or a
276  * number's fractional part. Using a rule set to format a rule's fractional part makes it a
277  * fraction rule set.</p>
278  *
279  * <p>Which rule is used to format a number is defined according to one of the following
280  * algorithms: If the rule set is a regular rule set, do the following:
281  *
282  * <ul>
283  * <li>If the rule set includes a master rule (and the number was passed in as a <tt>double</tt>),
284  * use the master rule.&nbsp; (If the number being formatted was passed in as a <tt>long</tt>,
285  * the master rule is ignored.)</li>
286  * <li>If the number is negative, use the negative-number rule.</li>
287  * <li>If the number has a fractional part and is greater than 1, use the improper fraction
288  * rule.</li>
289  * <li>If the number has a fractional part and is between 0 and 1, use the proper fraction
290  * rule.</li>
291  * <li>Binary-search the rule list for the rule with the highest base value less than or equal
292  * to the number. If that rule has two substitutions, its base value is not an even multiple
293  * of its divisor, and the number <em>is</em> an even multiple of the rule's divisor, use the
294  * rule that precedes it in the rule list. Otherwise, use the rule itself.</li>
295  * </ul>
296  *
297  * <p>If the rule set is a fraction rule set, do the following:
298  *
299  * <ul>
300  * <li>Ignore negative-number and fraction rules.</li>
301  * <li>For each rule in the list, multiply the number being formatted (which will always be
302  * between 0 and 1) by the rule's base value. Keep track of the distance between the result and
303  * the nearest integer.</li>
304  * <li>Use the rule that produced the result closest to zero in the above calculation. In the
305  * event of a tie or a direct hit, use the first matching rule encountered. (The idea here is
306  * to try each rule's base value as a possible denominator of a fraction. Whichever
307  * denominator produces the fraction closest in value to the number being formatted wins.) If
308  * the rule following the matching rule has the same base value, use it if the numerator of
309  * the fraction is anything other than 1; if the numerator is 1, use the original matching
310  * rule. (This is to allow singular and plural forms of the rule text without a lot of extra
311  * hassle.)</li>
312  * </ul>
313  *
314  * <p>A rule's body consists of a string of characters terminated by a semicolon. The rule
315  * may include zero, one, or two <em>substitution tokens,</em> and a range of text in
316  * brackets. The brackets denote optional text (and may also include one or both
317  * substitutions). The exact meanings of the substitution tokens, and under what conditions
318  * optional text is omitted, depend on the syntax of the substitution token and the context.
319  * The rest of the text in a rule body is literal text that is output when the rule matches
320  * the number being formatted.</p>
321  *
322  * <p>A substitution token begins and ends with a <em>token character.</em> The token
323  * character and the context together specify a mathematical operation to be performed on the
324  * number being formatted. An optional <em>substitution descriptor </em>specifies how the
325  * value resulting from that operation is used to fill in the substitution. The position of
326  * the substitution token in the rule body specifies the location of the resultant text in
327  * the original rule text.</p>
328  *
329  * <p>The meanings of the substitution token characters are as follows:</p>
330  *
331  * <table border="0" width="100%">
332  * <tr>
333  * <td width="37"></td>
334  * <td width="23">&gt;&gt;</td>
335  * <td width="165" valign="top">in normal rule</td>
336  * <td>Divide the number by the rule's divisor and format the remainder</td>
337  * </tr>
338  * <tr>
339  * <td width="37"></td>
340  * <td width="23"></td>
341  * <td width="165" valign="top">in negative-number rule</td>
342  * <td>Find the absolute value of the number and format the result</td>
343  * </tr>
344  * <tr>
345  * <td width="37"></td>
346  * <td width="23"></td>
347  * <td width="165" valign="top">in fraction or master rule</td>
348  * <td>Isolate the number's fractional part and format it.</td>
349  * </tr>
350  * <tr>
351  * <td width="37"></td>
352  * <td width="23"></td>
353  * <td width="165" valign="top">in rule in fraction rule set</td>
354  * <td>Not allowed.</td>
355  * </tr>
356  * <tr>
357  * <td width="37"></td>
358  * <td width="23">&gt;&gt;&gt;</td>
359  * <td width="165" valign="top">in normal rule</td>
360  * <td>Divide the number by the rule's divisor and format the remainder,
361  * but bypass the normal rule-selection process and just use the
362  * rule that precedes this one in this rule list.</td>
363  * </tr>
364  * <tr>
365  * <td width="37"></td>
366  * <td width="23"></td>
367  * <td width="165" valign="top">in all other rules</td>
368  * <td>Not allowed.</td>
369  * </tr>
370  * <tr>
371  * <td width="37"></td>
372  * <td width="23">&lt;&lt;</td>
373  * <td width="165" valign="top">in normal rule</td>
374  * <td>Divide the number by the rule's divisor and format the quotient</td>
375  * </tr>
376  * <tr>
377  * <td width="37"></td>
378  * <td width="23"></td>
379  * <td width="165" valign="top">in negative-number rule</td>
380  * <td>Not allowed.</td>
381  * </tr>
382  * <tr>
383  * <td width="37"></td>
384  * <td width="23"></td>
385  * <td width="165" valign="top">in fraction or master rule</td>
386  * <td>Isolate the number's integral part and format it.</td>
387  * </tr>
388  * <tr>
389  * <td width="37"></td>
390  * <td width="23"></td>
391  * <td width="165" valign="top">in rule in fraction rule set</td>
392  * <td>Multiply the number by the rule's base value and format the result.</td>
393  * </tr>
394  * <tr>
395  * <td width="37"></td>
396  * <td width="23">==</td>
397  * <td width="165" valign="top">in all rule sets</td>
398  * <td>Format the number unchanged</td>
399  * </tr>
400  * <tr>
401  * <td width="37"></td>
402  * <td width="23">[]</td>
403  * <td width="165" valign="top">in normal rule</td>
404  * <td>Omit the optional text if the number is an even multiple of the rule's divisor</td>
405  * </tr>
406  * <tr>
407  * <td width="37"></td>
408  * <td width="23"></td>
409  * <td width="165" valign="top">in negative-number rule</td>
410  * <td>Not allowed.</td>
411  * </tr>
412  * <tr>
413  * <td width="37"></td>
414  * <td width="23"></td>
415  * <td width="165" valign="top">in improper-fraction rule</td>
416  * <td>Omit the optional text if the number is between 0 and 1 (same as specifying both an
417  * x.x rule and a 0.x rule)</td>
418  * </tr>
419  * <tr>
420  * <td width="37"></td>
421  * <td width="23"></td>
422  * <td width="165" valign="top">in master rule</td>
423  * <td>Omit the optional text if the number is an integer (same as specifying both an x.x
424  * rule and an x.0 rule)</td>
425  * </tr>
426  * <tr>
427  * <td width="37"></td>
428  * <td width="23"></td>
429  * <td width="165" valign="top">in proper-fraction rule</td>
430  * <td>Not allowed.</td>
431  * </tr>
432  * <tr>
433  * <td width="37"></td>
434  * <td width="23"></td>
435  * <td width="165" valign="top">in rule in fraction rule set</td>
436  * <td>Omit the optional text if multiplying the number by the rule's base value yields 1.</td>
437  * </tr>
438  * </table>
439  *
440  * <p>The substitution descriptor (i.e., the text between the token characters) may take one
441  * of three forms:</p>
442  *
443  * <table border="0" width="100%">
444  * <tr>
445  * <td width="42"></td>
446  * <td width="166" valign="top">a rule set name</td>
447  * <td>Perform the mathematical operation on the number, and format the result using the
448  * named rule set.</td>
449  * </tr>
450  * <tr>
451  * <td width="42"></td>
452  * <td width="166" valign="top">a DecimalFormat pattern</td>
453  * <td>Perform the mathematical operation on the number, and format the result using a
454  * DecimalFormat with the specified pattern.&nbsp; The pattern must begin with 0 or #.</td>
455  * </tr>
456  * <tr>
457  * <td width="42"></td>
458  * <td width="166" valign="top">nothing</td>
459  * <td>Perform the mathematical operation on the number, and format the result using the rule
460  * set containing the current rule, except:<ul>
461  * <li>You can't have an empty substitution descriptor with a == substitution.</li>
462  * <li>If you omit the substitution descriptor in a &gt;&gt; substitution in a fraction rule,
463  * format the result one digit at a time using the rule set containing the current rule.</li>
464  * <li>If you omit the substitution descriptor in a &lt;&lt; substitution in a rule in a
465  * fraction rule set, format the result using the default rule set for this formatter.</li>
466  * </ul>
467  * </td>
468  * </tr>
469  * </table>
470  *
471  * <p>Whitespace is ignored between a rule set name and a rule set body, between a rule
472  * descriptor and a rule body, or between rules. If a rule body begins with an apostrophe,
473  * the apostrophe is ignored, but all text after it becomes significant (this is how you can
474  * have a rule's rule text begin with whitespace). There is no escape function: the semicolon
475  * is not allowed in rule set names or in rule text, and the colon is not allowed in rule set
476  * names. The characters beginning a substitution token are always treated as the beginning
477  * of a substitution token.</p>
478  *
479  * <p>See the resource data and the demo program for annotated examples of real rule sets
480  * using these features.</p>
481  *
482  * @author Richard Gillam
483  * @see NumberFormat
484  * @see DecimalFormat
485  * @stable ICU 2.0
486  */

487 public class RuleBasedNumberFormat extends NumberFormat {
488
489     //-----------------------------------------------------------------------
490
// constants
491
//-----------------------------------------------------------------------
492

493     // Generated by serialver from JDK 1.4.1_01
494
static final long serialVersionUID = -7664252765575395068L;
495     
496     /**
497      * Puts a copyright in the .class file
498      */

499     private static final String JavaDoc copyrightNotice
500         = "Copyright \u00a91997-2004 IBM Corp. All rights reserved.";
501
502     /**
503      * Selector code that tells the constructor to create a spellout formatter
504      * @stable ICU 2.0
505      */

506     public static final int SPELLOUT = 1;
507
508     /**
509      * Selector code that tells the constructor to create an ordinal formatter
510      * @stable ICU 2.0
511      */

512     public static final int ORDINAL = 2;
513
514     /**
515      * Selector code that tells the constructor to create a duration formatter
516      * @stable ICU 2.0
517      */

518     public static final int DURATION = 3;
519
520     //-----------------------------------------------------------------------
521
// data members
522
//-----------------------------------------------------------------------
523

524     /**
525      * The formatter's rule sets.
526      */

527     private transient NFRuleSet[] ruleSets = null;
528
529     /**
530      * A pointer to the formatter's default rule set. This is always included
531      * in ruleSets.
532      */

533     private transient NFRuleSet defaultRuleSet = null;
534
535     /**
536      * The formatter's locale. This is used to create DecimalFormatSymbols and
537      * Collator objects.
538      * @serial
539      */

540     private ULocale locale = null;
541
542     /**
543      * Collator to be used in lenient parsing. This variable is lazy-evaluated:
544      * the collator is actually created the first time the client does a parse
545      * with lenient-parse mode turned on.
546      */

547     private transient Collator collator = null;
548
549     /**
550      * The DecimalFormatSymbols object that any DecimalFormat objects this
551      * formatter uses should use. This variable is lazy-evaluated: it isn't
552      * filled in if the rule set never uses a DecimalFormat pattern.
553      */

554     private transient DecimalFormatSymbols decimalFormatSymbols = null;
555
556     /**
557      * Flag specifying whether lenient parse mode is on or off. Off by default.
558      * @serial
559      */

560     private boolean lenientParse = false;
561
562     /**
563      * If the description specifies lenient-parse rules, they're stored here until
564      * the collator is created.
565      */

566     private transient String JavaDoc lenientParseRules;
567
568     /**
569      * If the description specifies post-process rules, they're stored here until
570      * post-processing is required.
571      */

572     private transient String JavaDoc postProcessRules;
573
574     /**
575      * Post processor lazily constructed from the postProcessRules.
576      */

577     private transient RBNFPostProcessor postProcessor;
578
579     /**
580      * Localizations for rule set names.
581      * @serial
582      */

583     private Map JavaDoc ruleSetDisplayNames;
584
585     /**
586      * The public rule set names.
587      * @serial
588      */

589     private String JavaDoc[] publicRuleSetNames;
590     
591     private static final boolean DEBUG = ICUDebug.enabled("rbnf");
592
593     //-----------------------------------------------------------------------
594
// constructors
595
//-----------------------------------------------------------------------
596

/**
 * Builds a formatter whose behavior is given by a rule description. The
 * formatter is bound to the current default locale.
 * @param description The rules describing the formatter's behavior. The
 * description syntax is explained in full in the class documentation.
 * @stable ICU 2.0
 */
public RuleBasedNumberFormat(String description) {
    // No rule-set-name localizations are supplied by this constructor.
    this.locale = ULocale.getDefault();
    init(description, null);
}
609
/**
 * Builds a formatter whose behavior is given by a rule description,
 * together with localized display names for its public rule sets. The
 * formatter is bound to the current default locale.
 * <p>
 * Layout of the localizations data: the first array lists the names of the
 * public rule sets, and its first entry is the initial default rule set.
 * Every following array holds the localizations for one locale; it is one
 * element longer than the first array, starting with the ULocale ID and
 * continuing with the localized rule set names in the same order as the
 * first array.
 * @param description The rules describing the formatter's behavior. The
 * description syntax is explained in full in the class documentation.
 * @param localizations a list of localizations for the rule set
 * names in the description.
 * @draft ICU 3.2
 * @provisional This API might change or be removed in a future release.
 */
public RuleBasedNumberFormat(String description, String[][] localizations) {
    this.locale = ULocale.getDefault();
    init(description, localizations);
}
636
/**
 * Builds a formatter whose behavior is given by a rule description, bound
 * to the given java.util.Locale. The locale decides which characters are
 * used when formatting values as numerals and which characters count as
 * equivalent during lenient parsing.
 * @param description The rules describing the formatter's behavior. The
 * description syntax is explained in full in the class documentation.
 * @param locale The locale governing numeral characters and lenient-parse
 * equivalences.
 * @stable ICU 2.0
 */
public RuleBasedNumberFormat(String description, Locale locale) {
    // Convert to a ULocale and defer to the ULocale-based constructor.
    this(description, ULocale.forLocale(locale));
}
653
/**
 * Builds a formatter whose behavior is given by a rule description, bound
 * to the given ULocale. The locale decides which characters are used when
 * formatting values as numerals and which characters count as equivalent
 * during lenient parsing.
 * @param description The rules describing the formatter's behavior. The
 * description syntax is explained in full in the class documentation.
 * @param locale The locale governing numeral characters and lenient-parse
 * equivalences.
 * @draft ICU 3.2
 * @provisional This API might change or be removed in a future release.
 */
public RuleBasedNumberFormat(String description, ULocale locale) {
    // The locale is recorded before init() runs; no localizations are supplied.
    this.locale = locale;
    init(description, null);
}
672
/**
 * Builds a formatter whose behavior is given by a rule description,
 * together with localized display names for its public rule sets, bound to
 * the given ULocale. The locale decides which characters are used when
 * formatting values as numerals and which characters count as equivalent
 * during lenient parsing.
 * <p>
 * Layout of the localizations data: the first array lists the names of the
 * public rule sets, and its first entry is the initial default rule set.
 * Every following array holds the localizations for one locale; it is one
 * element longer than the first array, starting with the ULocale ID and
 * continuing with the localized rule set names in the same order as the
 * first array.
 * @param description The rules describing the formatter's behavior. The
 * description syntax is explained in full in the class documentation.
 * @param localizations a list of localizations for the rule set names in the description.
 * @param locale The ulocale governing numeral characters and lenient-parse
 * equivalences.
 * @draft ICU 3.2
 * @provisional This API might change or be removed in a future release.
 */
public RuleBasedNumberFormat(String description, String[][] localizations, ULocale locale) {
    // The locale is recorded before init() runs.
    this.locale = locale;
    init(description, localizations);
}
703
/**
 * Builds a formatter from one of the predefined descriptions for a
 * java.util.Locale. The selector code chooses among three predefined
 * formats: spellout, ordinal, and duration.
 * @param locale The locale for the formatter.
 * @param format The selector code. Legal values are SPELLOUT (spells the
 * value out in words in the desired language), ORDINAL (attaches an ordinal
 * suffix from the desired language to the end of a number, e.g. "123rd"),
 * and DURATION (formats a duration in seconds as hours, minutes, and seconds).
 * @stable ICU 2.0
 */
public RuleBasedNumberFormat(Locale locale, int format) {
    // Convert to a ULocale and defer to the ULocale-based constructor.
    this(ULocale.forLocale(locale), format);
}
719
720     /**
721      * Creates a RuleBasedNumberFormat from a predefined description. The selector
722      * code choosed among three possible predefined formats: spellout, ordinal,
723      * and duration.
724      * @param locale The locale for the formatter.
725      * @param format A selector code specifying which kind of formatter to create for that
726      * locale. There are three legal values: SPELLOUT, which creates a formatter that
727      * spells out a value in words in the desired language, ORDINAL, which attaches
728      * an ordinal suffix from the desired language to the end of a number (e.g. "123rd"),
729      * and DURATION, which formats a duration in seconds as hours, minutes, and seconds.
730      * @draft ICU 3.2
731      * @provisional This API might change or be removed in a future release.
732      */

733     public RuleBasedNumberFormat(ULocale locale, int format) {
734         this.locale = locale;
735
736         ICUResourceBundle bundle = (ICUResourceBundle)UResourceBundle.
737             getBundleInstance(ICUResourceBundle.ICU_RBNF_BASE_NAME, locale);
738
739         // TODO: determine correct actual/valid locale. Note ambiguity
740
// here -- do actual/valid refer to pattern, DecimalFormatSymbols,
741
// or Collator?
742
ULocale uloc = bundle.getULocale();
743         setLocale(uloc, uloc);
744
745         String JavaDoc description = "";
746         String JavaDoc[][] localizations = null;
747
748         try {
749             description = bundle.getString(rulenames[format-1]);
750             ICUResourceBundle locb = bundle.get(locnames[format-1]);
751             localizations = new String JavaDoc[locb.getSize()][];
752             for (int i = 0; i < localizations.length; ++i) {
753                 localizations[i] = locb.get(i).getStringArray();
754             }
755         }
756         catch (MissingResourceException JavaDoc e) {
757             // might have description and no localizations, or no description...
758
}
759
760         init(description, localizations);
761     }
762
763     private static final String JavaDoc[] rulenames = {
764         "SpelloutRules", "OrdinalRules", "DurationRules",
765     };
766     private static final String JavaDoc[] locnames = {
767         "SpelloutLocalizations", "OrdinalLocalizations", "DurationLocalizations",
768     };
769
/**
 * Builds a formatter from one of the predefined descriptions for the
 * current default locale.
 * @param format The selector code. Legal values are SPELLOUT (spells the
 * value out in words in the default locale's language), ORDINAL (attaches
 * an ordinal suffix from the default locale's language to a numeral), and
 * DURATION (formats a duration in seconds as hours, minutes, and seconds).
 * @stable ICU 2.0
 */
public RuleBasedNumberFormat(int format) {
    // Defer to the ULocale-based constructor with the default locale.
    this(ULocale.getDefault(), format);
}
783
784     //-----------------------------------------------------------------------
785
// boilerplate
786
//-----------------------------------------------------------------------
787

788     /**
789      * Duplicates this formatter.
790      * @return A RuleBasedNumberFormat that is equal to this one.
791      * @stable ICU 2.0
792      */

793     public Object JavaDoc clone() {
794         return super.clone();
795     }
796
797     /**
798      * Tests two RuleBasedNumberFormats for equality.
799      * @param that The formatter to compare against this one.
800      * @return true if the two formatters have identical behavior.
801      * @stable ICU 2.0
802      */

803     public boolean equals(Object JavaDoc that) {
804         // if the other object isn't a RuleBasedNumberFormat, that's
805
// all we need to know
806
if (!(that instanceof RuleBasedNumberFormat)) {
807             return false;
808         } else {
809             // cast the other object's pointer to a pointer to a
810
// RuleBasedNumberFormat
811
RuleBasedNumberFormat that2 = (RuleBasedNumberFormat)that;
812
813             // compare their locales and lenient-parse modes
814
if (!locale.equals(that2.locale) || lenientParse != that2.lenientParse) {
815                 return false;
816             }
817
818             // if that succeeds, then compare their rule set lists
819
if (ruleSets.length != that2.ruleSets.length) {
820                 return false;
821             }
822             for (int i = 0; i < ruleSets.length; i++) {
823                 if (!ruleSets[i].equals(that2.ruleSets[i])) {
824                     return false;
825                 }
826             }
827
828             return true;
829         }
830     }
831
832     /**
833      * Generates a textual description of this formatter.
834      * @return a String containing a rule set that will produce a RuleBasedNumberFormat
835      * with identical behavior to this one. This won't necessarily be identical
836      * to the rule set description that was originally passed in, but will produce
837      * the same result.
838      * @stable ICU 2.0
839      */

840     public String JavaDoc toString() {
841
842         // accumulate the descriptions of all the rule sets in a
843
// StringBuffer, then cast it to a String and return it
844
StringBuffer JavaDoc result = new StringBuffer JavaDoc();
845         for (int i = 0; i < ruleSets.length; i++) {
846             result.append(ruleSets[i].toString());
847         }
848         return result.toString();
849     }
850
851     /**
852      * Writes this object to a stream.
853      * @param out The stream to write to.
854      */

855     private void writeObject(java.io.ObjectOutputStream JavaDoc out)
856         throws java.io.IOException JavaDoc {
857         // we just write the textual description to the stream, so we
858
// have an implementation-independent streaming format
859
out.writeUTF(this.toString());
860         out.writeObject(this.locale);
861     }
862
863     /**
864      * Reads this object in from a stream.
865      * @param in The stream to read from.
866      */

867     private void readObject(java.io.ObjectInputStream JavaDoc in)
868         throws java.io.IOException JavaDoc, java.lang.ClassNotFoundException JavaDoc {
869
870         // read the description in from the stream
871
String JavaDoc description = in.readUTF();
872         ULocale loc;
873         
874         try {
875             loc = (ULocale) in.readObject();
876         } catch (Exception JavaDoc e) {
877             loc = ULocale.getDefault();
878         }
879
880         // build a brand-new RuleBasedNumberFormat from the description,
881
// then steal its substructure. This object's substructure and
882
// the temporary RuleBasedNumberFormat drop on the floor and
883
// get swept up by the garbage collector
884
RuleBasedNumberFormat temp = new RuleBasedNumberFormat(description, loc);
885         ruleSets = temp.ruleSets;
886         defaultRuleSet = temp.defaultRuleSet;
887         publicRuleSetNames = temp.publicRuleSetNames;
888         decimalFormatSymbols = temp.decimalFormatSymbols;
889         locale = temp.locale;
890     }
891
892
893     //-----------------------------------------------------------------------
894
// public API functions
895
//-----------------------------------------------------------------------
896

897     /**
898      * Returns a list of the names of all of this formatter's public rule sets.
899      * @return A list of the names of all of this formatter's public rule sets.
900      * @stable ICU 2.0
901      */

902     public String JavaDoc[] getRuleSetNames() {
903         return (String JavaDoc[])publicRuleSetNames.clone();
904     }
905
906     /**
907      * Return a list of locales for which there are locale-specific display names
908      * for the rule sets in this formatter. If there are no localized display names, return null.
909      * @return an array of the ulocales for which there is rule set display name information
910      * @draft ICU 3.2
911      * @provisional This API might change or be removed in a future release.
912      */

913     public ULocale[] getRuleSetDisplayNameLocales() {
914         if (ruleSetDisplayNames != null) {
915             Set JavaDoc s = ruleSetDisplayNames.keySet();
916             String JavaDoc[] locales = (String JavaDoc[])s.toArray(new String JavaDoc[s.size()]);
917             Arrays.sort(locales, String.CASE_INSENSITIVE_ORDER);
918             ULocale[] result = new ULocale[locales.length];
919             for (int i = 0; i < locales.length; ++i) {
920                 result[i] = new ULocale(locales[i]);
921             }
922             return result;
923         }
924         return null;
925     }
926
927     private String JavaDoc[] getNameListForLocale(ULocale locale) {
928         if (locale != null && ruleSetDisplayNames != null) {
929             String JavaDoc[] localeNames = { locale.getBaseName(), ULocale.getDefault().getBaseName() };
930             for (int i = 0; i < localeNames.length; ++i) {
931                 String JavaDoc lname = localeNames[i];
932                 while (lname.length() > 0) {
933                     String JavaDoc[] names = (String JavaDoc[])ruleSetDisplayNames.get(lname);
934                     if (names != null) {
935                         return names;
936                     }
937                     lname = ULocale.getFallback(lname);
938                 }
939             }
940         }
941         return null;
942     }
943
944     /**
945      * Return the rule set display names for the provided locale. These are in the same order
946      * as those returned by getRuleSetNames. The locale is matched against the locales for
947      * which there is display name data, using normal fallback rules. If no locale matches,
948      * the default display names are returned. (These are the internal rule set names minus
949      * the leading '%'.)
950      * @return an array of the locales that have display name information
951      * @see #getRuleSetNames
952      * @draft ICU 3.2
953      * @provisional This API might change or be removed in a future release.
954      */

955     public String JavaDoc[] getRuleSetDisplayNames(ULocale locale) {
956         String JavaDoc[] names = getNameListForLocale(locale);
957         if (names != null) {
958             return (String JavaDoc[])names.clone();
959         }
960         names = getRuleSetNames();
961         for (int i = 0; i < names.length; ++i) {
962             names[i] = names[i].substring(1);
963         }
964         return names;
965     }
966
967     /**
968      * Return the rule set display names for the current default locale.
969      * @return an array of the display names
970      * @draft ICU 3.2
971      * @see #getRuleSetDisplayNames(ULocale)
972      * @provisional This API might change or be removed in a future release.
973      */

974     public String JavaDoc[] getRuleSetDisplayNames() {
975         return getRuleSetDisplayNames(ULocale.getDefault());
976     }
977
978     /**
979      * Return the rule set display name for the provided rule set and locale.
980      * The locale is matched against the locales for which there is display name data, using
981      * normal fallback rules. If no locale matches, the default display name is returned.
982      * @return the display name for the rule set
983      * @draft ICU 3.2
984      * @see #getRuleSetDisplayNames
985      * @throws IllegalArgumentException if ruleSetName is not a valid rule set name for this format
986      * @provisional This API might change or be removed in a future release.
987      */

988     public String JavaDoc getRuleSetDisplayName(String JavaDoc ruleSetName, ULocale locale) {
989         String JavaDoc[] rsnames = publicRuleSetNames;
990         for (int ix = 0; ix < rsnames.length; ++ix) {
991             if (rsnames[ix].equals(ruleSetName)) {
992                 String JavaDoc[] names = getNameListForLocale(locale);
993                 if (names != null) {
994                     return names[ix];
995                 }
996                 return rsnames[ix].substring(1);
997             }
998         }
999         throw new IllegalArgumentException JavaDoc("unrecognized rule set name: " + ruleSetName);
1000    }
1001
1002    /**
1003     * Return the rule set display name for the provided rule set in the current default locale.
1004     * @return the display name for the rule set
1005     * @draft ICU 3.2
1006     * @see #getRuleSetDisplayName(String,ULocale)
1007     * @provisional This API might change or be removed in a future release.
1008     */

1009    public String JavaDoc getRuleSetDisplayName(String JavaDoc ruleSetName) {
1010        return getRuleSetDisplayName(ruleSetName, ULocale.getDefault());
1011    }
1012
1013    /**
1014     * Formats the specified number according to the specified rule set.
1015     * @param number The number to format.
1016     * @param ruleSet The name of the rule set to format the number with.
1017     * This must be the name of a valid public rule set for this formatter.
1018     * @return A textual representation of the number.
1019     * @stable ICU 2.0
1020     */

1021    public String JavaDoc format(double number, String JavaDoc ruleSet) throws IllegalArgumentException JavaDoc {
1022        if (ruleSet.startsWith("%%")) {
1023            throw new IllegalArgumentException JavaDoc("Can't use internal rule set");
1024        }
1025        return format(number, findRuleSet(ruleSet));
1026    }
1027
1028    /**
1029     * Formats the specified number according to the specified rule set.
1030     * (If the specified rule set specifies a master ["x.0"] rule, this function
1031     * ignores it. Convert the number to a double first if you ned it.) This
1032     * function preserves all the precision in the long-- it doesn't convert it
1033     * to a double.
1034     * @param number The number to format.
1035     * @param ruleSet The name of the rule set to format the number with.
1036     * This must be the name of a valid public rule set for this formatter.
1037     * @return A textual representation of the number.
1038     * @stable ICU 2.0
1039     */

1040    public String JavaDoc format(long number, String JavaDoc ruleSet) throws IllegalArgumentException JavaDoc {
1041        if (ruleSet.startsWith("%%")) {
1042            throw new IllegalArgumentException JavaDoc("Can't use internal rule set");
1043        }
1044        return format(number, findRuleSet(ruleSet));
1045    }
1046
1047    /**
1048     * Formats the specified number using the formatter's default rule set.
1049     * (The default rule set is the last public rule set defined in the description.)
1050     * @param number The number to format.
1051     * @param toAppendTo A StringBuffer that the result should be appended to.
1052     * @param ignore This function doesn't examine or update the field position.
1053     * @return toAppendTo
1054     * @stable ICU 2.0
1055     */

1056    public StringBuffer JavaDoc format(double number,
1057                               StringBuffer JavaDoc toAppendTo,
1058                               FieldPosition JavaDoc ignore) {
1059        // this is one of the inherited format() methods. Since it doesn't
1060
// have a way to select the rule set to use, it just uses the
1061
// default one
1062
toAppendTo.append(format(number, defaultRuleSet));
1063        return toAppendTo;
1064    }
1065
1066    /**
1067     * Formats the specified number using the formatter's default rule set.
1068     * (The default rule set is the last public rule set defined in the description.)
1069     * (If the specified rule set specifies a master ["x.0"] rule, this function
1070     * ignores it. Convert the number to a double first if you ned it.) This
1071     * function preserves all the precision in the long-- it doesn't convert it
1072     * to a double.
1073     * @param number The number to format.
1074     * @param toAppendTo A StringBuffer that the result should be appended to.
1075     * @param ignore This function doesn't examine or update the field position.
1076     * @return toAppendTo
1077     * @stable ICU 2.0
1078     */

1079    public StringBuffer JavaDoc format(long number,
1080                               StringBuffer JavaDoc toAppendTo,
1081                               FieldPosition JavaDoc ignore) {
1082        // this is one of the inherited format() methods. Since it doesn't
1083
// have a way to select the rule set to use, it just uses the
1084
// default one
1085
toAppendTo.append(format(number, defaultRuleSet));
1086        return toAppendTo;
1087    }
1088
1089    /**
1090     * <strong><font face=helvetica color=red>NEW</font></strong>
1091     * Implement com.ibm.icu.text.NumberFormat:
1092     * Format a BigInteger.
1093     * @stable ICU 2.0
1094     */

1095    public StringBuffer JavaDoc format(BigInteger JavaDoc number,
1096                               StringBuffer JavaDoc toAppendTo,
1097                               FieldPosition JavaDoc pos) {
1098        return format(new com.ibm.icu.math.BigDecimal(number), toAppendTo, pos);
1099    }
1100
1101//#ifndef FOUNDATION
1102
//## /**
1103
//## * <strong><font face=helvetica color=red>NEW</font></strong>
1104
//## * Implement com.ibm.icu.text.NumberFormat:
1105
//## * Format a BigDecimal.
1106
//## * @stable ICU 2.0
1107
//## */
1108
//## public StringBuffer format(java.math.BigDecimal number,
1109
//## StringBuffer toAppendTo,
1110
//## FieldPosition pos) {
1111
//## return format(new com.ibm.icu.math.BigDecimal(number), toAppendTo, pos);
1112
//## }
1113
//#endif
1114

1115    /**
1116     * <strong><font face=helvetica color=red>NEW</font></strong>
1117     * Implement com.ibm.icu.text.NumberFormat:
1118     * Format a BigDecimal.
1119     * @stable ICU 2.0
1120     */

1121    public StringBuffer JavaDoc format(com.ibm.icu.math.BigDecimal number,
1122                               StringBuffer JavaDoc toAppendTo,
1123                               FieldPosition JavaDoc pos) {
1124        // TEMPORARY:
1125
return format(number.doubleValue(), toAppendTo, pos);
1126    }
1127
1128    /**
1129     * Parses the specfied string, beginning at the specified position, according
1130     * to this formatter's rules. This will match the string against all of the
1131     * formatter's public rule sets and return the value corresponding to the longest
1132     * parseable substring. This function's behavior is affected by the lenient
1133     * parse mode.
1134     * @param text The string to parse
1135     * @param parsePosition On entry, contains the position of the first character
1136     * in "text" to examine. On exit, has been updated to contain the position
1137     * of the first character in "text" that wasn't consumed by the parse.
1138     * @return The number that corresponds to the parsed text. This will be an
1139     * instance of either Long or Double, depending on whether the result has a
1140     * fractional part.
1141     * @see #setLenientParseMode
1142     * @stable ICU 2.0
1143     */

1144    public Number JavaDoc parse(String JavaDoc text, ParsePosition JavaDoc parsePosition) {
1145
1146        // parsePosition tells us where to start parsing. We copy the
1147
// text in the string from here to the end inro a new string,
1148
// and create a new ParsePosition and result variable to use
1149
// for the duration of the parse operation
1150
String JavaDoc workingText = text.substring(parsePosition.getIndex());
1151        ParsePosition JavaDoc workingPos = new ParsePosition JavaDoc(0);
1152        Number JavaDoc tempResult = null;
1153
1154        // keep track of the largest number of characters consumed in
1155
// the various trials, and the result that corresponds to it
1156
Number JavaDoc result = new Long JavaDoc(0);
1157        ParsePosition JavaDoc highWaterMark = new ParsePosition JavaDoc(workingPos.getIndex());
1158
1159        // iterate over the public rule sets (beginning with the default one)
1160
// and try parsing the text with each of them. Keep track of which
1161
// one consumes the most characters: that's the one that determines
1162
// the result we return
1163
for (int i = ruleSets.length - 1; i >= 0; i--) {
1164            // skip private rule sets
1165
if (ruleSets[i].getName().startsWith("%%")) {
1166                continue;
1167            }
1168
1169            // try parsing the string with the rule set. If it gets past the
1170
// high-water mark, update the high-water mark and the result
1171
tempResult = ruleSets[i].parse(workingText, workingPos, Double.MAX_VALUE);
1172            if (workingPos.getIndex() > highWaterMark.getIndex()) {
1173                result = tempResult;
1174                highWaterMark.setIndex(workingPos.getIndex());
1175            }
1176            // commented out because this API on ParsePosition doesn't exist in 1.1.x
1177
// if (workingPos.getErrorIndex() > highWaterMark.getErrorIndex()) {
1178
// highWaterMark.setErrorIndex(workingPos.getErrorIndex());
1179
// }
1180

1181            // if we manage to use up all the characters in the string,
1182
// we don't have to try any more rule sets
1183
if (highWaterMark.getIndex() == workingText.length()) {
1184                break;
1185            }
1186
1187            // otherwise, reset our internal parse position to the
1188
// beginning and try again with the next rule set
1189
workingPos.setIndex(0);
1190        }
1191
1192        // add the high water mark to our original parse position and
1193
// return the result
1194
parsePosition.setIndex(parsePosition.getIndex() + highWaterMark.getIndex());
1195        // commented out because this API on ParsePosition doesn't exist in 1.1.x
1196
// if (highWaterMark.getIndex() == 0) {
1197
// parsePosition.setErrorIndex(parsePosition.getIndex() + highWaterMark.getErrorIndex());
1198
// }
1199
return result;
1200    }
1201
1202    /**
1203     * Turns lenient parse mode on and off.
1204     *
1205     * When in lenient parse mode, the formatter uses a Collator for parsing the text.
1206     * Only primary differences are treated as significant. This means that case
1207     * differences, accent differences, alternate spellings of the same letter
1208     * (e.g., ae and a-umlaut in German), ignorable characters, etc. are ignored in
1209     * matching the text. In many cases, numerals will be accepted in place of words
1210     * or phrases as well.
1211     *
1212     * For example, all of the following will correctly parse as 255 in English in
1213     * lenient-parse mode:
1214     * <br>"two hundred fifty-five"
1215     * <br>"two hundred fifty five"
1216     * <br>"TWO HUNDRED FIFTY-FIVE"
1217     * <br>"twohundredfiftyfive"
1218     * <br>"2 hundred fifty-5"
1219     *
1220     * The Collator used is determined by the locale that was
1221     * passed to this object on construction. The description passed to this object
1222     * on construction may supply additional collation rules that are appended to the
1223     * end of the default collator for the locale, enabling additional equivalences
1224     * (such as adding more ignorable characters or permitting spelled-out version of
1225     * symbols; see the demo program for examples).
1226     *
1227     * It's important to emphasize that even strict parsing is relatively lenient: it
1228     * will accept some text that it won't produce as output. In English, for example,
1229     * it will correctly parse "two hundred zero" and "fifteen hundred".
1230     *
1231     * @param enabled If true, turns lenient-parse mode on; if false, turns it off.
1232     * @see RuleBasedCollator
1233     * @stable ICU 2.0
1234     */

1235    public void setLenientParseMode(boolean enabled) {
1236        lenientParse = enabled;
1237
1238        // if we're leaving lenient-parse mode, throw away the collator
1239
// we've been using
1240
if (!enabled) {
1241            collator = null;
1242        }
1243    }
1244
1245    /**
1246     * Returns true if lenient-parse mode is turned on. Lenient parsing is off
1247     * by default.
1248     * @return true if lenient-parse mode is turned on.
1249     * @see #setLenientParseMode
1250     * @stable ICU 2.0
1251     */

1252    public boolean lenientParseEnabled() {
1253        return lenientParse;
1254    }
1255
1256    /**
1257     * Override the default rule set to use. If ruleSetName is null, reset
1258     * to the initial default rule set.
1259     * @param ruleSetName the name of the rule set, or null to reset the initial default.
1260     * @throws IllegalArgumentException if ruleSetName is not the name of a public ruleset.
1261     * @stable ICU 2.0
1262     */

1263    public void setDefaultRuleSet(String JavaDoc ruleSetName) {
1264        if (ruleSetName == null) {
1265            if (publicRuleSetNames.length > 0) {
1266                defaultRuleSet = findRuleSet(publicRuleSetNames[0]);
1267            } else {
1268            defaultRuleSet = null;
1269            int n = ruleSets.length;
1270        while (--n >= 0) {
1271          if (ruleSets[n].isPublic()) {
1272            defaultRuleSet = ruleSets[n];
1273            break;
1274          }
1275        }
1276            }
1277        } else if (ruleSetName.startsWith("%%")) {
1278            throw new IllegalArgumentException JavaDoc("cannot use private rule set: " + ruleSetName);
1279        } else {
1280            defaultRuleSet = findRuleSet(ruleSetName);
1281        }
1282    }
1283
1284    /**
1285     * Return the name of the current default rule set.
1286     * @return the name of the current default rule set, if it is public, else the empty string.
1287     * @stable ICU 3.0
1288     */

1289    public String JavaDoc getDefaultRuleSetName() {
1290        if (defaultRuleSet != null && defaultRuleSet.isPublic()) {
1291            return defaultRuleSet.getName();
1292        }
1293        return "";
1294    }
1295
1296    //-----------------------------------------------------------------------
1297
// package-internal API
1298
//-----------------------------------------------------------------------
1299

1300    /**
1301     * Returns a reference to the formatter's default rule set. The default
1302     * rule set is the last public rule set in the description, or the one
1303     * most recently set by setDefaultRuleSet.
1304     * @return The formatter's default rule set.
1305     */

1306    NFRuleSet getDefaultRuleSet() {
1307        return defaultRuleSet;
1308    }
1309
1310    /**
1311     * Returns the collator to use for lenient parsing. The collator is lazily created:
1312     * this function creates it the first time it's called.
1313     * @return The collator to use for lenient parsing, or null if lenient parsing
1314     * is turned off.
1315     */

1316    Collator getCollator() {
1317        // lazy-evaulate the collator
1318
if (collator == null && lenientParse) {
1319            try {
1320                // create a default collator based on the formatter's locale,
1321
// then pull out that collator's rules, append any additional
1322
// rules specified in the description, and create a _new_
1323
// collator based on the combinaiton of those rules
1324
RuleBasedCollator temp = (RuleBasedCollator)Collator.getInstance(locale);
1325                String JavaDoc rules = temp.getRules() + lenientParseRules;
1326
1327                collator = new RuleBasedCollator(rules);
1328                collator.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
1329            }
1330            catch (Exception JavaDoc e) {
1331                // If we get here, it means we have a malformed set of
1332
// collation rules, which hopefully won't happen
1333
if(DEBUG){
1334                    e.printStackTrace();
1335                }
1336                collator = null;
1337            }
1338        }
1339
1340        // if lenient-parse mode is off, this will be null
1341
// (see setLenientParseMode())
1342
return collator;
1343    }
1344
1345    /**
1346     * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1347     * instances owned by this formatter. This object is lazily created: this function
1348     * creates it the first time it's called.
1349     * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1350     * instances owned by this formatter.
1351     */

1352    DecimalFormatSymbols getDecimalFormatSymbols() {
1353        // lazy-evaluate the DecimalFormatSymbols object. This object
1354
// is shared by all DecimalFormat instances belonging to this
1355
// formatter
1356
if (decimalFormatSymbols == null) {
1357            decimalFormatSymbols = new DecimalFormatSymbols(locale);
1358        }
1359        return decimalFormatSymbols;
1360    }
1361
1362    //-----------------------------------------------------------------------
1363
// construction implementation
1364
//-----------------------------------------------------------------------
1365

1366    /**
1367     * This extracts the special information from the rule sets before the
1368     * main parsing starts. Extra whitespace must have already been removed
1369     * from the description. If found, the special information is removed from the
1370     * description and returned, otherwise the description is unchanged and null
1371     * is returned. Note: the trailing semicolon at the end of the special
1372     * rules is stripped.
1373     * @param description the rbnf description with extra whitespace removed
1374     * @param specialName the name of the special rule text to extract
1375     * @return the special rule text, or null if the rule was not found
1376     */

1377    private String JavaDoc extractSpecial(StringBuffer JavaDoc description, String JavaDoc specialName) {
1378        String JavaDoc result = null;
1379        int lp = Utility.indexOf(description, specialName);
1380        if (lp != -1) {
1381            // we've got to make sure we're not in the middle of a rule
1382
// (where specialName would actually get treated as
1383
// rule text)
1384
if (lp == 0 || description.charAt(lp - 1) == ';') {
1385                // locate the beginning and end of the actual special
1386
// rules (there may be whitespace between the name and
1387
// the first token in the description)
1388
int lpEnd = Utility.indexOf(description, ";%", lp);
1389
1390                if (lpEnd == -1) {
1391                    lpEnd = description.length() - 1; // later we add 1 back to get the '%'
1392
}
1393                int lpStart = lp + specialName.length();
1394                while (lpStart < lpEnd &&
1395                       UCharacterProperty.isRuleWhiteSpace(description.charAt(lpStart))) {
1396                    ++lpStart;
1397                }
1398
1399                // copy out the special rules
1400
result = description.substring(lpStart, lpEnd);
1401
1402                // remove the special rule from the description
1403
description.delete(lp, lpEnd+1); // delete the semicolon but not the '%'
1404
}
1405        }
1406        return result;
1407    }
1408
1409    /**
1410     * This function parses the description and uses it to build all of
1411     * internal data structures that the formatter uses to do formatting
1412     * @param description The description of the formatter's desired behavior.
1413     * This is either passed in by the caller or loaded out of a resource
1414     * by one of the constructors, and is in the description format specified
1415     * in the class docs.
1416     */

1417    private void init(String JavaDoc description, String JavaDoc[][] localizations) {
1418        initLocalizations(localizations);
1419
1420        // start by stripping the trailing whitespace from all the rules
1421
// (this is all the whitespace follwing each semicolon in the
1422
// description). This allows us to look for rule-set boundaries
1423
// by searching for ";%" without having to worry about whitespace
1424
// between the ; and the %
1425
StringBuffer JavaDoc descBuf = stripWhitespace(description);
1426
1427        // check to see if there's a set of lenient-parse rules. If there
1428
// is, pull them out into our temporary holding place for them,
1429
// and delete them from the description before the real desciption-
1430
// parsing code sees them
1431

1432        lenientParseRules = extractSpecial(descBuf, "%%lenient-parse:");
1433        postProcessRules = extractSpecial(descBuf, "%%post-process:");
1434
1435        // pre-flight parsing the description and count the number of
1436
// rule sets (";%" marks the end of one rule set and the beginning
1437
// of the next)
1438
int numRuleSets = 0;
1439        for (int p = Utility.indexOf(descBuf, ";%"); p != -1; p = Utility.indexOf(descBuf, ";%", p)) {
1440            ++numRuleSets;
1441            ++p;
1442        }
1443        ++numRuleSets;
1444
1445        // our rule list is an array of the apprpriate size
1446
ruleSets = new NFRuleSet[numRuleSets];
1447
1448        // divide up the descriptions into individual rule-set descriptions
1449
// and store them in a temporary array. At each step, we also
1450
// new up a rule set, but all this does is initialize its name
1451
// and remove it from its description. We can't actually parse
1452
// the rest of the descriptions and finish initializing everything
1453
// because we have to know the names and locations of all the rule
1454
// sets before we can actually set everything up
1455
String JavaDoc[] ruleSetDescriptions = new String JavaDoc[numRuleSets];
1456
1457        int curRuleSet = 0;
1458        int start = 0;
1459        for (int p = Utility.indexOf(descBuf, ";%"); p != -1; p = Utility.indexOf(descBuf, ";%", start)) {
1460            ruleSetDescriptions[curRuleSet] = descBuf.substring(start, p + 1);
1461            ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet);
1462            ++curRuleSet;
1463            start = p + 1;
1464        }
1465        ruleSetDescriptions[curRuleSet] = descBuf.substring(start);
1466        ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet);
1467
1468        // now we can take note of the formatter's default rule set, which
1469
// is the last public rule set in the description (it's the last
1470
// rather than the first so that a user can create a new formatter
1471
// from an existing formatter and change its default bevhaior just
1472
// by appending more rule sets to the end)
1473

1474        // {dlf} Initialization of a fraction rule set requires the default rule
1475
// set to be known. For purposes of initialization, this is always the
1476
// last public rule set, no matter what the localization data says.
1477
defaultRuleSet = ruleSets[ruleSets.length - 1];
1478        for (int i = ruleSets.length - 1; i >= 0; --i) {
1479            if (!ruleSets[i].getName().startsWith("%%")) {
1480                defaultRuleSet = ruleSets[i];
1481                break;
1482            }
1483        }
1484
1485        // finally, we can go back through the temporary descriptions
1486
// list and finish seting up the substructure (and we throw
1487
// away the temporary descriptions as we go)
1488
for (int i = 0; i < ruleSets.length; i++) {
1489            ruleSets[i].parseRules(ruleSetDescriptions[i], this);
1490            ruleSetDescriptions[i] = null;
1491        }
1492
1493        // Now that the rules are initialized, the 'real' default rule
1494
// set can be adjusted by the localization data.
1495

1496        // count the number of public rule sets
1497
// (public rule sets have names that begin with % instead of %%)
1498
int publicRuleSetCount = 0;
1499        for (int i = 0; i < ruleSets.length; i++) {
1500            if (!ruleSets[i].getName().startsWith("%%")) {
1501                ++publicRuleSetCount;
1502            }
1503        }
1504
1505        // prepare an array of the proper size and copy the names into it
1506
String JavaDoc[] publicRuleSetTemp = new String JavaDoc[publicRuleSetCount];
1507        publicRuleSetCount = 0;
1508        for (int i = ruleSets.length - 1; i >= 0; i--) {
1509            if (!ruleSets[i].getName().startsWith("%%")) {
1510                publicRuleSetTemp[publicRuleSetCount++] = ruleSets[i].getName();
1511            }
1512        }
1513
1514        if (publicRuleSetNames != null) {
1515            // confirm the names, if any aren't in the rules, that's an error
1516
// it is ok if the rules contain public rule sets that are not in this list
1517
loop: for (int i = 0; i < publicRuleSetNames.length; ++i) {
1518                String JavaDoc name = publicRuleSetNames[i];
1519                for (int j = 0; j < publicRuleSetTemp.length; ++j) {
1520                    if (name.equals(publicRuleSetTemp[j])) {
1521                        continue loop;
1522                    }
1523                }
1524                throw new IllegalArgumentException JavaDoc("did not find public rule set: " + name);
1525            }
1526
1527            defaultRuleSet = findRuleSet(publicRuleSetNames[0]); // might be different
1528
} else {
1529            publicRuleSetNames = publicRuleSetTemp;
1530        }
1531    }
1532
1533    /**
1534     * Take the localizations array and create a Map from the locale strings to
1535     * the localization arrays.
1536     */

1537    private void initLocalizations(String JavaDoc[][] localizations) {
1538        if (localizations != null) {
1539            publicRuleSetNames = (String JavaDoc[])localizations[0].clone();
1540
1541            Map JavaDoc m = new HashMap JavaDoc();
1542            for (int i = 1; i < localizations.length; ++i) {
1543                String JavaDoc[] data = localizations[i];
1544                String JavaDoc locale = data[0];
1545                String JavaDoc[] names = new String JavaDoc[data.length-1];
1546                if (names.length != publicRuleSetNames.length) {
1547                    throw new IllegalArgumentException JavaDoc("public name length: " + publicRuleSetNames.length +
1548                                                       " != localized names[" + i + "] length: " + names.length);
1549                }
1550                System.arraycopy(data, 1, names, 0, names.length);
1551                m.put(locale, names);
1552            }
1553
1554            if (!m.isEmpty()) {
1555                ruleSetDisplayNames = m;
1556            }
1557        }
1558    }
1559
1560    /**
1561     * This function is used by init() to strip whitespace between rules (i.e.,
1562     * after semicolons).
1563     * @param description The formatter description
1564     * @return The description with all the whitespace that follows semicolons
1565     * taken out.
1566     */

1567    private StringBuffer JavaDoc stripWhitespace(String JavaDoc description) {
1568        // since we don't have a method that deletes characters (why?!!)
1569
// create a new StringBuffer to copy the text into
1570
StringBuffer JavaDoc result = new StringBuffer JavaDoc();
1571
1572        // iterate through the characters...
1573
int start = 0;
1574        while (start != -1 && start < description.length()) {
1575            // seek to the first non-whitespace character...
1576
while (start < description.length()
1577                   && UCharacterProperty.isRuleWhiteSpace(description.charAt(start))) {
1578                ++start;
1579            }
1580
1581            //if the first non-whitespace character is semicolon, skip it and continue
1582
if (start < description.length() && description.charAt(start) == ';') {
1583                start += 1;
1584                continue;
1585            }
1586
1587            // locate the next semicolon in the text and copy the text from
1588
// our current position up to that semicolon into the result
1589
int p;
1590            p = description.indexOf(';', start);
1591            if (p == -1) {
1592                // or if we don't find a semicolon, just copy the rest of
1593
// the string into the result
1594
result.append(description.substring(start));
1595                start = -1;
1596            }
1597            else if (p < description.length()) {
1598                result.append(description.substring(start, p + 1));
1599                start = p + 1;
1600            }
1601
1602            // when we get here, we've seeked off the end of the sring, and
1603
// we terminate the loop (we continue until *start* is -1 rather
1604
// than until *p* is -1, because otherwise we'd miss the last
1605
// rule in the description)
1606
else {
1607                start = -1;
1608            }
1609        }
1610        return result;
1611    }
1612
1613    /**
1614     * This function is called ONLY DURING CONSTRUCTION to fill in the
1615     * defaultRuleSet variable once we've set up all the rule sets.
1616     * The default rule set is the last public rule set in the description.
1617     * (It's the last rather than the first so that a caller can append
1618     * text to the end of an existing formatter description to change its
1619     * behavior.)
1620     */

1621    private void initDefaultRuleSet() {
1622        // seek backward from the end of the list until we reach a rule set
1623
// whose name DOESN'T begin with %%. That's the default rule set
1624
for (int i = ruleSets.length - 1; i >= 0; --i) {
1625            if (!ruleSets[i].getName().startsWith("%%")) {
1626                defaultRuleSet = ruleSets[i];
1627                return;
1628            }
1629        }
1630        defaultRuleSet = ruleSets[ruleSets.length - 1];
1631    }
1632
1633    //-----------------------------------------------------------------------
1634
// formatting implementation
1635
//-----------------------------------------------------------------------
1636

1637    /**
1638     * Bottleneck through which all the public format() methods
1639     * that take a double pass. By the time we get here, we know
1640     * which rule set we're using to do the formatting.
1641     * @param number The number to format
1642     * @param ruleSet The rule set to use to format the number
1643     * @return The text that resulted from formatting the number
1644     */

1645    private String JavaDoc format(double number, NFRuleSet ruleSet) {
1646        // all API format() routines that take a double vector through
1647
// here. Create an empty string buffer where the result will
1648
// be built, and pass it to the rule set (along with an insertion
1649
// position of 0 and the number being formatted) to the rule set
1650
// for formatting
1651
StringBuffer JavaDoc result = new StringBuffer JavaDoc();
1652        ruleSet.format(number, result, 0);
1653        postProcess(result, ruleSet);
1654        return result.toString();
1655    }
1656
1657    /**
1658     * Bottleneck through which all the public format() methods
1659     * that take a long pass. By the time we get here, we know
1660     * which rule set we're using to do the formatting.
1661     * @param number The number to format
1662     * @param ruleSet The rule set to use to format the number
1663     * @return The text that resulted from formatting the number
1664     */

1665    private String JavaDoc format(long number, NFRuleSet ruleSet) {
1666        // all API format() routines that take a double vector through
1667
// here. We have these two identical functions-- one taking a
1668
// double and one taking a long-- the couple digits of precision
1669
// that long has but double doesn't (both types are 8 bytes long,
1670
// but double has to borrow some of the mantissa bits to hold
1671
// the exponent).
1672
// Create an empty string buffer where the result will
1673
// be built, and pass it to the rule set (along with an insertion
1674
// position of 0 and the number being formatted) to the rule set
1675
// for formatting
1676
StringBuffer JavaDoc result = new StringBuffer JavaDoc();
1677        ruleSet.format(number, result, 0);
1678        postProcess(result, ruleSet);
1679        return result.toString();
1680    }
1681
1682    /**
1683     * Post-process the rules if we have a post-processor.
1684     */

1685    private void postProcess(StringBuffer JavaDoc result, NFRuleSet ruleSet) {
1686        if (postProcessRules != null) {
1687            if (postProcessor == null) {
1688                int ix = postProcessRules.indexOf(";");
1689                if (ix == -1) {
1690                    ix = postProcessRules.length();
1691                }
1692                String JavaDoc ppClassName = postProcessRules.substring(0, ix).trim();
1693                try {
1694                    Class JavaDoc cls = Class.forName(ppClassName);
1695                    postProcessor = (RBNFPostProcessor)cls.newInstance();
1696                    postProcessor.init(this, postProcessRules);
1697                }
1698                catch (Exception JavaDoc e) {
1699                    // if debug, print it out
1700
System.out.println("could not locate " + ppClassName + ", error " +
1701                                       e.getClass().getName() + ", " + e.getMessage());
1702                    postProcessor = null;
1703                    postProcessRules = null; // don't try again
1704
return;
1705                }
1706            }
1707
1708            postProcessor.process(result, ruleSet);
1709        }
1710    }
1711
1712    /**
1713     * Returns the named rule set. Throws an IllegalArgumentException
1714     * if this formatter doesn't have a rule set with that name.
1715     * @param name The name of the desired rule set
1716     * @return The rule set with that name
1717     */

1718    NFRuleSet findRuleSet(String JavaDoc name) throws IllegalArgumentException JavaDoc {
1719        for (int i = 0; i < ruleSets.length; i++) {
1720            if (ruleSets[i].getName().equals(name)) {
1721                return ruleSets[i];
1722            }
1723        }
1724        throw new IllegalArgumentException JavaDoc("No rule set named " + name);
1725    }
1726}
1727
Popular Tags