KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > java > text > MergeCollation


1 /*
2  * @(#)MergeCollation.java 1.17 03/12/19
3  *
4  * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
5  * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
6  */

7
8 /*
9  * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
10  * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved
11  *
12  * The original version of this source code and documentation is copyrighted
13  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
14  * materials are provided under terms of a License Agreement between Taligent
15  * and Sun. This technology is protected by multiple US and International
16  * patents. This notice and attribution to Taligent may not be removed.
17  * Taligent is a registered trademark of Taligent, Inc.
18  *
19  */

20
21 package java.text;
22
23 import java.util.ArrayList JavaDoc;
24
25 /**
26  * Utility class for normalizing and merging patterns for collation.
27  * Patterns are strings of the form <entry>*, where <entry> has the
28  * form:
29  * <pattern> := <entry>*
30  * <entry> := <separator><chars>{"/"<extension>}
31  * <separator> := "=", ",", ";", "<", "&"
32  * <chars>, and <extension> are both arbitrary strings.
33  * unquoted whitespaces are ignored.
34  * 'xxx' can be used to quote characters
35  * One difference from Collator is that & is used to reset to a current
36  * point. Or, in other words, it introduces a new sequence which is to
37  * be added to the old.
38  * That is: "a < b < c < d" is the same as "a < b & b < c & c < d" OR
39  * "a < b < d & b < c"
40  * XXX: make '' be a single quote.
41  * @see PatternEntry
42  * @version 1.17 12/19/03
43  * @author Mark Davis, Helena Shih
44  */

45
46 final class MergeCollation {
47
48     /**
49      * Creates from a pattern
50      * @exception ParseException If the input pattern is incorrect.
51      */

52     public MergeCollation(String JavaDoc pattern) throws ParseException JavaDoc
53     {
54         for (int i = 0; i < statusArray.length; i++)
55             statusArray[i] = 0;
56         setPattern(pattern);
57     }
58
59     /**
60      * recovers current pattern
61      */

62     public String JavaDoc getPattern() {
63         return getPattern(true);
64     }
65
66     /**
67      * recovers current pattern.
68      * @param withWhiteSpace puts spacing around the entries, and \n
69      * before & and <
70      */

71     public String JavaDoc getPattern(boolean withWhiteSpace) {
72         StringBuffer JavaDoc result = new StringBuffer JavaDoc();
73         PatternEntry JavaDoc tmp = null;
74         ArrayList JavaDoc extList = null;
75         int i;
76         for (i = 0; i < patterns.size(); ++i) {
77             PatternEntry JavaDoc entry = (PatternEntry JavaDoc) patterns.get(i);
78             if (entry.extension.length() != 0) {
79                 if (extList == null)
80                     extList = new ArrayList JavaDoc();
81                 extList.add(entry);
82             } else {
83                 if (extList != null) {
84                     PatternEntry JavaDoc last = findLastWithNoExtension(i-1);
85                     for (int j = extList.size() - 1; j >= 0 ; j--) {
86                         tmp = (PatternEntry JavaDoc)(extList.get(j));
87                         tmp.addToBuffer(result, false, withWhiteSpace, last);
88                     }
89                     extList = null;
90                 }
91                 entry.addToBuffer(result, false, withWhiteSpace, null);
92             }
93         }
94         if (extList != null) {
95             PatternEntry JavaDoc last = findLastWithNoExtension(i-1);
96             for (int j = extList.size() - 1; j >= 0 ; j--) {
97                 tmp = (PatternEntry JavaDoc)(extList.get(j));
98                 tmp.addToBuffer(result, false, withWhiteSpace, last);
99             }
100             extList = null;
101         }
102         return result.toString();
103     }
104
105     private final PatternEntry JavaDoc findLastWithNoExtension(int i) {
106         for (--i;i >= 0; --i) {
107             PatternEntry JavaDoc entry = (PatternEntry JavaDoc) patterns.get(i);
108             if (entry.extension.length() == 0) {
109                 return entry;
110             }
111         }
112         return null;
113     }
114
115     /**
116      * emits the pattern for collation builder.
117      * @return emits the string in the format understable to the collation
118      * builder.
119      */

120     public String JavaDoc emitPattern() {
121         return emitPattern(true);
122     }
123
124     /**
125      * emits the pattern for collation builder.
126      * @param withWhiteSpace puts spacing around the entries, and \n
127      * before & and <
128      * @return emits the string in the format understable to the collation
129      * builder.
130      */

131     public String JavaDoc emitPattern(boolean withWhiteSpace) {
132         StringBuffer JavaDoc result = new StringBuffer JavaDoc();
133         for (int i = 0; i < patterns.size(); ++i)
134         {
135             PatternEntry JavaDoc entry = (PatternEntry JavaDoc) patterns.get(i);
136             if (entry != null) {
137                 entry.addToBuffer(result, true, withWhiteSpace, null);
138             }
139         }
140         return result.toString();
141     }
142
143     /**
144      * sets the pattern.
145      */

146     public void setPattern(String JavaDoc pattern) throws ParseException JavaDoc
147     {
148         patterns.clear();
149         addPattern(pattern);
150     }
151
152     /**
153      * adds a pattern to the current one.
154      * @param pattern the new pattern to be added
155      */

156     public void addPattern(String JavaDoc pattern) throws ParseException JavaDoc
157     {
158         if (pattern == null)
159             return;
160         
161         PatternEntry.Parser JavaDoc parser = new PatternEntry.Parser JavaDoc(pattern);
162         
163         PatternEntry JavaDoc entry = parser.next();
164         while (entry != null) {
165             fixEntry(entry);
166             entry = parser.next();
167         }
168     }
169
170     /**
171      * gets count of separate entries
172      * @return the size of pattern entries
173      */

174     public int getCount() {
175         return patterns.size();
176     }
177
178     /**
179      * gets count of separate entries
180      * @param index the offset of the desired pattern entry
181      * @return the requested pattern entry
182      */

183     public PatternEntry JavaDoc getItemAt(int index) {
184         return (PatternEntry JavaDoc) patterns.get(index);
185     }
186
187     //============================================================
188
// privates
189
//============================================================
190
ArrayList JavaDoc patterns = new ArrayList JavaDoc(); // a list of PatternEntries
191

192     private transient PatternEntry JavaDoc saveEntry = null;
193     private transient PatternEntry JavaDoc lastEntry = null;
194     
195     // This is really used as a local variable inside fixEntry, but we cache
196
// it here to avoid newing it up every time the method is called.
197
private transient StringBuffer JavaDoc excess = new StringBuffer JavaDoc();
198
199     //
200
// When building a MergeCollation, we need to do lots of searches to see
201
// whether a given entry is already in the table. Since we're using an
202
// array, this would make the algorithm O(N*N). To speed things up, we
203
// use this bit array to remember whether the array contains any entries
204
// starting with each Unicode character. If not, we can avoid the search.
205
// Using BitSet would make this easier, but it's significantly slower.
206
//
207
private transient byte[] statusArray = new byte[8192];
208     private final byte BITARRAYMASK = (byte)0x1;
209     private final int BYTEPOWER = 3;
210     private final int BYTEMASK = (1 << BYTEPOWER) - 1;
211
212     /*
213       If the strength is RESET, then just change the lastEntry to
214       be the current. (If the current is not in patterns, signal an error).
215       If not, then remove the current entry, and add it after lastEntry
216       (which is usually at the end).
217       */

218     private final void fixEntry(PatternEntry JavaDoc newEntry) throws ParseException JavaDoc
219     {
220         // check to see whether the new entry has the same characters as the previous
221
// entry did (this can happen when a pattern declaring a difference between two
222
// strings that are canonically equivalent is normalized). If so, and the strength
223
// is anything other than IDENTICAL or RESET, throw an exception (you can't
224
// declare a string to be unequal to itself). --rtg 5/24/99
225
if (lastEntry != null && newEntry.chars.equals(lastEntry.chars)
226                 && newEntry.extension.equals(lastEntry.extension)) {
227             if (newEntry.strength != Collator.IDENTICAL
228                 && newEntry.strength != PatternEntry.RESET) {
229                     throw new ParseException JavaDoc("The entries " + lastEntry + " and "
230                             + newEntry + " are adjacent in the rules, but have conflicting "
231                             + "strengths: A character can't be unequal to itself.", -1);
232             } else {
233                 // otherwise, just skip this entry and behave as though you never saw it
234
return;
235             }
236         }
237         
238         boolean changeLastEntry = true;
239         if (newEntry.strength != PatternEntry.RESET) {
240             int oldIndex = -1;
241
242             if ((newEntry.chars.length() == 1)) {
243             
244                 char c = newEntry.chars.charAt(0);
245                 int statusIndex = c >> BYTEPOWER;
246                 byte bitClump = statusArray[statusIndex];
247                 byte setBit = (byte)(BITARRAYMASK << (c & BYTEMASK));
248                 
249                 if (bitClump != 0 && (bitClump & setBit) != 0) {
250                     oldIndex = patterns.lastIndexOf(newEntry);
251                 } else {
252                     // We're going to add an element that starts with this
253
// character, so go ahead and set its bit.
254
statusArray[statusIndex] = (byte)(bitClump | setBit);
255                 }
256             } else {
257                 oldIndex = patterns.lastIndexOf(newEntry);
258             }
259             if (oldIndex != -1) {
260                 patterns.remove(oldIndex);
261             }
262             
263             excess.setLength(0);
264             int lastIndex = findLastEntry(lastEntry, excess);
265
266             if (excess.length() != 0) {
267                 newEntry.extension = excess + newEntry.extension;
268                 if (lastIndex != patterns.size()) {
269                     lastEntry = saveEntry;
270                     changeLastEntry = false;
271                 }
272             }
273             if (lastIndex == patterns.size()) {
274                 patterns.add(newEntry);
275                 saveEntry = newEntry;
276             } else {
277                 patterns.add(lastIndex, newEntry);
278             }
279         }
280         if (changeLastEntry) {
281             lastEntry = newEntry;
282         }
283     }
284
285     private final int findLastEntry(PatternEntry JavaDoc entry,
286                               StringBuffer JavaDoc excessChars) throws ParseException JavaDoc
287     {
288         if (entry == null)
289             return 0;
290             
291         if (entry.strength != PatternEntry.RESET) {
292             // Search backwards for string that contains this one;
293
// most likely entry is last one
294

295             int oldIndex = -1;
296             if ((entry.chars.length() == 1)) {
297                 int index = entry.chars.charAt(0) >> BYTEPOWER;
298                 if ((statusArray[index] &
299                     (BITARRAYMASK << (entry.chars.charAt(0) & BYTEMASK))) != 0) {
300                     oldIndex = patterns.lastIndexOf(entry);
301                 }
302             } else {
303                 oldIndex = patterns.lastIndexOf(entry);
304             }
305             if ((oldIndex == -1))
306                 throw new ParseException JavaDoc("couldn't find last entry: "
307                                           + entry, oldIndex);
308             return oldIndex + 1;
309         } else {
310             int i;
311             for (i = patterns.size() - 1; i >= 0; --i) {
312                 PatternEntry JavaDoc e = (PatternEntry JavaDoc) patterns.get(i);
313                 if (e.chars.regionMatches(0,entry.chars,0,
314                                               e.chars.length())) {
315                     excessChars.append(entry.chars.substring(e.chars.length(),
316                                                             entry.chars.length()));
317                     break;
318                 }
319             }
320             if (i == -1)
321                 throw new ParseException JavaDoc("couldn't find: " + entry, i);
322             return i + 1;
323         }
324     }
325 }
326
327
Popular Tags