KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > BreakTransliterator


/*
 *******************************************************************************
 * Copyright (C) 1996-2006, International Business Machines Corporation and *
 * others. All Rights Reserved. *
 *******************************************************************************
 */

7 package com.ibm.icu.text;
8
9 import com.ibm.icu.lang.UCharacter;
10 import com.ibm.icu.util.ULocale;
11
12 import java.text.CharacterIterator JavaDoc;
13
14
15 /**
16  * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter.
17  * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator.
18  */

19 final class BreakTransliterator extends Transliterator {
20     private BreakIterator bi;
21     private String JavaDoc insertion;
22     private int[] boundaries = new int[50];
23     private int boundaryCount = 0;
24
25     public BreakTransliterator(String JavaDoc ID, UnicodeFilter filter, BreakIterator bi, String JavaDoc insertion) {
26         super(ID, filter);
27         this.bi = bi;
28         this.insertion = insertion;
29     }
30
31     public BreakTransliterator(String JavaDoc ID, UnicodeFilter filter) {
32         this(ID, filter, null, " ");
33     }
34
35     public String JavaDoc getInsertion() {
36         return insertion;
37     }
38
39     public void setInsertion(String JavaDoc insertion) {
40         this.insertion = insertion;
41     }
42
43     public BreakIterator getBreakIterator() {
44         // Defer initialization of BreakIterator because it is slow,
45
// typically over 2000 ms.
46
if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH"));
47         return bi;
48     }
49
50     public void setBreakIterator(BreakIterator bi) {
51         this.bi = bi;
52     }
53
54     static final int LETTER_OR_MARK_MASK =
55           (1<<Character.UPPERCASE_LETTER)
56         | (1<<Character.LOWERCASE_LETTER)
57         | (1<<Character.TITLECASE_LETTER)
58         | (1<<Character.MODIFIER_LETTER)
59         | (1<<Character.OTHER_LETTER)
60         | (1<<Character.COMBINING_SPACING_MARK)
61         | (1<<Character.NON_SPACING_MARK)
62         | (1<<Character.ENCLOSING_MARK)
63         ;
64     protected void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
65         boundaryCount = 0;
66         int boundary = 0;
67         getBreakIterator(); // Lazy-create it if necessary
68
bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
69         // TODO: fix clumsy workaround used below.
70
/*
71         char[] tempBuffer = new char[text.length()];
72         text.getChars(0, text.length(), tempBuffer, 0);
73         bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
74         */

75         // end debugging
76

77         // To make things much easier, we will stack the boundaries, and then insert at the end.
78
// generally, we won't need too many, since we will be filtered.
79

80         for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
81             if (boundary == 0) continue;
82             // HACK: Check to see that preceeding item was a letter
83

84             int cp = UTF16.charAt(text, boundary-1);
85             int type = UCharacter.getType(cp);
86             //System.out.println(Integer.toString(cp,16) + " (before): " + type);
87
if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
88
89             cp = UTF16.charAt(text, boundary);
90             type = UCharacter.getType(cp);
91             //System.out.println(Integer.toString(cp,16) + " (after): " + type);
92
if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
93
94             if (boundaryCount >= boundaries.length) { // realloc if necessary
95
int[] temp = new int[boundaries.length * 2];
96                 System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
97                 boundaries = temp;
98             }
99
100             boundaries[boundaryCount++] = boundary;
101             //System.out.println(boundary);
102
}
103
104         int delta = 0;
105         int lastBoundary = 0;
106
107         if (boundaryCount != 0) { // if we found something, adjust
108
delta = boundaryCount * insertion.length();
109             lastBoundary = boundaries[boundaryCount-1];
110
111             // we do this from the end backwards, so that we don't have to keep updating.
112

113             while (boundaryCount > 0) {
114                 boundary = boundaries[--boundaryCount];
115                 text.replace(boundary, boundary, insertion);
116             }
117         }
118
119         // Now fix up the return values
120
pos.contextLimit += delta;
121         pos.limit += delta;
122         pos.start = incremental ? lastBoundary + delta : pos.limit;
123     }
124
125
126     /**
127      * Registers standard variants with the system. Called by
128      * Transliterator during initialization.
129      */

130     static void register() {
131         // false means that it is invisible
132
Transliterator trans = new BreakTransliterator("Any-BreakInternal", null);
133         Transliterator.registerInstance(trans, false);
134         /*
135         Transliterator.registerFactory("Any-Break", new Transliterator.Factory() {
136             public Transliterator getInstance(String ID) {
137                 return new BreakTransliterator("Any-Break", null);
138             }
139         });
140         */

141     }
142
143     // Hack, just to get a real character iterator.
144

145     static final class ReplaceableCharacterIterator implements CharacterIterator JavaDoc
146     {
147         private Replaceable text;
148         private int begin;
149         private int end;
150         // invariant: begin <= pos <= end
151
private int pos;
152
153         /**
154         * Constructs an iterator with an initial index of 0.
155         */

156         public ReplaceableCharacterIterator(Replaceable text)
157         {
158             this(text, 0);
159         }
160
161         /**
162         * Constructs an iterator with the specified initial index.
163         *
164         * @param text The String to be iterated over
165         * @param pos Initial iterator position
166         */

167         public ReplaceableCharacterIterator(Replaceable text, int pos)
168         {
169         this(text, 0, text.length(), pos);
170         }
171
172         /**
173         * Constructs an iterator over the given range of the given string, with the
174         * index set at the specified position.
175         *
176         * @param text The String to be iterated over
177         * @param begin Index of the first character
178         * @param end Index of the character following the last character
179         * @param pos Initial iterator position
180         */

181         public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) {
182             if (text == null) {
183                 throw new NullPointerException JavaDoc();
184             }
185             this.text = text;
186
187             if (begin < 0 || begin > end || end > text.length()) {
188                 throw new IllegalArgumentException JavaDoc("Invalid substring range");
189             }
190
191             if (pos < begin || pos > end) {
192                 throw new IllegalArgumentException JavaDoc("Invalid position");
193             }
194
195             this.begin = begin;
196             this.end = end;
197             this.pos = pos;
198         }
199
200         /**
201         * Reset this iterator to point to a new string. This package-visible
202         * method is used by other java.text classes that want to avoid allocating
203         * new ReplaceableCharacterIterator objects every time their setText method
204         * is called.
205         *
206         * @param text The String to be iterated over
207         */

208         public void setText(Replaceable text) {
209             if (text == null) {
210                 throw new NullPointerException JavaDoc();
211             }
212             this.text = text;
213             this.begin = 0;
214             this.end = text.length();
215             this.pos = 0;
216         }
217
218         /**
219         * Implements CharacterIterator.first() for String.
220         * @see CharacterIterator#first
221         */

222         public char first()
223         {
224             pos = begin;
225             return current();
226         }
227
228         /**
229         * Implements CharacterIterator.last() for String.
230         * @see CharacterIterator#last
231         */

232         public char last()
233         {
234             if (end != begin) {
235                 pos = end - 1;
236             } else {
237                 pos = end;
238             }
239             return current();
240         }
241
242         /**
243         * Implements CharacterIterator.setIndex() for String.
244         * @see CharacterIterator#setIndex
245         */

246         public char setIndex(int p)
247         {
248         if (p < begin || p > end) {
249                 throw new IllegalArgumentException JavaDoc("Invalid index");
250         }
251             pos = p;
252             return current();
253         }
254
255         /**
256         * Implements CharacterIterator.current() for String.
257         * @see CharacterIterator#current
258         */

259         public char current()
260         {
261             if (pos >= begin && pos < end) {
262                 return text.charAt(pos);
263             }
264             else {
265                 return DONE;
266             }
267         }
268
269         /**
270         * Implements CharacterIterator.next() for String.
271         * @see CharacterIterator#next
272         */

273         public char next()
274         {
275             if (pos < end - 1) {
276                 pos++;
277                 return text.charAt(pos);
278             }
279             else {
280                 pos = end;
281                 return DONE;
282             }
283         }
284
285         /**
286         * Implements CharacterIterator.previous() for String.
287         * @see CharacterIterator#previous
288         */

289         public char previous()
290         {
291             if (pos > begin) {
292                 pos--;
293                 return text.charAt(pos);
294             }
295             else {
296                 return DONE;
297             }
298         }
299
300         /**
301         * Implements CharacterIterator.getBeginIndex() for String.
302         * @see CharacterIterator#getBeginIndex
303         */

304         public int getBeginIndex()
305         {
306             return begin;
307         }
308
309         /**
310         * Implements CharacterIterator.getEndIndex() for String.
311         * @see CharacterIterator#getEndIndex
312         */

313         public int getEndIndex()
314         {
315             return end;
316         }
317
318         /**
319         * Implements CharacterIterator.getIndex() for String.
320         * @see CharacterIterator#getIndex
321         */

322         public int getIndex()
323         {
324             return pos;
325         }
326
327         /**
328         * Compares the equality of two ReplaceableCharacterIterator objects.
329         * @param obj the ReplaceableCharacterIterator object to be compared with.
330         * @return true if the given obj is the same as this
331         * ReplaceableCharacterIterator object; false otherwise.
332         */

333         public boolean equals(Object JavaDoc obj)
334         {
335             if (this == obj) {
336                 return true;
337             }
338             if (!(obj instanceof ReplaceableCharacterIterator)) {
339                 return false;
340             }
341
342             ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj;
343
344             if (hashCode() != that.hashCode()) {
345                 return false;
346             }
347             if (!text.equals(that.text)) {
348                 return false;
349             }
350             if (pos != that.pos || begin != that.begin || end != that.end) {
351                 return false;
352             }
353             return true;
354         }
355
356         /**
357         * Computes a hashcode for this iterator.
358         * @return A hash code
359         */

360         public int hashCode()
361         {
362             return text.hashCode() ^ pos ^ begin ^ end;
363         }
364
365         /**
366         * Creates a copy of this iterator.
367         * @return A copy of this
368         */

369         public Object JavaDoc clone()
370         {
371             try {
372                 ReplaceableCharacterIterator other
373                 = (ReplaceableCharacterIterator) super.clone();
374                 return other;
375             }
376             catch (CloneNotSupportedException JavaDoc e) {
377                 throw new IllegalStateException JavaDoc();
378             }
379         }
380
381     }
382
383 }
384
Popular Tags