KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > StringReplacer


/*
**********************************************************************
*   Copyright (c) 2002, International Business Machines Corporation
*   and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   01/14/2002  aliu        Creation.
**********************************************************************
*/

10
11 package com.ibm.icu.text;
12 import com.ibm.icu.impl.Utility;
13
14 /**
15  * A replacer that produces static text as its output. The text may
16  * contain transliterator stand-in characters that represent nested
17  * UnicodeReplacer objects, making it possible to encode a tree of
18  * replacers in a StringReplacer. A StringReplacer that contains such
19  * stand-ins is called a <em>complex</em> StringReplacer. A complex
20  * StringReplacer has a slower processing loop than a non-complex one.
21  * @author Alan Liu
22  */

23 class StringReplacer implements UnicodeReplacer {
24
25     /**
26      * Output text, possibly containing stand-in characters that
27      * represent nested UnicodeReplacers.
28      */

29     private String JavaDoc output;
30
31     /**
32      * Cursor position. Value is ignored if hasCursor is false.
33      */

34     private int cursorPos;
35
36     /**
37      * True if this object outputs a cursor position.
38      */

39     private boolean hasCursor;
40
41     /**
42      * A complex object contains nested replacers and requires more
43      * complex processing. StringReplacers are initially assumed to
44      * be complex. If no nested replacers are seen during processing,
45      * then isComplex is set to false, and future replacements are
46      * short circuited for better performance.
47      */

48     private boolean isComplex;
49
50     /**
51      * Object that translates stand-in characters in 'output' to
52      * UnicodeReplacer objects.
53      */

54     private final RuleBasedTransliterator.Data data;
55
56     /**
57      * Construct a StringReplacer that sets the emits the given output
58      * text and sets the cursor to the given position.
59      * @param theOutput text that will replace input text when the
60      * replace() method is called. May contain stand-in characters
61      * that represent nested replacers.
62      * @param theCursorPos cursor position that will be returned by
63      * the replace() method
64      * @param theData transliterator context object that translates
65      * stand-in characters to UnicodeReplacer objects
66      */

67     public StringReplacer(String JavaDoc theOutput,
68                           int theCursorPos,
69                           RuleBasedTransliterator.Data theData) {
70         output = theOutput;
71         cursorPos = theCursorPos;
72         hasCursor = true;
73         data = theData;
74         isComplex = true;
75     }
76
77     /**
78      * Construct a StringReplacer that sets the emits the given output
79      * text and does not modify the cursor.
80      * @param theOutput text that will replace input text when the
81      * replace() method is called. May contain stand-in characters
82      * that represent nested replacers.
83      * @param theData transliterator context object that translates
84      * stand-in characters to UnicodeReplacer objects
85      */

86     public StringReplacer(String JavaDoc theOutput,
87                           RuleBasedTransliterator.Data theData) {
88         output = theOutput;
89         cursorPos = 0;
90         hasCursor = false;
91         data = theData;
92         isComplex = true;
93     }
94
95 //= public static UnicodeReplacer valueOf(String output,
96
//= int cursorPos,
97
//= RuleBasedTransliterator.Data data) {
98
//= if (output.length() == 1) {
99
//= char c = output.charAt(0);
100
//= UnicodeReplacer r = data.lookupReplacer(c);
101
//= if (r != null) {
102
//= return r;
103
//= }
104
//= }
105
//= return new StringReplacer(output, cursorPos, data);
106
//= }
107

108     /**
109      * UnicodeReplacer API
110      */

111     public int replace(Replaceable text,
112                        int start,
113                        int limit,
114                        int[] cursor) {
115         int outLen;
116         int newStart = 0;
117
118         // NOTE: It should be possible to _always_ run the complex
119
// processing code; just slower. If not, then there is a bug
120
// in the complex processing code.
121

122         // Simple (no nested replacers) Processing Code :
123
if (!isComplex) {
124             text.replace(start, limit, output);
125             outLen = output.length();
126
127             // Setup default cursor position (for cursorPos within output)
128
newStart = cursorPos;
129         }
130
131         // Complex (nested replacers) Processing Code :
132
else {
133             /* When there are segments to be copied, use the Replaceable.copy()
134              * API in order to retain out-of-band data. Copy everything to the
135              * end of the string, then copy them back over the key. This preserves
136              * the integrity of indices into the key and surrounding context while
137              * generating the output text.
138              */

139             StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
140             int oOutput; // offset into 'output'
141
isComplex = false;
142
143             // The temporary buffer starts at tempStart, and extends
144
// to destLimit + tempExtra. The start of the buffer has a single
145
// character from before the key. This provides style
146
// data when addition characters are filled into the
147
// temporary buffer. If there is nothing to the left, use
148
// the non-character U+FFFF, which Replaceable subclasses
149
// should treat specially as a "no-style character."
150
// destStart points to the point after the style context
151
// character, so it is tempStart+1 or tempStart+2.
152
int tempStart = text.length(); // start of temp buffer
153
int destStart = tempStart; // copy new text to here
154
if (start > 0) {
155                 int len = UTF16.getCharCount(text.char32At(start-1));
156                 text.copy(start-len, start, tempStart);
157                 destStart += len;
158             } else {
159                 text.replace(tempStart, tempStart, "\uFFFF");
160                 destStart++;
161             }
162             int destLimit = destStart;
163             int tempExtra = 0; // temp chars after destLimit
164

165             for (oOutput=0; oOutput<output.length(); ) {
166                 if (oOutput == cursorPos) {
167                     // Record the position of the cursor
168
newStart = destLimit - destStart; // relative to start
169
}
170                 int c = UTF16.charAt(output, oOutput);
171
172                 // When we are at the last position copy the right style
173
// context character into the temporary buffer. We don't
174
// do this before because it will provide an incorrect
175
// right context for previous replace() operations.
176
int nextIndex = oOutput + UTF16.getCharCount(c);
177                 if (nextIndex == output.length()) {
178                     tempExtra = UTF16.getCharCount(text.char32At(limit));
179                     text.copy(limit, limit+tempExtra, destLimit);
180                 }
181
182                 UnicodeReplacer r = data.lookupReplacer(c);
183                 if (r == null) {
184                     // Accumulate straight (non-segment) text.
185
UTF16.append(buf, c);
186                 } else {
187                     isComplex = true;
188
189                     // Insert any accumulated straight text.
190
if (buf.length() > 0) {
191                         text.replace(destLimit, destLimit, buf.toString());
192                         destLimit += buf.length();
193                         buf.setLength(0);
194                     }
195
196                     // Delegate output generation to replacer object
197
int len = r.replace(text, destLimit, destLimit, cursor);
198                     destLimit += len;
199                 }
200                 oOutput = nextIndex;
201             }
202             // Insert any accumulated straight text.
203
if (buf.length() > 0) {
204                 text.replace(destLimit, destLimit, buf.toString());
205                 destLimit += buf.length();
206             }
207             if (oOutput == cursorPos) {
208                 // Record the position of the cursor
209
newStart = destLimit - destStart; // relative to start
210
}
211
212             outLen = destLimit - destStart;
213
214             // Copy new text to start, and delete it
215
text.copy(destStart, destLimit, start);
216             text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
217
218             // Delete the old text (the key)
219
text.replace(start + outLen, limit + outLen, "");
220         }
221
222         if (hasCursor) {
223             // Adjust the cursor for positions outside the key. These
224
// refer to code points rather than code units. If cursorPos
225
// is within the output string, then use newStart, which has
226
// already been set above.
227
if (cursorPos < 0) {
228                 newStart = start;
229                 int n = cursorPos;
230                 // Outside the output string, cursorPos counts code points
231
while (n < 0 && newStart > 0) {
232                     newStart -= UTF16.getCharCount(text.char32At(newStart-1));
233                     ++n;
234                 }
235                 newStart += n;
236             } else if (cursorPos > output.length()) {
237                 newStart = start + outLen;
238                 int n = cursorPos - output.length();
239                 // Outside the output string, cursorPos counts code points
240
while (n > 0 && newStart < text.length()) {
241                     newStart += UTF16.getCharCount(text.char32At(newStart));
242                     --n;
243                 }
244                 newStart += n;
245             } else {
246                 // Cursor is within output string. It has been set up above
247
// to be relative to start.
248
newStart += start;
249             }
250
251             cursor[0] = newStart;
252         }
253
254         return outLen;
255     }
256
257     /**
258      * UnicodeReplacer API
259      */

260     public String JavaDoc toReplacerPattern(boolean escapeUnprintable) {
261         StringBuffer JavaDoc rule = new StringBuffer JavaDoc();
262         StringBuffer JavaDoc quoteBuf = new StringBuffer JavaDoc();
263
264         int cursor = cursorPos;
265
266         // Handle a cursor preceding the output
267
if (hasCursor && cursor < 0) {
268             while (cursor++ < 0) {
269                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
270             }
271             // Fall through and append '|' below
272
}
273
274         for (int i=0; i<output.length(); ++i) {
275             if (hasCursor && i == cursor) {
276                 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
277             }
278             char c = output.charAt(i); // Ok to use 16-bits here
279

280             UnicodeReplacer r = data.lookupReplacer(c);
281             if (r == null) {
282                 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
283             } else {
284                 StringBuffer JavaDoc buf = new StringBuffer JavaDoc(" ");
285                 buf.append(r.toReplacerPattern(escapeUnprintable));
286                 buf.append(' ');
287                 Utility.appendToRule(rule, buf.toString(),
288                                      true, escapeUnprintable, quoteBuf);
289             }
290         }
291
292         // Handle a cursor after the output. Use > rather than >= because
293
// if cursor == output.length() it is at the end of the output,
294
// which is the default position, so we need not emit it.
295
if (hasCursor && cursor > output.length()) {
296             cursor -= output.length();
297             while (cursor-- > 0) {
298                 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
299             }
300             Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
301         }
302         // Flush quoteBuf out to result
303
Utility.appendToRule(rule, -1,
304                              true, escapeUnprintable, quoteBuf);
305
306         return rule.toString();
307     }
308
309     /**
310      * Union the set of all characters that may output by this object
311      * into the given set.
312      * @param toUnionTo the set into which to union the output characters
313      */

314     public void addReplacementSetTo(UnicodeSet toUnionTo) {
315         int ch;
316         for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
317             ch = UTF16.charAt(output, i);
318             UnicodeReplacer r = data.lookupReplacer(ch);
319             if (r == null) {
320                 toUnionTo.add(ch);
321             } else {
322                 r.addReplacementSetTo(toUnionTo);
323             }
324         }
325     }
326 }
327
328 //eof
329
Popular Tags