KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > impl > PrettyPrinter


1 /**
2  *******************************************************************************
3  * Copyright (C) 1996-2006, International Business Machines Corporation and *
4  * others. All Rights Reserved. *
5  *******************************************************************************
6  */

7
8 package com.ibm.icu.impl;
9
10 import java.util.Comparator JavaDoc;
11 import java.util.Iterator JavaDoc;
12 import java.util.Set JavaDoc;
13 import java.util.TreeSet JavaDoc;
14
15 import com.ibm.icu.impl.CollectionUtilities.MultiComparator;
16 import com.ibm.icu.lang.UCharacter;
17 import com.ibm.icu.text.Collator;
18 import com.ibm.icu.text.RuleBasedCollator;
19 import com.ibm.icu.text.Transliterator;
20 import com.ibm.icu.text.UTF16;
21 import com.ibm.icu.text.UnicodeSet;
22 import com.ibm.icu.text.UnicodeSetIterator;
23 import com.ibm.icu.util.ULocale;
24
25 /** Provides more flexible formatting of UnicodeSet patterns.
26  */

27 public class PrettyPrinter {
28     private static final UnicodeSet patternWhitespace = (UnicodeSet) new UnicodeSet("[[:Cn:][:Default_Ignorable_Code_Point:][:patternwhitespace:]]").freeze();
29     private static final UnicodeSet sortAtEnd = (UnicodeSet) new UnicodeSet("[[:Cn:][:Cs:][:Co:][:Ideographic:]]").freeze();
30     
31     private boolean first = true;
32     private StringBuffer JavaDoc target = new StringBuffer JavaDoc();
33     private int firstCodePoint = -2;
34     private int lastCodePoint = -2;
35     private boolean compressRanges = true;
36     private String JavaDoc lastString = "";
37     private UnicodeSet toQuote = new UnicodeSet(patternWhitespace);
38     private Transliterator quoter = null;
39     
40     private Comparator JavaDoc ordering;
41     private Comparator JavaDoc spaceComp = Collator.getInstance(ULocale.ROOT);
42     {
43         setOrdering(Collator.getInstance(ULocale.ROOT));
44         ((RuleBasedCollator)spaceComp).setStrength(RuleBasedCollator.PRIMARY);
45     }
46     
47     public Transliterator getQuoter() {
48         return quoter;
49     }
50
51     public PrettyPrinter setQuoter(Transliterator quoter) {
52         this.quoter = quoter;
53         return this; // for chaining
54
}
55
56     public boolean isCompressRanges() {
57         return compressRanges;
58     }
59     
60     /**
61      * @param compressRanges if you want abcde instead of a-e, make this false
62      * @return
63      */

64     public PrettyPrinter setCompressRanges(boolean compressRanges) {
65         this.compressRanges = compressRanges;
66         return this;
67     }
68     
69     public Comparator JavaDoc getOrdering() {
70         return ordering;
71     }
72     
73     /**
74      * @param ordering the resulting ordering of the list of characters in the pattern
75      * @return
76      */

77     public PrettyPrinter setOrdering(Comparator JavaDoc ordering) {
78         this.ordering = new MultiComparator(new Comparator JavaDoc[] {ordering, new UTF16.StringComparator(true,false,0)});
79         return this;
80     }
81     
82     public Comparator JavaDoc getSpaceComparator() {
83         return spaceComp;
84     }
85     
86     /**
87      * @param spaceComp if the comparison returns non-zero, then a space will be inserted between characters
88      * @return this, for chaining
89      */

90     public PrettyPrinter setSpaceComparator(Comparator JavaDoc spaceComp) {
91         this.spaceComp = spaceComp;
92         return this;
93     }
94     
95     public UnicodeSet getToQuote() {
96         return toQuote;
97     }
98     
99     /**
100      * a UnicodeSet of extra characters to quote with \\uXXXX-style escaping (will automatically quote pattern whitespace)
101      * @param toQuote
102      */

103     public PrettyPrinter setToQuote(UnicodeSet toQuote) {
104         toQuote = (UnicodeSet)toQuote.clone();
105         toQuote.addAll(patternWhitespace);
106         this.toQuote = toQuote;
107         return this;
108     }
109         
110     /**
111      * Get the pattern for a particular set.
112      * @param uset
113      * @return formatted UnicodeSet
114      */

115     public String JavaDoc toPattern(UnicodeSet uset) {
116         first = true;
117         UnicodeSet putAtEnd = new UnicodeSet(uset).retainAll(sortAtEnd); // remove all the unassigned gorp for now
118
// make sure that comparison separates all strings, even canonically equivalent ones
119
Set JavaDoc orderedStrings = new TreeSet JavaDoc(ordering);
120         for (UnicodeSetIterator it = new UnicodeSetIterator(uset); it.nextRange();) {
121             if (it.codepoint == it.IS_STRING) {
122                 orderedStrings.add(it.string);
123             } else {
124                 for (int i = it.codepoint; i <= it.codepointEnd; ++i) {
125                     if (!putAtEnd.contains(i)) {
126                         orderedStrings.add(UTF16.valueOf(i));
127                     }
128                 }
129             }
130         }
131         target.setLength(0);
132         target.append("[");
133         for (Iterator JavaDoc it = orderedStrings.iterator(); it.hasNext();) {
134             appendUnicodeSetItem((String JavaDoc) it.next());
135         }
136         for (UnicodeSetIterator it = new UnicodeSetIterator(putAtEnd); it.next();) { // add back the unassigned gorp
137
appendUnicodeSetItem(it.codepoint);
138         }
139         flushLast();
140         target.append("]");
141         String JavaDoc sresult = target.toString();
142         
143         // double check the results. This can be removed once we have more tests.
144
// try {
145
// UnicodeSet doubleCheck = new UnicodeSet(sresult);
146
// if (!uset.equals(doubleCheck)) {
147
// throw new IllegalStateException("Failure to round-trip in pretty-print " + uset + " => " + sresult + "\r\n source-result: " + new UnicodeSet(uset).removeAll(doubleCheck) + "\r\n result-source: " + new UnicodeSet(doubleCheck).removeAll(uset));
148
// }
149
// } catch (RuntimeException e) {
150
// throw (RuntimeException) new IllegalStateException("Failure to round-trip in pretty-print " + uset).initCause(e);
151
// }
152
return sresult;
153     }
154     
155     private PrettyPrinter appendUnicodeSetItem(String JavaDoc s) {
156         int cp;
157         if (UTF16.hasMoreCodePointsThan(s, 1)) {
158             flushLast();
159             addSpace(s);
160             target.append("{");
161             for (int i = 0; i < s.length(); i += UTF16.getCharCount(cp)) {
162                 appendQuoted(cp = UTF16.charAt(s, i));
163             }
164             target.append("}");
165             lastString = s;
166         } else {
167             appendUnicodeSetItem(UTF16.charAt(s, 0));
168         }
169         return this;
170     }
171
172     private void appendUnicodeSetItem(int cp) {
173         if (!compressRanges)
174             flushLast();
175         if (cp == lastCodePoint + 1) {
176             lastCodePoint = cp; // continue range
177
} else { // start range
178
flushLast();
179             firstCodePoint = lastCodePoint = cp;
180         }
181     }
182     /**
183      *
184      */

185     private void addSpace(String JavaDoc s) {
186         if (first) {
187             first = false;
188         } else if (spaceComp.compare(s, lastString) != 0) {
189             target.append(' ');
190         } else {
191             int cp = UTF16.charAt(s,0);
192             int type = UCharacter.getType(cp);
193             if (type == UCharacter.NON_SPACING_MARK || type == UCharacter.ENCLOSING_MARK) {
194                 target.append(' ');
195             } else if (type == UCharacter.SURROGATE && cp >= UTF16.TRAIL_SURROGATE_MIN_VALUE) {
196                 target.append(' '); // make sure we don't accidentally merge two surrogates
197
}
198         }
199     }
200     
201     private void flushLast() {
202         if (lastCodePoint >= 0) {
203             addSpace(UTF16.valueOf(firstCodePoint));
204             if (firstCodePoint != lastCodePoint) {
205                 appendQuoted(firstCodePoint);
206                 target.append(firstCodePoint + 1 == lastCodePoint ? ' ' : '-');
207             }
208             appendQuoted(lastCodePoint);
209             lastString = UTF16.valueOf(lastCodePoint);
210             firstCodePoint = lastCodePoint = -2;
211         }
212     }
213     PrettyPrinter appendQuoted(int codePoint) {
214         if (toQuote.contains(codePoint)) {
215             if (quoter != null) {
216                 target.append(quoter.transliterate(UTF16.valueOf(codePoint)));
217                 return this;
218             }
219             if (codePoint > 0xFFFF) {
220                 target.append("\\U");
221                 target.append(Utility.hex(codePoint,8));
222             } else {
223                 target.append("\\u");
224                 target.append(Utility.hex(codePoint,4));
225             }
226             return this;
227         }
228         switch (codePoint) {
229         case '[': // SET_OPEN:
230
case ']': // SET_CLOSE:
231
case '-': // HYPHEN:
232
case '^': // COMPLEMENT:
233
case '&': // INTERSECTION:
234
case '\\': //BACKSLASH:
235
case '{':
236         case '}':
237         case '$':
238         case ':':
239             target.append('\\');
240             break;
241         default:
242             // Escape whitespace
243
if (patternWhitespace.contains(codePoint)) {
244                 target.append('\\');
245             }
246         break;
247         }
248         UTF16.append(target, codePoint);
249         return this;
250     }
251 // Appender append(String s) {
252
// target.append(s);
253
// return this;
254
// }
255
// public String toString() {
256
// return target.toString();
257
// }
258
}
259
Popular Tags