KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > fop > util > CharUtilities


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17
18 /* $Id: CharUtilities.java 426576 2006-07-28 15:44:37Z jeremias $ */
19
20 package org.apache.fop.util;
21
/**
 * This class provides utilities to distinguish various kinds of Unicode
 * whitespace and to classify individual characters.
 * <p>
 * All methods are static and operate on a single {@code char}; the class
 * holds no state.
 */
public class CharUtilities {

    /**
     * Character code used to signal a character boundary in
     * inline content, such as an inline with borders and padding
     * or a nested block object.
     */
    public static final char CODE_EOT = 0;

    /** Character class: Unicode white space. */
    public static final int UCWHITESPACE = 0;
    /** Character class: Line feed. */
    public static final int LINEFEED = 1;
    /** Character class: Boundary between text runs. */
    public static final int EOT = 2;
    /** Character class: non-whitespace. */
    public static final int NONWHITESPACE = 3;
    /** Character class: XML whitespace. */
    public static final int XMLWHITESPACE = 4;

    /** normal space */
    public static final char SPACE = '\u0020';
    /** non-breaking space */
    public static final char NBSPACE = '\u00A0';
    /** zero-width space */
    public static final char ZERO_WIDTH_SPACE = '\u200B';
    /** zero-width no-break space (= byte order mark) */
    public static final char ZERO_WIDTH_NOBREAK_SPACE = '\uFEFF';
    /** soft hyphen */
    public static final char SOFT_HYPHEN = '\u00AD';

    /**
     * Utility class: Constructor prevents instantiating when subclassed.
     * Kept {@code protected} so that existing subclasses still compile.
     *
     * @throws UnsupportedOperationException always
     */
    protected CharUtilities() {
        throw new UnsupportedOperationException();
    }

    /**
     * Return the appropriate character class constant for the type
     * of the passed character.
     * <p>
     * The checks are ordered: {@link #CODE_EOT} and line feed are tested
     * first, then the XML whitespace characters (space, carriage return,
     * tab), and only then the remaining Unicode spaces. As a consequence the
     * normal space is reported as {@link #XMLWHITESPACE}, never as
     * {@link #UCWHITESPACE}.
     *
     * @param c character to inspect
     * @return one of {@link #EOT}, {@link #LINEFEED}, {@link #XMLWHITESPACE},
     *         {@link #UCWHITESPACE} or {@link #NONWHITESPACE}
     */
    public static int classOf(char c) {
        if (c == CODE_EOT) {
            return EOT;
        }
        if (c == '\n') {
            return LINEFEED;
        }
        if (c == ' ' || c == '\r' || c == '\t') {
            return XMLWHITESPACE;
        }
        if (isAnySpace(c)) {
            return UCWHITESPACE;
        }
        return NONWHITESPACE;
    }

    /**
     * Helper method to determine if the character is a
     * space with normal behavior. Normal behavior means that
     * it's not non-breaking.
     *
     * @param c character to inspect
     * @return true if the character is the normal space or one of the
     *         fixed-width spaces accepted by {@link #isFixedWidthSpace(char)}
     */
    public static boolean isBreakableSpace(char c) {
        return (c == SPACE || isFixedWidthSpace(c));
    }

    /**
     * Method to determine if the character is a (breakable) fixed-width space.
     * <p>
     * Accepted characters:
     * <ul>
     *   <li>U+2000 en quad</li>
     *   <li>U+2001 em quad</li>
     *   <li>U+2002 en space</li>
     *   <li>U+2003 em space</li>
     *   <li>U+2004 three-per-em space</li>
     *   <li>U+2005 four-per-em space</li>
     *   <li>U+2006 six-per-em space</li>
     *   <li>U+2007 figure space</li>
     *   <li>U+2008 punctuation space</li>
     *   <li>U+2009 thin space</li>
     *   <li>U+200A hair space</li>
     *   <li>U+200B zero width space</li>
     *   <li>U+3000 ideographic space</li>
     * </ul>
     *
     * @param c the character to check
     * @return true if the character has a fixed-width
     */
    public static boolean isFixedWidthSpace(char c) {
        return (c >= '\u2000' && c <= '\u200B') || c == '\u3000';
    }

    /**
     * Method to determine if the character is a nonbreaking
     * space.
     * <p>
     * Note: U+3000 (ideographic space) is accepted here and is also accepted
     * by {@link #isFixedWidthSpace(char)}, so it is reported as both
     * breakable and non-breakable. That overlap is existing behavior and is
     * preserved.
     *
     * @param c character to check
     * @return true if the character is a nbsp
     */
    public static boolean isNonBreakableSpace(char c) {
        return (c == NBSPACE                     // no-break space
                || c == '\u202F'                 // narrow no-break space
                || c == '\u3000'                 // ideographic space
                || c == ZERO_WIDTH_NOBREAK_SPACE); // zero width no-break space
    }

    /**
     * Method to determine if the character is an adjustable
     * space.
     *
     * @param c character to check
     * @return true if the character is adjustable
     */
    public static boolean isAdjustableSpace(char c) {
        //TODO: are there other kinds of adjustable spaces?
        return (c == SPACE          // normal space
                || c == NBSPACE);   // no-break space
    }

    /**
     * Determines if the character represents any kind of space, i.e. it is
     * either a breakable or a non-breakable space.
     *
     * @param c character to check
     * @return true if the character represents any kind of space
     */
    public static boolean isAnySpace(char c) {
        return isBreakableSpace(c) || isNonBreakableSpace(c);
    }

    /**
     * Indicates whether a character is classified as "Alphabetic" by the Unicode standard.
     * <p>
     * The check is an approximation based on the general category only:
     * Lu + Ll + Lt + Lm + Lo + Nl. Characters that are alphabetic solely
     * through the Other_Alphabetic property are not recognized.
     *
     * @param ch the character
     * @return true if the character is "Alphabetic"
     */
    public static boolean isAlphabetic(char ch) {
        //http://www.unicode.org/Public/UNIDATA/UCD.html#Alphabetic
        //Generated from: Other_Alphabetic + Lu + Ll + Lt + Lm + Lo + Nl
        int generalCategory = Character.getType(ch);
        switch (generalCategory) {
        case Character.UPPERCASE_LETTER: //Lu
        case Character.LOWERCASE_LETTER: //Ll
        case Character.TITLECASE_LETTER: //Lt
        case Character.MODIFIER_LETTER: //Lm
        case Character.OTHER_LETTER: //Lo
        case Character.LETTER_NUMBER: //Nl
            return true;
        default:
            //TODO if (ch in Other_Alphabetic) return true; (Probably need ICU4J for that)
            //Other_Alphabetic contains mostly more exotic characters
            return false;
        }
    }

}
186
187
Popular Tags