KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > utils > TextHtml


1 //
2
// ____.
3
// __/\ ______| |__/\. _______
4
// __ .____| | \ | +----+ \
5
// _______| /--| | | - \ _ | : - \_________
6
// \\______: :---| : : | : | \________>
7
// |__\---\_____________:______: :____|____:_____\
8
// /_____|
9
//
10
// . . . i n j a h i a w e t r u s t . . .
11
//
12
//
13
// TextHtml
14
//
15
// 23.10.2001 MAP Initial release
16
// 24.01.2002 MAP Method "text2html()".
17
// Check for the ordinal value of char before addressing
18
// "symbolicCode" array. Non ISO8859-1 char.
19
//
20
//
21

22 package org.jahia.utils;
23
24 import java.util.Arrays JavaDoc;
25
26
/**
 * Converts plain text to HTML text that uses symbolic codes (&xxxx;), and
 * converts HTML text containing such symbolic codes back to plain text.
 * @version 1.1
 * @author Philippe Martin
 */

public class TextHtml {

    /**
     * Method text2html: Convert a text to an HTML format.
     * <p>
     * Every character whose ordinal value has a non-empty entry in the
     * 'symbolicCode' table is replaced by that entry. All other characters
     * are copied unchanged: this includes characters outside the table (non
     * ISO 8859-1) and characters whose entry is empty, such as the
     * ampersand and the less-than and greater-than signs.
     *
     * @param text The original text string, may be null
     * @return The converted HTML text including symbolic codes, or null if
     *         the given text is null
     */
    public static String text2html(String text) {
        if (text == null) {
            return text;
        }
        // Symbolic codes are longer than the characters they replace; the
        // extra 10 is only a small head start, the buffer grows on demand.
        StringBuffer html = new StringBuffer(text.length() + 10);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            // Check the ordinal value before addressing the table: non
            // ISO 8859-1 characters have no entry.
            String sc = (c < symbolicCode.length) ? symbolicCode[c] : "";
            if (sc.length() == 0) {
                html.append(c);
            } else {
                html.append(sc);
            }
        }
        return html.toString();
    }

    /**
     * Method html2text: Convert an HTML text format to a normal text format.
     * <p>
     * Each candidate code starts at an ampersand and runs up to and
     * including the next semicolon; it stops early, without consuming it, at
     * the next ampersand or at the end of the text. A candidate that matches
     * a known symbolic code is replaced by the corresponding character,
     * any other candidate is copied unchanged.
     *
     * @param text The original HTML text string, may be null
     * @return The converted text without the known symbolic codes, or null
     *         if the given text is null
     */
    public static String html2text(String text) {
        if (text == null) {
            return text;
        }
        int length = text.length();
        StringBuffer plain = new StringBuffer(length);
        for (int i = 0; i < length; i++) {
            char c = text.charAt(i);
            if (c != '&') {
                plain.append(c);
                continue;
            }
            // Locate the end (exclusive) of the candidate code.
            int end = i + 1;
            while (end < length
                    && text.charAt(end) != '&'
                    && text.charAt(end) != ';') {
                end++;
            }
            if (end < length && text.charAt(end) == ';') {
                end++; // the terminating ';' belongs to the code
            }
            String code = text.substring(i, end);
            int index = Arrays.binarySearch(sortedSymbolicCode,
                                            new NumericSymbolicCode(code, 0));
            // Does the extracted code correspond to something ?
            if (index >= 0) {
                plain.append((char) sortedSymbolicCode[index].getNumericCode());
            } else {
                plain.append(code);
            }
            // The loop increment then moves to the first character after
            // the candidate (a following '&' starts a new candidate).
            i = end - 1;
        }
        return plain.toString();
    }

    /**
     * Array of symbolic codes ordered by numeric code ! <br>
     * The position of each entry is the ordinal value of the character it
     * stands for (ISO 8859-1 set of char). The empty definitions mean that
     * there is no symbolic code for that character or that the symbolic code
     * is not used.
     */
    private static final String[] symbolicCode = {
        // 0
        "", "", "", "", "", "", "", "", "", "",
        // 10
        "", "", "", "", "", "", "", "", "", "",
        // 20
        "", "", "", "", "",
        "&#25;", // numeric reference for code 25 (the original comment said "yen sign"; NOTE(review): confirm intent)
        "", "", "", "",
        // 30
        "", "", "", "",
        "&quot;", // quotation mark
        "", "", "", "", "", // unfortunately &apos; is not supported in HTML 4, only XHTML 1.0
        // 40
        "", "", "", "", "", "", "", "", "", "",
        // 50
        "", "", "", "", "", "", "", "", "", "",
        // 60
        "", "", "", "",
        "&#64;", // commercial at
        "", "", "", "", "",
        // 70
        "", "", "", "", "", "", "", "", "", "",
        // 80
        "", "", "", "", "", "", "", "", "", "",
        // 90
        "", "", "", "", "", "",
        "&#96;", // grave accent
        "", "", "",
        // 100
        "", "", "", "", "", "", "", "", "", "",
        // 110-130
        "", "", "", "", "", "", "", "", "", "",
        "", "", "", "", "", "", "", "", "", "",
        "", "", "", "", "", "", "", "", "", "",
        // 140
        "", "", "", "", "", "",
        "&#146;", // other apostrophe
        "", "", "",
        // 150
        "", "", "", "", "", "", "", "", "", "",
        // 160
        "", // non breaking space (should be &nbsp;)
        "&iexcl;", // inverted exclamation sign
        "&cent;", // cent sign
        "&pound;", // pound sterling sign
        "&curren;", // general currency sign
        "&yen;", // yen sign
        "&brvbar;", // broken vertical bar
        "&sect;", // section sign (legal)
        "&uml;", // umlaut (dieresis)
        "&copy;", // copyright
        // 170
        "&ordf;", // feminine ordinal
        "&laquo;", // guillemet left
        "&not;", // not sign
        "&shy;", // soft hyphen
        "&reg;", // registered trademark
        "&macr;", // macron accent
        "&deg;", // degree sign
        "&plusmn;", // plus or minus
        "&sup2;", // raised to square (superscript two)
        "&sup3;", // superscript three
        // 180
        "&acute;", // acute accent
        "&micro;", // micro sign
        "&para;", // paragraph sign (pilcrow)
        "&middot;", // middle dot
        "&cedil;", // cedilla mark
        "&sup1;", // raised to one (superscript one)
        "&ordm;", // masculine ordinal
        "&raquo;", // guillemet right
        "&frac14;", // one-fourth fraction
        "&frac12;", // half fraction
        // 190
        "&frac34;", // three-fourths fraction
        "&iquest;", // inverted question mark
        "&Agrave;", // A with grave accent
        "&Aacute;", // A with acute accent
        "&Acirc;", // A with circumflex accent
        "&Atilde;", // A with tilde accent
        "&Auml;", // A with umlaut mark
        "&Aring;", // A with ring (angstrom)
        "&AElig;", // AE diphthong (ligature)
        "&Ccedil;", // C with cedilla mark
        // 200
        "&Egrave;", // E with grave accent
        "&Eacute;", // E with acute accent
        "&Ecirc;", // E with circumflex accent
        "&Euml;", // E with umlaut mark
        "&Igrave;", // I with grave accent
        "&Iacute;", // I with acute accent
        "&Icirc;", // I with circumflex accent
        "&Iuml;", // I with umlaut mark
        "&ETH;", // Icelandic Capital Eth
        "&Ntilde;", // N with tilde accent
        // 210
        "&Ograve;", // O with grave accent
        "&Oacute;", // O with acute accent
        "&Ocirc;", // O with circumflex accent
        "&Otilde;", // O with tilde accent
        "&Ouml;", // O with umlaut mark
        "&times;", // multiply sign
        "&Oslash;", // O slash
        "&Ugrave;", // U with grave accent
        "&Uacute;", // U with acute accent
        "&Ucirc;", // U with circumflex accent
        // 220
        "&Uuml;", // U with umlaut mark
        "&Yacute;", // Y with acute accent
        "&THORN;", // Icelandic Capital Thorn
        "&szlig;", // small sharp s (sz ligature)
        "&agrave;", // a with grave accent
        "&aacute;", // a with acute accent
        "&acirc;", // a with circumflex accent
        "&atilde;", // a with tilde accent
        "&auml;", // a with umlaut mark
        "&aring;", // a with ring (angstrom)
        // 230
        "&aelig;", // ae diphthong (ligature)
        "&ccedil;", // c with cedilla mark
        "&egrave;", // e with grave accent
        "&eacute;", // e with acute accent
        "&ecirc;", // e with circumflex accent
        "&euml;", // e with umlaut mark
        "&igrave;", // i with grave accent
        "&iacute;", // i with acute accent
        "&icirc;", // i with circumflex accent
        "&iuml;", // i with umlaut mark
        // 240
        "&eth;", // Icelandic small eth
        "&ntilde;", // n with tilde accent
        "&ograve;", // o with grave accent
        "&oacute;", // o with acute accent
        "&ocirc;", // o with circumflex accent
        "&otilde;", // o with tilde accent
        "&ouml;", // o with umlaut mark
        "&divide;", // divide sign
        "&oslash;", // o slash
        "&ugrave;", // u with grave accent
        // 250
        "&uacute;", // u with acute accent
        "&ucirc;", // u with circumflex accent
        "&uuml;", // u with umlaut mark
        "&yacute;", // y with acute accent
        "&thorn;", // Icelandic small thorn
        "&yuml;", // y with umlaut mark
    };

    /**
     * Misspelled symbolic codes that earlier versions of the 'symbolicCode'
     * table contained (missing ';', or the letter 'l' instead of the digit
     * '1'). They are never produced by text2html() any more, but html2text()
     * still accepts them so that text converted by an earlier version is
     * decoded exactly as before. Entry i maps to 'legacyNumericCode[i]'.
     */
    private static final String[] legacySymbolicCode = {
        "&para", "&supl;", "&ograve", "&otilde",
    };

    /**
     * Numeric codes of the 'legacySymbolicCode' entries, in the same order.
     */
    private static final int[] legacyNumericCode = {
        182, 185, 242, 245,
    };

    /**
     * Array of symbolic codes ordered by symbolic code !<br>
     * This array is the reciprocal of the 'symbolicCode' array, extended
     * with the 'legacySymbolicCode' aliases. It is filled and sorted in the
     * static initialization block below and searched by html2text().
     */
    private static final NumericSymbolicCode[] sortedSymbolicCode =
            new NumericSymbolicCode[symbolicCode.length + legacySymbolicCode.length];

    /**
     * This class is the structure used for the 'sortedSymbolicCode' array.
     * Each symbolic code string has its corresponding numerical code.<br>
     * This class also implements the 'Comparable' interface (ordering by
     * symbolic code string) to ease the sorting process in the
     * initialization block and the binary search in html2text().
     */
    private static final class NumericSymbolicCode implements Comparable {

        private final String symbolicCode;
        private final int numericCode;

        public NumericSymbolicCode(String symbolicCode, int numericCode) {
            this.symbolicCode = symbolicCode;
            this.numericCode = numericCode;
        }

        public String getSymbolicCode() {
            return symbolicCode;
        }

        public int getNumericCode() {
            return numericCode;
        }

        /**
         * Compares on the symbolic code string only; the numeric code is
         * ignored, which lets a search key carry any numeric code.
         */
        public int compareTo(Object object) {
            NumericSymbolicCode nsc = (NumericSymbolicCode) object;
            return symbolicCode.compareTo(nsc.symbolicCode);
        }
    }

    /**
     * Initialization and sorting of the 'sortedSymbolicCode' array.
     */
    static {
        for (int i = 0; i < symbolicCode.length; i++) {
            sortedSymbolicCode[i] = new NumericSymbolicCode(symbolicCode[i], i);
        }
        for (int i = 0; i < legacySymbolicCode.length; i++) {
            sortedSymbolicCode[symbolicCode.length + i] =
                    new NumericSymbolicCode(legacySymbolicCode[i], legacyNumericCode[i]);
        }
        Arrays.sort(sortedSymbolicCode);
    }
}
306
Popular Tags