KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > freecs > util > HtmlEncoder


1 /*
2  * Helma License Notice
3  *
4  * The contents of this file are subject to the Helma License
5  * Version 2.0 (the "License"). You may not use this file except in
6  * compliance with the License. A copy of the License is available at
7  * http://adele.helma.org/download/helma/license.txt
8  *
9  * http://adele.helma.org/download/helma/license.txt:
10  *
11
12  Copyright (c) 1999-2002 Helma Project. All rights reserved.
13
14  Redistribution and use in source and binary forms, with or without
15  modification, are permitted provided that the following conditions
16  are met:
17
18  1. Redistributions of source code must retain the above copyright
19     notice, this list of conditions and the following disclaimer.
20
21  2. Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in
23     the documentation and/or other materials provided with the
24     distribution.
25
26  3. Products derived from this software may not be called "Helma"
27     or "Hop", nor may "Helma" or "Hop" appear in their name, without
28     prior written permission of the Helma Project Group. For written
29     permission, please contact helma@helma.org.
30
31
32  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
33  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
35  DISCLAIMED. IN NO EVENT SHALL THE HELMA PROJECT OR ITS
36  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
37  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
38  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
39  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
41  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
43  OF THE POSSIBILITY OF SUCH DAMAGE.
44  */

45
46 package freecs.util;
47
48 import java.util.*;
49
50 /**
51  * This is a utility class to encode special characters and do formatting
52  * for HTML output.
53  * @author Hannes Wallnoefer
54  */

55 public final class HtmlEncoder {
56
57     // transformation table for characters 128 to 255. These actually fall into two
58
// groups, put together for efficiency: "Windows" chacacters 128-159 such as
59
// "smart quotes", which are encoded to valid Unicode entities, and
60
// valid ISO-8859 caracters 160-255, which are encoded to the symbolic HTML
61
// entity. Everything >= 256 is encoded to a numeric entity.
62
//
63
// for mor on HTML entities see http://www.pemberley.com/janeinfo/latin1.html and
64
// ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
65
//
66
static final String JavaDoc[] transform = {
67         "€", // 128
68
"", // empty string means character is undefined in unicode
69
"‚",
70         "ƒ",
71         "„",
72         "…",
73         "†",
74         "‡",
75         "ˆ",
76         "‰",
77         "Š",
78         "‹",
79         "Œ",
80         "",
81         "Ž",
82         "",
83         "",
84         "‘",
85         "’",
86         "“",
87         "”",
88         "•",
89         "–",
90         "—",
91         "˜",
92         "™",
93         "š",
94         "›",
95         "œ",
96         "",
97         "ž",
98         "Ÿ", // 159
99
" ", // 160
100
"¡",
101         "¢",
102         "£",
103         "¤",
104         "¥",
105         "¦",
106         "§",
107         "¨",
108         "©",
109         "ª",
110         "«",
111         "¬",
112         "­",
113         "®",
114         "¯",
115         "°",
116         "±",
117         "²",
118         "³",
119         "´",
120         "µ",
121         "¶",
122         "·",
123         "¸",
124         "¹",
125         "º",
126         "»",
127         "¼",
128         "½",
129         "¾",
130         "¿",
131         "À",
132         "Á",
133         "Â",
134         "Ã",
135         "Ä",
136         "Å",
137         "Æ",
138         "Ç",
139         "È",
140         "É",
141         "Ê",
142         "Ë",
143         "Ì",
144         "Í",
145         "Î",
146         "Ï",
147         "Ð",
148         "Ñ",
149         "Ò",
150         "Ó",
151         "Ô",
152         "Õ",
153         "Ö",
154         "×",
155         "Ø",
156         "Ù",
157         "Ú",
158         "Û",
159         "Ü",
160         "Ý",
161         "Þ",
162         "ß",
163         "à",
164         "á",
165         "â",
166         "ã",
167         "ä",
168         "å",
169         "æ",
170         "ç",
171         "è",
172         "é",
173         "ê",
174         "ë",
175         "ì",
176         "í",
177         "î",
178         "ï",
179         "ð",
180         "ñ",
181         "ò",
182         "ó",
183         "ô",
184         "õ",
185         "ö",
186         "÷",
187         "ø",
188         "ù",
189         "ú",
190         "û",
191         "ü",
192         "ý",
193         "þ",
194         "ÿ" // 255
195
};
196
197     static final HashSet allTags = new HashSet();
198
199     static {
200         allTags.add("a");
201         allTags.add("abbr");
202         allTags.add("acronym");
203         allTags.add("address");
204         allTags.add("applet");
205         allTags.add("area");
206         allTags.add("b");
207         allTags.add("base");
208         allTags.add("basefont");
209         allTags.add("bdo");
210         allTags.add("bgsound");
211         allTags.add("big");
212         allTags.add("blink");
213         allTags.add("blockquote");
214         allTags.add("bq");
215         allTags.add("body");
216         allTags.add("br");
217         allTags.add("button");
218         allTags.add("caption");
219         allTags.add("center");
220         allTags.add("cite");
221         allTags.add("code");
222         allTags.add("col");
223         allTags.add("colgroup");
224         allTags.add("del");
225         allTags.add("dfn");
226         allTags.add("dir");
227         allTags.add("div");
228         allTags.add("dl");
229         allTags.add("dt");
230         allTags.add("dd");
231         allTags.add("em");
232         allTags.add("embed");
233         allTags.add("fieldset");
234         allTags.add("font");
235         allTags.add("form");
236         allTags.add("frame");
237         allTags.add("frameset");
238         allTags.add("h1");
239         allTags.add("h2");
240         allTags.add("h3");
241         allTags.add("h4");
242         allTags.add("h5");
243         allTags.add("h6");
244         allTags.add("head");
245         allTags.add("html");
246         allTags.add("hr");
247         allTags.add("i");
248         allTags.add("iframe");
249         allTags.add("img");
250         allTags.add("input");
251         allTags.add("ins");
252         allTags.add("isindex");
253         allTags.add("kbd");
254         allTags.add("label");
255         allTags.add("legend");
256         allTags.add("li");
257         allTags.add("link");
258         allTags.add("listing");
259         allTags.add("map");
260         allTags.add("marquee");
261         allTags.add("menu");
262         allTags.add("meta");
263         allTags.add("nobr");
264         allTags.add("noframes");
265         allTags.add("noscript");
266         allTags.add("object");
267         allTags.add("ol");
268         allTags.add("option");
269         allTags.add("optgroup");
270         allTags.add("p");
271         allTags.add("param");
272         allTags.add("plaintext");
273         allTags.add("pre");
274         allTags.add("q");
275         allTags.add("s");
276         allTags.add("samp");
277         allTags.add("script");
278         allTags.add("select");
279         allTags.add("small");
280         allTags.add("span");
281         allTags.add("strike");
282         allTags.add("strong");
283         allTags.add("style");
284         allTags.add("sub");
285         allTags.add("sup");
286         allTags.add("table");
287         allTags.add("tbody");
288         allTags.add("td");
289         allTags.add("textarea");
290         allTags.add("tfoot");
291         allTags.add("th");
292         allTags.add("thead");
293         allTags.add("title");
294         allTags.add("tr");
295         allTags.add("tt");
296         allTags.add("u");
297         allTags.add("ul");
298         allTags.add("var");
299         allTags.add("wbr");
300         allTags.add("xmp");
301     }
302
303     // HTML block tags need to suppress automatic newline to <br>
304
// conversion around them to look good. However, they differ
305
// in how many newlines around them should ignored. These sets
306
// help to treat each tag right in newline conversion.
307
static final HashSet internalTags = new HashSet();
308     static final HashSet blockTags = new HashSet();
309     static final HashSet semiBlockTags = new HashSet();
310
311     static {
312         // actual block level elements
313
semiBlockTags.add("address");
314         semiBlockTags.add("dir");
315         semiBlockTags.add("div");
316         semiBlockTags.add("table");
317
318         blockTags.add("blockquote");
319         blockTags.add("center");
320         blockTags.add("dl");
321         blockTags.add("fieldset");
322         blockTags.add("form");
323         blockTags.add("h1");
324         blockTags.add("h2");
325         blockTags.add("h3");
326         blockTags.add("h4");
327         blockTags.add("h5");
328         blockTags.add("h6");
329         blockTags.add("hr");
330         blockTags.add("isindex");
331         blockTags.add("ol");
332         blockTags.add("p");
333         blockTags.add("pre");
334         blockTags.add("ul");
335
336         internalTags.add("menu");
337         internalTags.add("noframes");
338         internalTags.add("noscript");
339
340         /// to be treated as block level elements
341
semiBlockTags.add("th");
342
343         blockTags.add("br");
344         blockTags.add("dd");
345         blockTags.add("dt");
346         blockTags.add("frameset");
347         blockTags.add("li");
348         blockTags.add("td");
349
350         internalTags.add("tbody");
351         internalTags.add("tfoot");
352         internalTags.add("thead");
353         internalTags.add("tr");
354     }
355
356     // set of tags that are always empty
357
static final HashSet emptyTags = new HashSet();
358
359     static {
360         emptyTags.add("area");
361         emptyTags.add("base");
362         emptyTags.add("basefont");
363         emptyTags.add("br");
364         emptyTags.add("col");
365         emptyTags.add("frame");
366         emptyTags.add("hr");
367         emptyTags.add("img");
368         emptyTags.add("input");
369         emptyTags.add("isindex");
370         emptyTags.add("link");
371         emptyTags.add("meta");
372         emptyTags.add("param");
373     }
374
375     static final byte TAG_NAME = 0;
376     static final byte TAG_SPACE = 1;
377     static final byte TAG_ATT_NAME = 2;
378     static final byte TAG_ATT_VAL = 3;
379
380     static final byte TEXT = 0;
381     static final byte SEMIBLOCK = 1;
382     static final byte BLOCK = 2;
383     static final byte INTERNAL = 3;
384
385     static final String JavaDoc newLine = System.getProperty("line.separator");
386
387     /**
388      * Do "smart" encodging on a string. This means that valid HTML entities and tags,
389      * Helma macros and HTML comments are passed through unescaped, while
390      * other occurrences of '<', '>' and '&' are encoded to HTML entities.
391      */

392     public final static String JavaDoc encode(String JavaDoc str) {
393         if (str == null) {
394             return null;
395         }
396
397         int l = str.length();
398
399         if (l == 0) {
400             return "";
401         }
402
403         // try to make stringbuffer large enough from the start
404
StringBuffer JavaDoc ret = new StringBuffer JavaDoc(Math.round(l * 1.4f));
405
406         encode(str, ret, false, null);
407
408         return ret.toString();
409     }
410
411     /**
412      * Do "smart" encodging on a string. This means that valid HTML entities and tags,
413      * Helma macros and HTML comments are passed through unescaped, while
414      * other occurrences of '<', '>' and '&' are encoded to HTML entities.
415      */

416     public final static void encode(String JavaDoc str, StringBuffer JavaDoc ret) {
417         encode(str, ret, false, null);
418     }
419
420     /**
421      * Do "smart" encodging on a string. This means that valid HTML entities and tags,
422      * Helma macros and HTML comments are passed through unescaped, while
423      * other occurrences of '<', '>' and '&' are encoded to HTML entities.
424      *
425      * @param str the string to encode
426      * @param ret the string buffer to encode to
427      * @param paragraphs if true use p tags for paragraphs, otherwise just use br's
428      * @param allowedTags a set containing the names of allowed tags as strings. All other
429      * tags will be escaped
430      */

431     public final static void encode(String JavaDoc str, StringBuffer JavaDoc ret,
432                                     boolean paragraphs, Set allowedTags) {
433         if (str == null) {
434             return;
435         }
436
437         int l = str.length();
438
439         // where to insert the <p> tag in case we want to create a paragraph later on
440
int paragraphStart = ret.length();
441
442         // what kind of element/text are we leaving and entering?
443
// this is one of TEXT|SEMIBLOCK|BLOCK|INTERNAL
444
// depending on this information, we decide whether and how to insert
445
// paragraphs and line breaks. "entering" a tag means we're at the '<'
446
// and exiting means we're at the '>', not that it's a start or close tag.
447
byte entering = TEXT;
448         byte exiting = TEXT;
449
450         Stack openTags = new Stack();
451
452         // are we currently within a < and a > that consitute some kind of tag?
453
// we use tag balancing to know whether we are inside a tag (and should
454
// pass things through unchanged) or outside (and should encode stuff).
455
boolean insideTag = false;
456
457         // are we inside an HTML tag?
458
boolean insideHtmlTag = false;
459         boolean insideCloseTag = false;
460         byte htmlTagMode = TAG_NAME;
461
462         // if we are inside a <code> tag, we encode everything to make
463
// documentation work easier
464
boolean insideCodeTag = false;
465         boolean insidePreTag = false;
466
467         // are we within a Helma <% macro %> tag? We treat macro tags and
468
// comments specially, since we can't rely on tag balancing
469
// to know when we leave a macro tag or comment.
470
boolean insideMacroTag = false;
471
472         // are we inside an HTML comment?
473
boolean insideComment = false;
474
475         // the quotation mark we are in within an HTML or Macro tag, if any
476
char htmlQuoteChar = '\u0000';
477         char macroQuoteChar = '\u0000';
478
479         // number of newlines met since the last non-whitespace character
480
int linebreaks = 0;
481
482         // did we meet a backslash escape?
483
boolean escape = false;
484
485         boolean triggerBreak = false;
486
487         for (int i = 0; i < l; i++) {
488             char c = str.charAt(i);
489
490             // step one: check if this is the beginning of an HTML tag, comment or
491
// Helma macro.
492
if (c == '<') {
493                 if (i < (l - 2)) {
494                     if (!insideMacroTag && ('%' == str.charAt(i + 1))) {
495                         // this is the beginning of a Helma macro tag
496
if (!insideCodeTag) {
497                             insideMacroTag = insideTag = true;
498                             macroQuoteChar = '\u0000';
499                         }
500                     } else if (('!' == str.charAt(i + 1)) && ('-' == str.charAt(i + 2))) {
501                         // the beginning of an HTML comment?
502
if (!insideCodeTag) {
503                             insideComment = insideTag = ((i < (l - 3)) &&
504                                                         ('-' == str.charAt(i + 3)));
505                         }
506                     } else if (!insideTag) {
507                         // check if this is a HTML tag.
508
insideCloseTag = ('/' == str.charAt(i + 1));
509                         int tagStart = insideCloseTag ? (i + 2) : (i + 1);
510                         int j = tagStart;
511
512                         while ((j < l) && Character.isLetterOrDigit(str.charAt(j)))
513                             j++;
514
515                         if ((j > tagStart) && (j < l)) {
516                             String JavaDoc tagName = str.substring(tagStart, j).toLowerCase();
517
518                             if ("code".equals(tagName) && insideCloseTag &&
519                                     insideCodeTag) {
520                                 insideCodeTag = false;
521                             }
522
523                             if (((allowedTags == null) || allowedTags.contains(tagName)) &&
524                                     allTags.contains(tagName) && !insideCodeTag) {
525                                 insideHtmlTag = insideTag = true;
526                                 htmlQuoteChar = '\u0000';
527                                 htmlTagMode = TAG_NAME;
528
529                                 exiting = entering;
530                                 entering = TEXT;
531
532                                 if (internalTags.contains(tagName)) {
533                                     entering = INTERNAL;
534                                 } else if (blockTags.contains(tagName)) {
535                                     entering = BLOCK;
536                                 } else if (semiBlockTags.contains(tagName)) {
537                                     entering = paragraphs ? BLOCK : SEMIBLOCK;
538                                 }
539
540                                 if (entering > 0) {
541                                     triggerBreak = !insidePreTag;
542                                 }
543
544                                 if (insideCloseTag) {
545                                     int t = openTags.search(tagName);
546
547                                     if (t == -1) {
548                                         i = j;
549                                         insideHtmlTag = insideTag = false;
550
551                                         continue;
552                                     } else if (t > 1) {
553                                         for (int k = 1; k < t; k++) {
554                                             Object JavaDoc tag = openTags.pop();
555                                             if (!emptyTags.contains(tag)) {
556                                                 ret.append("</");
557                                                 ret.append(tag);
558                                                 ret.append(">");
559                                             }
560                                         }
561                                     }
562
563                                     openTags.pop();
564                                 } else {
565                                     openTags.push(tagName);
566                                 }
567
568                                 if ("code".equals(tagName) && !insideCloseTag) {
569                                     insideCodeTag = true;
570                                 }
571
572                                 if ("pre".equals(tagName)) {
573                                     insidePreTag = !insideCloseTag;
574                                 }
575                             }
576                         }
577                     }
578                 } // if (i < l-2)
579
}
580
581             if ((triggerBreak || linebreaks > 0) && !Character.isWhitespace(c)) {
582
583                 if (!insideTag) {
584                     exiting = entering;
585                     entering = TEXT;
586                     if (exiting >= SEMIBLOCK) {
587                         paragraphStart = ret.length();
588                     }
589                 }
590
591                 if (entering != INTERNAL && exiting != INTERNAL) {
592                     int swallowBreaks = 0;
593                     if (paragraphs &&
594                           (entering != BLOCK || exiting != BLOCK) &&
595                           (exiting < BLOCK) &&
596                           (linebreaks > 1) &&
597                           paragraphStart < ret.length()) {
598                         ret.insert(paragraphStart, "<p>");
599                         ret.append("</p>");
600                         swallowBreaks = 2;
601                     }
602
603                     // treat entering a SEMIBLOCK as entering a TEXT
604
int _entering = entering == SEMIBLOCK ? TEXT : entering;
605                     for (int k = linebreaks-1; k>=0; k--) {
606                         if (k >= swallowBreaks && k >= _entering && k >= exiting) {
607                             ret.append("<br />");
608                         }
609                         ret.append(newLine);
610                     }
611                     if (exiting >= SEMIBLOCK || linebreaks > 1) {
612                         paragraphStart = ret.length();
613                     }
614
615                 }
616
617                 linebreaks = 0;
618                 triggerBreak = false;
619             }
620
621             switch (c) {
622                 case '<':
623
624                     if (insideTag) {
625                         ret.append('<');
626                     } else {
627                         ret.append("&lt;");
628                     }
629
630                     break;
631
632                 case '&':
633
634                     // check if this is an HTML entity already,
635
// in which case we pass it though unchanged
636
if ((i < (l - 3)) && !insideCodeTag) {
637                         // is this a numeric entity?
638
if (str.charAt(i + 1) == '#') {
639                             int j = i + 2;
640
641                             while ((j < l) && Character.isDigit(str.charAt(j)))
642                                 j++;
643
644                             if ((j < l) && (str.charAt(j) == ';')) {
645                                 ret.append("&");
646
647                                 break;
648                             }
649                         } else {
650                             int j = i + 1;
651
652                             while ((j < l) && Character.isLetterOrDigit(str.charAt(j)))
653                                 j++;
654
655                             if ((j < l) && (str.charAt(j) == ';')) {
656                                 ret.append("&");
657
658                                 break;
659                             }
660                         }
661                     }
662
663                     // we didn't reach a break, so encode the ampersand as HTML entity
664
ret.append("&amp;");
665
666                     break;
667
668                 case '\\':
669                     ret.append(c);
670
671                     if (insideTag && !insideComment) {
672                         escape = !escape;
673                     }
674
675                     break;
676
677                 case '"':
678                 case '\'':
679                     ret.append(c);
680
681                     if (!insideComment) {
682                         // check if the quote is escaped
683
if (insideMacroTag) {
684                             if (escape) {
685                                 escape = false;
686                             } else if (macroQuoteChar == c) {
687                                 macroQuoteChar = '\u0000';
688                             } else if (macroQuoteChar == '\u0000') {
689                                 macroQuoteChar = c;
690                             }
691                         } else if (insideHtmlTag) {
692                             if (escape) {
693                                 escape = false;
694                             } else if (htmlQuoteChar == c) {
695                                 htmlQuoteChar = '\u0000';
696                                 htmlTagMode = TAG_SPACE;
697                             } else if (htmlQuoteChar == '\u0000') {
698                                 htmlQuoteChar = c;
699                             }
700                         }
701                     }
702
703                     break;
704
705                 case '\n':
706                     if (insideTag || insidePreTag) {
707                         ret.append('\n');
708                     } else {
709                         linebreaks++;
710                     }
711
712                     break;
713                 case '\r':
714                     if (insideTag || insidePreTag) {
715                         ret.append('\r');
716                     }
717                     break;
718
719                 case '>':
720
721                     // For Helma macro tags and comments, we overrule tag balancing,
722
// i.e. we don't require that '<' and '>' be balanced within
723
// macros and comments. Rather, we check for the matching closing tag.
724
if (insideComment) {
725                         ret.append('>');
726                         insideComment = !((str.charAt(i - 2) == '-') &&
727                                         (str.charAt(i - 1) == '-'));
728                     } else if (insideMacroTag) {
729                         ret.append('>');
730                         insideMacroTag = !((str.charAt(i - 1) == '%') &&
731                                          (macroQuoteChar == '\u0000'));
732                     } else if (insideHtmlTag) {
733                         ret.append('>');
734
735                         // only leave HTML tag if quotation marks are balanced
736
// within that tag.
737
insideHtmlTag = htmlQuoteChar != '\u0000';
738
739                         // Check if this is an empty tag so we don't generate an
740
// additional </close> tag.
741
if (str.charAt(i - 1) == '/') {
742                             // this is to avoid misinterpreting tags like
743
// <a HREF=http://foo/> as empty
744
if (htmlTagMode != TAG_ATT_VAL && htmlTagMode != TAG_ATT_NAME) {
745                                 openTags.pop();
746                             }
747                         }
748
749                         exiting = entering;
750                         if (exiting > 0) {
751                            triggerBreak = !insidePreTag;
752                         }
753
754                     } else {
755                         ret.append("&gt;");
756                     }
757
758                     // check if we still are inside any kind of tag
759
insideTag = insideComment || insideMacroTag || insideHtmlTag;
760                     insideCloseTag = insideTag;
761
762                     break;
763
764                 default:
765
766                     if (insideHtmlTag && !insideCloseTag) {
767                         switch(htmlTagMode) {
768                             case TAG_NAME:
769                                 if (!Character.isLetterOrDigit(c)) {
770                                     htmlTagMode = TAG_SPACE;
771                                 }
772                                 break;
773                             case TAG_SPACE:
774                                 if (Character.isLetterOrDigit(c)) {
775                                     htmlTagMode = TAG_ATT_NAME;
776                                 }
777                                 break;
778                             case TAG_ATT_NAME:
779                                 if (c == '=') {
780                                     htmlTagMode = TAG_ATT_VAL;
781                                 } else if (c == ' ') {
782                                     htmlTagMode = TAG_SPACE;
783                                 }
784                                 break;
785                             case TAG_ATT_VAL:
786                                 if (Character.isWhitespace(c) && htmlQuoteChar == '\u0000') {
787                                     htmlTagMode = TAG_SPACE;
788                                 }
789                                 break;
790                         }
791                     }
792                     if (c < 128) {
793                         ret.append(c);
794                     } else if ((c >= 128) && (c < 256)) {
795                         ret.append(transform[c - 128]);
796                     } else {
797                         ret.append("&#");
798                         ret.append((int) c);
799                         ret.append(";");
800                     }
801
802                     escape = false;
803             }
804         }
805
806         // if tags were opened but not closed, close them.
807
int o = openTags.size();
808
809         if (o > 0) {
810             for (int k = 0; k < o; k++) {
811                 Object JavaDoc tag = openTags.pop();
812                 if (!emptyTags.contains(tag)) {
813                     ret.append("</");
814                     ret.append(tag);
815                     ret.append(">");
816                 }
817             }
818         }
819
820         // add remaining newlines we may have collected
821
int swallowBreaks = 0;
822         if (paragraphs && entering < BLOCK) {
823             ret.insert(paragraphStart, "<p>");
824             ret.append("</p>");
825             swallowBreaks = 2;
826         }
827
828         if (linebreaks > 0) {
829             for (int i = linebreaks-1; i>=0; i--) {
830                 if (i >= swallowBreaks && i > exiting) {
831                     ret.append("<br />");
832                 }
833                 ret.append(newLine);
834             }
835         }
836     }
837
838     /**
839      *
840      */

841     public final static String JavaDoc encodeFormValue(String JavaDoc str) {
842         if (str == null) {
843             return null;
844         }
845
846         int l = str.length();
847
848         if (l == 0) {
849             return "";
850         }
851
852         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(Math.round(l * 1.2f));
853
854         encodeAll(str, ret, false);
855
856         return ret.toString();
857     }
858
859     /**
860      *
861      */

862     public final static void encodeFormValue(String JavaDoc str, StringBuffer JavaDoc ret) {
863         encodeAll(str, ret, false);
864     }
865
866     /**
867      *
868      */

869     public final static String JavaDoc encodeAll(String JavaDoc str) {
870         if (str == null) {
871             return null;
872         }
873
874         int l = str.length();
875
876         if (l == 0) {
877             return "";
878         }
879
880         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(Math.round(l * 1.2f));
881
882         encodeAll(str, ret, true);
883
884         return ret.toString();
885     }
886
887     /**
888      *
889      */

890     public final static void encodeAll(String JavaDoc str, StringBuffer JavaDoc ret) {
891         encodeAll(str, ret, true);
892     }
893
894     /**
895      *
896      */

897     public final static void encodeAll(String JavaDoc str, StringBuffer JavaDoc ret, boolean encodeNewline) {
898         if (str == null) {
899             return;
900         }
901
902         int l = str.length();
903
904         for (int i = 0; i < l; i++) {
905             char c = str.charAt(i);
906
907             switch (c) {
908                 case '<':
909                     ret.append("&lt;");
910
911                     break;
912
913                 case '>':
914                     ret.append("&gt;");
915
916                     break;
917
918                 case '&':
919                     ret.append("&amp;");
920
921                     break;
922
923                 case '"':
924                     ret.append("&quot;");
925
926                     break;
927
928                 case '\n':
929                     if (encodeNewline) {
930                         ret.append("<br />");
931                     }
932                     ret.append('\n');
933
934                     break;
935
936                 default:
937
938                     // ret.append (c);
939
if (c < 128) {
940                         ret.append(c);
941                     } else if ((c >= 128) && (c < 256)) {
942                         ret.append(transform[c - 128]);
943                     } else {
944                         ret.append("&#");
945                         ret.append((int) c);
946                         ret.append(";");
947                     }
948             }
949         }
950     }
951
952     /**
953      *
954      *
955      * @param str ...
956      *
957      * @return ...
958      */

959     public final static String JavaDoc encodeXml(String JavaDoc str) {
960         if (str == null) {
961             return null;
962         }
963
964         int l = str.length();
965
966         if (l == 0) {
967             return "";
968         }
969
970         StringBuffer JavaDoc ret = new StringBuffer JavaDoc(Math.round(l * 1.2f));
971
972         encodeXml(str, ret);
973
974         return ret.toString();
975     }
976
977     /**
978      *
979      *
980      * @param str ...
981      * @param ret ...
982      */

983     public final static void encodeXml(String JavaDoc str, StringBuffer JavaDoc ret) {
984         if (str == null) {
985             return;
986         }
987
988         int l = str.length();
989
990         for (int i = 0; i < l; i++) {
991             char c = str.charAt(i);
992
993             switch (c) {
994                 case '<':
995                     ret.append("&lt;");
996
997                     break;
998
999                 case '>':
1000                    ret.append("&gt;");
1001
1002                    break;
1003
1004                case '&':
1005                    ret.append("&amp;");
1006
1007                    break;
1008
1009                case '"':
1010                    ret.append("&quot;");
1011
1012                    break;
1013
1014                case '\'':
1015                    ret.append("&apos;");
1016
1017                    break;
1018
1019                default:
1020
1021                    if (c < 0x20) {
1022                        // sort out invalid XML characters below 0x20 - all but 0x9, 0xA and 0xD.
1023
// The trick is an adaption of java.lang.Character.isSpace().
1024
if (((((1L << 0x9) | (1L << 0xA) | (1L << 0xD)) >> c) & 1L) != 0) {
1025                            ret.append(c);
1026                        }
1027                    } else {
1028                        ret.append(c);
1029                    }
1030            }
1031        }
1032    }
1033
1034    // test method
1035
public static String JavaDoc printCharRange(int from, int to) {
1036        StringBuffer JavaDoc response = new StringBuffer JavaDoc();
1037
1038        for (int i = from; i < to; i++) {
1039            response.append(i);
1040            response.append(" ");
1041            response.append((char) i);
1042            response.append(" ");
1043
1044            if (i < 128) {
1045                response.append((char) i);
1046            } else if ((i >= 128) && (i < 256)) {
1047                response.append(transform[i - 128]);
1048            } else {
1049                response.append("&#");
1050                response.append(i);
1051                response.append(";");
1052            }
1053
1054            response.append("\r\n");
1055        }
1056
1057        return response.toString();
1058    }
1059
1060    // for testing...
1061
public static void main(String JavaDoc[] args) {
1062        for (int i = 0; i < args.length; i++)
1063            System.err.println(encode(args[i]));
1064    }
1065}
1066 // end of class
1067
Popular Tags