KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > h2 > tools > indexer > HtmlConverter


/*
 * Copyright 2004-2006 H2 Group. Licensed under the H2 License, Version 1.0 (http://h2database.com/html/license.html).
 * Initial Developer: H2 Group
 */

package org.h2.tools.indexer;

import java.util.HashMap;

/**
 * Replaces HTML character references in a string with the characters they
 * stand for.
 *
 * Supported forms are the named entities listed in {@link #CHARS}
 * (for example <code>&amp;lt;</code>), decimal references
 * (<code>&amp;#65;</code>) and hexadecimal references
 * (<code>&amp;#x41;</code>).
 *
 * The class keeps to pre-generics syntax and library calls to match the
 * language level used by the rest of this file.
 */
public class HtmlConverter {

    /**
     * Lookup table from entity name (without the leading '&amp;' and the
     * trailing ';') to a one-character String holding the replacement.
     * Filled once by the static initializer and only read afterwards.
     */
    private static final HashMap charMap = new HashMap();

    /**
     * Named entities in the form "name:decimalCodePoint". Every code point
     * in this table fits into a single Java char.
     */
    private static final String[] CHARS = {
    "quot:34", "amp:38", "lt:60", "gt:62", "nbsp:160", "iexcl:161", "cent:162",
    "pound:163", "curren:164", "yen:165", "brvbar:166", "sect:167", "uml:168",
    "copy:169", "ordf:170", "laquo:171", "not:172", "shy:173", "reg:174", "macr:175",
    "deg:176", "plusmn:177", "sup2:178", "sup3:179", "acute:180", "micro:181",
    "para:182", "middot:183", "cedil:184", "sup1:185", "ordm:186", "raquo:187",
    "frac14:188", "frac12:189", "frac34:190", "iquest:191", "Agrave:192",
    "Aacute:193", "Acirc:194", "Atilde:195", "Auml:196", "Aring:197", "AElig:198",
    "Ccedil:199", "Egrave:200", "Eacute:201", "Ecirc:202", "Euml:203", "Igrave:204",
    "Iacute:205", "Icirc:206", "Iuml:207", "ETH:208", "Ntilde:209", "Ograve:210",
    "Oacute:211", "Ocirc:212", "Otilde:213", "Ouml:214", "times:215", "Oslash:216",
    "Ugrave:217", "Uacute:218", "Ucirc:219", "Uuml:220", "Yacute:221", "THORN:222",
    "szlig:223", "agrave:224", "aacute:225", "acirc:226", "atilde:227", "auml:228",
    "aring:229", "aelig:230", "ccedil:231", "egrave:232", "eacute:233", "ecirc:234",
    "euml:235", "igrave:236", "iacute:237", "icirc:238", "iuml:239", "eth:240",
    "ntilde:241", "ograve:242", "oacute:243", "ocirc:244", "otilde:245", "ouml:246",
    "divide:247", "oslash:248", "ugrave:249", "uacute:250", "ucirc:251", "uuml:252",
    "yacute:253", "thorn:254", "yuml:255", "Alpha:913", "alpha:945", "Beta:914",
    "beta:946", "Gamma:915", "gamma:947", "Delta:916", "delta:948", "Epsilon:917",
    "epsilon:949", "Zeta:918", "zeta:950", "Eta:919", "eta:951", "Theta:920",
    "theta:952", "Iota:921", "iota:953", "Kappa:922", "kappa:954", "Lambda:923",
    "lambda:955", "Mu:924", "mu:956", "Nu:925", "nu:957", "Xi:926", "xi:958",
    "Omicron:927", "omicron:959", "Pi:928", "pi:960", "Rho:929", "rho:961",
    "Sigma:931", "sigmaf:962", "sigma:963", "Tau:932", "tau:964", "Upsilon:933",
    "upsilon:965", "Phi:934", "phi:966", "Chi:935", "chi:967", "Psi:936", "psi:968",
    "Omega:937", "omega:969", "thetasym:977", "upsih:978", "piv:982", "forall:8704",
    "part:8706", "exist:8707", "empty:8709", "nabla:8711", "isin:8712", "notin:8713",
    "ni:8715", "prod:8719", "sum:8721", "minus:8722", "lowast:8727", "radic:8730",
    "prop:8733", "infin:8734", "ang:8736", "and:8743", "or:8744", "cap:8745", "cup:8746",
    "int:8747", "there4:8756", "sim:8764", "cong:8773", "asymp:8776", "ne:8800",
    "equiv:8801", "le:8804", "ge:8805", "sub:8834", "sup:8835", "nsub:8836", "sube:8838",
    "supe:8839", "oplus:8853", "otimes:8855", "perp:8869", "sdot:8901", "loz:9674",
    "lceil:8968", "rceil:8969", "lfloor:8970", "rfloor:8971", "lang:9001", "rang:9002",
    "larr:8592", "uarr:8593", "rarr:8594", "darr:8595", "harr:8596", "crarr:8629",
    "lArr:8656", "uArr:8657", "rArr:8658", "dArr:8659", "hArr:8660", "bull:8226",
    "prime:8242", "oline:8254", "frasl:8260", "weierp:8472", "image:8465", "real:8476",
    "trade:8482", "euro:8364", "alefsym:8501", "spades:9824", "clubs:9827", "hearts:9829",
    "diams:9830", "ensp:8194", "emsp:8195", "thinsp:8201", "zwnj:8204", "zwj:8205",
    "lrm:8206", "rlm:8207", "ndash:8211", "mdash:8212", "lsquo:8216", "rsquo:8217",
    "sbquo:8218", "ldquo:8220", "rdquo:8221", "bdquo:8222", "dagger:8224",
    "Dagger:8225", "hellip:8230", "permil:8240", "lsaquo:8249", "rsaquo:8250"
    };

    static {
        // Split each "name:code" entry once so lookups are a plain map access.
        for (int i = 0; i < CHARS.length; i++) {
            String token = CHARS[i];
            int idx = token.indexOf(':');
            String key = token.substring(0, idx);
            int ch = Integer.parseInt(token.substring(idx + 1));
            charMap.put(key, String.valueOf((char) ch));
        }
    }

    /**
     * Converts the character references in the given text.
     *
     * Text without any '&amp;' is returned unchanged (same instance).
     * Malformed input is never rejected; it is marked in the output instead:
     * <ul>
     * <li>an '&amp;' with no ';' anywhere after it is replaced by
     *     <code>???</code>;</li>
     * <li>an unknown or invalid reference produces
     *     <code>???key???</code>, where key is the text between '&amp;' and
     *     the next ';'. Only the '&amp;' itself is consumed in that case, so
     *     the text following it is emitted again as ordinary text.</li>
     * </ul>
     *
     * @param html the text to convert, may be null
     * @return the converted text, or null if html is null
     */
    public static String convertHtml(String html) {
        if (html == null) {
            return null;
        }
        // Also covers the empty string: nothing to convert without an '&'.
        if (html.indexOf('&') < 0) {
            return html;
        }
        StringBuffer buff = new StringBuffer(html.length());
        for (int i = 0; i < html.length(); i++) {
            char ch = html.charAt(i);
            if (ch != '&') {
                buff.append(ch);
                continue;
            }
            int idx = html.indexOf(';', i + 1);
            if (idx < 0) {
                // Unterminated reference: drop the '&' and leave a marker.
                buff.append("???");
                continue;
            }
            String key = html.substring(i + 1, idx);
            String repl;
            if (key.startsWith("#")) {
                repl = decodeNumeric(key.substring(1));
            } else {
                repl = (String) charMap.get(key);
            }
            if (repl == null) {
                // Do not skip to idx here: the ';' that was found may belong
                // to unrelated text further on, so only the '&' is consumed.
                buff.append("???").append(key).append("???");
                continue;
            }
            buff.append(repl);
            // Jump to the ';'; the loop increment then steps past it.
            i = idx;
        }
        return buff.toString();
    }

    /**
     * Decodes the body of a numeric character reference, that is the part
     * after the '#': decimal digits, or 'x' / 'X' followed by hex digits.
     *
     * @param digits the reference body without the leading '#'
     * @return the decoded character(s), or null if the body is not a number
     *         or lies outside the range 0..0x10ffff
     */
    private static String decodeNumeric(String digits) {
        int radix = 10;
        if (digits.startsWith("x") || digits.startsWith("X")) {
            radix = 16;
            digits = digits.substring(1);
        }
        int code;
        try {
            code = Integer.parseInt(digits, radix);
        } catch (NumberFormatException ignored) {
            // Not a number: the caller reports it as an unknown reference.
            return null;
        }
        if (code < 0 || code > 0x10ffff) {
            return null;
        }
        if (code <= 0xffff) {
            return String.valueOf((char) code);
        }
        // Above the BMP a code point needs a UTF-16 surrogate pair. Built by
        // hand because Character.toChars is not available before Java 5.
        int offset = code - 0x10000;
        char[] pair = {
            (char) (0xd800 + (offset >> 10)),
            (char) (0xdc00 + (offset & 0x3ff))
        };
        return new String(pair);
    }

}
116
Popular Tags