package com.icl.saxon.output;
import com.icl.saxon.*;
import com.icl.saxon.om.NamePool;
import com.icl.saxon.sort.HashMap;
import org.xml.sax.Locator;
import org.xml.sax.Attributes;
import com.icl.saxon.tree.AttributeCollection;
import java.io.*;
import java.util.Locale;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.TransformerException;

/**
 * Emitter that serializes a result tree as HTML.
 *
 * <p>It specialises {@link XMLEmitter} in the following ways:</p>
 * <ul>
 *   <li>empty HTML elements (<code>br</code>, <code>img</code>, ...) get no end tag;</li>
 *   <li>boolean attributes whose value equals their name are minimised;</li>
 *   <li>attributes known to hold a URL have their non-ASCII characters %-escaped;</li>
 *   <li>text inside <code>script</code> and <code>style</code> is written unescaped;</li>
 *   <li>a META tag declaring the content type is inserted after <code>head</code>
 *       unless suppressed;</li>
 *   <li>characters are escaped according to the HTML rules rather than the XML ones.</li>
 * </ul>
 *
 * <p>Element and attribute names are matched without regard to case.</p>
 */

public class HTMLEmitter extends XMLEmitter {

    /**
     * Preferred character representations, as selected by the
     * {@link SaxonOutputKeys#CHARACTER_REPRESENTATION} output property.
     */

    private final static int REP_NATIVE = 0;
    private final static int REP_ENTITY = 1;
    private final static int REP_DECIMAL = 2;
    private final static int REP_HEX = 3;

    /** Value of {@link #inScript} meaning "not inside a script or style element". */
    private final static int NOT_IN_SCRIPT = -1000000;

    /** Representation used for non-ASCII characters that the output encoding can represent. */
    private int nonASCIIRepresentation = REP_ENTITY;

    /** Representation used for characters that the output encoding cannot represent. */
    private int excludedRepresentation = REP_DECIMAL;

    /** Media type announced in the generated META tag. */
    private String mediaType = "text/html";

    /**
     * Nesting depth relative to the innermost open script/style element: it is reset to 0
     * and then incremented when such an element starts, incremented for every nested start
     * tag, and decremented for every end tag. A positive value therefore means "inside
     * script or style"; {@link #NOT_IN_SCRIPT} is used as a sentinel otherwise.
     */
    private int inScript;

    /** Set once {@link #startDocument()} has run, so that a second call is ignored. */
    private boolean started = false;

    /**
     * Decode a character representation name.
     *
     * @param rep one of "native", "entity", "decimal" or "hex" (case-insensitive)
     * @return the matching REP_* code; unrecognised names yield {@link #REP_ENTITY}
     */

    private static int representationCode(String rep) {
        if (rep.equalsIgnoreCase("native")) return REP_NATIVE;
        if (rep.equalsIgnoreCase("entity")) return REP_ENTITY;
        if (rep.equalsIgnoreCase("decimal")) return REP_DECIMAL;
        if (rep.equalsIgnoreCase("hex")) return REP_HEX;
        return REP_ENTITY;
    }

    /**
     * Normalise an HTML element or attribute name for table lookup. A fixed locale is
     * used so that the result does not depend on the default locale (for example the
     * Turkish dotless i).
     */

    private static String lookupKey(String name) {
        return name.toLowerCase(Locale.ENGLISH);
    }

    /**
     * Table of HTML tags that have no closing tag. The Saxon HashMap is used here as a
     * set of strings: set(key) adds a key and get(key) tests for its presence.
     */

    static HashMap emptyTags = new HashMap(101);

    static {
        setEmptyTag("area");
        setEmptyTag("base");
        setEmptyTag("basefont");
        setEmptyTag("br");
        setEmptyTag("col");
        setEmptyTag("frame");
        setEmptyTag("hr");
        setEmptyTag("img");
        setEmptyTag("input");
        setEmptyTag("isindex");
        setEmptyTag("link");
        setEmptyTag("meta");
        setEmptyTag("param");
    }

    private static void setEmptyTag(String tag) {
        emptyTags.set(tag);
    }

    /**
     * Test whether an element is an empty HTML element, that is, one written without an
     * end tag.
     *
     * @param tag the local name of the element, in any case (BR, Br and br are equivalent)
     * @return true if the element is in the table of empty tags
     */

    protected static boolean isEmptyTag(String tag) {
        return emptyTags.get(lookupKey(tag));
    }

    /**
     * Table of boolean attributes.
     */

    // Two tables are kept: the first, keyed on the attribute name alone, lets the common
    // "not a boolean attribute" case be rejected without building the combined key.

    private static HashMap booleanAttributes = new HashMap(101);
    private static HashMap booleanCombinations = new HashMap(203);

    static {
        setBooleanAttribute("area", "nohref");
        setBooleanAttribute("button", "disabled");
        setBooleanAttribute("dir", "compact");
        setBooleanAttribute("dl", "compact");
        setBooleanAttribute("frame", "noresize");
        setBooleanAttribute("hr", "noshade");
        setBooleanAttribute("img", "ismap");
        setBooleanAttribute("input", "checked");
        setBooleanAttribute("input", "disabled");
        setBooleanAttribute("input", "readonly");
        setBooleanAttribute("menu", "compact");
        setBooleanAttribute("object", "declare");
        setBooleanAttribute("ol", "compact");
        setBooleanAttribute("optgroup", "disabled");
        setBooleanAttribute("option", "selected");
        setBooleanAttribute("option", "disabled");
        setBooleanAttribute("script", "defer");
        setBooleanAttribute("select", "multiple");
        setBooleanAttribute("select", "disabled");
        setBooleanAttribute("td", "nowrap");
        setBooleanAttribute("textarea", "disabled");
        setBooleanAttribute("textarea", "readonly");
        setBooleanAttribute("th", "nowrap");
        setBooleanAttribute("ul", "compact");
    }

    private static void setBooleanAttribute(String element, String attribute) {
        booleanAttributes.set(attribute);
        booleanCombinations.set(element + "+" + attribute);
    }

    /**
     * Test whether an attribute should be written in minimised form (name only).
     *
     * @param element the element name, in any case
     * @param attribute the attribute name, in any case
     * @param value the attribute value
     * @return true if the value equals the attribute name (ignoring case) and the
     * attribute is a boolean attribute of the given element
     */

    private static boolean isBooleanAttribute(String element, String attribute, String value) {
        if (!attribute.equalsIgnoreCase(value)) return false;
        String att = lookupKey(attribute);
        if (!booleanAttributes.get(att)) return false;
        return booleanCombinations.get(lookupKey(element) + "+" + att);
    }

    /**
     * Table of attributes whose value is a URL.
     */

    // Two tables are kept for the same reason as for the boolean attributes.

    private static HashMap urlAttributes = new HashMap(101);
    private static HashMap urlCombinations = new HashMap(203);

    static {
        setUrlAttribute("form", "action");
        setUrlAttribute("body", "background");
        setUrlAttribute("q", "cite");
        setUrlAttribute("blockquote", "cite");
        setUrlAttribute("del", "cite");
        setUrlAttribute("ins", "cite");
        setUrlAttribute("object", "classid");
        setUrlAttribute("object", "codebase");
        setUrlAttribute("applet", "codebase");
        setUrlAttribute("object", "data");
        setUrlAttribute("a", "href");
        setUrlAttribute("a", "name");
        setUrlAttribute("area", "href");
        setUrlAttribute("link", "href");
        setUrlAttribute("base", "href");
        setUrlAttribute("img", "longdesc");
        setUrlAttribute("frame", "longdesc");
        setUrlAttribute("iframe", "longdesc");
        setUrlAttribute("head", "profile");
        setUrlAttribute("script", "src");
        setUrlAttribute("input", "src");
        setUrlAttribute("frame", "src");
        setUrlAttribute("iframe", "src");
        setUrlAttribute("img", "src");
        setUrlAttribute("img", "usemap");
        setUrlAttribute("input", "usemap");
        setUrlAttribute("object", "usemap");
    }

    private static void setUrlAttribute(String element, String attribute) {
        urlAttributes.set(attribute);
        urlCombinations.set(element + "+" + attribute);
    }

    /**
     * Test whether an attribute of a given element holds a URL.
     *
     * @param element the element name, in any case
     * @param attribute the attribute name, in any case
     * @return true if the element/attribute pair is in the table of URL-valued attributes
     */

    public static boolean isUrlAttribute(String element, String attribute) {
        String att = lookupKey(attribute);
        if (!urlAttributes.get(att)) return false;
        return urlCombinations.get(lookupKey(element) + "+" + att);
    }

    /**
     * Constructor
     */

    public HTMLEmitter() {

    }

    /**
     * Output start of document. Reads the media type, the DOCTYPE identifiers and the
     * character representation from the output properties, and writes the DOCTYPE
     * declaration if a system or public identifier was supplied. A second call has no
     * effect.
     *
     * @throws TransformerException if the DOCTYPE declaration cannot be written
     */

    public void startDocument() throws TransformerException {
        if (started) return;
        started = true;

        String mime = outputProperties.getProperty(OutputKeys.MEDIA_TYPE);
        if (mime!=null) {
            mediaType = mime;
        }

        String systemId = outputProperties.getProperty(OutputKeys.DOCTYPE_SYSTEM);
        String publicId = outputProperties.getProperty(OutputKeys.DOCTYPE_PUBLIC);

        if (systemId!=null || publicId!=null) {
            writeDocType("html", systemId, publicId);
        }

        // NOTE(review): 'empty' is inherited state; presumably it tells XMLEmitter that
        // output has begun - confirm against XMLEmitter.
        empty = false;
        inScript = NOT_IN_SCRIPT;

        // The property has the form "rep" or "nonAsciiRep;excludedRep".
        String representation = outputProperties.getProperty(
                                    SaxonOutputKeys.CHARACTER_REPRESENTATION);
        if (representation!=null) {
            String nonASCIIrep;
            String excludedRep;
            int semi = representation.indexOf(';');
            if (semi < 0) {
                nonASCIIrep = representation;
                excludedRep = representation;
            } else {
                nonASCIIrep = representation.substring(0, semi).trim();
                excludedRep = representation.substring(semi+1).trim();
            }
            nonASCIIRepresentation = representationCode(nonASCIIrep);
            excludedRepresentation = representationCode(excludedRep);
            if (excludedRepresentation==REP_NATIVE) {
                // a character absent from the encoding cannot be written natively
                excludedRepresentation = REP_ENTITY;
            }
        }

    }

    /**
     * Output element start tag. Tracks entry into script/style elements and, after a
     * <code>head</code> start tag, inserts a META element giving the content type and
     * encoding unless the {@link SaxonOutputKeys#OMIT_META_TAG} property is "yes".
     *
     * @throws TransformerException if the tag cannot be written
     */

    public void startElement(int nameCode, Attributes atts,
                             int[] namespaces, int nscount) throws TransformerException {
        String name = namePool.getLocalName(nameCode);
        short uriCode = namePool.getURICode(nameCode);
        if (uriCode==0 && (name.equalsIgnoreCase("script") ||
                           name.equalsIgnoreCase("style"))) {
            inScript = 0;
        }
        inScript++;
        super.startElement(nameCode, atts, namespaces, nscount);
        // NOTE(review): presumably closes the start tag immediately so that it is never
        // written in the XML "<x/>" form - confirm against XMLEmitter.closeStartTag.
        closeStartTag(-1, false);

        // add a META tag after the HEAD tag if there is one

        if (uriCode==0 && name.equalsIgnoreCase("head")) {
            String omitMeta = outputProperties.getProperty(
                                    SaxonOutputKeys.OMIT_META_TAG);
            if (!("yes".equals(omitMeta))) {

                String encoding = outputProperties.getProperty(OutputKeys.ENCODING);
                if (encoding==null) encoding = "utf-8";

                AttributeCollection metaatts = new AttributeCollection(namePool);
                metaatts.addAttribute("", "", "http-equiv", "CDATA", "Content-Type");
                metaatts.addAttribute("", "", "content", "CDATA", mediaType + "; charset=" + encoding);

                writeMetaSeparator();
                int meta = namePool.allocate("", "", "meta");
                startElement(meta, metaatts, new int[0], 0);
                endElement(meta);
                writeMetaSeparator();
            }
        }
    }

    /**
     * Write the line break that surrounds the generated META element, reporting an I/O
     * failure as a TransformerException like the other output methods of this class.
     */

    private void writeMetaSeparator() throws TransformerException {
        try {
            writer.write("\n ");
        } catch (java.io.IOException err) {
            throw new TransformerException (err);
        }
    }

    /**
     * Write attribute name=value pair. Overrides the XML behaviour if the attribute is a
     * boolean attribute whose value equals its name (it is then minimised to the name
     * alone), or if the value is a URL (its non-ASCII characters are then %-escaped,
     * unless the attribute type is "NO-ESC").
     *
     * @param elCode name code of the containing element
     * @param attname the attribute name
     * @param type the attribute type
     * @param value the attribute value
     * @throws TransformerException if the attribute cannot be written
     */

    protected void writeAttribute(int elCode, String attname, String type, String value) throws TransformerException {
        try {
            String elname = namePool.getDisplayName(elCode);
            short uriCode = namePool.getURICode(elCode);
            if (uriCode==0 && isBooleanAttribute(elname, attname, value)) {
                testCharacters(attname);
                writer.write(attname);
            } else if (uriCode==0 && isUrlAttribute(elname, attname) &&
                       !type.equals("NO-ESC")) {
                String esc = escapeURL(value);
                super.writeAttribute(elCode, attname, type, esc);
            } else {
                super.writeAttribute(elCode, attname, type, value);
            }
        } catch (java.io.IOException err) {
            throw new TransformerException (err);
        }
    }


    /**
     * Escape characters. Overrides the XML behaviour.
     *
     * <p>Runs of ordinary characters are copied straight to the writer; every other
     * character is replaced as follows:</p>
     * <ul>
     *   <li>in attribute values, '&lt;' is left unescaped and '&amp;' is left unescaped
     *       when immediately followed by '{', as the XSLT 1.0 html output method
     *       requires; '&gt;', '&amp;', '"' and newline are written as references;</li>
     *   <li>in text, '&lt;', '&gt;' and '&amp;' are written as entity references;</li>
     *   <li>non-breaking space (160) is always written as <code>&amp;nbsp;</code>;</li>
     *   <li>a surrogate pair is written as one numeric character reference;</li>
     *   <li>other non-ASCII characters follow the configured character representation.</li>
     * </ul>
     *
     * @param ch the character buffer
     * @param start offset of the first character to write
     * @param length number of characters to write
     * @param inAttribute true if the characters form part of an attribute value
     * @throws java.io.IOException if the writer fails
     */

    protected void writeEscape(char ch[], int start, int length, boolean inAttribute)
    throws java.io.IOException {

        final int end = start + length;
        int segstart = start;
        boolean[] specialChars = (inAttribute ? specialInAtt : specialInText);

        while (segstart < end) {
            int i = segstart;

            // find a maximal sequence of "ordinary" characters

            while (i < end &&
                     (ch[i]<128 ?
                         !specialChars[ch[i]] :
                         (characterSet.inCharset(ch[i]) ?
                             nonASCIIRepresentation == REP_NATIVE && ch[i]!=160 :
                             false)
                     )
                  ) {
                i++;
            }

            // output this sequence

            writer.write(ch, segstart, i-segstart);

            // exit if this was the whole string

            if (i == end) return;

            if (ch[i]<127) {

                // handle a special ASCII character.
                // NOTE(review): a character flagged as special but not listed below is
                // dropped without any output; which characters that affects depends on
                // the specialInAtt/specialInText tables in XMLEmitter.

                if (inAttribute) {
                    if (ch[i]=='<') {
                        writer.write('<');              // not escaped in attribute values
                    } else if (ch[i]=='>') {
                        writer.write("&gt;");
                    } else if (ch[i]=='&') {
                        if (i+1<end && ch[i+1]=='{') {
                            writer.write('&');          // not escaped if followed by '{'
                        } else {
                            writer.write("&amp;");
                        }
                    } else if (ch[i]=='\"') {
                        writer.write("&#34;");
                    } else if (ch[i]=='\n') {
                        writer.write("&#xA;");
                    }
                } else {
                    if (ch[i]=='<') {
                        writer.write("&lt;");
                    } else if (ch[i]=='>') {
                        writer.write("&gt;");
                    } else if (ch[i]=='&') {
                        writer.write("&amp;");
                    }
                }

            } else if (ch[i]==160) {
                // always output NBSP as an entity reference
                writer.write("&nbsp;");

            } else if (ch[i]>=0xD800 && ch[i]<=0xDBFF &&
                       i+1<end && ch[i+1]>=0xDC00 && ch[i+1]<=0xDFFF) {

                // A surrogate pair is a high surrogate (D800-DBFF) followed by a low
                // surrogate (DC00-DFFF); the character it denotes is
                //   (high - D800) * 400 + (low - DC00) + 10000     (all numbers hex)
                // An unpaired high surrogate fails the test above and is handled by the
                // branches below, so no character beyond the segment is ever read and
                // the character following it is not swallowed.

                int charval = ((ch[i] - 0xD800) * 0x400) + (ch[i+1] - 0xDC00) + 0x10000;
                outputCharacterReference(charval);
                i++;

            } else if (characterSet.inCharset(ch[i])) {
                switch(nonASCIIRepresentation) {
                    case REP_NATIVE:
                        writer.write(ch[i]);
                        break;
                    case REP_ENTITY:
                        if (ch[i]>160 && ch[i]<=255) {

                            // character has a Latin-1 entity name: use it

                            writer.write('&');
                            writer.write(latin1Entities[(int)ch[i]-160]);
                            writer.write(';');
                            break;
                        }
                        // no entity name available: fall through to decimal
                    case REP_DECIMAL:
                        preferHex = false;
                        outputCharacterReference(ch[i]);
                        break;
                    case REP_HEX:
                        preferHex = true;
                        // fall through
                    default:
                        outputCharacterReference(ch[i]);
                        break;
                }

            } else {
                // character is not in the output encoding: numeric character reference
                preferHex = (excludedRepresentation==REP_HEX);
                outputCharacterReference((int)ch[i]);
            }

            segstart = ++i;
        }

    }

    /**
     * Output an element end tag. No end tag is written for an empty HTML element such
     * as <code>br</code>, whatever the case of its name.
     *
     * @param nameCode the name code of the element
     * @throws TransformerException if the tag cannot be written
     */

    public void endElement(int nameCode) throws TransformerException {
        String name = namePool.getLocalName(nameCode);
        short uriCode = namePool.getURICode(nameCode);
        inScript--;
        if (inScript==0) {
            // the script/style element itself has just been closed
            inScript = NOT_IN_SCRIPT;
        }

        if (uriCode!=0 || !isEmptyTag(name)) {
            super.endElement(nameCode);
        }

    }

    /**
     * Character data. Inside a script or style element escaping is switched off for the
     * duration of the call, provided it is currently switched on.
     *
     * @throws TransformerException if the text cannot be written
     */

    public void characters (char[] ch, int start, int length)
    throws TransformerException {
        if (inScript>0 && escaping) {
            setEscaping(false);
            super.characters(ch, start, length);
            setEscaping(true);
        } else {
            super.characters(ch, start, length);
        }
    }

    /**
     * Handle a processing instruction. It is written in the HTML form
     * <code>&lt;?target data&gt;</code>, terminated by "&gt;" rather than "?&gt;".
     *
     * @param target the target of the processing instruction
     * @param data the content of the processing instruction
     * @throws TransformerException if the writer fails
     */

    public void processingInstruction (String target, String data)
    throws TransformerException
    {
        try {
            writer.write("<?");
            writer.write(target);
            writer.write(' ');
            writer.write(data);
            writer.write('>');
        }
        catch (java.io.IOException err) {
            throw new TransformerException (err);
        }
    }

    /**
     * Escape a URL for use in an HTML attribute: every character below 32 or above 126
     * is replaced by the %HH escapes of its UTF-8 encoding. Printable ASCII characters,
     * including space, are copied unchanged. A surrogate pair is encoded as the single
     * character it denotes.
     *
     * @param url the URL to escape
     * @return the escaped URL
     * @throws TransformerException if the UTF-8 encoding is unavailable
     */

    private static String escapeURL(String url) throws TransformerException {
        final String hex = "0123456789ABCDEF";
        final int len = url.length();
        StringBuffer sb = new StringBuffer (len + 16);
        for (int i=0; i<len; i++) {
            char ch = url.charAt(i);
            if (ch<32 || ch>126) {
                // keep a surrogate pair together so that it is encoded as one character
                int next = i + 1;
                if (ch>=0xD800 && ch<=0xDBFF && next<len) {
                    char low = url.charAt(next);
                    if (low>=0xDC00 && low<=0xDFFF) {
                        next++;
                    }
                }
                byte[] array;
                try {
                    array = url.substring(i, next).getBytes("UTF-8");
                } catch (UnsupportedEncodingException err) {
                    throw new TransformerException (err);
                }
                for (int b=0; b<array.length; b++) {
                    int v = array[b] & 0xFF;
                    sb.append('%');
                    sb.append(hex.charAt(v >> 4));
                    sb.append(hex.charAt(v & 0x0F));
                }
                i = next - 1;
            } else {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /**
     * Entity names of the characters 160 to 255, indexed by (character code - 160).
     */

    private static final String [] latin1Entities = {

        "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
        "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
        "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
        "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
    };


}