1 package net.sf.saxon.event; 2 import net.sf.saxon.charcode.UnicodeCharacterSet; 3 import net.sf.saxon.trans.DynamicError; 4 import net.sf.saxon.trans.XPathException; 5 import net.sf.saxon.om.FastStringBuffer; 6 7 import javax.xml.transform.OutputKeys ; 8 9 13 14 public class HTMLEmitter extends XMLEmitter { 15 16 19 20 private static final int REP_NATIVE = 0; 21 private static final int REP_ENTITY = 1; 22 private static final int REP_DECIMAL = 2; 23 private static final int REP_HEX = 3; 24 25 private int nonASCIIRepresentation = REP_NATIVE; 26 private int excludedRepresentation = REP_DECIMAL; 27 private int inScript; 29 private boolean started = false; 30 private String elementName; 31 private short uriCode; 32 35 38 39 private static int representationCode(String rep) { 40 if (rep.equalsIgnoreCase("native")) return REP_NATIVE; 41 if (rep.equalsIgnoreCase("entity")) return REP_ENTITY; 42 if (rep.equalsIgnoreCase("decimal")) return REP_DECIMAL; 43 if (rep.equalsIgnoreCase("hex")) return REP_HEX; 44 return REP_ENTITY; 45 } 46 47 50 51 static HTMLTagHashSet emptyTags = new HTMLTagHashSet(31); 52 53 static { 54 setEmptyTag("area"); 55 setEmptyTag("base"); 56 setEmptyTag("basefont"); 57 setEmptyTag("br"); 58 setEmptyTag("col"); 59 setEmptyTag("frame"); 60 setEmptyTag("hr"); 61 setEmptyTag("img"); 62 setEmptyTag("input"); 63 setEmptyTag("isindex"); 64 setEmptyTag("link"); 65 setEmptyTag("meta"); 66 setEmptyTag("param"); 67 } 68 69 private static void setEmptyTag(String tag) { 70 emptyTags.add(tag); 71 } 72 73 protected static boolean isEmptyTag(String tag) { 74 return emptyTags.contains(tag); 75 } 76 77 80 81 83 private static HTMLTagHashSet booleanAttributes = new HTMLTagHashSet(31); 84 private static HTMLTagHashSet booleanCombinations = new HTMLTagHashSet(53); 85 86 static { 87 setBooleanAttribute("area", "nohref"); 88 setBooleanAttribute("button", "disabled"); 89 setBooleanAttribute("dir", "compact"); 90 setBooleanAttribute("dl", "compact"); 91 setBooleanAttribute("frame", "noresize"); 92 setBooleanAttribute("hr", "noshade"); 93 setBooleanAttribute("img", "ismap"); 94 setBooleanAttribute("input", "checked"); 95 setBooleanAttribute("input", "disabled"); 96 setBooleanAttribute("input", "readonly"); 97 setBooleanAttribute("menu", "compact"); 98 setBooleanAttribute("object", "declare"); 99 setBooleanAttribute("ol", "compact"); 100 setBooleanAttribute("optgroup", "disabled"); 101 setBooleanAttribute("option", "selected"); 102 setBooleanAttribute("option", "disabled"); 103 setBooleanAttribute("script", "defer"); 104 setBooleanAttribute("select", "multiple"); 105 setBooleanAttribute("select", "disabled"); 106 setBooleanAttribute("td", "nowrap"); 107 setBooleanAttribute("textarea", "disabled"); 108 setBooleanAttribute("textarea", "readonly"); 109 setBooleanAttribute("th", "nowrap"); 110 setBooleanAttribute("ul", "compact"); 111 } 112 113 private static void setBooleanAttribute(String element, String attribute) { 114 booleanAttributes.add(attribute); 115 booleanCombinations.add(element + '+' + attribute); 116 } 117 118 private static boolean isBooleanAttribute(String element, String attribute, String value) { 119 if (!attribute.equalsIgnoreCase(value)) return false; 120 if (!booleanAttributes.contains(attribute)) return false; 121 return booleanCombinations.contains(element + '+' + attribute); 122 } 123 124 127 128 130 173 176 177 public HTMLEmitter() { 178 179 } 180 181 184 185 public void open() throws XPathException {} 186 187 protected void openDocument() throws XPathException { 188 if (writer==null) { 189 makeWriter(); 190 } 191 if (started) return; 192 started = true; 193 196 201 String byteOrderMark = outputProperties.getProperty(SaxonOutputKeys.BYTE_ORDER_MARK); 202 203 if ("yes".equals(byteOrderMark) && 204 "UTF-8".equalsIgnoreCase(outputProperties.getProperty(OutputKeys.ENCODING))) { 205 try { 206 writer.write('\uFEFF'); 207 } catch (java.io.IOException err) { 208 } 210 } 211 212 215 String systemId = outputProperties.getProperty(OutputKeys.DOCTYPE_SYSTEM); 216 String publicId = outputProperties.getProperty(OutputKeys.DOCTYPE_PUBLIC); 217 218 if (systemId!=null || publicId!=null) { 219 writeDocType("html", systemId, publicId); 220 } 221 222 empty = false; 223 inScript = -1000000; 224 225 String representation = outputProperties.getProperty( 226 SaxonOutputKeys.CHARACTER_REPRESENTATION); 227 if (representation!=null) { 228 String nonASCIIrep; 229 String excludedRep; 230 int semi = representation.indexOf(';'); 231 if (semi < 0) { 232 nonASCIIrep = representation; 233 excludedRep = representation; 234 } else { 235 nonASCIIrep = representation.substring(0, semi).trim(); 236 excludedRep = representation.substring(semi+1).trim(); 237 } 238 nonASCIIRepresentation = representationCode(nonASCIIrep); 239 excludedRepresentation = representationCode(excludedRep); 240 if (excludedRepresentation==REP_NATIVE) { 241 excludedRepresentation = REP_ENTITY; 242 } 243 } 244 245 } 246 247 250 251 public void startElement(int nameCode, int typeCode, int locationId, int properties) throws XPathException { 252 253 super.startElement(nameCode, typeCode, locationId, properties); 254 uriCode = namePool.getURICode(nameCode); 255 elementName = (String )elementStack.peek(); 256 257 if (uriCode==0 && 258 ( elementName.equalsIgnoreCase("script") || 259 elementName.equalsIgnoreCase("style"))) { 260 inScript = 0; 261 } 262 inScript++; 263 } 264 265 public void startContent() throws XPathException { 266 closeStartTag(); } 268 269 274 275 protected void writeAttribute(int elCode, String attname, CharSequence value, int properties) throws XPathException { 276 try { 277 if (uriCode==0) { 278 if (isBooleanAttribute(elementName, attname, value.toString())) { 279 writer.write(attname); 280 return; 281 } 282 } 289 super.writeAttribute(elCode, attname, value, properties); 290 } catch (java.io.IOException err) { 291 throw new DynamicError(err); 292 } 293 } 294 295 296 299 300 protected void writeEscape(final CharSequence chars, final boolean inAttribute) 301 throws java.io.IOException , XPathException { 302 303 int segstart = 0; 304 final boolean[] specialChars = (inAttribute ? specialInAtt : specialInText); 305 boolean disabled = false; 306 307 while (segstart < chars.length()) { 308 int i = segstart; 309 310 312 while (i < chars.length() && 313 (chars.charAt(i)<127 ? 314 !specialChars[chars.charAt(i)] : 315 (characterSet.inCharset(chars.charAt(i)) ? 316 nonASCIIRepresentation == REP_NATIVE && chars.charAt(i)>160 : 317 false) 318 ) 319 ) { 320 i++; 321 } 322 323 325 if (i == chars.length()) { 326 if (segstart == 0) { 327 writeCharSequence(chars); 328 } else { 329 writeCharSequence(chars.subSequence(segstart, i)); 330 } 331 return; 332 } 333 334 if (i > segstart) { 336 writeCharSequence(chars.subSequence(segstart, i)); 337 } 338 339 final char c = chars.charAt(i); 340 341 if (c==0) { 342 disabled = !disabled; 344 } else if (disabled) { 345 writer.write(c); 346 } else if (c<=127) { 347 348 350 if (inAttribute) { 351 if (c=='<') { 352 writer.write('<'); } else if (c=='>') { 354 writer.write(">"); } else if (c=='&') { 356 if (i+1<chars.length() && chars.charAt(i+1)=='{') { 357 writer.write('&'); } else { 359 writer.write("&"); 360 } 361 } else if (c=='\"') { 362 writer.write("""); 363 } else if (c=='\n') { 364 writer.write("
"); 365 } 366 } else { 367 if (c=='<') { 368 writer.write("<"); 369 } else if (c=='>') { 370 writer.write(">"); } else if (c=='&') { 372 writer.write("&"); 373 } 374 } 375 376 } else if (c==160) { 377 writer.write(" "); 379 380 } else if (c>=127 && c<160) { 381 DynamicError err = new DynamicError("Illegal HTML character: decimal " + (int)c); 383 err.setErrorCode("SERE0014"); 384 throw err; 385 386 } else if (c>=55296 && c<=56319) { 388 394 int charval = (((int)c - 55296) * 1024) + ((int)chars.charAt(i+1) - 56320) + 65536; 396 outputCharacterReference(charval); 397 i++; 398 399 400 } else if (characterSet.inCharset(c)) { 401 switch(nonASCIIRepresentation) { 402 case REP_NATIVE: 403 writer.write(c); 404 break; 405 case REP_ENTITY: 406 if (c>160 && c<=255) { 407 408 410 writer.write('&'); 411 writer.write(latin1Entities[(int)c-160]); 412 writer.write(';'); 413 break; 414 } 415 case REP_DECIMAL: 417 preferHex = false; 418 outputCharacterReference(c); 419 break; 420 case REP_HEX: 421 preferHex = true; 422 default: 424 outputCharacterReference(c); 425 break; 426 } 427 428 } else { preferHex = (excludedRepresentation==REP_HEX); 430 outputCharacterReference((int)c); 431 } 432 433 segstart = ++i; 434 } 435 436 } 437 438 441 442 public void endElement() throws XPathException { 443 String name = (String )elementStack.peek(); 444 inScript--; 445 if (inScript==0) { 446 inScript = -1000000; 447 } 448 449 if (isEmptyTag(name) && uriCode==0) { 450 elementStack.pop(); 452 } else { 453 super.endElement(); 454 } 455 456 } 457 458 461 462 public void characters (CharSequence chars, int locationId, int properties) 463 throws XPathException { 464 int options = properties; 465 if (inScript>0) { 466 options |= ReceiverOptions.DISABLE_ESCAPING; 467 } 468 super.characters(chars, locationId, options); 469 } 470 471 474 475 public void processingInstruction (String target, CharSequence data, int locationId, int properties) 476 throws XPathException 477 { 478 if (empty) { 479 openDocument(); 480 } 481 for (int i=0; i<data.length(); i++) { 482 if (data.charAt(i) == '>') { 483 DynamicError err = new DynamicError("A processing instruction in HTML must not contain a > character"); 484 err.setErrorCode("SERE0015"); 485 throw err; 486 } 487 } 488 try { 489 writer.write("<?"); 490 writer.write(target); 491 writer.write(' '); 492 writeCharSequence(data); 493 writer.write('>'); 494 } catch (java.io.IOException err) { 495 throw new DynamicError(err); 496 } 497 } 498 499 public static CharSequence escapeURL(CharSequence url) { 500 FastStringBuffer sb = new FastStringBuffer(url.length() + 20); 501 final String hex = "0123456789ABCDEF"; 502 503 for (int i=0; i<url.length(); i++) { 504 char ch = url.charAt(i); 505 if (ch<32 || ch>126) { 506 byte[] array = new byte[4]; 507 int used = UnicodeCharacterSet.getUTF8Encoding(ch, 508 (i+1 < url.length() ? url.charAt(i+1): ' '), array); 509 for (int b=0; b<used; b++) { 510 int v = (array[b]>=0 ? array[b] : 256 + array[b]); 511 sb.append('%'); 512 sb.append(hex.charAt(v/16)); 513 sb.append(hex.charAt(v%16)); 514 } 515 516 } else { 517 sb.append(ch); 518 } 519 } 520 return sb; 521 } 522 523 524 private static final String [] latin1Entities = { 525 526 "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect", "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr", "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot", "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest", "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml" }; 710 711 712 } 713 714 | Popular Tags |