package org.outerj.daisy.htmlcleaner;

import org.xml.sax.SAXException;
import org.xml.sax.Locator;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.helpers.AttributesImpl;

import java.io.OutputStream;
import java.io.IOException;
import java.io.Writer;
import java.io.OutputStreamWriter;
import java.util.*;

/**
 * SAX {@link ContentHandler} that serializes the events it receives as
 * markup text, inserting newlines around tags as described by the
 * {@link OutputElementDescriptor}s of the supplied template and wrapping
 * text once a line would exceed <code>template.maxLineWidth</code>.
 *
 * <p>Content inside a <code>pre</code> element is written straight to the
 * output without any wrapping or extra newlines.
 *
 * <p>{@link #setOutputStream(OutputStream)} must be called before any SAX
 * event is delivered; it (re)initializes the internal state. Namespace
 * information is ignored: only local names are written.
 */
class StylingHtmlSerializer implements ContentHandler {
    private Writer writer;
    private LineRenderer line;
    /** Start tag that has been announced but not written yet, or null. */
    private StartElementInfo currentStartElement;
    private boolean inPreElement = false;
    private HtmlCleanerTemplate template;
    // Fallback for elements without a descriptor in the template.
    // NOTE(review): presumably "no newlines anywhere, inline" -- confirm
    // against the OutputElementDescriptor constructor.
    private OutputElementDescriptor dummy = new OutputElementDescriptor(0, 0, 0, 0, true);

    public StylingHtmlSerializer(HtmlCleanerTemplate template) {
        this.template = template;
    }

    /**
     * Sets the stream to write to (always encoded as UTF-8) and resets the
     * line buffer and any pending start element.
     */
    public void setOutputStream(OutputStream outputStream) throws IOException {
        this.writer = new OutputStreamWriter(outputStream, "UTF-8");
        line = new LineRenderer();
        currentStartElement = null;
    }

    public void startDocument() throws SAXException {
    }

    /** Writes out whatever is still buffered in the current line and flushes the writer. */
    public void endDocument() throws SAXException {
        try {
            line.flushLine(false);
            writer.flush();
        } catch (IOException e) {
            throw new SAXException(e);
        }
    }

    public void characters(char ch[], int start, int length) throws SAXException {
        writePendingStartElement(false);
        line.writeText(escapeReservedCharacters(new String(ch, start, length)));
    }

    /**
     * Remembers the start tag instead of writing it immediately, so that an
     * element which is closed right away can be written as an empty tag
     * (see {@link #endElement}).
     */
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
        writePendingStartElement(false);
        currentStartElement = new StartElementInfo(localName, atts);
        if (localName.equals("pre")) {
            // Everything buffered so far must be written before switching to
            // unformatted output.
            line.flushLine(false);
            OutputElementDescriptor descriptor = getElementDescriptor("pre");
            line.newLines(descriptor.getNewLinesBeforeOpenTag());
            inPreElement = true;
        }
    }

    /**
     * Writes the remembered start tag, if there is one.
     *
     * @param empty true to write the tag in its self-closing form
     *              (<code>&lt;x/&gt;</code>)
     */
    public void writePendingStartElement(boolean empty) throws SAXException {
        if (currentStartElement == null) {
            return;
        }

        String localName = currentStartElement.getLocalName();
        Attributes atts = currentStartElement.getAttrs();

        StringBuffer tag = new StringBuffer(localName.length() + 2 + (atts.getLength() * 50));
        tag.append('<').append(localName);

        for (int i = 0; i < atts.getLength(); i++) {
            tag.append(' ');
            tag.append(atts.getLocalName(i));
            tag.append("=\"");
            tag.append(escapeAttribute(atts.getValue(i)));
            tag.append('"');
        }

        if (empty) {
            tag.append("/>");
        } else {
            tag.append('>');
        }

        OutputElementDescriptor descriptor = getElementDescriptor(localName);
        if (!inPreElement) {
            line.newLines(descriptor.getNewLinesBeforeOpenTag());
        }
        line.writeStartTag(tag.toString(), descriptor);
        if (!inPreElement) {
            // A self-closing tag is at the same time the close tag.
            if (empty) {
                line.newLines(descriptor.getNewLinesAfterCloseTag());
            } else {
                line.newLines(descriptor.getNewLinesAfterOpenTag());
            }
        }

        if (localName.equals("pre")) {
            line.flushLine(false);
        }

        currentStartElement = null;
    }

    /**
     * Writes the end tag, or, when the start tag of this element is still
     * pending (the element had no content), writes it as an empty tag.
     */
    public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
        if (localName.equals("pre")) {
            inPreElement = false;
        }

        if (currentStartElement != null) {
            writePendingStartElement(true);
        } else {
            String tag = "</" + localName + ">";
            OutputElementDescriptor descriptor = getElementDescriptor(localName);
            if (!inPreElement) {
                line.newLines(descriptor.getNewLinesBeforeCloseTag());
            }
            line.writeEndTag(tag, descriptor);
            if (!inPreElement) {
                line.newLines(descriptor.getNewLinesAfterCloseTag());
            }
        }
    }

    public void ignorableWhitespace(char ch[], int start, int length) throws SAXException {
    }

    public void endPrefixMapping(String prefix) throws SAXException {
    }

    public void skippedEntity(String name) throws SAXException {
    }

    public void setDocumentLocator(Locator locator) {
    }

    public void processingInstruction(String target, String data) throws SAXException {
    }

    public void startPrefixMapping(String prefix, String uri) throws SAXException {
    }

    public void endCDATA() throws SAXException {
    }

    public void endDTD() throws SAXException {
    }

    public void startCDATA() throws SAXException {
    }

    public void comment(char ch[], int start, int length) throws SAXException {
    }

    public void endEntity(String name) throws SAXException {
    }

    public void startEntity(String name) throws SAXException {
    }

    public void startDTD(String name, String publicId, String systemId) throws SAXException {
    }

    /** Returns the template's descriptor for the element, or the dummy descriptor if it has none. */
    private OutputElementDescriptor getElementDescriptor(String localName) {
        OutputElementDescriptor descriptor = (OutputElementDescriptor)template.outputElementDescriptors.get(localName);
        if (descriptor != null) {
            return descriptor;
        }
        return dummy;
    }

    /**
     * Escapes the characters that may not appear literally inside a
     * double-quoted attribute value.
     */
    private String escapeAttribute(String value) {
        StringBuffer newValue = new StringBuffer(value.length() + 10);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':
                    newValue.append("&quot;");
                    break;
                case '<':
                    newValue.append("&lt;");
                    break;
                case '>':
                    newValue.append("&gt;");
                    break;
                case '&':
                    newValue.append("&amp;");
                    break;
                default:
                    newValue.append(c);
            }
        }
        return newValue.toString();
    }

    /**
     * Escapes <code>&lt;</code>, <code>&gt;</code> and <code>&amp;</code> in
     * character data and drops characters outside the ranges
     * #x9 | #xA | #xD | #x20-#xD7FF | #xE000-#xFFFD | #x10000-#x10FFFF.
     *
     * <p>Characters above #xFFFF are stored as a surrogate pair of two
     * <code>char</code>s, so they are recognized by their pair rather than by
     * comparing a single <code>char</code> against 0x10000 (which can never
     * match). A surrogate without its partner in <code>text</code> is dropped.
     */
    private String escapeReservedCharacters(String text) {
        StringBuffer newText = new StringBuffer(text.length() + 10);
        int textLength = text.length();
        for (int i = 0; i < textLength; i++) {
            char c = text.charAt(i);
            switch (c) {
                case '<':
                    newText.append("&lt;");
                    break;
                case '>':
                    newText.append("&gt;");
                    break;
                case '&':
                    newText.append("&amp;");
                    break;
                default:
                    if (c == 0x9 || c == 0xA || c == 0xD || (c >= 0x20 && c <= 0xD7FF)
                            || (c >= 0xE000 && c <= 0xFFFD)) {
                        newText.append(c);
                    } else if (c >= 0xD800 && c <= 0xDBFF && i + 1 < textLength) {
                        // High surrogate: keep it only together with a
                        // following low surrogate (= one char in
                        // #x10000-#x10FFFF).
                        char low = text.charAt(i + 1);
                        if (low >= 0xDC00 && low <= 0xDFFF) {
                            newText.append(c).append(low);
                            i++;
                        }
                    }
            }
        }
        return newText.toString();
    }

    /**
     * Decides when the content collected in a {@link Line} is written to the
     * writer. While inside a <code>pre</code> element everything is written
     * through directly.
     */
    private class LineRenderer {
        private Line line = new Line();

        /**
         * Flushes the current line if it has content and then writes
         * <code>count</code> newline characters. Does nothing when
         * <code>count</code> is 0.
         */
        public void newLines(int count) throws SAXException {
            try {
                if (count == 0) {
                    return;
                }

                if (line.getLength() > 0) {
                    flushLine(false);
                }

                for (int i = 0; i < count; i++) {
                    writer.write('\n');
                }
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }

        /**
         * Adds text to the current line word by word, writing out (part of)
         * the line followed by a newline whenever the next word would make
         * it longer than <code>template.maxLineWidth</code>. Runs of
         * whitespace are reduced to single spaces.
         */
        public void writeText(String text) throws SAXException {
            try {
                if (inPreElement) {
                    writer.write(text);
                    return;
                }

                List words = getWords(text);

                if (startsWithWhitespace(text)) {
                    line.addSpace();
                }

                if (words.size() > 0) {
                    boolean firstWord = true;
                    Iterator wordsIt = words.iterator();

                    while (wordsIt.hasNext()) {
                        String word = (String)wordsIt.next();
                        if (line.getLength() > 0 && line.getLength() + word.length() + 1 > template.maxLineWidth) {
                            if (!line.endsOnWordOrSpace()) {
                                writeText(line.emptyIfPossibleBeforeWord(), true);
                            } else {
                                writeText(line.empty(), true);
                            }
                        }

                        if (!firstWord) {
                            line.addSpace();
                        }
                        line.addWord(word);

                        firstWord = false;
                    }

                    if (endsWithWhitespace(text)) {
                        line.addSpace();
                    }
                }
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }

        /** Writes the text, optionally followed by a newline; a null text writes nothing at all. */
        private void writeText(String text, boolean newLine) throws IOException {
            if (text != null) {
                writer.write(text);
                if (newLine) {
                    writer.write('\n');
                }
            }
        }

        /** Writes out the complete current line and leaves it empty. */
        private void flushLine(boolean newLine) throws SAXException {
            try {
                writeText(line.empty(), newLine);
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }

        private boolean startsWithWhitespace(String text) {
            if (text.length() == 0) {
                return false;
            }
            return Character.isWhitespace(text.charAt(0));
        }

        private boolean endsWithWhitespace(String text) {
            if (text.length() == 0) {
                return false;
            }
            return Character.isWhitespace(text.charAt(text.length() - 1));
        }

        /**
         * Adds a start tag to the current line, first breaking the line if
         * the tag (plus one extra position) would not fit within
         * <code>template.maxLineWidth</code>.
         */
        public void writeStartTag(String text, OutputElementDescriptor descriptor) throws SAXException {
            try {
                if (inPreElement) {
                    writer.write(text);
                } else {
                    if (line.getLength() > 0 && line.getLength() + 1 + text.length() > template.maxLineWidth) {
                        String toWrite;
                        if (descriptor.isInline()) {
                            toWrite = line.emptyIfPossibleBeforeInlineTag();
                        } else {
                            toWrite = line.empty();
                        }
                        writeText(toWrite, true);
                    }

                    line.addStartTag(text, descriptor);
                }
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }

        /**
         * Adds an end tag to the current line, first breaking the line if
         * the tag would not fit within <code>template.maxLineWidth</code>.
         */
        public void writeEndTag(String text, OutputElementDescriptor descriptor) throws SAXException {
            try {
                if (inPreElement) {
                    writer.write(text);
                } else {
                    if (line.getLength() > 0 && line.getLength() + text.length() > template.maxLineWidth) {
                        String toWrite;
                        if (descriptor.isInline()) {
                            toWrite = line.emptyIfPossibleBeforeInlineTag();
                        } else {
                            toWrite = line.empty();
                        }
                        writeText(toWrite, true);
                    }

                    line.addEndTag(text, descriptor);
                }
            } catch (IOException e) {
                throw new SAXException(e);
            }
        }

        /** Splits the text on whitespace and returns the non-empty pieces as a list of Strings. */
        private List getWords(String text) {
            ArrayList words = new ArrayList();
            int beginWord = -1;
            for (int i = 0; i < text.length(); i++) {
                if (Character.isWhitespace(text.charAt(i))) {
                    if (beginWord != -1) {
                        words.add(text.substring(beginWord, i));
                        beginWord = -1;
                    }
                } else if (beginWord == -1) {
                    beginWord = i;
                }
            }

            if (beginWord != -1) {
                words.add(text.substring(beginWord));
            }

            return words;
        }
    }

    /**
     * The line being built, kept as a list of items (start tags, end tags,
     * words and single spaces) together with the total length of their text.
     */
    private class Line {
        private List lineItems = new ArrayList();
        private int length = 0;

        public void addStartTag(String text, OutputElementDescriptor descriptor) {
            lineItems.add(new StartTag(text, descriptor));
            length += text.length();
        }

        /** Adds an end tag; a space directly before a non-inline end tag is removed. */
        public void addEndTag(String text, OutputElementDescriptor descriptor) {
            if (!descriptor.isInline() && lineItems.size() > 0 && getLastLineItem() instanceof Space) {
                lineItems.remove(lineItems.size() - 1);
                length--;
            }
            lineItems.add(new EndTag(text, descriptor));
            length += text.length();
        }

        public void addWord(String text) {
            lineItems.add(new Word(text));
            length += text.length();
        }

        /**
         * Adds a space, except at the start of the line, after another space
         * or after a non-inline tag.
         */
        public void addSpace() {
            if (lineItems.size() == 0) {
                return;
            }

            LineItem lastItem = getLastLineItem();
            if (lastItem instanceof Space) {
                return;
            }
            if (lastItem instanceof Tag && !((Tag)lastItem).descriptor.isInline()) {
                return;
            }

            lineItems.add(new Space());
            length += 1;
        }

        public boolean endsOnWordOrSpace() {
            if (lineItems.size() > 0) {
                LineItem lineItem = getLastLineItem();
                return lineItem instanceof Word || lineItem instanceof Space;
            } else {
                return false;
            }
        }

        public int getLength() {
            return length;
        }

        /** Removes all items from the line and returns their text. */
        public String empty() {
            return empty(lineItems.size() - 1);
        }

        /**
         * Removes the items up to and including index <code>until</code>
         * from the line and returns their concatenated text. If the item at
         * <code>until</code> is a space, it is removed but not included in
         * the returned text.
         */
        public String empty(int until) {
            StringBuffer text = new StringBuffer();
            for (int i = 0; i <= until; i++) {
                LineItem lineItem = (LineItem)lineItems.get(i);
                if (i == until && lineItem instanceof Space) {
                    continue;
                }
                text.append(lineItem.text);
            }
            lineItems = new ArrayList(lineItems.subList(until + 1, lineItems.size()));
            recalcLength();
            return text.toString();
        }

        private void recalcLength() {
            int newLength = 0;
            for (int i = 0; i < lineItems.size(); i++) {
                newLength += ((LineItem)lineItems.get(i)).text.length();
            }
            this.length = newLength;
        }

        /**
         * Empties the line in preparation of adding a word. If the line ends
         * on an inline tag, only the part up to the split point found by
         * {@link #searchSplitPoint()} is removed.
         *
         * <p>Must only be called on a non-empty line.
         *
         * @return the removed text, or null if the line ends on an inline
         *         tag and no split point exists (the line is then left
         *         untouched)
         */
        public String emptyIfPossibleBeforeWord() {
            LineItem lineItem = getLastLineItem();
            if (lineItem instanceof Tag && ((Tag)lineItem).descriptor.isInline()) {
                int splitPoint = searchSplitPoint();
                if (splitPoint == -1) {
                    return null;
                } else {
                    return empty(splitPoint);
                }
            } else {
                return empty();
            }
        }

        /**
         * Same as {@link #emptyIfPossibleBeforeWord()}, but a split point is
         * also searched when the line ends on a word.
         *
         * <p>Must only be called on a non-empty line.
         */
        public String emptyIfPossibleBeforeInlineTag() {
            LineItem lineItem = getLastLineItem();
            if ((lineItem instanceof Tag && ((Tag)lineItem).descriptor.isInline()) || lineItem instanceof Word) {
                int splitPoint = searchSplitPoint();
                if (splitPoint == -1) {
                    return null;
                } else {
                    return empty(splitPoint);
                }
            } else {
                return empty();
            }
        }

        /**
         * Searches backwards, starting at the second-to-last item, for the
         * last item after which the line can be broken: a word that is
         * directly followed by another word, a space, or a non-inline tag.
         *
         * @return the index of that item, or -1 if there is none or the line
         *         has fewer than two items
         */
        private int searchSplitPoint() {
            if (lineItems.size() < 2) {
                return -1;
            }

            // "previous" in iteration order, i.e. the item to the right of
            // the current one.
            LineItem previousLineItem = getLastLineItem();
            for (int i = lineItems.size() - 2; i >= 0; i--) {
                LineItem currentLineItem = (LineItem)lineItems.get(i);
                if (currentLineItem instanceof Word && previousLineItem instanceof Word) {
                    return i;
                } else if (currentLineItem instanceof Space) {
                    return i;
                } else if (currentLineItem instanceof Tag && !((Tag)currentLineItem).descriptor.isInline()) {
                    return i;
                }
                previousLineItem = currentLineItem;
            }
            return -1;
        }

        private LineItem getLastLineItem() {
            return (LineItem)lineItems.get(lineItems.size() - 1);
        }

        abstract class LineItem {
            final String text;

            public LineItem(String text) {
                this.text = text;
            }
        }

        abstract class Tag extends LineItem {
            final OutputElementDescriptor descriptor;

            public Tag(String text, OutputElementDescriptor descriptor) {
                super(text);
                this.descriptor = descriptor;
            }
        }

        class StartTag extends Tag {
            public StartTag(String text, OutputElementDescriptor descriptor) {
                super(text, descriptor);
            }
        }

        class EndTag extends Tag {
            public EndTag(String text, OutputElementDescriptor descriptor) {
                super(text, descriptor);
            }
        }

        class Word extends LineItem {

            public Word(String text) {
                super(text);
            }
        }

        class Space extends LineItem {
            public Space() {
                super(" ");
            }
        }
    }

    /** Name and attributes of a start tag whose writing has been postponed. */
    private static class StartElementInfo {
        private final String localName;
        private final Attributes attrs;

        public StartElementInfo(String localName, Attributes attrs) {
            this.localName = localName;
            // The Attributes object handed to startElement() is only
            // guaranteed to be valid during that call, while this info is
            // read on a later event, so a snapshot is taken.
            this.attrs = new AttributesImpl(attrs);
        }

        public String getLocalName() {
            return localName;
        }

        public Attributes getAttrs() {
            return attrs;
        }
    }
}