1 47 48 package com.lowagie.text.html.simpleparser; 49 50 import java.io.File ; 51 import java.io.IOException ; 52 import java.io.Reader ; 53 import java.util.ArrayList ; 54 import java.util.HashMap ; 55 import java.util.Stack ; 56 import java.util.StringTokenizer ; 57 58 import com.lowagie.text.Chunk; 59 import com.lowagie.text.DocListener; 60 import com.lowagie.text.DocumentException; 61 import com.lowagie.text.Element; 62 import com.lowagie.text.ExceptionConverter; 63 import com.lowagie.text.FontFactoryImp; 64 import com.lowagie.text.HeaderFooter; 65 import com.lowagie.text.Image; 66 import com.lowagie.text.ListItem; 67 import com.lowagie.text.Paragraph; 68 import com.lowagie.text.Phrase; 69 import com.lowagie.text.Rectangle; 70 import com.lowagie.text.TextElementArray; 71 import com.lowagie.text.pdf.PdfPTable; 72 import com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler; 73 import com.lowagie.text.xml.simpleparser.SimpleXMLParser; 74 75 public class HTMLWorker implements SimpleXMLDocHandler, DocListener { 76 77 protected ArrayList objectList; 78 protected DocListener document; 79 private Paragraph currentParagraph; 80 private ChainedProperties cprops = new ChainedProperties(); 81 private Stack stack = new Stack (); 82 private boolean pendingTR = false; 83 private boolean pendingTD = false; 84 private boolean pendingLI = false; 85 private StyleSheet style = new StyleSheet(); 86 private boolean isPRE = false; 87 private Stack tableState = new Stack (); 88 private boolean skipText = false; 89 private HashMap interfaceProps; 90 private FactoryProperties factoryProperties = new FactoryProperties(); 91 92 93 public HTMLWorker(DocListener document) { 94 this.document = document; 95 } 96 97 public void setStyleSheet(StyleSheet style) { 98 this.style = style; 99 } 100 101 public StyleSheet getStyleSheet() { 102 return style; 103 } 104 105 public void setInterfaceProps(HashMap interfaceProps) { 106 this.interfaceProps = interfaceProps; 107 FontFactoryImp ff = null; 
108 if (interfaceProps != null) 109 ff = (FontFactoryImp)interfaceProps.get("font_factory"); 110 if (ff != null) 111 factoryProperties.setFontImp(ff); 112 } 113 114 public HashMap getInterfaceProps() { 115 return interfaceProps; 116 } 117 118 public void parse(Reader reader) throws IOException { 119 SimpleXMLParser.parse(this, null, reader, true); 120 } 121 122 public static ArrayList parseToList(Reader reader, StyleSheet style) throws IOException { 123 return parseToList(reader, style, null); 124 } 125 126 public static ArrayList parseToList(Reader reader, StyleSheet style, HashMap interfaceProps) throws IOException { 127 HTMLWorker worker = new HTMLWorker(null); 128 if (style != null) 129 worker.style = style; 130 worker.document = worker; 131 worker.setInterfaceProps(interfaceProps); 132 worker.objectList = new ArrayList (); 133 worker.parse(reader); 134 return worker.objectList; 135 } 136 137 public void endDocument() { 138 try { 139 for (int k = 0; k < stack.size(); ++k) 140 document.add((Element)stack.elementAt(k)); 141 if (currentParagraph != null) 142 document.add(currentParagraph); 143 currentParagraph = null; 144 } 145 catch (Exception e) { 146 throw new ExceptionConverter(e); 147 } 148 } 149 150 public void startDocument() { 151 HashMap h = new HashMap (); 152 style.applyStyle("body", h); 153 cprops.addToChain("body", h); 154 } 155 156 public void startElement(String tag, HashMap h) { 157 if (!tagsSupported.containsKey(tag)) 158 return; 159 try { 160 style.applyStyle(tag, h); 161 String follow = (String )FactoryProperties.followTags.get(tag); 162 if (follow != null) { 163 HashMap prop = new HashMap (); 164 prop.put(follow, null); 165 cprops.addToChain(follow, prop); 166 return; 167 } 168 FactoryProperties.insertStyle(h); 169 if (tag.equals("a")) { 170 cprops.addToChain(tag, h); 171 if (currentParagraph == null) 172 currentParagraph = new Paragraph(); 173 stack.push(currentParagraph); 174 currentParagraph = new Paragraph(); 175 return; 176 } 177 if 
(tag.equals("br")) { 178 if (currentParagraph == null) 179 currentParagraph = new Paragraph(); 180 currentParagraph.add(factoryProperties.createChunk("\n", cprops)); 181 return; 182 } 183 if (tag.equals("font") || tag.equals("span")) { 184 cprops.addToChain(tag, h); 185 return; 186 } 187 if (tag.equals("img")) { 188 String src = (String )h.get("src"); 189 if (src == null) 190 return; 191 cprops.addToChain(tag, h); 192 Image img = null; 193 if (interfaceProps != null) { 194 ImageProvider ip = (ImageProvider)interfaceProps.get("img_provider"); 195 if (ip != null) 196 img = ip.getImage(src, h, cprops, document); 197 if (img == null) { 198 HashMap images = (HashMap )interfaceProps.get("img_static"); 199 if (images != null) { 200 Image tim = (Image)images.get(src); 201 if (tim != null) 202 img = Image.getInstance(tim); 203 } else { 204 if (!src.startsWith("http")) { String baseurl = (String )interfaceProps.get("img_baseurl"); 206 if (baseurl != null) { 207 src = baseurl+src; 208 img = Image.getInstance(src); 209 } 210 } 211 } 212 } 213 } 214 if (img == null) { 215 if (!src.startsWith("http")) { 216 String path = cprops.getProperty("image_path"); 217 if (path == null) 218 path = ""; 219 src = new File (path, src).getPath(); 220 } 221 img = Image.getInstance(src); 222 } 223 String align = (String )h.get("align"); 224 String width = (String )h.get("width"); 225 String height = (String )h.get("height"); 226 String before = cprops.getProperty("before"); 227 String after = cprops.getProperty("after"); 228 if (before != null) 229 img.setSpacingBefore(Float.parseFloat(before)); 230 if (after != null) 231 img.setSpacingAfter(Float.parseFloat(after)); 232 float wp = lengthParse(width, (int)img.getWidth()); 233 float lp = lengthParse(height, (int)img.getHeight()); 234 if (wp > 0 && lp > 0) 235 img.scalePercent(wp > lp ? 
lp : wp); 236 else if (wp > 0) 237 img.scalePercent(wp); 238 else if (lp > 0) 239 img.scalePercent(lp); 240 img.setWidthPercentage(0); 241 if (align != null) { 242 endElement("p"); 243 int ralign = Image.MIDDLE; 244 if (align.equalsIgnoreCase("left")) 245 ralign = Image.LEFT; 246 else if (align.equalsIgnoreCase("right")) 247 ralign = Image.RIGHT; 248 img.setAlignment(ralign); 249 Img i = null; 250 boolean skip = false; 251 if (interfaceProps != null) { 252 i = (Img)interfaceProps.get("img_interface"); 253 if (i != null) 254 skip = i.process(img, h, cprops, document); 255 } 256 if (!skip) 257 document.add(img); 258 cprops.removeChain(tag); 259 } 260 else { 261 cprops.removeChain(tag); 262 if (currentParagraph == null) 263 currentParagraph = FactoryProperties.createParagraph(cprops); 264 currentParagraph.add(new Chunk(img, 0, 0)); 265 } 266 return; 267 } 268 endElement("p"); 269 if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3") || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) { 270 if (!h.containsKey("size")) { 271 int v = 7 - Integer.parseInt(tag.substring(1)); 272 h.put("size", Integer.toString(v)); 273 } 274 cprops.addToChain(tag, h); 275 return; 276 } 277 if (tag.equals("ul")) { 278 if (pendingLI) 279 endElement("li"); 280 skipText = true; 281 cprops.addToChain(tag, h); 282 com.lowagie.text.List list = new com.lowagie.text.List(false, 10); 283 list.setListSymbol("\u2022"); 284 stack.push(list); 285 return; 286 } 287 if (tag.equals("ol")) { 288 if (pendingLI) 289 endElement("li"); 290 skipText = true; 291 cprops.addToChain(tag, h); 292 com.lowagie.text.List list = new com.lowagie.text.List(true, 10); 293 stack.push(list); 294 return; 295 } 296 if (tag.equals("li")) { 297 if (pendingLI) 298 endElement("li"); 299 skipText = false; 300 pendingLI = true; 301 cprops.addToChain(tag, h); 302 stack.push(FactoryProperties.createListItem(cprops)); 303 return; 304 } 305 if (tag.equals("div") || tag.equals("body")) { 306 cprops.addToChain(tag, h); 307 
return; 308 } 309 if (tag.equals("pre")) { 310 if (!h.containsKey("face")) { 311 h.put("face", "Courier"); 312 } 313 cprops.addToChain(tag, h); 314 isPRE = true; 315 return; 316 } 317 if (tag.equals("p")) { 318 cprops.addToChain(tag, h); 319 currentParagraph = FactoryProperties.createParagraph(h); 320 return; 321 } 322 if (tag.equals("tr")) { 323 if (pendingTR) 324 endElement("tr"); 325 skipText = true; 326 pendingTR = true; 327 cprops.addToChain("tr", h); 328 return; 329 } 330 if (tag.equals("td") || tag.equals("th")) { 331 if (pendingTD) 332 endElement(tag); 333 skipText = false; 334 pendingTD = true; 335 cprops.addToChain("td", h); 336 stack.push(new IncCell(tag, cprops)); 337 return; 338 } 339 if (tag.equals("table")) { 340 cprops.addToChain("table", h); 341 IncTable table = new IncTable(h); 342 stack.push(table); 343 tableState.push(new boolean[]{pendingTR, pendingTD}); 344 pendingTR = pendingTD = false; 345 skipText = true; 346 return; 347 } 348 } 349 catch (Exception e) { 350 throw new ExceptionConverter(e); 351 } 352 } 353 354 public void endElement(String tag) { 355 if (!tagsSupported.containsKey(tag)) 356 return; 357 try { 358 String follow = (String )FactoryProperties.followTags.get(tag); 359 if (follow != null) { 360 cprops.removeChain(follow); 361 return; 362 } 363 if (tag.equals("font") || tag.equals("span")) { 364 cprops.removeChain(tag); 365 return; 366 } 367 if (tag.equals("a")) { 368 if (currentParagraph == null) 369 currentParagraph = new Paragraph(); 370 ALink i = null; 371 boolean skip = false; 372 if (interfaceProps != null) { 373 i = (ALink)interfaceProps.get("alink_interface"); 374 if (i != null) 375 skip = i.process(currentParagraph, cprops); 376 } 377 if (!skip) { 378 String href = cprops.getProperty("href"); 379 if (href != null) { 380 ArrayList chunks = currentParagraph.getChunks(); 381 for (int k = 0; k < chunks.size(); ++k) { 382 Chunk ck = (Chunk)chunks.get(k); 383 ck.setAnchor(href); 384 } 385 } 386 } 387 Paragraph tmp = 
(Paragraph)stack.pop(); 388 Phrase tmp2 = new Phrase(); 389 tmp2.add(currentParagraph); 390 tmp.add(tmp2); 391 currentParagraph = tmp; 392 cprops.removeChain("a"); 393 return; 394 } 395 if (tag.equals("br")) { 396 return; 397 } 398 if (currentParagraph != null) { 399 if (stack.empty()) 400 document.add(currentParagraph); 401 else { 402 Object obj = stack.pop(); 403 if (obj instanceof TextElementArray) { 404 TextElementArray current = (TextElementArray)obj; 405 current.add(currentParagraph); 406 } 407 stack.push(obj); 408 } 409 } 410 currentParagraph = null; 411 if (tag.equals("ul") || tag.equals("ol")) { 412 if (pendingLI) 413 endElement("li"); 414 skipText = false; 415 cprops.removeChain(tag); 416 if (stack.empty()) 417 return; 418 Object obj = stack.pop(); 419 if (!(obj instanceof com.lowagie.text.List)) { 420 stack.push(obj); 421 return; 422 } 423 if (stack.empty()) 424 document.add((Element)obj); 425 else 426 ((TextElementArray)stack.peek()).add(obj); 427 return; 428 } 429 if (tag.equals("li")) { 430 pendingLI = false; 431 skipText = true; 432 cprops.removeChain(tag); 433 if (stack.empty()) 434 return; 435 Object obj = stack.pop(); 436 if (!(obj instanceof ListItem)) { 437 stack.push(obj); 438 return; 439 } 440 if (stack.empty()) { 441 document.add((Element)obj); 442 return; 443 } 444 Object list = stack.pop(); 445 if (!(list instanceof com.lowagie.text.List)) { 446 stack.push(list); 447 return; 448 } 449 ListItem item = (ListItem)obj; 450 ((com.lowagie.text.List)list).add(item); 451 ArrayList cks = item.getChunks(); 452 if (!cks.isEmpty()) 453 item.getListSymbol().setFont(((Chunk)cks.get(0)).getFont()); 454 stack.push(list); 455 return; 456 } 457 if (tag.equals("div") || tag.equals("body")) { 458 cprops.removeChain(tag); 459 return; 460 } 461 if (tag.equals("pre")) { 462 cprops.removeChain(tag); 463 isPRE = false; 464 return; 465 } 466 if (tag.equals("p")) { 467 cprops.removeChain(tag); 468 return; 469 } 470 if (tag.equals("h1") || tag.equals("h2") || 
tag.equals("h3") || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) { 471 cprops.removeChain(tag); 472 return; 473 } 474 if (tag.equals("table")) { 475 if (pendingTR) 476 endElement("tr"); 477 cprops.removeChain("table"); 478 IncTable table = (IncTable) stack.pop(); 479 PdfPTable tb = table.buildTable(); 480 tb.setSplitRows(true); 481 if (stack.empty()) 482 document.add(tb); 483 else 484 ((TextElementArray)stack.peek()).add(tb); 485 boolean state[] = (boolean[])tableState.pop(); 486 pendingTR = state[0]; 487 pendingTD = state[1]; 488 skipText = false; 489 return; 490 } 491 if (tag.equals("tr")) { 492 if (pendingTD) 493 endElement("td"); 494 pendingTR = false; 495 cprops.removeChain("tr"); 496 ArrayList cells = new ArrayList (); 497 IncTable table = null; 498 while (true) { 499 Object obj = stack.pop(); 500 if (obj instanceof IncCell) { 501 cells.add(((IncCell)obj).getCell()); 502 } 503 if (obj instanceof IncTable) { 504 table = (IncTable)obj; 505 break; 506 } 507 } 508 table.addCols(cells); 509 table.endRow(); 510 stack.push(table); 511 skipText = true; 512 return; 513 } 514 if (tag.equals("td") || tag.equals("th")) { 515 pendingTD = false; 516 cprops.removeChain("td"); 517 skipText = true; 518 return; 519 } 520 } 521 catch (Exception e) { 522 throw new ExceptionConverter(e); 523 } 524 } 525 526 public void text(String str) { 527 if (skipText) 528 return; 529 String content = str; 530 if (isPRE) { 531 if (currentParagraph == null) 532 currentParagraph = new Paragraph(); 533 currentParagraph.add(factoryProperties.createChunk(content, cprops)); 534 return; 535 } 536 if (content.trim().length() == 0 && content.indexOf(' ') < 0) { 537 return; 538 } 539 540 StringBuffer buf = new StringBuffer (); 541 int len = content.length(); 542 char character; 543 boolean newline = false; 544 for (int i = 0; i < len; i++) { 545 switch(character = content.charAt(i)) { 546 case ' ': 547 if (!newline) { 548 buf.append(character); 549 } 550 break; 551 case '\n': 552 if (i > 0) 
{ 553 newline = true; 554 buf.append(' '); 555 } 556 break; 557 case '\r': 558 break; 559 case '\t': 560 break; 561 default: 562 newline = false; 563 buf.append(character); 564 } 565 } 566 if (currentParagraph == null) 567 currentParagraph = FactoryProperties.createParagraph(cprops); 568 currentParagraph.add(factoryProperties.createChunk(buf.toString(), cprops)); 569 } 570 571 public boolean add(Element element) throws DocumentException { 572 objectList.add(element); 573 return true; 574 } 575 576 public void clearTextWrap() throws DocumentException { 577 } 578 579 public void close() { 580 } 581 582 public boolean newPage() { 583 return true; 584 } 585 586 public void open() { 587 } 588 589 public void resetFooter() { 590 } 591 592 public void resetHeader() { 593 } 594 595 public void resetPageCount() { 596 } 597 598 public void setFooter(HeaderFooter footer) { 599 } 600 601 public void setHeader(HeaderFooter header) { 602 } 603 604 public boolean setMarginMirroring(boolean marginMirroring) { 605 return true; 606 } 607 608 public boolean setMargins(float marginLeft, float marginRight, float marginTop, float marginBottom) { 609 return true; 610 } 611 612 public void setPageCount(int pageN) { 613 } 614 615 public boolean setPageSize(Rectangle pageSize) { 616 return true; 617 } 618 619 public static final String tagsSupportedString = "ol ul li a pre font span br p div body table td th tr i b u sub sup em strong" 620 + " h1 h2 h3 h4 h5 h6 img"; 621 622 public static final HashMap tagsSupported = new HashMap (); 623 624 static { 625 StringTokenizer tok = new StringTokenizer (tagsSupportedString); 626 while (tok.hasMoreTokens()) 627 tagsSupported.put(tok.nextToken(), null); 628 } 629 630 private static float lengthParse(String txt, int c) { 631 if (txt == null) 632 return -1; 633 if (txt.endsWith("%")) { 634 float vf = Float.parseFloat(txt.substring(0, txt.length() - 1)); 635 return vf; 636 } 637 int v = Integer.parseInt(txt); 638 return (float)v / c * 100f; 639 } 640 } 
// [web-page footer residue: "Popular Tags"]