KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > html > simpleparser > HTMLWorker


1 /*
2  * Copyright 2004 Paulo Soares
3  *
4  * The contents of this file are subject to the Mozilla Public License Version 1.1
5  * (the "License"); you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
7  *
8  * Software distributed under the License is distributed on an "AS IS" basis,
9  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10  * for the specific language governing rights and limitations under the License.
11  *
12  * The Original Code is 'iText, a free JAVA-PDF library'.
13  *
14  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
15  * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
16  * All Rights Reserved.
17  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
18  * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
19  *
20  * Contributor(s): all the names of the contributors are added in the source code
21  * where applicable.
22  *
23  * Alternatively, the contents of this file may be used under the terms of the
24  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
25  * provisions of LGPL are applicable instead of those above. If you wish to
26  * allow use of your version of this file only under the terms of the LGPL
27  * License and not to allow others to use your version of this file under
28  * the MPL, indicate your decision by deleting the provisions above and
29  * replace them with the notice and other provisions required by the LGPL.
30  * If you do not delete the provisions above, a recipient may use your version
31  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
32  *
33  * This library is free software; you can redistribute it and/or modify it
34  * under the terms of the MPL as stated above or under the terms of the GNU
35  * Library General Public License as published by the Free Software Foundation;
36  * either version 2 of the License, or any later version.
37  *
38  * This library is distributed in the hope that it will be useful, but WITHOUT
39  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
40  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
41  * details.
42  *
43  * If you didn't download this code from the following link, you should check if
44  * you aren't using an obsolete version:
45  * http://www.lowagie.com/iText/
46  */

47
48 package com.lowagie.text.html.simpleparser;
49
50 import java.io.File JavaDoc;
51 import java.io.IOException JavaDoc;
52 import java.io.Reader JavaDoc;
53 import java.util.ArrayList JavaDoc;
54 import java.util.HashMap JavaDoc;
55 import java.util.Stack JavaDoc;
56 import java.util.StringTokenizer JavaDoc;
57
58 import com.lowagie.text.Chunk;
59 import com.lowagie.text.DocListener;
60 import com.lowagie.text.DocumentException;
61 import com.lowagie.text.Element;
62 import com.lowagie.text.ExceptionConverter;
63 import com.lowagie.text.FontFactoryImp;
64 import com.lowagie.text.HeaderFooter;
65 import com.lowagie.text.Image;
66 import com.lowagie.text.ListItem;
67 import com.lowagie.text.Paragraph;
68 import com.lowagie.text.Phrase;
69 import com.lowagie.text.Rectangle;
70 import com.lowagie.text.TextElementArray;
71 import com.lowagie.text.pdf.PdfPTable;
72 import com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
73 import com.lowagie.text.xml.simpleparser.SimpleXMLParser;
74
75 public class HTMLWorker implements SimpleXMLDocHandler, DocListener {
76     
77     protected ArrayList JavaDoc objectList;
78     protected DocListener document;
79     private Paragraph currentParagraph;
80     private ChainedProperties cprops = new ChainedProperties();
81     private Stack JavaDoc stack = new Stack JavaDoc();
82     private boolean pendingTR = false;
83     private boolean pendingTD = false;
84     private boolean pendingLI = false;
85     private StyleSheet style = new StyleSheet();
86     private boolean isPRE = false;
87     private Stack JavaDoc tableState = new Stack JavaDoc();
88     private boolean skipText = false;
89     private HashMap JavaDoc interfaceProps;
90     private FactoryProperties factoryProperties = new FactoryProperties();
91     
92     /** Creates a new instance of HTMLWorker */
93     public HTMLWorker(DocListener document) {
94         this.document = document;
95     }
96     
97     public void setStyleSheet(StyleSheet style) {
98         this.style = style;
99     }
100     
101     public StyleSheet getStyleSheet() {
102         return style;
103     }
104     
105     public void setInterfaceProps(HashMap JavaDoc interfaceProps) {
106         this.interfaceProps = interfaceProps;
107         FontFactoryImp ff = null;
108         if (interfaceProps != null)
109             ff = (FontFactoryImp)interfaceProps.get("font_factory");
110         if (ff != null)
111             factoryProperties.setFontImp(ff);
112     }
113     
114     public HashMap JavaDoc getInterfaceProps() {
115         return interfaceProps;
116     }
117     
118     public void parse(Reader JavaDoc reader) throws IOException JavaDoc {
119         SimpleXMLParser.parse(this, null, reader, true);
120     }
121     
122     public static ArrayList JavaDoc parseToList(Reader JavaDoc reader, StyleSheet style) throws IOException JavaDoc {
123         return parseToList(reader, style, null);
124     }
125     
126     public static ArrayList JavaDoc parseToList(Reader JavaDoc reader, StyleSheet style, HashMap JavaDoc interfaceProps) throws IOException JavaDoc {
127         HTMLWorker worker = new HTMLWorker(null);
128         if (style != null)
129             worker.style = style;
130         worker.document = worker;
131         worker.setInterfaceProps(interfaceProps);
132         worker.objectList = new ArrayList JavaDoc();
133         worker.parse(reader);
134         return worker.objectList;
135     }
136     
137     public void endDocument() {
138         try {
139             for (int k = 0; k < stack.size(); ++k)
140                 document.add((Element)stack.elementAt(k));
141             if (currentParagraph != null)
142                 document.add(currentParagraph);
143             currentParagraph = null;
144         }
145         catch (Exception JavaDoc e) {
146             throw new ExceptionConverter(e);
147         }
148     }
149     
150     public void startDocument() {
151         HashMap JavaDoc h = new HashMap JavaDoc();
152         style.applyStyle("body", h);
153         cprops.addToChain("body", h);
154     }
155     
156     public void startElement(String JavaDoc tag, HashMap JavaDoc h) {
157         if (!tagsSupported.containsKey(tag))
158             return;
159         try {
160             style.applyStyle(tag, h);
161             String JavaDoc follow = (String JavaDoc)FactoryProperties.followTags.get(tag);
162             if (follow != null) {
163                 HashMap JavaDoc prop = new HashMap JavaDoc();
164                 prop.put(follow, null);
165                 cprops.addToChain(follow, prop);
166                 return;
167             }
168             FactoryProperties.insertStyle(h);
169             if (tag.equals("a")) {
170                 cprops.addToChain(tag, h);
171                 if (currentParagraph == null)
172                     currentParagraph = new Paragraph();
173                 stack.push(currentParagraph);
174                 currentParagraph = new Paragraph();
175                 return;
176             }
177             if (tag.equals("br")) {
178                 if (currentParagraph == null)
179                     currentParagraph = new Paragraph();
180                 currentParagraph.add(factoryProperties.createChunk("\n", cprops));
181                 return;
182             }
183             if (tag.equals("font") || tag.equals("span")) {
184                 cprops.addToChain(tag, h);
185                 return;
186             }
187             if (tag.equals("img")) {
188                 String JavaDoc src = (String JavaDoc)h.get("src");
189                 if (src == null)
190                     return;
191                 cprops.addToChain(tag, h);
192                 Image img = null;
193                 if (interfaceProps != null) {
194                     ImageProvider ip = (ImageProvider)interfaceProps.get("img_provider");
195                     if (ip != null)
196                         img = ip.getImage(src, h, cprops, document);
197                     if (img == null) {
198                         HashMap JavaDoc images = (HashMap JavaDoc)interfaceProps.get("img_static");
199                         if (images != null) {
200                             Image tim = (Image)images.get(src);
201                             if (tim != null)
202                                 img = Image.getInstance(tim);
203                         } else {
204                             if (!src.startsWith("http")) { // relative src references only
205
String JavaDoc baseurl = (String JavaDoc)interfaceProps.get("img_baseurl");
206                                 if (baseurl != null) {
207                                     src = baseurl+src;
208                                     img = Image.getInstance(src);
209                                 }
210                             }
211                         }
212                     }
213                 }
214                 if (img == null) {
215                     if (!src.startsWith("http")) {
216                         String JavaDoc path = cprops.getProperty("image_path");
217                         if (path == null)
218                             path = "";
219                         src = new File JavaDoc(path, src).getPath();
220                     }
221                     img = Image.getInstance(src);
222                 }
223                 String JavaDoc align = (String JavaDoc)h.get("align");
224                 String JavaDoc width = (String JavaDoc)h.get("width");
225                 String JavaDoc height = (String JavaDoc)h.get("height");
226                 String JavaDoc before = cprops.getProperty("before");
227                 String JavaDoc after = cprops.getProperty("after");
228                 if (before != null)
229                     img.setSpacingBefore(Float.parseFloat(before));
230                 if (after != null)
231                     img.setSpacingAfter(Float.parseFloat(after));
232                 float wp = lengthParse(width, (int)img.getWidth());
233                 float lp = lengthParse(height, (int)img.getHeight());
234                 if (wp > 0 && lp > 0)
235                     img.scalePercent(wp > lp ? lp : wp);
236                 else if (wp > 0)
237                     img.scalePercent(wp);
238                 else if (lp > 0)
239                     img.scalePercent(lp);
240                 img.setWidthPercentage(0);
241                 if (align != null) {
242                     endElement("p");
243                     int ralign = Image.MIDDLE;
244                     if (align.equalsIgnoreCase("left"))
245                         ralign = Image.LEFT;
246                     else if (align.equalsIgnoreCase("right"))
247                         ralign = Image.RIGHT;
248                     img.setAlignment(ralign);
249                     Img i = null;
250                     boolean skip = false;
251                     if (interfaceProps != null) {
252                         i = (Img)interfaceProps.get("img_interface");
253                         if (i != null)
254                             skip = i.process(img, h, cprops, document);
255                     }
256                     if (!skip)
257                         document.add(img);
258                     cprops.removeChain(tag);
259                 }
260                 else {
261                     cprops.removeChain(tag);
262                     if (currentParagraph == null)
263                         currentParagraph = FactoryProperties.createParagraph(cprops);
264                     currentParagraph.add(new Chunk(img, 0, 0));
265                 }
266                 return;
267             }
268             endElement("p");
269             if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3") || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) {
270                 if (!h.containsKey("size")) {
271                     int v = 7 - Integer.parseInt(tag.substring(1));
272                     h.put("size", Integer.toString(v));
273                 }
274                 cprops.addToChain(tag, h);
275                 return;
276             }
277             if (tag.equals("ul")) {
278                 if (pendingLI)
279                     endElement("li");
280                 skipText = true;
281                 cprops.addToChain(tag, h);
282                 com.lowagie.text.List list = new com.lowagie.text.List(false, 10);
283                 list.setListSymbol("\u2022");
284                 stack.push(list);
285                 return;
286             }
287             if (tag.equals("ol")) {
288                 if (pendingLI)
289                     endElement("li");
290                 skipText = true;
291                 cprops.addToChain(tag, h);
292                 com.lowagie.text.List list = new com.lowagie.text.List(true, 10);
293                 stack.push(list);
294                 return;
295             }
296             if (tag.equals("li")) {
297                 if (pendingLI)
298                     endElement("li");
299                 skipText = false;
300                 pendingLI = true;
301                 cprops.addToChain(tag, h);
302                 stack.push(FactoryProperties.createListItem(cprops));
303                 return;
304             }
305             if (tag.equals("div") || tag.equals("body")) {
306                 cprops.addToChain(tag, h);
307                 return;
308             }
309             if (tag.equals("pre")) {
310                 if (!h.containsKey("face")) {
311                     h.put("face", "Courier");
312                 }
313                 cprops.addToChain(tag, h);
314                 isPRE = true;
315                 return;
316             }
317             if (tag.equals("p")) {
318                 cprops.addToChain(tag, h);
319                 currentParagraph = FactoryProperties.createParagraph(h);
320                 return;
321             }
322             if (tag.equals("tr")) {
323                 if (pendingTR)
324                     endElement("tr");
325                 skipText = true;
326                 pendingTR = true;
327                 cprops.addToChain("tr", h);
328                 return;
329             }
330             if (tag.equals("td") || tag.equals("th")) {
331                 if (pendingTD)
332                     endElement(tag);
333                 skipText = false;
334                 pendingTD = true;
335                 cprops.addToChain("td", h);
336                 stack.push(new IncCell(tag, cprops));
337                 return;
338             }
339             if (tag.equals("table")) {
340                 cprops.addToChain("table", h);
341                 IncTable table = new IncTable(h);
342                 stack.push(table);
343                 tableState.push(new boolean[]{pendingTR, pendingTD});
344                 pendingTR = pendingTD = false;
345                 skipText = true;
346                 return;
347             }
348         }
349         catch (Exception JavaDoc e) {
350             throw new ExceptionConverter(e);
351         }
352     }
353     
354     public void endElement(String JavaDoc tag) {
355         if (!tagsSupported.containsKey(tag))
356             return;
357         try {
358             String JavaDoc follow = (String JavaDoc)FactoryProperties.followTags.get(tag);
359             if (follow != null) {
360                 cprops.removeChain(follow);
361                 return;
362             }
363             if (tag.equals("font") || tag.equals("span")) {
364                 cprops.removeChain(tag);
365                 return;
366             }
367             if (tag.equals("a")) {
368                 if (currentParagraph == null)
369                     currentParagraph = new Paragraph();
370                 ALink i = null;
371                 boolean skip = false;
372                 if (interfaceProps != null) {
373                     i = (ALink)interfaceProps.get("alink_interface");
374                     if (i != null)
375                         skip = i.process(currentParagraph, cprops);
376                 }
377                 if (!skip) {
378                     String JavaDoc href = cprops.getProperty("href");
379                     if (href != null) {
380                         ArrayList JavaDoc chunks = currentParagraph.getChunks();
381                         for (int k = 0; k < chunks.size(); ++k) {
382                             Chunk ck = (Chunk)chunks.get(k);
383                             ck.setAnchor(href);
384                         }
385                     }
386                 }
387                 Paragraph tmp = (Paragraph)stack.pop();
388                 Phrase tmp2 = new Phrase();
389                 tmp2.add(currentParagraph);
390                 tmp.add(tmp2);
391                 currentParagraph = tmp;
392                 cprops.removeChain("a");
393                 return;
394             }
395             if (tag.equals("br")) {
396                 return;
397             }
398             if (currentParagraph != null) {
399                 if (stack.empty())
400                     document.add(currentParagraph);
401                 else {
402                     Object JavaDoc obj = stack.pop();
403                     if (obj instanceof TextElementArray) {
404                         TextElementArray current = (TextElementArray)obj;
405                         current.add(currentParagraph);
406                     }
407                     stack.push(obj);
408                 }
409             }
410             currentParagraph = null;
411             if (tag.equals("ul") || tag.equals("ol")) {
412                 if (pendingLI)
413                     endElement("li");
414                 skipText = false;
415                 cprops.removeChain(tag);
416                 if (stack.empty())
417                     return;
418                 Object JavaDoc obj = stack.pop();
419                 if (!(obj instanceof com.lowagie.text.List)) {
420                     stack.push(obj);
421                     return;
422                 }
423                 if (stack.empty())
424                     document.add((Element)obj);
425                 else
426                     ((TextElementArray)stack.peek()).add(obj);
427                 return;
428             }
429             if (tag.equals("li")) {
430                 pendingLI = false;
431                 skipText = true;
432                 cprops.removeChain(tag);
433                 if (stack.empty())
434                     return;
435                 Object JavaDoc obj = stack.pop();
436                 if (!(obj instanceof ListItem)) {
437                     stack.push(obj);
438                     return;
439                 }
440                 if (stack.empty()) {
441                     document.add((Element)obj);
442                     return;
443                 }
444                 Object JavaDoc list = stack.pop();
445                 if (!(list instanceof com.lowagie.text.List)) {
446                     stack.push(list);
447                     return;
448                 }
449                 ListItem item = (ListItem)obj;
450                 ((com.lowagie.text.List)list).add(item);
451                 ArrayList JavaDoc cks = item.getChunks();
452                 if (!cks.isEmpty())
453                     item.getListSymbol().setFont(((Chunk)cks.get(0)).getFont());
454                 stack.push(list);
455                 return;
456             }
457             if (tag.equals("div") || tag.equals("body")) {
458                 cprops.removeChain(tag);
459                 return;
460             }
461             if (tag.equals("pre")) {
462                 cprops.removeChain(tag);
463                 isPRE = false;
464                 return;
465             }
466             if (tag.equals("p")) {
467                 cprops.removeChain(tag);
468                 return;
469             }
470             if (tag.equals("h1") || tag.equals("h2") || tag.equals("h3") || tag.equals("h4") || tag.equals("h5") || tag.equals("h6")) {
471                 cprops.removeChain(tag);
472                 return;
473             }
474             if (tag.equals("table")) {
475                 if (pendingTR)
476                     endElement("tr");
477                 cprops.removeChain("table");
478                 IncTable table = (IncTable) stack.pop();
479                 PdfPTable tb = table.buildTable();
480                 tb.setSplitRows(true);
481                 if (stack.empty())
482                     document.add(tb);
483                 else
484                     ((TextElementArray)stack.peek()).add(tb);
485                 boolean state[] = (boolean[])tableState.pop();
486                 pendingTR = state[0];
487                 pendingTD = state[1];
488                 skipText = false;
489                 return;
490             }
491             if (tag.equals("tr")) {
492                 if (pendingTD)
493                     endElement("td");
494                 pendingTR = false;
495                 cprops.removeChain("tr");
496                 ArrayList JavaDoc cells = new ArrayList JavaDoc();
497                 IncTable table = null;
498                 while (true) {
499                     Object JavaDoc obj = stack.pop();
500                     if (obj instanceof IncCell) {
501                         cells.add(((IncCell)obj).getCell());
502                     }
503                     if (obj instanceof IncTable) {
504                         table = (IncTable)obj;
505                         break;
506                     }
507                 }
508                 table.addCols(cells);
509                 table.endRow();
510                 stack.push(table);
511                 skipText = true;
512                 return;
513             }
514             if (tag.equals("td") || tag.equals("th")) {
515                 pendingTD = false;
516                 cprops.removeChain("td");
517                 skipText = true;
518                 return;
519             }
520         }
521         catch (Exception JavaDoc e) {
522             throw new ExceptionConverter(e);
523         }
524     }
525     
526     public void text(String JavaDoc str) {
527         if (skipText)
528             return;
529         String JavaDoc content = str;
530         if (isPRE) {
531             if (currentParagraph == null)
532                 currentParagraph = new Paragraph();
533             currentParagraph.add(factoryProperties.createChunk(content, cprops));
534             return;
535         }
536         if (content.trim().length() == 0 && content.indexOf(' ') < 0) {
537             return;
538         }
539         
540         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
541         int len = content.length();
542         char character;
543         boolean newline = false;
544         for (int i = 0; i < len; i++) {
545             switch(character = content.charAt(i)) {
546                 case ' ':
547                     if (!newline) {
548                         buf.append(character);
549                     }
550                     break;
551                 case '\n':
552                     if (i > 0) {
553                         newline = true;
554                         buf.append(' ');
555                     }
556                     break;
557                 case '\r':
558                     break;
559                 case '\t':
560                     break;
561                     default:
562                         newline = false;
563                         buf.append(character);
564             }
565         }
566         if (currentParagraph == null)
567             currentParagraph = FactoryProperties.createParagraph(cprops);
568         currentParagraph.add(factoryProperties.createChunk(buf.toString(), cprops));
569     }
570     
571     public boolean add(Element element) throws DocumentException {
572         objectList.add(element);
573         return true;
574     }
575     
576     public void clearTextWrap() throws DocumentException {
577     }
578     
579     public void close() {
580     }
581     
582     public boolean newPage() {
583         return true;
584     }
585     
586     public void open() {
587     }
588     
589     public void resetFooter() {
590     }
591     
592     public void resetHeader() {
593     }
594     
595     public void resetPageCount() {
596     }
597     
598     public void setFooter(HeaderFooter footer) {
599     }
600     
601     public void setHeader(HeaderFooter header) {
602     }
603     
604     public boolean setMarginMirroring(boolean marginMirroring) {
605         return true;
606     }
607     
608     public boolean setMargins(float marginLeft, float marginRight, float marginTop, float marginBottom) {
609         return true;
610     }
611     
612     public void setPageCount(int pageN) {
613     }
614     
615     public boolean setPageSize(Rectangle pageSize) {
616         return true;
617     }
618     
619     public static final String JavaDoc tagsSupportedString = "ol ul li a pre font span br p div body table td th tr i b u sub sup em strong"
620         + " h1 h2 h3 h4 h5 h6 img";
621     
622     public static final HashMap JavaDoc tagsSupported = new HashMap JavaDoc();
623     
624     static {
625         StringTokenizer JavaDoc tok = new StringTokenizer JavaDoc(tagsSupportedString);
626         while (tok.hasMoreTokens())
627             tagsSupported.put(tok.nextToken(), null);
628     }
629     
630     private static float lengthParse(String JavaDoc txt, int c) {
631         if (txt == null)
632             return -1;
633         if (txt.endsWith("%")) {
634             float vf = Float.parseFloat(txt.substring(0, txt.length() - 1));
635             return vf;
636         }
637         int v = Integer.parseInt(txt);
638         return (float)v / c * 100f;
639     }
640 }
641
Popular Tags