KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlcleaner > HtmlCleaner


1 /* Copyright (c) 2006-2007, Vladimir Nikic
2     All rights reserved.
3     
4     Redistribution and use of this software in source and binary forms,
5     with or without modification, are permitted provided that the following
6     conditions are met:
7     
8     * Redistributions of source code must retain the above
9       copyright notice, this list of conditions and the
10       following disclaimer.
11     
12     * Redistributions in binary form must reproduce the above
13       copyright notice, this list of conditions and the
14       following disclaimer in the documentation and/or other
15       materials provided with the distribution.
16     
17     * The name of HtmlCleaner may not be used to endorse or promote
18       products derived from this software without specific prior
19       written permission.
20
21     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
25     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31     POSSIBILITY OF SUCH DAMAGE.
32     
33     You can contact Vladimir Nikic by sending e-mail to
34     nikic_vladimir@yahoo.com. Please include the word "HtmlCleaner" in the
35     subject line.
36 */

37
38 package org.htmlcleaner;
39
import java.io.*;
import java.net.URL;
import java.util.*;
43
44 /**
45  * Main HtmlCleaner class.
46  *
47  * <p>It represents public interface to the user. It's task is to call tokenizer with
48  * specified source HTML, traverse list of produced token list and create internal
49  * object model. It also offers a set of methods to write resulting XML to string,
50  * file or any output stream.</p>
51  * <p>Typical usage is the following:</p>
52  *
53  * <xmp>
54  * HtmlCleaner cleaner = new HtmlCleaner(...); // one of few constructors
55  * cleaner.setXXX(...) // optionally, set cleaner's behaviour
56  * clener.clean(); // calls cleaning process
57  * cleaner.writeXmlXXX(...) // writes resulting XML to string, file or any output stream
58  * </xmp>
59  *
60  * Created by: Vladimir Nikic <br/>
61  * Date: November, 2006
62  */

63 public class HtmlCleaner {
64
65     public static final String JavaDoc DEFAULT_CHARSET = System.getProperty("file.encoding");
66     
67     private static final int WRITE_METHOD_SIMPLE = 0;
68     private static final int WRITE_METHOD_COMPACT = 1;
69     private static final int WRITE_METHOD_PRETTY = 2;
70
71     /**
72      * Contains information about single open tag
73      */

74     private class TagPos {
75         private int position;
76         private String JavaDoc name;
77         private TagInfo info;
78
79         TagPos(int position, String JavaDoc name) {
80             this.position = position;
81             this.name = name;
82             this.info = tagInfoProvider.getTagInfo(name);
83         }
84     }
85
86     /**
87      * Class that contains information and mathods for managing list of open,
88      * but unhandled tags.
89      */

90     private class OpenTags {
91         private List list = new ArrayList();
92         private TagPos last = null;
93         private Set set = new HashSet();
94
95         private boolean isEmpty() {
96             return list.isEmpty();
97         }
98
99         private void addTag(String JavaDoc tagName, int position) {
100             last = new TagPos(position, tagName);
101             list.add(last);
102             set.add(tagName);
103         }
104
105         private void removeTag(String JavaDoc tagName) {
106             ListIterator it = list.listIterator( list.size() );
107             while ( it.hasPrevious() ) {
108                 TagPos currTagPos = (TagPos) it.previous();
109                 if (tagName.equals(currTagPos.name)) {
110                     it.remove();
111                     break;
112                 }
113             }
114
115             last = list.isEmpty() ? null : (TagPos) list.get( list.size() - 1 );
116         }
117
118         private TagPos findFirstTagPos() {
119             return list.isEmpty() ? null : (TagPos) list.get(0);
120         }
121
122         private TagPos getLastTagPos() {
123             return last;
124         }
125
126         private TagPos findTag(String JavaDoc tagName) {
127             if (tagName != null) {
128                 ListIterator it = list.listIterator( list.size() );
129                 while ( it.hasPrevious() ) {
130                     TagPos currTagPos = (TagPos) it.previous();
131                     if (tagName.equals(currTagPos.name)) {
132                         return currTagPos;
133                     }
134                 }
135             }
136
137             return null;
138         }
139
140         private boolean tagExists(String JavaDoc tagName) {
141             TagPos tagPos = findTag(tagName);
142             return tagPos != null;
143         }
144
145         private TagPos findTagToPlaceRubbish() {
146             TagPos result = null, prev = null;
147
148             if ( !isEmpty() ) {
149                 ListIterator it = list.listIterator( list.size() );
150                 while ( it.hasPrevious() ) {
151                     result = (TagPos) it.previous();
152                     if ( result.info == null || result.info.allowsAnything() ) {
153                         if (prev != null) {
154                             return prev;
155                         }
156                     }
157                     prev = result;
158                 }
159             }
160
161             return result;
162         }
163         
164         private boolean tagEncountered(String JavaDoc tagName) {
165             return set.contains(tagName);
166         }
167         
168         /**
169          * Checks if any of tags specified in the set are already open.
170          * @param tags
171          */

172         private boolean someAlreadyOpen(Set tags) {
173             Iterator it = list.iterator();
174             while ( it.hasNext() ) {
175                 TagPos curr = (TagPos) it.next();
176                 if ( tags.contains(curr.name) ) {
177                     return true;
178                 }
179             }
180             
181             
182             return false;
183         }
184     }
185
186     private ITagInfoProvider tagInfoProvider;
187
188     private Reader reader;
189     private transient OpenTags _openTags = new OpenTags();
190     private transient DoctypeToken _docType = null;
191     private Set allTags = new TreeSet();
192
193     private boolean advancedXmlEscape = true;
194     private boolean useCdataForScriptAndStyle = true;
195     private boolean translateSpecialEntities = true;
196     private boolean recognizeUnicodeChars = true;
197     private boolean omitUnknownTags = false;
198     private boolean omitDeprecatedTags = false;
199     private boolean omitComments = false;
200     private boolean omitXmlDeclaration = false;
201     private boolean omitDoctypeDeclaration = true;
202     private boolean omitXmlnsAttributes = false;
203     private String JavaDoc hyphenReplacementInComment = "=";
204
205     private TagNode htmlNode;
206     private TagNode bodyNode;
207     private TagNode headNode;
208
209     /**
210      * Constructor - creates the instance with specified html
211      * content as String.
212      * @param htmlContent
213      */

214     public HtmlCleaner(String JavaDoc htmlContent, ITagInfoProvider tagInfoProvider) {
215         this.reader = new StringReader(htmlContent);
216         this.tagInfoProvider = tagInfoProvider == null ? HtmlTagProvider.getInstance() : tagInfoProvider;
217     }
218     /**
219      * Constructor - creates the instance with specified html
220      * content as String.
221      * @param htmlContent
222      */

223     public HtmlCleaner(String JavaDoc htmlContent) {
224         this(htmlContent, HtmlTagProvider.getInstance());
225     }
226
227     /**
228      * Constructor - creates the instance for specified file.
229      * @param file
230      * @param charset
231      * @throws IOException
232      */

233     public HtmlCleaner(File file, String JavaDoc charset, ITagInfoProvider tagInfoProvider) throws IOException {
234         FileInputStream in = new FileInputStream(file);
235         this.reader = new InputStreamReader(in, charset);
236         this.tagInfoProvider = tagInfoProvider == null ? HtmlTagProvider.getInstance() : tagInfoProvider;
237     }
238
239     /**
240      * Constructor - creates the instance for specified file.
241      * @param file
242      * @param charset
243      * @throws IOException
244      */

245     public HtmlCleaner(File file, String JavaDoc charset) throws IOException {
246         this(file, charset, HtmlTagProvider.getInstance());
247     }
248
249     /**
250      * Constructor - creates the instance for specified file and charset.
251      * @param file
252      * @throws IOException
253      */

254     public HtmlCleaner(File file, ITagInfoProvider tagInfoProvider) throws IOException {
255         this(file, DEFAULT_CHARSET, tagInfoProvider);
256     }
257
258     /**
259      * Constructor - creates the instance for specified file and charset.
260      * @param file
261      * @throws IOException
262      */

263     public HtmlCleaner(File file) throws IOException {
264         this(file, DEFAULT_CHARSET, HtmlTagProvider.getInstance());
265     }
266
267     /**
268      * Constructor - creates the instance for specified URL and charset.
269      * @param url
270      * @param charset
271      * @throws IOException
272      */

273     public HtmlCleaner(URL JavaDoc url, String JavaDoc charset, ITagInfoProvider tagInfoProvider) throws IOException {
274         StringBuffer JavaDoc content = Utils.readUrl(url, charset);
275         this.reader = new StringReader( content.toString() );
276         this.tagInfoProvider = tagInfoProvider == null ? HtmlTagProvider.getInstance() : tagInfoProvider;
277     }
278
279     /**
280      * Constructor - creates the instance for specified URL and charset.
281      * @param url
282      * @param tagInfoProvider
283      * @throws IOException
284      */

285     public HtmlCleaner(URL JavaDoc url, ITagInfoProvider tagInfoProvider) throws IOException {
286         this(url, DEFAULT_CHARSET, tagInfoProvider);
287     }
288
289     /**
290      * Constructor - creates the instance for specified URL and charset.
291      * @param url
292      * @param charset
293      * @throws IOException
294      */

295     public HtmlCleaner(URL JavaDoc url, String JavaDoc charset) throws IOException {
296         this(url, charset, HtmlTagProvider.getInstance());
297     }
298
299     /**
300      * Constructor - creates the instance for specified URL and charset.
301      * @param url
302      * @throws IOException
303      */

304     public HtmlCleaner(URL JavaDoc url) throws IOException {
305         this(url, DEFAULT_CHARSET, HtmlTagProvider.getInstance());
306     }
307
308     /**
309      * Constructor - creates the instance for the specified inpout stream
310      * @param in
311      * @param tagInfoProvider
312      */

313     public HtmlCleaner(InputStream in, ITagInfoProvider tagInfoProvider) {
314         this.reader = new InputStreamReader(in);
315         this.tagInfoProvider = tagInfoProvider == null ? HtmlTagProvider.getInstance() : tagInfoProvider;
316     }
317
318     /**
319      * Constructor - creates the instance for the specified inpout stream
320      * @param in
321      */

322     public HtmlCleaner(InputStream in) {
323         this(in, HtmlTagProvider.getInstance());
324     }
325
326     DoctypeToken getDoctype() {
327         return _docType;
328     }
329
330     void setDoctype(DoctypeToken type) {
331         _docType = type;
332     }
333
334     /**
335      * Constructor - creates the instance for the specified inpout stream
336      * and the charset
337      * @param in
338      * @param charset
339      * @throws IOException
340      */

341     public HtmlCleaner(InputStream in, String JavaDoc charset) throws IOException {
342         reader = new InputStreamReader(in, charset);
343     }
344
345     public void clean() throws IOException {
346         allTags.clear();
347
348         htmlNode = new TagNode("html");
349         bodyNode = new TagNode("body");
350         headNode = new TagNode("head");
351         htmlNode.addChild(headNode);
352         htmlNode.addChild(bodyNode);
353
354         HtmlTokenizer htmlTokenizer = new HtmlTokenizer(this);
355
356         htmlTokenizer.start();
357
358         List nodeList = htmlTokenizer.getTokenList();
359         closeAll(nodeList);
360         createDocumentNodes(nodeList);
361     }
362     
363     Reader getReader() {
364         return reader;
365     }
366
367     /**
368      * Add attributes from specified map to the specified tag.
369      * If some attribute already exist it is preserved.
370      * @param tag
371      * @param attributes
372      */

373     private void addAttributesToTag(TagNode tag, Map attributes) {
374         if (attributes != null) {
375             Map tagAttributes = tag.getAttributes();
376             Iterator it = attributes.entrySet().iterator();
377             while (it.hasNext()) {
378                 Map.Entry currEntry = (Map.Entry) it.next();
379                 String JavaDoc attName = (String JavaDoc) currEntry.getKey();
380                 if ( !tagAttributes.containsKey(attName) ) {
381                     String JavaDoc attValue = (String JavaDoc) currEntry.getValue();
382                     tag.addAttribute(attName, attValue);
383                 }
384             }
385         }
386     }
387
388     /**
389      * Checks if open fatal tag is missing if there is a fatal tag for
390      * the specified tag.
391      * @param tag
392      */

393     private boolean isFatalTagSatisfied(TagInfo tag) {
394         if (tag != null) {
395             String JavaDoc fatalTagName = tag.getFatalTag();
396             return fatalTagName == null ? true : _openTags.tagExists(fatalTagName);
397         }
398
399         return true;
400     }
401
402     /**
403      * Check if specified tag requires parent tag, but that parent
404      * tag is missing in the appropriate context.
405      * @param tag
406      */

407     private boolean mustAddRequiredParent(TagInfo tag) {
408         if (tag != null) {
409             String JavaDoc requiredParent = tag.getRequiredParent();
410             if (requiredParent != null) {
411                 String JavaDoc fatalTag = tag.getFatalTag();
412                 int fatalTagPositon = -1;
413                 if (fatalTag != null) {
414                     TagPos tagPos =_openTags.findTag(fatalTag);
415                     if (tagPos != null) {
416                         fatalTagPositon = tagPos.position;
417                     }
418                 }
419
420                 // iterates through the list of open tags from the end and check if there is some higher
421
ListIterator it = _openTags.list.listIterator( _openTags.list.size() );
422                 while ( it.hasPrevious() ) {
423                     TagPos currTagPos = (TagPos) it.previous();
424                     if (tag.isHigher(currTagPos.name)) {
425                         return currTagPos.position <= fatalTagPositon;
426                     }
427                 }
428
429                 return true;
430             }
431         }
432
433         return false;
434     }
435
436     private TagNode createTagNode(TagNode startTagToken) {
437         startTagToken.setFormed();
438         return startTagToken;
439     }
440
441     private boolean isAllowedInLastOpenTag(BaseToken token) {
442         TagPos last = _openTags.getLastTagPos();
443         if (last != null) {
444              if (last.info != null) {
445                  return last.info.allowsItem(token);
446              }
447         }
448
449         return true;
450     }
451
452     private void saveToLastOpenTag(List nodeList, Object JavaDoc tokenToAdd) {
453         TagPos last = _openTags.getLastTagPos();
454         if ( last != null && last.info != null && last.info.isIgnorePermitted() ) {
455             return;
456         }
457
458         TagPos rubbishPos = _openTags.findTagToPlaceRubbish();
459         if (rubbishPos != null) {
460             TagNode startTagToken = (TagNode) nodeList.get(rubbishPos.position);
461             startTagToken.addItemForMoving(tokenToAdd);
462         }
463     }
464     
465     private boolean isStartToken(Object JavaDoc o) {
466         return (o instanceof TagNode) && !((TagNode)o).isFormed();
467     }
468
469     void makeTree(List nodeList, ListIterator nodeIterator) {
470         // process while not reach the end of the list
471
while ( nodeIterator.hasNext() ) {
472             BaseToken token = (BaseToken) nodeIterator.next();
473
474             if (token instanceof EndTagToken) {
475                 EndTagToken endTagToken = (EndTagToken) token;
476                 String JavaDoc tagName = endTagToken.getName();
477                 TagInfo tag = tagInfoProvider.getTagInfo(tagName);
478
479                 if ( (tag == null && omitUnknownTags) || (tag != null && tag.isDeprecated() && omitDeprecatedTags) ) {
480                     nodeIterator.set(null);
481                 } else if ( tag != null && !tag.allowsBody() ) {
482                     nodeIterator.set(null);
483                 } else {
484                     TagPos matchingPosition = _openTags.findTag(tagName);
485
486                     if (matchingPosition != null) {
487                         closeSnippet(nodeList, matchingPosition, endTagToken);
488                     } else if ( !isAllowedInLastOpenTag(token) ) {
489                         saveToLastOpenTag(nodeList, token);
490                     }
491
492                     nodeIterator.set(null);
493                 }
494             } else if ( isStartToken(token) ) {
495                 TagNode startTagToken = (TagNode) token;
496                 String JavaDoc tagName = startTagToken.getName();
497                 TagInfo tag = tagInfoProvider.getTagInfo(tagName);
498
499                 // add tag to set of all tags
500
allTags.add(tagName);
501
502                 // HTML open tag
503
if ( "html".equals(tagName) ) {
504                     addAttributesToTag(htmlNode, startTagToken.getAttributes());
505                     nodeIterator.set(null);
506                 // BODY open tag
507
} else if ( "body".equals(tagName) ) {
508                     addAttributesToTag(bodyNode, startTagToken.getAttributes());
509                     nodeIterator.set(null);
510                 // HEAD open tag
511
} else if ( "head".equals(tagName) ) {
512                     addAttributesToTag(headNode, startTagToken.getAttributes());
513                     nodeIterator.set(null);
514                 // unknows HTML tag and unknown tags are not allowed
515
} else if ( (tag == null && omitUnknownTags) || (tag != null && tag.isDeprecated() && omitDeprecatedTags) ) {
516                     nodeIterator.set(null);
517                 } else if ( tag != null && tag.hasPermittedTags() && _openTags.someAlreadyOpen(tag.getPermittedTags()) ) {
518                     nodeIterator.set(null);
519                 // if tag that must be unique, ignore this occurence
520
} else if ( tag != null && tag.isUnique() && _openTags.tagEncountered(tagName) ) {
521                     nodeIterator.set(null);
522                 // if there is no required outer tag without that this open tag is ignored
523
} else if ( !isFatalTagSatisfied(tag) ) {
524                     nodeIterator.set(null);
525                 // if there is no required parent tag - it must be added before this open tag
526
} else if ( mustAddRequiredParent(tag) ) {
527                     String JavaDoc requiredParent = tag.getRequiredParent();
528                     TagNode requiredParentStartToken = new TagNode(requiredParent);
529                     nodeIterator.previous();
530                     nodeIterator.add(requiredParentStartToken);
531                     nodeIterator.previous();
532                 // if last open tag has lower presidence then this, it must be closed
533
} else if ( tag != null && !_openTags.isEmpty() && tag.isMustCloseTag( tagInfoProvider.getTagInfo(_openTags.getLastTagPos().name)) ) {
534                     List closed = closeSnippet(nodeList, _openTags.getLastTagPos(), startTagToken);
535                     int closedCount = closed.size();
536
537                     // it is needed to copy some tags again in front of current, if there are any
538
if ( tag.hasCopyTags() && closedCount > 0 ) {
539                         // first iterates over list from the back and collects all start tokens
540
// in sequence that must be copied
541
ListIterator closedIt = closed.listIterator(closedCount);
542                         List toBeCopied = new ArrayList();
543                         while (closedIt.hasPrevious()) {
544                             TagNode currStartToken = (TagNode) closedIt.previous();
545                             if ( tag.isCopy(currStartToken.getName()) ) {
546                                 toBeCopied.add(0, currStartToken);
547                             } else {
548                                 break;
549                             }
550                         }
551
552                         if (toBeCopied.size() > 0) {
553                             Iterator copyIt = toBeCopied.iterator();
554                             while (copyIt.hasNext()) {
555                                 TagNode currStartToken = (TagNode) copyIt.next();
556                                 nodeIterator.add( currStartToken.makeCopy() );
557                             }
558                             
559                             // back to the previous place, before adding new start tokens
560
for (int i = 0; i < toBeCopied.size(); i++) {
561                                 nodeIterator.previous();
562                             }
563                         }
564                     }
565
566                     nodeIterator.previous();
567                 // if this open tag is not allowed inside last open tag, then it must be moved to the place where it can be
568
} else if ( !isAllowedInLastOpenTag(token) ) {
569                     saveToLastOpenTag(nodeList, token);
570                     nodeIterator.set(null);
571                 // if it is known HTML tag but doesn't allow body, it is immidiately closed
572
} else if ( tag != null && !tag.allowsBody() ) {
573                     TagNode newTagNode = createTagNode(startTagToken);
574                     if ( tag.isHeadTag() ) {
575                         headNode.addChild(newTagNode);
576                         nodeIterator.set(null);
577                     } else {
578                         nodeIterator.set(newTagNode);
579                     }
580                 // default case - just remember this open tag and go further
581
} else {
582                     _openTags.addTag( tagName, nodeIterator.previousIndex() );
583                 }
584             } else {
585                 if ( !isAllowedInLastOpenTag(token) ) {
586                     saveToLastOpenTag(nodeList, token);
587                     nodeIterator.set(null);
588                 }
589             }
590         }
591     }
592
593     private void createDocumentNodes(List listNodes) {
594         Iterator it = listNodes.iterator();
595         while (it.hasNext()) {
596             Object JavaDoc child = it.next();
597
598             if (child == null) {
599                 continue;
600             }
601
602             TagNode parent = bodyNode;
603             boolean toAdd = true;
604
605             if (child instanceof TagNode) {
606                 TagInfo tag = tagInfoProvider.getTagInfo( ((TagNode)child).getName() );
607                 if (tag != null) {
608                     if ( tag.isHeadTag() || (tag.isHeadAndBodyTag() && bodyNode.getChildren().isEmpty()) ) {
609                         parent = headNode;
610                     }
611                 }
612             } else {
613                 if (child instanceof ContentToken) {
614                     toAdd = !"".equals( ((ContentToken)child).toString() );
615                 }
616             }
617
618             if (toAdd) {
619                 parent.addChild(child);
620             }
621         }
622     }
623
624     private List closeSnippet(List nodeList, TagPos tagPos, Object JavaDoc toNode) {
625         List closed = new ArrayList();
626         ListIterator it = nodeList.listIterator(tagPos.position);
627
628         TagNode tagNode = null;
629         Object JavaDoc item = it.next();
630         boolean isListEnd = false;
631
632         while ( (toNode == null && !isListEnd) || (toNode != null && item != toNode) ) {
633             if ( isStartToken(item) ) {
634                 TagNode startTagToken = (TagNode) item;
635                 closed.add(startTagToken);
636                 List itemsToMove = startTagToken.getItemsToMove();
637                 if (itemsToMove != null) {
638                     OpenTags prevOpenTags = _openTags;
639                     _openTags = new OpenTags();
640                     makeTree(itemsToMove, itemsToMove.listIterator(0));
641                     closeAll(itemsToMove);
642                     startTagToken.setItemsToMove(null);
643                     _openTags = prevOpenTags;
644                 }
645                 
646                 TagNode newTagNode = createTagNode(startTagToken);
647
648                 TagInfo tag = tagInfoProvider.getTagInfo( newTagNode.getName() );
649                 if ( tag != null && tag.isHeadTag() ) {
650                     headNode.addChild(newTagNode);
651                     it.set(null);
652                 } else if (tagNode != null) {
653                     tagNode.addChildren(itemsToMove);
654                     tagNode.addChild(newTagNode);
655                     it.set(null);
656                 } else {
657                     if (itemsToMove != null) {
658                         itemsToMove.add(newTagNode);
659                         it.set(itemsToMove);
660                     } else {
661                         it.set(newTagNode);
662                     }
663                 }
664
665                 _openTags.removeTag( newTagNode.getName() );
666                 tagNode = newTagNode;
667             } else {
668                 if (tagNode != null) {
669                     it.set(null);
670                     if (item != null) {
671                         tagNode.addChild(item);
672                     }
673                 }
674             }
675             
676             if ( it.hasNext() ) {
677                 item = it.next();
678             } else {
679                 isListEnd = true;
680             }
681         }
682         
683         return closed;
684     }
685
686     /**
687      * Close all unclosed tags if there are any.
688      */

689     private void closeAll(List nodeList) {
690         TagPos firstTagPos = _openTags.findFirstTagPos();
691         if (firstTagPos != null) {
692             closeSnippet(nodeList, firstTagPos, null);
693         }
694     }
695
696     // setters and getters
697

698     public boolean isOmitUnknownTags() {
699         return omitUnknownTags;
700     }
701
702     public void setOmitUnknownTags(boolean omitUnknownTags) {
703         this.omitUnknownTags = omitUnknownTags;
704     }
705     
706     public boolean isOmitDeprecatedTags() {
707         return omitDeprecatedTags;
708     }
709     
710     public void setOmitDeprecatedTags(boolean omitDeprecatedTags) {
711         this.omitDeprecatedTags = omitDeprecatedTags;
712     }
713
714     public boolean isAdvancedXmlEscape() {
715         return advancedXmlEscape;
716     }
717
718     public void setAdvancedXmlEscape(boolean advancedXmlEscape) {
719         this.advancedXmlEscape = advancedXmlEscape;
720     }
721
722     public boolean isUseCdataForScriptAndStyle() {
723         return useCdataForScriptAndStyle;
724     }
725
726     public void setUseCdataForScriptAndStyle(boolean useCdataForScriptAndStyle) {
727         this.useCdataForScriptAndStyle = useCdataForScriptAndStyle;
728     }
729
730     public boolean isTranslateSpecialEntities() {
731         return translateSpecialEntities;
732     }
733
734     public void setTranslateSpecialEntities(boolean translateSpecialEntities) {
735         this.translateSpecialEntities = translateSpecialEntities;
736     }
737
738     public boolean isRecognizeUnicodeChars() {
739         return recognizeUnicodeChars;
740     }
741
742     public void setRecognizeUnicodeChars(boolean recognizeUnicodeChars) {
743         this.recognizeUnicodeChars = recognizeUnicodeChars;
744     }
745
746     public boolean isOmitComments() {
747         return omitComments;
748     }
749
750     public void setOmitComments(boolean omitComments) {
751         this.omitComments = omitComments;
752     }
753
754     public boolean isOmitXmlDeclaration() {
755         return omitXmlDeclaration;
756     }
757
758     public void setOmitXmlDeclaration(boolean omitXmlDeclaration) {
759         this.omitXmlDeclaration = omitXmlDeclaration;
760     }
761     
762     public boolean isOmitDoctypeDeclaration() {
763         return omitDoctypeDeclaration;
764     }
765
766     public void setOmitDoctypeDeclaration(boolean omitDoctypeDeclaration) {
767         this.omitDoctypeDeclaration = omitDoctypeDeclaration;
768     }
769
770     public boolean isOmitXmlnsAttributes() {
771         return omitXmlnsAttributes;
772     }
773
774     public void setOmitXmlnsAttributes(boolean omitXmlnsAttributes) {
775         this.omitXmlnsAttributes = omitXmlnsAttributes;
776     }
777
778     public String JavaDoc getHyphenReplacementInComment() {
779         return hyphenReplacementInComment;
780     }
781
782     public void setHyphenReplacementInComment(String JavaDoc hyphenReplacementInComment) {
783         this.hyphenReplacementInComment = hyphenReplacementInComment;
784     }
785
786     public Set getAllTags() {
787         return allTags;
788     }
789
790     // methods for writing result
791

792     /**
793      * The most general way to serialize resulting XML.
794      * @param xmlSerializer
795      * @throws IOException
796      */

797     public void writeXml(XmlSerializer xmlSerializer) throws IOException {
798         xmlSerializer.createXml(htmlNode);
799     }
800     
801     private void writeXml(Writer writer, int method) throws IOException {
802         XmlSerializer xmlSerializer = null;
803         
804         if (WRITE_METHOD_COMPACT == method) {
805             xmlSerializer = new CompactXmlSerializer(writer, this);
806         } else if (WRITE_METHOD_PRETTY == method) {
807             xmlSerializer = new PrettyXmlSerializer(writer, this);
808         } else {
809             xmlSerializer = new SimpleXmlSerializer(writer, this);
810         }
811
812         xmlSerializer.createXml(htmlNode);
813     }
814
815     private void writeToStream(OutputStream out, String JavaDoc charset, int method) throws IOException {
816         BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(out, charset) );
817         writeXml(writer, method);
818     }
819
820     private void writeToStream(OutputStream out, int method) throws IOException {
821         BufferedWriter writer = new BufferedWriter( new OutputStreamWriter(out) );
822         writeXml(writer, method);
823     }
824
825     public void writeXmlToStream(OutputStream out) throws IOException {
826         writeToStream(out, WRITE_METHOD_SIMPLE);
827     }
828
829     public void writeXmlToStream(OutputStream out, String JavaDoc charset) throws IOException {
830         writeToStream(out, charset, WRITE_METHOD_SIMPLE);
831     }
832
833     public void writeCompactXmlToStream(OutputStream out) throws IOException {
834         writeToStream(out, WRITE_METHOD_COMPACT);
835     }
836     
837     public void writeCompactXmlToStream(OutputStream out, String JavaDoc charset) throws IOException {
838         writeToStream(out, charset, WRITE_METHOD_COMPACT);
839     }
840
841     public void writePrettyXmlToStream(OutputStream out) throws IOException {
842         writeToStream(out, WRITE_METHOD_PRETTY);
843     }
844
845     public void writePrettyXmlToStream(OutputStream out, String JavaDoc charset) throws IOException {
846         writeToStream(out, charset, WRITE_METHOD_PRETTY);
847     }
848
849     private void writeToFile(String JavaDoc fileName, String JavaDoc charset, int method) throws IOException {
850         writeToStream(new FileOutputStream(fileName), charset, method );
851     }
852
853     private void writeToFile(String JavaDoc fileName, int method) throws IOException {
854         writeToStream( new FileOutputStream(fileName), method );
855     }
856
857     public void writeXmlToFile(String JavaDoc fileName) throws IOException {
858         writeToFile(fileName, WRITE_METHOD_SIMPLE);
859     }
860
861     public void writeXmlToFile(String JavaDoc fileName, String JavaDoc charset) throws IOException {
862         writeToFile(fileName, charset, WRITE_METHOD_SIMPLE);
863     }
864     
865     public void writeCompactXmlToFile(String JavaDoc fileName) throws IOException {
866         writeToFile(fileName, WRITE_METHOD_COMPACT);
867     }
868     
869     public void writeCompactXmlToFile(String JavaDoc fileName, String JavaDoc charset) throws IOException {
870         writeToFile(fileName, charset, WRITE_METHOD_COMPACT);
871     }
872
873     public void writePrettyXmlToFile(String JavaDoc fileName) throws IOException {
874         writeToFile(fileName, WRITE_METHOD_PRETTY);
875     }
876
877     public void writePrettyXmlToFile(String JavaDoc fileName, String JavaDoc charset) throws IOException {
878         writeToFile(fileName, charset, WRITE_METHOD_PRETTY);
879     }
880
881     public String JavaDoc getXmlAsString() throws IOException {
882         StringWriter writer = new StringWriter();
883         writeXml(writer, WRITE_METHOD_SIMPLE);
884
885         return writer.getBuffer().toString();
886     }
887
888     public String JavaDoc getCompactXmlAsString() throws IOException {
889         StringWriter writer = new StringWriter();
890         writeXml(writer, WRITE_METHOD_COMPACT);
891
892         return writer.getBuffer().toString();
893     }
894     
895     public String JavaDoc getPrettyXmlAsString() throws IOException {
896         StringWriter writer = new StringWriter();
897         writeXml(writer, WRITE_METHOD_PRETTY);
898         
899         return writer.getBuffer().toString();
900     }
901
902 }
Popular Tags