KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > opencms > workplace > tools > content > CmsTagReplaceParser


/*
 * File : $Source: /usr/local/cvs/opencms/src-modules/org/opencms/workplace/tools/content/CmsTagReplaceParser.java,v $
 * Date : $Date: 2006/03/27 14:52:27 $
 * Version: $Revision: 1.2 $
 *
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (C) 2005 Alkacon Software GmbH (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */


package org.opencms.workplace.tools.content;

import org.opencms.util.CmsHtmlParser;
import org.opencms.util.CmsHtmlTagRemoveFactory;
import org.opencms.util.CmsStringUtil;
import org.opencms.util.I_CmsHtmlNodeVisitor;

import java.util.Iterator;

import org.htmlparser.NodeFactory;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexer.Page;
import org.htmlparser.util.ParserException;

48 /**
49  *
50  * Html parser / visitor combination that visits a document and replaces Tag names by using the
51  * replacement configuration of a {@link org.opencms.workplace.tools.content.CmsTagReplaceSettings}
52  * instance.
53  * <p>
54  *
55  * Instances are reusable.
56  * <p>
57  *
58  * @author Achim Westermann
59  *
60  * @version $Revision: 1.2 $
61  *
62  * @since 6.1.7
63  *
64  */

65 public final class CmsTagReplaceParser extends CmsHtmlParser implements I_CmsHtmlNodeVisitor {
66
67     /** A tag factory that is able to make tags invisible to visitors. */
68     private final NodeFactory m_nodeFactory;
69
70     /**
71      * Boolean flag that is set to true if during last call to {@link #process(String, String)}
72      * content was changed.
73      */

74     private boolean m_changedContent;
75     /**
76      * The settings to use for replacing tags.
77      */

78     private final CmsTagReplaceSettings m_settings;
79
80     /**
81      * Default constructor that turns echo on and uses the settings for replacing tags.
82      * <p>
83      *
84      * @param settings the settings to use for tag replacement.
85      */

86     public CmsTagReplaceParser(CmsTagReplaceSettings settings) {
87
88         // echo on
89
super(true);
90         m_settings = settings;
91         CmsHtmlTagRemoveFactory nodeFactory = new CmsHtmlTagRemoveFactory();
92         // add the removals of the settings to the tag factory:
93
Iterator JavaDoc itDeleteTags = m_settings.getDeleteTags().iterator();
94         while (itDeleteTags.hasNext()) {
95             nodeFactory.addTagRemoval((Tag)itDeleteTags.next());
96         }
97         m_nodeFactory = nodeFactory;
98
99     }
100
101     /**
102      * Overridden to also return the attributes of the Tag.
103      * <p>
104      *
105      * @see org.opencms.util.CmsHtmlParser#getTagHtml(org.htmlparser.Tag)
106      */

107     public String JavaDoc getTagHtml(Tag tag) {
108
109         if (CmsStringUtil.isEmpty(tag.getTagName())) {
110             return "";
111         }
112         StringBuffer JavaDoc result = new StringBuffer JavaDoc(32);
113         result.append('<');
114         // Tag name is the first "Attribute"...
115
Iterator JavaDoc itAttributes = tag.getAttributesEx().iterator();
116         while (itAttributes.hasNext()) {
117             result.append(itAttributes.next().toString());
118             // avoid trailing whitespaces like <H1 >
119
// in 2nd run htmlparser 1.5 would turn the whitespace into an Attribute with null name
120
if (itAttributes.hasNext()) {
121                 result.append(' ');
122             }
123         }
124         result.append('>');
125         return result.toString();
126     }
127
128     /**
129      * Extracts the text from the given html content, assuming the given html encoding.
130      * <p>
131      * Additionally tags are replaced / removed according to the configuration of this instance.
132      * <p>
133      *
134      * <h3>Please note:</h3>
135      * There are static process methods in the superclass that will not do the replacements /
136      * removals. Don't mix them up with this method.
137      * <p>
138      *
139      * @param html the content to extract the plain text from.
140      *
141      * @param encoding the encoding to use.
142      *
143      * @return the text extracted from the given html content.
144      *
145      * @throws ParserException if something goes wrong.
146      */

147     public String JavaDoc process(String JavaDoc html, String JavaDoc encoding) throws ParserException {
148
149         // clear from potential previous run:
150
m_result = new StringBuffer JavaDoc();
151         m_changedContent = false;
152
153         // initialize a parser with the given charset
154
Parser parser = new Parser();
155         parser.setNodeFactory(m_nodeFactory);
156         Lexer lexer = new Lexer();
157         Page page = new Page(html, encoding);
158         lexer.setPage(page);
159         parser.setLexer(lexer);
160         // process the page using the given visitor
161
parser.visitAllNodesWith(this);
162         // return the result
163
return getResult();
164     }
165
166     /**
167      * @see org.opencms.util.CmsHtmlParser#visitEndTag(org.htmlparser.Tag)
168      */

169     public void visitEndTag(Tag tag) {
170
171         boolean change = m_settings.replace(tag);
172         if (change) {
173             m_changedContent = true;
174         }
175         super.visitEndTag(tag);
176     }
177
178     /**
179      * @see org.opencms.util.CmsHtmlParser#visitTag(org.htmlparser.Tag)
180      */

181     public void visitTag(Tag tag) {
182
183         boolean change = m_settings.replace(tag);
184         if (change) {
185             m_changedContent = true;
186         }
187         super.visitTag(tag);
188     }
189
190     /**
191      * Returns the changedContent.
192      * <p>
193      *
194      * @return the changedContent
195      */

196     public boolean isChangedContent() {
197
198         return m_changedContent;
199     }
200
201 }
202
Popular Tags