KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > transformation > LinkRewriterTransformer


1 /*
2  * Copyright 1999-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.transformation;
17
18 import java.io.IOException JavaDoc;
19 import java.util.HashMap JavaDoc;
20 import java.util.HashSet JavaDoc;
21 import java.util.Map JavaDoc;
22 import java.util.Set JavaDoc;
23 import java.util.StringTokenizer JavaDoc;
24
25 import org.apache.avalon.framework.activity.Disposable;
26 import org.apache.avalon.framework.activity.Initializable;
27 import org.apache.avalon.framework.configuration.Configuration;
28 import org.apache.avalon.framework.configuration.ConfigurationException;
29 import org.apache.avalon.framework.parameters.ParameterException;
30 import org.apache.avalon.framework.parameters.Parameters;
31 import org.apache.cocoon.ProcessingException;
32 import org.apache.cocoon.components.modules.input.InputModuleHelper;
33 import org.apache.cocoon.environment.SourceResolver;
34 import org.apache.cocoon.transformation.helpers.VariableConfiguration;
35 import org.apache.regexp.RE;
36 import org.apache.regexp.RECompiler;
37 import org.apache.regexp.REProgram;
38 import org.apache.regexp.RESyntaxException;
39 import org.xml.sax.Attributes JavaDoc;
40 import org.xml.sax.SAXException JavaDoc;
41 import org.xml.sax.helpers.AttributesImpl JavaDoc;
42
43 /**
44  * Rewrites URIs in links to a value determined by an InputModule.
45  * The URI scheme identifies the InputModule to use, and the rest of the URI is
46  * used as the attribute name.
47  *
48  * <h3>Example</h3>
49  * <p>For instance, if we had an {@link
50  * org.apache.cocoon.components.modules.input.XMLFileModule}, configured to
51  * read values from an XML file:
52  * <pre>
53  * &lt;site&gt;
54  * &lt;faq&gt;
55  * &lt;how_to_boil_eggs href="faq/eggs.html"/&gt;
56  * &lt;/faq&gt;
57  * &lt;/site&gt;
58  * </pre>
59  *
60  * mapped to the prefix 'site:', then <code>&lt;link
61  * href="site:/site/faq/how_to_boil_eggs/@href"&gt;</code> would be replaced
62  * with <code>&lt;link href="faq/eggs.html"&gt;</code>.
63  *
64  * <h3>InputModule Configuration</h3>
65  * <p>InputModules are configured twice; first statically in
66  * <code>cocoon.xconf</code>, and then dynamically at runtime, with dynamic
67  * configuration (if any) taking precedence. This transformer allows
68  * you to pass a dynamic configuration to the InputModules it uses, as follows.
69  *
70  * <p>First, a template Configuration is specified in the static
71  * &lt;map:components&gt; block of the sitemap within &lt;input-module&gt; tags:
72  * <pre>
73  * &lt;map:transformer name="linkrewriter"
74  * src="org.apache.cocoon.transformation.LinkRewriterTransformer"&gt;
75  * &lt;link-attrs&gt;href src&lt;/link-attrs&gt;
76  * &lt;schemes&gt;site ext&lt;/schemes&gt;
77  * &lt;input-module name="site"&gt;
78  * &lt;file src="cocoon://samples/link/linkmap" reloadable="true"/&gt;
79  * &lt;/input-module&gt;
80  * &lt;input-module name="mapper"&gt;
81  * &lt;input-module name="site"&gt;
82  * &lt;file src="{src}" reloadable="true"/&gt;
83  * &lt;/input-module&gt;
84  * &lt;prefix&gt;/site/&lt;/prefix&gt;
85  * &lt;suffix&gt;/@href&lt;/suffix&gt;
86  * &lt;/input-module&gt;
87  * &lt;/map:transformer&gt;
88  * </pre>
89  *
90  * Here, we have first configured which attributes to examine, and which URL
91  * schemes to consider rewriting. In this example, &lt;a href="site:index"&gt;
92  * would be processed. See below for more configuration options.
93  *
94  * <p>Then, we have established dynamic configuration templates for two modules,
95  * 'site' (an {@link org.apache.cocoon.components.modules.input.XMLFileModule})
96  * and 'mapper' (a {@link
97  * org.apache.cocoon.components.modules.input.SimpleMappingMetaModule}). All
98  * other InputModules will use their static configs. Note that, when
99  * configuring a meta InputModule like 'mapper', we need to also configure the
100  * 'inner' module (here, 'site') with a nested &lt;input-module&gt;.
101  *
102  * <p>There is one further twist; to have <em>really</em> dynamic configuration,
103  * we need information available only when the transformer actually runs. This
104  * is why the above config was called a "template" configuration; it needs to
105  * be 'instantiated' and provided extra info, namely:
106  * <ul>
107  * <li>The {src} string will be replaced with the map:transform @src attribute value.
108  * <li>Any other {variables} will be replaced with map:parameter values
109  * </ul>
110  *
111  * With the above config template, we can have a matcher like:
112  *
113  * <pre>
114  * &lt;map:match pattern="**welcome"&gt;
115  * &lt;map:generate src="index.xml"/&gt;
116  * &lt;map:transform type="linkrewriter" src="cocoon:/{1}linkmap"/&gt;
117  * &lt;map:serialize type="xml"/&gt;
118  * &lt;/map:match&gt;
119  * </pre>
120  *
121  * Which would cause the 'site' XMLFileModule nested inside 'mapper' to be
122  * configured with a different XML file, depending on the request.
123  *
124  * <p>Similarly, we could use a dynamic prefix:
125  * <pre>
126  * &lt;prefix&gt;{prefix}&lt;/prefix&gt;
127  * </pre>
128  * in the template config, and:
129  * <pre>
130  * &lt;map:parameter name="prefix" value="/site/"/&gt;
131  * </pre>
132  * in the map:transform
133  *
134  * <p>A live example of LinkRewriterTransformer can be found in the <a
135  * HREF="http://forrest.apache.org/">Apache Forrest</a> sitemap.
136  *
137  * <h3>Transformer Configuration</h3>
138  * <p>
139  * The following configuration entries in map:transformer block are recognised:
140  * <dl>
141  * <dt>link-attrs</dt>
142  * <dd>Space-separated list of attributes to consider links (to be
143  * transformed). The whole value of the attribute is considered link and
144  * transformed.</dd>
145  *
146  * <dt>link-attr</dt>
147  * <dd>0..n of these elements each specify an attribute containing link(s)
148  * (to be transformed) and optionally a regular expression to locate
149  * substring(s) of the attribute value considered link(s). Has two
150  * attributes:
151  * <dl>
152  * <dt>name</dt>
153  * <dd>(required) name of the attribute whose value contains link(s).</dd>
154  * <dt>pattern</dt>
155  * <dd>(optional) regular expression such that when matched against the
156  * attribute value, all parenthesized expressions (except number 0) will
157  * be considered links that should be transformed. If absent, the whole value
158  * of the attribute is considered to be a link, as if the attribute was
159  * included in 'link-attrs'.</dd>
160  * </dl>
161  * </dd>
162  *
163  * <dt>schemes</dt>
164  * <dd>Space-separated list of URI schemes to explicitly include.
165  * If specified, all URIs with unlisted schemes will <i>not</i> be converted.</dd>
166  *
167  * <dt>exclude-schemes</dt>
168  * <dd>Space-separated list of URI schemes to explicitly exclude.
169  * Defaults to 'http https ftp news mailto'.</dd>
170  *
171  * <dt>bad-link-str</dt>
172  * <dd>String to use for links with a correct InputModule prefix, but no value
173  * therein. Defaults to the original URI.</dd>
174  *
175  * <dt>namespace-uri</dt>
176  * <dd>The namespace uri of elements whose attributes are considered for
177  * transformation. Defaults to the empty namespace ("").</dd>
178  * </dl>
179  *
180  * <p>
181  * The attributes considered to contain links are a <em>set</em> of the attributes
182  * specified in 'link-attrs' element and all 'link-attr' elements. Each attribute
183  * should be specified only once either in 'link-attrs' or 'link-attr'; i.e. an
184  * attribute can have at most 1 regular expression associated with it. If neither
185  * 'link-attrs' nor 'link-attr' configuration is present, defaults to 'href'.
186  *
187  * <p>Below is an example of regular expression usage that will transform links
188  * <code>x1</code> and <code>x2</code> in
189  * <code>&lt;action target="foo url(x1) bar url(x2)"/&gt;</code>:
190  *
191  * <pre>
192  * &lt;map:transformer name="linkrewriter"
193  * src="org.apache.cocoon.transformation.LinkRewriterTransformer"&gt;
194  * &lt;link-attr name="target" pattern="(?:url\((.*?)\).*?){1,2}$"/&gt;
195  * &lt;!-- additional configuration ... --&gt;
196  * &lt;/map:transformer&gt;
197  * </pre>
198  *
199  * <p>
200  * When matched against the value of <code>target</code> attribute above,
201  * the parenthesized expressions are:<br/>
202  * <samp>
203  * $0 = url(x1) bar url(x2)<br/>
204  * $1 = x1<br/>
205  * $2 = x2<br/>
206  * </samp>
207  *
208  * <p>
209  * Expression number 0 is always discarded by the transformer and the rest
210  * are considered links and re-written.
211  *
212  * <p>If present, map:parameter's from the map:transform block override the
213  * corresponding configuration entries from map:transformer. As an exception,
214  * 'link-attr' parameters are not recognised; 'link-attrs' parameter overrides
215  * both 'link-attrs' and 'link-attr' configuration.
216  *
217  * <p>
218  *
219  * @version $Id: LinkRewriterTransformer.java 292131 2005-09-28 05:51:38Z antonio $
220  */

221 public class LinkRewriterTransformer extends AbstractSAXTransformer
222                                      implements Initializable, Disposable {
223
224     private final static String JavaDoc NAMESPACE = "";
225
226     /**
227      * A guardian object denoting absense of regexp pattern for a given
228      * attribute. Used as value in linkAttrs and origLinkAttrs maps.
229      */

230     private final static Object JavaDoc NO_REGEXP = new Object JavaDoc();
231
232     //
233
// Configure()'d parameters
234
//
235

236     /** Configuration passed to the component once through configure(). */
237     private Configuration origConf;
238
239     private String JavaDoc origBadLinkStr;
240     private String JavaDoc origInSchemes;
241     private String JavaDoc origOutSchemes;
242     private String JavaDoc origNamespaceURI;
243
244     /**
245      * A map where keys are those attributes which are considered 'links'.
246      * Obtained from configuration passed to the component once through
247      * the configure() method.
248      *
249      * <p>Map contains NO_REGEXP object for attributes whose whole values are
250      * considered links, or compiled RE expressions for attributes whose values
251      * might contain a link.
252      */

253     private Map JavaDoc origLinkAttrs;
254
255     //
256
// Setup()'d parameters
257
//
258

259     /**
260      * Derivation of origConf with variables obtained from setup() parameters.
261      * Recreated once per invocation.
262      */

263     private Configuration conf;
264
265     /**
266      * String to use for links with a correct InputModule prefix, but no value
267      * therein.
268      */

269     private String JavaDoc badLinkStr;
270
271     /** Set containing schemes (protocols) of links to process */
272     private Set JavaDoc inSchemes;
273
274     /** Set containing schemes (protocols) of links to exclude from processing */
275     private Set JavaDoc outSchemes;
276
277     /**
278      * A map of attributes considered 'links' and corresponding RE expression
279      * or NO_REGEXP object. Recreated once per invocation or copied from
280      * origLinkAttrs based on setup() method parameters.
281      */

282     private Map JavaDoc linkAttrs;
283
284     private InputModuleHelper modHelper;
285
286
287     /**
288      * Configure this component from the map:transformer block. Called before
289      * initialization and setup.
290      */

291     public void configure(Configuration conf) throws ConfigurationException {
292         super.configure(conf);
293
294         this.origConf = conf;
295         this.origBadLinkStr = conf.getChild("bad-link-str").getValue(null);
296         this.origInSchemes = conf.getChild("schemes").getValue("");
297         this.origOutSchemes = conf.getChild("exclude-schemes").getValue("http https ftp news mailto");
298
299         this.origNamespaceURI = conf.getChild("namespace-uri").getValue(NAMESPACE);
300
301         /*
302          * Setup origLinkAttrs map from the original Configuration:
303          * 1. Parse link-attrs Configuration
304          * 2. Process link-attr Children, warn if overwriting
305          * 3. If no link-attrs, and no link-attr are available, defaults to "href"
306          */

307
308         String JavaDoc linkAttrsValue = conf.getChild("link-attrs").getValue("");
309         this.origLinkAttrs = split(linkAttrsValue, " ", NO_REGEXP);
310
311         Configuration[] attrConfs = conf.getChildren("link-attr");
312         if (attrConfs.length > 0) {
313             RECompiler compiler = new RECompiler();
314             for (int i = 0; i < attrConfs.length; i++) {
315                 String JavaDoc attr = attrConfs[i].getAttribute("name");
316                 if (getLogger().isWarnEnabled() && origLinkAttrs.containsKey(attr)) {
317                     getLogger().warn("Duplicate configuration entry found for attribute '" +
318                                      attr + "', overwriting previous configuration");
319                 }
320
321                 String JavaDoc pattern = attrConfs[i].getAttribute("pattern", null);
322                 if (pattern == null) {
323                     this.origLinkAttrs.put(attr, NO_REGEXP);
324                 } else {
325                     try {
326                         this.origLinkAttrs.put(attr, compiler.compile(pattern));
327                     } catch (RESyntaxException e) {
328                         String JavaDoc msg = "Invalid regexp pattern '" + pattern + "' specified for attribute '" + attr + "'";
329                         throw new ConfigurationException(msg, attrConfs[i], e);
330                     }
331                 }
332             }
333         }
334
335         // If nothing configured, default to href attribute
336
if (this.origLinkAttrs.size() == 0) {
337             this.origLinkAttrs.put("href", NO_REGEXP);
338         }
339     }
340
341     /**
342      * Initiate resources prior to this component becoming active.
343      */

344     public void initialize() throws Exception JavaDoc {
345         this.modHelper = new InputModuleHelper();
346         this.modHelper.setup(this.manager);
347     }
348
349     /**
350      * Setup this component to handle a map:transform instance.
351      */

352     public void setup(SourceResolver resolver,
353                       Map JavaDoc objectModel,
354                       String JavaDoc src,
355                       Parameters parameters)
356             throws ProcessingException, SAXException JavaDoc, IOException JavaDoc {
357         super.setup(resolver, objectModel, src, parameters);
358
359         this.badLinkStr = parameters.getParameter("bad-link-str", // per-request config
360
this.origBadLinkStr); // else fall back to per-instance config
361

362         this.namespaceURI = parameters.getParameter("namespace-uri", this.origNamespaceURI);
363
364         this.inSchemes = split(parameters.getParameter("schemes", this.origInSchemes), " ");
365         this.outSchemes = split(parameters.getParameter("exclude-schemes", this.origOutSchemes), " ");
366
367         this.linkAttrs = this.origLinkAttrs;
368         if (parameters.isParameter("link-attrs")) {
369             try {
370                 this.linkAttrs = split(parameters.getParameter("link-attrs"), " ", NO_REGEXP);
371             } catch (ParameterException ex) {
372                 // shouldn't happen
373
}
374         }
375
376         if (getLogger().isDebugEnabled()) {
377             getLogger().debug("bad-link-str = " + badLinkStr);
378             getLogger().debug("link-attrs = " + linkAttrs);
379             getLogger().debug("schemes = " + inSchemes);
380             getLogger().debug("exclude-schemes = " + outSchemes);
381             getLogger().debug("namespace-uri = " + namespaceURI);
382         }
383
384         // Generate conf
385
VariableConfiguration varConf = new VariableConfiguration(this.origConf);
386         varConf.addVariable("src", src);
387         varConf.addVariables(parameters);
388         try {
389             this.conf = varConf.getConfiguration();
390         } catch (ConfigurationException ce) {
391             throw new ProcessingException("Couldn't create dynamic config ", ce);
392         }
393     }
394
395     /** Recycle this component for use in another map:transform. */
396     public void recycle() {
397         // Note: configure() and initialize() are not called after every
398
// recycle, so don't null origConf, origLinkAttrs, etc.
399
this.conf = null;
400         this.badLinkStr = null;
401         this.linkAttrs = null;
402         this.inSchemes = null;
403         this.outSchemes = null;
404
405         super.recycle();
406     }
407
408     /**
409      * Split a string into a Set of strings.
410      *
411      * @param str String to split
412      * @param delim Delimiter character
413      * @return A Set of strings in 'str'
414      */

415     private Set JavaDoc split(String JavaDoc str, String JavaDoc delim) {
416         if (str == null) {
417             return null;
418         }
419
420         Set JavaDoc tokens = new HashSet JavaDoc();
421         StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(str, delim);
422         while (st.hasMoreTokens()) {
423             tokens.add(st.nextToken());
424         }
425         return tokens;
426     }
427
428     /**
429      * Split a string and create a Map where keys are the tokens from the string.
430      *
431      * @param str String to split
432      * @param delim Delimiter character
433      * @param valueObj Object to insert in the Map (may be null)
434      * @return A Map of strings in 'str'
435      */

436     private Map JavaDoc split(String JavaDoc str, String JavaDoc delim, Object JavaDoc valueObj) {
437         if (str == null) {
438             return null;
439         }
440
441         // valueObj may be null, because HashMap permits null values
442
Map JavaDoc schemes = new HashMap JavaDoc();
443         StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(str, delim);
444         while (st.hasMoreTokens()) {
445             String JavaDoc pfx = st.nextToken();
446             if (schemes.containsKey(pfx) && getLogger().isWarnEnabled()) {
447                 getLogger().warn("Duplicate configuration entry found for attribute '" +
448                                  pfx + "', overwriting previous configuration");
449             }
450             schemes.put(pfx, valueObj);
451         }
452         return schemes;
453     }
454
455     /**
456      * Start processing elements of our namespace.
457      * This hook is invoked for each sax event with our namespace.
458      * @param uri The namespace of the element.
459      * @param name The local name of the element.
460      * @param raw The qualified name of the element.
461      * @param attr The attributes of the element.
462      */

463     public void startTransformingElement(String JavaDoc uri,
464                                          String JavaDoc name,
465                                          String JavaDoc raw,
466                                          Attributes JavaDoc attr)
467             throws ProcessingException, IOException JavaDoc, SAXException JavaDoc {
468         boolean matched = false;
469
470         for (int attrIdx = 0; attrIdx < attr.getLength(); attrIdx++) {
471             String JavaDoc attrName = attr.getQName(attrIdx);
472
473             String JavaDoc attrValue = createTransformedAttr(attrName, attr.getValue(attrIdx));
474             if (attrValue != null) {
475                 if (!matched) {
476                     attr = new AttributesImpl JavaDoc(attr);
477                     matched = true;
478                 }
479                 ((AttributesImpl JavaDoc) attr).setValue(attrIdx, attrValue);
480             }
481         }
482         super.startTransformingElement(uri, name, raw, attr);
483     }
484
485     /**
486      * Rewrite set of links in an attribute.
487      *
488      * @param attrName QName of the attribute containing unconverted link(s).
489      * @param oldAttrValue value of the attribute containing unconverted link(s).
490      * @return new value of the attribute based on <code>oldAttrValue</code>, but with link(s) rewritten. If not
491      * modified, returns null (for example, if attribute not found in <code>linkAttrs</code> or not matched to
492      * regexp pattern).
493      */

494     private String JavaDoc createTransformedAttr(
495         String JavaDoc attrName,
496         String JavaDoc oldAttrValue) {
497         if (!linkAttrs.containsKey(attrName)) {
498             return null;
499         }
500
501         String JavaDoc newAttrValue = null;
502         Object JavaDoc reProgram = linkAttrs.get(attrName);
503         if (reProgram == NO_REGEXP) {
504             newAttrValue = createTransformedLink(oldAttrValue);
505         } else {
506             // must be instanceof REProgram
507
RE r = new RE((REProgram) reProgram);
508             if (r.match(oldAttrValue)) {
509                 StringBuffer JavaDoc bufOut = new StringBuffer JavaDoc(oldAttrValue);
510                 int offset = 0;
511                 String JavaDoc link = null;
512                 String JavaDoc newLink = null;
513                 boolean modified = false;
514
515                 // skip the first paren
516
for (int i = 1; i < r.getParenCount(); i++) {
517                     link = r.getParen(i);
518                     newLink = createTransformedLink(link);
519                     if (newLink != null) {
520                         bufOut.replace(r.getParenStart(i) + offset,
521                                        r.getParenEnd(i) + offset,
522                                        newLink);
523                         offset += newLink.length() - r.getParenLength(i);
524                         modified = true;
525                     }
526                 }
527                 if (modified) {
528                     newAttrValue = bufOut.toString();
529                 }
530             }
531         }
532
533         return newAttrValue;
534     }
535
536     /**
537      * Rewrite a link - use InputModule to obtain new value for the link based on <code>oldLink</code>.
538      *
539      * @param oldLink value of the unconverted link.
540      * @return new value of the link. If not modified, returns null (for example, if link scheme
541      * is in <code>outSchemes</code>.
542      */

543     private String JavaDoc createTransformedLink(String JavaDoc oldLink) {
544         String JavaDoc newLink = null;
545         int i = oldLink.indexOf(":");
546         if (i != -1) {
547             String JavaDoc scheme = oldLink.substring(0, i);
548             String JavaDoc addr = oldLink.substring(i + 1);
549             if (outSchemes.contains(scheme)) {
550                 if (getLogger().isDebugEnabled()) {
551                     getLogger().debug("Ignoring link '" + oldLink + "'");
552                 }
553             } else if (inSchemes.contains(scheme) || inSchemes.size() == 0) {
554                 // If the link wasn't deliberately excluded from a
555
// list of 'good' links, then include it.
556
try {
557                     newLink = (String JavaDoc) modHelper.getAttribute(this.objectModel,
558                                                               getConf(scheme),
559                                                               scheme,
560                                                               addr,
561                                                               (badLinkStr != null? badLinkStr: scheme + ":" + addr));
562                     if (getLogger().isDebugEnabled()) {
563                         getLogger().debug("Converted link '" + oldLink + "' to '" + newLink + "'");
564                     }
565                 } catch (org.apache.avalon.framework.CascadingRuntimeException e) {
566                     // Rethrow Configuration errors
567
if (e.getCause() instanceof ConfigurationException) {
568                         throw e;
569                     }
570
571                     // Swallow IM errors, usually prefixes like 'telnet' that aren't
572
// bound to an InputModule. These should really be declared in
573
// 'exclude-schemes', hence the 'error' classification of this log.
574
if (getLogger().isErrorEnabled()) {
575                         getLogger().error("Error rewriting link '" + oldLink + "': " +
576                                           e.getMessage());
577                     }
578                 }
579             }
580         }
581         return newLink;
582     }
583
584     /**
585      * Retrieve a dynamic configuration for a specific InputModule.
586      *
587      * @param scheme InputModule name
588      * @return Configuration for specified scheme, from the map:transformer block.
589      */

590     private Configuration getConf(String JavaDoc scheme) {
591         Configuration[] schemeConfs = this.conf.getChildren("input-module");
592         for (int i = 0; i < schemeConfs.length; i++) {
593             if (scheme.equals(schemeConfs[i].getAttribute("name", null))) {
594                 return schemeConfs[i];
595             }
596         }
597         return null;
598     }
599
600     /* (non-Javadoc)
601      * @see org.apache.avalon.framework.activity.Disposable#dispose()
602      */

603     public void dispose() {
604         if (this.modHelper != null) {
605             this.modHelper.releaseAll();
606             this.modHelper = null;
607         }
608         super.dispose();
609     }
610 }
611
Popular Tags