KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > commons > digester > ExtendedBaseRules


1 /* $Id: ExtendedBaseRules.java 179716 2005-06-03 04:06:00Z skitching $
2  *
3  * Copyright 2001-2004 The Apache Software Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */

17
18
19 package org.apache.commons.digester;
20
21
22 import java.util.ArrayList JavaDoc;
23 import java.util.Collections JavaDoc;
24 import java.util.Comparator JavaDoc;
25 import java.util.HashMap JavaDoc;
26 import java.util.Iterator JavaDoc;
27 import java.util.List JavaDoc;
28 import java.util.Map JavaDoc;
29
30
31 /**
32  * <p>Extension of {@link RulesBase} for complex schema.</p>
33  *
34  * <p>This is an extension of the basic pattern matching scheme
35  * intended to improve support for mapping complex xml-schema.
36  * It is intended to be a minimal extension of the standard rules
37  * big enough to support complex schema but without the full generality
38  * offered by more exotic matching pattern rules.</p>
39  *
40  * <h4>When should you use this rather than the original?</h4>
41  *
42  * <p>
43  * This pattern-matching engine is complex and slower than the basic
44  * default RulesBase class, but offers more functionality:
45  * <ul>
46  * <li>Universal patterns allow patterns to be specified which will match
47  * regardless of whether there are "better matching" patterns available.</li>
48  * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct
49  * children of a specified element.</li>
50  * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements
51  * nested within a specified element to any nesting depth.</li>
52  * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li>
53  * </ul>
54  * </p>
55  *
56  * <h4>Universal Match Patterns</h4>
57  *
58  * <p>The default RulesBase pattern-matching engine always attempts to find
59  * the "best matching pattern", and will ignore rules associated with other
60  * patterns that match but are not "as good". As an example, if the pattern
61  * "a/b/c" is associated with rules 1 and 2, and "*&#47;c" is associated with
62  * rules 3 and 4 then element "a/b/c" will cause only rules 1 and 2 to execute.
63  * Rules 3 and 4 do have matching patterns, but because the patterns are shorter
64  * and include wildcard characters they are regarded as being "not as good" as
65  * a direct match. In general, exact patterns are better than wildcard patterns,
66  * and among multiple patterns with wildcards, the longest is preferred.
67  * See the RulesBase class for more information.</p>
68  *
69  * <p>This feature of preferring "better" patterns can be a powerful tool.
70  * However it also means that patterns can interact in unexpected ways.</p>
71  *
72  * <p>When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses
73  * the "best match" feature. Even if there is an exact match or a longer
74  * wildcard match, patterns prefixed by '!' will still be tested to see if
75  * they match, and if so their associated Rule objects will be included in
76  * the set of rules to be executed in the normal manner.</p>
77  *
78  * <ul>
79  * <li>Pattern <code>"!*&#47;a/b"</code> matches whenever a 'b' element
80  * is inside an 'a'.</li>
81  * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent
82  * matching <code>"a/b"</code> (see "Parent Match Patterns").</li>
83  * <li>Pattern <code>"!*&#47;a/b/?"</code> matches any child of a parent
84  * matching <code>"!*&#47;a/b"</code> (see "Parent Match Patterns").</li>
85  * <li>Pattern <code>"!a/b/*"</code> matches any element whose path
86  * starts with "a" then "b" (see "Ancestor Match Patterns").</li>
87  * <li>Pattern <code>"!*&#47;a/b/*"</code> matches any elements whose path
88  * contains 'a/b' (see "Ancestor Match Patterns").</li>
89  * </ul>
90  *
91  * <h4>Parent Match Patterns</h4>
92  *
93  * <p>
94  * These will match direct child elements of a particular parent element.
95  * <ul>
96  * <li>
97  * <code>"a/b/c/?"</code> matches any child whose parent matches
98  * <code>"a/b/c"</code>. Exact parent rules take precedence over Ancestor
99  * Match patterns.
100  * </li>
101  * <li>
102  * <code>"*&#47;a/b/c/?"</code> matches any child whose parent matches
103  * <code>"*&#47;a/b/c"</code>. The longest matching still applies to parent
104  * matches but the length excludes the '?', which effectively means
105  * that standard wildcard matches with the same level of depth are
106  * chosen in preference.
107  * </li>
108  * </ul>
109  * </p>
110  *
111  * <h4>Ancestor Match Patterns</h4>
112  *
113  * <p>
114  * These will match elements whose parentage includes a particular sequence
115  * of elements.
116  * <ul>
117  * <li>
118  * <code>"a/b/*"</code> matches any element whose path starts with
119  * 'a' then 'b'. Exact parent and parent match rules take precedence.
120  * The longest ancestor match will take precedence.
121  * </li>
122  * <li>
123  * <code>"*&#47;a/b/*"</code> matches any elements whose path contains
124  * an element 'a' followed by an element 'b'. The longest matching still
125  * applies but the length excludes the '*' at the end.
126  * </li>
127  * </ul>
128  * </p>
129  *
130  * <h4>Completely Wild Patterns</h4>
131  *
132  * <p>Pattern <code>"*"</code> matches every element that isn't matched by
133  * any other basic rule.</p>
134  *
135  * <p>Pattern <code>"!*"</code> matches every element.</p>
136  *
137  * <h4>Using The Extended Rules</h4>
138  *
139  * <p>By default, a Digester instance uses a {@link RulesBase} instance as
140  * its pattern matching engine. To use an ExtendedBaseRules instance, call
141  * the Digester.setRules method before adding any Rule objects to the digester
142  * instance:
143  * <pre>
144  * Digester digester = new Digester();
145  * digester.setRules( new ExtendedBaseRules() );
146  * </pre></p>
147  *
148  * <p>The most important thing to remember when using the extended rules is
149  * that universal and non-universal patterns are completely independent.
150  * Universal patterns are never affected by the addition of new patterns
151  * or the removal of existing ones. Non-universal patterns are never affected
152  * by the addition of new <em>universal</em> patterns or the removal of
153  * existing <em>universal</em> patterns. As in the basic matching rules,
154  * non-universal (basic) patterns <strong>can</strong> be affected by the
155  * addition of new <em>non-universal</em> patterns or the removal of existing
156  * <em>non-universal</em> patterns, because only rules associated with the
157  * "best matching" pattern for each xml element are executed.</p>
158  *
159  * <p> This means that you can use universal patterns to build up the simple
160  * parts of your structure - for example defining universal creation and
161  * property setting rules. More sophisticated and complex mapping will require
162  * non-universal patterns and this might mean that some of the universal rules
163  * will need to be replaced by a series of special cases using non-universal
164  * rules. But by using universal rules as your backbone, these additions
165  * should not break your existing rules.</p>
166  */

167
168
169 public class ExtendedBaseRules extends RulesBase {
170
171
172     // ----------------------------------------------------- Instance Variables
173

174     /**
175      * Counts the entry number for the rules.
176      */

177     private int counter = 0;
178
179
180     /**
181      * The decision algorithm used (unfortunately) doesn't preserve the entry
182      * order.
183      * This map is used by a comparator which orders the list of matches
184      * before it's returned.
185      * This map stores the entry number keyed by the rule.
186      */

187     private Map JavaDoc order = new HashMap JavaDoc();
188
189
190     // --------------------------------------------------------- Public Methods
191

192
193     /**
194      * Register a new Rule instance matching the specified pattern.
195      *
196      * @param pattern Nesting pattern to be matched for this Rule
197      * @param rule Rule instance to be registered
198      */

199     public void add(String JavaDoc pattern, Rule rule) {
200         super.add(pattern, rule);
201         counter++;
202         order.put(rule, new Integer JavaDoc(counter));
203     }
204
205
206     /**
207      * Return a List of all registered Rule instances that match the specified
208      * nesting pattern, or a zero-length List if there are no matches. If more
209      * than one Rule instance matches, they <strong>must</strong> be returned
210      * in the order originally registered through the <code>add()</code>
211      * method.
212      *
213      * @param pattern Nesting pattern to be matched
214      */

215     public List JavaDoc match(String JavaDoc namespace, String JavaDoc pattern) {
216         // calculate the pattern of the parent
217
// (if the element has one)
218
String JavaDoc parentPattern = "";
219         int lastIndex = pattern.lastIndexOf('/');
220
221         boolean hasParent = true;
222         if (lastIndex == -1) {
223             // element has no parent
224
hasParent = false;
225
226         } else {
227             // calculate the pattern of the parent
228
parentPattern = pattern.substring(0, lastIndex);
229
230         }
231
232
233         // we keep the list of universal matches separate
234
List JavaDoc universalList = new ArrayList JavaDoc(counter);
235
236         // Universal all wildards ('!*')
237
// These are always matched so always add them
238
List JavaDoc tempList = (List JavaDoc) this.cache.get("!*");
239         if (tempList != null) {
240             universalList.addAll(tempList);
241         }
242
243         // Universal exact parent match
244
// need to get this now since only wildcards are considered later
245
tempList = (List JavaDoc) this.cache.get("!" + parentPattern + "/?");
246         if (tempList != null) {
247             universalList.addAll(tempList);
248         }
249
250
251         // base behaviour means that if we certain matches, we don't continue
252
// but we just have a single combined loop and so we have to set
253
// a variable
254
boolean ignoreBasicMatches = false;
255
256
257         // see if we have an exact basic pattern match
258
List JavaDoc rulesList = (List JavaDoc) this.cache.get(pattern);
259         if (rulesList != null) {
260             // we have a match!
261
// so ignore all basic matches from now on
262
ignoreBasicMatches = true;
263
264         } else {
265
266             // see if we have an exact child match
267
if (hasParent) {
268                 // matching children takes preference
269
rulesList = (List JavaDoc) this.cache.get(parentPattern + "/?");
270                 if (rulesList != null) {
271                     // we have a match!
272
// so ignore all basic matches from now on
273
ignoreBasicMatches = true;
274                     
275                 } else {
276                     // we don't have a match yet - so try exact ancester
277
//
278
rulesList = findExactAncesterMatch(pattern);
279                     if (rulesList != null) {
280                         // we have a match!
281
// so ignore all basic matches from now on
282
ignoreBasicMatches = true;
283                     }
284                 }
285             }
286         }
287
288
289         // OK - we're ready for the big loop!
290
// Unlike the basic rules case,
291
// we have to go through for all those universal rules in all cases.
292

293         // Find the longest key, ie more discriminant
294
String JavaDoc longKey = "";
295         int longKeyLength = 0;
296         
297         Iterator JavaDoc keys = this.cache.keySet().iterator();
298         while (keys.hasNext()) {
299             String JavaDoc key = (String JavaDoc) keys.next();
300
301             // find out if it's a univeral pattern
302
// set a flag
303
boolean isUniversal = key.startsWith("!");
304             if (isUniversal) {
305                 // and find the underlying key
306
key = key.substring(1, key.length());
307             }
308
309                     
310             // don't need to check exact matches
311
boolean wildcardMatchStart = key.startsWith("*/");
312             boolean wildcardMatchEnd = key.endsWith("/*");
313             if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) {
314
315                 boolean parentMatched = false;
316                 boolean basicMatched = false;
317                 boolean ancesterMatched = false;
318                 
319                 boolean parentMatchEnd = key.endsWith("/?");
320                 if (parentMatchEnd) {
321                     // try for a parent match
322
parentMatched = parentMatch(key, pattern, parentPattern);
323
324                 } else if (wildcardMatchEnd) {
325                     // check for ancester match
326
if (wildcardMatchStart) {
327                         String JavaDoc patternBody = key.substring(2, key.length() - 2);
328                         if (pattern.endsWith(patternBody)) {
329                             ancesterMatched = true;
330                         } else {
331                             ancesterMatched = (pattern.indexOf(patternBody + "/") > -1);
332                         }
333                     } else {
334                         String JavaDoc bodyPattern = key.substring(0, key.length() - 2);
335                         if (pattern.startsWith(bodyPattern))
336                         {
337                             if (pattern.length() == bodyPattern.length()) {
338                                 // exact match
339
ancesterMatched = true;
340                             } else {
341                                 ancesterMatched = ( pattern.charAt(bodyPattern.length()) == '/' );
342                             }
343                         } else {
344                             ancesterMatched = false;
345                         }
346                     }
347                 } else {
348                     // try for a base match
349
basicMatched = basicMatch(key, pattern);
350                 }
351
352                 if (parentMatched || basicMatched || ancesterMatched) {
353                     if (isUniversal) {
354                         // universal rules go straight in
355
// (no longest matching rule)
356
tempList = (List JavaDoc) this.cache.get("!" + key);
357                         if (tempList != null) {
358                             universalList.addAll(tempList);
359                         }
360
361                     } else {
362                         if (!ignoreBasicMatches) {
363                             // ensure that all parent matches are SHORTER
364
// than rules with same level of matching.
365
//
366
// the calculations below don't work for universal
367
// matching, but we don't care because in that case
368
// this if-stmt is not entered.
369
int keyLength = key.length();
370                             if (wildcardMatchStart) {
371                                 --keyLength;
372                             }
373                             if (wildcardMatchEnd) {
374                                 --keyLength;
375                             } else if (parentMatchEnd) {
376                                 --keyLength;
377                             }
378
379                             if (keyLength > longKeyLength) {
380                                 rulesList = (List JavaDoc) this.cache.get(key);
381                                 longKey = key;
382                                 longKeyLength = keyLength;
383                             }
384                         }
385                     }
386                 }
387             }
388         }
389
390
391         // '*' works in practice as a default matching
392
// (this is because anything is a deeper match!)
393
if (rulesList == null) {
394             rulesList = (List JavaDoc) this.cache.get("*");
395         }
396
397         // if we've matched a basic pattern, then add to the universal list
398
if (rulesList != null) {
399             universalList.addAll(rulesList);
400         }
401
402
403         // don't filter if namespace is null
404
if (namespace != null) {
405             // remove invalid namespaces
406
Iterator JavaDoc it = universalList.iterator();
407             while (it.hasNext()) {
408                 Rule rule = (Rule) it.next();
409                 String JavaDoc ns_uri = rule.getNamespaceURI();
410                 if (ns_uri != null && !ns_uri.equals(namespace)) {
411                     it.remove();
412                 }
413             }
414         }
415
416
417         // need to make sure that the collection is sort in the order
418
// of addition. We use a custom comparator for this
419
Collections.sort(
420                 universalList,
421                 new Comparator JavaDoc() {
422
423                     public int compare(Object JavaDoc o1, Object JavaDoc o2) throws ClassCastException JavaDoc {
424                         // Get the entry order from the map
425
Integer JavaDoc i1 = (Integer JavaDoc) order.get(o1);
426                         Integer JavaDoc i2 = (Integer JavaDoc) order.get(o2);
427
428                         // and use that to perform the comparison
429
if (i1 == null) {
430                             if (i2 == null) {
431
432                                 return 0;
433
434                             } else {
435
436                                 return -1;
437
438                             }
439                         } else if (i2 == null) {
440                             return 1;
441                         }
442
443                         return (i1.intValue() - i2.intValue());
444                     }
445                 });
446
447         return universalList;
448     }
449
450     /**
451      * Matching parent.
452      */

453     private boolean parentMatch(String JavaDoc key, String JavaDoc pattern, String JavaDoc parentPattern) {
454         return parentPattern.endsWith(key.substring(1, key.length() - 2));
455     }
456
457     /**
458      * Standard match.
459      * Matches the end of the pattern to the key.
460      */

461     private boolean basicMatch(String JavaDoc key, String JavaDoc pattern) {
462         return (pattern.equals(key.substring(2)) ||
463                 pattern.endsWith(key.substring(1)));
464     }
465     
466     /**
467      * Finds an exact ancester match for given pattern
468      */

469     private List JavaDoc findExactAncesterMatch(String JavaDoc parentPattern) {
470         List JavaDoc matchingRules = null;
471         int lastIndex = parentPattern.length();
472         while (lastIndex-- > 0) {
473             lastIndex = parentPattern.lastIndexOf('/', lastIndex);
474             if (lastIndex > 0) {
475                 matchingRules = (List JavaDoc) this.cache.get(parentPattern.substring(0, lastIndex) + "/*");
476                 if (matchingRules != null) {
477                     return matchingRules;
478                 }
479             }
480         }
481         return null;
482     }
483 }
484
Popular Tags