KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > xerces > impl > dtd > models > DFAContentModel


1 /*
2  * Copyright 1999-2002,2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16
17 package org.apache.xerces.impl.dtd.models;
18
19 import org.apache.xerces.xni.QName;
20 import org.apache.xerces.impl.dtd.XMLContentSpec;
21
22 /**
23  * DFAContentModel is the derivative of ContentModel that does
24  * all of the non-trivial element content validation. This class does
25  * the conversion from the regular expression to the DFA that
26  * it then uses in its validation algorithm.
27  * <p>
28  * <b>Note:</b> Upstream work insures that this class will never see
29  * a content model with PCDATA in it. Any model with PCDATA is 'mixed'
30  * and is handled via the MixedContentModel class since mixed models
31  * are very constrained in form and easily handled via a special case.
32  * This also makes implementation of this class much easier.
33  *
34  * @xerces.internal
35  *
36  * @version $Id: DFAContentModel.java,v 1.7 2004/10/04 22:00:42 mrglavas Exp $
37  */

38 public class DFAContentModel
39     implements ContentModelValidator {
40
41     //
42
// Constants
43
//
44
// special strings
45

46     /** Epsilon string. */
47     private static String JavaDoc fEpsilonString = "<<CMNODE_EPSILON>>";
48
49     /** End-of-content string. */
50     private static String JavaDoc fEOCString = "<<CMNODE_EOC>>";
51
52     /** initializing static members **/
53     static {
54         fEpsilonString = fEpsilonString.intern();
55         fEOCString = fEOCString.intern();
56     }
57
58     // debugging
59

60     /** Set to true to debug content model validation. */
61     private static final boolean DEBUG_VALIDATE_CONTENT = false;
62
63     //
64
// Data
65
//
66

67     /* this is the EquivClassComparator object */
68     //private EquivClassComparator comparator = null;
69

70     /**
71      * This is the map of unique input symbol elements to indices into
72      * each state's per-input symbol transition table entry. This is part
73      * of the built DFA information that must be kept around to do the
74      * actual validation.
75      */

76     private QName fElemMap[] = null;
77
78     /**
79      * This is a map of whether the element map contains information
80      * related to ANY models.
81      */

82     private int fElemMapType[] = null;
83
84     /** The element map size. */
85     private int fElemMapSize = 0;
86
87     /** Boolean to distinguish Schema Mixed Content */
88     private boolean fMixed;
89
90     /**
91      * The NFA position of the special EOC (end of content) node. This
92      * is saved away since it's used during the DFA build.
93      */

94     private int fEOCPos = 0;
95
96
97     /**
98      * This is an array of booleans, one per state (there are
99      * fTransTableSize states in the DFA) that indicates whether that
100      * state is a final state.
101      */

102     private boolean fFinalStateFlags[] = null;
103
104     /**
105      * The list of follow positions for each NFA position (i.e. for each
106      * non-epsilon leaf node.) This is only used during the building of
107      * the DFA, and is let go afterwards.
108      */

109     private CMStateSet fFollowList[] = null;
110
111     /**
112      * This is the head node of our intermediate representation. It is
113      * only non-null during the building of the DFA (just so that it
114      * does not have to be passed all around.) Once the DFA is built,
115      * this is no longer required so its nulled out.
116      */

117     private CMNode fHeadNode = null;
118
119     /**
120      * The count of leaf nodes. This is an important number that set some
121      * limits on the sizes of data structures in the DFA process.
122      */

123     private int fLeafCount = 0;
124
125     /**
126      * An array of non-epsilon leaf nodes, which is used during the DFA
127      * build operation, then dropped.
128      */

129     private CMLeaf fLeafList[] = null;
130
131     /** Array mapping ANY types to the leaf list. */
132     private int fLeafListType[] = null;
133
134     //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
135

136     /**
137      * The string pool of our parser session. This is set during construction
138      * and kept around.
139      */

140     //private StringPool fStringPool = null;
141

142     /**
143      * This is the transition table that is the main by product of all
144      * of the effort here. It is an array of arrays of ints. The first
145      * dimension is the number of states we end up with in the DFA. The
146      * second dimensions is the number of unique elements in the content
147      * model (fElemMapSize). Each entry in the second dimension indicates
148      * the new state given that input for the first dimension's start
149      * state.
150      * <p>
151      * The fElemMap array handles mapping from element indexes to
152      * positions in the second dimension of the transition table.
153      */

154     private int fTransTable[][] = null;
155
156     /**
157      * The number of valid entries in the transition table, and in the other
158      * related tables such as fFinalStateFlags.
159      */

160     private int fTransTableSize = 0;
161
162     /**
163      * Flag that indicates that even though we have a "complicated"
164      * content model, it is valid to have no content. In other words,
165      * all parts of the content model are optional. For example:
166      * <pre>
167      * &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
168      * </pre>
169      */

170     private boolean fEmptyContentIsValid = false;
171
172     // temp variables
173

174     /** Temporary qualified name. */
175     private QName fQName = new QName();
176
177     //
178
// Constructors
179
//
180

181
182     //
183
// Constructors
184
//
185

186     /**
187      * Constructs a DFA content model.
188      *
189      * @param syntaxTree The syntax tree of the content model.
190      * @param leafCount The number of leaves.
191      * @param mixed
192      *
193      */

194     public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
195         // Store away our index and pools in members
196
//fStringPool = stringPool;
197
fLeafCount = leafCount;
198
199
200         // this is for Schema Mixed Content
201
fMixed = mixed;
202
203         //
204
// Ok, so lets grind through the building of the DFA. This method
205
// handles the high level logic of the algorithm, but it uses a
206
// number of helper classes to do its thing.
207
//
208
// In order to avoid having hundreds of references to the error and
209
// string handlers around, this guy and all of his helper classes
210
// just throw a simple exception and we then pass it along.
211
//
212
buildDFA(syntaxTree);
213     }
214
215     //
216
// ContentModelValidator methods
217
//
218

219     /**
220      * Check that the specified content is valid according to this
221      * content model. This method can also be called to do 'what if'
222      * testing of content models just to see if they would be valid.
223      * <p>
224      * A value of -1 in the children array indicates a PCDATA node. All other
225      * indexes will be positive and represent child elements. The count can be
226      * zero, since some elements have the EMPTY content model and that must be
227      * confirmed.
228      *
229      * @param children The children of this element. Each integer is an index within
230      * the <code>StringPool</code> of the child element name. An index
231      * of -1 is used to indicate an occurrence of non-whitespace character
232      * data.
233      * @param offset Offset into the array where the children starts.
234      * @param length The number of entries in the <code>children</code> array.
235      *
236      * @return The value -1 if fully valid, else the 0 based index of the child
237      * that first failed. If the value returned is equal to the number
238      * of children, then the specified children are valid but additional
239      * content is required to reach a valid ending state.
240      *
241      */

242     public int validate(QName[] children, int offset, int length) {
243
244         if (DEBUG_VALIDATE_CONTENT)
245             System.out.println("DFAContentModel#validateContent");
246
247         //
248
// A DFA content model must *always* have at least 1 child
249
// so a failure is given if no children present.
250
//
251
// Defect 782: This is an incorrect statement because a DFA
252
// content model is also used for constructions such as:
253
//
254
// (Optional*,NotRequired?)
255
//
256
// where a perfectly valid content would be NO CHILDREN.
257
// Therefore, if there are no children, we must check to
258
// see if the CMNODE_EOC marker is a valid start state! -Ac
259
//
260
if (length == 0) {
261             if (DEBUG_VALIDATE_CONTENT) {
262                 System.out.println("!!! no children");
263                 System.out.println("elemMap="+fElemMap);
264                 for (int i = 0; i < fElemMap.length; i++) {
265                     String JavaDoc uri = fElemMap[i].uri;
266                     String JavaDoc localpart = fElemMap[i].localpart;
267                     
268                     System.out.println("fElemMap["+i+"]="+uri+","+
269                                        localpart+" ("+
270                                        uri+", "+
271                                        localpart+
272                                        ')');
273                                        
274                 }
275                 System.out.println("EOCIndex="+fEOCString);
276             }
277
278             return fEmptyContentIsValid ? -1 : 0;
279
280         } // if child count == 0
281

282         //
283
// Lets loop through the children in the array and move our way
284
// through the states. Note that we use the fElemMap array to map
285
// an element index to a state index.
286
//
287
int curState = 0;
288         for (int childIndex = 0; childIndex < length; childIndex++)
289         {
290             // Get the current element index out
291
final QName curElem = children[offset + childIndex];
292             // ignore mixed text
293
if (fMixed && curElem.localpart == null) {
294                 continue;
295             }
296
297             // Look up this child in our element map
298
int elemIndex = 0;
299             for (; elemIndex < fElemMapSize; elemIndex++)
300             {
301                 int type = fElemMapType[elemIndex] & 0x0f ;
302                 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
303                     //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
304
if (fElemMap[elemIndex].rawname == curElem.rawname) {
305                         break;
306                     }
307                 }
308                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
309                     String JavaDoc uri = fElemMap[elemIndex].uri;
310                     if (uri == null || uri == curElem.uri) {
311                         break;
312                     }
313                 }
314                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
315                     if (curElem.uri == null) {
316                         break;
317                     }
318                 }
319                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
320                     if (fElemMap[elemIndex].uri != curElem.uri) {
321                         break;
322                     }
323                 }
324             }
325
326             // If we didn't find it, then obviously not valid
327
if (elemIndex == fElemMapSize) {
328                 if (DEBUG_VALIDATE_CONTENT) {
329                     System.out.println("!!! didn't find it");
330
331                     System.out.println("curElem : " +curElem );
332                     for (int i=0; i<fElemMapSize; i++) {
333                         System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
334                         System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
335                     }
336                 }
337
338                 return childIndex;
339             }
340
341             //
342
// Look up the next state for this input symbol when in the
343
// current state.
344
//
345
curState = fTransTable[curState][elemIndex];
346
347             // If its not a legal transition, then invalid
348
if (curState == -1) {
349                 if (DEBUG_VALIDATE_CONTENT)
350                     System.out.println("!!! not a legal transition");
351                 return childIndex;
352             }
353         }
354
355         //
356
// We transitioned all the way through the input list. However, that
357
// does not mean that we ended in a final state. So check whether
358
// our ending state is a final state.
359
//
360
if (DEBUG_VALIDATE_CONTENT)
361             System.out.println("curState="+curState+", childCount="+length);
362         if (!fFinalStateFlags[curState])
363             return length;
364
365         // success!
366
return -1;
367     } // validate
368

369
370     //
371
// Private methods
372
//
373

374     /**
375      * Builds the internal DFA transition table from the given syntax tree.
376      *
377      * @param syntaxTree The syntax tree.
378      *
379      * @exception CMException Thrown if DFA cannot be built.
380      */

381     private void buildDFA(CMNode syntaxTree)
382     {
383         //
384
// The first step we need to take is to rewrite the content model
385
// using our CMNode objects, and in the process get rid of any
386
// repetition short cuts, converting them into '*' style repetitions
387
// or getting rid of repetitions altogether.
388
//
389
// The conversions done are:
390
//
391
// x+ -> (x|x*)
392
// x? -> (x|epsilon)
393
//
394
// This is a relatively complex scenario. What is happening is that
395
// we create a top level binary node of which the special EOC value
396
// is set as the right side node. The the left side is set to the
397
// rewritten syntax tree. The source is the original content model
398
// info from the decl pool. The rewrite is done by buildSyntaxTree()
399
// which recurses the decl pool's content of the element and builds
400
// a new tree in the process.
401
//
402
// Note that, during this operation, we set each non-epsilon leaf
403
// node's DFA state position and count the number of such leafs, which
404
// is left in the fLeafCount member.
405
//
406
// The nodeTmp object is passed in just as a temp node to use during
407
// the recursion. Otherwise, we'd have to create a new node on every
408
// level of recursion, which would be piggy in Java (as is everything
409
// for that matter.)
410
//
411

412     /* MODIFIED (Jan, 2001)
413      *
414      * Use following rules.
415      * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
416      * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
417      *
418      * The same computation of follow as x* is applied to x+
419      *
420      * The modification drastically reduces computation time of
421      * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
422      */

423
424         fQName.setValues(null, fEOCString, fEOCString, null);
425         CMLeaf nodeEOC = new CMLeaf(fQName);
426         fHeadNode = new CMBinOp
427         (
428             XMLContentSpec.CONTENTSPECNODE_SEQ
429             , syntaxTree
430             , nodeEOC
431         );
432
433         //
434
// And handle specially the EOC node, which also must be numbered
435
// and counted as a non-epsilon leaf node. It could not be handled
436
// in the above tree build because it was created before all that
437
// started. We save the EOC position since its used during the DFA
438
// building loop.
439
//
440
fEOCPos = fLeafCount;
441         nodeEOC.setPosition(fLeafCount++);
442
443         //
444
// Ok, so now we have to iterate the new tree and do a little more
445
// work now that we know the leaf count. One thing we need to do is
446
// to calculate the first and last position sets of each node. This
447
// is cached away in each of the nodes.
448
//
449
// Along the way we also set the leaf count in each node as the
450
// maximum state count. They must know this in order to create their
451
// first/last pos sets.
452
//
453
// We also need to build an array of references to the non-epsilon
454
// leaf nodes. Since we iterate it in the same way as before, this
455
// will put them in the array according to their position values.
456
//
457
fLeafList = new CMLeaf[fLeafCount];
458         fLeafListType = new int[fLeafCount];
459         postTreeBuildInit(fHeadNode, 0);
460
461         //
462
// And, moving onward... We now need to build the follow position
463
// sets for all the nodes. So we allocate an array of state sets,
464
// one for each leaf node (i.e. each DFA position.)
465
//
466
fFollowList = new CMStateSet[fLeafCount];
467         for (int index = 0; index < fLeafCount; index++)
468             fFollowList[index] = new CMStateSet(fLeafCount);
469         calcFollowList(fHeadNode);
470         //
471
// And finally the big push... Now we build the DFA using all the
472
// states and the tree we've built up. First we set up the various
473
// data structures we are going to use while we do this.
474
//
475
// First of all we need an array of unique element names in our
476
// content model. For each transition table entry, we need a set of
477
// contiguous indices to represent the transitions for a particular
478
// input element. So we need to a zero based range of indexes that
479
// map to element types. This element map provides that mapping.
480
//
481
fElemMap = new QName[fLeafCount];
482         fElemMapType = new int[fLeafCount];
483         fElemMapSize = 0;
484         for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
485         {
486             fElemMap[outIndex] = new QName();
487
488             /****
489             if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
490                 if (fLeafNameTypeVector == null) {
491                     fLeafNameTypeVector = new ContentLeafNameTypeVector();
492                 }
493             }
494             /****/

495
496             // Get the current leaf's element index
497
final QName element = fLeafList[outIndex].getElement();
498
499             // See if the current leaf node's element index is in the list
500
int inIndex = 0;
501             for (; inIndex < fElemMapSize; inIndex++)
502             {
503                 if (fElemMap[inIndex].rawname == element.rawname) {
504                     break;
505                 }
506             }
507
508             // If it was not in the list, then add it, if not the EOC node
509
if (inIndex == fElemMapSize) {
510                 fElemMap[fElemMapSize].setValues(element);
511                 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
512                 fElemMapSize++;
513             }
514         }
515         // set up the fLeafNameTypeVector object if there is one.
516
/*****
517         if (fLeafNameTypeVector != null) {
518             fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
519         }
520
521     /***
522     * Optimization(Jan, 2001); We sort fLeafList according to
523     * elemIndex which is *uniquely* associated to each leaf.
524     * We are *assuming* that each element appears in at least one leaf.
525     **/

526
527     int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
528     int fSortCount = 0;
529
530     for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
531         for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
532             final QName leaf = fLeafList[leafIndex].getElement();
533             final QName element = fElemMap[elemIndex];
534             if (leaf.rawname == element.rawname) {
535                 fLeafSorter[fSortCount++] = leafIndex;
536             }
537         }
538         fLeafSorter[fSortCount++] = -1;
539     }
540
541     /* Optimization(Jan, 2001) */
542
543         //
544
// Next lets create some arrays, some that that hold transient
545
// information during the DFA build and some that are permament.
546
// These are kind of sticky since we cannot know how big they will
547
// get, but we don't want to use any Java collections because of
548
// performance.
549
//
550
// Basically they will probably be about fLeafCount*2 on average,
551
// but can be as large as 2^(fLeafCount*2), worst case. So we start
552
// with fLeafCount*4 as a middle ground. This will be very unlikely
553
// to ever have to expand, though it if does, the overhead will be
554
// somewhat ugly.
555
//
556
int curArraySize = fLeafCount * 4;
557         CMStateSet[] statesToDo = new CMStateSet[curArraySize];
558         fFinalStateFlags = new boolean[curArraySize];
559         fTransTable = new int[curArraySize][];
560
561         //
562
// Ok we start with the initial set as the first pos set of the
563
// head node (which is the seq node that holds the content model
564
// and the EOC node.)
565
//
566
CMStateSet setT = fHeadNode.firstPos();
567
568         //
569
// Init our two state flags. Basically the unmarked state counter
570
// is always chasing the current state counter. When it catches up,
571
// that means we made a pass through that did not add any new states
572
// to the lists, at which time we are done. We could have used a
573
// expanding array of flags which we used to mark off states as we
574
// complete them, but this is easier though less readable maybe.
575
//
576
int unmarkedState = 0;
577         int curState = 0;
578
579         //
580
// Init the first transition table entry, and put the initial state
581
// into the states to do list, then bump the current state.
582
//
583
fTransTable[curState] = makeDefStateList();
584         statesToDo[curState] = setT;
585         curState++;
586
587         /* Optimization(Jan, 2001); This is faster for
588          * a large content model such as, "(t001+|t002+|.... |t500+)".
589          */

590
591     java.util.Hashtable JavaDoc stateTable = new java.util.Hashtable JavaDoc();
592
593         /* Optimization(Jan, 2001) */
594
595         //
596
// Ok, almost done with the algorithm... We now enter the
597
// loop where we go until the states done counter catches up with
598
// the states to do counter.
599
//
600
while (unmarkedState < curState)
601         {
602             //
603
// Get the first unmarked state out of the list of states to do.
604
// And get the associated transition table entry.
605
//
606
setT = statesToDo[unmarkedState];
607             int[] transEntry = fTransTable[unmarkedState];
608
609             // Mark this one final if it contains the EOC state
610
fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
611
612             // Bump up the unmarked state count, marking this state done
613
unmarkedState++;
614
615             // Loop through each possible input symbol in the element map
616
CMStateSet newSet = null;
617         /* Optimization(Jan, 2001) */
618             int sorterIndex = 0;
619         /* Optimization(Jan, 2001) */
620             for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
621             {
622                 //
623
// Build up a set of states which is the union of all of
624
// the follow sets of DFA positions that are in the current
625
// state. If we gave away the new set last time through then
626
// create a new one. Otherwise, zero out the existing one.
627
//
628
if (newSet == null)
629                     newSet = new CMStateSet(fLeafCount);
630                 else
631                     newSet.zeroBits();
632
633         /* Optimization(Jan, 2001) */
634                 int leafIndex = fLeafSorter[sorterIndex++];
635
636                 while (leafIndex != -1) {
637             // If this leaf index (DFA position) is in the current set...
638
if (setT.getBit(leafIndex))
639                     {
640                         //
641
// If this leaf is the current input symbol, then we
642
// want to add its follow list to the set of states to
643
// transition to from the current state.
644
//
645
newSet.union(fFollowList[leafIndex]);
646                             }
647
648                    leafIndex = fLeafSorter[sorterIndex++];
649     }
650         /* Optimization(Jan, 2001) */
651
652                 //
653
// If this new set is not empty, then see if its in the list
654
// of states to do. If not, then add it.
655
//
656
if (!newSet.isEmpty())
657                 {
658                     //
659
// Search the 'states to do' list to see if this new
660
// state set is already in there.
661
//
662

663         /* Optimization(Jan, 2001) */
664         Integer JavaDoc stateObj = (Integer JavaDoc)stateTable.get(newSet);
665         int stateIndex = (stateObj == null ? curState : stateObj.intValue());
666         /* Optimization(Jan, 2001) */
667
668                     // If we did not find it, then add it
669
if (stateIndex == curState)
670                     {
671                         //
672
// Put this new state into the states to do and init
673
// a new entry at the same index in the transition
674
// table.
675
//
676
statesToDo[curState] = newSet;
677                         fTransTable[curState] = makeDefStateList();
678
679         /* Optimization(Jan, 2001) */
680                         stateTable.put(newSet, new Integer JavaDoc(curState));
681         /* Optimization(Jan, 2001) */
682
683                         // We now have a new state to do so bump the count
684
curState++;
685
686                         //
687
// Null out the new set to indicate we adopted it.
688
// This will cause the creation of a new set on the
689
// next time around the loop.
690
//
691
newSet = null;
692                     }
693
694                     //
695
// Now set this state in the transition table's entry
696
// for this element (using its index), with the DFA
697
// state we will move to from the current state when we
698
// see this input element.
699
//
700
transEntry[elemIndex] = stateIndex;
701
702                     // Expand the arrays if we're full
703
if (curState == curArraySize)
704                     {
705                         //
706
// Yikes, we overflowed the initial array size, so
707
// we've got to expand all of these arrays. So adjust
708
// up the size by 50% and allocate new arrays.
709
//
710
final int newSize = (int)(curArraySize * 1.5);
711                         CMStateSet[] newToDo = new CMStateSet[newSize];
712                         boolean[] newFinalFlags = new boolean[newSize];
713                         int[][] newTransTable = new int[newSize][];
714
715                         // Copy over all of the existing content
716
for (int expIndex = 0; expIndex < curArraySize; expIndex++)
717                         {
718                             newToDo[expIndex] = statesToDo[expIndex];
719                             newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
720                             newTransTable[expIndex] = fTransTable[expIndex];
721                         }
722
723                         // Store the new array size
724
curArraySize = newSize;
725                         statesToDo = newToDo;
726                         fFinalStateFlags = newFinalFlags;
727                         fTransTable = newTransTable;
728                     }
729                 }
730             }
731         }
732
733         // Check to see if we can set the fEmptyContentIsValid flag.
734
fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
735
736         //
737
// And now we can say bye bye to the temp representation since we've
738
// built the DFA.
739
//
740
if (DEBUG_VALIDATE_CONTENT)
741             dumpTree(fHeadNode, 0);
742         fHeadNode = null;
743         fLeafList = null;
744         fFollowList = null;
745
746     }
747
748     /**
749      * Calculates the follow list of the current node.
750      *
751      * @param nodeCur The curent node.
752      *
753      * @exception CMException Thrown if follow list cannot be calculated.
754      */

755     private void calcFollowList(CMNode nodeCur)
756     {
757         // Recurse as required
758
if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
759         {
760             // Recurse only
761
calcFollowList(((CMBinOp)nodeCur).getLeft());
762             calcFollowList(((CMBinOp)nodeCur).getRight());
763         }
764          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
765         {
766             // Recurse first
767
calcFollowList(((CMBinOp)nodeCur).getLeft());
768             calcFollowList(((CMBinOp)nodeCur).getRight());
769
770             //
771
// Now handle our level. We use our left child's last pos
772
// set and our right child's first pos set, so go ahead and
773
// get them ahead of time.
774
//
775
final CMStateSet last = ((CMBinOp)nodeCur).getLeft().lastPos();
776             final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
777
778             //
779
// Now, for every position which is in our left child's last set
780
// add all of the states in our right child's first set to the
781
// follow set for that position.
782
//
783
for (int index = 0; index < fLeafCount; index++)
784             {
785                 if (last.getBit(index))
786                     fFollowList[index].union(first);
787             }
788         }
789          /***
790          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
791         {
792             // Recurse first
793             calcFollowList(((CMUniOp)nodeCur).getChild());
794
795             //
796             // Now handle our level. We use our own first and last position
797             // sets, so get them up front.
798             //
799             final CMStateSet first = nodeCur.firstPos();
800             final CMStateSet last = nodeCur.lastPos();
801
802             //
803             // For every position which is in our last position set, add all
804             // of our first position states to the follow set for that
805             // position.
806             //
807             for (int index = 0; index < fLeafCount; index++)
808             {
809                 if (last.getBit(index))
810                     fFollowList[index].union(first);
811             }
812         }
813          else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
814               || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
815         {
816             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
817         }
818         /***/

819          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
820         || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
821         {
822             // Recurse first
823
calcFollowList(((CMUniOp)nodeCur).getChild());
824
825             //
826
// Now handle our level. We use our own first and last position
827
// sets, so get them up front.
828
//
829
final CMStateSet first = nodeCur.firstPos();
830             final CMStateSet last = nodeCur.lastPos();
831
832             //
833
// For every position which is in our last position set, add all
834
// of our first position states to the follow set for that
835
// position.
836
//
837
for (int index = 0; index < fLeafCount; index++)
838             {
839                 if (last.getBit(index))
840                     fFollowList[index].union(first);
841             }
842         }
843        
844         else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
845             // Recurse only
846
calcFollowList(((CMUniOp)nodeCur).getChild());
847         }
848          /***/
849     }
850
851     /**
852      * Dumps the tree of the current node to standard output.
853      *
854      * @param nodeCur The current node.
855      * @param level The maximum levels to output.
856      *
857      * @exception CMException Thrown on error.
858      */

859     private void dumpTree(CMNode nodeCur, int level)
860     {
861         for (int index = 0; index < level; index++)
862             System.out.print(" ");
863
864         int type = nodeCur.type();
865         if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
866         || (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
867         {
868             if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
869                 System.out.print("Choice Node ");
870             else
871                 System.out.print("Seq Node ");
872
873             if (nodeCur.isNullable())
874                 System.out.print("Nullable ");
875
876             System.out.print("firstPos=");
877             System.out.print(nodeCur.firstPos().toString());
878             System.out.print(" lastPos=");
879             System.out.println(nodeCur.lastPos().toString());
880
881             dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
882             dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
883         }
884          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
885         {
886             System.out.print("Rep Node ");
887
888             if (nodeCur.isNullable())
889                 System.out.print("Nullable ");
890
891             System.out.print("firstPos=");
892             System.out.print(nodeCur.firstPos().toString());
893             System.out.print(" lastPos=");
894             System.out.println(nodeCur.lastPos().toString());
895
896             dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
897         }
898          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
899         {
900             System.out.print
901             (
902                 "Leaf: (pos="
903                 + ((CMLeaf)nodeCur).getPosition()
904                 + "), "
905                 + ((CMLeaf)nodeCur).getElement()
906                 + "(elemIndex="
907                 + ((CMLeaf)nodeCur).getElement()
908                 + ") "
909             );
910
911             if (nodeCur.isNullable())
912                 System.out.print(" Nullable ");
913
914             System.out.print("firstPos=");
915             System.out.print(nodeCur.firstPos().toString());
916             System.out.print(" lastPos=");
917             System.out.println(nodeCur.lastPos().toString());
918         }
919          else
920         {
921             throw new RuntimeException JavaDoc("ImplementationMessages.VAL_NIICM");
922         }
923     }
924
925
926     /**
927      * -1 is used to represent bad transitions in the transition table
928      * entry for each state. So each entry is initialized to an all -1
929      * array. This method creates a new entry and initializes it.
930      */

931     private int[] makeDefStateList()
932     {
933         int[] retArray = new int[fElemMapSize];
934         for (int index = 0; index < fElemMapSize; index++)
935             retArray[index] = -1;
936         return retArray;
937     }
938
939     /** Post tree build initialization. */
940     private int postTreeBuildInit(CMNode nodeCur, int curIndex)
941     {
942         // Set the maximum states on this node
943
nodeCur.setMaxStates(fLeafCount);
944
945         // Recurse as required
946
if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
947             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
948             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
949             // REVISIT: Don't waste these structures.
950
QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI());
951             fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
952             fLeafListType[curIndex] = nodeCur.type();
953             curIndex++;
954         }
955         else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
956         || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
957         {
958             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
959             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
960         }
961          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
962          || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
963          || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)
964         {
965             curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
966         }
967          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
968         {
969             //
970
// Put this node in the leaf list at the current index if its
971
// a non-epsilon leaf.
972
//
973
final QName node = ((CMLeaf)nodeCur).getElement();
974             if (node.localpart != fEpsilonString) {
975                 fLeafList[curIndex] = (CMLeaf)nodeCur;
976                 fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
977                 curIndex++;
978             }
979         }
980          else
981         {
982             throw new RuntimeException JavaDoc("ImplementationMessages.VAL_NIICM: type="+nodeCur.type());
983         }
984         return curIndex;
985     }
986
987 } // class DFAContentModel
988
Popular Tags