KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xerces > internal > impl > dtd > models > DFAContentModel


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package com.sun.org.apache.xerces.internal.impl.dtd.models;
59
60 import com.sun.org.apache.xerces.internal.xni.QName;
61 import com.sun.org.apache.xerces.internal.impl.dtd.XMLContentSpec;
62
63 /**
64  * @version $Id: DFAContentModel.java,v 1.4 2002/05/29 17:59:37 neilg Exp $
65  *
66  * DFAContentModel is the derivative of ContentModel that does
67  * all of the non-trivial element content validation. This class does
68  * the conversion from the regular expression to the DFA that
69  * it then uses in its validation algorithm.
70  * <p>
71  * <b>Note:</b> Upstream work insures that this class will never see
72  * a content model with PCDATA in it. Any model with PCDATA is 'mixed'
73  * and is handled via the MixedContentModel class since mixed models
74  * are very constrained in form and easily handled via a special case.
75  * This also makes implementation of this class much easier.
76  *
77  */

78 public class DFAContentModel
79     implements ContentModelValidator {
80
81     //
82
// Constants
83
//
84
// special strings
85

86     /** Epsilon string. */
87     private static String JavaDoc fEpsilonString = "<<CMNODE_EPSILON>>";
88
89     /** End-of-content string. */
90     private static String JavaDoc fEOCString = "<<CMNODE_EOC>>";
91
92     /** initializing static members **/
93     static {
94         fEpsilonString = fEpsilonString.intern();
95         fEOCString = fEOCString.intern();
96     }
97
98     // debugging
99

100     /** Set to true to debug content model validation. */
101     private static final boolean DEBUG_VALIDATE_CONTENT = false;
102
103     //
104
// Data
105
//
106

107     /* this is the EquivClassComparator object */
108     //private EquivClassComparator comparator = null;
109

110     /**
111      * This is the map of unique input symbol elements to indices into
112      * each state's per-input symbol transition table entry. This is part
113      * of the built DFA information that must be kept around to do the
114      * actual validation.
115      */

116     private QName fElemMap[] = null;
117
118     /**
119      * This is a map of whether the element map contains information
120      * related to ANY models.
121      */

122     private int fElemMapType[] = null;
123
124     /** The element map size. */
125     private int fElemMapSize = 0;
126
127     /** Boolean to distinguish Schema Mixed Content */
128     private boolean fMixed;
129
130     /**
131      * The NFA position of the special EOC (end of content) node. This
132      * is saved away since it's used during the DFA build.
133      */

134     private int fEOCPos = 0;
135
136
137     /**
138      * This is an array of booleans, one per state (there are
139      * fTransTableSize states in the DFA) that indicates whether that
140      * state is a final state.
141      */

142     private boolean fFinalStateFlags[] = null;
143
144     /**
145      * The list of follow positions for each NFA position (i.e. for each
146      * non-epsilon leaf node.) This is only used during the building of
147      * the DFA, and is let go afterwards.
148      */

149     private CMStateSet fFollowList[] = null;
150
151     /**
152      * This is the head node of our intermediate representation. It is
153      * only non-null during the building of the DFA (just so that it
154      * does not have to be passed all around.) Once the DFA is built,
155      * this is no longer required so its nulled out.
156      */

157     private CMNode fHeadNode = null;
158
159     /**
160      * The count of leaf nodes. This is an important number that set some
161      * limits on the sizes of data structures in the DFA process.
162      */

163     private int fLeafCount = 0;
164
165     /**
166      * An array of non-epsilon leaf nodes, which is used during the DFA
167      * build operation, then dropped.
168      */

169     private CMLeaf fLeafList[] = null;
170
171     /** Array mapping ANY types to the leaf list. */
172     private int fLeafListType[] = null;
173
174     //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
175

176     /**
177      * The string pool of our parser session. This is set during construction
178      * and kept around.
179      */

180     //private StringPool fStringPool = null;
181

182     /**
183      * This is the transition table that is the main by product of all
184      * of the effort here. It is an array of arrays of ints. The first
185      * dimension is the number of states we end up with in the DFA. The
186      * second dimensions is the number of unique elements in the content
187      * model (fElemMapSize). Each entry in the second dimension indicates
188      * the new state given that input for the first dimension's start
189      * state.
190      * <p>
191      * The fElemMap array handles mapping from element indexes to
192      * positions in the second dimension of the transition table.
193      */

194     private int fTransTable[][] = null;
195
196     /**
197      * The number of valid entries in the transition table, and in the other
198      * related tables such as fFinalStateFlags.
199      */

200     private int fTransTableSize = 0;
201
202     /**
203      * Flag that indicates that even though we have a "complicated"
204      * content model, it is valid to have no content. In other words,
205      * all parts of the content model are optional. For example:
206      * <pre>
207      * &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
208      * </pre>
209      */

210     private boolean fEmptyContentIsValid = false;
211
212     // temp variables
213

214     /** Temporary qualified name. */
215     private QName fQName = new QName();
216
217     //
218
// Constructors
219
//
220

221
222     //
223
// Constructors
224
//
225

226     /**
227      * Constructs a DFA content model.
228      *
229      * @param syntaxTree The syntax tree of the content model.
230      * @param leafCount The number of leaves.
231      * @param dtd if it is created for a DTDGrammar.
232      *
233      */

234
235     public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
236         // Store away our index and pools in members
237
//fStringPool = stringPool;
238
fLeafCount = leafCount;
239
240
241         // this is for Schema Mixed Content
242
fMixed = mixed;
243
244         //
245
// Ok, so lets grind through the building of the DFA. This method
246
// handles the high level logic of the algorithm, but it uses a
247
// number of helper classes to do its thing.
248
//
249
// In order to avoid having hundreds of references to the error and
250
// string handlers around, this guy and all of his helper classes
251
// just throw a simple exception and we then pass it along.
252
//
253
buildDFA(syntaxTree);
254     }
255
256     //
257
// ContentModelValidator methods
258
//
259

260     /**
261      * Check that the specified content is valid according to this
262      * content model. This method can also be called to do 'what if'
263      * testing of content models just to see if they would be valid.
264      * <p>
265      * A value of -1 in the children array indicates a PCDATA node. All other
266      * indexes will be positive and represent child elements. The count can be
267      * zero, since some elements have the EMPTY content model and that must be
268      * confirmed.
269      *
270      * @param children The children of this element. Each integer is an index within
271      * the <code>StringPool</code> of the child element name. An index
272      * of -1 is used to indicate an occurrence of non-whitespace character
273      * data.
274      * @param offset Offset into the array where the children starts.
275      * @param length The number of entries in the <code>children</code> array.
276      *
277      * @return The value -1 if fully valid, else the 0 based index of the child
278      * that first failed. If the value returned is equal to the number
279      * of children, then the specified children are valid but additional
280      * content is required to reach a valid ending state.
281      *
282      */

283     public int validate(QName[] children, int offset, int length) {
284
285         if (DEBUG_VALIDATE_CONTENT)
286             System.out.println("DFAContentModel#validateContent");
287
288         //
289
// A DFA content model must *always* have at least 1 child
290
// so a failure is given if no children present.
291
//
292
// Defect 782: This is an incorrect statement because a DFA
293
// content model is also used for constructions such as:
294
//
295
// (Optional*,NotRequired?)
296
//
297
// where a perfectly valid content would be NO CHILDREN.
298
// Therefore, if there are no children, we must check to
299
// see if the CMNODE_EOC marker is a valid start state! -Ac
300
//
301
if (length == 0) {
302             if (DEBUG_VALIDATE_CONTENT) {
303                 System.out.println("!!! no children");
304                 System.out.println("elemMap="+fElemMap);
305                 for (int i = 0; i < fElemMap.length; i++) {
306                     String JavaDoc uri = fElemMap[i].uri;
307                     String JavaDoc localpart = fElemMap[i].localpart;
308                     
309                     System.out.println("fElemMap["+i+"]="+uri+","+
310                                        localpart+" ("+
311                                        uri+", "+
312                                        localpart+
313                                        ')');
314                                        
315                 }
316                 System.out.println("EOCIndex="+fEOCString);
317             }
318
319             return fEmptyContentIsValid ? -1 : 0;
320
321         } // if child count == 0
322

323         //
324
// Lets loop through the children in the array and move our way
325
// through the states. Note that we use the fElemMap array to map
326
// an element index to a state index.
327
//
328
int curState = 0;
329         for (int childIndex = 0; childIndex < length; childIndex++)
330         {
331             // Get the current element index out
332
final QName curElem = children[offset + childIndex];
333             // ignore mixed text
334
if (fMixed && curElem.localpart == null) {
335                 continue;
336             }
337
338             // Look up this child in our element map
339
int elemIndex = 0;
340             for (; elemIndex < fElemMapSize; elemIndex++)
341             {
342                 int type = fElemMapType[elemIndex] & 0x0f ;
343                 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
344                     //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
345
if (fElemMap[elemIndex].rawname == curElem.rawname) {
346                         break;
347                     }
348                 }
349                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
350                     String JavaDoc uri = fElemMap[elemIndex].uri;
351                     if (uri == null || uri == curElem.uri) {
352                         break;
353                     }
354                 }
355                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
356                     if (curElem.uri == null) {
357                         break;
358                     }
359                 }
360                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
361                     if (fElemMap[elemIndex].uri != curElem.uri) {
362                         break;
363                     }
364                 }
365             }
366
367             // If we didn't find it, then obviously not valid
368
if (elemIndex == fElemMapSize) {
369                 if (DEBUG_VALIDATE_CONTENT) {
370                     System.out.println("!!! didn't find it");
371
372                     System.out.println("curElem : " +curElem );
373                     for (int i=0; i<fElemMapSize; i++) {
374                         System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
375                         System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
376                     }
377                 }
378
379                 return childIndex;
380             }
381
382             //
383
// Look up the next state for this input symbol when in the
384
// current state.
385
//
386
curState = fTransTable[curState][elemIndex];
387
388             // If its not a legal transition, then invalid
389
if (curState == -1) {
390                 if (DEBUG_VALIDATE_CONTENT)
391                     System.out.println("!!! not a legal transition");
392                 return childIndex;
393             }
394         }
395
396         //
397
// We transitioned all the way through the input list. However, that
398
// does not mean that we ended in a final state. So check whether
399
// our ending state is a final state.
400
//
401
if (DEBUG_VALIDATE_CONTENT)
402             System.out.println("curState="+curState+", childCount="+length);
403         if (!fFinalStateFlags[curState])
404             return length;
405
406         // success!
407
return -1;
408     } // validate
409

410
411     //
412
// Private methods
413
//
414

415     /**
416      * Builds the internal DFA transition table from the given syntax tree.
417      *
418      * @param syntaxTree The syntax tree.
419      *
420      * @exception CMException Thrown if DFA cannot be built.
421      */

422     private void buildDFA(CMNode syntaxTree)
423     {
424         //
425
// The first step we need to take is to rewrite the content model
426
// using our CMNode objects, and in the process get rid of any
427
// repetition short cuts, converting them into '*' style repetitions
428
// or getting rid of repetitions altogether.
429
//
430
// The conversions done are:
431
//
432
// x+ -> (x|x*)
433
// x? -> (x|epsilon)
434
//
435
// This is a relatively complex scenario. What is happening is that
436
// we create a top level binary node of which the special EOC value
437
// is set as the right side node. The the left side is set to the
438
// rewritten syntax tree. The source is the original content model
439
// info from the decl pool. The rewrite is done by buildSyntaxTree()
440
// which recurses the decl pool's content of the element and builds
441
// a new tree in the process.
442
//
443
// Note that, during this operation, we set each non-epsilon leaf
444
// node's DFA state position and count the number of such leafs, which
445
// is left in the fLeafCount member.
446
//
447
// The nodeTmp object is passed in just as a temp node to use during
448
// the recursion. Otherwise, we'd have to create a new node on every
449
// level of recursion, which would be piggy in Java (as is everything
450
// for that matter.)
451
//
452

453     /* MODIFIED (Jan, 2001)
454      *
455      * Use following rules.
456      * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
457      * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x)
458      *
459      * The same computation of follow as x* is applied to x+
460      *
461      * The modification drastically reduces computation time of
462      * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
463      */

464
465         fQName.setValues(null, fEOCString, fEOCString, null);
466         CMLeaf nodeEOC = new CMLeaf(fQName);
467         fHeadNode = new CMBinOp
468         (
469             XMLContentSpec.CONTENTSPECNODE_SEQ
470             , syntaxTree
471             , nodeEOC
472         );
473
474         //
475
// And handle specially the EOC node, which also must be numbered
476
// and counted as a non-epsilon leaf node. It could not be handled
477
// in the above tree build because it was created before all that
478
// started. We save the EOC position since its used during the DFA
479
// building loop.
480
//
481
fEOCPos = fLeafCount;
482         nodeEOC.setPosition(fLeafCount++);
483
484         //
485
// Ok, so now we have to iterate the new tree and do a little more
486
// work now that we know the leaf count. One thing we need to do is
487
// to calculate the first and last position sets of each node. This
488
// is cached away in each of the nodes.
489
//
490
// Along the way we also set the leaf count in each node as the
491
// maximum state count. They must know this in order to create their
492
// first/last pos sets.
493
//
494
// We also need to build an array of references to the non-epsilon
495
// leaf nodes. Since we iterate it in the same way as before, this
496
// will put them in the array according to their position values.
497
//
498
fLeafList = new CMLeaf[fLeafCount];
499         fLeafListType = new int[fLeafCount];
500         postTreeBuildInit(fHeadNode, 0);
501
502         //
503
// And, moving onward... We now need to build the follow position
504
// sets for all the nodes. So we allocate an array of state sets,
505
// one for each leaf node (i.e. each DFA position.)
506
//
507
fFollowList = new CMStateSet[fLeafCount];
508         for (int index = 0; index < fLeafCount; index++)
509             fFollowList[index] = new CMStateSet(fLeafCount);
510         calcFollowList(fHeadNode);
511         //
512
// And finally the big push... Now we build the DFA using all the
513
// states and the tree we've built up. First we set up the various
514
// data structures we are going to use while we do this.
515
//
516
// First of all we need an array of unique element names in our
517
// content model. For each transition table entry, we need a set of
518
// contiguous indices to represent the transitions for a particular
519
// input element. So we need to a zero based range of indexes that
520
// map to element types. This element map provides that mapping.
521
//
522
fElemMap = new QName[fLeafCount];
523         fElemMapType = new int[fLeafCount];
524         fElemMapSize = 0;
525         for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
526         {
527             fElemMap[outIndex] = new QName();
528
529             /****
530             if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
531                 if (fLeafNameTypeVector == null) {
532                     fLeafNameTypeVector = new ContentLeafNameTypeVector();
533                 }
534             }
535             /****/

536
537             // Get the current leaf's element index
538
final QName element = fLeafList[outIndex].getElement();
539
540             // See if the current leaf node's element index is in the list
541
int inIndex = 0;
542             for (; inIndex < fElemMapSize; inIndex++)
543             {
544                 if (fElemMap[inIndex].rawname == element.rawname) {
545                     break;
546                 }
547             }
548
549             // If it was not in the list, then add it, if not the EOC node
550
if (inIndex == fElemMapSize) {
551                 fElemMap[fElemMapSize].setValues(element);
552                 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
553                 fElemMapSize++;
554             }
555         }
556         // set up the fLeafNameTypeVector object if there is one.
557
/*****
558         if (fLeafNameTypeVector != null) {
559             fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
560         }
561
562     /***
563     * Optimization(Jan, 2001); We sort fLeafList according to
564     * elemIndex which is *uniquely* associated to each leaf.
565     * We are *assuming* that each element appears in at least one leaf.
566     **/

567
568     int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
569     int fSortCount = 0;
570
571     for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
572         for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
573             final QName leaf = fLeafList[leafIndex].getElement();
574             final QName element = fElemMap[elemIndex];
575             if (leaf.rawname == element.rawname) {
576                 fLeafSorter[fSortCount++] = leafIndex;
577             }
578         }
579         fLeafSorter[fSortCount++] = -1;
580     }
581
582     /* Optimization(Jan, 2001) */
583
584         //
585
// Next lets create some arrays, some that that hold transient
586
// information during the DFA build and some that are permament.
587
// These are kind of sticky since we cannot know how big they will
588
// get, but we don't want to use any Java collections because of
589
// performance.
590
//
591
// Basically they will probably be about fLeafCount*2 on average,
592
// but can be as large as 2^(fLeafCount*2), worst case. So we start
593
// with fLeafCount*4 as a middle ground. This will be very unlikely
594
// to ever have to expand, though it if does, the overhead will be
595
// somewhat ugly.
596
//
597
int curArraySize = fLeafCount * 4;
598         CMStateSet[] statesToDo = new CMStateSet[curArraySize];
599         fFinalStateFlags = new boolean[curArraySize];
600         fTransTable = new int[curArraySize][];
601
602         //
603
// Ok we start with the initial set as the first pos set of the
604
// head node (which is the seq node that holds the content model
605
// and the EOC node.)
606
//
607
CMStateSet setT = fHeadNode.firstPos();
608
609         //
610
// Init our two state flags. Basically the unmarked state counter
611
// is always chasing the current state counter. When it catches up,
612
// that means we made a pass through that did not add any new states
613
// to the lists, at which time we are done. We could have used a
614
// expanding array of flags which we used to mark off states as we
615
// complete them, but this is easier though less readable maybe.
616
//
617
int unmarkedState = 0;
618         int curState = 0;
619
620         //
621
// Init the first transition table entry, and put the initial state
622
// into the states to do list, then bump the current state.
623
//
624
fTransTable[curState] = makeDefStateList();
625         statesToDo[curState] = setT;
626         curState++;
627
628         /* Optimization(Jan, 2001); This is faster for
629          * a large content model such as, "(t001+|t002+|.... |t500+)".
630          */

631
632     java.util.Hashtable JavaDoc stateTable = new java.util.Hashtable JavaDoc();
633
634         /* Optimization(Jan, 2001) */
635
636         //
637
// Ok, almost done with the algorithm... We now enter the
638
// loop where we go until the states done counter catches up with
639
// the states to do counter.
640
//
641
while (unmarkedState < curState)
642         {
643             //
644
// Get the first unmarked state out of the list of states to do.
645
// And get the associated transition table entry.
646
//
647
setT = statesToDo[unmarkedState];
648             int[] transEntry = fTransTable[unmarkedState];
649
650             // Mark this one final if it contains the EOC state
651
fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
652
653             // Bump up the unmarked state count, marking this state done
654
unmarkedState++;
655
656             // Loop through each possible input symbol in the element map
657
CMStateSet newSet = null;
658         /* Optimization(Jan, 2001) */
659             int sorterIndex = 0;
660         /* Optimization(Jan, 2001) */
661             for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
662             {
663                 //
664
// Build up a set of states which is the union of all of
665
// the follow sets of DFA positions that are in the current
666
// state. If we gave away the new set last time through then
667
// create a new one. Otherwise, zero out the existing one.
668
//
669
if (newSet == null)
670                     newSet = new CMStateSet(fLeafCount);
671                 else
672                     newSet.zeroBits();
673
674         /* Optimization(Jan, 2001) */
675                 int leafIndex = fLeafSorter[sorterIndex++];
676
677                 while (leafIndex != -1) {
678             // If this leaf index (DFA position) is in the current set...
679
if (setT.getBit(leafIndex))
680                     {
681                         //
682
// If this leaf is the current input symbol, then we
683
// want to add its follow list to the set of states to
684
// transition to from the current state.
685
//
686
newSet.union(fFollowList[leafIndex]);
687                             }
688
689                    leafIndex = fLeafSorter[sorterIndex++];
690     }
691         /* Optimization(Jan, 2001) */
692
693                 //
694
// If this new set is not empty, then see if its in the list
695
// of states to do. If not, then add it.
696
//
697
if (!newSet.isEmpty())
698                 {
699                     //
700
// Search the 'states to do' list to see if this new
701
// state set is already in there.
702
//
703

704         /* Optimization(Jan, 2001) */
705         Integer JavaDoc stateObj = (Integer JavaDoc)stateTable.get(newSet);
706         int stateIndex = (stateObj == null ? curState : stateObj.intValue());
707         /* Optimization(Jan, 2001) */
708
709                     // If we did not find it, then add it
710
if (stateIndex == curState)
711                     {
712                         //
713
// Put this new state into the states to do and init
714
// a new entry at the same index in the transition
715
// table.
716
//
717
statesToDo[curState] = newSet;
718                         fTransTable[curState] = makeDefStateList();
719
720         /* Optimization(Jan, 2001) */
721                         stateTable.put(newSet, new Integer JavaDoc(curState));
722         /* Optimization(Jan, 2001) */
723
724                         // We now have a new state to do so bump the count
725
curState++;
726
727                         //
728
// Null out the new set to indicate we adopted it.
729
// This will cause the creation of a new set on the
730
// next time around the loop.
731
//
732
newSet = null;
733                     }
734
735                     //
736
// Now set this state in the transition table's entry
737
// for this element (using its index), with the DFA
738
// state we will move to from the current state when we
739
// see this input element.
740
//
741
transEntry[elemIndex] = stateIndex;
742
743                     // Expand the arrays if we're full
744
if (curState == curArraySize)
745                     {
746                         //
747
// Yikes, we overflowed the initial array size, so
748
// we've got to expand all of these arrays. So adjust
749
// up the size by 50% and allocate new arrays.
750
//
751
final int newSize = (int)(curArraySize * 1.5);
752                         CMStateSet[] newToDo = new CMStateSet[newSize];
753                         boolean[] newFinalFlags = new boolean[newSize];
754                         int[][] newTransTable = new int[newSize][];
755
756                         // Copy over all of the existing content
757
for (int expIndex = 0; expIndex < curArraySize; expIndex++)
758                         {
759                             newToDo[expIndex] = statesToDo[expIndex];
760                             newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
761                             newTransTable[expIndex] = fTransTable[expIndex];
762                         }
763
764                         // Store the new array size
765
curArraySize = newSize;
766                         statesToDo = newToDo;
767                         fFinalStateFlags = newFinalFlags;
768                         fTransTable = newTransTable;
769                     }
770                 }
771             }
772         }
773
774         // Check to see if we can set the fEmptyContentIsValid flag.
775
fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
776
777         //
778
// And now we can say bye bye to the temp representation since we've
779
// built the DFA.
780
//
781
if (DEBUG_VALIDATE_CONTENT)
782             dumpTree(fHeadNode, 0);
783         fHeadNode = null;
784         fLeafList = null;
785         fFollowList = null;
786
787     }
788
789     /**
790      * Calculates the follow list of the current node.
791      *
792      * @param nodeCur The curent node.
793      *
794      * @exception CMException Thrown if follow list cannot be calculated.
795      */

796     private void calcFollowList(CMNode nodeCur)
797     {
798         // Recurse as required
799
if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
800         {
801             // Recurse only
802
calcFollowList(((CMBinOp)nodeCur).getLeft());
803             calcFollowList(((CMBinOp)nodeCur).getRight());
804         }
805          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
806         {
807             // Recurse first
808
calcFollowList(((CMBinOp)nodeCur).getLeft());
809             calcFollowList(((CMBinOp)nodeCur).getRight());
810
811             //
812
// Now handle our level. We use our left child's last pos
813
// set and our right child's first pos set, so go ahead and
814
// get them ahead of time.
815
//
816
final CMStateSet last = ((CMBinOp)nodeCur).getLeft().lastPos();
817             final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
818
819             //
820
// Now, for every position which is in our left child's last set
821
// add all of the states in our right child's first set to the
822
// follow set for that position.
823
//
824
for (int index = 0; index < fLeafCount; index++)
825             {
826                 if (last.getBit(index))
827                     fFollowList[index].union(first);
828             }
829         }
830          /***
831          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
832         {
833             // Recurse first
834             calcFollowList(((CMUniOp)nodeCur).getChild());
835
836             //
837             // Now handle our level. We use our own first and last position
838             // sets, so get them up front.
839             //
840             final CMStateSet first = nodeCur.firstPos();
841             final CMStateSet last = nodeCur.lastPos();
842
843             //
844             // For every position which is in our last position set, add all
845             // of our first position states to the follow set for that
846             // position.
847             //
848             for (int index = 0; index < fLeafCount; index++)
849             {
850                 if (last.getBit(index))
851                     fFollowList[index].union(first);
852             }
853         }
854          else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
855               || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
856         {
857             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
858         }
859         /***/

860          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
861         || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
862         {
863             // Recurse first
864
calcFollowList(((CMUniOp)nodeCur).getChild());
865
866             //
867
// Now handle our level. We use our own first and last position
868
// sets, so get them up front.
869
//
870
final CMStateSet first = nodeCur.firstPos();
871             final CMStateSet last = nodeCur.lastPos();
872
873             //
874
// For every position which is in our last position set, add all
875
// of our first position states to the follow set for that
876
// position.
877
//
878
for (int index = 0; index < fLeafCount; index++)
879             {
880                 if (last.getBit(index))
881                     fFollowList[index].union(first);
882             }
883         }
884        
885         else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
886             // Recurse only
887
calcFollowList(((CMUniOp)nodeCur).getChild());
888         }
889          /***/
890     }
891
892     /**
893      * Dumps the tree of the current node to standard output.
894      *
895      * @param nodeCur The current node.
896      * @param level The maximum levels to output.
897      *
898      * @exception CMException Thrown on error.
899      */

900     private void dumpTree(CMNode nodeCur, int level)
901     {
902         for (int index = 0; index < level; index++)
903             System.out.print(" ");
904
905         int type = nodeCur.type();
906         if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
907         || (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
908         {
909             if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
910                 System.out.print("Choice Node ");
911             else
912                 System.out.print("Seq Node ");
913
914             if (nodeCur.isNullable())
915                 System.out.print("Nullable ");
916
917             System.out.print("firstPos=");
918             System.out.print(nodeCur.firstPos().toString());
919             System.out.print(" lastPos=");
920             System.out.println(nodeCur.lastPos().toString());
921
922             dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
923             dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
924         }
925          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
926         {
927             System.out.print("Rep Node ");
928
929             if (nodeCur.isNullable())
930                 System.out.print("Nullable ");
931
932             System.out.print("firstPos=");
933             System.out.print(nodeCur.firstPos().toString());
934             System.out.print(" lastPos=");
935             System.out.println(nodeCur.lastPos().toString());
936
937             dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
938         }
939          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
940         {
941             System.out.print
942             (
943                 "Leaf: (pos="
944                 + ((CMLeaf)nodeCur).getPosition()
945                 + "), "
946                 + ((CMLeaf)nodeCur).getElement()
947                 + "(elemIndex="
948                 + ((CMLeaf)nodeCur).getElement()
949                 + ") "
950             );
951
952             if (nodeCur.isNullable())
953                 System.out.print(" Nullable ");
954
955             System.out.print("firstPos=");
956             System.out.print(nodeCur.firstPos().toString());
957             System.out.print(" lastPos=");
958             System.out.println(nodeCur.lastPos().toString());
959         }
960          else
961         {
962             throw new RuntimeException JavaDoc("ImplementationMessages.VAL_NIICM");
963         }
964     }
965
966
967     /**
968      * -1 is used to represent bad transitions in the transition table
969      * entry for each state. So each entry is initialized to an all -1
970      * array. This method creates a new entry and initializes it.
971      */

972     private int[] makeDefStateList()
973     {
974         int[] retArray = new int[fElemMapSize];
975         for (int index = 0; index < fElemMapSize; index++)
976             retArray[index] = -1;
977         return retArray;
978     }
979
980     /** Post tree build initialization. */
981     private int postTreeBuildInit(CMNode nodeCur, int curIndex)
982     {
983         // Set the maximum states on this node
984
nodeCur.setMaxStates(fLeafCount);
985
986         // Recurse as required
987
if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
988             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
989             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
990             // REVISIT: Don't waste these structures.
991
QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI());
992             fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
993             fLeafListType[curIndex] = nodeCur.type();
994             curIndex++;
995         }
996         else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
997         || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
998         {
999             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
1000            curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
1001        }
1002         else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
1003         || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
1004         || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)
1005        {
1006            curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
1007        }
1008         else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
1009        {
1010            //
1011
// Put this node in the leaf list at the current index if its
1012
// a non-epsilon leaf.
1013
//
1014
final QName node = ((CMLeaf)nodeCur).getElement();
1015            if (node.localpart != fEpsilonString) {
1016                fLeafList[curIndex] = (CMLeaf)nodeCur;
1017                fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
1018                curIndex++;
1019            }
1020        }
1021         else
1022        {
1023            throw new RuntimeException JavaDoc("ImplementationMessages.VAL_NIICM: type="+nodeCur.type());
1024        }
1025        return curIndex;
1026    }
1027
1028} // class DFAContentModel
1029
Popular Tags