DFAContentModel


1   /*
2    * Copyright 1999-2002,2004 The Apache Software Foundation.
3    * 
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * 
8    *      http://www.apache.org/licenses/LICENSE-2.0
9    * 
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  
17  package org.apache.xerces.impl.dtd.models;
18  
19  import org.apache.xerces.xni.QName;
20  import org.apache.xerces.impl.dtd.XMLContentSpec;
21  
22  /**
23   * DFAContentModel is the derivative of ContentModel that does
24   * all of the non-trivial element content validation. This class does 
25   * the conversion from the regular expression to the DFA that 
26   * it then uses in its validation algorithm.
27   * <p>
28   * <b>Note:</b> Upstream work insures that this class will never see
29   * a content model with PCDATA in it. Any model with PCDATA is 'mixed' 
30   * and is handled via the MixedContentModel class since mixed models 
31   * are very constrained in form and easily handled via a special case. 
32   * This also makes implementation of this class much easier.
33   * 
34   * @xerces.internal
35   * 
36   * @version $Id: DFAContentModel.java,v 1.7 2004/10/04 22:00:42 mrglavas Exp $
37   */
38  public class DFAContentModel
39      implements ContentModelValidator {
40  
41      //
42      // Constants
43      //
44      // special strings
45  
46      /** Epsilon string. */
47      private static String   fEpsilonString = "<<CMNODE_EPSILON>>";
48  
49      /** End-of-content string. */
50      private static String   fEOCString = "<<CMNODE_EOC>>";
51  
52      /** initializing static members **/
53      static {
54          fEpsilonString = fEpsilonString.intern();
55          fEOCString = fEOCString.intern();
56      }
57  
58      // debugging
59  
60      /** Set to true to debug content model validation. */
61      private static final boolean DEBUG_VALIDATE_CONTENT = false;
62  
63      //
64      // Data
65      //
66  
67      /* this is the EquivClassComparator object */
68      //private EquivClassComparator comparator = null;
69  
70      /**
71       * This is the map of unique input symbol elements to indices into
72       * each state's per-input symbol transition table entry. This is part
73       * of the built DFA information that must be kept around to do the
74       * actual validation.
75       */
76      private QName fElemMap[] = null;
77  
78      /**
79       * This is a map of whether the element map contains information 
80       * related to ANY models.
81       */
82      private int fElemMapType[] = null;
83  
84      /** The element map size. */
85      private int fElemMapSize = 0;
86  
87      /** Boolean to distinguish Schema Mixed Content */
88      private boolean fMixed;
89  
90      /**
91       * The NFA position of the special EOC (end of content) node. This
92       * is saved away since it's used during the DFA build.
93       */
94      private int fEOCPos = 0;
95  
96  
97      /**
98       * This is an array of booleans, one per state (there are
99       * fTransTableSize states in the DFA) that indicates whether that
100      * state is a final state.
101      */
102     private boolean fFinalStateFlags[] = null;
103 
104     /**
105      * The list of follow positions for each NFA position (i.e. for each
106      * non-epsilon leaf node.) This is only used during the building of
107      * the DFA, and is let go afterwards.
108      */
109     private CMStateSet fFollowList[] = null;
110 
111     /**
112      * This is the head node of our intermediate representation. It is
113      * only non-null during the building of the DFA (just so that it
114      * does not have to be passed all around.) Once the DFA is built,
115      * this is no longer required so its nulled out.
116      */
117     private CMNode fHeadNode = null;
118 
119     /**
120      * The count of leaf nodes. This is an important number that set some
121      * limits on the sizes of data structures in the DFA process.
122      */
123     private int fLeafCount = 0;
124 
125     /**
126      * An array of non-epsilon leaf nodes, which is used during the DFA
127      * build operation, then dropped.
128      */
129     private CMLeaf fLeafList[] = null;
130 
131     /** Array mapping ANY types to the leaf list. */
132     private int fLeafListType[] = null;
133 
134     //private ContentLeafNameTypeVector fLeafNameTypeVector = null;
135 
136     /**
137      * The string pool of our parser session. This is set during construction
138      * and kept around.
139      */
140     //private StringPool fStringPool = null;
141 
142     /**
143      * This is the transition table that is the main by product of all
144      * of the effort here. It is an array of arrays of ints. The first
145      * dimension is the number of states we end up with in the DFA. The
146      * second dimensions is the number of unique elements in the content
147      * model (fElemMapSize). Each entry in the second dimension indicates
148      * the new state given that input for the first dimension's start
149      * state.
150      * <p>
151      * The fElemMap array handles mapping from element indexes to
152      * positions in the second dimension of the transition table.
153      */
154     private int fTransTable[][] = null;
155 
156     /**
157      * The number of valid entries in the transition table, and in the other
158      * related tables such as fFinalStateFlags.
159      */
160     private int fTransTableSize = 0;
161 
162     /**
163      * Flag that indicates that even though we have a "complicated"
164      * content model, it is valid to have no content. In other words,
165      * all parts of the content model are optional. For example:
166      * <pre>
167      *      &lt;!ELEMENT AllOptional (Optional*,NotRequired?)&gt;
168      * </pre>
169      */
170     private boolean fEmptyContentIsValid = false;
171 
172     // temp variables
173 
174     /** Temporary qualified name. */
175     private QName fQName = new QName();
176 
177     //
178     // Constructors
179     //
180 
181 
182     //
183     // Constructors
184     //
185 
186     /**
187      * Constructs a DFA content model.
188      *
189      * @param syntaxTree    The syntax tree of the content model.
190      * @param leafCount     The number of leaves.
191      * @param mixed
192      *
193      */
194     public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
195         // Store away our index and pools in members
196         //fStringPool = stringPool;
197         fLeafCount = leafCount;
198 
199 
200         // this is for Schema Mixed Content
201         fMixed = mixed;
202 
203         //
204         //  Ok, so lets grind through the building of the DFA. This method
205         //  handles the high level logic of the algorithm, but it uses a
206         //  number of helper classes to do its thing.
207         //
208         //  In order to avoid having hundreds of references to the error and
209         //  string handlers around, this guy and all of his helper classes
210         //  just throw a simple exception and we then pass it along.
211         //
212         buildDFA(syntaxTree);
213     }
214 
215     //
216     // ContentModelValidator methods
217     //
218 
219     /**
220      * Check that the specified content is valid according to this
221      * content model. This method can also be called to do 'what if' 
222      * testing of content models just to see if they would be valid.
223      * <p>
224      * A value of -1 in the children array indicates a PCDATA node. All other 
225      * indexes will be positive and represent child elements. The count can be
226      * zero, since some elements have the EMPTY content model and that must be 
227      * confirmed.
228      *
229      * @param children The children of this element.  Each integer is an index within
230      *                 the <code>StringPool</code> of the child element name.  An index
231      *                 of -1 is used to indicate an occurrence of non-whitespace character
232      *                 data.
233      * @param offset Offset into the array where the children starts.
234      * @param length The number of entries in the <code>children</code> array.
235      *
236      * @return The value -1 if fully valid, else the 0 based index of the child
237      *         that first failed. If the value returned is equal to the number
238      *         of children, then the specified children are valid but additional
239      *         content is required to reach a valid ending state.
240      *
241      */
242     public int validate(QName[] children, int offset, int length) {
243 
244         if (DEBUG_VALIDATE_CONTENT) 
245             System.out.println("DFAContentModel#validateContent");
246 
247         //
248         // A DFA content model must *always* have at least 1 child
249         // so a failure is given if no children present.
250         // 
251         // Defect 782: This is an incorrect statement because a DFA
252         // content model is also used for constructions such as:
253         //
254         //     (Optional*,NotRequired?)
255         //
256         // where a perfectly valid content would be NO CHILDREN.
257         // Therefore, if there are no children, we must check to
258         // see if the CMNODE_EOC marker is a valid start state! -Ac
259         //
260         if (length == 0) {
261             if (DEBUG_VALIDATE_CONTENT) {
262                 System.out.println("!!! no children");
263                 System.out.println("elemMap="+fElemMap);
264                 for (int i = 0; i < fElemMap.length; i++) {
265                     String   uri = fElemMap[i].uri;
266                     String   localpart = fElemMap[i].localpart;
267                     
268                     System.out.println("fElemMap["+i+"]="+uri+","+
269                                        localpart+" ("+
270                                        uri+", "+
271                                        localpart+
272                                        ')');
273                                        
274                 }
275                 System.out.println("EOCIndex="+fEOCString);
276             }
277 
278             return fEmptyContentIsValid ? -1 : 0;
279 
280         } // if child count == 0
281 
282         //
283         //  Lets loop through the children in the array and move our way
284         //  through the states. Note that we use the fElemMap array to map
285         //  an element index to a state index.
286         //
287         int curState = 0;
288         for (int childIndex = 0; childIndex < length; childIndex++)
289         {
290             // Get the current element index out
291             final QName curElem = children[offset + childIndex];
292             // ignore mixed text
293             if (fMixed && curElem.localpart == null) {
294                 continue;
295             }
296 
297             // Look up this child in our element map
298             int elemIndex = 0;
299             for (; elemIndex < fElemMapSize; elemIndex++)
300             {
301                 int type = fElemMapType[elemIndex] & 0x0f ;
302                 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
303                     //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
304                     if (fElemMap[elemIndex].rawname == curElem.rawname) {
305                         break;
306                     }
307                 }
308                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
309                     String   uri = fElemMap[elemIndex].uri;
310                     if (uri == null || uri == curElem.uri) {
311                         break;
312                     }
313                 }
314                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
315                     if (curElem.uri == null) {
316                         break;
317                     }
318                 }
319                 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
320                     if (fElemMap[elemIndex].uri != curElem.uri) {
321                         break;
322                     }
323                 }
324             }
325 
326             // If we didn't find it, then obviously not valid
327             if (elemIndex == fElemMapSize) {
328                 if (DEBUG_VALIDATE_CONTENT) {
329                     System.out.println("!!! didn't find it");
330 
331                     System.out.println("curElem : " +curElem );
332                     for (int i=0; i<fElemMapSize; i++) {
333                         System.out.println("fElemMap["+i+"] = " +fElemMap[i] );
334                         System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] );
335                     }
336                 }
337 
338                 return childIndex;
339             }
340 
341             //
342             //  Look up the next state for this input symbol when in the
343             //  current state.
344             //
345             curState = fTransTable[curState][elemIndex];
346 
347             // If its not a legal transition, then invalid
348             if (curState == -1) {
349                 if (DEBUG_VALIDATE_CONTENT) 
350                     System.out.println("!!! not a legal transition");
351                 return childIndex;
352             }
353         }
354 
355         //
356         //  We transitioned all the way through the input list. However, that
357         //  does not mean that we ended in a final state. So check whether
358         //  our ending state is a final state.
359         //
360         if (DEBUG_VALIDATE_CONTENT) 
361             System.out.println("curState="+curState+", childCount="+length);
362         if (!fFinalStateFlags[curState])
363             return length;
364 
365         // success!
366         return -1;
367     } // validate
368 
369 
370     //
371     // Private methods
372     //
373 
374     /** 
375      * Builds the internal DFA transition table from the given syntax tree.
376      *
377      * @param syntaxTree The syntax tree.
378      *
379      * @exception CMException Thrown if DFA cannot be built.
380      */
381     private void buildDFA(CMNode syntaxTree) 
382     {
383         //
384         //  The first step we need to take is to rewrite the content model
385         //  using our CMNode objects, and in the process get rid of any
386         //  repetition short cuts, converting them into '*' style repetitions
387         //  or getting rid of repetitions altogether.
388         //
389         //  The conversions done are:
390         //
391         //  x+ -> (x|x*)
392         //  x? -> (x|epsilon)
393         //
394         //  This is a relatively complex scenario. What is happening is that
395         //  we create a top level binary node of which the special EOC value
396         //  is set as the right side node. The the left side is set to the
397         //  rewritten syntax tree. The source is the original content model
398         //  info from the decl pool. The rewrite is done by buildSyntaxTree()
399         //  which recurses the decl pool's content of the element and builds
400         //  a new tree in the process.
401         //
402         //  Note that, during this operation, we set each non-epsilon leaf
403         //  node's DFA state position and count the number of such leafs, which
404         //  is left in the fLeafCount member.
405         //
406         //  The nodeTmp object is passed in just as a temp node to use during
407         //  the recursion. Otherwise, we'd have to create a new node on every
408         //  level of recursion, which would be piggy in Java (as is everything
409         //  for that matter.)
410         //
411 
412     /* MODIFIED (Jan, 2001) 
413      *
414      * Use following rules.
415      *   nullable(x+) := nullable(x), first(x+) := first(x),  last(x+) := last(x)
416      *   nullable(x?) := true, first(x?) := first(x),  last(x?) := last(x)
417      *
418      * The same computation of follow as x* is applied to x+
419      *
420      * The modification drastically reduces computation time of
421      * "(a, (b, a+, (c, (b, a+)+, a+, (d,  (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
422      */
423 
424         fQName.setValues(null, fEOCString, fEOCString, null);
425         CMLeaf nodeEOC = new CMLeaf(fQName);
426         fHeadNode = new CMBinOp
427         (
428             XMLContentSpec.CONTENTSPECNODE_SEQ
429             , syntaxTree
430             , nodeEOC
431         );
432 
433         //
434         //  And handle specially the EOC node, which also must be numbered
435         //  and counted as a non-epsilon leaf node. It could not be handled
436         //  in the above tree build because it was created before all that
437         //  started. We save the EOC position since its used during the DFA
438         //  building loop.
439         //
440         fEOCPos = fLeafCount;
441         nodeEOC.setPosition(fLeafCount++);
442 
443         //
444         //  Ok, so now we have to iterate the new tree and do a little more
445         //  work now that we know the leaf count. One thing we need to do is
446         //  to calculate the first and last position sets of each node. This
447         //  is cached away in each of the nodes.
448         //
449         //  Along the way we also set the leaf count in each node as the
450         //  maximum state count. They must know this in order to create their
451         //  first/last pos sets.
452         //
453         //  We also need to build an array of references to the non-epsilon
454         //  leaf nodes. Since we iterate it in the same way as before, this
455         //  will put them in the array according to their position values.
456         //
457         fLeafList = new CMLeaf[fLeafCount];
458         fLeafListType = new int[fLeafCount];
459         postTreeBuildInit(fHeadNode, 0);
460 
461         //
462         //  And, moving onward... We now need to build the follow position
463         //  sets for all the nodes. So we allocate an array of state sets,
464         //  one for each leaf node (i.e. each DFA position.)
465         //
466         fFollowList = new CMStateSet[fLeafCount];
467         for (int index = 0; index < fLeafCount; index++)
468             fFollowList[index] = new CMStateSet(fLeafCount);
469         calcFollowList(fHeadNode);
470         //
471         //  And finally the big push... Now we build the DFA using all the
472         //  states and the tree we've built up. First we set up the various
473         //  data structures we are going to use while we do this.
474         //
475         //  First of all we need an array of unique element names in our
476         //  content model. For each transition table entry, we need a set of
477         //  contiguous indices to represent the transitions for a particular
478         //  input element. So we need to a zero based range of indexes that
479         //  map to element types. This element map provides that mapping.
480         //
481         fElemMap = new QName[fLeafCount];
482         fElemMapType = new int[fLeafCount];
483         fElemMapSize = 0;
484         for (int outIndex = 0; outIndex < fLeafCount; outIndex++)
485         {
486             fElemMap[outIndex] = new QName();
487 
488             /****
489             if ( (fLeafListType[outIndex] & 0x0f) != 0 ) {
490                 if (fLeafNameTypeVector == null) {
491                     fLeafNameTypeVector = new ContentLeafNameTypeVector();
492                 }
493             }
494             /****/
495 
496             // Get the current leaf's element index
497             final QName element = fLeafList[outIndex].getElement();
498 
499             // See if the current leaf node's element index is in the list
500             int inIndex = 0;
501             for (; inIndex < fElemMapSize; inIndex++)
502             {
503                 if (fElemMap[inIndex].rawname == element.rawname) {
504                     break;
505                 }
506             }
507 
508             // If it was not in the list, then add it, if not the EOC node
509             if (inIndex == fElemMapSize) {
510                 fElemMap[fElemMapSize].setValues(element);
511                 fElemMapType[fElemMapSize] = fLeafListType[outIndex];
512                 fElemMapSize++;
513             }
514         }
515         // set up the fLeafNameTypeVector object if there is one.
516         /*****
517         if (fLeafNameTypeVector != null) {
518             fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize);
519         }
520 
521     /*** 
522     * Optimization(Jan, 2001); We sort fLeafList according to 
523     * elemIndex which is *uniquely* associated to each leaf.  
524     * We are *assuming* that each element appears in at least one leaf.
525     **/
526 
527     int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
528     int fSortCount = 0;
529 
530     for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
531         for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
532             final QName leaf = fLeafList[leafIndex].getElement();
533             final QName element = fElemMap[elemIndex];
534             if (leaf.rawname == element.rawname) {
535                 fLeafSorter[fSortCount++] = leafIndex;
536             }
537         }
538         fLeafSorter[fSortCount++] = -1;
539     }
540 
541     /* Optimization(Jan, 2001) */
542 
543         //
544         //  Next lets create some arrays, some that that hold transient
545         //  information during the DFA build and some that are permament.
546         //  These are kind of sticky since we cannot know how big they will
547         //  get, but we don't want to use any Java collections because of
548         //  performance.
549         //
550         //  Basically they will probably be about fLeafCount*2 on average,
551         //  but can be as large as 2^(fLeafCount*2), worst case. So we start
552         //  with fLeafCount*4 as a middle ground. This will be very unlikely
553         //  to ever have to expand, though it if does, the overhead will be
554         //  somewhat ugly.
555         //
556         int curArraySize = fLeafCount * 4;
557         CMStateSet[] statesToDo = new CMStateSet[curArraySize];
558         fFinalStateFlags = new boolean[curArraySize];
559         fTransTable = new int[curArraySize][];
560 
561         //
562         //  Ok we start with the initial set as the first pos set of the
563         //  head node (which is the seq node that holds the content model
564         //  and the EOC node.)
565         //
566         CMStateSet setT = fHeadNode.firstPos();
567 
568         //
569         //  Init our two state flags. Basically the unmarked state counter
570         //  is always chasing the current state counter. When it catches up,
571         //  that means we made a pass through that did not add any new states
572         //  to the lists, at which time we are done. We could have used a
573         //  expanding array of flags which we used to mark off states as we
574         //  complete them, but this is easier though less readable maybe.
575         //
576         int unmarkedState = 0;
577         int curState = 0;
578 
579         //
580         //  Init the first transition table entry, and put the initial state
581         //  into the states to do list, then bump the current state.
582         //
583         fTransTable[curState] = makeDefStateList();
584         statesToDo[curState] = setT;
585         curState++;
586 
587         /* Optimization(Jan, 2001); This is faster for
588          * a large content model such as, "(t001+|t002+|.... |t500+)".
589          */
590 
591     java.util.Hashtable   stateTable = new java.util.Hashtable  ();
592 
593         /* Optimization(Jan, 2001) */
594 
595         //
596         //  Ok, almost done with the algorithm... We now enter the
597         //  loop where we go until the states done counter catches up with
598         //  the states to do counter.
599         //
600         while (unmarkedState < curState)
601         {
602             //
603             //  Get the first unmarked state out of the list of states to do.
604             //  And get the associated transition table entry.
605             //
606             setT = statesToDo[unmarkedState];
607             int[] transEntry = fTransTable[unmarkedState];
608 
609             // Mark this one final if it contains the EOC state
610             fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos);
611 
612             // Bump up the unmarked state count, marking this state done
613             unmarkedState++;
614 
615             // Loop through each possible input symbol in the element map
616             CMStateSet newSet = null;
617         /* Optimization(Jan, 2001) */
618             int sorterIndex = 0;
619         /* Optimization(Jan, 2001) */
620             for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++)
621             {
622                 //
623                 //  Build up a set of states which is the union of all of
624                 //  the follow sets of DFA positions that are in the current
625                 //  state. If we gave away the new set last time through then
626                 //  create a new one. Otherwise, zero out the existing one.
627                 //
628                 if (newSet == null)
629                     newSet = new CMStateSet(fLeafCount);
630                 else
631                     newSet.zeroBits();
632 
633         /* Optimization(Jan, 2001) */
634                 int leafIndex = fLeafSorter[sorterIndex++];
635 
636                 while (leafIndex != -1) {
637             // If this leaf index (DFA position) is in the current set...
638                     if (setT.getBit(leafIndex))
639                     {
640                         //
641                         //  If this leaf is the current input symbol, then we
642                         //  want to add its follow list to the set of states to
643                         //  transition to from the current state.
644                         //
645                                 newSet.union(fFollowList[leafIndex]);
646                             }
647 
648                    leafIndex = fLeafSorter[sorterIndex++];
649     }
650         /* Optimization(Jan, 2001) */
651 
652                 //
653                 //  If this new set is not empty, then see if its in the list
654                 //  of states to do. If not, then add it.
655                 //
656                 if (!newSet.isEmpty())
657                 {
658                     //
659                     //  Search the 'states to do' list to see if this new
660                     //  state set is already in there.
661                     //
662 
663         /* Optimization(Jan, 2001) */
664         Integer   stateObj = (Integer  )stateTable.get(newSet);
665         int stateIndex = (stateObj == null ? curState : stateObj.intValue());
666         /* Optimization(Jan, 2001) */
667 
668                     // If we did not find it, then add it
669                     if (stateIndex == curState)
670                     {
671                         //
672                         //  Put this new state into the states to do and init
673                         //  a new entry at the same index in the transition
674                         //  table.
675                         //
676                         statesToDo[curState] = newSet;
677                         fTransTable[curState] = makeDefStateList();
678 
679         /* Optimization(Jan, 2001) */
680                         stateTable.put(newSet, new Integer  (curState));
681         /* Optimization(Jan, 2001) */
682 
683                         // We now have a new state to do so bump the count
684                         curState++;
685 
686                         //
687                         //  Null out the new set to indicate we adopted it.
688                         //  This will cause the creation of a new set on the
689                         //  next time around the loop.
690                         //
691                         newSet = null;
692                     }
693 
694                     //
695                     //  Now set this state in the transition table's entry
696                     //  for this element (using its index), with the DFA
697                     //  state we will move to from the current state when we
698                     //  see this input element.
699                     //
700                     transEntry[elemIndex] = stateIndex;
701 
702                     // Expand the arrays if we're full
703                     if (curState == curArraySize)
704                     {
705                         //
706                         //  Yikes, we overflowed the initial array size, so
707                         //  we've got to expand all of these arrays. So adjust
708                         //  up the size by 50% and allocate new arrays.
709                         //
710                         final int newSize = (int)(curArraySize * 1.5);
711                         CMStateSet[] newToDo = new CMStateSet[newSize];
712                         boolean[] newFinalFlags = new boolean[newSize];
713                         int[][] newTransTable = new int[newSize][];
714 
715                         // Copy over all of the existing content
716                         for (int expIndex = 0; expIndex < curArraySize; expIndex++)
717                         {
718                             newToDo[expIndex] = statesToDo[expIndex];
719                             newFinalFlags[expIndex] = fFinalStateFlags[expIndex];
720                             newTransTable[expIndex] = fTransTable[expIndex];
721                         }
722 
723                         // Store the new array size
724                         curArraySize = newSize;
725                         statesToDo = newToDo;
726                         fFinalStateFlags = newFinalFlags;
727                         fTransTable = newTransTable;
728                     }
729                 }
730             }
731         }
732 
733         // Check to see if we can set the fEmptyContentIsValid flag.
734         fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable();
735 
736         //
737         //  And now we can say bye bye to the temp representation since we've
738         //  built the DFA.
739         //
740         if (DEBUG_VALIDATE_CONTENT) 
741             dumpTree(fHeadNode, 0);
742         fHeadNode = null;
743         fLeafList = null;
744         fFollowList = null;
745 
746     }
747 
748     /**
749      * Calculates the follow list of the current node.
750      *
751      * @param nodeCur The curent node.
752      *
753      * @exception CMException Thrown if follow list cannot be calculated.
754      */
755     private void calcFollowList(CMNode nodeCur) 
756     {
757         // Recurse as required
758         if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
759         {
760             // Recurse only
761             calcFollowList(((CMBinOp)nodeCur).getLeft());
762             calcFollowList(((CMBinOp)nodeCur).getRight());
763         }
764          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)
765         {
766             // Recurse first
767             calcFollowList(((CMBinOp)nodeCur).getLeft());
768             calcFollowList(((CMBinOp)nodeCur).getRight());
769 
770             //
771             //  Now handle our level. We use our left child's last pos
772             //  set and our right child's first pos set, so go ahead and
773             //  get them ahead of time.
774             //
775             final CMStateSet last  = ((CMBinOp)nodeCur).getLeft().lastPos();
776             final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos();
777 
778             //
779             //  Now, for every position which is in our left child's last set
780             //  add all of the states in our right child's first set to the
781             //  follow set for that position.
782             //
783             for (int index = 0; index < fLeafCount; index++)
784             {
785                 if (last.getBit(index))
786                     fFollowList[index].union(first);
787             }
788         }
789          /***
790          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
791         {
792             // Recurse first
793             calcFollowList(((CMUniOp)nodeCur).getChild());
794 
795             //
796             //  Now handle our level. We use our own first and last position
797             //  sets, so get them up front.
798             //
799             final CMStateSet first = nodeCur.firstPos();
800             final CMStateSet last  = nodeCur.lastPos();
801 
802             //
803             //  For every position which is in our last position set, add all
804             //  of our first position states to the follow set for that
805             //  position.
806             //
807             for (int index = 0; index < fLeafCount; index++)
808             {
809                 if (last.getBit(index))
810                     fFollowList[index].union(first);
811             }
812         }
813          else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
814               ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE))
815         {
816             throw new RuntimeException("ImplementationMessages.VAL_NIICM");
817         }
818         /***/
819          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
820         || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE)
821         {
822             // Recurse first
823             calcFollowList(((CMUniOp)nodeCur).getChild());
824 
825             //
826             //  Now handle our level. We use our own first and last position
827             //  sets, so get them up front.
828             //
829             final CMStateSet first = nodeCur.firstPos();
830             final CMStateSet last  = nodeCur.lastPos();
831 
832             //
833             //  For every position which is in our last position set, add all
834             //  of our first position states to the follow set for that
835             //  position.
836             //
837             for (int index = 0; index < fLeafCount; index++)
838             {
839                 if (last.getBit(index))
840                     fFollowList[index].union(first);
841             }
842         }
843        
844         else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) {
845             // Recurse only
846             calcFollowList(((CMUniOp)nodeCur).getChild());
847         }
848          /***/
849     }
850 
851     /**
852      * Dumps the tree of the current node to standard output.
853      *
854      * @param nodeCur The current node.
855      * @param level   The maximum levels to output.
856      *
857      * @exception CMException Thrown on error.
858      */
859     private void dumpTree(CMNode nodeCur, int level) 
860     {
861         for (int index = 0; index < level; index++)
862             System.out.print("   ");
863 
864         int type = nodeCur.type();
865         if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
866         ||  (type == XMLContentSpec.CONTENTSPECNODE_SEQ))
867         {
868             if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE)
869                 System.out.print("Choice Node ");
870             else
871                 System.out.print("Seq Node ");
872 
873             if (nodeCur.isNullable())
874                 System.out.print("Nullable ");
875 
876             System.out.print("firstPos=");
877             System.out.print(nodeCur.firstPos().toString());
878             System.out.print(" lastPos=");
879             System.out.println(nodeCur.lastPos().toString());
880 
881             dumpTree(((CMBinOp)nodeCur).getLeft(), level+1);
882             dumpTree(((CMBinOp)nodeCur).getRight(), level+1);
883         }
884          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE)
885         {
886             System.out.print("Rep Node ");
887 
888             if (nodeCur.isNullable())
889                 System.out.print("Nullable ");
890 
891             System.out.print("firstPos=");
892             System.out.print(nodeCur.firstPos().toString());
893             System.out.print(" lastPos=");
894             System.out.println(nodeCur.lastPos().toString());
895 
896             dumpTree(((CMUniOp)nodeCur).getChild(), level+1);
897         }
898          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
899         {
900             System.out.print
901             (
902                 "Leaf: (pos="
903                 + ((CMLeaf)nodeCur).getPosition()
904                 + "), "
905                 + ((CMLeaf)nodeCur).getElement()
906                 + "(elemIndex="
907                 + ((CMLeaf)nodeCur).getElement()
908                 + ") "
909             );
910 
911             if (nodeCur.isNullable())
912                 System.out.print(" Nullable ");
913 
914             System.out.print("firstPos=");
915             System.out.print(nodeCur.firstPos().toString());
916             System.out.print(" lastPos=");
917             System.out.println(nodeCur.lastPos().toString());
918         }
919          else
920         {
921             throw new RuntimeException  ("ImplementationMessages.VAL_NIICM");
922         }
923     }
924 
925 
926     /**
927      * -1 is used to represent bad transitions in the transition table
928      * entry for each state. So each entry is initialized to an all -1
929      * array. This method creates a new entry and initializes it.
930      */
931     private int[] makeDefStateList()
932     {
933         int[] retArray = new int[fElemMapSize];
934         for (int index = 0; index < fElemMapSize; index++)
935             retArray[index] = -1;
936         return retArray;
937     }
938 
939     /** Post tree build initialization. */
940     private int postTreeBuildInit(CMNode nodeCur, int curIndex) 
941     {
942         // Set the maximum states on this node
943         nodeCur.setMaxStates(fLeafCount);
944 
945         // Recurse as required
946         if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY ||
947             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL ||
948             (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
949             // REVISIT: Don't waste these structures.
950             QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI());
951             fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition());
952             fLeafListType[curIndex] = nodeCur.type();
953             curIndex++;
954         }
955         else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE)
956         ||  (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ))
957         {
958             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex);
959             curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex);
960         }
961          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE
962          || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE
963          || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)
964         {
965             curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex);
966         }
967          else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF)
968         {
969             //
970             //  Put this node in the leaf list at the current index if its
971             //  a non-epsilon leaf.
972             //
973              final QName node = ((CMLeaf)nodeCur).getElement();
974             if (node.localpart != fEpsilonString) {
975                 fLeafList[curIndex] = (CMLeaf)nodeCur;
976                 fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF;
977                 curIndex++;
978             }
979         }
980          else
981         {
982             throw new RuntimeException  ("ImplementationMessages.VAL_NIICM: type="+nodeCur.type());
983         }
984         return curIndex;
985     }
986 
987 } // class DFAContentModel
988
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags