KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > ibm > icu > text > RBBITableBuilder


1 /*
2 **********************************************************************
3 * Copyright (c) 2002-2006, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 */

7
8 package com.ibm.icu.text;
9
10 import java.util.List JavaDoc;
11 import java.util.ArrayList JavaDoc;
12 import java.util.Set JavaDoc;
13 import java.util.HashSet JavaDoc;
14 import java.util.SortedSet JavaDoc;
15 import java.util.TreeSet JavaDoc;
16 import java.util.Iterator JavaDoc;
17 import java.util.HashMap JavaDoc;
18 import java.util.Map JavaDoc;
19 import java.util.Collection JavaDoc;
20
21 import com.ibm.icu.impl.Assert;
22 import com.ibm.icu.lang.UCharacter;
23 import com.ibm.icu.lang.UProperty;
24
25 //
26
// class RBBITableBuilder is part of the RBBI rule compiler.
27
// It builds the state transition table used by the RBBI runtime
28
// from the expression syntax tree generated by the rule scanner.
29
//
30
// This class is part of the RBBI implementation only.
31
// There is no user-visible public API here.
32
//
33
class RBBITableBuilder {
34     
35     
36     
37     //
38
// RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
39
// one for each state.
40
static class RBBIStateDescriptor {
41         boolean fMarked;
42         int fAccepting;
43         int fLookAhead;
44         SortedSet JavaDoc fTagVals;
45         int fTagsIdx;
46         Set JavaDoc fPositions; // Set of parse tree positions associated
47
// with this state. Unordered (it's a set).
48
// UVector contents are RBBINode *
49

50         int[] fDtran; // Transitions out of this state.
51
// indexed by input character
52
// contents is int index of dest state
53
// in RBBITableBuilder.fDStates
54

55         RBBIStateDescriptor(int maxInputSymbol) {
56             fTagVals = new TreeSet JavaDoc();
57             fPositions = new HashSet JavaDoc();
58             fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized.
59
// It is indexed by input symbols, and will
60
// hold the next state number for each
61
// symbol.
62
}
63     }
64     
65     
66     private RBBIRuleBuilder fRB;
67     private int fRootIx; // The array index into RBBIRuleBuilder.fTreeRoots
68
// for the parse tree to operate on.
69
// Too bad Java can't do indirection more easily!
70

71     private List JavaDoc fDStates; // D states (Aho's terminology)
72
// Index is state number
73
// Contents are RBBIStateDescriptor pointers.
74

75     //-----------------------------------------------------------------------------
76
//
77
// Constructor for RBBITableBuilder.
78
//
79
// rootNode is an index into the array of root nodes that is held by
80
// the overall RBBIRuleBuilder.
81
//-----------------------------------------------------------------------------
82
RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) {
83            fRootIx = rootNodeIx;
84            fRB = rb;
85            fDStates = new ArrayList JavaDoc();
86         }
87
88
89
90  
91        //-----------------------------------------------------------------------------
92
//
93
// RBBITableBuilder::build - This is the main function for building the DFA state transtion
94
// table from the RBBI rules parse tree.
95
//
96
//-----------------------------------------------------------------------------
97
void build() {
98            // If there were no rules, just return. This situation can easily arise
99
// for the reverse rules.
100
if (fRB.fTreeRoots[fRootIx]==null) {
101                return;
102            }
103
104            //
105
// Walk through the tree, replacing any references to $variables with a copy of the
106
// parse tree for the substition expression.
107
//
108
fRB.fTreeRoots[fRootIx] = fRB.fTreeRoots[fRootIx].flattenVariables();
109            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("ftree")>=0) {
110                System.out.println("Parse tree after flattening variable references.");
111                fRB.fTreeRoots[fRootIx].printTree(true);
112            }
113
114            //
115
// If the rules contained any references to {bof}
116
// add a {bof} <cat> <former root of tree> to the
117
// tree. Means that all matches must start out with the
118
// {bof} fake character.
119
//
120
if (fRB.fSetBuilder.sawBOF()) {
121                RBBINode bofTop = new RBBINode(RBBINode.opCat);
122                RBBINode bofLeaf = new RBBINode(RBBINode.leafChar);
123                bofTop.fLeftChild = bofLeaf;
124                bofTop.fRightChild = fRB.fTreeRoots[fRootIx];
125                bofLeaf.fParent = bofTop;
126                bofLeaf.fVal = 2; // Reserved value for {bof}.
127
fRB.fTreeRoots[fRootIx] = bofTop;
128            }
129
130            //
131
// Add a unique right-end marker to the expression.
132
// Appears as a cat-node, left child being the original tree,
133
// right child being the end marker.
134
//
135
RBBINode cn = new RBBINode(RBBINode.opCat);
136            cn.fLeftChild = fRB.fTreeRoots[fRootIx];
137            fRB.fTreeRoots[fRootIx].fParent = cn;
138            cn.fRightChild = new RBBINode(RBBINode.endMark);
139            cn.fRightChild.fParent = cn;
140            fRB.fTreeRoots[fRootIx] = cn;
141
142            //
143
// Replace all references to UnicodeSets with the tree for the equivalent
144
// expression.
145
//
146
fRB.fTreeRoots[fRootIx].flattenSets();
147            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("stree")>=0) {
148                System.out.println("Parse tree after flattening Unicode Set references.");
149                fRB.fTreeRoots[fRootIx].printTree(true);
150            }
151
152
153            //
154
// calculate the functions nullable, firstpos, lastpos and followpos on
155
// nodes in the parse tree.
156
// See the alogrithm description in Aho.
157
// Understanding how this works by looking at the code alone will be
158
// nearly impossible.
159
//
160
calcNullable(fRB.fTreeRoots[fRootIx]);
161            calcFirstPos(fRB.fTreeRoots[fRootIx]);
162            calcLastPos(fRB.fTreeRoots[fRootIx]);
163            calcFollowPos(fRB.fTreeRoots[fRootIx]);
164            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("pos")>=0) {
165                System.out.print("\n");
166                printPosSets(fRB.fTreeRoots[fRootIx]);
167            }
168
169            //
170
// For "chained" rules, modify the followPos sets
171
//
172
if (fRB.fChainRules) {
173                calcChainedFollowPos(fRB.fTreeRoots[fRootIx]);
174            }
175
176            //
177
// BOF (start of input) test fixup.
178
//
179
if (fRB.fSetBuilder.sawBOF()) {
180                bofFixup();
181            }
182
183            //
184
// Build the DFA state transition tables.
185
//
186
buildStateTable();
187            flagAcceptingStates();
188            flagLookAheadStates();
189            flagTaggedStates();
190
191            //
192
// Update the global table of rule status {tag} values
193
// The rule builder has a global vector of status values that are common
194
// for all tables. Merge the ones from this table into the global set.
195
//
196
mergeRuleStatusVals();
197
198            if (fRB.fDebugEnv!=null && fRB.fDebugEnv.indexOf("states")>=0) {printStates();};
199        }
200
201
202
203        //-----------------------------------------------------------------------------
204
//
205
// calcNullable. Impossible to explain succinctly. See Aho, section 3.9
206
//
207
//-----------------------------------------------------------------------------
208
void calcNullable(RBBINode n) {
209            if (n == null) {
210                return;
211            }
212            if (n.fType == RBBINode.setRef ||
213                n.fType == RBBINode.endMark ) {
214                // These are non-empty leaf node types.
215
n.fNullable = false;
216                return;
217            }
218
219            if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) {
220                // Lookahead marker node. It's a leaf, so no recursion on children.
221
// It's nullable because it does not match any literal text from the input stream.
222
n.fNullable = true;
223                return;
224            }
225
226
227            // The node is not a leaf.
228
// Calculate nullable on its children.
229
calcNullable(n.fLeftChild);
230            calcNullable(n.fRightChild);
231
232            // Apply functions from table 3.40 in Aho
233
if (n.fType == RBBINode.opOr) {
234                n.fNullable = n.fLeftChild.fNullable || n.fRightChild.fNullable;
235            }
236            else if (n.fType == RBBINode.opCat) {
237                n.fNullable = n.fLeftChild.fNullable && n.fRightChild.fNullable;
238            }
239            else if (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion) {
240                n.fNullable = true;
241            }
242            else {
243                n.fNullable = false;
244            }
245        }
246
247
248
249
250        //-----------------------------------------------------------------------------
251
//
252
// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
253
//
254
//-----------------------------------------------------------------------------
255
void calcFirstPos(RBBINode n) {
256            if (n == null) {
257                return;
258            }
259            if (n.fType == RBBINode.leafChar ||
260                n.fType == RBBINode.endMark ||
261                n.fType == RBBINode.lookAhead ||
262                n.fType == RBBINode.tag) {
263                // These are non-empty leaf node types.
264
n.fFirstPosSet.add(n);
265                return;
266            }
267
268            // The node is not a leaf.
269
// Calculate firstPos on its children.
270
calcFirstPos(n.fLeftChild);
271            calcFirstPos(n.fRightChild);
272
273            // Apply functions from table 3.40 in Aho
274
if (n.fType == RBBINode.opOr) {
275                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
276                n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
277            }
278            else if (n.fType == RBBINode.opCat) {
279                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
280                if (n.fLeftChild.fNullable) {
281                    n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
282                }
283            }
284            else if (n.fType == RBBINode.opStar ||
285                     n.fType == RBBINode.opQuestion ||
286                     n.fType == RBBINode.opPlus) {
287                n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
288            }
289        }
290
291
292
293        //-----------------------------------------------------------------------------
294
//
295
// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
296
//
297
//-----------------------------------------------------------------------------
298
void calcLastPos(RBBINode n) {
299            if (n == null) {
300                return;
301            }
302            if (n.fType == RBBINode.leafChar ||
303                n.fType == RBBINode.endMark ||
304                n.fType == RBBINode.lookAhead ||
305                n.fType == RBBINode.tag) {
306                // These are non-empty leaf node types.
307
n.fLastPosSet.add(n);
308                return;
309            }
310
311            // The node is not a leaf.
312
// Calculate lastPos on its children.
313
calcLastPos(n.fLeftChild);
314            calcLastPos(n.fRightChild);
315
316            // Apply functions from table 3.40 in Aho
317
if (n.fType == RBBINode.opOr) {
318                n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
319                n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
320            }
321            else if (n.fType == RBBINode.opCat) {
322                n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
323                if (n.fRightChild.fNullable) {
324                    n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
325                }
326            }
327            else if (n.fType == RBBINode.opStar ||
328                     n.fType == RBBINode.opQuestion ||
329                     n.fType == RBBINode.opPlus) {
330                n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
331            }
332        }
333
334
335
336        //-----------------------------------------------------------------------------
337
//
338
// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
339
//
340
//-----------------------------------------------------------------------------
341
void calcFollowPos(RBBINode n) {
342            if (n == null ||
343                n.fType == RBBINode.leafChar ||
344                n.fType == RBBINode.endMark) {
345                return;
346            }
347
348            calcFollowPos(n.fLeftChild);
349            calcFollowPos(n.fRightChild);
350
351            // Aho rule #1
352
if (n.fType == RBBINode.opCat) {
353                RBBINode i; // is 'i' in Aho's description
354

355                Set JavaDoc LastPosOfLeftChild = n.fLeftChild.fLastPosSet;
356
357                Iterator JavaDoc ix = LastPosOfLeftChild.iterator();
358                while (ix.hasNext()) {
359                    i = (RBBINode )ix.next();
360                    i.fFollowPos.addAll(n.fRightChild.fFirstPosSet);
361                }
362            }
363
364            // Aho rule #2
365
if (n.fType == RBBINode.opStar ||
366                n.fType == RBBINode.opPlus) {
367                RBBINode i; // again, n and i are the names from Aho's description.
368
Iterator JavaDoc ix = n.fLastPosSet.iterator();
369                while (ix.hasNext()) {
370                    i = (RBBINode) ix.next();
371                    i.fFollowPos.addAll(n.fFirstPosSet);
372                }
373
374            }
375
376
377
378        }
379
380
381        //-----------------------------------------------------------------------------
382
//
383
// calcChainedFollowPos. Modify the previously calculated followPos sets
384
// to implement rule chaining. NOT described by Aho
385
//
386
//-----------------------------------------------------------------------------
387
void calcChainedFollowPos(RBBINode tree) {
388
389            List JavaDoc endMarkerNodes = new ArrayList JavaDoc();
390            List JavaDoc leafNodes = new ArrayList JavaDoc();
391
392             // get a list of all endmarker nodes.
393
tree.findNodes(endMarkerNodes, RBBINode.endMark);
394
395            // get a list all leaf nodes
396
tree.findNodes(leafNodes, RBBINode.leafChar);
397
398            // Get all nodes that can be the start a match, which is FirstPosition()
399
// of the portion of the tree corresponding to user-written rules.
400
// See the tree description in bofFixup().
401
RBBINode userRuleRoot = tree;
402            if (fRB.fSetBuilder.sawBOF()) {
403                userRuleRoot = tree.fLeftChild.fRightChild;
404            }
405            Assert.assrt(userRuleRoot != null);
406            Set JavaDoc matchStartNodes = userRuleRoot.fFirstPosSet;
407
408
409            // Iteratate over all leaf nodes,
410
//
411
Iterator JavaDoc endNodeIx = leafNodes.iterator();
412
413            while (endNodeIx.hasNext()) {
414                RBBINode tNode = (RBBINode)endNodeIx.next();
415                RBBINode endNode = null;
416
417                // Identify leaf nodes that correspond to overall rule match positions.
418
// These include an endMarkerNode in their followPos sets.
419
Iterator JavaDoc i = endMarkerNodes.iterator();
420                while (i.hasNext()) {
421                    RBBINode endMarkerNode = (RBBINode)i.next();
422                    if (tNode.fFollowPos.contains(endMarkerNode)) {
423                        endNode = tNode;
424                        break;
425                    }
426                }
427                if (endNode == null) {
428                    // node wasn't an end node. Try again with the next.
429
continue;
430                }
431
432                // We've got a node that can end a match.
433

434                // Line Break Specific hack: If this node's val correspond to the $CM char class,
435
// don't chain from it.
436
// TODO: Add rule syntax for this behavior, get specifics out of here and
437
// into the rule file.
438
if (fRB.fLBCMNoChain) {
439                    int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
440                    if (c != -1) {
441                        // c == -1 occurs with sets containing only the {eof} marker string.
442
int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
443                        if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
444                            continue;
445                        }
446                    }
447                }
448
449
450                // Now iterate over the nodes that can start a match, looking for ones
451
// with the same char class as our ending node.
452
RBBINode startNode;
453                Iterator JavaDoc startNodeIx = matchStartNodes.iterator();
454                while (startNodeIx.hasNext()) {
455                    startNode = (RBBINode )startNodeIx.next();
456                    if (startNode.fType != RBBINode.leafChar) {
457                        continue;
458                    }
459
460                    if (endNode.fVal == startNode.fVal) {
461                        // The end val (character class) of one possible match is the
462
// same as the start of another.
463

464                        // Add all nodes from the followPos of the start node to the
465
// followPos set of the end node, which will have the effect of
466
// letting matches transition from a match state at endNode
467
// to the second char of a match starting with startNode.
468
endNode.fFollowPos.addAll(startNode.fFollowPos);
469                    }
470                }
471            }
472        }
473
474
475        //-----------------------------------------------------------------------------
476
//
477
// bofFixup. Fixup for state tables that include {bof} beginning of input testing.
478
// Do an swizzle similar to chaining, modifying the followPos set of
479
// the bofNode to include the followPos nodes from other {bot} nodes
480
// scattered through the tree.
481
//
482
// This function has much in common with calcChainedFollowPos().
483
//
484
//-----------------------------------------------------------------------------
485
void bofFixup() {
486            //
487
// The parse tree looks like this ...
488
// fTree root --. <cat>
489
// / \
490
// <cat> <#end node>
491
// / \
492
// <bofNode> rest
493
// of tree
494
//
495
// We will be adding things to the followPos set of the <bofNode>
496
//
497
RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild;
498            Assert.assrt(bofNode.fType == RBBINode.leafChar);
499            Assert.assrt(bofNode.fVal == 2);
500
501            // Get all nodes that can be the start a match of the user-written rules
502
// (excluding the fake bofNode)
503
// We want the nodes that can start a match in the
504
// part labeled "rest of tree"
505
//
506
Set JavaDoc matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
507            Iterator JavaDoc startNodeIt = matchStartNodes.iterator();
508            while (startNodeIt.hasNext()) {
509                RBBINode startNode = (RBBINode)startNodeIt.next();
510                if (startNode.fType != RBBINode.leafChar) {
511                    continue;
512                }
513
514                if (startNode.fVal == bofNode.fVal) {
515                    // We found a leaf node corresponding to a {bof} that was
516
// explicitly written into a rule.
517
// Add everything from the followPos set of this node to the
518
// followPos set of the fake bofNode at the start of the tree.
519
//
520
bofNode.fFollowPos.addAll(startNode.fFollowPos);
521                }
522            }
523        }
524
525        //-----------------------------------------------------------------------------
526
//
527
// buildStateTable() Determine the set of runtime DFA states and the
528
// transition tables for these states, by the algorithm
529
// of fig. 3.44 in Aho.
530
//
531
// Most of the comments are quotes of Aho's psuedo-code.
532
//
533
//-----------------------------------------------------------------------------
534
void buildStateTable() {
535            //
536
// Add a dummy state 0 - the stop state. Not from Aho.
537
int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
538            RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
539            fDStates.add(failState);
540
541            // initially, the only unmarked state in Dstates is firstpos(root),
542
// where toot is the root of the syntax tree for (r)#;
543
RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
544            initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
545            fDStates.add(initialState);
546
547            // while there is an unmarked state T in Dstates do begin
548
for (;;) {
549                RBBIStateDescriptor T = null;
550                int tx;
551                for (tx=1; tx<fDStates.size(); tx++) {
552                    RBBIStateDescriptor temp = (RBBIStateDescriptor )fDStates.get(tx);
553                    if (temp.fMarked == false) {
554                        T = temp;
555                        break;
556                    }
557                }
558                if (T == null) {
559                    break;
560                }
561
562                // mark T;
563
T.fMarked = true;
564
565                // for each input symbol a do begin
566
int a;
567                for (a = 1; a<=lastInputSymbol; a++) {
568                    // let U be the set of positions that are in followpos(p)
569
// for some position p in T
570
// such that the symbol at position p is a;
571
Set JavaDoc U = null;
572                    RBBINode p;
573                    Iterator JavaDoc pit = T.fPositions.iterator();
574                    while (pit.hasNext()) {
575                        p = (RBBINode )pit.next();
576                        if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) {
577                            if (U == null) {
578                                U = new HashSet JavaDoc();
579                            }
580                            U.addAll(p.fFollowPos);
581                        }
582                    }
583
584                    // if U is not empty and not in DStates then
585
int ux = 0;
586                    boolean UinDstates = false;
587                    if (U != null) {
588                        Assert.assrt(U.size() > 0);
589                        int ix;
590                        for (ix=0; ix<fDStates.size(); ix++) {
591                            RBBIStateDescriptor temp2;
592                            temp2 = (RBBIStateDescriptor )fDStates.get(ix);
593                            if (U.equals(temp2.fPositions)) {
594                                U = temp2.fPositions;
595                                ux = ix;
596                                UinDstates = true;
597                                break;
598                            }
599                        }
600
601                        // Add U as an unmarked state to Dstates
602
if (!UinDstates)
603                        {
604                            RBBIStateDescriptor newState = new RBBIStateDescriptor(lastInputSymbol);
605                            newState.fPositions = U;
606                            fDStates.add(newState);
607                            ux = fDStates.size()-1;
608                        }
609
610                        // Dtran[T, a] := U;
611
T.fDtran[a] = ux;
612                    }
613                }
614            }
615        }
616
617
618
619        //-----------------------------------------------------------------------------
620
//
621
// flagAcceptingStates Identify accepting states.
622
// First get a list of all of the end marker nodes.
623
// Then, for each state s,
624
// if s contains one of the end marker nodes in its list of tree positions then
625
// s is an accepting state.
626
//
627
//-----------------------------------------------------------------------------
628
void flagAcceptingStates() {
629            List JavaDoc endMarkerNodes = new ArrayList JavaDoc();
630            RBBINode endMarker;
631            int i;
632            int n;
633
634            fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);
635
636            for (i=0; i<endMarkerNodes.size(); i++) {
637                endMarker = (RBBINode)endMarkerNodes.get(i);
638                for (n=0; n<fDStates.size(); n++) {
639                    RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
640                    //if (sd.fPositions.indexOf(endMarker) >= 0) {
641
if (sd.fPositions.contains(endMarker)) {
642                        // Any non-zero value for fAccepting means this is an accepting node.
643
// The value is what will be returned to the user as the break status.
644
// If no other value was specified, force it to -1.
645

646                        if (sd.fAccepting==0) {
647                         // State hasn't been marked as accepting yet. Do it now.
648
sd.fAccepting = endMarker.fVal;
649                            if (sd.fAccepting == 0) {
650                                sd.fAccepting = -1;
651                         }
652                        }
653                        if (sd.fAccepting==-1 && endMarker.fVal != 0) {
654                         // Both lookahead and non-lookahead accepting for this state.
655
// Favor the look-ahead. Expedient for line break.
656
// TODO: need a more elegant resolution for conflicting rules.
657
sd.fAccepting = endMarker.fVal;
658                     }
659                         // implicit else:
660
// if sd.fAccepting already had a value other than 0 or -1, leave it be.
661

662                        // If the end marker node is from a look-ahead rule, set
663
// the fLookAhead field or this state also.
664
if (endMarker.fLookAheadEnd) {
665                         // TODO: don't change value if already set?
666
// TODO: allow for more than one active look-ahead rule in engine.
667
// Make value here an index to a side array in engine?
668
sd.fLookAhead = sd.fAccepting;
669                        }
670                    }
671                }
672            }
673        }
674
675
676        //-----------------------------------------------------------------------------
677
//
678
// flagLookAheadStates Very similar to flagAcceptingStates, above.
679
//
680
//-----------------------------------------------------------------------------
681
void flagLookAheadStates() {
682            List JavaDoc lookAheadNodes = new ArrayList JavaDoc();
683            RBBINode lookAheadNode;
684            int i;
685            int n;
686
687            fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
688            for (i=0; i<lookAheadNodes.size(); i++) {
689                lookAheadNode = (RBBINode )lookAheadNodes.get(i);
690
691                for (n=0; n<fDStates.size(); n++) {
692                    RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
693                    if (sd.fPositions.contains(lookAheadNode)) {
694                        sd.fLookAhead = lookAheadNode.fVal;
695                    }
696                }
697            }
698        }
699
700
701
702
703        //-----------------------------------------------------------------------------
704
//
705
// flagTaggedStates
706
//
707
//-----------------------------------------------------------------------------
708
void flagTaggedStates() {
709            List JavaDoc tagNodes = new ArrayList JavaDoc();
710            RBBINode tagNode;
711            int i;
712            int n;
713
714            fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
715            for (i=0; i<tagNodes.size(); i++) { // For each tag node t (all of 'em)
716
tagNode = (RBBINode )tagNodes.get(i);
717
718                for (n=0; n<fDStates.size(); n++) { // For each state s (row in the state table)
719
RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
720                    if (sd.fPositions.contains(tagNode)) { // if s include the tag node t
721
sd.fTagVals.add(new Integer JavaDoc(tagNode.fVal));
722                    }
723                }
724            }
725        }
726
727
728
729        //-----------------------------------------------------------------------------
730
//
731
// mergeRuleStatusVals
732
//
733
// Allocate positions in the global array of rule status {tag} values
734
//
735
// The RBBI runtime uses an array of {sets of status values} that can
736
// be returned for boundaries. Each accepting state that has non-zero
737
// status includes an index into this array. The format of the array
738
// is
739
// Num of status values in group 1
740
// status val
741
// status val
742
// ...
743
// Num of status vals in group 2
744
// status val
745
// status val
746
// ...
747
// etc.
748
//
749
//
750
//-----------------------------------------------------------------------------
751

752        void mergeRuleStatusVals() {
753            //
754
// The basic outline of what happens here is this...
755
//
756
// for each state in this state table
757
// if the status tag list for this state is in the global statuses list
758
// record where and
759
// continue with the next state
760
// else
761
// add the tag list for this state to the global list.
762
//
763
int i;
764            int n;
765            
766            // Pre-load a single tag of {0} into the table.
767
// We will need this as a default, for rule sets with no explicit tagging,
768
// or with explicit tagging of {0}.
769
if (fRB.fRuleStatusVals.size() == 0) {
770                fRB.fRuleStatusVals.add(new Integer JavaDoc(1)); // Num of statuses in group
771
fRB.fRuleStatusVals.add(new Integer JavaDoc(0)); // and our single status of zero
772

773                SortedSet JavaDoc s0 = new TreeSet JavaDoc();
774                Integer JavaDoc izero = new Integer JavaDoc(0);
775                fRB.fStatusSets.put(s0, izero);
776                SortedSet JavaDoc s1 = new TreeSet JavaDoc();
777                s1.add(izero);
778                fRB.fStatusSets.put(s0, izero);
779            }
780
781            // For each state, check whether the state's status tag values are
782
// already entered into the status values array, and add them if not.
783
for (n=0; n<fDStates.size(); n++) {
784                RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(n);
785                Set JavaDoc statusVals = sd.fTagVals;
786                Integer JavaDoc arrayIndexI = (Integer JavaDoc)fRB.fStatusSets.get(statusVals);
787                if (arrayIndexI == null) {
788                    // This is the first encounter of this set of status values.
789
// Add them to the statusSets map, This map associates
790
// the set of status values with an index in the runtime status
791
// values array.
792
arrayIndexI = new Integer JavaDoc(fRB.fRuleStatusVals.size());
793                    fRB.fStatusSets.put(statusVals, arrayIndexI);
794                    
795                    // Add the new set of status values to the vector of values that
796
// will eventually become the array used by the runtime engine.
797
fRB.fRuleStatusVals.add(new Integer JavaDoc(statusVals.size()));
798                    Iterator JavaDoc it = statusVals.iterator();
799                    while (it.hasNext()) {
800                        fRB.fRuleStatusVals.add(it.next());
801                    }
802                
803                }
804                
805                // Save the runtime array index back into the state descriptor.
806
sd.fTagsIdx = arrayIndexI.intValue();
807            }
808        }
809
810
811
812
813
814
815
816        //-----------------------------------------------------------------------------
817
//
818
// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos
819
// for each node in the tree.
820
//
821
//-----------------------------------------------------------------------------
822

823        void printPosSets(RBBINode n) {
824            if (n==null) {
825                return;
826            }
827            RBBINode.printNode(n);
828            System.out.print(" Nullable: " + n.fNullable);
829
830            System.out.print(" firstpos: ");
831            printSet(n.fFirstPosSet);
832
833            System.out.print(" lastpos: ");
834            printSet(n.fLastPosSet);
835
836            System.out.print(" followpos: ");
837            printSet(n.fFollowPos);
838
839            printPosSets(n.fLeftChild);
840            printPosSets(n.fRightChild);
841        }
842        
843
844
845
846        //-----------------------------------------------------------------------------
847
//
848
// getTableSize() Calculate the size in bytes of the runtime form of this
849
// state transition table.
850
//
851
// Note: Refer to common/rbbidata.h from ICU4C for the declarations
852
// of the structures being matched by this calculation.
853
//
854
//-----------------------------------------------------------------------------
855
int getTableSize() {
856            int size = 0;
857            int numRows;
858            int numCols;
859            int rowSize;
860
861            if (fRB.fTreeRoots[fRootIx] == null) {
862                return 0;
863            }
864
865            size = /*sizeof(RBBIStateTable) - 4 */ 16; // The header, with no rows to the table.
866

867            numRows = fDStates.size();
868            numCols = fRB.fSetBuilder.getNumCharCategories();
869
870            // Note The declaration of RBBIStateTableRow is for a table of two columns.
871
// Therefore we subtract two from numCols when determining
872
// how much storage to add to a row for the total columns.
873
// rowSize = sizeof(RBBIStateTableRow) + sizeof(uint16_t)*(numCols-2);
874
rowSize = 8 + 2*numCols;
875            size += numRows * rowSize;
876            while (size % 8 > 0) { // Size must be multiple of 8 bytes in size.
877
size++;
878            }
879
880            return size;
881        }
882
883
884
885        //-----------------------------------------------------------------------------
886
//
887
// exportTable() export the state transition table in the ICU4C format.
888
//
889
// Most of the table is 16 bit shorts. This function exports
890
// the whole thing as an array of shorts.
891
//
892
// The size of the array must be rounded up to a multiple of
893
// 8 bytes.
894
//
895
// See struct RBBIStateTable in ICU4C, common/rbbidata.h
896
//
897
//-----------------------------------------------------------------------------
898

899        short [] exportTable() {
900            int state;
901            int col;
902
903            if (fRB.fTreeRoots[fRootIx] == null) {
904                return new short[0];
905            }
906
907            Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
908                fDStates.size() < 0x7fff);
909
910            int numStates = fDStates.size();
911     
912            // Size of table size in shorts.
913
// the "4" is the size of struct RBBIStateTableRow, the row header part only.
914
int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();
915            int tableSize = getTableSize() / 2;
916
917            
918            short [] table = new short[tableSize];
919            
920            //
921
// Fill in the header fields.
922
// Annoying because they really want to be ints, not shorts.
923
//
924
// RBBIStateTable.fNumStates
925
table[RBBIDataWrapper.NUMSTATES] = (short)(numStates >>> 16);
926            table[RBBIDataWrapper.NUMSTATES+1] = (short)(numStates & 0x0000ffff);
927
928            // RBBIStateTable.fRowLen
929
table[RBBIDataWrapper.ROWLEN] = (short)(rowLen >>> 16);
930            table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLen & 0x0000ffff);
931            
932            // RBBIStateTable.fFlags
933
int flags = 0;
934            if (fRB.fLookAheadHardBreak) {
935                flags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
936            }
937            if (fRB.fSetBuilder.sawBOF()) {
938                flags |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
939            }
940            table[RBBIDataWrapper.FLAGS] = (short)(flags >>> 16);
941            table[RBBIDataWrapper.FLAGS+1] = (short)(flags & 0x0000ffff);
942            
943            int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
944            for (state=0; state<numStates; state++) {
945                RBBIStateDescriptor sd = (RBBIStateDescriptor )fDStates.get(state);
946                int row = 8 + state*rowLen;
947                Assert.assrt (-32768 < sd.fAccepting && sd.fAccepting <= 32767);
948                Assert.assrt (-32768 < sd.fLookAhead && sd.fLookAhead <= 32767);
949                table[row + RBBIDataWrapper.ACCEPTING] = (short)sd.fAccepting;
950                table[row + RBBIDataWrapper.LOOKAHEAD] = (short)sd.fLookAhead;
951                table[row + RBBIDataWrapper.TAGIDX] = (short)sd.fTagsIdx;
952                for (col=0; col<numCharCategories; col++) {
953                    table[row + RBBIDataWrapper.NEXTSTATES + col] = (short)sd.fDtran[col];
954                }
955            }
956            return table;
957        }
958
959
960
961        //-----------------------------------------------------------------------------
962
//
963
// printSet Debug function. Print the contents of a set of Nodes
964
//
965
//-----------------------------------------------------------------------------
966

967        void printSet(Collection JavaDoc s) {
968            Iterator JavaDoc it = s.iterator();
969            while (it.hasNext()) {
970                RBBINode n = (RBBINode)it.next();
971                RBBINode.printInt(n.fSerialNum, 8);
972            }
973            System.out.println();
974        }
975        
976
977
978        //-----------------------------------------------------------------------------
979
//
980
// printStates Debug Function. Dump the fully constructed state transition table.
981
//
982
//-----------------------------------------------------------------------------
983

984        void printStates() {
985            int c; // input "character"
986
int n; // state number
987

988            System.out.print("state | i n p u t s y m b o l s \n");
989            System.out.print(" | Acc LA Tag");
990            for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
991                RBBINode.printInt((int)c, 3);
992            }
993            System.out.print("\n");
994            System.out.print(" |---------------");
995            for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
996                System.out.print("---");
997            }
998            System.out.print("\n");
999
1000           for (n=0; n<fDStates.size(); n++) {
1001               RBBIStateDescriptor sd = (RBBIStateDescriptor)fDStates.get(n);
1002               RBBINode.printInt(n, 5);
1003               System.out.print(" | ");
1004               
1005               RBBINode.printInt(sd.fAccepting, 3);
1006               RBBINode.printInt(sd.fLookAhead, 4);
1007               RBBINode.printInt(sd.fTagsIdx, 6);
1008               System.out.print(" ");
1009               for (c=0; c<fRB.fSetBuilder.getNumCharCategories(); c++) {
1010                   RBBINode.printInt(sd.fDtran[c], 3);
1011               }
1012               System.out.print("\n");
1013           }
1014           System.out.print("\n\n");
1015       }
1016       
1017
1018
1019
1020       //-----------------------------------------------------------------------------
1021
//
1022
// printRuleStatusTable Debug Function. Dump the common rule status table
1023
//
1024
//-----------------------------------------------------------------------------
1025

1026       void printRuleStatusTable() {
1027           int thisRecord = 0;
1028           int nextRecord = 0;
1029           int i;
1030           List JavaDoc tbl = fRB.fRuleStatusVals;
1031
1032           System.out.print("index | tags \n");
1033           System.out.print("-------------------\n");
1034
1035           while (nextRecord < tbl.size()) {
1036               thisRecord = nextRecord;
1037               nextRecord = thisRecord + ((Integer JavaDoc)tbl.get(thisRecord)).intValue() + 1;
1038               RBBINode.printInt(thisRecord, 7);
1039               for (i=thisRecord+1; i<nextRecord; i++) {
1040                   int val = ((Integer JavaDoc)tbl.get(i)).intValue();
1041                   RBBINode.printInt(val, 7);
1042               }
1043               System.out.print("\n");
1044           }
1045           System.out.print("\n\n");
1046       }
1047       
1048
1049
1050}
1051
Popular Tags