KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > percederberg > grammatica > parser > LookAheadSet


1 /*
2  * LookAheadSet.java
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public License
6  * as published by the Free Software Foundation; either version 2.1
7  * of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free
16  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
17  * MA 02111-1307, USA.
18  *
19  * Copyright (c) 2003-2005 Per Cederberg. All rights reserved.
20  */

21
22 package net.percederberg.grammatica.parser;
23
24 import java.util.ArrayList JavaDoc;
25
26 /**
27  * A token look-ahead set. This class contains a set of token id
28  * sequences. All sequences in the set are limited in length, so that
29  * no single sequence is longer than a maximum value. This class also
30  * filters out duplicates. Each token sequence also contains a repeat
31  * flag, allowing the look-ahead set to contain information about
32  * possible infinite repetitions of certain sequences. That
33  * information is important when conflicts arise between two
34  * look-ahead sets, as such a conflict cannot be resolved if the
35  * conflicting sequences can be repeated (would cause infinite loop).
36  *
37  * @author Per Cederberg, <per at percederberg dot net>
38  * @version 1.5
39  */

40 class LookAheadSet {
41
42     /**
43      * The set of token look-ahead sequences. Each sequence in
44      * turn is represented by an ArrayList with Integers for the
45      * token id:s.
46      */

47     private ArrayList JavaDoc elements = new ArrayList JavaDoc();
48
49     /**
50      * The maximum length of any look-ahead sequence.
51      */

52     private int maxLength;
53
54     /**
55      * Creates a new look-ahead set with the specified maximum
56      * length.
57      *
58      * @param maxLength the maximum token sequence length
59      */

60     public LookAheadSet(int maxLength) {
61         this.maxLength = maxLength;
62     }
63
64     /**
65      * Creates a duplicate look-ahead set, possibly with a different
66      * maximum length.
67      *
68      * @param maxLength the maximum token sequence length
69      * @param set the look-ahead set to copy
70      */

71     public LookAheadSet(int maxLength, LookAheadSet set) {
72         this(maxLength);
73         addAll(set);
74     }
75
76     /**
77      * Checks if this look-ahead set is empty.
78      *
79      * @return true if this look-ahead set is empty, or
80      * false otherwise
81      *
82      * @since 1.5
83      */

84     public boolean isEmpty() {
85         return elements.size() == 0;
86     }
87
88     /**
89      * Returns the length of the shortest token sequence in this
90      * set. This method will return zero (0) if the set is empty.
91      *
92      * @return the length of the shortest token sequence
93      */

94     public int getMinLength() {
95         Sequence seq;
96         int min = -1;
97
98         for (int i = 0; i < elements.size(); i++) {
99             seq = (Sequence) elements.get(i);
100             if (min < 0 || seq.length() < min) {
101                 min = seq.length();
102             }
103         }
104         return (min < 0) ? 0 : min;
105     }
106
107     /**
108      * Returns the length of the longest token sequence in this
109      * set. This method will return zero (0) if the set is empty.
110      *
111      * @return the length of the longest token sequence
112      */

113     public int getMaxLength() {
114         Sequence seq;
115         int max = 0;
116
117         for (int i = 0; i < elements.size(); i++) {
118             seq = (Sequence) elements.get(i);
119             if (seq.length() > max) {
120                 max = seq.length();
121             }
122         }
123         return max;
124     }
125
126     /**
127      * Returns a list of the initial token id:s in this look-ahead
128      * set. The list returned will not contain any duplicates.
129      *
130      * @return a list of the inital token id:s in this look-ahead set
131      */

132     public int[] getInitialTokens() {
133         ArrayList JavaDoc list = new ArrayList JavaDoc();
134         int[] result;
135         Integer JavaDoc token;
136         int i;
137
138         for (i = 0; i < elements.size(); i++) {
139             token = ((Sequence) elements.get(i)).getToken(0);
140             if (token != null && !list.contains(token)) {
141                 list.add(token);
142             }
143         }
144         result = new int[list.size()];
145         for (i = 0; i < list.size(); i++) {
146             result[i] = ((Integer JavaDoc) list.get(i)).intValue();
147         }
148         return result;
149     }
150
151     /**
152      * Checks if this look-ahead set contains a repetitive token
153      * sequence.
154      *
155      * @return true if at least one token sequence is repetitive, or
156      * false otherwise
157      */

158     public boolean isRepetitive() {
159         Sequence seq;
160
161         for (int i = 0; i < elements.size(); i++) {
162             seq = (Sequence) elements.get(i);
163             if (seq.isRepetitive()) {
164                 return true;
165             }
166         }
167         return false;
168     }
169
170     /**
171      * Checks if the next token(s) in the parser match any token
172      * sequence in this set.
173      *
174      * @param parser the parser to check
175      *
176      * @return true if the next tokens are in the set, or
177      * false otherwise
178      */

179     public boolean isNext(Parser parser) {
180         Sequence seq;
181
182         for (int i = 0; i < elements.size(); i++) {
183             seq = (Sequence) elements.get(i);
184             if (seq.isNext(parser)) {
185                 return true;
186             }
187         }
188         return false;
189     }
190
191     /**
192      * Checks if the next token(s) in the parser match any token
193      * sequence in this set.
194      *
195      * @param parser the parser to check
196      * @param length the maximum number of tokens to check
197      *
198      * @return true if the next tokens are in the set, or
199      * false otherwise
200      */

201     public boolean isNext(Parser parser, int length) {
202         Sequence seq;
203
204         for (int i = 0; i < elements.size(); i++) {
205             seq = (Sequence) elements.get(i);
206             if (seq.isNext(parser, length)) {
207                 return true;
208             }
209         }
210         return false;
211     }
212
213     /**
214      * Checks if another look-ahead set has an overlapping token
215      * sequence. An overlapping token sequence is a token sequence
216      * that is identical to another sequence, but for the length. I.e.
217      * one of the two sequences may be longer than the other.
218      *
219      * @param set the look-ahead set to check
220      *
221      * @return true if there is some token sequence that overlaps, or
222      * false otherwise
223      *
224      * @since 1.5
225      */

226     public boolean hasOverlap(LookAheadSet set) {
227         for (int i = 0; i < elements.size(); i++) {
228             if (set.hasOverlap((Sequence) elements.get(i))) {
229                 return true;
230             }
231         }
232         return false;
233     }
234
235     /**
236      * Checks if a token sequence is overlapping. An overlapping token
237      * sequence is a token sequence that is identical to another
238      * sequence, but for the length. I.e. one of the two sequences may
239      * be longer than the other.
240      *
241      * @param seq the token sequence to check
242      *
243      * @return true if there is some token sequence that overlaps, or
244      * false otherwise
245      *
246      * @since 1.5
247      */

248     private boolean hasOverlap(Sequence seq) {
249         Sequence elem;
250
251         for (int i = 0; i < elements.size(); i++) {
252             elem = (Sequence) elements.get(i);
253             if (seq.startsWith(elem) || elem.startsWith(seq)) {
254                 return true;
255             }
256         }
257         return false;
258     }
259
260     /**
261      * Checks if some token sequence is present in both this set
262      * and a specified one.
263      *
264      * @param set the look-ahead set to compare with
265      *
266      * @return true if the look-ahead sets intersect, or
267      * false otherwise
268      *
269      * @since 1.5
270      */

271     public boolean hasIntersection(LookAheadSet set) {
272         for (int i = 0; i < elements.size(); i++) {
273             if (set.contains((Sequence) elements.get(i))) {
274                 return true;
275             }
276         }
277         return false;
278     }
279
280     /**
281      * Checks if the specified token sequence is present in the
282      * set.
283      *
284      * @param elem the token sequence to check
285      *
286      * @return true if the sequence is present in this set, or
287      * false otherwise
288      */

289     private boolean contains(Sequence elem) {
290         return findSequence(elem) != null;
291     }
292
293     /**
294      * Finds an identical token sequence if present in the set.
295      *
296      * @param elem the token sequence to search for
297      *
298      * @return an identical the token sequence if found, or
299      * null if not found
300      */

301     private Sequence findSequence(Sequence elem) {
302         for (int i = 0; i < elements.size(); i++) {
303             if (elements.get(i).equals(elem)) {
304                 return (Sequence) elements.get(i);
305             }
306         }
307         return null;
308     }
309
310     /**
311      * Adds a token sequence to this set. The sequence will only be
312      * added if it is not already in the set. Also, if the sequence is
313      * longer than the allowed maximum, a truncated sequence will be
314      * added instead.
315      *
316      * @param seq the token sequence to add
317      */

318     private void add(Sequence seq) {
319         if (seq.length() > maxLength) {
320             seq = new Sequence(maxLength, seq);
321         }
322         if (!contains(seq)) {
323             elements.add(seq);
324         }
325     }
326
327     /**
328      * Adds a new token sequence with a single token to this set. The
329      * sequence will only be added if it is not already in the set.
330      *
331      * @param token the token to add
332      */

333     public void add(int token) {
334         add(new Sequence(false, token));
335     }
336
337     /**
338      * Adds all the token sequences from a specified set. Only
339      * sequences not already in this set will be added.
340      *
341      * @param set the set to add from
342      */

343     public void addAll(LookAheadSet set) {
344         for (int i = 0; i < set.elements.size(); i++) {
345             add((Sequence) set.elements.get(i));
346         }
347     }
348
349     /**
350      * Adds an empty token sequence to this set. The sequence will
351      * only be added if it is not already in the set.
352      */

353     public void addEmpty() {
354         add(new Sequence());
355     }
356
357     /**
358      * Removes a token sequence from this set.
359      *
360      * @param seq the token sequence to remove
361      */

362     private void remove(Sequence seq) {
363         elements.remove(seq);
364     }
365
366     /**
367      * Removes all the token sequences from a specified set. Only
368      * sequences already in this set will be removed.
369      *
370      * @param set the set to remove from
371      */

372     public void removeAll(LookAheadSet set) {
373         for (int i = 0; i < set.elements.size(); i++) {
374             remove((Sequence) set.elements.get(i));
375         }
376     }
377
378     /**
379      * Creates a new look-ahead set that is the result of reading the
380      * specified token. The new look-ahead set will contain the
381      * rest of all the token sequences that started with the specified
382      * token.
383      *
384      * @param token the token to read
385      *
386      * @return a new look-ahead set containing the remaining tokens
387      */

388     public LookAheadSet createNextSet(int token) {
389         LookAheadSet result = new LookAheadSet(maxLength - 1);
390         Sequence seq;
391         Integer JavaDoc value;
392
393         for (int i = 0; i < elements.size(); i++) {
394             seq = (Sequence) elements.get(i);
395             value = seq.getToken(0);
396             if (value != null && value.intValue() == token) {
397                 result.add(seq.subsequence(1));
398             }
399         }
400         return result;
401     }
402
403     /**
404      * Creates a new look-ahead set that is the intersection of
405      * this set with another set. The token sequences in the net set
406      * will only have the repeat flag set if it was set in both the
407      * identical token sequences.
408      *
409      * @param set the set to intersect with
410      *
411      * @return a new look-ahead set containing the intersection
412      */

413     public LookAheadSet createIntersection(LookAheadSet set) {
414         LookAheadSet result = new LookAheadSet(maxLength);
415         Sequence seq1;
416         Sequence seq2;
417
418         for (int i = 0; i < elements.size(); i++) {
419             seq1 = (Sequence) elements.get(i);
420             seq2 = set.findSequence(seq1);
421             if (seq2 != null && seq1.isRepetitive()) {
422                 result.add(seq2);
423             } else if (seq2 != null) {
424                 result.add(seq1);
425             }
426         }
427         return result;
428     }
429
430     /**
431      * Creates a new look-ahead set that is the combination of
432      * this set with another set. The combination is created by
433      * creating new token sequences that consist of appending all
434      * elements from the specified set onto all elements in this set.
435      * This is sometimes referred to as the cartesian product.
436      *
437      * @param set the set to combine with
438      *
439      * @return a new look-ahead set containing the combination
440      */

441     public LookAheadSet createCombination(LookAheadSet set) {
442         LookAheadSet result = new LookAheadSet(maxLength);
443         Sequence first;
444         Sequence second;
445
446         // Handle special cases
447
if (this.isEmpty()) {
448             return set;
449         } else if (set.isEmpty()) {
450             return this;
451         }
452
453         // Create combinations
454
for (int i = 0; i < elements.size(); i++) {
455             first = (Sequence) elements.get(i);
456             if (first.length() >= maxLength) {
457                 result.add(first);
458             } else if (first.length() <= 0) {
459                 result.addAll(set);
460             } else {
461                 for (int j = 0; j < set.elements.size(); j++) {
462                     second = (Sequence) set.elements.get(j);
463                     result.add(first.concat(maxLength, second));
464                 }
465             }
466         }
467         return result;
468     }
469
470     /**
471      * Creates a new look-ahead set with overlaps from another. All
472      * token sequences in this set that overlaps with the other set
473      * will be added to the new look-ahead set.
474      *
475      * @param set the look-ahead set to check with
476      *
477      * @return a new look-ahead set containing the overlaps
478      */

479     public LookAheadSet createOverlaps(LookAheadSet set) {
480         LookAheadSet result = new LookAheadSet(maxLength);
481         Sequence seq;
482
483         for (int i = 0; i < elements.size(); i++) {
484             seq = (Sequence) elements.get(i);
485             if (set.hasOverlap(seq)) {
486                 result.add(seq);
487             }
488         }
489         return result;
490     }
491
492     /**
493      * Creates a new look-ahead set filter. The filter will contain
494      * all sequences from this set, possibly left trimmed by each one
495      * of the sequences in the specified set.
496      *
497      * @param set the look-ahead set to trim with
498      *
499      * @return a new look-ahead set filter
500      */

501     public LookAheadSet createFilter(LookAheadSet set) {
502         LookAheadSet result = new LookAheadSet(maxLength);
503         Sequence first;
504         Sequence second;
505
506         // Handle special cases
507
if (this.isEmpty() || set.isEmpty()) {
508             return this;
509         }
510
511         // Create combinations
512
for (int i = 0; i < elements.size(); i++) {
513             first = (Sequence) elements.get(i);
514             for (int j = 0; j < set.elements.size(); j++) {
515                 second = (Sequence) set.elements.get(j);
516                 if (first.startsWith(second)) {
517                     result.add(first.subsequence(second.length()));
518                 }
519             }
520         }
521         return result;
522     }
523
524     /**
525      * Creates a new identical look-ahead set, except for the repeat
526      * flag being set in each token sequence.
527      *
528      * @return a new repetitive look-ahead set
529      */

530     public LookAheadSet createRepetitive() {
531         LookAheadSet result = new LookAheadSet(maxLength);
532         Sequence seq;
533
534         for (int i = 0; i < elements.size(); i++) {
535             seq = (Sequence) elements.get(i);
536             if (seq.isRepetitive()) {
537                 result.add(seq);
538             } else {
539                 result.add(new Sequence(true, seq));
540             }
541         }
542         return result;
543     }
544
545     /**
546      * Returns a string representation of this object.
547      *
548      * @return a string representation of this object
549      */

550     public String JavaDoc toString() {
551         return toString(null);
552     }
553
554     /**
555      * Returns a string representation of this object.
556      *
557      * @param tokenizer the tokenizer containing the tokens
558      *
559      * @return a string representation of this object
560      */

561     public String JavaDoc toString(Tokenizer tokenizer) {
562         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
563         Sequence seq;
564
565         buffer.append("{");
566         for (int i = 0; i < elements.size(); i++) {
567             seq = (Sequence) elements.get(i);
568             buffer.append("\n ");
569             buffer.append(seq.toString(tokenizer));
570         }
571         buffer.append("\n}");
572         return buffer.toString();
573     }
574
575
576     /**
577      * A token sequence. This class contains a list of token ids. It
578      * is immutable after creation, meaning that no changes will be
579      * made to an instance after creation.
580      *
581      * @author Per Cederberg, <per at percederberg dot net>
582      * @version 1.0
583      */

584     private class Sequence {
585
586         /**
587          * The repeat flag. If this flag is set, the token sequence
588          * or some part of it may be repeated infinitely.
589          */

590         private boolean repeat = false;
591
592         /**
593          * The list of token ids in this sequence.
594          */

595         private ArrayList JavaDoc tokens = null;
596
597         /**
598          * Creates a new empty token sequence. The repeat flag will be
599          * set to false.
600          */

601         public Sequence() {
602             this.repeat = false;
603             this.tokens = new ArrayList JavaDoc(0);
604         }
605
606         /**
607          * Creates a new token sequence with a single token.
608          *
609          * @param repeat the repeat flag value
610          * @param token the token to add
611          */

612         public Sequence(boolean repeat, int token) {
613             this.repeat = false;
614             this.tokens = new ArrayList JavaDoc(1);
615             this.tokens.add(new Integer JavaDoc(token));
616         }
617
618         /**
619          * Creates a new token sequence that is a duplicate of another
620          * sequence. Only a limited number of tokens will be copied
621          * however. The repeat flag from the original will be kept
622          * intact.
623          *
624          * @param length the maximum number of tokens to copy
625          * @param seq the sequence to copy
626          */

627         public Sequence(int length, Sequence seq) {
628             this.repeat = seq.repeat;
629             this.tokens = new ArrayList JavaDoc(length);
630             if (seq.length() < length) {
631                 length = seq.length();
632             }
633             for (int i = 0; i < length; i++) {
634                 tokens.add(seq.tokens.get(i));
635             }
636         }
637
638         /**
639          * Creates a new token sequence that is a duplicate of another
640          * sequence. The new value of the repeat flag will be used
641          * however.
642          *
643          * @param repeat the new repeat flag value
644          * @param seq the sequence to copy
645          */

646         public Sequence(boolean repeat, Sequence seq) {
647             this.repeat = repeat;
648             this.tokens = seq.tokens;
649         }
650
651         /**
652          * Returns the length of the token sequence.
653          *
654          * @return the number of tokens in the sequence
655          */

656         public int length() {
657             return tokens.size();
658         }
659
660         /**
661          * Returns a token at a specified position in the sequence.
662          *
663          * @param pos the sequence position
664          *
665          * @return the token id found, or null
666          */

667         public Integer JavaDoc getToken(int pos) {
668             if (pos >= 0 && pos < tokens.size()) {
669                 return (Integer JavaDoc) tokens.get(pos);
670             } else {
671                 return null;
672             }
673         }
674
675         /**
676          * Checks if this sequence is equal to another object. Only
677          * token sequences with the same tokens in the same order will
678          * be considered equal. The repeat flag will be disregarded.
679          *
680          * @param obj the object to compare with
681          *
682          * @return true if the objects are equal, or
683          * false otherwise
684          */

685         public boolean equals(Object JavaDoc obj) {
686             if (obj instanceof Sequence) {
687                 return tokens.equals(((Sequence) obj).tokens);
688             } else {
689                 return false;
690             }
691         }
692
693         /**
694          * Checks if this token sequence starts with the tokens from
695          * another sequence. If the other sequence is longer than this
696          * sequence, this method will always return false.
697          *
698          * @param seq the token sequence to check
699          *
700          * @return true if this sequence starts with the other, or
701          * false otherwise
702          */

703         public boolean startsWith(Sequence seq) {
704             if (length() < seq.length()) {
705                 return false;
706             }
707             for (int i = 0; i < seq.tokens.size(); i++) {
708                 if (!tokens.get(i).equals(seq.tokens.get(i))) {
709                     return false;
710                 }
711             }
712             return true;
713         }
714
715         /**
716          * Checks if this token sequence is repetitive. A repetitive
717          * token sequence is one with the repeat flag set.
718          *
719          * @return true if this token sequence is repetitive, or
720          * false otherwise
721          */

722         public boolean isRepetitive() {
723             return repeat;
724         }
725
726         /**
727          * Checks if the next token(s) in the parser matches this
728          * token sequence.
729          *
730          * @param parser the parser to check
731          *
732          * @return true if the next tokens are in the sequence, or
733          * false otherwise
734          */

735         public boolean isNext(Parser parser) {
736             Token token;
737             Integer JavaDoc id;
738
739             for (int i = 0; i < tokens.size(); i++) {
740                 id = (Integer JavaDoc) tokens.get(i);
741                 token = parser.peekToken(i);
742                 if (token == null || token.getId() != id.intValue()) {
743                     return false;
744                 }
745             }
746             return true;
747         }
748
749         /**
750          * Checks if the next token(s) in the parser matches this
751          * token sequence.
752          *
753          * @param parser the parser to check
754          * @param length the maximum number of tokens to check
755          *
756          * @return true if the next tokens are in the sequence, or
757          * false otherwise
758          */

759         public boolean isNext(Parser parser, int length) {
760             Token token;
761             Integer JavaDoc id;
762
763             if (length > tokens.size()) {
764                 length = tokens.size();
765             }
766             for (int i = 0; i < length; i++) {
767                 id = (Integer JavaDoc) tokens.get(i);
768                 token = parser.peekToken(i);
769                 if (token == null || token.getId() != id.intValue()) {
770                     return false;
771                 }
772             }
773             return true;
774         }
775
776         /**
777          * Returns a string representation of this object.
778          *
779          * @return a string representation of this object
780          */

781         public String JavaDoc toString() {
782             return toString(null);
783         }
784
785         /**
786          * Returns a string representation of this object.
787          *
788          * @param tokenizer the tokenizer containing the tokens
789          *
790          * @return a string representation of this object
791          */

792         public String JavaDoc toString(Tokenizer tokenizer) {
793             StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
794             String JavaDoc str;
795             Integer JavaDoc id;
796
797             if (tokenizer == null) {
798                 buffer.append(tokens.toString());
799             } else {
800                 buffer.append("[");
801                 for (int i = 0; i < tokens.size(); i++) {
802                     id = (Integer JavaDoc) tokens.get(i);
803                     str = tokenizer.getPatternDescription(id.intValue());
804                     if (i > 0) {
805                         buffer.append(" ");
806                     }
807                     buffer.append(str);
808                 }
809                 buffer.append("]");
810             }
811             if (repeat) {
812                 buffer.append(" *");
813             }
814             return buffer.toString();
815         }
816
817         /**
818          * Creates a new token sequence that is the concatenation of
819          * this sequence and another. A maximum length for the new
820          * sequence is also specified.
821          *
822          * @param length the maximum length of the result
823          * @param seq the other sequence
824          *
825          * @return the concatenated token sequence
826          */

827         public Sequence concat(int length, Sequence seq) {
828             Sequence res = new Sequence(length, this);
829
830             if (seq.repeat) {
831                 res.repeat = true;
832             }
833             length -= this.length();
834             if (length > seq.length()) {
835                 res.tokens.addAll(seq.tokens);
836             } else {
837                 for (int i = 0; i < length; i++) {
838                     res.tokens.add(seq.tokens.get(i));
839                 }
840             }
841             return res;
842         }
843
844         /**
845          * Creates a new token sequence that is a subsequence of this
846          * one.
847          *
848          * @param start the subsequence start position
849          *
850          * @return the new token subsequence
851          */

852         public Sequence subsequence(int start) {
853             Sequence res = new Sequence(length(), this);
854
855             while (start > 0 && res.tokens.size() > 0) {
856                 res.tokens.remove(0);
857                 start--;
858             }
859             return res;
860         }
861     }
862 }
863
Popular Tags