KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > sablecc > sablecc > alphabet > Alphabet


1 /* This file is part of SableCC ( http://sablecc.org ).
2  *
3  * Copyright 2007 Etienne M. Gagnon <egagnon@j-meg.com>
4  * Copyright 2007 Patrick Pelletier <pp.pelletier@gmail.com>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 package org.sablecc.sablecc.alphabet;
20
21 import java.util.Collection JavaDoc;
22 import java.util.Collections JavaDoc;
23 import java.util.Iterator JavaDoc;
24 import java.util.LinkedHashMap JavaDoc;
25 import java.util.LinkedList JavaDoc;
26 import java.util.Map JavaDoc;
27 import java.util.SortedMap JavaDoc;
28 import java.util.SortedSet JavaDoc;
29 import java.util.TreeMap JavaDoc;
30 import java.util.TreeSet JavaDoc;
31
32 import org.sablecc.sablecc.exception.InternalException;
33
34 /**
35  * An alphabet is a set of symbols. Two symbols of an alphabet may not contain
36  * overlapping intervals.
37  */

38
39 public final class Alphabet<T extends Comparable JavaDoc<? super T>> {
40
41     /**
42      * The sorted set of symbols of this alphabet. Includes the complement
43      * symbol, if present.
44      */

45     private SortedSet JavaDoc<Symbol<T>> symbols;
46
47     /**
48      * The complement symbol of this alphabet. Is <code>null</code> when the
49      * alphabet does not contain a complement symbol.
50      */

51     private Symbol<T> complementSymbol;
52
53     /**
54      * A <code>SortedMap</code> that maps each interval contained in a symbol
55      * of this alphabet to its symbol.
56      * <p>
57      * Does not contain mappings for the complement symbol, if present.
58      */

59     private SortedMap JavaDoc<Interval<T>, Symbol<T>> intervalMap;
60
61     /**
62      * Cached string representation. Is <code>null</code> when not yet
63      * computed.
64      */

65     private String JavaDoc toString;
66
67     /**
68      * Constructs a new alphabet with the provided collection of symbols.
69      *
70      * @param symbols
71      * the collection of symbols.
72      * @throws InternalException
73      * if the collection of symbols is <code>null</code>.
74      */

75     public Alphabet(
76             Collection JavaDoc<Symbol<T>> symbols) {
77
78         if (symbols == null) {
79             throw new InternalException("symbols may not be null");
80         }
81
82         init(symbols);
83     }
84
85     /**
86      * Constructs a new alphabet with the provided symbol.
87      *
88      * @param symbol
89      * the symbol.
90      * @throws InternalException
91      * if the symbol is <code>null</code>.
92      */

93     public Alphabet(
94             Symbol<T> symbol) {
95
96         if (symbol == null) {
97             throw new InternalException("symbol may not be null");
98         }
99
100         Collection JavaDoc<Symbol<T>> symbols = new LinkedList JavaDoc<Symbol<T>>();
101         symbols.add(symbol);
102
103         init(symbols);
104     }
105
106     /**
107      * Constructs a new alphabet with the provided interval.
108      *
109      * @param interval
110      * the interval.
111      * @throws InternalException
112      * if the interval is <code>null</code>.
113      */

114     public Alphabet(
115             Interval<T> interval) {
116
117         if (interval == null) {
118             throw new InternalException("interval may not be null");
119         }
120
121         Collection JavaDoc<Symbol<T>> symbols = new LinkedList JavaDoc<Symbol<T>>();
122         symbols.add(new Symbol<T>(interval));
123
124         init(symbols);
125     }
126
127     /**
128      * Constructs an empty alphabet.
129      */

130     public Alphabet() {
131
132         init(new LinkedList JavaDoc<Symbol<T>>());
133     }
134
135     /**
136      * Initializes this alphabet using the provided collection of symbols. This
137      * method must be called by all constructors. It fills the
138      * <code>symbols</code> and <code>intervalMap</code> instance variables
139      * and detects overlapping intervals.
140      *
141      * @param symbols
142      * the collection of symbols.
143      * @throws InternalException
144      * if two distinct symbols have overlapping intervals, if a
145      * symbol is <code>null</code>, or if there are multiple
146      * complement symbols.
147      */

148     private void init(
149             Collection JavaDoc<Symbol<T>> symbols) {
150
151         this.symbols = Collections
152                 .unmodifiableSortedSet(new TreeSet JavaDoc<Symbol<T>>(symbols));
153
154         // compute interval map
155
TreeMap JavaDoc<Interval<T>, Symbol<T>> intervalMap = new TreeMap JavaDoc<Interval<T>, Symbol<T>>();
156
157         for (Symbol<T> symbol : symbols) {
158             if (symbol == null) {
159                 throw new InternalException("symbol may not be null");
160             }
161
162             if (symbol.isComplement()) {
163
164                 if (this.complementSymbol != null) {
165                     throw new InternalException(
166                             "alphabet may not contain multiple complement symbols");
167                 }
168
169                 this.complementSymbol = symbol;
170                 continue;
171             }
172
173             for (Interval<T> interval : symbol.getIntervals()) {
174                 if (intervalMap.put(interval, symbol) != null) {
175                     throw new InternalException(
176                             "distinct symbols may not have overlapping intervals");
177                 }
178             }
179         }
180
181         this.intervalMap = Collections.unmodifiableSortedMap(intervalMap);
182
183         // check for overlapping intervals
184
Interval<T> previous = null;
185         for (Interval<T> current : this.intervalMap.keySet()) {
186             if (previous != null && previous.intersects(current)) {
187                 throw new InternalException(
188                         "distinct symbols may not have overlapping intervals");
189             }
190
191             previous = current;
192         }
193     }
194
195     /**
196      * Returns the set of symbols of this alphabet.
197      *
198      * @return the set of symbols.
199      */

200     public SortedSet JavaDoc<Symbol<T>> getSymbols() {
201
202         return this.symbols;
203     }
204
205     /**
206      * Returns the complement symbol of this alphabet.
207      *
208      * @return the complement symbol.
209      * @throws InternalException
210      * if this alphabet does not contain a complement symbol.
211      */

212     public Symbol<T> getComplementSymbol() {
213
214         if (this.complementSymbol == null) {
215             throw new InternalException(
216                     "this alphabet does not contain a complement symbol");
217         }
218
219         return this.complementSymbol;
220     }
221
222     /**
223      * Returns the interval map of this alphabet. The <code>SortedMap</code>
224      * maps each interval contained in a symbol of this alphabet to its symbol.
225      * <p>
226      * The interval map does not contain mappings to the complement symbol, if
227      * present in this alphabet.
228      *
229      * @return the interval map.
230      */

231     public SortedMap JavaDoc<Interval<T>, Symbol<T>> getIntervalMap() {
232
233         return this.intervalMap;
234     }
235
236     /**
237      * Returns the string representation of this alphabet.
238      *
239      * @return the string representation.
240      */

241     @Override JavaDoc
242     public String JavaDoc toString() {
243
244         if (this.toString == null) {
245             StringBuilder JavaDoc sb = new StringBuilder JavaDoc();
246
247             sb.append("Alphabet:{ ");
248
249             boolean first = true;
250             for (Symbol<T> symbol : this.symbols) {
251                 if (first) {
252                     first = false;
253                 }
254                 else {
255                     sb.append(", ");
256                 }
257
258                 sb.append(symbol);
259             }
260
261             sb.append(" }");
262
263             this.toString = sb.toString();
264         }
265
266         return this.toString;
267     }
268
269     /**
270      * Returns whether this alphabet contains a complement symbol.
271      *
272      * @return <code>true</code> if this alphabet contains a complement
273      * symbol.
274      */

275     public boolean containsComplementSymbol() {
276
277         return this.complementSymbol != null;
278     }
279
280     /**
281      * Merges this alphabet with the provided one.
282      * <p>
283      * Merging two alphabets <code>A</code> and <code>B</code> consists of
284      * creating a new alphabet <code>C</code> containing a minimal number of
285      * symbols, with the following property: For every symbol <code>x</code>
286      * element of <code>(A union B)</code>, there exists a corresponding
287      * subset <code>S</code> of <code>C</code>, such that:
288      * <code>merge(S) == x</code>.
289      *
290      * @param alphabet
291      * the alphabet to merge this one with.
292      * @return an instance of <code>AlphabetMergeResult</code> containing the
293      * merge result.
294      * @throws InternalException
295      * if the provided alphabet is <code>null</code>.
296      */

297     public AlphabetMergeResult<T> mergeWith(
298             Alphabet<T> alphabet) {
299
300         if (alphabet == null) {
301             throw new InternalException("alphabet may not be null");
302         }
303
304         // no need to really merge, when merging with self
305
if (alphabet == this) {
306             return new AlphabetMergeResult<T>(this);
307         }
308
309         /*
310          * In theoretical terms, an alphabet is a set of symbols.
311          *
312          * Merging two alphabets A and B consists of creating a new alphabet C
313          * containing a minimal number of symbols, with the following property:
314          *
315          * For every symbol x element of (A union B), there exists a
316          * corresponding subset S of C, such that: merge(S) == x.
317          *
318          * As a direct consequence, every new symbol w element of C is related
319          * to a pair (x,y) where x is element of (A union {null}) and y is
320          * element of (B union {null}).
321          *
322          * Our algorithm proceeds by finding these pairs to identify the symbols
323          * of the new alphabet.
324          *
325          */

326
327         // First, we compute a map of (symbol pair,interval set)
328
Map JavaDoc<SymbolPair<T>, SortedSet JavaDoc<Interval<T>>> symbolPairIntervalSetMap = computeSymbolPairIntervalSetMap(
329                 this, alphabet);
330
331         // list of new symbols
332
Collection JavaDoc<Symbol<T>> newSymbols = new LinkedList JavaDoc<Symbol<T>>();
333
334         // SortedMaps to map old symbols to sets of new symbols
335
SortedMap JavaDoc<Symbol<T>, SortedSet JavaDoc<Symbol<T>>> alphabet1SymbolMap = new TreeMap JavaDoc<Symbol<T>, SortedSet JavaDoc<Symbol<T>>>();
336         SortedMap JavaDoc<Symbol<T>, SortedSet JavaDoc<Symbol<T>>> alphabet2SymbolMap = new TreeMap JavaDoc<Symbol<T>, SortedSet JavaDoc<Symbol<T>>>();
337
338         // if either alphabets contains a complement symbol, the new alphabet
339
// will contain one.
340
Symbol<T> complementSymbol;
341
342         if (this.containsComplementSymbol()
343                 || alphabet.containsComplementSymbol()) {
344             complementSymbol = new Symbol<T>();
345
346             newSymbols.add(complementSymbol);
347
348             if (this.containsComplementSymbol()) {
349
350                 SortedSet JavaDoc<Symbol<T>> collection = new TreeSet JavaDoc<Symbol<T>>();
351                 collection.add(complementSymbol);
352                 alphabet1SymbolMap.put(this.complementSymbol, collection);
353             }
354
355             if (alphabet.containsComplementSymbol()) {
356
357                 SortedSet JavaDoc<Symbol<T>> collection = new TreeSet JavaDoc<Symbol<T>>();
358                 collection.add(complementSymbol);
359                 alphabet2SymbolMap.put(alphabet.complementSymbol, collection);
360             }
361         }
362         else {
363             complementSymbol = null;
364         }
365
366         for (Map.Entry JavaDoc<SymbolPair<T>, SortedSet JavaDoc<Interval<T>>> entry : symbolPairIntervalSetMap
367                 .entrySet()) {
368
369             Symbol<T> oldSymbol1 = entry.getKey().getSymbol1();
370             Symbol<T> oldSymbol2 = entry.getKey().getSymbol2();
371
372             // if no old (non-complement) symbol matches, don't create a symbol
373
if (oldSymbol1 == null && oldSymbol2 == null) {
374                 continue;
375             }
376
377             // we can make a new symbol that relates to the pair
378
Symbol<T> newSymbol = new Symbol<T>(entry.getValue());
379
380             newSymbols.add(newSymbol);
381
382             // we add the associations in the (old symol, set of new symbols)
383
// maps
384

385             if (oldSymbol1 != null) {
386                 SortedSet JavaDoc<Symbol<T>> collection = alphabet1SymbolMap
387                         .get(oldSymbol1);
388
389                 if (collection == null) {
390                     collection = new TreeSet JavaDoc<Symbol<T>>();
391                     alphabet1SymbolMap.put(oldSymbol1, collection);
392                 }
393
394                 collection.add(newSymbol);
395             }
396             else if (this.containsComplementSymbol()) {
397                 SortedSet JavaDoc<Symbol<T>> collection = alphabet1SymbolMap
398                         .get(this.complementSymbol);
399                 collection.add(newSymbol);
400             }
401
402             if (oldSymbol2 != null) {
403                 SortedSet JavaDoc<Symbol<T>> collection = alphabet2SymbolMap
404                         .get(oldSymbol2);
405
406                 if (collection == null) {
407                     collection = new TreeSet JavaDoc<Symbol<T>>();
408                     alphabet2SymbolMap.put(oldSymbol2, collection);
409                 }
410
411                 collection.add(newSymbol);
412             }
413             else if (alphabet.containsComplementSymbol()) {
414                 SortedSet JavaDoc<Symbol<T>> collection = alphabet2SymbolMap
415                         .get(alphabet.complementSymbol);
416                 collection.add(newSymbol);
417             }
418         }
419
420         return new AlphabetMergeResult<T>(new Alphabet<T>(newSymbols), this,
421                 alphabet1SymbolMap, alphabet, alphabet2SymbolMap);
422     }
423
424     /**
425      * Computes a <code>Map</code> that maps each symbol pair
426      * <code>(x,y)</code> to a set of shared intervals, where <code>x</code>
427      * is a symbol of <code>alphabet1</code> or <code>null</code>, and
428      * <code>y</code> is a symbol of <code>alphabet2</code> or
429      * <code>null</code>.
430      * <p>
431      * This methods does not explicitly take into account complement symbols,
432      * but a <code>null</code> symbol, in a symbol pair, represents complement
433      * symbol.
434      * <p>
435      * The particular property of this implementation is that it does so in
436      * linear time by only mapping pairs that have a non-empty shared interval
437      * set. The intuitive algorithm would have analyzed all possible pairs,
438      * leading to quadratic running time.
439      *
440      * @param alphabet1
441      * the first alphabet.
442      * @param alphabet2
443      * the second alphabet.
444      * @return the (symbol pair,interval set) map.
445      */

446     private static <T extends Comparable JavaDoc<? super T>> Map JavaDoc<SymbolPair<T>, SortedSet JavaDoc<Interval<T>>> computeSymbolPairIntervalSetMap(
447             Alphabet<T> alphabet1,
448             Alphabet<T> alphabet2) {
449
450         Map JavaDoc<SymbolPair<T>, SortedSet JavaDoc<Interval<T>>> symbolPairIntervalSetMap = new LinkedHashMap JavaDoc<SymbolPair<T>, SortedSet JavaDoc<Interval<T>>>();
451
452         /*
453          * We find all intervals of new symbols by analyzing the space starting
454          * from the smallest lower bound of an interval to the highest upper
455          * bound.
456          */

457
458         // currently analyzed sorted map entries
459
Map.Entry JavaDoc<Interval<T>, Symbol<T>> entry1 = null;
460         Map.Entry JavaDoc<Interval<T>, Symbol<T>> entry2 = null;
461
462         // iterators
463
Iterator JavaDoc<Map.Entry JavaDoc<Interval<T>, Symbol<T>>> i1 = alphabet1.intervalMap
464                 .entrySet().iterator();
465         Iterator JavaDoc<Map.Entry JavaDoc<Interval<T>, Symbol<T>>> i2 = alphabet2.intervalMap
466                 .entrySet().iterator();
467
468         AdjacencyRealm<T> adjacencyRealm = null;
469         T lastUpperBound = null;
470
471         while (entry1 != null || entry2 != null || i1.hasNext() || i2.hasNext()) {
472             // if possible, make sure that entry1 and entry2 are filled
473
if (entry1 == null && i1.hasNext()) {
474                 entry1 = i1.next();
475             }
476
477             if (entry2 == null && i2.hasNext()) {
478                 entry2 = i2.next();
479             }
480
481             // Compute the lower bound of the new interval
482
T lowerBound;
483
484             if (lastUpperBound == null) {
485                 // On the first iteration we need to apply a special treatment
486

487                 // We get the adjacencyRealm
488
if (entry1 != null) {
489                     adjacencyRealm = entry1.getKey().getAdjacencyRealm();
490                 }
491                 else {
492                     adjacencyRealm = entry2.getKey().getAdjacencyRealm();
493                 }
494
495                 // We pick the smallest lower bound
496
if (entry1 == null) {
497                     lowerBound = entry2.getKey().getLowerBound();
498                 }
499                 else if (entry2 == null) {
500                     lowerBound = entry1.getKey().getLowerBound();
501                 }
502                 else {
503                     lowerBound = AdjacencyRealm.min(entry1.getKey()
504                             .getLowerBound(), entry2.getKey().getLowerBound());
505                 }
506             }
507             else {
508                 lowerBound = adjacencyRealm.next(lastUpperBound);
509             }
510
511             // compute the upper bound of the new interval
512
T upperBound;
513
514             {
515                 T upperBoundCandidate1 = null;
516                 T upperBoundCandidate2 = null;
517
518                 if (entry1 != null) {
519                     if (lowerBound.compareTo(entry1.getKey().getLowerBound()) < 0) {
520                         upperBoundCandidate1 = adjacencyRealm.previous(entry1
521                                 .getKey().getLowerBound());
522                     }
523                     else {
524                         upperBoundCandidate1 = entry1.getKey().getUpperBound();
525                     }
526                 }
527
528                 if (entry2 != null) {
529                     if (lowerBound.compareTo(entry2.getKey().getLowerBound()) < 0) {
530                         upperBoundCandidate2 = adjacencyRealm.previous(entry2
531                                 .getKey().getLowerBound());
532                     }
533                     else {
534                         upperBoundCandidate2 = entry2.getKey().getUpperBound();
535                     }
536                 }
537
538                 if (upperBoundCandidate1 == null) {
539                     upperBound = upperBoundCandidate2;
540                 }
541                 else if (upperBoundCandidate2 == null) {
542                     upperBound = upperBoundCandidate1;
543                 }
544                 else {
545                     upperBound = AdjacencyRealm.min(upperBoundCandidate1,
546                             upperBoundCandidate2);
547                 }
548             }
549
550             // create new interval, and related symbol pair
551
Interval<T> newInterval = adjacencyRealm.createInterval(lowerBound,
552                     upperBound);
553
554             Symbol<T> symbol1;
555             if (entry1 != null && newInterval.intersects(entry1.getKey())) {
556                 symbol1 = entry1.getValue();
557             }
558             else {
559                 symbol1 = null;
560             }
561
562             Symbol<T> symbol2;
563             if (entry2 != null && newInterval.intersects(entry2.getKey())) {
564                 symbol2 = entry2.getValue();
565             }
566             else {
567                 symbol2 = null;
568             }
569
570             SymbolPair<T> symbolPair = new SymbolPair<T>(symbol1, symbol2);
571
572             // add interval in (symbol pair,interval set) map
573
SortedSet JavaDoc<Interval<T>> intervalSet = symbolPairIntervalSetMap
574                     .get(symbolPair);
575             if (intervalSet == null) {
576                 intervalSet = new TreeSet JavaDoc<Interval<T>>();
577                 symbolPairIntervalSetMap.put(symbolPair, intervalSet);
578             }
579
580             intervalSet.add(newInterval);
581
582             // save last upper bound
583
lastUpperBound = upperBound;
584
585             // update current entries
586
if (entry1 != null
587                     && lastUpperBound
588                             .compareTo(entry1.getKey().getUpperBound()) >= 0) {
589                 entry1 = null;
590             }
591             if (entry2 != null
592                     && lastUpperBound
593                             .compareTo(entry2.getKey().getUpperBound()) >= 0) {
594                 entry2 = null;
595             }
596         }
597
598         return symbolPairIntervalSetMap;
599     }
600 }
601
Popular Tags