KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > prefuse > data > search > Trie


1 package prefuse.data.search;
2
3 import java.util.Iterator JavaDoc;
4 import java.util.LinkedList JavaDoc;
5 import java.util.NoSuchElementException JavaDoc;
6
7 import prefuse.data.Tuple;
8
9
10 /**
11  * A trie data structure for fast-lookup of words based on their
12  * prefixes. The name "Trie" is a play on the words "tree" and
13  * "retrieval". This class builds a tree structure representing a set of
14  * words by their prefixes. It is useful for performing prefix-based
15  * searches over large amounts of text in an efficient manner.
16  *
17  * @version 1.0
18  * @author <a HREF="http://jheer.org">jeffrey heer</a>
19  * @see PrefixSearchTupleSet
20  */

21 public class Trie {
22
23     /**
24      * Base class for nodes in the trie structure.
25      */

26     public class TrieNode {
27         boolean isLeaf;
28         int leafCount = 0;
29     }
30     
31     /**
32      * A TrieNode implementation representing a branch in the tree. The
33      * class maintains a list of characters (the next character in the
34      * prefix) and associated children TrieNodes for each.
35      */

36     public class TrieBranch extends TrieNode {
37         char[] chars = new char[] {0};
38         TrieNode[] children = new TrieNode[1];
39     }
40     
41     /**
42      * A TrieNode implementation representing a leaf in the tree. The class
43      * stores the word and tuple for the leaf, as well as a reference to the
44      * successor leaf node in the trie.
45      */

46     public class TrieLeaf extends TrieNode {
47         public TrieLeaf(String JavaDoc word, Tuple t) {
48             this.word = word;
49             tuple = t;
50             next = null;
51             leafCount = 1;
52         }
53         String JavaDoc word;
54         Tuple tuple;
55         TrieLeaf next;
56     }
57     
58     /**
59      * An iterator for traversing a subtree of the Trie.
60      */

61     public class TrieIterator implements Iterator JavaDoc {
62         private LinkedList JavaDoc queue;
63         public TrieIterator(TrieNode node) {
64             queue = new LinkedList JavaDoc();
65             queue.add(node);
66         }
67         public boolean hasNext() {
68             return !queue.isEmpty();
69         }
70         public Object JavaDoc next() {
71             if ( queue.isEmpty() )
72                 throw new NoSuchElementException JavaDoc();
73             
74             TrieNode n = (TrieNode)queue.removeFirst();
75             Object JavaDoc o;
76             if ( n instanceof TrieLeaf ) {
77                 TrieLeaf l = (TrieLeaf)n;
78                 o = l.tuple;
79                 if ( l.next != null )
80                     queue.addFirst(l.next);
81                 return o;
82             } else {
83                 TrieBranch b = (TrieBranch)n;
84                 for ( int i = b.chars.length-1; i > 0; i-- ) {
85                     queue.addFirst(b.children[i]);
86                 }
87                 if ( b.children[0] != null )
88                     queue.addFirst(b.children[0]);
89                 return next();
90             }
91         }
92         public void remove() {
93             throw new UnsupportedOperationException JavaDoc();
94         }
95     } // end of inner clas TrieIterator
96

97     private TrieBranch root = new TrieBranch();
98     private boolean caseSensitive = false;
99     
100     /**
101      * Create a new Trie with the specified case-sensitivity.
102      * @param caseSensitive true if the index should be case sensitive for
103      * indexed words, false otherwise.
104      */

105     public Trie(boolean caseSensitive) {
106         this.caseSensitive = caseSensitive;
107     }
108     
109     /**
110      * Indicates if this Trie's index takes the case of letters
111      * into account.
112      * @return true if the index is case-sensitive, false otherwise
113      */

114     public boolean isCaseSensitive() {
115         return caseSensitive;
116     }
117     
118     /**
119      * Add a new word to the trie, associated with the given Tuple.
120      * @param word the word to add to the Trie
121      * @param t the Tuple associated with the word
122      */

123     public void addString(String JavaDoc word, Tuple t) {
124         TrieLeaf leaf = new TrieLeaf(word,t);
125         addLeaf(root, leaf, 0);
126     }
127     
128     /**
129      * Remove a word/Tuple pair from the trie.
130      * @param word the word to remove
131      * @param t the associate Tuple to remove
132      */

133     public void removeString(String JavaDoc word, Tuple t) {
134         removeLeaf(root, word, t, 0);
135     }
136     
137     private final int getIndex(char[] chars, char c) {
138         for ( int i=0; i<chars.length; i++ )
139             if ( chars[i] == c ) return i;
140         return -1;
141     }
142     
143     private final char getChar(String JavaDoc s, int i) {
144         char c = ( i < 0 || i >= s.length() ? 0 : s.charAt(i) );
145         return ( caseSensitive ? c : Character.toLowerCase(c) );
146     }
147     
148     private final TrieNode equalityCheck(String JavaDoc word, TrieLeaf l) {
149         if ( caseSensitive ) {
150             return l.word.startsWith(word) ? l : null;
151         } else {
152             // do our own looping to avoid string allocation for case change
153
int len = word.length();
154             if ( len > l.word.length() ) return null;
155             for ( int i=0; i<len; ++i ) {
156                 char c1 = Character.toLowerCase(word.charAt(i));
157                 char c2 = Character.toLowerCase(l.word.charAt(i));
158                 if ( c1 != c2 ) return null;
159             }
160             return l;
161         }
162     }
163     
164     private boolean removeLeaf(TrieBranch b, String JavaDoc word, Tuple t, int depth) {
165         char c = getChar(word, depth);
166         int i = getIndex(b.chars, c);
167         
168         if ( i == -1 ) {
169             // couldn't find leaf
170
return false;
171         } else {
172             TrieNode n = b.children[i];
173             if ( n instanceof TrieBranch ) {
174                 TrieBranch tb = (TrieBranch)n;
175                 boolean rem = removeLeaf(tb, word, t, depth+1);
176                 if ( rem ) {
177                     b.leafCount--;
178                     if ( tb.leafCount == 1 )
179                         b.children[i] = tb.children[tb.children[0]!=null?0:1];
180                 }
181                 return rem;
182             } else {
183                 TrieLeaf nl = (TrieLeaf)n;
184                 if ( nl.tuple == t ) {
185                     b.children[i] = nl.next;
186                     if ( nl.next == null )
187                         repairBranch(b,i);
188                     b.leafCount--;
189                     return true;
190                 } else {
191                     TrieLeaf nnl = nl.next;
192                     while ( nnl != null && nnl.tuple != t ) {
193                         nl = nnl; nnl = nnl.next;
194                     }
195                     if ( nnl == null )
196                         return false; // couldn't find leaf
197

198                     // update leaf counts
199
for ( TrieLeaf tl = (TrieLeaf)n; tl.tuple != t; tl = tl.next )
200                         tl.leafCount--;
201                     
202                     nl.next = nnl.next;
203                     b.leafCount--;
204                     return true;
205                 }
206             }
207         }
208     }
209     
210     private void repairBranch(TrieBranch b, int i) {
211         if ( i == 0 ) {
212             b.children[0] = null;
213         } else {
214             int len = b.chars.length;
215             char[] nchars = new char[len-1];
216             TrieNode[] nkids = new TrieNode[len-1];
217             System.arraycopy(b.chars,0,nchars,0,i);
218             System.arraycopy(b.children,0,nkids,0,i);
219             System.arraycopy(b.chars,i+1,nchars,i,len-i-1);
220             System.arraycopy(b.children,i+1,nkids,i,len-i-1);
221             b.chars = nchars;
222             b.children = nkids;
223         }
224     }
225     
226     private void addLeaf(TrieBranch b, TrieLeaf l, int depth) {
227         b.leafCount += l.leafCount;
228         
229         char c = getChar(l.word, depth);
230         int i = getIndex(b.chars, c);
231         
232         if ( i == -1 ) {
233             addChild(b,l,c);
234         } else {
235             TrieNode n = b.children[i];
236             if ( n == null ) {
237                 // we have completely spelled out the word
238
b.children[i] = l;
239             } else if ( n instanceof TrieBranch ) {
240                 // recurse down the tree
241
addLeaf((TrieBranch)n,l,depth+1);
242             } else {
243                 // node is a leaf, need to do a split?
244
TrieLeaf nl = (TrieLeaf)n;
245                 if ( i==0 || (caseSensitive ? nl.word.equals(l.word)
246                                   : nl.word.equalsIgnoreCase(l.word)) )
247                 {
248                     // same word, so chain the entries
249
for ( ; nl.next != null; nl = nl.next )
250                         nl.leafCount++;
251                     nl.leafCount++;
252                     nl.next = l;
253                 } else {
254                     // different words, need to do a split
255
TrieBranch nb = new TrieBranch();
256                     b.children[i] = nb;
257                     addLeaf(nb,nl,depth+1);
258                     addLeaf(nb,l,depth+1);
259                 }
260             }
261         }
262     }
263     
264     private void addChild(TrieBranch b, TrieNode n, char c) {
265         int len = b.chars.length;
266         char[] nchars = new char[len+1];
267         TrieNode[] nkids = new TrieNode[len+1];
268         System.arraycopy(b.chars,0,nchars,0,len);
269         System.arraycopy(b.children,0,nkids,0,len);
270         nchars[len] = c;
271         nkids[len] = n;
272         b.chars = nchars;
273         b.children = nkids;
274     }
275     
276     /**
277      * Look up the given word in this Trie. If a match is found, a TrieNode
278      * is returned. This node is the root of a subtree containing all the
279      * matches to the query.
280      * @param word the word to lookup
281      * @return the TrieNode root of the subtree containing all matches. A
282      * null value is returned if no match is found.
283      */

284     public TrieNode find(String JavaDoc word) {
285         return (word.length() < 1 ? null : find(word, root, 0));
286     }
287     
288     private TrieNode find(String JavaDoc word, TrieBranch b, int depth) {
289         char c = getChar(word, depth);
290         int i = getIndex(b.chars, c);
291         if ( i == -1 ) {
292             return null; // not in trie
293
} else if ( word.length()-1 == depth ) {
294             return b.children[i]; // end of search
295
} else if ( b.children[i] instanceof TrieLeaf ) {
296             return equalityCheck(word, (TrieLeaf)b.children[i]);
297         } else {
298             return find(word, (TrieBranch)b.children[i], depth+1); // recurse
299
}
300     }
301     
302 } // end of class Trie
303
Popular Tags