1 2 3 4 package net.nutch.util; 5 6 7 import java.util.Arrays ; 8 import java.util.LinkedList ; 9 import java.util.ListIterator ; 10 11 16 public abstract class TrieStringMatcher { 17 protected TrieNode root; 18 19 protected TrieStringMatcher() { 20 this.root= new TrieNode('\000', false); 21 } 22 23 26 protected class TrieNode implements Comparable { 27 protected TrieNode[] children; 28 protected LinkedList childrenList; 29 protected char nodeChar; 30 protected boolean terminal; 31 32 38 TrieNode(char nodeChar, boolean isTerminal) { 39 this.nodeChar= nodeChar; 40 this.terminal= isTerminal; 41 this.childrenList= new LinkedList (); 42 } 43 44 48 boolean isTerminal() { 49 return terminal; 50 } 51 52 58 TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) { 59 if (childrenList == null) { 60 childrenList= new LinkedList (); 61 childrenList.addAll(Arrays.asList(children)); 62 children= null; 63 } 64 65 if (childrenList.size() == 0) { 66 TrieNode newNode= new TrieNode(nextChar, isTerminal); 67 childrenList.add(newNode); 68 return newNode; 69 } 70 71 ListIterator iter= childrenList.listIterator(); 72 TrieNode node= (TrieNode) iter.next(); 73 while ( (node.nodeChar < nextChar) && iter.hasNext() ) 74 node= (TrieNode) iter.next(); 75 76 if (node.nodeChar == nextChar) { 77 node.terminal= node.terminal | isTerminal; 78 return node; 79 } 80 81 if (node.nodeChar > nextChar) 82 iter.previous(); 83 84 TrieNode newNode= new TrieNode(nextChar, isTerminal); 85 iter.add(newNode); 86 return newNode; 87 } 88 89 94 TrieNode getChild(char nextChar) { 95 if (children == null) { 96 children= (TrieNode[]) 97 childrenList.toArray(new TrieNode[childrenList.size()]); 98 childrenList= null; 99 Arrays.sort(children); 100 } 101 102 int min= 0; 103 int max= children.length - 1; 104 int mid= 0; 105 while (min < max) { 106 mid= (min + max) / 2; 107 if (children[mid].nodeChar == nextChar) 108 return children[mid]; 109 if (children[mid].nodeChar < nextChar) 110 min= mid + 1; 111 else max= mid - 1; 113 } 114 115 if (min == max) 116 if (children[min].nodeChar == nextChar) 117 return children[min]; 118 119 return null; 120 } 121 122 public int compareTo(Object o) { 123 TrieNode other= (TrieNode) o; 124 if (this.nodeChar < other.nodeChar) 125 return -1; 126 if (this.nodeChar == other.nodeChar) 127 return 0; 128 return 1; 130 } 131 } 132 133 138 protected final TrieNode matchChar(TrieNode node, String s, int idx) { 139 return node.getChild(s.charAt(idx)); 140 } 141 142 148 protected final void addPatternForward(String s) { 149 TrieNode node= root; 150 int stop= s.length() - 1; 151 int i; 152 if (s.length() > 0) { 153 for (i= 0; i < stop; i++) 154 node= node.getChildAddIfNotPresent(s.charAt(i), false); 155 node= node.getChildAddIfNotPresent(s.charAt(i), true); 156 } 157 } 158 159 165 protected final void addPatternBackward(String s) { 166 TrieNode node= root; 167 if (s.length() > 0) { 168 for (int i= s.length()-1; i > 0; i--) 169 node= node.getChildAddIfNotPresent(s.charAt(i), false); 170 node= node.getChildAddIfNotPresent(s.charAt(0), true); 171 } 172 } 173 174 178 public abstract boolean matches(String input); 179 180 185 public abstract String shortestMatch(String input); 186 187 192 public abstract String longestMatch(String input); 193 194 } 195 | Popular Tags |