KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > ant > internal > ui > dtd > schema > NfmParser


1 /*******************************************************************************
2  * Copyright (c) 2002, 2005 Object Factory Inc.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * Object Factory Inc. - Initial implementation
10  *******************************************************************************/

11 package org.eclipse.ant.internal.ui.dtd.schema;
12
13 import com.ibm.icu.text.MessageFormat;
14 import java.util.ArrayList JavaDoc;
15 import java.util.HashMap JavaDoc;
16 import java.util.Iterator JavaDoc;
17 import java.util.List JavaDoc;
18
19 import org.eclipse.ant.internal.ui.dtd.ParseError;
20 import org.eclipse.ant.internal.ui.dtd.util.SortedMap;
21 import org.eclipse.ant.internal.ui.dtd.util.SortedMapFactory;
22
23 /**
24  * NfmParser parses an NFA and returns an equivalent DFA if it can do so in
25  * linear time and space (in terms of the original NFA).<p>
26  *
27  * As used here, NfmParser is called lazily when someone actually asks for a
28  * Dfm. This is for performance reasons. Why go to the work of calculating all
29  * those Dfms if nobody ever uses them?
30  *
31  * Well-formed errors in content models have already been detected. The only
32  * error that can arise in NfmParser is an ambiguity error.
33  *
34  * Bruggemann-Klein showed that if an NFA is 1-unambiguous, an epsilon-free NFA
35  * constructed from it in linear time is actually a DFA. This is obvious
36  * in NfmParser. The algorithm works by removing all ambiguous transitions as
37  * the graph is constructed, then proving that the reduced graph is equivalent
38  * to the original in time linear in the number of ambiguous transitions.
39  *
40  * An effort is made to keep the DFA small, but there is no optimization
41  * step, as DFAs are small anyway, with some linear inflation around *.
42  * In a pathological case, like the classical (a*,a*,a*,..., a*) the number
43  * of transitions in the DFA can be quadratic but this algorithm will not blow
44  * up exponentially.
45  *
46  * @author Bob Foster
47  */

48 public class NfmParser {
49     
50     public Dfm parse(Nfm nfm) throws ParseError {
51         
52         // Parse nfm into dfm
53

54         Dfm dfm = parseStart(nfm.getStart(), nfm.getStop());
55         
56         // Make list of dfms in graph
57

58         ArrayList JavaDoc dfms = new ArrayList JavaDoc();
59         collect(dfm, dfms);
60         
61         // Detect accept conflicts
62

63         HashMap JavaDoc duplicates = new HashMap JavaDoc();
64         detect(dfms, duplicates);
65         
66         // Replace duplicate dfms in graph
67

68         replace(dfms, duplicates);
69         
70         // Allow nfm memory to be re-used
71

72         Nfm.free(nfm);
73         NfmNode.freeAll();
74         
75         return dfm;
76     }
77     
78     private void reportError(String JavaDoc name) throws ParseError {
79         throw new ParseError(MessageFormat.format(AntDTDSchemaMessages.NfmParser_Ambiguous, new String JavaDoc[]{name}));
80     }
81
82     public static void collect(Dfm dfm, List JavaDoc dfms) {
83         dfms.add(dfm);
84         collect1(dfm, dfms);
85     }
86     
87     private static void collect1(Dfm dfm, List JavaDoc dfms) {
88         Object JavaDoc[] follows = dfm.getValues();
89         if (follows != null) {
90             for (int i = 0; i < follows.length; i++) {
91                 Dfm follow = (Dfm) follows[i];
92                 if (!dfms.contains(follow)) {
93                     dfms.add(follow);
94                     collect1(follow, dfms);
95                 }
96             }
97         }
98     }
99
100
101     /**
102      * Replace duplicate dfms found during conflict resolution.
103      */

104     private void replace(ArrayList JavaDoc dfms, HashMap JavaDoc removed) {
105         for (int i = 0; i < dfms.size(); i++) {
106             Dfm dfm = (Dfm) dfms.get(i);
107             Object JavaDoc[] follows = dfm.getValues();
108             if (follows != null) {
109                 for (int j = 0; j < follows.length; j++) {
110                     Dfm replacement, follow = (Dfm) follows[j];
111                     while ((replacement = (Dfm) removed.get(follow)) != null)
112                         follow = replacement;
113                     follows[j] = follow;
114                 }
115             }
116         }
117         
118         // release dfms so can be re-used
119
Iterator JavaDoc rit = removed.keySet().iterator();
120         while (rit.hasNext())
121             Dfm.free((Dfm)rit.next());
122     }
123
124
125     /**
126      * Detect conflicts in each state. Two transitions are a potential conflict
127      * if they accept the same string value. They are an actual conflict if
128      * their follow dfms are not identical and they are an actual ambiguity if
129      * the transition atoms of the follow dfms are not pairwise identical.
130      * This is derived from the rule of Bruggemann-Klein, which determines
131      * that (a|a)b is not ambiguous, but both (a,b)|(a,c) and (a,b)|(a,b) are.
132      * The latter might be surprising, but that's committee work for you.
133      * If two transitions are not ambiguous, one can be removed without
134      * affecting the language accepted, and thus we have converted our
135      * epsilon-free NFA into a DFA. If any two transitions are ambiguous,
136      * we report an error and our responsibility ends. Note that no transitions
137      * can be removed until all have been checked; this might disguise the
138      * ambiguity in, e.g., ((a|a),b,(a|a))*.
139      */

140     private void detect(ArrayList JavaDoc dfms, HashMap JavaDoc duplicates) throws ParseError {
141         for (Iterator JavaDoc iter = dfms.iterator(); iter.hasNext();) {
142             Dfm dfm = (Dfm) iter.next();
143             
144             Object JavaDoc[] accepts = dfm.getKeys();
145             Object JavaDoc[] follows = dfm.getValues();
146             if (accepts != null) {
147                 String JavaDoc last = null;
148                 for (int i = 0, lasti = -1; i < accepts.length; i++) {
149                     String JavaDoc accept = accepts[i].toString();
150                     // accepts strings are interned allowing identity comparison
151

152                     if (last != null && last == accept) {
153                         if (follows[i] != follows[lasti])
154                             checkConflict(new Conflict(accept, (Dfm)follows[lasti], (Dfm)follows[i]));
155                     }
156                     else {
157                         last = accept;
158                         lasti = i;
159                     }
160                 }
161             }
162         }
163         
164         // once more for removal
165

166         for (Iterator JavaDoc iter = dfms.iterator(); iter.hasNext();) {
167             Dfm dfm = (Dfm) iter.next();
168             
169             // record conflicts
170
Object JavaDoc[] accepts = dfm.getKeys();
171             Object JavaDoc[] follows = dfm.getValues();
172             boolean remove = false;
173             if (accepts != null) {
174                 boolean[] removes = new boolean[accepts.length];
175                 String JavaDoc last = null;
176                 for (int i = 0, lasti = -1; i < accepts.length; i++) {
177                     String JavaDoc accept = accepts[i].toString();
178                     
179                     if (last != null && last == accept) {
180                         remove = true;
181                         removes[i] = true;
182                         if (follows[i] != follows[lasti]) {
183                             Dfm dfmhi = (Dfm) follows[i];
184                             Dfm dfmlo = (Dfm) follows[lasti];
185                             if (dfmhi.id < dfmlo.id) {
186                                 Dfm tmp = dfmhi;
187                                 dfmhi = dfmlo;
188                                 dfmlo = tmp;
189                             }
190                             Dfm dup = (Dfm) duplicates.get(dfmhi);
191                             if (dup == null || dfmlo.id < dup.id) {
192                                 duplicates.put(dfmhi, dfmlo);
193                             } else {
194                                 duplicates.put(dfmlo, dup);
195                             }
196                         }
197                     }
198                     else {
199                         last = accept;
200                         lasti = i;
201                     }
202                 }
203             
204                 if (remove) {
205                     SortedMap map = dfm.getMap();
206                     int i = 0;
207                     for (Iterator JavaDoc iterator = map.keyIterator(); iterator.hasNext(); i++) {
208                         iterator.next();
209                         if (removes[i])
210                             iterator.remove();
211                     }
212                     SortedMapFactory.freeMap(map);
213                 }
214             }
215         }
216     }
217     
218     /**
219      * Check conflict and report ambiguity.
220      * @param conflict Potential ambiguity
221      */

222     private void checkConflict(Conflict conflict) throws ParseError {
223         if (conflict.dfm1.accepting != conflict.dfm2.accepting) {
224             reportError(conflict.name);
225         }
226         Object JavaDoc[] accept1 = conflict.dfm1.getKeys();
227         Object JavaDoc[] accept2 = conflict.dfm2.getKeys();
228         if ((accept1 == null) != (accept2 == null)) {
229             reportError(conflict.name);
230         }
231         if (accept1 != null) {
232             if (accept1.length != accept2.length) {
233                 reportError(conflict.name);
234             }
235             for (int j = 0; j < accept2.length; j++) {
236                 if (accept1[j] != accept2[j]) {
237                     reportError(conflict.name);
238                 }
239             }
240         }
241     }
242
243     /**
244      * Recursive parse that visits every node reachable from
245      * the start symbol.
246      */

247     private Dfm parseStart(NfmNode start, NfmNode accept) {
248         // mark the start node
249
Dfm result = Dfm.dfm(false);
250         start.dfm = result;
251
252         // we can minimize alias dfms by marking all starting transfer links
253
while (start.next1 != null && start.next2 == null && start.symbol == null) {
254             start = start.next1;
255             start.dfm = result;
256         }
257         
258         Dfm parsed = parse(1, start, accept);
259         result.merge(parsed);
260         
261         Dfm.free(parsed);
262         
263         return result;
264     }
265     
266     private void parseNext(int mark, Dfm result, NfmNode start, NfmNode accept) {
267         Dfm parsed = parse(mark+1, start, accept);
268         result.merge(parsed);
269         
270         Dfm.free(parsed);
271     }
272     
273     /**
274      * Recursive parse that visits every node reachable from
275      * the start symbol.
276      */

277     private Dfm parse(int mark, NfmNode start, NfmNode accept) {
278         
279         // eliminate useless recursion (note that accept node has no branches)
280
while (start.next1 != null && start.next2 == null && start.symbol == null)
281             start = start.next1;
282             
283         // if we reached the accept node, return an empty dfm that accepts
284
if (start == accept)
285             return Dfm.dfm(true);
286         
287         // for a symbol, construct a dfm that accepts the symbol
288
if (start.symbol != null) {
289             Dfm nextdfm = null;
290             NfmNode next = start.next1, snext = next;
291             while (snext.dfm == null && snext.next1 != null && snext.next2 == null && snext.symbol == null)
292                 snext = snext.next1;
293             if (snext.dfm != null) {
294                 for (NfmNode n = next; n != snext; n = n.next1)
295                     n.dfm = snext.dfm;
296                 nextdfm = snext.dfm;
297             }
298             else {
299                 nextdfm = Dfm.dfm(false);
300                 snext.dfm = nextdfm;
301                 for (NfmNode n = next; n != snext; n = n.next1)
302                     n.dfm = nextdfm;
303                 parseNext(mark, nextdfm, snext, accept);
304             }
305             Dfm dfm = Dfm.dfm(start.symbol, nextdfm);
306             return dfm;
307         }
308         
309         // otherwise, follow both branches and return the combined result
310
Dfm dfm1 = null, dfm2 = null;
311         int saveMark;
312         if (start.next1 != null && start.next1.mark != mark) {
313             saveMark = start.next1.mark;
314             start.next1.mark = mark;
315             dfm1 = parse(mark, start.next1, accept);
316             start.next1.mark = saveMark;
317         }
318         if (start.next2 != null && start.next2.mark != mark) {
319             saveMark = start.next2.mark;
320             start.next2.mark = mark;
321             dfm2 = parse(mark, start.next2, accept);
322             start.next2.mark = saveMark;
323         }
324             
325         if (dfm2 != null) {
326             if (dfm1 != null)
327                 dfm1.merge(dfm2);
328             else
329                 dfm1 = dfm2;
330         }
331         return dfm1;
332     }
333
334     private static class Conflict {
335         public String JavaDoc name;
336         public Dfm dfm1, dfm2;
337         public Conflict(String JavaDoc name, Dfm dfm1, Dfm dfm2) {
338             this.name = name;
339             this.dfm1 = dfm1;
340             this.dfm2 = dfm2;
341         }
342         public int hashCode() {
343             return dfm1.hashCode() + dfm2.hashCode();
344         }
345         public boolean equals(Object JavaDoc o) {
346             if (o == this)
347                 return true;
348             if (!(o instanceof Conflict))
349                 return false;
350             Conflict other = (Conflict) o;
351             return (dfm1 == other.dfm1 && dfm2 == other.dfm2)
352                 || (dfm1 == other.dfm2 && dfm2 == other.dfm1);
353         }
354     }
355     
356 }
357
Popular Tags