KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > jdt > internal > ui > text > spelling > engine > AbstractSpellDictionary


1 /*******************************************************************************
2  * Copyright (c) 2000, 2007 IBM Corporation and others.
3  * All rights reserved. This program and the accompanying materials
4  * are made available under the terms of the Eclipse Public License v1.0
5  * which accompanies this distribution, and is available at
6  * http://www.eclipse.org/legal/epl-v10.html
7  *
8  * Contributors:
9  * IBM Corporation - initial API and implementation
10  *******************************************************************************/

11
12 package org.eclipse.jdt.internal.ui.text.spelling.engine;
13
14 import java.io.BufferedReader JavaDoc;
15 import java.io.FileNotFoundException JavaDoc;
16 import java.io.IOException JavaDoc;
17 import java.io.InputStream JavaDoc;
18 import java.io.InputStreamReader JavaDoc;
19 import java.net.MalformedURLException JavaDoc;
20 import java.net.URL JavaDoc;
21 import java.nio.charset.Charset JavaDoc;
22 import java.nio.charset.CharsetDecoder JavaDoc;
23 import java.nio.charset.CodingErrorAction JavaDoc;
24 import java.nio.charset.MalformedInputException JavaDoc;
25 import java.util.ArrayList JavaDoc;
26 import java.util.HashMap JavaDoc;
27 import java.util.HashSet JavaDoc;
28 import java.util.Iterator JavaDoc;
29 import java.util.Map JavaDoc;
30 import java.util.Set JavaDoc;
31
32 import org.eclipse.core.runtime.IStatus;
33 import org.eclipse.core.runtime.Status;
34
35 import org.eclipse.core.resources.ResourcesPlugin;
36
37 import org.eclipse.jdt.internal.corext.util.Messages;
38
39 import org.eclipse.jdt.ui.JavaUI;
40 import org.eclipse.jdt.ui.PreferenceConstants;
41
42 import org.eclipse.jdt.internal.ui.JavaPlugin;
43 import org.eclipse.jdt.internal.ui.JavaUIMessages;
44
45 /**
46  * Partial implementation of a spell dictionary.
47  *
48  * @since 3.0
49  */

50 public abstract class AbstractSpellDictionary implements ISpellDictionary {
51
52     /** The bucket capacity */
53     protected static final int BUCKET_CAPACITY= 4;
54
55     /** The word buffer capacity */
56     protected static final int BUFFER_CAPACITY= 32;
57
58     /** The distance threshold */
59     protected static final int DISTANCE_THRESHOLD= 160;
60
61     /** The hash capacity */
62     protected static final int HASH_CAPACITY= 22 * 1024;
63
64     /** The phonetic distance algorithm */
65     private IPhoneticDistanceAlgorithm fDistanceAlgorithm= new DefaultPhoneticDistanceAlgorithm();
66
67     /** The mapping from phonetic hashes to word lists */
68     private final Map JavaDoc fHashBuckets= new HashMap JavaDoc(HASH_CAPACITY);
69
70     /** The phonetic hash provider */
71     private IPhoneticHashProvider fHashProvider= new DefaultPhoneticHashProvider();
72
73     /** Is the dictionary already loaded? */
74     private boolean fLoaded= false;
75     /**
76      * Must the dictionary be loaded?
77      * @since 3.2
78      */

79     private boolean fMustLoad= true;
80
81     /**
82      * Tells whether to strip non-letters at word boundaries.
83      * @since 3.3
84      */

85     boolean fIsStrippingNonLetters= true;
86
87     /**
88      * Returns all candidates with the same phonetic hash.
89      *
90      * @param hash
91      * The hash to retrieve the candidates of
92      * @return Array of candidates for the phonetic hash
93      */

94     protected final Object JavaDoc getCandidates(final String JavaDoc hash) {
95         return fHashBuckets.get(hash);
96     }
97
98     /**
99      * Returns all candidates that have a phonetic hash within a bounded
100      * distance to the specified word.
101      *
102      * @param word
103      * The word to find the nearest matches for
104      * @param sentence
105      * <code>true</code> iff the proposals start a new sentence,
106      * <code>false</code> otherwise
107      * @param hashs
108      * Array of close hashes to find the matches
109      * @return Set of ranked words with bounded distance to the specified word
110      */

111     protected final Set JavaDoc getCandidates(final String JavaDoc word, final boolean sentence, final ArrayList JavaDoc hashs) {
112
113         int distance= 0;
114         String JavaDoc hash= null;
115
116         final StringBuffer JavaDoc buffer= new StringBuffer JavaDoc(BUFFER_CAPACITY);
117         final HashSet JavaDoc result= new HashSet JavaDoc(BUCKET_CAPACITY * hashs.size());
118
119         for (int index= 0; index < hashs.size(); index++) {
120
121             hash= (String JavaDoc)hashs.get(index);
122
123             final Object JavaDoc candidates= getCandidates(hash);
124             if (candidates == null)
125                 continue;
126             else if (candidates instanceof String JavaDoc) {
127                 String JavaDoc candidate= (String JavaDoc)candidates;
128                 distance= fDistanceAlgorithm.getDistance(word, candidate);
129                 if (distance < DISTANCE_THRESHOLD) {
130                     buffer.setLength(0);
131                     buffer.append(candidate);
132                     if (sentence)
133                         buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
134                     result.add(new RankedWordProposal(buffer.toString(), -distance));
135                 }
136                 continue;
137             }
138
139             final ArrayList JavaDoc candidateList= (ArrayList JavaDoc)candidates;
140             for (int offset= 0; offset < candidateList.size(); offset++) {
141
142                 String JavaDoc candidate= (String JavaDoc)candidateList.get(offset);
143                 distance= fDistanceAlgorithm.getDistance(word, candidate);
144
145                 if (distance < DISTANCE_THRESHOLD) {
146
147                     buffer.setLength(0);
148                     buffer.append(candidate);
149
150                     if (sentence)
151                         buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
152
153                     result.add(new RankedWordProposal(buffer.toString(), -distance));
154                 }
155             }
156         }
157         return result;
158     }
159
160     /**
161      * Returns all approximations that have a phonetic hash with smallest
162      * possible distance to the specified word.
163      *
164      * @param word
165      * The word to find the nearest matches for
166      * @param sentence
167      * <code>true</code> iff the proposals start a new sentence,
168      * <code>false</code> otherwise
169      * @param result
170      * Set of ranked words with smallest possible distance to the
171      * specified word
172      */

173     protected final void getCandidates(final String JavaDoc word, final boolean sentence, final Set JavaDoc result) {
174
175         int distance= 0;
176         int minimum= Integer.MAX_VALUE;
177
178         StringBuffer JavaDoc buffer= new StringBuffer JavaDoc(BUFFER_CAPACITY);
179
180         final Object JavaDoc candidates= getCandidates(fHashProvider.getHash(word));
181         if (candidates == null)
182             return;
183         else if (candidates instanceof String JavaDoc) {
184             String JavaDoc candidate= (String JavaDoc)candidates;
185             distance= fDistanceAlgorithm.getDistance(word, candidate);
186             buffer.append(candidate);
187             if (sentence)
188                 buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
189             result.add(new RankedWordProposal(buffer.toString(), -distance));
190             return;
191         }
192
193         final ArrayList JavaDoc candidateList= (ArrayList JavaDoc)candidates;
194         final ArrayList JavaDoc matches= new ArrayList JavaDoc(candidateList.size());
195
196         for (int index= 0; index < candidateList.size(); index++) {
197             String JavaDoc candidate= (String JavaDoc)candidateList.get(index);
198             distance= fDistanceAlgorithm.getDistance(word, candidate);
199
200             if (distance <= minimum) {
201                 
202                 if (distance < minimum)
203                     matches.clear();
204
205                 buffer.setLength(0);
206                 buffer.append(candidate);
207
208                 if (sentence)
209                     buffer.setCharAt(0, Character.toUpperCase(buffer.charAt(0)));
210
211                 matches.add(new RankedWordProposal(buffer.toString(), -distance));
212                 minimum= distance;
213             }
214         }
215
216         result.addAll(matches);
217     }
218     
219     /**
220      * Tells whether this dictionary is empty.
221      *
222      * @return <code>true</code> if this dictionary is empty
223      * @since 3.3
224      */

225     protected boolean isEmpty() {
226         return fHashBuckets.size() == 0;
227     }
228
229     /**
230      * Returns the used phonetic distance algorithm.
231      *
232      * @return The phonetic distance algorithm
233      */

234     protected final IPhoneticDistanceAlgorithm getDistanceAlgorithm() {
235         return fDistanceAlgorithm;
236     }
237
238     /**
239      * Returns the used phonetic hash provider.
240      *
241      * @return The phonetic hash provider
242      */

243     protected final IPhoneticHashProvider getHashProvider() {
244         return fHashProvider;
245     }
246
247     /*
248      * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#getProposals(java.lang.String,boolean)
249      */

250     public Set JavaDoc getProposals(final String JavaDoc word, final boolean sentence) {
251
252         try {
253
254             if (!fLoaded) {
255                 synchronized (this) {
256                     fLoaded= load(getURL());
257                     if (fLoaded)
258                         compact();
259                 }
260             }
261
262         } catch (MalformedURLException JavaDoc exception) {
263             // Do nothing
264
}
265
266         final String JavaDoc hash= fHashProvider.getHash(word);
267         final char[] mutators= fHashProvider.getMutators();
268
269         final ArrayList JavaDoc neighborhood= new ArrayList JavaDoc((word.length() + 1) * (mutators.length + 2));
270         neighborhood.add(hash);
271
272         final Set JavaDoc candidates= getCandidates(word, sentence, neighborhood);
273         neighborhood.clear();
274
275         char previous= 0;
276         char next= 0;
277
278         char[] characters= word.toCharArray();
279         for (int index= 0; index < word.length() - 1; index++) {
280
281             next= characters[index];
282             previous= characters[index + 1];
283
284             characters[index]= previous;
285             characters[index + 1]= next;
286
287             neighborhood.add(fHashProvider.getHash(new String JavaDoc(characters)));
288
289             characters[index]= next;
290             characters[index + 1]= previous;
291         }
292
293         final String JavaDoc sentinel= word + " "; //$NON-NLS-1$
294

295         characters= sentinel.toCharArray();
296         int offset= characters.length - 1;
297
298         while (true) {
299
300             for (int index= 0; index < mutators.length; index++) {
301
302                 characters[offset]= mutators[index];
303                 neighborhood.add(fHashProvider.getHash(new String JavaDoc(characters)));
304             }
305
306             if (offset == 0)
307                 break;
308
309             characters[offset]= characters[offset - 1];
310             --offset;
311         }
312
313         char mutated= 0;
314         characters= word.toCharArray();
315
316         for (int index= 0; index < word.length(); index++) {
317
318             mutated= characters[index];
319             for (int mutator= 0; mutator < mutators.length; mutator++) {
320
321                 characters[index]= mutators[mutator];
322                 neighborhood.add(fHashProvider.getHash(new String JavaDoc(characters)));
323             }
324             characters[index]= mutated;
325         }
326
327         characters= word.toCharArray();
328         final char[] deleted= new char[characters.length - 1];
329
330         for (int index= 0; index < deleted.length; index++)
331             deleted[index]= characters[index];
332
333         next= characters[characters.length - 1];
334         offset= deleted.length;
335
336         while (true) {
337
338             neighborhood.add(fHashProvider.getHash(new String JavaDoc(characters)));
339             if (offset == 0)
340                 break;
341
342             previous= next;
343             next= deleted[offset - 1];
344
345             deleted[offset - 1]= previous;
346             --offset;
347         }
348
349         neighborhood.remove(hash);
350         final Set JavaDoc matches= getCandidates(word, sentence, neighborhood);
351
352         if (matches.size() == 0 && candidates.size() == 0)
353             getCandidates(word, sentence, candidates);
354
355         candidates.addAll(matches);
356
357         return candidates;
358     }
359
360     /**
361      * Returns the URL of the dictionary word list.
362      *
363      * @throws MalformedURLException
364      * if the URL could not be retrieved
365      * @return The URL of the dictionary word list
366      */

367     protected abstract URL JavaDoc getURL() throws MalformedURLException JavaDoc;
368
369     /**
370      * Hashes the word into the dictionary.
371      *
372      * @param word
373      * The word to hash in the dictionary
374      */

375     protected final void hashWord(final String JavaDoc word) {
376
377         final String JavaDoc hash= fHashProvider.getHash(word);
378         Object JavaDoc bucket= fHashBuckets.get(hash);
379
380         if (bucket == null) {
381             fHashBuckets.put(hash, word);
382         } else if (bucket instanceof ArrayList JavaDoc) {
383             ((ArrayList JavaDoc)bucket).add(word);
384         } else {
385             ArrayList JavaDoc list= new ArrayList JavaDoc(BUCKET_CAPACITY);
386             list.add(bucket);
387             list.add(word);
388             fHashBuckets.put(hash, list);
389         }
390     }
391
392     /*
393      * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#isCorrect(java.lang.String)
394      */

395     public boolean isCorrect(String JavaDoc word) {
396         word= stripNonLetters(word);
397         try {
398             
399             if (!fLoaded) {
400                 synchronized (this) {
401                     fLoaded= load(getURL());
402                     if (fLoaded)
403                         compact();
404                 }
405             }
406
407         } catch (MalformedURLException JavaDoc exception) {
408             // Do nothing
409
}
410
411         final Object JavaDoc candidates= getCandidates(fHashProvider.getHash(word));
412         if (candidates == null)
413             return false;
414         else if (candidates instanceof String JavaDoc) {
415             String JavaDoc candidate= (String JavaDoc)candidates;
416             if (candidate.equals(word) || candidate.equals(word.toLowerCase()))
417                 return true;
418             return false;
419         }
420         final ArrayList JavaDoc candidateList= (ArrayList JavaDoc)candidates;
421         if (candidateList.contains(word) || candidateList.contains(word.toLowerCase()))
422             return true;
423
424         return false;
425     }
426     
427     /*
428      * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#setStripNonLetters(boolean)
429      * @since 3.3
430      */

431     public void setStripNonLetters(boolean state) {
432         fIsStrippingNonLetters= state;
433     }
434     
435     /**
436      * Strips non-letter characters from the given word.
437      * <p>
438      * This will only happen if the corresponding preference is enabled.
439      * </p>
440      *
441      * @param word the word to strip
442      * @return the stripped word
443      * @since 3.3
444      */

445     protected String JavaDoc stripNonLetters(String JavaDoc word) {
446         if (!fIsStrippingNonLetters)
447             return word;
448         
449         int i= 0;
450         int j= word.length() - 1;
451         while (i <= j && !Character.isLetter(word.charAt(i)))
452             i++;
453         if (i > j)
454             return ""; //$NON-NLS-1$
455

456         while (j > i && !Character.isLetter(word.charAt(j)))
457             j--;
458         
459         return word.substring(i, j+1);
460     }
461
462     /*
463      * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#isLoaded()
464      */

465     public final synchronized boolean isLoaded() {
466         return fLoaded || fHashBuckets.size() > 0;
467     }
468
469     /**
470      * Loads a dictionary word list from disk.
471      *
472      * @param url
473      * The URL of the word list to load
474      * @return <code>true</code> iff the word list could be loaded, <code>false</code>
475      * otherwise
476      */

477     protected synchronized boolean load(final URL JavaDoc url) {
478          if (!fMustLoad)
479              return fLoaded;
480
481         if (url != null) {
482             InputStream JavaDoc stream= null;
483             int line= 0;
484             try {
485                 stream= url.openStream();
486                 if (stream != null) {
487                     String JavaDoc word= null;
488                     
489                     // Setup a reader with a decoder in order to read over malformed input if needed.
490
CharsetDecoder JavaDoc decoder= Charset.forName(getEncoding()).newDecoder();
491                     decoder.onMalformedInput(CodingErrorAction.REPORT);
492                     decoder.onUnmappableCharacter(CodingErrorAction.REPORT);
493                     final BufferedReader JavaDoc reader= new BufferedReader JavaDoc(new InputStreamReader JavaDoc(stream, decoder));
494                     
495                     boolean doRead= true;
496                     while (doRead) {
497                         try {
498                             word= reader.readLine();
499                         } catch (MalformedInputException JavaDoc ex) {
500                             // Tell the decoder to replace malformed input in order to read the line.
501
decoder.onMalformedInput(CodingErrorAction.REPLACE);
502                             decoder.reset();
503                             word= reader.readLine();
504                             decoder.onMalformedInput(CodingErrorAction.REPORT);
505                             
506                             String JavaDoc message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new String JavaDoc[] { word, decoder.replacement(), url.toString() });
507                             IStatus status= new Status(IStatus.ERROR, JavaUI.ID_PLUGIN, IStatus.OK, message, ex);
508                             JavaPlugin.log(status);
509                             
510                             doRead= word != null;
511                             continue;
512                         }
513                         doRead= word != null;
514                         if (doRead)
515                             hashWord(word);
516                     }
517                     return true;
518                 }
519             } catch (FileNotFoundException JavaDoc ex) {
520                 String JavaDoc urlString= url.toString();
521                 String JavaDoc lowercaseUrlString= urlString.toLowerCase();
522                 if (urlString.equals(lowercaseUrlString))
523                     JavaPlugin.log(ex);
524                 else
525                     try {
526                         return load(new URL JavaDoc(lowercaseUrlString));
527                     } catch (MalformedURLException JavaDoc e) {
528                         JavaPlugin.log(e);
529                     }
530             } catch (IOException JavaDoc exception) {
531                 if (line > 0) {
532                     String JavaDoc message= Messages.format(JavaUIMessages.AbstractSpellingDictionary_encodingError, new Object JavaDoc[] { new Integer JavaDoc(line), url.toString() });
533                     IStatus status= new Status(IStatus.ERROR, JavaUI.ID_PLUGIN, IStatus.OK, message, exception);
534                     JavaPlugin.log(status);
535                 } else
536                     JavaPlugin.log(exception);
537             } finally {
538                 fMustLoad= false;
539                 try {
540                     if (stream != null)
541                         stream.close();
542                 } catch (IOException JavaDoc x) {
543                 }
544             }
545         }
546         return false;
547     }
548
549     /**
550      * Compacts the dictionary.
551      *
552      * @since 3.3.
553      */

554     private void compact() {
555         Iterator JavaDoc iter= fHashBuckets.values().iterator();
556         while (iter.hasNext()) {
557             Object JavaDoc element= iter.next();
558             if (element instanceof ArrayList JavaDoc)
559                 ((ArrayList JavaDoc)element).trimToSize();
560         }
561     }
562
563     /**
564      * Sets the phonetic distance algorithm to use.
565      *
566      * @param algorithm
567      * The phonetic distance algorithm
568      */

569     protected final void setDistanceAlgorithm(final IPhoneticDistanceAlgorithm algorithm) {
570         fDistanceAlgorithm= algorithm;
571     }
572
573     /**
574      * Sets the phonetic hash provider to use.
575      *
576      * @param provider
577      * The phonetic hash provider
578      */

579     protected final void setHashProvider(final IPhoneticHashProvider provider) {
580         fHashProvider= provider;
581     }
582
583     /*
584      * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#unload()
585      */

586     public synchronized void unload() {
587         fLoaded= false;
588         fMustLoad= true;
589         fHashBuckets.clear();
590     }
591
592     /*
593      * @see org.eclipse.jdt.ui.text.spelling.engine.ISpellDictionary#acceptsWords()
594      */

595     public boolean acceptsWords() {
596         return false;
597     }
598
599     /*
600      * @see org.eclipse.jdt.internal.ui.text.spelling.engine.ISpellDictionary#addWord(java.lang.String)
601      */

602     public void addWord(final String JavaDoc word) {
603         // Do nothing
604
}
605     
606     /**
607      * Returns the encoding of this dictionary.
608      *
609      * @return the encoding of this dictionary
610      * @since 3.3
611      */

612     protected String JavaDoc getEncoding() {
613         String JavaDoc encoding= JavaPlugin.getDefault().getPreferenceStore().getString(PreferenceConstants.SPELLING_USER_DICTIONARY_ENCODING);
614         if (encoding == null || encoding.length() == 0)
615             encoding= ResourcesPlugin.getEncoding();
616         return encoding;
617     }
618     
619 }
620
Popular Tags