KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > ofbiz > product > product > KeywordSearch


/*
 * $Id: KeywordSearch.java 5540 2005-08-13 20:48:15Z jonesde $
 *
 * Copyright (c) 2001-2005 The Open For Business Project (www.ofbiz.org)
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

23 package org.ofbiz.product.product;
24
25 import java.util.HashMap JavaDoc;
26 import java.util.HashSet JavaDoc;
27 import java.util.Iterator JavaDoc;
28 import java.util.List JavaDoc;
29 import java.util.Map JavaDoc;
30 import java.util.Set JavaDoc;
31 import java.util.StringTokenizer JavaDoc;
32 import java.util.TreeSet JavaDoc;
33
34 import org.ofbiz.base.util.Debug;
35 import org.ofbiz.base.util.UtilMisc;
36 import org.ofbiz.base.util.UtilProperties;
37 import org.ofbiz.base.util.UtilValidate;
38 import org.ofbiz.entity.GenericDelegator;
39 import org.ofbiz.entity.GenericEntityException;
40 import org.ofbiz.entity.GenericValue;
41
/**
 * Does a product search by keyword using the PRODUCT_KEYWORD table.
 * <br/>Special thanks to Glen Thorne and the Weblogic Commerce Server for ideas.
 *
 * @author <a HREF="mailto:jonesde@ofbiz.org">David E. Jones</a>
 * @version $Rev: 5540 $
 * @since 2.1
 */

50 public class KeywordSearch {
51
52     public static final String JavaDoc module = KeywordSearch.class.getName();
53
54     public static Set JavaDoc thesaurusRelsToInclude = new HashSet JavaDoc();
55     public static Set JavaDoc thesaurusRelsForReplace = new HashSet JavaDoc();
56
57     static {
58         thesaurusRelsToInclude.add("KWTR_UF");
59         thesaurusRelsToInclude.add("KWTR_USE");
60         thesaurusRelsToInclude.add("KWTR_CS");
61         thesaurusRelsToInclude.add("KWTR_NT");
62         thesaurusRelsToInclude.add("KWTR_BT");
63         thesaurusRelsToInclude.add("KWTR_RT");
64
65         thesaurusRelsForReplace.add("KWTR_USE");
66         thesaurusRelsForReplace.add("KWTR_CS");
67     }
68
69     public static String JavaDoc getSeparators() {
70         // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_";
71
String JavaDoc seps = UtilProperties.getPropertyValue("prodsearch", "index.keyword.separators", ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_");
72         return seps;
73     }
74     
75     public static String JavaDoc getStopWordBagOr() {
76         return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.or");
77     }
78     public static String JavaDoc getStopWordBagAnd() {
79         return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.and");
80     }
81     
82     public static boolean getRemoveStems() {
83         String JavaDoc removeStemsStr = UtilProperties.getPropertyValue("prodsearch", "remove.stems");
84         return "true".equals(removeStemsStr);
85     }
86     public static Set JavaDoc getStemSet() {
87         String JavaDoc stemBag = UtilProperties.getPropertyValue("prodsearch", "stem.bag");
88         Set JavaDoc stemSet = new TreeSet JavaDoc();
89         if (UtilValidate.isNotEmpty(stemBag)) {
90             String JavaDoc curToken;
91             StringTokenizer JavaDoc tokenizer = new StringTokenizer JavaDoc(stemBag, ": ");
92             while (tokenizer.hasMoreTokens()) {
93                 curToken = tokenizer.nextToken();
94                 stemSet.add(curToken);
95             }
96         }
97         return stemSet;
98     }
99     
100     public static void processForKeywords(String JavaDoc str, Map JavaDoc keywords, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
101         String JavaDoc separators = getSeparators();
102         String JavaDoc stopWordBagOr = getStopWordBagOr();
103         String JavaDoc stopWordBagAnd = getStopWordBagAnd();
104
105         boolean removeStems = getRemoveStems();
106         Set JavaDoc stemSet = getStemSet();
107         
108         processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd);
109     }
110     
111     public static void processKeywordsForIndex(String JavaDoc str, Map JavaDoc keywords, String JavaDoc separators, String JavaDoc stopWordBagAnd, String JavaDoc stopWordBagOr, boolean removeStems, Set JavaDoc stemSet) {
112         processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, false, false, false, false);
113     }
114
115     public static void processForKeywords(String JavaDoc str, Map JavaDoc keywords, String JavaDoc separators, String JavaDoc stopWordBagAnd, String JavaDoc stopWordBagOr, boolean removeStems, Set JavaDoc stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
116         Set JavaDoc keywordSet = makeKeywordSet(str, separators, forSearch);
117         fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems, stemSet, forSearch, anyPrefix, anySuffix, isAnd);
118     }
119     
120     public static void fixupKeywordSet(Set JavaDoc keywordSet, Map JavaDoc keywords, String JavaDoc stopWordBagAnd, String JavaDoc stopWordBagOr, boolean removeStems, Set JavaDoc stemSet, boolean forSearch, boolean anyPrefix, boolean anySuffix, boolean isAnd) {
121         if (keywordSet == null) {
122             return;
123         }
124         
125         Iterator JavaDoc keywordIter = keywordSet.iterator();
126         while (keywordIter.hasNext()) {
127             String JavaDoc token = (String JavaDoc) keywordIter.next();
128             
129             // when cleaning up the tokens the ordering is inportant: check stop words, remove stems, then get rid of 1 character tokens (1 digit okay)
130

131             // check stop words
132
String JavaDoc colonToken = ":" + token + ":";
133             if (forSearch) {
134                 if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd && stopWordBagOr.indexOf(colonToken) >= 0)) {
135                     continue;
136                 }
137             } else {
138                 if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken) >= 0) {
139                     continue;
140                 }
141             }
142             
143             // remove stems
144
if (removeStems) {
145                 Iterator JavaDoc stemIter = stemSet.iterator();
146                 while (stemIter.hasNext()) {
147                     String JavaDoc stem = (String JavaDoc) stemIter.next();
148                     if (token.endsWith(stem)) {
149                         token = token.substring(0, token.length() - stem.length());
150                     }
151                 }
152             }
153             
154             // get rid of all length 0 tokens now
155
if (token.length() == 0) {
156                 continue;
157             }
158             
159             // get rid of all length 1 character only tokens, pretty much useless
160
if (token.length() == 1 && Character.isLetter(token.charAt(0))) {
161                 continue;
162             }
163
164             if (forSearch) {
165                 StringBuffer JavaDoc strSb = new StringBuffer JavaDoc();
166                 if (anyPrefix) strSb.append('%');
167                 strSb.append(token);
168                 if (anySuffix) strSb.append('%');
169                 // replace all %% with %
170
int dblPercIdx = -1;
171                 while ((dblPercIdx = strSb.indexOf("%%")) >= 0) {
172                     //Debug.logInfo("before strSb: " + strSb, module);
173
strSb.replace(dblPercIdx, dblPercIdx+2, "%");
174                     //Debug.logInfo("after strSb: " + strSb, module);
175
}
176                 token = strSb.toString();
177             }
178             
179             // group by word, add up weight
180
Long JavaDoc curWeight = (Long JavaDoc) keywords.get(token);
181             if (curWeight == null) {
182                 keywords.put(token, new Long JavaDoc(1));
183             } else {
184                 keywords.put(token, new Long JavaDoc(curWeight.longValue() + 1));
185             }
186         }
187     }
188
189     public static Set JavaDoc makeKeywordSet(String JavaDoc str, String JavaDoc separators, boolean forSearch) {
190         if (separators == null) separators = getSeparators();
191         
192         Set JavaDoc keywords = new TreeSet JavaDoc();
193         if (str.length() > 0) {
194             if (forSearch) {
195                 // remove %_*? from separators if is for a search
196
StringBuffer JavaDoc sb = new StringBuffer JavaDoc(separators);
197                 if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%"));
198                 if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_"));
199                 if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*"));
200                 if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?"));
201                 separators = sb.toString();
202             }
203             
204             StringTokenizer JavaDoc tokener = new StringTokenizer JavaDoc(str, separators, false);
205             while (tokener.hasMoreTokens()) {
206                 // make sure it is lower case before doing anything else
207
String JavaDoc token = tokener.nextToken().toLowerCase();
208
209                 if (forSearch) {
210                     // these characters will only be present if it is for a search, ie not for indexing
211
token = token.replace('*', '%');
212                     token = token.replace('?', '_');
213                 }
214                 
215                 keywords.add(token);
216             }
217         }
218         return keywords;
219     }
220     
221     public static Set JavaDoc fixKeywordsForSearch(Set JavaDoc keywordSet, boolean anyPrefix, boolean anySuffix, boolean removeStems, boolean isAnd) {
222         Map JavaDoc keywords = new HashMap JavaDoc();
223         fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems, getStemSet(), true, anyPrefix, anySuffix, isAnd);
224         return keywords.keySet();
225     }
226
227     public static boolean expandKeywordForSearch(String JavaDoc enteredKeyword, Set JavaDoc addToSet, GenericDelegator delegator) {
228         boolean replaceEnteredKeyword = false;
229
230         try {
231             List JavaDoc thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword", enteredKeyword));
232             Iterator JavaDoc thesaurusIter = thesaurusList.iterator();
233             while (thesaurusIter.hasNext()) {
234                 GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next();
235                 String JavaDoc relationshipEnumId = (String JavaDoc) keywordThesaurus.get("relationshipEnumId");
236                 if (thesaurusRelsToInclude.contains(relationshipEnumId)) {
237                     addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"), null, true));
238                     if (thesaurusRelsForReplace.contains(relationshipEnumId)) {
239                         replaceEnteredKeyword = true;
240                     }
241                 }
242             }
243         } catch (GenericEntityException e) {
244             Debug.logError(e, "Error expanding entered keyword", module);
245         }
246
247         Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet, module);
248         return replaceEnteredKeyword;
249     }
250
251     public static void induceKeywords(GenericValue product) throws GenericEntityException {
252         if (product == null) return;
253         KeywordIndex.indexKeywords(product, false);
254     }
255     
256     public static void induceKeywords(GenericValue product, boolean doAll) throws GenericEntityException {
257         if (product == null) return;
258         KeywordIndex.indexKeywords(product, doAll);
259     }
260 }
261
Popular Tags