KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > alfresco > util > SearchLanguageConversion


/*
 * Copyright (C) 2005 Alfresco, Inc.
 *
 * Licensed under the Mozilla Public License version 1.1
 * with a permitted attribution clause. You may obtain a
 * copy of the License at
 *
 * http://www.alfresco.org/legal/license.txt
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific
 * language governing permissions and limitations under the
 * License.
 */

17 package org.alfresco.util;
18
19 import org.alfresco.repo.search.impl.lucene.QueryParser;
20
21
22 /**
23  * Helper class to provide conversions between different search languages
24  * @author Derek Hulley
25  */

26 public class SearchLanguageConversion
27 {
28     /**
29      * XPath like query language summary:
30      * <ul>
31      * <li>Escape: \</li>
32      * <li>Single char search: _</li>
33      * <li>Multiple char search: %</li>
34      * <li>Reserved: \%_</li>
35      * </ul>
36      */

37     public static LanguageDefinition DEF_XPATH_LIKE = new SimpleLanguageDef('\\', "%", "_", "\\%_");
38     /**
39      * Regular expression query language summary:
40      * <ul>
41      * <li>Escape: \</li>
42      * <li>Single char search: .</li>
43      * <li>Multiple char search: .*</li>
44      * <li>Reserved: \*.+?^$(){}|</li>
45      * </ul>
46      */

47     public static LanguageDefinition DEF_REGEX = new SimpleLanguageDef('\\', ".*", ".", "\\*.+?^$(){}|");
48     /**
49      * Lucene syntax summary: {@link QueryParser#escape(String) Lucene Query Parser}
50      */

51     public static LanguageDefinition DEF_LUCENE = new LuceneLanguageDef();
52     /**
53      * CIFS name patch query language summary:
54      * <ul>
55      * <li>Escape: \ (but not used)</li>
56      * <li>Single char search: ?</li>
57      * <li>Multiple char search: *</li>
58      * <li>Reserved: "*\<>?/:|¬£%&+;</li>
59      * </ul>
60      */

61     public static LanguageDefinition DEF_CIFS = new SimpleLanguageDef('\\', "*", "?", "\"*\\<>?/:|¬£%&+;");
62
63     /**
64      * Escape a string according to the <b>XPath</b> like function syntax.
65      *
66      * @param str the string to escape
67      * @return Returns the escaped string
68      */

69     public static String JavaDoc escapeForXPathLike(String JavaDoc str)
70     {
71         return escape(DEF_XPATH_LIKE, str);
72     }
73     
74     /**
75      * Escape a string according to the <b>regex</b> language syntax.
76      *
77      * @param str the string to escape
78      * @return Returns the escaped string
79      */

80     public static String JavaDoc escapeForRegex(String JavaDoc str)
81     {
82         return escape(DEF_REGEX, str);
83     }
84     
85     /**
86      * Escape a string according to the <b>Lucene</b> query syntax.
87      *
88      * @param str the string to escape
89      * @return Returns the escaped string
90      */

91     public static String JavaDoc escapeForLucene(String JavaDoc str)
92     {
93         return escape(DEF_LUCENE, str);
94     }
95     
96     /**
97      * Generic escaping using the language definition
98      */

99     private static String JavaDoc escape(LanguageDefinition def, String JavaDoc str)
100     {
101         StringBuilder JavaDoc sb = new StringBuilder JavaDoc(str.length() * 2);
102         
103         char[] chars = str.toCharArray();
104         for (int i = 0; i < chars.length; i++)
105         {
106             // first check for reserved chars
107
if (def.isReserved(chars[i]))
108             {
109                 // escape it
110
sb.append(def.escapeChar);
111             }
112             sb.append(chars[i]);
113         }
114         return sb.toString();
115     }
116     
117     /**
118      * Convert an <b>xpath</b> like function clause into a <b>regex</b> query.
119      *
120      * @param xpathLikeClause
121      * @return Returns a valid regular expression that is equivalent to the
122      * given <b>xpath</b> like clause.
123      */

124     public static String JavaDoc convertXPathLikeToRegex(String JavaDoc xpathLikeClause)
125     {
126         return "(?s)" + convert(DEF_XPATH_LIKE, DEF_REGEX, xpathLikeClause);
127     }
128     
129     /**
130      * Convert an <b>xpath</b> like function clause into a <b>Lucene</b> query.
131      *
132      * @param xpathLikeClause
133      * @return Returns a valid <b>Lucene</b> expression that is equivalent to the
134      * given <b>xpath</b> like clause.
135      */

136     public static String JavaDoc convertXPathLikeToLucene(String JavaDoc xpathLikeClause)
137     {
138         return convert(DEF_XPATH_LIKE, DEF_LUCENE, xpathLikeClause);
139     }
140     
141     /**
142      * Convert a <b>CIFS</b> name path into the equivalent <b>Lucene</b> query.
143      *
144      * @param cifsNamePath the CIFS named path
145      * @return Returns a valid <b>Lucene</b> expression that is equivalent to the
146      * given CIFS name path
147      */

148     public static String JavaDoc convertCifsToLucene(String JavaDoc cifsNamePath)
149     {
150         return convert(DEF_CIFS, DEF_LUCENE, cifsNamePath);
151     }
152     
153     public static String JavaDoc convert(LanguageDefinition from, LanguageDefinition to, String JavaDoc query)
154     {
155         char[] chars = query.toCharArray();
156         
157         StringBuilder JavaDoc sb = new StringBuilder JavaDoc(chars.length * 2);
158         
159         boolean escaping = false;
160         
161         for (int i = 0; i < chars.length; i++)
162         {
163             if (escaping) // if we are currently escaping, just escape the current character
164
{
165                 sb.append(to.escapeChar); // the to format escape char
166
sb.append(chars[i]); // the current char
167
escaping = false;
168             }
169             else if (chars[i] == from.escapeChar) // not escaping and have escape char
170
{
171                 escaping = true;
172             }
173             else if (query.startsWith(from.multiCharWildcard, i)) // not escaping but have multi-char wildcard
174
{
175                 // translate the wildcard
176
sb.append(to.multiCharWildcard);
177             }
178             else if (query.startsWith(from.singleCharWildcard, i)) // have single-char wildcard
179
{
180                 // translate the wildcard
181
sb.append(to.singleCharWildcard);
182             }
183             else if (to.isReserved(chars[i])) // reserved character
184
{
185                 sb.append(to.escapeChar).append(chars[i]);
186             }
187             else // just a normal char in both
188
{
189                 sb.append(chars[i]);
190             }
191         }
192         return sb.toString();
193     }
194     
195     /**
196      * Simple store of special characters for a given query language
197      */

198     public static abstract class LanguageDefinition
199     {
200         public final char escapeChar;
201         public final String JavaDoc multiCharWildcard;
202         public final String JavaDoc singleCharWildcard;
203         
204         public LanguageDefinition(char escapeChar, String JavaDoc multiCharWildcard, String JavaDoc singleCharWildcard)
205         {
206             this.escapeChar = escapeChar;
207             this.multiCharWildcard = multiCharWildcard;
208             this.singleCharWildcard = singleCharWildcard;
209         }
210         public abstract boolean isReserved(char ch);
211     }
212     private static class SimpleLanguageDef extends LanguageDefinition
213     {
214         private String JavaDoc reserved;
215         public SimpleLanguageDef(char escapeChar, String JavaDoc multiCharWildcard, String JavaDoc singleCharWildcard, String JavaDoc reserved)
216         {
217             super(escapeChar, multiCharWildcard, singleCharWildcard);
218             this.reserved = reserved;
219         }
220         @Override JavaDoc
221         public boolean isReserved(char ch)
222         {
223             return (reserved.indexOf(ch) > -1);
224         }
225     }
226     private static class LuceneLanguageDef extends LanguageDefinition
227     {
228         private String JavaDoc reserved;
229         public LuceneLanguageDef()
230         {
231             super('\\', "*", "?");
232             init();
233         }
234         /**
235          * Discovers all the reserved chars
236          */

237         private void init()
238         {
239             StringBuilder JavaDoc sb = new StringBuilder JavaDoc(20);
240             for (char ch = 0; ch < 256; ch++)
241             {
242                 char[] chars = new char[] {ch};
243                 String JavaDoc unescaped = new String JavaDoc(chars);
244                 // check it
245
String JavaDoc escaped = QueryParser.escape(unescaped);
246                 if (!escaped.equals(unescaped))
247                 {
248                     // it was escaped
249
sb.append(ch);
250                 }
251             }
252             reserved = sb.toString();
253         }
254         @Override JavaDoc
255         public boolean isReserved(char ch)
256         {
257             return (reserved.indexOf(ch) > -1);
258         }
259     }
260 }
261
Popular Tags