KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > info > magnolia > cms > taglibs > util > SearchResultSnippetTag


1 /**
2  *
3  * Magnolia and its source-code is licensed under the LGPL.
4  * You may copy, adapt, and redistribute this file for commercial or non-commercial use.
5  * When copying, adapting, or redistributing this document in keeping with the guidelines above,
6  * you are required to provide proper attribution to obinary.
7  * If you reproduce or distribute the document without making any substantive modifications to its content,
8  * please use the following attribution line:
9  *
10  * Copyright 1993-2006 obinary Ltd. (http://www.obinary.com) All rights reserved.
11  *
12  */

13 package info.magnolia.cms.taglibs.util;
14
15 import info.magnolia.cms.core.Content;
16 import info.magnolia.cms.core.ItemType;
17 import info.magnolia.cms.core.NodeData;
18
19 import java.io.IOException JavaDoc;
20 import java.util.ArrayList JavaDoc;
21 import java.util.Collection JavaDoc;
22 import java.util.Iterator JavaDoc;
23
24 import javax.jcr.PropertyType;
25 import javax.servlet.jsp.JspException JavaDoc;
26 import javax.servlet.jsp.JspWriter JavaDoc;
27 import javax.servlet.jsp.tagext.TagSupport JavaDoc;
28
29 import org.apache.commons.lang.ArrayUtils;
30 import org.apache.commons.lang.StringUtils;
31 import org.apache.commons.lang.exception.NestableRuntimeException;
32 import org.slf4j.Logger;
33 import org.slf4j.LoggerFactory;
34
35
36 /**
37  * @author Fabrizio Giustina
38  * @version $Revision: 6606 $ ($Author: fgiust $)
39  */

40 public class SearchResultSnippetTag extends TagSupport JavaDoc {
41
42     /**
43      * Stable serialVersionUID.
44      */

45     private static final long serialVersionUID = 222L;
46
47     /**
48      * Logger.
49      */

50     private static Logger log = LoggerFactory.getLogger(SearchResultSnippetTag.class);
51
52     /**
53      * Start level.
54      */

55     private Content page;
56
57     /**
58      * Query, natural language.
59      */

60     private String JavaDoc query;
61
62     /**
63      * Number of chars to include in result.
64      */

65     private int chars = 100;
66
67     /**
68      * Maximum number of snippets to include in result.
69      */

70     private int maxSnippets = 3;
71
72     /**
73      * Setter for <code>query</code>.
74      * @param query The query to set.
75      */

76     public void setQuery(String JavaDoc query) {
77         this.query = query;
78     }
79
80     /**
81      * Setter for <code>chars</code>.
82      * @param chars The chars to set.
83      */

84     public void setChars(int chars) {
85         this.chars = chars;
86     }
87
88     /**
89      * Setter for <code>maxSnippets</code>.
90      * @param maxSnippets The maxSnippets to set.
91      */

92     public void setMaxSnippets(int maxSnippets) {
93         this.maxSnippets = maxSnippets;
94     }
95
96     /**
97      * Setter for <code>page</code>.
98      * @param page The page to set.
99      */

100     public void setPage(Content page) {
101         this.page = page;
102     }
103
104     /**
105      * @see javax.servlet.jsp.tagext.TagSupport#doStartTag()
106      */

107     public int doStartTag() throws JspException JavaDoc {
108
109         JspWriter JavaDoc out = this.pageContext.getOut();
110         try {
111             Iterator JavaDoc iterator = getSnippets().iterator();
112             while (iterator.hasNext()) {
113                 out.println(iterator.next());
114             }
115         }
116         catch (IOException JavaDoc e) {
117             // should never happen
118
throw new NestableRuntimeException(e);
119         }
120         return EVAL_PAGE;
121     }
122
123     /**
124      * Extract a collection of snippets from any paragraph in the given page.
125      * @return a collection of Strings.
126      * @todo avoid overlapping snippets (use regexp insted of simple indexOfs)
127      * @todo only extract snippets from user-configured properties
128      * @todo abbreviate on whitespace and puntuation, detect start of sentences
129      * @todo replace ampersand in regexp
130      * @todo break methods and write junits
131      */

132     public Collection JavaDoc getSnippets() {
133
134         log.debug("collecting snippets"); //$NON-NLS-1$
135

136         Collection JavaDoc snippets = new ArrayList JavaDoc();
137         String JavaDoc[] searchTerms = StringUtils.split(this.query);
138
139         Collection JavaDoc paragraphCollections = this.page.getChildren(ItemType.CONTENTNODE);
140
141         Iterator JavaDoc iterator = paragraphCollections.iterator();
142         outer : while (iterator.hasNext()) {
143             Content paragraphCollection = (Content) iterator.next();
144
145             Collection JavaDoc paragraphs = paragraphCollection.getChildren();
146
147             Iterator JavaDoc parIterator = paragraphs.iterator();
148             while (parIterator.hasNext()) {
149                 Content paragraph = (Content) parIterator.next();
150
151                 log.debug("Iterating on paragraph {}", paragraph); //$NON-NLS-1$
152

153                 Collection JavaDoc properties = paragraph.getNodeDataCollection();
154
155                 Iterator JavaDoc dataIterator = properties.iterator();
156                 while (dataIterator.hasNext()) {
157                     NodeData property = (NodeData) dataIterator.next();
158                     if (property.getType() != PropertyType.BINARY) {
159
160                         String JavaDoc resultString = property.getString();
161
162                         log.debug("Iterating on property {}", property.getName()); //$NON-NLS-1$
163
log.debug("Property value is {}", resultString); //$NON-NLS-1$
164

165                         // a quick and buggy way to avoid configuration properties, we should allow the user to
166
// configure a list of nodeData to search for...
167
if (resultString.length() < 20) {
168                             continue;
169                         }
170
171                         for (int j = 0; j < searchTerms.length; j++) {
172                             String JavaDoc searchTerm = StringUtils.lowerCase(searchTerms[j]);
173
174                             // exclude keywords and words with less than 2 chars
175
if (!ArrayUtils.contains(SimpleSearchTag.KEYWORDS, searchTerm) && searchTerm.length() > 2) {
176
177                                 log.debug("Looking for search term [{}] in [{}]", searchTerm, resultString); //$NON-NLS-1$
178

179                                 // first check, avoid using heavy string replaceAll operations if the search term is not
180
// there
181
if (!StringUtils.contains(resultString.toLowerCase(), searchTerm)) {
182                                     continue;
183                                 }
184
185                                 // strips out html tags using a regexp
186
resultString = stripHtmlTags(resultString);
187
188                                 // only get first matching keyword
189
int pos = resultString.toLowerCase().indexOf(searchTerm);
190                                 if (pos > -1) {
191
192                                     int posEnd = pos + searchTerm.length();
193                                     int from = (pos - chars / 2);
194                                     if (from < 0) {
195                                         from = 0;
196                                     }
197
198                                     int to = from + chars;
199                                     if (to > resultString.length()) {
200                                         to = resultString.length();
201                                     }
202
203                                     StringBuffer JavaDoc snippet = new StringBuffer JavaDoc();
204
205                                     snippet.append(StringUtils.substring(resultString, from, pos));
206                                     snippet.append("<strong>"); //$NON-NLS-1$
207
snippet.append(StringUtils.substring(resultString, pos, posEnd));
208                                     snippet.append("</strong>"); //$NON-NLS-1$
209
snippet.append(StringUtils.substring(resultString, posEnd, to));
210
211                                     if (from > 0) {
212                                         snippet.insert(0, "... "); //$NON-NLS-1$
213
}
214                                     if (to < resultString.length()) {
215                                         snippet.append("... "); //$NON-NLS-1$
216
}
217
218                                     log.debug("Search term found, adding snippet {}", snippet); //$NON-NLS-1$
219

220                                     snippets.add(snippet);
221                                     if (snippets.size() >= this.maxSnippets) {
222
223                                         log.debug("Maximum number of snippets ({}) reached, exiting", //$NON-NLS-1$
224
Integer.toString(this.maxSnippets));
225
226                                         break outer;
227                                     }
228                                 }
229                             }
230                         }
231                     }
232                 }
233             }
234         }
235
236         return snippets;
237     }
238
239     /**
240      * @param resultString
241      * @return
242      */

243     protected String JavaDoc stripHtmlTags(String JavaDoc resultString) {
244         return resultString.replaceAll("\\<(.*?\\s*)*\\>", StringUtils.EMPTY); //$NON-NLS-1$
245
}
246
247     /**
248      * @see javax.servlet.jsp.tagext.TagSupport#release()
249      */

250     public void release() {
251         this.query = null;
252         this.page = null;
253         this.chars = 100;
254         this.maxSnippets = 3;
255         super.release();
256     }
257
258 }
259
Popular Tags