KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > jforum > dao > generic > GenericSearchIndexerDAO


1 /*
2  * Copyright (c) Rafael Steil
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms,
6  * with or without modification, are permitted provided
7  * that the following conditions are met:
8  *
9  * 1) Redistributions of source code must retain the above
10  * copyright notice, this list of conditions and the
11  * following disclaimer.
12  * 2) Redistributions in binary form must reproduce the
13  * above copyright notice, this list of conditions and
14  * the following disclaimer in the documentation and/or
15  * other materials provided with the distribution.
16  * 3) Neither the name of "Rafael Steil" nor
17  * the names of its contributors may be used to endorse
18  * or promote products derived from this software without
19  * specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
22  * HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
23  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
24  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
25  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
27  * THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
32  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
33  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
34  * IN CONTRACT, STRICT LIABILITY, OR TORT
35  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
36  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
37  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
38  *
39  * Created on Feb 22, 2005 4:24:18 PM
40  * The JForum Project
41  * http://www.jforum.net
42  */

43 package net.jforum.dao.generic;
44
45 import java.sql.Connection JavaDoc;
46 import java.sql.PreparedStatement JavaDoc;
47 import java.sql.ResultSet JavaDoc;
48 import java.sql.SQLException JavaDoc;
49 import java.sql.Statement JavaDoc;
50 import java.util.ArrayList JavaDoc;
51 import java.util.Arrays JavaDoc;
52 import java.util.HashSet JavaDoc;
53 import java.util.Iterator JavaDoc;
54 import java.util.List JavaDoc;
55 import java.util.Set JavaDoc;
56 import java.util.StringTokenizer JavaDoc;
57 import net.jforum.entities.Post;
58 import net.jforum.util.preferences.ConfigKeys;
59 import net.jforum.util.preferences.SystemGlobals;
60 import org.apache.log4j.Logger;
61
62 /**
63  * @author Rafael Steil
64  * @version $Id: GenericSearchIndexerDAO.java,v 1.13 2006/01/26 12:09:50 rafaelsteil Exp $
65  */

66 public class GenericSearchIndexerDAO extends AutoKeys implements net.jforum.dao.SearchIndexerDAO
67 {
68     private static final Logger log = Logger.getLogger(GenericSearchIndexerDAO.class);
69     
70     private Connection JavaDoc conn;
71     
72     /**
73      * @see net.jforum.dao.SearchIndexerDAO#setConnection(java.sql.Connection)
74      */

75     public void setConnection(Connection JavaDoc conn)
76     {
77         this.conn = conn;
78     }
79     
80     /**
81      * @see net.jforum.dao.SearchIndexerDAO#indexSearchWords(java.util.List)
82      */

83     public void insertSearchWords(List JavaDoc posts) throws Exception JavaDoc
84     {
85         int minWordSize = SystemGlobals.getIntValue(ConfigKeys.SEARCH_MIN_WORD_SIZE);
86         int maxWordSize = SystemGlobals.getIntValue(ConfigKeys.SEARCH_MAX_WORD_SIZE);
87         int searchMaxWordsMessage = SystemGlobals.getIntValue(ConfigKeys.SEARCH_MAX_WORDS_MESSAGE);
88         String JavaDoc wordFilterRegex = SystemGlobals.getValue(ConfigKeys.SEARCH_WORD_FILTER_REGEX);
89         StringBuffer JavaDoc sb = new StringBuffer JavaDoc(512);
90         
91         // Allow for a set of words to be excluded from indexing...
92
String JavaDoc excludeWordsString = SystemGlobals.getValue("search.exclude.words");
93         Set JavaDoc excludeWords = new HashSet JavaDoc();
94         if (excludeWordsString != null) {
95             String JavaDoc[] words = excludeWordsString.split(",");
96             for (int i = 0; words != null && i < words.length; i++) {
97                 excludeWords.add(words[i].trim());
98             }
99         }
100         
101         String JavaDoc matchSql = SystemGlobals.getSql("SearchModel.associateWordToPost");
102         PreparedStatement JavaDoc words = this.conn.prepareStatement(SystemGlobals.getSql("SearchModel.insertWords"));
103         
104         for (Iterator JavaDoc iter = posts.iterator(); iter.hasNext(); ) {
105             Post p = (Post)iter.next();
106
107             String JavaDoc text = new StringBuffer JavaDoc(p.getText()).append(" ")
108                 .append(p.getSubject()).toString();
109             
110             text = text.toLowerCase().replaceAll("[\\.\\\\\\/~\\^\\&\\(\\)\\-_+=!@;#\\$%\"\'\\[\\]\\{\\}\\?<\\:>,\\*\n\r\t]", " ");
111
112             Set JavaDoc allWords = new HashSet JavaDoc();
113
114             sb.delete(0, sb.length());
115             
116             StringTokenizer JavaDoc st = new StringTokenizer JavaDoc(text, " ");
117             
118             // Go through all words
119
while (st.hasMoreTokens() &&
120                     (searchMaxWordsMessage < 1 || allWords.size() < searchMaxWordsMessage)) {
121                 String JavaDoc w = st.nextToken().trim();
122                 
123                 if (w.length() < minWordSize) {
124                     continue;
125                 }
126                 else if (w.length() > maxWordSize) {
127                     w = w.substring(0, maxWordSize);
128                 }
129                 
130                 if (!allWords.contains(w) && !excludeWords.contains(w) &&
131                         (wordFilterRegex == null || w.matches(wordFilterRegex))) {
132                     allWords.add(w);
133                     sb.append('\'').append(w).append('\'').append(",");
134                 }
135             }
136
137             String JavaDoc in = sb.substring(0, sb.length() - 1);
138             
139             String JavaDoc sql = SystemGlobals.getSql("SearchModel.selectExistingWords");
140             sql = sql.replaceAll("#IN#", in);
141             
142             Statement JavaDoc s = this.conn.createStatement();
143             ResultSet JavaDoc rs = s.executeQuery(sql);
144             
145             List JavaDoc newWords = new ArrayList JavaDoc();
146             
147             while (rs.next()) {
148                 newWords.add(rs.getString("word"));
149             }
150             
151             rs.close();
152             s.close();
153             
154             allWords.removeAll(newWords);
155             
156             // Insert the remaining words
157
for (Iterator JavaDoc witer = allWords.iterator(); witer.hasNext(); ) {
158                 String JavaDoc ww = (String JavaDoc)witer.next();
159                 
160                 words.setString(1, ww);
161                 words.setInt(2, ww.hashCode());
162                 
163                 try {
164                     words.executeUpdate();
165                 }
166                 catch (SQLException JavaDoc e) {
167                     log.error("Cannot index word: \"" + ww + "\"", e);
168                     throw e;
169                 }
170             }
171             
172             sql = matchSql.replaceAll("#ID#", String.valueOf(p.getId())).replaceAll("#IN#", in);
173             
174             Statement JavaDoc match = this.conn.createStatement();
175             match.executeUpdate(sql);
176             match.close();
177         }
178         
179         words.close();
180     }
181     
182     /**
183      * @see net.jforum.dao.SearchIndexerDAO#insertSearchWords(net.jforum.entities.Post)
184      */

185     public void insertSearchWords(final Post post) throws Exception JavaDoc
186     {
187         this.insertSearchWords(Arrays.asList(new Post[] { post }));
188     }
189 }
190
Popular Tags