1 5 package org.h2.fulltext; 6 7 import java.sql.Connection ; 8 import java.sql.DatabaseMetaData ; 9 import java.sql.PreparedStatement ; 10 import java.sql.ResultSet ; 11 import java.sql.SQLException ; 12 import java.sql.Statement ; 13 import java.sql.Types ; 14 import java.util.ArrayList ; 15 import java.util.Arrays ; 16 import java.util.HashMap ; 17 import java.util.HashSet ; 18 import java.util.Iterator ; 19 import java.util.StringTokenizer ; 20 21 import org.h2.api.Trigger; 22 import org.h2.tools.SimpleResultSet; 23 import org.h2.util.ByteUtils; 24 import org.h2.util.JdbcUtils; 25 import org.h2.util.StringUtils; 26 import org.h2.value.DataType; 27 28 public class FullText implements Trigger { 29 30 private static final String TRIGGER_PREFIX = "FT_"; 31 private static final String SCHEMA = "FT"; 32 private static final String FIELD_QUERY = "query"; 33 private IndexInfo index; 34 private int[] dataTypes; 35 private PreparedStatement prepInsertWord, prepInsertRow, prepInsertMap; 36 private PreparedStatement prepDeleteRow, prepDeleteMap; 37 private PreparedStatement prepSelectRow; 38 39 48 public static void createIndex(Connection conn, String schema, String table, String columnList) throws SQLException { 49 init(conn); 50 PreparedStatement prep = conn.prepareStatement("INSERT INTO "+SCHEMA+".INDEXES(SCHEMA, TABLE, COLUMNS) VALUES(?, ?, ?)"); 51 prep.setString(1, schema); 52 prep.setString(2, table); 53 prep.setString(3, columnList); 54 prep.execute(); 55 createTrigger(conn, schema, table); 56 indexExistingRows(conn, schema, table); 57 } 58 59 private static void createTrigger(Connection conn, String schema, String table) throws SQLException { 60 Statement stat = conn.createStatement(); 61 String trigger = StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(TRIGGER_PREFIX + table); 62 stat.execute("DROP TRIGGER IF EXISTS " + trigger); 63 StringBuffer buff = new StringBuffer ("CREATE TRIGGER IF NOT EXISTS "); 64 buff.append(trigger); 65 buff.append(" AFTER INSERT, UPDATE, DELETE ON "); 66 buff.append(StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(table)); 67 buff.append(" FOR EACH ROW CALL \""); 68 buff.append(FullText.class.getName()); 69 buff.append("\""); 70 stat.execute(buff.toString()); 71 } 72 73 private static void indexExistingRows(Connection conn, String schema, String table) throws SQLException { 74 FullText existing = new FullText(); 75 existing.init(conn, schema, null, table); 76 StringBuffer buff = new StringBuffer ("SELECT * FROM "); 77 buff.append(StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(table)); 78 ResultSet rs = conn.createStatement().executeQuery(buff.toString()); 79 int columnCount = rs.getMetaData().getColumnCount(); 80 while(rs.next()) { 81 Object [] row = new Object [columnCount]; 82 for(int i=0; i<columnCount; i++) { 83 row[i] = rs.getObject(i+1); 84 } 85 existing.fire(conn, null, row); 86 } 87 } 88 89 94 public static void reindex(Connection conn) throws SQLException { 95 init(conn); 96 removeAllTriggers(conn); 97 FullTextSettings setting = FullTextSettings.getInstance(conn); 98 setting.getWordList().clear(); 99 Statement stat = conn.createStatement(); 100 stat.execute("TRUNCATE TABLE "+SCHEMA+".WORDS"); 101 stat.execute("TRUNCATE TABLE "+SCHEMA+".ROWS"); 102 stat.execute("TRUNCATE TABLE "+SCHEMA+".MAP"); 103 ResultSet rs = stat.executeQuery("SELECT * FROM "+SCHEMA+".INDEXES"); 104 while(rs.next()) { 105 String schema = rs.getString("SCHEMA"); 106 String table = rs.getString("TABLE"); 107 createTrigger(conn, schema, table); 108 indexExistingRows(conn, schema, table); 109 } 110 } 111 112 120 public static void setIgnoreList(Connection conn, String commaSeparatedList) throws SQLException { 121 init(conn); 122 FullTextSettings setting = FullTextSettings.getInstance(conn); 123 setIgnoreList(setting, commaSeparatedList); 124 Statement stat = conn.createStatement(); 125 stat.execute("TRUNCATE TABLE "+SCHEMA+".IGNORELIST"); 126 PreparedStatement prep = conn.prepareStatement("INSERT INTO "+SCHEMA+".IGNORELIST VALUES(?)"); 127 prep.setString(1, commaSeparatedList); 128 prep.execute(); 129 } 130 131 private static void setIgnoreList(FullTextSettings setting, String commaSeparatedList) { 132 String [] list = StringUtils.arraySplit(commaSeparatedList, ',', true); 133 HashSet set = setting.getIgnoreList(); 134 for(int i=0; i<list.length; i++) { 135 String word = list[i]; 136 word = setting.convertWord(word); 137 if(word != null) { 138 set.add(list[i]); 139 } 140 } 141 } 142 143 private static void removeAllTriggers(Connection conn) throws SQLException { 144 Statement stat = conn.createStatement(); 145 ResultSet rs = stat.executeQuery("SELECT * FROM INFORMATION_SCHEMA.TRIGGERS"); 146 Statement stat2 = conn.createStatement(); 147 while(rs.next()) { 148 String schema = rs.getString("TRIGGER_SCHEMA"); 149 String name = rs.getString("TRIGGER_NAME"); 150 if(name.startsWith(TRIGGER_PREFIX)) { 151 name = StringUtils.quoteIdentifier(schema) + "." + StringUtils.quoteIdentifier(name); 152 stat2.execute("DROP TRIGGER " + name); 153 } 154 } 155 } 156 157 161 public static void dropAll(Connection conn) throws SQLException { 162 init(conn); 163 Statement stat = conn.createStatement(); 164 stat.execute("DROP SCHEMA IF EXISTS " + SCHEMA); 165 removeAllTriggers(conn); 166 FullTextSettings setting = FullTextSettings.getInstance(conn); 167 setting.getIgnoreList().clear(); 168 setting.getWordList().clear(); 169 } 170 171 189 public static void init(Connection conn) throws SQLException { 190 Statement stat = conn.createStatement(); 191 stat.execute("CREATE SCHEMA IF NOT EXISTS " + SCHEMA); 192 stat.execute("CREATE TABLE IF NOT EXISTS "+SCHEMA+".INDEXES(ID INT AUTO_INCREMENT PRIMARY KEY, SCHEMA VARCHAR, TABLE VARCHAR, COLUMNS VARCHAR, UNIQUE(SCHEMA, TABLE))"); 193 stat.execute("CREATE MEMORY TABLE IF NOT EXISTS "+SCHEMA+".WORDS(ID INT AUTO_INCREMENT PRIMARY KEY, NAME VARCHAR, UNIQUE(NAME))"); 194 stat.execute("CREATE TABLE IF NOT EXISTS "+SCHEMA+".ROWS(ID IDENTITY, HASH INT, INDEXID INT, KEY VARCHAR, UNIQUE(HASH, INDEXID, KEY))"); 195 196 199 stat.execute("CREATE TABLE IF NOT EXISTS "+SCHEMA+".MAP(ROWID INT, WORDID INT, PRIMARY KEY(WORDID, ROWID))"); 201 202 stat.execute("CREATE TABLE IF NOT EXISTS "+SCHEMA+".IGNORELIST(LIST VARCHAR)"); 203 stat.execute("CREATE ALIAS IF NOT EXISTS FT_CREATE_INDEX FOR \"" + FullText.class.getName()+".createIndex\""); 204 stat.execute("CREATE ALIAS IF NOT EXISTS FT_SEARCH FOR \"" + FullText.class.getName()+".search\""); 205 stat.execute("CREATE ALIAS IF NOT EXISTS FT_REINDEX FOR \"" + FullText.class.getName()+".reindex\""); 206 stat.execute("CREATE ALIAS IF NOT EXISTS FT_DROP_ALL FOR \"" + FullText.class.getName()+".dropAll\""); 207 FullTextSettings setting = FullTextSettings.getInstance(conn); 208 ResultSet rs = stat.executeQuery("SELECT * FROM " + SCHEMA+".IGNORELIST"); 209 while(rs.next()) { 210 String commaSeparatedList = rs.getString(1); 211 setIgnoreList(setting, commaSeparatedList); 212 } 213 rs = stat.executeQuery("SELECT * FROM " + SCHEMA+".WORDS"); 214 HashMap map = setting.getWordList(); 215 while(rs.next()) { 216 String word = rs.getString("NAME"); 217 long id = rs.getLong("ID"); 218 word = setting.convertWord(word); 219 if(word != null) { 220 map.put(word, new Long (id)); 221 } 222 } 223 } 224 225 228 public void init(Connection conn, String schemaName, String triggerName, String tableName) throws SQLException { 229 init(conn); 230 FullTextSettings setting = FullTextSettings.getInstance(conn); 231 ArrayList keyList = new ArrayList (); 232 DatabaseMetaData meta = conn.getMetaData(); 233 ResultSet rs = meta.getColumns(null, schemaName, tableName, null); 234 ArrayList columnList = new ArrayList (); 235 while(rs.next()) { 236 columnList.add(rs.getString("COLUMN_NAME")); 237 } 238 dataTypes = new int[columnList.size()]; 239 index = new IndexInfo(); 240 index.schemaName = schemaName; 241 index.tableName = tableName; 242 index.columnNames = new String [columnList.size()]; 243 columnList.toArray(index.columnNames); 244 rs = meta.getColumns(null, schemaName, tableName, null); 245 for(int i=0; rs.next(); i++) { 246 dataTypes[i] = rs.getInt("DATA_TYPE"); 247 } 248 if(keyList.size() == 0) { 249 rs = meta.getPrimaryKeys(null, schemaName, tableName); 250 while(rs.next()) { 251 keyList.add(rs.getString("COLUMN_NAME")); 252 } 253 } 254 if(keyList.size() == 0) { 255 throw new SQLException ("No primary key for table " + tableName); 256 } 257 ArrayList indexList = new ArrayList (); 258 PreparedStatement prep = conn.prepareStatement( 259 "SELECT ID, COLUMNS FROM "+SCHEMA+".INDEXES WHERE SCHEMA=? AND TABLE=?"); 260 prep.setString(1, schemaName); 261 prep.setString(2, tableName); 262 rs = prep.executeQuery(); 263 if(rs.next()) { 264 index.id = rs.getInt(1); 265 String columns = rs.getString(2); 266 if(columns != null) { 267 String [] list = StringUtils.arraySplit(columns, ',', true); 268 for(int i=0; i<list.length; i++) { 269 indexList.add(list[i]); 270 } 271 } 272 } 273 if(indexList.size() == 0) { 274 indexList.addAll(columnList); 275 } 276 index.keys = new int[keyList.size()]; 277 setColumns(index.keys, keyList, columnList); 278 index.indexColumns = new int[indexList.size()]; 279 setColumns(index.indexColumns, indexList, columnList); 280 setting.addIndexInfo(index); 281 prepInsertWord = conn.prepareStatement("INSERT INTO "+SCHEMA+".WORDS(NAME) VALUES(?)"); 282 prepInsertRow = conn.prepareStatement("INSERT INTO "+SCHEMA+".ROWS(HASH, INDEXID, KEY) VALUES(?, ?, ?)"); 283 prepInsertMap = conn.prepareStatement("INSERT INTO "+SCHEMA+".MAP(ROWID, WORDID) VALUES(?, ?)"); 284 prepDeleteRow = conn.prepareStatement("DELETE FROM "+SCHEMA+".ROWS WHERE HASH=? AND INDEXID=? AND KEY=?"); 285 prepDeleteMap = conn.prepareStatement("DELETE FROM "+SCHEMA+".MAP WHERE ROWID=? AND WORDID=?"); 286 prepSelectRow = conn.prepareStatement("SELECT ID FROM "+SCHEMA+".ROWS WHERE HASH=? AND INDEXID=? AND KEY=?"); 287 288 PreparedStatement prepSelectMapByWordId = conn.prepareStatement("SELECT ROWID FROM "+SCHEMA+".MAP WHERE WORDID=?"); 289 PreparedStatement prepSelectRowById = conn.prepareStatement("SELECT KEY, INDEXID FROM "+SCHEMA+".ROWS WHERE ID=?"); 290 setting.setPrepSelectMapByWordId(prepSelectMapByWordId); 291 setting.setPrepSelectRowById(prepSelectRowById); 292 } 293 294 private void setColumns(int[] index, ArrayList keys, ArrayList columns) throws SQLException { 295 for(int i=0; i<keys.size(); i++) { 296 String key = (String ) keys.get(i); 297 int found = -1; 298 for(int j=0; found == -1 && j<columns.size(); j++) { 299 String column = (String )columns.get(j); 300 if(column.equals(key)) { 301 found = j; 302 } 303 } 304 if(found < 0) { 305 throw new SQLException ("FULLTEXT", "Column not found: " + key); 306 } 307 index[i] = found; 308 } 309 } 310 311 314 public void fire(Connection conn, Object [] oldRow, Object [] newRow) throws SQLException { 315 FullTextSettings setting = FullTextSettings.getInstance(conn); 316 if(oldRow != null) { 317 delete(setting, oldRow); 318 } 319 if(newRow != null) { 320 insert(setting, newRow); 321 } 322 } 323 324 private String getKey(Object [] row) throws SQLException { 325 StringBuffer buff = new StringBuffer (); 326 for(int i=0; i<index.keys.length; i++) { 327 if(i>0) { 328 buff.append(" AND "); 329 } 330 int columnIndex = index.keys[i]; 331 buff.append(StringUtils.quoteIdentifier(index.columnNames[columnIndex])); 332 Object o = row[columnIndex]; 333 if(o==null) { 334 buff.append(" IS NULL"); 335 } else { 336 buff.append("="); 337 buff.append(quoteSQL(o, dataTypes[columnIndex])); 338 } 339 } 340 String key = buff.toString(); 341 return key; 342 } 343 344 private String quoteString(String data) { 345 if(data.indexOf('\'') < 0) { 346 return "'" + data + "'"; 347 } 348 StringBuffer buff = new StringBuffer (data.length()+2); 349 buff.append('\''); 350 for(int i=0; i < data.length(); i++) { 351 char ch = data.charAt(i); 352 if(ch == '\'') { 353 buff.append(ch); 354 } 355 buff.append(ch); 356 } 357 buff.append('\''); 358 return buff.toString(); 359 } 360 361 private String quoteBinary(byte[] data) { 362 return "'" + ByteUtils.convertBytesToString(data) + "'"; 363 } 364 365 private String asString(Object data, int type) throws SQLException { 366 if(data == null) { 367 return "NULL"; 368 } 369 switch(type) { 370 case Types.BIT: 371 case DataType.TYPE_BOOLEAN: 372 case Types.INTEGER: 373 case Types.BIGINT: 374 case Types.DECIMAL: 375 case Types.DOUBLE: 376 case Types.FLOAT: 377 case Types.NUMERIC: 378 case Types.REAL: 379 case Types.SMALLINT: 380 case Types.TINYINT: 381 case Types.DATE: 382 case Types.TIME: 383 case Types.TIMESTAMP: 384 case Types.LONGVARCHAR: 385 case Types.CHAR: 386 case Types.VARCHAR: 387 return data.toString(); 388 case Types.VARBINARY: 389 case Types.LONGVARBINARY: 390 case Types.BINARY: 391 case Types.JAVA_OBJECT: 392 case Types.CLOB: 393 case Types.OTHER: 394 case Types.BLOB: 395 case Types.STRUCT: 396 case Types.REF: 397 case Types.NULL: 398 case Types.ARRAY: 399 case DataType.TYPE_DATALINK: 400 case Types.DISTINCT: 401 throw new SQLException ("FULLTEXT", "Unsupported column data type: " + type); 402 } 403 return ""; 404 } 405 406 private String quoteSQL(Object data, int type) throws SQLException { 407 if(data == null) { 408 return "NULL"; 409 } 410 switch(type) { 411 case Types.BIT: 412 case DataType.TYPE_BOOLEAN: 413 case Types.INTEGER: 414 case Types.BIGINT: 415 case Types.DECIMAL: 416 case Types.DOUBLE: 417 case Types.FLOAT: 418 case Types.NUMERIC: 419 case Types.REAL: 420 case Types.SMALLINT: 421 case Types.TINYINT: 422 return data.toString(); 423 case Types.DATE: 424 case Types.TIME: 425 case Types.TIMESTAMP: 426 case Types.LONGVARCHAR: 427 case Types.CHAR: 428 case Types.VARCHAR: 429 return quoteString(data.toString()); 430 case Types.VARBINARY: 431 case Types.LONGVARBINARY: 432 case Types.BINARY: 433 return quoteBinary((byte[])data); 434 case Types.JAVA_OBJECT: 435 case Types.CLOB: 436 case Types.OTHER: 437 case Types.BLOB: 438 case Types.STRUCT: 439 case Types.REF: 440 case Types.NULL: 441 case Types.ARRAY: 442 case DataType.TYPE_DATALINK: 443 case Types.DISTINCT: 444 throw new SQLException ("FULLTEXT", "Unsupported key data type: " + type); 445 } 446 return ""; 447 } 448 449 private static void addWords(FullTextSettings setting, HashSet set, String text) { 450 StringTokenizer tokenizer = new StringTokenizer (text, " \t\n\r\f+\"*%&/()=?'!,.;:-_#@|^~`{}[]"); 451 while(tokenizer.hasMoreTokens()) { 452 String word = tokenizer.nextToken(); 453 word = setting.convertWord(word); 454 if(word != null) { 455 set.add(word); 456 } 457 } 458 } 459 460 private int[] getWordIds(FullTextSettings setting, Object [] row) throws SQLException { 461 HashSet words = new HashSet (); 462 for(int i=0; i<index.indexColumns.length; i++) { 463 int idx = index.indexColumns[i]; 464 String data = asString(row[idx], dataTypes[idx]); 465 addWords(setting, words, data); 466 } 467 HashMap allWords = setting.getWordList(); 468 int[] wordIds = new int[words.size()]; 469 Iterator it = words.iterator(); 470 for(int i=0; it.hasNext(); i++) { 471 String word = (String ) it.next(); 472 Integer wId = (Integer ) allWords.get(word); 473 int wordId; 474 if(wId == null) { 475 prepInsertWord.setString(1, word); 476 prepInsertWord.execute(); 477 ResultSet rs = JdbcUtils.getGeneratedKeys(prepInsertWord); 478 rs.next(); 479 wordId = rs.getInt(1); 480 allWords.put(word, new Integer (wordId)); 481 } else { 482 wordId = wId.intValue(); 483 } 484 wordIds[i] = wordId; 485 } 486 Arrays.sort(wordIds); 487 return wordIds; 488 } 489 490 private void insert(FullTextSettings setting, Object [] row) throws SQLException { 491 String key = getKey(row); 492 int hash = key.hashCode(); 493 prepInsertRow.setInt(1, hash); 494 prepInsertRow.setLong(2, index.id); 495 prepInsertRow.setString(3, key); 496 prepInsertRow.execute(); 497 ResultSet rs = JdbcUtils.getGeneratedKeys(prepInsertRow); 498 rs.next(); 499 long rowId = rs.getLong(1); 500 prepInsertMap.setLong(1, rowId); 501 int[] wordIds = getWordIds(setting, row); 502 for(int i=0; i<wordIds.length; i++) { 503 prepInsertMap.setInt(2, wordIds[i]); 504 prepInsertMap.execute(); 505 } 506 } 507 508 private void delete(FullTextSettings setting, Object [] row) throws SQLException { 509 String key = getKey(row); 510 int hash = key.hashCode(); 511 prepSelectRow.setInt(1, hash); 512 prepSelectRow.setLong(2, index.id); 513 prepSelectRow.setString(3, key); 514 ResultSet rs = prepSelectRow.executeQuery(); 515 if(rs.next()) { 516 long rowId = rs.getLong(1); 517 prepDeleteMap.setLong(1, rowId); 518 int[] wordIds = getWordIds(setting, row); 519 for(int i=0; i<wordIds.length; i++) { 520 prepDeleteMap.setInt(2, wordIds[i]); 521 prepDeleteMap.executeUpdate(); 522 } 523 prepDeleteRow.setInt(1, hash); 524 prepDeleteRow.setLong(2, index.id); 525 prepDeleteRow.setString(3, key); 526 prepDeleteRow.executeUpdate(); 527 } 528 } 529 530 539 public static ResultSet search(Connection conn, String text, int limit, int offset) throws SQLException { 540 SimpleResultSet result = new SimpleResultSet(); 541 result.addColumn(FIELD_QUERY, Types.VARCHAR, 0, 0); 542 if(text == null) { 543 return result; 545 } 546 FullTextSettings setting = FullTextSettings.getInstance(conn); 547 HashSet words = new HashSet (); 548 addWords(setting, words, text); 549 HashSet rIds = null, lastRowIds = null; 550 HashMap allWords = setting.getWordList(); 551 PreparedStatement prepSelectMapByWordId = setting.getPrepSelectMapByWordId(); 552 for(Iterator it = words.iterator(); it.hasNext(); ) { 553 lastRowIds = rIds; 554 rIds = new HashSet (); 555 String word = (String ) it.next(); 556 Integer wId = (Integer ) allWords.get(word); 557 if(wId == null) { 558 continue; 559 } 560 prepSelectMapByWordId.setInt(1, wId.intValue()); 561 ResultSet rs = prepSelectMapByWordId.executeQuery(); 562 while(rs.next()) { 563 Long rId = new Long (rs.getLong(1)); 564 if(lastRowIds == null || lastRowIds.contains(rId)) { 565 rIds.add(rId); 566 } 567 } 568 } 569 if(rIds == null || rIds.size() == 0) { 570 return result; 571 } 572 PreparedStatement prepSelectRowById = setting.getPrepSelectRowById(); 573 int rowCount = 0; 574 for(Iterator it = rIds.iterator(); it.hasNext(); ) { 575 long rowId = ((Long )it.next()).longValue(); 576 prepSelectRowById.setLong(1, rowId); 577 ResultSet rs = prepSelectRowById.executeQuery(); 578 if(!rs.next()) { 579 continue; 580 } 581 if(offset > 0) { 582 offset--; 583 } else { 584 String key = rs.getString(1); 585 long indexId = rs.getLong(2); 586 IndexInfo index = setting.getIndexInfo(indexId); 587 StringBuffer buff = new StringBuffer (); 588 buff.append(StringUtils.quoteIdentifier(index.schemaName)); 589 buff.append('.'); 590 buff.append(StringUtils.quoteIdentifier(index.tableName)); 591 buff.append(" WHERE "); 592 buff.append(key); 593 String query = buff.toString(); 594 result.addRow(new String []{query}); 595 rowCount++; 596 if(limit > 0 && rowCount >= limit) { 597 break; 598 } 599 } 600 } 601 return result; 602 } 603 604 } 605 | Popular Tags |