KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lucene > search > QueryTermVector


1 package org.apache.lucene.search;
2
3 /**
4  * Copyright 2004 The Apache Software Foundation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18
19 import org.apache.lucene.analysis.Analyzer;
20 import org.apache.lucene.analysis.Token;
21 import org.apache.lucene.analysis.TokenStream;
22 import org.apache.lucene.index.TermFreqVector;
23
24 import java.io.IOException JavaDoc;
25 import java.io.StringReader JavaDoc;
26 import java.util.*;
27
28 /**
29  *
30  *
31  **/

32 public class QueryTermVector implements TermFreqVector {
33   private String JavaDoc [] terms = new String JavaDoc[0];
34   private int [] termFreqs = new int[0];
35
36   public String JavaDoc getField() { return null; }
37
38   /**
39    *
40    * @param queryTerms The original list of terms from the query, can contain duplicates
41    */

42   public QueryTermVector(String JavaDoc [] queryTerms) {
43
44     processTerms(queryTerms);
45   }
46
47   public QueryTermVector(String JavaDoc queryString, Analyzer analyzer) {
48     if (analyzer != null)
49     {
50       TokenStream stream = analyzer.tokenStream("", new StringReader JavaDoc(queryString));
51       if (stream != null)
52       {
53         Token next = null;
54         List terms = new ArrayList();
55         try {
56           while ((next = stream.next()) != null)
57           {
58             terms.add(next.termText());
59           }
60           processTerms((String JavaDoc[])terms.toArray(new String JavaDoc[terms.size()]));
61         } catch (IOException JavaDoc e) {
62         }
63       }
64     }
65   }
66   
67   private void processTerms(String JavaDoc[] queryTerms) {
68     if (queryTerms != null) {
69       Arrays.sort(queryTerms);
70       Map tmpSet = new HashMap(queryTerms.length);
71       //filter out duplicates
72
List tmpList = new ArrayList(queryTerms.length);
73       List tmpFreqs = new ArrayList(queryTerms.length);
74       int j = 0;
75       for (int i = 0; i < queryTerms.length; i++) {
76         String JavaDoc term = queryTerms[i];
77         Integer JavaDoc position = (Integer JavaDoc)tmpSet.get(term);
78         if (position == null) {
79           tmpSet.put(term, new Integer JavaDoc(j++));
80           tmpList.add(term);
81           tmpFreqs.add(new Integer JavaDoc(1));
82         }
83         else {
84           Integer JavaDoc integer = (Integer JavaDoc)tmpFreqs.get(position.intValue());
85           tmpFreqs.set(position.intValue(), new Integer JavaDoc(integer.intValue() + 1));
86         }
87       }
88       terms = (String JavaDoc[])tmpList.toArray(terms);
89       //termFreqs = (int[])tmpFreqs.toArray(termFreqs);
90
termFreqs = new int[tmpFreqs.size()];
91       int i = 0;
92       for (Iterator iter = tmpFreqs.iterator(); iter.hasNext();) {
93         Integer JavaDoc integer = (Integer JavaDoc) iter.next();
94         termFreqs[i++] = integer.intValue();
95       }
96     }
97   }
98   
99   public final String JavaDoc toString() {
100         StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
101         sb.append('{');
102         for (int i=0; i<terms.length; i++) {
103             if (i>0) sb.append(", ");
104             sb.append(terms[i]).append('/').append(termFreqs[i]);
105         }
106         sb.append('}');
107         return sb.toString();
108     }
109   
110
111   public int size() {
112     return terms.length;
113   }
114
115   public String JavaDoc[] getTerms() {
116     return terms;
117   }
118
119   public int[] getTermFrequencies() {
120     return termFreqs;
121   }
122
123   public int indexOf(String JavaDoc term) {
124     int res = Arrays.binarySearch(terms, term);
125         return res >= 0 ? res : -1;
126   }
127
128   public int[] indexesOf(String JavaDoc[] terms, int start, int len) {
129     int res[] = new int[len];
130
131     for (int i=0; i < len; i++) {
132         res[i] = indexOf(terms[i]);
133     }
134     return res;
135   }
136
137 }
138
Popular Tags