KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > enhydra > snapper > wrapper > lucene > Field


1 package org.enhydra.snapper.wrapper.lucene;
2
3 /**
4
5  */

6
7 import java.io.Reader JavaDoc;
8 import java.util.Date JavaDoc;
9
10
11 /**
12   A field is a section of a Document. Each field has two parts, a name and a
13   value. Values may be free text, provided as a String or as a Reader, or they
14   may be atomic keywords, which are not further processed. Such keywords may
15   be used to represent dates, urls, etc. Fields are optionally stored in the
16   index, so that they may be returned with hits on the document.
17   */

18
19 public final class Field implements java.io.Serializable JavaDoc {
20   private String JavaDoc name = "body";
21   private String JavaDoc stringValue = null;
22   private boolean storeTermVector = false;
23   private Reader JavaDoc readerValue = null;
24   private boolean isStored = false;
25   private boolean isIndexed = true;
26   private boolean isTokenized = true;
27
28   private float boost = 1.0f;
29
30   /** Sets the boost factor hits on this field. This value will be
31    * multiplied into the score of all hits on this this field of this
32    * document.
33    *
34    * <p>The boost is multiplied by {@link Document#getBoost()} of the document
35    * containing this field. If a document has multiple fields with the same
36    * name, all such values are multiplied together. This product is then
37    * multipled by the value {@link Similarity#lengthNorm(String,int)}, and
38    * rounded by {@link Similarity#encodeNorm(float)} before it is stored in the
39    * index. One should attempt to ensure that this product does not overflow
40    * the range of that encoding.
41    *
42    * @see Document#setBoost(float)
43    * @see Similarity#lengthNorm(String, int)
44    * @see Similarity#encodeNorm(float)
45    */

46   public void setBoost(float boost) {
47     this.boost = boost;
48   }
49
50   /** Returns the boost factor for hits on any field of this document.
51    *
52    * <p>The default value is 1.0.
53    *
54    * <p>Note: this value is not stored directly with the document in the index.
55    * Documents returned from {@link IndexReader#document(int)} and {@link
56    * Hits#doc(int)} may thus not have the same value present as when this field
57    * was indexed.
58    *
59    * @see #setBoost(float)
60    */

61   public float getBoost() {
62     return boost;
63   }
64
65   /** Constructs a String-valued Field that is not tokenized, but is indexed
66     and stored. Useful for non-text fields, e.g. date or url.
67    */

68   public static final Field Keyword(String JavaDoc name, String JavaDoc value) {
69     return new Field(name, value, true, true, false);
70   }
71
72   /** Constructs a String-valued Field that is not tokenized nor indexed,
73     but is stored in the index, for return with hits. */

74   public static final Field UnIndexed(String JavaDoc name, String JavaDoc value) {
75     return new Field(name, value, true, false, false);
76   }
77
78   /** Constructs a String-valued Field that is tokenized and indexed,
79     and is stored in the index, for return with hits. Useful for short text
80     fields, like "title" or "subject". Term vector will not be stored for this field. */

81   public static final Field Text(String JavaDoc name, String JavaDoc value) {
82     return Text(name, value, false);
83   }
84
85   /** Constructs a Date-valued Field that is not tokenized and is indexed,
86       and stored in the index, for return with hits. */

87   public static final Field Keyword(String JavaDoc name, Date JavaDoc value) {
88     return new Field(name, DateField.dateToString(value), true, true, false);
89   }
90
91   /** Constructs a String-valued Field that is tokenized and indexed,
92     and is stored in the index, for return with hits. Useful for short text
93     fields, like "title" or "subject". */

94   public static final Field Text(String JavaDoc name, String JavaDoc value, boolean storeTermVector) {
95     return new Field(name, value, true, true, true, storeTermVector);
96   }
97
98   /** Constructs a String-valued Field that is tokenized and indexed,
99     but that is not stored in the index. Term vector will not be stored for this field. */

100   public static final Field UnStored(String JavaDoc name, String JavaDoc value) {
101     return UnStored(name, value, false);
102   }
103
104   /** Constructs a String-valued Field that is tokenized and indexed,
105     but that is not stored in the index. */

106   public static final Field UnStored(String JavaDoc name, String JavaDoc value, boolean storeTermVector) {
107     return new Field(name, value, false, true, true, storeTermVector);
108   }
109
110   /** Constructs a Reader-valued Field that is tokenized and indexed, but is
111     not stored in the index verbatim. Useful for longer text fields, like
112     "body". Term vector will not be stored for this field. */

113   public static final Field Text(String JavaDoc name, Reader JavaDoc value) {
114     return Text(name, value, false);
115   }
116
117   /** Constructs a Reader-valued Field that is tokenized and indexed, but is
118     not stored in the index verbatim. Useful for longer text fields, like
119     "body". */

120   public static final Field Text(String JavaDoc name, Reader JavaDoc value, boolean storeTermVector) {
121     Field f = new Field(name, value);
122     f.storeTermVector = storeTermVector;
123     return f;
124   }
125
126   /** The name of the field (e.g., "date", "subject", "title", or "body")
127     as an interned string. */

128   public String JavaDoc name() { return name; }
129
130   /** The value of the field as a String, or null. If null, the Reader value
131     is used. Exactly one of stringValue() and readerValue() must be set. */

132   public String JavaDoc stringValue() { return stringValue; }
133   /** The value of the field as a Reader, or null. If null, the String value
134     is used. Exactly one of stringValue() and readerValue() must be set. */

135   public Reader JavaDoc readerValue() { return readerValue; }
136
137
138   /** Create a field by specifying all parameters except for <code>storeTermVector</code>,
139    * which is set to <code>false</code>.
140    */

141   public Field(String JavaDoc name, String JavaDoc string,
142            boolean store, boolean index, boolean token) {
143     this(name, string, store, index, token, false);
144   }
145
146   /**
147    *
148    * @param name The name of the field
149    * @param string The string to process
150    * @param store true if the field should store the string
151    * @param index true if the field should be indexed
152    * @param token true if the field should be tokenized
153    * @param storeTermVector true if we should store the Term Vector info
154    */

155   public Field(String JavaDoc name, String JavaDoc string,
156            boolean store, boolean index, boolean token, boolean storeTermVector) {
157     if (name == null)
158       throw new IllegalArgumentException JavaDoc("name cannot be null");
159     if (string == null)
160       throw new IllegalArgumentException JavaDoc("value cannot be null");
161     if (!index && storeTermVector)
162       throw new IllegalArgumentException JavaDoc("cannot store a term vector for fields that are not indexed.");
163
164     this.name = name.intern(); // field names are interned
165
this.stringValue = string;
166     this.isStored = store;
167     this.isIndexed = index;
168     this.isTokenized = token;
169     this.storeTermVector = storeTermVector;
170   }
171
172   Field(String JavaDoc name, Reader JavaDoc reader) {
173     if (name == null)
174       throw new IllegalArgumentException JavaDoc("name cannot be null");
175     if (reader == null)
176       throw new IllegalArgumentException JavaDoc("value cannot be null");
177
178     this.name = name.intern(); // field names are interned
179
this.readerValue = reader;
180   }
181
182   /** True iff the value of the field is to be stored in the index for return
183     with search hits. It is an error for this to be true if a field is
184     Reader-valued. */

185   public final boolean isStored() { return isStored; }
186
187   /** True iff the value of the field is to be indexed, so that it may be
188     searched on. */

189   public final boolean isIndexed() { return isIndexed; }
190
191   /** True iff the value of the field should be tokenized as text prior to
192     indexing. Un-tokenized fields are indexed as a single word and may not be
193     Reader-valued. */

194   public final boolean isTokenized() { return isTokenized; }
195
196   /** True iff the term or terms used to index this field are stored as a term
197    * vector, available from {@link IndexReader#getTermFreqVector(int,String)}.
198    * These methods do not provide access to the original content of the field,
199    * only to terms used to index it. If the original content must be
200    * preserved, use the <code>stored</code> attribute instead.
201    *
202    * @see IndexReader#getTermFreqVector(int, String)
203    */

204   public final boolean isTermVectorStored() { return storeTermVector; }
205
206   /** Prints a Field for human consumption. */
207   public final String JavaDoc toString() {
208     if (isStored && isIndexed && !isTokenized)
209       return "Keyword<" + name + ":" + stringValue + ">";
210     else if (isStored && !isIndexed && !isTokenized)
211       return "Unindexed<" + name + ":" + stringValue + ">";
212     else if (isStored && isIndexed && isTokenized && stringValue!=null)
213       return "Text<" + name + ":" + stringValue + ">";
214     else if (!isStored && isIndexed && isTokenized && readerValue!=null)
215       return "Text<" + name + ":" + readerValue + ">";
216     else if (!isStored && isIndexed && isTokenized)
217     {
218       return "UnStored<" + name + ">";
219     }
220     else
221     {
222       return super.toString();
223     }
224   }
225
226 }
227
228
Popular Tags