KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > alfresco > repo > search > impl > lucene > analysis > NumericEncoder


1 /*
2  * Copyright (C) 2005 Alfresco, Inc.
3  *
4  * Licensed under the Mozilla Public License version 1.1
5  * with a permitted attribution clause. You may obtain a
6  * copy of the License at
7  *
8  * http://www.alfresco.org/legal/license.txt
9  *
10  * Unless required by applicable law or agreed to in writing,
11  * software distributed under the License is distributed on an
12  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13  * either express or implied. See the License for the specific
14  * language governing permissions and limitations under the
15  * License.
16  */

17 package org.alfresco.repo.search.impl.lucene.analysis;
18
/**
 * Support to encode numeric types in the lucene index.
 *
 * To support range queries in the lucene index, numeric types need to be
 * indexed specially. This has been addressed for int and long types for lucene,
 * with limited support (via scaling) for float and double.
 *
 * The implementation splits an int, long, float or double into the sign bit,
 * optional exponent and mantissa, taken either from the int or long format or
 * from its IEEE 754 bit representation.
 *
 * The encoding ensures that negative numbers are indexed correctly, so that
 * small negative numbers come after big negative numbers in range queries.
 *
 * The algorithm finds the sign; if the number is negative, the mantissa
 * and exponent are XORed against the appropriate masks. This reverses the
 * order. As negative numbers appear first in the list, their sign bit is 0 and
 * the sign bit of positive numbers is 1.
 *
 * @author Andy Hind
 */

public final class NumericEncoder
{
    /*
     * Constants for integer encoding
     */

    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */

    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding (IEEE 754 single precision bit layout)
     */

    static final int FLOAT_SIGN_MASK = 0x80000000;

    static final int FLOAT_EXPONENT_MASK = 0x7F800000;

    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding (IEEE 754 double precision bit layout)
     */

    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /*
     * Constants for hex formatting
     */

    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e',
            'f' };

    /** Selects the lowest four bits (one hex digit) of a value. */
    private static final int MASK = (1 << 4) - 1;

    /** Number of hex digits in an encoded int or float. */
    private static final int INT_HEX_LENGTH = 8;

    /** Number of hex digits in an encoded long or double. */
    private static final int LONG_HEX_LENGTH = 16;

    /**
     * Not instantiable: this class only offers static helpers.
     */
    private NumericEncoder()
    {
        super();
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
     * ffffffff.
     *
     * @param intToEncode the value to encode
     * @return eight lower-case hex digits whose string order matches the
     *         numeric order of the input
     */
    public static String encode(int intToEncode)
    {
        // Flipping the sign bit moves negative values below positive ones
        // when the bits are read as an unsigned quantity.
        int replacement = intToEncode ^ INTEGER_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
     * ffffffffffffffff.
     *
     * @param longToEncode the value to encode
     * @return sixteen lower-case hex digits whose string order matches the
     *         numeric order of the input
     */
    public static String encode(long longToEncode)
    {
        // Same sign-bit flip as the int variant, on 64 bits.
        long replacement = longToEncode ^ LONG_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison. Note that there is no negative NaN but there are codings
     * that imply this. Float.floatToIntBits collapses every NaN to a single
     * positive bit pattern, so NaN encodes above +Infinity; it does not get
     * any special treatment beyond that.
     *
     * @param floatToEncode the value to encode
     * @return eight lower-case hex digits whose string order matches the
     *         numeric order of the input
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        int sign = bits & FLOAT_SIGN_MASK;
        int exponent = bits & FLOAT_EXPONENT_MASK;
        int mantissa = bits & FLOAT_MANTISSA_MASK;
        if (sign != 0)
        {
            // Negative values: a larger magnitude must sort earlier, so the
            // magnitude bits are inverted to reverse their order.
            exponent ^= FLOAT_EXPONENT_MASK;
            mantissa ^= FLOAT_MANTISSA_MASK;
        }
        // Negative numbers end up with sign bit 0, positive numbers with 1.
        sign ^= FLOAT_SIGN_MASK;
        int replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison. Note that there is no negative NaN but there are codings
     * that imply this. Double.doubleToLongBits collapses every NaN to a single
     * positive bit pattern, so NaN encodes above +Infinity; it does not get
     * any special treatment beyond that.
     *
     * @param doubleToEncode the value to encode
     * @return sixteen lower-case hex digits whose string order matches the
     *         numeric order of the input
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        long sign = bits & DOUBLE_SIGN_MASK;
        long exponent = bits & DOUBLE_EXPONENT_MASK;
        long mantissa = bits & DOUBLE_MANTISSA_MASK;
        if (sign != 0)
        {
            // Negative values: a larger magnitude must sort earlier, so the
            // magnitude bits are inverted to reverse their order.
            exponent ^= DOUBLE_EXPONENT_MASK;
            mantissa ^= DOUBLE_MANTISSA_MASK;
        }
        // Negative numbers end up with sign bit 0, positive numbers with 1.
        sign ^= DOUBLE_SIGN_MASK;
        long replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /**
     * Format the 32 bits of an int as exactly eight lower-case hex digits,
     * zero padded on the left, treating the value as unsigned.
     *
     * @param i the bits to format
     * @return the eight character hex string
     */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[INT_HEX_LENGTH];
        // Fill from the least significant digit backwards; the unsigned shift
        // feeds in zeros, which gives the left padding.
        for (int charPos = buf.length - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /**
     * Format the 64 bits of a long as exactly sixteen lower-case hex digits,
     * zero padded on the left, treating the value as unsigned.
     *
     * @param l the bits to format
     * @return the sixteen character hex string
     */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[LONG_HEX_LENGTH];
        // Fill from the least significant digit backwards; the unsigned shift
        // feeds in zeros, which gives the left padding.
        for (int charPos = buf.length - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }
}
186
Popular Tags