KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > sf > saxon > codenorm > NormalizerData


1 package net.sf.saxon.codenorm;
2
3 import net.sf.saxon.om.XMLChar;
4
5 import java.util.BitSet JavaDoc;
6
7 /**
8  * Accesses the Normalization Data used for Forms C and D.<br>
9  * Copyright © 1998-1999 Unicode, Inc. All Rights Reserved.<br>
10  * The Unicode Consortium makes no expressed or implied warranty of any
11  * kind, and assumes no liability for errors or omissions.
12  * No liability is assumed for incidental and consequential damages
13  * in connection with or arising out of the use of the information here.
14  * @author Mark Davis
15  */

16 public class NormalizerData {
17     static final String JavaDoc copyright = "Copyright © 1998-1999 Unicode, Inc.";
18
19     /**
20     * Constant for use in getPairwiseComposition
21     */

22     public static final int NOT_COMPOSITE = '\uFFFF';
23
24     /**
25     * Gets the combining class of a character from the
26     * Unicode Character Database.
27     * @param ch the source character
28     * @return value from 0 to 255
29     */

30     public int getCanonicalClass(int ch) {
31         return canonicalClass.get(ch);
32     }
33
34     /**
35     * Returns the composite of the two characters. If the two
36     * characters don't combine, returns NOT_COMPOSITE.
37     * Only has to worry about BMP characters, since those are the only ones that can ever compose.
38     * @param first first character (e.g. 'c')
39     * @param first second character (e.g. '¸' cedilla)
40     * @return composite (e.g. 'ç')
41     */

42     public char getPairwiseComposition(int first, int second) {
43         if (first < 0 || first > 0x10FFFF || second < 0 || second > 0x10FFFF) return NOT_COMPOSITE;
44         return (char)compose.get((first << 16) | second);
45     }
46
47     /**
48     * Gets recursive decomposition of a character from the
49     * Unicode Character Database.
50     * @param canonical If true
51     * bit is on in this byte, then selects the recursive
52     * canonical decomposition, otherwise selects
53     * the recursive compatibility and canonical decomposition.
54     * @param ch the source character
55     * @param buffer buffer to be filled with the decomposition
56     */

57     public void getRecursiveDecomposition(boolean canonical, int ch, StringBuffer JavaDoc buffer) {
58         String JavaDoc decomp = decompose.get(ch);
59         if (decomp != null && !(canonical && isCompatibility.get(ch))) {
60             for (int i = 0; i < decomp.length(); ++i) {
61                 getRecursiveDecomposition(canonical, decomp.charAt(i), buffer);
62             }
63         } else { // if no decomp, append
64
//UTF16.append(buffer, ch);
65
if (ch<65536) {
66                 buffer.append((char)ch);
67             } else { // output a surrogate pair
68
buffer.append(XMLChar.highSurrogate(ch));
69                 buffer.append(XMLChar.lowSurrogate(ch));
70             }
71         }
72     }
73
74     // =================================================
75
// PRIVATES
76
// =================================================
77

78     /**
79      * Only accessed by NormalizerBuilder.
80      */

81     NormalizerData(IntHashtable canonicalClass, IntStringHashtable decompose,
82       IntHashtable compose, BitSet JavaDoc isCompatibility, BitSet JavaDoc isExcluded) {
83         this.canonicalClass = canonicalClass;
84         this.decompose = decompose;
85         this.compose = compose;
86         this.isCompatibility = isCompatibility;
87         this.isExcluded = isExcluded;
88     }
89
90     /**
91     * Just accessible for testing.
92     */

93     boolean getExcluded (char ch) {
94         return isExcluded.get(ch);
95     }
96
97     /**
98     * Just accessible for testing.
99     */

100     String JavaDoc getRawDecompositionMapping (char ch) {
101         return decompose.get(ch);
102     }
103
104     /**
105     * For now, just use IntHashtable
106     * Two-stage tables would be used in an optimized implementation.
107     */

108     private IntHashtable canonicalClass;
109
110     /**
111     * The main data table maps chars to a 32-bit int.
112     * It holds either a pair: top = first, bottom = second
113     * or singleton: top = 0, bottom = single.
114     * If there is no decomposition, the value is 0.
115     * Two-stage tables would be used in an optimized implementation.
116     * An optimization could also map chars to a small index, then use that
117     * index in a small array of ints.
118     */

119     private IntStringHashtable decompose;
120
121     /**
122     * Maps from pairs of characters to single.
123     * If there is no decomposition, the value is NOT_COMPOSITE.
124     */

125     private IntHashtable compose;
126
127     /**
128     * Tells whether decomposition is canonical or not.
129     */

130     private BitSet JavaDoc isCompatibility = new BitSet JavaDoc();
131
132     /**
133     * Tells whether character is script-excluded or not.
134     * Used only while building, and for testing.
135     */

136
137     private BitSet JavaDoc isExcluded = new BitSet JavaDoc();
138 }
Popular Tags