1 package net.sf.saxon.codenorm; 2 3 import net.sf.saxon.om.XMLChar; 4 5 18 19 public class Normalizer { 20 21 24 public Normalizer(byte form) { 25 this.form = form; 26 if (data == null) { 27 data = UnicodeDataParser.build(); } 29 } 30 31 34 static final byte 35 COMPATIBILITY_MASK = 1, 36 COMPOSITION_MASK = 2; 37 38 41 public static final byte 42 D = 0 , 43 C = COMPOSITION_MASK, 44 KD = COMPATIBILITY_MASK, 45 KC = (byte)(COMPATIBILITY_MASK + COMPOSITION_MASK); 46 47 53 public StringBuffer normalize(String source, StringBuffer target) { 54 55 58 if (source.length() != 0) { 59 internalDecompose(source, target); 60 if ((form & COMPOSITION_MASK) != 0) { 61 internalCompose(target); 62 } 63 } 64 return target; 65 } 66 67 72 public String normalize(String source) { 73 return normalize(source, new StringBuffer (source.length()+8)).toString(); 74 } 75 76 80 83 private byte form; 84 85 95 private void internalDecompose(String source, StringBuffer target) { 96 StringBuffer buffer = new StringBuffer (8); 97 boolean canonical = (form & COMPATIBILITY_MASK) == 0; 98 int ch32; 99 for (int i = 0; i < source.length();) { 101 buffer.setLength(0); 102 ch32 = source.charAt(i++); 104 if (XMLChar.isHighSurrogate(ch32)) { 105 char low = source.charAt(i++); 106 ch32 = XMLChar.supplemental((char)ch32, low); 107 } 108 data.getRecursiveDecomposition(canonical, ch32, buffer); 109 110 114 int ch; 115 for (int j = 0; j < buffer.length();) { 117 ch = buffer.charAt(j++); 119 if (XMLChar.isHighSurrogate(ch32)) { 120 char low = buffer.charAt(j++); 121 ch = XMLChar.supplemental((char)ch, low); 122 } 123 int chClass = data.getCanonicalClass(ch); 124 int k = target.length(); if (chClass != 0) { 126 127 129 int ch2; 130 while (k > 0) { 131 ch2 = target.charAt(k-1); 132 if (XMLChar.isSurrogate(ch2)) { 133 k--; 134 char high = buffer.charAt(k-1); 135 ch2 = XMLChar.supplemental(high, (char)ch2); 136 } 137 if (data.getCanonicalClass(ch2) <= chClass) break; 138 k--; 139 } 140 } 145 if (ch < 65536) { 146 target.insert(k, (char)ch); 147 } else { 148 String s = "" + XMLChar.highSurrogate(ch) + XMLChar.lowSurrogate(ch); 149 target.insert(k, s); 150 } 151 } 153 } 154 } 155 156 162 private void internalCompose(StringBuffer target) { 163 164 int starterPos = 0; 165 int starterCh = target.charAt(0); 168 int compPos = 1; 169 if (XMLChar.isHighSurrogate(starterCh)) { 170 starterCh = XMLChar.supplemental((char)starterCh, target.charAt(1)); 171 compPos++; 172 } 173 int lastClass = data.getCanonicalClass(starterCh); 174 if (lastClass != 0) lastClass = 256; int oldLen = target.length(); 176 177 179 int ch; 180 for (int decompPos = compPos; decompPos < target.length();) { 182 ch = target.charAt(decompPos++); 183 if (XMLChar.isHighSurrogate(ch)) { 184 ch = XMLChar.supplemental((char)ch, target.charAt(decompPos++)); 185 } 186 int chClass = data.getCanonicalClass(ch); 188 int composite = data.getPairwiseComposition(starterCh, ch); 189 if (composite != NormalizerData.NOT_COMPOSITE && (lastClass < chClass || lastClass == 0)) { 190 setCharAt(target, starterPos, composite); 191 starterCh = composite; 194 } else { 195 if (chClass == 0) { 196 starterPos = compPos; 197 starterCh = ch; 198 } 199 lastClass = chClass; 200 setCharAt(target, compPos, ch); 201 if (target.length() != oldLen) { decompPos += target.length() - oldLen; 203 oldLen = target.length(); 204 } 205 compPos += (ch<65536 ? 1 : 2); 206 } 207 } 208 target.setLength(compPos); 209 } 210 211 216 217 private static void setCharAt(StringBuffer target, int offset, int ch32) { 218 if (ch32 < 65536) { 219 if (XMLChar.isHighSurrogate(target.charAt(offset))) { 220 target.setCharAt(offset, (char)ch32); 221 target.deleteCharAt(offset+1); 222 } else { 223 target.setCharAt(offset, (char)ch32); 224 } 225 } else { 226 if (XMLChar.isHighSurrogate(target.charAt(offset))) { 227 target.setCharAt(offset, XMLChar.highSurrogate(ch32)); 228 target.setCharAt(offset+1, XMLChar.lowSurrogate(ch32)); 229 } else { 230 target.setCharAt(offset, XMLChar.highSurrogate(ch32)); 231 target.insert(offset+1, XMLChar.lowSurrogate(ch32)); 232 } 233 } 234 } 235 236 240 private static NormalizerData data = null; 241 242 245 boolean getExcluded (char ch) { 246 return data.getExcluded(ch); 247 } 248 249 252 String getRawDecompositionMapping (char ch) { 253 return data.getRawDecompositionMapping(ch); 254 } 255 } | Popular Tags |