1 28 29 package com.caucho.xml; 30 31 34 public class XmlChar { 35 static boolean isAsciiNameChar[]; 36 37 private XmlChar() {} 38 39 public static boolean isWhitespace(int ch) 40 { 41 return ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd); 42 } 43 44 public static boolean isChar(int ch) 45 { 46 return (ch >= 0x20 && ch <= 0xd7ff || 47 ch == 0x9 || 48 ch == 0xa || 49 ch == 0xd || 50 ch >= 0xe000 && ch <= 0xfff0); 51 } 52 53 public static boolean isNameStart(int ch) 54 { 55 return (ch >= 0x41 && ch <= 0x5a || 56 ch >= 0x61 && ch <= 0x7a || 57 ch == '_' || ch == ':' || 58 ch > 0x7f && (isBaseChar(ch) || 59 isIdeographic(ch))); 60 } 61 62 65 public static boolean []getAsciiNameCharArray() 66 { 67 return isAsciiNameChar; 68 } 69 70 73 public static boolean isNameChar(int ch) 74 { 75 if (ch < 0x20) 76 return false; 77 else if (ch < 128) 78 return isAsciiNameChar[ch]; 79 else 80 return (isBaseChar(ch) || 81 isIdeographic(ch) || 82 isCombiningChar(ch) || 83 isExtender(ch) || 84 isDigit(ch)); 85 } 86 87 private static boolean isBaseChar(int ch) 88 { 89 return (ch <= 0xff && (ch >= 0x0041 && ch <= 0x005A || 90 ch >= 0x0061 && ch <= 0x007A || 91 ch >= 0x00C0 && ch <= 0x00D6 || 92 ch >= 0x00D8 && ch <= 0x00F6 || 93 ch >= 0x00F8 && ch <= 0x00FF) || 94 ch <= 0x1f5 && (ch >= 0x0100 && ch <= 0x0131 || 95 ch >= 0x0134 && ch <= 0x013E || 96 ch >= 0x0141 && ch <= 0x0148 || 97 ch >= 0x014A && ch <= 0x017E || 98 ch >= 0x0180 && ch <= 0x01C3 || 99 ch >= 0x01CD && ch <= 0x01F0 || 100 ch >= 0x01F4 && ch <= 0x01F5) || 101 ch <= 0x2ff && (ch >= 0x01FA && ch <= 0x0217 || 102 ch >= 0x0250 && ch <= 0x02A8 || 103 ch >= 0x02BB && ch <= 0x02C1) || 104 ch <= 0x3ff && (ch == 0x0386 || 105 ch >= 0x0388 && ch <= 0x038A || 106 ch == 0x038C || 107 ch >= 0x038E && ch <= 0x03A1 || 108 ch >= 0x03A3 && ch <= 0x03CE || 109 ch >= 0x03D0 && ch <= 0x03D6 || 110 ch == 0x03DA || 111 ch == 0x03DC || 112 ch == 0x03DE || 113 ch == 0x03E0 || 114 ch >= 0x03E2 && ch <= 0x03F3) || 115 ch <= 0x4ff && (ch >= 0x0401 && ch <= 0x040C || 116 ch >= 0x040E && ch <= 0x044F || 117 ch >= 0x0451 && ch <= 0x045C || 118 ch >= 0x045E && ch <= 0x0481 || 119 ch >= 0x0490 && ch <= 0x04C4 || 120 ch >= 0x04C7 && ch <= 0x04C8 || 121 ch >= 0x04CB && ch <= 0x04CC || 122 ch >= 0x04D0 && ch <= 0x04EB || 123 ch >= 0x04EE && ch <= 0x04F5 || 124 ch >= 0x04F8 && ch <= 0x04F9) || 125 ch <= 0x5ff && (ch >= 0x0531 && ch <= 0x0556 || 126 ch == 0x0559 || 127 ch >= 0x0561 && ch <= 0x0586 || 128 ch >= 0x05D0 && ch <= 0x05EA || 129 ch >= 0x05F0 && ch <= 0x05F2) || 130 ch <= 0x6ff && (ch >= 0x0621 && ch <= 0x063A || 131 ch >= 0x0641 && ch <= 0x064A || 132 ch >= 0x0671 && ch <= 0x06B7 || 133 ch >= 0x06BA && ch <= 0x06BE || 134 ch >= 0x06C0 && ch <= 0x06CE || 135 ch >= 0x06D0 && ch <= 0x06D3 || 136 ch == 0x06D5 || 137 ch >= 0x06E5 && ch <= 0x06E6) || 138 ch <= 0x9ff && (ch >= 0x0905 && ch <= 0x0939 || 139 ch == 0x093D || 140 ch >= 0x0958 && ch <= 0x0961 || 141 ch >= 0x0985 && ch <= 0x098C || 142 ch >= 0x098F && ch <= 0x0990 || 143 ch >= 0x0993 && ch <= 0x09A8 || 144 ch >= 0x09AA && ch <= 0x09B0 || 145 ch == 0x09B2 || 146 ch >= 0x09B6 && ch <= 0x09B9 || 147 ch >= 0x09DC && ch <= 0x09DD || 148 ch >= 0x09DF && ch <= 0x09E1 || 149 ch >= 0x09F0 && ch <= 0x09F1) || 150 ch <= 0xaff && (ch >= 0x0A05 && ch <= 0x0A0A || 151 ch >= 0x0A0F && ch <= 0x0A10 || 152 ch >= 0x0A13 && ch <= 0x0A28 || 153 ch >= 0x0A2A && ch <= 0x0A30 || 154 ch >= 0x0A32 && ch <= 0x0A33 || 155 ch >= 0x0A35 && ch <= 0x0A36 || 156 ch >= 0x0A38 && ch <= 0x0A39 || 157 ch >= 0x0A59 && ch <= 0x0A5C || 158 ch == 0x0A5E || 159 ch >= 0x0A72 && ch <= 0x0A74 || 160 ch >= 0x0A85 && ch <= 0x0A8B || 161 ch == 0x0A8D || 162 ch >= 0x0A8F && ch <= 0x0A91 || 163 ch >= 0x0A93 && ch <= 0x0AA8 || 164 ch >= 0x0AAA && ch <= 0x0AB0 || 165 ch >= 0x0AB2 && ch <= 0x0AB3 || 166 ch >= 0x0AB5 && ch <= 0x0AB9 || 167 ch == 0x0ABD || 168 ch == 0x0AE0) || 169 ch <= 0xbff && (ch >= 0x0B05 && ch <= 0x0B0C || 170 ch >= 0x0B0F && ch <= 0x0B10 || 171 ch >= 0x0B13 && ch <= 0x0B28 || 172 ch >= 0x0B2A && ch <= 0x0B30 || 173 ch >= 0x0B32 && ch <= 0x0B33 || 174 ch >= 0x0B36 && ch <= 0x0B39 || 175 ch == 0x0B3D || 176 ch >= 0x0B5C && ch <= 0x0B5D || 177 ch >= 0x0B5F && ch <= 0x0B61 || 178 ch >= 0x0B85 && ch <= 0x0B8A || 179 ch >= 0x0B8E && ch <= 0x0B90 || 180 ch >= 0x0B92 && ch <= 0x0B95 || 181 ch >= 0x0B99 && ch <= 0x0B9A || 182 ch == 0x0B9C || 183 ch >= 0x0B9E && ch <= 0x0B9F || 184 ch >= 0x0BA3 && ch <= 0x0BA4 || 185 ch >= 0x0BA8 && ch <= 0x0BAA || 186 ch >= 0x0BAE && ch <= 0x0BB5 || 187 ch >= 0x0BB7 && ch <= 0x0BB9) || 188 ch <= 0xcff && (ch >= 0x0C05 && ch <= 0x0C0C || 189 ch >= 0x0C0E && ch <= 0x0C10 || 190 ch >= 0x0C12 && ch <= 0x0C28 || 191 ch >= 0x0C2A && ch <= 0x0C33 || 192 ch >= 0x0C35 && ch <= 0x0C39 || 193 ch >= 0x0C60 && ch <= 0x0C61 || 194 ch >= 0x0C85 && ch <= 0x0C8C || 195 ch >= 0x0C8E && ch <= 0x0C90 || 196 ch >= 0x0C92 && ch <= 0x0CA8 || 197 ch >= 0x0CAA && ch <= 0x0CB3 || 198 ch >= 0x0CB5 && ch <= 0x0CB9 || 199 ch == 0x0CDE || 200 ch >= 0x0CE0 && ch <= 0x0CE1) || 201 ch <= 0xdff && (ch >= 0x0D05 && ch <= 0x0D0C || 202 ch >= 0x0D0E && ch <= 0x0D10 || 203 ch >= 0x0D12 && ch <= 0x0D28 || 204 ch >= 0x0D2A && ch <= 0x0D39 || 205 ch >= 0x0D60 && ch <= 0x0D61) || 206 ch <= 0xfff && (ch >= 0x0E01 && ch <= 0x0E2E || 207 ch == 0x0E30 || 208 ch >= 0x0E32 && ch <= 0x0E33 || 209 ch >= 0x0E40 && ch <= 0x0E45 || 210 ch >= 0x0E81 && ch <= 0x0E82 || 211 ch == 0x0E84 || 212 ch >= 0x0E87 && ch <= 0x0E88 || 213 ch == 0x0E8A || 214 ch == 0x0E8D || 215 ch >= 0x0E94 && ch <= 0x0E97 || 216 ch >= 0x0E99 && ch <= 0x0E9F || 217 ch >= 0x0EA1 && ch <= 0x0EA3 || 218 ch == 0x0EA5 || 219 ch == 0x0EA7 || 220 ch >= 0x0EAA && ch <= 0x0EAB || 221 ch >= 0x0EAD && ch <= 0x0EAE || 222 ch == 0x0EB0 || 223 ch >= 0x0EB2 && ch <= 0x0EB3 || 224 ch == 0x0EBD || 225 ch >= 0x0EC0 && ch <= 0x0EC4 || 226 ch >= 0x0F40 && ch <= 0x0F47 || 227 ch >= 0x0F49 && ch <= 0x0F69) || 228 ch <= 0x10ff && (ch >= 0x10A0 && ch <= 0x10C5 || 229 ch >= 0x10D0 && ch <= 0x10F6) || 230 ch <= 0x11ff && (ch == 0x1100 || 231 ch >= 0x1102 && ch <= 0x1103 || 232 ch >= 0x1105 && ch <= 0x1107 || 233 ch == 0x1109 || 234 ch >= 0x110B && ch <= 0x110C || 235 ch >= 0x110E && ch <= 0x1112 || 236 ch == 0x113C || 237 ch == 0x113E || 238 ch == 0x1140 || 239 ch == 0x114C || 240 ch == 0x114E || 241 ch == 0x1150 || 242 ch >= 0x1154 && ch <= 0x1155 || 243 ch == 0x1159 || 244 ch >= 0x115F && ch <= 0x1161 || 245 ch == 0x1163 || 246 ch == 0x1165 || 247 ch == 0x1167 || 248 ch == 0x1169 || 249 ch >= 0x116D && ch <= 0x116E || 250 ch >= 0x1172 && ch <= 0x1173 || 251 ch == 0x1175 || 252 ch == 0x119E || 253 ch == 0x11A8 || 254 ch == 0x11AB || 255 ch >= 0x11AE && ch <= 0x11AF || 256 ch >= 0x11B7 && ch <= 0x11B8 || 257 ch == 0x11BA || 258 ch >= 0x11BC && ch <= 0x11C2 || 259 ch == 0x11EB || 260 ch == 0x11F0 || 261 ch == 0x11F9) || 262 ch <= 0x1fff && (ch >= 0x1E00 && ch <= 0x1E9B || 263 ch >= 0x1EA0 && ch <= 0x1EF9 || 264 ch >= 0x1F00 && ch <= 0x1F15 || 265 ch >= 0x1F18 && ch <= 0x1F1D || 266 ch >= 0x1F20 && ch <= 0x1F45 || 267 ch >= 0x1F48 && ch <= 0x1F4D || 268 ch >= 0x1F50 && ch <= 0x1F57 || 269 ch == 0x1F59 || 270 ch == 0x1F5B || 271 ch == 0x1F5D || 272 ch >= 0x1F5F && ch <= 0x1F7D || 273 ch >= 0x1F80 && ch <= 0x1FB4 || 274 ch >= 0x1FB6 && ch <= 0x1FBC || 275 ch == 0x1FBE || 276 ch >= 0x1FC2 && ch <= 0x1FC4 || 277 ch >= 0x1FC6 && ch <= 0x1FCC || 278 ch >= 0x1FD0 && ch <= 0x1FD3 || 279 ch >= 0x1FD6 && ch <= 0x1FDB || 280 ch >= 0x1FE0 && ch <= 0x1FEC || 281 ch >= 0x1FF2 && ch <= 0x1FF4 || 282 ch >= 0x1FF6 && ch <= 0x1FFC) || 283 ch == 0x2126 || 284 ch >= 0x212A && ch <= 0x212B || 285 ch == 0x212E || 286 ch >= 0x2180 && ch <= 0x2182 || 287 ch >= 0x3041 && ch <= 0x3094 || 288 ch >= 0x30A1 && ch <= 0x30FA || 289 ch >= 0x3105 && ch <= 0x312C || 290 ch >= 0xAC00 && ch <= 0xD7A3); 291 } 292 293 private static boolean isIdeographic(int ch) 294 { 295 return (ch >= 0x4e00 && ch <= 0x9fa5 || ch == 0x3007 || 296 ch >= 0x3021 && ch <= 0x3029); 297 } 298 299 private static boolean isCombiningChar(int ch) 300 { 301 if (ch < 0x300) 302 return false; 303 304 return (ch <= 0x6ff && (ch >= 0x0300 && ch <= 0x0345 || 305 ch >= 0x0360 && ch <= 0x0361 || 306 ch >= 0x0483 && ch <= 0x0486 || 307 ch >= 0x0591 && ch <= 0x05A1 || 308 ch >= 0x05A3 && ch <= 0x05B9 || 309 ch >= 0x05BB && ch <= 0x05BD || 310 ch == 0x05BF || 311 ch >= 0x05C1 && ch <= 0x05C2 || 312 ch == 0x05C4 || 313 ch >= 0x064B && ch <= 0x0652 || 314 ch == 0x0670 || 315 ch >= 0x06D6 && ch <= 0x06DC || 316 ch >= 0x06DD && ch <= 0x06DF || 317 ch >= 0x06E0 && ch <= 0x06E4 || 318 ch >= 0x06E7 && ch <= 0x06E8 || 319 ch >= 0x06EA && ch <= 0x06ED) || 320 ch <= 0x9ff && (ch >= 0x0901 && ch <= 0x0903 || 321 ch == 0x093C || 322 ch >= 0x093E && ch <= 0x094C || 323 ch == 0x094D || 324 ch >= 0x0951 && ch <= 0x0954 || 325 ch >= 0x0962 && ch <= 0x0963 || 326 ch >= 0x0981 && ch <= 0x0983 || 327 ch == 0x09BC || 328 ch == 0x09BE || 329 ch == 0x09BF || 330 ch >= 0x09C0 && ch <= 0x09C4 || 331 ch >= 0x09C7 && ch <= 0x09C8 || 332 ch >= 0x09CB && ch <= 0x09CD || 333 ch == 0x09D7 || 334 ch >= 0x09E2 && ch <= 0x09E3) || 335 ch <= 0xaff && (ch == 0x0A02 || 336 ch == 0x0A3C || 337 ch == 0x0A3E || 338 ch == 0x0A3F || 339 ch >= 0x0A40 && ch <= 0x0A42 || 340 ch >= 0x0A47 && ch <= 0x0A48 || 341 ch >= 0x0A4B && ch <= 0x0A4D || 342 ch >= 0x0A70 && ch <= 0x0A71 || 343 ch >= 0x0A81 && ch <= 0x0A83 || 344 ch == 0x0ABC || 345 ch >= 0x0ABE && ch <= 0x0AC5 || 346 ch >= 0x0AC7 && ch <= 0x0AC9 || 347 ch >= 0x0ACB && ch <= 0x0ACD) || 348 ch <= 0xbff && (ch >= 0x0B01 && ch <= 0x0B03 || 349 ch == 0x0B3C || 350 ch >= 0x0B3E && ch <= 0x0B43 || 351 ch >= 0x0B47 && ch <= 0x0B48 || 352 ch >= 0x0B4B && ch <= 0x0B4D || 353 ch >= 0x0B56 && ch <= 0x0B57 || 354 ch >= 0x0B82 && ch <= 0x0B83 || 355 ch >= 0x0BBE && ch <= 0x0BC2 || 356 ch >= 0x0BC6 && ch <= 0x0BC8 || 357 ch >= 0x0BCA && ch <= 0x0BCD || 358 ch == 0x0BD7) || 359 ch <= 0xc00 && (ch >= 0x0C01 && ch <= 0x0C03 || 360 ch >= 0x0C3E && ch <= 0x0C44 || 361 ch >= 0x0C46 && ch <= 0x0C48 || 362 ch >= 0x0C4A && ch <= 0x0C4D || 363 ch >= 0x0C55 && ch <= 0x0C56 || 364 ch >= 0x0C82 && ch <= 0x0C83 || 365 ch >= 0x0CBE && ch <= 0x0CC4 || 366 ch >= 0x0CC6 && ch <= 0x0CC8 || 367 ch >= 0x0CCA && ch <= 0x0CCD || 368 ch >= 0x0CD5 && ch <= 0x0CD6) || 369 ch <= 0xeff && (ch >= 0x0D02 && ch <= 0x0D03 || 370 ch >= 0x0D3E && ch <= 0x0D43 || 371 ch >= 0x0D46 && ch <= 0x0D48 || 372 ch >= 0x0D4A && ch <= 0x0D4D || 373 ch == 0x0D57 || 374 ch == 0x0E31 || 375 ch >= 0x0E34 && ch <= 0x0E3A || 376 ch >= 0x0E47 && ch <= 0x0E4E || 377 ch == 0x0EB1 || 378 ch >= 0x0EB4 && ch <= 0x0EB9 || 379 ch >= 0x0EBB && ch <= 0x0EBC || 380 ch >= 0x0EC8 && ch <= 0x0ECD) || 381 ch <= 0xfff && (ch >= 0x0F18 && ch <= 0x0F19 || 382 ch == 0x0F35 || 383 ch == 0x0F37 || 384 ch == 0x0F39 || 385 ch == 0x0F3E || 386 ch == 0x0F3F || 387 ch >= 0x0F71 && ch <= 0x0F84 || 388 ch >= 0x0F86 && ch <= 0x0F8B || 389 ch >= 0x0F90 && ch <= 0x0F95 || 390 ch == 0x0F97 || 391 ch >= 0x0F99 && ch <= 0x0FAD || 392 ch >= 0x0FB1 && ch <= 0x0FB7 || 393 ch == 0x0FB9) || 394 ch >= 0x20D0 && ch <= 0x20DC || 395 ch == 0x20E1 || 396 ch >= 0x302A && ch <= 0x302F || 397 ch == 0x3099 || 398 ch == 0x309A); 399 } 400 401 private static boolean isDigit(int ch) 402 { 403 return (ch >= 0x0030 && ch <= 0x0039 || 404 ch >= 0x0660 && ch <= 0x0669 || 405 ch >= 0x06F0 && ch <= 0x06F9 || 406 ch >= 0x0966 && ch <= 0x096F || 407 ch >= 0x09E6 && ch <= 0x09EF || 408 ch >= 0x0A66 && ch <= 0x0A6F || 409 ch >= 0x0AE6 && ch <= 0x0AEF || 410 ch >= 0x0B66 && ch <= 0x0B6F || 411 ch >= 0x0BE7 && ch <= 0x0BEF || 412 ch >= 0x0C66 && ch <= 0x0C6F || 413 ch >= 0x0CE6 && ch <= 0x0CEF || 414 ch >= 0x0D66 && ch <= 0x0D6F || 415 ch >= 0x0E50 && ch <= 0x0E59 || 416 ch >= 0x0ED0 && ch <= 0x0ED9 || 417 ch >= 0x0F20 && ch <= 0x0F29); 418 } 419 420 private static boolean isExtender(int ch) 421 { 422 return (ch == 0x00B7 || 423 ch == 0x02D0 || 424 ch == 0x02D1 || 425 ch == 0x0387 || 426 ch == 0x0640 || 427 ch == 0x0E46 || 428 ch == 0x0EC6 || 429 ch == 0x3005 || 430 ch >= 0x3031 && ch <= 0x3035 || 431 ch >= 0x309D && ch <= 0x309E || 432 ch >= 0x30FC && ch <= 0x30FE); 433 } 434 435 static { 436 isAsciiNameChar = new boolean[128]; 437 for (int i = 0x30; i <= 0x39; i++) 438 isAsciiNameChar[i] = true; 439 for (int i = 0x41; i <= 0x5a; i++) 440 isAsciiNameChar[i] = true; 441 for (int i = 0x61; i <= 0x7a; i++) 442 isAsciiNameChar[i] = true; 443 isAsciiNameChar['_'] = true; 444 isAsciiNameChar[':'] = true; 445 isAsciiNameChar['.'] = true; 446 isAsciiNameChar['-'] = true; 447 } 448 } 449 | Popular Tags |