1 package net.sf.saxon.charcode; 2 3 6 7 public final class UnicodeCharacterSet implements CharacterSet { 8 9 private static UnicodeCharacterSet theInstance = new UnicodeCharacterSet(); 10 11 14 15 private UnicodeCharacterSet() {} 16 17 public static UnicodeCharacterSet getInstance() { 18 return theInstance; 19 } 20 21 public boolean inCharset(int c) { 22 return true; 23 } 24 25 33 34 public static int getUTF8Encoding(char in, char in2, byte[] out) { 35 int i = (int)in; 37 if (i<=0x7f) { 38 out[0] = (byte)i; 39 return 1; 40 } else if (i<=0x7ff) { 41 out[0] = (byte)(0xc0 | ((in >> 6) & 0x1f)); 42 out[1] = (byte)(0x80 | (in & 0x3f)); 43 return 2; 44 } else if (i>=0xd800 && i<=0xdbff) { 45 int j = (int)in2; 47 if (!(j>=0xdc00 && j<=0xdfff)) { 48 throw new IllegalArgumentException ("Malformed Unicode Surrogate Pair (" + i + "," + j + ")"); 49 } 50 byte xxxxxx = (byte)(j & 0x3f); 51 byte yyyyyy = (byte)(((i & 0x03) << 4) | ((j >> 6) & 0x0f)); 52 byte zzzz = (byte)((i >> 2) & 0x0f); 53 byte uuuuu = (byte)(((i >> 6) & 0x0f) + 1); 54 out[0] = (byte)(0xf0 | ((uuuuu >> 2) & 0x07)); 55 out[1] = (byte)(0x80 | ((uuuuu & 0x03) << 4) | zzzz); 56 out[2] = (byte)(0x80 | yyyyyy); 57 out[3] = (byte)(0x80 | xxxxxx); 58 return 4; 59 } else if (i>=0xdc00 && i<=0xdfff) { 60 return 0; 62 } else { 63 out[0] = (byte)(0xe0 | ((in >> 12) & 0x0f)); 64 out[1] = (byte)(0x80 | ((in >> 6) & 0x3f)); 65 out[2] = (byte)(0x80 | (in & 0x3f)); 66 return 3; 67 } 68 } 69 70 } 71 72 | Popular Tags |