1 18 19 package org.apache.batik.util.io; 20 21 import java.io.IOException ; 22 import java.io.InputStream ; 23 24 31 public class UTF8Decoder extends AbstractCharDecoder { 32 33 37 protected final static byte[] UTF8_BYTES = { 38 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 39 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 40 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 41 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 42 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 43 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 44 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 45 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,0,0,0,0,0,0,0,0, 46 }; 47 48 51 protected int nextChar = -1; 52 53 56 public UTF8Decoder(InputStream is) { 57 super(is); 58 } 59 60 64 public int readChar() throws IOException { 65 if (nextChar != -1) { 66 int result = nextChar; 67 nextChar = -1; 68 return result; 69 } 70 if (position == count) { 71 fillBuffer(); 72 } 73 if (count == -1) { 74 return END_OF_STREAM; 75 } 76 int b1 = buffer[position++] & 0xff; 77 switch (UTF8_BYTES[b1]) { 78 default: 79 charError("UTF-8"); 80 81 case 1: 82 return b1; 83 84 case 2: 85 if (position == count) { 86 fillBuffer(); 87 } 88 if (count == -1) { 89 endOfStreamError("UTF-8"); 90 } 91 return ((b1 & 0x1f) << 6) | (buffer[position++] & 0x3f); 92 93 case 3: 94 if (position == count) { 95 fillBuffer(); 96 } 97 if (count == -1) { 98 endOfStreamError("UTF-8"); 99 } 100 int b2 = buffer[position++]; 101 if (position == count) { 102 fillBuffer(); 103 } 104 if (count == -1) { 105 endOfStreamError("UTF-8"); 106 } 107 int b3 = buffer[position++]; 108 if ((b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80) { 109 charError("UTF-8"); 110 } 111 return ((b1 & 0x1f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x1f); 112 113 case 4: 114 if (position == count) { 115 fillBuffer(); 116 } 117 if (count == -1) { 118 endOfStreamError("UTF-8"); 119 } 120 b2 = buffer[position++]; 121 if (position == count) { 122 fillBuffer(); 123 } 124 if (count == -1) { 125 endOfStreamError("UTF-8"); 126 } 127 b3 = buffer[position++]; 128 if (position == count) { 129 fillBuffer(); 130 } 131 if (count == -1) { 132 endOfStreamError("UTF-8"); 133 } 134 int b4 = buffer[position++]; 135 if ((b2 & 0xc0) != 0x80 || 136 (b3 & 0xc0) != 0x80 || 137 (b4 & 0xc0) != 0x80) { 138 charError("UTF-8"); 139 } 140 int c = ((b1 & 0x1f) << 18) 141 | ((b2 & 0x3f) << 12) 142 | ((b3 & 0x1f) << 6) 143 | (b4 & 0x1f); 144 nextChar = (c - 0x10000) % 0x400 + 0xdc00; 145 return (c - 0x10000) / 0x400 + 0xd800; 146 } 147 } 148 } 149 | Popular Tags |