1 7 package com.ibm.icu.text; 8 9 19 abstract class CharsetRecog_2022 extends CharsetRecognizer { 20 21 22 34 int match(byte [] text, int textLen, byte [][] escapeSequences) { 35 int i, j; 36 int escN; 37 int hits = 0; 38 int misses = 0; 39 int shifts = 0; 40 int quality; 41 scanInput: 42 for (i=0; i<textLen; i++) { 43 if (text[i] == 0x1b) { 44 checkEscapes: 45 for (escN=0; escN<escapeSequences.length; escN++) { 46 byte [] seq = escapeSequences[escN]; 47 48 for (j=1; j<seq.length; j++) { 49 if (seq[j] != text[i+j]) { 50 continue checkEscapes; 51 } 52 } 53 54 hits++; 55 i += seq.length-1; 56 continue scanInput; 57 } 58 59 misses++; 60 } 61 62 if (text[i] == 0x0e || text[i] == 0x0f) { 63 shifts++; 65 } 66 } 67 68 if (hits == 0) { 69 return 0; 70 } 71 72 quality = (100*hits - 100*misses) / (hits + misses); 79 80 if (hits+shifts < 5) { 84 quality -= (5-(hits+shifts))*10; 85 } 86 87 if (quality < 0) { 88 quality = 0; 89 } 90 return quality; 91 } 92 93 94 95 96 static class CharsetRecog_2022JP extends CharsetRecog_2022 { 97 private byte [] [] escapeSequences = { 98 {0x1b, 0x24, 0x28, 0x43}, {0x1b, 0x24, 0x28, 0x44}, {0x1b, 0x24, 0x40}, {0x1b, 0x24, 0x41}, {0x1b, 0x24, 0x42}, {0x1b, 0x26, 0x40}, {0x1b, 0x28, 0x42}, {0x1b, 0x28, 0x48}, {0x1b, 0x28, 0x49}, {0x1b, 0x28, 0x4a}, {0x1b, 0x2e, 0x41}, {0x1b, 0x2e, 0x46} }; 111 112 String getName() { 113 return "ISO-2022-JP"; 114 } 115 116 int match(CharsetDetector det) { 117 return match(det.fInputBytes, det.fInputLen, escapeSequences); 118 } 119 } 120 121 static class CharsetRecog_2022KR extends CharsetRecog_2022 { 122 private byte [] [] escapeSequences = { 123 {0x1b, 0x24, 0x29, 0x43} 124 }; 125 126 String getName() { 127 return "ISO-2022-KR"; 128 } 129 130 int match(CharsetDetector det) { 131 return match(det.fInputBytes, det.fInputLen, escapeSequences); 132 } 133 134 } 135 136 static class CharsetRecog_2022CN extends CharsetRecog_2022 { 137 private byte [] [] escapeSequences = { 138 {0x1b, 0x24, 0x29, 0x41}, {0x1b, 0x24, 0x29, 0x47}, {0x1b, 0x24, 0x2A, 0x48}, {0x1b, 0x24, 0x29, 0x45}, {0x1b, 0x24, 0x2B, 0x49}, {0x1b, 0x24, 0x2B, 0x4A}, {0x1b, 0x24, 0x2B, 0x4B}, {0x1b, 0x24, 0x2B, 0x4C}, {0x1b, 0x24, 0x2B, 0x4D}, {0x1b, 0x4e}, {0x1b, 0x4f}, }; 150 151 String getName() { 152 return "ISO-2022-CN"; 153 } 154 155 156 int match(CharsetDetector det) { 157 return match(det.fInputBytes, det.fInputLen, escapeSequences); 158 } 159 } 160 161 } 162 163 | Popular Tags |