1 /** 2 ******************************************************************************* 3 * Copyright (C) 2005, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package com.ibm.icu.text; 8 9 /** 10 * Abstract class for recognizing a single charset. 11 * Part of the implementation of ICU's CharsetDetector. 12 * 13 * Each specific charset that can be recognized will have an instance 14 * of some subclass of this class. All interaction between the overall 15 * CharsetDetector and the stuff specific to an individual charset happens 16 * via the interface provided here. 17 * 18 * Instances of CharsetDetector DO NOT have or maintain 19 * state pertaining to a specific match or detect operation. 20 * The WILL be shared by multiple instances of CharsetDetector. 21 * They encapsulate const charset-specific information. 22 * 23 * @internal 24 */ 25 abstract class CharsetRecognizer { 26 /** 27 * Get the IANA name of this charset. 28 * @return the charset name. 29 */ 30 abstract String getName(); 31 32 /** 33 * Get the ISO language code for this charset. 34 * @return the language code, or <code>null</code> if the language cannot be determined. 35 */ 36 public String getLanguage() 37 { 38 return null; 39 } 40 41 /** 42 * Test the match of this charset with the input text data 43 * which is obtained via the CharsetDetector object. 44 * 45 * @param det The CharsetDetector, which contains the input text 46 * to be checked for being in this charset. 47 * @return Two values packed into one int (Damn java, anyhow) 48 * <br/> 49 * bits 0-7: the match confidence, ranging from 0-100 50 * <br/> 51 * bits 8-15: The match reason, an enum-like value. 52 */ 53 abstract int match(CharsetDetector det); 54 55 } 56