1 5 package com.teamkonzept.lib; 6 7 import java.util.*; 8 9 12 public class TKHtmlConverter extends TKConverter { 13 public final static String CONV_ID = "HTML"; 14 public final static String CONV_NAME = "HTML-ISO-8859_1"; 15 16 public String getName() 17 { 18 return CONV_NAME; 19 } 20 21 public final static String [] HTML_CODES = { 22 null, null, null, null, null, null, null, null, null, null, 23 null, null, null, null, null, null, null, null, null, null, 24 null, null, null, null, null, null, null, null, null, null, 25 null, null, null, null, "quot", null, null, null, "amp", null, 26 null, null, null, null, null, null, null, null, null, null, 27 null, null, null, null, null, null, null, null, null, null, 28 "lt", null, "gt", null, null, null, null, null, null, null, 29 null, null, null, null, null, null, null, null, null, null, 30 null, null, null, null, null, null, null, null, null, null, 31 null, null, null, null, null, null, null, null, null, null, 32 null, null, null, null, null, null, null, null, null, null, 33 null, null, null, null, null, null, null, null, null, null, 34 null, null, null, null, null, null, null, null, null, null, 35 null, null, null, null, null, null, null, null, null, null, 36 null, null, null, null, null, null, null, null, null, null, 37 null, null, null, null, null, null, null, null, null, null, 38 "nbsp", null, null, null, null, null, null, null, null, null, 39 null, null, null, "shy", null, null, null, null, null, null, 40 null, null, null, null, null, null, null, null, null, null, 41 null, null, "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", 42 "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", 43 "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", null, "Oslash", "Ugrave", "Uacute", "Ucirc", 44 "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", 45 "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", 46 "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", null, "oslash", "ugrave", 47 "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml" 48 }; 49 50 public final static byte AMPERCENT = (byte) '&'; 51 public final static byte HASH = (byte) '#'; 52 public final static byte SEMICOLON = (byte) ';'; 53 public static Hashtable NAME_HASH = null; 54 55 public int getMaxBytesPerChar() 56 { 57 return 8; 58 } 59 60 public int minCharSize(int byteCount) 61 { 62 return byteCount; 63 } 64 65 public synchronized Hashtable getNameHash() 66 { 67 if( NAME_HASH != null ) return NAME_HASH; 68 NAME_HASH = new Hashtable(38); 69 String html; 70 for( int i=0; i<=255; i++ ) { 71 if( (html = HTML_CODES[i]) != null ) NAME_HASH.put( html, new Integer (i) ); 72 } 73 return NAME_HASH; 74 } 75 76 public int charsToBytes(char src[], byte dst[], int srcBegin, int length, int dstBegin) 77 { 78 int lastPos = srcBegin+length; 79 int firstPos = dstBegin; 80 for( int i=srcBegin; i<lastPos; i++ ) { 81 char c = src[i]; 82 byte b = (byte) c; 83 int code = (int) c; 84 String subst = HTML_CODES[ code ]; 85 if( subst == null ) { 86 if( c <='\u0080' ) { 88 dst[ dstBegin++ ] = b; 89 } 90 else if( c < '\u0100' ) { 91 dst[ dstBegin++ ] = AMPERCENT; 92 dst[ dstBegin++ ] = HASH; 93 dst[ dstBegin++ ] = (byte) Character.forDigit( ( code / 100 ), 10 ); 94 dst[ dstBegin++ ] = (byte) Character.forDigit( ( code % 100 / 10 ), 10 ); 95 dst[ dstBegin++ ] = (byte) Character.forDigit( ( code % 10 ), 10 ); 96 dst[ dstBegin++ ] = SEMICOLON; 97 } 98 } 99 else { 100 dst[ dstBegin++ ] = AMPERCENT; 101 int len = subst.length(); 102 subst.getBytes( 0, len, dst, dstBegin ); 103 dstBegin += len; 104 dst[ dstBegin++ ] = SEMICOLON; 105 } 106 } 107 108 return dstBegin - firstPos; 109 } 110 111 public int bytesToChars(byte src[], char dst[], int srcBegin, int length, int dstBegin) 112 { 113 int lastPos = srcBegin+length; 114 int firstPos = dstBegin; 115 int i=srcBegin; 116 Hashtable nameHash = getNameHash(); 117 118 while( i<lastPos ) { 119 byte b = src[i++]; 120 if( b == AMPERCENT ) { 121 int startPos = i; 122 while( src[i] != SEMICOLON ) i++; 123 if( src[i] == HASH ) { 124 String decStr = new String ( src, 0, startPos+1, i-startPos-1 ); 125 dst[dstBegin++] = (char) Integer.parseInt( decStr ); 126 } 127 else { 128 String code = new String ( src, 0, startPos, i-startPos ); 129 dst[dstBegin++] = 130 (char) ((Integer )nameHash.get(code)).intValue(); 131 } 132 i++; 133 } 134 else { 135 dst[dstBegin++] = (char) b; 136 } 137 } 138 return dstBegin - firstPos; 139 } 140 141 } 142 143 | Popular Tags |