1 package org.jahia.clipbuilder.html.util; 2 import org.htmlparser.util.ParserUtils; 3 import org.htmlparser.*; 4 import org.htmlparser.filters.TagNameFilter; 5 import org.htmlparser.util.NodeList; 6 import java.util.Iterator ; 7 import java.nio.charset.Charset ; 8 9 23 public abstract class HTMLUtilities { 24 private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(HTMLUtilities.class); 25 26 27 35 public static String getEncoding(String html, String defaultEncoding) throws Exception { 36 String encoding = null; 37 Parser parser = ParserUtils.createParserParsingAnInputString(html.toLowerCase()); 38 TagNameFilter metaFilter = new TagNameFilter("meta"); 39 NodeList metaList = parser.parse(metaFilter); 40 for (int i = 0; i < metaList.size(); i++) { 42 String content = ((Tag) metaList.elementAt(i)).getAttribute("content"); 43 if (content != null) { 44 content = content.toLowerCase(); 45 String foundEncoding = getEncodingFromContentValue(content); 46 if (foundEncoding != null) { 47 encoding = foundEncoding; 48 } 49 } 50 } 51 if (encoding == null) { 52 return defaultEncoding; 53 } 54 55 logger.debug(encoding); 56 return encoding; 57 } 58 59 60 66 private static String getEncodingFromContentValue(String content) { 67 String encoding = null; 68 if (content == null) { 69 return encoding; 70 } 71 content = content.toLowerCase(); 72 Iterator it = Charset.availableCharsets().values().iterator(); 73 while (it.hasNext()) { 74 String currentEncoding = it.next().toString(); 75 if (content.indexOf(currentEncoding.toLowerCase()) > 0) { 76 encoding = currentEncoding; 77 break; 78 } 79 } 80 if (encoding == null) { 81 return null; 82 } 83 return encoding.toLowerCase(); 84 } 85 86 } 87 | Popular Tags |