KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jahia > clipbuilder > html > util > HTMLUtilities


1 package org.jahia.clipbuilder.html.util;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.Locale;

import org.htmlparser.*;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserUtils;
8
9 /**
10  * <p>
11  *
12  * Title: </p> <p>
13  *
14  * Description: </p> <p>
15  *
16  * Copyright: Copyright (c) 2005</p> <p>
17  *
18  * Company: </p>
19  *
20  *@author TLILI Khaled
21  *@version 1.0
22  */

23 public abstract class HTMLUtilities {
24     private static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(HTMLUtilities.class);
25
26
27     /**
28      * Gets the Encoding attribute of the HTMLUtilities class
29      *
30      *@param html Description of Parameter
31      *@param defaultEncoding Description of Parameter
32      *@return The Encoding value
33      *@exception Exception Description of Exception
34      */

35     public static String JavaDoc getEncoding(String JavaDoc html, String JavaDoc defaultEncoding) throws Exception JavaDoc {
36         String JavaDoc encoding = null;
37         Parser parser = ParserUtils.createParserParsingAnInputString(html.toLowerCase());
38         TagNameFilter metaFilter = new TagNameFilter("meta");
39         NodeList metaList = parser.parse(metaFilter);
40         //concat all form tagfound
41
for (int i = 0; i < metaList.size(); i++) {
42             String JavaDoc content = ((Tag) metaList.elementAt(i)).getAttribute("content");
43             if (content != null) {
44                 content = content.toLowerCase();
45                 String JavaDoc foundEncoding = getEncodingFromContentValue(content);
46                 if (foundEncoding != null) {
47                     encoding = foundEncoding;
48                 }
49             }
50         }
51         if (encoding == null) {
52             return defaultEncoding;
53         }
54
55         logger.debug(encoding);
56         return encoding;
57     }
58
59
60     /**
61      * Gets the EncodingFromContentValue attribute of the HTMLUtilities class
62      *
63      *@param content Description of Parameter
64      *@return The EncodingFromContentValue value
65      */

66     private static String JavaDoc getEncodingFromContentValue(String JavaDoc content) {
67         String JavaDoc encoding = null;
68         if (content == null) {
69             return encoding;
70         }
71         content = content.toLowerCase();
72         Iterator JavaDoc it = Charset.availableCharsets().values().iterator();
73         while (it.hasNext()) {
74             String JavaDoc currentEncoding = it.next().toString();
75             if (content.indexOf(currentEncoding.toLowerCase()) > 0) {
76                 encoding = currentEncoding;
77                 break;
78             }
79         }
80         if (encoding == null) {
81             return null;
82         }
83         return encoding.toLowerCase();
84     }
85
86 }
87
Popular Tags