1 19 20 33 package org.htmlparser.tests; 34 35 import java.io.BufferedReader ; 36 import java.io.ByteArrayInputStream ; 37 import java.io.File ; 38 import java.io.FileReader ; 39 import java.io.IOException ; 40 import java.io.UnsupportedEncodingException ; 41 42 import org.w3c.dom.Document ; 43 import org.w3c.dom.NamedNodeMap ; 44 import org.w3c.dom.Node ; 45 import org.w3c.dom.NodeList ; 46 import org.w3c.tidy.Tidy; 47 import org.xml.sax.SAXException ; 48 49 68 public class BenchmarkTidy 69 { 70 71 protected static String utfEncodingName; 72 73 76 public BenchmarkTidy(String data) 77 { 78 try 79 { 80 Document doc = (Document ) getDOM(data); 81 parseNodes(doc, "img", false, "src"); 82 } 83 catch (SAXException e) 84 { 85 e.printStackTrace(); 86 } 87 } 88 89 protected void parseNodes( 90 Document html, 91 String htmlTag, 92 boolean type, 93 String srcTag) 94 { 95 96 NodeList nodeList = html.getElementsByTagName(htmlTag); 97 boolean uniqueBinary; 98 99 for (int i = 0; i < nodeList.getLength(); i++) 100 { 101 uniqueBinary = true; 102 Node tempNode = nodeList.item(i); 103 104 NamedNodeMap nnm = tempNode.getAttributes(); 106 Node namedItem = null; 107 108 if (type) 109 { 110 namedItem = nnm.getNamedItem("type"); 112 if (namedItem == null) 113 { 114 break; 115 } 116 String inputType = namedItem.getNodeValue(); 117 118 if (inputType != null && inputType.equalsIgnoreCase("image")) 119 { 120 } 122 else 123 { 124 break; 125 } 126 } 127 namedItem = nnm.getNamedItem(srcTag); 128 System.out.println("Image Tag: " + htmlTag + " SRC=" + namedItem); 129 } 130 } 131 132 protected static Tidy getParser() 133 { 134 Tidy tidy = new Tidy(); 135 tidy.setCharEncoding(org.w3c.tidy.Configuration.UTF8); 136 tidy.setQuiet(true); 137 tidy.setShowWarnings(false); 138 139 return tidy; 140 } 141 142 protected static Node getDOM(String text) throws SAXException 143 { 144 145 try 146 { 147 Node node = 148 getParser().parseDOM( 149 new ByteArrayInputStream ( 150 text.getBytes(getUTFEncodingName())), 151 null); 152 153 return node; 154 } 155 catch (UnsupportedEncodingException e) 156 { 157 158 throw new RuntimeException ("UTF-8 encoding failed - " + e); 159 } 160 } 161 162 protected static String getUTFEncodingName() 163 { 164 if (utfEncodingName == null) 165 { 166 String versionNum = System.getProperty("java.version"); 167 if (versionNum.startsWith("1.1")) 168 { 169 utfEncodingName = "UTF8"; 170 } 171 else 172 { 173 utfEncodingName = "UTF-8"; 174 } 175 } 176 return utfEncodingName; 177 } 178 179 public static void main(String [] args) 180 { 181 if (args != null && args.length > 0) 182 { 183 try 184 { 185 File input = new File (args[0]); 186 187 StringBuffer buff = new StringBuffer (); 188 BufferedReader reader = 189 new BufferedReader (new FileReader (input)); 190 String line = null; 191 while ((line = reader.readLine()) != null) 192 { 193 buff.append(line); 194 } 195 long start = System.currentTimeMillis(); 196 BenchmarkTidy test = new BenchmarkTidy(buff.toString()); 197 System.out.println( 198 "Elapsed time ms: " + (System.currentTimeMillis() - start)); 199 } 200 catch (IOException e) 201 { 202 e.printStackTrace(); 203 } 204 } 205 else 206 { 207 System.out.println("Please provide a filename"); 208 } 209 } 210 } 211 | Popular Tags |