1 16 package web.rss; 17 18 import java.io.BufferedReader ; 19 import java.io.BufferedWriter ; 20 import java.io.ByteArrayInputStream ; 21 import java.io.File ; 22 import java.io.FileNotFoundException ; 23 import java.io.FileReader ; 24 import java.io.FileWriter ; 25 import java.io.FilenameFilter ; 26 import java.io.IOException ; 27 import java.io.InputStream ; 28 import java.io.Reader ; 29 import java.io.StringReader ; 30 import java.text.MessageFormat ; 31 import java.text.ParseException ; 32 import java.util.Enumeration ; 33 import java.util.Hashtable ; 34 import java.util.Properties ; 35 36 import javax.xml.parsers.DocumentBuilder ; 37 import javax.xml.parsers.DocumentBuilderFactory ; 38 39 import org.apache.commons.digester.Digester; 40 import org.apache.commons.httpclient.HttpClient; 41 import org.apache.commons.httpclient.HttpStatus; 42 import org.apache.commons.httpclient.methods.GetMethod; 43 import org.apache.commons.logging.Log; 44 import org.apache.commons.logging.LogFactory; 45 import org.w3c.dom.Document ; 46 import org.w3c.dom.Element ; 47 48 54 public abstract class RssHunter { 55 56 static Log log; 57 58 static Hashtable hunters; 59 static{ 60 log = LogFactory.getLog(RssHunter.class); 61 hunters = new Hashtable (); 62 InputStream in = RssHunter.class.getResourceAsStream("rss.properties"); 63 if(in!=null){ 64 Properties props = new Properties (); 65 try{ 66 props.load(in); 67 }catch(IOException e){ 68 log.error("load res.properties failed.", e); 69 }finally{ 70 if(in!=null){ 71 try{ 72 in.close(); 73 }catch(Exception e){} 74 } 75 } 76 Enumeration keys = props.keys(); 77 while(keys.hasMoreElements()){ 78 String key = (String )keys.nextElement(); 79 try{ 80 RssHunter hunter = (RssHunter)Class.forName(props.getProperty(key)).newInstance(); 81 hunters.put(key.toLowerCase(), hunter); 82 }catch(Exception e){ 83 log.error("initialize RssHunter failure.",e); 84 } 85 } 86 } 87 } 88 89 protected RssHunter(){ 90 } 91 92 97 protected static RssHunter getHunter(String protocol){ 98 return (RssHunter)hunters.get(protocol.toLowerCase()); 99 } 100 101 107 public static Channel parse(String url) throws Exception { 108 String [] result = getContent(url); 109 String encoding = getEncoding(result[0].substring(0,50)); 110 try{ 111 RssHunter hunter = getHunter(result[1]); 112 return hunter.parse(new StringReader (result[0].trim())); 113 }catch(Exception e){ 114 System.out.println("in parse mode="+result[1]+",url="+url); 115 throw e; 116 } 117 } 118 119 125 protected static String getEncoding(String xml) throws ParseException { 126 MessageFormat mf = new MessageFormat ("{1}encoding=\"{0}\"{2}"); 127 try{ 128 return (String )(mf.parse(xml)[0]); 129 }catch(Exception e){ 130 return "UTF-8"; 131 } 132 } 133 134 140 protected static String [] getContent(String url) throws Exception 141 { 142 StringBuffer content = new StringBuffer (); 143 StringBuffer mode = new StringBuffer (); 144 long lastReload = load(url, content, mode); 145 if(needReload(lastReload) || content.length()==0 || (mode!=null && mode.length()==0)) 146 { 147 HttpClient client = new HttpClient(); 148 GetMethod get = new GetMethod(url); 149 get.addRequestHeader("user-agent","DLOG4J(http://www.javayou.com) RssHunter 1.0"); 150 try{ 151 client.executeMethod(get); 152 String charset = get.getResponseCharSet(); 153 if(get.getStatusCode() == HttpStatus.SC_OK){ 154 String ct = get.getResponseBodyAsString().trim(); 155 String encoding = getEncoding(ct.substring(0,50)); 156 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 157 DocumentBuilder db = dbf.newDocumentBuilder(); 158 Document doc = db.parse(new ByteArrayInputStream (ct.getBytes(charset))); 159 String sMode = null; 160 Element elem = doc.getDocumentElement(); 161 if("feed".equals(elem.getNodeName())) 162 sMode = "atom"; 163 else 164 if("rss".equals(elem.getNodeName())) 165 sMode = "rss"; 166 else 167 if("rdf:RDF".equals(elem.getNodeName())) 168 sMode = "rdf"; 169 else 170 throw new IllegalArgumentException (url); 171 172 mode = new StringBuffer (sMode); 173 174 ct = new String (ct.getBytes(charset),encoding); 175 save(url, ct, sMode); 176 content = new StringBuffer (ct); 177 } 178 }catch(Exception e){ 179 log.error("fetch content from " + url +" failed.", e); 180 }finally{ 181 get.releaseConnection(); 182 } 183 } 184 return new String []{content.toString().trim(),mode.toString()}; 185 } 186 193 protected static long load(String url, StringBuffer content, StringBuffer mode) throws IOException { 194 String path = getCachePath(); 195 BufferedReader reader = null; 196 long lastModified = 0L; 197 try{ 198 File f = new File (path); 199 if(f.exists()){ 200 final String pattern = Math.abs(url.hashCode()) + "."; 201 File [] fs = f.listFiles(new FilenameFilter (){ 202 public boolean accept(File dir, String name) { 203 return name.startsWith(pattern); 204 }}); 205 if(fs.length>0){ 206 mode.append(fs[0].getName().substring(pattern.length())); 207 lastModified = fs[0].lastModified(); 208 reader = new BufferedReader (new FileReader (fs[0])); 209 String lineSep = System.getProperty("line.separator"); 210 StringBuffer tmpContent = new StringBuffer (); 211 do{ 212 String line = reader.readLine(); 213 if(line==null) 214 break; 215 tmpContent.append(line); 216 tmpContent.append(lineSep); 217 }while(true); 218 content.append(tmpContent.toString().trim()); 219 } 220 } 221 }catch(FileNotFoundException e){ 222 }finally{ 223 if(reader!=null) 224 reader.close(); 225 } 226 return lastModified; 227 } 228 234 protected static void save(String url, String content, String mode) throws IOException { 235 StringBuffer path = new StringBuffer (getCachePath()); 236 path.append(Math.abs(url.hashCode())); 237 path.append('.'); 238 path.append(mode); 239 BufferedWriter writer = null; 240 try{ 241 File f = new File (path.toString()); 242 if(!f.getParentFile().exists()) 243 f.getParentFile().mkdirs(); 244 writer = new BufferedWriter (new FileWriter (f)); 245 writer.write(content); 246 }finally{ 247 if(writer!=null) 248 writer.close(); 249 } 250 } 251 255 protected static String getCachePath(){ 256 String tmpDir = System.getProperty("java.io.tmpdir"); 257 if(!tmpDir.endsWith(File.separator)) 258 tmpDir += File.separator; 259 tmpDir += "dlog4j_cache"; 260 tmpDir += File.separator; 261 return tmpDir; 262 } 263 264 270 protected static boolean needReload(long lastReload){ 271 long currentTime = System.currentTimeMillis(); 272 return (currentTime - lastReload) > 3600000; 273 } 274 275 281 protected abstract Channel parse(Reader content) throws Exception ; 282 283 288 protected Digester getDigester(){ 289 Digester digester = new Digester(); 290 digester.push(new Channel()); 291 digester.setNamespaceAware(true); 292 digester.setValidating(false); 293 return digester; 294 } 295 296 301 protected Digester getDigester(Channel channel){ 302 Digester digester = new Digester(); 303 digester.push(channel); 304 digester.setNamespaceAware(true); 305 digester.setValidating(false); 306 return digester; 307 } 308 309 public static void main(String [] args) throws Exception { 310 Channel site = parse(args[0]); 311 System.out.println("site.title:"+site.getTitle()); 312 System.out.println("site.link:"+site.getLink()); 313 System.out.println("site.description:"+site.getDescription()); 314 System.out.println("============ ITEMS ============"); 315 for(int i=0;i<site.getItems().size();i++){ 316 Item log = (Item)site.getItems().get(i); 317 System.out.println("log.title:"+log.getTitle()); 318 System.out.println("log.link:"+log.getLink()); 319 System.out.println("log.description:"+log.getDescription()); 320 System.out.println("-----------------------------------"); 321 } 322 } 323 324 } 325 | Popular Tags |