1 18 package org.apache.roller.util; 19 20 import java.io.BufferedReader ; 21 import java.io.IOException ; 22 import java.io.InputStream ; 23 import java.io.InputStreamReader ; 24 import java.io.StringReader ; 25 import java.net.MalformedURLException ; 26 import java.net.URL ; 27 import java.util.Arrays ; 28 import java.util.Iterator ; 29 import java.util.List ; 30 31 import javax.swing.text.MutableAttributeSet ; 32 import javax.swing.text.html.HTML ; 33 import javax.swing.text.html.HTMLEditorKit ; 34 import javax.swing.text.html.HTML.Tag; 35 import javax.swing.text.html.HTMLEditorKit.Parser; 36 import javax.swing.text.html.HTMLEditorKit.ParserCallback; 37 38 import org.apache.commons.logging.Log; 39 import org.apache.commons.logging.LogFactory; 40 41 import com.sun.syndication.feed.synd.SyndEntry; 42 import com.sun.syndication.feed.synd.SyndFeed; 43 import com.sun.syndication.io.FeedException; 44 import com.sun.syndication.io.SyndFeedInput; 45 46 51 public class LinkbackExtractor 52 { 53 private static Log mLogger = LogFactory.getFactory().getInstance( 54 LinkbackExtractor.class); 55 private boolean mFound = false; 56 private String mTitle = ""; 57 private String mRssLink = null; 58 private String mExcerpt = null; 59 private String mPermalink = null; 60 private int mStart = 0; 61 private int mEnd = 0; 62 private int mMaxExcerpt = 500; private String mRequestURL = null; 64 private String mRequestURLWWW = null; 65 private String mRefererURL; 66 67 74 public LinkbackExtractor(String refererURL, String requestURL) 75 throws MalformedURLException , IOException 76 { 77 try 78 { 79 extractByParsingHtml(refererURL, requestURL); 80 if (mRssLink != null) 81 { 82 extractByParsingRss(mRssLink, requestURL); 83 } 84 } 85 catch (Exception e) 86 { 87 if (mLogger.isDebugEnabled()) 88 { 89 mLogger.debug("Extracting linkback", e); 90 } 91 } 92 } 93 94 private void extractByParsingHtml(String refererURL, String requestURL) 96 throws MalformedURLException , IOException 97 { 98 URL url = new URL (refererURL); 99 InputStream is = url.openStream(); 100 101 mRefererURL = refererURL; 102 103 if (requestURL.startsWith("http://www.")) 104 { 105 mRequestURLWWW = requestURL; 106 mRequestURL = "http://" + mRequestURLWWW.substring(11); 107 } 108 else 109 { 110 mRequestURL = requestURL; 111 mRequestURLWWW = "http://www." + mRequestURL.substring(7); 112 } 113 114 Parser parser = (new HTMLEditorKit () { 116 public Parser getParser() 117 { 118 return super.getParser(); 119 } 120 }).getParser(); 121 122 StringBuffer sb = new StringBuffer (); 124 InputStreamReader isr = new InputStreamReader (is); 125 BufferedReader br = new BufferedReader (isr); 126 try 127 { 128 String line = null; 129 while ((line = br.readLine()) != null) 130 { 131 sb.append(line); 132 } 133 } 134 finally 135 { 136 br.close(); 137 } 138 139 StringReader sr = new StringReader (sb.toString()); 142 parser.parse(sr, new LinkbackCallback(), true); 143 144 if (mStart != 0 && mEnd != 0 && mEnd > mStart) 145 { 146 mExcerpt = sb.toString().substring(mStart, mEnd); 147 mExcerpt = Utilities.removeHTML(mExcerpt); 148 149 if (mExcerpt.length() > mMaxExcerpt) 150 { 151 mExcerpt = mExcerpt.substring(0, mMaxExcerpt) + "..."; 152 } 153 } 154 155 if (mTitle.startsWith(">") && mTitle.length() > 1) 156 { 157 mTitle = mTitle.substring(1); 158 } 159 } 160 161 private void extractByParsingRss(String rssLink, String requestURL) 163 throws IllegalArgumentException , MalformedURLException , FeedException, IOException 164 { 165 SyndFeedInput feedInput = new SyndFeedInput(); 166 SyndFeed feed = feedInput.build( 167 new InputStreamReader (new URL (rssLink).openStream())); 168 Iterator itemIter = feed.getEntries().iterator(); 169 String feedTitle = feed.getTitle(); 170 171 int count = 0; 172 173 if (mLogger.isDebugEnabled()) 174 { 175 mLogger.debug("Feed parsed, title: " + feedTitle); 176 } 177 178 while (itemIter.hasNext()) 179 { 180 count++; 181 SyndEntry item = (SyndEntry) itemIter.next(); 182 if (item.getDescription().getValue().indexOf(requestURL) != -1) 183 { 184 mFound = true; 185 mPermalink = item.getLink().toString(); 186 if (feedTitle != null && feedTitle.trim().length() > 0) 187 { 188 mTitle = feedTitle + ": " + item.getTitle(); 189 } 190 else 191 { 192 mTitle = item.getTitle(); 193 } 194 mExcerpt = item.getDescription().getValue(); 195 mExcerpt = Utilities.removeHTML(mExcerpt); 196 if (mExcerpt.length() > mMaxExcerpt) 197 { 198 mExcerpt = mExcerpt.substring(0, mMaxExcerpt) + "..."; 199 } 200 break; 201 } 202 } 203 204 if (mLogger.isDebugEnabled()) 205 { 206 mLogger.debug("Parsed " + count + " articles, found linkback=" 207 + mFound); 208 } 209 } 210 211 217 public String getExcerpt() 218 { 219 return mExcerpt; 220 } 221 222 228 public String getTitle() 229 { 230 return mTitle; 231 } 232 233 239 public String getPermalink() 240 { 241 return mPermalink; 242 } 243 244 251 public void setPermalink(String permalink) 252 { 253 mPermalink = permalink; 254 } 255 256 258 265 private final class LinkbackCallback extends ParserCallback 266 { 267 private Tag[] mDivTags = { Tag.TD, Tag.DIV, Tag.SPAN, 269 Tag.BLOCKQUOTE, Tag.P, Tag.LI, 270 Tag.BR, Tag.HR, Tag.PRE, Tag.H1, 271 Tag.H2, Tag.H3, Tag.H4, Tag.H5, 272 Tag.H6 }; 273 274 private List mList = Arrays.asList(mDivTags); 275 276 private Tag mCurrentTag = null; 277 278 288 public void handleStartTag(Tag tag, MutableAttributeSet atts, int pos) 289 { 290 if (mList.contains(tag) && !mFound) 291 { 292 mStart = pos; 293 } 294 else if (mList.contains(tag) && mFound && mEnd == 0) 295 { 296 mEnd = pos; 297 } 298 else if (tag.equals(Tag.A)) 299 { 300 String href = (String ) atts.getAttribute(HTML.Attribute.HREF); 301 if (href == null) 302 return; 303 int hashPos = href.lastIndexOf('#'); 304 if (hashPos != -1) 305 { 306 href = href.substring(0, hashPos); 307 } 308 if (href != null 309 && (href.equals(mRequestURL) || href 310 .equals(mRequestURLWWW))) 311 { 312 mFound = true; 313 } 314 else 315 { 316 320 } 321 } 322 mCurrentTag = tag; 323 } 324 325 328 public void handleSimpleTag(Tag tag, MutableAttributeSet atts, int pos) 329 { 330 if (mList.contains(tag) && mFound && mEnd == 0) 331 { 332 mEnd = pos; 333 } 334 else if (tag.equals(Tag.LINK)) 335 { 336 String title = (String ) atts.getAttribute(HTML.Attribute.TITLE); 338 String type = (String ) atts.getAttribute(HTML.Attribute.TYPE); 339 if (title != null && type != null 340 && type.equals("application/rss+xml") 341 && title.equals("RSS")) 342 { 343 mRssLink = (String ) atts.getAttribute(HTML.Attribute.HREF); 344 345 if (mLogger.isDebugEnabled()) 346 { 347 mLogger.debug("Found RSS link " + mRssLink); 348 } 349 350 if (mRssLink.startsWith("/") && mRssLink.length() > 1) 351 { 352 try 353 { 354 URL url = new URL (mRefererURL); 355 mRssLink = url.getProtocol() + "://" 356 + url.getHost() + ":" + url.getPort() 357 + mRssLink; 358 } 359 catch (MalformedURLException e) 360 { 361 mRssLink = null; 362 if (mLogger.isDebugEnabled()) 363 { 364 mLogger.debug("Determining RSS URL", e); 365 } 366 } 367 } 368 else if (!mRssLink.startsWith("http")) 369 { 370 int slash = mRefererURL.lastIndexOf("/"); 371 if (slash != -1) 372 { 373 mRssLink = mRefererURL.substring(0, slash) + "/" 374 + mRssLink; 375 } 376 } 377 if (mLogger.isDebugEnabled()) 378 { 379 mLogger.debug("Qualified RSS link is " + mRssLink); 380 } 381 } 382 } 383 } 384 385 393 public void handleEndTag(Tag tag, int pos) 394 { 395 if (mList.contains(tag) && mFound && mEnd == 0) 396 { 397 mEnd = pos; 398 } 399 else if (mList.contains(tag) && !mFound) 400 { 401 mStart = pos; 402 } 403 else 404 { 405 mCurrentTag = null; 406 } 407 } 408 409 412 public void handleText(char[] data, int pos) 413 { 414 if (mCurrentTag != null && mCurrentTag.equals(Tag.TITLE)) 415 { 416 String newText = new String (data); 417 if (mTitle.length() < 50) 418 { 419 mTitle += newText; 420 } 421 } 422 } 423 } 424 } 425 426 | Popular Tags |