KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > roller > business > RefererManagerImpl


1 package org.roller.business;
2
3 import java.text.SimpleDateFormat JavaDoc;
4 import java.util.ArrayList JavaDoc;
5 import java.util.Arrays JavaDoc;
6 import java.util.Date JavaDoc;
7 import java.util.Iterator JavaDoc;
8 import java.util.LinkedList JavaDoc;
9 import java.util.List JavaDoc;
10
11 import org.apache.commons.lang.StringUtils;
12 import org.apache.commons.logging.Log;
13 import org.apache.commons.logging.LogFactory;
14 import org.roller.RollerException;
15 import org.roller.config.RollerRuntimeConfig;
16 import org.roller.model.ParsedRequest;
17 import org.roller.model.RefererManager;
18 import org.roller.model.Roller;
19 import org.roller.model.RollerFactory;
20 import org.roller.pojos.RefererData;
21 import org.roller.pojos.WeblogEntryData;
22 import org.roller.pojos.WebsiteData;
23 import org.roller.util.DateUtil;
24 import org.roller.util.LinkbackExtractor;
25 import org.roller.util.Utilities;
26
27
28 /**
29  * Abstract base implementation using PersistenceStrategy.
30  * @author Dave Johnson
31  * @author Lance Lavandowska
32  */

33 public abstract class RefererManagerImpl implements RefererManager
34 {
35     static Log mLogger =
36         LogFactory.getFactory().getInstance(RefererManagerImpl.class);
37
38     protected static final String JavaDoc DAYHITS = "dayHits";
39     protected static final String JavaDoc TOTALHITS = "totalHits";
40
41     protected PersistenceStrategy mStrategy;
42     protected Date JavaDoc mRefDate = new Date JavaDoc();
43     protected SimpleDateFormat JavaDoc mDateFormat = DateUtil.get8charDateFormat();
44
45     protected abstract List JavaDoc getReferersWithSameTitle(
46                     WebsiteData website,
47                     String JavaDoc requestUrl,
48                     String JavaDoc title,
49                     String JavaDoc excerpt)
50                     throws RollerException;
51                     
52     protected abstract List JavaDoc getExistingReferers(
53                     WebsiteData website,
54                     String JavaDoc dateString,
55                     String JavaDoc permalink) throws RollerException;
56
57     protected abstract List JavaDoc getReferersToWebsite(
58                     WebsiteData website,
59                     String JavaDoc refererUrl) throws RollerException;
60
61     protected abstract List JavaDoc getMatchingReferers(
62                     WebsiteData website,
63                     String JavaDoc requestUrl,
64                     String JavaDoc refererUrl) throws RollerException;
65
66     //-----------------------------------------------------------------------
67

68     public RefererManagerImpl()
69     {
70     }
71
72     //-----------------------------------------------------------------------
73

74     protected abstract int getHits(WebsiteData website, String JavaDoc type)
75         throws RollerException;
76
77     //------------------------------------------------------------------------
78

79     public void release()
80     {
81     }
82
83     //-----------------------------------------------------------------------
84
public synchronized void forceTurnover(String JavaDoc websiteId) throws RollerException
85     {
86         mLogger.debug("forceTurnover");
87         checkForTurnover(true, websiteId);
88     }
89
90     //--------------------------------------------------------- Get hit counts
91

92     public int getDayHits(WebsiteData website) throws RollerException
93     {
94         return getHits(website, DAYHITS);
95     }
96
97     //-----------------------------------------------------------------------
98

99     public int getTotalHits(WebsiteData website) throws RollerException
100     {
101         return getHits(website, TOTALHITS);
102     }
103
104
105     //------------------------------------------------------- Referer Storage
106

107     /**
108      * @see org.roller.pojos.RefererManager#removeReferer(java.lang.String)
109      */

110     public void removeReferer(String JavaDoc id) throws RollerException
111     {
112         mStrategy.remove(id, RefererData.class);
113     }
114
115     //-----------------------------------------------------------------------
116

117     /**
118      * @see org.roller.pojos.RefererManager#retrieveReferer(java.lang.String)
119      */

120     public RefererData retrieveReferer(String JavaDoc id) throws RollerException
121     {
122         return (RefererData)mStrategy.load(id,RefererData.class);
123     }
124
125     //-----------------------------------------------------------------------
126

127     /**
128      * @see org.roller.pojos.RefererManager#storeReferer(
129      * org.roller.pojos.RefererData)
130      */

131     public void storeReferer(RefererData data) throws RollerException
132     {
133         mStrategy.store(data);
134     }
135
136     //-----------------------------------------------------------------------
137
public List JavaDoc getEntryReferers(String JavaDoc entryId, boolean authorized)
138         throws RollerException
139     {
140         //TODO: Redesign this so this is performed using the DB query, and
141
// not in java code for perf/memory reasons
142
List JavaDoc authorizedvisible = new ArrayList JavaDoc();
143         List JavaDoc referers = getReferersToEntry(entryId);
144         for (Iterator JavaDoc rItr = referers.iterator(); rItr.hasNext();)
145         {
146             RefererData referer = (RefererData) rItr.next();
147             if ( referer.getVisible().booleanValue() || authorized )
148             {
149                 authorizedvisible.add( referer );
150             }
151         }
152
153         return authorizedvisible;
154     }
155
156     //------------------------------------------------------------------------
157

158     /**
159      * Process incoming request for referer information.
160      *
161      * <p>If there is no referer, treat it as a direct request.</p>
162      *
163      * <p>If there is a referer and there is no record for that referer, then
164      * parse the refering page for title and excerpt surround the refering link.
165      * If the excerpt cannot be found, then ignore the referer because it is
166      * fake - probably a referer spam.
167      * </p>
168      *
169      * @return boolean True if the referer header contains an ignore/spam word.
170      * @see org.roller.pojos.RefererManager#processRequest(ParsedRequest)
171      */

172     public boolean processRequest( ParsedRequest request )
173     {
174         String JavaDoc msg = "processRequest";
175         if ( request.getWebsite() == null ) return false;
176
177         try
178         {
179             List JavaDoc matchRef = null;
180
181             String JavaDoc requestUrl = request.getRequestURL();
182             String JavaDoc refererUrl = request.getRefererURL();
183             WebsiteData website = request.getWebsite();
184             WeblogEntryData entry = request.getWeblogEntry();
185             String JavaDoc selfSiteFragment = "/page/" + website.getUser().getUserName();
186
187             String JavaDoc dateString = null;
188             if ( request.getDateString()!=null && request.isDateSpecified())
189             {
190                 dateString = request.getDateString();
191             }
192
193             if (mLogger.isDebugEnabled())
194             {
195                 mLogger.debug( msg+": refurl="+refererUrl );
196             }
197
198             /* Check Referer URL against selfSiteFragment (treat as direct),
199              * against a regex for an self-site editor page (direct),
200              * and against the Spam lists.
201              */

202             if ( refererUrl != null )
203             {
204                 // treat own URL as direct
205
if (refererUrl.indexOf(selfSiteFragment) != -1)
206                 {
207                     refererUrl = null;
208                 }
209                 else
210                 {
211                     // treat editor referral as direct
212
int lastSlash = requestUrl.indexOf("/", 8);
213                     if (lastSlash == -1) lastSlash = requestUrl.length();
214                     String JavaDoc requestSite = requestUrl.substring(0, lastSlash);
215                     if (refererUrl.matches(requestSite + ".*\\.do.*"))
216                     {
217                         refererUrl = null;
218                     }
219                     else
220                     {
221                         // If referer URL contains spamWords or ignoreWords then don't log it.
222
boolean isRefererSpam = checkForSpam(refererUrl, website);
223                         if (isRefererSpam) return true;
224                     }
225                 }
226             }
227
228             // try to find existing RefererData for refererUrl
229
if (refererUrl == null || refererUrl.trim().length() < 8)
230             {
231                 refererUrl = "direct";
232
233                 // Get referer specified by referer URL of direct
234
matchRef = getReferersToWebsite(website, refererUrl);
235             }
236             else
237             {
238                 refererUrl = Utilities.stripJsessionId(refererUrl);
239
240                 // Query for referer with same referer and request URLs
241
matchRef = getMatchingReferers(website, requestUrl, refererUrl);
242
243                 // If referer was not found, try adding or leaving off 'www'
244
if ( matchRef.size() == 0 )
245                 {
246                     String JavaDoc secondTryUrl = null;
247                     if ( refererUrl.startsWith("http://www") )
248                     {
249                         secondTryUrl = "http://"+refererUrl.substring(11);
250                     }
251                     else
252                     {
253                         secondTryUrl = "http://www"+refererUrl.substring(7);
254                     }
255
256                     matchRef = getMatchingReferers(
257                         website, requestUrl, secondTryUrl);
258                     if ( matchRef.size() == 1 )
259                     {
260                         refererUrl = secondTryUrl;
261                     }
262                 }
263             }
264
265             if (matchRef.size() == 1)
266             {
267                 // Referer was found in database, so bump up hit count
268
RefererData ref = (RefererData)matchRef.get(0);
269
270                 ref.setDayHits(
271                     new Integer JavaDoc(ref.getDayHits().intValue() + 1));
272                 ref.setTotalHits(
273                     new Integer JavaDoc(ref.getTotalHits().intValue() + 1));
274
275                 if (mLogger.isDebugEnabled())
276                 {
277                     mLogger.debug(
278                         "Incrementing hit count on existing referer: "+refererUrl);
279                 }
280
281                 storeReferer(ref);
282                 mStrategy.commit();
283             }
284             else if (matchRef.size() == 0)
285             {
286                 // Referer was not found in database, so new Referer object
287
Integer JavaDoc one = new Integer JavaDoc(1);
288                 RefererData ref =
289                     new RefererData(
290                         null,
291                         website,
292                         entry,
293                         dateString,
294                         refererUrl,
295                         null,
296                         requestUrl,
297                         null,
298                         null,
299                         Boolean.FALSE,
300                         Boolean.FALSE,
301                         one,
302                         one);
303
304                  if (mLogger.isDebugEnabled())
305                  {
306                     mLogger.debug("newReferer="+ref.getRefererUrl());
307                  }
308
309                  String JavaDoc refurl = ref.getRefererUrl();
310
311                  // If not a direct or search engine then search for linkback
312
if ( request.isEnableLinkback()
313                       && request.isDateSpecified()
314                       && !refurl.equals("direct")
315                       && !refurl.startsWith("http://google")
316                       && !refurl.startsWith("http://www.google")
317                       && !refurl.startsWith("http://search.netscape")
318                       && !refurl.startsWith("http://www.blinkpro")
319                       && !refurl.startsWith("http://auto.search.msn")
320                       && !refurl.startsWith("http://search.yahoo")
321                       && !refurl.startsWith("http://uk.search.yahoo")
322                       && !refurl.startsWith("http://www.javablogs.com")
323                       && !refurl.startsWith("http://www.teoma")
324                     )
325                  {
326                      // Launch thread to extract referer linkback
327

328                     try
329                     {
330                         Roller mRoller = RollerFactory.getRoller();
331                        mRoller.getThreadManager().executeInBackground(
332                           new LinkbackExtractorRunnable(ref) );
333                     }
334                     catch (InterruptedException JavaDoc e) {
335                         mLogger.warn("Interrupted during linkback extraction",e);
336                     }
337                  }
338                  else
339                  {
340                      storeReferer(ref);
341                      mStrategy.commit();
342                  }
343             }
344         }
345         catch (RollerException pe)
346         {
347             mLogger.error(msg, pe);
348         }
349         catch (NullPointerException JavaDoc npe)
350         {
351             mLogger.error(msg, npe);
352         }
353         
354         return false;
355     }
356     
357     /**
358      * Check the Referer URL against the Site-wide RefererSpamWords list
359      * and against the user's own IgnoreWords list. If the Referer contains
360      * any of the words from either list consider it Spam.
361      *
362      * @param refererUrl
363      * @return
364      * @throws RollerException
365      */

366     private boolean checkForSpam(String JavaDoc refererUrl, WebsiteData website) throws RollerException
367     {
368         String JavaDoc spamwords = RollerRuntimeConfig.getProperty("spam.referers.ignorewords");
369         LinkedList JavaDoc spamWords = new LinkedList JavaDoc(Arrays.asList(
370                 StringUtils.split(StringUtils.deleteWhitespace(spamwords), ",")));
371     
372         if ( website.getIgnoreWords() != null )
373         {
374             spamWords.addAll(
375                 Arrays.asList(StringUtils.split(
376                     StringUtils.deleteWhitespace(
377                         website.getIgnoreWords()),",")));
378         }
379         for( Iterator JavaDoc i = spamWords.iterator(); i.hasNext(); )
380         {
381             String JavaDoc word = (String JavaDoc)i.next();
382             if (refererUrl.indexOf(word) != -1)
383             {
384                 if (mLogger.isDebugEnabled())
385                 {
386                     mLogger.debug("Flagged a Spam because '" + word +
387                                   "' was found in '" + refererUrl + "'");
388                 }
389                 refererUrl = null;
390                 return true;
391             }
392         }
393         return false;
394     }
395
396     /**
397      * Use LinkbackExtractor to parse title and excerpt from referer
398      */

399     class LinkbackExtractorRunnable implements Runnable JavaDoc
400     {
401
402         private RefererData mReferer = null;
403
404         public LinkbackExtractorRunnable( RefererData referer)
405         {
406             mReferer = referer;
407         }
408
409         public void run()
410         {
411
412             try
413             {
414                 LinkbackExtractor lb = new LinkbackExtractor(
415                     mReferer.getRefererUrl(),mReferer.getRequestUrl());
416
417                 if ( lb.getTitle()!=null && lb.getExcerpt()!=null )
418                 {
419                     mReferer.setTitle(lb.getTitle());
420                     mReferer.setExcerpt(lb.getExcerpt());
421
422
423                     if ( lb.getPermalink() != null )
424                     {
425                         // The presence of a permalink indicates that this
426
// linkback was parsed out of an RSS feed and is
427
// presumed to be a good linkback.
428

429                         mReferer.setRefererPermalink(lb.getPermalink());
430
431                         // See if this request/permalink is in the DB
432
List JavaDoc matchRef = getExistingReferers(
433                             mReferer.getWebsite(),
434                             mReferer.getDateString(),
435                             mReferer.getRefererPermalink());
436
437                         // If it is the first, then set it to be visible
438
if ( matchRef.size() == 0 )
439                         {
440                             mReferer.setVisible(Boolean.TRUE);
441                         }
442                         else
443                         {
444                             // We can't throw away duplicates or we will
445
// end up reparsing them everytime a hit comes
446
// in from one of them, but we can mark them
447
// as duplicates.
448
mReferer.setDuplicate(Boolean.TRUE);
449                         }
450
451                         storeReferer(mReferer);
452                     }
453
454                     else
455                     {
456                         // Store the new referer
457
storeReferer(mReferer);
458
459                         // Hacky Referer URL weighting kludge:
460
//
461
// If there are multple referers to a request URL,
462
// then we want to pick the best one. The others
463
// are marked as duplicates. To do this we use a
464
// weight. The weight formula is:
465
//
466
// w = URL length + (100 if URL contains anchor)
467

468                         // LOOP: find the referer with the highest weight
469
Boolean JavaDoc visible = Boolean.FALSE;
470                         List JavaDoc refs= getReferersWithSameTitle(
471                             mReferer.getWebsite(),
472                             mReferer.getRequestUrl(),
473                             lb.getTitle(),
474                             lb.getExcerpt());
475                         RefererData chosen = null;
476                         int maxweight = 0;
477                         for (Iterator JavaDoc rdItr = refs.iterator();rdItr.hasNext();)
478                         {
479                             RefererData referer = (RefererData) rdItr.next();
480
481                             int weight = referer.getRefererUrl().length();
482                             if (referer.getRefererUrl().indexOf('#') != -1)
483                             {
484                                 weight += 100;
485                             }
486
487                             if ( weight > maxweight )
488                             {
489                                 chosen = referer;
490                                 maxweight = weight;
491                             }
492
493                             if (referer.getVisible().booleanValue())
494                             {
495                                 // If any are visible then chosen
496
// replacement must be visible as well.
497
visible = Boolean.TRUE;
498                             }
499
500                         }
501
502                         // LOOP: to mark all of the lower weight ones
503
// as duplicates
504
for (Iterator JavaDoc rdItr = refs.iterator();rdItr.hasNext();) {
505                             RefererData referer = (RefererData) rdItr.next();
506
507                             if (referer != chosen)
508                             {
509                                 referer.setDuplicate(Boolean.TRUE);
510                             }
511                             else
512                             {
513                                 referer.setDuplicate(Boolean.FALSE);
514                                 referer.setVisible(visible);
515                             }
516                             storeReferer(referer);
517                         }
518
519
520                     }
521                 }
522                 else
523                 {
524                     // It is not a linkback, but store it anyway
525
storeReferer(mReferer);
526
527                     mLogger.info("No excerpt found at refering URL "
528                         + mReferer.getRefererUrl());
529                 }
530             }
531             catch (Exception JavaDoc e)
532             {
533                 mLogger.error("Processing linkback",e);
534             }
535             finally
536             {
537                 try {
538                     mStrategy.release();
539                 }
540                 catch (RollerException e) {
541                     mLogger.error(
542                     "Exception logged by ManagerSupport.releaseDatabase()");
543                 }
544             }
545
546         }
547
548     }
549
550 }
551
552
553
Popular Tags