KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > archive > crawler > extractor > CrawlUriSWFAction


1 /*
2  * CrawlUriSWFAction
3  *
4  * $Id: CrawlUriSWFAction.java,v 1.6 2006/08/11 06:09:46 gojomo Exp $
5  *
6  * Created on March 15, 2004
7  *
8  * Copyright (C) 2003 Internet Archive.
9  *
10  * This file is part of the Heritrix web crawler (crawler.archive.org).
11  *
12  * Heritrix is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU Lesser Public License as published by
14  * the Free Software Foundation; either version 2.1 of the License, or
15  * any later version.
16  *
17  * Heritrix is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU Lesser Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser Public License
23  * along with Heritrix; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  */

26
27 package org.archive.crawler.extractor;
28
29 import java.io.IOException JavaDoc;
30
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.framework.CrawlController;
33
34 import com.anotherbigidea.flash.writers.SWFActionsImpl;
35
36 /**
37  * SWF action that handles discovered URIs.
38  *
39  * @author Igor Ranitovic
40  */

41 public class CrawlUriSWFAction
42 extends SWFActionsImpl {
43     CrawlURI curi;
44     CrawlController controller; // for error reporting
45

46     private long linkCount;
47     static final String JavaDoc JSSTRING = "javascript:";
48
49     /**
50      *
51      * @param curi
52      */

53     public CrawlUriSWFAction(CrawlURI curi, CrawlController controller) {
54         assert (curi != null) : "CrawlURI should not be null";
55         this.curi = curi;
56         this.controller = controller;
57         this.linkCount = 0;
58     }
59
60     /**
61      * Overwrite handling of discovered URIs.
62      *
63      * @param url Discovered URL.
64      * @param target Discovered target (currently not being used.)
65      * @throws IOException
66      */

67     public void getURL(String JavaDoc url, String JavaDoc target)
68     throws IOException JavaDoc {
69         // I have done tests on a few tens of swf files and have not seen a need
70
// to use 'target.' Most of the time 'target' is not set, or it is set
71
// to '_self' or '_blank'.
72
if (url.startsWith(JSSTRING)) {
73             linkCount =+ ExtractorJS.considerStrings(curi, url, controller, false);
74         } else {
75             curi.createAndAddLinkRelativeToVia(url,Link.EMBED_MISC,Link.EMBED_HOP);
76             linkCount++;
77         }
78     }
79     
80     /**
81      * @return Total number of links extracted from a swf file.
82      */

83     public long getLinkCount() {
84         return linkCount;
85     }
86 }
87
Popular Tags