ImageScanner


1   // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/scanners/ImageScanner.java,v 1.5 2004/02/10 13:41:09 woolfel Exp $
2   /*
3    * ====================================================================
4    * Copyright 2002-2004 The Apache Software Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   * 
18   */
19  
20  // The developers of JMeter and Apache are grateful to the developers
21  // of HTMLParser for giving Apache Software Foundation a non-exclusive
22  // license. The performance benefits of HTMLParser are clear and the
23  // users of JMeter will benefit from the hard work of the HTMLParser
24  // team. For detailed information about HTMLParser, the project is
25  // hosted on sourceforge at http://htmlparser.sourceforge.net/.
26  //
27  // HTMLParser was originally created by Somik Raha in 2000. Since then
28  // a healthy community of users has formed and helped refine the
29  // design so that it is able to tackle the difficult task of parsing
30  // dirty HTML. Derrick Oswald is the current lead developer and was kind
31  // enough to assist JMeter.
32  
33  package org.htmlparser.scanners;
34  //////////////////
35  // Java Imports //
36  //////////////////
37  import java.util.Hashtable  ;
38  
39  import org.htmlparser.tags.ImageTag;
40  import org.htmlparser.tags.Tag;
41  import org.htmlparser.tags.data.TagData;
42  import org.htmlparser.util.LinkProcessor;
43  import org.htmlparser.util.ParserException;
44  import org.htmlparser.util.ParserUtils;
45  /**
46   * Scans for the Image Tag. This is a subclass of TagScanner, and is called using a 
47   * variant of the template method. If the evaluate() method returns true, that means the
48   * given string contains an image tag. Extraction is done by the scan method thereafter
49   * by the user of this class.
50   */
51  public class ImageScanner extends TagScanner
52  {
53      public static final String   IMAGE_SCANNER_ID = "IMG";
54      private Hashtable   table;
55      private LinkProcessor processor;
56      /**
57       * Overriding the default constructor
58       */
59      public ImageScanner()
60      {
61          super();
62          processor = new LinkProcessor();
63      }
64      /**
65       * Overriding the constructor to accept the filter 
66       */
67      public ImageScanner(String   filter, LinkProcessor processor)
68      {
69          super(filter);
70          this.processor = processor;
71      }
72      /**
73       * Extract the location of the image, given the string to be parsed, and the url
74       * of the html page in which this tag exists.
75       * @param s String to be parsed
76       * @param url URL of web page being parsed
77       */
78      public String   extractImageLocn(Tag tag, String   url) throws ParserException
79      {
80          String   relativeLink = null;
81          try
82          {
83              table = tag.getAttributes();
84              relativeLink = (String  ) table.get("SRC");
85  
86              if (relativeLink != null)
87              {
88                  relativeLink = ParserUtils.removeChars(relativeLink, '\n');
89                  relativeLink = ParserUtils.removeChars(relativeLink, '\r');
90              }
91              if (relativeLink == null || relativeLink.length() == 0)
92              {
93                  // try fix
94                  String   tagText = tag.getText().toUpperCase();
95                  int indexSrc = tagText.indexOf("SRC");
96                  if (indexSrc != -1)
97                  {
98                      // There is a missing equals.
99                      tag.setText(
100                         tag.getText().substring(0, indexSrc + 3)
101                             + "="
102                             + tag.getText().substring(
103                                 indexSrc + 3,
104                                 tag.getText().length()));
105                     table = tag.redoParseAttributes();
106                     relativeLink = (String  ) table.get("SRC");
107 
108                 }
109             }
110             if (relativeLink == null)
111                 return "";
112             else
113                 return processor.extract(relativeLink, url);
114         }
115         catch (Exception   e)
116         {
117             throw new ParserException(
118                 "HTMLImageScanner.extractImageLocn() : Error in extracting image location, relativeLink = "
119                     + relativeLink
120                     + ", url = "
121                     + url,
122                 e);
123         }
124     }
125 
126     public String  [] getID()
127     {
128         String  [] ids = new String  [1];
129         ids[0] = IMAGE_SCANNER_ID;
130         return ids;
131     }
132 
133     protected Tag createTag(TagData tagData, Tag tag, String   url)
134         throws ParserException
135     {
136         String   link = extractImageLocn(tag, url);
137         return new ImageTag(tagData, link);
138     }
139 
140 }
141
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags