1 19 20 33 package org.htmlparser.scanners; 34 35 import java.util.Hashtable ; 39 40 import org.htmlparser.tags.LinkTag; 41 import org.htmlparser.tags.Tag; 42 import org.htmlparser.tags.data.CompositeTagData; 43 import org.htmlparser.tags.data.LinkData; 44 import org.htmlparser.tags.data.TagData; 45 import org.htmlparser.util.LinkProcessor; 46 import org.htmlparser.util.ParserException; 47 import org.htmlparser.util.ParserUtils; 48 49 55 public class LinkScanner extends CompositeTagScanner 56 { 57 private static final String MATCH_NAME[] = { "A" }; 58 public static final String LINK_SCANNER_ID = "A"; 59 public static final String DIRTY_TAG_MESSAGE = 60 " is a dirty link tag - the tag was not closed. \nWe encountered an open tag, before the previous end tag was found.\nCorrecting this.."; 61 private LinkProcessor processor; 62 private final static String ENDERS[] = 63 { "TD", "TR", "FORM", "LI", "BODY", "HTML" }; 64 private final static String ENDTAG_ENDERS[] = 65 { "TD", "TR", "FORM", "LI", "BODY", "HTML" }; 66 67 70 public LinkScanner() 71 { 72 this(""); 73 } 74 75 78 public LinkScanner(String filter) 79 { 80 super(filter, MATCH_NAME, ENDERS, ENDTAG_ENDERS, false); 81 processor = new LinkProcessor(); 82 } 83 84 public Tag createTag(TagData tagData, CompositeTagData compositeTagData) 85 throws ParserException 86 { 87 88 String link = 89 extractLink( 90 compositeTagData.getStartTag(), 91 tagData.getUrlBeingParsed()); 92 int mailto = link.indexOf("mailto"); 93 boolean mailLink = false; 94 if (mailto == 0) 95 { 96 mailto = link.indexOf(":"); 98 link = link.substring(mailto + 1); 99 mailLink = true; 100 } 101 int javascript = link.indexOf("javascript:"); 102 boolean javascriptLink = false; 103 if (javascript == 0) 104 { 105 link = link.substring(11); 106 javascriptLink = true; 108 } 109 String accessKey = getAccessKey(compositeTagData.getStartTag()); 110 String myLinkText = compositeTagData.getChildren().toString(); 111 112 LinkTag linkTag = 113 new LinkTag( 114 tagData, 115 compositeTagData, 116 new LinkData( 117 link, 118 myLinkText, 119 accessKey, 120 mailLink, 121 javascriptLink)); 122 linkTag.setThisScanner(this); 123 return linkTag; 124 } 125 126 133 public boolean evaluate(String s, TagScanner previousOpenScanner) 134 { 135 char ch; 136 boolean ret; 137 138 s = absorbLeadingBlanks(s); 140 if (5 > s.length()) 141 ret = false; 142 else 143 { 144 ch = s.charAt(0); 145 if ((ch == 'a' || ch == 'A') 146 && Character.isWhitespace(s.charAt(1))) 147 ret = -1 != s.toUpperCase().indexOf("HREF"); 148 else 149 ret = false; 150 } 151 152 return (ret); 153 } 154 155 159 public String extractLink(Tag tag, String url) throws ParserException 160 { 161 try 162 { 163 Hashtable table = tag.getAttributes(); 164 String relativeLink = (String ) table.get("HREF"); 165 if (relativeLink != null) 166 { 167 relativeLink = ParserUtils.removeChars(relativeLink, '\n'); 168 relativeLink = ParserUtils.removeChars(relativeLink, '\r'); 169 } 170 return processor.extract(relativeLink, url); 171 } 172 catch (Exception e) 173 { 174 String msg; 175 if (tag != null) 176 msg = tag.getText(); 177 else 178 msg = "null"; 179 throw new ParserException( 180 "HTMLLinkScanner.extractLink() : Error while extracting link from tag " 181 + msg 182 + ", url = " 183 + url, 184 e); 185 } 186 } 187 188 193 private String getAccessKey(Tag tag) 194 { 195 return tag.getAttribute("ACCESSKEY"); 196 } 197 198 public BaseHrefScanner createBaseHREFScanner(String filter) 199 { 200 return new BaseHrefScanner(filter, processor); 201 } 202 203 public ImageScanner createImageScanner(String filter) 204 { 205 return new ImageScanner(filter, processor); 206 } 207 208 211 public String [] getID() 212 { 213 return MATCH_NAME; 214 } 215 216 } 217 | Popular Tags |