1 19 20 33 package org.htmlparser.tests.scannersTests; 34 35 import org.htmlparser.scanners.MetaTagScanner; 36 import org.htmlparser.tags.EndTag; 37 import org.htmlparser.tags.MetaTag; 38 import org.htmlparser.tests.ParserTestCase; 39 import org.htmlparser.util.ParserException; 40 41 public class MetaTagScannerTest extends ParserTestCase 42 { 43 44 public MetaTagScannerTest(String name) 45 { 46 super(name); 47 } 48 49 public void testScan() throws ParserException 50 { 51 createParser( 52 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\">\n" 53 + "<html>\n" 54 + "<head><title>SpamCop - Welcome to SpamCop\n" 55 + "</title>\n" 56 + "<META name=\"description\" content=\"Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.\">\n" 57 + "<META name=\"keywords\" content=\"SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns\">\n" 58 + "<META name=\"language\" content=\"en\">\n" 59 + "<META name=\"owner\" content=\"service@admin.spamcop.net\">\n" 60 + "<META HTTP-EQUIV=\"content-type\" CONTENT=\"text/html; charset=ISO-8859-1\">", 61 "http://www.google.com/test/index.html"); 62 MetaTagScanner scanner = new MetaTagScanner("-t"); 63 parser.addScanner(scanner); 64 65 parseAndAssertNodeCount(11); 66 assertTrue("Node 5 should be End Tag", node[5] instanceof EndTag); 67 assertTrue("Node 6 should be META Tag", node[6] instanceof MetaTag); 68 MetaTag metaTag; 69 metaTag = (MetaTag) node[6]; 70 assertEquals( 71 "Meta Tag 6 Name", 72 "description", 73 metaTag.getMetaTagName()); 74 assertEquals( 75 "Meta Tag 6 Contents", 76 "Protecting the internet community through technology, not legislation. SpamCop eliminates spam. Automatically file spam reports with the network administrators who can stop spam at the source. Subscribe, and filter your email through powerful statistical analysis before it reaches your inbox.", 77 metaTag.getMetaContent()); 78 79 assertTrue("Node 7 should be META Tag", node[7] instanceof MetaTag); 80 assertTrue("Node 8 should be META Tag", node[8] instanceof MetaTag); 81 assertTrue("Node 9 should be META Tag", node[9] instanceof MetaTag); 82 83 metaTag = (MetaTag) node[7]; 84 assertEquals("Meta Tag 7 Name", "keywords", metaTag.getMetaTagName()); 85 assertEquals( 86 "Meta Tag 7 Contents", 87 "SpamCop spam cop email filter abuse header headers parse parser utility script net net-abuse filter mail program system trace traceroute dns", 88 metaTag.getMetaContent()); 89 assertNull("Meta Tag 7 Http-Equiv", metaTag.getHttpEquiv()); 90 91 metaTag = (MetaTag) node[8]; 92 assertEquals("Meta Tag 8 Name", "language", metaTag.getMetaTagName()); 93 assertEquals("Meta Tag 8 Contents", "en", metaTag.getMetaContent()); 94 assertNull("Meta Tag 8 Http-Equiv", metaTag.getHttpEquiv()); 95 96 metaTag = (MetaTag) node[9]; 97 assertEquals("Meta Tag 9 Name", "owner", metaTag.getMetaTagName()); 98 assertEquals( 99 "Meta Tag 9 Contents", 100 "service@admin.spamcop.net", 101 metaTag.getMetaContent()); 102 assertNull("Meta Tag 9 Http-Equiv", metaTag.getHttpEquiv()); 103 104 metaTag = (MetaTag) node[10]; 105 assertNull("Meta Tag 10 Name", metaTag.getMetaTagName()); 106 assertEquals( 107 "Meta Tag 10 Contents", 108 "text/html; charset=ISO-8859-1", 109 metaTag.getMetaContent()); 110 assertEquals( 111 "Meta Tag 10 Http-Equiv", 112 "content-type", 113 metaTag.getHttpEquiv()); 114 115 assertEquals("This Scanner", scanner, metaTag.getThisScanner()); 116 } 117 118 public void testScanTagsInMeta() throws ParserException 119 { 120 createParser( 121 "<META NAME=\"Description\" CONTENT=\"Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles\">", 122 "http://www.google.com/test/index.html"); 123 MetaTagScanner scanner = new MetaTagScanner("-t"); 124 parser.addScanner(scanner); 125 parseAndAssertNodeCount(1); 126 assertTrue("Node should be meta tag", node[0] instanceof MetaTag); 127 MetaTag metaTag = (MetaTag) node[0]; 128 assertEquals("Meta Tag Name", "Description", metaTag.getMetaTagName()); 129 assertEquals( 130 "Content", 131 "Ethnoburb </I>versus Chinatown: Two Types of Urban Ethnic Communities in Los Angeles", 132 metaTag.getMetaContent()); 133 } 134 135 139 public void testMetaTagBug() throws ParserException 140 { 141 createParser( 142 "<html>" 143 + "<head>" 144 + "<meta http-equiv=\"content-type\"" 145 + " content=\"text/html;" 146 + " charset=windows-1252\">" 147 + "</head>" 148 + "</html>"); 149 parser.registerScanners(); 150 parseAndAssertNodeCount(5); 151 assertType("Meta Tag expected", MetaTag.class, node[2]); 152 MetaTag metaTag = (MetaTag) node[2]; 153 154 assertStringEquals( 155 "http-equiv", 156 "content-type", 157 metaTag.getHttpEquiv()); 158 assertStringEquals( 159 "content", 160 "text/html; charset=windows-1252", 161 metaTag.getMetaContent()); 162 } 163 164 168 public void testMetaTagWithOpenTagSymbol() throws ParserException 169 { 170 createParser( 171 "<html>" 172 + "<head>" 173 + "<title>Parser Test 2</title>" 174 + "<meta name=\"foo\" content=\"a<b\">" 175 + "</head>" 176 + "<body>" 177 + "<a HREF=\"http://www.yahoo.com/\">Yahoo!</a><br>" 178 + "<a HREF=\"http://www.excite.com\">Excite</a>" 179 + "</body>" 180 + "</html>"); 181 parser.registerScanners(); 182 parseAndAssertNodeCount(11); 183 assertType("meta tag", MetaTag.class, node[3]); 184 MetaTag metaTag = (MetaTag) node[3]; 185 assertStringEquals("meta content", "a<b", metaTag.getMetaContent()); 186 } 187 } 188 | Popular Tags |