1 27 package org.htmlparser.tests.parserHelperTests; 28 29 import org.htmlparser.PrototypicalNodeFactory; 30 import org.htmlparser.Remark; 31 import org.htmlparser.Text; 32 import org.htmlparser.tags.HeadTag; 33 import org.htmlparser.tags.Html; 34 import org.htmlparser.tags.LinkTag; 35 import org.htmlparser.tags.MetaTag; 36 import org.htmlparser.tests.ParserTestCase; 37 import org.htmlparser.util.ParserException; 38 39 public class StringParserTest extends ParserTestCase { 40 41 static 42 { 43 System.setProperty ("org.htmlparser.tests.parserHelperTests.StringParserTest", "StringParserTest"); 44 } 45 46 public StringParserTest(String name) { 47 super(name); 48 } 49 50 58 public void testTextBug1() throws ParserException { 59 createParser("<HTML><HEAD><TITLE>Google</TITLE>"); 60 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 61 parseAndAssertNodeCount(5); 62 assertTrue("Fourth node should be a Text",node[3] instanceof Text); 64 Text stringNode = (Text)node[3]; 65 assertEquals("Text of the Text","Google",stringNode.getText()); 66 } 67 68 77 public void testTextBug2() throws ParserException { 78 80 createParser("view these documents, you must have <A HREF='http://www.adobe.com'>Adobe \n"+ 81 "Acrobat Reader</A> installed on your computer."); 82 parseAndAssertNodeCount(3); 83 assertTrue("First node should be a Text",node[0] instanceof Text); 85 Text stringNode = (Text)node[0]; 86 assertEquals("Text of the Text","view these documents, you must have ",stringNode.getText()); 87 assertTrue("Second node should be a link node",node[1] instanceof LinkTag); 88 LinkTag linkNode = (LinkTag)node[1]; 89 assertEquals("Link is","http://www.adobe.com",linkNode.getLink()); 90 assertEquals("Link text is","Adobe \nAcrobat Reader",linkNode.getLinkText()); 91 92 assertTrue("Third node should be a string node",node[2] instanceof Text); 93 Text stringNode2 = (Text)node[2]; 94 assertEquals("Contents of third node"," installed on your computer.",stringNode2.getText()); 95 } 96 97 103 public void testTagCharsInText() throws ParserException { 104 createParser("<a HREF=\"http://asgard.ch\">[> ASGARD <]</a>"); 105 parseAndAssertNodeCount(1); 106 assertTrue("Node identified must be a link tag",node[0] instanceof LinkTag); 107 LinkTag linkTag = (LinkTag) node[0]; 108 assertEquals("[> ASGARD <]",linkTag.getLinkText()); 109 assertEquals("http://asgard.ch",linkTag.getLink()); 110 } 111 112 public void testToPlainTextString() throws ParserException { 113 createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); 114 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 115 parseAndAssertNodeCount(10); 116 assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text); 117 Text stringNode = (Text)node[3]; 118 assertEquals("First String Node","This is the Title",stringNode.toPlainTextString()); 119 assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text); 120 stringNode = (Text)node[7]; 121 assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toPlainTextString()); 122 } 123 124 public void testToHTML() throws ParserException { 125 createParser("<HTML><HEAD><TITLE>This is the Title</TITLE></HEAD><BODY>Hello World, this is the HTML Parser</BODY></HTML>"); 126 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 127 parseAndAssertNodeCount(10); 128 assertTrue("Fourth Node identified must be a string node",node[3] instanceof Text); 129 Text stringNode = (Text)node[3]; 130 assertEquals("First String Node","This is the Title",stringNode.toHtml()); 131 assertTrue("Eighth Node identified must be a string node",node[7] instanceof Text); 132 stringNode = (Text)node[7]; 133 assertEquals("Second string node","Hello World, this is the HTML Parser",stringNode.toHtml()); 134 } 135 136 public void testEmptyLines() throws ParserException { 137 createParser( 138 "David Nirenberg (Center for Advanced Study in the Behavorial Sciences, Stanford).<br>\n"+ 139 " \n"+ 140 "<br>" 141 ); 142 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 143 parseAndAssertNodeCount(4); 144 assertTrue("Third Node identified must be a string node",node[2] instanceof Text); 145 } 146 147 151 public void testStringBeingMissedBug() throws ParserException { 152 createParser( 153 "Before Comment <!-- Comment --> After Comment" 154 ); 155 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 156 parseAndAssertNodeCount(3); 157 assertTrue("First node should be Text",node[0] instanceof Text); 158 assertTrue("Second node should be Remark",node[1] instanceof Remark); 159 assertTrue("Third node should be Text",node[2] instanceof Text); 160 Text stringNode = (Text)node[0]; 161 assertEquals("First String node contents","Before Comment ",stringNode.getText()); 162 Text stringNode2 = (Text)node[2]; 163 assertEquals("Second String node contents"," After Comment",stringNode2.getText()); 164 Remark remarkNode = (Remark)node[1]; 165 assertEquals("Remark Node contents"," Comment ",remarkNode.getText()); 166 167 } 168 169 173 public void testLastLineWithOneChar() throws ParserException { 174 createParser("a"); 175 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 176 parseAndAssertNodeCount(1); 177 assertTrue("First node should be Text",node[0] instanceof Text); 178 Text stringNode = (Text)node[0]; 179 assertEquals("First String node contents","a",stringNode.getText()); 180 } 181 182 public void testStringWithEmptyLine() throws ParserException { 183 String text = "a\n\nb"; 184 createParser(text); 185 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 186 parseAndAssertNodeCount(1); 187 assertTrue("First node should be Text",node[0] instanceof Text); 188 Text stringNode = (Text)node[0]; 189 assertStringEquals("First String node contents",text,stringNode.getText()); 190 } 191 192 196 public void testStringParserBug() throws Exception { 197 createParser( 198 "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 " + 199 "Transitional//EN\">" + 200 "<html>" + 201 "<head>" + 202 "<title>Untitled Document</title>" + 203 "<meta http-equiv=\"Content-Type\" content=\"text/html; " + 204 "charset=iso-8859-1\">" + 205 "</head>" + 206 "<script language=\"JavaScript\" type=\"text/JavaScript\">" + 207 "// if this fails, output a 'hello' \n" + 208 "if (true) " + 209 "{ " + 210 "//something good...\n" + 211 "} " + 212 "</script>" + 213 "<body>" + 214 "</body>" + 215 "</html>" 216 ); 217 parseAndAssertNodeCount(2); 218 assertTrue(node[1] instanceof Html); 219 Html htmlTag = (Html)node[1]; 220 assertTrue("The HTML tag should have 3 nodes", 3 == htmlTag.getChildCount ()); 221 assertTrue("The first child should be a HEAD tag",htmlTag.getChild(0) instanceof HeadTag); 222 HeadTag headTag = (HeadTag)htmlTag.getChild(0); 223 assertTrue("The HEAD tag should have 2 nodes", 2 == headTag.getChildCount ()); 224 assertTrue("The second child should be a META tag",headTag.getChild(1) instanceof MetaTag); 225 MetaTag metaTag = (MetaTag)headTag.getChild(1); 226 227 assertStringEquals( 228 "content", 229 "text/html; charset=iso-8859-1", 230 metaTag.getAttribute("CONTENT") 231 ); 232 } 233 234 public void testStringWithLineBreaks() throws Exception { 235 String text = "Testing &\nRefactoring"; 236 createParser(text); 237 parser.setNodeFactory (new PrototypicalNodeFactory (true)); 238 parseAndAssertNodeCount(1); 239 assertType("first node",Text.class,node[0]); 240 Text stringNode = (Text)node[0]; 241 assertStringEquals("text",text,stringNode.toPlainTextString()); 242 } 243 244 } 245 | Popular Tags |