KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > parserHelper > CompositeTagScannerHelper


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/CompositeTagScannerHelper.java,v 1.2 2004/02/10 13:41:08 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33 package org.htmlparser.parserHelper;
34
35 import org.htmlparser.Node;
36 import org.htmlparser.NodeReader;
37 import org.htmlparser.scanners.CompositeTagScanner;
38 import org.htmlparser.tags.CompositeTag;
39 import org.htmlparser.tags.EndTag;
40 import org.htmlparser.tags.Tag;
41 import org.htmlparser.tags.data.CompositeTagData;
42 import org.htmlparser.tags.data.TagData;
43 import org.htmlparser.util.NodeList;
44 import org.htmlparser.util.ParserException;
45
46 public class CompositeTagScannerHelper
47 {
48     private CompositeTagScanner scanner;
49     private Tag tag;
50     private String JavaDoc url;
51     private NodeReader reader;
52     private String JavaDoc currLine;
53     private Tag endTag;
54     private NodeList nodeList;
55     private boolean endTagFound;
56     private int startingLineNumber;
57     private int endingLineNumber;
58     private boolean balance_quotes;
59
60     public CompositeTagScannerHelper(
61         CompositeTagScanner scanner,
62         Tag tag,
63         String JavaDoc url,
64         NodeReader reader,
65         String JavaDoc currLine,
66         boolean balance_quotes)
67     {
68
69         this.scanner = scanner;
70         this.tag = tag;
71         this.url = url;
72         this.reader = reader;
73         this.currLine = currLine;
74         this.endTag = null;
75         this.nodeList = new NodeList();
76         this.endTagFound = false;
77         this.balance_quotes = balance_quotes;
78     }
79
80     public Tag scan() throws ParserException
81     {
82         this.startingLineNumber = reader.getLastLineNumber();
83         if (shouldCreateEndTagAndExit())
84         {
85             return createEndTagAndRepositionReader();
86         }
87         scanner.beforeScanningStarts();
88         Node currentNode = tag;
89
90         doEmptyXmlTagCheckOn(currentNode);
91         if (!endTagFound)
92         {
93             do
94             {
95                 currentNode = reader.readElement(balance_quotes);
96                 if (currentNode == null)
97                     continue;
98                 currLine = reader.getCurrentLine();
99                 if (currentNode instanceof Tag)
100                     doForceCorrectionCheckOn((Tag) currentNode);
101
102                 doEmptyXmlTagCheckOn(currentNode);
103                 if (!endTagFound)
104                     doChildAndEndTagCheckOn(currentNode);
105             }
106             while (currentNode != null && !endTagFound);
107         }
108         if (endTag == null)
109         {
110             createCorrectionEndTagBefore(reader.getLastReadPosition() + 1);
111         }
112
113         this.endingLineNumber = reader.getLastLineNumber();
114         return createTag();
115     }
116
117     private boolean shouldCreateEndTagAndExit()
118     {
119         return scanner.shouldCreateEndTagAndExit();
120     }
121
122     private Tag createEndTagAndRepositionReader()
123     {
124         createCorrectionEndTagBefore(tag.elementBegin());
125         reader.setPosInLine(tag.elementBegin());
126         reader.setDontReadNextLine(true);
127         return endTag;
128     }
129
130     private void createCorrectionEndTagBefore(int pos)
131     {
132         String JavaDoc endTagName = tag.getTagName();
133         int endTagBegin = pos;
134         int endTagEnd = endTagBegin + endTagName.length() + 2;
135         endTag =
136             new EndTag(
137                 new TagData(endTagBegin, endTagEnd, endTagName, currLine));
138     }
139
140     private void createCorrectionEndTagBefore(Tag possibleEndTagCauser)
141     {
142         String JavaDoc endTagName = tag.getTagName();
143         int endTagBegin = possibleEndTagCauser.elementBegin();
144         int endTagEnd = endTagBegin + endTagName.length() + 2;
145         possibleEndTagCauser.setTagBegin(endTagEnd + 1);
146         reader.addNextParsedNode(possibleEndTagCauser);
147         endTag =
148             new EndTag(
149                 new TagData(endTagBegin, endTagEnd, endTagName, currLine));
150     }
151
152     private StringBuffer JavaDoc createModifiedLine(String JavaDoc endTagName, int endTagBegin)
153     {
154         StringBuffer JavaDoc newLine = new StringBuffer JavaDoc();
155         newLine.append(currLine.substring(0, endTagBegin));
156         newLine.append("</");
157         newLine.append(endTagName);
158         newLine.append(">");
159         newLine.append(currLine.substring(endTagBegin, currLine.length()));
160         return newLine;
161     }
162
163     private Tag createTag() throws ParserException
164     {
165         CompositeTag newTag =
166             (CompositeTag) scanner.createTag(
167                 new TagData(
168                     tag.elementBegin(),
169                     endTag.elementEnd(),
170                     startingLineNumber,
171                     endingLineNumber,
172                     tag.getText(),
173                     currLine,
174                     url,
175                     tag.isEmptyXmlTag()),
176                 new CompositeTagData(tag, endTag, nodeList));
177         for (int i = 0; i < newTag.getChildCount(); i++)
178         {
179             Node child = newTag.childAt(i);
180             child.setParent(newTag);
181         }
182         return newTag;
183     }
184
185     private void doChildAndEndTagCheckOn(Node currentNode)
186     {
187         if (currentNode instanceof EndTag)
188         {
189             EndTag possibleEndTag = (EndTag) currentNode;
190             if (isExpectedEndTag(possibleEndTag))
191             {
192                 endTagFound = true;
193                 endTag = possibleEndTag;
194                 return;
195             }
196         }
197         nodeList.add(currentNode);
198         scanner.childNodeEncountered(currentNode);
199     }
200
201     private boolean isExpectedEndTag(EndTag possibleEndTag)
202     {
203         return possibleEndTag.getTagName().equals(tag.getTagName());
204     }
205
206     private void doEmptyXmlTagCheckOn(Node currentNode)
207     {
208         if (currentNode instanceof Tag)
209         {
210             Tag possibleEndTag = (Tag) currentNode;
211             if (isXmlEndTag(tag))
212             {
213                 endTag = possibleEndTag;
214                 endTagFound = true;
215             }
216         }
217     }
218
219     private void doForceCorrectionCheckOn(Tag possibleEndTagCauser)
220     {
221         if (isEndTagMissing(possibleEndTagCauser))
222         {
223             createCorrectionEndTagBefore(possibleEndTagCauser);
224
225             endTagFound = true;
226         }
227     }
228
229     private boolean isEndTagMissing(Tag possibleEndTag)
230     {
231         return scanner.isTagToBeEndedFor(possibleEndTag)
232             || isSelfChildTagRecievedIncorrectly(possibleEndTag);
233     }
234
235     private boolean isSelfChildTagRecievedIncorrectly(Tag possibleEndTag)
236     {
237         return (
238             !(possibleEndTag instanceof EndTag)
239                 && !scanner.isAllowSelfChildren()
240                 && possibleEndTag.getTagName().equals(tag.getTagName()));
241     }
242
243     public boolean isXmlEndTag(Tag tag)
244     {
245         String JavaDoc tagText = tag.getText();
246         int lastSlash = tagText.lastIndexOf("/");
247         return (lastSlash == tagText.length() - 1 || tag.isEmptyXmlTag())
248             && tag.getText().indexOf("://") == -1;
249     }
250 }
251
Popular Tags