KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > parserHelper > StringParser


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/StringParser.java,v 1.2 2004/02/10 13:41:08 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33 package org.htmlparser.parserHelper;
34
35 import org.htmlparser.Node;
36 import org.htmlparser.NodeReader;
37 import org.htmlparser.StringNode;
38
39 public class StringParser
40 {
41     private final static int BEFORE_PARSE_BEGINS_STATE = 0;
42     private final static int PARSE_HAS_BEGUN_STATE = 1;
43     private final static int PARSE_COMPLETED_STATE = 2;
44     private final static int PARSE_IGNORE_STATE = 3;
45
46     /**
47      * Returns true if the text at <code>pos</code> in <code>line</code> should be scanned as a tag.
48      * Basically an open angle followed by a known special character or a letter.
49      * @param line The current line being parsed.
50      * @param pos The position in the line to examine.
51      * @return <code>true</code> if we think this is the start of a tag.
52      */

53     private boolean beginTag(String JavaDoc line, int pos)
54     {
55         char ch;
56         boolean ret;
57
58         ret = false;
59
60         if (pos + 2 <= line.length())
61             if ('<' == line.charAt(pos))
62             {
63                 ch = line.charAt(pos + 1);
64                 // the order of these tests might be optimized for speed
65
if ('/' == ch
66                     || '%' == ch
67                     || Character.isLetter(ch)
68                     || '!' == ch)
69                     ret = true;
70             }
71
72         return (ret);
73     }
74
75     /**
76      * Locate the StringNode within the input string, by parsing from the given position
77      * @param reader HTML reader to be provided so as to allow reading of next line
78      * @param input Input String
79      * @param position Position to start parsing from
80      * @param balance_quotes If <code>true</code> enter ignoring state on
81      * encountering quotes.
82      */

83     public Node find(
84         NodeReader reader,
85         String JavaDoc input,
86         int position,
87         boolean balance_quotes)
88     {
89         StringBuffer JavaDoc textBuffer = new StringBuffer JavaDoc();
90         int state = BEFORE_PARSE_BEGINS_STATE;
91         int textBegin = position;
92         int textEnd = position;
93         int inputLen = input.length();
94         char ch;
95         char ignore_ender = '\"';
96         for (int i = position;
97             (i < inputLen && state != PARSE_COMPLETED_STATE);
98             i++)
99         {
100             ch = input.charAt(i);
101             if (ch == '<' && state != PARSE_IGNORE_STATE)
102             {
103                 if (beginTag(input, i))
104                 {
105                     state = PARSE_COMPLETED_STATE;
106                     textEnd = i - 1;
107                 }
108             }
109             if (balance_quotes && (ch == '\'' || ch == '"'))
110             {
111                 if (state == PARSE_IGNORE_STATE)
112                 {
113                     if (ch == ignore_ender)
114                         state = PARSE_HAS_BEGUN_STATE;
115                 }
116                 else
117                 {
118                     ignore_ender = ch;
119                     state = PARSE_IGNORE_STATE;
120                 }
121             }
122             if (state == BEFORE_PARSE_BEGINS_STATE)
123             {
124                 state = PARSE_HAS_BEGUN_STATE;
125             }
126             if (state == PARSE_HAS_BEGUN_STATE || state == PARSE_IGNORE_STATE)
127             {
128                 textBuffer.append(input.charAt(i));
129             }
130             // Patch by Cedric Rosa
131
if (state == BEFORE_PARSE_BEGINS_STATE && i == inputLen - 1)
132                 state = PARSE_HAS_BEGUN_STATE;
133             if (state == PARSE_HAS_BEGUN_STATE && i == inputLen - 1)
134             {
135                 do
136                 {
137                     input = reader.getNextLine();
138                     if (input != null && input.length() == 0)
139                         textBuffer.append(Node.getLineSeparator());
140                 }
141                 while (input != null && input.length() == 0);
142
143                 if (input == null)
144                 {
145                     textEnd = i;
146                     state = PARSE_COMPLETED_STATE;
147
148                 }
149                 else
150                 {
151                     textBuffer.append(Node.getLineSeparator());
152                     inputLen = input.length();
153                     i = -1;
154                 }
155
156             }
157         }
158         return new StringNode(textBuffer, textBegin, textEnd);
159     }
160 }
161
Popular Tags