KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > RemarkNodeParser


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/RemarkNodeParser.java,v 1.2 2004/02/10 13:41:10 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
// The developers of JMeter and Apache are grateful to the developers
// of HTMLParser for giving Apache Software Foundation a non-exclusive
// license. The performance benefits of HTMLParser are clear and the
// users of JMeter will benefit from the hard work of the HTMLParser
// team. For detailed information about HTMLParser, the project is
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
//
// HTMLParser was originally created by Somik Raha in 2000. Since then
// a healthy community of users has formed and helped refine the
// design so that it is able to tackle the difficult task of parsing
// dirty HTML. Derrick Oswald is the current lead developer and was kind
// enough to assist JMeter.
32

33 package org.htmlparser;
34 public class RemarkNodeParser
35 {
36     public final static int REMARK_NODE_BEFORE_PARSING_STATE = 0;
37     public final static int REMARK_NODE_OPENING_ANGLE_BRACKET_STATE = 1;
38     public final static int REMARK_NODE_EXCLAMATION_RECEIVED_STATE = 2;
39     public final static int REMARK_NODE_FIRST_DASH_RECEIVED_STATE = 3;
40     public final static int REMARK_NODE_ACCEPTING_STATE = 4;
41     public final static int REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE = 5;
42     public final static int REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE = 6;
43     public final static int REMARK_NODE_ACCEPTED_STATE = 7;
44     public final static int REMARK_NODE_ILLEGAL_STATE = 8;
45     public final static int REMARK_NODE_FINISHED_PARSING_STATE = 2;
46
47     /**
48      * Locate the remark tag withing the input string, by parsing from the given position
49      * @param reader HTML reader to be provided so as to allow reading of next line
50      * @param input Input String
51      * @param position Position to start parsing from
52      */

53     public RemarkNode find(NodeReader reader, String JavaDoc input, int position)
54     {
55         int state = REMARK_NODE_BEFORE_PARSING_STATE;
56         StringBuffer JavaDoc tagContents = new StringBuffer JavaDoc();
57         int tagBegin = 0;
58         int tagEnd = 0;
59         int i = position;
60         int inputLen = input.length();
61         char ch, prevChar = ' ';
62         while (i < inputLen && state < REMARK_NODE_ACCEPTED_STATE)
63         {
64             ch = input.charAt(i);
65             if (state == REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE)
66             {
67                 if (ch == '>')
68                 {
69                     state = REMARK_NODE_ACCEPTED_STATE;
70                     tagEnd = i;
71                 }
72                 else if (ch == '-')
73                 {
74                     tagContents.append(prevChar);
75                 }
76                 else
77                 {
78                     // Rollback last 2 characters (assumed same)
79
state = REMARK_NODE_ACCEPTING_STATE;
80                     tagContents.append(prevChar);
81                     tagContents.append(prevChar);
82                 }
83
84             }
85
86             if (state == REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE)
87             {
88                 if (ch == '-')
89                 {
90                     state = REMARK_NODE_CLOSING_SECOND_DASH_RECEIVED_STATE;
91                 }
92                 else
93                 {
94                     // Rollback
95
state = REMARK_NODE_ACCEPTING_STATE;
96                     tagContents.append(prevChar);
97                 }
98             }
99             if (state == REMARK_NODE_ACCEPTING_STATE)
100             {
101                 if (ch == '-')
102                 {
103                     state = REMARK_NODE_CLOSING_FIRST_DASH_RECEIVED_STATE;
104                 } /*else
105                                 if (ch == '<')
106                                 {
107                                     state=REMARK_NODE_ILLEGAL_STATE;
108                                 } */

109             }
110             if (state == REMARK_NODE_ACCEPTING_STATE)
111             {
112                 // We can append contents now
113
tagContents.append(ch);
114             }
115
116             if (state == REMARK_NODE_FIRST_DASH_RECEIVED_STATE)
117             {
118                 if (ch == '-')
119                 {
120                     state = REMARK_NODE_ACCEPTING_STATE;
121                     // Do a lookahead and see if the next char is >
122
if (input.length() > i + 1 && input.charAt(i + 1) == '>')
123                     {
124                         state = REMARK_NODE_ACCEPTED_STATE;
125                         tagEnd = i + 1;
126                     }
127                 }
128                 else
129                     state = REMARK_NODE_ILLEGAL_STATE;
130             }
131             if (state == REMARK_NODE_EXCLAMATION_RECEIVED_STATE)
132             {
133                 if (ch == '-')
134                     state = REMARK_NODE_FIRST_DASH_RECEIVED_STATE;
135                 else if (ch == '>')
136                 {
137                     state = REMARK_NODE_ACCEPTED_STATE;
138                     tagEnd = i;
139                 }
140                 else
141                     state = REMARK_NODE_ILLEGAL_STATE;
142             }
143             if (state == REMARK_NODE_OPENING_ANGLE_BRACKET_STATE)
144             {
145                 if (ch == '!')
146                     state = REMARK_NODE_EXCLAMATION_RECEIVED_STATE;
147                 else
148                     state = REMARK_NODE_ILLEGAL_STATE;
149                 // This is not a remark tag
150
}
151             if (state == REMARK_NODE_BEFORE_PARSING_STATE)
152             {
153                 if (ch == '<')
154                 {
155                     // Transition from State 0 to State 1 - Record data till > is encountered
156
tagBegin = i;
157                     state = REMARK_NODE_OPENING_ANGLE_BRACKET_STATE;
158                 }
159                 else if (ch != ' ')
160                 {
161                     // Its not a space, hence this is probably a string node, not a remark node
162
state = REMARK_NODE_ILLEGAL_STATE;
163                 }
164             }
165             // if (state > REMARK_NODE_OPENING_ANGLE_BRACKET_STATE && state < REMARK_NODE_ACCEPTED_STATE && i == input.length() - 1)
166
if (state >= REMARK_NODE_ACCEPTING_STATE
167                 && state < REMARK_NODE_ACCEPTED_STATE
168                 && i == input.length() - 1)
169             {
170                 // We need to continue parsing to the next line
171
//input = reader.getNextLine();
172
tagContents.append(Node.getLineSeparator());
173                 do
174                 {
175                     input = reader.getNextLine();
176                 }
177                 while (input != null && input.length() == 0);
178                 if (input != null)
179                     inputLen = input.length();
180                 else
181                     inputLen = -1;
182                 i = -1;
183             }
184             if (state == REMARK_NODE_ILLEGAL_STATE)
185             {
186                 return null;
187             }
188             i++;
189             prevChar = ch;
190         }
191         if (state == REMARK_NODE_ACCEPTED_STATE)
192             return new RemarkNode(tagBegin, tagEnd, tagContents.toString());
193         else
194             return null;
195     }
196 }
197
Popular Tags