KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > parserHelper > AttributeParser


1 // $Header: /home/cvs/jakarta-jmeter/src/htmlparser/org/htmlparser/parserHelper/AttributeParser.java,v 1.3 2004/02/10 13:41:08 woolfel Exp $
2
/*
3  * ====================================================================
4  * Copyright 2002-2004 The Apache Software Foundation.
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  */

19
20 // The developers of JMeter and Apache are grateful to the developers
21
// of HTMLParser for giving Apache Software Foundation a non-exclusive
22
// license. The performance benefits of HTMLParser are clear and the
23
// users of JMeter will benefit from the hard work of the HTMLParser
24
// team. For detailed information about HTMLParser, the project is
25
// hosted on sourceforge at http://htmlparser.sourceforge.net/.
26
//
27
// HTMLParser was originally created by Somik Raha in 2000. Since then
28
// a healthy community of users has formed and helped refine the
29
// design so that it is able to tackle the difficult task of parsing
30
// dirty HTML. Derrick Oswald is the current lead developer and was kind
31
// enough to assist JMeter.
32

33 package org.htmlparser.parserHelper;
34
35 import java.util.Hashtable JavaDoc;
36 import java.util.StringTokenizer JavaDoc;
37
38 import org.htmlparser.tags.Tag;
39
40
41 /**
42  * To change this generated comment edit the template variable "typecomment":
43  * Window>Preferences>Java>Templates.
44  * To enable and disable the creation of type comments go to
45  * Window>Preferences>Java>Code Generation.
46  * @author Somik Raha, Kaarle Kaila
47  * @version 7 AUG 2001
48  */

49 public class AttributeParser
50 {
51     private final String JavaDoc delima = " \t\r\n\f=\"'>";
52     private final String JavaDoc delimb = " \t\r\n\f\"'>";
53     private final char doubleQuote = '\"';
54     private final char singleQuote = '\'';
55     private String JavaDoc delim;
56
57     /**
58     * Method to break the tag into pieces.
59     * @param returns a Hastable with elements containing the
60     * pieces of the tag. The tag-name has the value field set to
61     * the constant Tag.TAGNAME. In addition the tag-name is
62     * stored into the Hashtable with the name Tag.TAGNAME
63     * where the value is the name of the tag.
64     * Tag parameters without value
65     * has the value "". Parameters with value are represented
66     * in the Hastable by a name/value pair.
67     * As html is case insensitive but Hastable is not are all
68     * names converted into UPPERCASE to the Hastable
69     * E.g extract the href values from A-tag's and print them
70     * <pre>
71     *
72     * Tag tag;
73     * Hashtable h;
74     * String tmp;
75     * try {
76     * NodeReader in = new NodeReader(new FileReader(path),2048);
77     * Parser p = new Parser(in);
78     * Enumeration en = p.elements();
79     * while (en.hasMoreElements()) {
80     * try {
81     * tag = (Tag)en.nextElement();
82     * h = tag.parseParameters();
83     * tmp = (String)h.get(tag.TAGNAME);
84     * if (tmp != null && tmp.equalsIgnoreCase("A")) {;
85     * System.out.println("URL is :" + h.get("HREF"));
86     * }
87     * } catch (ClassCastException ce){}
88     * }
89     * }
90     * catch (IOException ie) {
91     * ie.printStackTrace();
92     * }
93     * </pre>
94     *
95     */

96     public Hashtable JavaDoc parseAttributes(Tag tag)
97     {
98         Hashtable JavaDoc h = new Hashtable JavaDoc();
99         String JavaDoc element, name, value, nextPart = null;
100         String JavaDoc empty = null;
101         name = null;
102         value = null;
103         element = null;
104         boolean waitingForEqual = false;
105         delim = delima;
106         StringTokenizer JavaDoc tokenizer =
107             new StringTokenizer JavaDoc(tag.getText(), delim, true);
108         while (true)
109         {
110             nextPart = getNextPart(tokenizer, delim);
111             delim = delima;
112             if (element == null && nextPart != null && !nextPart.equals("="))
113             {
114                 element = nextPart;
115                 putDataIntoTable(h, element, null, true);
116             }
117             else
118             {
119                 if (nextPart != null)
120                 {
121                     if (name == null)
122                     {
123                         if (0 < nextPart.length() && !nextPart.substring(0, 1).equals(" "))
124                         {
125                             name = nextPart;
126                             waitingForEqual = true;
127                         }
128                     }
129                     else
130                     {
131                         if (waitingForEqual)
132                         {
133                             if (nextPart.equals("="))
134                             {
135                                 waitingForEqual = false;
136                                 delim = delimb;
137                             }
138                             else
139                             {
140                                 putDataIntoTable(h, name, "", false);
141                                 name = nextPart;
142                                 value = null;
143                             }
144                         }
145                         if (!waitingForEqual && !nextPart.equals("="))
146                         {
147                             value = nextPart;
148                             putDataIntoTable(h, name, value, false);
149                             name = null;
150                             value = null;
151                         }
152                     }
153                 }
154                 else
155                 {
156                     if (name != null)
157                     {
158                         if (name.equals("/"))
159                         {
160                             putDataIntoTable(h, Tag.EMPTYTAG, "", false);
161                         }
162                         else
163                         {
164                             putDataIntoTable(h, name, "", false);
165                         }
166                         name = null;
167                         value = null;
168                     }
169                     break;
170                 }
171             }
172         }
173         if (null == element) // handle no tag contents
174
putDataIntoTable(h, "", null, true);
175         return h;
176     }
177
178     private String JavaDoc getNextPart(StringTokenizer JavaDoc tokenizer, String JavaDoc deli)
179     {
180         String JavaDoc tokenAccumulator = null;
181         boolean isDoubleQuote = false;
182         boolean isSingleQuote = false;
183         boolean isDataReady = false;
184         String JavaDoc currentToken;
185         while (isDataReady == false && tokenizer.hasMoreTokens())
186         {
187             currentToken = tokenizer.nextToken(deli);
188             //
189
// First let's combine tokens that are inside "" or ''
190
//
191
if (isDoubleQuote || isSingleQuote)
192             {
193                 if (isDoubleQuote && currentToken.charAt(0) == doubleQuote)
194                 {
195                     isDoubleQuote = false;
196                     isDataReady = true;
197                 }
198                 else if (
199                     isSingleQuote && currentToken.charAt(0) == singleQuote)
200                 {
201                     isSingleQuote = false;
202                     isDataReady = true;
203                 }
204                 else
205                 {
206                     tokenAccumulator += currentToken;
207                     continue;
208                 }
209             }
210             else if (currentToken.charAt(0) == doubleQuote)
211             {
212                 isDoubleQuote = true;
213                 tokenAccumulator = "";
214                 continue;
215             }
216             else if (currentToken.charAt(0) == singleQuote)
217             {
218                 isSingleQuote = true;
219                 tokenAccumulator = "";
220                 continue;
221             }
222             else
223                 tokenAccumulator = currentToken;
224
225             if (tokenAccumulator.equals(currentToken))
226             {
227
228                 if (delim.indexOf(tokenAccumulator) >= 0)
229                 {
230                     if (tokenAccumulator.equals("="))
231                     {
232                         isDataReady = true;
233                     }
234                 }
235                 else
236                 {
237
238                     isDataReady = true;
239                 }
240             }
241             else
242                 isDataReady = true;
243
244         }
245         return tokenAccumulator;
246     }
247
248     private void putDataIntoTable(
249         Hashtable JavaDoc h,
250         String JavaDoc name,
251         String JavaDoc value,
252         boolean isName)
253     {
254         if (isName && value == null)
255             value = Tag.TAGNAME;
256         else if (value == null)
257             value = ""; // Hashtable does not accept nulls
258
if (isName)
259         {
260             // store tagname as tag.TAGNAME,tag
261
h.put(value, name.toUpperCase());
262         }
263         else
264         {
265             // store tag parameters as NAME, value
266
h.put(name.toUpperCase(), value);
267         }
268     }
269 }
270
Popular Tags