KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jmeter > protocol > http > util > accesslog > TCLogParser


1 // $Header: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/util/accesslog/TCLogParser.java,v 1.10.2.1 2004/05/20 18:08:54 mstover1 Exp $
2
/*
3  * Copyright 2003-2004 The Apache Software Foundation.
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17 */

18
19 package org.apache.jmeter.protocol.http.util.accesslog;
20
21 import java.io.BufferedReader JavaDoc;
22 import java.io.File JavaDoc;
23 import java.io.FileReader JavaDoc;
24 import java.io.IOException JavaDoc;
25 import java.util.StringTokenizer JavaDoc;
26 import java.util.Vector JavaDoc;
27
28 import org.apache.jmeter.junit.JMeterTestCase;
29 import org.apache.jorphan.logging.LoggingManager;
30 import org.apache.log.Logger;
31
32 /**
33  * Description:<br>
34  * <br>
35  * Currently the parser only handles GET/POST
36  * requests. It's easy enough to add support
37  * for other request methods by changing
38  * checkMethod. This is a complete rewrite of
39  * a tool I wrote for myself earlier. The older
40  * algorithm was basic and did not provide the
41  * same level of flexibility I want, so I
42  * wrote a new one using a totally new algorithm.
43  * This implementation reads one line at a time
44  * using BufferedReader. When it gets to the end
45  * of the file and the sampler needs to get more
46  * requests, the parser will re-initialize the
47  * BufferedReader. The implementation uses
48  * StringTokenizer to create tokens.<p>
49  * The parse algorithm is the following:<p>
50  * <ol>
51  * <li> cleans the entry by looking for backslash "\"
52  * <li> looks to see if GET or POST is in the line
53  * <li> tokenizes using quotes "
54  * <li> finds the token with the request method
55  * <li> gets the string of the token and tokenizes it using space
56  * <li> finds the first token beginning with slash character
57  * <li> tokenizes the string using question mark "?"
58  * <li> get the path from the first token
59  * <li> returns the second token and checks it for parameters
60  * <li> tokenizes the string using ampersand "&"
61  * <li> parses each token to name/value pairs
62  * </ol>
63  * <p>
64  * Extending this class is fairly simple. Most
65  * access logs use the same format starting from
66  * the request method. Therefore, changing the
67  * implementation of cleanURL(string) method
68  * should be sufficient to support new log
69  * formats. Tomcat uses common log format, so
70  * any webserver that uses the format should
71  * work with this parser. Servers that are known
72  * to use non standard formats are IIS and Netscape.
73  * <p>
74  *
75  * @version $Revision: 1.10.2.1 $ last updated $Date: 2004/05/20 18:08:54 $
76  * Created on: June 23, 2003<br>
77  */

78
79 public class TCLogParser implements LogParser
80 {
81    static Logger log = LoggingManager.getLoggerForClass();
82
83     public static final String JavaDoc GET = "GET";
84     public static final String JavaDoc POST = "POST";
85
86     /** protected members **/
87     protected String JavaDoc RMETHOD = null;
88     /**
89      * The path to the access log file
90      */

91     protected String JavaDoc URL_PATH = null;
92     /**
93      * A counter used by the parser.
94      * it is the real count of lines
95      * parsed
96      */

97     protected int COUNT = 0;
98     /**
99      * the number of lines the user
100      * wishes to parse
101      */

102     protected int PARSECOUNT = -1;
103     protected boolean useFILE = true;
104
105     protected File JavaDoc SOURCE = null;
106     protected String JavaDoc FILENAME = null;
107     protected BufferedReader JavaDoc READER = null;
108
109     /**
110      * Handles to supporting classes
111      */

112     protected Generator GEN = null;
113     protected Filter FILTER = null;
114
115 //TODO downcase UPPER case variables
116

/**
 * Creates a parser with no source file configured. A file name has to be
 * supplied through setSourceFile before parse() can open anything.
 */
public TCLogParser()
{
    super();
}
124
/**
 * Creates a parser for the given access log.
 *
 * @param source name of the access log file; stored via setSourceFile,
 *               the file itself is not opened until parse() is called
 */
public TCLogParser(String source)
{
    setSourceFile(source);
}
132
133     /**
134      * Set the Generator
135      * @param generator
136      */

137     public void setGenerator(Generator generator)
138     {
139         this.GEN = generator;
140     }
141
142     /**
143      * Calls this method to set whether or not
144      * to use the path in the log. We may want
145      * to provide the ability to filter the
146      * log file later on. By default, the parser
147      * uses the file in the log.
148      * @param file
149      */

150     public void setUseParsedFile(boolean file)
151     {
152         this.useFILE = file;
153     }
154
155     /**
156      * Use the filter to include/exclude files
157      * in the access logs. This is provided as
158      * a convienance and reduce the need to
159      * spend hours cleaning up log files.
160      * @param filter
161      */

162     public void setFilter(Filter filter)
163     {
164         FILTER = filter;
165     }
166
167     /**
168      * Sets the source file.
169      * @param source
170      */

171     public void setSourceFile(String JavaDoc source)
172     {
173         this.FILENAME = source;
174     }
175
176     /**
177      * Creates a new File object.
178      * @param filename
179      */

180     public File JavaDoc openFile(String JavaDoc filename)
181     {
182         return new File JavaDoc(filename);
183     }
184
185     /**
186      * parse the entire file.
187      * @return boolean success/failure
188      */

189     public boolean parse()
190     {
191         if (this.SOURCE == null)
192         {
193             this.SOURCE = this.openFile(this.FILENAME);
194         }
195         try
196         {
197             if (this.READER == null)
198             {
199                 this.READER = new BufferedReader JavaDoc(new FileReader JavaDoc(this.SOURCE));
200             }
201             parse(this.READER);
202         }
203         catch (Exception JavaDoc exception)
204         {
205             exception.printStackTrace();
206         }
207         return true;
208     }
209
210     /**
211      * parse a set number of lines from
212      * the access log. Keep in mind the
213      * number of lines parsed will depend
214      * the filter and number of lines in
215      * the log. The method returns the
216      * actual lines parsed.
217      * @param count
218      * @return lines parsed
219      */

220     public int parse(int count)
221     {
222         if (count > 0)
223         {
224             this.PARSECOUNT = count;
225         }
226         this.parse();
227         return COUNT;
228     }
229
230     /**
231      * The method is responsible for reading each
232      * line, and breaking out of the while loop
233      * if a set number of lines is given.
234      * @param breader
235      */

236     protected void parse(BufferedReader JavaDoc breader)
237     {
238         String JavaDoc line = null;
239         try
240         {
241             // read one line at a time using
242
// BufferedReader
243
line = breader.readLine();
244             if (line == null && COUNT >= this.PARSECOUNT)
245             {
246                 this.READER.close();
247                 this.READER = null;
248                 this.READER = new BufferedReader JavaDoc(new FileReader JavaDoc(this.SOURCE));
249                 parse(this.READER);
250             }
251             while (line != null)
252             {
253                 if (line.length() > 0)
254                 {
255                     this.parseLine(line);
256                 }
257                 // we check the count to see if we have exceeded
258
// the number of lines to parse. There's no way
259
// to know where to stop in the file. Therefore
260
// we use break to escape the while loop when
261
// we've reached the count.
262
if (this.PARSECOUNT != -1 && COUNT >= this.PARSECOUNT)
263                 {
264                     break;
265                 }
266             }
267         }
268         catch (IOException JavaDoc ioe)
269         {
270             ioe.printStackTrace();
271         }
272     }
273
274     /**
275      * parseLine calls the other parse methods
276      * to parse the given text.
277      * @param line
278      */

279     protected void parseLine(String JavaDoc line)
280     {
281         // we clean the line to get
282
// rid of extra stuff
283
line = this.cleanURL(line);
284         // now we set request method
285
this.GEN.setMethod(this.RMETHOD);
286         if (FILTER != null)
287         {
288             if (!FILTER.isFiltered(line))
289             {
290                 // increment the current count
291
COUNT++;
292                 // we filter the line first, before we try
293
// to separate the URL into file and
294
// parameters.
295
line = FILTER.filter(line);
296                 if (line != null)
297                 {
298                     createUrl(line);
299                 }
300             }
301         }
302         else
303         {
304             // increment the current count
305
COUNT++;
306             // in the case when the filter is not set, we
307
// parse all the lines
308
createUrl(line);
309         }
310     }
311
312     /**
313     * @param line
314     */

315    private void createUrl(String JavaDoc line)
316    {
317       String JavaDoc paramString = null;
318         // check the URL for "?" symbol
319
paramString = this.stripFile(line);
320         if(paramString != null)
321         {
322            this.checkParamFormat(line);
323            // now that we have stripped the file, we can parse the parameters
324
this.convertStringToJMRequest(paramString);
325         }
326    }
327
328    /**
329      * The method cleans the URL using the following
330      * algorithm.
331      * <ol>
332      * <li> check for double quotes
333      * <li> check the request method
334      * <li> tokenize using double quotes
335      * <li> find first token containing request method
336      * <li> tokenize string using space
337      * <li> find first token that begins with "/"
338      * </ol>
339      * Example Tomcat log entry:<p>
340      * 127.0.0.1 - - [08/Jan/2003:07:03:54 -0500]
341      * "GET /addrbook/ HTTP/1.1" 200 1981
342      * <p>
343      * @param entry
344      * @return cleaned url
345      */

346     public String JavaDoc cleanURL(String JavaDoc entry)
347     {
348         String JavaDoc url = entry;
349         // if the string contains atleast one double
350
// quote and checkMethod is true, go ahead
351
// and tokenize the string.
352
if (entry.indexOf("\"") > -1 && checkMethod(entry))
353         {
354             StringTokenizer JavaDoc tokens = null;
355             // we tokenize using double quotes. this means
356
// for tomcat we should have 3 tokens if there
357
// isn't any additional information in the logs
358
tokens = this.tokenize(entry, "\"");
359             while (tokens.hasMoreTokens())
360             {
361                 String JavaDoc toke = (String JavaDoc) tokens.nextToken();
362                 // if checkMethod on the token is true
363
// we tokenzie it using space and escape
364
// the while loop. Only the first matching
365
// token will be used
366
if (checkMethod(toke))
367                 {
368                     StringTokenizer JavaDoc token2 = this.tokenize(toke, " ");
369                     while (token2.hasMoreTokens())
370                     {
371                         String JavaDoc t = (String JavaDoc) token2.nextElement();
372                         if(t.equalsIgnoreCase(GET))
373                         {
374                            RMETHOD = GET;
375                         }
376                         else if(t.equalsIgnoreCase(POST))
377                         {
378                            RMETHOD = POST;
379                         }
380                         // there should only be one token
381
// that starts with slash character
382
if (t.startsWith("/"))
383                         {
384                             url = t;
385                             break;
386                         }
387                     }
388                     break;
389                 }
390             }
391             return url;
392         }
393         else
394         {
395             // we return the original string
396
return url;
397         }
398     }
399
400     /**
401      * The method checks for POST and GET
402      * methods currently. The other methods
403      * aren't supported yet.
404      * @param text
405      * @return if method is supported
406      */

407     public boolean checkMethod(String JavaDoc text)
408     {
409         if (text.indexOf("GET") > -1)
410         {
411             this.RMETHOD = GET;
412             return true;
413         }
414         else if (text.indexOf("POST") > -1)
415         {
416             this.RMETHOD = POST;
417             return true;
418         }
419         else
420         {
421             return false;
422         }
423     }
424
425     /**
426      * Tokenize the URL into two tokens. If the URL has more than one "?", the
427      * parse may fail. Only the first two tokens are used. The first token is
428      * automatically parsed and set at URL_PATH.
429      * @param url
430      * @return String parameters
431      */

432     public String JavaDoc stripFile(String JavaDoc url)
433     {
434         if (url.indexOf("?") > -1)
435         {
436             StringTokenizer JavaDoc tokens = this.tokenize(url, "?");
437             this.URL_PATH = tokens.nextToken();
438             this.GEN.setPath(URL_PATH);
439             return tokens.nextToken();
440         }
441         else
442         {
443             this.GEN.setPath(url);
444             return null;
445         }
446     }
447
448     /**
449      * Checks the string to make sure it has /path/file?name=value format. If
450      * the string doesn't have "?", it will return false.
451      * @param url
452      * @return boolean
453      */

454     public boolean checkURL(String JavaDoc url)
455     {
456         if (url.indexOf("?") > -1)
457         {
458             return true;
459         }
460         else
461         {
462             return false;
463         }
464     }
465
466     /**
467      * Checks the string to see if it contains "&" and "=". If it does, return
468      * true, so that it can be parsed.
469      * @param text
470      * @return boolean
471      */

472     public boolean checkParamFormat(String JavaDoc text)
473     {
474         if (text.indexOf("&") > -1 && text.indexOf("=") > -1)
475         {
476             return true;
477         }
478         else
479         {
480             return false;
481         }
482     }
483
484     /**
485      * Convert a single line into XML
486      * @param text
487      */

488     public void convertStringToJMRequest(String JavaDoc text)
489     {
490         this.GEN.setParams(this.convertStringtoNVPair(text));
491     }
492
493     /**
494      * Parse the string parameters into NVPair[]
495      * array. Once they are parsed, it is returned.
496      * The method uses parseOneParameter(string)
497      * to convert each pair.
498      * @param stringparams
499      */

500     public NVPair[] convertStringtoNVPair(String JavaDoc stringparams)
501     {
502         Vector JavaDoc vparams = this.parseParameters(stringparams);
503         NVPair[] nvparams = new NVPair[vparams.size()];
504         // convert the Parameters
505
for (int idx = 0; idx < nvparams.length; idx++)
506         {
507             nvparams[idx] = this.parseOneParameter((String JavaDoc) vparams.get(idx));
508         }
509         return nvparams;
510     }
511
512     /**
513      * Method expects name and value to be separated
514      * by an equal sign "=". The method uses StringTokenizer
515      * to make a NVPair object. If there happens to be more
516      * than one "=" sign, the others are ignored. The chance
517      * of a string containing more than one is unlikely
518      * and would not conform to HTTP spec. I should double
519      * check the protocol spec to make sure this is
520      * accurate.
521      * @param parameter to be parsed
522      * @return NVPair
523      */

524     protected NVPair parseOneParameter(String JavaDoc parameter)
525     {
526         String JavaDoc name = null;
527         String JavaDoc value = null;
528         try
529         {
530             StringTokenizer JavaDoc param = this.tokenize(parameter, "=");
531             name = param.nextToken();
532             value = param.nextToken();
533         }
534         catch (Exception JavaDoc e)
535         {
536             // do nothing. it's naive, but since
537
// the utility is meant to parse access
538
// logs the formatting should be correct
539
}
540         if (value == null)
541         {
542             value = "";
543         }
544         return new NVPair(name.trim(), value.trim());
545     }
546
547     /**
548      * Method uses StringTokenizer to convert the string
549      * into single pairs. The string should conform to
550      * HTTP protocol spec, which means the name/value
551      * pairs are separated by the ampersand symbol "&".
552      * Some one could write the querystrings by hand,
553      * but that would be round about and go against the
554      * purpose of this utility.
555      * @param parameters
556      * @return Vector
557      */

558     protected Vector JavaDoc parseParameters(String JavaDoc parameters)
559     {
560         Vector JavaDoc parsedParams = new Vector JavaDoc();
561         StringTokenizer JavaDoc paramtokens = this.tokenize(parameters, "&");
562         while (paramtokens.hasMoreElements())
563         {
564             parsedParams.add(paramtokens.nextElement());
565         }
566         return parsedParams;
567     }
568
569     /**
570      * Parses the line using java.util.StringTokenizer.
571      * @param line line to be parsed
572      * @param delim delimiter
573      * @return StringTokenizer
574      */

575     public StringTokenizer JavaDoc tokenize(String JavaDoc line, String JavaDoc delim)
576     {
577         return new StringTokenizer JavaDoc(line, delim);
578     }
579
580     public void close()
581     {
582         try
583         {
584             this.READER.close();
585             this.READER = null;
586             this.SOURCE = null;
587         }
588         catch (IOException JavaDoc e)
589         {
590             // do nothing
591
}
592     }
593     //TODO write some more tests
594

595     ///////////////////////////// Start of Test Code //////////////////////////
596

597     public static class Test extends JMeterTestCase
598     {
599         private static final TCLogParser tclp = new TCLogParser();
600
601         private static final String JavaDoc URL1 =
602         "127.0.0.1 - - [08/Jan/2003:07:03:54 -0500] \"GET /addrbook/ HTTP/1.1\" 200 1981";
603
604         private static final String JavaDoc URL2 =
605         "127.0.0.1 - - [08/Jan/2003:07:03:54 -0500] \"GET /addrbook?x=y HTTP/1.1\" 200 1981";
606
607         public void testConstruct() throws Exception JavaDoc
608         {
609             TCLogParser tcp;
610             tcp = new TCLogParser();
611             assertNull("Should not have set the filename",tcp.FILENAME);
612
613             String JavaDoc file = "testfiles/access.log";
614             tcp = new TCLogParser(file);
615             assertEquals("Filename should have been saved",file,tcp.FILENAME);
616         }
617         
618         public void testcleanURL() throws Exception JavaDoc
619         {
620            tclp.GEN = new StandardGenerator();
621            tclp.GEN.generateRequest();
622             String JavaDoc res = tclp.cleanURL(URL1);
623             assertEquals("/addrbook/",res);
624             assertNull(tclp.stripFile(res));
625         }
626         public void testcheckURL() throws Exception JavaDoc
627         {
628             assertFalse("URL is not have a query",tclp.checkURL(URL1));
629             assertTrue("URL is a query",tclp.checkURL(URL2));
630         }
631     }
632
633 }
Popular Tags