KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > htmlparser > http > ConnectionManager


1 // HTMLParser Library $Name: v1_5_20050313 $ - A java-based parser for HTML
2
// http://sourceforge.org/projects/htmlparser
3
// Copyright (C) 2004 Derrick Oswald
4
//
5
// Revision Control Information
6
//
7
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/http/ConnectionManager.java,v $
8
// $Author: derrickoswald $
9
// $Date: 2005/03/12 11:52:21 $
10
// $Revision: 1.2 $
11
//
12
// This library is free software; you can redistribute it and/or
13
// modify it under the terms of the GNU Lesser General Public
14
// License as published by the Free Software Foundation; either
15
// version 2.1 of the License, or (at your option) any later version.
16
//
17
// This library is distributed in the hope that it will be useful,
18
// but WITHOUT ANY WARRANTY; without even the implied warranty of
19
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20
// Lesser General Public License for more details.
21
//
22
// You should have received a copy of the GNU Lesser General Public
23
// License along with this library; if not, write to the Free Software
24
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25
//
26

27 package org.htmlparser.http;
28
29 import java.io.File JavaDoc;
30 import java.io.IOException JavaDoc;
31 import java.net.HttpURLConnection JavaDoc;
32 import java.net.MalformedURLException JavaDoc;
33 import java.net.URL JavaDoc;
34 import java.net.URLConnection JavaDoc;
35 import java.net.UnknownHostException JavaDoc;
36 import java.text.ParseException JavaDoc;
37 import java.text.SimpleDateFormat JavaDoc;
38 import java.util.Date JavaDoc;
39 import java.util.Enumeration JavaDoc;
40 import java.util.Hashtable JavaDoc;
41 import java.util.List JavaDoc;
42 import java.util.Map JavaDoc;
43 import java.util.Properties JavaDoc;
44 import java.util.StringTokenizer JavaDoc;
45 import java.util.Vector JavaDoc;
46
47 import org.htmlparser.util.ParserException;
48
49 /**
50  * Handles proxies, password protected URLs and request properties including cookies.
51  */

52 public class ConnectionManager
53 {
54     /**
55      * Default Request header fields.
56      * So far this is just "User-Agent" and "Accept-Encoding".
57      */

58     protected static Hashtable JavaDoc mDefaultRequestProperties = new Hashtable JavaDoc ();
59     static
60     {
61         mDefaultRequestProperties.put ("User-Agent", "HTMLParser/" + org.htmlparser.Parser.VERSION_NUMBER);
62         mDefaultRequestProperties.put ("Accept-Encoding", "gzip, deflate");
63     }
64    
65     /**
66      * Messages for page not there (404).
67      */

68     static private final String JavaDoc[] mFourOhFour =
69     {
70         "The web site you seek cannot be located, but countless more exist",
71         "You step in the stream, but the water has moved on. This page is not here.",
72         "Yesterday the page existed. Today it does not. The internet is like that.",
73         "That page was so big. It might have been very useful. But now it is gone.",
74         "Three things are certain: death, taxes and broken links. Guess which has occured.",
75         "Chaos reigns within. Reflect, repent and enter the correct URL. Order shall return.",
76         "Stay the patient course. Of little worth is your ire. The page is not found.",
77         "A non-existant URL reduces your expensive computer to a simple stone.",
78         "Many people have visited that page. Today, you are not one of the lucky ones.",
79         "Cutting the wind with a knife. Bookmarking a URL. Both are ephemeral.",
80     };
81
82     /**
83      * Base 64 character translation table.
84      */

85     static private final char[] mCharacterTable =
86         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/".toCharArray();
87  
88     /**
89      * Request header fields.
90      */

91     protected Hashtable JavaDoc mRequestProperties;
92     
93     /**
94      * The proxy server name.
95      */

96     protected String JavaDoc mProxyHost;
97
98     /**
99      * The proxy port number.
100      */

101     protected int mProxyPort;
102
103     /**
104      * The proxy username name.
105      */

106     protected String JavaDoc mProxyUser;
107
108     /**
109      * The proxy user password.
110      */

111     protected String JavaDoc mProxyPassword;
112
113     /**
114      * The username name for accessing the URL.
115      */

116     protected String JavaDoc mUser;
117
118     /**
119      * The user password for accessing the URL.
120      */

121     protected String JavaDoc mPassword;
122
123     /**
124      * Cookie storage, a hashtable (by site or host) of vectors of Cookies.
125      * This will be null if cookie processing is disabled (default).
126      */

127     protected Hashtable JavaDoc mCookieJar;
128
129     /**
130      * The object to be notified prior to and after each connection.
131      */

132     protected ConnectionMonitor mMonitor;
133
/**
 * Create a connection manager that uses the current default request
 * properties (the very same table object, not a copy).
 */
public ConnectionManager ()
{
    this (ConnectionManager.getDefaultRequestProperties ());
}
141
/**
 * Create a connection manager using the given request properties.
 * No proxy, no credentials, no monitor and no cookie jar are configured.
 * @param properties Name value pairs that are to be added to the HTTP request.
 */
public ConnectionManager (Hashtable properties)
{
    this.mRequestProperties = properties;

    // no proxy
    this.mProxyHost = null;
    this.mProxyPort = 0;
    this.mProxyUser = null;
    this.mProxyPassword = null;

    // no URL credentials
    this.mUser = null;
    this.mPassword = null;

    // cookie processing off, nobody watching
    this.mCookieJar = null;
    this.mMonitor = null;
}
159
//
// static methods
//

164     /**
165      * Get the current default request header properties.
166      * A String-to-String map of header keys and values.
167      * These fields are set by the parser when creating a connection.
168      */

169     public static Hashtable JavaDoc getDefaultRequestProperties ()
170     {
171         return (mDefaultRequestProperties);
172     }
173
174     /**
175      * Set the default request header properties.
176      * A String-to-String map of header keys and values.
177      * These fields are set by the parser when creating a connection.
178      * Some of these can be set directly on a <code>URLConnection</code>,
179      * i.e. If-Modified-Since is set with setIfModifiedSince(long),
180      * but since the parser transparently opens the connection on behalf
181      * of the developer, these properties are not available before the
182      * connection is fetched. Setting these request header fields affects all
183      * subsequent connections opened by the parser. For more direct control
184      * create a <code>URLConnection</code> and set it on the parser.<p>
185      * From <a HREF="http://www.ietf.org/rfc/rfc2616.txt">RFC 2616 Hypertext Transfer Protocol -- HTTP/1.1</a>:
186      * <pre>
187      * 5.3 Request Header Fields
188      *
189      * The request-header fields allow the client to pass additional
190      * information about the request, and about the client itself, to the
191      * server. These fields act as request modifiers, with semantics
192      * equivalent to the parameters on a programming language method
193      * invocation.
194      *
195      * request-header = Accept ; Section 14.1
196      * | Accept-Charset ; Section 14.2
197      * | Accept-Encoding ; Section 14.3
198      * | Accept-Language ; Section 14.4
199      * | Authorization ; Section 14.8
200      * | Expect ; Section 14.20
201      * | From ; Section 14.22
202      * | Host ; Section 14.23
203      * | If-Match ; Section 14.24
204      * | If-Modified-Since ; Section 14.25
205      * | If-None-Match ; Section 14.26
206      * | If-Range ; Section 14.27
207      * | If-Unmodified-Since ; Section 14.28
208      * | Max-Forwards ; Section 14.31
209      * | Proxy-Authorization ; Section 14.34
210      * | Range ; Section 14.35
211      * | Referer ; Section 14.36
212      * | TE ; Section 14.39
213      * | User-Agent ; Section 14.43
214      *
215      * Request-header field names can be extended reliably only in
216      * combination with a change in the protocol version. However, new or
217      * experimental header fields MAY be given the semantics of request-
218      * header fields if all parties in the communication recognize them to
219      * be request-header fields. Unrecognized header fields are treated as
220      * entity-header fields.
221      * </pre>
222      */

223     public static void setDefaultRequestProperties (Hashtable JavaDoc properties)
224     {
225         mDefaultRequestProperties = properties;
226     }
227
228     /**
229      * Gets the request header for the connection.
230      * <em>This header is generated from the contents of the connection
231      * and may not be exactly the same as the request that will be sent.</em>
232      * @param connection The connection to convert into an HTTP request header.
233      * @return The string that would be sent by the HTTP request.
234      */

235     public static String JavaDoc getRequestHeader (HttpURLConnection JavaDoc connection)
236     {
237         // dump it
238
StringBuffer JavaDoc buffer;
239         Map JavaDoc map;
240         String JavaDoc key;
241         List JavaDoc items;
242         
243         buffer = new StringBuffer JavaDoc (1024);
244         buffer.append (connection.getRequestMethod ());
245         buffer.append (" ");
246         buffer.append (connection.getURL ());
247         buffer.append (" HTTP/1.1\n");
248         map = connection.getRequestProperties ();
249         for (java.util.Iterator JavaDoc iterator = map.keySet ().iterator (); iterator.hasNext ();)
250         {
251             key = (String JavaDoc)iterator.next ();
252             items = (List JavaDoc)map.get (key);
253             buffer.append (key);
254             buffer.append (": ");
255             for (int i = 0; i < items.size (); i++)
256             {
257                 if (0 != i)
258                     buffer.append (", ");
259                 buffer.append (items.get (i));
260             }
261             buffer.append ("\n");
262         }
263         
264         return (buffer.toString ());
265     }
266
267     /**
268      * Gets the response header for the connection.
269      * Calling this method on an un-connected connection will
270      * generate an error, as will an attempt to get information
271      * from a connected but invalid connection.
272      * <em>This header is generated from the contents of the connection
273      * and may not be exactly the same as the response that was received.</em>
274      * @param connection The connection to convert into an HTTP response header.
275      * @return The string that was sent as the HTTP response.
276      */

277     public static String JavaDoc getResponseHeader (HttpURLConnection JavaDoc connection)
278     {
279         // dump it
280
StringBuffer JavaDoc buffer;
281         int code;
282         String JavaDoc message;
283         String JavaDoc key;
284         String JavaDoc value;
285
286         buffer = new StringBuffer JavaDoc (1024);
287         try
288         {
289             code = connection.getResponseCode ();
290             if (-1 != code)
291             {
292                 message = connection.getResponseMessage ();
293                 buffer.append ("HTTP/1.1 ");
294                 buffer.append (code);
295                 buffer.append (" ");
296                 buffer.append (message);
297                 buffer.append ("\n");
298                 for (int i = 0; null != (value = connection.getHeaderField (i)); i++)
299                 {
300                     key = connection.getHeaderFieldKey (i);
301                     if (null != key)
302                     {
303                         buffer.append (key);
304                         buffer.append (": ");
305                         buffer.append (value);
306                         buffer.append ("\n");
307                     }
308                 }
309             }
310         }
311         catch (IOException JavaDoc ioe)
312         {
313             buffer.append (ioe.toString ());
314         }
315
316         return (buffer.toString ());
317     }
318
319     /**
320      * Get the current request header properties.
321      * A String-to-String map of header keys and values,
322      * excluding proxy items, cookies and URL authorization.
323      */

324     public Hashtable JavaDoc getRequestProperties ()
325     {
326         return (mRequestProperties);
327     }
328
329     /**
330      * Set the current request properties.
331      * Replaces the current set of fixed request properties with the given set.
332      * This does not replace the Proxy-Authorization property which is
333      * constructed from the values of {@link #setProxyUser}
334      * and {@link #setProxyPassword} values or the Authorization property
335      * which is constructed from the {@link #setUser}
336      * and {@link #setPassword} values. Nor does it replace the
337      * Cookie property which is constructed from the current cookie jar.
338      * @param properties The new fixed properties.
339      */

340     public void setRequestProperties (Hashtable JavaDoc properties)
341     {
342         mRequestProperties = properties;
343     }
344
345     /**
346      * @return Returns the proxy host.
347      */

348     public String JavaDoc getProxyHost ()
349     {
350         return (mProxyHost);
351     }
352
353     /**
354      * @param host The host to use for proxy access.
355      * <em>Note: You must also set the proxy port.</em>
356      */

357     public void setProxyHost (String JavaDoc host)
358     {
359         mProxyHost = host;
360     }
361
362     /**
363      * @return Returns the proxy port.
364      */

365     public int getProxyPort ()
366     {
367         return (mProxyPort);
368     }
369     
370     /**
371      * @param port The proxy port.
372      */

373     public void setProxyPort (int port)
374     {
375         mProxyPort = port;
376     }
377
378     /**
379      * @return Returns the proxy user,
380      * or <code>null</code> if no proxy authorization is required.
381      */

382     public String JavaDoc getProxyUser ()
383     {
384         return (mProxyUser);
385     }
386
387     /**
388      * @param user The proxy user name.
389      * <em>Note: You must also set the proxy {@link #setProxyPassword password}.</em>
390      */

391     public void setProxyUser (String JavaDoc user)
392     {
393         mProxyUser = user;
394     }
395
396     /**
397      * @return Returns the proxy password.
398      */

399     public String JavaDoc getProxyPassword ()
400     {
401         return (mProxyPassword);
402     }
403
404     /**
405      * @param password The password for the proxy user.
406      */

407     public void setProxyPassword (String JavaDoc password)
408     {
409         mProxyPassword = password;
410     }
411
412     /**
413      * @return Returns the username that will be used to access the URL,
414      * or <code>null</code> if no authorization is required.
415      */

416     public String JavaDoc getUser ()
417     {
418         return (mUser);
419     }
420
421     /**
422      * @param user The user name for accessing the URL.
423      * <em>Note: You must also set the {@link #setPassword password}.</em>
424      */

425     public void setUser (String JavaDoc user)
426     {
427         mUser = user;
428     }
429
430     /**
431      * @return Returns the URL password.
432      */

433     public String JavaDoc getPassword ()
434     {
435         return (mPassword);
436     }
437
438     /**
439      * @param password The password for the URL.
440      */

441     public void setPassword (String JavaDoc password)
442     {
443         mPassword = password;
444     }
445
446     /**
447      * Predicate to determine if cookie processing is currently enabled.
448      * @return <code>true</code> if cookies are being processed.
449      */

450     public boolean getCookieProcessingEnabled ()
451     {
452         return (null != mCookieJar);
453     }
454
455     /**
456      * Enables and disabled cookie processing.
457      * @param enable if <code>true</code> cookie processing will occur,
458      * else cookie processing will be turned off.
459      */

460     public void setCookieProcessingEnabled (boolean enable)
461     {
462         if (enable)
463             mCookieJar = (null == mCookieJar) ? new Hashtable JavaDoc () : mCookieJar;
464         else
465             mCookieJar = null;
466     }
467
468     /**
469      * Adds a cookie to the cookie jar.
470      * @param cookie The cookie to add.
471      * @param domain The domain to use in case the cookie has no domain attribute.
472      */

473     public void setCookie (Cookie cookie, String JavaDoc domain)
474     {
475         String JavaDoc path;
476         Vector JavaDoc cookies;
477         Cookie probe;
478
479         if (null != cookie.getDomain ())
480             domain = cookie.getDomain ();
481         path = cookie.getPath ();
482         if (null == mCookieJar)
483             mCookieJar = new Hashtable JavaDoc (); // turn on cookie processing
484
cookies = (Vector JavaDoc)mCookieJar.get (domain);
485         if (null != cookies)
486         {
487             for (int j = 0; j < cookies.size (); j++)
488             {
489                 probe = (Cookie)cookies.elementAt (j);
490                 if (probe.getName ().equalsIgnoreCase (cookie.getName ()))
491                 {
492                     // we keep paths sorted most specific to least
493
if (probe.getPath ().equals (path))
494                     {
495                         cookies.setElementAt (cookie, j); // replace
496
break;
497                     }
498                     else if (path.startsWith (probe.getPath ()))
499                     {
500                         cookies.insertElementAt (cookie, j);
501                         break;
502                     }
503                 }
504             }
505         }
506         else
507         { // new cookie list needed
508
cookies = new Vector JavaDoc ();
509             cookies.addElement (cookie);
510             mCookieJar.put (domain, cookies);
511         }
512         
513     }
514
515     /**
516      * @return Returns the monitor, or null if none has been assigned.
517      */

518     public ConnectionMonitor getMonitor ()
519     {
520         return mMonitor;
521     }
522
523     /**
524      * @param monitor The monitor to set.
525      */

526     public void setMonitor (ConnectionMonitor monitor)
527     {
528         mMonitor = monitor;
529     }
530
531     /**
532      * Opens a connection using the given url.
533      * @param url The url to open.
534      * @exception ParserException if an i/o exception occurs accessing the url.
535      */

536     public URLConnection JavaDoc openConnection (URL JavaDoc url)
537         throws
538             ParserException
539     {
540         Properties JavaDoc sysprops;
541         Hashtable JavaDoc properties;
542         Enumeration JavaDoc enumeration;
543         String JavaDoc key;
544         String JavaDoc value;
545         String JavaDoc set = null; // old proxySet value
546
String JavaDoc host = null; // old proxyHost value
547
String JavaDoc port = null; // old proxyPort value
548
String JavaDoc host2 = null; // old http.proxyHost value
549
String JavaDoc port2 = null; // old http.proxyPort value
550
HttpURLConnection JavaDoc http;
551         URLConnection JavaDoc ret;
552
553         try
554         {
555             try
556             {
557                 // set up for proxy
558
if ((null != getProxyHost ()) && (0 != getProxyPort ()))
559                 {
560                     sysprops = System.getProperties ();
561                     set = (String JavaDoc)sysprops.put ("proxySet", "true");
562                     host = (String JavaDoc)sysprops.put ("proxyHost", getProxyHost ());
563                     port = (String JavaDoc)sysprops.put ("proxyPort", Integer.toString (getProxyPort ()));
564                     // see http://java.sun.com/j2se/1.4.2/docs/guide/net/properties.html
565
host2 = (String JavaDoc)sysprops.put ("http.proxyHost", getProxyHost ());
566                     port2 = (String JavaDoc)sysprops.put ("http.proxyPort", Integer.toString (getProxyPort ()));
567                     System.setProperties (sysprops);
568                     
569                 }
570     
571                 // open the connection... but don't connect yet
572
ret = url.openConnection ();
573                 if (ret instanceof HttpURLConnection JavaDoc)
574                 {
575                     http = (HttpURLConnection JavaDoc)ret;
576                     
577                     // set the fixed request properties
578
properties = getRequestProperties ();
579                     if (null != properties)
580                         for (enumeration = properties.keys (); enumeration.hasMoreElements ();)
581                         {
582                             key = (String JavaDoc)enumeration.nextElement ();
583                             value = (String JavaDoc)properties.get (key);
584                             ret.setRequestProperty (key, value);
585                         }
586     
587                     // set the proxy name and password
588
if ((null != getProxyUser ()) && (null != getProxyPassword ()))
589                     {
590                         String JavaDoc authorization = getProxyUser () + ":" + getProxyPassword ();
591                         String JavaDoc encodedauthorization = encode (authorization.getBytes("ISO-8859-1"));
592                         ret.setRequestProperty ("Proxy-Authorization", encodedauthorization);
593                     }
594                     
595                     // set the URL name and password
596
if ((null != getUser ()) && (null != getPassword ()))
597                     {
598                         String JavaDoc authorization = getUser () + ":" + getPassword ();
599                         String JavaDoc encodedauthorization = encode (authorization.getBytes("ISO-8859-1"));
600                         ret.setRequestProperty ("Authorization", "Basic " + encodedauthorization);
601                     }
602     
603                     // set the cookies based on the url
604
addCookies (ret);
605
606                     if (null != getMonitor ())
607                         getMonitor ().preConnect (http);
608                 }
609                 else
610                     http = null;
611
612                 try
613                 {
614                     ret.connect ();
615                     
616                     if (null != http)
617                     {
618                         if (null != getMonitor ())
619                             getMonitor ().postConnect (http);
620     
621                         parseCookies (ret);
622                     }
623                 }
624                 catch (UnknownHostException JavaDoc uhe)
625                 {
626                     int message = (int)(Math.random () * mFourOhFour.length);
627                     throw new ParserException (mFourOhFour[message], uhe);
628                 }
629                 catch (IOException JavaDoc ioe)
630                 {
631                     throw new ParserException (ioe.getMessage (), ioe);
632                 }
633             }
634             finally
635             {
636                 if ((null != getProxyHost ()) && (0 != getProxyPort ()))
637                 {
638                     sysprops = System.getProperties ();
639                     if (null != set)
640                         sysprops.put ("proxySet", set);
641                     else
642                         sysprops.remove ("proxySet");
643                     if (null != host)
644                         sysprops.put ("proxyHost", host);
645                     else
646                         sysprops.remove ("proxyHost");
647                     if (null != port)
648                         sysprops.put ("proxyPort", port);
649                     else
650                         sysprops.remove ("proxyPort");
651                     if (null != host2)
652                         sysprops.put ("http.proxyHost", host2);
653                     else
654                         sysprops.remove ("http.proxyHost");
655                     if (null != port2)
656                         sysprops.put ("http.proxyPort", port2);
657                     else
658                         sysprops.remove ("http.proxyPort");
659                     System.setProperties (sysprops);
660                 }
661             }
662         }
663         catch (IOException JavaDoc ioe)
664         {
665             String JavaDoc msg = "HTMLParser.openConnection() : Error in opening a connection to " + url.toExternalForm ();
666             ParserException ex = new ParserException (msg, ioe);
667             throw ex;
668         }
669
670         return (ret);
671     }
672
673     /**
674      * Encodes a byte array into BASE64 in accordance with <a HREF="http://www.faqs.org/rfcs/rfc2045.html">RFC 2045</a>.
675      * @param array The bytes to convert.
676      * @return A BASE64 encoded string.
677      */

678     public final static String JavaDoc encode (byte[] array)
679     {
680         int last; // last byte
681
int count; // character count
682
int separators; // line separator count
683
int length; // length of returned string
684
char[] encoded; // encoded characters
685
int left; // bytes left
686
int end;
687         int block; // encoding buffer
688
int r; // shift count
689
int n; // byte to encode
690
int index; // index into output array
691
String JavaDoc ret;
692
693         if ((null != array) && (0 != array.length))
694         {
695             last = array.length - 1;
696             count = (last / 3 + 1) << 2;
697             separators = (count - 1) / 76;
698             length = count + separators;
699             encoded = new char[length];
700             index = 0;
701             separators = 0;
702             for (int i = 0; i <= last; i += 3)
703             {
704                 left = last - i;
705                 end = (left > 1 ? 2 : left);
706     
707                 // collect 1 to 3 bytes to encode
708
block = 0;
709                 r = 16;
710                 for (int j = 0; j <= end; j++)
711                 {
712                     n = array[i + j];
713                     block += (n < 0 ? n + 256 : n) << r;
714                     r -= 8;
715                 }
716     
717                 // encode into 2-4 chars padding with '=' if no data left
718
encoded[index++] = mCharacterTable[(block >>> 18) & 0x3f];
719                 encoded[index++] = mCharacterTable[(block >>> 12) & 0x3f];
720                 encoded[index++] = left > 0 ? mCharacterTable[(block >>> 6) & 0x3f] : '=';
721                 encoded[index++] = left > 1 ? mCharacterTable[block & 0x3f] : '=';
722     
723                 if ((0 == (index - separators) % 76) && (index < length))
724                 {
725                     encoded[index++] = '\n';
726                     separators += 1;
727                 }
728             }
729             ret = new String JavaDoc (encoded);
730         }
731         else
732             ret = "";
733
734         return (ret);
735     }
736
737     /**
738      * Turn spaces into %20.
739      * ToDo: make this more generic (see RFE #1010593 provide URL encoding/decoding utilities).
740      * @param url The url containing spaces.
741      * @return The URL with spaces as %20 sequences.
742      */

743     public String JavaDoc fixSpaces (String JavaDoc url)
744     {
745         int index;
746         int length;
747         char ch;
748         StringBuffer JavaDoc buffer;
749
750         index = url.indexOf (' ');
751         if (-1 != index)
752         {
753             length = url.length ();
754             buffer = new StringBuffer JavaDoc (length * 3);
755             buffer.append (url.substring (0, index));
756             for (int i = index; i < length; i++)
757             {
758                 ch = url.charAt (i);
759                 if (ch==' ')
760                     buffer.append ("%20");
761                 else
762                     buffer.append (ch);
763             }
764             url = buffer.toString ();
765         }
766
767         return (url);
768     }
769
770     /**
771      * Opens a connection based on a given string.
772      * The string is either a file, in which case <code>file://localhost</code>
773      * is prepended to a canonical path derived from the string, or a url that
774      * begins with one of the known protocol strings, i.e. <code>http://</code>.
775      * Embedded spaces are silently converted to %20 sequences.
776      * @param string The name of a file or a url.
777      * @exception ParserException if the string is not a valid url or file.
778      */

779     public URLConnection JavaDoc openConnection (String JavaDoc string)
780         throws
781             ParserException
782     {
783         final String JavaDoc prefix = "file://localhost";
784         String JavaDoc resource;
785         URL JavaDoc url;
786         StringBuffer JavaDoc buffer;
787         URLConnection JavaDoc ret;
788
789         try
790         {
791             url = new URL JavaDoc (fixSpaces (string));
792             ret = openConnection (url);
793         }
794         catch (MalformedURLException JavaDoc murle)
795         { // try it as a file
796
try
797             {
798                 File JavaDoc file = new File JavaDoc (string);
799                 resource = file.getCanonicalPath ();
800                 buffer = new StringBuffer JavaDoc (prefix.length () + resource.length ());
801                 buffer.append (prefix);
802                 if (!resource.startsWith ("/"))
803                     buffer.append ("/");
804                 buffer.append (resource);
805                 url = new URL JavaDoc (fixSpaces (buffer.toString ()));
806                 ret = openConnection (url);
807             }
808             catch (MalformedURLException JavaDoc murle2)
809             {
810                 String JavaDoc msg = "HTMLParser.openConnection() : Error in opening a connection to " + string;
811                 ParserException ex = new ParserException (msg, murle2);
812                 throw ex;
813             }
814             catch (IOException JavaDoc ioe)
815             {
816                 String JavaDoc msg = "HTMLParser.openConnection() : Error in opening a connection to " + string;
817                 ParserException ex = new ParserException (msg, ioe);
818                 throw ex;
819             }
820         }
821
822         return (ret);
823     }
824
825     /**
826      * Generate a HTTP cookie header value string from the cookie jar.
827      * <pre>
828      * The syntax for the header is:
829      *
830      * cookie = "Cookie:" cookie-version
831      * 1*((";" | ",") cookie-value)
832      * cookie-value = NAME "=" VALUE [";" path] [";" domain]
833      * cookie-version = "$Version" "=" value
834      * NAME = attr
835      * VALUE = value
836      * path = "$Path" "=" value
837      * domain = "$Domain" "=" value
838      *
839      * </pre>
840      * @param connection The connection being accessed.
841      * @see <a HREF="http://www.ietf.org/rfc/rfc2109.txt">RFC 2109</a>
842      * @see <a HREF="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
843      */

844     public void addCookies (URLConnection JavaDoc connection)
845     {
846         Vector JavaDoc list;
847         URL JavaDoc url;
848         String JavaDoc host;
849         String JavaDoc path;
850         String JavaDoc domain;
851
852         if (null != mCookieJar)
853         {
854             list = null;
855             // get the site from the URL
856
url = connection.getURL ();
857             host = url.getHost ();
858             path = url.getPath ();
859             if (0 == path.length ())
860                 path = "/";
861             if (null != host)
862             { // http://www.objectsdevelopment.com/portal/modules/freecontent/content/javawebserver.html
863
list = addCookies ((Vector JavaDoc)mCookieJar.get (host), path, list);
864                 domain = getDomain (host);
865                 if (null != domain)
866                     list = addCookies ((Vector JavaDoc)mCookieJar.get (domain), path, list);
867                 else
868                     // maybe it is the domain we're accessing
869
list = addCookies ((Vector JavaDoc)mCookieJar.get ("." + host), path, list);
870             }
871             if (null != list)
872                 connection.setRequestProperty ("Cookie", generateCookieProperty (list));
873         }
874     }
875
876     /**
877      * Add qualified cookies from cookies into list.
878      * @param cookies The list of cookies to check (may be null).
879      * @param path The path being accessed.
880      * @param list The list of qualified cookies.
881      * @return The list of qualified cookies.
882      */

883     protected Vector JavaDoc addCookies (Vector JavaDoc cookies, String JavaDoc path, Vector JavaDoc list)
884     {
885         Cookie cookie;
886         Date JavaDoc expires;
887         Date JavaDoc now;
888
889         if (null != cookies)
890         {
891             now = new Date JavaDoc ();
892             for (int i = 0; i < cookies.size (); i++)
893             {
894                 cookie = (Cookie)cookies.elementAt (i);
895                 expires = cookie.getExpiryDate ();
896                 if ((null != expires) && expires.before (now))
897                 {
898                     cookies.remove (i);
899                     i--; // dick with the loop variable
900
}
901                 else
902                     if (path.startsWith (cookie.getPath ()))
903                     {
904                         if (null == list)
905                             list = new Vector JavaDoc ();
906                         list.addElement (cookie);
907                     }
908             }
909         }
910         
911         return (list);
912     }
913
914     /**
915      * Get the domain from a host.
916      * @param host The supposed host name.
917      * @return The domain (with the leading dot),
918      * or null if the domain cannot be determined.
919      */

920     protected String JavaDoc getDomain (String JavaDoc host)
921     {
922         StringTokenizer JavaDoc tokenizer;
923         int count;
924         String JavaDoc server;
925         int length;
926         boolean ok;
927         char c;
928         String JavaDoc ret;
929         
930         ret = null;
931         
932         tokenizer = new StringTokenizer JavaDoc (host, ".");
933         count = tokenizer.countTokens ();
934         if (3 <= count)
935         {
936             // have at least two dots,
937
// check if we were handed an IP address by mistake
938
length = host.length ();
939             ok = false;
940             for (int i = 0; i < length && !ok; i++)
941             {
942                 c = host.charAt (i);
943                 if (!(Character.isDigit (c) || (c == '.')))
944                     ok = true;
945             }
946             if (ok)
947             {
948                 // so take everything after the first token
949
server = tokenizer.nextToken ();
950                 length = server.length ();
951                 ret = host.substring (length);
952             }
953         }
954
955         return (ret);
956     }
957
958     /**
959      * Creates the cookie request property value from the list of valid cookies for the domain.
960      * @param cookies The list of valid cookies to be encoded in the request.
961      * @return A string suitable for inclusion as the value of the "Cookie:" request property.
962      */

963     protected String JavaDoc generateCookieProperty (Vector JavaDoc cookies)
964     {
965         int version;
966         Cookie cookie;
967         StringBuffer JavaDoc buffer;
968         String JavaDoc ret;
969         
970         ret = null;
971
972         buffer = new StringBuffer JavaDoc ();
973         version = 0;
974         for (int i = 0; i < cookies.size (); i++)
975             version = Math.max (version, ((Cookie)cookies.elementAt (i)).getVersion ());
976         if (0 != version)
977         {
978             buffer.append ("$Version=\"");
979             buffer.append (version);
980             buffer.append ("\"");
981         }
982         for (int i = 0; i < cookies.size (); i++)
983         {
984             cookie = (Cookie)cookies.elementAt (i);
985             if (0 != buffer.length ())
986                 buffer.append ("; ");
987             buffer.append (cookie.getName ());
988             buffer.append ("=");
989             if (0 != version)
990                 buffer.append ("\"");
991             buffer.append (cookie.getValue ());
992             if (0 != version)
993                 buffer.append ("\"");
994             if (0 != version)
995             {
996                 if ((null != cookie.getPath ())
997                     && (0 != cookie.getPath ().length ()))
998                 {
999                     buffer.append ("; $Path=\"");
1000                    buffer.append (cookie.getPath ());
1001                    buffer.append ("\"");
1002                }
1003                if ((null != cookie.getDomain ())
1004                    && (0 != cookie.getDomain ().length ()))
1005                {
1006                    buffer.append ("; $Domain=\"");
1007                    buffer.append (cookie.getDomain ());
1008                    buffer.append ("\"");
1009                }
1010            }
1011        }
1012        if (0 != buffer.length ())
1013            ret = buffer.toString ();
1014
1015        return (ret);
1016    }
1017    
1018    /**
1019     * Check for cookie and parse into cookie jar.
1020     */

1021    public void parseCookies (URLConnection JavaDoc connection)
1022    {
1023        String JavaDoc string;
1024        Vector JavaDoc cookies;
1025        StringTokenizer JavaDoc tokenizer;
1026        String JavaDoc token;
1027        int index;
1028        String JavaDoc name;
1029        String JavaDoc key;
1030        String JavaDoc value;
1031        Cookie cookie;
1032        
1033        string = connection.getHeaderField ("Set-Cookie");
1034        if (null != string)
1035        {
1036// set-cookie = "Set-Cookie:" cookies
1037
// cookies = 1#cookie
1038
// cookie = NAME "=" VALUE *(";" cookie-av)
1039
// NAME = attr
1040
// VALUE = value
1041
// cookie-av = "Comment" "=" value
1042
// | "Domain" "=" value
1043
// | "Max-Age" "=" value
1044
// | "Path" "=" value
1045
// | "Secure"
1046
// | "Version" "=" 1*DIGIT
1047
cookies = new Vector JavaDoc ();
1048            tokenizer = new StringTokenizer JavaDoc (string, ";,", true);
1049            cookie = null;
1050            while (tokenizer.hasMoreTokens ())
1051            {
1052                token = tokenizer.nextToken ().trim ();
1053                if (token.equals (";"))
1054                    continue;
1055                else if (token.equals (","))
1056                {
1057                    cookie = null;
1058                    continue;
1059                }
1060                    
1061                index = token.indexOf ('=');
1062                if (-1 == index)
1063                {
1064                    name = token;
1065                    value = null;
1066                    if (null == cookie)
1067                        throw new IllegalStateException JavaDoc ("no cookie value");
1068                    key = name.toLowerCase ();
1069                }
1070                else
1071                {
1072                    name = token.substring (0, index);
1073                    value = token.substring (index + 1);
1074                    key = name.toLowerCase ();
1075                }
1076
1077                if (null == cookie)
1078                {
1079                    cookie = new Cookie (name, value);
1080                    cookies.addElement (cookie);
1081                }
1082                else
1083                {
1084                    if (key.equals ("expires")) // Wdy, DD-Mon-YY HH:MM:SS GMT
1085
{
1086                        String JavaDoc comma = tokenizer.nextToken ();
1087                        String JavaDoc rest = tokenizer.nextToken ();
1088                        SimpleDateFormat JavaDoc format = new SimpleDateFormat JavaDoc ("EEE, dd-MMM-yy kk:mm:ss z");
1089                        try
1090                        {
1091                            Date JavaDoc date = format.parse (value + comma + rest);
1092                            cookie.setExpiryDate (date);
1093                        }
1094                        catch (ParseException JavaDoc pe)
1095                        {
1096                            // ok now what
1097
cookie.setExpiryDate (null);
1098                        }
1099                    }
1100                    else
1101                        if (key.equals ("domain"))
1102                            cookie.setDomain (value);
1103                        else
1104                            if (key.equals ("path"))
1105                                cookie.setPath (value);
1106                            else
1107                                if (key.equals ("secure"))
1108                                    cookie.setSecure (true);
1109                                else
1110                                    if (key.equals ("comment"))
1111                                        cookie.setComment (value);
1112                                    else
1113                                        if (key.equals ("version"))
1114                                            cookie.setVersion (Integer.parseInt (value));
1115                                        else
1116                                            if (key.equals ("max-age"))
1117                                            {
1118                                                Date JavaDoc date = new Date JavaDoc ();
1119                                                long then = date.getTime () + Integer.parseInt (value) * 1000;
1120                                                date.setTime (then);
1121                                                cookie.setExpiryDate (date);
1122                                            }
1123                                            else
1124                                            { // error,? unknown attribute,
1125
// maybe just another cookie not separated by a comma
1126
cookie = new Cookie (name, value);
1127                                                cookies.addElement (cookie);
1128                                            }
1129                }
1130           }
1131           if (0 != cookies.size ())
1132               saveCookies (cookies, connection);
1133        }
1134    }
1135
1136    protected void saveCookies (Vector JavaDoc list, URLConnection JavaDoc connection)
1137    {
1138        Cookie cookie;
1139        String JavaDoc domain;
1140
1141        for (int i = 0; i < list.size (); i++)
1142        {
1143            cookie = (Cookie)list.elementAt (i);
1144            domain = cookie.getDomain ();
1145            if (null == domain)
1146                domain = connection.getURL ().getHost ();
1147            setCookie (cookie, domain);
1148        }
1149    }
1150}
1151
1152
Popular Tags