Page


1   /*
2    * WebSphinx web-crawling toolkit
3    *
4    * Copyright (c) 1998-2002 Carnegie Mellon University.  All rights
5    * reserved.
6    *
7    * Redistribution and use in source and binary forms, with or without
8    * modification, are permitted provided that the following conditions
9    * are met:
10   *
11   * 1. Redistributions of source code must retain the above copyright
12   *    notice, this list of conditions and the following disclaimer.
13   *
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in
16   *    the documentation and/or other materials provided with the
17   *    distribution.
18   *
19   * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20   * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21   * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22   * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23   * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30   *
31   */
32  
33  package websphinx;
34  
35  import java.net.URL  ;
36  import java.net.URLConnection  ;
37  //#ifdef JDK1.1 
38  import java.net.HttpURLConnection  ;
39  //#endif JDK1.1
40  import java.io.IOException  ;
41  import java.io.InputStream  ;
42  import rcm.util.Str;
43  
44  /**
45   * A Web page.  Although a Page can represent any MIME type, it mainly
46   * supports HTML pages, which are automatically parsed.  The parsing produces
47   * a list of tags, a list of words, an HTML parse tree, and a list of links.
48   */
49  public class Page extends Region {
50  
51      // typical page length, to optimize downloads
52      static final int TYPICAL_LENGTH = 20240;
53  
54      // Permanent content
55      Link origin;
56      long lastModified = 0;
57      long expiration = 0;
58      String   contentType;
59      String   contentEncoding;
60      int responseCode = -1;
61      String   responseMessage = null;
62      URL   base;
63      String   title;
64      Link[] links;
65  
66      int contentLock; 
67          // If page was downloaded from Net, represents number of 
68          //    callers who want to keep the content.
69          // If page was created from a string, set to -1. 
70  
71      // Discardable content (thrown away when contentLock falls to 0)
72      byte[] contentBytes;
73      String   content;
74      Region[] tokens;
75      Text[] words;
76      Tag[] tags;
77      Element[] elements;
78      Element root;
79      String   canonicalTags;
80  
81      /**
82       * Make a Page by downloading and parsing a Link.
83       * @param link Link to download
84       */
85      public Page (Link link) throws IOException   {
86          this (link, DownloadParameters.NO_LIMITS, new HTMLParser ());
87      }
88  
89      /**
90       * Make a Page by downloading a Link.
91       * @param link Link to download
92       * @param dp Download parameters to use
93       */
94      public Page (Link link, DownloadParameters dp) throws IOException   {
95          this (link, dp, new HTMLParser ());
96      }
97  
98      /**
99       * Make a Page by downloading a Link.
100      * @param link Link to download
101      * @param parser HTML parser to use
102      */
103     public Page (Link link, DownloadParameters dp, HTMLParser parser) throws IOException   {
104         super (null, 0, 0);
105         source = this;
106         origin = link;
107         base = getURL ();
108         download (dp, parser);
109         link.setPage (this);
110     }
111 
112     /**
113      * Make a Page from a URL and a string of HTML.
114      * The created page has no originating link, so calls to getURL(), getProtocol(), etc. will fail.
115      * @param url URL to use as a base for relative links on the page
116      * @param html the HTML content of the page
117      */
118     public Page (URL   url, String   html) {
119         this (url, html, new HTMLParser ());
120     }
121 
122     /**
123      * Make a Page from a URL and a string of HTML.
124      * The created page has no originating link, so calls to getURL(), getProtocol(), etc. will fail.
125      * @param url URL to use as a base for relative links on the page
126      * @param html the HTML content of the page
127      * @param parser HTML parser to use
128      */
129     public Page (URL   url, String   html, HTMLParser parser) {
130         super (null, 0, html.length());
131         source = this;
132         base = url;
133         this.content = html;
134         this.contentBytes = html.getBytes ();
135         contentLock = -1;
136         parse (parser);
137     }
138 
139     /**
140      * Make a Page from a string of content.  The content is not parsed. 
141      * The created page has no originating link, so calls to getURL(), getProtocol(), etc. will fail.
142      * @param content HTML content of the page */
143     public Page (String   content) {
144         super (null, 0, content.length());
145         // FIX: don't think base==null will work
146         source = this;
147         this.content = content;
148         this.contentBytes = content.getBytes ();
149         contentLock = -1;
150     }
151 
152     /**
153      * Make a Page from a byte array of content.  The content is not parsed. 
154      * The created page has no originating link, so calls to getURL(), getProtocol(), etc. will fail.
155      * @param content byte content of the page */
156     public Page (byte[] content) {
157         super (null, 0, content.length);
158         // FIX: don't think base==null will work
159         source = this;
160         this.contentBytes = new byte[content.length];
161         System.arraycopy (content, 0, this.contentBytes, 0, content.length);
162         this.content = new String   (content);
163         contentLock = -1;
164     }
165 
166     //
167     // Downloading
168     //
169 
170     // This code generates SecurityExceptions in Netscape 4.0,
171     // and it doesn't seem to be necessary anyway: redirects are followed
172     // by Netscape and JDK by default, despite the fact that the JDK
173     // docs claim that setFollowRedirects() defaults to false
174     
175     //static {
176       //try {
177       //  HttpURLConnection.setFollowRedirects (true);
178       //} catch (Throwable t) { }
179     //}
180 
181     /*
182      * Download the page.  The downloaded page is parsed 
183      * if its MIME type is HTML or unspecified.
184      * @param parser HTML parser to use
185      * @exception IOException if an error occurs in downloading the page
186      */
187     public void download (DownloadParameters dp, HTMLParser parser) throws IOException   {
188         URLConnection   conn = 
189             Access.getAccess ().openConnection (origin);
190         
191         // fetch and store final redirected URL and response headers
192         InputStream   in = conn.getInputStream ();
193         base = conn.getURL ();
194         lastModified = conn.getLastModified ();
195         expiration = conn.getExpiration ();
196         contentType = conn.getContentType ();
197         contentEncoding = conn.getContentEncoding ();
198 
199 //#ifdef JDK1.1 
200         // get HTTP response codes
201         if (conn instanceof HttpURLConnection  ) {
202             HttpURLConnection   httpconn = (HttpURLConnection  )conn;
203 
204             responseCode = httpconn.getResponseCode ();
205             responseMessage = httpconn.getResponseMessage ();
206             if (responseMessage == null)
207                 responseMessage = "unknown error";
208             
209             if (responseCode >= 300)
210                 // HTTP failure
211                 throw new IOException   (responseCode + " " + responseMessage); 
212         }
213 //#endif JDK1.1
214 
215 //     System.err.println ("Original URL: " + origin.getURL());
216 //     System.err.println ("Final URL: " + conn.getURL());
217 
218         // download content
219         int maxKB = dp.getMaxPageSize ();
220         int maxBytes = (maxKB > 0) ? maxKB * 1024 : Integer.MAX_VALUE;
221         int expectedLength = conn.getContentLength ();
222         if (expectedLength > maxBytes)
223             throw new IOException   ("Page greater than " 
224                                    + maxBytes + " bytes");
225         if (expectedLength == -1)
226             expectedLength = TYPICAL_LENGTH;
227         byte[] buf = new byte[expectedLength];
228         int n;
229         int total = 0;
230 
231         while ((n = in.read (buf, total, buf.length - total)) != -1) {
232             total += n;
233             if (total > maxBytes)
234                 throw new IOException   ("Page greater than " 
235                                        + maxBytes + " bytes");
236             if (total == buf.length) {
237                 // try to read one more character
238                 int c = in.read ();
239                 if (c == -1)
240                     break; // EOF, we're done
241                 else {
242                     // need more space in array.  Double the array, but don't make
243                     // it bigger than maxBytes.
244                     byte[] newbuf = new byte[Math.min (buf.length * 2, maxBytes)];
245                     System.arraycopy (buf, 0, newbuf, 0, buf.length);
246                     buf = newbuf;
247                     buf[total++] = (byte) c;
248                 }
249             }                    
250         }
251         in.close ();
252         
253         if (total != buf.length) {
254             // resize the array to be precisely total bytes long
255             byte[] newbuf = new byte[total];
256             System.arraycopy (buf, 0, newbuf, 0, total);
257             buf = newbuf;
258         }
259  
260         contentBytes = buf;
261         content = new String   (buf);
262         start = 0;
263         end = total;
264         contentLock = 1;
265 
266         //  parse the response
267         if (contentType == null
268             || contentType.startsWith ("text/html") 
269             || contentType.startsWith ("content/unknown"))
270             parse (parser);
271     }
272 
273     void downloadSafely () {
274       try {
275           download (new DownloadParameters (), new HTMLParser ());
276       } catch (Throwable   e) {
277       }
278     }
279 
280     //
281     // Parsing
282     //
283 
284     /**
285      * Parse the page.  Assumes the page has already been downloaded.
286      * @param parser HTML parser to use
287      * @exception RuntimeException if an error occurs in downloading the page
288      */
289     public void parse (HTMLParser parser) {
290         if (!hasContent())
291             downloadSafely ();
292         try {
293             parser.parse (this);
294         } catch (IOException   e) {
295             throw new RuntimeException   (e.toString());
296         }
297     }
298     
299     /**
300      * Test whether page has been parsed.  Pages are parsed during 
301      * download only if its MIME type is HTML or unspecified.
302      * @return true if page was parsed, false if not
303      */
304     public boolean isParsed () {
305         return tokens != null;
306     }
307 
308     /**
309      * Test whether page is HTML.
310      * @return true if page is HTML.
311      */
312     public boolean isHTML () {
313         return root != null;
314     }
315 
316     /**
317      * Test whether page is a GIF or JPEG image.
318      * @return true if page is a GIF or JPEG image, false if not
319      */
320     public boolean isImage () {
321         byte[] bytes = getContentBytes ();
322         return startsWith (bytes, GIF_MAGIC) || startsWith (bytes, JPG_MAGIC);
323     }
324 
325     private static final byte[] GIF_MAGIC = { 
326         (byte) 'G', (byte)'I', (byte)'F', (byte)'8' 
327     };
328     private static final byte[] JPG_MAGIC = {
329         (byte) 0377, (byte) 0330, (byte) 0377,
330         (byte) 0340, (byte) 0, (byte) 020,
331         (byte) 'J', (byte) 'F', (byte) 'I', (byte) 'F'
332     };
333 
334     private boolean startsWith (byte[] bytes, byte[] prefix) {
335         if (prefix.length > bytes.length)
336             return false;
337         for (int i = 0, n = prefix.length; i < n; ++i)
338             if (bytes[i] != prefix[i])
339                 return false;
340         return true;
341     }
342 
343     //
344     // Content management
345     //
346 
347     /**
348      * Lock the page's content (to prevent it from being discarded).
349      * This method increments a lock counter, representing all the 
350      * callers interested in preserving the content.  The lock
351      * counter is set to 1 when the page is initially downloaded.
352      */
353     public void keepContent () {
354         if (contentLock > 0)
355             ++contentLock;
356     }
357 
358     /**
359      * Unlock the page's content (allowing it to be garbage-collected, to
360      * save space during a Web crawl).  This method decrements a lock counter.
361      * If the counter falls to
362      * 0 (meaning no callers are interested in the content), 
363      * the content is released.  At least the following
364      * fields are discarded: content, tokens, tags, words, elements, and
365      * root.  After the content has been discarded, calling getContent()
366      * (or getTokens(), getTags(), etc.) will force the page to be downloaded
367      * again.  Hopefully the download will come from the cache, however.
368      * <P> Links are not considered part of the content, and are not subject to
369      * discarding by this method.  Also, if the page was created from a string
370      * (rather than by downloading), its content is not subject to discarding 
371      * (since there would be no way to recover it). 
372      */
373     public void discardContent () {
374         if (contentLock == 0)    // already discarded
375             return;
376             
377         if (--contentLock > 0)   // somebody else still has a lock on the content
378             return;
379             
380         if (origin == null)
381             return;     // without an origin, we'd have no way to recover this page
382             
383         //System.err.println ("discarding content of " + toDescription());
384         contentBytes = null;
385         content = null;
386         tokens = null;
387         tags = null;
388         words = null;
389         elements = null;
390         root = null;
391         canonicalTags = null;
392 
393         // keep links, but isolate them from the element tree
394         if (links != null) {
395             for (int i=0; i<links.length; ++i) 
396                 if (links[i] instanceof Link)
397                     ((Link)links[i]).discardContent ();
398         }
399         
400         // FIX: debugging only: disconnect this page from its parent
401         //origin.page = null;
402         //origin = null;
403 
404         contentLock = 0;
405     }
406 
407     /**
408      * Test if page content is available.
409      * @return true if content is downloaded and available, false if content has not been downloaded 
410      * or has been discarded.
411      */
412     public final boolean hasContent () {
413         return contentLock != 0;
414     }
415 
416     //
417     // Page accessors
418     //
419 
420     /**
421      * Get depth of page in crawl.
422      * @return depth of page from root (depth of page is same as depth of its originating link)
423      */
424     public int getDepth () {
425         return origin != null ? origin.getDepth () : 0;
426     }
427     
428     /**
429      * Get the Link that points to this page.
430      * @return the Link object that was used to download this page.
431      */ 
432     public Link getOrigin () {
433         return origin;
434     }
435 
436     /**
437      * Get the base URL, relative to which the page's links were interpreted.
438      * The base URL defaults to the URL of the 
439      * Link that was used to download the page.  If any redirects occur
440      * while downloading the page, the final location becomes the new base
441      * URL.  Lastly, if a <BASE> element is found in the page, that
442      * becomes the new base URL.
443      * @return the page's base URL.
444      */ 
445     public URL   getBase () {
446         return base;
447     }
448 
449     /**
450      * Get the URL.
451      * @return the URL of the link that was used to download this page
452      */ 
453     public URL   getURL () {
454         return origin != null ? origin.getURL() : null;
455     }
456 
457     /**
458      * Get the title of the page.
459      * @return the page's title, or null if the page hasn't been parsed.
460      */
461     public String   getTitle () {
462         return title;
463     }
464 
465     /**
466      * Get the content of the page as a String.  May not work properly for
467      * binary data like images; use getContentBytes instead.
468      * @return the String content of the page.
469      */
470     public String   getContent () {
471         if (!hasContent())
472             downloadSafely ();
473         return content;
474     }
475 
476     /**
477      * Get the content of the page as an array of bytes.
478      * @return the content of the page in binary form.
479      */
480     public byte[] getContentBytes () {
481         if (!hasContent())
482             downloadSafely ();
483         return contentBytes;
484     }
485 
486     /**
487      * Get the token sequence of the page.  Tokens are tags and whitespace-delimited text.
488      * @return token regions in the page, or null if the page hasn't been downloaded or parsed.
489      */
490     public Region[] getTokens() {
491         if (!hasContent ())
492             downloadSafely ();
493         return tokens;
494     }
495 
496     /**
497      * Get the tag sequence of the page.
498      * @return tags in the page, or null if the page hasn't been downloaded or parsed.
499      */
500     public Tag[] getTags () {
501         if (!hasContent ())
502             downloadSafely ();
503         return tags;
504     }
505 
506     /**
507      * Get the words in the page.  Words are whitespace- and tag-delimited text.
508      * @return words in the page, or null if the page hasn't been downloaded or parsed.
509      */
510     public Text[] getWords () {
511         if (!hasContent ())
512             downloadSafely ();
513         return words;
514     }
515 
516     /**
517      * Get the HTML elements in the page.  All elements in the page
518      * are included in the list, in the order they would appear in
519      * an inorder traversal of the HTML parse tree.
520      * @return HTML elements in the page ordered by inorder, or null if the page
521      * hasn't been downloaded or parsed.
522      */
523     public Element[] getElements () {
524         if (!hasContent ())
525             downloadSafely ();
526         return elements;
527     }
528     
529     /**
530      * Get the root HTML element of the page.
531      * @return first top-level HTML element in the page, or null 
532      * if the page hasn't been downloaded or parsed.
533      */
534     public Element getRootElement () {
535         if (!hasContent ())
536             downloadSafely ();
537         return root;
538     }
539 
540     /**
541      * Get the links found in the page.
542      * @return links in the page, or null 
543      * if the page hasn't been downloaded or parsed.
544      */
545     public Link[] getLinks() {
546         return links;
547     }
548 
549     /**
550      * Convert the link's URL to a String
551      * @return the URL represented as a string
552      */
553     public String   toURL () {
554         return origin != null ? origin.toURL () : null;
555     }
556 
557     /**
558      * Generate a human-readable description of the page.
559      * @return a description of the link, in the form "title [url]".
560      */
561     public String   toDescription () {
562         return (title != null && title.length() > 0 ? title + " " : "") + "[" + getURL() + "]";
563     }
564 
565     /**
566      * Get page containing the region.
567      * @return page containing the region
568      */
569     public String   toString () {
570         return getContent ();
571     }
572 
573     /**
574      * Get last-modified date of page.
575      * @return the date when the page was last modified, or 0 if not known. 
576      * The value is number of seconds since January 1, 1970 GMT
577      */
578     public long getLastModified () {
579         return lastModified;
580     }
581     /**
582      * Set last-modified date of page.
583      * @param last the date when the page was last modified, or 0 if not known. 
584      * The value is number of seconds since January 1, 1970 GMT
585      */
586     public void setLastModified (long last) {
587         lastModified = last;
588     }
589 
590     /**
591      * Get expiration date of page.
592      * @return the expiration date of the page, or 0 if not known. 
593      * The value is number of seconds since January 1, 1970 GMT.
594      */
595     public long getExpiration () {
596         return expiration;
597     }
598     /**
599      * Set expiration date of page.
600      * @param expire the expiration date of the page, or 0 if not known. 
601      * The value is number of seconds since January 1, 1970 GMT.
602      */
603     public void setExpiration (long expire) {
604         expiration = expire;
605     }
606 
607     /**
608      * Get MIME type of page.
609      * @return the MIME type of page, such as "text/html", or null if not known. 
610      */
611     public String   getContentType () {
612         return contentType;
613     }
614     /**
615      * Set MIME type of page.
616      * @param type the MIME type of page, such as "text/html", or null if not known. 
617      */
618     public void setContentType (String   type) {
619         contentType = type;
620     }
621 
622     /**
623      * Get content encoding of page.
624      * @return the encoding type of page, such as "base-64", or null if not known. 
625      */
626     public String   getContentEncoding () {
627         return contentEncoding;
628     }
629     /**
630      * Set content encoding of page.
631      * @param encoding the encoding type of page, such as "base-64", or null if not known. 
632      */
633     public void setContentEncoding (String   encoding) {
634         contentEncoding = encoding;
635     }
636 
637     /**
638      * Get response code returned by the Web server.  For list of
639      * possible values, see java.net.HttpURLConnection.
640      * @return response code, such as 200 (for OK) or 404 (not found).
641      * Code is -1 if unknown.
642      * @see java.net.HttpURLConnection
643      */
644     public int getResponseCode () {
645         return responseCode;
646     }
647 
648     /**
649      * Get response message returned by the Web server.
650      * @return response message, such as "OK" or "Not Found".  The response message is null if the page failed to be fetched or not known. 
651      */
652     public String   getResponseMessage () {
653         return responseMessage;
654     }
655 
656     /**
657      * Get raw content found in a region.
658      * @param start starting offset of region
659      * @param end ending offset of region
660      * @return raw HTML contained in the region
661      */
662     public String   substringContent (int start, int end) {
663         return getContent ().substring (start, end);
664     }
665 
666     /**
667      * Get HTML found in a region.
668      * @param start starting offset of region
669      * @param end ending offset of region
670      * @return representation of region as HTML
671      */
672     public String   substringHTML (int start, int end) {
673         String   s = getContent ().substring (start, end);
674         if (!isHTML ()) {
675             s = Str.replace (s, "&", "&amp;");
676             s = Str.replace (s, "<", "&lt;");
677             s = Str.replace (s, ">", "&gt;");
678             s = "<PRE>" + s + "</PRE>";
679         }
680         return s;
681     }
682 
683     /**
684      * Get tagless text found in a region.
685      * Runs of whitespace and tags are reduced to a single space character.
686      * @param start starting offset of region
687      * @param end ending offset of region
688      * @return tagless text contained in the region
689      */
690     public String   substringText (int start, int end) {
691         if (words == null)
692             return ""; // page is not parsed
693 
694         // FIX: find some other mapping
695         StringBuffer   buf = new StringBuffer  ();
696         for (int j = findStart (words, start); j<words.length; ++j) {
697             if (words[j].end > end)
698                 break;
699             else {
700                 if (buf.length() > 0)
701                     buf.append (' ');
702                 buf.append (words[j].text);
703             }
704         }
705         return buf.toString();             
706     }
707 
708     /**
709      * Get HTML tags found in a region.  Whitespace and text among the
710      * tags are deleted.
711      * @param start starting offset of region
712      * @param end ending offset of region
713      * @return tags contained in the region
714      */
715     public String   substringTags (int start, int end) {
716         if (tags == null)
717             return ""; // page is not parsed
718 
719         // FIX: find some other mapping
720         StringBuffer   buf = new StringBuffer  ();
721         for (int j = findStart (tags, start); j<tags.length; ++j) {
722             if (tags[j].end > end)
723                 break;
724             else {
725                 if (buf.length() > 0)
726                     buf.append (' ');
727                 buf.append (getContent ().substring (tags[j].start, tags[j].end));
728             }
729         }
730         return buf.toString();             
731     }
732 
733     /**
734      * Get canonicalized HTML tags found in a region.
735      * A canonicalized tag looks like the following:
736      * <PRE>
737      * &lt;tagname#index attr=value attr=value attr=value ...&gt
738      * <PRE>
739      * where tagname and attr are all lowercase, index is the tag's
740      * index in the page's tokens array.  Attributes are sorted in
741      * increasing order by attribute name. Attributes without values
742      * omit the entire "=value" portion.  Values are delimited by a 
743      * space.  All occurences of &lt, &gt, space, and % characters 
744      * in a value are URL-encoded (e.g., space is converted to %20).  
745      * Thus the only occurences of these characters in the canonical 
746      * tag are the tag delimiters.
747      *
748      * <P>For example, raw HTML that looks like:
749      * <PRE>
750      * &lt;IMG SRC="http://foo.com/map&lt;&gt;.gif" ISMAP&gt;Image&lt;/IMG&gt;
751      * </PRE>
752      * would be canonicalized to:
753      * <PRE>
754      * &lt;img ismap SRC=http://foo.com/map%3C%3E.gif&gt;&lt;/img&gt;
755      * </PRE>
756      * <P>
757      * Comment and declaration tags (whose tag name is !) are omitted
758      * from the canonicalization.
759      *
760      * @param start starting offset of region
761      * @param end ending offset of region
762      * @return canonicalized tags contained in the region
763      */
764     public String   substringCanonicalTags (int start, int end) {
765         if (tokens == null)
766             return ""; // page is not parsed
767 
768         boolean all = (start == this.start && end == this.end);
769 
770         if (all && canonicalTags != null)
771             return canonicalTags;
772 
773         // FIX: find some other mapping
774         StringBuffer   buf = new StringBuffer  ();
775         for (int j = findStart (tokens, start); j<tokens.length; ++j) {
776             if (tokens[j].end > end)
777                 break;
778             else if (tokens[j] instanceof Tag)
779                 Tagexp.canonicalizeTag (buf, (Tag)tokens[j], j);
780         }
781 
782         String   result = buf.toString ();
783         if (all)
784             canonicalTags = result;
785         return result;
786     }
787 
788     public static void main (String  [] args) throws Exception   {
789         int method = Link.GET;
790 
791         for (int i=0; i<args.length; ++i) {
792             if (args[i].equals ("-post"))
793                 method = Link.POST;
794             else if (args[i].equals ("-get"))
795                 method = Link.GET;
796             else {
797                 Link link = method == Link.GET 
798                              ? new Link (args[i]) 
799                              : new Link (args[i]); // FIX: POST?
800                 try {
801                     Page p = new Page (link);
802                     System.out.write (p.getContentBytes ());
803                 } catch (IOException   e) {
804                     System.out.println (e);
805                 }
806             }
807         }
808     }
809 
810 }
811
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags