KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > javacoding > jspider > core > util > html > RobotsTXTLineSet


1 package net.javacoding.jspider.core.util.html;
2
3 import java.io.*;
4 import java.util.ArrayList JavaDoc;
5
/**
 * $Id: RobotsTXTLineSet.java,v 1.2 2003/02/13 20:12:56 vanrogu Exp $
 */

9 public class RobotsTXTLineSet {
10
11     public static final String JavaDoc USER_AGENT="user-agent:";
12
13     protected String JavaDoc userAgent;
14     protected RobotsTXTLine[] lines;
15
16     RobotsTXTLineSet(String JavaDoc userAgent, RobotsTXTLine[] lines) {
17         this.userAgent = userAgent;
18         this.lines = lines;
19     }
20
21     public String JavaDoc getUserAgent ( ) {
22         return userAgent;
23     }
24
25     public RobotsTXTLine[] getLines ( ) {
26         return lines;
27     }
28
29     public static RobotsTXTLineSet findLineSet ( InputStream is, String JavaDoc spiderUserAgent) throws IOException {
30         BufferedReader br = new BufferedReader(new InputStreamReader(is));
31         return findLineSet(br, spiderUserAgent);
32     }
33
34     public static RobotsTXTLineSet findLineSet(BufferedReader br, String JavaDoc spiderUserAgent) throws IOException {
35         String JavaDoc userAgent = findUserAgent ( br, spiderUserAgent );
36         if ( userAgent == null ) {
37           return null;
38         } else {
39           RobotsTXTLine[] lines = parseRules ( br );
40           return new RobotsTXTLineSet(userAgent, lines);
41         }
42     }
43
44     private static String JavaDoc findUserAgent ( BufferedReader br, String JavaDoc spiderUserAgent ) throws IOException {
45         if ( spiderUserAgent == null ) {
46             spiderUserAgent = "";
47         }
48         String JavaDoc spiderUserAgentLowerCase = spiderUserAgent.toLowerCase();
49         String JavaDoc line = br.readLine();
50         while (line != null) {
51             line = line.trim();
52             if (line.toLowerCase().startsWith(USER_AGENT)) {
53                 String JavaDoc userAgent = line.substring(USER_AGENT.length() + 1).trim();
54                 if (userAgent.equals("*") || spiderUserAgentLowerCase.indexOf(userAgent.toLowerCase()) > -1) {
55                     return userAgent;
56                 }
57             }
58             line = br.readLine();
59         }
60         return null;
61     }
62
63     private static RobotsTXTLine[] parseRules ( BufferedReader br ) throws IOException {
64         ArrayList JavaDoc al = new ArrayList JavaDoc();
65         String JavaDoc line = br.readLine();
66         while (line != null && (line.toLowerCase().indexOf(USER_AGENT) == -1)) {
67             RobotsTXTLine robotsTXTline = RobotsTXTLine.parse(line);
68             if ( robotsTXTline != null && robotsTXTline.getType() == RobotsTXTLine.ROBOTSTXT_RULE_DISALLOW ) {
69                 al.add ( robotsTXTline );
70             }
71             line = br.readLine();
72         }
73         return (RobotsTXTLine[]) al.toArray(new RobotsTXTLine[al.size()]);
74     }
75
76 }
77
Popular Tags