KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > jetspeed > services > search > handlers > URLToDocHandler


1 /*
2  * Copyright 2000-2004 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.jetspeed.services.search.handlers;
17
18 // Java imports
19
import java.io.IOException JavaDoc;
20 import java.net.URL JavaDoc;
21
22 // Commons HTTPClient
23
import org.apache.commons.httpclient.HttpClient;
24 import org.apache.commons.httpclient.HttpException;
25 import org.apache.commons.httpclient.methods.GetMethod;
26
27 // Jetspeed imports
28
import org.apache.jetspeed.services.logging.JetspeedLogFactoryService;
29 import org.apache.jetspeed.services.logging.JetspeedLogger;
30 import org.apache.jetspeed.services.search.AbstractObjectHandler;
31 import org.apache.jetspeed.services.search.BaseParsedObject;
32 import org.apache.jetspeed.services.search.ParsedObject;
33
34 /**
35  * This object handler deals with URLs.
36  *
37  * @author <a HREF="mailto:morciuch@apache.org">Mark Orciuch</a>
38  * @version $Id: URLToDocHandler.java,v 1.4 2004/02/23 03:47:46 jford Exp $
39  */

40 public class URLToDocHandler extends AbstractObjectHandler
41 {
42     /**
43      * Static initialization of the logger for this class
44      */

45     private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLToDocHandler.class.getName());
46     
47     /**
48      * Parses a specific object into a document suitable for index placement
49      *
50      * @param o
51      * @return
52      */

53     public ParsedObject parseObject(Object JavaDoc o)
54     {
55         ParsedObject result = new BaseParsedObject();
56
57         if ((o instanceof URL JavaDoc) == false)
58         {
59             logger.error("URLToDocHandler: invalid object type: " + o);
60             return null;
61         }
62
63         URL JavaDoc pageToAdd = (URL JavaDoc) o;
64
65         HttpClient client = new HttpClient();
66         client.startSession(pageToAdd);
67         GetMethod method = new GetMethod(pageToAdd.getPath());
68         method.setFollowRedirects(true);
69         int statusCode = -1;
70         int attempt = 0;
71
72         // We will retry up to 3 times.
73
while (statusCode == -1 && attempt < 3)
74         {
75             try
76             {
77                 // execute the method.
78
client.executeMethod(method);
79                 statusCode = method.getStatusCode();
80                 if (logger.isDebugEnabled())
81                 {
82                     logger.debug("URL = " + pageToAdd.toString() + "Status code = " + statusCode);
83                 }
84             }
85             catch (HttpException e)
86             {
87                 // We will retry
88
}
89             catch (IOException JavaDoc e)
90             {
91                 return null;
92             }
93         }
94         // Check that we didn't run out of retries.
95
if (statusCode != -1)
96         {
97             String JavaDoc content = null;
98             try
99             {
100                 content = method.getDataAsString();
101             }
102             catch (IOException JavaDoc ioe)
103             {
104                 logger.error("Getting content for " + pageToAdd.toString(), ioe);
105             }
106
107             if (content != null)
108             {
109                 try
110                 {
111                     result.setKey(java.net.URLEncoder.encode(pageToAdd.toString()));
112                     result.setType(ParsedObject.OBJECT_TYPE_URL);
113                     // TODO: We should extract the <title> tag here.
114
result.setTitle(pageToAdd.toString());
115                     result.setContent(content);
116                     result.setDescription("");
117                     result.setLanguage("");
118                     result.setURL(pageToAdd);
119                     result.setClassName(o.getClass().getName());
120                     logger.info("Parsed '" + pageToAdd.toString() + "'");
121                 }
122                 catch (Exception JavaDoc e)
123                 {
124                     e.printStackTrace();
125                     logger.error("Adding document to index", e);
126                 }
127             }
128         }
129         try
130         {
131             client.endSession();
132         }
133         catch (IOException JavaDoc ioe)
134         {
135             ioe.printStackTrace();
136             logger.error("Ending session to " + pageToAdd.toString(), ioe);
137         }
138
139         return result;
140
141     }
142 }
143
144
Popular Tags