1 16 package org.apache.jetspeed.services.search.handlers; 17 18 import java.io.IOException ; 20 import java.net.URL ; 21 22 import org.apache.commons.httpclient.HttpClient; 24 import org.apache.commons.httpclient.HttpException; 25 import org.apache.commons.httpclient.methods.GetMethod; 26 27 import org.apache.jetspeed.services.logging.JetspeedLogFactoryService; 29 import org.apache.jetspeed.services.logging.JetspeedLogger; 30 import org.apache.jetspeed.services.search.AbstractObjectHandler; 31 import org.apache.jetspeed.services.search.BaseParsedObject; 32 import org.apache.jetspeed.services.search.ParsedObject; 33 34 40 public class URLToDocHandler extends AbstractObjectHandler 41 { 42 45 private static final JetspeedLogger logger = JetspeedLogFactoryService.getLogger(URLToDocHandler.class.getName()); 46 47 53 public ParsedObject parseObject(Object o) 54 { 55 ParsedObject result = new BaseParsedObject(); 56 57 if ((o instanceof URL ) == false) 58 { 59 logger.error("URLToDocHandler: invalid object type: " + o); 60 return null; 61 } 62 63 URL pageToAdd = (URL ) o; 64 65 HttpClient client = new HttpClient(); 66 client.startSession(pageToAdd); 67 GetMethod method = new GetMethod(pageToAdd.getPath()); 68 method.setFollowRedirects(true); 69 int statusCode = -1; 70 int attempt = 0; 71 72 while (statusCode == -1 && attempt < 3) 74 { 75 try 76 { 77 client.executeMethod(method); 79 statusCode = method.getStatusCode(); 80 if (logger.isDebugEnabled()) 81 { 82 logger.debug("URL = " + pageToAdd.toString() + "Status code = " + statusCode); 83 } 84 } 85 catch (HttpException e) 86 { 87 } 89 catch (IOException e) 90 { 91 return null; 92 } 93 } 94 if (statusCode != -1) 96 { 97 String content = null; 98 try 99 { 100 content = method.getDataAsString(); 101 } 102 catch (IOException ioe) 103 { 104 logger.error("Getting content for " + pageToAdd.toString(), ioe); 105 } 106 107 if (content != null) 108 { 109 try 110 { 111 result.setKey(java.net.URLEncoder.encode(pageToAdd.toString())); 112 result.setType(ParsedObject.OBJECT_TYPE_URL); 113 result.setTitle(pageToAdd.toString()); 115 result.setContent(content); 116 result.setDescription(""); 117 result.setLanguage(""); 118 result.setURL(pageToAdd); 119 result.setClassName(o.getClass().getName()); 120 logger.info("Parsed '" + pageToAdd.toString() + "'"); 121 } 122 catch (Exception e) 123 { 124 e.printStackTrace(); 125 logger.error("Adding document to index", e); 126 } 127 } 128 } 129 try 130 { 131 client.endSession(); 132 } 133 catch (IOException ioe) 134 { 135 ioe.printStackTrace(); 136 logger.error("Ending session to " + pageToAdd.toString(), ioe); 137 } 138 139 return result; 140 141 } 142 } 143 144 | Popular Tags |