1 26 27 29 package de.nava.informa.utils; 30 31 import java.io.BufferedInputStream ; 32 import java.io.InputStream ; 33 import java.io.IOException ; 34 import java.net.URL ; 35 36 import org.apache.commons.logging.Log; 37 import org.apache.commons.logging.LogFactory; 38 39 import de.nava.informa.core.ChannelFormat; 40 import de.nava.informa.core.UnsupportedFormatException; 41 42 48 public final class FormatDetector { 49 50 private static Log logger = LogFactory.getLog(FormatDetector.class); 51 52 private static final int NR_FIRST_BYTES = 2048; 53 54 55 69 public static ChannelFormat getFormat(URL url) 70 throws IOException , UnsupportedFormatException { 71 72 logger.info("Trying to retrieve stream from " + url); 73 BufferedInputStream in = new BufferedInputStream (url.openStream(), 74 NR_FIRST_BYTES); 75 return getFormat(in); 76 } 77 78 92 public static ChannelFormat getFormat(InputStream in) 93 throws IOException , UnsupportedFormatException { 94 95 byte[] b = new byte[NR_FIRST_BYTES]; 96 97 int bytesRead = 0; 98 while (bytesRead < NR_FIRST_BYTES) { 99 int bytes = in.read(b, bytesRead, NR_FIRST_BYTES - bytesRead); 100 if (bytes == -1) break; 101 bytesRead += bytes; 102 } 103 104 String rootElement = getRootElement(b); 105 logger.debug("Detected [" + rootElement + "]."); 106 if (rootElement.startsWith("rss")) { 107 if (rootElement.indexOf("0.91") > 0) { 108 logger.info("Channel uses RSS root element (Version 0.91)."); 109 return ChannelFormat.RSS_0_91; 110 } else if (rootElement.indexOf("0.92") > 0) { 111 logger.info("Channel uses RSS root element (Version 0.92)."); 112 return ChannelFormat.RSS_0_92; 115 } else if (rootElement.indexOf("0.93") > 0) { 116 logger.info("Channel uses RSS root element (Version 0.93)."); 117 logger.warn("RSS 0.93 not fully supported yet, fall back to 0.92."); 118 return ChannelFormat.RSS_0_92; 121 } else if (rootElement.indexOf("0.94") > 0) { 122 logger.info("Channel uses RSS root element (Version 0.94)."); 123 logger.warn("RSS 0.94 not fully supported yet, fall back to 0.92."); 124 return ChannelFormat.RSS_0_92; 127 } else if (rootElement.indexOf("2.0") > 0) { 128 logger.info("Channel uses RSS root element (Version 2.0)."); 129 return ChannelFormat.RSS_2_0; 130 } else { 131 throw new UnsupportedFormatException("Unsupported RSS version [" + 132 rootElement + "]."); 133 } 134 } else if (rootElement.indexOf("rdf") >= 0) { 135 logger.info("Channel uses RDF root element."); 136 return ChannelFormat.RSS_1_0; 137 } else { 138 throw new UnsupportedFormatException("Not able to parse document " + 139 "with root element [" + 140 rootElement + "]."); 141 } 142 } 143 144 148 private static final String getRootElement(byte[] b) { 149 String s = new String (b); 150 int startPos = 0; 151 int endPos = 0; 152 boolean inComment = false; 153 for (int i = 0; i < s.length(); i++) { 154 if (s.charAt(i) == '<' && Character.isLetter(s.charAt(i+1)) 155 && !inComment) { 156 startPos = i + 1; 157 for (int j = i + 1; j < s.length(); j++) { 158 if (s.charAt(j) == '>') { 159 endPos = j; 160 break; 161 } 162 } 163 break; 164 } 165 else if (!inComment && s.charAt(i) == '<' && s.charAt(i+1) == '!' 166 && s.charAt(i+2) == '-' && s.charAt(i+3) == '-') 167 inComment = true; 168 else if (inComment && s.charAt(i) == '-' && s.charAt(i+1) == '-' 169 && s.charAt(i+2) == '>') 170 inComment = false; 171 } if (startPos >= 0 && endPos >= 0 && endPos > startPos) { 173 return s.substring(startPos, endPos); 174 } else { 175 throw new IllegalArgumentException ("Unable to retrieve root " + 176 "element from " + s); 177 } 178 } 179 180 } 181 | Popular Tags |