1 2 3 4 package net.nutch.parse; 5 6 import net.nutch.protocol.Content; 7 import net.nutch.plugin.*; 8 9 import org.w3c.dom.DocumentFragment ; 10 11 12 public class HtmlParseFilters { 13 14 private static final HtmlParseFilter[] CACHE; 15 static { 16 try { 17 ExtensionPoint point = PluginRepository.getInstance() 18 .getExtensionPoint(HtmlParseFilter.X_POINT_ID); 19 if (point == null) 20 throw new RuntimeException (HtmlParseFilter.X_POINT_ID+" not found."); 21 Extension[] extensions = point.getExtentens(); 22 CACHE = new HtmlParseFilter[extensions.length]; 23 for (int i = 0; i < extensions.length; i++) { 24 Extension extension = extensions[i]; 25 CACHE[i] = (HtmlParseFilter)extension.getExtensionInstance(); 26 } 27 } catch (PluginRuntimeException e) { 28 throw new RuntimeException (e); 29 } 30 } 31 32 private HtmlParseFilters() {} 34 35 public static Parse filter(Content content,Parse parse,DocumentFragment doc) 36 throws ParseException { 37 38 for (int i = 0 ; i < CACHE.length; i++) { 39 parse = CACHE[i].filter(content, parse, doc); 40 } 41 42 return parse; 43 } 44 } 45 | Popular Tags |