KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > net > nutch > parse > ParserFactory


1 /* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
2 /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
3
4 package net.nutch.parse;
5
6 import java.util.Hashtable JavaDoc;
7
8 import net.nutch.plugin.*;
9
10 import java.util.logging.Logger JavaDoc;
11 import net.nutch.util.LogFormatter;
12
13 /** Creates and caches {@link Parser} plugins.*/
14 public class ParserFactory {
15
16   public static final Logger JavaDoc LOG = LogFormatter
17     .getLogger(ParserFactory.class.getName());
18
19   private static final ExtensionPoint X_POINT = PluginRepository.getInstance()
20       .getExtensionPoint(Parser.X_POINT_ID);
21
22   static {
23     if (X_POINT == null) {
24       throw new RuntimeException JavaDoc("x point "+Parser.X_POINT_ID+" not found.");
25     }
26   }
27
28   private static final Hashtable JavaDoc CACHE = new Hashtable JavaDoc();
29
30   private ParserFactory() {} // no public ctor
31

32   /** Returns the appropriate {@link Parser} implementation given a content
33    * type and url.
34    *
35    * <p>Parser extensions should define the attributes"contentType" and/or
36    * "pathSuffix". Content type has priority: the first plugin found whose
37    * "contentType" attribute matches the beginning of the content's type is
38    * used. If none match, then the first whose "pathSuffix" attribute matches
39    * the end of the url's path is used. If neither of these match, then the
40    * first plugin whose "pathSuffix" is the empty string is used.
41    */

42   public static Parser getParser(String JavaDoc contentType, String JavaDoc url)
43     throws ParserNotFound {
44
45     try {
46       Extension extension = getExtension(contentType, getSuffix(url));
47       if (extension == null)
48         throw new ParserNotFound(url, contentType);
49
50       return (Parser)extension.getExtensionInstance();
51
52     } catch (PluginRuntimeException e) {
53       throw new ParserNotFound(url, contentType, e.toString());
54     }
55   }
56
57   private static String JavaDoc getSuffix(String JavaDoc url) {
58     int i = url.lastIndexOf('.');
59     int j = url.lastIndexOf('/');
60     if (i == -1 || i == url.length()-1 || i < j)
61       return null;
62     return url.substring(i+1);
63   }
64
65
66   private static Extension getExtension(String JavaDoc contentType, String JavaDoc suffix)
67     throws PluginRuntimeException {
68
69     //LOG.fine("getExtension: contentType="+contentType+" suffix="+suffix);
70

71     String JavaDoc key = contentType + "+" + suffix;
72
73     if (CACHE.containsKey(key))
74       return (Extension)CACHE.get(key);
75     
76     Extension extension = findExtension(contentType, suffix);
77     
78     CACHE.put(key, extension);
79     
80     return extension;
81   }
82
83   private static Extension findExtension(String JavaDoc contentType, String JavaDoc suffix)
84     throws PluginRuntimeException{
85
86     //LOG.fine("findExtension: contentType="+contentType+" suffix="+suffix);
87

88     Extension[] extensions = X_POINT.getExtentens();
89
90     // first look for a content-type match
91
if (contentType != null) {
92       for (int i = 0; i < extensions.length; i++) {
93         Extension extension = extensions[i];
94         if (contentType.startsWith(extension.getAttribute("contentType")))
95           return extension; // found a match
96
}
97     }
98
99     // next look for a url path suffix match
100
if (suffix != null) {
101       for (int i = 0; i < extensions.length; i++) {
102         Extension extension = extensions[i];
103         if (suffix.equals(extension.getAttribute("pathSuffix")))
104           return extension; // found a match
105
}
106     }
107
108     // finally, look for an extension that accepts anything
109
for (int i = 0; i < extensions.length; i++) {
110       Extension extension = extensions[i];
111       if ("".equals(extension.getAttribute("pathSuffix"))) // matches all
112
return extension;
113     }
114
115     return null;
116   }
117 }
118
Popular Tags