KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > eclipse > help > search > XMLSearchParticipant


/*******************************************************************************
 * Copyright (c) 2005, 2007 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 * IBM Corporation - initial API and implementation
 *******************************************************************************/

11 package org.eclipse.help.search;
12
13 import java.io.IOException JavaDoc;
14 import java.io.InputStream JavaDoc;
15 import java.io.Reader JavaDoc;
16 import java.io.StringReader JavaDoc;
17 import java.net.URL JavaDoc;
18 import java.util.Stack JavaDoc;
19
20 import javax.xml.parsers.SAXParser JavaDoc;
21 import javax.xml.parsers.SAXParserFactory JavaDoc;
22
23 import org.apache.lucene.document.Document;
24 import org.apache.lucene.document.Field;
25 import org.eclipse.core.runtime.IStatus;
26 import org.eclipse.core.runtime.Status;
27 import org.eclipse.help.internal.base.HelpBasePlugin;
28 import org.eclipse.help.internal.dynamic.DocumentReader;
29 import org.eclipse.help.internal.dynamic.ExtensionHandler;
30 import org.eclipse.help.internal.dynamic.IncludeHandler;
31 import org.eclipse.help.internal.dynamic.ProcessorHandler;
32 import org.eclipse.help.internal.dynamic.XMLProcessor;
33 import org.xml.sax.Attributes JavaDoc;
34 import org.xml.sax.InputSource JavaDoc;
35 import org.xml.sax.SAXException JavaDoc;
36 import org.xml.sax.helpers.DefaultHandler JavaDoc;
37
38 /**
39  * An abstract search participants for adding XML documents to the Lucene search index. Subclass it
40  * and implement or override protected methods to handle parsing of the document.
41  *
42  * @since 3.2
43  */

44 public abstract class XMLSearchParticipant extends LuceneSearchParticipant {
45     private Stack JavaDoc stack = new Stack JavaDoc();
46     private SAXParser JavaDoc parser;
47     private XMLProcessor processor;
48     private boolean hasFilters;
49
50     /**
51      * Class that implements this interface is used to store data obtained during the parsing phase.
52      */

53     protected interface IParsedXMLContent {
54
55         /**
56          * Returns the locale of the index.
57          *
58          * @return the locale string
59          */

60         String JavaDoc getLocale();
61
62         /**
63          * Sets the title of the parsed document for indexing.
64          *
65          * @param title
66          * the document title
67          */

68         void setTitle(String JavaDoc title);
69
70         /**
71          * Sets the optional summary of the parsed document that can be later rendered for the
72          * search hits.
73          *
74          * @param summary
75          * the short document summary
76          */

77         void addToSummary(String JavaDoc summary);
78
79         /**
80          * Adds the text to the content buffer for indexing.
81          *
82          * @param text
83          * the text to add to the document content buffer
84          */

85         void addText(String JavaDoc text);
86     }
87
88     private static class ParsedXMLContent implements IParsedXMLContent {
89         private StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
90         private StringBuffer JavaDoc summary = new StringBuffer JavaDoc();
91         private String JavaDoc title;
92         private String JavaDoc locale;
93         private static int SUMMARY_LENGTH = 200;
94
95         public ParsedXMLContent(String JavaDoc locale) {
96             this.locale = locale;
97         }
98
99         public String JavaDoc getLocale() {
100             return locale;
101         }
102
103         public void setTitle(String JavaDoc title) {
104             this.title = title;
105         }
106
107         public void addToSummary(String JavaDoc text) {
108             if (summary.length() >= SUMMARY_LENGTH)
109                 return;
110             if (summary.length() > 0)
111                 summary.append(" "); //$NON-NLS-1$
112
summary.append(text);
113             if (summary.length() > SUMMARY_LENGTH)
114                 summary.delete(SUMMARY_LENGTH, summary.length());
115         }
116
117         public void addText(String JavaDoc text) {
118             if (buffer.length() > 0)
119                 buffer.append(" "); //$NON-NLS-1$
120
buffer.append(text);
121         }
122
123         public Reader JavaDoc newContentReader() {
124             return new StringReader JavaDoc(buffer.toString());
125         }
126
127         public String JavaDoc getSummary() {
128             // if the summary starts with the title, trim that part off.
129
String JavaDoc summaryStr = summary.toString();
130             if (title != null && summaryStr.length() >= title.length()) {
131                 String JavaDoc header = summaryStr.substring(0, title.length());
132                 if (header.equalsIgnoreCase(title)) {
133                     return summaryStr.substring(title.length()).trim();
134                 }
135             }
136             return summaryStr;
137         }
138
139         public String JavaDoc getTitle() {
140             return title;
141         }
142     }
143
144     private class XMLHandler extends DefaultHandler JavaDoc {
145
146         public ParsedXMLContent data;
147
148         public XMLHandler(ParsedXMLContent data) {
149             this.data = data;
150         }
151
152         public void startElement(String JavaDoc uri, String JavaDoc localName, String JavaDoc qName, Attributes JavaDoc attributes)
153                 throws SAXException JavaDoc {
154             stack.push(qName);
155             handleStartElement(qName, attributes, data);
156             if (attributes.getValue("filter") != null || qName.equalsIgnoreCase("filter")) { //$NON-NLS-1$ //$NON-NLS-2$
157
hasFilters = true;
158             }
159         }
160
161         public void endElement(String JavaDoc uri, String JavaDoc localName, String JavaDoc qName) throws SAXException JavaDoc {
162             handleEndElement(qName, data);
163             String JavaDoc top = (String JavaDoc) stack.peek();
164             if (top != null && top.equals(qName))
165                 stack.pop();
166         }
167
168         /*
169          * (non-Javadoc)
170          *
171          * @see org.xml.sax.helpers.DefaultHandler#startDocument()
172          */

173         public void startDocument() throws SAXException JavaDoc {
174             XMLSearchParticipant.this.handleStartDocument(data);
175         }
176
177         /*
178          * (non-Javadoc)
179          *
180          * @see org.xml.sax.helpers.DefaultHandler#endDocument()
181          */

182         public void endDocument() throws SAXException JavaDoc {
183             XMLSearchParticipant.this.handleEndDocument(data);
184         }
185
186         /*
187          * (non-Javadoc)
188          *
189          * @see org.xml.sax.helpers.DefaultHandler#processingInstruction(java.lang.String,
190          * java.lang.String)
191          */

192         public void processingInstruction(String JavaDoc target, String JavaDoc pidata) throws SAXException JavaDoc {
193             handleProcessingInstruction(target, data);
194         }
195
196         /*
197          * (non-Javadoc)
198          *
199          * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
200          */

201         public void characters(char[] characters, int start, int length) throws SAXException JavaDoc {
202             if (length == 0)
203                 return;
204             StringBuffer JavaDoc buff = new StringBuffer JavaDoc();
205             for (int i = 0; i < length; i++) {
206                 buff.append(characters[start + i]);
207             }
208             String JavaDoc text = buff.toString().trim();
209             if (text.length() > 0)
210                 handleText(text, data);
211         }
212         
213         /*
214          * Note: throws clause does not declare IOException due to a bug in
215          * sun jdk: http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6327149
216          *
217          * @see org.xml.sax.helpers.DefaultHandler#resolveEntity(java.lang.String, java.lang.String)
218          */

219         public InputSource JavaDoc resolveEntity(String JavaDoc publicId, String JavaDoc systemId) throws SAXException JavaDoc {
220             return new InputSource JavaDoc(new StringReader JavaDoc("")); //$NON-NLS-1$
221
}
222     }
223
224     /**
225      * Called when the element has been started.
226      *
227      * @param name
228      * the element name
229      * @param attributes
230      * the element attributes
231      * @param data
232      * data the parser content data to update
233      */

234     protected abstract void handleStartElement(String JavaDoc name, Attributes JavaDoc attributes, IParsedXMLContent data);
235
236     /**
237      * Called when the element has been ended.
238      *
239      * @param name
240      * the name of the XML element
241      * @param data
242      * data the parser content data to update
243      */

244     protected abstract void handleEndElement(String JavaDoc name, IParsedXMLContent data);
245
246     /**
247      * Called when the XML document has been started.
248      *
249      * @param data
250      * data the parser content data to update
251      */

252     protected void handleStartDocument(IParsedXMLContent data) {
253     }
254
255     /**
256      * Called when the XML document has been ended.
257      *
258      * @param data
259      * data the parser content data to update
260      */

261     protected void handleEndDocument(IParsedXMLContent data) {
262     }
263
264     /**
265      * Called when a processing instruction has been encountered.
266      *
267      * @param type
268      * the instruction data
269      * @param data
270      * the parser content data to update
271      */

272     protected void handleProcessingInstruction(String JavaDoc type, IParsedXMLContent data) {
273     }
274
275     /**
276      * Called when element body text has been encountered. Use 'getElementStackPath()' to determine
277      * the element in question.
278      *
279      * @param text
280      * the body text
281      * @param data
282      * the parser content data to update
283      */

284     protected abstract void handleText(String JavaDoc text, IParsedXMLContent data);
285
286     /*
287      * @see LuceneSearchParticipant#addDocument(String, String, URL, String, Document)
288      */

289     public IStatus addDocument(ISearchIndex index, String JavaDoc pluginId, String JavaDoc name, URL JavaDoc url, String JavaDoc id,
290             Document doc) {
291         InputStream JavaDoc stream = null;
292         try {
293             if (parser == null) {
294                 parser = SAXParserFactory.newInstance().newSAXParser();
295             }
296             stack.clear();
297             hasFilters = false;
298             ParsedXMLContent parsed = new ParsedXMLContent(index.getLocale());
299             XMLHandler handler = new XMLHandler(parsed);
300             stream = url.openStream();
301             stream = preprocess(stream, name, index.getLocale());
302             parser.parse(stream, handler);
303             doc.add(new Field("contents", parsed.newContentReader())); //$NON-NLS-1$
304
doc.add(new Field("exact_contents", parsed.newContentReader())); //$NON-NLS-1$
305
String JavaDoc title = parsed.getTitle();
306             if (title != null)
307                 addTitle(title, doc);
308             String JavaDoc summary = parsed.getSummary();
309             if (summary != null)
310                 doc.add(new Field("summary", summary, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
311
if (hasFilters) {
312                 doc.add(new Field("filters", "true", Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$ //$NON-NLS-2$
313
}
314             return Status.OK_STATUS;
315         } catch (Exception JavaDoc e) {
316             return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR,
317                     "Exception occurred while adding document " + name //$NON-NLS-1$
318
+ " to index.", //$NON-NLS-1$
319
e);
320         } finally {
321             if (stream != null) {
322                 try {
323                     stream.close();
324                 } catch (IOException JavaDoc e) {
325                 }
326                 stream = null;
327             }
328         }
329     }
330
331     /**
332      * Returns the name of the element that is currently at the top of the element stack.
333      *
334      * @return the name of the element that is currently at the top of the element stack
335      */

336
337     protected String JavaDoc getTopElement() {
338         return (String JavaDoc) stack.peek();
339     }
340
341     /**
342      * Returns the full path of the current element in the stack separated by the '/' character.
343      *
344      * @return the path to the current element in the stack.
345      */

346     protected String JavaDoc getElementStackPath() {
347         StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
348         for (int i = 0; i < stack.size(); i++) {
349             if (i > 0)
350                 buf.append("/"); //$NON-NLS-1$
351
buf.append((String JavaDoc) stack.get(i));
352         }
353         return buf.toString();
354     }
355     
356     /**
357      * <p>
358      * Pre-processes the given document input stream for the given document name and locale.
359      * This implementation will resolve dynamic content that is applicable to searching,
360      * e.g. includes and extensions, but not filters. Subclasses may override to do their
361      * own pre-processing.
362      * </p>
363      * <p>
364      * For performance, implementations that handle documents that do not support dynamic
365      * content should subclass and return the original stream.
366      * </p>
367      *
368      * @param in the input stream for the document content
369      * @param name the name of the document as it appears in the index
370      * @param locale the locale code, e.g. "en_US"
371      * @return the processed content
372      * @since 3.3
373      */

374     protected InputStream JavaDoc preprocess(InputStream JavaDoc in, String JavaDoc name, String JavaDoc locale) {
375         if (processor == null) {
376             DocumentReader reader = new DocumentReader();
377             processor = new XMLProcessor(new ProcessorHandler[] {
378                 new IncludeHandler(reader, locale),
379                 new ExtensionHandler(reader, locale)
380             });
381         }
382         try {
383             return processor.process(in, name, null);
384         }
385         catch (Throwable JavaDoc t) {
386             String JavaDoc msg = "An error occured while pre-processing user assistance document \"" + name + "\" for search indexing"; //$NON-NLS-1$ //$NON-NLS-2$
387
HelpBasePlugin.logError(msg, t);
388             return in;
389         }
390     }
391 }
392
Popular Tags