KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > components > search > LuceneIndexContentHandler


/*
 * Copyright 1999-2004 The Apache Software Foundation.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cocoon.components.search;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.helpers.AttributesImpl;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;

31 /**
32  * Parse XML and generate lucene document(s)
33  *
34  * can now be configured via SimpleLuceneXMLIndexerImpl
35  * to store specific tags in Lucene, so that you can
36  * display them with hits.
37  *
38  * @author <a HREF="mailto:berni_huber@a1.net">Bernhard Huber</a>
39  * @author <a HREF="mailto:jeremy@apache.org">Jeremy Quinn</a>
40  * @version CVS $Id: LuceneIndexContentHandler.java 30932 2004-07-29 17:35:38Z vgritsenko $
41  */

42 public class LuceneIndexContentHandler implements ContentHandler JavaDoc
43 {
44     public static final String JavaDoc LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";
45
46     /**
47      * If this attribute is specified on element, values of all attributes
48      * are added to the text of the element, and to the document
49      * body text
50      */

51     public static final String JavaDoc LUCENE_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";
52
53     StringBuffer JavaDoc bodyText;
54     private List JavaDoc documents;
55     private Document bodyDocument;
56     private Stack JavaDoc elementStack;
57     private HashSet JavaDoc fieldTags;
58
59     /**
60      * Constructor for the LuceneIndexContentHandler object
61      */

62     public LuceneIndexContentHandler() {
63         this.bodyText = new StringBuffer JavaDoc();
64         this.bodyDocument = new Document();
65         this.documents = new ArrayList JavaDoc();
66         this.documents.add(this.bodyDocument);
67         this.elementStack = new Stack JavaDoc();
68         this.fieldTags = new HashSet JavaDoc();
69     }
70
71     /**
72      * Sets the fieldTags attribute of the LuceneIndexContentHandler object
73      *
74      * @param fieldTags The new fieldTags value
75      */

76     public void setFieldTags(HashSet JavaDoc fieldTags) {
77         this.fieldTags = fieldTags;
78     }
79
80     /**
81      * Sets the documentLocator attribute of the LuceneIndexContentHandler object
82      *
83      * @param locator The new documentLocator value
84      */

85     public void setDocumentLocator(Locator JavaDoc locator) { }
86
87     public List JavaDoc allDocuments() {
88         return documents;
89     }
90
91     public Iterator JavaDoc iterator() {
92         return documents.iterator();
93     }
94
95     public void characters(char[] ch, int start, int length) {
96         if (ch.length > 0 && start >= 0 && length > 1) {
97             if (elementStack.size() > 0) {
98                 IndexHelperField tos = (IndexHelperField) elementStack.peek();
99                 tos.appendText(ch, start, length);
100             }
101             bodyText.append(' ');
102             bodyText.append(ch, start, length);
103         }
104     }
105
106     public void endDocument() {
107         bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD, bodyText.toString()));
108     }
109
110     public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName) {
111         IndexHelperField tos = (IndexHelperField) elementStack.pop();
112         String JavaDoc lname = tos.getLocalFieldName();
113         StringBuffer JavaDoc text = tos.getText();
114
115         // (VG): Atts are never null, see startElement
116
Attributes JavaDoc atts = tos.getAttributes();
117         boolean attributesToText = atts.getIndex(LUCENE_URI, LUCENE_ATTR_TO_TEXT_ATTRIBUTE) != -1;
118         for (int i = 0; i < atts.getLength(); i++) {
119             if (LUCENE_URI.equals(atts.getURI(i))) continue;
120
121             String JavaDoc atts_lname = atts.getLocalName(i);
122             String JavaDoc atts_value = atts.getValue(i);
123             bodyDocument.add(Field.UnStored(lname + "@" + atts_lname, atts_value));
124             if (attributesToText) {
125                 text.append(' ');
126                 text.append(atts_value);
127                 bodyText.append(' ');
128                 bodyText.append(atts_value);
129             }
130         }
131
132         if (text != null && text.length() > 0) {
133             if (isFieldTag(lname)) {
134                 bodyDocument.add(Field.UnIndexed(lname, text.toString()));
135             }
136             bodyDocument.add(Field.UnStored(lname, text.toString()));
137         }
138     }
139
140     public void endPrefixMapping(String JavaDoc prefix) { }
141
142     public void ignorableWhitespace(char[] ch, int start, int length) { }
143
144     public void processingInstruction(String JavaDoc target, String JavaDoc data) { }
145
146     public void skippedEntity(String JavaDoc name) { }
147
148     public void startDocument() { }
149
150     public void startElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName, Attributes JavaDoc atts) {
151         IndexHelperField ihf = new IndexHelperField(localName, qName, new AttributesImpl JavaDoc(atts));
152         elementStack.push(ihf);
153     }
154
155     public void startPrefixMapping(String JavaDoc prefix, String JavaDoc uri) { }
156
157     /**
158      * check if tag is a candidate for making into a Field
159      *
160      * @param tag local name of the tag we are processing
161      * @return boolean
162      */

163     private boolean isFieldTag(String JavaDoc tag) {
164         // by default do not make field
165
if (fieldTags == null) {
166             return false;
167         }
168         Iterator JavaDoc i = fieldTags.iterator();
169         while (i.hasNext()) {
170             if (tag.equals(i.next())) {
171                 return true;
172             }
173         }
174         return false;
175     }
176 }
177
Popular Tags