1 16 package org.apache.cocoon.components.search; 17 18 import org.apache.lucene.document.Document; 19 import org.apache.lucene.document.Field; 20 import org.xml.sax.Attributes ; 21 import org.xml.sax.ContentHandler ; 22 import org.xml.sax.Locator ; 23 import org.xml.sax.helpers.AttributesImpl ; 24 25 import java.util.ArrayList ; 26 import java.util.HashSet ; 27 import java.util.Iterator ; 28 import java.util.List ; 29 import java.util.Stack ; 30 31 42 public class LuceneIndexContentHandler implements ContentHandler 43 { 44 public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0"; 45 46 51 public static final String LUCENE_ATTR_TO_TEXT_ATTRIBUTE = "text-attr"; 52 53 StringBuffer bodyText; 54 private List documents; 55 private Document bodyDocument; 56 private Stack elementStack; 57 private HashSet fieldTags; 58 59 62 public LuceneIndexContentHandler() { 63 this.bodyText = new StringBuffer (); 64 this.bodyDocument = new Document(); 65 this.documents = new ArrayList (); 66 this.documents.add(this.bodyDocument); 67 this.elementStack = new Stack (); 68 this.fieldTags = new HashSet (); 69 } 70 71 76 public void setFieldTags(HashSet fieldTags) { 77 this.fieldTags = fieldTags; 78 } 79 80 85 public void setDocumentLocator(Locator locator) { } 86 87 public List allDocuments() { 88 return documents; 89 } 90 91 public Iterator iterator() { 92 return documents.iterator(); 93 } 94 95 public void characters(char[] ch, int start, int length) { 96 if (ch.length > 0 && start >= 0 && length > 1) { 97 if (elementStack.size() > 0) { 98 IndexHelperField tos = (IndexHelperField) elementStack.peek(); 99 tos.appendText(ch, start, length); 100 } 101 bodyText.append(' '); 102 bodyText.append(ch, start, length); 103 } 104 } 105 106 public void endDocument() { 107 bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD, bodyText.toString())); 108 } 109 110 public void endElement(String namespaceURI, String localName, String qName) { 111 IndexHelperField tos = (IndexHelperField) elementStack.pop(); 112 String lname = tos.getLocalFieldName(); 113 StringBuffer text = tos.getText(); 114 115 Attributes atts = tos.getAttributes(); 117 boolean attributesToText = atts.getIndex(LUCENE_URI, LUCENE_ATTR_TO_TEXT_ATTRIBUTE) != -1; 118 for (int i = 0; i < atts.getLength(); i++) { 119 if (LUCENE_URI.equals(atts.getURI(i))) continue; 120 121 String atts_lname = atts.getLocalName(i); 122 String atts_value = atts.getValue(i); 123 bodyDocument.add(Field.UnStored(lname + "@" + atts_lname, atts_value)); 124 if (attributesToText) { 125 text.append(' '); 126 text.append(atts_value); 127 bodyText.append(' '); 128 bodyText.append(atts_value); 129 } 130 } 131 132 if (text != null && text.length() > 0) { 133 if (isFieldTag(lname)) { 134 bodyDocument.add(Field.UnIndexed(lname, text.toString())); 135 } 136 bodyDocument.add(Field.UnStored(lname, text.toString())); 137 } 138 } 139 140 public void endPrefixMapping(String prefix) { } 141 142 public void ignorableWhitespace(char[] ch, int start, int length) { } 143 144 public void processingInstruction(String target, String data) { } 145 146 public void skippedEntity(String name) { } 147 148 public void startDocument() { } 149 150 public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { 151 IndexHelperField ihf = new IndexHelperField(localName, qName, new AttributesImpl (atts)); 152 elementStack.push(ihf); 153 } 154 155 public void startPrefixMapping(String prefix, String uri) { } 156 157 163 private boolean isFieldTag(String tag) { 164 if (fieldTags == null) { 166 return false; 167 } 168 Iterator i = fieldTags.iterator(); 169 while (i.hasNext()) { 170 if (tag.equals(i.next())) { 171 return true; 172 } 173 } 174 return false; 175 } 176 } 177 | Popular Tags |