// KickJava: Java API By Example, From Geeks To Geeks.
//
// Java > Open Source Codes > org > apache > lenya > lucene > parser > SwingHTMLHandler


/*
 * Copyright 1999-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/* $Id: SwingHTMLHandler.java 42598 2004-03-01 16:18:28Z gregor $ */

package org.apache.lenya.lucene.parser;

import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTML.Tag;
import javax.swing.text.html.HTMLEditorKit.ParserCallback;

import org.apache.log4j.Category;

35 public class SwingHTMLHandler extends ParserCallback {
36     Category log = Category.getInstance(SwingHTMLHandler.class);
37
38     /**
39      * Creates a new instance of SwingHTMLHandler
40      */

41     public SwingHTMLHandler() {
42         debug("\n\n\n\n\nCreating " + getClass().getName());
43
44         // index everything by default
45
startIndexing();
46     }
47
48     private TagStack tagStack = new TagStack();
49
50     protected TagStack getStack() {
51         return tagStack;
52     }
53
54     private StringBuffer JavaDoc titleBuffer = new StringBuffer JavaDoc();
55     private StringBuffer JavaDoc keywordsBuffer = new StringBuffer JavaDoc();
56
57     /**
58      *
59      */

60     protected void appendToTitle(char[] data) {
61         titleBuffer.append(data);
62     }
63
64     /**
65      * Get title
66      *
67      * @return DOCUMENT ME!
68      */

69     public String JavaDoc getTitle() {
70         debug("\n\nTitle: " + titleBuffer.toString());
71
72         return titleBuffer.toString();
73     }
74
75     /**
76      * Get keywords
77      *
78      * @return DOCUMENT ME!
79      */

80     public String JavaDoc getKeywords() {
81         log.debug("Keywords: " + keywordsBuffer.toString());
82
83         return keywordsBuffer.toString();
84     }
85
86     private StringBuffer JavaDoc contentsBuffer = new StringBuffer JavaDoc();
87
88     protected void appendToContents(char[] data) {
89         contentsBuffer.append(data);
90     }
91
92     /**
93      * DOCUMENT ME!
94      *
95      * @return DOCUMENT ME!
96      */

97     public Reader JavaDoc getReader() {
98         debug("\nContents: " + contentsBuffer.toString());
99
100         return new StringReader JavaDoc(contentsBuffer.toString());
101     }
102
103     private boolean indexing;
104
105     protected boolean isIndexing() {
106         return indexing;
107     }
108
109     protected void startIndexing() {
110         indexing = true;
111     }
112
113     protected void stopIndexing() {
114         indexing = false;
115     }
116
117     //-------------------------------------------------------------------------
118
// Tag handling
119
//-------------------------------------------------------------------------
120

121     /**
122      * Handles a start tag.
123      */

124     public void handleStartTag(Tag tag, MutableAttributeSet JavaDoc attributes, int pos) {
125         getStack().push(tag);
126
127         // append whitespace
128
if (!contentsBuffer.toString().endsWith(" ")) {
129             contentsBuffer.append(" ");
130         }
131
132         if (tag.equals(HTML.Tag.META)) {
133             handleMetaTag(attributes);
134         }
135
136         if (tag.equals(HTML.Tag.TITLE)) {
137             handleTitleStartTag();
138         }
139
140         if (isTagInitialized() && tag.equals(getLuceneTag())) {
141             handleLuceneStartTag(tag, attributes);
142         }
143     }
144
145     /**
146      * Handles an end tag.
147      */

148     public void handleEndTag(Tag tag, int pos) {
149         // append whitespace
150
if (!contentsBuffer.toString().endsWith(" ")) {
151             contentsBuffer.append(" ");
152         }
153
154         if (isTagInitialized() && tag.equals(getLuceneTag())) {
155             handleLuceneEndTag();
156         }
157
158         if (tag.equals(HTML.Tag.TITLE)) {
159             handleTitleEndTag();
160         }
161
162         try {
163             getStack().pop();
164         } catch (TagStack.UnderflowException e) {
165             log(e);
166         }
167     }
168
169     //-------------------------------------------------------------------------
170
// Title
171
//-------------------------------------------------------------------------
172
private boolean titleParsing;
173
174     protected boolean isTitleParsing() {
175         return titleParsing;
176     }
177
178     protected void startTitleParsing() {
179         titleParsing = true;
180     }
181
182     protected void stopTitleParsing() {
183         titleParsing = false;
184     }
185
186     protected void handleTitleStartTag() {
187         startTitleParsing();
188     }
189
190     protected void handleTitleEndTag() {
191         stopTitleParsing();
192     }
193
194     //-------------------------------------------------------------------------
195
// Lucene metag tags
196
//-------------------------------------------------------------------------
197
public static final String JavaDoc LUCENE_TAG_NAME = "lucene-tag-name";
198     public static final String JavaDoc LUCENE_CLASS_VALUE = "lucene-class-value";
199     private HTML.Tag JavaDoc luceneTag = null;
200
201     /**
202      * Sets the tag name used to avoid indexing.
203      */

204     protected void setLuceneTag(HTML.Tag JavaDoc tag) {
205         debug("Lucene tag: " + tag);
206         luceneTag = tag;
207     }
208
209     /**
210      * Returns the tag name used to avoid indexing.
211      */

212     protected HTML.Tag JavaDoc getLuceneTag() {
213         return luceneTag;
214     }
215
216     private String JavaDoc luceneClassValue = null;
217
218     /**
219      * Sets the value for the <code>class</code> attribute used to avoid indexing.
220      */

221     protected void setLuceneClassValue(String JavaDoc value) {
222         debug("Lucene class value: " + value);
223         luceneClassValue = value;
224     }
225
226     /**
227      * Returns the value for the <code>class</code> attribute used to avoid indexing.
228      */

229     protected String JavaDoc getLuceneClassValue() {
230         return luceneClassValue;
231     }
232
233     /**
234      * Returns if the Lucene META tags are provided.
235      */

236     protected boolean isTagInitialized() {
237         return (getLuceneTag() != null) && (getLuceneClassValue() != null);
238     }
239
240     /**
241      * Handles a META tag. This method checks for the Lucene configuration tags.
242      */

243     protected void handleMetaTag(MutableAttributeSet JavaDoc attributes) {
244         Object JavaDoc nameObject = attributes.getAttribute(HTML.Attribute.NAME);
245         Object JavaDoc valueObject = attributes.getAttribute(HTML.Attribute.VALUE);
246
247         if ((nameObject != null) && (valueObject != null)) {
248             String JavaDoc name = (String JavaDoc) nameObject;
249             log.debug("Meta tag found: name = " + name);
250
251             if (name.equals(LUCENE_TAG_NAME)) {
252                 String JavaDoc tagName = (String JavaDoc) valueObject;
253                 HTML.Tag JavaDoc tag = HTML.getTag(tagName.toLowerCase());
254                 setLuceneTag(tag);
255             }
256
257             if (name.equals(LUCENE_CLASS_VALUE)) {
258                 setLuceneClassValue((String JavaDoc) valueObject);
259             }
260         }
261
262         Object JavaDoc contentObject = attributes.getAttribute(HTML.Attribute.CONTENT);
263         if ((nameObject != null) && (contentObject != null)) {
264             String JavaDoc name = (String JavaDoc) nameObject;
265             log.debug("Meta tag found: name = " + name);
266             if (name.equals("keywords")) {
267                 log.debug("Keywords found ...");
268                 keywordsBuffer = new StringBuffer JavaDoc((String JavaDoc) contentObject);
269             }
270         }
271
272         // do not index everything if tags are provided
273
if (isTagInitialized()) {
274             stopIndexing();
275         }
276     }
277
278     //-------------------------------------------------------------------------
279
// Lucene index control tags
280
//-------------------------------------------------------------------------
281
private TagStack luceneStack = new TagStack();
282
283     protected TagStack getLuceneStack() {
284         return luceneStack;
285     }
286
287     /**
288      * Handles a Lucene index control start tag.
289      */

290     protected void handleLuceneStartTag(HTML.Tag JavaDoc tag, MutableAttributeSet JavaDoc attributes) {
291         Object JavaDoc valueObject = attributes.getAttribute(HTML.Attribute.CLASS);
292
293         if (valueObject != null) {
294             String JavaDoc value = (String JavaDoc) valueObject;
295
296             if (value.equals(getLuceneClassValue())) {
297                 getLuceneStack().push(tag);
298                 debug("");
299                 debug("---------- Starting indexing ----------");
300                 startIndexing();
301             }
302         }
303     }
304
305     /**
306      * Handles a Lucene index control end tag.
307      */

308     protected void handleLuceneEndTag() {
309         try {
310             HTML.Tag JavaDoc stackTag = getStack().top();
311
312             if (!getLuceneStack().isEmpty()) {
313                 HTML.Tag JavaDoc luceneTag = getLuceneStack().top();
314
315                 if (stackTag == luceneTag) {
316                     debug("");
317                     debug("---------- Stopping indexing ----------");
318                     getLuceneStack().pop();
319                     stopIndexing();
320                 }
321             }
322         } catch (TagStack.UnderflowException e) {
323             log("Lucene index control tag not closed!", e);
324         }
325     }
326
327     /**
328      * Handles an end tag.
329      */

330     public void handleSimpleTag(Tag tag, MutableAttributeSet JavaDoc attributes, int pos) {
331         handleStartTag(tag, attributes, pos);
332         handleEndTag(tag, pos);
333     }
334
335     //-------------------------------------------------------------------------
336
// Text handling
337
//-------------------------------------------------------------------------
338
public void handleText(char[] data, int pos) {
339         //String string = new String(data);
340
//System.out.println(indent + string.substring(0, Math.min(20, string.length())) + " ...");
341
if (isDebug) {
342             System.out.println(".handleText(): data: " + new String JavaDoc(data));
343         }
344
345         /*
346                 if (data[0] == '>') {
347                    throw new IllegalStateException();
348                    }
349         */

350         if (isIndexing() || isTitleParsing()) {
351             appendToContents(data);
352         }
353
354         if (isTitleParsing()) {
355             appendToTitle(data);
356         }
357     }
358
359     //-------------------------------------------------------------------------
360
// Logging
361
//-------------------------------------------------------------------------
362
private boolean isDebug = false;
363
364     /**
365      * Logs a message.
366      */

367     protected void debug(String JavaDoc message) {
368         if (isDebug) {
369             System.out.println(message);
370         }
371     }
372
373     /**
374      * Logs an exception.
375      */

376     protected void log(Exception JavaDoc e) {
377         log("", e);
378     }
379
380     /**
381      * Logs an exception with a message.
382      */

383     protected void log(String JavaDoc message, Exception JavaDoc e) {
384         System.out.print(getClass().getName() + ": " + message + " ");
385         e.printStackTrace(System.out);
386     }
387
388     /**
389      * DOCUMENT ME!
390      */

391     public class TagStack {
392         private List JavaDoc tags = new ArrayList JavaDoc();
393
394         /**
395          * DOCUMENT ME!
396          *
397          * @param tag DOCUMENT ME!
398          */

399         public void push(HTML.Tag JavaDoc tag) {
400             tags.add(0, tag);
401         }
402
403         /**
404          * DOCUMENT ME!
405          *
406          * @return DOCUMENT ME!
407          *
408          * @throws UnderflowException DOCUMENT ME!
409          */

410         public HTML.Tag JavaDoc pop() throws UnderflowException {
411             HTML.Tag JavaDoc tag = top();
412             tags.remove(tag);
413
414             return tag;
415         }
416
417         /**
418          * DOCUMENT ME!
419          *
420          * @return DOCUMENT ME!
421          *
422          * @throws UnderflowException DOCUMENT ME!
423          */

424         public HTML.Tag JavaDoc top() throws UnderflowException {
425             HTML.Tag JavaDoc tag = null;
426
427             if (!tags.isEmpty()) {
428                 tag = (HTML.Tag JavaDoc) tags.get(0);
429             } else {
430                 throw new UnderflowException();
431             }
432
433             return tag;
434         }
435
436         /**
437          * DOCUMENT ME!
438          *
439          * @return DOCUMENT ME!
440          */

441         public boolean isEmpty() {
442             return tags.isEmpty();
443         }
444
445         /**
446          * DOCUMENT ME!
447          */

448         public void dump() {
449             System.out.print("stack: ");
450
451             for (Iterator JavaDoc i = tags.iterator(); i.hasNext();) {
452                 System.out.print(i.next() + ", ");
453             }
454
455             System.out.println("");
456         }
457
458         /**
459          * DOCUMENT ME!
460          */

461         public class UnderflowException extends Exception JavaDoc {
462             /**
463              * Creates a new UnderflowException object.
464              */

465             public UnderflowException() {
466                 super("Stack underflow");
467             }
468         }
469     }
470 }
// Popular Tags