KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > lenya > lucene > parser > PreParser


1 /*
2  * Copyright 1999-2004 The Apache Software Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */

17
18 /* $Id: PreParser.java 42598 2004-03-01 16:18:28Z gregor $ */
19
20 package org.apache.lenya.lucene.parser;
21
22 import java.io.IOException JavaDoc;
23 import java.io.Reader JavaDoc;
24 import java.io.StringReader JavaDoc;
25
26 import org.apache.log4j.Category;
27
28 /**
29  * The Java HTML parser cannot handle self-closing text.
30  * This class converts all "/>" strings to ">" to avoid this problem.
31  */

32 public class PreParser {
33     
34     private static Category log = Category.getInstance(PreParser.class);
35     
36     /** Creates a new instance of PreParser */
37     public PreParser() {
38         log.debug("creating new object");
39     }
40
41     /**
42      * Parses HTML from a reader.
43      */

44     public Reader JavaDoc parse(Reader JavaDoc reader) throws IOException JavaDoc {
45         StringBuffer JavaDoc buffer = new StringBuffer JavaDoc();
46         boolean pending = false;
47
48         char[] chars = new char[1];
49
50         while (reader.read(chars) != -1) {
51             int lastPosition = buffer.length() - 1;
52
53             if ((chars[0] == '>') && (buffer.charAt(lastPosition) == '/')) {
54                 buffer.deleteCharAt(lastPosition);
55             }
56
57             buffer.append(chars[0]);
58         }
59
60         return new StringReader JavaDoc(buffer.toString());
61     }
62 }
63
Popular Tags