KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > cocoon > slop > parsing > SimpleSlopParser


1 /*
2  * Copyright 1999-2002,2004-2005 The Apache Software Foundation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */

16 package org.apache.cocoon.slop.parsing;
17
18 import org.xml.sax.ContentHandler JavaDoc;
19 import org.xml.sax.SAXException JavaDoc;
20 import org.xml.sax.helpers.AttributesImpl JavaDoc;
21 import org.apache.cocoon.ProcessingException;
22 import org.apache.cocoon.xml.XMLUtils;
23 import org.apache.cocoon.slop.interfaces.SlopParser;
24 import org.apache.cocoon.slop.interfaces.SlopConstants;
25
26 /**
27  * Simplistic SLOP parser, recognizes the following constructs:
28  *
29  * Field: a line starting with letters and : is considered a field
30  *
31  * Empty lines are detected.
32  * Other lines are output as line elements
33  *
34  * This is sufficient for basic parsing of RFC 822 headers,
35  * but a configurable rfc822 mode would be good to differentiate
36  * between the header and body of the email message and parse them
37  * with different rules.
38  *
39  * @author <a HREF="mailto:bdelacretaz@apache.org">Bertrand Delacretaz</a>
40  * @version $Id: SimpleSlopParser.java 164808 2005-04-26 16:07:03Z vgritsenko $
41  */

42 public class SimpleSlopParser implements SlopParser,SlopConstants {
43
44     private ContentHandler JavaDoc contentHandler;
45
46     /** chars that can be part of a field name (other than letters) */
47     private final static String JavaDoc DEFAULT_TAGNAME_CHARS = "-_";
48     private String JavaDoc tagnameChars = DEFAULT_TAGNAME_CHARS;
49
50     /** valid characters in an XML element name (in addition to letters and digits) */
51     final static String JavaDoc VALID_TAGNAME_CHARS = "_-";
52     final static String JavaDoc TAGNAME_REPLACEMENT_CHAR = "_";
53
54     /** optionally preserve whitespace in input */
55     private boolean preserveSpace = false;
56
57     /** count lines */
58     private int lineCounter;
59
60     /** result of parsing a line */
61     static class ParsedLine {
62         final String JavaDoc name;
63         final String JavaDoc contents;
64
65         ParsedLine(String JavaDoc elementName, String JavaDoc elementContents) {
66             name = filterElementName(elementName);
67             contents = elementContents;
68         }
69     }
70
71     /** make sure element names are valid XML */
72     static String JavaDoc filterElementName(String JavaDoc str) {
73         final StringBuffer JavaDoc sb = new StringBuffer JavaDoc();
74         for(int i=0; i < str.length(); i++) {
75             final char c = str.charAt(i);
76             if(Character.isLetter(c)) {
77                 sb.append(c);
78             } else if(Character.isDigit(c) && i > 0) {
79                 sb.append(c);
80             } else if(VALID_TAGNAME_CHARS.indexOf(c) >= 0) {
81                 sb.append(c);
82             } else {
83                 sb.append(TAGNAME_REPLACEMENT_CHAR);
84             }
85         }
86         return sb.toString();
87     }
88
89     /** set the list of valid chars for tag names (in addition to letters) */
90     public void setValidTagnameChars(String JavaDoc str) {
91         tagnameChars = (str == null ? DEFAULT_TAGNAME_CHARS : str.trim());
92     }
93
94     /** optionally preserve whitespace in input */
95     public void setPreserveWhitespace(boolean b) {
96         preserveSpace = b;
97     }
98
99     /** must be called before any call to processLine() */
100     public void startDocument(ContentHandler JavaDoc destination)
101     throws SAXException JavaDoc, ProcessingException {
102         contentHandler = destination;
103         contentHandler.startDocument();
104         contentHandler.startPrefixMapping("", SLOP_NAMESPACE_URI);
105         contentHandler.startElement(SLOP_NAMESPACE_URI, SLOP_ROOT_ELEMENT, SLOP_ROOT_ELEMENT, XMLUtils.EMPTY_ATTRIBUTES);
106     }
107
108     /** must be called once all calls to processLine() are done */
109     public void endDocument()
110     throws SAXException JavaDoc, ProcessingException {
111         contentHandler.endElement(SLOP_NAMESPACE_URI, SLOP_ROOT_ELEMENT, SLOP_ROOT_ELEMENT);
112         contentHandler.endPrefixMapping("");
113         contentHandler.endDocument();
114         contentHandler = null;
115     }
116
117     /** add simple name-value attribute to attr */
118     private void setAttribute(AttributesImpl JavaDoc attr,String JavaDoc name,String JavaDoc value) {
119         final String JavaDoc ATTR_TYPE = "NMTOKEN";
120         attr.addAttribute("",name,name,ATTR_TYPE,value);
121     }
122
123     /** call this to process input lines, does the actual parsing */
124     public void processLine(String JavaDoc line)
125     throws SAXException JavaDoc, ProcessingException {
126         if(contentHandler == null) {
127             throw new ProcessingException("SimpleSlopParser content handler is null (startDocument not called?)");
128         }
129
130         // find out which element name to use, based on the contents of the line
131
final ParsedLine p = parseLine(line);
132
133         // generate the element and its contents
134
lineCounter++;
135         final AttributesImpl JavaDoc atts = new AttributesImpl JavaDoc();
136         setAttribute(atts,SLOP_ATTR_LINENUMBER,String.valueOf(lineCounter));
137         contentHandler.startElement(SLOP_NAMESPACE_URI, p.name, p.name, atts);
138         contentHandler.characters(p.contents.toCharArray(),0,p.contents.length());
139         contentHandler.endElement(SLOP_NAMESPACE_URI, p.name, p.name);
140     }
141
142     /** parse a line, extract element name and contents */
143     protected ParsedLine parseLine(String JavaDoc line) {
144         ParsedLine result = null;
145
146         // empty lines
147
if(line == null || line.trim().length()==0) {
148             result = new ParsedLine(SLOP_EMPTY_LINE_ELEMENT,"");
149         }
150
151         // simple extraction of field names, lines starting with alpha chars followed
152
// by a colon are parsed as follows:
153
//
154
// input:
155
// field-name: this line is a field
156
// output:
157
// <field-name>this line is a field</field-name>
158
if(result == null) {
159             final int colonPos = line.indexOf(':');
160             if(colonPos > 0) {
161                 boolean fieldFound = true;
162                 for(int i=0; i < colonPos; i++) {
163                     final char c = line.charAt(i);
164                     final boolean isFieldChar = Character.isLetter(c) || tagnameChars.indexOf(c) >= 0;
165                     if(!isFieldChar) {
166                         fieldFound = false;
167                         break;
168                     }
169                 }
170
171                 if(fieldFound) {
172                     String JavaDoc contents = "";
173                     if(line.length() > colonPos + 1) {
174                         final String JavaDoc str = line.substring(colonPos+1);
175                         contents = (preserveSpace ? str : str.trim());
176                     }
177                     result = new ParsedLine(line.substring(0,colonPos),contents);
178                 }
179             }
180         }
181
182         // default: output a line element
183
if(result == null) {
184             final String JavaDoc str = (preserveSpace ? line : line.trim());
185             result = new ParsedLine(SLOP_LINE_ELEMENT,str);
186         }
187
188         return result;
189     }
190 }
191
Popular Tags