KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > rtf > direct > RtfTokeniser


1 /**
2  * $Id: RtfTokeniser.java 2429 2006-10-06 14:58:54Z psoares33 $
3  * $Name$
4  *
5  * Copyright 2006 by Mark Hall
6  *
7  * The contents of this file are subject to the Mozilla Public License Version 1.1
8  * (the "License"); you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
10  *
11  * Software distributed under the License is distributed on an "AS IS" basis,
12  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13  * for the specific language governing rights and limitations under the License.
14  *
15  * The Original Code is 'iText, a free JAVA-PDF library'.
16  *
17  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
18  * the Initial Developer are Copyright (C) 1999-2006 by Bruno Lowagie.
19  * All Rights Reserved.
20  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
21  * are Copyright (C) 2000-2006 by Paulo Soares. All Rights Reserved.
22  *
23  * Contributor(s): all the names of the contributors are added in the source code
24  * where applicable.
25  *
26  * Alternatively, the contents of this file may be used under the terms of the
27  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
28  * provisions of LGPL are applicable instead of those above. If you wish to
29  * allow use of your version of this file only under the terms of the LGPL
30  * License and not to allow others to use your version of this file under
31  * the MPL, indicate your decision by deleting the provisions above and
32  * replace them with the notice and other provisions required by the LGPL.
33  * If you do not delete the provisions above, a recipient may use your version
34  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
35  *
36  * This library is free software; you can redistribute it and/or modify it
37  * under the terms of the MPL as stated above or under the terms of the GNU
38  * Library General Public License as published by the Free Software Foundation;
39  * either version 2 of the License, or any later version.
40  *
41  * This library is distributed in the hope that it will be useful, but WITHOUT
42  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
43  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
44  * details.
45  *
46  * If you didn't download this code from the following link, you should check if
47  * you aren't using an obsolete version:
48  * http://www.lowagie.com/iText/
49  */

50
51 package com.lowagie.text.rtf.direct;
52
53 import java.io.IOException JavaDoc;
54 import java.io.Reader JavaDoc;
55
56 /**
57  * The RtfTokeniser takes an RTF document stream and
58  * turns it into a set of RTF tokens. Five groups of
59  * tokens are differentiated:
60  *
61  * <ul>
62  * <li>Group opening: {</li>
63  * <li>Group closing: }</li>
64  * <li>Control characters</li>
65  * <li>Control words</li>
66  * <li>Text</li>
67  * </ul>
68  *
69  * @version $Revision: 2429 $
70  * @author Mark Hall (mhall@edu.uni-klu.ac.at)
71  * @author Bullo (bullo70@users.sourceforge.net)
72  */

73 public class RtfTokeniser {
74     /**
75      * The RtfTokeniser is in its ground state. Any token may follow.
76      */

77     private static final int TOKENISER_STATE_READY = 0;
78     /**
79      * The last token parsed was a slash.
80      */

81     private static final int TOKENISER_STATE_SLASH = 1;
82     /**
83      * The RtfTokeniser is currently tokenising a control word.
84      */

85     private static final int TOKENISER_STATE_IN_CTRL_WORD = 2;
86     /**
87      * The RtfTokeniser is currently tokenising a text.
88      */

89     private static final int TOKENISER_STATE_IN_TEXT = 4;
90     
91     /**
92      * The current state of this RtfTokeniser.
93      */

94     private int state = TOKENISER_STATE_READY;
95     /**
96      * The current group nesting level.
97      */

98     private int groupLevel = 0;
99     /**
100      * The RtfParser to send tokens to.
101      */

102     private RtfParser rtfParser = null;
103
104     /**
105      * Constructs a new RtfTokeniser. The startGroupLevel is required when parsing
106      * RTF fragments, since they are missing the opening group and closing group
107      * and thus this has to be set at the beginning.
108      *
109      * @param rtfParser The RtfParser to send tokens to.
110      * @param startGroupLevel The starting group nesting level. 0 for full documents, 1 for fragments.
111      */

112     public RtfTokeniser(RtfParser rtfParser, int startGroupLevel) {
113         this.rtfParser = rtfParser;
114         this.groupLevel = startGroupLevel;
115     }
116     
117     /**
118      * The main tokenisation method. Implements a LL(1) parser.
119      *
120      * @param reader The Reader to read the RTF document from.
121      * @throws IOException On I/O errors.
122      */

123     public void tokenise(Reader JavaDoc reader) throws IOException JavaDoc {
124         char[] nextChar = new char[1];
125         StringBuffer JavaDoc temp = new StringBuffer JavaDoc();
126         this.state = TOKENISER_STATE_READY;
127         this.groupLevel = 0;
128         while(reader.read(nextChar) != -1) {
129             if(this.state == TOKENISER_STATE_READY) { // No influence from previous characters.
130
if(nextChar[0] == '{') { // Open a group
131
this.rtfParser.handleOpenGroup(this.groupLevel);
132                     groupLevel++;
133                 } else if(nextChar[0] == '}') { // Close a group
134
this.rtfParser.handleCloseGroup(this.groupLevel);
135                     groupLevel--;
136                 } else if(nextChar[0] == '\\') {
137                     this.state = TOKENISER_STATE_SLASH;
138                     temp = new StringBuffer JavaDoc();
139                 } else {
140                     this.state = TOKENISER_STATE_IN_TEXT;
141                     temp.append(nextChar[0]);
142                 }
143             } else if((this.state & TOKENISER_STATE_SLASH) == TOKENISER_STATE_SLASH) { // A slash signals a control character or word or an escaped character
144
if(nextChar[0] == '{') {
145                     this.state = TOKENISER_STATE_IN_TEXT;
146                     temp.append("\\{");
147                 } else if(nextChar[0] == '}') {
148                     this.state = TOKENISER_STATE_IN_TEXT;
149                     temp.append("\\}");
150                 } else if(nextChar[0] == '\\') {
151                     this.state = TOKENISER_STATE_IN_TEXT;
152                     temp.append("\\\\");
153                 } else {
154                     if((this.state & TOKENISER_STATE_IN_TEXT) == TOKENISER_STATE_IN_TEXT) { // A control word or character closes previous text token
155
this.rtfParser.handleText(temp.toString(), this.groupLevel);
156                         temp = new StringBuffer JavaDoc();
157                     }
158                     if(nextChar[0] == '|') {
159                         this.state = TOKENISER_STATE_READY;
160                         this.rtfParser.handleCtrlCharacter("\\|", this.groupLevel);
161                     } else if(nextChar[0] == '~') {
162                         this.state = TOKENISER_STATE_READY;
163                         this.rtfParser.handleCtrlCharacter("\\~", this.groupLevel);
164                     } else if(nextChar[0] == '-') {
165                         this.state = TOKENISER_STATE_READY;
166                         this.rtfParser.handleCtrlCharacter("\\-", this.groupLevel);
167                     } else if(nextChar[0] == '_') {
168                         this.state = TOKENISER_STATE_READY;
169                         this.rtfParser.handleCtrlCharacter("\\_", this.groupLevel);
170                     } else if(nextChar[0] == ':') {
171                         this.state = TOKENISER_STATE_READY;
172                         this.rtfParser.handleCtrlCharacter("\\:", this.groupLevel);
173                     } else if(nextChar[0] == '*') {
174                         this.state = TOKENISER_STATE_READY;
175                         this.rtfParser.handleCtrlCharacter("\\*", this.groupLevel);
176                     } else {
177                         this.state = TOKENISER_STATE_IN_CTRL_WORD;
178                         temp = new StringBuffer JavaDoc("\\");
179                         temp.append(nextChar[0]);
180                     }
181                 }
182             } else if(this.state == TOKENISER_STATE_IN_CTRL_WORD) { // Control words run until a space, close or open group or another control word is found.
183
if(nextChar[0] == '\n' || nextChar[0] == '\r') {
184                     nextChar[0] = ' ';
185                 }
186                 if(nextChar[0] == '{') {
187                     this.rtfParser.handleCtrlWord(temp.toString(), this.groupLevel);
188                     this.rtfParser.handleOpenGroup(this.groupLevel);
189                     groupLevel++;
190                     this.state = TOKENISER_STATE_READY;
191                     temp = new StringBuffer JavaDoc();
192                 } else if(nextChar[0] == '}') {
193                     this.rtfParser.handleCtrlWord(temp.toString(), this.groupLevel);
194                     this.rtfParser.handleCloseGroup(this.groupLevel);
195                     groupLevel--;
196                     this.state = TOKENISER_STATE_READY;
197                     temp = new StringBuffer JavaDoc();
198                 } else if(nextChar[0] == '\\') {
199                     this.rtfParser.handleCtrlWord(temp.toString(), this.groupLevel);
200                     this.state = TOKENISER_STATE_SLASH;
201                     temp = new StringBuffer JavaDoc();
202                 } else if(nextChar[0] == ' ') {
203                     this.rtfParser.handleCtrlWord(temp.toString(), this.groupLevel);
204                     this.rtfParser.handleText(" ", this.groupLevel);
205                     this.state = TOKENISER_STATE_READY;
206                     temp = new StringBuffer JavaDoc();
207                 } else if(nextChar[0] == ';') {
208                     this.rtfParser.handleCtrlWord(temp.toString(), this.groupLevel);
209                     this.rtfParser.handleText(";", this.groupLevel);
210                     this.state = TOKENISER_STATE_READY;
211                     temp = new StringBuffer JavaDoc();
212                 } else {
213                     temp.append(nextChar[0]);
214                 }
215             } else if(this.state == TOKENISER_STATE_IN_TEXT) { // Text tokens are closed by control characters or words or open and close groups
216
if(nextChar[0] == '{') {
217                     this.rtfParser.handleText(temp.toString(), this.groupLevel);
218                     this.rtfParser.handleOpenGroup(this.groupLevel);
219                     groupLevel++;
220                     this.state = TOKENISER_STATE_READY;
221                     temp = new StringBuffer JavaDoc();
222                 } else if(nextChar[0] == '}') {
223                     this.rtfParser.handleText(temp.toString(), this.groupLevel);
224                     this.rtfParser.handleCloseGroup(this.groupLevel);
225                     groupLevel--;
226                     this.state = TOKENISER_STATE_READY;
227                     temp = new StringBuffer JavaDoc();
228                 } else if(nextChar[0] == '\\') {
229                     this.state = TOKENISER_STATE_IN_TEXT | TOKENISER_STATE_SLASH;
230                 } else {
231                     temp.append(nextChar[0]);
232                 }
233             }
234         }
235         if((this.state & TOKENISER_STATE_IN_TEXT) == TOKENISER_STATE_IN_TEXT && !temp.toString().equals("")) { // If at the end a text token was being parsed, emmit that token. Required for RTF fragments
236
this.rtfParser.handleText(temp.toString(), this.groupLevel);
237         }
238     }
239 }
240
Popular Tags