KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > pdf > hyphenation > SimplePatternParser


/*
 * Copyright 2005 by Paulo Soares.
 *
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above. If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 *
 * If you didn't download this code from the following link, you should check if
 * you aren't using an obsolete version:
 * http://www.lowagie.com/iText/
 */

package com.lowagie.text.pdf.hyphenation;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;

import com.lowagie.text.ExceptionConverter;
import com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
import com.lowagie.text.xml.simpleparser.SimpleXMLParser;

60 /** Parses the xml hyphenation pattern.
61  *
62  * @author Paulo Soares (psoares@consiste.pt)
63  */

64 public class SimplePatternParser implements SimpleXMLDocHandler, PatternConsumer {
65     int currElement;
66     PatternConsumer consumer;
67     StringBuffer JavaDoc token;
68     ArrayList JavaDoc exception;
69     char hyphenChar;
70     SimpleXMLParser parser;
71     
72     static final int ELEM_CLASSES = 1;
73     static final int ELEM_EXCEPTIONS = 2;
74     static final int ELEM_PATTERNS = 3;
75     static final int ELEM_HYPHEN = 4;
76
77     /** Creates a new instance of PatternParser2 */
78     public SimplePatternParser() {
79         token = new StringBuffer JavaDoc();
80         hyphenChar = '-'; // default
81
}
82     
83     public void parse(InputStream JavaDoc stream, PatternConsumer consumer) {
84         this.consumer = consumer;
85         try {
86             SimpleXMLParser.parse(this, stream);
87         }
88         catch (IOException JavaDoc e) {
89             throw new ExceptionConverter(e);
90         }
91         finally {
92             try{stream.close();}catch(Exception JavaDoc e){}
93         }
94     }
95     
96     protected static String JavaDoc getPattern(String JavaDoc word) {
97         StringBuffer JavaDoc pat = new StringBuffer JavaDoc();
98         int len = word.length();
99         for (int i = 0; i < len; i++) {
100             if (!Character.isDigit(word.charAt(i))) {
101                 pat.append(word.charAt(i));
102             }
103         }
104         return pat.toString();
105     }
106
107     protected ArrayList JavaDoc normalizeException(ArrayList JavaDoc ex) {
108         ArrayList JavaDoc res = new ArrayList JavaDoc();
109         for (int i = 0; i < ex.size(); i++) {
110             Object JavaDoc item = ex.get(i);
111             if (item instanceof String JavaDoc) {
112                 String JavaDoc str = (String JavaDoc)item;
113                 StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
114                 for (int j = 0; j < str.length(); j++) {
115                     char c = str.charAt(j);
116                     if (c != hyphenChar) {
117                         buf.append(c);
118                     } else {
119                         res.add(buf.toString());
120                         buf.setLength(0);
121                         char[] h = new char[1];
122                         h[0] = hyphenChar;
123                         // we use here hyphenChar which is not necessarily
124
// the one to be printed
125
res.add(new Hyphen(new String JavaDoc(h), null, null));
126                     }
127                 }
128                 if (buf.length() > 0) {
129                     res.add(buf.toString());
130                 }
131             } else {
132                 res.add(item);
133             }
134         }
135         return res;
136     }
137
138     protected String JavaDoc getExceptionWord(ArrayList JavaDoc ex) {
139         StringBuffer JavaDoc res = new StringBuffer JavaDoc();
140         for (int i = 0; i < ex.size(); i++) {
141             Object JavaDoc item = ex.get(i);
142             if (item instanceof String JavaDoc) {
143                 res.append((String JavaDoc)item);
144             } else {
145                 if (((Hyphen)item).noBreak != null) {
146                     res.append(((Hyphen)item).noBreak);
147                 }
148             }
149         }
150         return res.toString();
151     }
152
153     protected static String JavaDoc getInterletterValues(String JavaDoc pat) {
154         StringBuffer JavaDoc il = new StringBuffer JavaDoc();
155         String JavaDoc word = pat + "a"; // add dummy letter to serve as sentinel
156
int len = word.length();
157         for (int i = 0; i < len; i++) {
158             char c = word.charAt(i);
159             if (Character.isDigit(c)) {
160                 il.append(c);
161                 i++;
162             } else {
163                 il.append('0');
164             }
165         }
166         return il.toString();
167     }
168
169     public void endDocument() {
170     }
171     
172     public void endElement(String JavaDoc tag) {
173         if (token.length() > 0) {
174             String JavaDoc word = token.toString();
175             switch (currElement) {
176             case ELEM_CLASSES:
177                 consumer.addClass(word);
178                 break;
179             case ELEM_EXCEPTIONS:
180                 exception.add(word);
181                 exception = normalizeException(exception);
182                 consumer.addException(getExceptionWord(exception),
183                                       (ArrayList JavaDoc)exception.clone());
184                 break;
185             case ELEM_PATTERNS:
186                 consumer.addPattern(getPattern(word),
187                                     getInterletterValues(word));
188                 break;
189             case ELEM_HYPHEN:
190                 // nothing to do
191
break;
192             }
193             if (currElement != ELEM_HYPHEN) {
194                 token.setLength(0);
195             }
196         }
197         if (currElement == ELEM_HYPHEN) {
198             currElement = ELEM_EXCEPTIONS;
199         } else {
200             currElement = 0;
201         }
202     }
203     
204     public void startDocument() {
205     }
206     
207     public void startElement(String JavaDoc tag, java.util.HashMap JavaDoc h) {
208         if (tag.equals("hyphen-char")) {
209             String JavaDoc hh = (String JavaDoc)h.get("value");
210             if (hh != null && hh.length() == 1) {
211                 hyphenChar = hh.charAt(0);
212             }
213         } else if (tag.equals("classes")) {
214             currElement = ELEM_CLASSES;
215         } else if (tag.equals("patterns")) {
216             currElement = ELEM_PATTERNS;
217         } else if (tag.equals("exceptions")) {
218             currElement = ELEM_EXCEPTIONS;
219             exception = new ArrayList JavaDoc();
220         } else if (tag.equals("hyphen")) {
221             if (token.length() > 0) {
222                 exception.add(token.toString());
223             }
224             exception.add(new Hyphen((String JavaDoc)h.get("pre"),
225                                             (String JavaDoc)h.get("no"),
226                                             (String JavaDoc)h.get("post")));
227             currElement = ELEM_HYPHEN;
228         }
229         token.setLength(0);
230     }
231     
232     public void text(String JavaDoc str) {
233         StringTokenizer JavaDoc tk = new StringTokenizer JavaDoc(str);
234         while (tk.hasMoreTokens()) {
235             String JavaDoc word = tk.nextToken();
236             // System.out.println("\"" + word + "\"");
237
switch (currElement) {
238             case ELEM_CLASSES:
239                 consumer.addClass(word);
240                 break;
241             case ELEM_EXCEPTIONS:
242                 exception.add(word);
243                 exception = normalizeException(exception);
244                 consumer.addException(getExceptionWord(exception),
245                                       (ArrayList JavaDoc)exception.clone());
246                 exception.clear();
247                 break;
248             case ELEM_PATTERNS:
249                 consumer.addPattern(getPattern(word),
250                                     getInterletterValues(word));
251                 break;
252             }
253         }
254     }
255     
256     // PatternConsumer implementation for testing purposes
257
public void addClass(String JavaDoc c) {
258         System.out.println("class: " + c);
259     }
260
261     public void addException(String JavaDoc w, ArrayList JavaDoc e) {
262         System.out.println("exception: " + w + " : " + e.toString());
263     }
264
265     public void addPattern(String JavaDoc p, String JavaDoc v) {
266         System.out.println("pattern: " + p + " : " + v);
267     }
268
269     public static void main(String JavaDoc[] args) throws Exception JavaDoc {
270         try {
271             if (args.length > 0) {
272                 SimplePatternParser pp = new SimplePatternParser();
273                 pp.parse(new FileInputStream JavaDoc(args[0]), pp);
274             }
275         }
276         catch (Exception JavaDoc e) {
277             e.printStackTrace();
278         }
279     }
280 }
281
Popular Tags