KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > fop > layout > hyphenation > PatternParser


/*
 * $Id: PatternParser.java,v 1.4.2.3 2003/02/25 14:07:11 jeremias Exp $
 * ============================================================================
 * The Apache Software License, Version 1.1
 * ============================================================================
 *
 * Copyright (C) 1999-2003 The Apache Software Foundation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. The end-user documentation included with the redistribution, if any, must
 * include the following acknowledgment: "This product includes software
 * developed by the Apache Software Foundation (http://www.apache.org/)."
 * Alternately, this acknowledgment may appear in the software itself, if
 * and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "FOP" and "Apache Software Foundation" must not be used to
 * endorse or promote products derived from this software without prior
 * written permission. For written permission, please contact
 * apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache", nor may
 * "Apache" appear in their name, without prior written permission of the
 * Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * APACHE SOFTWARE FOUNDATION OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLU-
 * DING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ============================================================================
 *
 * This software consists of voluntary contributions made by many individuals
 * on behalf of the Apache Software Foundation and was originally created by
 * James Tauber <jtauber@jtauber.com>. For more information on the Apache
 * Software Foundation, please see <http://www.apache.org/>.
 */

package org.apache.fop.layout.hyphenation;

// SAX
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

// Java
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;

67 /**
68  * A SAX document handler to read and parse hyphenation patterns
69  * from a XML file.
70  *
71  * @author Carlos Villegas <cav@uniscope.co.jp>
72  */

73 public class PatternParser extends DefaultHandler JavaDoc implements PatternConsumer {
74
75     XMLReader JavaDoc parser;
76     int currElement;
77     PatternConsumer consumer;
78     StringBuffer JavaDoc token;
79     ArrayList JavaDoc exception;
80     char hyphenChar;
81     String JavaDoc errMsg;
82
83     static final int ELEM_CLASSES = 1;
84     static final int ELEM_EXCEPTIONS = 2;
85     static final int ELEM_PATTERNS = 3;
86     static final int ELEM_HYPHEN = 4;
87
88     public PatternParser() throws HyphenationException {
89         token = new StringBuffer JavaDoc();
90         parser = createParser();
91         parser.setContentHandler(this);
92         parser.setErrorHandler(this);
93         hyphenChar = '-'; // default
94

95     }
96
97     public PatternParser(PatternConsumer consumer)
98             throws HyphenationException {
99         this();
100         this.consumer = consumer;
101     }
102
103     public void setConsumer(PatternConsumer consumer) {
104         this.consumer = consumer;
105     }
106
107     public void parse(String JavaDoc filename) throws HyphenationException {
108         InputSource JavaDoc uri = fileInputSource(filename);
109
110         try {
111             parser.parse(uri);
112         } catch (SAXException JavaDoc e) {
113             throw new HyphenationException(errMsg);
114         } catch (IOException JavaDoc e) {
115             throw new HyphenationException(e.getMessage());
116         } catch (NullPointerException JavaDoc e) {
117             throw new HyphenationException("SAX parser not available");
118         }
119     }
120
121     /**
122      * creates a SAX parser, using the value of org.xml.sax.parser
123      * defaulting to org.apache.xerces.parsers.SAXParser
124      *
125      * @return the created SAX parser
126      */

127     static XMLReader JavaDoc createParser() throws HyphenationException {
128         String JavaDoc parserClassName = System.getProperty("org.xml.sax.parser");
129         if (parserClassName == null) {
130             parserClassName = "org.apache.xerces.parsers.SAXParser";
131         }
132         // System.out.println("using SAX parser " + parserClassName);
133

134         try {
135             return (XMLReader JavaDoc)Class.forName(parserClassName).newInstance();
136         } catch (ClassNotFoundException JavaDoc e) {
137             throw new HyphenationException("Could not find "
138                                            + parserClassName);
139         } catch (InstantiationException JavaDoc e) {
140             throw new HyphenationException("Could not instantiate "
141                                            + parserClassName);
142         } catch (IllegalAccessException JavaDoc e) {
143             throw new HyphenationException("Could not access "
144                                            + parserClassName);
145         } catch (ClassCastException JavaDoc e) {
146             throw new HyphenationException(parserClassName
147                                            + " is not a SAX driver");
148         }
149     }
150
151     /**
152      * create an InputSource from a file name
153      *
154      * @param filename the name of the file
155      * @return the InputSource created
156      */

157     protected static InputSource JavaDoc fileInputSource(String JavaDoc filename)
158             throws HyphenationException {
159
160         /* this code adapted from James Clark's in XT */
161         File JavaDoc file = new File JavaDoc(filename);
162         String JavaDoc path = file.getAbsolutePath();
163         String JavaDoc fSep = System.getProperty("file.separator");
164         if (fSep != null && fSep.length() == 1)
165             path = path.replace(fSep.charAt(0), '/');
166         if (path.length() > 0 && path.charAt(0) != '/')
167             path = '/' + path;
168         try {
169             return new InputSource JavaDoc(new URL JavaDoc("file", null, path).toString());
170         } catch (java.net.MalformedURLException JavaDoc e) {
171             throw new HyphenationException("unexpected MalformedURLException");
172         }
173     }
174
175     protected String JavaDoc readToken(StringBuffer JavaDoc chars) {
176         String JavaDoc word;
177         boolean space = false;
178         int i;
179         for (i = 0; i < chars.length(); i++)
180             if (Character.isWhitespace(chars.charAt(i)))
181                 space = true;
182             else
183                 break;
184         if (space) {
185             // chars.delete(0,i);
186
for (int countr = i; countr < chars.length(); countr++)
187                 chars.setCharAt(countr - i, chars.charAt(countr));
188             chars.setLength(chars.length() - i);
189             if (token.length() > 0) {
190                 word = token.toString();
191                 token.setLength(0);
192                 return word;
193             }
194         }
195         space = false;
196         for (i = 0; i < chars.length(); i++) {
197             if (Character.isWhitespace(chars.charAt(i))) {
198                 space = true;
199                 break;
200             }
201         }
202         token.append(chars.toString().substring(0, i));
203         // chars.delete(0,i);
204
for (int countr = i; countr < chars.length(); countr++)
205             chars.setCharAt(countr - i, chars.charAt(countr));
206         chars.setLength(chars.length() - i);
207         if (space) {
208             word = token.toString();
209             token.setLength(0);
210             return word;
211         }
212         token.append(chars);
213         return null;
214     }
215
216     protected static String JavaDoc getPattern(String JavaDoc word) {
217         StringBuffer JavaDoc pat = new StringBuffer JavaDoc();
218         int len = word.length();
219         for (int i = 0; i < len; i++)
220             if (!Character.isDigit(word.charAt(i)))
221                 pat.append(word.charAt(i));
222         return pat.toString();
223     }
224
225     protected ArrayList JavaDoc normalizeException(ArrayList JavaDoc ex) {
226         ArrayList JavaDoc res = new ArrayList JavaDoc();
227         for (int i = 0; i < ex.size(); i++) {
228             Object JavaDoc item = ex.get(i);
229             if (item instanceof String JavaDoc) {
230                 String JavaDoc str = (String JavaDoc)item;
231                 StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
232                 for (int j = 0; j < str.length(); j++) {
233                     char c = str.charAt(j);
234                     if (c != hyphenChar)
235                         buf.append(c);
236                     else {
237                         res.add(buf.toString());
238                         buf.setLength(0);
239                         char[] h = new char[1];
240                         h[0] = hyphenChar;
241                         // we use here hyphenChar which is not necessarily
242
// the one to be printed
243
res.add(new Hyphen(new String JavaDoc(h), null, null));
244                     }
245                 }
246                 if (buf.length() > 0)
247                     res.add(buf.toString());
248             } else
249                 res.add(item);
250         }
251         return res;
252     }
253
254     protected String JavaDoc getExceptionWord(ArrayList JavaDoc ex) {
255         StringBuffer JavaDoc res = new StringBuffer JavaDoc();
256         for (int i = 0; i < ex.size(); i++) {
257             Object JavaDoc item = ex.get(i);
258             if (item instanceof String JavaDoc)
259                 res.append((String JavaDoc)item);
260             else {
261                 if (((Hyphen)item).noBreak != null)
262                     res.append(((Hyphen)item).noBreak);
263             }
264         }
265         return res.toString();
266     }
267
268     protected static String JavaDoc getInterletterValues(String JavaDoc pat) {
269         StringBuffer JavaDoc il = new StringBuffer JavaDoc();
270         String JavaDoc word = pat + "a"; // add dummy letter to serve as sentinel
271
int len = word.length();
272         for (int i = 0; i < len; i++) {
273             char c = word.charAt(i);
274             if (Character.isDigit(c)) {
275                 il.append(c);
276                 i++;
277             } else
278                 il.append('0');
279         }
280         return il.toString();
281     }
282
283     //
284
// DocumentHandler methods
285
//
286

287     /**
288      * Start element.
289      */

290     public void startElement(String JavaDoc uri, String JavaDoc local, String JavaDoc raw,
291                              Attributes JavaDoc attrs) {
292         if (local.equals("hyphen-char")) {
293             String JavaDoc h = attrs.getValue("value");
294             if (h != null && h.length() == 1)
295                 hyphenChar = h.charAt(0);
296         } else if (local.equals("classes"))
297             currElement = ELEM_CLASSES;
298         else if (local.equals("patterns"))
299             currElement = ELEM_PATTERNS;
300         else if (local.equals("exceptions")) {
301             currElement = ELEM_EXCEPTIONS;
302             exception = new ArrayList JavaDoc();
303         } else if (local.equals("hyphen")) {
304             if (token.length() > 0) {
305                 exception.add(token.toString());
306             }
307             exception.add(new Hyphen(attrs.getValue("pre"),
308                                      attrs.getValue("no"),
309                                      attrs.getValue("post")));
310             currElement = ELEM_HYPHEN;
311         }
312         token.setLength(0);
313     }
314
315     public void endElement(String JavaDoc uri, String JavaDoc local, String JavaDoc raw) {
316
317         if (token.length() > 0) {
318             String JavaDoc word = token.toString();
319             switch (currElement) {
320             case ELEM_CLASSES:
321                 consumer.addClass(word);
322                 break;
323             case ELEM_EXCEPTIONS:
324                 exception.add(word);
325                 exception = normalizeException(exception);
326                 consumer.addException(getExceptionWord(exception),
327                                       (ArrayList JavaDoc)exception.clone());
328                 break;
329             case ELEM_PATTERNS:
330                 consumer.addPattern(getPattern(word),
331                                     getInterletterValues(word));
332                 break;
333             case ELEM_HYPHEN:
334                 // nothing to do
335
break;
336             }
337             if (currElement != ELEM_HYPHEN)
338                 token.setLength(0);
339         }
340         if (currElement == ELEM_HYPHEN)
341             currElement = ELEM_EXCEPTIONS;
342         else
343             currElement = 0;
344
345     }
346
347     /**
348      * Characters.
349      */

350     public void characters(char ch[], int start, int length) {
351         StringBuffer JavaDoc chars = new StringBuffer JavaDoc(length);
352         chars.append(ch, start, length);
353         String JavaDoc word = readToken(chars);
354         while (word != null) {
355             // System.out.println("\"" + word + "\"");
356
switch (currElement) {
357             case ELEM_CLASSES:
358                 consumer.addClass(word);
359                 break;
360             case ELEM_EXCEPTIONS:
361                 exception.add(word);
362                 exception = normalizeException(exception);
363                 consumer.addException(getExceptionWord(exception),
364                                       (ArrayList JavaDoc)exception.clone());
365                 exception.clear();
366                 break;
367             case ELEM_PATTERNS:
368                 consumer.addPattern(getPattern(word),
369                                     getInterletterValues(word));
370                 break;
371             }
372             word = readToken(chars);
373         }
374
375     }
376
377     //
378
// ErrorHandler methods
379
//
380

381     /**
382      * Warning.
383      */

384     public void warning(SAXParseException JavaDoc ex) {
385         errMsg = "[Warning] " + getLocationString(ex) + ": "
386                  + ex.getMessage();
387     }
388
389     /**
390      * Error.
391      */

392     public void error(SAXParseException JavaDoc ex) {
393         errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
394     }
395
396     /**
397      * Fatal error.
398      */

399     public void fatalError(SAXParseException JavaDoc ex) throws SAXException JavaDoc {
400         errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
401                  + ex.getMessage();
402         throw ex;
403     }
404
405     /**
406      * Returns a string of the location.
407      */

408     private String JavaDoc getLocationString(SAXParseException JavaDoc ex) {
409         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
410
411         String JavaDoc systemId = ex.getSystemId();
412         if (systemId != null) {
413             int index = systemId.lastIndexOf('/');
414             if (index != -1)
415                 systemId = systemId.substring(index + 1);
416             str.append(systemId);
417         }
418         str.append(':');
419         str.append(ex.getLineNumber());
420         str.append(':');
421         str.append(ex.getColumnNumber());
422
423         return str.toString();
424
425     } // getLocationString(SAXParseException):String
426

427
428     // PatternConsumer implementation for testing purposes
429
public void addClass(String JavaDoc c) {
430         System.out.println("class: " + c);
431     }
432
433     public void addException(String JavaDoc w, ArrayList JavaDoc e) {
434         System.out.println("exception: " + w + " : " + e.toString());
435     }
436
437     public void addPattern(String JavaDoc p, String JavaDoc v) {
438         System.out.println("pattern: " + p + " : " + v);
439     }
440
441     public static void main(String JavaDoc[] args) throws Exception JavaDoc {
442         if (args.length > 0) {
443             PatternParser pp = new PatternParser();
444             pp.setConsumer(pp);
445             pp.parse(args[0]);
446         }
447     }
448
449 }
450
Popular Tags