KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > fop > hyphenation > PatternParser


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17
18 /* $Id: PatternParser.java 426576 2006-07-28 15:44:37Z jeremias $ */
19  
20 package org.apache.fop.hyphenation;
21
22 // SAX
23
import org.xml.sax.XMLReader JavaDoc;
24 import org.xml.sax.InputSource JavaDoc;
25 import org.xml.sax.SAXException JavaDoc;
26 import org.xml.sax.SAXParseException JavaDoc;
27 import org.xml.sax.helpers.DefaultHandler JavaDoc;
28 import org.xml.sax.Attributes JavaDoc;
29
30 // Java
31
import java.io.File JavaDoc;
32 import java.io.FileNotFoundException JavaDoc;
33 import java.io.IOException JavaDoc;
34 import java.net.MalformedURLException JavaDoc;
35 import java.util.ArrayList JavaDoc;
36
37 import javax.xml.parsers.SAXParserFactory JavaDoc;
38
39 /**
40  * A SAX document handler to read and parse hyphenation patterns
41  * from a XML file.
42  *
43  * @author Carlos Villegas <cav@uniscope.co.jp>
44  */

45 public class PatternParser extends DefaultHandler JavaDoc implements PatternConsumer {
46
47     XMLReader JavaDoc parser;
48     int currElement;
49     PatternConsumer consumer;
50     StringBuffer JavaDoc token;
51     ArrayList JavaDoc exception;
52     char hyphenChar;
53     String JavaDoc errMsg;
54
55     static final int ELEM_CLASSES = 1;
56     static final int ELEM_EXCEPTIONS = 2;
57     static final int ELEM_PATTERNS = 3;
58     static final int ELEM_HYPHEN = 4;
59
60     public PatternParser() throws HyphenationException {
61         token = new StringBuffer JavaDoc();
62         parser = createParser();
63         parser.setContentHandler(this);
64         parser.setErrorHandler(this);
65         hyphenChar = '-'; // default
66

67     }
68
69     public PatternParser(PatternConsumer consumer)
70             throws HyphenationException {
71         this();
72         this.consumer = consumer;
73     }
74
75     public void setConsumer(PatternConsumer consumer) {
76         this.consumer = consumer;
77     }
78
79     /**
80      * Parses a hyphenation pattern file.
81      * @param filename the filename
82      * @throws HyphenationException In case of an exception while parsing
83      */

84     public void parse(String JavaDoc filename) throws HyphenationException {
85         parse(new File JavaDoc(filename));
86     }
87     
88     /**
89      * Parses a hyphenation pattern file.
90      * @param file the pattern file
91      * @throws HyphenationException In case of an exception while parsing
92      */

93     public void parse(File JavaDoc file) throws HyphenationException {
94         try {
95             InputSource JavaDoc src = new InputSource JavaDoc(file.toURL().toExternalForm());
96             parse(src);
97         } catch (MalformedURLException JavaDoc e) {
98             throw new HyphenationException("Error converting the File '" + file + "' to a URL: "
99                     + e.getMessage());
100         }
101     }
102
103     /**
104      * Parses a hyphenation pattern file.
105      * @param source the InputSource for the file
106      * @throws HyphenationException In case of an exception while parsing
107      */

108     public void parse(InputSource JavaDoc source) throws HyphenationException {
109         try {
110             parser.parse(source);
111         } catch (FileNotFoundException JavaDoc fnfe) {
112             throw new HyphenationException("File not found: " + fnfe.getMessage());
113         } catch (IOException JavaDoc ioe) {
114             throw new HyphenationException(ioe.getMessage());
115         } catch (SAXException JavaDoc e) {
116             throw new HyphenationException(errMsg);
117         }
118     }
119     
120     /**
121      * Creates a SAX parser using JAXP
122      * @return the created SAX parser
123      */

124     static XMLReader JavaDoc createParser() {
125         try {
126             SAXParserFactory JavaDoc factory = SAXParserFactory.newInstance();
127             factory.setNamespaceAware(true);
128             return factory.newSAXParser().getXMLReader();
129         } catch (Exception JavaDoc e) {
130             throw new RuntimeException JavaDoc("Couldn't create XMLReader: " + e.getMessage());
131         }
132     }
133
134     protected String JavaDoc readToken(StringBuffer JavaDoc chars) {
135         String JavaDoc word;
136         boolean space = false;
137         int i;
138         for (i = 0; i < chars.length(); i++) {
139             if (Character.isWhitespace(chars.charAt(i))) {
140                 space = true;
141             } else {
142                 break;
143             }
144         }
145         if (space) {
146             // chars.delete(0,i);
147
for (int countr = i; countr < chars.length(); countr++) {
148                 chars.setCharAt(countr - i, chars.charAt(countr));
149             }
150             chars.setLength(chars.length() - i);
151             if (token.length() > 0) {
152                 word = token.toString();
153                 token.setLength(0);
154                 return word;
155             }
156         }
157         space = false;
158         for (i = 0; i < chars.length(); i++) {
159             if (Character.isWhitespace(chars.charAt(i))) {
160                 space = true;
161                 break;
162             }
163         }
164         token.append(chars.toString().substring(0, i));
165         // chars.delete(0,i);
166
for (int countr = i; countr < chars.length(); countr++) {
167             chars.setCharAt(countr - i, chars.charAt(countr));
168         }
169         chars.setLength(chars.length() - i);
170         if (space) {
171             word = token.toString();
172             token.setLength(0);
173             return word;
174         }
175         token.append(chars);
176         return null;
177     }
178
179     protected static String JavaDoc getPattern(String JavaDoc word) {
180         StringBuffer JavaDoc pat = new StringBuffer JavaDoc();
181         int len = word.length();
182         for (int i = 0; i < len; i++) {
183             if (!Character.isDigit(word.charAt(i))) {
184                 pat.append(word.charAt(i));
185             }
186         }
187         return pat.toString();
188     }
189
190     protected ArrayList JavaDoc normalizeException(ArrayList JavaDoc ex) {
191         ArrayList JavaDoc res = new ArrayList JavaDoc();
192         for (int i = 0; i < ex.size(); i++) {
193             Object JavaDoc item = ex.get(i);
194             if (item instanceof String JavaDoc) {
195                 String JavaDoc str = (String JavaDoc)item;
196                 StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
197                 for (int j = 0; j < str.length(); j++) {
198                     char c = str.charAt(j);
199                     if (c != hyphenChar) {
200                         buf.append(c);
201                     } else {
202                         res.add(buf.toString());
203                         buf.setLength(0);
204                         char[] h = new char[1];
205                         h[0] = hyphenChar;
206                         // we use here hyphenChar which is not necessarily
207
// the one to be printed
208
res.add(new Hyphen(new String JavaDoc(h), null, null));
209                     }
210                 }
211                 if (buf.length() > 0) {
212                     res.add(buf.toString());
213                 }
214             } else {
215                 res.add(item);
216             }
217         }
218         return res;
219     }
220
221     protected String JavaDoc getExceptionWord(ArrayList JavaDoc ex) {
222         StringBuffer JavaDoc res = new StringBuffer JavaDoc();
223         for (int i = 0; i < ex.size(); i++) {
224             Object JavaDoc item = ex.get(i);
225             if (item instanceof String JavaDoc) {
226                 res.append((String JavaDoc)item);
227             } else {
228                 if (((Hyphen)item).noBreak != null) {
229                     res.append(((Hyphen)item).noBreak);
230                 }
231             }
232         }
233         return res.toString();
234     }
235
236     protected static String JavaDoc getInterletterValues(String JavaDoc pat) {
237         StringBuffer JavaDoc il = new StringBuffer JavaDoc();
238         String JavaDoc word = pat + "a"; // add dummy letter to serve as sentinel
239
int len = word.length();
240         for (int i = 0; i < len; i++) {
241             char c = word.charAt(i);
242             if (Character.isDigit(c)) {
243                 il.append(c);
244                 i++;
245             } else {
246                 il.append('0');
247             }
248         }
249         return il.toString();
250     }
251
252     //
253
// ContentHandler methods
254
//
255

256     /**
257      * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
258      */

259     public void startElement(String JavaDoc uri, String JavaDoc local, String JavaDoc raw,
260                              Attributes JavaDoc attrs) {
261         if (local.equals("hyphen-char")) {
262             String JavaDoc h = attrs.getValue("value");
263             if (h != null && h.length() == 1) {
264                 hyphenChar = h.charAt(0);
265             }
266         } else if (local.equals("classes")) {
267             currElement = ELEM_CLASSES;
268         } else if (local.equals("patterns")) {
269             currElement = ELEM_PATTERNS;
270         } else if (local.equals("exceptions")) {
271             currElement = ELEM_EXCEPTIONS;
272             exception = new ArrayList JavaDoc();
273         } else if (local.equals("hyphen")) {
274             if (token.length() > 0) {
275                 exception.add(token.toString());
276             }
277             exception.add(new Hyphen(attrs.getValue("pre"),
278                                             attrs.getValue("no"),
279                                             attrs.getValue("post")));
280             currElement = ELEM_HYPHEN;
281         }
282         token.setLength(0);
283     }
284
285     /**
286      * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
287      */

288     public void endElement(String JavaDoc uri, String JavaDoc local, String JavaDoc raw) {
289
290         if (token.length() > 0) {
291             String JavaDoc word = token.toString();
292             switch (currElement) {
293             case ELEM_CLASSES:
294                 consumer.addClass(word);
295                 break;
296             case ELEM_EXCEPTIONS:
297                 exception.add(word);
298                 exception = normalizeException(exception);
299                 consumer.addException(getExceptionWord(exception),
300                                       (ArrayList JavaDoc)exception.clone());
301                 break;
302             case ELEM_PATTERNS:
303                 consumer.addPattern(getPattern(word),
304                                     getInterletterValues(word));
305                 break;
306             case ELEM_HYPHEN:
307                 // nothing to do
308
break;
309             }
310             if (currElement != ELEM_HYPHEN) {
311                 token.setLength(0);
312             }
313         }
314         if (currElement == ELEM_HYPHEN) {
315             currElement = ELEM_EXCEPTIONS;
316         } else {
317             currElement = 0;
318         }
319
320     }
321
322     /**
323      * @see org.xml.sax.ContentHandler#characters(char[], int, int)
324      */

325     public void characters(char ch[], int start, int length) {
326         StringBuffer JavaDoc chars = new StringBuffer JavaDoc(length);
327         chars.append(ch, start, length);
328         String JavaDoc word = readToken(chars);
329         while (word != null) {
330             // System.out.println("\"" + word + "\"");
331
switch (currElement) {
332             case ELEM_CLASSES:
333                 consumer.addClass(word);
334                 break;
335             case ELEM_EXCEPTIONS:
336                 exception.add(word);
337                 exception = normalizeException(exception);
338                 consumer.addException(getExceptionWord(exception),
339                                       (ArrayList JavaDoc)exception.clone());
340                 exception.clear();
341                 break;
342             case ELEM_PATTERNS:
343                 consumer.addPattern(getPattern(word),
344                                     getInterletterValues(word));
345                 break;
346             }
347             word = readToken(chars);
348         }
349
350     }
351
352     //
353
// ErrorHandler methods
354
//
355

356     /**
357      * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException)
358      */

359     public void warning(SAXParseException JavaDoc ex) {
360         errMsg = "[Warning] " + getLocationString(ex) + ": "
361                  + ex.getMessage();
362     }
363
364     /**
365      * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException)
366      */

367     public void error(SAXParseException JavaDoc ex) {
368         errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage();
369     }
370
371     /**
372      * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException)
373      */

374     public void fatalError(SAXParseException JavaDoc ex) throws SAXException JavaDoc {
375         errMsg = "[Fatal Error] " + getLocationString(ex) + ": "
376                  + ex.getMessage();
377         throw ex;
378     }
379
380     /**
381      * Returns a string of the location.
382      */

383     private String JavaDoc getLocationString(SAXParseException JavaDoc ex) {
384         StringBuffer JavaDoc str = new StringBuffer JavaDoc();
385
386         String JavaDoc systemId = ex.getSystemId();
387         if (systemId != null) {
388             int index = systemId.lastIndexOf('/');
389             if (index != -1) {
390                 systemId = systemId.substring(index + 1);
391             }
392             str.append(systemId);
393         }
394         str.append(':');
395         str.append(ex.getLineNumber());
396         str.append(':');
397         str.append(ex.getColumnNumber());
398
399         return str.toString();
400
401     } // getLocationString(SAXParseException):String
402

403
404     // PatternConsumer implementation for testing purposes
405
public void addClass(String JavaDoc c) {
406         System.out.println("class: " + c);
407     }
408
409     public void addException(String JavaDoc w, ArrayList JavaDoc e) {
410         System.out.println("exception: " + w + " : " + e.toString());
411     }
412
413     public void addPattern(String JavaDoc p, String JavaDoc v) {
414         System.out.println("pattern: " + p + " : " + v);
415     }
416
417     public static void main(String JavaDoc[] args) throws Exception JavaDoc {
418         if (args.length > 0) {
419             PatternParser pp = new PatternParser();
420             pp.setConsumer(pp);
421             pp.parse(args[0]);
422         }
423     }
424
425 }
426
Popular Tags