1 17 18 19 20 package org.apache.fop.hyphenation; 21 22 import org.xml.sax.XMLReader ; 24 import org.xml.sax.InputSource ; 25 import org.xml.sax.SAXException ; 26 import org.xml.sax.SAXParseException ; 27 import org.xml.sax.helpers.DefaultHandler ; 28 import org.xml.sax.Attributes ; 29 30 import java.io.File ; 32 import java.io.FileNotFoundException ; 33 import java.io.IOException ; 34 import java.net.MalformedURLException ; 35 import java.util.ArrayList ; 36 37 import javax.xml.parsers.SAXParserFactory ; 38 39 45 public class PatternParser extends DefaultHandler implements PatternConsumer { 46 47 XMLReader parser; 48 int currElement; 49 PatternConsumer consumer; 50 StringBuffer token; 51 ArrayList exception; 52 char hyphenChar; 53 String errMsg; 54 55 static final int ELEM_CLASSES = 1; 56 static final int ELEM_EXCEPTIONS = 2; 57 static final int ELEM_PATTERNS = 3; 58 static final int ELEM_HYPHEN = 4; 59 60 public PatternParser() throws HyphenationException { 61 token = new StringBuffer (); 62 parser = createParser(); 63 parser.setContentHandler(this); 64 parser.setErrorHandler(this); 65 hyphenChar = '-'; 67 } 68 69 public PatternParser(PatternConsumer consumer) 70 throws HyphenationException { 71 this(); 72 this.consumer = consumer; 73 } 74 75 public void setConsumer(PatternConsumer consumer) { 76 this.consumer = consumer; 77 } 78 79 84 public void parse(String filename) throws HyphenationException { 85 parse(new File (filename)); 86 } 87 88 93 public void parse(File file) throws HyphenationException { 94 try { 95 InputSource src = new InputSource (file.toURL().toExternalForm()); 96 parse(src); 97 } catch (MalformedURLException e) { 98 throw new HyphenationException("Error converting the File '" + file + "' to a URL: " 99 + e.getMessage()); 100 } 101 } 102 103 108 public void parse(InputSource source) throws HyphenationException { 109 try { 110 parser.parse(source); 111 } catch (FileNotFoundException fnfe) { 112 throw new HyphenationException("File not found: " + fnfe.getMessage()); 113 } catch (IOException ioe) { 114 throw new HyphenationException(ioe.getMessage()); 115 } catch (SAXException e) { 116 throw new HyphenationException(errMsg); 117 } 118 } 119 120 124 static XMLReader createParser() { 125 try { 126 SAXParserFactory factory = SAXParserFactory.newInstance(); 127 factory.setNamespaceAware(true); 128 return factory.newSAXParser().getXMLReader(); 129 } catch (Exception e) { 130 throw new RuntimeException ("Couldn't create XMLReader: " + e.getMessage()); 131 } 132 } 133 134 protected String readToken(StringBuffer chars) { 135 String word; 136 boolean space = false; 137 int i; 138 for (i = 0; i < chars.length(); i++) { 139 if (Character.isWhitespace(chars.charAt(i))) { 140 space = true; 141 } else { 142 break; 143 } 144 } 145 if (space) { 146 for (int countr = i; countr < chars.length(); countr++) { 148 chars.setCharAt(countr - i, chars.charAt(countr)); 149 } 150 chars.setLength(chars.length() - i); 151 if (token.length() > 0) { 152 word = token.toString(); 153 token.setLength(0); 154 return word; 155 } 156 } 157 space = false; 158 for (i = 0; i < chars.length(); i++) { 159 if (Character.isWhitespace(chars.charAt(i))) { 160 space = true; 161 break; 162 } 163 } 164 token.append(chars.toString().substring(0, i)); 165 for (int countr = i; countr < chars.length(); countr++) { 167 chars.setCharAt(countr - i, chars.charAt(countr)); 168 } 169 chars.setLength(chars.length() - i); 170 if (space) { 171 word = token.toString(); 172 token.setLength(0); 173 return word; 174 } 175 token.append(chars); 176 return null; 177 } 178 179 protected static String getPattern(String word) { 180 StringBuffer pat = new StringBuffer (); 181 int len = word.length(); 182 for (int i = 0; i < len; i++) { 183 if (!Character.isDigit(word.charAt(i))) { 184 pat.append(word.charAt(i)); 185 } 186 } 187 return pat.toString(); 188 } 189 190 protected ArrayList normalizeException(ArrayList ex) { 191 ArrayList res = new ArrayList (); 192 for (int i = 0; i < ex.size(); i++) { 193 Object item = ex.get(i); 194 if (item instanceof String ) { 195 String str = (String )item; 196 StringBuffer buf = new StringBuffer (); 197 for (int j = 0; j < str.length(); j++) { 198 char c = str.charAt(j); 199 if (c != hyphenChar) { 200 buf.append(c); 201 } else { 202 res.add(buf.toString()); 203 buf.setLength(0); 204 char[] h = new char[1]; 205 h[0] = hyphenChar; 206 res.add(new Hyphen(new String (h), null, null)); 209 } 210 } 211 if (buf.length() > 0) { 212 res.add(buf.toString()); 213 } 214 } else { 215 res.add(item); 216 } 217 } 218 return res; 219 } 220 221 protected String getExceptionWord(ArrayList ex) { 222 StringBuffer res = new StringBuffer (); 223 for (int i = 0; i < ex.size(); i++) { 224 Object item = ex.get(i); 225 if (item instanceof String ) { 226 res.append((String )item); 227 } else { 228 if (((Hyphen)item).noBreak != null) { 229 res.append(((Hyphen)item).noBreak); 230 } 231 } 232 } 233 return res.toString(); 234 } 235 236 protected static String getInterletterValues(String pat) { 237 StringBuffer il = new StringBuffer (); 238 String word = pat + "a"; int len = word.length(); 240 for (int i = 0; i < len; i++) { 241 char c = word.charAt(i); 242 if (Character.isDigit(c)) { 243 il.append(c); 244 i++; 245 } else { 246 il.append('0'); 247 } 248 } 249 return il.toString(); 250 } 251 252 256 259 public void startElement(String uri, String local, String raw, 260 Attributes attrs) { 261 if (local.equals("hyphen-char")) { 262 String h = attrs.getValue("value"); 263 if (h != null && h.length() == 1) { 264 hyphenChar = h.charAt(0); 265 } 266 } else if (local.equals("classes")) { 267 currElement = ELEM_CLASSES; 268 } else if (local.equals("patterns")) { 269 currElement = ELEM_PATTERNS; 270 } else if (local.equals("exceptions")) { 271 currElement = ELEM_EXCEPTIONS; 272 exception = new ArrayList (); 273 } else if (local.equals("hyphen")) { 274 if (token.length() > 0) { 275 exception.add(token.toString()); 276 } 277 exception.add(new Hyphen(attrs.getValue("pre"), 278 attrs.getValue("no"), 279 attrs.getValue("post"))); 280 currElement = ELEM_HYPHEN; 281 } 282 token.setLength(0); 283 } 284 285 288 public void endElement(String uri, String local, String raw) { 289 290 if (token.length() > 0) { 291 String word = token.toString(); 292 switch (currElement) { 293 case ELEM_CLASSES: 294 consumer.addClass(word); 295 break; 296 case ELEM_EXCEPTIONS: 297 exception.add(word); 298 exception = normalizeException(exception); 299 consumer.addException(getExceptionWord(exception), 300 (ArrayList )exception.clone()); 301 break; 302 case ELEM_PATTERNS: 303 consumer.addPattern(getPattern(word), 304 getInterletterValues(word)); 305 break; 306 case ELEM_HYPHEN: 307 break; 309 } 310 if (currElement != ELEM_HYPHEN) { 311 token.setLength(0); 312 } 313 } 314 if (currElement == ELEM_HYPHEN) { 315 currElement = ELEM_EXCEPTIONS; 316 } else { 317 currElement = 0; 318 } 319 320 } 321 322 325 public void characters(char ch[], int start, int length) { 326 StringBuffer chars = new StringBuffer (length); 327 chars.append(ch, start, length); 328 String word = readToken(chars); 329 while (word != null) { 330 switch (currElement) { 332 case ELEM_CLASSES: 333 consumer.addClass(word); 334 break; 335 case ELEM_EXCEPTIONS: 336 exception.add(word); 337 exception = normalizeException(exception); 338 consumer.addException(getExceptionWord(exception), 339 (ArrayList )exception.clone()); 340 exception.clear(); 341 break; 342 case ELEM_PATTERNS: 343 consumer.addPattern(getPattern(word), 344 getInterletterValues(word)); 345 break; 346 } 347 word = readToken(chars); 348 } 349 350 } 351 352 356 359 public void warning(SAXParseException ex) { 360 errMsg = "[Warning] " + getLocationString(ex) + ": " 361 + ex.getMessage(); 362 } 363 364 367 public void error(SAXParseException ex) { 368 errMsg = "[Error] " + getLocationString(ex) + ": " + ex.getMessage(); 369 } 370 371 374 public void fatalError(SAXParseException ex) throws SAXException { 375 errMsg = "[Fatal Error] " + getLocationString(ex) + ": " 376 + ex.getMessage(); 377 throw ex; 378 } 379 380 383 private String getLocationString(SAXParseException ex) { 384 StringBuffer str = new StringBuffer (); 385 386 String systemId = ex.getSystemId(); 387 if (systemId != null) { 388 int index = systemId.lastIndexOf('/'); 389 if (index != -1) { 390 systemId = systemId.substring(index + 1); 391 } 392 str.append(systemId); 393 } 394 str.append(':'); 395 str.append(ex.getLineNumber()); 396 str.append(':'); 397 str.append(ex.getColumnNumber()); 398 399 return str.toString(); 400 401 } 403 404 public void addClass(String c) { 406 System.out.println("class: " + c); 407 } 408 409 public void addException(String w, ArrayList e) { 410 System.out.println("exception: " + w + " : " + e.toString()); 411 } 412 413 public void addPattern(String p, String v) { 414 System.out.println("pattern: " + p + " : " + v); 415 } 416 417 public static void main(String [] args) throws Exception { 418 if (args.length > 0) { 419 PatternParser pp = new PatternParser(); 420 pp.setConsumer(pp); 421 pp.parse(args[0]); 422 } 423 } 424 425 } 426 | Popular Tags |