package com.lowagie.text.pdf.hyphenation;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.StringTokenizer;

import com.lowagie.text.ExceptionConverter;
import com.lowagie.text.xml.simpleparser.SimpleXMLDocHandler;
import com.lowagie.text.xml.simpleparser.SimpleXMLParser;

/**
 * Reads a hyphenation pattern XML document and forwards the character
 * classes, hyphenation exceptions and patterns it finds to a
 * {@link PatternConsumer}.
 *
 * <p>The class also implements {@link PatternConsumer} itself; that
 * implementation simply prints every item to <code>System.out</code> and is
 * what {@link #main(String[])} uses.</p>
 *
 * <p>Instances keep parsing state in fields, so one instance must not be
 * used for two parses at the same time.</p>
 */
public class SimplePatternParser implements SimpleXMLDocHandler, PatternConsumer {

    /** Element currently being read: one of the ELEM_* constants, or 0 for none. */
    int currElement;
    /** Receiver of the parsed classes, exceptions and patterns. */
    PatternConsumer consumer;
    /** Pending word buffer; flushed by {@link #endElement(String)} when non-empty. */
    StringBuffer token;
    /** Parts (Strings and Hyphen objects) of the exception word being assembled. */
    ArrayList exception;
    /** Character that marks a hyphenation point inside an exception word. */
    char hyphenChar;
    /** Not used by this class; kept for compatibility with code in this package. */
    SimpleXMLParser parser;

    static final int ELEM_CLASSES = 1;
    static final int ELEM_EXCEPTIONS = 2;
    static final int ELEM_PATTERNS = 3;
    static final int ELEM_HYPHEN = 4;

    /** Creates a parser whose hyphen character is <code>'-'</code>. */
    public SimplePatternParser() {
        token = new StringBuffer();
        // Allocated up front so that a <hyphen> element met before any
        // <exceptions> element does not fail with a NullPointerException.
        exception = new ArrayList();
        hyphenChar = '-'; // default
    }

    /**
     * Parses the given stream and reports everything found to the consumer.
     * The stream is always closed before this method returns.
     *
     * @param stream the XML input
     * @param consumer the receiver of classes, exceptions and patterns
     * @throws ExceptionConverter wrapping any IOException raised while parsing
     */
    public void parse(InputStream stream, PatternConsumer consumer) {
        this.consumer = consumer;
        try {
            SimpleXMLParser.parse(this, stream);
        }
        catch (IOException e) {
            throw new ExceptionConverter(e);
        }
        finally {
            try {
                stream.close();
            }
            catch (Exception ignored) {
                // Best effort: a failure to close must not hide the parse result.
            }
        }
    }

    /**
     * Returns the given pattern word with every digit removed.
     *
     * @param word a pattern word, possibly containing digits
     * @return the characters of <code>word</code> that are not digits
     */
    protected static String getPattern(String word) {
        StringBuffer pat = new StringBuffer();
        int len = word.length();
        for (int i = 0; i < len; i++) {
            char c = word.charAt(i);
            if (!Character.isDigit(c)) {
                pat.append(c);
            }
        }
        return pat.toString();
    }

    /**
     * Splits every String item of the list at the current hyphen character.
     * Each occurrence of the hyphen character is replaced by a Hyphen object
     * built from that character; items that are not Strings are copied as is.
     *
     * <p>The text collected before a hyphen character is added even when it
     * is empty (leading or doubled hyphen character).</p>
     *
     * @param ex list of Strings and other objects
     * @return a new list; <code>ex</code> itself is not modified
     */
    protected ArrayList normalizeException(ArrayList ex) {
        ArrayList res = new ArrayList();
        for (int i = 0; i < ex.size(); i++) {
            Object item = ex.get(i);
            if (!(item instanceof String)) {
                res.add(item);
                continue;
            }
            String str = (String) item;
            StringBuffer buf = new StringBuffer();
            for (int j = 0; j < str.length(); j++) {
                char c = str.charAt(j);
                if (c != hyphenChar) {
                    buf.append(c);
                } else {
                    res.add(buf.toString());
                    buf.setLength(0);
                    // The hyphen character becomes the "pre break" text of
                    // the Hyphen; its other two arguments are left null.
                    res.add(new Hyphen(String.valueOf(hyphenChar), null, null));
                }
            }
            if (buf.length() > 0) {
                res.add(buf.toString());
            }
        }
        return res;
    }

    /**
     * Rebuilds the plain word of an exception: String items are appended
     * verbatim, all other items are cast to Hyphen and contribute their
     * <code>noBreak</code> text when it is not null.
     *
     * @param ex list of Strings and Hyphen objects
     * @return the concatenated word
     */
    protected String getExceptionWord(ArrayList ex) {
        StringBuffer res = new StringBuffer();
        for (int i = 0; i < ex.size(); i++) {
            Object item = ex.get(i);
            if (item instanceof String) {
                res.append((String) item);
            } else {
                String noBreak = ((Hyphen) item).noBreak;
                if (noBreak != null) {
                    res.append(noBreak);
                }
            }
        }
        return res.toString();
    }

    /**
     * Derives the interletter values of a pattern. A digit is copied to the
     * result and the character following it is skipped; every other
     * character contributes <code>'0'</code>. For example <code>"a1b"</code>
     * yields <code>"010"</code>.
     *
     * @param pat a pattern word with embedded digits
     * @return the string of interletter values
     */
    protected static String getInterletterValues(String pat) {
        StringBuffer il = new StringBuffer();
        String word = pat + "a"; // dummy letter acting as a sentinel for a trailing digit
        int len = word.length();
        for (int i = 0; i < len; i++) {
            char c = word.charAt(i);
            if (Character.isDigit(c)) {
                il.append(c);
                i++; // skip the character this digit belongs to
            } else {
                il.append('0');
            }
        }
        return il.toString();
    }

    /**
     * Reports one word to the consumer according to the element currently
     * being read. Words read while no known element (or a hyphen element) is
     * current are ignored.
     *
     * @param word the word to report
     */
    private void dispatchWord(String word) {
        switch (currElement) {
            case ELEM_CLASSES:
                consumer.addClass(word);
                break;
            case ELEM_EXCEPTIONS:
                exception.add(word);
                exception = normalizeException(exception);
                // The consumer gets its own copy, so the working list can be
                // emptied for the next exception word.
                consumer.addException(getExceptionWord(exception),
                                      (ArrayList) exception.clone());
                exception.clear();
                break;
            case ELEM_PATTERNS:
                consumer.addPattern(getPattern(word),
                                    getInterletterValues(word));
                break;
            default:
                break;
        }
    }

    /** Does nothing. */
    public void endDocument() {
    }

    /**
     * Flushes a pending token, if any, and leaves the current element. After
     * a hyphen element the parser goes back to reading exceptions; after any
     * other element no element is current.
     *
     * @param tag the name of the element that ended
     */
    public void endElement(String tag) {
        if (token.length() > 0) {
            dispatchWord(token.toString());
            if (currElement != ELEM_HYPHEN) {
                token.setLength(0);
            }
        }
        if (currElement == ELEM_HYPHEN) {
            currElement = ELEM_EXCEPTIONS;
        } else {
            currElement = 0;
        }
    }

    /** Does nothing. */
    public void startDocument() {
    }

    /**
     * Records which element is being entered. Recognised tags are
     * <code>hyphen-char</code>, <code>classes</code>, <code>patterns</code>,
     * <code>exceptions</code> and <code>hyphen</code>; the token buffer is
     * emptied for every tag.
     *
     * @param tag the element name
     * @param h the attributes of the element
     */
    public void startElement(String tag, java.util.HashMap h) {
        if (tag.equals("hyphen-char")) {
            String hh = (String) h.get("value");
            // Only a single-character value replaces the hyphen character.
            if (hh != null && hh.length() == 1) {
                hyphenChar = hh.charAt(0);
            }
        } else if (tag.equals("classes")) {
            currElement = ELEM_CLASSES;
        } else if (tag.equals("patterns")) {
            currElement = ELEM_PATTERNS;
        } else if (tag.equals("exceptions")) {
            currElement = ELEM_EXCEPTIONS;
            exception = new ArrayList();
        } else if (tag.equals("hyphen")) {
            if (token.length() > 0) {
                exception.add(token.toString());
            }
            exception.add(new Hyphen((String) h.get("pre"),
                                     (String) h.get("no"),
                                     (String) h.get("post")));
            currElement = ELEM_HYPHEN;
        }
        token.setLength(0);
    }

    /**
     * Splits the character data at whitespace and reports every resulting
     * word for the element currently being read.
     *
     * @param str the character data
     */
    public void text(String str) {
        StringTokenizer tk = new StringTokenizer(str);
        while (tk.hasMoreTokens()) {
            dispatchWord(tk.nextToken());
        }
    }

    // PatternConsumer implementation for testing purposes

    /**
     * Prints the class to <code>System.out</code>.
     *
     * @param c the character class
     */
    public void addClass(String c) {
        System.out.println("class: " + c);
    }

    /**
     * Prints the exception word and its parts to <code>System.out</code>.
     *
     * @param w the exception word
     * @param e the parts of the exception
     */
    public void addException(String w, ArrayList e) {
        System.out.println("exception: " + w + " : " + e.toString());
    }

    /**
     * Prints the pattern and its interletter values to <code>System.out</code>.
     *
     * @param p the pattern
     * @param v the interletter values
     */
    public void addPattern(String p, String v) {
        System.out.println("pattern: " + p + " : " + v);
    }

    /**
     * Parses the file named by the first argument and prints its content;
     * does nothing when no argument is given. Any exception is printed
     * rather than propagated.
     *
     * @param args the command line arguments
     * @throws Exception declared for compatibility
     */
    public static void main(String[] args) throws Exception {
        try {
            if (args.length > 0) {
                SimplePatternParser pp = new SimplePatternParser();
                pp.parse(new FileInputStream(args[0]), pp);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}