1 51 package org.apache.fop.layout.hyphenation; 52 53 import java.io.*; 54 import java.util.ArrayList ; 55 import java.util.HashMap ; 56 57 64 public class HyphenationTree extends TernaryTree implements PatternConsumer, 65 Serializable { 66 67 70 protected ByteVector vspace; 71 72 75 protected HashMap stoplist; 76 77 80 protected TernaryTree classmap; 81 82 85 private transient TernaryTree ivalues; 86 87 public HyphenationTree() { 88 stoplist = new HashMap (23); classmap = new TernaryTree(); 90 vspace = new ByteVector(); 91 vspace.alloc(1); } 93 94 103 protected int packValues(String values) { 104 int i, n = values.length(); 105 int m = (n & 1) == 1 ? (n >> 1) + 2 : (n >> 1) + 1; 106 int offset = vspace.alloc(m); 107 byte[] va = vspace.getArray(); 108 for (i = 0; i < n; i++) { 109 int j = i >> 1; 110 byte v = (byte)((values.charAt(i) - '0' + 1) & 0x0f); 111 if ((i & 1) == 1) 112 va[j + offset] = (byte)(va[j + offset] | v); 113 else 114 va[j + offset] = (byte)(v << 4); } 116 va[m - 1 + offset] = 0; return offset; 118 } 119 120 protected String unpackValues(int k) { 121 StringBuffer buf = new StringBuffer (); 122 byte v = vspace.get(k++); 123 while (v != 0) { 124 char c = (char)((v >>> 4) - 1 + '0'); 125 buf.append(c); 126 c = (char)(v & 0x0f); 127 if (c == 0) 128 break; 129 c = (char)(c - 1 + '0'); 130 buf.append(c); 131 v = vspace.get(k++); 132 } 133 return buf.toString(); 134 } 135 136 139 public void loadPatterns(String filename) throws HyphenationException { 140 PatternParser pp = new PatternParser(this); 141 ivalues = new TernaryTree(); 142 143 pp.parse(filename); 144 145 trimToSize(); 148 vspace.trimToSize(); 149 classmap.trimToSize(); 150 151 ivalues = null; 153 } 154 155 public String findPattern(String pat) { 156 int k = super.find(pat); 157 if (k >= 0) 158 return unpackValues(k); 159 return ""; 160 } 161 162 166 protected int hstrcmp(char[] s, int si, char[] t, int ti) { 167 for (; s[si] == t[ti]; si++, ti++) 168 if (s[si] == 0) 169 return 0; 170 if (t[ti] == 0) 171 return 0; 172 return s[si] - t[ti]; 173 } 174 175 protected byte[] getValues(int k) { 176 StringBuffer buf = new StringBuffer (); 177 byte v = vspace.get(k++); 178 while (v != 0) { 179 char c = (char)((v >>> 4) - 1); 180 buf.append(c); 181 c = (char)(v & 0x0f); 182 if (c == 0) 183 break; 184 c = (char)(c - 1); 185 buf.append(c); 186 v = vspace.get(k++); 187 } 188 byte[] res = new byte[buf.length()]; 189 for (int i = 0; i < res.length; i++) 190 res[i] = (byte)buf.charAt(i); 191 return res; 192 } 193 194 218 protected void searchPatterns(char[] word, int index, byte[] il) { 219 byte[] values; 220 int i = index; 221 char p, q; 222 char sp = word[i]; 223 p = root; 224 225 while (p > 0 && p < sc.length) { 226 if (sc[p] == 0xFFFF) { 227 if (hstrcmp(word, i, kv.getArray(), lo[p]) == 0) { 228 values = getValues(eq[p]); int j = index; 230 for (int k = 0; k < values.length; k++) { 231 if (j < il.length && values[k] > il[j]) 232 il[j] = values[k]; 233 j++; 234 } 235 } 236 return; 237 } 238 int d = sp - sc[p]; 239 if (d == 0) { 240 if (sp == 0) { 241 break; 242 } 243 sp = word[++i]; 244 p = eq[p]; 245 q = p; 246 247 while (q > 0 && q < sc.length) { 250 if (sc[q] == 0xFFFF) { break; 252 } 253 if (sc[q] == 0) { 254 values = getValues(eq[q]); 255 int j = index; 256 for (int k = 0; k < values.length; k++) { 257 if (j < il.length && values[k] > il[j]) { 258 il[j] = values[k]; 259 } 260 j++; 261 } 262 break; 263 } else { 264 q = lo[q]; 265 266 271 } 272 } 273 } else 274 p = d < 0 ? lo[p] : hi[p]; 275 } 276 } 277 278 288 public Hyphenation hyphenate(String word, int remainCharCount, 289 int pushCharCount) { 290 char[] w = word.toCharArray(); 291 return hyphenate(w, 0, w.length, remainCharCount, pushCharCount); 292 } 293 294 306 public Hyphenation hyphenate(char[] w, int offset, int len, 307 int remainCharCount, int pushCharCount) { 308 int i; 309 char[] word = new char[len + 3]; 310 311 char[] c = new char[2]; 313 for (i = 1; i <= len; i++) { 314 c[0] = w[offset + i - 1]; 315 int nc = classmap.find(c, 0); 316 if (nc < 0) { return null; 318 } 319 word[i] = (char)nc; 320 } 321 int[] result = new int[len + 1]; 322 int k = 0; 323 324 String sw = new String (word, 1, len); 326 if (stoplist.containsKey(sw)) { 327 ArrayList hw = (ArrayList )stoplist.get(sw); 329 int j = 0; 330 for (i = 0; i < hw.size(); i++) { 331 Object o = hw.get(i); 332 if (o instanceof String ) { 333 j += ((String )o).length(); 334 if (j >= remainCharCount && j < (len - pushCharCount)) 335 result[k++] = j; 336 } 337 } 338 } else { 339 word[0] = '.'; word[len + 1] = '.'; word[len + 2] = 0; byte[] il = new byte[len + 3]; for (i = 0; i < len + 1; i++) { 345 searchPatterns(word, i, il); 346 } 347 348 for (i = 0; i < len; i++) { 350 if (((il[i + 1] & 1) == 1) && i >= remainCharCount 351 && i <= (len - pushCharCount)) { 352 result[k++] = i; 353 } 354 } 355 } 356 357 358 if (k > 0) { 359 int[] res = new int[k]; 361 System.arraycopy(result, 0, res, 0, k); 362 return new Hyphenation(new String (w, offset, len), res); 363 } else { 364 return null; 365 } 366 } 367 368 380 public void addClass(String chargroup) { 381 if (chargroup.length() > 0) { 382 char equivChar = chargroup.charAt(0); 383 char[] key = new char[2]; 384 key[1] = 0; 385 for (int i = 0; i < chargroup.length(); i++) { 386 key[0] = chargroup.charAt(i); 387 classmap.insert(key, 0, equivChar); 388 } 389 } 390 } 391 392 400 public void addException(String word, ArrayList hyphenatedword) { 401 stoplist.put(word, hyphenatedword); 402 } 403 404 414 public void addPattern(String pattern, String ivalue) { 415 int k = ivalues.find(ivalue); 416 if (k <= 0) { 417 k = packValues(ivalue); 418 ivalues.insert(ivalue, (char)k); 419 } 420 insert(pattern, (char)k); 421 } 422 423 public void printStats() { 424 System.out.println("Value space size = " 425 + Integer.toString(vspace.length())); 426 super.printStats(); 427 428 } 429 430 public static void main(String [] argv) throws Exception { 431 HyphenationTree ht = null; 432 int minCharCount = 2; 433 BufferedReader in = 434 new BufferedReader(new InputStreamReader(System.in)); 435 for (; ; ) { 436 System.out.print("l:\tload patterns from XML\n" 437 + "L:\tload patterns from serialized object\n" 438 + "s:\tset minimun character count\n" 439 + "w:\twrite hyphenation tree to object file\n" 440 + "h:\thyphenate\n" 441 + "f:\tfind pattern\n" 442 + "b:\tbenchmark\n" 443 + "q:\tquit\n" 444 + "\nCommand:"); 445 String token = in.readLine().trim(); 446 if (token.equals("f")) { 447 System.out.print("Pattern: "); 448 token = in.readLine().trim(); 449 System.out.println("Values: " + ht.findPattern(token)); 450 } else if (token.equals("s")) { 451 System.out.print("Minimun value: "); 452 token = in.readLine().trim(); 453 minCharCount = Integer.parseInt(token); 454 } else if (token.equals("l")) { 455 ht = new HyphenationTree(); 456 System.out.print("XML file name: "); 457 token = in.readLine().trim(); 458 ht.loadPatterns(token); 459 } else if (token.equals("L")) { 460 ObjectInputStream ois = null; 461 System.out.print("Object file name: "); 462 token = in.readLine().trim(); 463 try { 464 ois = new ObjectInputStream(new FileInputStream(token)); 465 ht = (HyphenationTree)ois.readObject(); 466 } catch (Exception e) { 467 e.printStackTrace(); 468 } 469 finally { 470 if (ois != null) { 471 try { 472 ois.close(); 473 } catch (IOException e) {} 474 } 475 } 476 } else if (token.equals("w")) { 477 System.out.print("Object file name: "); 478 token = in.readLine().trim(); 479 ObjectOutputStream oos = null; 480 try { 481 oos = new ObjectOutputStream(new FileOutputStream(token)); 482 oos.writeObject(ht); 483 } catch (Exception e) { 484 e.printStackTrace(); 485 } 486 finally { 487 if (oos != null) { 488 try { 489 oos.flush(); 490 } catch (IOException e) {} 491 try { 492 oos.close(); 493 } catch (IOException e) {} 494 } 495 } 496 } else if (token.equals("h")) { 497 System.out.print("Word: "); 498 token = in.readLine().trim(); 499 System.out.print("Hyphenation points: "); 500 System.out.println(ht.hyphenate(token, minCharCount, 501 minCharCount)); 502 } else if (token.equals("b")) { 503 if (ht == null) { 504 System.out.println("No patterns has been loaded."); 505 break; 506 } 507 System.out.print("Word list filename: "); 508 token = in.readLine().trim(); 509 long starttime = 0; 510 int counter = 0; 511 ; 512 try { 513 BufferedReader reader = 514 new BufferedReader(new FileReader(token)); 515 String line; 516 517 starttime = System.currentTimeMillis(); 518 while ((line = reader.readLine()) != null) { 519 Hyphenation hyp = ht.hyphenate(line, minCharCount, 521 minCharCount); 522 if (hyp != null) { 523 String hword = hyp.toString(); 524 } else { 527 } 529 counter++; 530 } 531 } catch (Exception ioe) { 532 System.out.println("Exception " + ioe); 533 ioe.printStackTrace(); 534 } 535 long endtime = System.currentTimeMillis(); 536 long result = endtime - starttime; 537 System.out.println(counter + " words in " + result 538 + " Millisekunden hyphenated"); 539 540 } else if (token.equals("q")) 541 break; 542 } 543 544 } 545 546 } 547 | Popular Tags |