| 1 18 19 package org.apache.batik.gvt.flow; 20 21 import java.text.AttributedCharacterIterator ; 22 import java.text.AttributedString ; 23 import java.util.HashSet ; 24 import java.util.Set ; 25 26 import org.apache.batik.gvt.text.GVTAttributedCharacterIterator; 27 28 public class TextLineBreaks { 29 public static final AttributedCharacterIterator.Attribute WORD_LIMIT = 30 new GVTAttributedCharacterIterator.TextAttribute("WORD_LIMIT"); 31 32 public static final AttributedCharacterIterator.Attribute FLOW_PARAGRAPH 33 = GVTAttributedCharacterIterator.TextAttribute.FLOW_PARAGRAPH; 34 35 public static final AttributedCharacterIterator.Attribute FLOW_LINE_BREAK 36 = GVTAttributedCharacterIterator.TextAttribute.FLOW_LINE_BREAK; 37 38 static Set lineBrks = new HashSet (); 39 40 static { 41 lineBrks.add(FLOW_PARAGRAPH); 42 lineBrks.add(FLOW_LINE_BREAK); 43 } 44 45 static int findComplexBreak(AttributedCharacterIterator aci) 47 { 48 int cnt = 0; 49 for(char ch = aci.current(); 50 ch == AttributedCharacterIterator.DONE; 51 ch = aci.next(), cnt++) { 52 53 57 if (getCharCharClass(ch) != CHAR_CLASS_SA) 58 break; 59 } 60 return cnt; 61 } 62 63 public static void findLineBrk(AttributedString as) { 66 AttributedCharacterIterator aci = as.getIterator(); 67 if (aci.getEndIndex() == 0) 68 return; 69 char ch = aci.current(), prevCh = (char)-1; 70 byte cls = getCharCharClass(ch); 71 byte curCls = cls; 72 byte prevCls = cls; 73 byte prevPrevCls = -1; 74 int wordCnt = 0; 75 int wordBegin = aci.getBeginIndex(); 76 int ich = wordBegin+1; 78 int lineEnd = aci.getRunLimit(lineBrks); 79 80 if (cls >= CHAR_CLASS_CM) cls = CHAR_CLASS_AL; 81 82 for (ch = aci.next(); 83 ch != AttributedCharacterIterator.DONE; 84 ich++, prevCh = ch, ch = aci.next(), 85 prevPrevCls = prevCls, prevCls = curCls) { 86 87 if (ich == lineEnd) { 88 as.addAttribute(WORD_LIMIT, new Integer (wordCnt++), 89 wordBegin, ich); 90 wordBegin = ich; 91 92 cls = getCharCharClass(ch); 93 curCls = cls; 94 prevCls = cls; 95 if (cls >= CHAR_CLASS_CM) cls = CHAR_CLASS_AL; 96 97 lineEnd = aci.getRunLimit(lineBrks); 98 continue; 99 } 100 curCls = getCharCharClass(ch); 102 if (curCls == CHAR_CLASS_SP) { 103 continue; 105 } 106 107 if (curCls == CHAR_CLASS_SA) { 109 ich += findComplexBreak(aci); 110 ch = aci.previous(); 111 if (ch != AttributedCharacterIterator.DONE) 112 prevCls = getCharCharClass(ch); 113 ch = aci.next(); 114 if (ch != AttributedCharacterIterator.DONE) 115 curCls = cls = getCharCharClass(ch); 116 continue; 117 } 118 119 if ((ch == CHAR_ZERO_WIDTH_JOINER) || 122 (prevCh == CHAR_ZERO_WIDTH_JOINER)) 123 continue; 125 if (curCls == CHAR_CLASS_CM) { 127 if (prevCls == CHAR_CLASS_SP) { 128 cls = CHAR_CLASS_ID; 129 if (prevPrevCls != -1) { 130 if (brkPairs[prevPrevCls][CHAR_CLASS_ID] == 131 BREAK_ACTION_DIRECT) { 132 as.addAttribute(WORD_LIMIT, new Integer (wordCnt++), 133 wordBegin, ich-1); 134 wordBegin = ich-1; 135 } else { 137 } 139 } 140 } 141 continue; 143 } 144 145 byte brk = brkPairs[cls][curCls]; 147 148 if (brk == BREAK_ACTION_DIRECT) { 149 as.addAttribute(WORD_LIMIT, new Integer (wordCnt++), 150 wordBegin, ich); 151 wordBegin = ich; 152 } else if (brk == BREAK_ACTION_INDIRECT) { 154 if (prevCls == CHAR_CLASS_SP) { 155 as.addAttribute(WORD_LIMIT, new Integer (wordCnt++), 156 wordBegin, ich); 157 wordBegin = ich; 158 } 159 } 163 cls = curCls; 164 } 165 166 as.addAttribute(WORD_LIMIT, new Integer (wordCnt++), 168 wordBegin, ich); 169 wordBegin = ich; 170 172 return; 173 } 174 175 public static byte[] stringToLineBreakClasses(String s) { 176 int len = s.length(); 177 byte[] ret = new byte[len]; 178 for (int i=0; i<len; i++) { 179 ret[i] = getCharCharClass(s.charAt(i)); 180 } 181 return ret; 182 } 183 184 public static byte getCharCharClass(char ch) { 185 if (ch < QUICK_LUT_SIZE) { 186 if (quickLut == null) buildQuickLut(); 187 return quickLut[ch]; 188 } 189 int len = raw_data.length; 190 int l = 0; 191 int r = (len/2)-1; 192 int entry = (l+r)/2; 193 while(l <= r) { 194 char min = raw_data[2*entry]; 195 char max = raw_data[2*entry+1]; 196 if (ch < min) r = entry-1; 197 else if (ch > max) l = entry+1; 198 else break; 199 entry = (l+r)/2; 200 } 201 return raw_classes[entry]; 202 } 203 204 public final static char CHAR_ZERO_WIDTH_JOINER = 0x200D; 205 206 protected final static int QUICK_LUT_SIZE = 256; 207 208 protected static void buildQuickLut() { 209 int entry = 0; 210 quickLut = new byte[QUICK_LUT_SIZE]; 211 int i=0; 212 while (i<QUICK_LUT_SIZE) { 213 int max = raw_data[2*entry+1]; 214 byte cls = raw_classes[entry]; 215 while (i<=max) { 216 quickLut[i] = cls; 217 i++; 218 if (i>=QUICK_LUT_SIZE) break; 219 } 220 entry++; 221 } 222 } 223 224 final public static byte BREAK_ACTION_DIRECT = 0; 226 final public static byte BREAK_ACTION_INDIRECT = 1; 228 final public static byte BREAK_ACTION_PROHIBITED = 2; 230 231 final public static String [] brkStrs = { "DB", "IB", "PB" }; 232 233 final public static byte CHAR_CLASS_OP = 0; 235 final public static byte CHAR_CLASS_CL = 1; 236 final public static byte CHAR_CLASS_QU = 2; 237 final public static byte CHAR_CLASS_GL = 3; 238 final public static byte CHAR_CLASS_NS = 4; 239 final public static byte CHAR_CLASS_EX = 5; 240 final public static byte CHAR_CLASS_SY = 6; 241 final public static byte CHAR_CLASS_IS = 7; 242 final public static byte CHAR_CLASS_PR = 8; 243 final public static byte CHAR_CLASS_PO = 9; 244 final public static byte CHAR_CLASS_NU = 10; 245 final public static byte CHAR_CLASS_AL = 11; 246 final public static byte CHAR_CLASS_ID = 12; 247 final public static byte CHAR_CLASS_IN = 13; 248 final public static byte CHAR_CLASS_HY = 14; 249 final public static byte CHAR_CLASS_BA = 15; 250 final public static byte CHAR_CLASS_BB = 16; 251 final public static byte CHAR_CLASS_B2 = 17; 252 final public static byte CHAR_CLASS_ZW = 18; 253 final public static byte CHAR_CLASS_CM = 19; 254 255 final public static byte CHAR_CLASS_SA = 20; 256 final public static byte CHAR_CLASS_SP = 21; 257 final public static byte CHAR_CLASS_BK = 22; 258 final public static byte CHAR_CLASS_AI = 23; 259 final public static byte CHAR_CLASS_CR = 24; 260 final public static byte CHAR_CLASS_LF = 25; 261 final public static byte CHAR_CLASS_SG = 26; 262 final public static byte CHAR_CLASS_XX = 27; 263 final public static byte CHAR_CLASS_CB = 28; 264 265 final public static String [] clsStrs = { 266 "OP", "CL", "QU", "GL", "NS", "EX", "SY", "IS", "PR", "PO", 267 "NU", "AL", "ID", "IN", "HY", "BA", "BB", "B2", "ZW", "CM", 268 "SA", "SP", "BK", "AI", "CR", "LF", "SG", "XX", "CB" }; 269 270 static byte [][]brkPairs = 271 { { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 }, 274 { 0, 2, 1, 1, 2, 2, 2, 2, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 277 { 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 }, 280 { 1, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 }, 283 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 286 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 289 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 292 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 295 { 1, 2, 1, 1, 1, 2, 2, 2, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 2, 1 }, 298 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 301 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 2, 1 }, 304 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 2, 1 }, 307 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 1 }, 310 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 2, 1 }, 313 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 316 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 2, 1 }, 319 { 1, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1 }, 322 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 2, 1 }, 325 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1 }, 328 { 0, 2, 1, 1, 1, 2, 2, 2, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 2, 1 }}; 331 332 static byte [] quickLut = null; 333 334 final static char [] raw_data = { 335 0x0000, 0x0008, 336 0x0009, 0x0009, 337 0x000A, 0x000A, 338 0x000B, 0x000B, 339 0x000C, 0x000C, 340 0x000D, 0x000D, 341 0x000E, 0x001F, 342 0x0020, 0x0020, 343 0x0021, 0x0021, 344 0x0022, 0x0022, 345 0x0023, 0x0023, 346 0x0024, 0x0024, 347 0x0025, 0x0025, 348 0x0026, 0x0026, 349 0x0027, 0x0027, 350 0x0028, 0x0028, 351 0x0029, 0x0029, 352 0x002A, 0x002A, 353 0x002B, 0x002B, 354 0x002C, 0x002C, 355 0x002D, 0x002D, 356 0x002E, 0x002E, 357 0x002F, 0x002F, 358 0x0030, 0x0039, 359 0x003A, 0x003B, 360 0x003C, 0x003E, 361 0x003F, 0x003F, 362 0x0040, 0x005A, 363 0x005B, 0x005B, 364 0x005C, 0x005C, 365 0x005D, 0x005D, 366 0x005E, 0x007A, 367 0x007B, 0x007B, 368 0x007C, 0x007C, 369 0x007D, 0x007D, 370 0x007E, 0x007E, 371 0x007F, 0x009F, 372 0x00A0, 0x00A0, 373 0x00A1, 0x00A1, 374 0x00A2, 0x00A2, 375 0x00A3, 0x00A5, 376 0x00A6, 0x00A6, 377 0x00A7, 0x00A8, 378 0x00A9, 0x00A9, 379 0x00AA, 0x00AA, 380 0x00AB, 0x00AB, 381 0x00AC, 0x00AC, 382 0x00AD, 0x00AD, 383 0x00AE, 0x00AF, 384 0x00B0, 0x00B0, 385 0x00B1, 0x00B1, 386 0x00B2, 0x00B3, 387 0x00B4, 0x00B4, 388 0x00B5, 0x00B5, 389 0x00B6, 0x00BA, 390 0x00BB, 0x00BB, 391 0x00BC, 0x00BF, 392 0x00C0, 0x00C5, 393 0x00C6, 0x00C6, 394 0x00C7, 0x00CF, 395 0x00D0, 0x00D0, 396 0x00D1, 0x00D6, 397 0x00D7, 0x00D8, 398 0x00D9, 0x00DD, 399 0x00DE, 0x00E1, 400 0x00E2, 0x00E5, 401 0x00E6, 0x00E6, 402 0x00E7, 0x00E7, 403 0x00E8, 0x00EA, 404 0x00EB, 0x00EB, 405 0x00EC, 0x00ED, 406 0x00EE, 0x00EF, 407 0x00F0, 0x00F0, 408 0x00F1, 0x00F1, 409 0x00F2, 0x00F3, 410 0x00F4, 0x00F6, 411 0x00F7, 0x00FA, 412 0x00FB, 0x00FB, 413 0x00FC, 0x00FC, 414 0x00FD, 0x00FD, 415 0x00FE, 0x00FE, 416 0x00FF, 0x0100, 417 0x0101, 0x0101, 418 0x0102, 0x0110, 419 0x0111, 0x0111, 420 0x0112, 0x0112, 421 0x0113, 0x0113, 422 0x0114, 0x011A, 423 0x011B, 0x011B, 424 0x011C, 0x0125, 425 0x0126, 0x0127, 426 0x0128, 0x012A, 427 0x012B, 0x012B, 428 0x012C, 0x0130, 429 0x0131, 0x0133, 430 0x0134, 0x0137, 431 0x0138, 0x0138, 432 0x0139, 0x013E, 433 0x013F, 0x0142, 434 0x0143, 0x0143, 435 0x0144, 0x0144, 436 0x0145, 0x0147, 437 0x0148, 0x014A, 438 0x014B, 0x014C, 439 0x014D, 0x014D, 440 0x014E, 0x0151, 441 0x0152, 0x0153, 442 0x0154, 0x0165, 443 0x0166, 0x0167, 444 0x0168, 0x016A, 445 0x016B, 0x016B, 446 0x016C, 0x01CD, 447 0x01CE, 0x01CE, 448 0x01CF, 0x01CF, 449 0x01D0, 0x01D0, 450 0x01D1, 0x01D1, 451 0x01D2, 0x01D2, 452 0x01D3, 0x01D3, 453 0x01D4, 0x01D4, 454 0x01D5, 0x01D5, 455 0x01D6, 0x01D6, 456 0x01D7, 0x01D7, 457 0x01D8, 0x01D8, 458 0x01D9, 0x01D9, 459 0x01DA, 0x01DA, 460 0x01DB, 0x01DB, 461 0x01DC, 0x01DC, 462 0x01DD, 0x0250, 463 0x0251, 0x0251, 464 0x0252, 0x0260, 465 0x0261, 0x0261, 466 0x0262, 0x02C6, 467 0x02C7, 0x02C7, 468 0x02C8, 0x02C8, 469 0x02C9, 0x02CB, 470 0x02CC, 0x02CC, 471 0x02CD, 0x02CD, 472 0x02CE, 0x02CF, 473 0x02D0, 0x02D0, 474 0x02D1, 0x02D7, 475 0x02D8, 0x02DB, 476 0x02DC, 0x02DC, 477 0x02DD, 0x02DD, 478 0x02DE, 0x02EE, 479 0x0300, 0x036F, 480 0x0374, 0x0390, 481 0x0391, 0x03A9, 482 0x03AA, 0x03B0, 483 0x03B1, 0x03C1, 484 0x03C2, 0x03C2, 485 0x03C3, 0x03C9, 486 0x03CA, 0x0400, 487 0x0401, 0x0401, 488 0x0402, 0x040F, 489 0x0410, 0x044F, 490 0x0450, 0x0450, 491 0x0451, 0x0451, 492 0x0452, 0x0482, 493 0x0483, 0x0489, 494 0x048A, 0x0587, 495 0x0589, 0x0589, 496 0x058A, 0x058A, 497 0x0591, 0x05BD, 498 0x05BE, 0x05BE, 499 0x05BF, 0x05BF, 500 0x05C0, 0x05C0, 501 0x05C1, 0x05C2, 502 0x05C3, 0x05C3, 503 0x05C4, 0x05C4, 504 0x05D0, 0x064A, 505 0x064B, 0x0655, 506 0x0660, 0x0669, 507 0x066A, 0x066F, 508 0x0670, 0x0670, 509 0x0671, 0x06D5, 510 0x06D6, 0x06E4, 511 0x06E5, 0x06E6, 512 0x06E7, 0x06E8, 513 0x06E9, 0x06E9, 514 0x06EA, 0x06ED, 515 0x06F0, 0x06F9, 516 0x06FA, 0x070D, 517 0x070F, 0x070F, 518 0x0710, 0x0710, 519 0x0711, 0x0711, 520 0x0712, 0x072C, 521 0x0730, 0x074A, 522 0x0780, 0x07A5, 523 0x07A6, 0x07B0, 524 0x07B1, 0x07B1, 525 0x0901, 0x0903, 526 0x0905, 0x0939, 527 0x093C, 0x093C, 528 0x093D, 0x093D, 529 0x093E, 0x094D, 530 0x0950, 0x0950, 531 0x0951, 0x0954, 532 0x0958, 0x0961, 533 0x0962, 0x0963, 534 0x0964, 0x0965, 535 0x0966, 0x096F, 536 0x0970, 0x0970, 537 0x0981, 0x0983, 538 0x0985, 0x09B9, 539 0x09BC, 0x09D7, 540 0x09DC, 0x09E1, 541 0x09E2, 0x09E3, 542 0x09E6, 0x09EF, 543 0x09F0, 0x09F1, 544 0x09F2, 0x09F3, 545 0x09F4, 0x09FA, 546 0x0A02, 0x0A02, 547 0x0A05, 0x0A39, 548 0x0A3C, 0x0A4D, 549 0x0A59, 0x0A5E, 550 0x0A66, 0x0A6F, 551 0x0A70, 0x0A71, 552 0x0A72, 0x0A74, 553 0x0A81, 0x0A83, 554 0x0A85, 0x0AB9, 555 0x0ABC, 0x0ABC, 556 0x0ABD, 0x0ABD, 557 0x0ABE, 0x0ACD, 558 0x0AD0, 0x0AE0, 559 0x0AE6, 0x0AEF, 560 0x0B01, 0x0B03, 561 0x0B05, 0x0B39, 562 0x0B3C, 0x0B3C, 563 0x0B3D, 0x0B3D, 564 0x0B3E, 0x0B57, 565 0x0B5C, 0x0B61, 566 0x0B66, 0x0B6F, 567 0x0B70, 0x0B70, 568 0x0B82, 0x0B82, 569 0x0B83, 0x0BB9, 570 0x0BBE, 0x0BD7, 571 0x0BE7, 0x0BEF, 572 0x0BF0, 0x0BF2, 573 0x0C01, 0x0C03, 574 0x0C05, 0x0C39, 575 0x0C3E, 0x0C56, 576 0x0C60, 0x0C61, 577 0x0C66, 0x0C6F, 578 0x0C82, 0x0C83, 579 0x0C85, 0x0CB9, 580 0x0CBE, 0x0CD6, 581 0x0CDE, 0x0CE1, 582 0x0CE6, 0x0CEF, 583 0x0D02, 0x0D03, 584 0x0D05, 0x0D39, 585 0x0D3E, 0x0D57, 586 0x0D60, 0x0D61, 587 0x0D66, 0x0D6F, 588 0x0D82, 0x0D83, 589 0x0D85, 0x0DC6, 590 0x0DCA, 0x0DF3, 591 0x0DF4, 0x0DF4, 592 0x0E01, 0x0E30, 593 0x0E31, 0x0E31, 594 0x0E32, 0x0E33, 595 0x0E34, 0x0E3A, 596 0x0E3F, 0x0E3F, 597 0x0E40, 0x0E46, 598 0x0E47, 0x0E4E, 599 0x0E4F, 0x0E4F, 600 0x0E50, 0x0E59, 601 0x0E5A, 0x0E5B, 602 0x0E81, 0x0EB0, 603 0x0EB1, 0x0EB1, 604 0x0EB2, 0x0EB3, 605 0x0EB4, 0x0EBC, 606 0x0EBD, 0x0EC6, 607 0x0EC8, 0x0ECD, 608 0x0ED0, 0x0ED9, 609 0x0EDC, 0x0EDD, 610 0x0F00, 0x0F0A, 611 0x0F0B, 0x0F0B, 612 0x0F0C, 0x0F0C, 613 0x0F0D, 0x0F17, 614 0x0F18, 0x0F19, 615 0x0F1A, 0x0F1F, 616 0x0F20, 0x0F29, 617 0x0F2A, 0x0F34, 618 0x0F35, 0x0F35, 619 0x0F36, 0x0F36, 620 0x0F37, 0x0F37, 621 0x0F38, 0x0F38, 622 0x0F39, 0x0F39, 623 0x0F3A, 0x0F3A, 624 0x0F3B, 0x0F3B, 625 0x0F3C, 0x0F3C, 626 0x0F3D, 0x0F3D, 627 0x0F3E, 0x0F3F, 628 0x0F40, 0x0F6A, 629 0x0F71, 0x0F84, 630 0x0F85, 0x0F85, 631 0x0F86, 0x0F87, 632 0x0F88, 0x0F8B, 633 0x0F90, 0x0FBC, 634 0x0FBE, 0x0FC5, 635 0x0FC6, 0x0FC6, 636 0x0FC7, 0x0FCF, 637 0x1000, 0x102A, 638 0x102C, 0x1039, 639 0x1040, 0x1049, 640 0x104A, 0x104F, 641 0x1050, 0x1055, 642 0x1056, 0x1059, 643 0x10A0, 0x10FB, 644 0x1100, 0x115F, 645 0x1160, 0x11F9, 646 0x1200, 0x135A, 647 0x1361, 0x1361, 648 0x1362, 0x1368, 649 0x1369, 0x1371, 650 0x1372, 0x1676, 651 0x1680, 0x1680, 652 0x1681, 0x169A, 653 0x169B, 0x169B, 654 0x169C, 0x169C, 655 0x16A0, 0x1711, 656 0x1712, 0x1714, 657 0x1720, 0x1731, 658 0x1732, 0x1734, 659 0x1735, 0x1751, 660 0x1752, 0x1753, 661 0x1760, 0x1770, 662 0x1772, 0x1773, 663 0x1780, 0x17B3, 664 0x17B4, 0x17D3, 665 0x17D4, 0x17D4, 666 0x17D5, 0x17D5, 667 0x17D6, 0x17DA, 668 0x17DB, 0x17DB |