KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > lowagie > text > pdf > BidiOrder


1 package com.lowagie.text.pdf;
2
3 /*
4  * Copyright 2003 Paulo Soares
5  *
6  * The contents of this file are subject to the Mozilla Public License Version 1.1
7  * (the "License"); you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at http://www.mozilla.org/MPL/
9  *
10  * Software distributed under the License is distributed on an "AS IS" basis,
11  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12  * for the specific language governing rights and limitations under the License.
13  *
14  * The Original Code is 'iText, a free JAVA-PDF library'.
15  *
16  * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
17  * the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
18  * All Rights Reserved.
19  * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
20  * are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
21  *
22  * Contributor(s): all the names of the contributors are added in the source code
23  * where applicable.
24  *
25  * Alternatively, the contents of this file may be used under the terms of the
26  * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
27  * provisions of LGPL are applicable instead of those above. If you wish to
28  * allow use of your version of this file only under the terms of the LGPL
29  * License and not to allow others to use your version of this file under
30  * the MPL, indicate your decision by deleting the provisions above and
31  * replace them with the notice and other provisions required by the LGPL.
32  * If you do not delete the provisions above, a recipient may use your version
33  * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
34  *
35  * This library is free software; you can redistribute it and/or modify it
36  * under the terms of the MPL as stated above or under the terms of the GNU
37  * Library General Public License as published by the Free Software Foundation;
38  * either version 2 of the License, or any later version.
39  *
40  * This library is distributed in the hope that it will be useful, but WITHOUT
41  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
42  * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
43  * details.
44  *
45  * If you didn't download this code from the following link, you should check if
46  * you aren't using an obsolete version:
47  * http://www.lowagie.com/iText/
48  */

49
50 /*
51  * (C) Copyright IBM Corp. 1999, All Rights Reserved
52  *
53  * version 1.1
54  */

55
56 /*
57  * As stated in the Javadoc comments below, materials from Unicode.org
58  * are used in this class. The following license applies to these materials:
59  * http://www.unicode.org/copyright.html#Exhibit1
60  *
61  * EXHIBIT 1
62  * UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
63  *
64  * Unicode Data Files include all data files under the directories
65  * http://www.unicode.org/Public/, http://www.unicode.org/reports/,
66  * and http://www.unicode.org/cldr/data/ .
67  * Unicode Software includes any source code published in the Unicode Standard
68  * or under the directories http://www.unicode.org/Public/, http://www.unicode.org/reports/,
69  * and http://www.unicode.org/cldr/data/.
70  *
71  * NOTICE TO USER: Carefully read the following legal agreement. BY DOWNLOADING,
72  * INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S DATA FILES ("DATA FILES"),
73  * AND/OR SOFTWARE ("SOFTWARE"), YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY,
74  * ALL OF THE TERMS AND CONDITIONS OF THIS AGREEMENT. IF YOU DO NOT AGREE, DO NOT
75  * DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE THE DATA FILES OR SOFTWARE.
76  *
77  * COPYRIGHT AND PERMISSION NOTICE
78  * Copyright (C) 1991-2007 Unicode, Inc. All rights reserved. Distributed under
79  * the Terms of Use in http://www.unicode.org/copyright.html.
80  *
81  * Permission is hereby granted, free of charge, to any person obtaining a copy
82  * of the Unicode data files and any associated documentation (the "Data Files")
83  * or Unicode software and any associated documentation (the "Software") to deal
84  * in the Data Files or Software without restriction, including without limitation
85  * the rights to use, copy, modify, merge, publish, distribute, and/or sell copies
86  * of the Data Files or Software, and to permit persons to whom the Data Files
87  * or Software are furnished to do so, provided that (a) the above copyright
88  * notice(s) and this permission notice appear with all copies of the Data Files
89  * or Software, (b) both the above copyright notice(s) and this permission notice
90  * appear in associated documentation, and (c) there is clear notice in each
91  * modified Data File or in the Software as well as in the documentation associated
92  * with the Data File(s) or Software that the data or software has been modified.
93  *
94  * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
95  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
96  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
97  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE
98  * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY
99  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
100  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
101  * CONNECTION WITH THE USE OR PERFORMANCE OF THE DATA FILES OR SOFTWARE.
102  *
103  * Except as contained in this notice, the name of a copyright holder shall not
104  * be used in advertising or otherwise to promote the sale, use or other dealings
105  * in these Data Files or Software without prior written authorization of the
106  * copyright holder.
107  */

108
109 /**
110  * Reference implementation of the Unicode 3.0 Bidi algorithm.
111  *
112  * <p>
113  * This implementation is not optimized for performance. It is intended
114  * as a reference implementation that closely follows the specification
115  * of the Bidirectional Algorithm in The Unicode Standard version 3.0.
116  * <p>
117  * <b>Input:</b><br>
118  * There are two levels of input to the algorithm, since clients may prefer
119  * to supply some information from out-of-band sources rather than relying on
120  * the default behavior.
121  * <ol>
122  * <li>unicode type array
123  * <li>unicode type array, with externally supplied base line direction
124  * </ol>
125  * <p><b>Output:</b><br>
126  * Output is separated into several stages as well, to better enable clients
127  * to evaluate various aspects of implementation conformance.
128  * <ol>
129  * <li>levels array over entire paragraph
130  * <li>reordering array over entire paragraph
131  * <li>levels array over line
132  * <li>reordering array over line
133  * </ol>
134  * Note that for conformance, algorithms are only required to generate correct
135  * reordering and character directionality (odd or even levels) over a line.
136  * Generating identical level arrays over a line is not required. Bidi
137  * explicit format codes (LRE, RLE, LRO, RLO, PDF) and BN can be assigned
138  * arbitrary levels and positions as long as the other text matches.
139  * <p>
140  * As the algorithm is defined to operate on a single paragraph at a time,
141  * this implementation is written to handle single paragraphs. Thus
142  * rule P1 is presumed by this implementation-- the data provided to the
143  * implementation is assumed to be a single paragraph, and either contains no
144  * 'B' codes, or a single 'B' code at the end of the input. 'B' is allowed
145  * as input to illustrate how the algorithm assigns it a level.
146  * <p>
147  * Also note that rules L3 and L4 depend on the rendering engine that uses
148  * the result of the bidi algorithm. This implementation assumes that the
149  * rendering engine expects combining marks in visual order (e.g. to the
150  * left of their base character in RTL runs) and that it adjusts the glyphs
151  * used to render mirrored characters that are in RTL runs so that they
152  * render appropriately.
153  *
154  * @author Doug Felt
155  */

156
157 public final class BidiOrder {
158     private byte[] initialTypes;
159     private byte[] embeddings; // generated from processing format codes
160
private byte paragraphEmbeddingLevel = -1; // undefined
161

162     private int textLength; // for convenience
163
private byte[] resultTypes; // for paragraph, not lines
164
private byte[] resultLevels; // for paragraph, not lines
165

166     // The bidi types
167

168     /** Left-to-right*/
169     public static final byte L = 0;
170     
171     /** Left-to-Right Embedding */
172     public static final byte LRE = 1;
173     
174     /** Left-to-Right Override */
175     public static final byte LRO = 2;
176     
177     /** Right-to-Left */
178     public static final byte R = 3;
179     
180     /** Right-to-Left Arabic */
181     public static final byte AL = 4;
182     
183     /** Right-to-Left Embedding */
184     public static final byte RLE = 5;
185     
186     /** Right-to-Left Override */
187     public static final byte RLO = 6;
188     
189     /** Pop Directional Format */
190     public static final byte PDF = 7;
191     
192     /** European Number */
193     public static final byte EN = 8;
194     
195     /** European Number Separator */
196     public static final byte ES = 9;
197     
198     /** European Number Terminator */
199     public static final byte ET = 10;
200     
201     /** Arabic Number */
202     public static final byte AN = 11;
203     
204     /** Common Number Separator */
205     public static final byte CS = 12;
206     
207     /** Non-Spacing Mark */
208     public static final byte NSM = 13;
209     
210     /** Boundary Neutral */
211     public static final byte BN = 14;
212     
213     /** Paragraph Separator */
214     public static final byte B = 15;
215     
216     /** Segment Separator */
217     public static final byte S = 16;
218     
219     /** Whitespace */
220     public static final byte WS = 17;
221     
222     /** Other Neutrals */
223     public static final byte ON = 18;
224     
225     /** Minimum bidi type value. */
226     public static final byte TYPE_MIN = 0;
227     
228     /** Maximum bidi type value. */
229     public static final byte TYPE_MAX = 18;
230     
231     //
232
// Input
233
//
234

235     /**
236      * Initialize using an array of direction types. Types range from TYPE_MIN to TYPE_MAX inclusive
237      * and represent the direction codes of the characters in the text.
238      *
239      * @param types the types array
240      */

241     public BidiOrder(byte[] types) {
242         validateTypes(types);
243         
244         this.initialTypes = (byte[])types.clone(); // client type array remains unchanged
245

246         runAlgorithm();
247     }
248     
249     /**
250      * Initialize using an array of direction types and an externally supplied paragraph embedding level.
251      * The embedding level may be -1, 0, or 1. -1 means to apply the default algorithm (rules P2 and P3),
252      * 0 is for LTR paragraphs, and 1 is for RTL paragraphs.
253      *
254      * @param types the types array
255      * @param paragraphEmbeddingLevel the externally supplied paragraph embedding level.
256      */

257     public BidiOrder(byte[] types, byte paragraphEmbeddingLevel) {
258         validateTypes(types);
259         validateParagraphEmbeddingLevel(paragraphEmbeddingLevel);
260         
261         this.initialTypes = (byte[])types.clone(); // client type array remains unchanged
262
this.paragraphEmbeddingLevel = paragraphEmbeddingLevel;
263         
264         runAlgorithm();
265     }
266     
267     public BidiOrder(char text[], int offset, int length, byte paragraphEmbeddingLevel) {
268         initialTypes = new byte[length];
269         for (int k = 0; k < length; ++k) {
270             initialTypes[k] = rtypes[text[offset + k]];
271         }
272         validateParagraphEmbeddingLevel(paragraphEmbeddingLevel);
273         
274         this.paragraphEmbeddingLevel = paragraphEmbeddingLevel;
275         
276         runAlgorithm();
277     }
278     
279     public final static byte getDirection(char c) {
280         return rtypes[c];
281     }
282     
283     /**
284      * The algorithm.
285      * Does not include line-based processing (Rules L1, L2).
286      * These are applied later in the line-based phase of the algorithm.
287      */

288     private void runAlgorithm() {
289         textLength = initialTypes.length;
290         
291         // Initialize output types.
292
// Result types initialized to input types.
293
resultTypes = (byte[])initialTypes.clone();
294         
295         
296         // 1) determining the paragraph level
297
// Rule P1 is the requirement for entering this algorithm.
298
// Rules P2, P3.
299
// If no externally supplied paragraph embedding level, use default.
300
if (paragraphEmbeddingLevel == -1) {
301             determineParagraphEmbeddingLevel();
302         }
303         
304         // Initialize result levels to paragraph embedding level.
305
resultLevels = new byte[textLength];
306         setLevels(0, textLength, paragraphEmbeddingLevel);
307         
308         // 2) Explicit levels and directions
309
// Rules X1-X8.
310
determineExplicitEmbeddingLevels();
311         
312         // Rule X9.
313
textLength = removeExplicitCodes();
314         
315         // Rule X10.
316
// Run remainder of algorithm one level run at a time
317
byte prevLevel = paragraphEmbeddingLevel;
318         int start = 0;
319         while (start < textLength) {
320             byte level = resultLevels[start];
321             byte prevType = typeForLevel(Math.max(prevLevel, level));
322             
323             int limit = start + 1;
324             while (limit < textLength && resultLevels[limit] == level) {
325                 ++limit;
326             }
327             
328             byte succLevel = limit < textLength ? resultLevels[limit] : paragraphEmbeddingLevel;
329             byte succType = typeForLevel(Math.max(succLevel, level));
330             
331             // 3) resolving weak types
332
// Rules W1-W7.
333
resolveWeakTypes(start, limit, level, prevType, succType);
334             
335             // 4) resolving neutral types
336
// Rules N1-N3.
337
resolveNeutralTypes(start, limit, level, prevType, succType);
338             
339             // 5) resolving implicit embedding levels
340
// Rules I1, I2.
341
resolveImplicitLevels(start, limit, level, prevType, succType);
342             
343             prevLevel = level;
344             start = limit;
345         }
346         
347         // Reinsert explicit codes and assign appropriate levels to 'hide' them.
348
// This is for convenience, so the resulting level array maps 1-1
349
// with the initial array.
350
// See the implementation suggestions section of TR#9 for guidelines on
351
// how to implement the algorithm without removing and reinserting the codes.
352
textLength = reinsertExplicitCodes(textLength);
353     }
354     
355     /**
356      * 1) determining the paragraph level.
357      * <p>
358      * Rules P2, P3.
359      * <p>
360      * At the end of this function, the member variable paragraphEmbeddingLevel is set to either 0 or 1.
361      */

362     private void determineParagraphEmbeddingLevel() {
363         byte strongType = -1; // unknown
364

365         // Rule P2.
366
for (int i = 0; i < textLength; ++i) {
367             byte t = resultTypes[i];
368             if (t == L || t == AL || t == R) {
369                 strongType = t;
370                 break;
371             }
372         }
373         
374         // Rule P3.
375
if (strongType == -1) { // none found
376
// default embedding level when no strong types found is 0.
377
paragraphEmbeddingLevel = 0;
378         } else if (strongType == L) {
379             paragraphEmbeddingLevel = 0;
380         } else { // AL, R
381
paragraphEmbeddingLevel = 1;
382         }
383     }
384     
385     /**
386      * Process embedding format codes.
387      * <p>
388      * Calls processEmbeddings to generate an embedding array from the explicit format codes. The
389      * embedding overrides in the array are then applied to the result types, and the result levels are
390      * initialized.
391      * @see #processEmbeddings
392      */

393     private void determineExplicitEmbeddingLevels() {
394         embeddings = processEmbeddings(resultTypes, paragraphEmbeddingLevel);
395         
396         for (int i = 0; i < textLength; ++i) {
397             byte level = embeddings[i];
398             if ((level & 0x80) != 0) {
399                 level &= 0x7f;
400                 resultTypes[i] = typeForLevel(level);
401             }
402             resultLevels[i] = level;
403         }
404     }
405     
406     /**
407      * Rules X9.
408      * Remove explicit codes so that they may be ignored during the remainder
409      * of the main portion of the algorithm. The length of the resulting text
410      * is returned.
411      * @return the length of the data excluding explicit codes and BN.
412      */

413     private int removeExplicitCodes() {
414         int w = 0;
415         for (int i = 0; i < textLength; ++i) {
416             byte t = initialTypes[i];
417             if (!(t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN)) {
418                 embeddings[w] = embeddings[i];
419                 resultTypes[w] = resultTypes[i];
420                 resultLevels[w] = resultLevels[i];
421                 w++;
422             }
423         }
424         return w; // new textLength while explicit levels are removed
425
}
426     
427     /**
428      * Reinsert levels information for explicit codes.
429      * This is for ease of relating the level information
430      * to the original input data. Note that the levels
431      * assigned to these codes are arbitrary, they're
432      * chosen so as to avoid breaking level runs.
433      * @param textLength the length of the data after compression
434      * @return the length of the data (original length of
435      * types array supplied to constructor)
436      */

437     private int reinsertExplicitCodes(int textLength) {
438         for (int i = initialTypes.length; --i >= 0;) {
439             byte t = initialTypes[i];
440             if (t == LRE || t == RLE || t == LRO || t == RLO || t == PDF || t == BN) {
441                 embeddings[i] = 0;
442                 resultTypes[i] = t;
443                 resultLevels[i] = -1;
444             } else {
445                 --textLength;
446                 embeddings[i] = embeddings[textLength];
447                 resultTypes[i] = resultTypes[textLength];
448                 resultLevels[i] = resultLevels[textLength];
449             }
450         }
451         
452         // now propagate forward the levels information (could have
453
// propagated backward, the main thing is not to introduce a level
454
// break where one doesn't already exist).
455

456         if (resultLevels[0] == -1) {
457             resultLevels[0] = paragraphEmbeddingLevel;
458         }
459         for (int i = 1; i < initialTypes.length; ++i) {
460             if (resultLevels[i] == -1) {
461                 resultLevels[i] = resultLevels[i-1];
462             }
463         }
464         
465         // Embedding information is for informational purposes only
466
// so need not be adjusted.
467

468         return initialTypes.length;
469     }
470     
471     /**
472      * 2) determining explicit levels
473      * Rules X1 - X8
474      *
475      * The interaction of these rules makes handling them a bit complex.
476      * This examines resultTypes but does not modify it. It returns embedding and
477      * override information in the result array. The low 7 bits are the level, the high
478      * bit is set if the level is an override, and clear if it is an embedding.
479      */

480     private static byte[] processEmbeddings(byte[] resultTypes, byte paragraphEmbeddingLevel) {
481         final int EXPLICIT_LEVEL_LIMIT = 62;
482         
483         int textLength = resultTypes.length;
484         byte[] embeddings = new byte[textLength];
485         
486         // This stack will store the embedding levels and override status in a single byte
487
// as described above.
488
byte[] embeddingValueStack = new byte[EXPLICIT_LEVEL_LIMIT];
489         int stackCounter = 0;
490         
491         // An LRE or LRO at level 60 is invalid, since the new level 62 is invalid. But
492
// an RLE at level 60 is valid, since the new level 61 is valid. The current wording
493
// of the rules requires that the RLE remain valid even if a previous LRE is invalid.
494
// This keeps track of ignored LRE or LRO codes at level 60, so that the matching PDFs
495
// will not try to pop the stack.
496
int overflowAlmostCounter = 0;
497         
498         // This keeps track of ignored pushes at level 61 or higher, so that matching PDFs will
499
// not try to pop the stack.
500
int overflowCounter = 0;
501         
502         // Rule X1.
503

504         // Keep the level separate from the value (level | override status flag) for ease of access.
505
byte currentEmbeddingLevel = paragraphEmbeddingLevel;
506         byte currentEmbeddingValue = paragraphEmbeddingLevel;
507         
508         // Loop through types, handling all remaining rules
509
for (int i = 0; i < textLength; ++i) {
510             
511             embeddings[i] = currentEmbeddingValue;
512             
513             byte t = resultTypes[i];
514             
515             // Rules X2, X3, X4, X5
516
switch (t) {
517                 case RLE:
518                 case LRE:
519                 case RLO:
520                 case LRO:
521                     // Only need to compute new level if current level is valid
522
if (overflowCounter == 0) {
523                         byte newLevel;
524                         if (t == RLE || t == RLO) {
525                             newLevel = (byte)((currentEmbeddingLevel + 1) | 1); // least greater odd
526
} else { // t == LRE || t == LRO
527
newLevel = (byte)((currentEmbeddingLevel + 2) & ~1); // least greater even
528
}
529                         
530                         // If the new level is valid, push old embedding level and override status
531
// No check for valid stack counter, since the level check suffices.
532
if (newLevel < EXPLICIT_LEVEL_LIMIT) {
533                             embeddingValueStack[stackCounter] = currentEmbeddingValue;
534                             stackCounter++;
535                             
536                             currentEmbeddingLevel = newLevel;
537                             if (t == LRO || t == RLO) { // override
538
currentEmbeddingValue = (byte)(newLevel | 0x80);
539                             } else {
540                                 currentEmbeddingValue = newLevel;
541                             }
542                             
543                             // Adjust level of format mark (for expositional purposes only, this gets
544
// removed later).
545
embeddings[i] = currentEmbeddingValue;
546                             break;
547                         }
548                         
549                         // Otherwise new level is invalid, but a valid level can still be achieved if this
550
// level is 60 and we encounter an RLE or RLO further on. So record that we
551
// 'almost' overflowed.
552
if (currentEmbeddingLevel == 60) {
553                             overflowAlmostCounter++;
554                             break;
555                         }
556                     }
557                     
558                     // Otherwise old or new level is invalid.
559
overflowCounter++;
560                     break;
561                     
562                 case PDF:
563                     // The only case where this did not actually overflow but may have almost overflowed
564
// is when there was an RLE or RLO on level 60, which would result in level 61. So we
565
// only test the almost overflow condition in that case.
566
//
567
// Also note that there may be a PDF without any pushes at all.
568

569                     if (overflowCounter > 0) {
570                         --overflowCounter;
571                     } else if (overflowAlmostCounter > 0 && currentEmbeddingLevel != 61) {
572                         --overflowAlmostCounter;
573                     } else if (stackCounter > 0) {
574                         --stackCounter;
575                         currentEmbeddingValue = embeddingValueStack[stackCounter];
576                         currentEmbeddingLevel = (byte)(currentEmbeddingValue & 0x7f);
577                     }
578                     break;
579                     
580                 case B:
581                     // Rule X8.
582

583                     // These values are reset for clarity, in this implementation B can only
584
// occur as the last code in the array.
585
stackCounter = 0;
586                     overflowCounter = 0;
587                     overflowAlmostCounter = 0;
588                     currentEmbeddingLevel = paragraphEmbeddingLevel;
589                     currentEmbeddingValue = paragraphEmbeddingLevel;
590                     
591                     embeddings[i] = paragraphEmbeddingLevel;
592                     break;
593                     
594                 default:
595                     break;
596             }
597         }
598         
599         return embeddings;
600     }
601     
602     
603     /**
604      * 3) resolving weak types
605      * Rules W1-W7.
606      *
607      * Note that some weak types (EN, AN) remain after this processing is complete.
608      */

609     private void resolveWeakTypes(int start, int limit, byte level, byte sor, byte eor) {
610         
611         // Rule W1.
612
// Changes all NSMs.
613
byte preceedingCharacterType = sor;
614         for (int i = start; i < limit; ++i) {
615             byte t = resultTypes[i];
616             if (t == NSM) {
617                 resultTypes[i] = preceedingCharacterType;
618             } else {
619                 preceedingCharacterType = t;
620             }
621         }
622         
623         // Rule W2.
624
// EN does not change at the start of the run, because sor != AL.
625
for (int i = start; i < limit; ++i) {
626             if (resultTypes[i] == EN) {
627                 for (int j = i - 1; j >= start; --j) {
628                     byte t = resultTypes[j];
629                     if (t == L || t == R || t == AL) {
630                         if (t == AL) {
631                             resultTypes[i] = AN;
632                         }
633                         break;
634                     }
635                 }
636             }
637         }
638         
639         // Rule W3.
640
for (int i = start; i < limit; ++i) {
641             if (resultTypes[i] == AL) {
642                 resultTypes[i] = R;
643             }
644         }
645         
646         // Rule W4.
647
// Since there must be values on both sides for this rule to have an
648
// effect, the scan skips the first and last value.
649
//
650
// Although the scan proceeds left to right, and changes the type values
651
// in a way that would appear to affect the computations later in the scan,
652
// there is actually no problem. A change in the current value can only
653
// affect the value to its immediate right, and only affect it if it is
654
// ES or CS. But the current value can only change if the value to its
655
// right is not ES or CS. Thus either the current value will not change,
656
// or its change will have no effect on the remainder of the analysis.
657

658         for (int i = start + 1; i < limit - 1; ++i) {
659             if (resultTypes[i] == ES || resultTypes[i] == CS) {
660                 byte prevSepType = resultTypes[i-1];
661                 byte succSepType = resultTypes[i+1];
662                 if (prevSepType == EN && succSepType == EN) {
663                     resultTypes[i] = EN;
664                 } else if (resultTypes[i] == CS && prevSepType == AN && succSepType == AN) {
665                     resultTypes[i] = AN;
666                 }
667             }
668         }
669         
670         // Rule W5.
671
for (int i = start; i < limit; ++i) {
672             if (resultTypes[i] == ET) {
673                 // locate end of sequence
674
int runstart = i;
675                 int runlimit = findRunLimit(runstart, limit, new byte[] { ET });
676                 
677                 // check values at ends of sequence
678
byte t = runstart == start ? sor : resultTypes[runstart - 1];
679                 
680                 if (t != EN) {
681                     t = runlimit == limit ? eor : resultTypes[runlimit];
682                 }
683                 
684                 if (t == EN) {
685                     setTypes(runstart, runlimit, EN);
686                 }
687                 
688                 // continue at end of sequence
689
i = runlimit;
690             }
691         }
692         
693         // Rule W6.
694
for (int i = start; i < limit; ++i) {
695             byte t = resultTypes[i];
696             if (t == ES || t == ET || t == CS) {
697                 resultTypes[i] = ON;
698             }
699         }
700         
701         // Rule W7.
702
for (int i = start; i < limit; ++i) {
703             if (resultTypes[i] == EN) {
704                 // set default if we reach start of run
705
byte prevStrongType = sor;
706                 for (int j = i - 1; j >= start; --j) {
707                     byte t = resultTypes[j];
708                     if (t == L || t == R) { // AL's have been removed
709
prevStrongType = t;
710                         break;
711                     }
712                 }
713                 if (prevStrongType == L) {
714                     resultTypes[i] = L;
715                 }
716             }
717         }
718     }
719     
720     /**
721      * 6) resolving neutral types
722      * Rules N1-N2.
723      */

724     private void resolveNeutralTypes(int start, int limit, byte level, byte sor, byte eor) {
725         
726         for (int i = start; i < limit; ++i) {
727             byte t = resultTypes[i];
728             if (t == WS || t == ON || t == B || t == S) {
729                 // find bounds of run of neutrals
730
int runstart = i;
731                 int runlimit = findRunLimit(runstart, limit, new byte[] {B, S, WS, ON});
732                 
733                 // determine effective types at ends of run
734
byte leadingType;
735                 byte trailingType;
736                 
737                 if (runstart == start) {
738                     leadingType = sor;
739                 } else {
740                     leadingType = resultTypes[runstart - 1];
741                     if (leadingType == L || leadingType == R) {
742                         // found the strong type
743
} else if (leadingType == AN) {
744                         leadingType = R;
745                     } else if (leadingType == EN) {
746                         // Since EN's with previous strong L types have been changed
747
// to L in W7, the leadingType must be R.
748
leadingType = R;
749                     }
750                 }
751                 
752                 if (runlimit == limit) {
753                     trailingType = eor;
754                 } else {
755                     trailingType = resultTypes[runlimit];
756                     if (trailingType == L || trailingType == R) {
757                         // found the strong type
758
} else if (trailingType == AN) {
759                         trailingType = R;
760                     } else if (trailingType == EN) {
761                         trailingType = R;
762                     }
763                 }
764                 
765                 byte resolvedType;
766                 if (leadingType == trailingType) {
767                     // Rule N1.
768
resolvedType = leadingType;
769                 } else {
770                     // Rule N2.
771
// Notice the embedding level of the run is used, not
772
// the paragraph embedding level.
773
resolvedType = typeForLevel(level);
774                 }
775                 
776                 setTypes(runstart, runlimit, resolvedType);
777                 
778                 // skip over run of (former) neutrals
779
i = runlimit;
780             }
781         }
782     }
783     
784     /**
785      * 7) resolving implicit embedding levels
786      * Rules I1, I2.
787      */

788     private void resolveImplicitLevels(int start, int limit, byte level, byte sor, byte eor) {
789         if ((level & 1) == 0) { // even level
790
for (int i = start; i < limit; ++i) {
791                 byte t = resultTypes[i];
792                 // Rule I1.
793
if (t == L ) {
794                     // no change
795
} else if (t == R) {
796                     resultLevels[i] += 1;
797                 } else { // t == AN || t == EN
798
resultLevels[i] += 2;
799                 }
800             }
801         } else { // odd level
802
for (int i = start; i < limit; ++i) {
803                 byte t = resultTypes[i];
804                 // Rule I2.
805
if (t == R) {
806                     // no change
807
} else { // t == L || t == AN || t == EN
808
resultLevels[i] += 1;
809                 }
810             }
811         }
812     }
813     
//
// Output
//

818     public byte[] getLevels() {
819         return getLevels(new int[]{textLength});
820     }
821     
822     /**
823      * Return levels array breaking lines at offsets in linebreaks. <br>
824      * Rule L1.
825      * <p>
826      * The returned levels array contains the resolved level for each
827      * bidi code passed to the constructor.
828      * <p>
829      * The linebreaks array must include at least one value.
830      * The values must be in strictly increasing order (no duplicates)
831      * between 1 and the length of the text, inclusive. The last value
832      * must be the length of the text.
833      *
834      * @param linebreaks the offsets at which to break the paragraph
835      * @return the resolved levels of the text
836      */

837     public byte[] getLevels(int[] linebreaks) {
838         
839         // Note that since the previous processing has removed all
840
// P, S, and WS values from resultTypes, the values referred to
841
// in these rules are the initial types, before any processing
842
// has been applied (including processing of overrides).
843
//
844
// This example implementation has reinserted explicit format codes
845
// and BN, in order that the levels array correspond to the
846
// initial text. Their final placement is not normative.
847
// These codes are treated like WS in this implementation,
848
// so they don't interrupt sequences of WS.
849

850         validateLineBreaks(linebreaks, textLength);
851         
852         byte[] result = (byte[])resultLevels.clone(); // will be returned to caller
853

854         // don't worry about linebreaks since if there is a break within
855
// a series of WS values preceeding S, the linebreak itself
856
// causes the reset.
857
for (int i = 0; i < result.length; ++i) {
858             byte t = initialTypes[i];
859             if (t == B || t == S) {
860                 // Rule L1, clauses one and two.
861
result[i] = paragraphEmbeddingLevel;
862                 
863                 // Rule L1, clause three.
864
for (int j = i - 1; j >= 0; --j) {
865                     if (isWhitespace(initialTypes[j])) { // including format codes
866
result[j] = paragraphEmbeddingLevel;
867                     } else {
868                         break;
869                     }
870                 }
871             }
872         }
873         
874         // Rule L1, clause four.
875
int start = 0;
876         for (int i = 0; i < linebreaks.length; ++i) {
877             int limit = linebreaks[i];
878             for (int j = limit - 1; j >= start; --j) {
879                 if (isWhitespace(initialTypes[j])) { // including format codes
880
result[j] = paragraphEmbeddingLevel;
881                 } else {
882                     break;
883                 }
884             }
885             
886             start = limit;
887         }
888         
889         return result;
890     }
891     
892     /**
893      * Return reordering array breaking lines at offsets in linebreaks.
894      * <p>
895      * The reordering array maps from a visual index to a logical index.
896      * Lines are concatenated from left to right. So for example, the
897      * fifth character from the left on the third line is
898      * <pre> getReordering(linebreaks)[linebreaks[1] + 4]</pre>
899      * (linebreaks[1] is the position after the last character of the
900      * second line, which is also the index of the first character on the
901      * third line, and adding four gets the fifth character from the left).
902      * <p>
903      * The linebreaks array must include at least one value.
904      * The values must be in strictly increasing order (no duplicates)
905      * between 1 and the length of the text, inclusive. The last value
906      * must be the length of the text.
907      *
908      * @param linebreaks the offsets at which to break the paragraph.
909      */

910     public int[] getReordering(int[] linebreaks) {
911         validateLineBreaks(linebreaks, textLength);
912         
913         byte[] levels = getLevels(linebreaks);
914         
915         return computeMultilineReordering(levels, linebreaks);
916     }
917     
918     /**
919      * Return multiline reordering array for a given level array.
920      * Reordering does not occur across a line break.
921      */

922     private static int[] computeMultilineReordering(byte[] levels, int[] linebreaks) {
923         int[] result = new int[levels.length];
924         
925         int start = 0;
926         for (int i = 0; i < linebreaks.length; ++i) {
927             int limit = linebreaks[i];
928             
929             byte[] templevels = new byte[limit - start];
930             System.arraycopy(levels, start, templevels, 0, templevels.length);
931             
932             int[] temporder = computeReordering(templevels);
933             for (int j = 0; j < temporder.length; ++j) {
934                 result[start + j] = temporder[j] + start;
935             }
936             
937             start = limit;
938         }
939         
940         return result;
941     }
942     
943     /**
944      * Return reordering array for a given level array. This reorders a single line.
945      * The reordering is a visual to logical map. For example,
946      * the leftmost char is string.charAt(order[0]).
947      * Rule L2.
948      */

949     private static int[] computeReordering(byte[] levels) {
950         int lineLength = levels.length;
951         
952         int[] result = new int[lineLength];
953         
954         // initialize order
955
for (int i = 0; i < lineLength; ++i) {
956             result[i] = i;
957         }
958         
959         // locate highest level found on line.
960
// Note the rules say text, but no reordering across line bounds is performed,
961
// so this is sufficient.
962
byte highestLevel = 0;
963         byte lowestOddLevel = 63;
964         for (int i = 0; i < lineLength; ++i) {
965             byte level = levels[i];
966             if (level > highestLevel) {
967                 highestLevel = level;
968             }
969             if (((level & 1) != 0) && level < lowestOddLevel) {
970                 lowestOddLevel = level;
971             }
972         }
973         
974         for (int level = highestLevel; level >= lowestOddLevel; --level) {
975             for (int i = 0; i < lineLength; ++i) {
976                 if (levels[i] >= level) {
977                     // find range of text at or above this level
978
int start = i;
979                     int limit = i + 1;
980                     while (limit < lineLength && levels[limit] >= level) {
981                         ++limit;
982                     }
983                     
984                     // reverse run
985
for (int j = start, k = limit - 1; j < k; ++j, --k) {
986                         int temp = result[j];
987                         result[j] = result[k];
988                         result[k] = temp;
989                     }
990                     
991                     // skip to end of level run
992
i = limit;
993                 }
994             }
995         }
996         
997         return result;
998     }
999     
1000    /**
1001     * Return the base level of the paragraph.
1002     */

1003    public byte getBaseLevel() {
1004        return paragraphEmbeddingLevel;
1005    }
1006    
// --- internal utilities -------------------------------------------------
1008

1009    /**
1010     * Return true if the type is considered a whitespace type for the line break rules.
1011     */

1012    private static boolean isWhitespace(byte biditype) {
1013        switch (biditype) {
1014            case LRE:
1015            case RLE:
1016            case LRO:
1017            case RLO:
1018            case PDF:
1019            case BN:
1020            case WS:
1021                return true;
1022            default:
1023                return false;
1024        }
1025    }
1026    
1027    /**
1028     * Return the strong type (L or R) corresponding to the level.
1029     */

1030    private static byte typeForLevel(int level) {
1031        return ((level & 0x1) == 0) ? L : R;
1032    }
1033    
1034    /**
1035     * Return the limit of the run starting at index that includes only resultTypes in validSet.
1036     * This checks the value at index, and will return index if that value is not in validSet.
1037     */

1038    private int findRunLimit(int index, int limit, byte[] validSet) {
1039        --index;
1040        loop:
1041            while (++index < limit) {
1042                byte t = resultTypes[index];
1043                for (int i = 0; i < validSet.length; ++i) {
1044                    if (t == validSet[i]) {
1045                        continue loop;
1046                    }
1047                }
1048                // didn't find a match in validSet
1049
return index;
1050            }
1051            return limit;
1052    }
1053    
1054    /**
1055     * Return the start of the run including index that includes only resultTypes in validSet.
1056     * This assumes the value at index is valid, and does not check it.
1057     */

1058    private int findRunStart(int index, byte[] validSet) {
1059        loop:
1060            while (--index >= 0) {
1061                byte t = resultTypes[index];
1062                for (int i = 0; i < validSet.length; ++i) {
1063                    if (t == validSet[i]) {
1064                        continue loop;
1065                    }
1066                }
1067                return index + 1;
1068            }
1069            return 0;
1070    }
1071    
1072    /**
1073     * Set resultTypes from start up to (but not including) limit to newType.
1074     */

1075    private void setTypes(int start, int limit, byte newType) {
1076        for (int i = start; i < limit; ++i) {
1077            resultTypes[i] = newType;
1078        }
1079    }
1080    
1081    /**
1082     * Set resultLevels from start up to (but not including) limit to newLevel.
1083     */

1084    private void setLevels(int start, int limit, byte newLevel) {
1085        for (int i = start; i < limit; ++i) {
1086            resultLevels[i] = newLevel;
1087        }
1088    }
1089    
// --- input validation ---------------------------------------------------
1091

1092    /**
1093     * Throw exception if type array is invalid.
1094     */

1095    private static void validateTypes(byte[] types) {
1096        if (types == null) {
1097            throw new IllegalArgumentException JavaDoc("types is null");
1098        }
1099        for (int i = 0; i < types.length; ++i) {
1100            if (types[i] < TYPE_MIN || types[i] > TYPE_MAX) {
1101                throw new IllegalArgumentException JavaDoc("illegal type value at " + i + ": " + types[i]);
1102            }
1103        }
1104        for (int i = 0; i < types.length - 1; ++i) {
1105            if (types[i] == B) {
1106                throw new IllegalArgumentException JavaDoc("B type before end of paragraph at index: " + i);
1107            }
1108        }
1109    }
1110    
1111    /**
1112     * Throw exception if paragraph embedding level is invalid. Special allowance for -1 so that
1113     * default processing can still be performed when using this API.
1114     */

1115    private static void validateParagraphEmbeddingLevel(byte paragraphEmbeddingLevel) {
1116        if (paragraphEmbeddingLevel != -1 &&
1117        paragraphEmbeddingLevel != 0 &&
1118        paragraphEmbeddingLevel != 1) {
1119            throw new IllegalArgumentException JavaDoc("illegal paragraph embedding level: " + paragraphEmbeddingLevel);
1120        }
1121    }
1122    
1123    /**
1124     * Throw exception if line breaks array is invalid.
1125     */

1126    private static void validateLineBreaks(int[] linebreaks, int textLength) {
1127        int prev = 0;
1128        for (int i = 0; i < linebreaks.length; ++i) {
1129            int next = linebreaks[i];
1130            if (next <= prev) {
1131                throw new IllegalArgumentException JavaDoc("bad linebreak: " + next + " at index: " + i);
1132            }
1133            prev = next;
1134        }
1135        if (prev != textLength) {
1136            throw new IllegalArgumentException JavaDoc("last linebreak must be at " + textLength);
1137        }
1138    }
1139    
1140    private static final byte rtypes[] = new byte[0x10000];
1141    
// Run-length description of the type table, stored as consecutive triples:
// first index, last index (inclusive), type constant cast to char.
// The static initializer expands every triple into rtypes, storing the type
// (narrowed to a byte) at each index from first through last.
1142    private static char baseTypes[] = {
1143        0, 8, (char)BN, 9, 9, (char)S, 10, 10, (char)B, 11, 11, (char)S, 12, 12, (char)WS, 13, 13, (char)B,
1144        14, 27, (char)BN, 28, 30, (char)B, 31, 31, (char)S, 32, 32, (char)WS, 33, 34, (char)ON, 35, 37, (char)ET,
1145        38, 42, (char)ON, 43, 43, (char)ET, 44, 44, (char)CS, 45, 45, (char)ET, 46, 46, (char)CS, 47, 47, (char)ES,
1146        48, 57, (char)EN, 58, 58, (char)CS, 59, 64, (char)ON, 65, 90, (char)L, 91, 96, (char)ON, 97, 122, (char)L,
1147        123, 126, (char)ON, 127, 132, (char)BN, 133, 133, (char)B, 134, 159, (char)BN, 160, 160, (char)CS,
1148        161, 161, (char)ON, 162, 165, (char)ET, 166, 169, (char)ON, 170, 170, (char)L, 171, 175, (char)ON,
1149        176, 177, (char)ET, 178, 179, (char)EN, 180, 180, (char)ON, 181, 181, (char)L, 182, 184, (char)ON,
1150        185, 185, (char)EN, 186, 186, (char)L, 187, 191, (char)ON, 192, 214, (char)L, 215, 215, (char)ON,
1151        216, 246, (char)L, 247, 247, (char)ON, 248, 696, (char)L, 697, 698, (char)ON, 699, 705, (char)L,
1152        706, 719, (char)ON, 720, 721, (char)L, 722, 735, (char)ON, 736, 740, (char)L, 741, 749, (char)ON,
1153        750, 750, (char)L, 751, 767, (char)ON, 768, 855, (char)NSM, 856, 860, (char)L, 861, 879, (char)NSM,
1154        880, 883, (char)L, 884, 885, (char)ON, 886, 893, (char)L, 894, 894, (char)ON, 895, 899, (char)L,
1155        900, 901, (char)ON, 902, 902, (char)L, 903, 903, (char)ON, 904, 1013, (char)L, 1014, 1014, (char)ON,
1156        1015, 1154, (char)L, 1155, 1158, (char)NSM, 1159, 1159, (char)L, 1160, 1161, (char)NSM,
1157        1162, 1417, (char)L, 1418, 1418, (char)ON, 1419, 1424, (char)L, 1425, 1441, (char)NSM,
1158        1442, 1442, (char)L, 1443, 1465, (char)NSM, 1466, 1466, (char)L, 1467, 1469, (char)NSM,
1159        1470, 1470, (char)R, 1471, 1471, (char)NSM, 1472, 1472, (char)R, 1473, 1474, (char)NSM,
1160        1475, 1475, (char)R, 1476, 1476, (char)NSM, 1477, 1487, (char)L, 1488, 1514, (char)R,
1161        1515, 1519, (char)L, 1520, 1524, (char)R, 1525, 1535, (char)L, 1536, 1539, (char)AL,
1162        1540, 1547, (char)L, 1548, 1548, (char)CS, 1549, 1549, (char)AL, 1550, 1551, (char)ON,
1163        1552, 1557, (char)NSM, 1558, 1562, (char)L, 1563, 1563, (char)AL, 1564, 1566, (char)L,
1164        1567, 1567, (char)AL, 1568, 1568, (char)L, 1569, 1594, (char)AL, 1595, 1599, (char)L,
1165        1600, 1610, (char)AL, 1611, 1624, (char)NSM, 1625, 1631, (char)L, 1632, 1641, (char)AN,
1166        1642, 1642, (char)ET, 1643, 1644, (char)AN, 1645, 1647, (char)AL, 1648, 1648, (char)NSM,
1167        1649, 1749, (char)AL, 1750, 1756, (char)NSM, 1757, 1757, (char)AL, 1758, 1764, (char)NSM,
1168        1765, 1766, (char)AL, 1767, 1768, (char)NSM, 1769, 1769, (char)ON, 1770, 1773, (char)NSM,
1169        1774, 1775, (char)AL, 1776, 1785, (char)EN, 1786, 1805, (char)AL, 1806, 1806, (char)L,
1170        1807, 1807, (char)BN, 1808, 1808, (char)AL, 1809, 1809, (char)NSM, 1810, 1839, (char)AL,
1171        1840, 1866, (char)NSM, 1867, 1868, (char)L, 1869, 1871, (char)AL, 1872, 1919, (char)L,
1172        1920, 1957, (char)AL, 1958, 1968, (char)NSM, 1969, 1969, (char)AL, 1970, 2304, (char)L,
1173        2305, 2306, (char)NSM, 2307, 2363, (char)L, 2364, 2364, (char)NSM, 2365, 2368, (char)L,
1174        2369, 2376, (char)NSM, 2377, 2380, (char)L, 2381, 2381, (char)NSM, 2382, 2384, (char)L,
1175        2385, 2388, (char)NSM, 2389, 2401, (char)L, 2402, 2403, (char)NSM, 2404, 2432, (char)L,
1176        2433, 2433, (char)NSM, 2434, 2491, (char)L, 2492, 2492, (char)NSM, 2493, 2496, (char)L,
1177        2497, 2500, (char)NSM, 2501, 2508, (char)L, 2509, 2509, (char)NSM, 2510, 2529, (char)L,
1178        2530, 2531, (char)NSM, 2532, 2545, (char)L, 2546, 2547, (char)ET, 2548, 2560, (char)L,
1179        2561, 2562, (char)NSM, 2563, 2619, (char)L, 2620, 2620, (char)NSM, 2621, 2624, (char)L,
1180        2625, 2626, (char)NSM, 2627, 2630, (char)L, 2631, 2632, (char)NSM, 2633, 2634, (char)L,
1181        2635, 2637, (char)NSM, 2638, 2671, (char)L, 2672, 2673, (char)NSM, 2674, 2688, (char)L,
1182        2689, 2690, (char)NSM, 2691, 2747, (char)L, 2748, 2748, (char)NSM, 2749, 2752, (char)L,
1183        2753, 2757, (char)NSM, 2758, 2758, (char)L, 2759, 2760, (char)NSM, 2761, 2764, (char)L,
1184        2765, 2765, (char)NSM, 2766, 2785, (char)L, 2786, 2787, (char)NSM, 2788, 2800, (char)L,
1185        2801, 2801, (char)ET, 2802, 2816, (char)L, 2817, 2817, (char)NSM, 2818, 2875, (char)L,
1186        2876, 2876, (char)NSM, 2877, 2878, (char)L, 2879, 2879, (char)NSM, 2880, 2880, (char)L,
1187        2881, 2883, (char)NSM, 2884, 2892, (char)L, 2893, 2893, (char)NSM, 2894, 2901, (char)L,
1188        2902, 2902, (char)NSM, 2903, 2945, (char)L, 2946, 2946, (char)NSM, 2947, 3007, (char)L,
1189        3008, 3008, (char)NSM, 3009, 3020, (char)L, 3021, 3021, (char)NSM, 3022, 3058, (char)L,
1190        3059, 3064, (char)ON, 3065, 3065, (char)ET, 3066, 3066, (char)ON, 3067, 3133, (char)L,
1191        3134, 3136, (char)NSM, 3137, 3141, (char)L, 3142, 3144, (char)NSM, 3145, 3145, (char)L,
1192        3146, 3149, (char)NSM, 3150, 3156, (char)L, 3157, 3158, (char)NSM, 3159, 3259, (char)L,
1193        3260, 3260, (char)NSM, 3261, 3275, (char)L, 3276, 3277, (char)NSM, 3278, 3392, (char)L,
1194        3393, 3395, (char)NSM, 3396, 3404, (char)L, 3405, 3405, (char)NSM, 3406, 3529, (char)L,
1195        3530, 3530, (char)NSM, 3531, 3537, (char)L, 3538, 3540, (char)NSM, 3541, 3541, (char)L,
1196        3542, 3542, (char)NSM, 3543, 3632, (char)L, 3633, 3633, (char)NSM, 3634, 3635, (char)L,
1197        3636, 3642, (char)NSM, 3643, 3646, (char)L, 3647, 3647, (char)ET, 3648, 3654, (char)L,
1198        3655, 3662, (char)NSM, 3663, 3760, (char)L, 3761, 3761, (char)NSM, 3762, 3763, (char)L,
1199        3764, 3769, (char)NSM, 3770, 3770, (char)L, 3771, 3772, (char)NSM, 3773, 3783, (char)L,
1200        3784, 3789, (char)NSM, 3790, 3863, (char)L, 3864, 3865, (char)NSM, 3866, 3892, (char)L,
1201        3893, 3893, (char)NSM, 3894, 3894, (char)L, 3895, 3895, (char)NSM, 3896, 3896, (char)L,
1202        3897, 3897, (char)NSM, 3898, 3901, (char)ON, 3902, 3952, (char)L, 3953, 3966, (char)NSM,
1203        3967, 3967, (char)L, 3968, 3972, (char)NSM, 3973, 3973, (char)L, 3974, 3975, (char)NSM,
1204        3976, 3983, (char)L, 3984, 3991, (char)NSM, 3992, 3992, (char)L, 3993, 4028, (char)NSM,
1205        4029, 4037, (char)L, 4038, 4038, (char)NSM, 4039, 4140, (char)L, 4141, 4144, (char)NSM,
1206        4145, 4145, (char)L, 4146, 4146, (char)NSM, 4147, 4149, (char)L, 4150, 4151, (char)NSM,
1207        4152, 4152, (char)L, 4153, 4153, (char)NSM, 4154, 4183, (char)L, 4184, 4185, (char)NSM,
1208        4186, 5759, (char)L, 5760, 5760, (char)WS, 5761, 5786, (char)L, 5787, 5788, (char)ON,
1209        5789, 5905, (char)L, 5906, 5908, (char)NSM, 5909, 5937, (char)L, 5938, 5940, (char)NSM,
1210        5941, 5969, (char)L, 5970, 5971, (char)NSM, 5972, 6001, (char)L, 6002, 6003, (char)NSM,
1211        6004, 6070, (char)L, 6071, 6077, (char)NSM, 6078, 6085, (char)L, 6086, 6086, (char)NSM,
1212        6087, 6088, (char)L, 6089, 6099, (char)NSM, 6100, 6106, (char)L, 6107, 6107, (char)ET,
1213        6108, 6108, (char)L, 6109, 6109, (char)NSM, 6110, 6127, (char)L, 6128, 6137, (char)ON,
1214        6138, 6143, (char)L, 6144, 6154, (char)ON, 6155, 6157, (char)NSM, 6158, 6158, (char)WS,
1215        6159, 6312, (char)L, 6313, 6313, (char)NSM, 6314, 6431, (char)L, 6432, 6434, (char)NSM,
1216        6435, 6438, (char)L, 6439, 6443, (char)NSM, 6444, 6449, (char)L, 6450, 6450, (char)NSM,
1217        6451, 6456, (char)L, 6457, 6459, (char)NSM, 6460, 6463, (char)L, 6464, 6464, (char)ON,
1218        6465, 6467, (char)L, 6468, 6469, (char)ON, 6470, 6623, (char)L, 6624, 6655, (char)ON,
1219        6656, 8124, (char)L, 8125, 8125, (char)ON, 8126, 8126, (char)L, 8127, 8129, (char)ON,
1220        8130, 8140, (char)L, 8141, 8143, (char)ON, 8144, 8156, (char)L, 8157, 8159, (char)ON,
1221        8160, 8172, (char)L, 8173, 8175, (char)ON, 8176, 8188, (char)L, 8189, 8190, (char)ON,
1222        8191, 8191, (char)L, 8192, 8202, (char)WS, 8203, 8205, (char)BN, 8206, 8206, (char)L,
1223        8207, 8207, (char)R, 8208, 8231, (char)ON, 8232, 8232, (char)WS, 8233, 8233, (char)B,
1224        8234, 8234, (char)LRE, 8235, 8235, (char)RLE, 8236, 8236, (char)PDF, 8237, 8237, (char)LRO,
1225        8238, 8238, (char)RLO, 8239, 8239, (char)WS, 8240, 8244, (char)ET, 8245, 8276, (char)ON,
1226        8277, 8278, (char)L, 8279, 8279, (char)ON, 8280, 8286, (char)L, 8287, 8287, (char)WS,
1227        8288, 8291, (char)BN, 8292, 8297, (char)L, 8298, 8303, (char)BN, 8304, 8304, (char)EN,
1228        8305, 8307, (char)L, 8308, 8313, (char)EN, 8314, 8315, (char)ET, 8316, 8318, (char)ON,
1229        8319, 8319, (char)L, 8320, 8329, (char)EN, 8330, 8331, (char)ET, 8332, 8334, (char)ON,
1230        8335, 8351, (char)L, 8352, 8369, (char)ET, 8370, 8399, (char)L, 8400, 8426, (char)NSM,
1231        8427, 8447, (char)L, 8448, 8449, (char)ON, 8450, 8450, (char)L, 8451, 8454, (char)ON,
1232        8455, 8455, (char)L, 8456, 8457, (char)ON, 8458, 8467, (char)L, 8468, 8468, (char)ON,
1233        8469, 8469, (char)L, 8470, 8472, (char)ON, 8473, 8477, (char)L, 8478, 8483, (char)ON,
1234        8484, 8484, (char)L, 8485, 8485, (char)ON, 8486, 8486, (char)L, 8487, 8487, (char)ON,
1235        8488, 8488, (char)L, 8489, 8489, (char)ON, 8490, 8493, (char)L, 8494, 8494, (char)ET,
1236        8495, 8497, (char)L, 8498, 8498, (char)ON, 8499, 8505, (char)L, 8506, 8507, (char)ON,
1237        8508, 8511, (char)L, 8512, 8516, (char)ON, 8517, 8521, (char)L, 8522, 8523, (char)ON,
1238        8524, 8530, (char)L, 8531, 8543, (char)ON, 8544, 8591, (char)L, 8592, 8721, (char)ON,
1239        8722, 8723, (char)ET, 8724, 9013, (char)ON, 9014, 9082, (char)L, 9083, 9108, (char)ON,
1240        9109, 9109, (char)L, 9110, 9168, (char)ON, 9169, 9215, (char)L, 9216, 9254, (char)ON,
1241        9255, 9279, (char)L, 9280, 9290, (char)ON, 9291, 9311, (char)L, 9312, 9371, (char)EN,
1242        9372, 9449, (char)L, 9450, 9450, (char)EN, 9451, 9751, (char)ON, 9752, 9752, (char)L,
1243        9753, 9853, (char)ON, 9854, 9855, (char)L, 9856, 9873, (char)ON, 9874, 9887, (char)L,
1244        9888, 9889, (char)ON, 9890, 9984, (char)L, 9985, 9988, (char)ON, 9989, 9989, (char)L,
1245        9990, 9993, (char)ON, 9994, 9995, (char)L, 9996, 10023, (char)ON, 10024, 10024, (char)L,
1246        10025, 10059, (char)ON, 10060, 10060, (char)L, 10061, 10061, (char)ON, 10062, 10062, (char)L,
1247        10063, 10066, (char)ON, 10067, 10069, (char)L, 10070, 10070, (char)ON, 10071, 10071, (char)L,
1248        10072, 10078, (char)ON, 10079, 10080, (char)L, 10081, 10132, (char)ON, 10133, 10135, (char)L,
1249        10136, 10159, (char)ON, 10160, 10160, (char)L, 10161, 10174, (char)ON, 10175, 10191, (char)L,
1250        10192, 10219, (char)ON, 10220, 10223, (char)L, 10224, 11021, (char)ON, 11022, 11903, (char)L,
1251        11904, 11929, (char)ON, 11930, 11930, (char)L, 11931, 12019, (char)ON, 12020, 12031, (char)L,
1252        12032, 12245, (char)ON, 12246, 12271, (char)L, 12272, 12283, (char)ON, 12284, 12287, (char)L,
1253        12288, 12288, (char)WS, 12289, 12292, (char)ON, 12293, 12295, (char)L, 12296, 12320, (char)ON,
1254        12321, 12329, (char)L, 12330, 12335, (char)NSM, 12336, 12336, (char)ON, 12337, 12341, (char)L,
1255        12342, 12343, (char)ON, 12344, 12348, (char)L, 12349, 12351, (char)ON, 12352, 12440, (char)L,
1256        12441, 12442, (char)NSM, 12443, 12444, (char)ON, 12445, 12447, (char)L, 12448, 12448, (char)ON,
1257        12449, 12538, (char)L, 12539, 12539, (char)ON, 12540, 12828, (char)L, 12829, 12830, (char)ON,
1258        12831, 12879, (char)L, 12880, 12895, (char)ON, 12896, 12923, (char)L, 12924, 12925, (char)ON,
1259        12926, 12976, (char)L, 12977, 12991, (char)ON, 12992, 13003, (char)L, 13004, 13007, (char)ON,
1260        13008, 13174, (char)L, 13175, 13178, (char)ON, 13179, 13277, (char)L, 13278, 13279, (char)ON,
1261        13280, 13310, (char)L, 13311, 13311, (char)ON, 13312, 19903, (char)L, 19904, 19967, (char)ON,
1262        19968, 42127, (char)L, 42128, 42182, (char)ON, 42183, 64284, (char)L, 64285, 64285, (char)R,
1263        64286, 64286, (char)NSM, 64287, 64296, (char)R, 64297, 64297, (char)ET, 64298, 64310, (char)R,
1264        64311, 64311, (char)L, 64312, 64316, (char)R, 64317, 64317, (char)L, 64318, 64318, (char)R,
1265        64319, 64319, (char)L, 64320, 64321, (char)R, 64322, 64322, (char)L, 64323, 64324, (char)R,
1266        64325, 64325, (char)L, 64326, 64335, (char)R, 64336, 64433, (char)AL, 64434, 64466, (char)L,
1267        64467, 64829, (char)AL, 64830, 64831, (char)ON, 64832, 64847, (char)L, 64848, 64911, (char)AL,
1268        64912, 64913, (char)L, 64914, 64967, (char)AL, 64968, 65007, (char)L, 65008, 65020, (char)AL,
1269        65021, 65021, (char)ON, 65022, 65023, (char)L, 65024, 65039, (char)NSM, 65040, 65055, (char)L,
1270        65056, 65059, (char)NSM, 65060, 65071, (char)L, 65072, 65103, (char)ON, 65104, 65104, (char)CS,
1271        65105, 65105, (char)ON, 65106, 65106, (char)CS, 65107, 65107, (char)L, 65108, 65108, (char)ON,
1272        65109, 65109, (char)CS, 65110, 65118, (char)ON, 65119, 65119, (char)ET, 65120, 65121, (char)ON,
1273        65122, 65123, (char)ET, 65124, 65126, (char)ON, 65127, 65127, (char)L, 65128, 65128, (char)ON,
1274        65129, 65130, (char)ET, 65131, 65131, (char)ON, 65132, 65135, (char)L, 65136, 65140, (char)AL,
1275        65141, 65141, (char)L, 65142, 65276, (char)AL, 65277, 65278, (char)L, 65279, 65279, (char)BN,
1276        65280, 65280, (char)L, 65281, 65282, (char)ON, 65283, 65285, (char)ET, 65286, 65290, (char)ON,
1277        65291, 65291, (char)ET, 65292, 65292, (char)CS, 65293, 65293, (char)ET, 65294, 65294, (char)CS,
1278        65295, 65295, (char)ES, 65296, 65305, (char)EN, 65306, 65306, (char)CS, 65307, 65312, (char)ON,
1279        65313, 65338, (char)L, 65339, 65344, (char)ON, 65345, 65370, (char)L, 65371, 65381, (char)ON,
1280        65382, 65503, (char)L, 65504, 65505, (char)ET, 65506, 65508, (char)ON, 65509, 65510, (char)ET,
1281        65511, 65511, (char)L, 65512, 65518, (char)ON, 65519, 65528, (char)L, 65529, 65531, (char)BN,
1282        65532, 65533, (char)ON, 65534, 65535, (char)L};
1283        
1284    static {
1285        for (int k = 0; k < baseTypes.length; ++k) {
1286            int start = baseTypes[k];
1287            int end = baseTypes[++k];
1288            byte b = (byte)baseTypes[++k];
1289            while (start <= end)
1290                rtypes[start++] = b;
1291        }
1292    }
1293}
Popular Tags