Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/openjdk-multiarch-jdk8u
Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/text/bidi/BidiBase.java
38918 views
1
/*
2
* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
3
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4
*
5
* This code is free software; you can redistribute it and/or modify it
6
* under the terms of the GNU General Public License version 2 only, as
7
* published by the Free Software Foundation. Oracle designates this
8
* particular file as subject to the "Classpath" exception as provided
9
* by Oracle in the LICENSE file that accompanied this code.
10
*
11
* This code is distributed in the hope that it will be useful, but WITHOUT
12
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14
* version 2 for more details (a copy is included in the LICENSE file that
15
* accompanied this code).
16
*
17
* You should have received a copy of the GNU General Public License version
18
* 2 along with this work; if not, write to the Free Software Foundation,
19
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20
*
21
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22
* or visit www.oracle.com if you need additional information or have any
23
* questions.
24
*/
25
/*
26
*******************************************************************************
27
* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *
28
* *
29
* The original version of this source code and documentation is copyrighted *
30
* and owned by IBM, These materials are provided under terms of a License *
31
* Agreement between IBM and Sun. This technology is protected by multiple *
32
* US and International patents. This notice and attribution to IBM may not *
33
* to removed. *
34
*******************************************************************************
35
*/
36
37
/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of
38
* algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre
39
* concept of RUNS_ONLY which is a double operation.
40
* It could be advantageous to divide this into 3 concepts:
41
* a) Operation: direct / inverse / RUNS_ONLY
42
* b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L
43
* c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL
44
* This would allow combinations not possible today like RUNS_ONLY with
45
* NUMBERS_SPECIAL.
46
* Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and
47
* REMOVE_CONTROLS for the inverse step.
48
* Not all combinations would be supported, and probably not all do make sense.
49
* This would need to document which ones are supported and what are the
50
* fallbacks for unsupported combinations.
51
*/
52
53
package sun.text.bidi;
54
55
import java.io.IOException;
56
import java.lang.reflect.Array;
57
import java.lang.reflect.Field;
58
import java.lang.reflect.Method;
59
import java.lang.reflect.InvocationTargetException;
60
import java.text.AttributedCharacterIterator;
61
import java.text.Bidi;
62
import java.util.Arrays;
63
import java.util.MissingResourceException;
64
import sun.text.normalizer.UBiDiProps;
65
import sun.text.normalizer.UCharacter;
66
import sun.text.normalizer.UTF16;
67
68
/**
69
*
70
* <h2>Bidi algorithm for ICU</h2>
71
*
72
* This is an implementation of the Unicode Bidirectional algorithm. The
73
* algorithm is defined in the <a
74
* href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
75
* version 13, also described in The Unicode Standard, Version 4.0 .
76
* <p>
77
*
78
* Note: Libraries that perform a bidirectional algorithm and reorder strings
79
* accordingly are sometimes called "Storage Layout Engines". ICU's Bidi and
80
* shaping (ArabicShaping) classes can be used at the core of such "Storage
81
* Layout Engines".
82
*
83
* <h3>General remarks about the API:</h3>
84
*
85
* The &quot;limit&quot; of a sequence of characters is the position just after
86
* their last character, i.e., one more than that position.
87
* <p>
88
*
89
* Some of the API methods provide access to &quot;runs&quot;. Such a
90
* &quot;run&quot; is defined as a sequence of characters that are at the same
91
* embedding level after performing the Bidi algorithm.
92
* <p>
93
*
94
* <h3>Basic concept: paragraph</h3>
95
* A piece of text can be divided into several paragraphs by characters
96
* with the Bidi class <code>Block Separator</code>. For handling of
97
* paragraphs, see:
98
* <ul>
99
* <li>{@link #countParagraphs}
100
* <li>{@link #getParaLevel}
101
* <li>{@link #getParagraph}
102
* <li>{@link #getParagraphByIndex}
103
* </ul>
104
*
105
* <h3>Basic concept: text direction</h3>
106
* The direction of a piece of text may be:
107
* <ul>
108
* <li>{@link #LTR}
109
* <li>{@link #RTL}
110
* <li>{@link #MIXED}
111
* </ul>
112
*
113
* <h3>Basic concept: levels</h3>
114
*
115
* Levels in this API represent embedding levels according to the Unicode
116
* Bidirectional Algorithm.
117
* Their low-order bit (even/odd value) indicates the visual direction.<p>
118
*
119
* Levels can be abstract values when used for the
120
* <code>paraLevel</code> and <code>embeddingLevels</code>
121
* arguments of <code>setPara()</code>; there:
122
* <ul>
123
* <li>the high-order bit of an <code>embeddingLevels[]</code>
124
* value indicates whether the using application is
125
* specifying the level of a character to <i>override</i> whatever the
126
* Bidi implementation would resolve it to.</li>
127
* <li><code>paraLevel</code> can be set to the
128
* pseudo-level values <code>LEVEL_DEFAULT_LTR</code>
129
* and <code>LEVEL_DEFAULT_RTL</code>.</li>
130
* </ul>
131
*
132
* <p>The related constants are not real, valid level values.
133
* <code>DEFAULT_XXX</code> can be used to specify
134
* a default for the paragraph level for
135
* when the <code>setPara()</code> method
136
* shall determine it but there is no
137
* strongly typed character in the input.<p>
138
*
139
* Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even
140
* and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,
141
* just like with normal LTR and RTL level values -
142
* these special values are designed that way. Also, the implementation
143
* assumes that MAX_EXPLICIT_LEVEL is odd.
144
*
145
* <ul><b>See Also:</b>
146
* <li>{@link #LEVEL_DEFAULT_LTR}
147
* <li>{@link #LEVEL_DEFAULT_RTL}
148
* <li>{@link #LEVEL_OVERRIDE}
149
* <li>{@link #MAX_EXPLICIT_LEVEL}
150
* <li>{@link #setPara}
151
* </ul>
152
*
153
* <h3>Basic concept: Reordering Mode</h3>
154
* Reordering mode values indicate which variant of the Bidi algorithm to
155
* use.
156
*
157
* <ul><b>See Also:</b>
158
* <li>{@link #setReorderingMode}
159
* <li>{@link #REORDER_DEFAULT}
160
* <li>{@link #REORDER_NUMBERS_SPECIAL}
161
* <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}
162
* <li>{@link #REORDER_RUNS_ONLY}
163
* <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}
164
* <li>{@link #REORDER_INVERSE_LIKE_DIRECT}
165
* <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}
166
* </ul>
167
*
168
* <h3>Basic concept: Reordering Options</h3>
169
* Reordering options can be applied during Bidi text transformations.
170
* <ul><b>See Also:</b>
171
* <li>{@link #setReorderingOptions}
172
* <li>{@link #OPTION_DEFAULT}
173
* <li>{@link #OPTION_INSERT_MARKS}
174
* <li>{@link #OPTION_REMOVE_CONTROLS}
175
* <li>{@link #OPTION_STREAMING}
176
* </ul>
177
*
178
*
179
* @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)
180
* @stable ICU 3.8
181
*
182
*
183
* <h4> Sample code for the ICU Bidi API </h4>
184
*
185
* <h5>Rendering a paragraph with the ICU Bidi API</h5>
186
*
187
* This is (hypothetical) sample code that illustrates how the ICU Bidi API
188
* could be used to render a paragraph of text. Rendering code depends highly on
189
* the graphics system, therefore this sample code must make a lot of
190
* assumptions, which may or may not match any existing graphics system's
191
* properties.
192
*
193
* <p>
194
* The basic assumptions are:
195
* </p>
196
* <ul>
197
* <li>Rendering is done from left to right on a horizontal line.</li>
198
* <li>A run of single-style, unidirectional text can be rendered at once.
199
* </li>
200
* <li>Such a run of text is passed to the graphics system with characters
201
* (code units) in logical order.</li>
202
* <li>The line-breaking algorithm is very complicated and Locale-dependent -
203
* and therefore its implementation omitted from this sample code.</li>
204
* </ul>
205
*
206
* <pre>
207
*
208
* package com.ibm.icu.dev.test.bidi;
209
*
210
* import com.ibm.icu.text.Bidi;
211
* import com.ibm.icu.text.BidiRun;
212
*
213
* public class Sample {
214
*
215
* static final int styleNormal = 0;
216
* static final int styleSelected = 1;
217
* static final int styleBold = 2;
218
* static final int styleItalics = 4;
219
* static final int styleSuper=8;
220
* static final int styleSub = 16;
221
*
222
* static class StyleRun {
223
* int limit;
224
* int style;
225
*
226
* public StyleRun(int limit, int style) {
227
* this.limit = limit;
228
* this.style = style;
229
* }
230
* }
231
*
232
* static class Bounds {
233
* int start;
234
* int limit;
235
*
236
* public Bounds(int start, int limit) {
237
* this.start = start;
238
* this.limit = limit;
239
* }
240
* }
241
*
242
* static int getTextWidth(String text, int start, int limit,
243
* StyleRun[] styleRuns, int styleRunCount) {
244
* // simplistic way to compute the width
245
* return limit - start;
246
* }
247
*
248
* // set limit and StyleRun limit for a line
249
* // from text[start] and from styleRuns[styleRunStart]
250
* // using Bidi.getLogicalRun(...)
251
* // returns line width
252
* static int getLineBreak(String text, Bounds line, Bidi para,
253
* StyleRun styleRuns[], Bounds styleRun) {
254
* // dummy return
255
* return 0;
256
* }
257
*
258
* // render runs on a line sequentially, always from left to right
259
*
260
* // prepare rendering a new line
261
* static void startLine(byte textDirection, int lineWidth) {
262
* System.out.println();
263
* }
264
*
265
* // render a run of text and advance to the right by the run width
266
* // the text[start..limit-1] is always in logical order
267
* static void renderRun(String text, int start, int limit,
268
* byte textDirection, int style) {
269
* }
270
*
271
* // We could compute a cross-product
272
* // from the style runs with the directional runs
273
* // and then reorder it.
274
* // Instead, here we iterate over each run type
275
* // and render the intersections -
276
* // with shortcuts in simple (and common) cases.
277
* // renderParagraph() is the main function.
278
*
279
* // render a directional run with
280
* // (possibly) multiple style runs intersecting with it
281
* static void renderDirectionalRun(String text, int start, int limit,
282
* byte direction, StyleRun styleRuns[],
283
* int styleRunCount) {
284
* int i;
285
*
286
* // iterate over style runs
287
* if (direction == Bidi.LTR) {
288
* int styleLimit;
289
* for (i = 0; i < styleRunCount; ++i) {
290
* styleLimit = styleRuns[i].limit;
291
* if (start < styleLimit) {
292
* if (styleLimit > limit) {
293
* styleLimit = limit;
294
* }
295
* renderRun(text, start, styleLimit,
296
* direction, styleRuns[i].style);
297
* if (styleLimit == limit) {
298
* break;
299
* }
300
* start = styleLimit;
301
* }
302
* }
303
* } else {
304
* int styleStart;
305
*
306
* for (i = styleRunCount-1; i >= 0; --i) {
307
* if (i > 0) {
308
* styleStart = styleRuns[i-1].limit;
309
* } else {
310
* styleStart = 0;
311
* }
312
* if (limit >= styleStart) {
313
* if (styleStart < start) {
314
* styleStart = start;
315
* }
316
* renderRun(text, styleStart, limit, direction,
317
* styleRuns[i].style);
318
* if (styleStart == start) {
319
* break;
320
* }
321
* limit = styleStart;
322
* }
323
* }
324
* }
325
* }
326
*
327
* // the line object represents text[start..limit-1]
328
* static void renderLine(Bidi line, String text, int start, int limit,
329
* StyleRun styleRuns[], int styleRunCount) {
330
* byte direction = line.getDirection();
331
* if (direction != Bidi.MIXED) {
332
* // unidirectional
333
* if (styleRunCount <= 1) {
334
* renderRun(text, start, limit, direction, styleRuns[0].style);
335
* } else {
336
* renderDirectionalRun(text, start, limit, direction,
337
* styleRuns, styleRunCount);
338
* }
339
* } else {
340
* // mixed-directional
341
* int count, i;
342
* BidiRun run;
343
*
344
* try {
345
* count = line.countRuns();
346
* } catch (IllegalStateException e) {
347
* e.printStackTrace();
348
* return;
349
* }
350
* if (styleRunCount <= 1) {
351
* int style = styleRuns[0].style;
352
*
353
* // iterate over directional runs
354
* for (i = 0; i < count; ++i) {
355
* run = line.getVisualRun(i);
356
* renderRun(text, run.getStart(), run.getLimit(),
357
* run.getDirection(), style);
358
* }
359
* } else {
360
* // iterate over both directional and style runs
361
* for (i = 0; i < count; ++i) {
362
* run = line.getVisualRun(i);
363
* renderDirectionalRun(text, run.getStart(),
364
* run.getLimit(), run.getDirection(),
365
* styleRuns, styleRunCount);
366
* }
367
* }
368
* }
369
* }
370
*
371
* static void renderParagraph(String text, byte textDirection,
372
* StyleRun styleRuns[], int styleRunCount,
373
* int lineWidth) {
374
* int length = text.length();
375
* Bidi para = new Bidi();
376
* try {
377
* para.setPara(text,
378
* textDirection != 0 ? Bidi.LEVEL_DEFAULT_RTL
379
* : Bidi.LEVEL_DEFAULT_LTR,
380
* null);
381
* } catch (Exception e) {
382
* e.printStackTrace();
383
* return;
384
* }
385
* byte paraLevel = (byte)(1 & para.getParaLevel());
386
* StyleRun styleRun = new StyleRun(length, styleNormal);
387
*
388
* if (styleRuns == null || styleRunCount <= 0) {
389
* styleRuns = new StyleRun[1];
390
* styleRunCount = 1;
391
* styleRuns[0] = styleRun;
392
* }
393
* // assume styleRuns[styleRunCount-1].limit>=length
394
*
395
* int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);
396
* if (width <= lineWidth) {
397
* // everything fits onto one line
398
*
399
* // prepare rendering a new line from either left or right
400
* startLine(paraLevel, width);
401
*
402
* renderLine(para, text, 0, length, styleRuns, styleRunCount);
403
* } else {
404
* // we need to render several lines
405
* Bidi line = new Bidi(length, 0);
406
* int start = 0, limit;
407
* int styleRunStart = 0, styleRunLimit;
408
*
409
* for (;;) {
410
* limit = length;
411
* styleRunLimit = styleRunCount;
412
* width = getLineBreak(text, new Bounds(start, limit),
413
* para, styleRuns,
414
* new Bounds(styleRunStart, styleRunLimit));
415
* try {
416
* line = para.setLine(start, limit);
417
* } catch (Exception e) {
418
* e.printStackTrace();
419
* return;
420
* }
421
* // prepare rendering a new line
422
* // from either left or right
423
* startLine(paraLevel, width);
424
*
425
* if (styleRunStart > 0) {
426
* int newRunCount = styleRuns.length - styleRunStart;
427
* StyleRun[] newRuns = new StyleRun[newRunCount];
428
* System.arraycopy(styleRuns, styleRunStart, newRuns, 0,
429
* newRunCount);
430
* renderLine(line, text, start, limit, newRuns,
431
* styleRunLimit - styleRunStart);
432
* } else {
433
* renderLine(line, text, start, limit, styleRuns,
434
* styleRunLimit - styleRunStart);
435
* }
436
* if (limit == length) {
437
* break;
438
* }
439
* start = limit;
440
* styleRunStart = styleRunLimit - 1;
441
* if (start >= styleRuns[styleRunStart].limit) {
442
* ++styleRunStart;
443
* }
444
* }
445
* }
446
* }
447
*
448
* public static void main(String[] args)
449
* {
450
* renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);
451
* renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);
452
* }
453
* }
454
*
455
* </pre>
456
*/
457
458
public class BidiBase {
459
460
class Point {
461
int pos; /* position in text */
462
int flag; /* flag for LRM/RLM, before/after */
463
}
464
465
class InsertPoints {
466
int size;
467
int confirmed;
468
Point[] points = new Point[0];
469
}
470
471
/** Paragraph level setting<p>
472
*
473
* Constant indicating that the base direction depends on the first strong
474
* directional character in the text according to the Unicode Bidirectional
475
* Algorithm. If no strong directional character is present,
476
* then set the paragraph level to 0 (left-to-right).<p>
477
*
478
* If this value is used in conjunction with reordering modes
479
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
480
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
481
* is assumed to be visual LTR, and the text after reordering is required
482
* to be the corresponding logical string with appropriate contextual
483
* direction. The direction of the result string will be RTL if either
484
* the righmost or leftmost strong character of the source text is RTL
485
* or Arabic Letter, the direction will be LTR otherwise.<p>
486
*
487
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
488
* be added at the beginning of the result string to ensure round trip
489
* (that the result string, when reordered back to visual, will produce
490
* the original source text).
491
* @see #REORDER_INVERSE_LIKE_DIRECT
492
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
493
* @stable ICU 3.8
494
*/
495
public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e;
496
497
/** Paragraph level setting<p>
498
*
499
* Constant indicating that the base direction depends on the first strong
500
* directional character in the text according to the Unicode Bidirectional
501
* Algorithm. If no strong directional character is present,
502
* then set the paragraph level to 1 (right-to-left).<p>
503
*
504
* If this value is used in conjunction with reordering modes
505
* <code>REORDER_INVERSE_LIKE_DIRECT</code> or
506
* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
507
* is assumed to be visual LTR, and the text after reordering is required
508
* to be the corresponding logical string with appropriate contextual
509
* direction. The direction of the result string will be RTL if either
510
* the righmost or leftmost strong character of the source text is RTL
511
* or Arabic Letter, or if the text contains no strong character;
512
* the direction will be LTR otherwise.<p>
513
*
514
* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may
515
* be added at the beginning of the result string to ensure round trip
516
* (that the result string, when reordered back to visual, will produce
517
* the original source text).
518
* @see #REORDER_INVERSE_LIKE_DIRECT
519
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
520
* @stable ICU 3.8
521
*/
522
public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f;
523
524
/**
525
* Maximum explicit embedding level.
526
* (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).
527
* @stable ICU 3.8
528
*/
529
public static final byte MAX_EXPLICIT_LEVEL = 61;
530
531
/**
532
* Bit flag for level input.
533
* Overrides directional properties.
534
* @stable ICU 3.8
535
*/
536
public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80;
537
538
/**
539
* Special value which can be returned by the mapping methods when a
540
* logical index has no corresponding visual index or vice-versa. This may
541
* happen for the logical-to-visual mapping of a Bidi control when option
542
* <code>OPTION_REMOVE_CONTROLS</code> is
543
* specified. This can also happen for the visual-to-logical mapping of a
544
* Bidi mark (LRM or RLM) inserted by option
545
* <code>OPTION_INSERT_MARKS</code>.
546
* @see #getVisualIndex
547
* @see #getVisualMap
548
* @see #getLogicalIndex
549
* @see #getLogicalMap
550
* @see #OPTION_INSERT_MARKS
551
* @see #OPTION_REMOVE_CONTROLS
552
* @stable ICU 3.8
553
*/
554
public static final int MAP_NOWHERE = -1;
555
556
/**
557
* Mixed-directional text.
558
* @stable ICU 3.8
559
*/
560
public static final byte MIXED = 2;
561
562
/**
563
* option bit for writeReordered():
564
* replace characters with the "mirrored" property in RTL runs
565
* by their mirror-image mappings
566
*
567
* @see #writeReordered
568
* @stable ICU 3.8
569
*/
570
public static final short DO_MIRRORING = 2;
571
572
/** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.
573
* @see #setReorderingMode
574
* @stable ICU 3.8
575
*/
576
private static final short REORDER_DEFAULT = 0;
577
578
/** Reordering mode: Logical to Visual algorithm which handles numbers in
579
* a way which mimicks the behavior of Windows XP.
580
* @see #setReorderingMode
581
* @stable ICU 3.8
582
*/
583
private static final short REORDER_NUMBERS_SPECIAL = 1;
584
585
/** Reordering mode: Logical to Visual algorithm grouping numbers with
586
* adjacent R characters (reversible algorithm).
587
* @see #setReorderingMode
588
* @stable ICU 3.8
589
*/
590
private static final short REORDER_GROUP_NUMBERS_WITH_R = 2;
591
592
/** Reordering mode: Reorder runs only to transform a Logical LTR string
593
* to the logical RTL string with the same display, or vice-versa.<br>
594
* If this mode is set together with option
595
* <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source
596
* text may be removed and other controls may be added to produce the
597
* minimum combination which has the required display.
598
* @see #OPTION_INSERT_MARKS
599
* @see #setReorderingMode
600
* @stable ICU 3.8
601
*/
602
private static final short REORDER_RUNS_ONLY = 3;
603
604
/** Reordering mode: Visual to Logical algorithm which handles numbers
605
* like L (same algorithm as selected by <code>setInverse(true)</code>.
606
* @see #setInverse
607
* @see #setReorderingMode
608
* @stable ICU 3.8
609
*/
610
private static final short REORDER_INVERSE_NUMBERS_AS_L = 4;
611
612
/** Reordering mode: Visual to Logical algorithm equivalent to the regular
613
* Logical to Visual algorithm.
614
* @see #setReorderingMode
615
* @stable ICU 3.8
616
*/
617
private static final short REORDER_INVERSE_LIKE_DIRECT = 5;
618
619
/** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the
620
* <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
621
* @see #setReorderingMode
622
* @stable ICU 3.8
623
*/
624
private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;
625
626
/* Reordering mode values must be ordered so that all the regular logical to
627
* visual modes come first, and all inverse Bidi modes come last.
628
*/
629
private static final short REORDER_LAST_LOGICAL_TO_VISUAL =
630
REORDER_NUMBERS_SPECIAL;
631
632
/**
633
* Option bit for <code>setReorderingOptions</code>:
634
* insert Bidi marks (LRM or RLM) when needed to ensure correct result of
635
* a reordering to a Logical order
636
*
637
* <p>This option must be set or reset before calling
638
* <code>setPara</code>.</p>
639
*
640
* <p>This option is significant only with reordering modes which generate
641
* a result with Logical order, specifically.</p>
642
* <ul>
643
* <li><code>REORDER_RUNS_ONLY</code></li>
644
* <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>
645
* <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>
646
* <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
647
* </ul>
648
*
649
* <p>If this option is set in conjunction with reordering mode
650
* <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
651
* <code>setInverse(true)</code>, it implies option
652
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
653
* <code>writeReordered()</code>.</p>
654
*
655
* <p>For other reordering modes, a minimum number of LRM or RLM characters
656
* will be added to the source text after reordering it so as to ensure
657
* round trip, i.e. when applying the inverse reordering mode on the
658
* resulting logical text with removal of Bidi marks
659
* (option <code>OPTION_REMOVE_CONTROLS</code> set before calling
660
* <code>setPara()</code> or option
661
* <code>REMOVE_BIDI_CONTROLS</code> in
662
* <code>writeReordered</code>), the result will be identical to the
663
* source text in the first transformation.
664
*
665
* <p>This option will be ignored if specified together with option
666
* <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option
667
* <code>REMOVE_BIDI_CONTROLS</code> in calls to method
668
* <code>writeReordered()</code> and it implies option
669
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
670
* <code>writeReordered()</code> if the reordering mode is
671
* <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
672
*
673
* @see #setReorderingMode
674
* @see #setReorderingOptions
675
* @see #INSERT_LRM_FOR_NUMERIC
676
* @see #REMOVE_BIDI_CONTROLS
677
* @see #OPTION_REMOVE_CONTROLS
678
* @see #REORDER_RUNS_ONLY
679
* @see #REORDER_INVERSE_NUMBERS_AS_L
680
* @see #REORDER_INVERSE_LIKE_DIRECT
681
* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL
682
* @stable ICU 3.8
683
*/
684
private static final int OPTION_INSERT_MARKS = 1;
685
686
/**
687
* Option bit for <code>setReorderingOptions</code>:
688
* remove Bidi control characters
689
*
690
* <p>This option must be set or reset before calling
691
* <code>setPara</code>.</p>
692
*
693
* <p>This option nullifies option
694
* <code>OPTION_INSERT_MARKS</code>. It inhibits option
695
* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method
696
* <code>writeReordered()</code> and it implies option
697
* <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p>
698
*
699
* @see #setReorderingMode
700
* @see #setReorderingOptions
701
* @see #OPTION_INSERT_MARKS
702
* @see #INSERT_LRM_FOR_NUMERIC
703
* @see #REMOVE_BIDI_CONTROLS
704
* @stable ICU 3.8
705
*/
706
private static final int OPTION_REMOVE_CONTROLS = 2;
707
708
/**
709
* Option bit for <code>setReorderingOptions</code>:
710
* process the output as part of a stream to be continued
711
*
712
* <p>This option must be set or reset before calling
713
* <code>setPara</code>.</p>
714
*
715
* <p>This option specifies that the caller is interested in processing
716
* large text object in parts. The results of the successive calls are
717
* expected to be concatenated by the caller. Only the call for the last
718
* part will have this option bit off.</p>
719
*
720
* <p>When this option bit is on, <code>setPara()</code> may process
721
* less than the full source text in order to truncate the text at a
722
* meaningful boundary. The caller should call
723
* <code>getProcessedLength()</code> immediately after calling
724
* <code>setPara()</code> in order to determine how much of the source
725
* text has been processed. Source text beyond that length should be
726
* resubmitted in following calls to <code>setPara</code>. The
727
* processed length may be less than the length of the source text if a
728
* character preceding the last character of the source text constitutes a
729
* reasonable boundary (like a block separator) for text to be continued.<br>
730
* If the last character of the source text constitutes a reasonable
731
* boundary, the whole text will be processed at once.<br>
732
* If nowhere in the source text there exists
733
* such a reasonable boundary, the processed length will be zero.<br>
734
* The caller should check for such an occurrence and do one of the following:
735
* <ul><li>submit a larger amount of text with a better chance to include
736
* a reasonable boundary.</li>
737
* <li>resubmit the same text after turning off option
738
* <code>OPTION_STREAMING</code>.</li></ul>
739
* In all cases, this option should be turned off before processing the last
740
* part of the text.</p>
741
*
742
* <p>When the <code>OPTION_STREAMING</code> option is used, it is
743
* recommended to call <code>orderParagraphsLTR()</code> with argument
744
* <code>orderParagraphsLTR</code> set to <code>true</code> before calling
745
* <code>setPara()</code> so that later paragraphs may be concatenated to
746
* previous paragraphs on the right.
747
* </p>
748
*
749
* @see #setReorderingMode
750
* @see #setReorderingOptions
751
* @see #getProcessedLength
752
* @see #orderParagraphsLTR
753
* @stable ICU 3.8
754
*/
755
private static final int OPTION_STREAMING = 4;
756
757
/*
758
* Comparing the description of the Bidi algorithm with this implementation
759
* is easier with the same names for the Bidi types in the code as there.
760
* See UCharacterDirection
761
*/
762
private static final byte L = 0;
763
private static final byte R = 1;
764
private static final byte EN = 2;
765
private static final byte ES = 3;
766
private static final byte ET = 4;
767
private static final byte AN = 5;
768
private static final byte CS = 6;
769
static final byte B = 7;
770
private static final byte S = 8;
771
private static final byte WS = 9;
772
private static final byte ON = 10;
773
private static final byte LRE = 11;
774
private static final byte LRO = 12;
775
private static final byte AL = 13;
776
private static final byte RLE = 14;
777
private static final byte RLO = 15;
778
private static final byte PDF = 16;
779
private static final byte NSM = 17;
780
private static final byte BN = 18;
781
782
private static final int MASK_R_AL = (1 << R | 1 << AL);
783
784
private static final char CR = '\r';
785
private static final char LF = '\n';
786
787
static final int LRM_BEFORE = 1;
788
static final int LRM_AFTER = 2;
789
static final int RLM_BEFORE = 4;
790
static final int RLM_AFTER = 8;
791
792
/*
793
* reference to parent paragraph object (reference to self if this object is
794
* a paragraph object); set to null in a newly opened object; set to a
795
* real value after a successful execution of setPara or setLine
796
*/
797
BidiBase paraBidi;
798
799
final UBiDiProps bdp;
800
801
/* character array representing the current text */
802
char[] text;
803
804
/* length of the current text */
805
int originalLength;
806
807
/* if the option OPTION_STREAMING is set, this is the length of
808
* text actually processed by <code>setPara</code>, which may be shorter
809
* than the original length. Otherwise, it is identical to the original
810
* length.
811
*/
812
public int length;
813
814
/* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi
815
* marks are allowed to be inserted in one of the reordering modes, the
816
* length of the result string may be different from the processed length.
817
*/
818
int resultLength;
819
820
/* indicators for whether memory may be allocated after construction */
821
boolean mayAllocateText;
822
boolean mayAllocateRuns;
823
824
/* arrays with one value per text-character */
825
byte[] dirPropsMemory = new byte[1];
826
byte[] levelsMemory = new byte[1];
827
byte[] dirProps;
828
byte[] levels;
829
830
/* must block separators receive level 0? */
831
boolean orderParagraphsLTR;
832
833
/* the paragraph level */
834
byte paraLevel;
835
836
/* original paraLevel when contextual */
837
/* must be one of DEFAULT_xxx or 0 if not contextual */
838
byte defaultParaLevel;
839
840
/* the following is set in setPara, used in processPropertySeq */
841
842
ImpTabPair impTabPair; /* reference to levels state table pair */
843
844
/* the overall paragraph or line directionality*/
845
byte direction;
846
847
/* flags is a bit set for which directional properties are in the text */
848
int flags;
849
850
/* lastArabicPos is index to the last AL in the text, -1 if none */
851
int lastArabicPos;
852
853
/* characters after trailingWSStart are WS and are */
854
/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */
855
int trailingWSStart;
856
857
/* fields for paragraph handling */
858
int paraCount; /* set in getDirProps() */
859
int[] parasMemory = new int[1];
860
int[] paras; /* limits of paragraphs, filled in
861
ResolveExplicitLevels() or CheckExplicitLevels() */
862
863
/* for single paragraph text, we only need a tiny array of paras (no allocation) */
864
int[] simpleParas = {0};
865
866
/* fields for line reordering */
867
int runCount; /* ==-1: runs not set up yet */
868
BidiRun[] runsMemory = new BidiRun[0];
869
BidiRun[] runs;
870
871
/* for non-mixed text, we only need a tiny array of runs (no allocation) */
872
BidiRun[] simpleRuns = {new BidiRun()};
873
874
/* mapping of runs in logical order to visual order */
875
int[] logicalToVisualRunsMap;
876
877
/* flag to indicate that the map has been updated */
878
boolean isGoodLogicalToVisualRunsMap;
879
880
/* for inverse Bidi with insertion of directional marks */
881
InsertPoints insertPoints = new InsertPoints();
882
883
/* for option OPTION_REMOVE_CONTROLS */
884
int controlCount;
885
886
/*
887
* Sometimes, bit values are more appropriate
888
* to deal with directionality properties.
889
* Abbreviations in these method names refer to names
890
* used in the Bidi algorithm.
891
*/
892
static int DirPropFlag(byte dir) {
893
return (1 << dir);
894
}
895
896
/*
897
* The following bit is ORed to the property of characters in paragraphs
898
* with contextual RTL direction when paraLevel is contextual.
899
*/
900
static final byte CONTEXT_RTL_SHIFT = 6;
901
static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT); // 0x40
902
static byte NoContextRTL(byte dir)
903
{
904
return (byte)(dir & ~CONTEXT_RTL);
905
}
906
907
/*
908
* The following is a variant of DirProp.DirPropFlag() which ignores the
909
* CONTEXT_RTL bit.
910
*/
911
static int DirPropFlagNC(byte dir) {
912
return (1<<(dir & ~CONTEXT_RTL));
913
}
914
915
static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);
916
917
/* to avoid some conditional statements, use tiny constant arrays */
918
static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };
919
static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };
920
static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };
921
922
static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }
923
static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }
924
static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }
925
926
/*
927
* are there any characters that are LTR?
928
*/
929
static final int MASK_LTR =
930
DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO);
931
932
/*
933
* are there any characters that are RTL?
934
*/
935
static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO);
936
937
/* explicit embedding codes */
938
private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO);
939
private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO);
940
private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF);
941
private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;
942
943
/* paragraph and segment separators */
944
private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);
945
946
/* all types that are counted as White Space or Neutral in some steps */
947
static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT;
948
private static final int MASK_N = DirPropFlag(ON)|MASK_WS;
949
950
/* types that are neutrals or could becomes neutrals in (Wn) */
951
private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N;
952
953
/*
954
* These types may be changed to "e",
955
* the embedding type (L or R) of the run,
956
* in the Bidi algorithm (N2)
957
*/
958
static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;
959
960
/*
961
* the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java
962
*/
963
private static byte GetLRFromLevel(byte level)
964
{
965
return (byte)(level & 1);
966
}
967
968
private static boolean IsDefaultLevel(byte level)
969
{
970
return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR);
971
}
972
973
byte GetParaLevelAt(int index)
974
{
975
return (defaultParaLevel != 0) ?
976
(byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel;
977
}
978
979
static boolean IsBidiControlChar(int c)
980
{
981
/* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
982
0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
983
return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
984
}
985
986
public void verifyValidPara()
987
{
988
if (this != this.paraBidi) {
989
throw new IllegalStateException("");
990
}
991
}
992
993
public void verifyValidParaOrLine()
994
{
995
BidiBase para = this.paraBidi;
996
/* verify Para */
997
if (this == para) {
998
return;
999
}
1000
/* verify Line */
1001
if ((para == null) || (para != para.paraBidi)) {
1002
throw new IllegalStateException();
1003
}
1004
}
1005
1006
public void verifyRange(int index, int start, int limit)
1007
{
1008
if (index < start || index >= limit) {
1009
throw new IllegalArgumentException("Value " + index +
1010
" is out of range " + start + " to " + limit);
1011
}
1012
}
1013
1014
public void verifyIndex(int index, int start, int limit)
1015
{
1016
if (index < start || index >= limit) {
1017
throw new ArrayIndexOutOfBoundsException("Index " + index +
1018
" is out of range " + start + " to " + limit);
1019
}
1020
}
1021
1022
/**
1023
* Allocate a <code>Bidi</code> object with preallocated memory
1024
* for internal structures.
1025
* This method provides a <code>Bidi</code> object like the default constructor
1026
* but it also preallocates memory for internal structures
1027
* according to the sizings supplied by the caller.<p>
1028
* The preallocation can be limited to some of the internal memory
1029
* by setting some values to 0 here. That means that if, e.g.,
1030
* <code>maxRunCount</code> cannot be reasonably predetermined and should not
1031
* be set to <code>maxLength</code> (the only failproof value) to avoid
1032
* wasting memory, then <code>maxRunCount</code> could be set to 0 here
1033
* and the internal structures that are associated with it will be allocated
1034
* on demand, just like with the default constructor.
1035
*
1036
* @param maxLength is the maximum text or line length that internal memory
1037
* will be preallocated for. An attempt to associate this object with a
1038
* longer text will fail, unless this value is 0, which leaves the allocation
1039
* up to the implementation.
1040
*
1041
* @param maxRunCount is the maximum anticipated number of same-level runs
1042
* that internal memory will be preallocated for. An attempt to access
1043
* visual runs on an object that was not preallocated for as many runs
1044
* as the text was actually resolved to will fail,
1045
* unless this value is 0, which leaves the allocation up to the implementation.<br><br>
1046
* The number of runs depends on the actual text and maybe anywhere between
1047
* 1 and <code>maxLength</code>. It is typically small.
1048
*
1049
* @throws IllegalArgumentException if maxLength or maxRunCount is less than 0
1050
* @stable ICU 3.8
1051
*/
1052
public BidiBase(int maxLength, int maxRunCount)
1053
{
1054
/* check the argument values */
1055
if (maxLength < 0 || maxRunCount < 0) {
1056
throw new IllegalArgumentException();
1057
}
1058
1059
/* reset the object, all reference variables null, all flags false,
1060
all sizes 0.
1061
In fact, we don't need to do anything, since class members are
1062
initialized as zero when an instance is created.
1063
*/
1064
/*
1065
mayAllocateText = false;
1066
mayAllocateRuns = false;
1067
orderParagraphsLTR = false;
1068
paraCount = 0;
1069
runCount = 0;
1070
trailingWSStart = 0;
1071
flags = 0;
1072
paraLevel = 0;
1073
defaultParaLevel = 0;
1074
direction = 0;
1075
*/
1076
/* get Bidi properties */
1077
try {
1078
bdp = UBiDiProps.getSingleton();
1079
}
1080
catch (IOException e) {
1081
throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");
1082
}
1083
1084
/* allocate memory for arrays as requested */
1085
if (maxLength > 0) {
1086
getInitialDirPropsMemory(maxLength);
1087
getInitialLevelsMemory(maxLength);
1088
} else {
1089
mayAllocateText = true;
1090
}
1091
1092
if (maxRunCount > 0) {
1093
// if maxRunCount == 1, use simpleRuns[]
1094
if (maxRunCount > 1) {
1095
getInitialRunsMemory(maxRunCount);
1096
}
1097
} else {
1098
mayAllocateRuns = true;
1099
}
1100
}
1101
1102
/*
1103
* We are allowed to allocate memory if object==null or
1104
* mayAllocate==true for each array that we need.
1105
*
1106
* Assume sizeNeeded>0.
1107
* If object != null, then assume size > 0.
1108
*/
1109
private Object getMemory(String label, Object array, Class<?> arrayClass,
1110
boolean mayAllocate, int sizeNeeded)
1111
{
1112
int len = Array.getLength(array);
1113
1114
/* we have at least enough memory and must not allocate */
1115
if (sizeNeeded == len) {
1116
return array;
1117
}
1118
if (!mayAllocate) {
1119
/* we must not allocate */
1120
if (sizeNeeded <= len) {
1121
return array;
1122
}
1123
throw new OutOfMemoryError("Failed to allocate memory for "
1124
+ label);
1125
}
1126
/* we may try to grow or shrink */
1127
/* FOOD FOR THOUGHT: when shrinking it should be possible to avoid
1128
the allocation altogether and rely on this.length */
1129
try {
1130
return Array.newInstance(arrayClass, sizeNeeded);
1131
} catch (Exception e) {
1132
throw new OutOfMemoryError("Failed to allocate memory for "
1133
+ label);
1134
}
1135
}
1136
1137
/* helper methods for each allocated array */
1138
private void getDirPropsMemory(boolean mayAllocate, int len)
1139
{
1140
Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);
1141
dirPropsMemory = (byte[]) array;
1142
}
1143
1144
void getDirPropsMemory(int len)
1145
{
1146
getDirPropsMemory(mayAllocateText, len);
1147
}
1148
1149
private void getLevelsMemory(boolean mayAllocate, int len)
1150
{
1151
Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);
1152
levelsMemory = (byte[]) array;
1153
}
1154
1155
void getLevelsMemory(int len)
1156
{
1157
getLevelsMemory(mayAllocateText, len);
1158
}
1159
1160
private void getRunsMemory(boolean mayAllocate, int len)
1161
{
1162
Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);
1163
runsMemory = (BidiRun[]) array;
1164
}
1165
1166
void getRunsMemory(int len)
1167
{
1168
getRunsMemory(mayAllocateRuns, len);
1169
}
1170
1171
/* additional methods used by constructor - always allow allocation */
1172
private void getInitialDirPropsMemory(int len)
1173
{
1174
getDirPropsMemory(true, len);
1175
}
1176
1177
private void getInitialLevelsMemory(int len)
1178
{
1179
getLevelsMemory(true, len);
1180
}
1181
1182
private void getInitialParasMemory(int len)
1183
{
1184
Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len);
1185
parasMemory = (int[]) array;
1186
}
1187
1188
private void getInitialRunsMemory(int len)
1189
{
1190
getRunsMemory(true, len);
1191
}
1192
1193
/* perform (P2)..(P3) ------------------------------------------------------- */
1194
1195
private void getDirProps()
1196
{
1197
int i = 0, i0, i1;
1198
flags = 0; /* collect all directionalities in the text */
1199
int uchar;
1200
byte dirProp;
1201
byte paraDirDefault = 0; /* initialize to avoid compiler warnings */
1202
boolean isDefaultLevel = IsDefaultLevel(paraLevel);
1203
/* for inverse Bidi, the default para level is set to RTL if there is a
1204
strong R or AL character at either end of the text */
1205
lastArabicPos = -1;
1206
controlCount = 0;
1207
1208
final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */
1209
final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */
1210
final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */
1211
1212
int state;
1213
int paraStart = 0; /* index of first char in paragraph */
1214
byte paraDir; /* == CONTEXT_RTL within paragraphs
1215
starting with strong R char */
1216
byte lastStrongDir=0; /* for default level & inverse Bidi */
1217
int lastStrongLTR=0; /* for STREAMING option */
1218
1219
if (isDefaultLevel) {
1220
paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0;
1221
paraDir = paraDirDefault;
1222
lastStrongDir = paraDirDefault;
1223
state = LOOKING_FOR_STRONG;
1224
} else {
1225
state = NOT_CONTEXTUAL;
1226
paraDir = 0;
1227
}
1228
/* count paragraphs and determine the paragraph level (P2..P3) */
1229
/*
1230
* see comment on constant fields:
1231
* the LEVEL_DEFAULT_XXX values are designed so that
1232
* their low-order bit alone yields the intended default
1233
*/
1234
1235
for (i = 0; i < originalLength; /* i is incremented in the loop */) {
1236
i0 = i; /* index of first code unit */
1237
uchar = UTF16.charAt(text, 0, originalLength, i);
1238
i += Character.charCount(uchar);
1239
i1 = i - 1; /* index of last code unit, gets the directional property */
1240
1241
dirProp = (byte)bdp.getClass(uchar);
1242
1243
flags |= DirPropFlag(dirProp);
1244
dirProps[i1] = (byte)(dirProp | paraDir);
1245
if (i1 > i0) { /* set previous code units' properties to BN */
1246
flags |= DirPropFlag(BN);
1247
do {
1248
dirProps[--i1] = (byte)(BN | paraDir);
1249
} while (i1 > i0);
1250
}
1251
if (state == LOOKING_FOR_STRONG) {
1252
if (dirProp == L) {
1253
state = FOUND_STRONG_CHAR;
1254
if (paraDir != 0) {
1255
paraDir = 0;
1256
for (i1 = paraStart; i1 < i; i1++) {
1257
dirProps[i1] &= ~CONTEXT_RTL;
1258
}
1259
}
1260
continue;
1261
}
1262
if (dirProp == R || dirProp == AL) {
1263
state = FOUND_STRONG_CHAR;
1264
if (paraDir == 0) {
1265
paraDir = CONTEXT_RTL;
1266
for (i1 = paraStart; i1 < i; i1++) {
1267
dirProps[i1] |= CONTEXT_RTL;
1268
}
1269
}
1270
continue;
1271
}
1272
}
1273
if (dirProp == L) {
1274
lastStrongDir = 0;
1275
lastStrongLTR = i; /* i is index to next character */
1276
}
1277
else if (dirProp == R) {
1278
lastStrongDir = CONTEXT_RTL;
1279
}
1280
else if (dirProp == AL) {
1281
lastStrongDir = CONTEXT_RTL;
1282
lastArabicPos = i-1;
1283
}
1284
else if (dirProp == B) {
1285
if (i < originalLength) { /* B not last char in text */
1286
if (!((uchar == (int)CR) && (text[i] == (int)LF))) {
1287
paraCount++;
1288
}
1289
if (isDefaultLevel) {
1290
state=LOOKING_FOR_STRONG;
1291
paraStart = i; /* i is index to next character */
1292
paraDir = paraDirDefault;
1293
lastStrongDir = paraDirDefault;
1294
}
1295
}
1296
}
1297
}
1298
if (isDefaultLevel) {
1299
paraLevel = GetParaLevelAt(0);
1300
}
1301
1302
/* The following line does nothing new for contextual paraLevel, but is
1303
needed for absolute paraLevel. */
1304
flags |= DirPropFlagLR(paraLevel);
1305
1306
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1307
flags |= DirPropFlag(L);
1308
}
1309
}
1310
1311
/* perform (X1)..(X9) ------------------------------------------------------- */
1312
1313
/* determine if the text is mixed-directional or single-directional */
1314
private byte directionFromFlags() {
1315
/* if the text contains AN and neutrals, then some neutrals may become RTL */
1316
if (!((flags & MASK_RTL) != 0 ||
1317
((flags & DirPropFlag(AN)) != 0 &&
1318
(flags & MASK_POSSIBLE_N) != 0))) {
1319
return Bidi.DIRECTION_LEFT_TO_RIGHT;
1320
} else if ((flags & MASK_LTR) == 0) {
1321
return Bidi.DIRECTION_RIGHT_TO_LEFT;
1322
} else {
1323
return MIXED;
1324
}
1325
}
1326
1327
/*
1328
* Resolve the explicit levels as specified by explicit embedding codes.
1329
* Recalculate the flags to have them reflect the real properties
1330
* after taking the explicit embeddings into account.
1331
*
1332
* The Bidi algorithm is designed to result in the same behavior whether embedding
1333
* levels are externally specified (from "styled text", supposedly the preferred
1334
* method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
1335
* That is why (X9) instructs to remove all explicit codes (and BN).
1336
* However, in a real implementation, this removal of these codes and their index
1337
* positions in the plain text is undesirable since it would result in
1338
* reallocated, reindexed text.
1339
* Instead, this implementation leaves the codes in there and just ignores them
1340
* in the subsequent processing.
1341
* In order to get the same reordering behavior, positions with a BN or an
1342
* explicit embedding code just get the same level assigned as the last "real"
1343
* character.
1344
*
1345
* Some implementations, not this one, then overwrite some of these
1346
* directionality properties at "real" same-level-run boundaries by
1347
* L or R codes so that the resolution of weak types can be performed on the
1348
* entire paragraph at once instead of having to parse it once more and
1349
* perform that resolution on same-level-runs.
1350
* This limits the scope of the implicit rules in effectively
1351
* the same way as the run limits.
1352
*
1353
* Instead, this implementation does not modify these codes.
1354
* On one hand, the paragraph has to be scanned for same-level-runs, but
1355
* on the other hand, this saves another loop to reset these codes,
1356
* or saves making and modifying a copy of dirProps[].
1357
*
1358
*
1359
* Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
1360
*
1361
*
1362
* Handling the stack of explicit levels (Xn):
1363
*
1364
* With the Bidi stack of explicit levels,
1365
* as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
1366
* the explicit level must never exceed MAX_EXPLICIT_LEVEL==61.
1367
*
1368
* In order to have a correct push-pop semantics even in the case of overflows,
1369
* there are two overflow counters:
1370
* - countOver60 is incremented with each LRx at level 60
1371
* - from level 60, one RLx increases the level to 61
1372
* - countOver61 is incremented with each LRx and RLx at level 61
1373
*
1374
* Popping levels with PDF must work in the opposite order so that level 61
1375
* is correct at the correct point. Underflows (too many PDFs) must be checked.
1376
*
1377
* This implementation assumes that MAX_EXPLICIT_LEVEL is odd.
1378
*/
1379
private byte resolveExplicitLevels() {
1380
int i = 0;
1381
byte dirProp;
1382
byte level = GetParaLevelAt(0);
1383
1384
byte dirct;
1385
int paraIndex = 0;
1386
1387
/* determine if the text is mixed-directional or single-directional */
1388
dirct = directionFromFlags();
1389
1390
/* we may not need to resolve any explicit levels, but for multiple
1391
paragraphs we want to loop on all chars to set the para boundaries */
1392
if ((dirct != MIXED) && (paraCount == 1)) {
1393
/* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
1394
} else if ((paraCount == 1) &&
1395
((flags & MASK_EXPLICIT) == 0)) {
1396
/* mixed, but all characters are at the same embedding level */
1397
/* or we are in "inverse Bidi" */
1398
/* and we don't have contextual multiple paragraphs with some B char */
1399
/* set all levels to the paragraph level */
1400
for (i = 0; i < length; ++i) {
1401
levels[i] = level;
1402
}
1403
} else {
1404
/* continue to perform (Xn) */
1405
1406
/* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
1407
/* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */
1408
byte embeddingLevel = level;
1409
byte newLevel;
1410
byte stackTop = 0;
1411
1412
byte[] stack = new byte[MAX_EXPLICIT_LEVEL]; /* we never push anything >=MAX_EXPLICIT_LEVEL */
1413
int countOver60 = 0;
1414
int countOver61 = 0; /* count overflows of explicit levels */
1415
1416
/* recalculate the flags */
1417
flags = 0;
1418
1419
for (i = 0; i < length; ++i) {
1420
dirProp = NoContextRTL(dirProps[i]);
1421
switch(dirProp) {
1422
case LRE:
1423
case LRO:
1424
/* (X3, X5) */
1425
newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */
1426
if (newLevel <= MAX_EXPLICIT_LEVEL) {
1427
stack[stackTop] = embeddingLevel;
1428
++stackTop;
1429
embeddingLevel = newLevel;
1430
if (dirProp == LRO) {
1431
embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
1432
}
1433
/* we don't need to set LEVEL_OVERRIDE off for LRE
1434
since this has already been done for newLevel which is
1435
the source for embeddingLevel.
1436
*/
1437
} else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) {
1438
++countOver61;
1439
} else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ {
1440
++countOver60;
1441
}
1442
flags |= DirPropFlag(BN);
1443
break;
1444
case RLE:
1445
case RLO:
1446
/* (X2, X4) */
1447
newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */
1448
if (newLevel<=MAX_EXPLICIT_LEVEL) {
1449
stack[stackTop] = embeddingLevel;
1450
++stackTop;
1451
embeddingLevel = newLevel;
1452
if (dirProp == RLO) {
1453
embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;
1454
}
1455
/* we don't need to set LEVEL_OVERRIDE off for RLE
1456
since this has already been done for newLevel which is
1457
the source for embeddingLevel.
1458
*/
1459
} else {
1460
++countOver61;
1461
}
1462
flags |= DirPropFlag(BN);
1463
break;
1464
case PDF:
1465
/* (X7) */
1466
/* handle all the overflow cases first */
1467
if (countOver61 > 0) {
1468
--countOver61;
1469
} else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) {
1470
/* handle LRx overflows from level 60 */
1471
--countOver60;
1472
} else if (stackTop > 0) {
1473
/* this is the pop operation; it also pops level 61 while countOver60>0 */
1474
--stackTop;
1475
embeddingLevel = stack[stackTop];
1476
/* } else { (underflow) */
1477
}
1478
flags |= DirPropFlag(BN);
1479
break;
1480
case B:
1481
stackTop = 0;
1482
countOver60 = 0;
1483
countOver61 = 0;
1484
level = GetParaLevelAt(i);
1485
if ((i + 1) < length) {
1486
embeddingLevel = GetParaLevelAt(i+1);
1487
if (!((text[i] == CR) && (text[i + 1] == LF))) {
1488
paras[paraIndex++] = i+1;
1489
}
1490
}
1491
flags |= DirPropFlag(B);
1492
break;
1493
case BN:
1494
/* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
1495
/* they will get their levels set correctly in adjustWSLevels() */
1496
flags |= DirPropFlag(BN);
1497
break;
1498
default:
1499
/* all other types get the "real" level */
1500
if (level != embeddingLevel) {
1501
level = embeddingLevel;
1502
if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
1503
flags |= DirPropFlagO(level) | DirPropFlagMultiRuns;
1504
} else {
1505
flags |= DirPropFlagE(level) | DirPropFlagMultiRuns;
1506
}
1507
}
1508
if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
1509
flags |= DirPropFlag(dirProp);
1510
}
1511
break;
1512
}
1513
1514
/*
1515
* We need to set reasonable levels even on BN codes and
1516
* explicit codes because we will later look at same-level runs (X10).
1517
*/
1518
levels[i] = level;
1519
}
1520
if ((flags & MASK_EMBEDDING) != 0) {
1521
flags |= DirPropFlagLR(paraLevel);
1522
}
1523
if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {
1524
flags |= DirPropFlag(L);
1525
}
1526
1527
/* subsequently, ignore the explicit codes and BN (X9) */
1528
1529
/* again, determine if the text is mixed-directional or single-directional */
1530
dirct = directionFromFlags();
1531
}
1532
1533
return dirct;
1534
}
1535
1536
/*
1537
* Use a pre-specified embedding levels array:
1538
*
1539
* Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
1540
* ignore all explicit codes (X9),
1541
* and check all the preset levels.
1542
*
1543
* Recalculate the flags to have them reflect the real properties
1544
* after taking the explicit embeddings into account.
1545
*/
1546
private byte checkExplicitLevels() {
1547
byte dirProp;
1548
int i;
1549
this.flags = 0; /* collect all directionalities in the text */
1550
byte level;
1551
int paraIndex = 0;
1552
1553
for (i = 0; i < length; ++i) {
1554
if (levels[i] == 0) {
1555
levels[i] = paraLevel;
1556
}
1557
if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {
1558
if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) {
1559
levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE);
1560
} else {
1561
levels[i] = paraLevel;
1562
}
1563
}
1564
level = levels[i];
1565
dirProp = NoContextRTL(dirProps[i]);
1566
if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {
1567
/* keep the override flag in levels[i] but adjust the flags */
1568
level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */
1569
flags |= DirPropFlagO(level);
1570
} else {
1571
/* set the flags */
1572
flags |= DirPropFlagE(level) | DirPropFlag(dirProp);
1573
}
1574
1575
if ((level < GetParaLevelAt(i) &&
1576
!((0 == level) && (dirProp == B))) ||
1577
(MAX_EXPLICIT_LEVEL <level)) {
1578
/* level out of bounds */
1579
throw new IllegalArgumentException("level " + level +
1580
" out of bounds at index " + i);
1581
}
1582
if ((dirProp == B) && ((i + 1) < length)) {
1583
if (!((text[i] == CR) && (text[i + 1] == LF))) {
1584
paras[paraIndex++] = i + 1;
1585
}
1586
}
1587
}
1588
if ((flags&MASK_EMBEDDING) != 0) {
1589
flags |= DirPropFlagLR(paraLevel);
1590
}
1591
1592
/* determine if the text is mixed-directional or single-directional */
1593
return directionFromFlags();
1594
}
1595
1596
/*********************************************************************/
1597
/* The Properties state machine table */
1598
/*********************************************************************/
1599
/* */
1600
/* All table cells are 8 bits: */
1601
/* bits 0..4: next state */
1602
/* bits 5..7: action to perform (if > 0) */
1603
/* */
1604
/* Cells may be of format "n" where n represents the next state */
1605
/* (except for the rightmost column). */
1606
/* Cells may also be of format "_(x,y)" where x represents an action */
1607
/* to perform and y represents the next state. */
1608
/* */
1609
/*********************************************************************/
1610
/* Definitions and type for properties state tables */
1611
/*********************************************************************/
1612
private static final int IMPTABPROPS_COLUMNS = 14;
1613
private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;
1614
private static short GetStateProps(short cell) {
1615
return (short)(cell & 0x1f);
1616
}
1617
private static short GetActionProps(short cell) {
1618
return (short)(cell >> 5);
1619
}
1620
1621
private static final short groupProp[] = /* dirProp regrouped */
1622
{
1623
/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */
1624
0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10
1625
};
1626
private static final short _L = 0;
1627
private static final short _R = 1;
1628
private static final short _EN = 2;
1629
private static final short _AN = 3;
1630
private static final short _ON = 4;
1631
private static final short _S = 5;
1632
private static final short _B = 6; /* reduced dirProp */
1633
1634
/*********************************************************************/
1635
/* */
1636
/* PROPERTIES STATE TABLE */
1637
/* */
1638
/* In table impTabProps, */
1639
/* - the ON column regroups ON and WS */
1640
/* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */
1641
/* - the Res column is the reduced property assigned to a run */
1642
/* */
1643
/* Action 1: process current run1, init new run1 */
1644
/* 2: init new run2 */
1645
/* 3: process run1, process run2, init new run1 */
1646
/* 4: process run1, set run1=run2, init new run2 */
1647
/* */
1648
/* Notes: */
1649
/* 1) This table is used in resolveImplicitLevels(). */
1650
/* 2) This table triggers actions when there is a change in the Bidi*/
1651
/* property of incoming characters (action 1). */
1652
/* 3) Most such property sequences are processed immediately (in */
1653
/* fact, passed to processPropertySeq(). */
1654
/* 4) However, numbers are assembled as one sequence. This means */
1655
/* that undefined situations (like CS following digits, until */
1656
/* it is known if the next char will be a digit) are held until */
1657
/* following chars define them. */
1658
/* Example: digits followed by CS, then comes another CS or ON; */
1659
/* the digits will be processed, then the CS assigned */
1660
/* as the start of an ON sequence (action 3). */
1661
/* 5) There are cases where more than one sequence must be */
1662
/* processed, for instance digits followed by CS followed by L: */
1663
/* the digits must be processed as one sequence, and the CS */
1664
/* must be processed as an ON sequence, all this before starting */
1665
/* assembling chars for the opening L sequence. */
1666
/* */
1667
/* */
1668
private static final short impTabProps[][] =
1669
{
1670
/* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, Res */
1671
/* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, _ON },
1672
/* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, _L },
1673
/* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, _R },
1674
/* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, _R },
1675
/* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, _EN },
1676
/* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, _AN },
1677
/* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, _AN },
1678
/* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, _ON },
1679
/* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, _ON },
1680
/* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, _ON },
1681
/*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, _EN },
1682
/*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, _EN },
1683
/*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 12, 128+7, 96+3, _AN },
1684
/*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, _AN },
1685
/*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3, _ON },
1686
/*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, _S },
1687
/*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, _S },
1688
/*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, _B }
1689
};
1690
1691
/*********************************************************************/
1692
/* The levels state machine tables */
1693
/*********************************************************************/
1694
/* */
1695
/* All table cells are 8 bits: */
1696
/* bits 0..3: next state */
1697
/* bits 4..7: action to perform (if > 0) */
1698
/* */
1699
/* Cells may be of format "n" where n represents the next state */
1700
/* (except for the rightmost column). */
1701
/* Cells may also be of format "_(x,y)" where x represents an action */
1702
/* to perform and y represents the next state. */
1703
/* */
1704
/* This format limits each table to 16 states each and to 15 actions.*/
1705
/* */
1706
/*********************************************************************/
1707
/* Definitions and type for levels state tables */
1708
/*********************************************************************/
1709
private static final int IMPTABLEVELS_COLUMNS = _B + 2;
1710
private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;
1711
private static short GetState(byte cell) { return (short)(cell & 0x0f); }
1712
private static short GetAction(byte cell) { return (short)(cell >> 4); }
1713
1714
private static class ImpTabPair {
1715
byte[][][] imptab;
1716
short[][] impact;
1717
1718
ImpTabPair(byte[][] table1, byte[][] table2,
1719
short[] act1, short[] act2) {
1720
imptab = new byte[][][] {table1, table2};
1721
impact = new short[][] {act1, act2};
1722
}
1723
}
1724
1725
/*********************************************************************/
1726
/* */
1727
/* LEVELS STATE TABLES */
1728
/* */
1729
/* In all levels state tables, */
1730
/* - state 0 is the initial state */
1731
/* - the Res column is the increment to add to the text level */
1732
/* for this property sequence. */
1733
/* */
1734
/* The impact arrays for each table of a pair map the local action */
1735
/* numbers of the table to the total list of actions. For instance, */
1736
/* action 2 in a given table corresponds to the action number which */
1737
/* appears in entry [2] of the impact array for that table. */
1738
/* The first entry of all impact arrays must be 0. */
1739
/* */
1740
/* Action 1: init conditional sequence */
1741
/* 2: prepend conditional sequence to current sequence */
1742
/* 3: set ON sequence to new level - 1 */
1743
/* 4: init EN/AN/ON sequence */
1744
/* 5: fix EN/AN/ON sequence followed by R */
1745
/* 6: set previous level sequence to level 2 */
1746
/* */
1747
/* Notes: */
1748
/* 1) These tables are used in processPropertySeq(). The input */
1749
/* is property sequences as determined by resolveImplicitLevels. */
1750
/* 2) Most such property sequences are processed immediately */
1751
/* (levels are assigned). */
1752
/* 3) However, some sequences cannot be assigned a final level till */
1753
/* one or more following sequences are received. For instance, */
1754
/* ON following an R sequence within an even-level paragraph. */
1755
/* If the following sequence is R, the ON sequence will be */
1756
/* assigned basic run level+1, and so will the R sequence. */
1757
/* 4) S is generally handled like ON, since its level will be fixed */
1758
/* to paragraph level in adjustWSLevels(). */
1759
/* */
1760
1761
private static final byte impTabL_DEFAULT[][] = /* Even paragraph level */
1762
/* In this table, conditional sequences receive the higher possible level
1763
until proven otherwise.
1764
*/
1765
{
1766
/* L, R, EN, AN, ON, S, B, Res */
1767
/* 0 : init */ { 0, 1, 0, 2, 0, 0, 0, 0 },
1768
/* 1 : R */ { 0, 1, 3, 3, 0x14, 0x14, 0, 1 },
1769
/* 2 : AN */ { 0, 1, 0, 2, 0x15, 0x15, 0, 2 },
1770
/* 3 : R+EN/AN */ { 0, 1, 3, 3, 0x14, 0x14, 0, 2 },
1771
/* 4 : R+ON */ { 0x20, 1, 3, 3, 4, 4, 0x20, 1 },
1772
/* 5 : AN+ON */ { 0x20, 1, 0x20, 2, 5, 5, 0x20, 1 }
1773
};
1774
1775
private static final byte impTabR_DEFAULT[][] = /* Odd paragraph level */
1776
/* In this table, conditional sequences receive the lower possible level
1777
until proven otherwise.
1778
*/
1779
{
1780
/* L, R, EN, AN, ON, S, B, Res */
1781
/* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
1782
/* 1 : L */ { 1, 0, 1, 3, 0x14, 0x14, 0, 1 },
1783
/* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
1784
/* 3 : L+AN */ { 1, 0, 1, 3, 5, 5, 0, 1 },
1785
/* 4 : L+ON */ { 0x21, 0, 0x21, 3, 4, 4, 0, 0 },
1786
/* 5 : L+AN+ON */ { 1, 0, 1, 3, 5, 5, 0, 0 }
1787
};
1788
1789
private static final short[] impAct0 = {0,1,2,3,4,5,6};
1790
1791
private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
1792
impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);
1793
1794
private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
1795
/* In this table, conditional sequences receive the higher possible
1796
level until proven otherwise.
1797
*/
1798
/* L, R, EN, AN, ON, S, B, Res */
1799
/* 0 : init */ { 0, 2, 1, 1, 0, 0, 0, 0 },
1800
/* 1 : L+EN/AN */ { 0, 2, 1, 1, 0, 0, 0, 2 },
1801
/* 2 : R */ { 0, 2, 4, 4, 0x13, 0, 0, 1 },
1802
/* 3 : R+ON */ { 0x20, 2, 4, 4, 3, 3, 0x20, 1 },
1803
/* 4 : R+EN/AN */ { 0, 2, 4, 4, 0x13, 0x13, 0, 2 }
1804
};
1805
private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
1806
impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);
1807
1808
private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
1809
/* In this table, EN/AN+ON sequences receive levels as if associated with R
1810
until proven that there is L or sor/eor on both sides. AN is handled like EN.
1811
*/
1812
/* L, R, EN, AN, ON, S, B, Res */
1813
/* 0 init */ { 0, 3, 0x11, 0x11, 0, 0, 0, 0 },
1814
/* 1 EN/AN */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 2 },
1815
/* 2 EN/AN+ON */ { 0x20, 3, 1, 1, 2, 0x20, 0x20, 1 },
1816
/* 3 R */ { 0, 3, 5, 5, 0x14, 0, 0, 1 },
1817
/* 4 R+ON */ { 0x20, 3, 5, 5, 4, 0x20, 0x20, 1 },
1818
/* 5 R+EN/AN */ { 0, 3, 5, 5, 0x14, 0, 0, 2 }
1819
};
1820
private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
1821
/* In this table, EN/AN+ON sequences receive levels as if associated with R
1822
until proven that there is L on both sides. AN is handled like EN.
1823
*/
1824
/* L, R, EN, AN, ON, S, B, Res */
1825
/* 0 init */ { 2, 0, 1, 1, 0, 0, 0, 0 },
1826
/* 1 EN/AN */ { 2, 0, 1, 1, 0, 0, 0, 1 },
1827
/* 2 L */ { 2, 0, 0x14, 0x14, 0x13, 0, 0, 1 },
1828
/* 3 L+ON */ { 0x22, 0, 4, 4, 3, 0, 0, 0 },
1829
/* 4 L+EN/AN */ { 0x22, 0, 4, 4, 3, 0, 0, 1 }
1830
};
1831
private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
1832
ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
1833
impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);
1834
1835
private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
1836
/* This table is identical to the Default LTR table except that EN and AN
1837
are handled like L.
1838
*/
1839
/* L, R, EN, AN, ON, S, B, Res */
1840
/* 0 : init */ { 0, 1, 0, 0, 0, 0, 0, 0 },
1841
/* 1 : R */ { 0, 1, 0, 0, 0x14, 0x14, 0, 1 },
1842
/* 2 : AN */ { 0, 1, 0, 0, 0x15, 0x15, 0, 2 },
1843
/* 3 : R+EN/AN */ { 0, 1, 0, 0, 0x14, 0x14, 0, 2 },
1844
/* 4 : R+ON */ { 0x20, 1, 0x20, 0x20, 4, 4, 0x20, 1 },
1845
/* 5 : AN+ON */ { 0x20, 1, 0x20, 0x20, 5, 5, 0x20, 1 }
1846
};
1847
private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
1848
/* This table is identical to the Default RTL table except that EN and AN
1849
are handled like L.
1850
*/
1851
/* L, R, EN, AN, ON, S, B, Res */
1852
/* 0 : init */ { 1, 0, 1, 1, 0, 0, 0, 0 },
1853
/* 1 : L */ { 1, 0, 1, 1, 0x14, 0x14, 0, 1 },
1854
/* 2 : EN/AN */ { 1, 0, 1, 1, 0, 0, 0, 1 },
1855
/* 3 : L+AN */ { 1, 0, 1, 1, 5, 5, 0, 1 },
1856
/* 4 : L+ON */ { 0x21, 0, 0x21, 0x21, 4, 4, 0, 0 },
1857
/* 5 : L+AN+ON */ { 1, 0, 1, 1, 5, 5, 0, 0 }
1858
};
1859
private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
1860
(impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
1861
impAct0, impAct0);
1862
1863
private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = { /* Odd paragraph level */
1864
/* In this table, conditional sequences receive the lower possible level
1865
until proven otherwise.
1866
*/
1867
/* L, R, EN, AN, ON, S, B, Res */
1868
/* 0 : init */ { 1, 0, 2, 2, 0, 0, 0, 0 },
1869
/* 1 : L */ { 1, 0, 1, 2, 0x13, 0x13, 0, 1 },
1870
/* 2 : EN/AN */ { 1, 0, 2, 2, 0, 0, 0, 1 },
1871
/* 3 : L+ON */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 0 },
1872
/* 4 : L+ON+AN */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 3 },
1873
/* 5 : L+AN+ON */ { 0x21, 0x30, 6, 4, 5, 5, 0x30, 2 },
1874
/* 6 : L+ON+EN */ { 0x21, 0x30, 6, 4, 3, 3, 0x30, 1 }
1875
};
1876
private static final short[] impAct1 = {0,1,11,12};
1877
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
1878
impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
1879
1880
private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
1881
/* The case handled in this table is (visually): R EN L
1882
*/
1883
/* L, R, EN, AN, ON, S, B, Res */
1884
/* 0 : init */ { 0, 0x63, 0, 1, 0, 0, 0, 0 },
1885
/* 1 : L+AN */ { 0, 0x63, 0, 1, 0x12, 0x30, 0, 4 },
1886
/* 2 : L+AN+ON */ { 0x20, 0x63, 0x20, 1, 2, 0x30, 0x20, 3 },
1887
/* 3 : R */ { 0, 0x63, 0x55, 0x56, 0x14, 0x30, 0, 3 },
1888
/* 4 : R+ON */ { 0x30, 0x43, 0x55, 0x56, 4, 0x30, 0x30, 3 },
1889
/* 5 : R+EN */ { 0x30, 0x43, 5, 0x56, 0x14, 0x30, 0x30, 4 },
1890
/* 6 : R+AN */ { 0x30, 0x43, 0x55, 6, 0x14, 0x30, 0x30, 4 }
1891
};
1892
private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
1893
/* The cases handled in this table are (visually): R EN L
1894
R L AN L
1895
*/
1896
/* L, R, EN, AN, ON, S, B, Res */
1897
/* 0 : init */ { 0x13, 0, 1, 1, 0, 0, 0, 0 },
1898
/* 1 : R+EN/AN */ { 0x23, 0, 1, 1, 2, 0x40, 0, 1 },
1899
/* 2 : R+EN/AN+ON */ { 0x23, 0, 1, 1, 2, 0x40, 0, 0 },
1900
/* 3 : L */ { 3 , 0, 3, 0x36, 0x14, 0x40, 0, 1 },
1901
/* 4 : L+ON */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 0 },
1902
/* 5 : L+ON+EN */ { 0x53, 0x40, 5, 0x36, 4, 0x40, 0x40, 1 },
1903
/* 6 : L+AN */ { 0x53, 0x40, 6, 6, 4, 0x40, 0x40, 3 }
1904
};
1905
private static final short impAct2[] = {0,1,7,8,9,10};
1906
private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
1907
new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
1908
impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
1909
1910
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(
1911
impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);
1912
1913
private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {
1914
/* The case handled in this table is (visually): R EN L
1915
*/
1916
/* L, R, EN, AN, ON, S, B, Res */
1917
/* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 },
1918
/* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 },
1919
/* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 },
1920
/* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },
1921
/* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }
1922
};
1923
private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new
1924
ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,
1925
impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);
1926
1927
private class LevState {
1928
byte[][] impTab; /* level table pointer */
1929
short[] impAct; /* action map array */
1930
int startON; /* start of ON sequence */
1931
int startL2EN; /* start of level 2 sequence */
1932
int lastStrongRTL; /* index of last found R or AL */
1933
short state; /* current state */
1934
byte runLevel; /* run level before implicit solving */
1935
}
1936
1937
/*------------------------------------------------------------------------*/
1938
1939
static final int FIRSTALLOC = 10;
1940
/*
1941
* param pos: position where to insert
1942
* param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER
1943
*/
1944
private void addPoint(int pos, int flag)
1945
{
1946
Point point = new Point();
1947
1948
int len = insertPoints.points.length;
1949
if (len == 0) {
1950
insertPoints.points = new Point[FIRSTALLOC];
1951
len = FIRSTALLOC;
1952
}
1953
if (insertPoints.size >= len) { /* no room for new point */
1954
Point[] savePoints = insertPoints.points;
1955
insertPoints.points = new Point[len * 2];
1956
System.arraycopy(savePoints, 0, insertPoints.points, 0, len);
1957
}
1958
point.pos = pos;
1959
point.flag = flag;
1960
insertPoints.points[insertPoints.size] = point;
1961
insertPoints.size++;
1962
}
1963
1964
/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
1965
1966
/*
1967
* This implementation of the (Wn) rules applies all rules in one pass.
1968
* In order to do so, it needs a look-ahead of typically 1 character
1969
* (except for W5: sequences of ET) and keeps track of changes
1970
* in a rule Wp that affect a later Wq (p<q).
1971
*
1972
* The (Nn) and (In) rules are also performed in that same single loop,
1973
* but effectively one iteration behind for white space.
1974
*
1975
* Since all implicit rules are performed in one step, it is not necessary
1976
* to actually store the intermediate directional properties in dirProps[].
1977
*/
1978
1979
private void processPropertySeq(LevState levState, short _prop,
1980
int start, int limit) {
1981
byte cell;
1982
byte[][] impTab = levState.impTab;
1983
short[] impAct = levState.impAct;
1984
short oldStateSeq,actionSeq;
1985
byte level, addLevel;
1986
int start0, k;
1987
1988
start0 = start; /* save original start position */
1989
oldStateSeq = levState.state;
1990
cell = impTab[oldStateSeq][_prop];
1991
levState.state = GetState(cell); /* isolate the new state */
1992
actionSeq = impAct[GetAction(cell)]; /* isolate the action */
1993
addLevel = impTab[levState.state][IMPTABLEVELS_RES];
1994
1995
if (actionSeq != 0) {
1996
switch (actionSeq) {
1997
case 1: /* init ON seq */
1998
levState.startON = start0;
1999
break;
2000
2001
case 2: /* prepend ON seq to current seq */
2002
start = levState.startON;
2003
break;
2004
2005
case 3: /* L or S after possible relevant EN/AN */
2006
/* check if we had EN after R/AL */
2007
if (levState.startL2EN >= 0) {
2008
addPoint(levState.startL2EN, LRM_BEFORE);
2009
}
2010
levState.startL2EN = -1; /* not within previous if since could also be -2 */
2011
/* check if we had any relevant EN/AN after R/AL */
2012
if ((insertPoints.points.length == 0) ||
2013
(insertPoints.size <= insertPoints.confirmed)) {
2014
/* nothing, just clean up */
2015
levState.lastStrongRTL = -1;
2016
/* check if we have a pending conditional segment */
2017
level = impTab[oldStateSeq][IMPTABLEVELS_RES];
2018
if ((level & 1) != 0 && levState.startON > 0) { /* after ON */
2019
start = levState.startON; /* reset to basic run level */
2020
}
2021
if (_prop == _S) { /* add LRM before S */
2022
addPoint(start0, LRM_BEFORE);
2023
insertPoints.confirmed = insertPoints.size;
2024
}
2025
break;
2026
}
2027
/* reset previous RTL cont to level for LTR text */
2028
for (k = levState.lastStrongRTL + 1; k < start0; k++) {
2029
/* reset odd level, leave runLevel+2 as is */
2030
levels[k] = (byte)((levels[k] - 2) & ~1);
2031
}
2032
/* mark insert points as confirmed */
2033
insertPoints.confirmed = insertPoints.size;
2034
levState.lastStrongRTL = -1;
2035
if (_prop == _S) { /* add LRM before S */
2036
addPoint(start0, LRM_BEFORE);
2037
insertPoints.confirmed = insertPoints.size;
2038
}
2039
break;
2040
2041
case 4: /* R/AL after possible relevant EN/AN */
2042
/* just clean up */
2043
if (insertPoints.points.length > 0)
2044
/* remove all non confirmed insert points */
2045
insertPoints.size = insertPoints.confirmed;
2046
levState.startON = -1;
2047
levState.startL2EN = -1;
2048
levState.lastStrongRTL = limit - 1;
2049
break;
2050
2051
case 5: /* EN/AN after R/AL + possible cont */
2052
/* check for real AN */
2053
if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) {
2054
/* real AN */
2055
if (levState.startL2EN == -1) { /* if no relevant EN already found */
2056
/* just note the righmost digit as a strong RTL */
2057
levState.lastStrongRTL = limit - 1;
2058
break;
2059
}
2060
if (levState.startL2EN >= 0) { /* after EN, no AN */
2061
addPoint(levState.startL2EN, LRM_BEFORE);
2062
levState.startL2EN = -2;
2063
}
2064
/* note AN */
2065
addPoint(start0, LRM_BEFORE);
2066
break;
2067
}
2068
/* if first EN/AN after R/AL */
2069
if (levState.startL2EN == -1) {
2070
levState.startL2EN = start0;
2071
}
2072
break;
2073
2074
case 6: /* note location of latest R/AL */
2075
levState.lastStrongRTL = limit - 1;
2076
levState.startON = -1;
2077
break;
2078
2079
case 7: /* L after R+ON/EN/AN */
2080
/* include possible adjacent number on the left */
2081
for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {
2082
}
2083
if (k >= 0) {
2084
addPoint(k, RLM_BEFORE); /* add RLM before */
2085
insertPoints.confirmed = insertPoints.size; /* confirm it */
2086
}
2087
levState.startON = start0;
2088
break;
2089
2090
case 8: /* AN after L */
2091
/* AN numbers between L text on both sides may be trouble. */
2092
/* tentatively bracket with LRMs; will be confirmed if followed by L */
2093
addPoint(start0, LRM_BEFORE); /* add LRM before */
2094
addPoint(start0, LRM_AFTER); /* add LRM after */
2095
break;
2096
2097
case 9: /* R after L+ON/EN/AN */
2098
/* false alert, infirm LRMs around previous AN */
2099
insertPoints.size=insertPoints.confirmed;
2100
if (_prop == _S) { /* add RLM before S */
2101
addPoint(start0, RLM_BEFORE);
2102
insertPoints.confirmed = insertPoints.size;
2103
}
2104
break;
2105
2106
case 10: /* L after L+ON/AN */
2107
level = (byte)(levState.runLevel + addLevel);
2108
for (k=levState.startON; k < start0; k++) {
2109
if (levels[k] < level) {
2110
levels[k] = level;
2111
}
2112
}
2113
insertPoints.confirmed = insertPoints.size; /* confirm inserts */
2114
levState.startON = start0;
2115
break;
2116
2117
case 11: /* L after L+ON+EN/AN/ON */
2118
level = levState.runLevel;
2119
for (k = start0-1; k >= levState.startON; k--) {
2120
if (levels[k] == level+3) {
2121
while (levels[k] == level+3) {
2122
levels[k--] -= 2;
2123
}
2124
while (levels[k] == level) {
2125
k--;
2126
}
2127
}
2128
if (levels[k] == level+2) {
2129
levels[k] = level;
2130
continue;
2131
}
2132
levels[k] = (byte)(level+1);
2133
}
2134
break;
2135
2136
case 12: /* R after L+ON+EN/AN/ON */
2137
level = (byte)(levState.runLevel+1);
2138
for (k = start0-1; k >= levState.startON; k--) {
2139
if (levels[k] > level) {
2140
levels[k] -= 2;
2141
}
2142
}
2143
break;
2144
2145
default: /* we should never get here */
2146
throw new IllegalStateException("Internal ICU error in processPropertySeq");
2147
}
2148
}
2149
if ((addLevel) != 0 || (start < start0)) {
2150
level = (byte)(levState.runLevel + addLevel);
2151
for (k = start; k < limit; k++) {
2152
levels[k] = level;
2153
}
2154
}
2155
}
2156
2157
private void resolveImplicitLevels(int start, int limit, short sor, short eor)
2158
{
2159
LevState levState = new LevState();
2160
int i, start1, start2;
2161
short oldStateImp, stateImp, actionImp;
2162
short gprop, resProp, cell;
2163
short nextStrongProp = R;
2164
int nextStrongPos = -1;
2165
2166
2167
/* check for RTL inverse Bidi mode */
2168
/* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
2169
* loop on the text characters from end to start.
2170
* This would need a different properties state table (at least different
2171
* actions) and different levels state tables (maybe very similar to the
2172
* LTR corresponding ones.
2173
*/
2174
/* initialize for levels state table */
2175
levState.startL2EN = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2176
levState.lastStrongRTL = -1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
2177
levState.state = 0;
2178
levState.runLevel = levels[start];
2179
levState.impTab = impTabPair.imptab[levState.runLevel & 1];
2180
levState.impAct = impTabPair.impact[levState.runLevel & 1];
2181
processPropertySeq(levState, sor, start, start);
2182
/* initialize for property state table */
2183
if (dirProps[start] == NSM) {
2184
stateImp = (short)(1 + sor);
2185
} else {
2186
stateImp = 0;
2187
}
2188
start1 = start;
2189
start2 = 0;
2190
2191
for (i = start; i <= limit; i++) {
2192
if (i >= limit) {
2193
gprop = eor;
2194
} else {
2195
short prop, prop1;
2196
prop = NoContextRTL(dirProps[i]);
2197
gprop = groupProp[prop];
2198
}
2199
oldStateImp = stateImp;
2200
cell = impTabProps[oldStateImp][gprop];
2201
stateImp = GetStateProps(cell); /* isolate the new state */
2202
actionImp = GetActionProps(cell); /* isolate the action */
2203
if ((i == limit) && (actionImp == 0)) {
2204
/* there is an unprocessed sequence if its property == eor */
2205
actionImp = 1; /* process the last sequence */
2206
}
2207
if (actionImp != 0) {
2208
resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
2209
switch (actionImp) {
2210
case 1: /* process current seq1, init new seq1 */
2211
processPropertySeq(levState, resProp, start1, i);
2212
start1 = i;
2213
break;
2214
case 2: /* init new seq2 */
2215
start2 = i;
2216
break;
2217
case 3: /* process seq1, process seq2, init new seq1 */
2218
processPropertySeq(levState, resProp, start1, start2);
2219
processPropertySeq(levState, _ON, start2, i);
2220
start1 = i;
2221
break;
2222
case 4: /* process seq1, set seq1=seq2, init new seq2 */
2223
processPropertySeq(levState, resProp, start1, start2);
2224
start1 = start2;
2225
start2 = i;
2226
break;
2227
default: /* we should never get here */
2228
throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
2229
}
2230
}
2231
}
2232
/* flush possible pending sequence, e.g. ON */
2233
processPropertySeq(levState, eor, limit, limit);
2234
}
2235
2236
/* perform (L1) and (X9) ---------------------------------------------------- */
2237
2238
/*
2239
* Reset the embedding levels for some non-graphic characters (L1).
2240
* This method also sets appropriate levels for BN, and
2241
* explicit embedding types that are supposed to have been removed
2242
* from the paragraph in (X9).
2243
*/
2244
private void adjustWSLevels() {
2245
int i;
2246
2247
if ((flags & MASK_WS) != 0) {
2248
int flag;
2249
i = trailingWSStart;
2250
while (i > 0) {
2251
/* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
2252
while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) {
2253
if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
2254
levels[i] = 0;
2255
} else {
2256
levels[i] = GetParaLevelAt(i);
2257
}
2258
}
2259
2260
/* reset BN to the next character's paraLevel until B/S, which restarts above loop */
2261
/* here, i+1 is guaranteed to be <length */
2262
while (i > 0) {
2263
flag = DirPropFlagNC(dirProps[--i]);
2264
if ((flag & MASK_BN_EXPLICIT) != 0) {
2265
levels[i] = levels[i + 1];
2266
} else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
2267
levels[i] = 0;
2268
break;
2269
} else if ((flag & MASK_B_S) != 0){
2270
levels[i] = GetParaLevelAt(i);
2271
break;
2272
}
2273
}
2274
}
2275
}
2276
}
2277
2278
private int Bidi_Min(int x, int y) {
2279
return x < y ? x : y;
2280
}
2281
2282
private int Bidi_Abs(int x) {
2283
return x >= 0 ? x : -x;
2284
}
2285
2286
/**
2287
* Perform the Unicode Bidi algorithm. It is defined in the
2288
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2289
* version 13,
2290
* also described in The Unicode Standard, Version 4.0 .<p>
2291
*
2292
* This method takes a piece of plain text containing one or more paragraphs,
2293
* with or without externally specified embedding levels from <i>styled</i>
2294
* text and computes the left-right-directionality of each character.<p>
2295
*
2296
* If the entire text is all of the same directionality, then
2297
* the method may not perform all the steps described by the algorithm,
2298
* i.e., some levels may not be the same as if all steps were performed.
2299
* This is not relevant for unidirectional text.<br>
2300
* For example, in pure LTR text with numbers the numbers would get
2301
* a resolved level of 2 higher than the surrounding text according to
2302
* the algorithm. This implementation may set all resolved levels to
2303
* the same value in such a case.<p>
2304
*
2305
* The text can be composed of multiple paragraphs. Occurrence of a block
2306
* separator in the text terminates a paragraph, and whatever comes next starts
2307
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
2308
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
2309
* in that case, the pair of characters is considered as terminating the
2310
* preceding paragraph, and a new paragraph will be started by a character
2311
* coming after the LF.
2312
*
2313
* Although the text is passed here as a <code>String</code>, it is
2314
* stored internally as an array of characters. Therefore the
2315
* documentation will refer to indexes of the characters in the text.
2316
*
2317
* @param text contains the text that the Bidi algorithm will be performed
2318
* on. This text can be retrieved with <code>getText()</code> or
2319
* <code>getTextAsString</code>.<br>
2320
*
2321
* @param paraLevel specifies the default level for the text;
2322
* it is typically 0 (LTR) or 1 (RTL).
2323
* If the method shall determine the paragraph level from the text,
2324
* then <code>paraLevel</code> can be set to
2325
* either <code>LEVEL_DEFAULT_LTR</code>
2326
* or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
2327
* paragraphs, the paragraph level shall be determined separately for
2328
* each paragraph; if a paragraph does not include any strongly typed
2329
* character, then the desired default is used (0 for LTR or 1 for RTL).
2330
* Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
2331
* is also valid, with odd levels indicating RTL.
2332
*
2333
* @param embeddingLevels (in) may be used to preset the embedding and override levels,
2334
* ignoring characters like LRE and PDF in the text.
2335
* A level overrides the directional property of its corresponding
2336
* (same index) character if the level has the
2337
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
2338
* Except for that bit, it must be
2339
* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
2340
* with one exception: a level of zero may be specified for a
2341
* paragraph separator even if <code>paraLevel&gt;0</code> when multiple
2342
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
2343
* <strong>Caution: </strong>A reference to this array, not a copy
2344
* of the levels, will be stored in the <code>Bidi</code> object;
2345
* the <code>embeddingLevels</code>
2346
* should not be modified to avoid unexpected results on subsequent
2347
* Bidi operations. However, the <code>setPara()</code> and
2348
* <code>setLine()</code> methods may modify some or all of the
2349
* levels.<br><br>
2350
* <strong>Note:</strong> the <code>embeddingLevels</code> array must
2351
* have one entry for each character in <code>text</code>.
2352
*
2353
* @throws IllegalArgumentException if the values in embeddingLevels are
2354
* not within the allowed range
2355
*
2356
* @see #LEVEL_DEFAULT_LTR
2357
* @see #LEVEL_DEFAULT_RTL
2358
* @see #LEVEL_OVERRIDE
2359
* @see #MAX_EXPLICIT_LEVEL
2360
* @stable ICU 3.8
2361
*/
2362
void setPara(String text, byte paraLevel, byte[] embeddingLevels)
2363
{
2364
if (text == null) {
2365
setPara(new char[0], paraLevel, embeddingLevels);
2366
} else {
2367
setPara(text.toCharArray(), paraLevel, embeddingLevels);
2368
}
2369
}
2370
2371
/**
2372
* Perform the Unicode Bidi algorithm. It is defined in the
2373
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2374
* version 13,
2375
* also described in The Unicode Standard, Version 4.0 .<p>
2376
*
2377
* This method takes a piece of plain text containing one or more paragraphs,
2378
* with or without externally specified embedding levels from <i>styled</i>
2379
* text and computes the left-right-directionality of each character.<p>
2380
*
2381
* If the entire text is all of the same directionality, then
2382
* the method may not perform all the steps described by the algorithm,
2383
* i.e., some levels may not be the same as if all steps were performed.
2384
* This is not relevant for unidirectional text.<br>
2385
* For example, in pure LTR text with numbers the numbers would get
2386
* a resolved level of 2 higher than the surrounding text according to
2387
* the algorithm. This implementation may set all resolved levels to
2388
* the same value in such a case.<p>
2389
*
2390
* The text can be composed of multiple paragraphs. Occurrence of a block
2391
* separator in the text terminates a paragraph, and whatever comes next starts
2392
* a new paragraph. The exception to this rule is when a Carriage Return (CR)
2393
* is followed by a Line Feed (LF). Both CR and LF are block separators, but
2394
* in that case, the pair of characters is considered as terminating the
2395
* preceding paragraph, and a new paragraph will be started by a character
2396
* coming after the LF.
2397
*
2398
* The text is stored internally as an array of characters. Therefore the
2399
* documentation will refer to indexes of the characters in the text.
2400
*
2401
* @param chars contains the text that the Bidi algorithm will be performed
2402
* on. This text can be retrieved with <code>getText()</code> or
2403
* <code>getTextAsString</code>.<br>
2404
*
2405
* @param paraLevel specifies the default level for the text;
2406
* it is typically 0 (LTR) or 1 (RTL).
2407
* If the method shall determine the paragraph level from the text,
2408
* then <code>paraLevel</code> can be set to
2409
* either <code>LEVEL_DEFAULT_LTR</code>
2410
* or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple
2411
* paragraphs, the paragraph level shall be determined separately for
2412
* each paragraph; if a paragraph does not include any strongly typed
2413
* character, then the desired default is used (0 for LTR or 1 for RTL).
2414
* Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>
2415
* is also valid, with odd levels indicating RTL.
2416
*
2417
* @param embeddingLevels (in) may be used to preset the embedding and
2418
* override levels, ignoring characters like LRE and PDF in the text.
2419
* A level overrides the directional property of its corresponding
2420
* (same index) character if the level has the
2421
* <code>LEVEL_OVERRIDE</code> bit set.<br><br>
2422
* Except for that bit, it must be
2423
* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,
2424
* with one exception: a level of zero may be specified for a
2425
* paragraph separator even if <code>paraLevel&gt;0</code> when multiple
2426
* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>
2427
* <strong>Caution: </strong>A reference to this array, not a copy
2428
* of the levels, will be stored in the <code>Bidi</code> object;
2429
* the <code>embeddingLevels</code>
2430
* should not be modified to avoid unexpected results on subsequent
2431
* Bidi operations. However, the <code>setPara()</code> and
2432
* <code>setLine()</code> methods may modify some or all of the
2433
* levels.<br><br>
2434
* <strong>Note:</strong> the <code>embeddingLevels</code> array must
2435
* have one entry for each character in <code>text</code>.
2436
*
2437
* @throws IllegalArgumentException if the values in embeddingLevels are
2438
* not within the allowed range
2439
*
2440
* @see #LEVEL_DEFAULT_LTR
2441
* @see #LEVEL_DEFAULT_RTL
2442
* @see #LEVEL_OVERRIDE
2443
* @see #MAX_EXPLICIT_LEVEL
2444
* @stable ICU 3.8
2445
*/
2446
public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)
2447
{
2448
/* check the argument values */
2449
if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) {
2450
verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);
2451
}
2452
if (chars == null) {
2453
chars = new char[0];
2454
}
2455
2456
/* initialize the Bidi object */
2457
this.paraBidi = null; /* mark unfinished setPara */
2458
this.text = chars;
2459
this.length = this.originalLength = this.resultLength = text.length;
2460
this.paraLevel = paraLevel;
2461
this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
2462
this.paraCount = 1;
2463
2464
/* Allocate zero-length arrays instead of setting to null here; then
2465
* checks for null in various places can be eliminated.
2466
*/
2467
dirProps = new byte[0];
2468
levels = new byte[0];
2469
runs = new BidiRun[0];
2470
isGoodLogicalToVisualRunsMap = false;
2471
insertPoints.size = 0; /* clean up from last call */
2472
insertPoints.confirmed = 0; /* clean up from last call */
2473
2474
/*
2475
* Save the original paraLevel if contextual; otherwise, set to 0.
2476
*/
2477
if (IsDefaultLevel(paraLevel)) {
2478
defaultParaLevel = paraLevel;
2479
} else {
2480
defaultParaLevel = 0;
2481
}
2482
2483
if (length == 0) {
2484
/*
2485
* For an empty paragraph, create a Bidi object with the paraLevel and
2486
* the flags and the direction set but without allocating zero-length arrays.
2487
* There is nothing more to do.
2488
*/
2489
if (IsDefaultLevel(paraLevel)) {
2490
this.paraLevel &= 1;
2491
defaultParaLevel = 0;
2492
}
2493
if ((this.paraLevel & 1) != 0) {
2494
flags = DirPropFlag(R);
2495
direction = Bidi.DIRECTION_RIGHT_TO_LEFT;
2496
} else {
2497
flags = DirPropFlag(L);
2498
direction = Bidi.DIRECTION_LEFT_TO_RIGHT;
2499
}
2500
2501
runCount = 0;
2502
paraCount = 0;
2503
paraBidi = this; /* mark successful setPara */
2504
return;
2505
}
2506
2507
runCount = -1;
2508
2509
/*
2510
* Get the directional properties,
2511
* the flags bit-set, and
2512
* determine the paragraph level if necessary.
2513
*/
2514
getDirPropsMemory(length);
2515
dirProps = dirPropsMemory;
2516
getDirProps();
2517
2518
/* the processed length may have changed if OPTION_STREAMING is set */
2519
trailingWSStart = length; /* the levels[] will reflect the WS run */
2520
2521
/* allocate paras memory */
2522
if (paraCount > 1) {
2523
getInitialParasMemory(paraCount);
2524
paras = parasMemory;
2525
paras[paraCount - 1] = length;
2526
} else {
2527
/* initialize paras for single paragraph */
2528
paras = simpleParas;
2529
simpleParas[0] = length;
2530
}
2531
2532
/* are explicit levels specified? */
2533
if (embeddingLevels == null) {
2534
/* no: determine explicit levels according to the (Xn) rules */
2535
getLevelsMemory(length);
2536
levels = levelsMemory;
2537
direction = resolveExplicitLevels();
2538
} else {
2539
/* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */
2540
levels = embeddingLevels;
2541
direction = checkExplicitLevels();
2542
}
2543
2544
/*
2545
* The steps after (X9) in the Bidi algorithm are performed only if
2546
* the paragraph text has mixed directionality!
2547
*/
2548
switch (direction) {
2549
case Bidi.DIRECTION_LEFT_TO_RIGHT:
2550
/* make sure paraLevel is even */
2551
paraLevel = (byte)((paraLevel + 1) & ~1);
2552
2553
/* all levels are implicitly at paraLevel (important for getLevels()) */
2554
trailingWSStart = 0;
2555
break;
2556
case Bidi.DIRECTION_RIGHT_TO_LEFT:
2557
/* make sure paraLevel is odd */
2558
paraLevel |= 1;
2559
2560
/* all levels are implicitly at paraLevel (important for getLevels()) */
2561
trailingWSStart = 0;
2562
break;
2563
default:
2564
this.impTabPair = impTab_DEFAULT;
2565
2566
/*
2567
* If there are no external levels specified and there
2568
* are no significant explicit level codes in the text,
2569
* then we can treat the entire paragraph as one run.
2570
* Otherwise, we need to perform the following rules on runs of
2571
* the text with the same embedding levels. (X10)
2572
* "Significant" explicit level codes are ones that actually
2573
* affect non-BN characters.
2574
* Examples for "insignificant" ones are empty embeddings
2575
* LRE-PDF, LRE-RLE-PDF-PDF, etc.
2576
*/
2577
if (embeddingLevels == null && paraCount <= 1 &&
2578
(flags & DirPropFlagMultiRuns) == 0) {
2579
resolveImplicitLevels(0, length,
2580
GetLRFromLevel(GetParaLevelAt(0)),
2581
GetLRFromLevel(GetParaLevelAt(length - 1)));
2582
} else {
2583
/* sor, eor: start and end types of same-level-run */
2584
int start, limit = 0;
2585
byte level, nextLevel;
2586
short sor, eor;
2587
2588
/* determine the first sor and set eor to it because of the loop body (sor=eor there) */
2589
level = GetParaLevelAt(0);
2590
nextLevel = levels[0];
2591
if (level < nextLevel) {
2592
eor = GetLRFromLevel(nextLevel);
2593
} else {
2594
eor = GetLRFromLevel(level);
2595
}
2596
2597
do {
2598
/* determine start and limit of the run (end points just behind the run) */
2599
2600
/* the values for this run's start are the same as for the previous run's end */
2601
start = limit;
2602
level = nextLevel;
2603
if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) {
2604
/* except if this is a new paragraph, then set sor = para level */
2605
sor = GetLRFromLevel(GetParaLevelAt(start));
2606
} else {
2607
sor = eor;
2608
}
2609
2610
/* search for the limit of this run */
2611
while (++limit < length && levels[limit] == level) {}
2612
2613
/* get the correct level of the next run */
2614
if (limit < length) {
2615
nextLevel = levels[limit];
2616
} else {
2617
nextLevel = GetParaLevelAt(length - 1);
2618
}
2619
2620
/* determine eor from max(level, nextLevel); sor is last run's eor */
2621
if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) {
2622
eor = GetLRFromLevel(nextLevel);
2623
} else {
2624
eor = GetLRFromLevel(level);
2625
}
2626
2627
/* if the run consists of overridden directional types, then there
2628
are no implicit types to be resolved */
2629
if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {
2630
resolveImplicitLevels(start, limit, sor, eor);
2631
} else {
2632
/* remove the LEVEL_OVERRIDE flags */
2633
do {
2634
levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE;
2635
} while (start < limit);
2636
}
2637
} while (limit < length);
2638
}
2639
2640
/* reset the embedding levels for some non-graphic characters (L1), (X9) */
2641
adjustWSLevels();
2642
2643
break;
2644
}
2645
2646
resultLength += insertPoints.size;
2647
paraBidi = this; /* mark successful setPara */
2648
}
2649
2650
/**
2651
* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the
2652
* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
2653
* version 13,
2654
* also described in The Unicode Standard, Version 4.0 .<p>
2655
*
2656
* This method takes a paragraph of text and computes the
2657
* left-right-directionality of each character. The text should not
2658
* contain any Unicode block separators.<p>
2659
*
2660
* The RUN_DIRECTION attribute in the text, if present, determines the base
2661
* direction (left-to-right or right-to-left). If not present, the base
2662
* direction is computed using the Unicode Bidirectional Algorithm,
2663
* defaulting to left-to-right if there are no strong directional characters
2664
* in the text. This attribute, if present, must be applied to all the text
2665
* in the paragraph.<p>
2666
*
2667
* The BIDI_EMBEDDING attribute in the text, if present, represents
2668
* embedding level information. Negative values from -1 to -62 indicate
2669
* overrides at the absolute value of the level. Positive values from 1 to
2670
* 62 indicate embeddings. Where values are zero or not defined, the base
2671
* embedding level as determined by the base direction is assumed.<p>
2672
*
2673
* The NUMERIC_SHAPING attribute in the text, if present, converts European
2674
* digits to other decimal digits before running the bidi algorithm. This
2675
* attribute, if present, must be applied to all the text in the paragraph.
2676
*
2677
* If the entire text is all of the same directionality, then
2678
* the method may not perform all the steps described by the algorithm,
2679
* i.e., some levels may not be the same as if all steps were performed.
2680
* This is not relevant for unidirectional text.<br>
2681
* For example, in pure LTR text with numbers the numbers would get
2682
* a resolved level of 2 higher than the surrounding text according to
2683
* the algorithm. This implementation may set all resolved levels to
2684
* the same value in such a case.<p>
2685
*
2686
* @param paragraph a paragraph of text with optional character and
2687
* paragraph attribute information
2688
* @stable ICU 3.8
2689
*/
2690
public void setPara(AttributedCharacterIterator paragraph)
2691
{
2692
byte paraLvl;
2693
char ch = paragraph.first();
2694
Boolean runDirection =
2695
(Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);
2696
Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);
2697
if (runDirection == null) {
2698
paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
2699
} else {
2700
paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?
2701
(byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT;
2702
}
2703
2704
byte[] lvls = null;
2705
int len = paragraph.getEndIndex() - paragraph.getBeginIndex();
2706
byte[] embeddingLevels = new byte[len];
2707
char[] txt = new char[len];
2708
int i = 0;
2709
while (ch != AttributedCharacterIterator.DONE) {
2710
txt[i] = ch;
2711
Integer embedding =
2712
(Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING);
2713
if (embedding != null) {
2714
byte level = embedding.byteValue();
2715
if (level == 0) {
2716
/* no-op */
2717
} else if (level < 0) {
2718
lvls = embeddingLevels;
2719
embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE);
2720
} else {
2721
lvls = embeddingLevels;
2722
embeddingLevels[i] = level;
2723
}
2724
}
2725
ch = paragraph.next();
2726
++i;
2727
}
2728
2729
if (shaper != null) {
2730
NumericShapings.shape(shaper, txt, 0, len);
2731
}
2732
setPara(txt, paraLvl, lvls);
2733
}
2734
2735
/**
2736
* Specify whether block separators must be allocated level zero,
2737
* so that successive paragraphs will progress from left to right.
2738
* This method must be called before <code>setPara()</code>.
2739
* Paragraph separators (B) may appear in the text. Setting them to level zero
2740
* means that all paragraph separators (including one possibly appearing
2741
* in the last text position) are kept in the reordered text after the text
2742
* that they follow in the source text.
2743
* When this feature is not enabled, a paragraph separator at the last
2744
* position of the text before reordering will go to the first position
2745
* of the reordered text when the paragraph level is odd.
2746
*
2747
* @param ordarParaLTR specifies whether paragraph separators (B) must
2748
* receive level 0, so that successive paragraphs progress from left to right.
2749
*
2750
* @see #setPara
2751
* @stable ICU 3.8
2752
*/
2753
private void orderParagraphsLTR(boolean ordarParaLTR) {
2754
orderParagraphsLTR = ordarParaLTR;
2755
}
2756
2757
/**
2758
* Get the directionality of the text.
2759
*
2760
* @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>
2761
* that indicates if the entire text
2762
* represented by this object is unidirectional,
2763
* and which direction, or if it is mixed-directional.
2764
*
2765
* @throws IllegalStateException if this call is not preceded by a successful
2766
* call to <code>setPara</code> or <code>setLine</code>
2767
*
2768
* @see #LTR
2769
* @see #RTL
2770
* @see #MIXED
2771
* @stable ICU 3.8
2772
*/
2773
private byte getDirection()
2774
{
2775
verifyValidParaOrLine();
2776
return direction;
2777
}
2778
2779
/**
2780
* Get the length of the text.
2781
*
2782
* @return The length of the text that the <code>Bidi</code> object was
2783
* created for.
2784
*
2785
* @throws IllegalStateException if this call is not preceded by a successful
2786
* call to <code>setPara</code> or <code>setLine</code>
2787
* @stable ICU 3.8
2788
*/
2789
public int getLength()
2790
{
2791
verifyValidParaOrLine();
2792
return originalLength;
2793
}
2794
2795
/* paragraphs API methods ------------------------------------------------- */
2796
2797
/**
2798
* Get the paragraph level of the text.
2799
*
2800
* @return The paragraph level. If there are multiple paragraphs, their
2801
* level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or
2802
* LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph
2803
* is returned.
2804
*
2805
* @throws IllegalStateException if this call is not preceded by a successful
2806
* call to <code>setPara</code> or <code>setLine</code>
2807
*
2808
* @see #LEVEL_DEFAULT_LTR
2809
* @see #LEVEL_DEFAULT_RTL
2810
* @see #getParagraph
2811
* @see #getParagraphByIndex
2812
* @stable ICU 3.8
2813
*/
2814
public byte getParaLevel()
2815
{
2816
verifyValidParaOrLine();
2817
return paraLevel;
2818
}
2819
2820
/**
2821
* Get the index of a paragraph, given a position within the text.<p>
2822
*
2823
* @param charIndex is the index of a character within the text, in the
2824
* range <code>[0..getProcessedLength()-1]</code>.
2825
*
2826
* @return The index of the paragraph containing the specified position,
2827
* starting from 0.
2828
*
2829
* @throws IllegalStateException if this call is not preceded by a successful
2830
* call to <code>setPara</code> or <code>setLine</code>
2831
* @throws IllegalArgumentException if charIndex is not within the legal range
2832
*
2833
* @see com.ibm.icu.text.BidiRun
2834
* @see #getProcessedLength
2835
* @stable ICU 3.8
2836
*/
2837
public int getParagraphIndex(int charIndex)
2838
{
2839
verifyValidParaOrLine();
2840
BidiBase bidi = paraBidi; /* get Para object if Line object */
2841
verifyRange(charIndex, 0, bidi.length);
2842
int paraIndex;
2843
for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) {
2844
}
2845
return paraIndex;
2846
}
2847
2848
/**
2849
* <code>setLine()</code> returns a <code>Bidi</code> object to
2850
* contain the reordering information, especially the resolved levels,
2851
* for all the characters in a line of text. This line of text is
2852
* specified by referring to a <code>Bidi</code> object representing
2853
* this information for a piece of text containing one or more paragraphs,
2854
* and by specifying a range of indexes in this text.<p>
2855
* In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
2856
*
2857
* This is used after calling <code>setPara()</code>
2858
* for a piece of text, and after line-breaking on that text.
2859
* It is not necessary if each paragraph is treated as a single line.<p>
2860
*
2861
* After line-breaking, rules (L1) and (L2) for the treatment of
2862
* trailing WS and for reordering are performed on
2863
* a <code>Bidi</code> object that represents a line.<p>
2864
*
2865
* <strong>Important: </strong>the line <code>Bidi</code> object may
2866
* reference data within the global text <code>Bidi</code> object.
2867
* You should not alter the content of the global text object until
2868
* you are finished using the line object.
2869
*
2870
* @param start is the line's first index into the text.
2871
*
2872
* @param limit is just behind the line's last index into the text
2873
* (its last index +1).
2874
*
2875
* @return a <code>Bidi</code> object that will now represent a line of the text.
2876
*
2877
* @throws IllegalStateException if this call is not preceded by a successful
2878
* call to <code>setPara</code>
2879
* @throws IllegalArgumentException if start and limit are not in the range
2880
* <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
2881
* or if the specified line crosses a paragraph boundary
2882
*
2883
* @see #setPara
2884
* @see #getProcessedLength
2885
* @stable ICU 3.8
2886
*/
2887
public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit)
2888
{
2889
verifyValidPara();
2890
verifyRange(start, 0, limit);
2891
verifyRange(limit, 0, length+1);
2892
2893
return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit);
2894
}
2895
2896
/**
2897
* Get the level for one character.
2898
*
2899
* @param charIndex the index of a character.
2900
*
2901
* @return The level for the character at <code>charIndex</code>.
2902
*
2903
* @throws IllegalStateException if this call is not preceded by a successful
2904
* call to <code>setPara</code> or <code>setLine</code>
2905
* @throws IllegalArgumentException if charIndex is not in the range
2906
* <code>0&lt;=charIndex&lt;getProcessedLength()</code>
2907
*
2908
* @see #getProcessedLength
2909
* @stable ICU 3.8
2910
*/
2911
public byte getLevelAt(int charIndex)
2912
{
2913
if (charIndex < 0 || charIndex >= length) {
2914
return (byte)getBaseLevel();
2915
}
2916
verifyValidParaOrLine();
2917
verifyRange(charIndex, 0, length);
2918
return BidiLine.getLevelAt(this, charIndex);
2919
}
2920
2921
/**
2922
* Get an array of levels for each character.<p>
2923
*
2924
* Note that this method may allocate memory under some
2925
* circumstances, unlike <code>getLevelAt()</code>.
2926
*
2927
* @return The levels array for the text,
2928
* or <code>null</code> if an error occurs.
2929
*
2930
* @throws IllegalStateException if this call is not preceded by a successful
2931
* call to <code>setPara</code> or <code>setLine</code>
2932
* @stable ICU 3.8
2933
*/
2934
private byte[] getLevels()
2935
{
2936
verifyValidParaOrLine();
2937
if (length <= 0) {
2938
return new byte[0];
2939
}
2940
return BidiLine.getLevels(this);
2941
}
2942
2943
/**
2944
* Get the number of runs.
2945
* This method may invoke the actual reordering on the
2946
* <code>Bidi</code> object, after <code>setPara()</code>
2947
* may have resolved only the levels of the text. Therefore,
2948
* <code>countRuns()</code> may have to allocate memory,
2949
* and may throw an exception if it fails to do so.
2950
*
2951
* @return The number of runs.
2952
*
2953
* @throws IllegalStateException if this call is not preceded by a successful
2954
* call to <code>setPara</code> or <code>setLine</code>
2955
* @stable ICU 3.8
2956
*/
2957
public int countRuns()
2958
{
2959
verifyValidParaOrLine();
2960
BidiLine.getRuns(this);
2961
return runCount;
2962
}
2963
2964
/**
2965
* Get a visual-to-logical index map (array) for the characters in the
2966
* <code>Bidi</code> (paragraph or line) object.
2967
* <p>
2968
* Some values in the map may be <code>MAP_NOWHERE</code> if the
2969
* corresponding text characters are Bidi marks inserted in the visual
2970
* output by the option <code>OPTION_INSERT_MARKS</code>.
2971
* <p>
2972
* When the visual output is altered by using options of
2973
* <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
2974
* <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
2975
* <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
2976
* be correct. It is advised to use, when possible, reordering options
2977
* such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.
2978
*
2979
* @return an array of <code>getResultLength()</code>
2980
* indexes which will reflect the reordering of the characters.<br><br>
2981
* The index map will result in
2982
* <code>indexMap[visualIndex]==logicalIndex</code>, where
2983
* <code>indexMap</code> represents the returned array.
2984
*
2985
* @throws IllegalStateException if this call is not preceded by a successful
2986
* call to <code>setPara</code> or <code>setLine</code>
2987
*
2988
* @see #getLogicalMap
2989
* @see #getLogicalIndex
2990
* @see #getResultLength
2991
* @see #MAP_NOWHERE
2992
* @see #OPTION_INSERT_MARKS
2993
* @see #writeReordered
2994
* @stable ICU 3.8
2995
*/
2996
private int[] getVisualMap()
2997
{
2998
/* countRuns() checks successful call to setPara/setLine */
2999
countRuns();
3000
if (resultLength <= 0) {
3001
return new int[0];
3002
}
3003
return BidiLine.getVisualMap(this);
3004
}
3005
3006
/**
3007
* This is a convenience method that does not use a <code>Bidi</code> object.
3008
* It is intended to be used for when an application has determined the levels
3009
* of objects (character sequences) and just needs to have them reordered (L2).
3010
* This is equivalent to using <code>getVisualMap()</code> on a
3011
* <code>Bidi</code> object.
3012
*
3013
* @param levels is an array of levels that have been determined by
3014
* the application.
3015
*
3016
* @return an array of <code>levels.length</code>
3017
* indexes which will reflect the reordering of the characters.<p>
3018
* The index map will result in
3019
* <code>indexMap[visualIndex]==logicalIndex</code>, where
3020
* <code>indexMap</code> represents the returned array.
3021
*
3022
* @stable ICU 3.8
3023
*/
3024
private static int[] reorderVisual(byte[] levels)
3025
{
3026
return BidiLine.reorderVisual(levels);
3027
}
3028
3029
/**
3030
* Constant indicating that the base direction depends on the first strong
3031
* directional character in the text according to the Unicode Bidirectional
3032
* Algorithm. If no strong directional character is present, the base
3033
* direction is left-to-right.
3034
* @stable ICU 3.8
3035
*/
3036
private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;
3037
3038
/**
3039
* Constant indicating that the base direction depends on the first strong
3040
* directional character in the text according to the Unicode Bidirectional
3041
* Algorithm. If no strong directional character is present, the base
3042
* direction is right-to-left.
3043
* @stable ICU 3.8
3044
*/
3045
private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;
3046
3047
/**
3048
* Create Bidi from the given text, embedding, and direction information.
3049
* The embeddings array may be null. If present, the values represent
3050
* embedding level information. Negative values from -1 to -61 indicate
3051
* overrides at the absolute value of the level. Positive values from 1 to
3052
* 61 indicate embeddings. Where values are zero, the base embedding level
3053
* as determined by the base direction is assumed.<p>
3054
*
3055
* Note: this constructor calls setPara() internally.
3056
*
3057
* @param text an array containing the paragraph of text to process.
3058
* @param textStart the index into the text array of the start of the
3059
* paragraph.
3060
* @param embeddings an array containing embedding values for each character
3061
* in the paragraph. This can be null, in which case it is assumed
3062
* that there is no external embedding information.
3063
* @param embStart the index into the embedding array of the start of the
3064
* paragraph.
3065
* @param paragraphLength the length of the paragraph in the text and
3066
* embeddings arrays.
3067
* @param flags a collection of flags that control the algorithm. The
3068
* algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
3069
* DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
3070
* DIRECTION_DEFAULT_RIGHT_TO_LEFT. Other values are reserved.
3071
*
3072
* @throws IllegalArgumentException if the values in embeddings are
3073
* not within the allowed range
3074
*
3075
* @see #DIRECTION_LEFT_TO_RIGHT
3076
* @see #DIRECTION_RIGHT_TO_LEFT
3077
* @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT
3078
* @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT
3079
* @stable ICU 3.8
3080
*/
3081
public BidiBase(char[] text,
3082
int textStart,
3083
byte[] embeddings,
3084
int embStart,
3085
int paragraphLength,
3086
int flags)
3087
{
3088
this(0, 0);
3089
byte paraLvl;
3090
switch (flags) {
3091
case Bidi.DIRECTION_LEFT_TO_RIGHT:
3092
default:
3093
paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT;
3094
break;
3095
case Bidi.DIRECTION_RIGHT_TO_LEFT:
3096
paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT;
3097
break;
3098
case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:
3099
paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;
3100
break;
3101
case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:
3102
paraLvl = INTERNAL_LEVEL_DEFAULT_RTL;
3103
break;
3104
}
3105
byte[] paraEmbeddings;
3106
if (embeddings == null) {
3107
paraEmbeddings = null;
3108
} else {
3109
paraEmbeddings = new byte[paragraphLength];
3110
byte lev;
3111
for (int i = 0; i < paragraphLength; i++) {
3112
lev = embeddings[i + embStart];
3113
if (lev < 0) {
3114
lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE);
3115
} else if (lev == 0) {
3116
lev = paraLvl;
3117
if (paraLvl > MAX_EXPLICIT_LEVEL) {
3118
lev &= 1;
3119
}
3120
}
3121
paraEmbeddings[i] = lev;
3122
}
3123
}
3124
if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {
3125
setPara(text, paraLvl, paraEmbeddings);
3126
} else {
3127
char[] paraText = new char[paragraphLength];
3128
System.arraycopy(text, textStart, paraText, 0, paragraphLength);
3129
setPara(paraText, paraLvl, paraEmbeddings);
3130
}
3131
}
3132
3133
/**
3134
* Return true if the line is not left-to-right or right-to-left. This means
3135
* it either has mixed runs of left-to-right and right-to-left text, or the
3136
* base direction differs from the direction of the only run of text.
3137
*
3138
* @return true if the line is not left-to-right or right-to-left.
3139
*
3140
* @throws IllegalStateException if this call is not preceded by a successful
3141
* call to <code>setPara</code>
3142
* @stable ICU 3.8
3143
*/
3144
public boolean isMixed()
3145
{
3146
return (!isLeftToRight() && !isRightToLeft());
3147
}
3148
3149
/**
3150
* Return true if the line is all left-to-right text and the base direction
3151
* is left-to-right.
3152
*
3153
* @return true if the line is all left-to-right text and the base direction
3154
* is left-to-right.
3155
*
3156
* @throws IllegalStateException if this call is not preceded by a successful
3157
* call to <code>setPara</code>
3158
* @stable ICU 3.8
3159
*/
3160
public boolean isLeftToRight()
3161
{
3162
return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0);
3163
}
3164
3165
/**
3166
* Return true if the line is all right-to-left text, and the base direction
3167
* is right-to-left
3168
*
3169
* @return true if the line is all right-to-left text, and the base
3170
* direction is right-to-left
3171
*
3172
* @throws IllegalStateException if this call is not preceded by a successful
3173
* call to <code>setPara</code>
3174
* @stable ICU 3.8
3175
*/
3176
public boolean isRightToLeft()
3177
{
3178
return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1);
3179
}
3180
3181
/**
3182
* Return true if the base direction is left-to-right
3183
*
3184
* @return true if the base direction is left-to-right
3185
*
3186
* @throws IllegalStateException if this call is not preceded by a successful
3187
* call to <code>setPara</code> or <code>setLine</code>
3188
*
3189
* @stable ICU 3.8
3190
*/
3191
public boolean baseIsLeftToRight()
3192
{
3193
return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT);
3194
}
3195
3196
/**
3197
* Return the base level (0 if left-to-right, 1 if right-to-left).
3198
*
3199
* @return the base level
3200
*
3201
* @throws IllegalStateException if this call is not preceded by a successful
3202
* call to <code>setPara</code> or <code>setLine</code>
3203
*
3204
* @stable ICU 3.8
3205
*/
3206
public int getBaseLevel()
3207
{
3208
return getParaLevel();
3209
}
3210
3211
/**
3212
* Compute the logical to visual run mapping
3213
*/
3214
private void getLogicalToVisualRunsMap()
3215
{
3216
if (isGoodLogicalToVisualRunsMap) {
3217
return;
3218
}
3219
int count = countRuns();
3220
if ((logicalToVisualRunsMap == null) ||
3221
(logicalToVisualRunsMap.length < count)) {
3222
logicalToVisualRunsMap = new int[count];
3223
}
3224
int i;
3225
long[] keys = new long[count];
3226
for (i = 0; i < count; i++) {
3227
keys[i] = ((long)(runs[i].start)<<32) + i;
3228
}
3229
Arrays.sort(keys);
3230
for (i = 0; i < count; i++) {
3231
logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);
3232
}
3233
keys = null;
3234
isGoodLogicalToVisualRunsMap = true;
3235
}
3236
3237
/**
3238
* Return the level of the nth logical run in this line.
3239
*
3240
* @param run the index of the run, between 0 and <code>countRuns()-1</code>
3241
*
3242
* @return the level of the run
3243
*
3244
* @throws IllegalStateException if this call is not preceded by a successful
3245
* call to <code>setPara</code> or <code>setLine</code>
3246
* @throws IllegalArgumentException if <code>run</code> is not in
3247
* the range <code>0&lt;=run&lt;countRuns()</code>
3248
* @stable ICU 3.8
3249
*/
3250
public int getRunLevel(int run)
3251
{
3252
verifyValidParaOrLine();
3253
BidiLine.getRuns(this);
3254
if (run < 0 || run >= runCount) {
3255
return getParaLevel();
3256
}
3257
getLogicalToVisualRunsMap();
3258
return runs[logicalToVisualRunsMap[run]].level;
3259
}
3260
3261
/**
3262
* Return the index of the character at the start of the nth logical run in
3263
* this line, as an offset from the start of the line.
3264
*
3265
* @param run the index of the run, between 0 and <code>countRuns()</code>
3266
*
3267
* @return the start of the run
3268
*
3269
* @throws IllegalStateException if this call is not preceded by a successful
3270
* call to <code>setPara</code> or <code>setLine</code>
3271
* @throws IllegalArgumentException if <code>run</code> is not in
3272
* the range <code>0&lt;=run&lt;countRuns()</code>
3273
* @stable ICU 3.8
3274
*/
3275
public int getRunStart(int run)
3276
{
3277
verifyValidParaOrLine();
3278
BidiLine.getRuns(this);
3279
if (runCount == 1) {
3280
return 0;
3281
} else if (run == runCount) {
3282
return length;
3283
}
3284
verifyIndex(run, 0, runCount);
3285
getLogicalToVisualRunsMap();
3286
return runs[logicalToVisualRunsMap[run]].start;
3287
}
3288
3289
/**
3290
* Return the index of the character past the end of the nth logical run in
3291
* this line, as an offset from the start of the line. For example, this
3292
* will return the length of the line for the last run on the line.
3293
*
3294
* @param run the index of the run, between 0 and <code>countRuns()</code>
3295
*
3296
* @return the limit of the run
3297
*
3298
* @throws IllegalStateException if this call is not preceded by a successful
3299
* call to <code>setPara</code> or <code>setLine</code>
3300
* @throws IllegalArgumentException if <code>run</code> is not in
3301
* the range <code>0&lt;=run&lt;countRuns()</code>
3302
* @stable ICU 3.8
3303
*/
3304
public int getRunLimit(int run)
3305
{
3306
verifyValidParaOrLine();
3307
BidiLine.getRuns(this);
3308
if (runCount == 1) {
3309
return length;
3310
}
3311
verifyIndex(run, 0, runCount);
3312
getLogicalToVisualRunsMap();
3313
int idx = logicalToVisualRunsMap[run];
3314
int len = idx == 0 ? runs[idx].limit :
3315
runs[idx].limit - runs[idx-1].limit;
3316
return runs[idx].start + len;
3317
}
3318
3319
/**
3320
* Return true if the specified text requires bidi analysis. If this returns
3321
* false, the text will display left-to-right. Clients can then avoid
3322
* constructing a Bidi object. Text in the Arabic Presentation Forms area of
3323
* Unicode is presumed to already be shaped and ordered for display, and so
3324
* will not cause this method to return true.
3325
*
3326
* @param text the text containing the characters to test
3327
* @param start the start of the range of characters to test
3328
* @param limit the limit of the range of characters to test
3329
*
3330
* @return true if the range of characters requires bidi analysis
3331
*
3332
* @stable ICU 3.8
3333
*/
3334
public static boolean requiresBidi(char[] text,
3335
int start,
3336
int limit)
3337
{
3338
final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT |
3339
1 << AL |
3340
1 << RLE |
3341
1 << RLO |
3342
1 << AN);
3343
3344
if (0 > start || start > limit || limit > text.length) {
3345
throw new IllegalArgumentException("Value start " + start +
3346
" is out of range 0 to " + limit);
3347
}
3348
for (int i = start; i < limit; ++i) {
3349
if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&
3350
Character.isLowSurrogate(text[i+1])) {
3351
if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {
3352
return true;
3353
}
3354
} else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {
3355
return true;
3356
}
3357
}
3358
return false;
3359
}
3360
3361
/**
3362
* Reorder the objects in the array into visual order based on their levels.
3363
* This is a utility method to use when you have a collection of objects
3364
* representing runs of text in logical order, each run containing text at a
3365
* single level. The elements at <code>index</code> from
3366
* <code>objectStart</code> up to <code>objectStart + count</code> in the
3367
* objects array will be reordered into visual order assuming
3368
* each run of text has the level indicated by the corresponding element in
3369
* the levels array (at <code>index - objectStart + levelStart</code>).
3370
*
3371
* @param levels an array representing the bidi level of each object
3372
* @param levelStart the start position in the levels array
3373
* @param objects the array of objects to be reordered into visual order
3374
* @param objectStart the start position in the objects array
3375
* @param count the number of objects to reorder
3376
* @stable ICU 3.8
3377
*/
3378
public static void reorderVisually(byte[] levels,
3379
int levelStart,
3380
Object[] objects,
3381
int objectStart,
3382
int count)
3383
{
3384
if (0 > levelStart || levels.length <= levelStart) {
3385
throw new IllegalArgumentException("Value levelStart " +
3386
levelStart + " is out of range 0 to " +
3387
(levels.length-1));
3388
}
3389
if (0 > objectStart || objects.length <= objectStart) {
3390
throw new IllegalArgumentException("Value objectStart " +
3391
levelStart + " is out of range 0 to " +
3392
(objects.length-1));
3393
}
3394
if (0 > count || objects.length < (objectStart+count)) {
3395
throw new IllegalArgumentException("Value count " +
3396
levelStart + " is out of range 0 to " +
3397
(objects.length - objectStart));
3398
}
3399
byte[] reorderLevels = new byte[count];
3400
System.arraycopy(levels, levelStart, reorderLevels, 0, count);
3401
int[] indexMap = reorderVisual(reorderLevels);
3402
Object[] temp = new Object[count];
3403
System.arraycopy(objects, objectStart, temp, 0, count);
3404
for (int i = 0; i < count; ++i) {
3405
objects[objectStart + i] = temp[indexMap[i]];
3406
}
3407
}
3408
3409
/**
3410
* Display the bidi internal state, used in debugging.
3411
*/
3412
public String toString() {
3413
StringBuilder buf = new StringBuilder(getClass().getName());
3414
3415
buf.append("[dir: ");
3416
buf.append(direction);
3417
buf.append(" baselevel: ");
3418
buf.append(paraLevel);
3419
buf.append(" length: ");
3420
buf.append(length);
3421
buf.append(" runs: ");
3422
if (levels == null) {
3423
buf.append("none");
3424
} else {
3425
buf.append('[');
3426
buf.append(levels[0]);
3427
for (int i = 1; i < levels.length; i++) {
3428
buf.append(' ');
3429
buf.append(levels[i]);
3430
}
3431
buf.append(']');
3432
}
3433
buf.append(" text: [0x");
3434
buf.append(Integer.toHexString(text[0]));
3435
for (int i = 1; i < text.length; i++) {
3436
buf.append(" 0x");
3437
buf.append(Integer.toHexString(text[i]));
3438
}
3439
buf.append("]]");
3440
3441
return buf.toString();
3442
}
3443
3444
/**
3445
* A class that provides access to constants defined by
3446
* java.awt.font.TextAttribute without creating a static dependency.
3447
*/
3448
private static class TextAttributeConstants {
3449
private static final Class<?> clazz = getClass("java.awt.font.TextAttribute");
3450
3451
/**
3452
* TextAttribute instances (or a fake Attribute type if
3453
* java.awt.font.TextAttribute is not present)
3454
*/
3455
static final AttributedCharacterIterator.Attribute RUN_DIRECTION =
3456
getTextAttribute("RUN_DIRECTION");
3457
static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING =
3458
getTextAttribute("NUMERIC_SHAPING");
3459
static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING =
3460
getTextAttribute("BIDI_EMBEDDING");
3461
3462
/**
3463
* TextAttribute.RUN_DIRECTION_LTR
3464
*/
3465
static final Boolean RUN_DIRECTION_LTR = (clazz == null) ?
3466
Boolean.FALSE : (Boolean)getStaticField(clazz, "RUN_DIRECTION_LTR");
3467
3468
3469
private static Class<?> getClass(String name) {
3470
try {
3471
return Class.forName(name, true, null);
3472
} catch (ClassNotFoundException e) {
3473
return null;
3474
}
3475
}
3476
3477
private static Object getStaticField(Class<?> clazz, String name) {
3478
try {
3479
Field f = clazz.getField(name);
3480
return f.get(null);
3481
} catch (NoSuchFieldException | IllegalAccessException x) {
3482
throw new AssertionError(x);
3483
}
3484
}
3485
3486
@SuppressWarnings("serial")
3487
private static AttributedCharacterIterator.Attribute
3488
getTextAttribute(String name)
3489
{
3490
if (clazz == null) {
3491
// fake attribute
3492
return new AttributedCharacterIterator.Attribute(name) { };
3493
} else {
3494
return (AttributedCharacterIterator.Attribute)getStaticField(clazz, name);
3495
}
3496
}
3497
}
3498
3499
/**
3500
* A class that provides access to java.awt.font.NumericShaping without
3501
* creating a static dependency.
3502
*/
3503
private static class NumericShapings {
3504
private static final Class<?> clazz =
3505
getClass("java.awt.font.NumericShaper");
3506
private static final Method shapeMethod =
3507
getMethod(clazz, "shape", char[].class, int.class, int.class);
3508
3509
private static Class<?> getClass(String name) {
3510
try {
3511
return Class.forName(name, true, null);
3512
} catch (ClassNotFoundException e) {
3513
return null;
3514
}
3515
}
3516
3517
private static Method getMethod(Class<?> clazz,
3518
String name,
3519
Class<?>... paramTypes)
3520
{
3521
if (clazz != null) {
3522
try {
3523
return clazz.getMethod(name, paramTypes);
3524
} catch (NoSuchMethodException e) {
3525
throw new AssertionError(e);
3526
}
3527
} else {
3528
return null;
3529
}
3530
}
3531
3532
/**
3533
* Invokes NumericShaping shape(text,start,count) method.
3534
*/
3535
static void shape(Object shaper, char[] text, int start, int count) {
3536
if (shapeMethod == null)
3537
throw new AssertionError("Should not get here");
3538
try {
3539
shapeMethod.invoke(shaper, text, start, count);
3540
} catch (InvocationTargetException e) {
3541
Throwable cause = e.getCause();
3542
if (cause instanceof RuntimeException)
3543
throw (RuntimeException)cause;
3544
throw new AssertionError(e);
3545
} catch (IllegalAccessException iae) {
3546
throw new AssertionError(iae);
3547
}
3548
}
3549
}
3550
}
3551
3552