Path: blob/aarch64-shenandoah-jdk8u272-b10/jdk/src/share/classes/sun/text/bidi/BidiBase.java
38918 views
/*1* Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.2* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.3*4* This code is free software; you can redistribute it and/or modify it5* under the terms of the GNU General Public License version 2 only, as6* published by the Free Software Foundation. Oracle designates this7* particular file as subject to the "Classpath" exception as provided8* by Oracle in the LICENSE file that accompanied this code.9*10* This code is distributed in the hope that it will be useful, but WITHOUT11* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or12* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License13* version 2 for more details (a copy is included in the LICENSE file that14* accompanied this code).15*16* You should have received a copy of the GNU General Public License version17* 2 along with this work; if not, write to the Free Software Foundation,18* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.19*20* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA21* or visit www.oracle.com if you need additional information or have any22* questions.23*/24/*25*******************************************************************************26* (C) Copyright IBM Corp. and others, 1996-2009 - All Rights Reserved *27* *28* The original version of this source code and documentation is copyrighted *29* and owned by IBM, These materials are provided under terms of a License *30* Agreement between IBM and Sun. This technology is protected by multiple *31* US and International patents. This notice and attribution to IBM may not *32* to removed. 
*33*******************************************************************************34*/3536/* FOOD FOR THOUGHT: currently the reordering modes are a mixture of37* algorithm for direct BiDi, algorithm for inverse Bidi and the bizarre38* concept of RUNS_ONLY which is a double operation.39* It could be advantageous to divide this into 3 concepts:40* a) Operation: direct / inverse / RUNS_ONLY41* b) Direct algorithm: default / NUMBERS_SPECIAL / GROUP_NUMBERS_WITH_L42* c) Inverse algorithm: default / INVERSE_LIKE_DIRECT / NUMBERS_SPECIAL43* This would allow combinations not possible today like RUNS_ONLY with44* NUMBERS_SPECIAL.45* Also allow to set INSERT_MARKS for the direct step of RUNS_ONLY and46* REMOVE_CONTROLS for the inverse step.47* Not all combinations would be supported, and probably not all do make sense.48* This would need to document which ones are supported and what are the49* fallbacks for unsupported combinations.50*/5152package sun.text.bidi;5354import java.io.IOException;55import java.lang.reflect.Array;56import java.lang.reflect.Field;57import java.lang.reflect.Method;58import java.lang.reflect.InvocationTargetException;59import java.text.AttributedCharacterIterator;60import java.text.Bidi;61import java.util.Arrays;62import java.util.MissingResourceException;63import sun.text.normalizer.UBiDiProps;64import sun.text.normalizer.UCharacter;65import sun.text.normalizer.UTF16;6667/**68*69* <h2>Bidi algorithm for ICU</h2>70*71* This is an implementation of the Unicode Bidirectional algorithm. The72* algorithm is defined in the <a73* href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,74* version 13, also described in The Unicode Standard, Version 4.0 .75* <p>76*77* Note: Libraries that perform a bidirectional algorithm and reorder strings78* accordingly are sometimes called "Storage Layout Engines". 
ICU's Bidi and79* shaping (ArabicShaping) classes can be used at the core of such "Storage80* Layout Engines".81*82* <h3>General remarks about the API:</h3>83*84* The "limit" of a sequence of characters is the position just after85* their last character, i.e., one more than that position.86* <p>87*88* Some of the API methods provide access to "runs". Such a89* "run" is defined as a sequence of characters that are at the same90* embedding level after performing the Bidi algorithm.91* <p>92*93* <h3>Basic concept: paragraph</h3>94* A piece of text can be divided into several paragraphs by characters95* with the Bidi class <code>Block Separator</code>. For handling of96* paragraphs, see:97* <ul>98* <li>{@link #countParagraphs}99* <li>{@link #getParaLevel}100* <li>{@link #getParagraph}101* <li>{@link #getParagraphByIndex}102* </ul>103*104* <h3>Basic concept: text direction</h3>105* The direction of a piece of text may be:106* <ul>107* <li>{@link #LTR}108* <li>{@link #RTL}109* <li>{@link #MIXED}110* </ul>111*112* <h3>Basic concept: levels</h3>113*114* Levels in this API represent embedding levels according to the Unicode115* Bidirectional Algorithm.116* Their low-order bit (even/odd value) indicates the visual direction.<p>117*118* Levels can be abstract values when used for the119* <code>paraLevel</code> and <code>embeddingLevels</code>120* arguments of <code>setPara()</code>; there:121* <ul>122* <li>the high-order bit of an <code>embeddingLevels[]</code>123* value indicates whether the using application is124* specifying the level of a character to <i>override</i> whatever the125* Bidi implementation would resolve it to.</li>126* <li><code>paraLevel</code> can be set to the127* pseudo-level values <code>LEVEL_DEFAULT_LTR</code>128* and <code>LEVEL_DEFAULT_RTL</code>.</li>129* </ul>130*131* <p>The related constants are not real, valid level values.132* <code>DEFAULT_XXX</code> can be used to specify133* a default for the paragraph level for134* when the 
<code>setPara()</code> method135* shall determine it but there is no136* strongly typed character in the input.<p>137*138* Note that the value for <code>LEVEL_DEFAULT_LTR</code> is even139* and the one for <code>LEVEL_DEFAULT_RTL</code> is odd,140* just like with normal LTR and RTL level values -141* these special values are designed that way. Also, the implementation142* assumes that MAX_EXPLICIT_LEVEL is odd.143*144* <ul><b>See Also:</b>145* <li>{@link #LEVEL_DEFAULT_LTR}146* <li>{@link #LEVEL_DEFAULT_RTL}147* <li>{@link #LEVEL_OVERRIDE}148* <li>{@link #MAX_EXPLICIT_LEVEL}149* <li>{@link #setPara}150* </ul>151*152* <h3>Basic concept: Reordering Mode</h3>153* Reordering mode values indicate which variant of the Bidi algorithm to154* use.155*156* <ul><b>See Also:</b>157* <li>{@link #setReorderingMode}158* <li>{@link #REORDER_DEFAULT}159* <li>{@link #REORDER_NUMBERS_SPECIAL}160* <li>{@link #REORDER_GROUP_NUMBERS_WITH_R}161* <li>{@link #REORDER_RUNS_ONLY}162* <li>{@link #REORDER_INVERSE_NUMBERS_AS_L}163* <li>{@link #REORDER_INVERSE_LIKE_DIRECT}164* <li>{@link #REORDER_INVERSE_FOR_NUMBERS_SPECIAL}165* </ul>166*167* <h3>Basic concept: Reordering Options</h3>168* Reordering options can be applied during Bidi text transformations.169* <ul><b>See Also:</b>170* <li>{@link #setReorderingOptions}171* <li>{@link #OPTION_DEFAULT}172* <li>{@link #OPTION_INSERT_MARKS}173* <li>{@link #OPTION_REMOVE_CONTROLS}174* <li>{@link #OPTION_STREAMING}175* </ul>176*177*178* @author Simon Montagu, Matitiahu Allouche (ported from C code written by Markus W. Scherer)179* @stable ICU 3.8180*181*182* <h4> Sample code for the ICU Bidi API </h4>183*184* <h5>Rendering a paragraph with the ICU Bidi API</h5>185*186* This is (hypothetical) sample code that illustrates how the ICU Bidi API187* could be used to render a paragraph of text. 
Rendering code depends highly on188* the graphics system, therefore this sample code must make a lot of189* assumptions, which may or may not match any existing graphics system's190* properties.191*192* <p>193* The basic assumptions are:194* </p>195* <ul>196* <li>Rendering is done from left to right on a horizontal line.</li>197* <li>A run of single-style, unidirectional text can be rendered at once.198* </li>199* <li>Such a run of text is passed to the graphics system with characters200* (code units) in logical order.</li>201* <li>The line-breaking algorithm is very complicated and Locale-dependent -202* and therefore its implementation omitted from this sample code.</li>203* </ul>204*205* <pre>206*207* package com.ibm.icu.dev.test.bidi;208*209* import com.ibm.icu.text.Bidi;210* import com.ibm.icu.text.BidiRun;211*212* public class Sample {213*214* static final int styleNormal = 0;215* static final int styleSelected = 1;216* static final int styleBold = 2;217* static final int styleItalics = 4;218* static final int styleSuper=8;219* static final int styleSub = 16;220*221* static class StyleRun {222* int limit;223* int style;224*225* public StyleRun(int limit, int style) {226* this.limit = limit;227* this.style = style;228* }229* }230*231* static class Bounds {232* int start;233* int limit;234*235* public Bounds(int start, int limit) {236* this.start = start;237* this.limit = limit;238* }239* }240*241* static int getTextWidth(String text, int start, int limit,242* StyleRun[] styleRuns, int styleRunCount) {243* // simplistic way to compute the width244* return limit - start;245* }246*247* // set limit and StyleRun limit for a line248* // from text[start] and from styleRuns[styleRunStart]249* // using Bidi.getLogicalRun(...)250* // returns line width251* static int getLineBreak(String text, Bounds line, Bidi para,252* StyleRun styleRuns[], Bounds styleRun) {253* // dummy return254* return 0;255* }256*257* // render runs on a line sequentially, always from left to 
right258*259* // prepare rendering a new line260* static void startLine(byte textDirection, int lineWidth) {261* System.out.println();262* }263*264* // render a run of text and advance to the right by the run width265* // the text[start..limit-1] is always in logical order266* static void renderRun(String text, int start, int limit,267* byte textDirection, int style) {268* }269*270* // We could compute a cross-product271* // from the style runs with the directional runs272* // and then reorder it.273* // Instead, here we iterate over each run type274* // and render the intersections -275* // with shortcuts in simple (and common) cases.276* // renderParagraph() is the main function.277*278* // render a directional run with279* // (possibly) multiple style runs intersecting with it280* static void renderDirectionalRun(String text, int start, int limit,281* byte direction, StyleRun styleRuns[],282* int styleRunCount) {283* int i;284*285* // iterate over style runs286* if (direction == Bidi.LTR) {287* int styleLimit;288* for (i = 0; i < styleRunCount; ++i) {289* styleLimit = styleRuns[i].limit;290* if (start < styleLimit) {291* if (styleLimit > limit) {292* styleLimit = limit;293* }294* renderRun(text, start, styleLimit,295* direction, styleRuns[i].style);296* if (styleLimit == limit) {297* break;298* }299* start = styleLimit;300* }301* }302* } else {303* int styleStart;304*305* for (i = styleRunCount-1; i >= 0; --i) {306* if (i > 0) {307* styleStart = styleRuns[i-1].limit;308* } else {309* styleStart = 0;310* }311* if (limit >= styleStart) {312* if (styleStart < start) {313* styleStart = start;314* }315* renderRun(text, styleStart, limit, direction,316* styleRuns[i].style);317* if (styleStart == start) {318* break;319* }320* limit = styleStart;321* }322* }323* }324* }325*326* // the line object represents text[start..limit-1]327* static void renderLine(Bidi line, String text, int start, int limit,328* StyleRun styleRuns[], int styleRunCount) {329* byte direction = 
line.getDirection();330* if (direction != Bidi.MIXED) {331* // unidirectional332* if (styleRunCount <= 1) {333* renderRun(text, start, limit, direction, styleRuns[0].style);334* } else {335* renderDirectionalRun(text, start, limit, direction,336* styleRuns, styleRunCount);337* }338* } else {339* // mixed-directional340* int count, i;341* BidiRun run;342*343* try {344* count = line.countRuns();345* } catch (IllegalStateException e) {346* e.printStackTrace();347* return;348* }349* if (styleRunCount <= 1) {350* int style = styleRuns[0].style;351*352* // iterate over directional runs353* for (i = 0; i < count; ++i) {354* run = line.getVisualRun(i);355* renderRun(text, run.getStart(), run.getLimit(),356* run.getDirection(), style);357* }358* } else {359* // iterate over both directional and style runs360* for (i = 0; i < count; ++i) {361* run = line.getVisualRun(i);362* renderDirectionalRun(text, run.getStart(),363* run.getLimit(), run.getDirection(),364* styleRuns, styleRunCount);365* }366* }367* }368* }369*370* static void renderParagraph(String text, byte textDirection,371* StyleRun styleRuns[], int styleRunCount,372* int lineWidth) {373* int length = text.length();374* Bidi para = new Bidi();375* try {376* para.setPara(text,377* textDirection != 0 ? 
Bidi.LEVEL_DEFAULT_RTL378* : Bidi.LEVEL_DEFAULT_LTR,379* null);380* } catch (Exception e) {381* e.printStackTrace();382* return;383* }384* byte paraLevel = (byte)(1 & para.getParaLevel());385* StyleRun styleRun = new StyleRun(length, styleNormal);386*387* if (styleRuns == null || styleRunCount <= 0) {388* styleRuns = new StyleRun[1];389* styleRunCount = 1;390* styleRuns[0] = styleRun;391* }392* // assume styleRuns[styleRunCount-1].limit>=length393*394* int width = getTextWidth(text, 0, length, styleRuns, styleRunCount);395* if (width <= lineWidth) {396* // everything fits onto one line397*398* // prepare rendering a new line from either left or right399* startLine(paraLevel, width);400*401* renderLine(para, text, 0, length, styleRuns, styleRunCount);402* } else {403* // we need to render several lines404* Bidi line = new Bidi(length, 0);405* int start = 0, limit;406* int styleRunStart = 0, styleRunLimit;407*408* for (;;) {409* limit = length;410* styleRunLimit = styleRunCount;411* width = getLineBreak(text, new Bounds(start, limit),412* para, styleRuns,413* new Bounds(styleRunStart, styleRunLimit));414* try {415* line = para.setLine(start, limit);416* } catch (Exception e) {417* e.printStackTrace();418* return;419* }420* // prepare rendering a new line421* // from either left or right422* startLine(paraLevel, width);423*424* if (styleRunStart > 0) {425* int newRunCount = styleRuns.length - styleRunStart;426* StyleRun[] newRuns = new StyleRun[newRunCount];427* System.arraycopy(styleRuns, styleRunStart, newRuns, 0,428* newRunCount);429* renderLine(line, text, start, limit, newRuns,430* styleRunLimit - styleRunStart);431* } else {432* renderLine(line, text, start, limit, styleRuns,433* styleRunLimit - styleRunStart);434* }435* if (limit == length) {436* break;437* }438* start = limit;439* styleRunStart = styleRunLimit - 1;440* if (start >= styleRuns[styleRunStart].limit) {441* ++styleRunStart;442* }443* }444* }445* }446*447* public static void main(String[] args)448* 
{449* renderParagraph("Some Latin text...", Bidi.LTR, null, 0, 80);450* renderParagraph("Some Hebrew text...", Bidi.RTL, null, 0, 60);451* }452* }453*454* </pre>455*/456457public class BidiBase {458459class Point {/* One insertion point: a text position plus a flag telling which mark (LRM/RLM) goes before/after it. */460int pos; /* position in text */461int flag; /* flag for LRM/RLM, before/after */462}463464class InsertPoints {/* Holder for the Point array used when directional marks are inserted. NOTE(review): size and confirmed look like counts of used and committed entries in points - confirm against the code that fills this. */465int size;466int confirmed;467Point[] points = new Point[0];468}469470/** Paragraph level setting<p>471*472* Constant indicating that the base direction depends on the first strong473* directional character in the text according to the Unicode Bidirectional474* Algorithm. If no strong directional character is present,475* then set the paragraph level to 0 (left-to-right).<p>476*477* If this value is used in conjunction with reordering modes478* <code>REORDER_INVERSE_LIKE_DIRECT</code> or479* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder480* is assumed to be visual LTR, and the text after reordering is required481* to be the corresponding logical string with appropriate contextual482* direction. The direction of the result string will be RTL if either483* the rightmost or leftmost strong character of the source text is RTL484* or Arabic Letter, the direction will be LTR otherwise.<p>485*486* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may487* be added at the beginning of the result string to ensure round trip488* (that the result string, when reordered back to visual, will produce489* the original source text).490* @see #REORDER_INVERSE_LIKE_DIRECT491* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL492* @stable ICU 3.8493*/494public static final byte INTERNAL_LEVEL_DEFAULT_LTR = (byte)0x7e;495496/** Paragraph level setting<p>497*498* Constant indicating that the base direction depends on the first strong499* directional character in the text according to the Unicode Bidirectional500* Algorithm. 
If no strong directional character is present,501* then set the paragraph level to 1 (right-to-left).<p>502*503* If this value is used in conjunction with reordering modes504* <code>REORDER_INVERSE_LIKE_DIRECT</code> or505* <code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder506* is assumed to be visual LTR, and the text after reordering is required507* to be the corresponding logical string with appropriate contextual508* direction. The direction of the result string will be RTL if either509* the rightmost or leftmost strong character of the source text is RTL510* or Arabic Letter, or if the text contains no strong character;511* the direction will be LTR otherwise.<p>512*513* If reordering option <code>OPTION_INSERT_MARKS</code> is set, an RLM may514* be added at the beginning of the result string to ensure round trip515* (that the result string, when reordered back to visual, will produce516* the original source text).517* @see #REORDER_INVERSE_LIKE_DIRECT518* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL519* @stable ICU 3.8520*/521public static final byte INTERNAL_LEVEL_DEFAULT_RTL = (byte)0x7f;522523/**524* Maximum explicit embedding level.525* (The maximum resolved level can be up to <code>MAX_EXPLICIT_LEVEL+1</code>).526* @stable ICU 3.8527*/528public static final byte MAX_EXPLICIT_LEVEL = 61;529530/**531* Bit flag for level input.532* Overrides directional properties.533* @stable ICU 3.8534*/535public static final byte INTERNAL_LEVEL_OVERRIDE = (byte)0x80;536537/**538* Special value which can be returned by the mapping methods when a539* logical index has no corresponding visual index or vice-versa. This may540* happen for the logical-to-visual mapping of a Bidi control when option541* <code>OPTION_REMOVE_CONTROLS</code> is542* specified. 
This can also happen for the visual-to-logical mapping of a543* Bidi mark (LRM or RLM) inserted by option544* <code>OPTION_INSERT_MARKS</code>.545* @see #getVisualIndex546* @see #getVisualMap547* @see #getLogicalIndex548* @see #getLogicalMap549* @see #OPTION_INSERT_MARKS550* @see #OPTION_REMOVE_CONTROLS551* @stable ICU 3.8552*/553public static final int MAP_NOWHERE = -1;554555/**556* Mixed-directional text.557* @stable ICU 3.8558*/559public static final byte MIXED = 2;560561/**562* option bit for writeReordered():563* replace characters with the "mirrored" property in RTL runs564* by their mirror-image mappings565*566* @see #writeReordered567* @stable ICU 3.8568*/569public static final short DO_MIRRORING = 2;570571/** Reordering mode: Regular Logical to Visual Bidi algorithm according to Unicode.572* @see #setReorderingMode573* @stable ICU 3.8574*/575private static final short REORDER_DEFAULT = 0;576577/** Reordering mode: Logical to Visual algorithm which handles numbers in578* a way which mimics the behavior of Windows XP.579* @see #setReorderingMode580* @stable ICU 3.8581*/582private static final short REORDER_NUMBERS_SPECIAL = 1;583584/** Reordering mode: Logical to Visual algorithm grouping numbers with585* adjacent R characters (reversible algorithm).586* @see #setReorderingMode587* @stable ICU 3.8588*/589private static final short REORDER_GROUP_NUMBERS_WITH_R = 2;590591/** Reordering mode: Reorder runs only to transform a Logical LTR string592* to the logical RTL string with the same display, or vice-versa.<br>593* If this mode is set together with option594* <code>OPTION_INSERT_MARKS</code>, some Bidi controls in the source595* text may be removed and other controls may be added to produce the596* minimum combination which has the required display.597* @see #OPTION_INSERT_MARKS598* @see #setReorderingMode599* @stable ICU 3.8600*/601private static final short REORDER_RUNS_ONLY = 3;602603/** Reordering mode: Visual to Logical algorithm which handles 
numbers604* like L (same algorithm as selected by <code>setInverse(true)</code>).605* @see #setInverse606* @see #setReorderingMode607* @stable ICU 3.8608*/609private static final short REORDER_INVERSE_NUMBERS_AS_L = 4;610611/** Reordering mode: Visual to Logical algorithm equivalent to the regular612* Logical to Visual algorithm.613* @see #setReorderingMode614* @stable ICU 3.8615*/616private static final short REORDER_INVERSE_LIKE_DIRECT = 5;617618/** Reordering mode: Inverse Bidi (Visual to Logical) algorithm for the619* <code>REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.620* @see #setReorderingMode621* @stable ICU 3.8622*/623private static final short REORDER_INVERSE_FOR_NUMBERS_SPECIAL = 6;624625/* Reordering mode values must be ordered so that all the regular logical to626* visual modes come first, and all inverse Bidi modes come last.627*/628private static final short REORDER_LAST_LOGICAL_TO_VISUAL =629REORDER_NUMBERS_SPECIAL;630631/**632* Option bit for <code>setReorderingOptions</code>:633* insert Bidi marks (LRM or RLM) when needed to ensure correct result of634* a reordering to a Logical order635*636* <p>This option must be set or reset before calling637* <code>setPara</code>.</p>638*639* <p>This option is significant only with reordering modes which generate640* a result with Logical order, specifically:</p>641* <ul>642* <li><code>REORDER_RUNS_ONLY</code></li>643* <li><code>REORDER_INVERSE_NUMBERS_AS_L</code></li>644* <li><code>REORDER_INVERSE_LIKE_DIRECT</code></li>645* <li><code>REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>646* </ul>647*648* <p>If this option is set in conjunction with reordering mode649* <code>REORDER_INVERSE_NUMBERS_AS_L</code> or with calling650* <code>setInverse(true)</code>, it implies option651* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method652* <code>writeReordered()</code>.</p>653*654* <p>For other reordering modes, a minimum number of LRM or RLM characters655* will be added to the source text after reordering it so as 
to ensure656* round trip, i.e. when applying the inverse reordering mode on the657* resulting logical text with removal of Bidi marks658* (option <code>OPTION_REMOVE_CONTROLS</code> set before calling659* <code>setPara()</code> or option660* <code>REMOVE_BIDI_CONTROLS</code> in661* <code>writeReordered</code>), the result will be identical to the662* source text in the first transformation.663*664* <p>This option will be ignored if specified together with option665* <code>OPTION_REMOVE_CONTROLS</code>. It inhibits option666* <code>REMOVE_BIDI_CONTROLS</code> in calls to method667* <code>writeReordered()</code> and it implies option668* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method669* <code>writeReordered()</code> if the reordering mode is670* <code>REORDER_INVERSE_NUMBERS_AS_L</code>.</p>671*672* @see #setReorderingMode673* @see #setReorderingOptions674* @see #INSERT_LRM_FOR_NUMERIC675* @see #REMOVE_BIDI_CONTROLS676* @see #OPTION_REMOVE_CONTROLS677* @see #REORDER_RUNS_ONLY678* @see #REORDER_INVERSE_NUMBERS_AS_L679* @see #REORDER_INVERSE_LIKE_DIRECT680* @see #REORDER_INVERSE_FOR_NUMBERS_SPECIAL681* @stable ICU 3.8682*/683private static final int OPTION_INSERT_MARKS = 1;684685/**686* Option bit for <code>setReorderingOptions</code>:687* remove Bidi control characters688*689* <p>This option must be set or reset before calling690* <code>setPara</code>.</p>691*692* <p>This option nullifies option693* <code>OPTION_INSERT_MARKS</code>. 
It inhibits option694* <code>INSERT_LRM_FOR_NUMERIC</code> in calls to method695* <code>writeReordered()</code> and it implies option696* <code>REMOVE_BIDI_CONTROLS</code> in calls to that method.</p>697*698* @see #setReorderingMode699* @see #setReorderingOptions700* @see #OPTION_INSERT_MARKS701* @see #INSERT_LRM_FOR_NUMERIC702* @see #REMOVE_BIDI_CONTROLS703* @stable ICU 3.8704*/705private static final int OPTION_REMOVE_CONTROLS = 2;706707/**708* Option bit for <code>setReorderingOptions</code>:709* process the output as part of a stream to be continued710*711* <p>This option must be set or reset before calling712* <code>setPara</code>.</p>713*714* <p>This option specifies that the caller is interested in processing715* large text object in parts. The results of the successive calls are716* expected to be concatenated by the caller. Only the call for the last717* part will have this option bit off.</p>718*719* <p>When this option bit is on, <code>setPara()</code> may process720* less than the full source text in order to truncate the text at a721* meaningful boundary. The caller should call722* <code>getProcessedLength()</code> immediately after calling723* <code>setPara()</code> in order to determine how much of the source724* text has been processed. Source text beyond that length should be725* resubmitted in following calls to <code>setPara</code>. 
The726* processed length may be less than the length of the source text if a727* character preceding the last character of the source text constitutes a728* reasonable boundary (like a block separator) for text to be continued.<br>729* If the last character of the source text constitutes a reasonable730* boundary, the whole text will be processed at once.<br>731* If nowhere in the source text there exists732* such a reasonable boundary, the processed length will be zero.<br>733* The caller should check for such an occurrence and do one of the following:734* <ul><li>submit a larger amount of text with a better chance to include735* a reasonable boundary.</li>736* <li>resubmit the same text after turning off option737* <code>OPTION_STREAMING</code>.</li></ul>738* In all cases, this option should be turned off before processing the last739* part of the text.</p>740*741* <p>When the <code>OPTION_STREAMING</code> option is used, it is742* recommended to call <code>orderParagraphsLTR()</code> with argument743* <code>orderParagraphsLTR</code> set to <code>true</code> before calling744* <code>setPara()</code> so that later paragraphs may be concatenated to745* previous paragraphs on the right.746* </p>747*748* @see #setReorderingMode749* @see #setReorderingOptions750* @see #getProcessedLength751* @see #orderParagraphsLTR752* @stable ICU 3.8753*/754private static final int OPTION_STREAMING = 4;755756/*757* Comparing the description of the Bidi algorithm with this implementation758* is easier with the same names for the Bidi types in the code as there.759* See UCharacterDirection760*/761private static final byte L = 0;762private static final byte R = 1;763private static final byte EN = 2;764private static final byte ES = 3;765private static final byte ET = 4;766private static final byte AN = 5;767private static final byte CS = 6;768static final byte B = 7;769private static final byte S = 8;770private static final byte WS = 9;771private static final byte ON = 10;772private 
static final byte LRE = 11;773private static final byte LRO = 12;774private static final byte AL = 13;775private static final byte RLE = 14;776private static final byte RLO = 15;777private static final byte PDF = 16;778private static final byte NSM = 17;779private static final byte BN = 18;780781private static final int MASK_R_AL = (1 << R | 1 << AL);782783private static final char CR = '\r';784private static final char LF = '\n';785786static final int LRM_BEFORE = 1;787static final int LRM_AFTER = 2;788static final int RLM_BEFORE = 4;789static final int RLM_AFTER = 8;790791/*792* reference to parent paragraph object (reference to self if this object is793* a paragraph object); set to null in a newly opened object; set to a794* real value after a successful execution of setPara or setLine795*/796BidiBase paraBidi;797798final UBiDiProps bdp;799800/* character array representing the current text */801char[] text;802803/* length of the current text */804int originalLength;805806/* if the option OPTION_STREAMING is set, this is the length of807* text actually processed by <code>setPara</code>, which may be shorter808* than the original length. Otherwise, it is identical to the original809* length.810*/811public int length;812813/* if option OPTION_REMOVE_CONTROLS is set, and/or Bidi814* marks are allowed to be inserted in one of the reordering modes, the815* length of the result string may be different from the processed length.816*/817int resultLength;818819/* indicators for whether memory may be allocated after construction */820boolean mayAllocateText;821boolean mayAllocateRuns;822823/* arrays with one value per text-character */824byte[] dirPropsMemory = new byte[1];825byte[] levelsMemory = new byte[1];826byte[] dirProps;827byte[] levels;828829/* must block separators receive level 0? 
*/830boolean orderParagraphsLTR;831832/* the paragraph level */833byte paraLevel;834835/* original paraLevel when contextual */836/* must be one of DEFAULT_xxx or 0 if not contextual */837byte defaultParaLevel;838839/* the following is set in setPara, used in processPropertySeq */840841ImpTabPair impTabPair; /* reference to levels state table pair */842843/* the overall paragraph or line directionality*/844byte direction;845846/* flags is a bit set for which directional properties are in the text */847int flags;848849/* lastArabicPos is index to the last AL in the text, -1 if none */850int lastArabicPos;851852/* characters after trailingWSStart are WS and are */853/* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */854int trailingWSStart;855856/* fields for paragraph handling */857int paraCount; /* set in getDirProps() */858int[] parasMemory = new int[1];859int[] paras; /* limits of paragraphs, filled in860ResolveExplicitLevels() or CheckExplicitLevels() */861862/* for single paragraph text, we only need a tiny array of paras (no allocation) */863int[] simpleParas = {0};864865/* fields for line reordering */866int runCount; /* ==-1: runs not set up yet */867BidiRun[] runsMemory = new BidiRun[0];868BidiRun[] runs;869870/* for non-mixed text, we only need a tiny array of runs (no allocation) */871BidiRun[] simpleRuns = {new BidiRun()};872873/* mapping of runs in logical order to visual order */874int[] logicalToVisualRunsMap;875876/* flag to indicate that the map has been updated */877boolean isGoodLogicalToVisualRunsMap;878879/* for inverse Bidi with insertion of directional marks */880InsertPoints insertPoints = new InsertPoints();881882/* for option OPTION_REMOVE_CONTROLS */883int controlCount;884885/*886* Sometimes, bit values are more appropriate887* to deal with directionality properties.888* Abbreviations in these method names refer to names889* used in the Bidi algorithm.890*/891static int DirPropFlag(byte dir) {892return (1 << 
dir);893}894895/*896* The following bit is ORed to the property of characters in paragraphs897* with contextual RTL direction when paraLevel is contextual.898*/899static final byte CONTEXT_RTL_SHIFT = 6;900static final byte CONTEXT_RTL = (byte)(1<<CONTEXT_RTL_SHIFT); // 0x40901static byte NoContextRTL(byte dir)902{903return (byte)(dir & ~CONTEXT_RTL);904}905906/*907* The following is a variant of DirProp.DirPropFlag() which ignores the908* CONTEXT_RTL bit.909*/910static int DirPropFlagNC(byte dir) {911return (1<<(dir & ~CONTEXT_RTL));912}913914static final int DirPropFlagMultiRuns = DirPropFlag((byte)31);915916/* to avoid some conditional statements, use tiny constant arrays */917static final int DirPropFlagLR[] = { DirPropFlag(L), DirPropFlag(R) };918static final int DirPropFlagE[] = { DirPropFlag(LRE), DirPropFlag(RLE) };919static final int DirPropFlagO[] = { DirPropFlag(LRO), DirPropFlag(RLO) };920921static final int DirPropFlagLR(byte level) { return DirPropFlagLR[level & 1]; }922static final int DirPropFlagE(byte level) { return DirPropFlagE[level & 1]; }923static final int DirPropFlagO(byte level) { return DirPropFlagO[level & 1]; }924925/*926* are there any characters that are LTR?927*/928static final int MASK_LTR =929DirPropFlag(L)|DirPropFlag(EN)|DirPropFlag(AN)|DirPropFlag(LRE)|DirPropFlag(LRO);930931/*932* are there any characters that are RTL?933*/934static final int MASK_RTL = DirPropFlag(R)|DirPropFlag(AL)|DirPropFlag(RLE)|DirPropFlag(RLO);935936/* explicit embedding codes */937private static final int MASK_LRX = DirPropFlag(LRE)|DirPropFlag(LRO);938private static final int MASK_RLX = DirPropFlag(RLE)|DirPropFlag(RLO);939private static final int MASK_EXPLICIT = MASK_LRX|MASK_RLX|DirPropFlag(PDF);940private static final int MASK_BN_EXPLICIT = DirPropFlag(BN)|MASK_EXPLICIT;941942/* paragraph and segment separators */943private static final int MASK_B_S = DirPropFlag(B)|DirPropFlag(S);944945/* all types that are counted as White Space or Neutral in some 
steps */946static final int MASK_WS = MASK_B_S|DirPropFlag(WS)|MASK_BN_EXPLICIT;947private static final int MASK_N = DirPropFlag(ON)|MASK_WS;948949/* types that are neutrals or could becomes neutrals in (Wn) */950private static final int MASK_POSSIBLE_N = DirPropFlag(CS)|DirPropFlag(ES)|DirPropFlag(ET)|MASK_N;951952/*953* These types may be changed to "e",954* the embedding type (L or R) of the run,955* in the Bidi algorithm (N2)956*/957static final int MASK_EMBEDDING = DirPropFlag(NSM)|MASK_POSSIBLE_N;958959/*960* the dirProp's L and R are defined to 0 and 1 values in UCharacterDirection.java961*/962private static byte GetLRFromLevel(byte level)963{964return (byte)(level & 1);965}966967private static boolean IsDefaultLevel(byte level)968{969return ((level & INTERNAL_LEVEL_DEFAULT_LTR) == INTERNAL_LEVEL_DEFAULT_LTR);970}971972byte GetParaLevelAt(int index)973{974return (defaultParaLevel != 0) ?975(byte)(dirProps[index]>>CONTEXT_RTL_SHIFT) : paraLevel;976}977978static boolean IsBidiControlChar(int c)979{980/* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or9810x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */982return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));983}984985public void verifyValidPara()986{987if (this != this.paraBidi) {988throw new IllegalStateException("");989}990}991992public void verifyValidParaOrLine()993{994BidiBase para = this.paraBidi;995/* verify Para */996if (this == para) {997return;998}999/* verify Line */1000if ((para == null) || (para != para.paraBidi)) {1001throw new IllegalStateException();1002}1003}10041005public void verifyRange(int index, int start, int limit)1006{1007if (index < start || index >= limit) {1008throw new IllegalArgumentException("Value " + index +1009" is out of range " + start + " to " + limit);1010}1011}10121013public void verifyIndex(int index, int start, int limit)1014{1015if (index < start || index >= limit) {1016throw new ArrayIndexOutOfBoundsException("Index " + index +1017" is out of 
range " + start + " to " + limit);1018}1019}10201021/**1022* Allocate a <code>Bidi</code> object with preallocated memory1023* for internal structures.1024* This method provides a <code>Bidi</code> object like the default constructor1025* but it also preallocates memory for internal structures1026* according to the sizings supplied by the caller.<p>1027* The preallocation can be limited to some of the internal memory1028* by setting some values to 0 here. That means that if, e.g.,1029* <code>maxRunCount</code> cannot be reasonably predetermined and should not1030* be set to <code>maxLength</code> (the only failproof value) to avoid1031* wasting memory, then <code>maxRunCount</code> could be set to 0 here1032* and the internal structures that are associated with it will be allocated1033* on demand, just like with the default constructor.1034*1035* @param maxLength is the maximum text or line length that internal memory1036* will be preallocated for. An attempt to associate this object with a1037* longer text will fail, unless this value is 0, which leaves the allocation1038* up to the implementation.1039*1040* @param maxRunCount is the maximum anticipated number of same-level runs1041* that internal memory will be preallocated for. An attempt to access1042* visual runs on an object that was not preallocated for as many runs1043* as the text was actually resolved to will fail,1044* unless this value is 0, which leaves the allocation up to the implementation.<br><br>1045* The number of runs depends on the actual text and maybe anywhere between1046* 1 and <code>maxLength</code>. 
It is typically small.1047*1048* @throws IllegalArgumentException if maxLength or maxRunCount is less than 01049* @stable ICU 3.81050*/1051public BidiBase(int maxLength, int maxRunCount)1052{1053/* check the argument values */1054if (maxLength < 0 || maxRunCount < 0) {1055throw new IllegalArgumentException();1056}10571058/* reset the object, all reference variables null, all flags false,1059all sizes 0.1060In fact, we don't need to do anything, since class members are1061initialized as zero when an instance is created.1062*/1063/*1064mayAllocateText = false;1065mayAllocateRuns = false;1066orderParagraphsLTR = false;1067paraCount = 0;1068runCount = 0;1069trailingWSStart = 0;1070flags = 0;1071paraLevel = 0;1072defaultParaLevel = 0;1073direction = 0;1074*/1075/* get Bidi properties */1076try {1077bdp = UBiDiProps.getSingleton();1078}1079catch (IOException e) {1080throw new MissingResourceException(e.getMessage(), "(BidiProps)", "");1081}10821083/* allocate memory for arrays as requested */1084if (maxLength > 0) {1085getInitialDirPropsMemory(maxLength);1086getInitialLevelsMemory(maxLength);1087} else {1088mayAllocateText = true;1089}10901091if (maxRunCount > 0) {1092// if maxRunCount == 1, use simpleRuns[]1093if (maxRunCount > 1) {1094getInitialRunsMemory(maxRunCount);1095}1096} else {1097mayAllocateRuns = true;1098}1099}11001101/*1102* We are allowed to allocate memory if object==null or1103* mayAllocate==true for each array that we need.1104*1105* Assume sizeNeeded>0.1106* If object != null, then assume size > 0.1107*/1108private Object getMemory(String label, Object array, Class<?> arrayClass,1109boolean mayAllocate, int sizeNeeded)1110{1111int len = Array.getLength(array);11121113/* we have at least enough memory and must not allocate */1114if (sizeNeeded == len) {1115return array;1116}1117if (!mayAllocate) {1118/* we must not allocate */1119if (sizeNeeded <= len) {1120return array;1121}1122throw new OutOfMemoryError("Failed to allocate memory for "1123+ 
label);1124}1125/* we may try to grow or shrink */1126/* FOOD FOR THOUGHT: when shrinking it should be possible to avoid1127the allocation altogether and rely on this.length */1128try {1129return Array.newInstance(arrayClass, sizeNeeded);1130} catch (Exception e) {1131throw new OutOfMemoryError("Failed to allocate memory for "1132+ label);1133}1134}11351136/* helper methods for each allocated array */1137private void getDirPropsMemory(boolean mayAllocate, int len)1138{1139Object array = getMemory("DirProps", dirPropsMemory, Byte.TYPE, mayAllocate, len);1140dirPropsMemory = (byte[]) array;1141}11421143void getDirPropsMemory(int len)1144{1145getDirPropsMemory(mayAllocateText, len);1146}11471148private void getLevelsMemory(boolean mayAllocate, int len)1149{1150Object array = getMemory("Levels", levelsMemory, Byte.TYPE, mayAllocate, len);1151levelsMemory = (byte[]) array;1152}11531154void getLevelsMemory(int len)1155{1156getLevelsMemory(mayAllocateText, len);1157}11581159private void getRunsMemory(boolean mayAllocate, int len)1160{1161Object array = getMemory("Runs", runsMemory, BidiRun.class, mayAllocate, len);1162runsMemory = (BidiRun[]) array;1163}11641165void getRunsMemory(int len)1166{1167getRunsMemory(mayAllocateRuns, len);1168}11691170/* additional methods used by constructor - always allow allocation */1171private void getInitialDirPropsMemory(int len)1172{1173getDirPropsMemory(true, len);1174}11751176private void getInitialLevelsMemory(int len)1177{1178getLevelsMemory(true, len);1179}11801181private void getInitialParasMemory(int len)1182{1183Object array = getMemory("Paras", parasMemory, Integer.TYPE, true, len);1184parasMemory = (int[]) array;1185}11861187private void getInitialRunsMemory(int len)1188{1189getRunsMemory(true, len);1190}11911192/* perform (P2)..(P3) ------------------------------------------------------- */11931194private void getDirProps()1195{1196int i = 0, i0, i1;1197flags = 0; /* collect all directionalities in the text */1198int 
uchar;1199byte dirProp;1200byte paraDirDefault = 0; /* initialize to avoid compiler warnings */1201boolean isDefaultLevel = IsDefaultLevel(paraLevel);1202/* for inverse Bidi, the default para level is set to RTL if there is a1203strong R or AL character at either end of the text */1204lastArabicPos = -1;1205controlCount = 0;12061207final int NOT_CONTEXTUAL = 0; /* 0: not contextual paraLevel */1208final int LOOKING_FOR_STRONG = 1; /* 1: looking for first strong char */1209final int FOUND_STRONG_CHAR = 2; /* 2: found first strong char */12101211int state;1212int paraStart = 0; /* index of first char in paragraph */1213byte paraDir; /* == CONTEXT_RTL within paragraphs1214starting with strong R char */1215byte lastStrongDir=0; /* for default level & inverse Bidi */1216int lastStrongLTR=0; /* for STREAMING option */12171218if (isDefaultLevel) {1219paraDirDefault = ((paraLevel & 1) != 0) ? CONTEXT_RTL : 0;1220paraDir = paraDirDefault;1221lastStrongDir = paraDirDefault;1222state = LOOKING_FOR_STRONG;1223} else {1224state = NOT_CONTEXTUAL;1225paraDir = 0;1226}1227/* count paragraphs and determine the paragraph level (P2..P3) */1228/*1229* see comment on constant fields:1230* the LEVEL_DEFAULT_XXX values are designed so that1231* their low-order bit alone yields the intended default1232*/12331234for (i = 0; i < originalLength; /* i is incremented in the loop */) {1235i0 = i; /* index of first code unit */1236uchar = UTF16.charAt(text, 0, originalLength, i);1237i += Character.charCount(uchar);1238i1 = i - 1; /* index of last code unit, gets the directional property */12391240dirProp = (byte)bdp.getClass(uchar);12411242flags |= DirPropFlag(dirProp);1243dirProps[i1] = (byte)(dirProp | paraDir);1244if (i1 > i0) { /* set previous code units' properties to BN */1245flags |= DirPropFlag(BN);1246do {1247dirProps[--i1] = (byte)(BN | paraDir);1248} while (i1 > i0);1249}1250if (state == LOOKING_FOR_STRONG) {1251if (dirProp == L) {1252state = FOUND_STRONG_CHAR;1253if (paraDir != 0) 
{1254paraDir = 0;1255for (i1 = paraStart; i1 < i; i1++) {1256dirProps[i1] &= ~CONTEXT_RTL;1257}1258}1259continue;1260}1261if (dirProp == R || dirProp == AL) {1262state = FOUND_STRONG_CHAR;1263if (paraDir == 0) {1264paraDir = CONTEXT_RTL;1265for (i1 = paraStart; i1 < i; i1++) {1266dirProps[i1] |= CONTEXT_RTL;1267}1268}1269continue;1270}1271}1272if (dirProp == L) {1273lastStrongDir = 0;1274lastStrongLTR = i; /* i is index to next character */1275}1276else if (dirProp == R) {1277lastStrongDir = CONTEXT_RTL;1278}1279else if (dirProp == AL) {1280lastStrongDir = CONTEXT_RTL;1281lastArabicPos = i-1;1282}1283else if (dirProp == B) {1284if (i < originalLength) { /* B not last char in text */1285if (!((uchar == (int)CR) && (text[i] == (int)LF))) {1286paraCount++;1287}1288if (isDefaultLevel) {1289state=LOOKING_FOR_STRONG;1290paraStart = i; /* i is index to next character */1291paraDir = paraDirDefault;1292lastStrongDir = paraDirDefault;1293}1294}1295}1296}1297if (isDefaultLevel) {1298paraLevel = GetParaLevelAt(0);1299}13001301/* The following line does nothing new for contextual paraLevel, but is1302needed for absolute paraLevel. 
*/1303flags |= DirPropFlagLR(paraLevel);13041305if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {1306flags |= DirPropFlag(L);1307}1308}13091310/* perform (X1)..(X9) ------------------------------------------------------- */13111312/* determine if the text is mixed-directional or single-directional */1313private byte directionFromFlags() {1314/* if the text contains AN and neutrals, then some neutrals may become RTL */1315if (!((flags & MASK_RTL) != 0 ||1316((flags & DirPropFlag(AN)) != 0 &&1317(flags & MASK_POSSIBLE_N) != 0))) {1318return Bidi.DIRECTION_LEFT_TO_RIGHT;1319} else if ((flags & MASK_LTR) == 0) {1320return Bidi.DIRECTION_RIGHT_TO_LEFT;1321} else {1322return MIXED;1323}1324}13251326/*1327* Resolve the explicit levels as specified by explicit embedding codes.1328* Recalculate the flags to have them reflect the real properties1329* after taking the explicit embeddings into account.1330*1331* The Bidi algorithm is designed to result in the same behavior whether embedding1332* levels are externally specified (from "styled text", supposedly the preferred1333* method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.1334* That is why (X9) instructs to remove all explicit codes (and BN).1335* However, in a real implementation, this removal of these codes and their index1336* positions in the plain text is undesirable since it would result in1337* reallocated, reindexed text.1338* Instead, this implementation leaves the codes in there and just ignores them1339* in the subsequent processing.1340* In order to get the same reordering behavior, positions with a BN or an1341* explicit embedding code just get the same level assigned as the last "real"1342* character.1343*1344* Some implementations, not this one, then overwrite some of these1345* directionality properties at "real" same-level-run boundaries by1346* L or R codes so that the resolution of weak types can be performed on the1347* entire paragraph at once instead of having to 
* parse it once more and
     * perform that resolution on same-level-runs.
     * This limits the scope of the implicit rules in effectively
     * the same way as the run limits.
     *
     * Instead, this implementation does not modify these codes.
     * On one hand, the paragraph has to be scanned for same-level-runs, but
     * on the other hand, this saves another loop to reset these codes,
     * or saves making and modifying a copy of dirProps[].
     *
     *
     * Note that (Pn) and (Xn) changed significantly from version 4 of the Bidi algorithm.
     *
     *
     * Handling the stack of explicit levels (Xn):
     *
     * With the Bidi stack of explicit levels,
     * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
     * the explicit level must never exceed MAX_EXPLICIT_LEVEL==61.
     *
     * In order to have a correct push-pop semantics even in the case of overflows,
     * there are two overflow counters:
     * - countOver60 is incremented with each LRx at level 60
     * - from level 60, one RLx increases the level to 61
     * - countOver61 is incremented with each LRx and RLx at level 61
     *
     * Popping levels with PDF must work in the opposite order so that level 61
     * is correct at the correct point.
Underflows (too many PDFs) must be checked.1375*1376* This implementation assumes that MAX_EXPLICIT_LEVEL is odd.1377*/1378private byte resolveExplicitLevels() {1379int i = 0;1380byte dirProp;1381byte level = GetParaLevelAt(0);13821383byte dirct;1384int paraIndex = 0;13851386/* determine if the text is mixed-directional or single-directional */1387dirct = directionFromFlags();13881389/* we may not need to resolve any explicit levels, but for multiple1390paragraphs we want to loop on all chars to set the para boundaries */1391if ((dirct != MIXED) && (paraCount == 1)) {1392/* not mixed directionality: levels don't matter - trailingWSStart will be 0 */1393} else if ((paraCount == 1) &&1394((flags & MASK_EXPLICIT) == 0)) {1395/* mixed, but all characters are at the same embedding level */1396/* or we are in "inverse Bidi" */1397/* and we don't have contextual multiple paragraphs with some B char */1398/* set all levels to the paragraph level */1399for (i = 0; i < length; ++i) {1400levels[i] = level;1401}1402} else {1403/* continue to perform (Xn) */14041405/* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */1406/* both variables may carry the LEVEL_OVERRIDE flag to indicate the override status */1407byte embeddingLevel = level;1408byte newLevel;1409byte stackTop = 0;14101411byte[] stack = new byte[MAX_EXPLICIT_LEVEL]; /* we never push anything >=MAX_EXPLICIT_LEVEL */1412int countOver60 = 0;1413int countOver61 = 0; /* count overflows of explicit levels */14141415/* recalculate the flags */1416flags = 0;14171418for (i = 0; i < length; ++i) {1419dirProp = NoContextRTL(dirProps[i]);1420switch(dirProp) {1421case LRE:1422case LRO:1423/* (X3, X5) */1424newLevel = (byte)((embeddingLevel+2) & ~(INTERNAL_LEVEL_OVERRIDE | 1)); /* least greater even level */1425if (newLevel <= MAX_EXPLICIT_LEVEL) {1426stack[stackTop] = embeddingLevel;1427++stackTop;1428embeddingLevel = newLevel;1429if (dirProp == LRO) {1430embeddingLevel |= 
INTERNAL_LEVEL_OVERRIDE;1431}1432/* we don't need to set LEVEL_OVERRIDE off for LRE1433since this has already been done for newLevel which is1434the source for embeddingLevel.1435*/1436} else if ((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL) {1437++countOver61;1438} else /* (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) == MAX_EXPLICIT_LEVEL-1 */ {1439++countOver60;1440}1441flags |= DirPropFlag(BN);1442break;1443case RLE:1444case RLO:1445/* (X2, X4) */1446newLevel=(byte)(((embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) + 1) | 1); /* least greater odd level */1447if (newLevel<=MAX_EXPLICIT_LEVEL) {1448stack[stackTop] = embeddingLevel;1449++stackTop;1450embeddingLevel = newLevel;1451if (dirProp == RLO) {1452embeddingLevel |= INTERNAL_LEVEL_OVERRIDE;1453}1454/* we don't need to set LEVEL_OVERRIDE off for RLE1455since this has already been done for newLevel which is1456the source for embeddingLevel.1457*/1458} else {1459++countOver61;1460}1461flags |= DirPropFlag(BN);1462break;1463case PDF:1464/* (X7) */1465/* handle all the overflow cases first */1466if (countOver61 > 0) {1467--countOver61;1468} else if (countOver60 > 0 && (embeddingLevel & ~INTERNAL_LEVEL_OVERRIDE) != MAX_EXPLICIT_LEVEL) {1469/* handle LRx overflows from level 60 */1470--countOver60;1471} else if (stackTop > 0) {1472/* this is the pop operation; it also pops level 61 while countOver60>0 */1473--stackTop;1474embeddingLevel = stack[stackTop];1475/* } else { (underflow) */1476}1477flags |= DirPropFlag(BN);1478break;1479case B:1480stackTop = 0;1481countOver60 = 0;1482countOver61 = 0;1483level = GetParaLevelAt(i);1484if ((i + 1) < length) {1485embeddingLevel = GetParaLevelAt(i+1);1486if (!((text[i] == CR) && (text[i + 1] == LF))) {1487paras[paraIndex++] = i+1;1488}1489}1490flags |= DirPropFlag(B);1491break;1492case BN:1493/* BN, LRE, RLE, and PDF are supposed to be removed (X9) */1494/* they will get their levels set correctly in adjustWSLevels() */1495flags |= 
DirPropFlag(BN);1496break;1497default:1498/* all other types get the "real" level */1499if (level != embeddingLevel) {1500level = embeddingLevel;1501if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {1502flags |= DirPropFlagO(level) | DirPropFlagMultiRuns;1503} else {1504flags |= DirPropFlagE(level) | DirPropFlagMultiRuns;1505}1506}1507if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {1508flags |= DirPropFlag(dirProp);1509}1510break;1511}15121513/*1514* We need to set reasonable levels even on BN codes and1515* explicit codes because we will later look at same-level runs (X10).1516*/1517levels[i] = level;1518}1519if ((flags & MASK_EMBEDDING) != 0) {1520flags |= DirPropFlagLR(paraLevel);1521}1522if (orderParagraphsLTR && (flags & DirPropFlag(B)) != 0) {1523flags |= DirPropFlag(L);1524}15251526/* subsequently, ignore the explicit codes and BN (X9) */15271528/* again, determine if the text is mixed-directional or single-directional */1529dirct = directionFromFlags();1530}15311532return dirct;1533}15341535/*1536* Use a pre-specified embedding levels array:1537*1538* Adjust the directional properties for overrides (->LEVEL_OVERRIDE),1539* ignore all explicit codes (X9),1540* and check all the preset levels.1541*1542* Recalculate the flags to have them reflect the real properties1543* after taking the explicit embeddings into account.1544*/1545private byte checkExplicitLevels() {1546byte dirProp;1547int i;1548this.flags = 0; /* collect all directionalities in the text */1549byte level;1550int paraIndex = 0;15511552for (i = 0; i < length; ++i) {1553if (levels[i] == 0) {1554levels[i] = paraLevel;1555}1556if (MAX_EXPLICIT_LEVEL < (levels[i]&0x7f)) {1557if ((levels[i] & INTERNAL_LEVEL_OVERRIDE) != 0) {1558levels[i] = (byte)(paraLevel|INTERNAL_LEVEL_OVERRIDE);1559} else {1560levels[i] = paraLevel;1561}1562}1563level = levels[i];1564dirProp = NoContextRTL(dirProps[i]);1565if ((level & INTERNAL_LEVEL_OVERRIDE) != 0) {1566/* keep the override flag in levels[i] but adjust the flags 
*/1567level &= ~INTERNAL_LEVEL_OVERRIDE; /* make the range check below simpler */1568flags |= DirPropFlagO(level);1569} else {1570/* set the flags */1571flags |= DirPropFlagE(level) | DirPropFlag(dirProp);1572}15731574if ((level < GetParaLevelAt(i) &&1575!((0 == level) && (dirProp == B))) ||1576(MAX_EXPLICIT_LEVEL <level)) {1577/* level out of bounds */1578throw new IllegalArgumentException("level " + level +1579" out of bounds at index " + i);1580}1581if ((dirProp == B) && ((i + 1) < length)) {1582if (!((text[i] == CR) && (text[i + 1] == LF))) {1583paras[paraIndex++] = i + 1;1584}1585}1586}1587if ((flags&MASK_EMBEDDING) != 0) {1588flags |= DirPropFlagLR(paraLevel);1589}15901591/* determine if the text is mixed-directional or single-directional */1592return directionFromFlags();1593}15941595/*********************************************************************/1596/* The Properties state machine table */1597/*********************************************************************/1598/* */1599/* All table cells are 8 bits: */1600/* bits 0..4: next state */1601/* bits 5..7: action to perform (if > 0) */1602/* */1603/* Cells may be of format "n" where n represents the next state */1604/* (except for the rightmost column). */1605/* Cells may also be of format "_(x,y)" where x represents an action */1606/* to perform and y represents the next state. 
*/1607/* */1608/*********************************************************************/1609/* Definitions and type for properties state tables */1610/*********************************************************************/1611private static final int IMPTABPROPS_COLUMNS = 14;1612private static final int IMPTABPROPS_RES = IMPTABPROPS_COLUMNS - 1;1613private static short GetStateProps(short cell) {1614return (short)(cell & 0x1f);1615}1616private static short GetActionProps(short cell) {1617return (short)(cell >> 5);1618}16191620private static final short groupProp[] = /* dirProp regrouped */1621{1622/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */16230, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 101624};1625private static final short _L = 0;1626private static final short _R = 1;1627private static final short _EN = 2;1628private static final short _AN = 3;1629private static final short _ON = 4;1630private static final short _S = 5;1631private static final short _B = 6; /* reduced dirProp */16321633/*********************************************************************/1634/* */1635/* PROPERTIES STATE TABLE */1636/* */1637/* In table impTabProps, */1638/* - the ON column regroups ON and WS */1639/* - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF */1640/* - the Res column is the reduced property assigned to a run */1641/* */1642/* Action 1: process current run1, init new run1 */1643/* 2: init new run2 */1644/* 3: process run1, process run2, init new run1 */1645/* 4: process run1, set run1=run2, init new run2 */1646/* */1647/* Notes: */1648/* 1) This table is used in resolveImplicitLevels(). */1649/* 2) This table triggers actions when there is a change in the Bidi*/1650/* property of incoming characters (action 1). */1651/* 3) Most such property sequences are processed immediately (in */1652/* fact, passed to processPropertySeq(). */1653/* 4) However, numbers are assembled as one sequence. 
This means */1654/* that undefined situations (like CS following digits, until */1655/* it is known if the next char will be a digit) are held until */1656/* following chars define them. */1657/* Example: digits followed by CS, then comes another CS or ON; */1658/* the digits will be processed, then the CS assigned */1659/* as the start of an ON sequence (action 3). */1660/* 5) There are cases where more than one sequence must be */1661/* processed, for instance digits followed by CS followed by L: */1662/* the digits must be processed as one sequence, and the CS */1663/* must be processed as an ON sequence, all this before starting */1664/* assembling chars for the opening L sequence. */1665/* */1666/* */1667private static final short impTabProps[][] =1668{1669/* L, R, EN, AN, ON, S, B, ES, ET, CS, BN, NSM, AL, Res */1670/* 0 Init */ { 1, 2, 4, 5, 7, 15, 17, 7, 9, 7, 0, 7, 3, _ON },1671/* 1 L */ { 1, 32+2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 1, 1, 32+3, _L },1672/* 2 R */ { 32+1, 2, 32+4, 32+5, 32+7, 32+15, 32+17, 32+7, 32+9, 32+7, 2, 2, 32+3, _R },1673/* 3 AL */ { 32+1, 32+2, 32+6, 32+6, 32+8, 32+16, 32+17, 32+8, 32+8, 32+8, 3, 3, 3, _R },1674/* 4 EN */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 64+10, 11, 64+10, 4, 4, 32+3, _EN },1675/* 5 AN */ { 32+1, 32+2, 32+4, 5, 32+7, 32+15, 32+17, 32+7, 32+9, 64+12, 5, 5, 32+3, _AN },1676/* 6 AL:EN/AN */ { 32+1, 32+2, 6, 6, 32+8, 32+16, 32+17, 32+8, 32+8, 64+13, 6, 6, 32+3, _AN },1677/* 7 ON */ { 32+1, 32+2, 32+4, 32+5, 7, 32+15, 32+17, 7, 64+14, 7, 7, 7, 32+3, _ON },1678/* 8 AL:ON */ { 32+1, 32+2, 32+6, 32+6, 8, 32+16, 32+17, 8, 8, 8, 8, 8, 32+3, _ON },1679/* 9 ET */ { 32+1, 32+2, 4, 32+5, 7, 32+15, 32+17, 7, 9, 7, 9, 9, 32+3, _ON },1680/*10 EN+ES/CS */ { 96+1, 96+2, 4, 96+5, 128+7, 96+15, 96+17, 128+7,128+14, 128+7, 10, 128+7, 96+3, _EN },1681/*11 EN+ET */ { 32+1, 32+2, 4, 32+5, 32+7, 32+15, 32+17, 32+7, 11, 32+7, 11, 11, 32+3, _EN },1682/*12 AN+CS */ { 96+1, 96+2, 96+4, 5, 128+7, 96+15, 96+17, 
128+7,128+14, 128+7, 12, 128+7, 96+3, _AN },1683/*13 AL:EN/AN+CS */ { 96+1, 96+2, 6, 6, 128+8, 96+16, 96+17, 128+8, 128+8, 128+8, 13, 128+8, 96+3, _AN },1684/*14 ON+ET */ { 32+1, 32+2, 128+4, 32+5, 7, 32+15, 32+17, 7, 14, 7, 14, 14, 32+3, _ON },1685/*15 S */ { 32+1, 32+2, 32+4, 32+5, 32+7, 15, 32+17, 32+7, 32+9, 32+7, 15, 32+7, 32+3, _S },1686/*16 AL:S */ { 32+1, 32+2, 32+6, 32+6, 32+8, 16, 32+17, 32+8, 32+8, 32+8, 16, 32+8, 32+3, _S },1687/*17 B */ { 32+1, 32+2, 32+4, 32+5, 32+7, 32+15, 17, 32+7, 32+9, 32+7, 17, 32+7, 32+3, _B }1688};16891690/*********************************************************************/1691/* The levels state machine tables */1692/*********************************************************************/1693/* */1694/* All table cells are 8 bits: */1695/* bits 0..3: next state */1696/* bits 4..7: action to perform (if > 0) */1697/* */1698/* Cells may be of format "n" where n represents the next state */1699/* (except for the rightmost column). */1700/* Cells may also be of format "_(x,y)" where x represents an action */1701/* to perform and y represents the next state. 
*/1702/* */1703/* This format limits each table to 16 states each and to 15 actions.*/1704/* */1705/*********************************************************************/1706/* Definitions and type for levels state tables */1707/*********************************************************************/1708private static final int IMPTABLEVELS_COLUMNS = _B + 2;1709private static final int IMPTABLEVELS_RES = IMPTABLEVELS_COLUMNS - 1;1710private static short GetState(byte cell) { return (short)(cell & 0x0f); }1711private static short GetAction(byte cell) { return (short)(cell >> 4); }17121713private static class ImpTabPair {1714byte[][][] imptab;1715short[][] impact;17161717ImpTabPair(byte[][] table1, byte[][] table2,1718short[] act1, short[] act2) {1719imptab = new byte[][][] {table1, table2};1720impact = new short[][] {act1, act2};1721}1722}17231724/*********************************************************************/1725/* */1726/* LEVELS STATE TABLES */1727/* */1728/* In all levels state tables, */1729/* - state 0 is the initial state */1730/* - the Res column is the increment to add to the text level */1731/* for this property sequence. */1732/* */1733/* The impact arrays for each table of a pair map the local action */1734/* numbers of the table to the total list of actions. For instance, */1735/* action 2 in a given table corresponds to the action number which */1736/* appears in entry [2] of the impact array for that table. */1737/* The first entry of all impact arrays must be 0. */1738/* */1739/* Action 1: init conditional sequence */1740/* 2: prepend conditional sequence to current sequence */1741/* 3: set ON sequence to new level - 1 */1742/* 4: init EN/AN/ON sequence */1743/* 5: fix EN/AN/ON sequence followed by R */1744/* 6: set previous level sequence to level 2 */1745/* */1746/* Notes: */1747/* 1) These tables are used in processPropertySeq(). The input */1748/* is property sequences as determined by resolveImplicitLevels. 
*/
    /*  2) Most such property sequences are processed immediately        */
    /*     (levels are assigned).                                        */
    /*  3) However, some sequences cannot be assigned a final level till */
    /*     one or more following sequences are received. For instance,   */
    /*     ON following an R sequence within an even-level paragraph.    */
    /*     If the following sequence is R, the ON sequence will be       */
    /*     assigned basic run level+1, and so will the R sequence.       */
    /*  4) S is generally handled like ON, since its level will be fixed */
    /*     to paragraph level in adjustWSLevels().                       */
    /*                                                                   */

    /*
     * Reading the tables below: a cell such as 0x14 has action nibble 1 and
     * next state 4 (see GetAction()/GetState()); the action nibble is an index
     * into the impAct array paired with the table, and the value found there
     * selects the case executed in processPropertySeq().
     */
    private static final byte impTabL_DEFAULT[][] =        /* Even paragraph level */
        /*  In this table, conditional sequences receive the higher possible level
            until proven otherwise.
        */
    {
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     0,     1,     0,     2,     0,     0,     0,     0 },
        /* 1 : R          */ {     0,     1,     3,     3,  0x14,  0x14,     0,     1 },
        /* 2 : AN         */ {     0,     1,     0,     2,  0x15,  0x15,     0,     2 },
        /* 3 : R+EN/AN    */ {     0,     1,     3,     3,  0x14,  0x14,     0,     2 },
        /* 4 : R+ON       */ {  0x20,     1,     3,     3,     4,     4,  0x20,     1 },
        /* 5 : AN+ON      */ {  0x20,     1,  0x20,     2,     5,     5,  0x20,     1 }
    };

    private static final byte impTabR_DEFAULT[][] =        /* Odd paragraph level */
        /*  In this table, conditional sequences receive the lower possible level
            until proven otherwise.
        */
    {
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,     0 },
        /* 1 : L          */ {     1,     0,     1,     3,  0x14,  0x14,     0,     1 },
        /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,     1 },
        /* 3 : L+AN       */ {     1,     0,     1,     3,     5,     5,     0,     1 },
        /* 4 : L+ON       */ {  0x21,     0,  0x21,     3,     4,     4,     0,     0 },
        /* 5 : L+AN+ON    */ {     1,     0,     1,     3,     5,     5,     0,     0 }
    };

    /* Identity action map: local action n is global action n. */
    private static final short[] impAct0 = {0,1,2,3,4,5,6};

    private static final ImpTabPair impTab_DEFAULT = new ImpTabPair(
            impTabL_DEFAULT, impTabR_DEFAULT, impAct0, impAct0);

    private static final byte impTabL_NUMBERS_SPECIAL[][] = { /* Even paragraph level */
        /* In this table, conditional sequences receive the higher possible
           level until proven otherwise.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     0,     2,     1,     1,     0,     0,     0,     0 },
        /* 1 : L+EN/AN    */ {     0,     2,     1,     1,     0,     0,     0,     2 },
        /* 2 : R          */ {     0,     2,     4,     4,  0x13,     0,     0,     1 },
        /* 3 : R+ON       */ {  0x20,     2,     4,     4,     3,     3,  0x20,     1 },
        /* 4 : R+EN/AN    */ {     0,     2,     4,     4,  0x13,  0x13,     0,     2 }
    };
    /* Shares the odd-level table with impTab_DEFAULT. */
    private static final ImpTabPair impTab_NUMBERS_SPECIAL = new ImpTabPair(
            impTabL_NUMBERS_SPECIAL, impTabR_DEFAULT, impAct0, impAct0);

    private static final byte impTabL_GROUP_NUMBERS_WITH_R[][] = {
        /* In this table, EN/AN+ON sequences receive levels as if associated with R
           until proven that there is L or sor/eor on both sides. AN is handled like EN.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 init         */ {     0,     3,  0x11,  0x11,     0,     0,     0,     0 },
        /* 1 EN/AN        */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,     2 },
        /* 2 EN/AN+ON     */ {  0x20,     3,     1,     1,     2,  0x20,  0x20,     1 },
        /* 3 R            */ {     0,     3,     5,     5,  0x14,     0,     0,     1 },
        /* 4 R+ON         */ {  0x20,     3,     5,     5,     4,  0x20,  0x20,     1 },
        /* 5 R+EN/AN      */ {     0,     3,     5,     5,  0x14,     0,     0,     2 }
    };
    private static final byte impTabR_GROUP_NUMBERS_WITH_R[][] = {
        /*  In this table, EN/AN+ON sequences receive levels as if associated with R
            until proven that there is L on both sides. AN is handled like EN.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 init         */ {     2,     0,     1,     1,     0,     0,     0,     0 },
        /* 1 EN/AN        */ {     2,     0,     1,     1,     0,     0,     0,     1 },
        /* 2 L            */ {     2,     0,  0x14,  0x14,  0x13,     0,     0,     1 },
        /* 3 L+ON         */ {  0x22,     0,     4,     4,     3,     0,     0,     0 },
        /* 4 L+EN/AN      */ {  0x22,     0,     4,     4,     3,     0,     0,     1 }
    };
    private static final ImpTabPair impTab_GROUP_NUMBERS_WITH_R = new
            ImpTabPair(impTabL_GROUP_NUMBERS_WITH_R,
                       impTabR_GROUP_NUMBERS_WITH_R, impAct0, impAct0);

    private static final byte impTabL_INVERSE_NUMBERS_AS_L[][] = {
        /* This table is identical to the Default LTR table except that EN and AN
           are handled like L.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     0,     1,     0,     0,     0,     0,     0,     0 },
        /* 1 : R          */ {     0,     1,     0,     0,  0x14,  0x14,     0,     1 },
        /* 2 : AN         */ {     0,     1,     0,     0,  0x15,  0x15,     0,     2 },
        /* 3 : R+EN/AN    */ {     0,     1,     0,     0,  0x14,  0x14,     0,     2 },
        /* 4 : R+ON       */ {  0x20,     1,  0x20,  0x20,     4,     4,  0x20,     1 },
        /* 5 : AN+ON      */ {  0x20,     1,  0x20,  0x20,     5,     5,  0x20,     1 }
    };
    private static final byte impTabR_INVERSE_NUMBERS_AS_L[][] = {
        /* This table is identical to the Default RTL table except that EN and AN
           are handled like L.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     1,     0,     1,     1,     0,     0,     0,     0 },
        /* 1 : L          */ {     1,     0,     1,     1,  0x14,  0x14,     0,     1 },
        /* 2 : EN/AN      */ {     1,     0,     1,     1,     0,     0,     0,     1 },
        /* 3 : L+AN       */ {     1,     0,     1,     1,     5,     5,     0,     1 },
        /* 4 : L+ON       */ {  0x21,     0,  0x21,  0x21,     4,     4,     0,     0 },
        /* 5 : L+AN+ON    */ {     1,     0,     1,     1,     5,     5,     0,     0 }
    };
    private static final ImpTabPair impTab_INVERSE_NUMBERS_AS_L = new ImpTabPair
            (impTabL_INVERSE_NUMBERS_AS_L, impTabR_INVERSE_NUMBERS_AS_L,
             impAct0, impAct0);

    private static final byte impTabR_INVERSE_LIKE_DIRECT[][] = {  /* Odd  paragraph level */
        /*  In this table, conditional sequences receive the lower possible level
            until proven otherwise.
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     1,     0,     2,     2,     0,     0,     0,     0 },
        /* 1 : L          */ {     1,     0,     1,     2,  0x13,  0x13,     0,     1 },
        /* 2 : EN/AN      */ {     1,     0,     2,     2,     0,     0,     0,     1 },
        /* 3 : L+ON       */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,     0 },
        /* 4 : L+ON+AN    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,     3 },
        /* 5 : L+AN+ON    */ {  0x21,  0x30,     6,     4,     5,     5,  0x30,     2 },
        /* 6 : L+ON+EN    */ {  0x21,  0x30,     6,     4,     3,     3,  0x30,     1 }
    };
    /* Action map for impTabR_INVERSE_LIKE_DIRECT: local actions 2 and 3 run
       cases 11 and 12 of processPropertySeq(). */
    private static final short[] impAct1 = {0,1,11,12};
    private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT = new ImpTabPair(
            impTabL_DEFAULT, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);

    private static final byte impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
        /*  The case handled in this table is (visually):  R EN L
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {     0,  0x63,     0,     1,     0,     0,     0,     0 },
        /* 1 : L+AN       */ {     0,  0x63,     0,     1,  0x12,  0x30,     0,     4 },
        /* 2 : L+AN+ON    */ {  0x20,  0x63,  0x20,     1,     2,  0x30,  0x20,     3 },
        /* 3 : R          */ {     0,  0x63,  0x55,  0x56,  0x14,  0x30,     0,     3 },
        /* 4 : R+ON       */ {  0x30,  0x43,  0x55,  0x56,     4,  0x30,  0x30,     3 },
        /* 5 : R+EN       */ {  0x30,  0x43,     5,  0x56,  0x14,  0x30,  0x30,     4 },
        /* 6 : R+AN       */ {  0x30,  0x43,  0x55,     6,  0x14,  0x30,  0x30,     4 }
    };
    private static final byte impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS[][] = {
        /*  The cases handled in this table are (visually):  R EN L
                                                             R L AN L
        */
        /*                         L,     R,    EN,    AN,    ON,     S,     B,   Res */
        /* 0 : init       */ {  0x13,     0,     1,     1,     0,     0,     0,     0 },
        /* 1 : R+EN/AN    */ {  0x23,     0,     1,     1,     2,  0x40,     0,     1 },
        /* 2 : R+EN/AN+ON */ {  0x23,     0,     1,     1,     2,  0x40,     0,     0 },
        /* 3 : L          */ {    3 ,     0,     3,  0x36,  0x14,  0x40,     0,     1 },
        /* 4 : L+ON       */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,     0 },
        /* 5 : L+ON+EN    */ {  0x53,  0x40,     5,  0x36,     4,  0x40,  0x40,     1 },
        /* 6 : L+AN       */ {  0x53,  0x40,     6,     6,     4,  0x40,  0x40,     3 }
    };
    /* Action map for impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS: local actions
       2..5 run cases 7..10 of processPropertySeq(). */
    private static final short impAct2[] = {0,1,7,8,9,10};
    private static final ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS =
            new ImpTabPair(impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS,
                           impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);

    private static final ImpTabPair
impTab_INVERSE_FOR_NUMBERS_SPECIAL = new ImpTabPair(1910impTabL_NUMBERS_SPECIAL, impTabR_INVERSE_LIKE_DIRECT, impAct0, impAct1);19111912private static final byte impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS[][] = {1913/* The case handled in this table is (visually): R EN L1914*/1915/* L, R, EN, AN, ON, S, B, Res */1916/* 0 : init */ { 0, 0x62, 1, 1, 0, 0, 0, 0 },1917/* 1 : L+EN/AN */ { 0, 0x62, 1, 1, 0, 0x30, 0, 4 },1918/* 2 : R */ { 0, 0x62, 0x54, 0x54, 0x13, 0x30, 0, 3 },1919/* 3 : R+ON */ { 0x30, 0x42, 0x54, 0x54, 3, 0x30, 0x30, 3 },1920/* 4 : R+EN/AN */ { 0x30, 0x42, 4, 4, 0x13, 0x30, 0x30, 4 }1921};1922private static final ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = new1923ImpTabPair(impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS,1924impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS, impAct0, impAct2);19251926private class LevState {1927byte[][] impTab; /* level table pointer */1928short[] impAct; /* action map array */1929int startON; /* start of ON sequence */1930int startL2EN; /* start of level 2 sequence */1931int lastStrongRTL; /* index of last found R or AL */1932short state; /* current state */1933byte runLevel; /* run level before implicit solving */1934}19351936/*------------------------------------------------------------------------*/19371938static final int FIRSTALLOC = 10;1939/*1940* param pos: position where to insert1941* param flag: one of LRM_BEFORE, LRM_AFTER, RLM_BEFORE, RLM_AFTER1942*/1943private void addPoint(int pos, int flag)1944{1945Point point = new Point();19461947int len = insertPoints.points.length;1948if (len == 0) {1949insertPoints.points = new Point[FIRSTALLOC];1950len = FIRSTALLOC;1951}1952if (insertPoints.size >= len) { /* no room for new point */1953Point[] savePoints = insertPoints.points;1954insertPoints.points = new Point[len * 2];1955System.arraycopy(savePoints, 0, insertPoints.points, 0, len);1956}1957point.pos = pos;1958point.flag = flag;1959insertPoints.points[insertPoints.size] = 
point;1960insertPoints.size++;1961}19621963/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */19641965/*1966* This implementation of the (Wn) rules applies all rules in one pass.1967* In order to do so, it needs a look-ahead of typically 1 character1968* (except for W5: sequences of ET) and keeps track of changes1969* in a rule Wp that affect a later Wq (p<q).1970*1971* The (Nn) and (In) rules are also performed in that same single loop,1972* but effectively one iteration behind for white space.1973*1974* Since all implicit rules are performed in one step, it is not necessary1975* to actually store the intermediate directional properties in dirProps[].1976*/19771978private void processPropertySeq(LevState levState, short _prop,1979int start, int limit) {1980byte cell;1981byte[][] impTab = levState.impTab;1982short[] impAct = levState.impAct;1983short oldStateSeq,actionSeq;1984byte level, addLevel;1985int start0, k;19861987start0 = start; /* save original start position */1988oldStateSeq = levState.state;1989cell = impTab[oldStateSeq][_prop];1990levState.state = GetState(cell); /* isolate the new state */1991actionSeq = impAct[GetAction(cell)]; /* isolate the action */1992addLevel = impTab[levState.state][IMPTABLEVELS_RES];19931994if (actionSeq != 0) {1995switch (actionSeq) {1996case 1: /* init ON seq */1997levState.startON = start0;1998break;19992000case 2: /* prepend ON seq to current seq */2001start = levState.startON;2002break;20032004case 3: /* L or S after possible relevant EN/AN */2005/* check if we had EN after R/AL */2006if (levState.startL2EN >= 0) {2007addPoint(levState.startL2EN, LRM_BEFORE);2008}2009levState.startL2EN = -1; /* not within previous if since could also be -2 */2010/* check if we had any relevant EN/AN after R/AL */2011if ((insertPoints.points.length == 0) ||2012(insertPoints.size <= insertPoints.confirmed)) {2013/* nothing, just clean up */2014levState.lastStrongRTL = -1;2015/* check if we have a pending 
conditional segment */2016level = impTab[oldStateSeq][IMPTABLEVELS_RES];2017if ((level & 1) != 0 && levState.startON > 0) { /* after ON */2018start = levState.startON; /* reset to basic run level */2019}2020if (_prop == _S) { /* add LRM before S */2021addPoint(start0, LRM_BEFORE);2022insertPoints.confirmed = insertPoints.size;2023}2024break;2025}2026/* reset previous RTL cont to level for LTR text */2027for (k = levState.lastStrongRTL + 1; k < start0; k++) {2028/* reset odd level, leave runLevel+2 as is */2029levels[k] = (byte)((levels[k] - 2) & ~1);2030}2031/* mark insert points as confirmed */2032insertPoints.confirmed = insertPoints.size;2033levState.lastStrongRTL = -1;2034if (_prop == _S) { /* add LRM before S */2035addPoint(start0, LRM_BEFORE);2036insertPoints.confirmed = insertPoints.size;2037}2038break;20392040case 4: /* R/AL after possible relevant EN/AN */2041/* just clean up */2042if (insertPoints.points.length > 0)2043/* remove all non confirmed insert points */2044insertPoints.size = insertPoints.confirmed;2045levState.startON = -1;2046levState.startL2EN = -1;2047levState.lastStrongRTL = limit - 1;2048break;20492050case 5: /* EN/AN after R/AL + possible cont */2051/* check for real AN */2052if ((_prop == _AN) && (NoContextRTL(dirProps[start0]) == AN)) {2053/* real AN */2054if (levState.startL2EN == -1) { /* if no relevant EN already found */2055/* just note the righmost digit as a strong RTL */2056levState.lastStrongRTL = limit - 1;2057break;2058}2059if (levState.startL2EN >= 0) { /* after EN, no AN */2060addPoint(levState.startL2EN, LRM_BEFORE);2061levState.startL2EN = -2;2062}2063/* note AN */2064addPoint(start0, LRM_BEFORE);2065break;2066}2067/* if first EN/AN after R/AL */2068if (levState.startL2EN == -1) {2069levState.startL2EN = start0;2070}2071break;20722073case 6: /* note location of latest R/AL */2074levState.lastStrongRTL = limit - 1;2075levState.startON = -1;2076break;20772078case 7: /* L after R+ON/EN/AN */2079/* include possible adjacent 
number on the left */2080for (k = start0-1; k >= 0 && ((levels[k] & 1) == 0); k--) {2081}2082if (k >= 0) {2083addPoint(k, RLM_BEFORE); /* add RLM before */2084insertPoints.confirmed = insertPoints.size; /* confirm it */2085}2086levState.startON = start0;2087break;20882089case 8: /* AN after L */2090/* AN numbers between L text on both sides may be trouble. */2091/* tentatively bracket with LRMs; will be confirmed if followed by L */2092addPoint(start0, LRM_BEFORE); /* add LRM before */2093addPoint(start0, LRM_AFTER); /* add LRM after */2094break;20952096case 9: /* R after L+ON/EN/AN */2097/* false alert, infirm LRMs around previous AN */2098insertPoints.size=insertPoints.confirmed;2099if (_prop == _S) { /* add RLM before S */2100addPoint(start0, RLM_BEFORE);2101insertPoints.confirmed = insertPoints.size;2102}2103break;21042105case 10: /* L after L+ON/AN */2106level = (byte)(levState.runLevel + addLevel);2107for (k=levState.startON; k < start0; k++) {2108if (levels[k] < level) {2109levels[k] = level;2110}2111}2112insertPoints.confirmed = insertPoints.size; /* confirm inserts */2113levState.startON = start0;2114break;21152116case 11: /* L after L+ON+EN/AN/ON */2117level = levState.runLevel;2118for (k = start0-1; k >= levState.startON; k--) {2119if (levels[k] == level+3) {2120while (levels[k] == level+3) {2121levels[k--] -= 2;2122}2123while (levels[k] == level) {2124k--;2125}2126}2127if (levels[k] == level+2) {2128levels[k] = level;2129continue;2130}2131levels[k] = (byte)(level+1);2132}2133break;21342135case 12: /* R after L+ON+EN/AN/ON */2136level = (byte)(levState.runLevel+1);2137for (k = start0-1; k >= levState.startON; k--) {2138if (levels[k] > level) {2139levels[k] -= 2;2140}2141}2142break;21432144default: /* we should never get here */2145throw new IllegalStateException("Internal ICU error in processPropertySeq");2146}2147}2148if ((addLevel) != 0 || (start < start0)) {2149level = (byte)(levState.runLevel + addLevel);2150for (k = start; k < limit; k++) 
{
                levels[k] = level;
            }
        }
    }

    /*
     * Resolves the implicit levels of one run of text.
     *
     * The run is scanned once.  Each character's directional property is
     * mapped to its group with groupProp[] and fed to the properties state
     * machine (impTabProps); whenever that machine reports a completed
     * property sequence, the sequence is handed to processPropertySeq(),
     * which assigns the levels.  sor is fed before the first character and
     * eor stands in for the property at index limit.
     *
     *  param start: index of the first character of the run; levels[start]
     *               is taken as the run level and selects the table parity
     *  param limit: index just behind the last character of the run
     *  param sor:   property to use in front of the run
     *  param eor:   property to use behind the run
     *
     *  throws IllegalStateException if impTabProps yields an action outside 1..4
     */
    private void resolveImplicitLevels(int start, int limit, short sor, short eor)
    {
        LevState levState = new LevState();
        int i, start1, start2;
        short oldStateImp, stateImp, actionImp;
        short gprop, resProp, cell;

        /* check for RTL inverse Bidi mode */
        /* FOOD FOR THOUGHT: in case of RTL inverse Bidi, it would make sense to
         * loop on the text characters from end to start.
         * This would need a different properties state table (at least different
         * actions) and different levels state tables (maybe very similar to the
         * LTR corresponding ones).
         */
        /* initialize for levels state table */
        levState.startL2EN = -1;        /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
        levState.lastStrongRTL = -1;    /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */
        levState.state = 0;
        levState.runLevel = levels[start];
        levState.impTab = impTabPair.imptab[levState.runLevel & 1];
        levState.impAct = impTabPair.impact[levState.runLevel & 1];
        processPropertySeq(levState, sor, start, start);
        /* initialize for property state table */
        if (dirProps[start] == NSM) {
            stateImp = (short)(1 + sor);
        } else {
            stateImp = 0;
        }
        start1 = start;
        start2 = 0;

        /* i runs up to and including limit so that eor can close the last sequence */
        for (i = start; i <= limit; i++) {
            if (i >= limit) {
                gprop = eor;
            } else {
                short prop;
                prop = NoContextRTL(dirProps[i]);
                gprop = groupProp[prop];
            }
            oldStateImp = stateImp;
            cell = impTabProps[oldStateImp][gprop];
            stateImp = GetStateProps(cell);     /* isolate the new state */
            actionImp = GetActionProps(cell);   /* isolate the action */
            if ((i == limit) && (actionImp == 0)) {
                /* there is an unprocessed sequence if its property == eor   */
                actionImp = 1;                  /* process the last sequence */
            }
            if (actionImp != 0) {
                resProp = impTabProps[oldStateImp][IMPTABPROPS_RES];
                switch (actionImp) {
                case 1:             /* process current seq1, init new seq1 */
                    processPropertySeq(levState, resProp, start1, i);
                    start1 = i;
                    break;
                case 2:             /* init new seq2 */
                    start2 = i;
                    break;
                case 3:             /* process seq1, process seq2, init new seq1 */
                    processPropertySeq(levState, resProp, start1, start2);
                    processPropertySeq(levState, _ON, start2, i);
                    start1 = i;
                    break;
                case 4:             /* process seq1, set seq1=seq2, init new seq2 */
                    processPropertySeq(levState, resProp, start1, start2);
                    start1 = start2;
                    start2 = i;
                    break;
                default:            /* we should never get here */
                    throw new IllegalStateException("Internal ICU error in resolveImplicitLevels");
                }
            }
        }
        /* flush possible pending sequence, e.g. ON */
        processPropertySeq(levState, eor, limit, limit);
    }

    /* perform (L1) and (X9) ---------------------------------------------------- */

    /*
     * Reset the embedding levels for some non-graphic characters (L1).
     * This method also sets appropriate levels for BN, and
     * explicit embedding types that are supposed to have been removed
     * from the paragraph in (X9).
     *
     * The text is walked backwards from trailingWSStart.  Nothing is done
     * unless the MASK_WS bits are present in flags.  When orderParagraphsLTR
     * is set, characters of type B get level 0 instead of their paragraph
     * level.
     */
    private void adjustWSLevels() {
        int i;

        if ((flags & MASK_WS) != 0) {
            int flag;
            i = trailingWSStart;
            while (i > 0) {
                /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
                while (i > 0 && ((flag = DirPropFlagNC(dirProps[--i])) & MASK_WS) != 0) {
                    if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
                        levels[i] = 0;
                    } else {
                        levels[i] = GetParaLevelAt(i);
                    }
                }

                /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
                /* here, i+1 is guaranteed to be <length */
                while (i > 0) {
                    flag = DirPropFlagNC(dirProps[--i]);
                    if ((flag & MASK_BN_EXPLICIT) != 0) {
                        levels[i] = levels[i + 1];
                    } else if (orderParagraphsLTR && (flag & DirPropFlag(B)) != 0) {
                        levels[i] = 0;
                        break;
                    } else if ((flag & MASK_B_S) != 0){
                        levels[i] = GetParaLevelAt(i);
                        break;
                    }
                }
            }
        }
    }

    private int
Bidi_Min(int x, int y) {2278return x < y ? x : y;2279}22802281private int Bidi_Abs(int x) {2282return x >= 0 ? x : -x;2283}22842285/**2286* Perform the Unicode Bidi algorithm. It is defined in the2287* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,2288* version 13,2289* also described in The Unicode Standard, Version 4.0 .<p>2290*2291* This method takes a piece of plain text containing one or more paragraphs,2292* with or without externally specified embedding levels from <i>styled</i>2293* text and computes the left-right-directionality of each character.<p>2294*2295* If the entire text is all of the same directionality, then2296* the method may not perform all the steps described by the algorithm,2297* i.e., some levels may not be the same as if all steps were performed.2298* This is not relevant for unidirectional text.<br>2299* For example, in pure LTR text with numbers the numbers would get2300* a resolved level of 2 higher than the surrounding text according to2301* the algorithm. This implementation may set all resolved levels to2302* the same value in such a case.<p>2303*2304* The text can be composed of multiple paragraphs. Occurrence of a block2305* separator in the text terminates a paragraph, and whatever comes next starts2306* a new paragraph. The exception to this rule is when a Carriage Return (CR)2307* is followed by a Line Feed (LF). Both CR and LF are block separators, but2308* in that case, the pair of characters is considered as terminating the2309* preceding paragraph, and a new paragraph will be started by a character2310* coming after the LF.2311*2312* Although the text is passed here as a <code>String</code>, it is2313* stored internally as an array of characters. Therefore the2314* documentation will refer to indexes of the characters in the text.2315*2316* @param text contains the text that the Bidi algorithm will be performed2317* on. 
This text can be retrieved with <code>getText()</code> or2318* <code>getTextAsString</code>.<br>2319*2320* @param paraLevel specifies the default level for the text;2321* it is typically 0 (LTR) or 1 (RTL).2322* If the method shall determine the paragraph level from the text,2323* then <code>paraLevel</code> can be set to2324* either <code>LEVEL_DEFAULT_LTR</code>2325* or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple2326* paragraphs, the paragraph level shall be determined separately for2327* each paragraph; if a paragraph does not include any strongly typed2328* character, then the desired default is used (0 for LTR or 1 for RTL).2329* Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>2330* is also valid, with odd levels indicating RTL.2331*2332* @param embeddingLevels (in) may be used to preset the embedding and override levels,2333* ignoring characters like LRE and PDF in the text.2334* A level overrides the directional property of its corresponding2335* (same index) character if the level has the2336* <code>LEVEL_OVERRIDE</code> bit set.<br><br>2337* Except for that bit, it must be2338* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,2339* with one exception: a level of zero may be specified for a2340* paragraph separator even if <code>paraLevel>0</code> when multiple2341* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>2342* <strong>Caution: </strong>A reference to this array, not a copy2343* of the levels, will be stored in the <code>Bidi</code> object;2344* the <code>embeddingLevels</code>2345* should not be modified to avoid unexpected results on subsequent2346* Bidi operations. 
However, the <code>setPara()</code> and2347* <code>setLine()</code> methods may modify some or all of the2348* levels.<br><br>2349* <strong>Note:</strong> the <code>embeddingLevels</code> array must2350* have one entry for each character in <code>text</code>.2351*2352* @throws IllegalArgumentException if the values in embeddingLevels are2353* not within the allowed range2354*2355* @see #LEVEL_DEFAULT_LTR2356* @see #LEVEL_DEFAULT_RTL2357* @see #LEVEL_OVERRIDE2358* @see #MAX_EXPLICIT_LEVEL2359* @stable ICU 3.82360*/2361void setPara(String text, byte paraLevel, byte[] embeddingLevels)2362{2363if (text == null) {2364setPara(new char[0], paraLevel, embeddingLevels);2365} else {2366setPara(text.toCharArray(), paraLevel, embeddingLevels);2367}2368}23692370/**2371* Perform the Unicode Bidi algorithm. It is defined in the2372* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,2373* version 13,2374* also described in The Unicode Standard, Version 4.0 .<p>2375*2376* This method takes a piece of plain text containing one or more paragraphs,2377* with or without externally specified embedding levels from <i>styled</i>2378* text and computes the left-right-directionality of each character.<p>2379*2380* If the entire text is all of the same directionality, then2381* the method may not perform all the steps described by the algorithm,2382* i.e., some levels may not be the same as if all steps were performed.2383* This is not relevant for unidirectional text.<br>2384* For example, in pure LTR text with numbers the numbers would get2385* a resolved level of 2 higher than the surrounding text according to2386* the algorithm. This implementation may set all resolved levels to2387* the same value in such a case.<p>2388*2389* The text can be composed of multiple paragraphs. Occurrence of a block2390* separator in the text terminates a paragraph, and whatever comes next starts2391* a new paragraph. 
The exception to this rule is when a Carriage Return (CR)2392* is followed by a Line Feed (LF). Both CR and LF are block separators, but2393* in that case, the pair of characters is considered as terminating the2394* preceding paragraph, and a new paragraph will be started by a character2395* coming after the LF.2396*2397* The text is stored internally as an array of characters. Therefore the2398* documentation will refer to indexes of the characters in the text.2399*2400* @param chars contains the text that the Bidi algorithm will be performed2401* on. This text can be retrieved with <code>getText()</code> or2402* <code>getTextAsString</code>.<br>2403*2404* @param paraLevel specifies the default level for the text;2405* it is typically 0 (LTR) or 1 (RTL).2406* If the method shall determine the paragraph level from the text,2407* then <code>paraLevel</code> can be set to2408* either <code>LEVEL_DEFAULT_LTR</code>2409* or <code>LEVEL_DEFAULT_RTL</code>; if the text contains multiple2410* paragraphs, the paragraph level shall be determined separately for2411* each paragraph; if a paragraph does not include any strongly typed2412* character, then the desired default is used (0 for LTR or 1 for RTL).2413* Any other value between 0 and <code>MAX_EXPLICIT_LEVEL</code>2414* is also valid, with odd levels indicating RTL.2415*2416* @param embeddingLevels (in) may be used to preset the embedding and2417* override levels, ignoring characters like LRE and PDF in the text.2418* A level overrides the directional property of its corresponding2419* (same index) character if the level has the2420* <code>LEVEL_OVERRIDE</code> bit set.<br><br>2421* Except for that bit, it must be2422* <code>paraLevel<=embeddingLevels[]<=MAX_EXPLICIT_LEVEL</code>,2423* with one exception: a level of zero may be specified for a2424* paragraph separator even if <code>paraLevel>0</code> when multiple2425* paragraphs are submitted in the same call to <code>setPara()</code>.<br><br>2426* <strong>Caution: 
</strong>A reference to this array, not a copy2427* of the levels, will be stored in the <code>Bidi</code> object;2428* the <code>embeddingLevels</code>2429* should not be modified to avoid unexpected results on subsequent2430* Bidi operations. However, the <code>setPara()</code> and2431* <code>setLine()</code> methods may modify some or all of the2432* levels.<br><br>2433* <strong>Note:</strong> the <code>embeddingLevels</code> array must2434* have one entry for each character in <code>text</code>.2435*2436* @throws IllegalArgumentException if the values in embeddingLevels are2437* not within the allowed range2438*2439* @see #LEVEL_DEFAULT_LTR2440* @see #LEVEL_DEFAULT_RTL2441* @see #LEVEL_OVERRIDE2442* @see #MAX_EXPLICIT_LEVEL2443* @stable ICU 3.82444*/2445public void setPara(char[] chars, byte paraLevel, byte[] embeddingLevels)2446{2447/* check the argument values */2448if (paraLevel < INTERNAL_LEVEL_DEFAULT_LTR) {2449verifyRange(paraLevel, 0, MAX_EXPLICIT_LEVEL + 1);2450}2451if (chars == null) {2452chars = new char[0];2453}24542455/* initialize the Bidi object */2456this.paraBidi = null; /* mark unfinished setPara */2457this.text = chars;2458this.length = this.originalLength = this.resultLength = text.length;2459this.paraLevel = paraLevel;2460this.direction = Bidi.DIRECTION_LEFT_TO_RIGHT;2461this.paraCount = 1;24622463/* Allocate zero-length arrays instead of setting to null here; then2464* checks for null in various places can be eliminated.2465*/2466dirProps = new byte[0];2467levels = new byte[0];2468runs = new BidiRun[0];2469isGoodLogicalToVisualRunsMap = false;2470insertPoints.size = 0; /* clean up from last call */2471insertPoints.confirmed = 0; /* clean up from last call */24722473/*2474* Save the original paraLevel if contextual; otherwise, set to 0.2475*/2476if (IsDefaultLevel(paraLevel)) {2477defaultParaLevel = paraLevel;2478} else {2479defaultParaLevel = 0;2480}24812482if (length == 0) {2483/*2484* For an empty paragraph, create a Bidi object with the 
paraLevel and2485* the flags and the direction set but without allocating zero-length arrays.2486* There is nothing more to do.2487*/2488if (IsDefaultLevel(paraLevel)) {2489this.paraLevel &= 1;2490defaultParaLevel = 0;2491}2492if ((this.paraLevel & 1) != 0) {2493flags = DirPropFlag(R);2494direction = Bidi.DIRECTION_RIGHT_TO_LEFT;2495} else {2496flags = DirPropFlag(L);2497direction = Bidi.DIRECTION_LEFT_TO_RIGHT;2498}24992500runCount = 0;2501paraCount = 0;2502paraBidi = this; /* mark successful setPara */2503return;2504}25052506runCount = -1;25072508/*2509* Get the directional properties,2510* the flags bit-set, and2511* determine the paragraph level if necessary.2512*/2513getDirPropsMemory(length);2514dirProps = dirPropsMemory;2515getDirProps();25162517/* the processed length may have changed if OPTION_STREAMING is set */2518trailingWSStart = length; /* the levels[] will reflect the WS run */25192520/* allocate paras memory */2521if (paraCount > 1) {2522getInitialParasMemory(paraCount);2523paras = parasMemory;2524paras[paraCount - 1] = length;2525} else {2526/* initialize paras for single paragraph */2527paras = simpleParas;2528simpleParas[0] = length;2529}25302531/* are explicit levels specified? 
*/2532if (embeddingLevels == null) {2533/* no: determine explicit levels according to the (Xn) rules */2534getLevelsMemory(length);2535levels = levelsMemory;2536direction = resolveExplicitLevels();2537} else {2538/* set BN for all explicit codes, check that all levels are 0 or paraLevel..MAX_EXPLICIT_LEVEL */2539levels = embeddingLevels;2540direction = checkExplicitLevels();2541}25422543/*2544* The steps after (X9) in the Bidi algorithm are performed only if2545* the paragraph text has mixed directionality!2546*/2547switch (direction) {2548case Bidi.DIRECTION_LEFT_TO_RIGHT:2549/* make sure paraLevel is even */2550paraLevel = (byte)((paraLevel + 1) & ~1);25512552/* all levels are implicitly at paraLevel (important for getLevels()) */2553trailingWSStart = 0;2554break;2555case Bidi.DIRECTION_RIGHT_TO_LEFT:2556/* make sure paraLevel is odd */2557paraLevel |= 1;25582559/* all levels are implicitly at paraLevel (important for getLevels()) */2560trailingWSStart = 0;2561break;2562default:2563this.impTabPair = impTab_DEFAULT;25642565/*2566* If there are no external levels specified and there2567* are no significant explicit level codes in the text,2568* then we can treat the entire paragraph as one run.2569* Otherwise, we need to perform the following rules on runs of2570* the text with the same embedding levels. 
(X10)2571* "Significant" explicit level codes are ones that actually2572* affect non-BN characters.2573* Examples for "insignificant" ones are empty embeddings2574* LRE-PDF, LRE-RLE-PDF-PDF, etc.2575*/2576if (embeddingLevels == null && paraCount <= 1 &&2577(flags & DirPropFlagMultiRuns) == 0) {2578resolveImplicitLevels(0, length,2579GetLRFromLevel(GetParaLevelAt(0)),2580GetLRFromLevel(GetParaLevelAt(length - 1)));2581} else {2582/* sor, eor: start and end types of same-level-run */2583int start, limit = 0;2584byte level, nextLevel;2585short sor, eor;25862587/* determine the first sor and set eor to it because of the loop body (sor=eor there) */2588level = GetParaLevelAt(0);2589nextLevel = levels[0];2590if (level < nextLevel) {2591eor = GetLRFromLevel(nextLevel);2592} else {2593eor = GetLRFromLevel(level);2594}25952596do {2597/* determine start and limit of the run (end points just behind the run) */25982599/* the values for this run's start are the same as for the previous run's end */2600start = limit;2601level = nextLevel;2602if ((start > 0) && (NoContextRTL(dirProps[start - 1]) == B)) {2603/* except if this is a new paragraph, then set sor = para level */2604sor = GetLRFromLevel(GetParaLevelAt(start));2605} else {2606sor = eor;2607}26082609/* search for the limit of this run */2610while (++limit < length && levels[limit] == level) {}26112612/* get the correct level of the next run */2613if (limit < length) {2614nextLevel = levels[limit];2615} else {2616nextLevel = GetParaLevelAt(length - 1);2617}26182619/* determine eor from max(level, nextLevel); sor is last run's eor */2620if ((level & ~INTERNAL_LEVEL_OVERRIDE) < (nextLevel & ~INTERNAL_LEVEL_OVERRIDE)) {2621eor = GetLRFromLevel(nextLevel);2622} else {2623eor = GetLRFromLevel(level);2624}26252626/* if the run consists of overridden directional types, then there2627are no implicit types to be resolved */2628if ((level & INTERNAL_LEVEL_OVERRIDE) == 0) {2629resolveImplicitLevels(start, limit, sor, eor);2630} else 
{2631/* remove the LEVEL_OVERRIDE flags */2632do {2633levels[start++] &= ~INTERNAL_LEVEL_OVERRIDE;2634} while (start < limit);2635}2636} while (limit < length);2637}26382639/* reset the embedding levels for some non-graphic characters (L1), (X9) */2640adjustWSLevels();26412642break;2643}26442645resultLength += insertPoints.size;2646paraBidi = this; /* mark successful setPara */2647}26482649/**2650* Perform the Unicode Bidi algorithm on a given paragraph, as defined in the2651* <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,2652* version 13,2653* also described in The Unicode Standard, Version 4.0 .<p>2654*2655* This method takes a paragraph of text and computes the2656* left-right-directionality of each character. The text should not2657* contain any Unicode block separators.<p>2658*2659* The RUN_DIRECTION attribute in the text, if present, determines the base2660* direction (left-to-right or right-to-left). If not present, the base2661* direction is computed using the Unicode Bidirectional Algorithm,2662* defaulting to left-to-right if there are no strong directional characters2663* in the text. This attribute, if present, must be applied to all the text2664* in the paragraph.<p>2665*2666* The BIDI_EMBEDDING attribute in the text, if present, represents2667* embedding level information. Negative values from -1 to -62 indicate2668* overrides at the absolute value of the level. Positive values from 1 to2669* 62 indicate embeddings. Where values are zero or not defined, the base2670* embedding level as determined by the base direction is assumed.<p>2671*2672* The NUMERIC_SHAPING attribute in the text, if present, converts European2673* digits to other decimal digits before running the bidi algorithm. 
This2674* attribute, if present, must be applied to all the text in the paragraph.2675*2676* If the entire text is all of the same directionality, then2677* the method may not perform all the steps described by the algorithm,2678* i.e., some levels may not be the same as if all steps were performed.2679* This is not relevant for unidirectional text.<br>2680* For example, in pure LTR text with numbers the numbers would get2681* a resolved level of 2 higher than the surrounding text according to2682* the algorithm. This implementation may set all resolved levels to2683* the same value in such a case.<p>2684*2685* @param paragraph a paragraph of text with optional character and2686* paragraph attribute information2687* @stable ICU 3.82688*/2689public void setPara(AttributedCharacterIterator paragraph)2690{2691byte paraLvl;2692char ch = paragraph.first();2693Boolean runDirection =2694(Boolean) paragraph.getAttribute(TextAttributeConstants.RUN_DIRECTION);2695Object shaper = paragraph.getAttribute(TextAttributeConstants.NUMERIC_SHAPING);2696if (runDirection == null) {2697paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;2698} else {2699paraLvl = (runDirection.equals(TextAttributeConstants.RUN_DIRECTION_LTR)) ?2700(byte)Bidi.DIRECTION_LEFT_TO_RIGHT : (byte)Bidi.DIRECTION_RIGHT_TO_LEFT;2701}27022703byte[] lvls = null;2704int len = paragraph.getEndIndex() - paragraph.getBeginIndex();2705byte[] embeddingLevels = new byte[len];2706char[] txt = new char[len];2707int i = 0;2708while (ch != AttributedCharacterIterator.DONE) {2709txt[i] = ch;2710Integer embedding =2711(Integer) paragraph.getAttribute(TextAttributeConstants.BIDI_EMBEDDING);2712if (embedding != null) {2713byte level = embedding.byteValue();2714if (level == 0) {2715/* no-op */2716} else if (level < 0) {2717lvls = embeddingLevels;2718embeddingLevels[i] = (byte)((0 - level) | INTERNAL_LEVEL_OVERRIDE);2719} else {2720lvls = embeddingLevels;2721embeddingLevels[i] = level;2722}2723}2724ch = paragraph.next();2725++i;2726}27272728if 
(shaper != null) {2729NumericShapings.shape(shaper, txt, 0, len);2730}2731setPara(txt, paraLvl, lvls);2732}27332734/**2735* Specify whether block separators must be allocated level zero,2736* so that successive paragraphs will progress from left to right.2737* This method must be called before <code>setPara()</code>.2738* Paragraph separators (B) may appear in the text. Setting them to level zero2739* means that all paragraph separators (including one possibly appearing2740* in the last text position) are kept in the reordered text after the text2741* that they follow in the source text.2742* When this feature is not enabled, a paragraph separator at the last2743* position of the text before reordering will go to the first position2744* of the reordered text when the paragraph level is odd.2745*2746* @param ordarParaLTR specifies whether paragraph separators (B) must2747* receive level 0, so that successive paragraphs progress from left to right.2748*2749* @see #setPara2750* @stable ICU 3.82751*/2752private void orderParagraphsLTR(boolean ordarParaLTR) {2753orderParagraphsLTR = ordarParaLTR;2754}27552756/**2757* Get the directionality of the text.2758*2759* @return a value of <code>LTR</code>, <code>RTL</code> or <code>MIXED</code>2760* that indicates if the entire text2761* represented by this object is unidirectional,2762* and which direction, or if it is mixed-directional.2763*2764* @throws IllegalStateException if this call is not preceded by a successful2765* call to <code>setPara</code> or <code>setLine</code>2766*2767* @see #LTR2768* @see #RTL2769* @see #MIXED2770* @stable ICU 3.82771*/2772private byte getDirection()2773{2774verifyValidParaOrLine();2775return direction;2776}27772778/**2779* Get the length of the text.2780*2781* @return The length of the text that the <code>Bidi</code> object was2782* created for.2783*2784* @throws IllegalStateException if this call is not preceded by a successful2785* call to <code>setPara</code> or <code>setLine</code>2786* 
@stable ICU 3.82787*/2788public int getLength()2789{2790verifyValidParaOrLine();2791return originalLength;2792}27932794/* paragraphs API methods ------------------------------------------------- */27952796/**2797* Get the paragraph level of the text.2798*2799* @return The paragraph level. If there are multiple paragraphs, their2800* level may vary if the required paraLevel is LEVEL_DEFAULT_LTR or2801* LEVEL_DEFAULT_RTL. In that case, the level of the first paragraph2802* is returned.2803*2804* @throws IllegalStateException if this call is not preceded by a successful2805* call to <code>setPara</code> or <code>setLine</code>2806*2807* @see #LEVEL_DEFAULT_LTR2808* @see #LEVEL_DEFAULT_RTL2809* @see #getParagraph2810* @see #getParagraphByIndex2811* @stable ICU 3.82812*/2813public byte getParaLevel()2814{2815verifyValidParaOrLine();2816return paraLevel;2817}28182819/**2820* Get the index of a paragraph, given a position within the text.<p>2821*2822* @param charIndex is the index of a character within the text, in the2823* range <code>[0..getProcessedLength()-1]</code>.2824*2825* @return The index of the paragraph containing the specified position,2826* starting from 0.2827*2828* @throws IllegalStateException if this call is not preceded by a successful2829* call to <code>setPara</code> or <code>setLine</code>2830* @throws IllegalArgumentException if charIndex is not within the legal range2831*2832* @see com.ibm.icu.text.BidiRun2833* @see #getProcessedLength2834* @stable ICU 3.82835*/2836public int getParagraphIndex(int charIndex)2837{2838verifyValidParaOrLine();2839BidiBase bidi = paraBidi; /* get Para object if Line object */2840verifyRange(charIndex, 0, bidi.length);2841int paraIndex;2842for (paraIndex = 0; charIndex >= bidi.paras[paraIndex]; paraIndex++) {2843}2844return paraIndex;2845}28462847/**2848* <code>setLine()</code> returns a <code>Bidi</code> object to2849* contain the reordering information, especially the resolved levels,2850* for all the characters in a 
* line of text. This line of text is
     * specified by referring to a <code>Bidi</code> object representing
     * this information for a piece of text containing one or more paragraphs,
     * and by specifying a range of indexes in this text.<p>
     * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
     *
     * This is used after calling <code>setPara()</code>
     * for a piece of text, and after line-breaking on that text.
     * It is not necessary if each paragraph is treated as a single line.<p>
     *
     * After line-breaking, rules (L1) and (L2) for the treatment of
     * trailing WS and for reordering are performed on
     * a <code>Bidi</code> object that represents a line.<p>
     *
     * <strong>Important: </strong>the line <code>Bidi</code> object may
     * reference data within the global text <code>Bidi</code> object.
     * You should not alter the content of the global text object until
     * you are finished using the line object.
     *
     * @param bidi forwarded unchanged to <code>BidiLine.setLine</code>;
     *        presumably the public <code>Bidi</code> wrapping this paragraph
     *        object (TODO confirm against the caller).
     *
     * @param bidiBase not used by this method: <code>this</code> is passed to
     *        <code>BidiLine.setLine</code> in its place.
     *
     * @param newBidi forwarded unchanged to <code>BidiLine.setLine</code>;
     *        presumably the <code>Bidi</code> that will represent the line.
     *
     * @param newBidiBase forwarded unchanged to <code>BidiLine.setLine</code>;
     *        presumably the implementation object behind <code>newBidi</code>.
     *
     * @param start is the line's first index into the text.
     *
     * @param limit is just behind the line's last index into the text
     *        (its last index +1).
     *
     * @return a <code>Bidi</code> object that will now represent a line of the text.
     *
     * @throws IllegalStateException if this call is not preceded by a successful
     *         call to <code>setPara</code>
     * @throws IllegalArgumentException if start and limit are not in the range
     *         <code>0&lt;=start&lt;limit&lt;=getProcessedLength()</code>,
     *         or if the specified line crosses a paragraph boundary
     *
     * @see #setPara
     * @see #getProcessedLength
     * @stable ICU 3.8
     */
    public Bidi setLine(Bidi bidi, BidiBase bidiBase, Bidi newBidi, BidiBase newBidiBase, int start, int limit)
    {
        /* only a paragraph object (not a line) may be cut into lines */
        verifyValidPara();
        /* start is checked against limit, limit against the text length */
        verifyRange(start, 0, limit);
        verifyRange(limit, 0, length+1);

        /* note: the bidiBase argument is ignored, this object is the source */
        return BidiLine.setLine(bidi, this, newBidi, newBidiBase, start, limit);
    }

    /**
     * Get the level for one character.
     *
     * @param charIndex the
index of a character.2899*2900* @return The level for the character at <code>charIndex</code>.2901*2902* @throws IllegalStateException if this call is not preceded by a successful2903* call to <code>setPara</code> or <code>setLine</code>2904* @throws IllegalArgumentException if charIndex is not in the range2905* <code>0<=charIndex<getProcessedLength()</code>2906*2907* @see #getProcessedLength2908* @stable ICU 3.82909*/2910public byte getLevelAt(int charIndex)2911{2912if (charIndex < 0 || charIndex >= length) {2913return (byte)getBaseLevel();2914}2915verifyValidParaOrLine();2916verifyRange(charIndex, 0, length);2917return BidiLine.getLevelAt(this, charIndex);2918}29192920/**2921* Get an array of levels for each character.<p>2922*2923* Note that this method may allocate memory under some2924* circumstances, unlike <code>getLevelAt()</code>.2925*2926* @return The levels array for the text,2927* or <code>null</code> if an error occurs.2928*2929* @throws IllegalStateException if this call is not preceded by a successful2930* call to <code>setPara</code> or <code>setLine</code>2931* @stable ICU 3.82932*/2933private byte[] getLevels()2934{2935verifyValidParaOrLine();2936if (length <= 0) {2937return new byte[0];2938}2939return BidiLine.getLevels(this);2940}29412942/**2943* Get the number of runs.2944* This method may invoke the actual reordering on the2945* <code>Bidi</code> object, after <code>setPara()</code>2946* may have resolved only the levels of the text. 
* Therefore,
     * <code>countRuns()</code> may have to allocate memory,
     * and may throw an exception if it fails to do so.
     *
     * @return The number of runs.
     *
     * @throws IllegalStateException if this call is not preceded by a successful
     *         call to <code>setPara</code> or <code>setLine</code>
     * @stable ICU 3.8
     */
    public int countRuns()
    {
        verifyValidParaOrLine();
        /* called for its side effect; runCount is read back from this
         * object afterwards */
        BidiLine.getRuns(this);
        return runCount;
    }

    /**
     * Get a visual-to-logical index map (array) for the characters in the
     * <code>Bidi</code> (paragraph or line) object.
     * <p>
     * Some values in the map may be <code>MAP_NOWHERE</code> if the
     * corresponding text characters are Bidi marks inserted in the visual
     * output by the option <code>OPTION_INSERT_MARKS</code>.
     * <p>
     * When the visual output is altered by using options of
     * <code>writeReordered()</code> such as <code>INSERT_LRM_FOR_NUMERIC</code>,
     * <code>KEEP_BASE_COMBINING</code>, <code>OUTPUT_REVERSE</code>,
     * <code>REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
     * be correct.
It is advised to use, when possible, reordering options2976* such as {@link #OPTION_INSERT_MARKS} and {@link #OPTION_REMOVE_CONTROLS}.2977*2978* @return an array of <code>getResultLength()</code>2979* indexes which will reflect the reordering of the characters.<br><br>2980* The index map will result in2981* <code>indexMap[visualIndex]==logicalIndex</code>, where2982* <code>indexMap</code> represents the returned array.2983*2984* @throws IllegalStateException if this call is not preceded by a successful2985* call to <code>setPara</code> or <code>setLine</code>2986*2987* @see #getLogicalMap2988* @see #getLogicalIndex2989* @see #getResultLength2990* @see #MAP_NOWHERE2991* @see #OPTION_INSERT_MARKS2992* @see #writeReordered2993* @stable ICU 3.82994*/2995private int[] getVisualMap()2996{2997/* countRuns() checks successful call to setPara/setLine */2998countRuns();2999if (resultLength <= 0) {3000return new int[0];3001}3002return BidiLine.getVisualMap(this);3003}30043005/**3006* This is a convenience method that does not use a <code>Bidi</code> object.3007* It is intended to be used for when an application has determined the levels3008* of objects (character sequences) and just needs to have them reordered (L2).3009* This is equivalent to using <code>getVisualMap()</code> on a3010* <code>Bidi</code> object.3011*3012* @param levels is an array of levels that have been determined by3013* the application.3014*3015* @return an array of <code>levels.length</code>3016* indexes which will reflect the reordering of the characters.<p>3017* The index map will result in3018* <code>indexMap[visualIndex]==logicalIndex</code>, where3019* <code>indexMap</code> represents the returned array.3020*3021* @stable ICU 3.83022*/3023private static int[] reorderVisual(byte[] levels)3024{3025return BidiLine.reorderVisual(levels);3026}30273028/**3029* Constant indicating that the base direction depends on the first strong3030* directional character in the text according to the Unicode 
* Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is left-to-right.
     * @stable ICU 3.8
     */
    private static final int INTERNAL_DIRECTION_DEFAULT_LEFT_TO_RIGHT = 0x7e;

    /**
     * Constant indicating that the base direction depends on the first strong
     * directional character in the text according to the Unicode Bidirectional
     * Algorithm. If no strong directional character is present, the base
     * direction is right-to-left.
     * <p>
     * NOTE(review): "INTERMAL" looks like a typo for "INTERNAL" (compare the
     * constant above). It is not renamed here because other uses in the file
     * are not visible from this section.
     * @stable ICU 3.8
     */
    private static final int INTERMAL_DIRECTION_DEFAULT_RIGHT_TO_LEFT = 0x7f;

    /**
     * Create Bidi from the given text, embedding, and direction information.
     * The embeddings array may be null.  If present, the values represent
     * embedding level information.  Negative values from -1 to -61 indicate
     * overrides at the absolute value of the level.  Positive values from 1 to
     * 61 indicate embeddings.  Where values are zero, the base embedding level
     * as determined by the base direction is assumed.<p>
     *
     * Note: this constructor calls setPara() internally.
     *
     * @param text an array containing the paragraph of text to process.
     * @param textStart the index into the text array of the start of the
     *        paragraph.
     * @param embeddings an array containing embedding values for each character
     *        in the paragraph.  This can be null, in which case it is assumed
     *        that there is no external embedding information.
     * @param embStart the index into the embedding array of the start of the
     *        paragraph.
     * @param paragraphLength the length of the paragraph in the text and
     *        embeddings arrays.
     * @param flags a collection of flags that control the algorithm.  The
     *        algorithm understands the flags DIRECTION_LEFT_TO_RIGHT,
     *        DIRECTION_RIGHT_TO_LEFT, DIRECTION_DEFAULT_LEFT_TO_RIGHT, and
     *        DIRECTION_DEFAULT_RIGHT_TO_LEFT.
Other values are reserved.3070*3071* @throws IllegalArgumentException if the values in embeddings are3072* not within the allowed range3073*3074* @see #DIRECTION_LEFT_TO_RIGHT3075* @see #DIRECTION_RIGHT_TO_LEFT3076* @see #DIRECTION_DEFAULT_LEFT_TO_RIGHT3077* @see #DIRECTION_DEFAULT_RIGHT_TO_LEFT3078* @stable ICU 3.83079*/3080public BidiBase(char[] text,3081int textStart,3082byte[] embeddings,3083int embStart,3084int paragraphLength,3085int flags)3086{3087this(0, 0);3088byte paraLvl;3089switch (flags) {3090case Bidi.DIRECTION_LEFT_TO_RIGHT:3091default:3092paraLvl = Bidi.DIRECTION_LEFT_TO_RIGHT;3093break;3094case Bidi.DIRECTION_RIGHT_TO_LEFT:3095paraLvl = Bidi.DIRECTION_RIGHT_TO_LEFT;3096break;3097case Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT:3098paraLvl = INTERNAL_LEVEL_DEFAULT_LTR;3099break;3100case Bidi.DIRECTION_DEFAULT_RIGHT_TO_LEFT:3101paraLvl = INTERNAL_LEVEL_DEFAULT_RTL;3102break;3103}3104byte[] paraEmbeddings;3105if (embeddings == null) {3106paraEmbeddings = null;3107} else {3108paraEmbeddings = new byte[paragraphLength];3109byte lev;3110for (int i = 0; i < paragraphLength; i++) {3111lev = embeddings[i + embStart];3112if (lev < 0) {3113lev = (byte)((- lev) | INTERNAL_LEVEL_OVERRIDE);3114} else if (lev == 0) {3115lev = paraLvl;3116if (paraLvl > MAX_EXPLICIT_LEVEL) {3117lev &= 1;3118}3119}3120paraEmbeddings[i] = lev;3121}3122}3123if (textStart == 0 && embStart == 0 && paragraphLength == text.length) {3124setPara(text, paraLvl, paraEmbeddings);3125} else {3126char[] paraText = new char[paragraphLength];3127System.arraycopy(text, textStart, paraText, 0, paragraphLength);3128setPara(paraText, paraLvl, paraEmbeddings);3129}3130}31313132/**3133* Return true if the line is not left-to-right or right-to-left. 
This means3134* it either has mixed runs of left-to-right and right-to-left text, or the3135* base direction differs from the direction of the only run of text.3136*3137* @return true if the line is not left-to-right or right-to-left.3138*3139* @throws IllegalStateException if this call is not preceded by a successful3140* call to <code>setPara</code>3141* @stable ICU 3.83142*/3143public boolean isMixed()3144{3145return (!isLeftToRight() && !isRightToLeft());3146}31473148/**3149* Return true if the line is all left-to-right text and the base direction3150* is left-to-right.3151*3152* @return true if the line is all left-to-right text and the base direction3153* is left-to-right.3154*3155* @throws IllegalStateException if this call is not preceded by a successful3156* call to <code>setPara</code>3157* @stable ICU 3.83158*/3159public boolean isLeftToRight()3160{3161return (getDirection() == Bidi.DIRECTION_LEFT_TO_RIGHT && (paraLevel & 1) == 0);3162}31633164/**3165* Return true if the line is all right-to-left text, and the base direction3166* is right-to-left3167*3168* @return true if the line is all right-to-left text, and the base3169* direction is right-to-left3170*3171* @throws IllegalStateException if this call is not preceded by a successful3172* call to <code>setPara</code>3173* @stable ICU 3.83174*/3175public boolean isRightToLeft()3176{3177return (getDirection() == Bidi.DIRECTION_RIGHT_TO_LEFT && (paraLevel & 1) == 1);3178}31793180/**3181* Return true if the base direction is left-to-right3182*3183* @return true if the base direction is left-to-right3184*3185* @throws IllegalStateException if this call is not preceded by a successful3186* call to <code>setPara</code> or <code>setLine</code>3187*3188* @stable ICU 3.83189*/3190public boolean baseIsLeftToRight()3191{3192return (getParaLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT);3193}31943195/**3196* Return the base level (0 if left-to-right, 1 if right-to-left).3197*3198* @return the base level3199*3200* @throws 
IllegalStateException if this call is not preceded by a successful3201* call to <code>setPara</code> or <code>setLine</code>3202*3203* @stable ICU 3.83204*/3205public int getBaseLevel()3206{3207return getParaLevel();3208}32093210/**3211* Compute the logical to visual run mapping3212*/3213private void getLogicalToVisualRunsMap()3214{3215if (isGoodLogicalToVisualRunsMap) {3216return;3217}3218int count = countRuns();3219if ((logicalToVisualRunsMap == null) ||3220(logicalToVisualRunsMap.length < count)) {3221logicalToVisualRunsMap = new int[count];3222}3223int i;3224long[] keys = new long[count];3225for (i = 0; i < count; i++) {3226keys[i] = ((long)(runs[i].start)<<32) + i;3227}3228Arrays.sort(keys);3229for (i = 0; i < count; i++) {3230logicalToVisualRunsMap[i] = (int)(keys[i] & 0x00000000FFFFFFFF);3231}3232keys = null;3233isGoodLogicalToVisualRunsMap = true;3234}32353236/**3237* Return the level of the nth logical run in this line.3238*3239* @param run the index of the run, between 0 and <code>countRuns()-1</code>3240*3241* @return the level of the run3242*3243* @throws IllegalStateException if this call is not preceded by a successful3244* call to <code>setPara</code> or <code>setLine</code>3245* @throws IllegalArgumentException if <code>run</code> is not in3246* the range <code>0<=run<countRuns()</code>3247* @stable ICU 3.83248*/3249public int getRunLevel(int run)3250{3251verifyValidParaOrLine();3252BidiLine.getRuns(this);3253if (run < 0 || run >= runCount) {3254return getParaLevel();3255}3256getLogicalToVisualRunsMap();3257return runs[logicalToVisualRunsMap[run]].level;3258}32593260/**3261* Return the index of the character at the start of the nth logical run in3262* this line, as an offset from the start of the line.3263*3264* @param run the index of the run, between 0 and <code>countRuns()</code>3265*3266* @return the start of the run3267*3268* @throws IllegalStateException if this call is not preceded by a successful3269* call to <code>setPara</code> or 
<code>setLine</code>3270* @throws IllegalArgumentException if <code>run</code> is not in3271* the range <code>0<=run<countRuns()</code>3272* @stable ICU 3.83273*/3274public int getRunStart(int run)3275{3276verifyValidParaOrLine();3277BidiLine.getRuns(this);3278if (runCount == 1) {3279return 0;3280} else if (run == runCount) {3281return length;3282}3283verifyIndex(run, 0, runCount);3284getLogicalToVisualRunsMap();3285return runs[logicalToVisualRunsMap[run]].start;3286}32873288/**3289* Return the index of the character past the end of the nth logical run in3290* this line, as an offset from the start of the line. For example, this3291* will return the length of the line for the last run on the line.3292*3293* @param run the index of the run, between 0 and <code>countRuns()</code>3294*3295* @return the limit of the run3296*3297* @throws IllegalStateException if this call is not preceded by a successful3298* call to <code>setPara</code> or <code>setLine</code>3299* @throws IllegalArgumentException if <code>run</code> is not in3300* the range <code>0<=run<countRuns()</code>3301* @stable ICU 3.83302*/3303public int getRunLimit(int run)3304{3305verifyValidParaOrLine();3306BidiLine.getRuns(this);3307if (runCount == 1) {3308return length;3309}3310verifyIndex(run, 0, runCount);3311getLogicalToVisualRunsMap();3312int idx = logicalToVisualRunsMap[run];3313int len = idx == 0 ? runs[idx].limit :3314runs[idx].limit - runs[idx-1].limit;3315return runs[idx].start + len;3316}33173318/**3319* Return true if the specified text requires bidi analysis. If this returns3320* false, the text will display left-to-right. Clients can then avoid3321* constructing a Bidi object. 
Text in the Arabic Presentation Forms area of3322* Unicode is presumed to already be shaped and ordered for display, and so3323* will not cause this method to return true.3324*3325* @param text the text containing the characters to test3326* @param start the start of the range of characters to test3327* @param limit the limit of the range of characters to test3328*3329* @return true if the range of characters requires bidi analysis3330*3331* @stable ICU 3.83332*/3333public static boolean requiresBidi(char[] text,3334int start,3335int limit)3336{3337final int RTLMask = (1 << Bidi.DIRECTION_RIGHT_TO_LEFT |33381 << AL |33391 << RLE |33401 << RLO |33411 << AN);33423343if (0 > start || start > limit || limit > text.length) {3344throw new IllegalArgumentException("Value start " + start +3345" is out of range 0 to " + limit);3346}3347for (int i = start; i < limit; ++i) {3348if (Character.isHighSurrogate(text[i]) && i < (limit-1) &&3349Character.isLowSurrogate(text[i+1])) {3350if (((1 << UCharacter.getDirection(Character.codePointAt(text, i))) & RTLMask) != 0) {3351return true;3352}3353} else if (((1 << UCharacter.getDirection(text[i])) & RTLMask) != 0) {3354return true;3355}3356}3357return false;3358}33593360/**3361* Reorder the objects in the array into visual order based on their levels.3362* This is a utility method to use when you have a collection of objects3363* representing runs of text in logical order, each run containing text at a3364* single level. 
The elements at <code>index</code> from3365* <code>objectStart</code> up to <code>objectStart + count</code> in the3366* objects array will be reordered into visual order assuming3367* each run of text has the level indicated by the corresponding element in3368* the levels array (at <code>index - objectStart + levelStart</code>).3369*3370* @param levels an array representing the bidi level of each object3371* @param levelStart the start position in the levels array3372* @param objects the array of objects to be reordered into visual order3373* @param objectStart the start position in the objects array3374* @param count the number of objects to reorder3375* @stable ICU 3.83376*/3377public static void reorderVisually(byte[] levels,3378int levelStart,3379Object[] objects,3380int objectStart,3381int count)3382{3383if (0 > levelStart || levels.length <= levelStart) {3384throw new IllegalArgumentException("Value levelStart " +3385levelStart + " is out of range 0 to " +3386(levels.length-1));3387}3388if (0 > objectStart || objects.length <= objectStart) {3389throw new IllegalArgumentException("Value objectStart " +3390levelStart + " is out of range 0 to " +3391(objects.length-1));3392}3393if (0 > count || objects.length < (objectStart+count)) {3394throw new IllegalArgumentException("Value count " +3395levelStart + " is out of range 0 to " +3396(objects.length - objectStart));3397}3398byte[] reorderLevels = new byte[count];3399System.arraycopy(levels, levelStart, reorderLevels, 0, count);3400int[] indexMap = reorderVisual(reorderLevels);3401Object[] temp = new Object[count];3402System.arraycopy(objects, objectStart, temp, 0, count);3403for (int i = 0; i < count; ++i) {3404objects[objectStart + i] = temp[indexMap[i]];3405}3406}34073408/**3409* Display the bidi internal state, used in debugging.3410*/3411public String toString() {3412StringBuilder buf = new StringBuilder(getClass().getName());34133414buf.append("[dir: ");3415buf.append(direction);3416buf.append(" baselevel: 
");3417buf.append(paraLevel);3418buf.append(" length: ");3419buf.append(length);3420buf.append(" runs: ");3421if (levels == null) {3422buf.append("none");3423} else {3424buf.append('[');3425buf.append(levels[0]);3426for (int i = 1; i < levels.length; i++) {3427buf.append(' ');3428buf.append(levels[i]);3429}3430buf.append(']');3431}3432buf.append(" text: [0x");3433buf.append(Integer.toHexString(text[0]));3434for (int i = 1; i < text.length; i++) {3435buf.append(" 0x");3436buf.append(Integer.toHexString(text[i]));3437}3438buf.append("]]");34393440return buf.toString();3441}34423443/**3444* A class that provides access to constants defined by3445* java.awt.font.TextAttribute without creating a static dependency.3446*/3447private static class TextAttributeConstants {3448private static final Class<?> clazz = getClass("java.awt.font.TextAttribute");34493450/**3451* TextAttribute instances (or a fake Attribute type if3452* java.awt.font.TextAttribute is not present)3453*/3454static final AttributedCharacterIterator.Attribute RUN_DIRECTION =3455getTextAttribute("RUN_DIRECTION");3456static final AttributedCharacterIterator.Attribute NUMERIC_SHAPING =3457getTextAttribute("NUMERIC_SHAPING");3458static final AttributedCharacterIterator.Attribute BIDI_EMBEDDING =3459getTextAttribute("BIDI_EMBEDDING");34603461/**3462* TextAttribute.RUN_DIRECTION_LTR3463*/3464static final Boolean RUN_DIRECTION_LTR = (clazz == null) ?3465Boolean.FALSE : (Boolean)getStaticField(clazz, "RUN_DIRECTION_LTR");346634673468private static Class<?> getClass(String name) {3469try {3470return Class.forName(name, true, null);3471} catch (ClassNotFoundException e) {3472return null;3473}3474}34753476private static Object getStaticField(Class<?> clazz, String name) {3477try {3478Field f = clazz.getField(name);3479return f.get(null);3480} catch (NoSuchFieldException | IllegalAccessException x) {3481throw new AssertionError(x);3482}3483}34843485@SuppressWarnings("serial")3486private static 
AttributedCharacterIterator.Attribute3487getTextAttribute(String name)3488{3489if (clazz == null) {3490// fake attribute3491return new AttributedCharacterIterator.Attribute(name) { };3492} else {3493return (AttributedCharacterIterator.Attribute)getStaticField(clazz, name);3494}3495}3496}34973498/**3499* A class that provides access to java.awt.font.NumericShaping without3500* creating a static dependency.3501*/3502private static class NumericShapings {3503private static final Class<?> clazz =3504getClass("java.awt.font.NumericShaper");3505private static final Method shapeMethod =3506getMethod(clazz, "shape", char[].class, int.class, int.class);35073508private static Class<?> getClass(String name) {3509try {3510return Class.forName(name, true, null);3511} catch (ClassNotFoundException e) {3512return null;3513}3514}35153516private static Method getMethod(Class<?> clazz,3517String name,3518Class<?>... paramTypes)3519{3520if (clazz != null) {3521try {3522return clazz.getMethod(name, paramTypes);3523} catch (NoSuchMethodException e) {3524throw new AssertionError(e);3525}3526} else {3527return null;3528}3529}35303531/**3532* Invokes NumericShaping shape(text,start,count) method.3533*/3534static void shape(Object shaper, char[] text, int start, int count) {3535if (shapeMethod == null)3536throw new AssertionError("Should not get here");3537try {3538shapeMethod.invoke(shaper, text, start, count);3539} catch (InvocationTargetException e) {3540Throwable cause = e.getCause();3541if (cause instanceof RuntimeException)3542throw (RuntimeException)cause;3543throw new AssertionError(e);3544} catch (IllegalAccessException iae) {3545throw new AssertionError(iae);3546}3547}3548}3549}355035513552