Path: blob/master/modules/text_server_adv/script_iterator.cpp
20937 views
/**************************************************************************/1/* script_iterator.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#include "script_iterator.h"3132// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp3334inline constexpr UChar32 ZERO_WIDTH_JOINER = 0x200d;35inline constexpr UChar32 VARIATION_SELECTOR_15 = 0xfe0e;36inline constexpr UChar32 VARIATION_SELECTOR_16 = 0xfe0f;37inline constexpr UChar32 COMBINING_ENCLOSING_KEYCAP = 0x20e3;3839inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {40return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;41}4243inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {44if (p_next == VARIATION_SELECTOR_15 && (u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC))) {45return false;46} else if (p_next == VARIATION_SELECTOR_16 && (u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC))) {47return true;48} else {49return u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER) || u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR);50}51}5253ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {54struct ParenStackEntry {55int pair_index;56UScriptCode script_code;57};5859struct EmojiSubrunEntry {60int start;61int end;62};6364if (p_start >= p_length) {65p_start = p_length - 1;66}6768if (p_start < 0) {69p_start = 0;70}7172int paren_size = PAREN_STACK_DEPTH;73ParenStackEntry starter_paren_stack[PAREN_STACK_DEPTH];74ParenStackEntry *paren_stack = starter_paren_stack;7576int emoji_size = EMOJI_STACK_DEPTH;77EmojiSubrunEntry starter_emoji_stack[EMOJI_STACK_DEPTH];78EmojiSubrunEntry *emoji_stack = starter_emoji_stack;7980int script_start;81int script_end = p_start;82UScriptCode script_code;83int paren_sp = -1;84int start_sp = paren_sp;85UErrorCode err = U_ZERO_ERROR;86const char32_t *str = p_string.ptr();8788do {89script_code = USCRIPT_COMMON;90int emoji_sp = -1;91bool emoji_run = false;92for (script_start = script_end; script_end < p_length; script_end++) {93UChar32 ch = str[script_end];94UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;95if (is_emoji(ch, n)) {96if (!emoji_run) {97emoji_run = true;98emoji_sp++;99if (unlikely(emoji_sp >= emoji_size)) {100emoji_size += EMOJI_STACK_DEPTH;101if (emoji_stack == starter_emoji_stack) {102emoji_stack = static_cast<EmojiSubrunEntry *>(memalloc(emoji_size * sizeof(EmojiSubrunEntry)));103} else {104emoji_stack = static_cast<EmojiSubrunEntry *>(memrealloc(emoji_stack, emoji_size * sizeof(EmojiSubrunEntry)));105}106}107emoji_stack[emoji_sp].start = script_end;108emoji_stack[emoji_sp].end = script_end;109}110} else if (emoji_run && ch != ZERO_WIDTH_JOINER && ch != VARIATION_SELECTOR_16 && ch != COMBINING_ENCLOSING_KEYCAP && !(u_hasBinaryProperty(ch, UCHAR_EXTENDED_PICTOGRAPHIC) && n != VARIATION_SELECTOR_15)) {111emoji_run = false;112emoji_stack[emoji_sp].end = script_end;113}114115UScriptCode sc = uscript_getScript(ch, &err);116if (U_FAILURE(err)) {117if (paren_stack != starter_paren_stack) {118memfree(paren_stack);119}120ERR_FAIL_MSG(u_errorName(err));121}122123if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {124if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {125// If it's an open character, push it onto the stack.126paren_sp++;127if (unlikely(paren_sp >= paren_size)) {128// If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.129paren_size += PAREN_STACK_DEPTH;130if (paren_stack == starter_paren_stack) {131paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));132} else {133paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));134}135}136paren_stack[paren_sp].pair_index = ch;137paren_stack[paren_sp].script_code = script_code;138} else if (paren_sp >= 0) {139// If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.140UChar32 paired_ch = u_getBidiPairedBracket(ch);141while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {142paren_sp -= 1;143}144if (paren_sp < start_sp) {145start_sp = paren_sp;146}147if (paren_sp >= 0) {148sc = paren_stack[paren_sp].script_code;149}150}151}152153if (same_script(script_code, sc)) {154if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {155script_code = sc;156// Now that we have a final script code, fix any open characters we pushed before we knew the script code.157while (start_sp < paren_sp) {158paren_stack[++start_sp].script_code = script_code;159}160}161if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) {162// If this character is a close paired character pop the matching open character from the stack.163paren_sp -= 1;164if (start_sp >= 0) {165start_sp -= 1;166}167}168} else {169break;170}171}172if (emoji_run) {173emoji_stack[emoji_sp].end = script_end;174}175176for (int sub = 0; sub <= emoji_sp; sub++) {177if (emoji_stack[sub].start > script_start) {178ScriptRange rng;179rng.script = hb_icu_script_to_script(script_code);180rng.start = script_start;181rng.end = emoji_stack[sub].start;182script_ranges.push_back(rng);183}184ScriptRange rng;185rng.script = (hb_script_t)HB_TAG('Z', 's', 'y', 'e');186rng.start = emoji_stack[sub].start;187rng.end = emoji_stack[sub].end;188script_ranges.push_back(rng);189190script_start = emoji_stack[sub].end;191}192if (script_start != script_end) {193ScriptRange rng;194rng.script = hb_icu_script_to_script(script_code);195rng.start = script_start;196rng.end = script_end;197script_ranges.push_back(rng);198}199200if (emoji_stack != starter_emoji_stack) {201memfree(emoji_stack);202}203} while (script_end < p_length);204205if (paren_stack != starter_paren_stack) {206memfree(paren_stack);207}208}209210211