Path: blob/master/modules/text_server_adv/script_iterator.cpp
11351 views
/**************************************************************************/1/* script_iterator.cpp */2/**************************************************************************/3/* This file is part of: */4/* GODOT ENGINE */5/* https://godotengine.org */6/**************************************************************************/7/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */8/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */9/* */10/* Permission is hereby granted, free of charge, to any person obtaining */11/* a copy of this software and associated documentation files (the */12/* "Software"), to deal in the Software without restriction, including */13/* without limitation the rights to use, copy, modify, merge, publish, */14/* distribute, sublicense, and/or sell copies of the Software, and to */15/* permit persons to whom the Software is furnished to do so, subject to */16/* the following conditions: */17/* */18/* The above copyright notice and this permission notice shall be */19/* included in all copies or substantial portions of the Software. */20/* */21/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */22/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */23/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */24/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */25/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */26/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */27/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */28/**************************************************************************/2930#include "script_iterator.h"3132// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp3334inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {35return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;36}3738inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {39if (p_next == 0xFE0E) { // Variation Selector-1540return false;41} else if (p_next == 0xFE0F) { // Variation Selector-1642return true;43} else {44return u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION) || u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER) || u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC);45}46}4748ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {49struct ParenStackEntry {50int pair_index;51UScriptCode script_code;52};5354if (p_start >= p_length) {55p_start = p_length - 1;56}5758if (p_start < 0) {59p_start = 0;60}6162int paren_size = PAREN_STACK_DEPTH;63ParenStackEntry *paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));6465int script_start;66int script_end = p_start;67UScriptCode script_code;68int paren_sp = -1;69int start_sp = paren_sp;70UErrorCode err = U_ZERO_ERROR;71const char32_t *str = p_string.ptr();7273do {74script_code = USCRIPT_COMMON;75for (script_start = script_end; script_end < p_length; script_end++) {76UChar32 ch = str[script_end];77UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;78UScriptCode sc = uscript_getScript(ch, &err);79if (U_FAILURE(err)) {80memfree(paren_stack);81ERR_FAIL_MSG(u_errorName(err));82}83if (is_emoji(ch, n)) {84sc = USCRIPT_SYMBOLS_EMOJI;85}8687if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {88if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {89// If it's an open character, push it onto the stack.90paren_sp++;91if (unlikely(paren_sp >= paren_size)) {92// If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.93paren_size += PAREN_STACK_DEPTH;94paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));95}96paren_stack[paren_sp].pair_index = ch;97paren_stack[paren_sp].script_code = script_code;98} else if (paren_sp >= 0) {99// If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.100UChar32 paired_ch = u_getBidiPairedBracket(ch);101while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {102paren_sp -= 1;103}104if (paren_sp < start_sp) {105start_sp = paren_sp;106}107if (paren_sp >= 0) {108sc = paren_stack[paren_sp].script_code;109}110}111}112113if (script_code == USCRIPT_SYMBOLS_EMOJI && script_code != sc) {114UCharCategory cat = (UCharCategory)u_charType(ch);115if ((cat >= U_SPACE_SEPARATOR && cat <= U_CONTROL_CHAR) || (cat >= U_DASH_PUNCTUATION && cat <= U_OTHER_PUNCTUATION) || (cat >= U_INITIAL_PUNCTUATION && cat <= U_FINAL_PUNCTUATION)) {116break;117}118} else if (same_script(script_code, sc)) {119if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {120script_code = sc;121// Now that we have a final script code, fix any open characters we pushed before we knew the script code.122while (start_sp < paren_sp) {123paren_stack[++start_sp].script_code = script_code;124}125}126if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) {127// If this character is a close paired character pop the matching open character from the stack.128paren_sp -= 1;129if (start_sp >= 0) {130start_sp -= 1;131}132}133} else {134break;135}136}137138ScriptRange rng;139rng.script = hb_icu_script_to_script(script_code);140rng.start = script_start;141rng.end = script_end;142143script_ranges.push_back(rng);144} while (script_end < p_length);145146memfree(paren_stack);147}148149150