Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/modules/text_server_adv/script_iterator.cpp
20937 views
1
/**************************************************************************/
2
/* script_iterator.cpp */
3
/**************************************************************************/
4
/* This file is part of: */
5
/* GODOT ENGINE */
6
/* https://godotengine.org */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10
/* */
11
/* Permission is hereby granted, free of charge, to any person obtaining */
12
/* a copy of this software and associated documentation files (the */
13
/* "Software"), to deal in the Software without restriction, including */
14
/* without limitation the rights to use, copy, modify, merge, publish, */
15
/* distribute, sublicense, and/or sell copies of the Software, and to */
16
/* permit persons to whom the Software is furnished to do so, subject to */
17
/* the following conditions: */
18
/* */
19
/* The above copyright notice and this permission notice shall be */
20
/* included in all copies or substantial portions of the Software. */
21
/* */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29
/**************************************************************************/
30
31
#include "script_iterator.h"
32
33
// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
34
35
// Code points that get special treatment while emoji sub-runs are detected in this file.

// U+200D: encountering it does not terminate an emoji run that is already in progress.
inline constexpr UChar32 ZERO_WIDTH_JOINER = 0x200d;
36
// U+FE0E: an Emoji / Extended_Pictographic character followed by it is not treated as emoji by is_emoji().
inline constexpr UChar32 VARIATION_SELECTOR_15 = 0xfe0e;
37
// U+FE0F: an Emoji / Extended_Pictographic character followed by it is treated as emoji by is_emoji();
// the selector itself does not terminate an emoji run.
inline constexpr UChar32 VARIATION_SELECTOR_16 = 0xfe0f;
38
// U+20E3: encountering it does not terminate an emoji run that is already in progress.
inline constexpr UChar32 COMBINING_ENCLOSING_KEYCAP = 0x20e3;
39
40
inline bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
41
return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;
42
}
43
44
// Decides whether p_c should be rendered as emoji, taking the following code point p_next into account.
//
// - If p_next is a variation selector (15 or 16) and p_c has the Emoji or Extended_Pictographic property,
//   the selector decides: selector 16 yields true, selector 15 yields false.
// - In every other case, p_c is emoji when it has the Emoji_Presentation, Emoji_Modifier or
//   Regional_Indicator property.
inline bool ScriptIterator::is_emoji(UChar32 p_c, UChar32 p_next) {
	const bool followed_by_selector = (p_next == VARIATION_SELECTOR_15) || (p_next == VARIATION_SELECTOR_16);
	if (followed_by_selector) {
		const bool selectable = u_hasBinaryProperty(p_c, UCHAR_EMOJI) || u_hasBinaryProperty(p_c, UCHAR_EXTENDED_PICTOGRAPHIC);
		if (selectable) {
			return p_next == VARIATION_SELECTOR_16;
		}
	}

	if (u_hasBinaryProperty(p_c, UCHAR_EMOJI_PRESENTATION)) {
		return true;
	}
	if (u_hasBinaryProperty(p_c, UCHAR_EMOJI_MODIFIER)) {
		return true;
	}
	return u_hasBinaryProperty(p_c, UCHAR_REGIONAL_INDICATOR);
}
53
54
// Splits the characters of p_string between p_start and p_length into consecutive ranges that share
// one script, and appends them to script_ranges. Emoji sequences found inside a script run are emitted
// as separate ranges tagged with the 'Zsye' script.
//
// p_string: text to analyze.
// p_start:  index of the first character to process; clamped to [0, p_length - 1] (0 if p_length <= 0).
// p_length: exclusive end index of the processed region (the scan runs while the index is < p_length).
//           NOTE(review): it is not checked against the actual string length; callers presumably pass
//           a value that does not exceed it.
//
// If ICU reports an error while resolving a script, the error is reported through ERR_FAIL_MSG and the
// constructor returns early, leaving only the ranges collected so far.
ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
	struct ParenStackEntry {
		int pair_index; // Code point of the opening bracket.
		UScriptCode script_code; // Script that was active when the bracket was pushed.
	};

	struct EmojiSubrunEntry {
		int start; // Index of the first character of the emoji sub-run.
		int end; // Index one past the last character of the emoji sub-run.
	};

	if (p_start >= p_length) {
		p_start = p_length - 1;
	}

	if (p_start < 0) {
		p_start = 0;
	}

	// Both stacks start in fixed-size local buffers and move to the heap only if they overflow.
	int paren_size = PAREN_STACK_DEPTH;
	ParenStackEntry starter_paren_stack[PAREN_STACK_DEPTH];
	ParenStackEntry *paren_stack = starter_paren_stack;

	int emoji_size = EMOJI_STACK_DEPTH;
	EmojiSubrunEntry starter_emoji_stack[EMOJI_STACK_DEPTH];
	EmojiSubrunEntry *emoji_stack = starter_emoji_stack;

	int script_start;
	int script_end = p_start;
	UScriptCode script_code;
	int paren_sp = -1;
	int start_sp = paren_sp;
	UErrorCode err = U_ZERO_ERROR;
	const char32_t *str = p_string.ptr();

	do {
		script_code = USCRIPT_COMMON;
		// The emoji stack is logically reset for every script run; its storage is reused.
		int emoji_sp = -1;
		bool emoji_run = false;
		for (script_start = script_end; script_end < p_length; script_end++) {
			UChar32 ch = str[script_end];
			UChar32 n = (script_end + 1 < p_length) ? str[script_end + 1] : 0;
			if (is_emoji(ch, n)) {
				if (!emoji_run) {
					// Start a new emoji sub-run.
					emoji_run = true;
					emoji_sp++;
					if (unlikely(emoji_sp >= emoji_size)) {
						emoji_size += EMOJI_STACK_DEPTH;
						if (emoji_stack == starter_emoji_stack) {
							emoji_stack = static_cast<EmojiSubrunEntry *>(memalloc(emoji_size * sizeof(EmojiSubrunEntry)));
							// Carry over the sub-runs already recorded in the local buffer.
							for (int i = 0; i < emoji_sp; i++) {
								emoji_stack[i] = starter_emoji_stack[i];
							}
						} else {
							emoji_stack = static_cast<EmojiSubrunEntry *>(memrealloc(emoji_stack, emoji_size * sizeof(EmojiSubrunEntry)));
						}
					}
					emoji_stack[emoji_sp].start = script_end;
					emoji_stack[emoji_sp].end = script_end;
				}
			} else if (emoji_run && ch != ZERO_WIDTH_JOINER && ch != VARIATION_SELECTOR_16 && ch != COMBINING_ENCLOSING_KEYCAP && !(u_hasBinaryProperty(ch, UCHAR_EXTENDED_PICTOGRAPHIC) && n != VARIATION_SELECTOR_15)) {
				// The current character can not continue the emoji sequence, close the sub-run.
				emoji_run = false;
				emoji_stack[emoji_sp].end = script_end;
			}

			UScriptCode sc = uscript_getScript(ch, &err);
			if (U_FAILURE(err)) {
				// Release both heap-grown stacks before bailing out.
				if (paren_stack != starter_paren_stack) {
					memfree(paren_stack);
				}
				if (emoji_stack != starter_emoji_stack) {
					memfree(emoji_stack);
				}
				ERR_FAIL_MSG(u_errorName(err));
			}

			// Looked up once per character and reused by the checks below.
			const int32_t bracket_type = u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE);
			if (bracket_type != U_BPT_NONE) {
				if (bracket_type == U_BPT_OPEN) {
					// If it's an open character, push it onto the stack.
					paren_sp++;
					if (unlikely(paren_sp >= paren_size)) {
						// If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.
						paren_size += PAREN_STACK_DEPTH;
						if (paren_stack == starter_paren_stack) {
							paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));
							// Carry over the brackets already pushed into the local buffer.
							for (int i = 0; i < paren_sp; i++) {
								paren_stack[i] = starter_paren_stack[i];
							}
						} else {
							paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));
						}
					}
					paren_stack[paren_sp].pair_index = ch;
					paren_stack[paren_sp].script_code = script_code;
				} else if (paren_sp >= 0) {
					// If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.
					UChar32 paired_ch = u_getBidiPairedBracket(ch);
					while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {
						paren_sp -= 1;
					}
					if (paren_sp < start_sp) {
						start_sp = paren_sp;
					}
					if (paren_sp >= 0) {
						sc = paren_stack[paren_sp].script_code;
					}
				}
			}

			if (same_script(script_code, sc)) {
				if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
					script_code = sc;
					// Now that we have a final script code, fix any open characters we pushed before we knew the script code.
					while (start_sp < paren_sp) {
						paren_stack[++start_sp].script_code = script_code;
					}
				}
				if ((bracket_type == U_BPT_CLOSE) && paren_sp >= 0) {
					// If this character is a close paired character pop the matching open character from the stack.
					paren_sp -= 1;
					if (start_sp >= 0) {
						start_sp -= 1;
					}
				}
			} else {
				// The character belongs to a different script, the current run ends here.
				break;
			}
		}
		if (emoji_run) {
			// The run ended while an emoji sequence was still open.
			emoji_stack[emoji_sp].end = script_end;
		}

		// Emit the script run, interleaved with its emoji sub-runs.
		for (int sub = 0; sub <= emoji_sp; sub++) {
			if (emoji_stack[sub].start > script_start) {
				// Regular text located before this emoji sub-run.
				ScriptRange rng;
				rng.script = hb_icu_script_to_script(script_code);
				rng.start = script_start;
				rng.end = emoji_stack[sub].start;
				script_ranges.push_back(rng);
			}
			ScriptRange rng;
			rng.script = (hb_script_t)HB_TAG('Z', 's', 'y', 'e');
			rng.start = emoji_stack[sub].start;
			rng.end = emoji_stack[sub].end;
			script_ranges.push_back(rng);

			script_start = emoji_stack[sub].end;
		}
		if (script_start != script_end) {
			// Remaining text after the last emoji sub-run (or the whole run if it has none).
			ScriptRange rng;
			rng.script = hb_icu_script_to_script(script_code);
			rng.start = script_start;
			rng.end = script_end;
			script_ranges.push_back(rng);
		}
	} while (script_end < p_length);

	// The emoji stack is kept alive across script runs and released only once, here, so that a
	// heap-grown buffer is never used after being freed or freed twice.
	if (emoji_stack != starter_emoji_stack) {
		memfree(emoji_stack);
	}

	if (paren_stack != starter_paren_stack) {
		memfree(paren_stack);
	}
}
210
211