CoCalc -- ustring.cpp

GitHub Repository: godotengine/godot
Path: blob/master/core/string/ustring.cpp
21083 views
1
/**************************************************************************/
2
/*  ustring.cpp                                                           */
3
/**************************************************************************/
4
/*                         This file is part of:                          */
5
/*                             GODOT ENGINE                               */
6
/*                        https://godotengine.org                         */
7
/**************************************************************************/
8
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
10
/*                                                                        */
11
/* Permission is hereby granted, free of charge, to any person obtaining  */
12
/* a copy of this software and associated documentation files (the        */
13
/* "Software"), to deal in the Software without restriction, including    */
14
/* without limitation the rights to use, copy, modify, merge, publish,    */
15
/* distribute, sublicense, and/or sell copies of the Software, and to     */
16
/* permit persons to whom the Software is furnished to do so, subject to  */
17
/* the following conditions:                                              */
18
/*                                                                        */
19
/* The above copyright notice and this permission notice shall be         */
20
/* included in all copies or substantial portions of the Software.        */
21
/*                                                                        */
22
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
23
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
24
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
26
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
27
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
28
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
29
/**************************************************************************/
30

31
#include "ustring.h"
32

33
STATIC_ASSERT_INCOMPLETE_TYPE(class, Array);
34
STATIC_ASSERT_INCOMPLETE_TYPE(class, Dictionary);
35
STATIC_ASSERT_INCOMPLETE_TYPE(class, Object);
36

37
#include "core/crypto/crypto_core.h"
38
#include "core/io/ip_address.h"
39
#include "core/math/color.h"
40
#include "core/math/math_funcs.h"
41
#include "core/object/object.h"
42
#include "core/os/memory.h"
43
#include "core/os/os.h"
44
#include "core/string/print_string.h"
45
#include "core/string/string_name.h"
46
#include "core/string/translation_server.h"
47
#include "core/string/ucaps.h"
48
#include "core/variant/variant.h"
49
#include "core/version_generated.gen.h"
50

51
#include "thirdparty/grisu2/grisu2.h"
52

53
#ifdef _MSC_VER
54
#define _CRT_SECURE_NO_WARNINGS // to disable build-time warning which suggested to use strcpy_s instead strcpy
55
#endif
56

57
#if defined(MINGW_ENABLED) || defined(_MSC_VER)
58
#define snprintf _snprintf_s
59
#endif
60

61
static const int MAX_DECIMALS = 32;
62

63
// Returns `c` shifted by ('a' - 'A') when is_ascii_upper_case(c) holds;
// any other value is returned unchanged.
static _FORCE_INLINE_ char32_t lower_case(char32_t c) {
	if (is_ascii_upper_case(c)) {
		return c + ('a' - 'A');
	}
	return c;
}
66

67
// Case-insensitive version of are_spans_equal: compares the first `p_len`
// elements of both ranges after mapping each one through _find_lower().
// Returns true when all pairs match (including when `p_len` is 0).
template <typename T1, typename T2>
static bool strings_equal_lower(const T1 *p_lhs_begin, const T2 *p_rhs_begin, size_t p_len) {
	const T1 *lhs = p_lhs_begin;
	const T2 *rhs = p_rhs_begin;
	for (size_t remaining = p_len; remaining > 0; --remaining, ++lhs, ++rhs) {
		if (_find_lower(*lhs) != _find_lower(*rhs)) {
			return false;
		}
	}
	return true;
}
77

78
// Splits this URL into scheme, host, port, path and fragment, writing each
// part to the matching output parameter. Credentials before '@' are stripped.
//
// Outputs (all reset to empty / 0 first):
// - r_scheme: lower-cased scheme including the trailing "://", or empty.
// - r_host: lower-cased host; a bracketed IPv6 literal is stored without brackets.
// - r_port: explicit port in [1, 65535], or 0 when the URL carries none.
// - r_path: everything from the first '/' after the scheme up to the fragment.
// - r_fragment: text following the first '#', without the '#'.
//
// Returns OK on success. Returns ERR_INVALID_PARAMETER when the host is empty,
// an IPv6 literal has no closing ']', a non-IPv6 authority holds more than one
// ':', or the port is not a valid integer in range. On failure the outputs
// parsed so far keep their values and r_port stays 0.
Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path, String &r_fragment) const {
	String base = *this;
	r_scheme = "";
	r_host = "";
	r_port = 0;
	r_path = "";
	r_fragment = "";

	// Scheme: accepted only when every character before "://" is ASCII
	// alphanumeric, '+', '-' or '.'; otherwise the text is left in `base`.
	int pos = base.find("://");
	if (pos != -1) {
		bool is_scheme_valid = true;
		for (int i = 0; i < pos; i++) {
			if (!is_ascii_alphanumeric_char(base[i]) && base[i] != '+' && base[i] != '-' && base[i] != '.') {
				is_scheme_valid = false;
				break;
			}
		}
		if (is_scheme_valid) {
			r_scheme = base.substr(0, pos + 3).to_lower();
			base = base.substr(pos + 3);
		}
	}

	// Fragment: split off first so a '/' inside it is not taken for the path.
	pos = base.find_char('#');
	if (pos != -1) {
		r_fragment = base.substr(pos + 1);
		base = base.substr(0, pos);
	}

	// Path: starts at the first remaining '/'.
	pos = base.find_char('/');
	if (pos != -1) {
		r_path = base.substr(pos);
		base = base.substr(0, pos);
	}

	// Host: drop "user:pass@" credentials when present.
	pos = base.find_char('@');
	if (pos != -1) {
		base = base.substr(pos + 1);
	}
	if (base.begins_with("[")) {
		// Literal IPv6: the host is the text between the brackets.
		pos = base.rfind_char(']');
		if (pos == -1) {
			return ERR_INVALID_PARAMETER;
		}
		r_host = base.substr(1, pos - 1);
		base = base.substr(pos + 1);
	} else {
		// Anything else: at most one ':' (the port separator) is allowed.
		if (base.get_slice_count(":") > 2) {
			return ERR_INVALID_PARAMETER;
		}
		pos = base.rfind_char(':');
		if (pos == -1) {
			r_host = base;
			base = "";
		} else {
			r_host = base.substr(0, pos);
			base = base.substr(pos);
		}
	}
	if (r_host.is_empty()) {
		return ERR_INVALID_PARAMETER;
	}
	r_host = r_host.to_lower();

	// Port: validate in 64-bit before narrowing, so an oversized number cannot
	// wrap around into the valid range when stored in the `int` output.
	if (base.begins_with(":")) {
		base = base.substr(1);
		if (!base.is_valid_int()) {
			return ERR_INVALID_PARAMETER;
		}
		const int64_t port = base.to_int();
		if (port < 1 || port > 65535) {
			return ERR_INVALID_PARAMETER;
		}
		r_port = static_cast<int>(port);
	}
	return OK;
}
159

160
void String::append_latin1(const Span<char> &p_cstr) {
161
	if (p_cstr.is_empty()) {
162
		return;
163
	}
164

165
	const int prev_length = length();
166
	resize_uninitialized(prev_length + p_cstr.size() + 1); // include 0
167

168
	const char *src = p_cstr.ptr();
169
	const char *end = src + p_cstr.size();
170
	char32_t *dst = ptrw() + prev_length;
171

172
	for (; src < end; ++src, ++dst) {
173
		// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
174
		if (unlikely(*src == '\0')) {
175
			// NUL in string is allowed by the unicode standard, but unsupported in our implementation right now.
176
			print_unicode_error("Unexpected NUL character", true);
177
			*dst = _replacement_char;
178
		} else {
179
			*dst = static_cast<uint8_t>(*src);
180
		}
181
	}
182
	*dst = 0;
183
}
184

185
// Appends a UTF-32 span to this string while validating every code point.
// NUL characters, surrogate values and values above 0x10ffff are reported via
// print_unicode_error() and replaced with _replacement_char.
// Returns OK when nothing had to be replaced (or the span is empty),
// ERR_PARSE_ERROR otherwise; the string is extended in both cases.
Error String::append_utf32(const Span<char32_t> &p_cstr) {
	if (p_cstr.is_empty()) {
		return OK;
	}

	Error result = OK;

	const int old_length = length();
	resize_uninitialized(old_length + p_cstr.size() + 1);
	const char32_t *in = p_cstr.ptr();
	const char32_t *const in_end = p_cstr.ptr() + p_cstr.size();
	char32_t *out = ptrw() + old_length;

	while (in < in_end) {
		const char32_t code = *in++;
		bool is_valid = false;

		if (unlikely(code == U'\0')) {
			// NUL in string is allowed by the unicode standard, but unsupported in our implementation right now.
			print_unicode_error("Unexpected NUL character", true);
		} else if (unlikely((code & 0xfffff800) == 0xd800)) {
			print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)code), true);
		} else if (unlikely(code > 0x10ffff)) {
			print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)code), true);
		} else {
			is_valid = true;
		}

		if (is_valid) {
			*out = code;
		} else {
			*out = _replacement_char;
			result = ERR_PARSE_ERROR;
		}
		++out;
	}
	*out = 0;
	return result;
}
221

222
void String::append_utf32_unchecked(const Span<char32_t> &p_span) {
223
	const int prev_length = length();
224
	resize_uninitialized(prev_length + p_span.size() + 1); // + 1 for \0
225
	char32_t *dst = ptrw() + prev_length;
226
	memcpy(dst, p_span.ptr(), p_span.size() * sizeof(char32_t));
227
	*(dst + p_span.size()) = _null;
228
}
229

230
// Returns a new string made of this string followed by `p_str`.
String String::operator+(const String &p_str) const {
	String joined(*this);
	joined += p_str;
	return joined;
}
235

236
// Returns a new string made of this string followed by the C string `p_str`
// (appended through operator+=(const char *)).
String String::operator+(const char *p_str) const {
	String joined(*this);
	joined += p_str;
	return joined;
}
241

242
// Returns a new string made of this string followed by the wide C string
// `p_str` (appended through operator+=(const wchar_t *)).
String String::operator+(const wchar_t *p_str) const {
	String joined(*this);
	joined += p_str;
	return joined;
}
247

248
// Returns a new string made of this string followed by the UTF-32 C string
// `p_str` (appended through operator+=(const char32_t *)).
String String::operator+(const char32_t *p_str) const {
	String joined(*this);
	joined += p_str;
	return joined;
}
253

254
// Returns a new string made of this string followed by the single character
// `p_char` (appended through operator+=(char32_t)).
String String::operator+(char32_t p_char) const {
	String joined(*this);
	joined += p_char;
	return joined;
}
259

260
// Concatenation with a C string on the left-hand side: builds a String from
// `p_chr` and appends `p_str` to it.
String operator+(const char *p_chr, const String &p_str) {
	String result = p_chr;
	result += p_str;
	return result;
}
265

266
// Concatenation with a wide C string on the left-hand side. The wide string
// is first converted to a String (how depends on the width of wchar_t on the
// target), then `p_str` is appended.
String operator+(const wchar_t *p_chr, const String &p_str) {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit: go through the UTF-16 conversion.
	String result = String::utf16(reinterpret_cast<const char16_t *>(p_chr));
#else
	// wchar_t is 32-bit: treat the buffer as UTF-32.
	String result = reinterpret_cast<const char32_t *>(p_chr);
#endif
	result += p_str;
	return result;
}
277

278
// Concatenation with a single character on the left-hand side: the result is
// String::chr(p_chr) followed by `p_str`.
String operator+(char32_t p_chr, const String &p_str) {
	return String::chr(p_chr) + p_str;
}
281

282
// Appends `p_str` to this string and returns *this.
// When this string is empty the operation is a plain assignment; otherwise
// the contents are appended without re-validation.
String &String::operator+=(const String &p_str) {
	if (!is_empty()) {
		append_utf32_unchecked(p_str);
	} else {
		*this = p_str;
	}
	return *this;
}
290

291
// Appends the C string `p_str` via append_latin1() and returns *this.
String &String::operator+=(const char *p_str) {
	append_latin1(p_str);
	return *this;
}
295

296
// Appends the wide C string `p_str` and returns *this. The wide string is
// converted to a temporary String first; the conversion depends on the width
// of wchar_t on the target.
String &String::operator+=(const wchar_t *p_str) {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit: go through the UTF-16 conversion.
	return operator+=(String::utf16(reinterpret_cast<const char16_t *>(p_str)));
#else
	// wchar_t is 32-bit: treat the buffer as UTF-32.
	return operator+=(String(reinterpret_cast<const char32_t *>(p_str)));
#endif
}
306

307
// Appends the NUL-terminated UTF-32 string `p_str` through append_utf32()
// (the validating path) and returns *this. The Error result of append_utf32()
// is discarded.
String &String::operator+=(const char32_t *p_str) {
	const Span source(p_str, strlen(p_str));
	append_utf32(source);
	return *this;
}
311

312
// Appends the single character `p_char` through append_utf32() (the
// validating path) and returns *this. The Error result is discarded.
String &String::operator+=(char32_t p_char) {
	const Span single(&p_char, 1);
	append_utf32(single);
	return *this;
}
316

317
// Compares this string with a Latin-1 encoded C string. The bytes of `p_str`
// are viewed as uint8_t before the span comparison.
bool String::operator==(const char *p_str) const {
	const auto latin1 = Span(p_str, strlen(p_str)).reinterpret<uint8_t>();
	return span() == latin1;
}
321

322
// Compares this string with a wide C string. How the wide string is
// interpreted depends on the width of wchar_t on the target.
bool String::operator==(const wchar_t *p_str) const {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit: parse as UTF-16, then compare Strings.
	return operator==(String::utf16(reinterpret_cast<const char16_t *>(p_str)));
#else
	// wchar_t is 32-bit: compare character by character as UTF-32.
	return operator==(reinterpret_cast<const char32_t *>(p_str));
#endif
}
331

332
// Compares this string with a NUL-terminated UTF-32 C string.
bool String::operator==(const char32_t *p_str) const {
	const Span other(p_str, strlen(p_str));
	return span() == other;
}
336

337
// Two strings are equal when their character spans compare equal.
bool String::operator==(const String &p_str) const {
	const bool same_contents = (span() == p_str.span());
	return same_contents;
}
340

341
// Compares this string's character span with an arbitrary UTF-32 span.
bool String::operator==(const Span<char32_t> &p_str_range) const {
	const bool same_contents = (span() == p_str_range);
	return same_contents;
}
344

345
// Equality with the C string on the left-hand side; forwards to the
// String-on-the-left comparison.
bool operator==(const char *p_chr, const String &p_str) {
	const bool equal = (p_str == p_chr);
	return equal;
}
348

349
// Equality with a wide C string on the left-hand side. How the wide string is
// interpreted depends on the width of wchar_t on the target.
bool operator==(const wchar_t *p_chr, const String &p_str) {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit: convert from UTF-16 before comparing.
	const bool equal = (p_str == String::utf16(reinterpret_cast<const char16_t *>(p_chr)));
#else
	// wchar_t is 32-bit: compare directly as UTF-32.
	const bool equal = (p_str == reinterpret_cast<const char32_t *>(p_chr));
#endif
	return equal;
}
358

359
// Inequality with the C string on the left-hand side; the negation of the
// String-on-the-left equality.
bool operator!=(const char *p_chr, const String &p_str) {
	const bool equal = (p_str == p_chr);
	return !equal;
}
362

363
// Inequality with a wide C string on the left-hand side. The wide string is
// converted to a String first; the conversion depends on the width of wchar_t.
bool operator!=(const wchar_t *p_chr, const String &p_str) {
#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit: convert from UTF-16 before comparing.
	const bool equal = (p_str == String::utf16(reinterpret_cast<const char16_t *>(p_chr)));
#else
	// wchar_t is 32-bit: build the String from UTF-32 data.
	const bool equal = (p_str == String(reinterpret_cast<const char32_t *>(p_chr)));
#endif
	return !equal;
}
372

373
// Negation of operator==(const char *).
bool String::operator!=(const char *p_str) const {
	const bool equal = (*this == p_str);
	return !equal;
}
376

377
// Negation of operator==(const wchar_t *).
bool String::operator!=(const wchar_t *p_str) const {
	const bool equal = (*this == p_str);
	return !equal;
}
380

381
// Negation of operator==(const char32_t *).
bool String::operator!=(const char32_t *p_str) const {
	const bool equal = (*this == p_str);
	return !equal;
}
384

385
// Negation of operator==(const String &).
bool String::operator!=(const String &p_str) const {
	const bool equal = (*this == p_str);
	return !equal;
}
388

389
// "this <= other" expressed through operator<: true unless `p_str` is
// strictly less than this string.
bool String::operator<=(const String &p_str) const {
	const bool other_is_less = (p_str < *this);
	return !other_is_less;
}
392

393
// "this > other" expressed through operator<: true when `p_str` is strictly
// less than this string.
bool String::operator>(const String &p_str) const {
	const bool other_is_less = (p_str < *this);
	return other_is_less;
}
396

397
// "this >= other" expressed through operator<: true unless this string is
// strictly less than `p_str`.
bool String::operator>=(const String &p_str) const {
	const bool this_is_less = (*this < p_str);
	return !this_is_less;
}
400

401
// Ordering against a C string. An empty String is less than any non-empty
// `p_str` and not less than an empty one; otherwise the result is whether
// str_compare() yields a negative value.
bool String::operator<(const char *p_str) const {
	if (is_empty()) {
		return p_str[0] != 0;
	}
	return str_compare(get_data(), p_str) < 0;
}
410

411
// Ordering against a wide C string. An empty String is less than any
// non-empty `p_str` and not less than an empty one; otherwise the result is
// whether str_compare() yields a negative value.
bool String::operator<(const wchar_t *p_str) const {
	if (is_empty()) {
		return p_str[0] != 0;
	}

#ifdef WINDOWS_ENABLED
	// wchar_t is 16-bit: convert from UTF-16 before comparing.
	const String other = String::utf16(reinterpret_cast<const char16_t *>(p_str));
	return str_compare(get_data(), other.get_data()) < 0;
#else
	// wchar_t is 32-bit: compare directly as UTF-32.
	return str_compare(get_data(), reinterpret_cast<const char32_t *>(p_str)) < 0;
#endif
}
427

428
// Ordering against a UTF-32 C string. An empty String is less than any
// non-empty `p_str` and not less than an empty one; otherwise the result is
// whether str_compare() yields a negative value.
bool String::operator<(const char32_t *p_str) const {
	if (is_empty()) {
		return p_str[0] != 0;
	}
	return str_compare(get_data(), p_str) < 0;
}
438

439
// Ordering against another String; delegates to the overload taking the
// other string's character data.
bool String::operator<(const String &p_str) const {
	const char32_t *other_data = p_str.get_data();
	return operator<(other_data);
}
442

443
signed char String::nocasecmp_to(const String &p_str) const {
444
	if (is_empty() && p_str.is_empty()) {
445
		return 0;
446
	}
447
	if (is_empty()) {
448
		return -1;
449
	}
450
	if (p_str.is_empty()) {
451
		return 1;
452
	}
453

454
	const char32_t *that_str = p_str.get_data();
455
	const char32_t *this_str = get_data();
456

457
	while (true) {
458
		if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
459
			return 0;
460
		} else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
461
			return -1;
462
		} else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
463
			return 1;
464
		} else if (_find_upper(*this_str) < _find_upper(*that_str)) { // If current character in this is less, we are less.
465
			return -1;
466
		} else if (_find_upper(*this_str) > _find_upper(*that_str)) { // If current character in this is greater, we are greater.
467
			return 1;
468
		}
469

470
		this_str++;
471
		that_str++;
472
	}
473
}
474

475
signed char String::casecmp_to(const String &p_str) const {
476
	if (is_empty() && p_str.is_empty()) {
477
		return 0;
478
	}
479
	if (is_empty()) {
480
		return -1;
481
	}
482
	if (p_str.is_empty()) {
483
		return 1;
484
	}
485

486
	const char32_t *that_str = p_str.get_data();
487
	const char32_t *this_str = get_data();
488

489
	while (true) {
490
		if (*that_str == 0 && *this_str == 0) { // If both strings are at the end, they are equal.
491
			return 0;
492
		} else if (*this_str == 0) { // If at the end of this, and not of other, we are less.
493
			return -1;
494
		} else if (*that_str == 0) { // If at end of other, and not of this, we are greater.
495
			return 1;
496
		} else if (*this_str < *that_str) { // If current character in this is less, we are less.
497
			return -1;
498
		} else if (*this_str > *that_str) { // If current character in this is greater, we are greater.
499
			return 1;
500
		}
501

502
		this_str++;
503
		that_str++;
504
	}
505
}
506

507
// Compares the digit runs starting at `r_this_str` and `r_that_str` by
// numeric magnitude. Both references are advanced past their digit run, so
// the caller continues after the numbers.
// Returns -1 / 1 when the first number is smaller / larger, 0 when equal
// (numbers differing only by leading zeros compare equal).
static _FORCE_INLINE_ signed char natural_cmp_common(const char32_t *&r_this_str, const char32_t *&r_that_str) {
	// Remember where each number starts before moving the caller's cursors.
	const char32_t *this_digits = r_this_str;
	const char32_t *that_digits = r_that_str;

	// Move the caller's cursors to the end of each digit run.
	for (; is_digit(*r_this_str); ++r_this_str) {
	}
	for (; is_digit(*r_that_str); ++r_that_str) {
	}

	// Leading zeros do not contribute to the magnitude.
	for (; *this_digits == '0'; ++this_digits) {
	}
	for (; *that_digits == '0'; ++that_digits) {
	}

	// With leading zeros gone, more significant digits means a larger number.
	const int this_len = r_this_str - this_digits;
	const int that_len = r_that_str - that_digits;
	if (this_len != that_len) {
		return this_len < that_len ? -1 : 1;
	}

	// Same digit count: the first differing digit decides.
	for (; this_digits != r_this_str && that_digits != r_that_str; ++this_digits, ++that_digits) {
		if (*this_digits != *that_digits) {
			return *this_digits < *that_digits ? -1 : 1;
		}
	}

	return 0;
}
547

548
// Case-sensitive natural-order comparison of two NUL-terminated strings.
// Digit runs are compared by numeric value (see natural_cmp_common()), digits
// sort before non-digits, and leading '.' characters sort before anything else.
// Returns -1 / 0 / 1 when `p_this_str` sorts before / equal to / after
// `p_that_str`. Returns 0 when either pointer is null.
static _FORCE_INLINE_ signed char naturalcasecmp_to_base(const char32_t *p_this_str, const char32_t *p_that_str) {
	if (p_this_str && p_that_str) {
		// Leading dots: a string starting with '.' sorts before one that does not.
		while (*p_this_str == '.' || *p_that_str == '.') {
			if (*p_this_str++ != '.') {
				return 1;
			}
			if (*p_that_str++ != '.') {
				return -1;
			}
			if (!*p_that_str) {
				// If both strings ended together they are identical runs of dots
				// and must compare equal; previously this case returned 1.
				return *p_this_str ? 1 : 0;
			}
			if (!*p_this_str) {
				return -1;
			}
		}

		while (*p_this_str) {
			if (!*p_that_str) {
				// The other string ended first, so this one is greater.
				return 1;
			} else if (is_digit(*p_this_str)) {
				if (!is_digit(*p_that_str)) {
					// Digits sort before non-digits.
					return -1;
				}

				// Both at a number: compare numerically; both cursors are advanced past it.
				signed char ret = natural_cmp_common(p_this_str, p_that_str);
				if (ret) {
					return ret;
				}
			} else if (is_digit(*p_that_str)) {
				return 1;
			} else {
				if (*p_this_str < *p_that_str) { // If current character in this is less, we are less.
					return -1;
				} else if (*p_this_str > *p_that_str) { // If current character in this is greater, we are greater.
					return 1;
				}

				p_this_str++;
				p_that_str++;
			}
		}
		if (*p_that_str) {
			// This string ended first, so it is less.
			return -1;
		}
	}

	return 0;
}
597

598
signed char String::naturalcasecmp_to(const String &p_str) const {
599
	const char32_t *this_str = get_data();
600
	const char32_t *that_str = p_str.get_data();
601

602
	return naturalcasecmp_to_base(this_str, that_str);
603
}
604

605
// Case-insensitive natural-order comparison of two NUL-terminated strings.
// Digit runs are compared by numeric value (see natural_cmp_common()), digits
// sort before non-digits, leading '.' characters sort before anything else,
// and other characters are compared after mapping through _find_upper().
// Returns -1 / 0 / 1 when `p_this_str` sorts before / equal to / after
// `p_that_str`. Returns 0 when either pointer is null.
static _FORCE_INLINE_ signed char naturalnocasecmp_to_base(const char32_t *p_this_str, const char32_t *p_that_str) {
	if (p_this_str && p_that_str) {
		// Leading dots: a string starting with '.' sorts before one that does not.
		while (*p_this_str == '.' || *p_that_str == '.') {
			if (*p_this_str++ != '.') {
				return 1;
			}
			if (*p_that_str++ != '.') {
				return -1;
			}
			if (!*p_that_str) {
				// If both strings ended together they are identical runs of dots
				// and must compare equal; previously this case returned 1.
				return *p_this_str ? 1 : 0;
			}
			if (!*p_this_str) {
				return -1;
			}
		}

		while (*p_this_str) {
			if (!*p_that_str) {
				// The other string ended first, so this one is greater.
				return 1;
			} else if (is_digit(*p_this_str)) {
				if (!is_digit(*p_that_str)) {
					// Digits sort before non-digits.
					return -1;
				}

				// Both at a number: compare numerically; both cursors are advanced past it.
				signed char ret = natural_cmp_common(p_this_str, p_that_str);
				if (ret) {
					return ret;
				}
			} else if (is_digit(*p_that_str)) {
				return 1;
			} else {
				if (_find_upper(*p_this_str) < _find_upper(*p_that_str)) { // If current character in this is less, we are less.
					return -1;
				} else if (_find_upper(*p_this_str) > _find_upper(*p_that_str)) { // If current character in this is greater, we are greater.
					return 1;
				}

				p_this_str++;
				p_that_str++;
			}
		}
		if (*p_that_str) {
			// This string ended first, so it is less.
			return -1;
		}
	}

	return 0;
}
654

655
signed char String::naturalnocasecmp_to(const String &p_str) const {
656
	const char32_t *this_str = get_data();
657
	const char32_t *that_str = p_str.get_data();
658

659
	return naturalnocasecmp_to_base(this_str, that_str);
660
}
661

662
// Compares the leading `_` sequences of two NUL-terminated strings.
// `_` sorts lower than everything except `.`. Both references are advanced
// past the underscores the strings have in common.
// Returns -1 / 1 as soon as only one side has `_` at the current position;
// returns 0 when the prefixes do not decide the order (the caller then
// continues from the advanced positions).
static _FORCE_INLINE_ signed char file_cmp_common(const char32_t *&r_this_str, const char32_t *&r_that_str) {
	for (;;) {
		const char32_t this_ch = *r_this_str;
		const char32_t that_ch = *r_that_str;
		const bool this_underscore = (this_ch == '_');
		const bool that_underscore = (that_ch == '_');

		// Keep going only while neither string has ended and at least one
		// side is sitting on an underscore.
		if (this_ch == 0 || that_ch == 0 || !(this_underscore || that_underscore)) {
			break;
		}

		if (!this_underscore) {
			// Only the other side has `_`: this side wins unless it is `.`.
			return this_ch == '.' ? -1 : 1;
		}
		if (!that_underscore) {
			return that_ch == '.' ? 1 : -1;
		}

		++r_this_str;
		++r_that_str;
	}

	return 0;
}
677

678
signed char String::filecasecmp_to(const String &p_str) const {
679
	const char32_t *this_str = get_data();
680
	const char32_t *that_str = p_str.get_data();
681

682
	signed char ret = file_cmp_common(this_str, that_str);
683
	if (ret) {
684
		return ret;
685
	}
686

687
	return naturalcasecmp_to_base(this_str, that_str);
688
}
689

690
signed char String::filenocasecmp_to(const String &p_str) const {
691
	const char32_t *this_str = get_data();
692
	const char32_t *that_str = p_str.get_data();
693

694
	signed char ret = file_cmp_common(this_str, that_str);
695
	if (ret) {
696
		return ret;
697
	}
698

699
	return naturalnocasecmp_to_base(this_str, that_str);
700
}
701

702
// Splits compound identifiers into space-separated, lower-cased words.
// A space is inserted at case and letter/digit boundaries (see the boundary
// rules in the loop); afterwards whitespace, underscores and hyphens are all
// turned into ' ' and the result is lower-cased.
// An empty string is returned unchanged.
String String::_separate_compound_words() const {
	if (length() == 0) {
		return *this;
	}

	const char32_t *chars = get_data();
	String spaced;
	int word_start = 0;

	bool prev_upper = is_unicode_upper_case(chars[0]);
	bool prev_lower = is_unicode_lower_case(chars[0]);
	bool prev_digit = is_digit(chars[0]);

	for (int i = 1; i < length(); i++) {
		const char32_t current = chars[i];
		const bool curr_upper = is_unicode_upper_case(current);
		const bool curr_lower = is_unicode_lower_case(current);
		const bool curr_digit = is_digit(current);

		// Look one character ahead when there is one.
		const bool next_lower = (i + 1 < length()) && is_unicode_lower_case(chars[i + 1]);

		const bool lower_then_upper = prev_lower && curr_upper; // aA
		const bool acronym_end = (prev_upper || prev_digit) && curr_upper && next_lower; // AAa, 2Aa
		const bool digit_then_word = prev_digit && curr_lower && next_lower; // 2aa
		const bool letter_then_digit = (prev_upper || prev_lower) && curr_digit; // A2, a2

		if (lower_then_upper || acronym_end || digit_then_word || letter_then_digit) {
			spaced += substr(word_start, i - word_start) + " ";
			word_start = i;
		}

		prev_upper = curr_upper;
		prev_lower = curr_lower;
		prev_digit = curr_digit;
	}

	// Flush the last word.
	spaced += substr(word_start, size() - word_start);

	// Normalize every separator character to a plain space.
	for (int i = 0; i < spaced.size(); i++) {
		if (is_whitespace(spaced[i]) || is_underscore(spaced[i]) || is_hyphen(spaced[i])) {
			spaced[i] = ' ';
		}
	}

	return spaced.to_lower();
}
754

755
// Returns the words of this string (as split by _separate_compound_words())
// joined by single spaces, with the first character of each word mapped
// through _find_upper().
String String::capitalize() const {
	const String words = _separate_compound_words().strip_edges();
	// get_slice_count() rescans the whole string, so evaluate it once rather
	// than on every loop iteration.
	const int slice_count = words.get_slice_count(" ");

	String ret;
	for (int i = 0; i < slice_count; i++) {
		String slice = words.get_slicec(' ', i);
		// Consecutive separators produce empty slices; skip them.
		if (slice.length() > 0) {
			slice[0] = _find_upper(slice[0]);
			if (i > 0) {
				ret += " ";
			}
			ret += slice;
		}
	}
	return ret;
}
770

771
// Returns the words of this string (as split by _separate_compound_words())
// concatenated without separators; the first word starts with a character
// mapped through _find_lower(), every following word with one mapped through
// _find_upper().
String String::to_camel_case() const {
	const String words = _separate_compound_words().strip_edges();
	// get_slice_count() rescans the whole string, so evaluate it once rather
	// than on every loop iteration.
	const int slice_count = words.get_slice_count(" ");

	String ret;
	for (int i = 0; i < slice_count; i++) {
		String slice = words.get_slicec(' ', i);
		// Consecutive separators produce empty slices; skip them.
		if (slice.length() > 0) {
			if (i == 0) {
				slice[0] = _find_lower(slice[0]);
			} else {
				slice[0] = _find_upper(slice[0]);
			}
			ret += slice;
		}
	}
	return ret;
}
787

788
// Returns the words of this string (as split by _separate_compound_words())
// concatenated without separators, with the first character of every word
// mapped through _find_upper().
String String::to_pascal_case() const {
	const String words = _separate_compound_words().strip_edges();
	// get_slice_count() rescans the whole string, so evaluate it once rather
	// than on every loop iteration.
	const int slice_count = words.get_slice_count(" ");

	String ret;
	for (int i = 0; i < slice_count; i++) {
		String slice = words.get_slicec(' ', i);
		// Consecutive separators produce empty slices; skip them.
		if (slice.length() > 0) {
			slice[0] = _find_upper(slice[0]);
			ret += slice;
		}
	}
	return ret;
}
800

801
// Returns the lower-cased words of this string joined by '_': the spaces
// produced by _separate_compound_words() are replaced with underscores.
String String::to_snake_case() const {
	const String words = _separate_compound_words();
	return words.replace_char(' ', '_');
}
804

805
// Returns the lower-cased words of this string joined by '-': the spaces
// produced by _separate_compound_words() are replaced with hyphens.
String String::to_kebab_case() const {
	const String words = _separate_compound_words();
	return words.replace_char(' ', '-');
}
808

809
// Returns this string with every line prefixed by its 1-based line number,
// formatted as "%4d | %s". Lines are split on "\n" and re-joined with "\n".
String String::get_with_code_lines() const {
	const Vector<String> lines = split("\n");
	String numbered;
	for (int line_idx = 0; line_idx < lines.size(); line_idx++) {
		if (line_idx != 0) {
			numbered += "\n";
		}
		numbered += vformat("%4d | %s", line_idx + 1, lines[line_idx]);
	}
	return numbered;
}
820

821
int String::get_slice_count(const String &p_splitter) const {
822
	if (is_empty()) {
823
		return 0;
824
	}
825
	if (p_splitter.is_empty()) {
826
		return 0;
827
	}
828

829
	int pos = 0;
830
	int slices = 1;
831

832
	while ((pos = find(p_splitter, pos)) >= 0) {
833
		slices++;
834
		pos += p_splitter.length();
835
	}
836

837
	return slices;
838
}
839

840
int String::get_slice_count(const char *p_splitter) const {
841
	if (is_empty()) {
842
		return 0;
843
	}
844
	if (p_splitter == nullptr || *p_splitter == '\0') {
845
		return 0;
846
	}
847

848
	int pos = 0;
849
	int slices = 1;
850
	int splitter_length = strlen(p_splitter);
851

852
	while ((pos = find(p_splitter, pos)) >= 0) {
853
		slices++;
854
		pos += splitter_length;
855
	}
856

857
	return slices;
858
}
859

860
// Returns the slice with index `p_slice` (0-based) of this string when split
// by `p_splitter`.
// - Returns "" when this string or the splitter is empty, when `p_slice` is
//   negative, or when there is no slice with that index.
// - Returns the whole string when the splitter does not occur at all,
//   regardless of `p_slice`.
String String::get_slice(const String &p_splitter, int p_slice) const {
	if (is_empty() || p_splitter.is_empty()) {
		return "";
	}
	if (p_slice < 0) {
		return "";
	}
	if (find(p_splitter) == -1) {
		return *this;
	}

	int slice_start = 0;
	for (int index = 0;; index++) {
		int slice_end = find(p_splitter, slice_start);
		if (slice_end == -1) {
			slice_end = length(); // No further splitter: the slice runs to the end.
		}

		if (index == p_slice) {
			return substr(slice_start, slice_end - slice_start);
		}

		if (slice_end == length()) {
			break; // Ran out of slices before reaching the requested index.
		}
		slice_start = slice_end + p_splitter.length();
	}

	return "";
}
899

900
// Returns the slice with index `p_slice` (0-based) of this string when split
// by the C string `p_splitter`.
// - Returns "" when this string is empty, the splitter is null or empty,
//   `p_slice` is negative, or there is no slice with that index.
// - Returns the whole string when the splitter does not occur at all,
//   regardless of `p_slice`.
String String::get_slice(const char *p_splitter, int p_slice) const {
	if (is_empty() || p_splitter == nullptr || *p_splitter == '\0') {
		return "";
	}
	if (p_slice < 0) {
		return "";
	}
	if (find(p_splitter) == -1) {
		return *this;
	}

	const int splitter_length = strlen(p_splitter);
	int slice_start = 0;
	for (int index = 0;; index++) {
		int slice_end = find(p_splitter, slice_start);
		if (slice_end == -1) {
			slice_end = length(); // No further splitter: the slice runs to the end.
		}

		if (index == p_slice) {
			return substr(slice_start, slice_end - slice_start);
		}

		if (slice_end == length()) {
			break; // Ran out of slices before reaching the requested index.
		}
		slice_start = slice_end + splitter_length;
	}

	return "";
}
940

941
// Returns the slice with index `p_slice` (0-based) of this string when split
// by the single character `p_splitter`. Returns an empty String when this
// string is empty, `p_slice` is negative, or there is no slice with that index.
String String::get_slicec(char32_t p_splitter, int p_slice) const {
	if (is_empty() || p_slice < 0) {
		return String();
	}

	const char32_t *chars = ptr();
	int slice_start = 0;
	int slice_index = 0;
	for (int i = 0;; i++) {
		const char32_t ch = chars[i];
		const bool at_end = (ch == 0);
		if (!at_end && ch != p_splitter) {
			continue;
		}

		// A slice ends here, either at a splitter or at the terminator.
		if (slice_index == p_slice) {
			return substr(slice_start, i - slice_start);
		}
		if (at_end) {
			return String();
		}
		slice_index++;
		slice_start = i + 1;
	}
}
969

970
// Splits this string on runs of characters with a code below 33 (space and
// control characters) and returns the non-empty pieces.
// When `p_maxsplit` is positive, at most `p_maxsplit` splits are made and the
// rest of the string is returned as the last element.
Vector<String> String::split_spaces(int p_maxsplit) const {
	Vector<String> pieces;
	const int len = length();
	if (len == 0) {
		return pieces;
	}

	int word_start = 0;
	bool in_word = false;

	// `i == len` visits the terminator, which closes a trailing word.
	for (int i = 0; i <= len; i++) {
		const bool is_blank = operator[](i) < 33;

		if (i == 0) {
			in_word = !is_blank;
		}

		if (!is_blank && !in_word) {
			// A new word begins here.
			in_word = true;
			word_start = i;
		}

		if (is_blank && in_word) {
			if (p_maxsplit > 0 && p_maxsplit == pieces.size()) {
				// Put rest of the string and leave cycle.
				pieces.push_back(substr(word_start));
				break;
			}
			pieces.push_back(substr(word_start, i - word_start));
			in_word = false;
		}
	}

	return pieces;
}
1011

1012
// Splits this string by `p_splitter` and returns the pieces in order.
// - An empty splitter splits after every character.
// - Empty pieces are kept only when `p_allow_empty` is true.
// - When `p_maxsplit` is positive, at most `p_maxsplit` splits are made and
//   the remainder of the string becomes the last element.
// An empty string yields [""] when `p_allow_empty` is true, otherwise [].
Vector<String> String::split(const String &p_splitter, bool p_allow_empty, int p_maxsplit) const {
	Vector<String> pieces;

	if (is_empty()) {
		if (p_allow_empty) {
			pieces.push_back("");
		}
		return pieces;
	}

	const int len = length();
	const bool per_character = p_splitter.is_empty();
	const int splitter_length = p_splitter.length();

	for (int from = 0;;) {
		int end = per_character ? from + 1 : find(p_splitter, from);
		if (end < 0) {
			end = len; // No further splitter: the piece runs to the end.
		}

		if (p_allow_empty || end > from) {
			if (p_maxsplit > 0 && p_maxsplit == pieces.size()) {
				// Put rest of the string and leave cycle.
				pieces.push_back(substr(from, len));
				break;
			}
			pieces.push_back(substr(from, end - from));
		}

		if (end == len) {
			break;
		}

		from = end + splitter_length;
	}

	return pieces;
}
1059

1060
// Splits this string by the C string `p_splitter` and returns the pieces in order.
// - A null or empty splitter splits after every character.
// - Empty pieces are kept only when `p_allow_empty` is true.
// - When `p_maxsplit` is positive, at most `p_maxsplit` splits are made and
//   the remainder of the string becomes the last element.
// An empty string yields [""] when `p_allow_empty` is true, otherwise [].
Vector<String> String::split(const char *p_splitter, bool p_allow_empty, int p_maxsplit) const {
	Vector<String> ret;

	if (is_empty()) {
		if (p_allow_empty) {
			ret.push_back("");
		}
		return ret;
	}

	int from = 0;
	int len = length();
	// Decide once whether there is a usable splitter. strlen() must not be
	// reached with a null pointer, which the loop explicitly supports.
	const bool split_every_char = (p_splitter == nullptr || *p_splitter == '\0');
	const int splitter_length = split_every_char ? 0 : strlen(p_splitter);

	while (true) {
		int end;
		if (split_every_char) {
			end = from + 1;
		} else {
			end = find(p_splitter, from);
			if (end < 0) {
				end = len; // No further splitter: the piece runs to the end.
			}
		}
		if (p_allow_empty || (end > from)) {
			if (p_maxsplit <= 0) {
				ret.push_back(substr(from, end - from));
			} else {
				// Put rest of the string and leave cycle.
				if (p_maxsplit == ret.size()) {
					ret.push_back(substr(from, len));
					break;
				}

				// Otherwise, push items until positive limit is reached.
				ret.push_back(substr(from, end - from));
			}
		}

		if (end == len) {
			break;
		}

		from = end + splitter_length;
	}

	return ret;
}
1108

1109
// Splits this string by `p_splitter`, searching from the right, and returns
// the pieces in left-to-right order.
// - An empty splitter splits before every character.
// - Empty pieces produced between splitters are kept only when
//   `p_allow_empty` is true.
// - When `p_maxsplit` is positive, at most `p_maxsplit` splits are made
//   (counted from the right) and the untouched left part becomes the first element.
Vector<String> String::rsplit(const String &p_splitter, bool p_allow_empty, int p_maxsplit) const {
	Vector<String> pieces;
	const int splitter_length = p_splitter.length();
	const bool per_character = p_splitter.is_empty();

	// Length of the not-yet-split prefix; shrinks as pieces are cut off the right.
	int remaining_len = length();

	for (;;) {
		const bool max_reached = (p_maxsplit > 0 && p_maxsplit == pieces.size());
		if (remaining_len < splitter_length || max_reached) {
			// no room for another splitter or hit max splits, push what's left and we're done
			if (p_allow_empty || remaining_len > 0) {
				pieces.push_back(substr(0, remaining_len));
			}
			break;
		}

		int left_edge;
		if (per_character) {
			left_edge = remaining_len - 1;
			if (left_edge == 0) {
				left_edge = -1; // Only one character left: take the "no more splitters" exit.
			}
		} else {
			left_edge = rfind(p_splitter, remaining_len - splitter_length);
		}

		if (left_edge < 0) {
			// no more splitters, we're done
			pieces.push_back(substr(0, remaining_len));
			break;
		}

		const int piece_start = left_edge + splitter_length;
		if (p_allow_empty || piece_start < remaining_len) {
			pieces.push_back(substr(piece_start, remaining_len - piece_start));
		}

		remaining_len = left_edge;
	}

	// Pieces were collected right-to-left.
	pieces.reverse();
	return pieces;
}
1150

1151
Vector<String> String::rsplit(const char *p_splitter, bool p_allow_empty, int p_maxsplit) const {
	// Right-to-left split on a C-string separator. Fields are collected from
	// the end of the string towards its start and reversed before returning.
	// A null or empty p_splitter splits the string into single characters.
	// A positive p_maxsplit stops the splitting once that many fields were
	// collected; the unsplit remainder becomes the first element.
	Vector<String> ret;
	const int len = length();
	// Guard the strlen() call: a null separator is treated like an empty one
	// (the loop below already handles nullptr), instead of being dereferenced.
	const int splitter_length = p_splitter ? (int)strlen(p_splitter) : 0;
	const bool splitter_empty = splitter_length == 0;
	int remaining_len = len;

	while (true) {
		if (remaining_len < splitter_length || (p_maxsplit > 0 && p_maxsplit == ret.size())) {
			// No room for another splitter or hit max splits, push what's left and we're done.
			if (p_allow_empty || remaining_len > 0) {
				ret.push_back(substr(0, remaining_len));
			}
			break;
		}

		int left_edge;
		if (splitter_empty) {
			left_edge = remaining_len - 1;
			if (left_edge == 0) {
				left_edge--; // Skip to the < 0 condition.
			}
		} else {
			left_edge = rfind(p_splitter, remaining_len - splitter_length);
		}

		if (left_edge < 0) {
			// No more splitters, we're done.
			ret.push_back(substr(0, remaining_len));
			break;
		}

		const int substr_start = left_edge + splitter_length;
		if (p_allow_empty || substr_start < remaining_len) {
			ret.push_back(substr(substr_start, remaining_len - substr_start));
		}

		remaining_len = left_edge;
	}

	ret.reverse();
	return ret;
}
1193

1194
Vector<double> String::split_floats(const String &p_splitter, bool p_allow_empty) const {
	// Splits on p_splitter and converts every field with to_float().
	// Empty fields are converted too when p_allow_empty is set, otherwise
	// they are skipped.
	Vector<double> values;
	const int total = length();
	const int sep_len = p_splitter.length();

	// Working copy: each field is temporarily NUL-terminated in place so it
	// can be handed to to_float() as a standalone string, then restored.
	String scratch = *this;

	int start = 0;
	int stop = 0;
	do {
		stop = find(p_splitter, start);
		if (stop < 0) {
			stop = total; // Last field runs to the end of the string.
		}

		if (p_allow_empty || stop > start) {
			scratch[stop] = 0;
			values.push_back(String::to_float(&scratch.get_data()[start]));
			scratch[stop] = _cowdata.get(stop);
		}

		start = stop + sep_len;
	} while (stop != total);

	return values;
}
1220

1221
Vector<float> String::split_floats_mk(const Vector<String> &p_splitters, bool p_allow_empty) const {
	// Multi-separator variant of split_floats(): findmk() locates the next
	// occurrence of any entry of p_splitters and reports which one matched.
	Vector<float> values;
	const int total = length();

	// Working copy: each field is temporarily NUL-terminated in place so it
	// can be handed to to_float() as a standalone string, then restored.
	String scratch = *this;

	int start = 0;
	int stop = 0;
	do {
		int matched = 0;
		int skip = 1;
		stop = findmk(p_splitters, start, &matched);
		if (stop >= 0) {
			skip = p_splitters[matched].length();
		} else {
			stop = total; // No separator left: last field runs to the end.
		}

		if (p_allow_empty || stop > start) {
			scratch[stop] = 0;
			values.push_back(String::to_float(&scratch.get_data()[start]));
			scratch[stop] = _cowdata.get(stop);
		}

		start = stop + skip;
	} while (stop != total);

	return values;
}
1252

1253
Vector<int> String::split_ints(const String &p_splitter, bool p_allow_empty) const {
	// Splits on p_splitter and converts every field with to_int().
	// Empty fields are converted too when p_allow_empty is set, otherwise
	// they are skipped.
	Vector<int> values;
	const int total = length();
	const int sep_len = p_splitter.length();

	int start = 0;
	int stop = 0;
	do {
		stop = find(p_splitter, start);
		if (stop < 0) {
			stop = total; // Last field runs to the end of the string.
		}

		if (p_allow_empty || stop > start) {
			// to_int() receives an explicit length, so no terminator is needed.
			values.push_back(String::to_int(&get_data()[start], stop - start));
		}

		start = stop + sep_len;
	} while (stop != total);

	return values;
}
1276

1277
Vector<int> String::split_ints_mk(const Vector<String> &p_splitters, bool p_allow_empty) const {
	// Multi-separator variant of split_ints(): findmk() locates the next
	// occurrence of any entry of p_splitters and reports which one matched.
	Vector<int> values;
	const int total = length();

	int start = 0;
	int stop = 0;
	do {
		int matched = 0;
		int skip = 1;
		stop = findmk(p_splitters, start, &matched);
		if (stop >= 0) {
			skip = p_splitters[matched].length();
		} else {
			stop = total; // No separator left: last field runs to the end.
		}

		if (p_allow_empty || stop > start) {
			values.push_back(String::to_int(&get_data()[start], stop - start));
		}

		start = stop + skip;
	} while (stop != total);

	return values;
}
1305

1306
String String::join(const Vector<String> &parts) const {
	// Concatenates `parts`, inserting this string between consecutive
	// elements. The result buffer is sized up front and filled with memcpy.
	if (parts.is_empty()) {
		return String();
	}
	if (parts.size() == 1) {
		return parts[0];
	}

	const int sep_len = length();
	const char32_t *sep_chars = ptr();

	// Separators + every part + terminating NUL.
	int total = (parts.size() - 1) * sep_len;
	for (const String &piece : parts) {
		total += piece.length();
	}
	total += 1;

	String joined;
	joined.resize_uninitialized(total);
	char32_t *out = joined.ptrw();

	bool need_separator = false;
	for (const String &piece : parts) {
		// The separator goes in front of every part but the first one.
		if (need_separator && sep_len) {
			memcpy(out, sep_chars, sep_len * sizeof(char32_t));
			out += sep_len;
		}
		need_separator = true;

		const int piece_len = piece.length();
		if (piece_len) {
			memcpy(out, piece.ptr(), piece_len * sizeof(char32_t));
			out += piece_len;
		}
	}

	*out = 0;

	return joined;
}
1346

1347
char32_t String::char_uppercase(char32_t p_char) {
1348
	return _find_upper(p_char);
1349
}
1350

1351
char32_t String::char_lowercase(char32_t p_char) {
1352
	return _find_lower(p_char);
1353
}
1354

1355
String String::to_upper() const {
	// Returns a copy in which every character was passed through
	// _find_upper(). An empty string is returned unchanged.
	if (is_empty()) {
		return *this;
	}

	String result;
	result.resize_uninitialized(size());

	const char32_t *src = ptr();
	char32_t *dst = result.ptrw();
	// Walk up to the terminating NUL, converting one character at a time.
	for (; *src; ++src, ++dst) {
		*dst = _find_upper(*src);
	}
	*dst = 0;

	return result;
}
1373

1374
String String::to_lower() const {
	// Returns a copy in which every character was passed through
	// _find_lower(). An empty string is returned unchanged.
	if (is_empty()) {
		return *this;
	}

	String result;
	result.resize_uninitialized(size());

	const char32_t *src = ptr();
	char32_t *dst = result.ptrw();
	// Walk up to the terminating NUL, converting one character at a time.
	for (; *src; ++src, ++dst) {
		*dst = _find_lower(*src);
	}
	*dst = 0;

	return result;
}
1392

1393
String String::num(double p_num, int p_decimals) {
	// Formats p_num in fixed notation with at most p_decimals fractional
	// digits (capped at MAX_DECIMALS), then strips trailing zeroes while
	// keeping one digit after the period. A negative p_decimals selects an
	// automatic precision. NaN and infinities get fixed spellings.
	if (Math::is_nan(p_num)) {
		return "nan";
	}
	if (Math::is_inf(p_num)) {
		return std::signbit(p_num) ? "-inf" : "inf";
	}

	if (p_decimals < 0) {
		p_decimals = 14;
		const double magnitude = Math::abs(p_num);
		if (magnitude > 10) {
			// Keep the overall digit count aligned with the default above:
			// only numbers with a positive power of ten need the log10
			// subtracted.
			p_decimals -= (int)std::floor(std::log10(magnitude));
		}
	}
	if (p_decimals > MAX_DECIMALS) {
		p_decimals = MAX_DECIMALS;
	}

	// Build the printf format by hand: "%lf", "%.Nlf" or "%.NNlf".
	char fmt[7];
	char *fmt_end = fmt;
	*fmt_end++ = '%';
	if (p_decimals >= 0) {
		*fmt_end++ = '.';
		if (p_decimals >= 10) {
			*fmt_end++ = '0' + (p_decimals / 10);
		}
		*fmt_end++ = '0' + (p_decimals % 10);
	}
	*fmt_end++ = 'l';
	*fmt_end++ = 'f';
	*fmt_end = 0;

	// Converting a double with as many decimal places as DBL_MAX or DBL_MIN
	// would theoretically need a buffer of at least DBL_MAX_10_EXP + 2 for
	// DBL_MAX and DBL_MAX_10_EXP + 4 for DBL_MIN. Those sizes still caused
	// exceptions in practice; testing upwards from DBL_MAX_10_EXP + 10 one by
	// one, DBL_MAX_10_EXP + 17 (325) was the first size that did not.
	char buf[325];

#if defined(__GNUC__) || defined(_MSC_VER)
	// PLEASE NOTE: although the vcrt online reference states that snprintf
	// safely truncates the output to the given buffer size, a case was found
	// where this is not true, so the buffer has to be as big as needed.
	snprintf(buf, sizeof(buf), fmt, p_num);
#else
	sprintf(buf, fmt, p_num);
#endif

	buf[sizeof(buf) - 1] = 0;

	// Destroy trailing zeroes, except one after period.
	bool has_period = false;
	int text_len = 0;
	for (; buf[text_len]; text_len++) {
		if (buf[text_len] == '.') {
			has_period = true;
		}
	}

	if (has_period) {
		for (int i = text_len - 1; i > 0; i--) {
			const char ch = buf[i];
			if (ch == '.') {
				// Everything after the period was zero: put one zero back.
				buf[i + 1] = '0';
				break;
			}
			if (ch != '0') {
				break;
			}
			buf[i] = 0;
		}
	}

	return buf;
}
1488

1489
String String::num_int64(int64_t p_num, int base, bool capitalize_hex) {
	// Renders a signed 64-bit integer in the given base (2..36). Digits above
	// 9 use letters, upper-case when capitalize_hex is set. Negative values
	// get a leading '-'. Fails with an empty string for an unsupported base.
	ERR_FAIL_COND_V_MSG(base < 2 || base > 36, "", "Cannot convert to base " + itos(base) + ", since the value is " + (base < 2 ? "less than 2." : "greater than 36."));

	const bool negative = p_num < 0;

	// Count the digits (at least one, so zero prints as "0").
	int width = 1;
	for (int64_t rest = p_num / base; rest; rest /= base) {
		width++;
	}
	if (negative) {
		width++; // Room for the minus sign.
	}

	String text;
	text.resize_uninitialized(width + 1);
	char32_t *out = text.ptrw();
	char32_t *cursor = out + width;
	*cursor = 0;

	// Fill the digits from the least significant one backwards. The remainder
	// is made positive per digit, so the value itself is never negated.
	int64_t rest = p_num;
	do {
		const int digit = Math::abs(rest % base);
		*--cursor = (digit < 10) ? ('0' + digit) : ((capitalize_hex ? 'A' : 'a') + (digit - 10));
		rest /= base;
	} while (rest);

	if (negative) {
		out[0] = '-';
	}

	return text;
}
1528

1529
String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
	// Renders an unsigned 64-bit integer in the given base (2..36). Digits
	// above 9 use letters, upper-case when capitalize_hex is set. Fails with
	// an empty string for an unsupported base.
	ERR_FAIL_COND_V_MSG(base < 2 || base > 36, "", "Cannot convert to base " + itos(base) + ", since the value is " + (base < 2 ? "less than 2." : "greater than 36."));

	// Count the digits (at least one, so zero prints as "0").
	int width = 1;
	for (uint64_t rest = p_num / base; rest; rest /= base) {
		width++;
	}

	String text;
	text.resize_uninitialized(width + 1);
	char32_t *cursor = text.ptrw() + width;
	*cursor = 0;

	// Fill the digits from the least significant one backwards.
	uint64_t rest = p_num;
	do {
		const int digit = rest % base;
		*--cursor = (digit < 10) ? ('0' + digit) : ((capitalize_hex ? 'A' : 'a') + (digit - 10));
		rest /= base;
	} while (rest);

	return text;
}
1559

1560
String String::num_real(double p_num, bool p_trailing) {
	// Non-finite values use the spellings produced by num().
	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
		return num(p_num, 0);
	}

	// Values that survive a round trip through int64_t are printed as
	// integers, optionally followed by ".0".
	if (p_num == (double)(int64_t)p_num) {
		return p_trailing ? num_int64((int64_t)p_num) + ".0" : num_int64((int64_t)p_num);
	}

	// Start from a default precision and reduce it by the number of digits in
	// front of the period; only magnitudes with a positive power of ten need
	// the log10 subtracted.
	const double magnitude = Math::abs(p_num);
	int precision = 14;
	if (magnitude > 10) {
		precision -= (int)std::floor(std::log10(magnitude));
	}

	return num(p_num, precision);
}
1583

1584
String String::num_real(float p_num, bool p_trailing) {
	// Non-finite values use the spellings produced by num().
	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
		return num(p_num, 0);
	}

	// Values that survive a round trip through int64_t are printed as
	// integers, optionally followed by ".0".
	if (p_num == (float)(int64_t)p_num) {
		return p_trailing ? num_int64((int64_t)p_num) + ".0" : num_int64((int64_t)p_num);
	}

	// Start from a default precision (smaller than the double overload's) and
	// reduce it by the number of digits in front of the period; only
	// magnitudes with a positive power of ten need the log10 subtracted.
	const float magnitude = Math::abs(p_num);
	int precision = 6;
	if (magnitude > 10) {
		precision -= (int)std::floor(std::log10(magnitude));
	}

	return num(p_num, precision);
}
1605

1606
String String::num_scientific(double p_num) {
	// Non-finite values use the spellings produced by num().
	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
		return num(p_num, 0);
	}

	// grisu2::to_chars() writes into the buffer and returns a pointer past
	// the last character written; only that range is converted.
	char digits[256];
	char *digits_end = grisu2::to_chars(digits, p_num);
	const auto written = digits_end - digits;
	return String::ascii(Span(digits, written));
}
1614

1615
String String::num_scientific(float p_num) {
	// Non-finite values use the spellings produced by num().
	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
		return num(p_num, 0);
	}

	// grisu2::to_chars() writes into the buffer and returns a pointer past
	// the last character written; only that range is converted.
	char digits[256];
	char *digits_end = grisu2::to_chars(digits, p_num);
	const auto written = digits_end - digits;
	return String::ascii(Span(digits, written));
}
1623

1624
String String::md5(const uint8_t *p_md5) {
	// Hex-encodes the first 16 bytes at p_md5 (the size of an MD5 digest).
	const int digest_bytes = 16;
	return String::hex_encode_buffer(p_md5, digest_bytes);
}
1627

1628
String String::hex_encode_buffer(const uint8_t *p_buffer, int p_len) {
	// Produces two lower-case hexadecimal digits per input byte, high nibble
	// first.
	static const char digits[] = "0123456789abcdef";

	String encoded;
	encoded.resize_uninitialized(p_len * 2 + 1);
	char32_t *out = encoded.ptrw();

	for (int i = 0; i < p_len; i++) {
		const uint8_t byte = p_buffer[i];
		out[0] = digits[byte >> 4];
		out[1] = digits[byte & 0xF];
		out += 2;
	}

	*out = 0;

	return encoded;
}
1644

1645
Vector<uint8_t> String::hex_decode() const {
	// Decodes a string of hexadecimal digit pairs (either case) into bytes.
	// Fails with an empty vector when the length is odd or when a character
	// that is not a hexadecimal digit is encountered.
	ERR_FAIL_COND_V_MSG(length() % 2 != 0, Vector<uint8_t>(), "Hexadecimal string of uneven length.");

// Declares `m_output` and sets it to the value (0-15) of the hex digit at
// `m_index`, or bails out of hex_decode() on an invalid character.
// The index is passed through itos() so that the error message shows it as a
// single decimal number: pasted raw, an argument such as `i * 2 + 1` would be
// spliced into the string concatenation term by term.
#define HEX_TO_BYTE(m_output, m_index) \
	uint8_t m_output; \
	c = operator[](m_index); \
	if (is_digit(c)) { \
		m_output = c - '0'; \
	} else if (c >= 'a' && c <= 'f') { \
		m_output = c - 'a' + 10; \
	} else if (c >= 'A' && c <= 'F') { \
		m_output = c - 'A' + 10; \
	} else { \
		ERR_FAIL_V_MSG(Vector<uint8_t>(), "Invalid hexadecimal character \"" + chr(c) + "\" at index " + itos(m_index) + "."); \
	}

	Vector<uint8_t> out;
	int len = length() / 2;
	out.resize_uninitialized(len);
	uint8_t *out_ptrw = out.ptrw();
	for (int i = 0; i < len; i++) {
		char32_t c; // Scratch variable used by HEX_TO_BYTE.
		HEX_TO_BYTE(first, i * 2);
		HEX_TO_BYTE(second, i * 2 + 1);
		out_ptrw[i] = first * 16 + second;
	}
	return out;
#undef HEX_TO_BYTE
}
1674

1675
void String::print_unicode_error(const String &p_message, bool p_critical) const {
1676
	if (p_critical) {
1677
		print_error(vformat(U"Unicode parsing error, some characters were replaced with � (U+FFFD): %s", p_message));
1678
	} else {
1679
		print_error(vformat("Unicode parsing error: %s", p_message));
1680
	}
1681
}
1682

1683
CharString String::ascii(bool p_allow_extended) const {
	// Narrows the string to 8-bit characters. Code points up to 0x7f are
	// always accepted, up to 0xff only when p_allow_extended is set; anything
	// else is reported and replaced with a space.
	if (!length()) {
		return CharString();
	}

	CharString narrow;
	narrow.resize_uninitialized(size());
	char *dst = narrow.ptrw();
	const char32_t *src = ptr();

	const char32_t highest_allowed = p_allow_extended ? 0xff : 0x7f;

	// size() is used as the bound, so the terminating NUL is copied as well.
	for (int i = 0; i < size(); i++) {
		const char32_t c = src[i];
		if (c > highest_allowed) {
			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
			dst[i] = 0x20; // ASCII doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane.
		} else {
			dst[i] = char(c);
		}
	}

	return narrow;
}
1705

1706
Error String::append_ascii(const Span<char> &p_range) {
	// Appends the bytes of p_range, interpreting them as 7-bit ASCII.
	// NUL bytes and bytes above 127 are reported and stored as
	// _replacement_char; only the latter make the call return
	// ERR_INVALID_DATA.
	if (p_range.is_empty()) {
		return OK;
	}

	const int old_length = length();
	resize_uninitialized(old_length + p_range.size() + 1); // Include \0

	char32_t *out = ptrw() + old_length;
	const char *in = p_range.ptr();
	const char *const in_end = in + p_range.size();
	Error status = OK;

	while (in < in_end) {
		// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
		const uint8_t byte = *in;
		if (unlikely(byte > 127)) {
			print_unicode_error(vformat("Invalid ASCII codepoint (%x)", (uint32_t)byte), true);
			status = ERR_INVALID_DATA;
			*out = _replacement_char;
		} else if (unlikely(byte == '\0')) {
			// NUL in string is allowed by the unicode standard, but unsupported in our implementation right now.
			print_unicode_error("Unexpected NUL character", true);
			*out = _replacement_char;
		} else {
			*out = byte;
		}
		++in;
		++out;
	}
	*out = _null;

	return status;
}
1737

1738
Error String::append_utf8(const char *p_utf8, int p_len) {
	// Decodes UTF-8 from p_utf8 and appends the result to this string.
	// p_len is the number of bytes to read; a negative value means "up to
	// the terminating NUL". Decoding also stops at the first NUL byte inside
	// the range. A leading byte order mark is skipped.
	// Malformed input is reported through print_unicode_error() and stored as
	// _replacement_char. Returns ERR_INVALID_DATA when p_utf8 is null or when
	// any malformed sequence was found, OK otherwise.
	if (!p_utf8) {
		return ERR_INVALID_DATA;
	}

	/* HANDLE BOM (Byte Order Mark) */
	if (p_len < 0 || p_len >= 3) {
		// The && chain stops at the first mismatch, so a NUL-terminated input
		// shorter than three bytes is never read past its terminator.
		bool has_bom = uint8_t(p_utf8[0]) == 0xef && uint8_t(p_utf8[1]) == 0xbb && uint8_t(p_utf8[2]) == 0xbf;
		if (has_bom) {
			// 8-bit encoding, byte order has no meaning in UTF-8, just skip it.
			if (p_len >= 0) {
				p_len -= 3;
			}
			p_utf8 += 3;
		}
	}

	if (p_len < 0) {
		p_len = strlen(p_utf8);
	}

	const int prev_length = length();
	// If all utf8 characters maps to ASCII, then the max size will be p_len, and we add +1 for the null termination.
	resize_uninitialized(prev_length + p_len + 1);
	char32_t *dst = ptrw() + prev_length;

	Error result = Error::OK;

	const uint8_t *ptrtmp = (uint8_t *)p_utf8;
	const uint8_t *ptr_limit = (uint8_t *)p_utf8 + p_len;

	while (ptrtmp < ptr_limit && *ptrtmp) {
		uint8_t c = *ptrtmp;
		// Every iteration stores exactly one code point and consumes `size`
		// bytes; both default to "one replacement character for one byte".
		uint32_t unicode = _replacement_char;
		uint32_t size = 1;

		if ((c & 0b10000000) == 0) {
			// Single byte (ASCII).
			unicode = c;
			if (unicode > 0x7F) {
				// Defensive only: a byte with the top bit clear cannot exceed 0x7F.
				// Report the offending value before it is replaced.
				print_unicode_error(vformat("Invalid unicode codepoint (%d)", unicode), true);
				unicode = _replacement_char;
				result = Error::ERR_INVALID_DATA;
			}
		} else if ((c & 0b11100000) == 0b11000000) {
			// Two-byte sequence.
			if (ptrtmp + 1 >= ptr_limit) {
				print_unicode_error(vformat("Missing %x UTF-8 continuation byte", c), true);
				result = Error::ERR_INVALID_DATA;
			} else {
				uint8_t c2 = *(ptrtmp + 1);

				if ((c2 & 0b11000000) == 0b10000000) {
					unicode = (uint32_t)((c & 0b00011111) << 6) | (uint32_t)(c2 & 0b00111111);

					if (unicode < 0x80) {
						unicode = _replacement_char;
						print_unicode_error(vformat("Overlong encoding (%x %x)", c, c2));
						result = Error::ERR_INVALID_DATA;
					} else if (unicode > 0x7FF) {
						// Defensive only: 5 + 6 payload bits cannot exceed 0x7FF.
						// Report the offending value before it is replaced.
						print_unicode_error(vformat("Invalid unicode codepoint (%d)", unicode), true);
						unicode = _replacement_char;
						result = Error::ERR_INVALID_DATA;
					} else {
						size = 2;
					}
				} else {
					print_unicode_error(vformat("Byte %x is not a correct continuation byte after %x", c2, c));
					result = Error::ERR_INVALID_DATA;
				}
			}
		} else if ((c & 0b11110000) == 0b11100000) {
			// Three-byte sequence. The allowed range of the second byte is
			// narrowed for the lead bytes 0xE0 and 0xED.
			uint32_t range_min = (c == 0xE0) ? 0xA0 : 0x80;
			uint32_t range_max = (c == 0xED) ? 0x9F : 0xBF;
			// Bytes beyond the input limit read as 0, which is never valid here.
			uint8_t c2 = (ptrtmp + 1) < ptr_limit ? *(ptrtmp + 1) : 0;
			uint8_t c3 = (ptrtmp + 2) < ptr_limit ? *(ptrtmp + 2) : 0;
			bool c2_valid = c2 && (c2 >= range_min) && (c2 <= range_max);
			bool c3_valid = c3 && ((c3 & 0b11000000) == 0b10000000);

			if (c2_valid && c3_valid) {
				unicode = (uint32_t)((c & 0b00001111) << 12) | (uint32_t)((c2 & 0b00111111) << 6) | (uint32_t)(c3 & 0b00111111);

				if (unicode < 0x800) {
					unicode = _replacement_char;
					print_unicode_error(vformat("Overlong encoding (%x %x %x)", c, c2, c3));
					result = Error::ERR_INVALID_DATA;
				} else if (unicode > 0xFFFF) {
					// Defensive only: 4 + 6 + 6 payload bits cannot exceed 0xFFFF.
					// Report the offending value before it is replaced.
					print_unicode_error(vformat("Invalid unicode codepoint (%d)", unicode), true);
					unicode = _replacement_char;
					result = Error::ERR_INVALID_DATA;
				} else {
					size = 3;
				}
			} else {
				if (c2 == 0) {
					print_unicode_error(vformat("Missing %x UTF-8 continuation byte", c), true);
				} else if (c2_valid == false) {
					print_unicode_error(vformat("Byte %x is not a correct continuation byte after %x", c2, c));
				} else if (c3 == 0) {
					print_unicode_error(vformat("Missing %x %x UTF-8 continuation byte", c, c2), true);
				} else {
					print_unicode_error(vformat("Byte %x is not a correct continuation byte after %x %x", c3, c, c2));
					// The Unicode specification, in paragraph 3.9 "Unicode Encoding Forms" Conformance,
					// states: "Only when a sequence of two or three bytes is a truncated version of a sequence which is
					// otherwise well-formed to that point, is more than one byte replaced with a single U+FFFD"
					// So here we replace the first 2 bytes with one single replacement_char.
					size = 2;
				}

				result = Error::ERR_INVALID_DATA;
			}
		} else if ((c & 0b11111000) == 0b11110000) {
			// Four-byte sequence. The allowed range of the second byte is
			// narrowed for the lead bytes 0xF0 and 0xF4.
			uint32_t range_min = (c == 0xF0) ? 0x90 : 0x80;
			uint32_t range_max = (c == 0xF4) ? 0x8F : 0xBF;

			// Bytes beyond the input limit read as 0, which is never valid here.
			uint8_t c2 = ((ptrtmp + 1) < ptr_limit) ? *(ptrtmp + 1) : 0;
			uint8_t c3 = ((ptrtmp + 2) < ptr_limit) ? *(ptrtmp + 2) : 0;
			uint8_t c4 = ((ptrtmp + 3) < ptr_limit) ? *(ptrtmp + 3) : 0;

			bool c2_valid = c2 && (c2 >= range_min) && (c2 <= range_max);
			bool c3_valid = c3 && ((c3 & 0b11000000) == 0b10000000);
			bool c4_valid = c4 && ((c4 & 0b11000000) == 0b10000000);

			if (c2_valid && c3_valid && c4_valid) {
				unicode = (uint32_t)((c & 0b00000111) << 18) | (uint32_t)((c2 & 0b00111111) << 12) | (uint32_t)((c3 & 0b00111111) << 6) | (uint32_t)(c4 & 0b00111111);

				if (unicode < 0x10000) {
					unicode = _replacement_char;
					print_unicode_error(vformat("Overlong encoding (%x %x %x %x)", c, c2, c3, c4));
					result = Error::ERR_INVALID_DATA;
				} else if (unicode > 0x10FFFF) {
					// Reachable with lead bytes 0xF5..0xF7.
					// Report the offending value before it is replaced.
					print_unicode_error(vformat("Invalid unicode codepoint (%d)", unicode), true);
					unicode = _replacement_char;
					result = Error::ERR_INVALID_DATA;
				} else {
					size = 4;
				}
			} else {
				// As in the three-byte case, a well-formed but truncated prefix
				// is replaced as a whole (size = 2 or 3).
				if (c2 == 0) {
					print_unicode_error(vformat("Missing %x UTF-8 continuation byte", c), true);
				} else if (c2_valid == false) {
					print_unicode_error(vformat("Byte %x is not a correct continuation byte after %x", c2, c));
				} else if (c3 == 0) {
					print_unicode_error(vformat("Missing %x %x UTF-8 continuation byte", c, c2), true);
				} else if (c3_valid == false) {
					print_unicode_error(vformat("Byte %x is not a correct continuation byte after %x %x", c3, c, c2));
					size = 2;
				} else if (c4 == 0) {
					print_unicode_error(vformat("Missing %x %x %x UTF-8 continuation byte", c, c2, c3), true);
				} else {
					print_unicode_error(vformat("Byte %x is not a correct continuation byte after %x %x %x", c4, c, c2, c3));
					size = 3;
				}

				result = Error::ERR_INVALID_DATA;
			}
		} else {
			print_unicode_error(vformat("Invalid UTF-8 leading byte (%x)", c), true);
			result = Error::ERR_INVALID_DATA;
		}

		(*dst++) = unicode;
		ptrtmp += size;
	}

	// Terminate, then shrink to the number of characters actually produced
	// (multi-byte sequences yield fewer characters than input bytes).
	(*dst++) = 0;
	resize_uninitialized(dst - ptr());

	return result;
}
1906

1907
CharString String::utf8(Vector<uint8_t> *r_ch_length_map) const {
	// Encodes the string as UTF-8 in two passes: the first computes the
	// number of bytes each character needs, the second writes them.
	// When r_ch_length_map is given, it is resized to length() and entry i
	// receives the number of bytes emitted for character i.
	// Values above 0x10FFFF are still encoded (up to 6 bytes) but reported;
	// values above 0x7FFFFFFF are replaced with _replacement_char.
	int l = length();
	if (!l) {
		return CharString();
	}

	uint8_t *map_ptr = nullptr;
	if (r_ch_length_map) {
		r_ch_length_map->resize_uninitialized(l);
		map_ptr = r_ch_length_map->ptrw();
	}

	const char32_t *d = &operator[](0);
	int fl = 0;
	for (int i = 0; i < l; i++) {
		uint32_t c = d[i];
		int ch_w = 1;
		if (c <= 0x7f) { // 7 bits.
			ch_w = 1;
		} else if (c <= 0x7ff) { // 11 bits
			ch_w = 2;
		} else if (c <= 0xffff) { // 16 bits
			ch_w = 3;
		} else if (c <= 0x001fffff) { // 21 bits
			ch_w = 4;
		} else if (c <= 0x03ffffff) { // 26 bits
			ch_w = 5;
			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
		} else if (c <= 0x7fffffff) { // 31 bits
			ch_w = 6;
			print_unicode_error(vformat("Invalid unicode codepoint (%x)", c));
		} else {
			// Must match the write pass below, which emits the replacement
			// character as three bytes. Counting fewer would overrun the
			// output buffer and make the length map wrong.
			ch_w = 3;
			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-8", c), true);
		}
		fl += ch_w;
		if (map_ptr) {
			map_ptr[i] = ch_w;
		}
	}

	CharString utf8s;
	if (fl == 0) {
		return utf8s;
	}

	utf8s.resize_uninitialized(fl + 1);
	uint8_t *cdst = (uint8_t *)utf8s.get_data();

#define APPEND_CHAR(m_c) *(cdst++) = m_c

	for (int i = 0; i < l; i++) {
		uint32_t c = d[i];

		if (c <= 0x7f) { // 7 bits.
			APPEND_CHAR(c);
		} else if (c <= 0x7ff) { // 11 bits
			APPEND_CHAR(uint32_t(0xc0 | ((c >> 6) & 0x1f))); // Top 5 bits.
			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
		} else if (c <= 0xffff) { // 16 bits
			APPEND_CHAR(uint32_t(0xe0 | ((c >> 12) & 0x0f))); // Top 4 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
		} else if (c <= 0x001fffff) { // 21 bits
			APPEND_CHAR(uint32_t(0xf0 | ((c >> 18) & 0x07))); // Top 3 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
		} else if (c <= 0x03ffffff) { // 26 bits
			APPEND_CHAR(uint32_t(0xf8 | ((c >> 24) & 0x03))); // Top 2 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Upper middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
		} else if (c <= 0x7fffffff) { // 31 bits
			APPEND_CHAR(uint32_t(0xfc | ((c >> 30) & 0x01))); // Top 1 bit.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 24) & 0x3f))); // Upper upper middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 18) & 0x3f))); // Lower upper middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 12) & 0x3f))); // Upper lower middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | ((c >> 6) & 0x3f))); // Lower lower middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | (c & 0x3f))); // Bottom 6 bits.
		} else {
			// The string is a valid UTF32, so it should never happen ...
			// Three bytes, as accounted for in the sizing pass above.
			print_unicode_error(vformat("Non scalar value (%x)", c), true);
			APPEND_CHAR(uint32_t(0xe0 | ((_replacement_char >> 12) & 0x0f))); // Top 4 bits.
			APPEND_CHAR(uint32_t(0x80 | ((_replacement_char >> 6) & 0x3f))); // Middle 6 bits.
			APPEND_CHAR(uint32_t(0x80 | (_replacement_char & 0x3f))); // Bottom 6 bits.
		}
	}
#undef APPEND_CHAR
	*cdst = 0; // Trailing zero.

	return utf8s;
}
2001

2002
Error String::append_utf16(const char16_t *p_utf16, int p_len, bool p_default_little_endian) {
	// Decodes UTF-16 from p_utf16 and appends the result to this string.
	// p_len is the number of 16-bit units to read; a negative value means
	// "up to the terminating zero unit". Decoding also stops at the first
	// zero unit inside the range. A leading byte order mark selects the byte
	// order and is skipped; without one, p_default_little_endian decides.
	// Unpaired surrogates are stored as is and reported. Returns
	// ERR_INVALID_DATA for a null pointer, ERR_PARSE_ERROR when an unpaired
	// surrogate was found, OK otherwise. Appending empty input leaves the
	// string untouched.
	if (!p_utf16) {
		return ERR_INVALID_DATA;
	}

	int cstr_size = 0; // Number of UTF-16 units to consume.
	int str_size = 0; // Number of UTF-32 characters they decode to.

#ifdef BIG_ENDIAN_ENABLED
	bool byteswap = p_default_little_endian;
#else
	bool byteswap = !p_default_little_endian;
#endif
	/* HANDLE BOM (Byte Order Mark) */
	if (p_len < 0 || p_len >= 1) {
		bool has_bom = false;
		if (uint16_t(p_utf16[0]) == 0xfeff) { // correct BOM, read as is
			has_bom = true;
			byteswap = false;
		} else if (uint16_t(p_utf16[0]) == 0xfffe) { // backwards BOM, swap bytes
			has_bom = true;
			byteswap = true;
		}
		if (has_bom) {
			if (p_len >= 0) {
				p_len -= 1;
			}
			p_utf16 += 1;
		}
	}

	// First pass: measure the input and report unpaired surrogates.
	// `skip` means "the previous unit was a lead surrogate that is still
	// waiting for its trail".
	bool decode_error = false;
	{
		const char16_t *ptrtmp = p_utf16;
		const char16_t *ptrtmp_limit = p_len >= 0 ? &p_utf16[p_len] : nullptr;
		uint32_t c_prev = 0;
		bool skip = false;
		while (ptrtmp != ptrtmp_limit && *ptrtmp) {
			uint32_t c = (byteswap) ? BSWAP16(*ptrtmp) : *ptrtmp;

			if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
				if (skip) {
					print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
					decode_error = true;
				}
				skip = true;
			} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
				if (skip) {
					str_size--; // Lead + trail collapse into one character.
				} else {
					print_unicode_error(vformat("Unpaired trail surrogate (%x [lead?] %x)", c_prev, c));
					decode_error = true;
				}
				skip = false;
			} else {
				if (skip) {
					// A lead surrogate followed by an ordinary unit is unpaired too.
					print_unicode_error(vformat("Unpaired lead surrogate (%x [trail?] %x)", c_prev, c));
					decode_error = true;
				}
				skip = false;
			}

			c_prev = c;
			str_size++;
			cstr_size++;
			ptrtmp++;
		}

		if (skip) {
			print_unicode_error(vformat("Unpaired lead surrogate (%x [eol])", c_prev));
			decode_error = true;
		}
	}

	if (str_size == 0) {
		// Nothing to append; the existing contents are kept.
		return OK;
	}

	const int prev_length = length();
	resize_uninitialized(prev_length + str_size + 1);
	char32_t *dst = ptrw() + prev_length;
	dst[str_size] = 0;

	// Second pass: decode. Every unit counted above must be written exactly
	// once, otherwise part of the uninitialized buffer would stay in the
	// string. A pending lead surrogate is written late, once the next unit
	// shows whether it is paired.
	bool skip = false;
	uint32_t c_prev = 0;
	while (cstr_size) {
		uint32_t c = (byteswap) ? BSWAP16(*p_utf16) : *p_utf16;

		if ((c & 0xfffffc00) == 0xd800) { // lead surrogate
			if (skip) {
				*(dst++) = c_prev; // unpaired, store as is
			}
			skip = true;
		} else if ((c & 0xfffffc00) == 0xdc00) { // trail surrogate
			if (skip) {
				*(dst++) = (c_prev << 10UL) + c - ((0xd800 << 10UL) + 0xdc00 - 0x10000); // decode pair
			} else {
				*(dst++) = c; // unpaired, store as is
			}
			skip = false;
		} else {
			if (skip) {
				*(dst++) = c_prev; // pending lead was unpaired, store as is
			}
			*(dst++) = c;
			skip = false;
		}

		cstr_size--;
		p_utf16++;
		c_prev = c;
	}

	if (skip) {
		*(dst++) = c_prev; // trailing unpaired lead, store as is
	}

	if (decode_error) {
		return ERR_PARSE_ERROR;
	} else {
		return OK;
	}
}
2121

2122
Char16String String::utf16() const {
	// Encodes the string as UTF-16 in two passes: the first computes the
	// number of 16-bit units needed, the second writes them. Characters above
	// 0xFFFF become a surrogate pair. Surrogate values stored in the string
	// are copied through unchanged but reported; values above 0x10FFFF are
	// reported and replaced with _replacement_char.
	int l = length();
	if (!l) {
		return Char16String();
	}

	const char32_t *d = &operator[](0);
	int fl = 0;
	for (int i = 0; i < l; i++) {
		uint32_t c = d[i];
		if (c <= 0xffff) { // 16 bits.
			fl += 1;
			if ((c & 0xfffff800) == 0xd800) {
				print_unicode_error(vformat("Unpaired surrogate (%x)", c));
			}
		} else if (c <= 0x10ffff) { // 32 bits.
			fl += 2;
		} else {
			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as UTF-16", c), true);
			fl += 1; // One unit: the replacement character (see write pass).
		}
	}

	Char16String utf16s;
	if (fl == 0) {
		return utf16s;
	}

	utf16s.resize_uninitialized(fl + 1);
	uint16_t *cdst = (uint16_t *)utf16s.get_data();

#define APPEND_CHAR(m_c) *(cdst++) = m_c

	for (int i = 0; i < l; i++) {
		uint32_t c = d[i];

		if (c <= 0xffff) { // 16 bits.
			APPEND_CHAR(c);
		} else if (c <= 0x10ffff) { // 32 bits.
			APPEND_CHAR(uint32_t((c >> 10) + 0xd7c0)); // lead surrogate.
			APPEND_CHAR(uint32_t((c & 0x3ff) | 0xdc00)); // trail surrogate.
		} else {
			// The string is a valid UTF32, so it should never happen ...
			// The replacement character (U+FFFD per the message in
			// print_unicode_error()) fits in a single unit. Writing exactly
			// one unit also matches the sizing pass above, so the buffer is
			// not overrun.
			APPEND_CHAR(uint32_t(_replacement_char));
		}
	}
#undef APPEND_CHAR
	*cdst = 0; // Trailing zero.

	return utf16s;
}
2174

2175
int64_t String::hex_to_int() const {
2176
	int len = length();
2177
	if (len == 0) {
2178
		return 0;
2179
	}
2180

2181
	const char32_t *s = ptr();
2182

2183
	int64_t sign = s[0] == '-' ? -1 : 1;
2184

2185
	if (sign < 0) {
2186
		s++;
2187
	}
2188

2189
	if (len > 2 && s[0] == '0' && lower_case(s[1]) == 'x') {
2190
		s += 2;
2191
	}
2192

2193
	int64_t hex = 0;
2194

2195
	while (*s) {
2196
		char32_t c = lower_case(*s);
2197
		int64_t n;
2198
		if (is_digit(c)) {
2199
			n = c - '0';
2200
		} else if (c >= 'a' && c <= 'f') {
2201
			n = (c - 'a') + 10;
2202
		} else {
2203
			ERR_FAIL_V_MSG(0, vformat(R"(Invalid hexadecimal notation character "%c" (U+%04X) in string "%s".)", *s, static_cast<int32_t>(*s), *this));
2204
		}
2205
		// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
2206
		bool overflow = ((hex > INT64_MAX / 16) && (sign == 1 || (sign == -1 && hex != (INT64_MAX >> 4) + 1))) || (sign == -1 && hex == (INT64_MAX >> 4) + 1 && c > '0');
2207
		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2208
		hex *= 16;
2209
		hex += n;
2210
		s++;
2211
	}
2212

2213
	return hex * sign;
2214
}
2215

2216
int64_t String::bin_to_int() const {
2217
	int len = length();
2218
	if (len == 0) {
2219
		return 0;
2220
	}
2221

2222
	const char32_t *s = ptr();
2223

2224
	int64_t sign = s[0] == '-' ? -1 : 1;
2225

2226
	if (sign < 0) {
2227
		s++;
2228
	}
2229

2230
	if (len > 2 && s[0] == '0' && lower_case(s[1]) == 'b') {
2231
		s += 2;
2232
	}
2233

2234
	int64_t binary = 0;
2235

2236
	while (*s) {
2237
		char32_t c = lower_case(*s);
2238
		int64_t n;
2239
		if (c == '0' || c == '1') {
2240
			n = c - '0';
2241
		} else {
2242
			return 0;
2243
		}
2244
		// Check for overflow/underflow, with special case to ensure INT64_MIN does not result in error
2245
		bool overflow = ((binary > INT64_MAX / 2) && (sign == 1 || (sign == -1 && binary != (INT64_MAX >> 1) + 1))) || (sign == -1 && binary == (INT64_MAX >> 1) + 1 && c > '0');
2246
		ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2247
		binary *= 2;
2248
		binary += n;
2249
		s++;
2250
	}
2251

2252
	return binary * sign;
2253
}
2254

2255
template <typename C, typename T>
2256
_ALWAYS_INLINE_ int64_t _to_int(const T &p_in, int to) {
2257
	// Accumulate the total number in an unsigned integer as the range is:
2258
	// +9223372036854775807 to -9223372036854775808 and the smallest negative
2259
	// number does not fit inside an int64_t. So we accumulate the positive
2260
	// number in an unsigned, and then at the very end convert to its signed
2261
	// form.
2262
	uint64_t integer = 0;
2263
	uint8_t digits = 0;
2264
	bool positive = true;
2265

2266
	for (int i = 0; i < to; i++) {
2267
		C c = p_in[i];
2268
		if (is_digit(c)) {
2269
			// No need to do expensive checks unless we're approaching INT64_MAX / INT64_MIN.
2270
			if (unlikely(digits > 18)) {
2271
				bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((positive && c > '7') || (!positive && c > '8')));
2272
				ERR_FAIL_COND_V_MSG(overflow, positive ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_in) + " as a 64-bit signed integer, since the value is " + (positive ? "too large." : "too small."));
2273
			}
2274

2275
			integer *= 10;
2276
			integer += c - '0';
2277
			++digits;
2278

2279
		} else if (integer == 0 && c == '-') {
2280
			positive = !positive;
2281
		}
2282
	}
2283

2284
	if (positive) {
2285
		return int64_t(integer);
2286
	} else {
2287
		return int64_t(integer * uint64_t(-1));
2288
	}
2289
}
2290

2291
int64_t String::to_int() const {
2292
	if (length() == 0) {
2293
		return 0;
2294
	}
2295

2296
	int to = (find_char('.') >= 0) ? find_char('.') : length();
2297

2298
	return _to_int<char32_t>(*this, to);
2299
}
2300

2301
int64_t String::to_int(const char *p_str, int p_len) {
2302
	int to = 0;
2303
	if (p_len >= 0) {
2304
		to = p_len;
2305
	} else {
2306
		while (p_str[to] != 0 && p_str[to] != '.') {
2307
			to++;
2308
		}
2309
	}
2310

2311
	return _to_int<char>(p_str, to);
2312
}
2313

2314
int64_t String::to_int(const wchar_t *p_str, int p_len) {
2315
	int to = 0;
2316
	if (p_len >= 0) {
2317
		to = p_len;
2318
	} else {
2319
		while (p_str[to] != 0 && p_str[to] != '.') {
2320
			to++;
2321
		}
2322
	}
2323

2324
	return _to_int<wchar_t>(p_str, to);
2325
}
2326

2327
bool String::is_numeric() const {
2328
	if (length() == 0) {
2329
		return false;
2330
	}
2331

2332
	int s = 0;
2333
	if (operator[](0) == '-') {
2334
		++s;
2335
	}
2336
	bool dot = false;
2337
	for (int i = s; i < length(); i++) {
2338
		char32_t c = operator[](i);
2339
		if (c == '.') {
2340
			if (dot) {
2341
				return false;
2342
			}
2343
			dot = true;
2344
		} else if (!is_digit(c)) {
2345
			return false;
2346
		}
2347
	}
2348

2349
	return true; // TODO: Use the parser below for this instead
2350
}
2351

2352
template <typename C>
2353
static double built_in_strtod(
2354
		/* A decimal ASCII floating-point number,
2355
		 * optionally preceded by white space. Must
2356
		 * have form "-I.FE-X", where I is the integer
2357
		 * part of the mantissa, F is the fractional
2358
		 * part of the mantissa, and X is the
2359
		 * exponent. Either of the signs may be "+",
2360
		 * "-", or omitted. Either I or F may be
2361
		 * omitted, or both. The decimal point isn't
2362
		 * necessary unless F is present. The "E" may
2363
		 * actually be an "e". E and X may both be
2364
		 * omitted (but not just one). */
2365
		const C *string,
2366
		/* If non-nullptr, store terminating Cacter's
2367
		 * address here. */
2368
		C **endPtr = nullptr) {
2369
	/* Largest possible base 10 exponent. Any
2370
	 * exponent larger than this will already
2371
	 * produce underflow or overflow, so there's
2372
	 * no need to worry about additional digits. */
2373
	static const int maxExponent = 511;
2374
	/* Table giving binary powers of 10. Entry
2375
	 * is 10^2^i. Used to convert decimal
2376
	 * exponents into floating-point numbers. */
2377
	static const double powersOf10[] = {
2378
		10.,
2379
		100.,
2380
		1.0e4,
2381
		1.0e8,
2382
		1.0e16,
2383
		1.0e32,
2384
		1.0e64,
2385
		1.0e128,
2386
		1.0e256
2387
	};
2388

2389
	bool sign, expSign = false;
2390
	double fraction, dblExp;
2391
	const double *d;
2392
	const C *p;
2393
	int c;
2394
	/* Exponent read from "EX" field. */
2395
	int exp = 0;
2396
	/* Exponent that derives from the fractional
2397
	 * part. Under normal circumstances, it is
2398
	 * the negative of the number of digits in F.
2399
	 * However, if I is very long, the last digits
2400
	 * of I get dropped (otherwise a long I with a
2401
	 * large negative exponent could cause an
2402
	 * unnecessary overflow on I alone). In this
2403
	 * case, fracExp is incremented one for each
2404
	 * dropped digit. */
2405
	int fracExp = 0;
2406
	/* Number of digits in mantissa. */
2407
	int mantSize;
2408
	/* Number of mantissa digits BEFORE decimal point. */
2409
	int decPt;
2410
	/* Temporarily holds location of exponent in string. */
2411
	const C *pExp;
2412

2413
	/*
2414
	 * Strip off leading blanks and check for a sign.
2415
	 */
2416

2417
	p = string;
2418
	while (*p == ' ' || *p == '\t' || *p == '\n') {
2419
		p += 1;
2420
	}
2421
	if (*p == '-') {
2422
		sign = true;
2423
		p += 1;
2424
	} else {
2425
		if (*p == '+') {
2426
			p += 1;
2427
		}
2428
		sign = false;
2429
	}
2430

2431
	/*
2432
	 * Count the number of digits in the mantissa (including the decimal
2433
	 * point), and also locate the decimal point.
2434
	 */
2435

2436
	decPt = -1;
2437
	for (mantSize = 0;; mantSize += 1) {
2438
		c = *p;
2439
		if (!is_digit(c)) {
2440
			if ((c != '.') || (decPt >= 0)) {
2441
				break;
2442
			}
2443
			decPt = mantSize;
2444
		}
2445
		p += 1;
2446
	}
2447

2448
	/*
2449
	 * Now suck up the digits in the mantissa. Use two integers to collect 9
2450
	 * digits each (this is faster than using floating-point). If the mantissa
2451
	 * has more than 18 digits, ignore the extras, since they can't affect the
2452
	 * value anyway.
2453
	 */
2454

2455
	pExp = p;
2456
	p -= mantSize;
2457
	if (decPt < 0) {
2458
		decPt = mantSize;
2459
	} else {
2460
		mantSize -= 1; /* One of the digits was the point. */
2461
	}
2462
	if (mantSize > 18) {
2463
		fracExp = decPt - 18;
2464
		mantSize = 18;
2465
	} else {
2466
		fracExp = decPt - mantSize;
2467
	}
2468
	if (mantSize == 0) {
2469
		fraction = 0.0;
2470
		p = string;
2471
		goto done;
2472
	} else {
2473
		int frac1, frac2;
2474

2475
		frac1 = 0;
2476
		for (; mantSize > 9; mantSize -= 1) {
2477
			c = *p;
2478
			p += 1;
2479
			if (c == '.') {
2480
				c = *p;
2481
				p += 1;
2482
			}
2483
			frac1 = 10 * frac1 + (c - '0');
2484
		}
2485
		frac2 = 0;
2486
		for (; mantSize > 0; mantSize -= 1) {
2487
			c = *p;
2488
			p += 1;
2489
			if (c == '.') {
2490
				c = *p;
2491
				p += 1;
2492
			}
2493
			frac2 = 10 * frac2 + (c - '0');
2494
		}
2495
		fraction = (1.0e9 * frac1) + frac2;
2496
	}
2497

2498
	/*
2499
	 * Skim off the exponent.
2500
	 */
2501

2502
	p = pExp;
2503
	if ((*p == 'E') || (*p == 'e')) {
2504
		p += 1;
2505
		if (*p == '-') {
2506
			expSign = true;
2507
			p += 1;
2508
		} else {
2509
			if (*p == '+') {
2510
				p += 1;
2511
			}
2512
			expSign = false;
2513
		}
2514
		if (!is_digit(char32_t(*p))) {
2515
			p = pExp;
2516
			goto done;
2517
		}
2518
		while (is_digit(char32_t(*p))) {
2519
			exp = exp * 10 + (*p - '0');
2520
			p += 1;
2521
		}
2522
	}
2523
	if (expSign) {
2524
		exp = fracExp - exp;
2525
	} else {
2526
		exp = fracExp + exp;
2527
	}
2528

2529
	/*
2530
	 * Generate a floating-point number that represents the exponent. Do this
2531
	 * by processing the exponent one bit at a time to combine many powers of
2532
	 * 2 of 10. Then combine the exponent with the fraction.
2533
	 */
2534

2535
	if (exp < 0) {
2536
		expSign = true;
2537
		exp = -exp;
2538
	} else {
2539
		expSign = false;
2540
	}
2541

2542
	if (exp > maxExponent) {
2543
		exp = maxExponent;
2544
		WARN_PRINT("Exponent too high");
2545
	}
2546
	dblExp = 1.0;
2547
	for (d = powersOf10; exp != 0; exp >>= 1, ++d) {
2548
		if (exp & 01) {
2549
			dblExp *= *d;
2550
		}
2551
	}
2552
	if (expSign) {
2553
		fraction /= dblExp;
2554
	} else {
2555
		fraction *= dblExp;
2556
	}
2557

2558
done:
2559
	if (endPtr != nullptr) {
2560
		*endPtr = (C *)p;
2561
	}
2562

2563
	if (sign) {
2564
		return -fraction;
2565
	}
2566
	return fraction;
2567
}
2568

2569
// Parser states for the character-by-character number reader
// (`String::to_int(const char32_t *, int, bool)` switches on these).
// NOTE(review): READING_DEC and READING_EXP are not referenced in the part of
// the file reviewed here; confirm whether they are still needed.
#define READING_SIGN 0
#define READING_INT 1
#define READING_DEC 2
#define READING_EXP 3
#define READING_DONE 4
2574

2575
double String::to_float(const char *p_str) {
2576
	return built_in_strtod<char>(p_str);
2577
}
2578

2579
double String::to_float(const char32_t *p_str, const char32_t **r_end) {
2580
	return built_in_strtod<char32_t>(p_str, (char32_t **)r_end);
2581
}
2582

2583
double String::to_float(const wchar_t *p_str, const wchar_t **r_end) {
2584
	return built_in_strtod<wchar_t>(p_str, (wchar_t **)r_end);
2585
}
2586

2587
// Returns how many characters the decimal form of `p_int` takes: one per
// digit, plus one for the minus sign of a negative value.
uint32_t String::num_characters(int64_t p_int) {
	int count = 1; // There is always at least one digit.
	if (p_int < 0) {
		++count; // Minus sign.
		// INT64_MIN cannot be negated; INT64_MAX has the same number of digits.
		p_int = (p_int == INT64_MIN) ? INT64_MAX : -p_int;
	}
	for (; p_int >= 10; p_int /= 10) {
		++count;
	}
	return count;
}
2603

2604
int64_t String::to_int(const char32_t *p_str, int p_len, bool p_clamp) {
2605
	if (p_len == 0 || !p_str[0]) {
2606
		return 0;
2607
	}
2608
	///@todo make more exact so saving and loading does not lose precision
2609

2610
	int64_t integer = 0;
2611
	int64_t sign = 1;
2612
	int reading = READING_SIGN;
2613

2614
	const char32_t *str = p_str;
2615
	const char32_t *limit = &p_str[p_len];
2616

2617
	while (*str && reading != READING_DONE && str != limit) {
2618
		char32_t c = *(str++);
2619
		switch (reading) {
2620
			case READING_SIGN: {
2621
				if (is_digit(c)) {
2622
					reading = READING_INT;
2623
					// let it fallthrough
2624
				} else if (c == '-') {
2625
					sign = -1;
2626
					reading = READING_INT;
2627
					break;
2628
				} else if (c == '+') {
2629
					sign = 1;
2630
					reading = READING_INT;
2631
					break;
2632
				} else {
2633
					break;
2634
				}
2635
				[[fallthrough]];
2636
			}
2637
			case READING_INT: {
2638
				if (is_digit(c)) {
2639
					if (integer > INT64_MAX / 10) {
2640
						String number("");
2641
						str = p_str;
2642
						while (*str && str != limit) {
2643
							number += *(str++);
2644
						}
2645
						if (p_clamp) {
2646
							if (sign == 1) {
2647
								return INT64_MAX;
2648
							} else {
2649
								return INT64_MIN;
2650
							}
2651
						} else {
2652
							ERR_FAIL_V_MSG(sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + number + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
2653
						}
2654
					}
2655
					integer *= 10;
2656
					integer += c - '0';
2657
				} else {
2658
					reading = READING_DONE;
2659
				}
2660

2661
			} break;
2662
		}
2663
	}
2664

2665
	return sign * integer;
2666
}
2667

2668
double String::to_float() const {
2669
	if (is_empty()) {
2670
		return 0;
2671
	}
2672
	return built_in_strtod<char32_t>(get_data());
2673
}
2674

2675
// Hashes a zero-terminated `char` string (seed 5381, hash * 33 + c per byte).
uint32_t String::hash(const char *p_cstr) {
	uint32_t result = 5381;
	for (const char *it = p_cstr; *it != 0; ++it) {
		// Read through uint8_t so a signed `char` never contributes a negative value.
		result = result * 33 + static_cast<uint8_t>(*it);
	}
	return result;
}
2687

2688
// Hashes the first `p_len` bytes of a `char` buffer (seed 5381, hash * 33 + c per byte).
uint32_t String::hash(const char *p_cstr, int p_len) {
	uint32_t result = 5381;
	for (int remaining = p_len; remaining > 0; --remaining, ++p_cstr) {
		// Read through uint8_t so a signed `char` never contributes a negative value.
		result = result * 33 + static_cast<uint8_t>(*p_cstr);
	}
	return result;
}
2697

2698
// Hashes the first `p_len` elements of a `wchar_t` buffer (seed 5381, hash * 33 + c).
uint32_t String::hash(const wchar_t *p_cstr, int p_len) {
	// Unsigned type of the same width as wchar_t (2 or 4 bytes), so a signed
	// wchar_t never contributes a negative value.
	using wide_unsigned = std::conditional<sizeof(wchar_t) == 2, uint16_t, uint32_t>::type;

	uint32_t result = 5381;
	for (int remaining = p_len; remaining > 0; --remaining, ++p_cstr) {
		result = result * 33 + static_cast<wide_unsigned>(*p_cstr);
	}
	return result;
}
2709

2710
// Hashes a zero-terminated `wchar_t` string (seed 5381, hash * 33 + c).
uint32_t String::hash(const wchar_t *p_cstr) {
	// Unsigned type of the same width as wchar_t (2 or 4 bytes), so a signed
	// wchar_t never contributes a negative value.
	using wide_unsigned = std::conditional<sizeof(wchar_t) == 2, uint16_t, uint32_t>::type;

	uint32_t result = 5381;
	for (const wchar_t *it = p_cstr; *it != 0; ++it) {
		result = result * 33 + static_cast<wide_unsigned>(*it);
	}
	return result;
}
2724

2725
// Hashes the first `p_len` elements of a `char32_t` buffer (seed 5381, hash * 33 + c).
uint32_t String::hash(const char32_t *p_cstr, int p_len) {
	uint32_t result = 5381;
	for (int remaining = p_len; remaining > 0; --remaining, ++p_cstr) {
		result = result * 33 + *p_cstr;
	}
	return result;
}
2733

2734
// Hashes a zero-terminated `char32_t` string (seed 5381, hash * 33 + c).
uint32_t String::hash(const char32_t *p_cstr) {
	uint32_t result = 5381;
	for (const char32_t *it = p_cstr; *it != 0; ++it) {
		result = result * 33 + uint32_t(*it);
	}
	return result;
}
2745

2746
uint32_t String::hash() const {
2747
	/* simple djb2 hashing */
2748

2749
	const char32_t *chr = get_data();
2750
	uint32_t hashv = 5381;
2751
	uint32_t c = *chr++;
2752

2753
	while (c) {
2754
		hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2755
		c = *chr++;
2756
	}
2757

2758
	return hashv;
2759
}
2760

2761
uint64_t String::hash64() const {
2762
	/* simple djb2 hashing */
2763

2764
	const char32_t *chr = get_data();
2765
	uint64_t hashv = 5381;
2766
	uint64_t c = *chr++;
2767

2768
	while (c) {
2769
		hashv = ((hashv << 5) + hashv) + c; /* hash * 33 + c */
2770
		c = *chr++;
2771
	}
2772

2773
	return hashv;
2774
}
2775

2776
// Computes the MD5 digest of this string's UTF-8 form and returns it as text
// produced by `hex_encode_buffer`.
String String::md5_text() const {
	const int digest_size = 16;
	unsigned char digest[digest_size];

	CharString utf8_bytes = utf8();
	CryptoCore::md5((unsigned char *)utf8_bytes.ptr(), utf8_bytes.length(), digest);

	return String::hex_encode_buffer(digest, digest_size);
}
2782

2783
// Computes the SHA-1 digest of this string's UTF-8 form and returns it as text
// produced by `hex_encode_buffer`.
String String::sha1_text() const {
	const int digest_size = 20;
	unsigned char digest[digest_size];

	CharString utf8_bytes = utf8();
	CryptoCore::sha1((unsigned char *)utf8_bytes.ptr(), utf8_bytes.length(), digest);

	return String::hex_encode_buffer(digest, digest_size);
}
2789

2790
// Computes the SHA-256 digest of this string's UTF-8 form and returns it as
// text produced by `hex_encode_buffer`.
String String::sha256_text() const {
	const int digest_size = 32;
	unsigned char digest[digest_size];

	CharString utf8_bytes = utf8();
	CryptoCore::sha256((unsigned char *)utf8_bytes.ptr(), utf8_bytes.length(), digest);

	return String::hex_encode_buffer(digest, digest_size);
}
2796

2797
// Computes the MD5 digest of this string's UTF-8 form and returns the 16 raw
// digest bytes.
Vector<uint8_t> String::md5_buffer() const {
	unsigned char digest[16];
	CharString utf8_bytes = utf8();
	CryptoCore::md5((unsigned char *)utf8_bytes.ptr(), utf8_bytes.length(), digest);

	Vector<uint8_t> out;
	out.resize_uninitialized(16);
	memcpy(out.ptrw(), digest, 16);
	return out;
}
2810

2811
// Computes the SHA-1 digest of this string's UTF-8 form and returns the 20 raw
// digest bytes.
Vector<uint8_t> String::sha1_buffer() const {
	unsigned char digest[20];
	CharString utf8_bytes = utf8();
	CryptoCore::sha1((unsigned char *)utf8_bytes.ptr(), utf8_bytes.length(), digest);

	Vector<uint8_t> out;
	out.resize_uninitialized(20);
	memcpy(out.ptrw(), digest, 20);
	return out;
}
2825

2826
// Computes the SHA-256 digest of this string's UTF-8 form and returns the 32
// raw digest bytes.
Vector<uint8_t> String::sha256_buffer() const {
	unsigned char digest[32];
	CharString utf8_bytes = utf8();
	CryptoCore::sha256((unsigned char *)utf8_bytes.ptr(), utf8_bytes.length(), digest);

	Vector<uint8_t> out;
	out.resize_uninitialized(32);
	memcpy(out.ptrw(), digest, 32);
	return out;
}
2839

2840
// Returns a copy of this string with `p_string` inserted at `p_at_pos`.
// A position past the end appends; a negative position or an empty
// `p_string` returns this string unchanged.
String String::insert(int p_at_pos, const String &p_string) const {
	if (p_at_pos < 0 || p_string.is_empty()) {
		return *this;
	}

	const int self_len = length();
	const int added_len = p_string.length();
	const int split = (p_at_pos > self_len) ? self_len : p_at_pos;
	const int tail_len = self_len - split;

	String result;
	result.resize_uninitialized(self_len + added_len + 1);
	char32_t *out = result.ptrw();
	const char32_t *in = ptr();

	// Layout: [prefix of this][p_string][rest of this][terminator].
	if (split > 0) {
		memcpy(out, in, split * sizeof(char32_t));
	}
	memcpy(out + split, p_string.ptr(), added_len * sizeof(char32_t));
	if (tail_len > 0) {
		memcpy(out + split + added_len, in + split, tail_len * sizeof(char32_t));
	}
	out[self_len + added_len] = 0;

	return result;
}
2871

2872
// Returns a copy of this string without the `p_chars` characters starting at
// `p_pos`. Negative arguments report an error and return an empty string.
String String::erase(int p_pos, int p_chars) const {
	ERR_FAIL_COND_V_MSG(p_pos < 0, "", vformat("Invalid starting position for `String.erase()`: %d. Starting position must be positive or zero.", p_pos));
	ERR_FAIL_COND_V_MSG(p_chars < 0, "", vformat("Invalid character count for `String.erase()`: %d. Character count must be positive or zero.", p_chars));

	const String kept_head = left(p_pos);
	const String kept_tail = substr(p_pos + p_chars);
	return kept_head + kept_tail;
}
2877

2878
// Returns true when `p_c` equals one of the first `p_chars_len` elements of
// `p_chars`, each element being converted to char32_t for the comparison.
template <class T>
static bool _contains_char(char32_t p_c, const T *p_chars, int p_chars_len) {
	for (int remaining = p_chars_len; remaining > 0; --remaining, ++p_chars) {
		if ((char32_t)*p_chars == p_c) {
			return true;
		}
	}

	return false;
}
2888

2889
// Returns a copy of this string with every occurrence of `p_char` removed.
// The string itself is returned when `p_char` is 0, the string is empty, or
// the character does not occur.
String String::remove_char(char32_t p_char) const {
	if (p_char == 0) {
		return *this;
	}

	const int src_len = length();
	if (src_len == 0) {
		return *this;
	}

	// Locate the first occurrence; without one there is nothing to rebuild.
	const char32_t *src = ptr();
	int first_hit = 0;
	while (first_hit < src_len && src[first_hit] != p_char) {
		++first_hit;
	}
	if (first_hit == src_len) {
		return *this;
	}

	// At least one character goes away, so `src_len` slots are enough for the
	// result plus its terminator.
	String filtered;
	filtered.resize_uninitialized(src_len);
	char32_t *dst = filtered.ptrw();

	// Everything before the first occurrence is copied verbatim.
	memcpy(dst, src, first_hit * sizeof(char32_t));
	int written = first_hit;

	// The remainder is copied one character at a time, dropping matches.
	for (int read = first_hit + 1; read < src_len; ++read) {
		if (src[read] == p_char) {
			continue;
		}
		dst[written++] = src[read];
	}

	dst[written] = _null;

	// Shrink to the characters kept plus the terminator.
	filtered.resize_uninitialized(written + 1);

	return filtered;
}
2938

2939
template <class T>
2940
static String _remove_chars_common(const String &p_this, const T *p_chars, int p_chars_len) {
2941
	// Delegate if p_chars has a single element.
2942
	if (p_chars_len == 1) {
2943
		return p_this.remove_char(*p_chars);
2944
	} else if (p_chars_len == 0) {
2945
		return p_this;
2946
	}
2947

2948
	int len = p_this.length();
2949

2950
	if (len == 0) {
2951
		return p_this;
2952
	}
2953

2954
	int index = 0;
2955
	const char32_t *old_ptr = p_this.ptr();
2956
	for (; index < len; ++index) {
2957
		if (_contains_char(old_ptr[index], p_chars, p_chars_len)) {
2958
			break;
2959
		}
2960
	}
2961

2962
	// If no occurrence of `chars` was found, return this.
2963
	if (index == len) {
2964
		return p_this;
2965
	}
2966

2967
	// If we found at least one occurrence of `chars`, create new string, allocating enough space for the current length minus one.
2968
	String new_string;
2969
	new_string.resize_uninitialized(len);
2970
	char32_t *new_ptr = new_string.ptrw();
2971

2972
	// Copy part of input before `char`.
2973
	memcpy(new_ptr, old_ptr, index * sizeof(char32_t));
2974

2975
	int new_size = index;
2976

2977
	// Copy rest, skipping `chars`.
2978
	for (++index; index < len; ++index) {
2979
		const char32_t old_char = old_ptr[index];
2980
		if (!_contains_char(old_char, p_chars, p_chars_len)) {
2981
			new_ptr[new_size] = old_char;
2982
			++new_size;
2983
		}
2984
	}
2985

2986
	new_ptr[new_size] = 0;
2987

2988
	// Shrink new string to fit.
2989
	new_string.resize_uninitialized(new_size + 1);
2990

2991
	return new_string;
2992
}
2993

2994
// Returns a copy of this string without any of the characters in `p_chars`.
String String::remove_chars(const String &p_chars) const {
	const char32_t *unwanted = p_chars.ptr();
	const int unwanted_count = p_chars.length();
	return _remove_chars_common(*this, unwanted, unwanted_count);
}
2997

2998
// Returns a copy of this string without any of the characters in the
// zero-terminated `p_chars`.
String String::remove_chars(const char *p_chars) const {
	const int unwanted_count = strlen(p_chars);
	return _remove_chars_common(*this, p_chars, unwanted_count);
}
3001

3002
// Returns up to `p_chars` characters starting at `p_from`.
//
// `p_chars == -1` means "through the end of the string". An empty string is
// returned when this string is empty, `p_from` is outside [0, length()), or
// `p_chars` is zero or any other negative value. A count that runs past the
// end is clamped.
String String::substr(int p_from, int p_chars) const {
	const int len = length();

	// Validate the start first so that `len - p_from` below is always in
	// [1, len] and none of the arithmetic can overflow.
	if (is_empty() || p_from < 0 || p_from >= len) {
		return "";
	}

	const int available = len - p_from;
	// Compared against `available` rather than computing `p_from + p_chars`,
	// which overflows for very large counts.
	if (p_chars == -1 || p_chars > available) {
		p_chars = available;
	}

	if (p_chars <= 0) {
		return "";
	}

	if (p_from == 0 && p_chars >= len) {
		return String(*this);
	}

	String s;
	s.append_utf32_unchecked(Span(ptr() + p_from, p_chars));
	return s;
}
3023

3024
int String::find(const String &p_str, int p_from) const {
3025
	const int str_len = p_str.length();
3026
	const int len = length();
3027

3028
	if (p_from < 0) {
3029
		p_from = len - str_len + p_from + 1;
3030
	}
3031
	if (p_from < 0 || p_from > len - str_len || p_str.is_empty()) {
3032
		return -1; // Still out of bounds
3033
	}
3034

3035
	if (p_str.length() == 1) {
3036
		// Optimize with single-char implementation.
3037
		return span().find(p_str[0], p_from);
3038
	}
3039

3040
	return span().find_sequence(p_str.span(), p_from);
3041
}
3042

3043
int String::find(const char *p_str, int p_from) const {
3044
	const int str_len = strlen(p_str);
3045
	const int len = length();
3046

3047
	if (p_from < 0) {
3048
		p_from = len - str_len + p_from + 1;
3049
	}
3050
	if (p_from < 0 || p_from > len - str_len || str_len == 0) {
3051
		return -1; // Still out of bounds
3052
	}
3053

3054
	if (str_len == 1) {
3055
		return find_char(*p_str, p_from); // Optimize with single-char find.
3056
	}
3057

3058
	return span().find_sequence(Span((const unsigned char *)p_str, str_len), p_from);
3059
}
3060

3061
int String::find_char(char32_t p_char, int p_from) const {
3062
	if (p_from < 0) {
3063
		p_from = length() + p_from;
3064
	}
3065
	if (p_from < 0 || p_from >= length()) {
3066
		return -1;
3067
	}
3068
	return span().find(p_char, p_from);
3069
}
3070

3071
int String::findmk(const Vector<String> &p_keys, int p_from, int *r_key) const {
3072
	if (p_from < 0) {
3073
		return -1;
3074
	}
3075
	if (p_keys.is_empty()) {
3076
		return -1;
3077
	}
3078

3079
	//int str_len=p_str.length();
3080
	const String *keys = &p_keys[0];
3081
	int key_count = p_keys.size();
3082
	int len = length();
3083

3084
	if (len == 0) {
3085
		return -1; // won't find anything!
3086
	}
3087

3088
	const char32_t *src = get_data();
3089

3090
	for (int i = p_from; i < len; i++) {
3091
		for (int k = 0; k < key_count; k++) {
3092
			const int str_len = keys[k].length();
3093

3094
			if (i + str_len > len) {
3095
				continue; // Can't find this key here.
3096
			}
3097

3098
			const char32_t *str = keys[k].get_data();
3099
			if (are_spans_equal(src + i, str, str_len)) {
3100
				if (r_key) {
3101
					*r_key = k;
3102
				}
3103
				return i;
3104
			}
3105
		}
3106
	}
3107

3108
	return -1;
3109
}
3110

3111
int String::findn(const String &p_str, int p_from) const {
3112
	const int str_len = p_str.length();
3113
	const int len = length();
3114

3115
	if (p_from < 0) {
3116
		p_from = len - str_len + p_from + 1;
3117
	}
3118
	if (p_from < 0 || p_from > len - str_len || p_str.is_empty()) {
3119
		return -1; // Still out of bounds
3120
	}
3121

3122
	const char32_t *src = get_data();
3123
	const char32_t *str = p_str.get_data();
3124

3125
	for (int i = p_from; i <= (len - str_len); i++) {
3126
		if (strings_equal_lower(src + i, str, str_len)) {
3127
			return i;
3128
		}
3129
	}
3130

3131
	return -1;
3132
}
3133

3134
int String::findn(const char *p_str, int p_from) const {
3135
	const int str_len = strlen(p_str);
3136
	const int len = length();
3137

3138
	if (p_from < 0) {
3139
		p_from = len - str_len + p_from + 1;
3140
	}
3141
	if (p_from < 0 || p_from > len - str_len || str_len == 0) {
3142
		return -1; // Still out of bounds
3143
	}
3144

3145
	const char32_t *src = get_data();
3146

3147
	for (int i = p_from; i <= (len - str_len); i++) {
3148
		if (strings_equal_lower(src + i, p_str, str_len)) {
3149
			return i;
3150
		}
3151
	}
3152

3153
	return -1;
3154
}
3155

3156
int String::rfind(const String &p_str, int p_from) const {
3157
	const int str_len = p_str.length();
3158
	const int len = length();
3159

3160
	if (p_from < 0) {
3161
		p_from = len - str_len + p_from + 1;
3162
	}
3163
	if (p_from < 0 || p_from > len - str_len || p_str.is_empty()) {
3164
		return -1; // Still out of bounds
3165
	}
3166

3167
	if (p_str.length() == 1) {
3168
		// Optimize with single-char implementation.
3169
		return span().rfind(p_str[0], p_from);
3170
	}
3171

3172
	return span().rfind_sequence(p_str.span(), p_from);
3173
}
3174

3175
int String::rfind(const char *p_str, int p_from) const {
3176
	const int str_len = strlen(p_str);
3177
	const int len = length();
3178

3179
	if (p_from < 0) {
3180
		p_from = len - str_len + p_from + 1;
3181
	}
3182
	if (p_from < 0 || p_from > len - str_len || str_len == 0) {
3183
		return -1; // Still out of bounds
3184
	}
3185

3186
	if (str_len == 1) {
3187
		// Optimize with single-char implementation.
3188
		return span().rfind(p_str[0], p_from);
3189
	}
3190

3191
	return span().rfind_sequence(Span((const unsigned char *)p_str, str_len), p_from);
3192
}
3193

3194
int String::rfind_char(char32_t p_char, int p_from) const {
3195
	if (p_from < 0) {
3196
		p_from = length() + p_from;
3197
	}
3198
	if (p_from < 0 || p_from >= length()) {
3199
		return -1;
3200
	}
3201
	return span().rfind(p_char, p_from);
3202
}
3203

3204
int String::rfindn(const String &p_str, int p_from) const {
3205
	const int str_len = p_str.length();
3206
	const int len = length();
3207

3208
	if (p_from < 0) {
3209
		p_from = len - str_len + p_from + 1;
3210
	}
3211
	if (p_from < 0 || p_from > len - str_len || p_str.is_empty()) {
3212
		return -1; // Still out of bounds
3213
	}
3214

3215
	const char32_t *src = get_data();
3216
	const char32_t *str = p_str.get_data();
3217

3218
	for (int i = p_from; i >= 0; i--) {
3219
		if (strings_equal_lower(src + i, str, str_len)) {
3220
			return i;
3221
		}
3222
	}
3223

3224
	return -1;
3225
}
3226

3227
int String::rfindn(const char *p_str, int p_from) const {
3228
	const int str_len = strlen(p_str);
3229
	const int len = length();
3230

3231
	if (p_from < 0) {
3232
		p_from = len - str_len + p_from + 1;
3233
	}
3234
	if (p_from < 0 || p_from > len - str_len || str_len == 0) {
3235
		return -1; // Still out of bounds
3236
	}
3237

3238
	const char32_t *src = get_data();
3239

3240
	for (int i = p_from; i >= 0; i--) {
3241
		if (strings_equal_lower(src + i, p_str, str_len)) {
3242
			return i;
3243
		}
3244
	}
3245

3246
	return -1;
3247
}
3248

3249
bool String::ends_with(const String &p_string) const {
3250
	const int l = p_string.length();
3251
	if (l > length()) {
3252
		return false;
3253
	}
3254
	if (l == 0) {
3255
		return true;
3256
	}
3257

3258
	return memcmp(ptr() + (length() - l), p_string.ptr(), l * sizeof(char32_t)) == 0;
3259
}
3260

3261
bool String::ends_with(const char *p_string) const {
3262
	if (!p_string) {
3263
		return false;
3264
	}
3265

3266
	int l = strlen(p_string);
3267
	if (l > length()) {
3268
		return false;
3269
	}
3270

3271
	if (l == 0) {
3272
		return true;
3273
	}
3274

3275
	const char32_t *s = &operator[](length() - l);
3276

3277
	for (int i = 0; i < l; i++) {
3278
		if (static_cast<char32_t>(p_string[i]) != s[i]) {
3279
			return false;
3280
		}
3281
	}
3282

3283
	return true;
3284
}
3285

3286
bool String::begins_with(const String &p_string) const {
3287
	const int l = p_string.length();
3288
	if (l > length()) {
3289
		return false;
3290
	}
3291
	if (l == 0) {
3292
		return true;
3293
	}
3294

3295
	return memcmp(ptr(), p_string.ptr(), l * sizeof(char32_t)) == 0;
3296
}
3297

3298
bool String::begins_with(const char *p_string) const {
3299
	if (!p_string) {
3300
		return false;
3301
	}
3302

3303
	int l = length();
3304
	if (l == 0) {
3305
		return *p_string == 0;
3306
	}
3307

3308
	const char32_t *str = &operator[](0);
3309
	int i = 0;
3310

3311
	while (*p_string && i < l) {
3312
		if ((char32_t)*p_string != str[i]) {
3313
			return false;
3314
		}
3315
		i++;
3316
		p_string++;
3317
	}
3318

3319
	return *p_string == 0;
3320
}
3321

3322
bool String::is_enclosed_in(const String &p_string) const {
3323
	return begins_with(p_string) && ends_with(p_string);
3324
}
3325

3326
bool String::is_subsequence_of(const String &p_string) const {
3327
	return _base_is_subsequence_of(p_string, false);
3328
}
3329

3330
bool String::is_subsequence_ofn(const String &p_string) const {
3331
	return _base_is_subsequence_of(p_string, true);
3332
}
3333

3334
bool String::is_quoted() const {
3335
	return is_enclosed_in("\"") || is_enclosed_in("'");
3336
}
3337

3338
bool String::is_lowercase() const {
3339
	for (const char32_t *str = &operator[](0); *str; str++) {
3340
		if (is_unicode_upper_case(*str)) {
3341
			return false;
3342
		}
3343
	}
3344
	return true;
3345
}
3346

3347
int String::_count(const String &p_string, int p_from, int p_to, bool p_case_insensitive) const {
3348
	if (p_string.is_empty()) {
3349
		return 0;
3350
	}
3351
	int len = length();
3352
	int slen = p_string.length();
3353
	if (len < slen) {
3354
		return 0;
3355
	}
3356
	String str;
3357
	if (p_from >= 0 && p_to >= 0) {
3358
		if (p_to == 0) {
3359
			p_to = len;
3360
		} else if (p_from >= p_to) {
3361
			return 0;
3362
		}
3363
		if (p_from == 0 && p_to == len) {
3364
			str = *this;
3365
		} else {
3366
			str = substr(p_from, p_to - p_from);
3367
		}
3368
	} else {
3369
		return 0;
3370
	}
3371
	int c = 0;
3372
	int idx = 0;
3373
	while ((idx = p_case_insensitive ? str.findn(p_string, idx) : str.find(p_string, idx)) != -1) {
3374
		// Skip the occurrence itself.
3375
		idx += slen;
3376
		++c;
3377
	}
3378
	return c;
3379
}
3380

3381
int String::_count(const char *p_string, int p_from, int p_to, bool p_case_insensitive) const {
3382
	int substring_length = strlen(p_string);
3383
	if (substring_length == 0) {
3384
		return 0;
3385
	}
3386
	const int source_length = length();
3387

3388
	if (source_length < substring_length) {
3389
		return 0;
3390
	}
3391
	String str;
3392
	int search_limit = p_to;
3393
	if (p_from >= 0 && p_to >= 0) {
3394
		if (p_to == 0) {
3395
			search_limit = source_length;
3396
		} else if (p_from >= p_to) {
3397
			return 0;
3398
		}
3399
		if (p_from == 0 && search_limit == source_length) {
3400
			str = *this;
3401
		} else {
3402
			str = substr(p_from, search_limit - p_from);
3403
		}
3404
	} else {
3405
		return 0;
3406
	}
3407
	int c = 0;
3408
	int idx = 0;
3409
	while ((idx = p_case_insensitive ? str.findn(p_string, idx) : str.find(p_string, idx)) != -1) {
3410
		// Skip the occurrence itself.
3411
		idx += substring_length;
3412
		++c;
3413
	}
3414
	return c;
3415
}
3416

3417
int String::count(const String &p_string, int p_from, int p_to) const {
3418
	return _count(p_string, p_from, p_to, false);
3419
}
3420

3421
int String::count(const char *p_string, int p_from, int p_to) const {
3422
	return _count(p_string, p_from, p_to, false);
3423
}
3424

3425
int String::countn(const String &p_string, int p_from, int p_to) const {
3426
	return _count(p_string, p_from, p_to, true);
3427
}
3428

3429
int String::countn(const char *p_string, int p_from, int p_to) const {
3430
	return _count(p_string, p_from, p_to, true);
3431
}
3432

3433
bool String::_base_is_subsequence_of(const String &p_string, bool case_insensitive) const {
3434
	int len = length();
3435
	if (len == 0) {
3436
		// Technically an empty string is subsequence of any string
3437
		return true;
3438
	}
3439

3440
	if (len > p_string.length()) {
3441
		return false;
3442
	}
3443

3444
	const char32_t *src = &operator[](0);
3445
	const char32_t *tgt = &p_string[0];
3446

3447
	for (; *src && *tgt; tgt++) {
3448
		bool match = false;
3449
		if (case_insensitive) {
3450
			char32_t srcc = _find_lower(*src);
3451
			char32_t tgtc = _find_lower(*tgt);
3452
			match = srcc == tgtc;
3453
		} else {
3454
			match = *src == *tgt;
3455
		}
3456
		if (match) {
3457
			src++;
3458
			if (!*src) {
3459
				return true;
3460
			}
3461
		}
3462
	}
3463

3464
	return false;
3465
}
3466

3467
// Returns every pair of adjacent characters, in order.
// Strings shorter than two characters produce an empty vector.
Vector<String> String::bigrams() const {
	Vector<String> pairs;
	const int pair_count = length() - 1;
	if (pair_count > 0) {
		pairs.resize_initialized(pair_count);
		String *pairs_w = pairs.ptrw();
		for (int i = 0; i < pair_count; i++) {
			pairs_w[i] = substr(i, 2);
		}
	}
	return pairs;
}
3480

3481
// Similarity according to Sorensen-Dice coefficient
3482
float String::similarity(const String &p_string) const {
3483
	if (operator==(p_string)) {
3484
		// Equal strings are totally similar
3485
		return 1.0f;
3486
	}
3487
	if (length() < 2 || p_string.length() < 2) {
3488
		// No way to calculate similarity without a single bigram
3489
		return 0.0f;
3490
	}
3491

3492
	const int src_size = length() - 1;
3493
	const int tgt_size = p_string.length() - 1;
3494

3495
	const int sum = src_size + tgt_size;
3496
	int inter = 0;
3497
	for (int i = 0; i < src_size; i++) {
3498
		const char32_t i0 = get(i);
3499
		const char32_t i1 = get(i + 1);
3500

3501
		for (int j = 0; j < tgt_size; j++) {
3502
			if (i0 == p_string.get(j) && i1 == p_string.get(j + 1)) {
3503
				inter++;
3504
				break;
3505
			}
3506
		}
3507
	}
3508

3509
	return (2.0f * inter) / sum;
3510
}
3511

3512
static bool _wildcard_match(const char32_t *p_pattern, const char32_t *p_string, bool p_case_sensitive) {
3513
	switch (*p_pattern) {
3514
		case '\0':
3515
			return !*p_string;
3516
		case '*':
3517
			return _wildcard_match(p_pattern + 1, p_string, p_case_sensitive) || (*p_string && _wildcard_match(p_pattern, p_string + 1, p_case_sensitive));
3518
		case '?':
3519
			return *p_string && (*p_string != '.') && _wildcard_match(p_pattern + 1, p_string + 1, p_case_sensitive);
3520
		default:
3521

3522
			return (p_case_sensitive ? (*p_string == *p_pattern) : (_find_upper(*p_string) == _find_upper(*p_pattern))) && _wildcard_match(p_pattern + 1, p_string + 1, p_case_sensitive);
3523
	}
3524
}
3525

3526
bool String::match(const String &p_wildcard) const {
3527
	if (!p_wildcard.length() || !length()) {
3528
		return false;
3529
	}
3530

3531
	return _wildcard_match(p_wildcard.get_data(), get_data(), true);
3532
}
3533

3534
bool String::matchn(const String &p_wildcard) const {
3535
	if (!p_wildcard.length() || !length()) {
3536
		return false;
3537
	}
3538
	return _wildcard_match(p_wildcard.get_data(), get_data(), false);
3539
}
3540

3541
// Substitutes placeholders in this string with entries from `values`.
// `placeholder` is the placeholder template; each "_" in it is replaced by the key
// (or the index, for flat arrays) to build the text that gets substituted.
// `values` may be an Array (flat, or of [key, value] pairs), a Dictionary, or an
// Object (its properties are used). Any other type prints an error and returns a copy.
String String::format(const Variant &values, const String &placeholder) const {
	String new_string = *this;

	switch (values.get_type()) {
		case Variant::ARRAY: {
			Array values_arr = values;

			for (int i = 0; i < values_arr.size(); i++) {
				if (values_arr[i].get_type() != Variant::ARRAY) {
					// Array structure ["RobotGuy","Logis","rookie"]
					String val = values_arr[i];

					if (placeholder.contains_char('_')) {
						new_string = new_string.replace(placeholder.replace("_", String::num_int64(i)), val);
					} else {
						new_string = new_string.replace_first(placeholder, val);
					}
					continue;
				}

				// Array in Array structure [["name","RobotGuy"],[0,"godot"],["strength",9000.91]]
				Array value_arr = values_arr[i];

				if (value_arr.size() != 2) {
					ERR_PRINT(vformat("Invalid format: the inner Array at index %d needs to contain only 2 elements, as a key-value pair.", i).ascii().get_data());
					continue;
				}

				String key = value_arr[0];
				String val = value_arr[1];

				new_string = new_string.replace(placeholder.replace("_", key), val);
			}
		} break;

		case Variant::DICTIONARY: {
			Dictionary d = values;

			for (const KeyValue<Variant, Variant> &kv : d) {
				new_string = new_string.replace(placeholder.replace("_", kv.key), kv.value);
			}
		} break;

		case Variant::OBJECT: {
			Object *obj = values.get_validated_object();
			ERR_FAIL_NULL_V(obj, new_string);

			List<PropertyInfo> props;
			obj->get_property_list(&props);

			for (const PropertyInfo &E : props) {
				new_string = new_string.replace(placeholder.replace("_", E.name), obj->get(E.name));
			}
		} break;

		default: {
			ERR_PRINT(String("Invalid type: use Array, Dictionary or Object.").ascii().get_data());
		} break;
	}

	return new_string;
}
3591

3592
// Replaces every non-overlapping occurrence of `p_key` in `p_this` with `p_with`.
// Works in two passes: collect all match positions, then build the result in a single
// buffer of the exact final size. Returns `p_this` unchanged if nothing matches.
static String _replace_common(const String &p_this, const String &p_key, const String &p_with, bool p_case_insensitive) {
	if (p_this.is_empty() || p_key.is_empty()) {
		return p_this;
	}

	const size_t key_length = p_key.length();

	// Pass 1: record where each occurrence starts.
	LocalVector<int> found;
	int search_from = 0;
	while (true) {
		const int result = p_case_insensitive ? p_this.findn(p_key, search_from) : p_this.find(p_key, search_from);
		if (result < 0) {
			break;
		}
		found.push_back(result);
		ERR_FAIL_COND_V_MSG((result + key_length) > INT32_MAX, p_this, "Key length too long");
		search_from = result + key_length;
	}

	if (found.is_empty()) {
		return p_this;
	}

	const int with_length = p_with.length();
	const int old_length = p_this.length();

	// Pass 2: allocate once (including the terminating NUL) and stitch the pieces together.
	String new_string;
	new_string.resize_uninitialized(old_length + int(found.size()) * (with_length - key_length) + 1);

	char32_t *dst = new_string.ptrw();
	const char32_t *src = p_this.ptr();
	const char32_t *with_ptr = p_with.ptr();

	int copied_until = 0;
	for (const int pos : found) {
		// Untouched text between the previous match and this one.
		const int gap = pos - copied_until;
		if (gap != 0) {
			memcpy(dst, src + copied_until, gap * sizeof(char32_t));
			dst += gap;
		}
		if (with_length) {
			memcpy(dst, with_ptr, with_length * sizeof(char32_t));
			dst += with_length;
		}
		copied_until = pos + key_length;
	}

	// Remaining text after the last match.
	if (copied_until != old_length) {
		memcpy(dst, src + copied_until, (old_length - copied_until) * sizeof(char32_t));
		dst += old_length - copied_until;
	}

	*dst = 0;

	return new_string;
}
3648

3649
// C string overload: replaces every non-overlapping occurrence of `p_key` in `p_this`
// with `p_with`. Works in two passes: collect all match positions, then build the result
// in a single buffer of the exact final size. Returns `p_this` unchanged if nothing matches.
static String _replace_common(const String &p_this, char const *p_key, char const *p_with, bool p_case_insensitive) {
	const size_t key_length = strlen(p_key);

	if (key_length == 0 || p_this.is_empty()) {
		return p_this;
	}

	// Pass 1: record where each occurrence starts.
	LocalVector<int> found;
	int search_from = 0;
	while (true) {
		const int result = p_case_insensitive ? p_this.findn(p_key, search_from) : p_this.find(p_key, search_from);
		if (result < 0) {
			break;
		}
		found.push_back(result);
		ERR_FAIL_COND_V_MSG((result + key_length) > INT32_MAX, p_this, "Key length too long");
		search_from = result + key_length;
	}

	if (found.is_empty()) {
		return p_this;
	}

	// Create string to speed up copying as we can't do `memcopy` between `char32_t` and `char`.
	const String with_string(p_with);
	const int with_length = with_string.length();
	const int old_length = p_this.length();

	// Pass 2: allocate once (including the terminating NUL) and stitch the pieces together.
	String new_string;
	new_string.resize_uninitialized(old_length + int(found.size()) * (with_length - key_length) + 1);

	char32_t *dst = new_string.ptrw();
	const char32_t *src = p_this.ptr();
	const char32_t *with_ptr = with_string.ptr();

	int copied_until = 0;
	for (const int pos : found) {
		// Untouched text between the previous match and this one.
		const int gap = pos - copied_until;
		if (gap != 0) {
			memcpy(dst, src + copied_until, gap * sizeof(char32_t));
			dst += gap;
		}
		if (with_length) {
			memcpy(dst, with_ptr, with_length * sizeof(char32_t));
			dst += with_length;
		}
		copied_until = pos + key_length;
	}

	// Remaining text after the last match.
	if (copied_until != old_length) {
		memcpy(dst, src + copied_until, (old_length - copied_until) * sizeof(char32_t));
		dst += old_length - copied_until;
	}

	*dst = 0;

	return new_string;
}
3707

3708
// Case-sensitive replace-all; forwards to `_replace_common()`.
String String::replace(const String &p_key, const String &p_with) const {
	const bool ignore_case = false;
	return _replace_common(*this, p_key, p_with, ignore_case);
}
3711

3712
// Case-sensitive replace-all for C string arguments; forwards to `_replace_common()`.
String String::replace(const char *p_key, const char *p_with) const {
	const bool ignore_case = false;
	return _replace_common(*this, p_key, p_with, ignore_case);
}
3715

3716
// Replaces only the first occurrence of `p_key` with `p_with`.
// Returns this string unchanged when `p_key` is not found.
String String::replace_first(const String &p_key, const String &p_with) const {
	const int pos = find(p_key);
	if (pos < 0) {
		return *this;
	}

	const int old_length = length();
	const int key_length = p_key.length();
	const int with_length = p_with.length();

	// Exact final size, plus one for the terminating NUL.
	String new_string;
	new_string.resize_uninitialized(old_length + (with_length - key_length) + 1);

	char32_t *dst = new_string.ptrw();
	const char32_t *src = ptr();

	// Text before the match.
	if (pos > 0) {
		memcpy(dst, src, pos * sizeof(char32_t));
		dst += pos;
	}

	// The replacement itself.
	if (with_length) {
		memcpy(dst, p_with.ptr(), with_length * sizeof(char32_t));
		dst += with_length;
	}

	// Text after the match.
	const int tail_start = pos + key_length;
	if (tail_start != old_length) {
		memcpy(dst, src + tail_start, (old_length - tail_start) * sizeof(char32_t));
		dst += (old_length - tail_start);
	}

	*dst = 0;

	return new_string;
}
3753

3754
// Replaces only the first occurrence of the C string `p_key` with `p_with`.
// Returns this string unchanged when `p_key` is not found.
String String::replace_first(const char *p_key, const char *p_with) const {
	const int pos = find(p_key);
	if (pos < 0) {
		return *this;
	}

	const int old_length = length();
	const int key_length = strlen(p_key);
	const int with_length = strlen(p_with);

	// Exact final size, plus one for the terminating NUL.
	String new_string;
	new_string.resize_uninitialized(old_length + (with_length - key_length) + 1);

	char32_t *new_ptrw = new_string.ptrw();
	const char32_t *old_ptr = ptr();

	// Text before the match.
	if (pos > 0) {
		memcpy(new_ptrw, old_ptr, pos * sizeof(char32_t));
		new_ptrw += pos;
	}

	// The replacement, widened one byte at a time. Going through uint8_t keeps bytes
	// >= 0x80 from being sign-extended into out-of-range code points where `char` is signed.
	for (int i = 0; i < with_length; ++i) {
		*new_ptrw++ = static_cast<uint8_t>(p_with[i]);
	}

	// Text after the match.
	const int tail_start = pos + key_length;
	if (tail_start != old_length) {
		memcpy(new_ptrw, old_ptr + tail_start, (old_length - tail_start) * sizeof(char32_t));
		new_ptrw += (old_length - tail_start);
	}

	*new_ptrw = 0;

	return new_string;
}
3789

3790
// Replaces every occurrence of the character `p_key` with `p_with`.
// `p_with` must not be NUL. Returns this string unchanged if `p_key` is NUL or absent.
String String::replace_char(char32_t p_key, char32_t p_with) const {
	ERR_FAIL_COND_V_MSG(p_with == 0, *this, "`with` must not be the NUL character.");

	if (p_key == 0) {
		return *this;
	}

	const int len = length();
	if (len == 0) {
		return *this;
	}

	// Locate the first occurrence so that strings without `key` need no new buffer.
	const char32_t *src = ptr();
	int first_hit = 0;
	while (first_hit < len && src[first_hit] != p_key) {
		++first_hit;
	}

	// If no occurrence of `key` was found, return this.
	if (first_hit == len) {
		return *this;
	}

	// If we found at least one occurrence of `key`, create new string.
	String new_string;
	new_string.resize_uninitialized(len + 1);
	char32_t *dst = new_string.ptrw();

	// Copy part of input before `key`.
	memcpy(dst, src, first_hit * sizeof(char32_t));

	// Copy or replace rest of input, starting at the first occurrence.
	for (int i = first_hit; i < len; ++i) {
		dst[i] = (src[i] == p_key) ? p_with : src[i];
	}

	dst[len] = _null;

	return new_string;
}
3838

3839
template <class T>
3840
static String _replace_chars_common(const String &p_this, const T *p_keys, int p_keys_len, char32_t p_with) {
3841
	ERR_FAIL_COND_V_MSG(p_with == 0, p_this, "`with` must not be the NUL character.");
3842

3843
	// Delegate if p_keys is a single element.
3844
	if (p_keys_len == 1) {
3845
		return p_this.replace_char(*p_keys, p_with);
3846
	} else if (p_keys_len == 0) {
3847
		return p_this;
3848
	}
3849

3850
	int len = p_this.length();
3851
	if (len == 0) {
3852
		return p_this;
3853
	}
3854

3855
	int index = 0;
3856
	const char32_t *old_ptr = p_this.ptr();
3857
	for (; index < len; ++index) {
3858
		if (_contains_char(old_ptr[index], p_keys, p_keys_len)) {
3859
			break;
3860
		}
3861
	}
3862

3863
	// If no occurrence of `keys` was found, return this.
3864
	if (index == len) {
3865
		return p_this;
3866
	}
3867

3868
	// If we found at least one occurrence of `keys`, create new string.
3869
	String new_string;
3870
	new_string.resize_uninitialized(len + 1);
3871
	char32_t *new_ptr = new_string.ptrw();
3872

3873
	// Copy part of input before `key`.
3874
	memcpy(new_ptr, old_ptr, index * sizeof(char32_t));
3875

3876
	new_ptr[index] = p_with;
3877

3878
	// Copy or replace rest of input.
3879
	for (++index; index < len; ++index) {
3880
		const char32_t old_char = old_ptr[index];
3881
		if (_contains_char(old_char, p_keys, p_keys_len)) {
3882
			new_ptr[index] = p_with;
3883
		} else {
3884
			new_ptr[index] = old_char;
3885
		}
3886
	}
3887

3888
	new_ptr[index] = 0;
3889

3890
	return new_string;
3891
}
3892

3893
// Replaces every character found in `p_keys` with `p_with`; forwards to `_replace_chars_common()`.
String String::replace_chars(const String &p_keys, char32_t p_with) const {
	const char32_t *keys = p_keys.ptr();
	const int keys_len = p_keys.length();
	return _replace_chars_common(*this, keys, keys_len, p_with);
}
3896

3897
// Replaces every character found in the C string `p_keys` with `p_with`;
// forwards to `_replace_chars_common()`.
String String::replace_chars(const char *p_keys, char32_t p_with) const {
	const int keys_len = strlen(p_keys);
	return _replace_chars_common(*this, p_keys, keys_len, p_with);
}
3900

3901
// Case-insensitive replace-all; forwards to `_replace_common()`.
String String::replacen(const String &p_key, const String &p_with) const {
	const bool ignore_case = true;
	return _replace_common(*this, p_key, p_with, ignore_case);
}
3904

3905
// Case-insensitive replace-all for C string arguments; forwards to `_replace_common()`.
String String::replacen(const char *p_key, const char *p_with) const {
	const bool ignore_case = true;
	return _replace_common(*this, p_key, p_with, ignore_case);
}
3908

3909
// Returns this string concatenated with itself `p_count` times.
// A negative count is an error (returns ""), zero returns "", one returns this string.
String String::repeat(int p_count) const {
	ERR_FAIL_COND_V_MSG(p_count < 0, "", "Parameter count should be a positive number.");

	if (p_count == 0) {
		return "";
	}
	if (p_count == 1) {
		return *this;
	}

	const int len = length();

	// Start from one copy of the source and grow the buffer to the final size (+1 for NUL).
	String repeated = *this;
	repeated.resize_uninitialized(p_count * len + 1);
	char32_t *dst = repeated.ptrw();

	// Fill by copying the already-written prefix onto the end, doubling the chunk
	// each round and clamping it to what is still missing.
	int copies_done = 1;
	int chunk = 1;
	while (copies_done < p_count) {
		memcpy(dst + copies_done * len, dst, chunk * len * sizeof(char32_t));
		copies_done += chunk;
		chunk = MIN(chunk * 2, p_count - copies_done);
	}

	dst[p_count * len] = _null;
	return repeated;
}
3935

3936
// Returns a copy of this string with its characters in reverse order.
String String::reverse() const {
	const int len = length();
	if (len <= 1) {
		// Nothing to reverse.
		return *this;
	}

	String reversed;
	reversed.resize_uninitialized(len + 1);

	const char32_t *src = ptr();
	char32_t *dst = reversed.ptrw();
	// `front` walks the destination forward while `back` walks the source backward.
	for (int front = 0, back = len - 1; front < len; front++, back--) {
		dst[front] = src[back];
	}
	dst[len] = _null;
	return reversed;
}
3952

3953
// Returns the first `p_len` characters. A negative `p_len` counts from the end
// (i.e. drops that many trailing characters).
String String::left(int p_len) const {
	const int len = length();
	const int count = (p_len < 0) ? len + p_len : p_len;

	if (count <= 0) {
		return "";
	}
	if (count >= len) {
		return *this;
	}

	String s;
	s.append_utf32_unchecked(Span(ptr(), count));
	return s;
}
3970

3971
// Returns the last `p_len` characters. A negative `p_len` counts from the start
// (i.e. drops that many leading characters).
String String::right(int p_len) const {
	const int len = length();
	const int count = (p_len < 0) ? len + p_len : p_len;

	if (count <= 0) {
		return "";
	}
	if (count >= len) {
		return *this;
	}

	String s;
	s.append_utf32_unchecked(Span(ptr() + len - count, count));
	return s;
}
3988

3989
char32_t String::unicode_at(int p_idx) const {
3990
	ERR_FAIL_INDEX_V(p_idx, length(), 0);
3991
	return operator[](p_idx);
3992
}
3993

3994
// Returns a copy of this string with `p_prefix` prepended to every non-empty line.
String String::indent(const String &p_prefix) const {
	String indented;
	int line_begin = 0;
	const int len = length();

	for (int i = 0; i < len; i++) {
		const char32_t c = operator[](i);
		if (c != '\n') {
			continue;
		}

		if (i == line_begin) {
			indented += c; // Leave empty lines empty.
		} else {
			// Emit the prefix followed by the line, newline included.
			indented += p_prefix + substr(line_begin, i - line_begin + 1);
		}
		line_begin = i + 1;
	}

	// Last line when the string does not end with a newline.
	if (line_begin != len) {
		indented += p_prefix + substr(line_begin);
	}
	return indented;
}
4014

4015
// Removes common leading whitespace. The indentation of the first line that contains
// text (a character > 32) is used as the reference; on later lines the longest leading
// run that matches that reference is stripped. Lines without text become empty lines.
String String::dedent() const {
	String result;
	String reference_indent;
	bool reference_found = false;
	bool line_has_text = false;
	int line_begin = 0;
	int strip_end = -1; // Index where the kept part of the current line starts, -1 if unknown.
	const int len = length();

	for (int i = 0; i < len; i++) {
		const char32_t c = operator[](i);

		if (c == '\n') {
			if (line_has_text) {
				result += substr(strip_end, i - strip_end);
			}
			result += "\n";
			line_has_text = false;
			line_begin = i + 1;
			strip_end = -1;
			continue;
		}

		if (line_has_text) {
			// Already past the indentation of this line.
			continue;
		}

		if (c > 32) {
			line_has_text = true;
			if (!reference_found) {
				// First line with text: its leading whitespace becomes the reference.
				reference_found = true;
				reference_indent = substr(line_begin, i - line_begin);
				strip_end = i;
			}
		}

		if (reference_found && strip_end < 0) {
			// Stop stripping as soon as this line diverges from the reference indent.
			const int column = i - line_begin;
			if (column >= reference_indent.length() || c != reference_indent[column]) {
				strip_end = i;
			}
		}
	}

	// Last line when the string does not end with a newline.
	if (line_has_text) {
		result += substr(strip_end, len - strip_end);
	}

	return result;
}
4057

4058
// Removes characters <= 32 (whitespace and control characters) from the start of the
// string if `left` is set and from the end if `right` is set.
String String::strip_edges(bool left, bool right) const {
	const int len = length();
	int beg = 0;
	int end = len;

	if (left) {
		while (beg < len && operator[](beg) <= 32) {
			beg++;
		}
	}

	if (right) {
		while (end > 0 && operator[](end - 1) <= 32) {
			end--;
		}
	}

	// Nothing stripped: hand back this string as is.
	if (beg == 0 && end == len) {
		return *this;
	}

	return substr(beg, end - beg);
}
4088

4089
// Returns a copy of this string without the characters below 32 (Space),
// i.e. the control characters on the first page of the ASCII table.
String String::strip_escapes() const {
	String stripped;
	const int len = length();
	for (int i = 0; i < len; i++) {
		const char32_t c = operator[](i);
		if (c >= 32) {
			stripped += c;
		}
	}

	return stripped;
}
4101

4102
// Removes from the start of the string every leading character that appears in `p_chars`.
String String::lstrip(const String &p_chars) const {
	const int len = length();

	int beg = 0;
	while (beg < len && p_chars.find_char(get(beg)) != -1) {
		beg++;
	}

	if (beg == 0) {
		// Nothing to strip.
		return *this;
	}

	return substr(beg, len - beg);
}
4118

4119
// Removes from the end of the string every trailing character that appears in `p_chars`.
String String::rstrip(const String &p_chars) const {
	const int len = length();

	// Index of the last character to keep, or -1 if every character is stripped.
	int end = len - 1;
	while (end >= 0 && p_chars.find_char(get(end)) != -1) {
		end--;
	}

	if (end == len - 1) {
		// Nothing to strip.
		return *this;
	}

	return substr(0, end + 1);
}
4135

4136
bool String::is_network_share_path() const {
4137
	return begins_with("//") || begins_with("\\\\");
4138
}
4139

4140
// Normalizes a path: splits off a leading "drive" part (protocol such as "res://",
// network share prefix, root slash, or Windows drive), converts backslashes to slashes,
// collapses repeated slashes, and resolves "." and ".." components where possible.
String String::simplify_path() const {
	String s = *this;
	String drive;

	// Check if we have a special path (like res://) or a protocol identifier.
	bool found = false;
	int p = s.find("://");
	if (p > 0) {
		// The part before "://" must be purely alphanumeric to count as a protocol.
		bool only_chars = true;
		for (int i = 0; i < p && only_chars; i++) {
			only_chars = is_ascii_alphanumeric_char(s[i]);
		}
		if (only_chars) {
			found = true;
			drive = s.substr(0, p + 3);
			s = s.substr(p + 3);
		}
	}

	if (!found) {
		if (is_network_share_path()) {
			// Network path, beginning with // or \\.
			drive = s.substr(0, 2);
			s = s.substr(2);
		} else if (s.begins_with("/") || s.begins_with("\\")) {
			// Absolute path.
			drive = s.substr(0, 1);
			s = s.substr(1);
		} else {
			// Windows-style drive path, like C:/ or C:\.
			p = s.find(":/");
			if (p == -1) {
				p = s.find(":\\");
			}
			if (p != -1 && p < s.find_char('/')) {
				drive = s.substr(0, p + 2);
				s = s.substr(p + 2);
			}
		}
	}

	s = s.replace_char('\\', '/');

	// Collapse runs of slashes until a pass changes nothing.
	while (true) {
		const String collapsed = s.replace("//", "/");
		if (s == collapsed) {
			break;
		}
		s = collapsed;
	}

	Vector<String> dirs = s.split("/", false);

	// FIXME: Some code (GLTF importer) rely on accessing files up from `res://`, this probably should be disabled in the future.
	const bool absolute_path = is_absolute_path() && !begins_with("res://");

	for (int i = 0; i < dirs.size(); i++) {
		const String d = dirs[i];
		if (d == ".") {
			dirs.remove_at(i);
			i--;
			continue;
		}
		if (d != "..") {
			continue;
		}

		if (i != 0 && dirs[i - 1] != "..") {
			// ".." cancels the preceding real component.
			dirs.remove_at(i);
			dirs.remove_at(i - 1);
			i -= 2;
		} else if (absolute_path && i == 0) {
			// Cannot go above the root of an absolute path: drop the "..".
			dirs.remove_at(i);
			i--;
		}
	}

	// Join the remaining components back together.
	s = "";
	for (int i = 0; i < dirs.size(); i++) {
		if (i > 0) {
			s += "/";
		}
		s += dirs[i];
	}

	return drive + s;
}
4225

4226
// Number of decimals to show for a humanized size: 2 below 100, 1 below 1024, 0 otherwise.
static int _humanize_digits(int p_num) {
	if (p_num >= 1024) {
		return 0;
	}
	return (p_num < 100) ? 2 : 1;
}
4235

4236
// Formats a byte count with a binary unit suffix (B, KiB, MiB, ... EiB).
// The unit is raised while the size is strictly greater than 1024 of the current unit.
String String::humanize_size(uint64_t p_size) {
	int magnitude = 0;
	uint64_t unit = 1;
	while (p_size > unit * 1024 && magnitude < 6) {
		unit *= 1024;
		magnitude++;
	}

	if (magnitude == 0) {
		return String::num_uint64(p_size) + " " + RTR("B");
	}

	String suffix;
	switch (magnitude) {
		case 1:
			suffix = RTR("KiB");
			break;
		case 2:
			suffix = RTR("MiB");
			break;
		case 3:
			suffix = RTR("GiB");
			break;
		case 4:
			suffix = RTR("TiB");
			break;
		case 5:
			suffix = RTR("PiB");
			break;
		case 6:
			suffix = RTR("EiB");
			break;
	}

	// The integer quotient picks the number of decimals; the floating-point one is printed.
	const double divisor = unit;
	const int digits = _humanize_digits(p_size / unit);
	return String::num(p_size / divisor).pad_decimals(digits) + " " + suffix;
}
4274

4275
bool String::is_absolute_path() const {
4276
	if (length() > 1) {
4277
		return (operator[](0) == '/' || operator[](0) == '\\' || find(":/") != -1 || find(":\\") != -1);
4278
	} else if ((length()) == 1) {
4279
		return (operator[](0) == '/' || operator[](0) == '\\');
4280
	} else {
4281
		return false;
4282
	}
4283
}
4284

4285
// Returns a copy of this string turned into a valid ASCII identifier: a leading digit
// gets a "_" prefix and every non-identifier character is replaced by '_'.
String String::validate_ascii_identifier() const {
	if (is_empty()) {
		return "_"; // Empty string is not a valid identifier.
	}

	String result = *this;
	if (is_digit(operator[](0))) {
		result = "_" + *this;
	}

	const int len = result.length();
	char32_t *chars = result.ptrw();
	for (char32_t *c = chars; c != chars + len; c++) {
		if (!is_ascii_identifier_char(*c)) {
			*c = '_';
		}
	}

	return result;
}
4307

4308
// Returns a copy of this string turned into a valid Unicode identifier: a first character
// that cannot start an identifier gets a "_" prefix, and every character that cannot
// continue an identifier is replaced by '_'.
String String::validate_unicode_identifier() const {
	if (is_empty()) {
		return "_"; // Empty string is not a valid identifier.
	}

	String result = *this;
	if (!is_unicode_identifier_start(operator[](0))) {
		result = "_" + *this;
	}

	const int len = result.length();
	char32_t *chars = result.ptrw();
	for (char32_t *c = chars; c != chars + len; c++) {
		if (!is_unicode_identifier_continue(*c)) {
			*c = '_';
		}
	}

	return result;
}
4330

4331
bool String::is_valid_ascii_identifier() const {
4332
	int len = length();
4333

4334
	if (len == 0) {
4335
		return false;
4336
	}
4337

4338
	if (is_digit(operator[](0))) {
4339
		return false;
4340
	}
4341

4342
	const char32_t *str = &operator[](0);
4343

4344
	for (int i = 0; i < len; i++) {
4345
		if (!is_ascii_identifier_char(str[i])) {
4346
			return false;
4347
		}
4348
	}
4349

4350
	return true;
4351
}
4352

4353
bool String::is_valid_unicode_identifier() const {
4354
	const char32_t *str = ptr();
4355
	int len = length();
4356

4357
	if (len == 0) {
4358
		return false; // Empty string.
4359
	}
4360

4361
	if (!is_unicode_identifier_start(str[0])) {
4362
		return false;
4363
	}
4364

4365
	for (int i = 1; i < len; i++) {
4366
		if (!is_unicode_identifier_continue(str[i])) {
4367
			return false;
4368
		}
4369
	}
4370
	return true;
4371
}
4372

4373
bool String::is_valid_string() const {
4374
	int l = length();
4375
	const char32_t *src = get_data();
4376
	bool valid = true;
4377
	for (int i = 0; i < l; i++) {
4378
		valid = valid && (src[i] < 0xd800 || (src[i] > 0xdfff && src[i] <= 0x10ffff));
4379
	}
4380
	return valid;
4381
}
4382

4383
// Percent-encodes the UTF-8 form of this string. Bytes that are ASCII identifier
// characters, '.', '-' or '~' are kept; every other byte becomes "%XX" in uppercase hex.
String String::uri_encode() const {
	const CharString utf8_bytes = utf8();
	const int byte_count = utf8_bytes.length();
	String res;

	for (int i = 0; i < byte_count; ++i) {
		const uint8_t ord = uint8_t(utf8_bytes[i]);
		const bool keep_as_is = ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord);
		if (keep_as_is) {
			res += ord;
			continue;
		}

		// "%XX" plus terminating NUL.
		char escape[4] = { '%', 0, 0, 0 };
		escape[1] = hex_char_table_upper[ord >> 4];
		escape[2] = hex_char_table_upper[ord & 0xF];
		res += escape;
	}
	return res;
}
4400

4401
// Decodes percent-encoded sequences ("%XX") and turns '+' into a space, working on
// the UTF-8 form of this string and re-decoding the result as UTF-8.
// A '%' that is not followed by two hexadecimal digits is kept literally.
String String::uri_decode() const {
	// Value of a hexadecimal digit (either case), or -1 if `p_c` is not one.
	const auto hex_value = [](char p_c) -> int {
		if (p_c >= '0' && p_c <= '9') {
			return p_c - '0';
		}
		if (p_c >= 'A' && p_c <= 'F') {
			return p_c - 'A' + 10;
		}
		if (p_c >= 'a' && p_c <= 'f') {
			return p_c - 'a' + 10;
		}
		return -1;
	};

	const CharString src = utf8();
	const int src_len = src.length();
	CharString res;
	for (int i = 0; i < src_len; ++i) {
		const char c = src[i];
		if (c == '%' && i + 2 < src_len) {
			const int high = hex_value(src[i + 1]);
			const int low = hex_value(src[i + 2]);
			if (high >= 0 && low >= 0) {
				res += static_cast<char>((high << 4) | low);
				i += 2;
			} else {
				// Malformed escape: keep the '%' instead of dropping it.
				res += c;
			}
		} else if (c == '+') {
			res += ' ';
		} else {
			res += c;
		}
	}
	return String::utf8(res);
}
4425

4426
// Decodes percent-encoded sequences ("%XX"), working on the UTF-8 form of this string
// and re-decoding the result as UTF-8. Unlike `uri_decode()`, '+' is left untouched.
// A '%' that is not followed by two hexadecimal digits is kept literally.
String String::uri_file_decode() const {
	// Value of a hexadecimal digit (either case), or -1 if `p_c` is not one.
	const auto hex_value = [](char p_c) -> int {
		if (p_c >= '0' && p_c <= '9') {
			return p_c - '0';
		}
		if (p_c >= 'A' && p_c <= 'F') {
			return p_c - 'A' + 10;
		}
		if (p_c >= 'a' && p_c <= 'f') {
			return p_c - 'a' + 10;
		}
		return -1;
	};

	const CharString src = utf8();
	const int src_len = src.length();
	CharString res;
	for (int i = 0; i < src_len; ++i) {
		const char c = src[i];
		if (c == '%' && i + 2 < src_len) {
			const int high = hex_value(src[i + 1]);
			const int low = hex_value(src[i + 2]);
			if (high >= 0 && low >= 0) {
				res += static_cast<char>((high << 4) | low);
				i += 2;
			} else {
				// Malformed escape: keep the '%' instead of dropping it.
				res += c;
			}
		} else {
			res += c;
		}
	}
	return String::utf8(res);
}
4448

4449
// Decodes the C-style escape sequences \a \b \f \n \r \t \v \' \" and \\.
// The string is scanned once from left to right, so an escaped backslash is never
// re-read as the start of another escape (e.g. backslash-backslash-n decodes to a
// backslash followed by 'n'). Unknown sequences and a trailing lone backslash are kept.
String String::c_unescape() const {
	if (!contains_char('\\')) {
		// No escape sequence can be present.
		return *this;
	}

	const int len = length();
	const char32_t *src = ptr();
	String unescaped;

	for (int i = 0; i < len; i++) {
		char32_t c = src[i];
		if (c == '\\' && i + 1 < len) {
			char32_t decoded = 0;
			switch (src[i + 1]) {
				case 'a':
					decoded = '\a';
					break;
				case 'b':
					decoded = '\b';
					break;
				case 'f':
					decoded = '\f';
					break;
				case 'n':
					decoded = '\n';
					break;
				case 'r':
					decoded = '\r';
					break;
				case 't':
					decoded = '\t';
					break;
				case 'v':
					decoded = '\v';
					break;
				case '\'':
					decoded = '\'';
					break;
				case '"':
					decoded = '"';
					break;
				case '\\':
					decoded = '\\';
					break;
				default:
					break;
			}
			if (decoded != 0) {
				// Consume the character that followed the backslash.
				c = decoded;
				i++;
			}
		}
		unescaped += c;
	}

	return unescaped;
}
4464

4465
// Escapes backslashes, the control characters \a \b \f \n \r \t \v, and both quote
// characters using C-style escape sequences.
String String::c_escape() const {
	// Backslashes go first so the backslashes added by later steps are not escaped again.
	return replace("\\", "\\\\")
			.replace("\a", "\\a")
			.replace("\b", "\\b")
			.replace("\f", "\\f")
			.replace("\n", "\\n")
			.replace("\r", "\\r")
			.replace("\t", "\\t")
			.replace("\v", "\\v")
			.replace("\'", "\\'")
			.replace("\"", "\\\"");
}
4480

4481
// Escapes only backslashes and double quotes; newlines and other characters are kept as is.
String String::c_escape_multiline() const {
	// Backslashes go first so the backslashes added for quotes are not escaped again.
	return replace("\\", "\\\\").replace("\"", "\\\"");
}
4488

4489
// Escapes backslashes, double quotes and the control characters \b \f \n \r \t and
// vertical tab for embedding in a JSON string. Other control characters are left untouched.
String String::json_escape() const {
	String escaped = *this;
	// Backslashes go first so the backslashes added by later steps are not escaped again.
	escaped = escaped.replace("\\", "\\\\");
	escaped = escaped.replace("\b", "\\b");
	escaped = escaped.replace("\f", "\\f");
	escaped = escaped.replace("\n", "\\n");
	escaped = escaped.replace("\r", "\\r");
	escaped = escaped.replace("\t", "\\t");
	// JSON has no "\v" escape, so vertical tab uses the generic \uXXXX form.
	escaped = escaped.replace("\v", "\\u000b");
	escaped = escaped.replace("\"", "\\\"");

	return escaped;
}
4502

4503
// Escapes '&', '<' and '>' as XML entities; with `p_escape_quotes`, also escapes
// single and double quotes. Control characters are not escaped.
String String::xml_escape(bool p_escape_quotes) const {
	// '&' goes first so the ampersands of the entities added below are not escaped again.
	String str = replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
	if (p_escape_quotes) {
		str = str.replace("'", "&apos;").replace("\"", "&quot;");
	}
	return str;
}
4519

4520
// Decodes XML entities found in the first p_src_len characters of p_src.
//
// Handled entities are numeric character references (&#<dec>; and &#x<hex>;)
// and the named entities &gt; &lt; &amp; &quot; &apos;. Anything else that
// starts with '&' is copied through one character at a time.
//
// If p_dst is non-null the decoded characters are written to it (no terminator
// is appended); if it is null nothing is written. In both cases the number of
// decoded characters is returned, so a first call with a null destination can
// be used to size the buffer for a second call.
static _FORCE_INLINE_ int _xml_unescape(const char32_t *p_src, int p_src_len, char32_t *p_dst) {
	int written = 0;

	while (p_src_len) {
		if (*p_src != '&') {
			// Ordinary character: copy it through unchanged.
			if (p_dst) {
				*p_dst = *p_src;
				p_dst++;
			}
			written++;
			p_src++;
			p_src_len--;
			continue;
		}

		// Default outcome: the '&' is not part of a recognized entity and is
		// emitted as-is, consuming just that one character.
		int consumed = 1;
		char32_t decoded = *p_src;

		if (p_src_len >= 4 && p_src[1] == '#') {
			char32_t code = 0;
			bool out_of_range = false;
			// One past the index of the last character examined by the scan.
			int scanned = 0;

			if (p_src[2] == 'x') {
				// Hex entity &#x<num>;
				for (int pos = 3; pos < p_src_len; pos++) {
					scanned = pos + 1;
					const char32_t ch = p_src[pos];
					if (ch == ';') {
						break;
					}

					char32_t nibble = 0;
					if (is_digit(ch)) {
						nibble = ch - '0';
					} else if (ch >= 'a' && ch <= 'f') {
						nibble = (ch - 'a') + 10;
					} else if (ch >= 'A' && ch <= 'F') {
						nibble = (ch - 'A') + 10;
					} else {
						// Not a hex digit: the entity is malformed.
						break;
					}

					// Shifting in another digit would drop the top bits.
					if (code > (UINT32_MAX >> 4)) {
						out_of_range = true;
						break;
					}
					code = (code << 4) | nibble;
				}
			} else {
				// Decimal entity &#<num>;
				for (int pos = 2; pos < p_src_len; pos++) {
					scanned = pos + 1;
					const char32_t ch = p_src[pos];
					if (ch == ';' || !is_digit(ch)) {
						break;
					}
				}
				if (p_src[scanned - 1] == ';') {
					// The digits lie between the "&#" prefix and the ';'.
					const int64_t parsed = String::to_int(p_src + 2, scanned - 3);
					if (parsed > 0 && parsed <= UINT32_MAX) {
						code = (char32_t)parsed;
					} else {
						out_of_range = true;
					}
				}
			}

			// The value must be non-zero, fit in a char32_t and the entity must
			// really end with ';'. Otherwise the default pass-through applies.
			const bool well_formed = code != '\0' && !out_of_range && p_src[scanned - 1] == ';';
			if (well_formed) {
				consumed = scanned;
				decoded = code;
			}
		} else if (p_src_len >= 4 && p_src[1] == 'g' && p_src[2] == 't' && p_src[3] == ';') {
			decoded = '>';
			consumed = 4;
		} else if (p_src_len >= 4 && p_src[1] == 'l' && p_src[2] == 't' && p_src[3] == ';') {
			decoded = '<';
			consumed = 4;
		} else if (p_src_len >= 5 && p_src[1] == 'a' && p_src[2] == 'm' && p_src[3] == 'p' && p_src[4] == ';') {
			decoded = '&';
			consumed = 5;
		} else if (p_src_len >= 6 && p_src[1] == 'q' && p_src[2] == 'u' && p_src[3] == 'o' && p_src[4] == 't' && p_src[5] == ';') {
			decoded = '"';
			consumed = 6;
		} else if (p_src_len >= 6 && p_src[1] == 'a' && p_src[2] == 'p' && p_src[3] == 'o' && p_src[4] == 's' && p_src[5] == ';') {
			decoded = '\'';
			consumed = 6;
		}

		if (p_dst) {
			*p_dst = decoded;
			p_dst++;
		}
		written++;
		p_src += consumed;
		p_src_len -= consumed;
	}

	return written;
}
4633

4634
// Returns a copy of this string with XML entities decoded by _xml_unescape().
String String::xml_unescape() const {
	const char32_t *source = get_data();
	const int source_len = length();

	// First pass with a null destination only counts the decoded characters.
	const int decoded_len = _xml_unescape(source, source_len, nullptr);
	if (decoded_len == 0) {
		return String();
	}

	// Second pass writes into a buffer with room for the terminating zero.
	String result;
	result.resize_uninitialized(decoded_len + 1);
	char32_t *dest = result.ptrw();
	_xml_unescape(source, source_len, dest);
	dest[decoded_len] = 0;
	return result;
}
4647

4648
// Formats the text after the first '.' so that it has exactly p_digits
// characters: extra ones are cut off, missing ones are filled with '0'.
// With p_digits <= 0 the '.' and everything after it are removed.
String String::pad_decimals(int p_digits) const {
	String text = *this;
	int dot = text.find_char('.');
	const bool keep_fraction = p_digits > 0;

	if (dot == -1) {
		if (!keep_fraction) {
			return text;
		}
		// No decimal point yet: append one so zeros can follow it.
		text += ".";
		dot = text.length() - 1;
	} else if (!keep_fraction) {
		return text.substr(0, dot);
	}

	const int current_decimals = text.length() - (dot + 1);
	if (current_decimals > p_digits) {
		return text.substr(0, dot + p_digits + 1);
	}

	const int zeros_to_add = p_digits - current_decimals;
	return text + String("0").repeat(zeros_to_add);
}
4671

4672
// Left-pads the part of the string before the first '.' (or the whole string
// if there is none) with '0' until it spans at least p_digits characters,
// counted from the first digit. Leading non-digit characters such as a sign
// stay in front of the inserted zeros.
String String::pad_zeros(int p_digits) const {
	String text = *this;

	// The padded region ends at the decimal point, or at the end of the text.
	int region_end = text.find_char('.');
	if (region_end == -1) {
		region_end = text.length();
	}
	if (region_end == 0) {
		return text;
	}

	// Skip anything before the first digit.
	int first_digit = 0;
	for (; first_digit < region_end; first_digit++) {
		if (is_digit(text[first_digit])) {
			break;
		}
	}

	const int zeros_to_add = p_digits - (region_end - first_digit);
	if (zeros_to_add > 0) {
		return text.insert(first_digit, String("0").repeat(zeros_to_add));
	}
	return text;
}
4698

4699
// Returns this string without p_prefix if it starts with it; otherwise an
// unchanged copy.
String String::trim_prefix(const String &p_prefix) const {
	if (!begins_with(p_prefix)) {
		return *this;
	}
	return substr(p_prefix.length());
}
4706

4707
// C-string overload: returns this string without p_prefix if it starts with
// it; otherwise an unchanged copy.
String String::trim_prefix(const char *p_prefix) const {
	if (!begins_with(p_prefix)) {
		return *this;
	}
	const int skip = strlen(p_prefix);
	return substr(skip);
}
4715

4716
// Returns this string without p_suffix if it ends with it; otherwise an
// unchanged copy.
String String::trim_suffix(const String &p_suffix) const {
	if (!ends_with(p_suffix)) {
		return *this;
	}
	return substr(0, length() - p_suffix.length());
}
4723

4724
// C-string overload: returns this string without p_suffix if it ends with it;
// otherwise an unchanged copy.
String String::trim_suffix(const char *p_suffix) const {
	if (!ends_with(p_suffix)) {
		return *this;
	}
	return substr(0, length() - strlen(p_suffix));
}
4731

4732
bool String::is_valid_int() const {
4733
	int len = length();
4734

4735
	if (len == 0) {
4736
		return false;
4737
	}
4738

4739
	int from = 0;
4740
	if (len != 1 && (operator[](0) == '+' || operator[](0) == '-')) {
4741
		from++;
4742
	}
4743

4744
	for (int i = from; i < len; i++) {
4745
		if (!is_digit(operator[](i))) {
4746
			return false; // no start with number plz
4747
		}
4748
	}
4749

4750
	return true;
4751
}
4752

4753
bool String::is_valid_hex_number(bool p_with_prefix) const {
4754
	int len = length();
4755

4756
	if (len == 0) {
4757
		return false;
4758
	}
4759

4760
	int from = 0;
4761
	if (len != 1 && (operator[](0) == '+' || operator[](0) == '-')) {
4762
		from++;
4763
	}
4764

4765
	if (p_with_prefix) {
4766
		if (len < 3) {
4767
			return false;
4768
		}
4769
		if (operator[](from) != '0' || operator[](from + 1) != 'x') {
4770
			return false;
4771
		}
4772
		from += 2;
4773
	}
4774

4775
	if (from == len) {
4776
		return false;
4777
	}
4778

4779
	for (int i = from; i < len; i++) {
4780
		char32_t c = operator[](i);
4781
		if (is_hex_digit(c)) {
4782
			continue;
4783
		}
4784
		return false;
4785
	}
4786

4787
	return true;
4788
}
4789

4790
bool String::is_valid_float() const {
4791
	int len = length();
4792

4793
	if (len == 0) {
4794
		return false;
4795
	}
4796

4797
	int from = 0;
4798
	if (operator[](0) == '+' || operator[](0) == '-') {
4799
		from++;
4800
	}
4801

4802
	bool exponent_found = false;
4803
	bool period_found = false;
4804
	bool sign_found = false;
4805
	bool exponent_values_found = false;
4806
	bool numbers_found = false;
4807

4808
	for (int i = from; i < len; i++) {
4809
		const char32_t c = operator[](i);
4810
		if (is_digit(c)) {
4811
			if (exponent_found) {
4812
				exponent_values_found = true;
4813
			} else {
4814
				numbers_found = true;
4815
			}
4816
		} else if (numbers_found && !exponent_found && (c == 'e' || c == 'E')) {
4817
			exponent_found = true;
4818
		} else if (!period_found && !exponent_found && c == '.') {
4819
			period_found = true;
4820
		} else if ((c == '-' || c == '+') && exponent_found && !exponent_values_found && !sign_found) {
4821
			sign_found = true;
4822
		} else {
4823
			return false; // no start with number plz
4824
		}
4825
	}
4826

4827
	return numbers_found;
4828
}
4829

4830
// Returns the path of the file p_path relative to this string, which is
// treated as a directory. If path_to() could not produce a relative path,
// p_path is returned unchanged.
String String::path_to_file(const String &p_path) const {
	// This string is expected to be a directory already, so only the target
	// gets reduced to its base directory.
	const String from_dir = replace_char('\\', '/');
	const String to_dir = p_path.replace_char('\\', '/').get_base_dir();

	const String relative_dir = from_dir.path_to(to_dir);
	if (relative_dir != to_dir) {
		return relative_dir + p_path.get_file();
	}
	// path_to() handed the target back unchanged, which signals failure.
	return p_path;
}
4841

4842
// Returns the relative directory path (always ending in '/') that leads from
// this directory to the directory p_path. Both paths must share the same root
// ("res://", "user://", "/" or an identical first component such as a drive);
// otherwise p_path is returned unchanged.
String String::path_to(const String &p_path) const {
	// Normalize separators and make sure both paths end with a slash.
	String from = replace_char('\\', '/');
	String to = p_path.replace_char('\\', '/');
	if (!from.ends_with("/")) {
		from += "/";
	}
	if (!to.ends_with("/")) {
		to += "/";
	}

	// Reduce both paths to a form that starts with a single '/'.
	if (from.begins_with("res://") && to.begins_with("res://")) {
		from = from.replace("res://", "/");
		to = to.replace("res://", "/");
	} else if (from.begins_with("user://") && to.begins_with("user://")) {
		from = from.replace("user://", "/");
		to = to.replace("user://", "/");
	} else if (from.begins_with("/") && to.begins_with("/")) {
		// Already in the expected form.
	} else {
		// DOS style: the first component has to be the same on both sides.
		const String from_root = from.get_slicec('/', 0);
		const String to_root = to.get_slicec('/', 0);
		if (from_root != to_root) {
			return p_path; // No relative path exists between different roots.
		}
		from = from.substr(from_root.length());
		to = to.substr(to_root.length());
	}

	// Drop the leading and trailing slash, then split into components.
	const Vector<String> from_dirs = from.substr(1, from.length() - 2).split("/");
	const Vector<String> to_dirs = to.substr(1, to.length() - 2).split("/");

	// Count how many leading components both paths have in common.
	int shared = 0;
	while (shared < from_dirs.size() && shared < to_dirs.size() && from_dirs[shared] == to_dirs[shared]) {
		shared++;
	}

	// Climb out of every source component that is not shared...
	const int dirs_to_backtrack = from_dirs.size() - shared;
	String relative = String("../").repeat(dirs_to_backtrack);

	// ...then descend into the remaining target components.
	for (int i = shared; i < to_dirs.size(); i++) {
		relative += to_dirs[i] + "/";
	}

	if (relative.is_empty()) {
		relative = "./";
	}
	return relative;
}
4909

4910
bool String::is_valid_html_color() const {
4911
	return Color::html_is_valid(*this);
4912
}
4913

4914
// Changes made to the set of invalid filename characters must also be reflected in the String documentation for is_valid_filename.
4915
// Substrings that may not appear in a file name. is_valid_filename() rejects
// any string containing one of them and validate_filename() replaces each
// occurrence with "_".
static const char *invalid_filename_characters[] = { ":", "/", "\\", "?", "*", "\"", "|", "%", "<", ">" };
4916

4917
bool String::is_valid_filename() const {
4918
	String stripped = strip_edges();
4919
	if (*this != stripped) {
4920
		return false;
4921
	}
4922

4923
	if (stripped.is_empty()) {
4924
		return false;
4925
	}
4926

4927
	for (const char *ch : invalid_filename_characters) {
4928
		if (contains(ch)) {
4929
			return false;
4930
		}
4931
	}
4932
	return true;
4933
}
4934

4935
// Returns a copy of this string passed through strip_edges(), with every
// occurrence of an invalid_filename_characters entry replaced by "_".
String String::validate_filename() const {
	String sanitized = strip_edges();
	for (const char *forbidden : invalid_filename_characters) {
		sanitized = sanitized.replace(forbidden, "_");
	}
	return sanitized;
}
4942

4943
bool String::is_valid_ip_address() const {
4944
	return IPAddress::is_valid_ip_address(*this);
4945
}
4946

4947
bool String::is_resource_file() const {
4948
	return begins_with("res://") && find("::") == -1;
4949
}
4950

4951
bool String::is_relative_path() const {
4952
	return !is_absolute_path();
4953
}
4954

4955
// Returns the directory part of the path, i.e. everything before the last
// '/' or '\\'. A leading root prefix (URL scheme, Windows drive, UNC share or
// Unix root) is recognized first and always kept in the result.
String String::get_base_dir() const {
	// Length of the root prefix that must be preserved; 0 while none is found.
	int root_len = 0;

	// URL scheme style base, e.g. "res://".
	const int scheme_pos = find("://");
	if (scheme_pos != -1) {
		root_len = scheme_pos + 3;
	}

	// Windows top level directory base, e.g. "C:/" or "C:\".
	if (root_len == 0) {
		int drive_pos = find(":/");
		if (drive_pos == -1) {
			drive_pos = find(":\\");
		}
		if (drive_pos != -1) {
			root_len = drive_pos + 2;
		}
	}

	// Windows UNC network share path: the root runs up to and including the
	// second separator found after the first two characters.
	if (root_len == 0 && is_network_share_path()) {
		int first_sep = find_char('/', 2);
		if (first_sep == -1) {
			first_sep = find_char('\\', 2);
		}
		int second_sep = find_char('/', first_sep + 1);
		if (second_sep == -1) {
			second_sep = find_char('\\', first_sep + 1);
		}
		if (second_sep != -1) {
			root_len = second_sep + 1;
		}
	}

	// Unix root directory base.
	if (root_len == 0 && begins_with("/")) {
		root_len = 1;
	}

	// Split into the preserved root and the remainder to be searched.
	String root;
	String remainder;
	if (root_len != 0) {
		remainder = substr(root_len, length());
		root = substr(0, root_len);
	} else {
		remainder = *this;
	}

	const int last_sep = MAX(remainder.rfind_char('/'), remainder.rfind_char('\\'));
	if (last_sep == -1) {
		return root;
	}
	return root + remainder.substr(0, last_sep);
}
5015

5016
// Returns everything after the last '/' or '\\', or the whole string if it
// contains neither.
String String::get_file() const {
	const int last_sep = MAX(rfind_char('/'), rfind_char('\\'));
	if (last_sep != -1) {
		return substr(last_sep + 1, length());
	}
	return *this;
}
5024

5025
// Returns the text after the last '.', or an empty string if there is no '.'
// or the last one sits before the last path separator.
String String::get_extension() const {
	const int dot = rfind_char('.');
	// A dot inside a directory name does not start an extension.
	const bool has_extension = dot >= 0 && dot >= MAX(rfind_char('/'), rfind_char('\\'));
	if (!has_extension) {
		return "";
	}
	return substr(dot + 1, length());
}
5033

5034
// Concatenates this path and p_file, inserting a '/' between them unless this
// string already ends with one or p_file already starts with one. An empty
// base yields p_file unchanged.
String String::path_join(const String &p_file) const {
	if (is_empty()) {
		return p_file;
	}

	const bool base_ends_with_slash = operator[](length() - 1) == '/';
	const bool file_starts_with_slash = p_file.size() > 0 && p_file.operator[](0) == '/';
	if (base_ends_with_slash || file_starts_with_slash) {
		return *this + p_file;
	}
	return *this + "/" + p_file;
}
5043

5044
// Returns the string unchanged if it consists only of characters in the range
// 33..126 other than '=', '"', ';', '[' and ']'. Otherwise returns it escaped
// with c_escape_multiline() and wrapped in double quotes.
String String::property_name_encode() const {
	for (const char32_t *cursor = get_data(); *cursor; cursor++) {
		const char32_t ch = *cursor;
		const bool is_reserved = ch == '=' || ch == '"' || ch == ';' || ch == '[' || ch == ']';
		// Space, control characters and everything past '~' force quoting.
		const bool is_plain = ch >= 33 && ch <= 126;
		if (is_reserved || !is_plain) {
			return "\"" + c_escape_multiline() + "\"";
		}
	}
	// Nothing needs escaping: keep as is.
	return *this;
}
5056

5057
// Changes made to the set of invalid characters must also be reflected in the String documentation.
5058

5059
// Characters that are not allowed in node names. The list ends with a 0
// sentinel so that it can be walked with a pointer until a zero is reached.
static const char32_t invalid_node_name_characters[] = { '.', ':', '@', '/', '\"', UNIQUE_NODE_PREFIX[0], 0 };
5060

5061
// Builds a space-separated list of the characters that are invalid in node
// names. With p_allow_internal set, '@' is left out of the list.
// Do not use this function for critical validation.
String String::get_invalid_node_name_characters(bool p_allow_internal) {
	String listing;
	for (const char32_t *c = invalid_node_name_characters; *c; c++) {
		if (p_allow_internal && *c == '@') {
			continue;
		}
		// Separate entries with a space; nothing goes before the first entry.
		if (c != invalid_node_name_characters) {
			listing += " ";
		}
		listing += String::chr(*c);
	}
	return listing;
}
5079

5080
// Returns a copy of this string in which every character listed in
// invalid_node_name_characters is replaced by '_'. This is a critical
// validation in node addition, so the string is only copied and modified once
// an invalid character has actually been found.
String String::validate_node_name() const {
	const char32_t *name = ptr();
	if (name == nullptr) {
		return String();
	}

	// True if p_char appears in the zero-terminated list of invalid characters.
	const auto is_invalid = [](char32_t p_char) {
		for (const char32_t *bad = invalid_node_name_characters; *bad; bad++) {
			if (p_char == *bad) {
				return true;
			}
		}
		return false;
	};

	// Scan for the first offending character.
	uint32_t first_bad = 0;
	while (name[first_bad] && !is_invalid(name[first_bad])) {
		first_bad++;
	}

	// Reached the terminator: the name is already valid.
	if (!name[first_bad]) {
		return *this;
	}

	// Everything before first_bad is known to be valid, so fixing starts there.
	String validated = *this;
	char32_t *chars = validated.ptrw();
	for (uint32_t i = first_bad; chars[i]; i++) {
		if (is_invalid(chars[i])) {
			chars[i] = '_';
		}
	}
	return validated;
}
5123

5124
// Returns the string without its extension: everything before the last '.',
// or the whole string if there is no '.' or the last one sits before the last
// path separator.
String String::get_basename() const {
	const int dot = rfind_char('.');
	// A dot inside a directory name does not start an extension.
	const bool has_extension = dot >= 0 && dot >= MAX(rfind_char('/'), rfind_char('\\'));
	if (!has_extension) {
		return *this;
	}
	return substr(0, dot);
}
5132

5133
// Converts a signed 64-bit integer to a String via String::num_int64().
String itos(int64_t p_val) {
	String text = String::num_int64(p_val);
	return text;
}
5136

5137
// Converts an unsigned 64-bit integer to a String via String::num_uint64().
String uitos(uint64_t p_val) {
	String text = String::num_uint64(p_val);
	return text;
}
5140

5141
// Converts a double to a String via String::num().
String rtos(double p_val) {
	String text = String::num(p_val);
	return text;
}
5144

5145
// Converts a double to a String via String::num_scientific().
String rtoss(double p_val) {
	String text = String::num_scientific(p_val);
	return text;
}
5148

5149
// Right-pad with a character.
5150
// Appends `character` repeatedly until the string is at least min_length
// long. A string that is already long enough is returned unchanged.
String String::rpad(int min_length, const String &character) const {
	const int missing = min_length - length();
	if (missing <= 0) {
		return *this;
	}
	return *this + character.repeat(missing);
}
5158

5159
// Left-pad with a character.
5160
// Prepends `character` repeatedly until the string is at least min_length
// long. A string that is already long enough is returned unchanged.
String String::lpad(int min_length, const String &character) const {
	const int missing = min_length - length();
	if (missing <= 0) {
		return *this;
	}
	return character.repeat(missing) + *this;
}
5168

5169
// sprintf is implemented in GDScript via:
5170
//   "fish %s pie" % "frog"
5171
//   "fish %s %d pie" % ["frog", 12]
5172
// In case of an error, the string returned is the error description and "error" is true.
5173
// Formats this string, used as a printf-like template, with `values`.
//
// Supported conversions: %% %d %o %x %X %f %v %s %c. Supported modifiers,
// placed between '%' and the conversion: '-' (left justify), '+' (always show
// the sign), '0' (pad with zeros), 'u' (treat integers as unsigned), a minimum
// width, '.' followed by a number of decimals, and '*' (take the width or the
// number of decimals from the next value).
//
// `values` supplies one entry per conversion and per '*'. If `error` is not
// null it is set to true when formatting fails, in which case the returned
// string is the error description, and to false on success.
String String::sprintf(const Span<Variant> &values, bool *error) const {
	static const String ZERO("0");
	static const String SPACE(" ");
	static const String MINUS("-");
	static const String PLUS("+");

	String formatted;
	const char32_t *self = get_data();
	bool in_format = false;
	uint64_t value_index = 0;
	int min_chars = 0;
	int min_decimals = 0;
	bool in_decimals = false;
	bool pad_with_zeros = false;
	bool left_justified = false;
	bool show_sign = false;
	bool as_unsigned = false;

	// Assume failure so that every early return below reports an error; the
	// flag is only cleared right before the successful return.
	if (error) {
		*error = true;
	}

	for (; *self; self++) {
		const char32_t c = *self;

		if (in_format) { // We have % - let's see what else we get.
			switch (c) {
				case '%': { // Replace %% with %
					formatted += c;
					in_format = false;
					break;
				}
				case 'd': // Integer (signed)
				case 'o': // Octal
				case 'x': // Hexadecimal (lowercase)
				case 'X': { // Hexadecimal (uppercase)
					if (value_index >= values.size()) {
						return "not enough arguments for format string";
					}

					if (!values[value_index].is_num()) {
						return "a number is required";
					}

					int64_t value = values[value_index];
					int base = 16;
					bool capitalize = false;
					switch (c) {
						case 'd':
							base = 10;
							break;
						case 'o':
							base = 8;
							break;
						case 'x':
							break;
						case 'X':
							capitalize = true;
							break;
					}
					// Get basic number.
					String str;
					if (!as_unsigned) {
						if (value == INT64_MIN) { // INT64_MIN can't be represented as positive value.
							str = String::num_int64(value, base, capitalize).trim_prefix("-");
						} else {
							str = String::num_int64(Math::abs(value), base, capitalize);
						}
					} else {
						// Reinterpret the two's complement bit pattern as unsigned.
						uint64_t uvalue = static_cast<uint64_t>(value);
						// In unsigned hex, if the value fits in 32 bits, trim it down to that.
						if (base == 16 && value < 0 && value >= INT32_MIN) {
							uvalue &= 0xffffffff;
						}
						str = String::num_uint64(uvalue, base, capitalize);
					}
					int number_len = str.length();

					bool negative = value < 0 && !as_unsigned;

					// Padding. Leave room for the sign if one will be added.
					int pad_chars_count = (negative || show_sign) ? min_chars - 1 : min_chars;
					const String &pad_char = pad_with_zeros ? ZERO : SPACE;
					if (left_justified) {
						str = str.rpad(pad_chars_count, pad_char);
					} else {
						str = str.lpad(pad_chars_count, pad_char);
					}

					// Sign. With zero padding it goes in front of the zeros,
					// otherwise directly in front of the digits.
					if (show_sign || negative) {
						const String &sign_char = negative ? MINUS : PLUS;
						if (left_justified) {
							str = str.insert(0, sign_char);
						} else {
							str = str.insert(pad_with_zeros ? 0 : str.length() - number_len, sign_char);
						}
					}

					formatted += str;
					++value_index;
					in_format = false;

					break;
				}
				case 'f': { // Float
					if (value_index >= values.size()) {
						return "not enough arguments for format string";
					}

					if (!values[value_index].is_num()) {
						return "a number is required";
					}

					double value = values[value_index];
					bool is_negative = std::signbit(value);
					String str = String::num(Math::abs(value), min_decimals);
					const bool is_finite = Math::is_finite(value);

					// Pad decimals out.
					if (is_finite) {
						str = str.pad_decimals(min_decimals);
					}

					int initial_len = str.length();

					// Padding. Leave room for sign later if required.
					int pad_chars_count = (is_negative || show_sign) ? min_chars - 1 : min_chars;
					const String &pad_char = (pad_with_zeros && is_finite) ? ZERO : SPACE; // Never pad NaN or inf with zeros
					if (left_justified) {
						str = str.rpad(pad_chars_count, pad_char);
					} else {
						str = str.lpad(pad_chars_count, pad_char);
					}

					// Add sign if needed.
					if (show_sign || is_negative) {
						const String &sign_char = is_negative ? MINUS : PLUS;
						if (left_justified) {
							str = str.insert(0, sign_char);
						} else {
							str = str.insert(pad_with_zeros ? 0 : str.length() - initial_len, sign_char);
						}
					}

					formatted += str;
					++value_index;
					in_format = false;
					break;
				}
				case 'v': { // Vector2/3/4/2i/3i/4i
					if (value_index >= values.size()) {
						return "not enough arguments for format string";
					}

					// Number of components to print.
					int count;
					switch (values[value_index].get_type()) {
						case Variant::VECTOR2:
						case Variant::VECTOR2I: {
							count = 2;
						} break;
						case Variant::VECTOR3:
						case Variant::VECTOR3I: {
							count = 3;
						} break;
						case Variant::VECTOR4:
						case Variant::VECTOR4I: {
							count = 4;
						} break;
						default: {
							return "%v requires a vector type (Vector2/3/4/2i/3i/4i)";
						}
					}

					Vector4 vec = values[value_index];
					String str = "(";
					for (int i = 0; i < count; i++) {
						double val = vec[i];
						String number_str = String::num(Math::abs(val), min_decimals);
						const bool is_finite = Math::is_finite(val);

						// Pad decimals out.
						if (is_finite) {
							number_str = number_str.pad_decimals(min_decimals);
						}

						int initial_len = number_str.length();

						// Padding. Leave room for sign later if required.
						int pad_chars_count = val < 0 ? min_chars - 1 : min_chars;
						const String &pad_char = (pad_with_zeros && is_finite) ? ZERO : SPACE; // Never pad NaN or inf with zeros
						if (left_justified) {
							number_str = number_str.rpad(pad_chars_count, pad_char);
						} else {
							number_str = number_str.lpad(pad_chars_count, pad_char);
						}

						// Add sign if needed.
						if (val < 0) {
							if (left_justified) {
								number_str = number_str.insert(0, MINUS);
							} else {
								number_str = number_str.insert(pad_with_zeros ? 0 : number_str.length() - initial_len, MINUS);
							}
						}

						// Add number to combined string
						str += number_str;

						if (i < count - 1) {
							str += ", ";
						}
					}
					str += ")";

					formatted += str;
					++value_index;
					in_format = false;
					break;
				}
				case 's': { // String
					if (value_index >= values.size()) {
						return "not enough arguments for format string";
					}

					String str = values[value_index];
					// Padding.
					if (left_justified) {
						str = str.rpad(min_chars);
					} else {
						str = str.lpad(min_chars);
					}

					formatted += str;
					++value_index;
					in_format = false;
					break;
				}
				case 'c': {
					if (value_index >= values.size()) {
						return "not enough arguments for format string";
					}

					// Convert to character.
					String str;
					if (values[value_index].is_num()) {
						// Range-check at full 64-bit width: narrowing to int
						// first would let out-of-range values wrap into the
						// valid range and slip through the checks below.
						int64_t value = values[value_index];
						if (value < 0) {
							return "unsigned integer is lower than minimum";
						} else if (value >= 0xd800 && value <= 0xdfff) {
							return "unsigned integer is invalid Unicode character";
						} else if (value > 0x10ffff) {
							return "unsigned integer is greater than maximum";
						}
						str = chr(values[value_index]);
					} else if (values[value_index].get_type() == Variant::STRING) {
						str = values[value_index];
						if (str.length() != 1) {
							return "%c requires number or single-character string";
						}
					} else {
						return "%c requires number or single-character string";
					}

					// Padding.
					if (left_justified) {
						str = str.rpad(min_chars);
					} else {
						str = str.lpad(min_chars);
					}

					formatted += str;
					++value_index;
					in_format = false;
					break;
				}
				case '-': { // Left justify
					left_justified = true;
					break;
				}
				case '+': { // Show + if positive.
					show_sign = true;
					break;
				}
				case 'u': { // Treat as unsigned (for int/hex).
					as_unsigned = true;
					break;
				}
				case '0':
				case '1':
				case '2':
				case '3':
				case '4':
				case '5':
				case '6':
				case '7':
				case '8':
				case '9': {
					int n = c - '0';
					if (in_decimals) {
						min_decimals *= 10;
						min_decimals += n;
					} else {
						// A '0' before any width digit is the zero-padding
						// flag, not part of the width.
						if (c == '0' && min_chars == 0) {
							if (left_justified) {
								WARN_PRINT("'0' flag ignored with '-' flag in string format");
							} else {
								pad_with_zeros = true;
							}
						} else {
							min_chars *= 10;
							min_chars += n;
						}
					}
					break;
				}
				case '.': { // Float/Vector separator.
					if (in_decimals) {
						return "too many decimal points in format";
					}
					in_decimals = true;
					min_decimals = 0; // We want to add the value manually.
					break;
				}

				case '*': { // Dynamic width, based on value.
					if (value_index >= values.size()) {
						return "not enough arguments for format string";
					}

					Variant::Type value_type = values[value_index].get_type();
					if (!values[value_index].is_num() &&
							value_type != Variant::VECTOR2 && value_type != Variant::VECTOR2I &&
							value_type != Variant::VECTOR3 && value_type != Variant::VECTOR3I &&
							value_type != Variant::VECTOR4 && value_type != Variant::VECTOR4I) {
						return "* wants number or vector";
					}

					int size = values[value_index];

					if (in_decimals) {
						min_decimals = size;
					} else {
						min_chars = size;
					}

					++value_index;
					break;
				}

				default: {
					return "unsupported format character";
				}
			}
		} else { // Not in format string.
			switch (c) {
				case '%':
					in_format = true;
					// Back to defaults:
					min_chars = 0;
					min_decimals = 6;
					pad_with_zeros = false;
					left_justified = false;
					show_sign = false;
					in_decimals = false;
					// The 'u' flag applies to a single specifier only; without
					// this reset it would leak into every later conversion.
					as_unsigned = false;
					break;
				default:
					formatted += c;
			}
		}
	}

	if (in_format) {
		return "incomplete format";
	}

	if (value_index != values.size()) {
		return "not all arguments converted during string formatting";
	}

	if (error) {
		*error = false;
	}
	return formatted;
}
5558

5559
// Returns this string with `quotechar` added in front of and behind it.
String String::quote(const String &quotechar) const {
	String quoted = quotechar;
	quoted += *this;
	quoted += quotechar;
	return quoted;
}
5562

5563
// Returns the string without its first and last character if is_quoted()
// reports it as quoted; otherwise returns it unchanged.
String String::unquote() const {
	if (is_quoted()) {
		return substr(1, length() - 2);
	}
	return *this;
}
5570

5571
// Returns the bytes produced by ascii(), without a terminating zero. An empty
// string gives an empty buffer.
Vector<uint8_t> String::to_ascii_buffer() const {
	Vector<uint8_t> bytes;
	if (is_empty()) {
		return bytes;
	}

	const CharString encoded = ascii();
	const size_t byte_count = encoded.length();
	bytes.resize_uninitialized(byte_count);
	memcpy(bytes.ptrw(), encoded.ptr(), byte_count);
	return bytes;
}
5586

5587
// Returns the bytes produced by utf8(), without a terminating zero. An empty
// string gives an empty buffer.
Vector<uint8_t> String::to_utf8_buffer() const {
	Vector<uint8_t> bytes;
	if (is_empty()) {
		return bytes;
	}

	const CharString encoded = utf8();
	const size_t byte_count = encoded.length();
	bytes.resize_uninitialized(byte_count);
	memcpy(bytes.ptrw(), encoded.ptr(), byte_count);
	return bytes;
}
5602

5603
// Returns the code units produced by utf16() as raw bytes in memory order,
// without a terminating zero. An empty string gives an empty buffer.
Vector<uint8_t> String::to_utf16_buffer() const {
	Vector<uint8_t> bytes;
	if (is_empty()) {
		return bytes;
	}

	const Char16String encoded = utf16();
	// Each UTF-16 code unit takes sizeof(char16_t) bytes.
	const size_t byte_count = encoded.length() * sizeof(char16_t);
	bytes.resize_uninitialized(byte_count);
	memcpy(bytes.ptrw(), (const void *)encoded.ptr(), byte_count);
	return bytes;
}
5618

5619
// Returns the string's char32_t characters as raw bytes in memory order,
// without a terminating zero. An empty string gives an empty buffer.
Vector<uint8_t> String::to_utf32_buffer() const {
	Vector<uint8_t> bytes;
	if (is_empty()) {
		return bytes;
	}

	// The characters are copied straight out of the string's own storage.
	const size_t byte_count = length() * sizeof(char32_t);
	bytes.resize_uninitialized(byte_count);
	memcpy(bytes.ptrw(), (const void *)ptr(), byte_count);
	return bytes;
}
5633

5634
// Returns the string as a wide-character byte buffer: the UTF-16 buffer when
// WINDOWS_ENABLED is defined, the UTF-32 buffer otherwise.
Vector<uint8_t> String::to_wchar_buffer() const {
#ifdef WINDOWS_ENABLED
	Vector<uint8_t> bytes = to_utf16_buffer();
#else
	Vector<uint8_t> bytes = to_utf32_buffer();
#endif
	return bytes;
}
5641

5642
// Converts the string to the encoding named by p_encoding by delegating to
// OS::string_to_multibyte().
Vector<uint8_t> String::to_multibyte_char_buffer(const String &p_encoding) const {
	auto *os = OS::get_singleton();
	return os->string_to_multibyte(p_encoding, *this);
}
5645

5646
#ifdef TOOLS_ENABLED
5647
/**
5648
 * "Tools TRanslate". Performs string replacement for internationalization
5649
 * within the editor. A translation context can optionally be specified to
5650
 * disambiguate between identical source strings in translations. When
5651
 * placeholders are desired, use `vformat(TTR("Example: %s"), some_string)`.
5652
 * If a string mentions a quantity (and may therefore need a dynamic plural form),
5653
 * use `TTRN()` instead of `TTR()`.
5654
 *
5655
 * NOTE: Only use `TTR()` in editor-only code (typically within the `editor/` folder).
5656
 * For translations that can be supplied by exported projects, use `RTR()` instead.
5657
 */
5658
String TTR(const String &p_text, const String &p_context) {
5659
	if (TranslationServer::get_singleton()) {
5660
		return TranslationServer::get_singleton()->get_editor_domain()->translate(p_text, p_context);
5661
	}
5662

5663
	return p_text;
5664
}
5665

5666
/**
5667
 * "Tools TRanslate for N items". Performs string replacement for
5668
 * internationalization within the editor. A translation context can optionally
5669
 * be specified to disambiguate between identical source strings in
5670
 * translations. Use `TTR()` if the string doesn't need dynamic plural form.
5671
 * When placeholders are desired, use
5672
 * `vformat(TTRN("%d item", "%d items", some_integer), some_integer)`.
5673
 * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
5674
 *
5675
 * NOTE: Only use `TTRN()` in editor-only code (typically within the `editor/` folder).
5676
 * For translations that can be supplied by exported projects, use `RTRN()` instead.
5677
 */
5678
String TTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
5679
	if (TranslationServer::get_singleton()) {
5680
		return TranslationServer::get_singleton()->get_editor_domain()->translate_plural(p_text, p_text_plural, p_n, p_context);
5681
	}
5682

5683
	// Return message based on English plural rule if translation is not possible.
5684
	if (p_n == 1) {
5685
		return p_text;
5686
	}
5687
	return p_text_plural;
5688
}
5689

5690
/**
5691
 * "Docs TRanslate". Used for the editor class reference documentation,
5692
 * handling descriptions extracted from the XML.
5693
 * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
5694
 * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
5695
 */
5696
String DTR(const String &p_text, const String &p_context) {
5697
	// Comes straight from the XML, so remove indentation and any trailing whitespace.
5698
	const String text = p_text.dedent().strip_edges();
5699

5700
	if (TranslationServer::get_singleton()) {
5701
		return String(TranslationServer::get_singleton()->get_doc_domain()->translate(text, p_context)).replace("$DOCS_URL", GODOT_VERSION_DOCS_URL);
5702
	}
5703

5704
	return text.replace("$DOCS_URL", GODOT_VERSION_DOCS_URL);
5705
}
5706

5707
/**
5708
 * "Docs TRanslate for N items". Used for the editor class reference documentation
5709
 * (with support for plurals), handling descriptions extracted from the XML.
5710
 * It also replaces `$DOCS_URL` with the actual URL to the documentation's branch,
5711
 * to allow dehardcoding it in the XML and doing proper substitutions everywhere.
5712
 */
5713
String DTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
5714
	const String text = p_text.dedent().strip_edges();
5715
	const String text_plural = p_text_plural.dedent().strip_edges();
5716

5717
	if (TranslationServer::get_singleton()) {
5718
		return String(TranslationServer::get_singleton()->get_doc_domain()->translate_plural(text, text_plural, p_n, p_context)).replace("$DOCS_URL", GODOT_VERSION_DOCS_URL);
5719
	}
5720

5721
	// Return message based on English plural rule if translation is not possible.
5722
	if (p_n == 1) {
5723
		return text.replace("$DOCS_URL", GODOT_VERSION_DOCS_URL);
5724
	}
5725
	return text_plural.replace("$DOCS_URL", GODOT_VERSION_DOCS_URL);
5726
}
5727
#endif
5728

5729
/**
5730
 * "Run-time TRanslate". Performs string replacement for internationalization
5731
 * without the editor. A translation context can optionally be specified to
5732
 * disambiguate between identical source strings in translations. When
5733
 * placeholders are desired, use `vformat(RTR("Example: %s"), some_string)`.
5734
 * If a string mentions a quantity (and may therefore need a dynamic plural form),
5735
 * use `RTRN()` instead of `RTR()`.
5736
 *
5737
 * NOTE: Do not use `RTR()` in editor-only code (typically within the `editor/`
5738
 * folder). For editor translations, use `TTR()` instead.
5739
 */
5740
String RTR(const String &p_text, const String &p_context) {
5741
	if (TranslationServer::get_singleton()) {
5742
#ifdef TOOLS_ENABLED
5743
		String rtr = TranslationServer::get_singleton()->get_editor_domain()->translate(p_text, p_context);
5744
		if (!rtr.is_empty() && rtr != p_text) {
5745
			return rtr;
5746
		}
5747
#endif // TOOLS_ENABLED
5748
		return TranslationServer::get_singleton()->translate(p_text, p_context);
5749
	}
5750

5751
	return p_text;
5752
}
5753

5754
/**
5755
 * "Run-time TRanslate for N items". Performs string replacement for
5756
 * internationalization without the editor. A translation context can optionally
5757
 * be specified to disambiguate between identical source strings in translations.
5758
 * Use `RTR()` if the string doesn't need dynamic plural form. When placeholders
5759
 * are desired, use `vformat(RTRN("%d item", "%d items", some_integer), some_integer)`.
5760
 * The placeholder must be present in both strings to avoid run-time warnings in `vformat()`.
5761
 *
5762
 * NOTE: Do not use `RTRN()` in editor-only code (typically within the `editor/`
5763
 * folder). For editor translations, use `TTRN()` instead.
5764
 */
5765
String RTRN(const String &p_text, const String &p_text_plural, int p_n, const String &p_context) {
5766
	if (TranslationServer::get_singleton()) {
5767
#ifdef TOOLS_ENABLED
5768
		String rtr = TranslationServer::get_singleton()->get_editor_domain()->translate_plural(p_text, p_text_plural, p_n, p_context);
5769
		if (!rtr.is_empty() && rtr != p_text && rtr != p_text_plural) {
5770
			return rtr;
5771
		}
5772
#endif // TOOLS_ENABLED
5773
		return TranslationServer::get_singleton()->translate_plural(p_text, p_text_plural, p_n, p_context);
5774
	}
5775

5776
	// Return message based on English plural rule if translation is not possible.
5777
	if (p_n == 1) {
5778
		return p_text;
5779
	}
5780
	return p_text_plural;
5781
}
5782

5783
Product

Resources

Company