Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/misc/scripts/ucaps_fetch.py
20880 views
1
#!/usr/bin/env python3
2
3
# Script used to dump case mappings from
4
# the Unicode Character Database to the `ucaps.h` file.
5
# NOTE: This script is deliberately not integrated into the build system;
6
# you should run it manually whenever you want to update the data.
7
from __future__ import annotations
8
9
import os
10
import sys
11
from typing import Final
12
from urllib.request import urlopen
13
14
# When run as a script, make the directory two levels above this file
# importable so that the `methods` import that follows can be resolved.
if __name__ == "__main__":
    sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
16
17
from methods import generate_copyright_header
18
19
# Address of the `UnicodeData.txt` file that `parse_unicode_data()` downloads.
# The Unicode version is part of the path, so updating the data means editing it here.
URL: Final[str] = "https://www.unicode.org/Public/17.0.0/ucd/UnicodeData.txt"
20
21
22
# (code value, uppercase mapping) pairs, both stored as "0x"-prefixed strings.
# Filled by `parse_unicode_data()`.
lower_to_upper: list[tuple[str, str]] = []
23
# (code value, lowercase mapping) pairs, both stored as "0x"-prefixed strings.
# Filled by `parse_unicode_data()`.
upper_to_lower: list[tuple[str, str]] = []
24
25
26
def parse_unicode_data() -> None:
    """Download the file at `URL` and collect its case mappings.

    Each non-blank line is split on `;`. Field 0 is taken as the code value,
    field 12 as the uppercase mapping and field 13 as the lowercase mapping.
    Whenever a mapping field is non-empty, a `(code value, mapping)` pair is
    appended to `lower_to_upper` or `upper_to_lower` respectively, with both
    members prefixed by `0x`.

    The module-level lists are appended to, not reset, so calling this
    function more than once duplicates their entries.

    Raises:
        urllib.error.URLError: If the download fails.
        IndexError: If a non-blank line has fewer than 14 fields.
    """
    # Download everything before touching the output lists, so a failed
    # transfer cannot leave them half-filled. The `with` block guarantees
    # the HTTP response is closed even when decoding raises.
    with urlopen(URL) as response:
        lines: list[str] = [raw_line.decode("utf-8") for raw_line in response]

    for line in lines:
        # A stray blank line (e.g. at the end of the file) carries no record.
        if not line.strip():
            continue

        split_line: list[str] = line.split(";")

        code_value: str = split_line[0].strip()
        uppercase_mapping: str = split_line[12].strip()
        lowercase_mapping: str = split_line[13].strip()

        if uppercase_mapping:
            lower_to_upper.append((f"0x{code_value}", f"0x{uppercase_mapping}"))
        if lowercase_mapping:
            upper_to_lower.append((f"0x{code_value}", f"0x{lowercase_mapping}"))
40
41
42
def make_cap_table(table_name: str, len_name: str, table: list[tuple[str, str]]) -> str:
    """Render `table` as the C source of a two-column `static const int` array.

    Args:
        table_name: Identifier of the generated C array.
        len_name: Text placed in the first array dimension, e.g. the name of
            a `#define` holding the row count.
        table: Pairs that are emitted verbatim, one per row, in list order.

    Returns:
        The array definition: an opening line, one tab-indented
        `{ first, second },` row per pair, and a closing `};` followed by a
        blank line.
    """
    header: str = f"static const int {table_name}[{len_name}][2] = {{\n"
    # Build all rows first and join once instead of growing a string per row.
    rows: list[str] = [f"\t{{ {first}, {second} }},\n" for first, second in table]

    return header + "".join(rows) + "};\n\n"
51
52
53
def generate_ucaps_fetch() -> None:
    """Generate `core/string/ucaps.h` from freshly downloaded Unicode data.

    Calls `parse_unicode_data()` to fill `lower_to_upper` and
    `upper_to_lower`, then assembles the header text in this order:

    1. The header returned by `generate_copyright_header("ucaps.h")`.
    2. `#pragma once`, a "generated file" notice and the `LTU_LEN` /
       `UTL_LEN` defines holding the two table lengths.
    3. The `caps_table` and `reverse_caps_table` arrays.
    4. The C functions `_find_upper` and `_find_lower`, which binary-search
       those arrays and return `ch` unchanged when no entry matches.

    The result is written to `../../core/string/ucaps.h` relative to this
    script, always with `\\n` line endings and UTF-8 encoding, and a
    confirmation message is printed.
    """
    parse_unicode_data()

    source: str = generate_copyright_header("ucaps.h")

    source += f"""
#pragma once

// This file was generated using the `misc/scripts/ucaps_fetch.py` script.

#define LTU_LEN {len(lower_to_upper)}
#define UTL_LEN {len(upper_to_lower)}\n\n"""

    source += make_cap_table("caps_table", "LTU_LEN", lower_to_upper)
    source += make_cap_table("reverse_caps_table", "UTL_LEN", upper_to_lower)

    # NOTE(review): the binary searches below are only correct if both tables
    # are sorted by their first column, i.e. if the downloaded records are in
    # ascending code value order - confirm when changing the data source.
    source += """static int _find_upper(int ch) {
\tint low = 0;
\tint high = LTU_LEN - 1;
\tint middle;

\twhile (low <= high) {
\t\tmiddle = (low + high) / 2;

\t\tif (ch < caps_table[middle][0]) {
\t\t\thigh = middle - 1; // Search low end of array.
\t\t} else if (caps_table[middle][0] < ch) {
\t\t\tlow = middle + 1; // Search high end of array.
\t\t} else {
\t\t\treturn caps_table[middle][1];
\t\t}
\t}

\treturn ch;
}

static int _find_lower(int ch) {
\tint low = 0;
\tint high = UTL_LEN - 1;
\tint middle;

\twhile (low <= high) {
\t\tmiddle = (low + high) / 2;

\t\tif (ch < reverse_caps_table[middle][0]) {
\t\t\thigh = middle - 1; // Search low end of array.
\t\t} else if (reverse_caps_table[middle][0] < ch) {
\t\t\tlow = middle + 1; // Search high end of array.
\t\t} else {
\t\t\treturn reverse_caps_table[middle][1];
\t\t}
\t}

\treturn ch;
}
"""

    ucaps_path: str = os.path.join(os.path.dirname(__file__), "../../core/string/ucaps.h")
    # Pin the encoding so the generated bytes do not depend on the locale of
    # the machine that runs the script; `newline="\n"` does the same for line
    # endings.
    with open(ucaps_path, "w", newline="\n", encoding="utf-8") as f:
        f.write(source)

    print("`ucaps.h` generated successfully.")
115
116
117
# Regenerate the header only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    generate_ucaps_fetch()
119
120