Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/misc/scripts/unicode_ranges_fetch.py
9896 views
1
#!/usr/bin/env python3
2
3
# Script used to dump char ranges from
4
# the Unicode Character Database to the `unicode_ranges.inc` file.
5
# NOTE: This script is deliberately not integrated into the build system;
6
# you should run it manually whenever you want to update the data.
7
8
import os
9
import sys
10
from typing import Final, List, Set, Tuple
11
from urllib.request import urlopen
12
13
if __name__ == "__main__":
14
sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
15
16
from methods import generate_copyright_header
17
18
# Location of the Unicode Character Database block list that gets downloaded.
URL: Final[str] = "https://www.unicode.org/Public/16.0.0/ucd/Blocks.txt"

# Filled in by `parse_unicode_data()` with (start, end, block name) tuples,
# where start/end are "0x"-prefixed hexadecimal strings.
ranges: List[Tuple[str, str, str]] = []

# Block names (spelled exactly as in Blocks.txt) that are left out of the output.
exclude_blocks: Set[str] = {
    # Surrogate blocks.
    "High Surrogates",
    "High Private Use Surrogates",
    "Low Surrogates",
    # Variation selector blocks.
    "Variation Selectors",
    "Variation Selectors Supplement",
    # Remaining excluded blocks.
    "Specials",
    "Egyptian Hieroglyph Format Controls",
    "Tags",
}
33
34
35
def parse_unicode_data() -> None:
    """Download the block list from `URL` and append its entries to `ranges`.

    Every data line is expected to look like ``START..END; Block Name``.
    Lines starting with ``#`` and blank lines are skipped, and so are blocks
    whose name is listed in `exclude_blocks`. Each remaining block is appended
    to the module-level `ranges` list as ``("0xSTART", "0xEND", block_name)``.

    Note that results are appended, so calling this more than once in the
    same process adds the same entries again.
    """
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being left to the garbage collector.
    with urlopen(URL) as response:
        lines: List[str] = [line.decode("utf-8") for line in response]

    for line in lines:
        if line.startswith("#") or not line.strip():
            continue

        split_line: List[str] = line.split(";")

        char_range: str = split_line[0].strip()
        block: str = split_line[1].strip()

        if block in exclude_blocks:
            continue

        range_start, range_end = char_range.split("..")

        ranges.append((f"0x{range_start}", f"0x{range_end}", block))
53
54
55
def make_array(array_name: str, ranges: List[Tuple[str, str, str]]) -> str:
    """Render `ranges` as the C++ source of a static `UniRange` array.

    Args:
        array_name: Identifier used for the generated C++ array.
        ranges: (start, end, block name) tuples; start and end are inserted
            verbatim, the block name is emitted as a ``U"..."`` literal.

    Returns:
        The array definition, followed by one blank line. A final
        ``{ 0x10FFFF, 0x10FFFF, String() }`` entry is always appended after
        the given ranges (presumably an end-of-table marker for the C++
        consumer; confirm against the code that reads the array).
    """
    entries: List[str] = [f'\t{{ {start}, {end}, U"{block}" }},\n' for start, end, block in ranges]

    # Assemble all pieces in one join rather than growing a string with `+=`.
    return "".join(
        (
            f"static UniRange {array_name}[] = {{\n",
            *entries,
            "\t{ 0x10FFFF, 0x10FFFF, String() }\n};\n\n",
        )
    )
65
66
67
def generate_unicode_ranges_inc() -> None:
    """Download the Unicode block list and write `unicode_ranges.inc`.

    The output is assembled from the copyright header, an include guard, the
    `UniRange` struct definition and the `unicode_ranges` array built from
    the module-level `ranges` list. It is written to
    `editor/import/unicode_ranges.inc`, resolved relative to this script,
    using LF line endings. A confirmation message is printed on success.
    """
    parse_unicode_data()

    source: str = generate_copyright_header("unicode_ranges.inc")

    source += f"""
// This file was generated using the `misc/scripts/unicode_ranges_fetch.py` script.

#ifndef UNICODE_RANGES_INC
#define UNICODE_RANGES_INC

// Unicode Character Blocks
// Source: {URL}

struct UniRange {{
\tint32_t start;
\tint32_t end;
\tString name;
}};\n\n"""

    source += make_array("unicode_ranges", ranges)

    source += "#endif // UNICODE_RANGES_INC\n"

    unicode_ranges_path: str = os.path.join(os.path.dirname(__file__), "../../editor/import/unicode_ranges.inc")
    # Pin the encoding so the generated file does not depend on the locale of
    # whoever runs the script; `newline="\n"` keeps LF endings on every platform.
    with open(unicode_ranges_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(source)

    print("`unicode_ranges.inc` generated successfully.")
96
97
98
# Script entry point: regenerate `unicode_ranges.inc` only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    generate_unicode_ranges_inc()
100
101