Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/misc/scripts/unicode_ranges_fetch.py
20907 views
1
#!/usr/bin/env python3
2
3
# Script used to dump char ranges from
4
# the Unicode Character Database to the `unicode_ranges.inc` file.
5
# NOTE: This script is deliberately not integrated into the build system;
6
# you should run it manually whenever you want to update the data.
7
from __future__ import annotations
8
9
import os
10
import sys
11
from typing import Final
12
from urllib.request import urlopen
13
14
if __name__ == "__main__":
15
sys.path.insert(1, os.path.join(os.path.dirname(__file__), "../../"))
16
17
from methods import generate_copyright_header
18
19
# Location of the Unicode Character Database block list that gets downloaded.
URL: Final[str] = "https://www.unicode.org/Public/17.0.0/ucd/Blocks.txt"

# Accumulates `(start, end, block name)` tuples; filled by `parse_unicode_data()`.
ranges: list[tuple[str, str, str]] = []

# Block names that `parse_unicode_data()` skips instead of recording.
# NOTE(review): presumably blocks without displayable glyphs -- confirm.
exclude_blocks: set[str] = {
    # Surrogate blocks.
    "High Surrogates",
    "High Private Use Surrogates",
    "Low Surrogates",
    # Variation selector blocks.
    "Variation Selectors",
    "Variation Selectors Supplement",
    # Remaining excluded blocks.
    "Specials",
    "Egyptian Hieroglyph Format Controls",
    "Tags",
}
34
35
36
def parse_unicode_data() -> None:
    """Download the block list at ``URL`` and append its ranges to ``ranges``.

    Lines starting with ``#`` and blank lines are ignored. Every other line is
    split on ``;`` into a ``START..END`` range and a block name. Blocks listed
    in ``exclude_blocks`` are skipped; the rest are appended to the
    module-level ``ranges`` list as ``("0xSTART", "0xEND", name)`` tuples.
    """
    # Use the response as a context manager so the connection is closed as
    # soon as the payload has been read, even if decoding raises.
    with urlopen(URL) as response:
        lines: list[str] = [line.decode("utf-8") for line in response]

    for line in lines:
        if line.startswith("#") or not line.strip():
            continue

        split_line: list[str] = line.split(";")

        char_range: str = split_line[0].strip()
        block: str = split_line[1].strip()

        if block in exclude_blocks:
            continue

        range_start, range_end = char_range.split("..")

        # Store the bounds already prefixed with `0x` so they can be pasted
        # verbatim into the generated source.
        ranges.append((f"0x{range_start}", f"0x{range_end}", block))
54
55
56
def make_array(array_name: str, ranges: list[tuple[str, str, str]]) -> str:
    """Return the source text of a ``UniRange`` array called ``array_name``.

    Each ``(start, end, block)`` tuple in ``ranges`` becomes one initializer
    row of the form ``{ start, end, U"block" },``. A final
    ``{ 0x10FFFF, 0x10FFFF, String() }`` row is always emitted after the data
    rows (presumably an end-of-table marker for the consumer -- confirm), and
    the array is followed by one blank line.
    """
    # Collect the pieces and join once instead of growing a string with `+=`.
    parts: list[str] = [f"static UniRange {array_name}[] = {{\n"]
    parts.extend(f'\t{{ {start}, {end}, U"{block}" }},\n' for start, end, block in ranges)
    parts.append("\t{ 0x10FFFF, 0x10FFFF, String() }\n};\n\n")

    return "".join(parts)
66
67
68
def generate_unicode_ranges_inc() -> None:
    """Fetch the Unicode block list and write `unicode_ranges.inc`.

    Calls `parse_unicode_data()` to fill the module-level `ranges`, then
    assembles the copyright header, an include guard, the `UniRange` struct
    declaration and the `unicode_ranges` array. The result is written to
    `editor/import/unicode_ranges.inc`, resolved relative to this script.
    """
    parse_unicode_data()

    source: str = generate_copyright_header("unicode_ranges.inc")

    source += f"""
// This file was generated using the `misc/scripts/unicode_ranges_fetch.py` script.

#ifndef UNICODE_RANGES_INC
#define UNICODE_RANGES_INC

// Unicode Character Blocks
// Source: {URL}

struct UniRange {{
\tint32_t start;
\tint32_t end;
\tString name;
}};\n\n"""

    source += make_array("unicode_ranges", ranges)

    source += "#endif // UNICODE_RANGES_INC\n"

    unicode_ranges_path: str = os.path.join(os.path.dirname(__file__), "../../editor/import/unicode_ranges.inc")
    # Pin the encoding so the output does not depend on the platform's default
    # locale encoding; `newline="\n"` keeps LF line endings on every OS.
    with open(unicode_ranges_path, "w", encoding="utf-8", newline="\n") as f:
        f.write(source)

    print("`unicode_ranges.inc` generated successfully.")
97
98
99
# Regenerate the file only when this module is executed as a script.
if __name__ == "__main__":
    generate_unicode_ranges_inc()
101
102