Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
singlestore-labs
GitHub Repository: singlestore-labs/singlestoredb-python
Path: blob/main/singlestoredb/mysql/charset.py
469 views
1
from typing import Dict
2
from typing import Optional
3
4
# Integer-to-integer lookup table. The keys match collation ids registered
# further down in this file (8 latin1_swedish_ci, 33 utf8_general_ci,
# 88 sjis_bin, 91 ujis_bin).
# NOTE(review): the values are presumably the maximum number of bytes per
# character for that collation's character set -- confirm against callers.
MBLENGTH = {8: 1, 33: 3, 88: 2, 91: 2}
5
6
7
class Charset:
    """One collation entry: numeric id, character set name and collation name."""

    def __init__(self, id: int, name: str, collation: str, is_default: str):
        """Store the entry's fields.

        ``is_default`` arrives as text; only the exact string ``'Yes'``
        marks the entry as a default, anything else is treated as False.
        """
        self.id = id
        self.name = name
        self.collation = collation
        self.is_default = (is_default == 'Yes')

    def __repr__(self) -> str:
        """Return a debug representation showing id, name and collation."""
        return (
            f'Charset(id={self.id!s}, name={self.name!r}, '
            f'collation={self.collation!r})'
        )

    @property
    def encoding(self) -> str:
        """Return the encoding name to use for this character set.

        A handful of character set names are translated to a different
        spelling; every other name is returned unchanged.
        """
        translated = {
            'utf8mb4': 'utf8',
            'utf8mb3': 'utf8',
            'latin1': 'cp1252',
            'koi8r': 'koi8_r',
            'koi8u': 'koi8_u',
        }
        return translated.get(self.name, self.name)

    @property
    def is_binary(self) -> bool:
        """Return True when this entry's id is 63."""
        binary_id = 63
        return self.id == binary_id
36
37
38
class Charsets:
    """Registry of Charset entries, searchable by id or by character set name."""

    def __init__(self) -> None:
        """Create an empty registry."""
        # Every registered entry, keyed by its numeric id.
        self._by_id: Dict[int, Charset] = {}
        # Only entries flagged as default, keyed by character set name.
        self._by_name: Dict[str, Charset] = {}

    def add(self, c: Charset) -> None:
        """Register ``c`` by id, and also by name when it is a default entry."""
        self._by_id[c.id] = c
        if not c.is_default:
            return
        self._by_name[c.name] = c

    def by_id(self, id: int) -> Charset:
        """Return the entry registered under ``id``.

        Raises KeyError when no entry with that id has been added.
        """
        return self._by_id[id]

    def by_name(self, name: str) -> Optional[Charset]:
        """Return the default entry for ``name``, or None if there is none.

        The name is lower-cased before the lookup.
        """
        key = name.lower()
        return self._by_name.get(key)
54
55
56
_charsets = Charsets()

# Each row below is (id, character_set_name, collation_name, is_default),
# in the order returned by:
#
#   mysql -N -s -e "select id, character_set_name, collation_name, is_default
#   from information_schema.collations order by id;"
#
# The output is tab-separated; a formatter along these lines emits the rows
# in the tuple layout used here:
#
#   ... | python -c "import sys
#   for l in sys.stdin.readlines():
#       id, name, collation, is_default = l.split(chr(9))
#       print('    (%s, %r, %r, %r),' % (id, name, collation, is_default.strip()))
#   "
#
# Rows are registered in listed order; only rows whose last field is 'Yes'
# become reachable through the by-name lookup.
for _row in (
    (1, 'big5', 'big5_chinese_ci', 'Yes'),
    (2, 'latin2', 'latin2_czech_cs', ''),
    (3, 'dec8', 'dec8_swedish_ci', 'Yes'),
    (4, 'cp850', 'cp850_general_ci', 'Yes'),
    (5, 'latin1', 'latin1_german1_ci', ''),
    (6, 'hp8', 'hp8_english_ci', 'Yes'),
    (7, 'koi8r', 'koi8r_general_ci', 'Yes'),
    (8, 'latin1', 'latin1_swedish_ci', 'Yes'),
    (9, 'latin2', 'latin2_general_ci', 'Yes'),
    (10, 'swe7', 'swe7_swedish_ci', 'Yes'),
    (11, 'ascii', 'ascii_general_ci', 'Yes'),
    (12, 'ujis', 'ujis_japanese_ci', 'Yes'),
    (13, 'sjis', 'sjis_japanese_ci', 'Yes'),
    (14, 'cp1251', 'cp1251_bulgarian_ci', ''),
    (15, 'latin1', 'latin1_danish_ci', ''),
    (16, 'hebrew', 'hebrew_general_ci', 'Yes'),
    (18, 'tis620', 'tis620_thai_ci', 'Yes'),
    (19, 'euckr', 'euckr_korean_ci', 'Yes'),
    (20, 'latin7', 'latin7_estonian_cs', ''),
    (21, 'latin2', 'latin2_hungarian_ci', ''),
    (22, 'koi8u', 'koi8u_general_ci', 'Yes'),
    (23, 'cp1251', 'cp1251_ukrainian_ci', ''),
    (24, 'gb2312', 'gb2312_chinese_ci', 'Yes'),
    (25, 'greek', 'greek_general_ci', 'Yes'),
    (26, 'cp1250', 'cp1250_general_ci', 'Yes'),
    (27, 'latin2', 'latin2_croatian_ci', ''),
    (28, 'gbk', 'gbk_chinese_ci', 'Yes'),
    (29, 'cp1257', 'cp1257_lithuanian_ci', ''),
    (30, 'latin5', 'latin5_turkish_ci', 'Yes'),
    (31, 'latin1', 'latin1_german2_ci', ''),
    (32, 'armscii8', 'armscii8_general_ci', 'Yes'),
    (33, 'utf8', 'utf8_general_ci', 'Yes'),
    (34, 'cp1250', 'cp1250_czech_cs', ''),
    (36, 'cp866', 'cp866_general_ci', 'Yes'),
    (37, 'keybcs2', 'keybcs2_general_ci', 'Yes'),
    (38, 'macce', 'macce_general_ci', 'Yes'),
    (39, 'macroman', 'macroman_general_ci', 'Yes'),
    (40, 'cp852', 'cp852_general_ci', 'Yes'),
    (41, 'latin7', 'latin7_general_ci', 'Yes'),
    (42, 'latin7', 'latin7_general_cs', ''),
    (43, 'macce', 'macce_bin', ''),
    (44, 'cp1250', 'cp1250_croatian_ci', ''),
    (45, 'utf8mb4', 'utf8mb4_general_ci', 'Yes'),
    (46, 'utf8mb4', 'utf8mb4_bin', ''),
    (47, 'latin1', 'latin1_bin', ''),
    (48, 'latin1', 'latin1_general_ci', ''),
    (49, 'latin1', 'latin1_general_cs', ''),
    (50, 'cp1251', 'cp1251_bin', ''),
    (51, 'cp1251', 'cp1251_general_ci', 'Yes'),
    (52, 'cp1251', 'cp1251_general_cs', ''),
    (53, 'macroman', 'macroman_bin', ''),
    (57, 'cp1256', 'cp1256_general_ci', 'Yes'),
    (58, 'cp1257', 'cp1257_bin', ''),
    (59, 'cp1257', 'cp1257_general_ci', 'Yes'),
    (63, 'binary', 'binary', 'Yes'),
    (64, 'armscii8', 'armscii8_bin', ''),
    (65, 'ascii', 'ascii_bin', ''),
    (66, 'cp1250', 'cp1250_bin', ''),
    (67, 'cp1256', 'cp1256_bin', ''),
    (68, 'cp866', 'cp866_bin', ''),
    (69, 'dec8', 'dec8_bin', ''),
    (70, 'greek', 'greek_bin', ''),
    (71, 'hebrew', 'hebrew_bin', ''),
    (72, 'hp8', 'hp8_bin', ''),
    (73, 'keybcs2', 'keybcs2_bin', ''),
    (74, 'koi8r', 'koi8r_bin', ''),
    (75, 'koi8u', 'koi8u_bin', ''),
    (76, 'utf8', 'utf8_tolower_ci', ''),
    (77, 'latin2', 'latin2_bin', ''),
    (78, 'latin5', 'latin5_bin', ''),
    (79, 'latin7', 'latin7_bin', ''),
    (80, 'cp850', 'cp850_bin', ''),
    (81, 'cp852', 'cp852_bin', ''),
    (82, 'swe7', 'swe7_bin', ''),
    (83, 'utf8', 'utf8_bin', ''),
    (84, 'big5', 'big5_bin', ''),
    (85, 'euckr', 'euckr_bin', ''),
    (86, 'gb2312', 'gb2312_bin', ''),
    (87, 'gbk', 'gbk_bin', ''),
    (88, 'sjis', 'sjis_bin', ''),
    (89, 'tis620', 'tis620_bin', ''),
    (91, 'ujis', 'ujis_bin', ''),
    (92, 'geostd8', 'geostd8_general_ci', 'Yes'),
    (93, 'geostd8', 'geostd8_bin', ''),
    (94, 'latin1', 'latin1_spanish_ci', ''),
    (95, 'cp932', 'cp932_japanese_ci', 'Yes'),
    (96, 'cp932', 'cp932_bin', ''),
    (97, 'eucjpms', 'eucjpms_japanese_ci', 'Yes'),
    (98, 'eucjpms', 'eucjpms_bin', ''),
    (99, 'cp1250', 'cp1250_polish_ci', ''),
    (192, 'utf8', 'utf8_unicode_ci', ''),
    (193, 'utf8', 'utf8_icelandic_ci', ''),
    (194, 'utf8', 'utf8_latvian_ci', ''),
    (195, 'utf8', 'utf8_romanian_ci', ''),
    (196, 'utf8', 'utf8_slovenian_ci', ''),
    (197, 'utf8', 'utf8_polish_ci', ''),
    (198, 'utf8', 'utf8_estonian_ci', ''),
    (199, 'utf8', 'utf8_spanish_ci', ''),
    (200, 'utf8', 'utf8_swedish_ci', ''),
    (201, 'utf8', 'utf8_turkish_ci', ''),
    (202, 'utf8', 'utf8_czech_ci', ''),
    (203, 'utf8', 'utf8_danish_ci', ''),
    (204, 'utf8', 'utf8_lithuanian_ci', ''),
    (205, 'utf8', 'utf8_slovak_ci', ''),
    (206, 'utf8', 'utf8_spanish2_ci', ''),
    (207, 'utf8', 'utf8_roman_ci', ''),
    (208, 'utf8', 'utf8_persian_ci', ''),
    (209, 'utf8', 'utf8_esperanto_ci', ''),
    (210, 'utf8', 'utf8_hungarian_ci', ''),
    (211, 'utf8', 'utf8_sinhala_ci', ''),
    (212, 'utf8', 'utf8_german2_ci', ''),
    (213, 'utf8', 'utf8_croatian_ci', ''),
    (214, 'utf8', 'utf8_unicode_520_ci', ''),
    (215, 'utf8', 'utf8_vietnamese_ci', ''),
    (223, 'utf8', 'utf8_general_mysql500_ci', ''),
    (224, 'utf8mb4', 'utf8mb4_unicode_ci', ''),
    (225, 'utf8mb4', 'utf8mb4_icelandic_ci', ''),
    (226, 'utf8mb4', 'utf8mb4_latvian_ci', ''),
    (227, 'utf8mb4', 'utf8mb4_romanian_ci', ''),
    (228, 'utf8mb4', 'utf8mb4_slovenian_ci', ''),
    (229, 'utf8mb4', 'utf8mb4_polish_ci', ''),
    (230, 'utf8mb4', 'utf8mb4_estonian_ci', ''),
    (231, 'utf8mb4', 'utf8mb4_spanish_ci', ''),
    (232, 'utf8mb4', 'utf8mb4_swedish_ci', ''),
    (233, 'utf8mb4', 'utf8mb4_turkish_ci', ''),
    (234, 'utf8mb4', 'utf8mb4_czech_ci', ''),
    (235, 'utf8mb4', 'utf8mb4_danish_ci', ''),
    (236, 'utf8mb4', 'utf8mb4_lithuanian_ci', ''),
    (237, 'utf8mb4', 'utf8mb4_slovak_ci', ''),
    (238, 'utf8mb4', 'utf8mb4_spanish2_ci', ''),
    (239, 'utf8mb4', 'utf8mb4_roman_ci', ''),
    (240, 'utf8mb4', 'utf8mb4_persian_ci', ''),
    (241, 'utf8mb4', 'utf8mb4_esperanto_ci', ''),
    (242, 'utf8mb4', 'utf8mb4_hungarian_ci', ''),
    (243, 'utf8mb4', 'utf8mb4_sinhala_ci', ''),
    (244, 'utf8mb4', 'utf8mb4_german2_ci', ''),
    (245, 'utf8mb4', 'utf8mb4_croatian_ci', ''),
    (246, 'utf8mb4', 'utf8mb4_unicode_520_ci', ''),
    (247, 'utf8mb4', 'utf8mb4_vietnamese_ci', ''),
    (248, 'gb18030', 'gb18030_chinese_ci', 'Yes'),
    (249, 'gb18030', 'gb18030_bin', ''),
    (250, 'gb18030', 'gb18030_unicode_520_ci', ''),
    (255, 'utf8mb4', 'utf8mb4_0900_ai_ci', ''),
):
    _charsets.add(Charset(*_row))
# Drop the loop variable so the module namespace gains no extra name.
del _row

# Module-level lookup helpers: bound methods of the registry built above.
charset_by_id = _charsets.by_id
charset_by_name = _charsets.by_name
215
216