Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/expressions/categoricals.py
7890 views
1
# Snippet: build a Series of bear species with a three-category Enum dtype and print it.
# --8<-- [start:enum-example]
import polars as pl

bears_enum = pl.Enum(["Polar", "Panda", "Brown"])
bears = pl.Series(["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=bears_enum)
print(bears)
# --8<-- [end:enum-example]
8
9
# Snippet: try to build a Series with `bears_enum` from data containing "Shark",
# a value that is not among the Enum's categories; the handler prints the error.
# --8<-- [start:enum-wrong-value]
from polars.exceptions import InvalidOperationError

try:
    bears_kind_of = pl.Series(
        ["Polar", "Panda", "Brown", "Polar", "Shark"],
        dtype=bears_enum,
    )
except InvalidOperationError as exc:
    print("InvalidOperationError:", exc)
# --8<-- [end:enum-wrong-value]
20
21
# Snippet: type the "level" column as an Enum of log levels, then keep only the
# rows whose level compares greater than "debug" and print them.
# --8<-- [start:log-levels]
log_levels = pl.Enum(["debug", "info", "warning", "error"])

logs = pl.DataFrame(
    {
        "level": ["debug", "info", "debug", "error"],
        "message": [
            "process id: 525",
            "Service started correctly",
            "startup time: 67ms",
            "Cannot connect to DB!",
        ],
    },
    schema_overrides={
        "level": log_levels,
    },
)

non_debug_logs = logs.filter(
    pl.col("level") > "debug",
)
print(non_debug_logs)
# --8<-- [end:log-levels]
44
45
# Snippet: build the bear-species Series with the Categorical dtype and print it.
# --8<-- [start:categorical-example]
bears_cat = pl.Series(
    ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
)
print(bears_cat)
# --8<-- [end:categorical-example]
51
52
# Snippet: compare the Categorical Series `bears_cat` against a single string.
# --8<-- [start:categorical-comparison-string]
print(bears_cat < "Cat")
# --8<-- [end:categorical-comparison-string]
55
56
# Snippet: compare the Categorical Series `bears_cat` for equality against a
# Series built from plain strings (no dtype given).
# --8<-- [start:categorical-comparison-string-column]
bears_str = pl.Series(
    ["Panda", "Brown", "Brown", "Polar", "Polar"],
)
print(bears_cat == bears_str)
# --8<-- [end:categorical-comparison-string-column]
62
63
# Snippet: compare two separately created Categorical Series. If a
# StringCacheMismatchError is raised, only the first line of its message is printed.
# --8<-- [start:categorical-comparison-categorical-column]
from polars.exceptions import StringCacheMismatchError

bears_cat2 = pl.Series(
    ["Panda", "Brown", "Brown", "Polar", "Polar"],
    dtype=pl.Categorical,
)

try:
    print(bears_cat == bears_cat2)
except StringCacheMismatchError as exc:
    exc_str = str(exc).splitlines()[0]
    print("StringCacheMismatchError:", exc_str)
# --8<-- [end:categorical-comparison-categorical-column]
77
78
# Snippet: create both Categorical Series inside one `pl.StringCache()` context,
# then compare them for equality.
# --8<-- [start:stringcache-categorical-equality]
with pl.StringCache():
    bears_cat = pl.Series(
        ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
    )
    bears_cat2 = pl.Series(
        ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
    )

print(bears_cat == bears_cat2)
# --8<-- [end:stringcache-categorical-equality]
89
90
# Snippet: under one `pl.StringCache()`, build a Categorical Series with
# ordering="lexical" and a default Categorical Series, then compare with `>`.
# DeprecationWarning is silenced while the Series are constructed.
# --8<-- [start:stringcache-categorical-comparison-lexical]
import warnings

with pl.StringCache():
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        bears_cat = pl.Series(
            ["Polar", "Panda", "Brown", "Brown", "Polar"],
            dtype=pl.Categorical(ordering="lexical"),
        )
        bears_cat2 = pl.Series(
            ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
        )

print(bears_cat > bears_cat2)
# --8<-- [end:stringcache-categorical-comparison-lexical]
107
108
# Snippet: under one `pl.StringCache()`, build two default Categorical Series
# and compare them with `>`.
# --8<-- [start:stringcache-categorical-comparison-physical]
with pl.StringCache():
    bears_cat = pl.Series(
        # Polar < Panda < Brown
        ["Polar", "Panda", "Brown", "Brown", "Polar"],
        dtype=pl.Categorical,
    )
    bears_cat2 = pl.Series(
        ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
    )

print(bears_cat > bears_cat2)
# --8<-- [end:stringcache-categorical-comparison-physical]
121
122
# Snippet: vertically concatenate two DataFrames whose "species" column is
# Categorical, silencing CategoricalRemappingWarning during the concat, and
# print the result.
# --8<-- [start:concatenating-categoricals]
import warnings

from polars.exceptions import CategoricalRemappingWarning

male_bears = pl.DataFrame(
    {
        "species": ["Polar", "Brown", "Panda"],
        "weight": [450, 500, 110],  # kg
    },
    schema_overrides={"species": pl.Categorical},
)
female_bears = pl.DataFrame(
    {
        "species": ["Brown", "Polar", "Panda"],
        "weight": [340, 200, 90],  # kg
    },
    schema_overrides={"species": pl.Categorical},
)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=CategoricalRemappingWarning)
    bears = pl.concat([male_bears, female_bears], how="vertical")

print(bears)
# --8<-- [end:concatenating-categoricals]
148
149
150
# Snippet: build the same bear data once as an Enum Series (printed) and once
# as a Categorical Series.
# --8<-- [start:example]
import polars as pl

bears_enum = pl.Enum(["Polar", "Panda", "Brown"])
bears = pl.Series(["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=bears_enum)
print(bears)

cat_bears = pl.Series(
    ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
)
# --8<-- [end:example]
161
162
163
# Snippet: append one separately created Categorical Series to another and
# print the combined Series.
# --8<-- [start:append]
cat_bears = pl.Series(
    ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
)
cat2_series = pl.Series(
    ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
)

# Triggers a CategoricalRemappingWarning.
print(cat_bears.append(cat2_series))
# --8<-- [end:append]
174
175
# Snippet: append two Series that share the same Enum dtype and print the result.
# --8<-- [start:enum_append]
dtype = pl.Enum(["Polar", "Panda", "Brown"])
cat_bears = pl.Series(["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=dtype)
cat2_series = pl.Series(["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=dtype)
print(cat_bears.append(cat2_series))
# --8<-- [end:enum_append]
181
182
# Snippet: try to build an Enum Series from data containing "Black", a value
# that is not among the Enum's categories, and print the resulting error.
# Only InvalidOperationError is caught (the same error type the
# enum-wrong-value snippet handles for this failure), so unrelated errors
# are not silently swallowed.
# --8<-- [start:enum_error]
from polars.exceptions import InvalidOperationError

dtype = pl.Enum(["Polar", "Panda", "Brown"])
try:
    cat_bears = pl.Series(["Polar", "Panda", "Brown", "Black"], dtype=dtype)
except InvalidOperationError as e:
    print(e)
# --8<-- [end:enum_error]
189
190
# Snippet: element-wise equality between two Series sharing one Enum dtype.
# --8<-- [start:equality]
dtype = pl.Enum(["Polar", "Panda", "Brown"])
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=dtype)
cat_series2 = pl.Series(["Polar", "Panda", "Brown"], dtype=dtype)
print(cat_bears == cat_series2)
# --8<-- [end:equality]
196
197
# Snippet: inside a `pl.StringCache()` context, build two Categorical Series
# and print their element-wise equality.
# --8<-- [start:global_equality]
with pl.StringCache():
    cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=pl.Categorical)
    cat_series2 = pl.Series(["Polar", "Panda", "Black"], dtype=pl.Categorical)
    print(cat_bears == cat_series2)
# --8<-- [end:global_equality]
203
204
# Snippet: element-wise equality between two Series sharing one Enum dtype.
# NOTE(review): this section reuses the name `equality` and repeats the code of
# an earlier `equality` section in this file; confirm which one the docs
# include and whether this copy can be dropped or renamed.
# --8<-- [start:equality]
dtype = pl.Enum(["Polar", "Panda", "Brown"])
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=dtype)
cat_series2 = pl.Series(["Polar", "Panda", "Brown"], dtype=dtype)
print(cat_bears == cat_series2)
# --8<-- [end:equality]
210
211
# Snippet: compare a Categorical Series against a single string with `<=`.
# --8<-- [start:str_compare_single]
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=pl.Categorical)
print(cat_bears <= "Cat")
# --8<-- [end:str_compare_single]
215
216
# Snippet: compare a Categorical Series element-wise (`<=`) against a Series
# built from plain strings.
# --8<-- [start:str_compare]
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=pl.Categorical)
cat_series_utf = pl.Series(["Panda", "Panda", "Polar"])
print(cat_bears <= cat_series_utf)
# --8<-- [end:str_compare]
221
222