Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/test_struct.py
8415 views
1
from __future__ import annotations
2
3
import datetime
4
from collections import OrderedDict
5
6
import pytest
7
8
import polars as pl
9
from polars.exceptions import ColumnNotFoundError, InvalidOperationError
10
from polars.testing import assert_frame_equal, assert_series_equal
11
12
13
def test_struct_various() -> None:
    """Exercise struct construction, row/field access, naming and unnesting."""
    frame = pl.DataFrame(
        {
            "int": [1, 2],
            "str": ["a", "b"],
            "bool": [True, None],
            "list": [[1, 2], [3]],
        }
    )
    named = frame.to_struct("my_struct")

    # Field names follow the column order of the originating frame.
    assert named.struct.fields == ["int", "str", "bool", "list"]

    # Indexing a struct series yields one dict per row.
    first_row, second_row = named[0], named[1]
    assert first_row == {"int": 1, "str": "a", "bool": True, "list": [1, 2]}
    assert second_row == {"int": 2, "str": "b", "bool": None, "list": [3]}

    # `.struct.field(name)` and `.struct[name]` must agree on every field checked.
    for field_name, expected_values in (("list", [[1, 2], [3]]), ("int", [1, 2])):
        assert named.struct.field(field_name).to_list() == expected_values
        assert named.struct[field_name].to_list() == expected_values

    # An unnamed struct gets the empty name; an explicit name is kept as given.
    cases = {
        "": frame.to_struct(),
        "my_struct": frame.to_struct("my_struct"),
    }
    for expected_name, struct_series in cases.items():
        assert struct_series.name == expected_name
        assert_frame_equal(struct_series.struct.unnest(), frame)
        completions = struct_series.struct._ipython_key_completions_()
        assert completions == struct_series.struct.fields
34
35
36
def test_rename_fields() -> None:
    """Renaming a three-field struct with two names reports only those two fields."""
    frame = pl.DataFrame(
        {
            "int": [1, 2],
            "str": ["a", "b"],
            "bool": [True, None],
        }
    )
    original = frame.to_struct("my_struct")
    renamed = original.struct.rename_fields(["a", "b"])
    assert renamed.struct.fields == ["a", "b"]
40
41
42
def test_struct_json_encode() -> None:
    """Check `struct.json_encode` output for list fields, including a null list."""
    rows = [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]

    encode_expr = pl.col("a").struct.json_encode().alias("encoded")
    result = pl.DataFrame({"a": rows}).with_columns(encode_expr)

    expected = {
        "a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}],
        "encoded": ['{"a":[1,2],"b":[45]}', '{"a":[9,1,3],"b":null}'],
    }
    assert result.to_dict(as_series=False) == expected
51
52
53
def test_struct_json_encode_logical_type() -> None:
    """Check the JSON text produced for date, datetime and timedelta list fields."""
    record = {
        "a": [datetime.date(1997, 1, 1)],
        "b": [datetime.datetime(2000, 1, 29, 10, 30)],
        "c": [datetime.timedelta(days=1, seconds=25)],
    }
    encode_expr = pl.col("a").struct.json_encode().alias("encoded")
    encoded = pl.DataFrame({"a": [record]}).select(encode_expr)

    expected = {
        "encoded": ['{"a":["1997-01-01"],"b":["2000-01-29 10:30:00"],"c":["PT86425S"]}']
    }
    assert encoded.to_dict(as_series=False) == expected
68
69
70
def test_map_fields() -> None:
    """`name.map_fields` applies the callable to every struct field name."""
    frame = pl.DataFrame({"x": {"a": 1, "b": 2}})
    schema_before = OrderedDict(
        [("x", pl.Struct({"a": pl.Int64, "b": pl.Int64}))]
    )
    assert frame.schema == schema_before

    upper_cased = frame.select(
        pl.col("x").name.map_fields(lambda field_name: field_name.upper())
    )
    schema_after = OrderedDict(
        [("x", pl.Struct({"A": pl.Int64, "B": pl.Int64}))]
    )
    assert upper_cased.schema == schema_after
75
76
77
def test_prefix_suffix_fields() -> None:
    """`name.prefix_fields` / `name.suffix_fields` rewrite the struct field names."""
    frame = pl.DataFrame({"x": {"a": 1, "b": 2}})
    column = pl.col("x")

    # Prefix is prepended to each field name; the outer column name stays "x".
    with_prefix = frame.select(column.name.prefix_fields("p_"))
    expected_prefixed = OrderedDict(
        [("x", pl.Struct({"p_a": pl.Int64, "p_b": pl.Int64}))]
    )
    assert with_prefix.schema == expected_prefixed

    # Suffix is appended to each field name.
    with_suffix = frame.select(column.name.suffix_fields("_f"))
    expected_suffixed = OrderedDict(
        [("x", pl.Struct({"a_f": pl.Int64, "b_f": pl.Int64}))]
    )
    assert with_suffix.schema == expected_suffixed
89
90
91
def test_struct_alias_prune_15401() -> None:
    """Selecting a field from an aliased struct column names the output after the field."""
    struct_dtype = pl.Struct({"b": pl.Int8})
    empty = pl.DataFrame({"a": []}, schema={"a": struct_dtype})

    field_expr = pl.col("a").alias("c").struct.field("b")
    projected = empty.select(field_expr)

    assert projected.columns == ["b"]
94
95
96
def test_empty_list_eval_schema_5734() -> None:
    """`list.eval` over struct elements keeps the right schema on an empty frame."""
    frame = pl.DataFrame({"a": [[{"b": 1, "c": 2}]]})
    extract_b = pl.col("a").list.eval(pl.element().struct.field("b"))

    # `filter(False)` removes every row, so only the schema is left to check.
    no_rows = frame.filter(False)
    result = no_rows.select(extract_b)

    assert result.schema == {"a": pl.List(pl.Int64)}
101
102
103
def test_field_by_index_18732() -> None:
    """Integer indexing into `.struct` picks fields by position, negatives included."""
    frame = pl.DataFrame({"foo": [{"a": 1, "b": 2}, {"a": 2, "b": 1}]})

    # Index 2 is past the end of a two-field struct.
    out_of_range = pl.col.foo.struct[2] == 1
    with pytest.raises(ColumnNotFoundError):
        frame.filter(out_of_range)

    # Index 0 addresses field "a": only the first row has a == 1.
    first_field_is_one = pl.col.foo.struct[0] == 1
    expected_first = pl.DataFrame({"foo": [{"a": 1, "b": 2}]})
    assert_frame_equal(expected_first, frame.filter(first_field_is_one))

    # Index -1 addresses field "b": only the second row has b == 1.
    last_field_is_one = pl.col.foo.struct[-1] == 1
    expected_last = pl.DataFrame({"foo": [{"a": 2, "b": 1}]})
    assert_frame_equal(expected_last, frame.filter(last_field_is_one))
118
119
120
def test_unnest_raises_on_non_struct_23654() -> None:
    """`DataFrame.unnest` raises InvalidOperationError for each non-struct column."""
    columns = {
        "a": [1],
        "b": [1.1],
        "c": ["abc"],
        "d": [True],
        "e": [datetime.datetime(2025, 1, 1)],
        "f": [datetime.date(2025, 1, 2)],
    }
    frame = pl.DataFrame(columns)

    # Every column here holds a non-struct dtype, so each unnest call must fail.
    for column_name in ("a", "b", "c", "d", "e", "f"):
        with pytest.raises(InvalidOperationError):
            frame.unnest(column_name)
134
135
136
def test_json_encode_decimal_25881() -> None:
    """Decimal struct fields are JSON-encoded as quoted strings; nulls as `null`."""
    struct_dtype = pl.Struct({"a": pl.Decimal(4, 2)})
    values = [{"a": 1.23}, {"a": 4.56}, {"a": None}, {"a": 30.13}]
    series = pl.Series(values, dtype=struct_dtype)

    expected = pl.Series(
        [
            '{"a":"1.23"}',
            '{"a":"4.56"}',
            '{"a":null}',
            '{"a":"30.13"}',
        ]
    )
    assert_series_equal(series.struct.json_encode(), expected)
146
147
148
def test_json_encode_i128() -> None:
    """Int128 struct fields are JSON-encoded as bare integers with all digits kept."""
    near_max = 2**127 - 5
    near_min = -(2**127) + 124912489
    series = pl.Series(
        [{"a": near_max}, {"a": None}, {"a": near_min}],
        dtype=pl.Struct({"a": pl.Int128}),
    )

    encoded = series.struct.json_encode()

    expected_rows = [
        '{"a":170141183460469231731687303715884105723}',
        '{"a":null}',
        '{"a":-170141183460469231731687303715759193239}',
    ]
    assert_series_equal(encoded, pl.Series(expected_rows))
162
163
164
def test_json_encode_u128() -> None:
    """UInt128 struct fields are JSON-encoded as bare integers with all digits kept."""
    near_max = 2**128 - 5
    series = pl.Series(
        [{"a": near_max}, {"a": None}],
        dtype=pl.Struct({"a": pl.UInt128}),
    )

    encoded = series.struct.json_encode()

    expected_rows = [
        '{"a":340282366920938463463374607431768211451}',
        '{"a":null}',
    ]
    assert_series_equal(encoded, pl.Series(expected_rows))
174
175
176
@pytest.mark.parametrize("dtype", [pl.Enum(["bar", "foo"]), pl.Categorical])
def test_json_encode_categorical(dtype: pl.DataType) -> None:
    """Enum and Categorical struct fields are JSON-encoded as their string values."""
    source = pl.Series("a", ["foo", "bar"], dtype=dtype)

    # Wrap the column in a struct, encode it, and pull the single result column.
    encode_expr = pl.struct("a").struct.json_encode()
    encoded = source.to_frame().select(c=encode_expr).to_series()

    expected = pl.Series("c", ['{"a":"foo"}', '{"a":"bar"}'], pl.String)
    assert_series_equal(encoded, expected)
183
184