Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/test_struct.py
6940 views
1
from __future__ import annotations
2
3
import datetime
4
from collections import OrderedDict
5
6
import pytest
7
8
import polars as pl
9
from polars.exceptions import (
10
ColumnNotFoundError,
11
)
12
from polars.testing import assert_frame_equal
13
14
15
def test_struct_various() -> None:
    """Check fields, row access, field extraction, naming and unnesting of a struct."""
    frame = pl.DataFrame(
        {
            "int": [1, 2],
            "str": ["a", "b"],
            "bool": [True, None],
            "list": [[1, 2], [3]],
        }
    )
    named = frame.to_struct("my_struct")

    assert named.struct.fields == ["int", "str", "bool", "list"]

    # Each row of the struct Series compares equal to a plain dict.
    expected_rows = (
        {"int": 1, "str": "a", "bool": True, "list": [1, 2]},
        {"int": 2, "str": "b", "bool": None, "list": [3]},
    )
    for row_index, expected_row in enumerate(expected_rows):
        assert named[row_index] == expected_row

    # A field can be fetched via `.field(name)` as well as via `[name]`.
    field_values = (("list", [[1, 2], [3]]), ("int", [1, 2]))
    for field_name, values in field_values:
        assert named.struct.field(field_name).to_list() == values
    for field_name, values in field_values:
        assert named.struct[field_name].to_list() == values

    # Unnamed and named conversions must both round-trip back to the frame.
    series_by_name = {
        "": frame.to_struct(),
        "my_struct": frame.to_struct("my_struct"),
    }
    for expected_name, series in series_by_name.items():
        assert series.name == expected_name
        assert_frame_equal(series.struct.unnest(), frame)
        assert series.struct._ipython_key_completions_() == series.struct.fields
36
37
38
def test_rename_fields() -> None:
    """Renaming a three-field struct with two names yields fields ``a`` and ``b``."""
    source = pl.DataFrame(
        {
            "int": [1, 2],
            "str": ["a", "b"],
            "bool": [True, None],
        }
    )
    struct_series = source.to_struct("my_struct")
    renamed = struct_series.struct.rename_fields(["a", "b"])
    assert renamed.struct.fields == ["a", "b"]
42
43
44
def test_struct_json_encode() -> None:
    """``struct.json_encode`` produces one compact JSON string per struct row."""
    rows = [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]
    frame = pl.DataFrame({"a": rows})

    encode_expr = pl.col("a").struct.json_encode().alias("encoded")
    result = frame.with_columns(encode_expr).to_dict(as_series=False)

    # The original column is kept; a missing field is encoded as JSON `null`.
    expected = {
        "a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}],
        "encoded": ['{"a":[1,2],"b":[45]}', '{"a":[9,1,3],"b":null}'],
    }
    assert result == expected
53
54
55
def test_struct_json_encode_logical_type() -> None:
    """``struct.json_encode`` renders date, datetime and duration values as strings."""
    record = {
        "a": [datetime.date(1997, 1, 1)],
        "b": [datetime.datetime(2000, 1, 29, 10, 30)],
        # One day plus 25 seconds, i.e. 86425 seconds in total.
        "c": [datetime.timedelta(days=1, seconds=25)],
    }
    frame = pl.DataFrame({"a": [record]})

    encoded = frame.select(pl.col("a").struct.json_encode().alias("encoded"))

    expected_json = (
        '{"a":["1997-01-01"],"b":["2000-01-29 10:30:00"],"c":["PT86425S"]}'
    )
    assert encoded.to_dict(as_series=False) == {"encoded": [expected_json]}
70
71
72
def test_map_fields() -> None:
    """``name.map_fields`` applies a callable to every struct field name."""

    def _to_upper(field_name: str) -> str:
        return field_name.upper()

    schema_before = OrderedDict([("x", pl.Struct({"a": pl.Int64, "b": pl.Int64}))])
    schema_after = OrderedDict([("x", pl.Struct({"A": pl.Int64, "B": pl.Int64}))])

    frame = pl.DataFrame({"x": {"a": 1, "b": 2}})
    assert frame.schema == schema_before

    frame = frame.select(pl.col("x").name.map_fields(_to_upper))
    assert frame.schema == schema_after
77
78
79
def test_prefix_suffix_fields() -> None:
    """``name.prefix_fields`` / ``name.suffix_fields`` rename every struct field."""
    frame = pl.DataFrame({"x": {"a": 1, "b": 2}})

    # (renaming expression, field dtypes expected in the resulting struct)
    cases = (
        (
            pl.col("x").name.prefix_fields("p_"),
            {"p_a": pl.Int64, "p_b": pl.Int64},
        ),
        (
            pl.col("x").name.suffix_fields("_f"),
            {"a_f": pl.Int64, "b_f": pl.Int64},
        ),
    )
    for rename_expr, expected_fields in cases:
        renamed = frame.select(rename_expr)
        assert renamed.schema == OrderedDict([("x", pl.Struct(expected_fields))])
91
92
93
def test_struct_alias_prune_15401() -> None:
    """A field taken from an aliased struct column is named after the field."""
    struct_dtype = pl.Struct({"b": pl.Int8})
    empty = pl.DataFrame({"a": []}, schema={"a": struct_dtype})

    field_of_aliased = pl.col("a").alias("c").struct.field("b")
    selected = empty.select(field_of_aliased)

    assert selected.columns == ["b"]
96
97
98
def test_empty_list_eval_schema_5734() -> None:
    """``list.eval`` of a struct field reports ``List(Int64)`` after ``filter(False)``."""
    frame = pl.DataFrame({"a": [[{"b": 1, "c": 2}]]})

    take_b = pl.col("a").list.eval(pl.element().struct.field("b"))
    result = frame.filter(False).select(take_b)

    assert result.schema == {"a": pl.List(pl.Int64)}
103
104
105
def test_field_by_index_18732() -> None:
    """Struct fields are addressable by position, including negative positions."""
    frame = pl.DataFrame({"foo": [{"a": 1, "b": 2}, {"a": 2, "b": 1}]})

    # illegal upper bound: the struct only has two fields
    with pytest.raises(ColumnNotFoundError):
        frame.filter(pl.col.foo.struct[2] == 1)

    # legal: (field position, the single row whose field at that position is 1)
    legal_cases = (
        (0, {"a": 1, "b": 2}),
        (-1, {"a": 2, "b": 1}),
    )
    for position, surviving_row in legal_cases:
        expected = pl.DataFrame({"foo": [surviving_row]})
        actual = frame.filter(pl.col.foo.struct[position] == 1)
        assert_frame_equal(expected, actual)
120
121