# Path: blob/main/py-polars/tests/unit/operations/namespaces/test_struct.py
# 8415 views
from __future__ import annotations12import datetime3from collections import OrderedDict45import pytest67import polars as pl8from polars.exceptions import ColumnNotFoundError, InvalidOperationError9from polars.testing import assert_frame_equal, assert_series_equal101112def test_struct_various() -> None:13df = pl.DataFrame(14{"int": [1, 2], "str": ["a", "b"], "bool": [True, None], "list": [[1, 2], [3]]}15)16s = df.to_struct("my_struct")1718assert s.struct.fields == ["int", "str", "bool", "list"]19assert s[0] == {"int": 1, "str": "a", "bool": True, "list": [1, 2]}20assert s[1] == {"int": 2, "str": "b", "bool": None, "list": [3]}21assert s.struct.field("list").to_list() == [[1, 2], [3]]22assert s.struct.field("int").to_list() == [1, 2]23assert s.struct["list"].to_list() == [[1, 2], [3]]24assert s.struct["int"].to_list() == [1, 2]2526for s, expected_name in (27(df.to_struct(), ""),28(df.to_struct("my_struct"), "my_struct"),29):30assert s.name == expected_name31assert_frame_equal(s.struct.unnest(), df)32assert s.struct._ipython_key_completions_() == s.struct.fields333435def test_rename_fields() -> None:36df = pl.DataFrame({"int": [1, 2], "str": ["a", "b"], "bool": [True, None]})37s = df.to_struct("my_struct").struct.rename_fields(["a", "b"])38assert s.struct.fields == ["a", "b"]394041def test_struct_json_encode() -> None:42assert pl.DataFrame(43{"a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}]}44).with_columns(pl.col("a").struct.json_encode().alias("encoded")).to_dict(45as_series=False46) == {47"a": [{"a": [1, 2], "b": [45]}, {"a": [9, 1, 3], "b": None}],48"encoded": ['{"a":[1,2],"b":[45]}', '{"a":[9,1,3],"b":null}'],49}505152def test_struct_json_encode_logical_type() -> None:53df = pl.DataFrame(54{55"a": [56{57"a": [datetime.date(1997, 1, 1)],58"b": [datetime.datetime(2000, 1, 29, 10, 30)],59"c": [datetime.timedelta(1, 25)],60}61]62}63).select(pl.col("a").struct.json_encode().alias("encoded"))64assert df.to_dict(as_series=False) == {65"encoded": 
['{"a":["1997-01-01"],"b":["2000-01-29 10:30:00"],"c":["PT86425S"]}']66}676869def test_map_fields() -> None:70df = pl.DataFrame({"x": {"a": 1, "b": 2}})71assert df.schema == OrderedDict([("x", pl.Struct({"a": pl.Int64, "b": pl.Int64}))])72df = df.select(pl.col("x").name.map_fields(lambda x: x.upper()))73assert df.schema == OrderedDict([("x", pl.Struct({"A": pl.Int64, "B": pl.Int64}))])747576def test_prefix_suffix_fields() -> None:77df = pl.DataFrame({"x": {"a": 1, "b": 2}})7879prefix_df = df.select(pl.col("x").name.prefix_fields("p_"))80assert prefix_df.schema == OrderedDict(81[("x", pl.Struct({"p_a": pl.Int64, "p_b": pl.Int64}))]82)8384suffix_df = df.select(pl.col("x").name.suffix_fields("_f"))85assert suffix_df.schema == OrderedDict(86[("x", pl.Struct({"a_f": pl.Int64, "b_f": pl.Int64}))]87)888990def test_struct_alias_prune_15401() -> None:91df = pl.DataFrame({"a": []}, schema={"a": pl.Struct({"b": pl.Int8})})92assert df.select(pl.col("a").alias("c").struct.field("b")).columns == ["b"]939495def test_empty_list_eval_schema_5734() -> None:96df = pl.DataFrame({"a": [[{"b": 1, "c": 2}]]})97assert df.filter(False).select(98pl.col("a").list.eval(pl.element().struct.field("b"))99).schema == {"a": pl.List(pl.Int64)}100101102def test_field_by_index_18732() -> None:103df = pl.DataFrame({"foo": [{"a": 1, "b": 2}, {"a": 2, "b": 1}]})104105# illegal upper bound106with pytest.raises(ColumnNotFoundError):107df.filter(pl.col.foo.struct[2] == 1)108109# legal110expected_df = pl.DataFrame({"foo": [{"a": 1, "b": 2}]})111result_df = df.filter(pl.col.foo.struct[0] == 1)112assert_frame_equal(expected_df, result_df)113114expected_df = pl.DataFrame({"foo": [{"a": 2, "b": 1}]})115result_df = df.filter(pl.col.foo.struct[-1] == 1)116assert_frame_equal(expected_df, result_df)117118119def test_unnest_raises_on_non_struct_23654() -> None:120df = pl.DataFrame(121{122"a": [1],123"b": [1.1],124"c": ["abc"],125"d": [True],126"e": [datetime.datetime(2025, 1, 1)],127"f": [datetime.datetime(2025, 1, 
2).date()],128}129)130for z in "abcdef":131with pytest.raises(InvalidOperationError):132df.unnest(z)133134135def test_json_encode_decimal_25881() -> None:136s = pl.Series(137[{"a": 1.23}, {"a": 4.56}, {"a": None}, {"a": 30.13}],138dtype=pl.Struct({"a": pl.Decimal(4, 2)}),139)140result = s.struct.json_encode()141expected = pl.Series(142['{"a":"1.23"}', '{"a":"4.56"}', '{"a":null}', '{"a":"30.13"}']143)144assert_series_equal(result, expected)145146147def test_json_encode_i128() -> None:148s = pl.Series(149[{"a": 2**127 - 5}, {"a": None}, {"a": -(2**127) + 124912489}],150dtype=pl.Struct({"a": pl.Int128}),151)152result = s.struct.json_encode()153expected = pl.Series(154[155'{"a":170141183460469231731687303715884105723}',156'{"a":null}',157'{"a":-170141183460469231731687303715759193239}',158]159)160assert_series_equal(result, expected)161162163def test_json_encode_u128() -> None:164s = pl.Series(165[{"a": 2**128 - 5}, {"a": None}],166dtype=pl.Struct({"a": pl.UInt128}),167)168result = s.struct.json_encode()169expected = pl.Series(170['{"a":340282366920938463463374607431768211451}', '{"a":null}']171)172assert_series_equal(result, expected)173174175@pytest.mark.parametrize("dtype", [pl.Enum(["bar", "foo"]), pl.Categorical])176def test_json_encode_categorical(dtype: pl.DataType) -> None:177s = pl.Series("a", ["foo", "bar"], dtype=dtype)178assert_series_equal(179s.to_frame().select(c=pl.struct("a").struct.json_encode()).to_series(),180pl.Series("c", ['{"a":"foo"}', '{"a":"bar"}'], pl.String),181)182183184