# Path: blob/main/py-polars/tests/unit/operations/test_expansion.py
# 8424 views
from __future__ import annotations

from typing import Any

import pytest

import polars as pl
from polars.testing import assert_frame_equal
from tests.unit.conftest import NUMERIC_DTYPES


def test_regex_exclude() -> None:
    """A regex column selection followed by `exclude` omits the named column."""
    df = pl.DataFrame({f"col_{i}": [i] for i in range(5)})

    assert df.select(pl.col("^col_.*$").exclude("col_0")).columns == [
        "col_1",
        "col_2",
        "col_3",
        "col_4",
    ]


def test_regex_in_filter() -> None:
    """A regex-expanded comparison can be folded into a single filter predicate."""
    df = pl.DataFrame(
        {
            "nrs": [1, 2, 3, None, 5],
            "names": ["foo", "ham", "spam", "egg", None],
            "flt": [1.0, None, 3.0, 1.0, None],
        }
    )

    # The regex expands to one `< 3` comparison per matched column; the fold
    # ORs those comparisons together, starting from False.
    res = df.filter(
        pl.fold(
            acc=False, function=lambda acc, s: acc | s, exprs=(pl.col("^nrs|flt*$") < 3)
        )
    ).row(0)
    expected = (1, "foo", 1.0)
    assert res == expected


def test_regex_selection() -> None:
    """An anchored regex selects only the matching columns, in frame order."""
    lf = pl.LazyFrame(
        {
            "foo": [1],
            "fooey": [1],
            "foobar": [1],
            "bar": [1],
        }
    )
    result = lf.select([pl.col("^foo.*$")])
    assert result.collect_schema().names() == ["foo", "fooey", "foobar"]


@pytest.mark.parametrize(
    ("expr", "expected"),
    [
        (pl.exclude("a"), ["b", "c"]),
        (pl.all().exclude(pl.Boolean), ["a", "b"]),
        (pl.all().exclude([pl.Boolean]), ["a", "b"]),
        (pl.all().exclude(NUMERIC_DTYPES), ["c"]),
    ],
)
def test_exclude_selection(expr: pl.Expr, expected: list[str]) -> None:
    """`exclude` accepts a column name, a dtype, or a collection of dtypes."""
    lf = pl.LazyFrame({"a": [1], "b": [1], "c": [True]})

    assert lf.select(expr).collect_schema().names() == expected


def test_struct_name_resolving_15430() -> None:
    """Struct field access by name and by index both yield column "b"."""
    q = pl.LazyFrame([{"a": {"b": "c"}}])
    a = (
        q.with_columns(pl.col("a").struct.field("b"))
        .drop("a")
        .collect(optimizations=pl.QueryOptFlags(projection_pushdown=True))
    )

    b = (
        q.with_columns(pl.col("a").struct[0])
        .drop("a")
        .collect(optimizations=pl.QueryOptFlags(projection_pushdown=True))
    )

    assert a["b"].item() == "c"
    assert b["b"].item() == "c"
    assert a.columns == ["b"]
    assert b.columns == ["b"]


@pytest.mark.parametrize(
    ("expr", "expected"),
    [
        (pl.all().name.prefix("agg_"), ["A", "agg_B", "agg_C"]),
        (pl.col("B", "C").name.prefix("agg_"), ["A", "agg_B", "agg_C"]),
        (pl.col("A", "C").name.prefix("agg_"), ["A", "agg_A", "agg_C"]),
    ],
)
def test_exclude_keys_in_aggregation_16170(expr: pl.Expr, expected: list[str]) -> None:
    """Check the output column names of `group_by("A").agg(expr)`.

    The wildcard case must not include the group key "A" among the prefixed
    aggregation columns, whereas naming "A" explicitly does aggregate it.
    """
    df = pl.DataFrame({"A": [4, 4, 3], "B": [1, 2, 3], "C": [5, 6, 7]})

    # wildcard excludes aggregation column
    result = df.lazy().group_by("A").agg(expr)
    assert result.collect_schema().names() == expected


@pytest.mark.parametrize(
    "field",
    [
        ["aaa", "ccc"],
        [["aaa", "ccc"]],
        [["aaa"], "ccc"],
        [["^aa.+|cc.+$"]],
    ],
)
def test_struct_field_expand(field: Any) -> None:
    """`struct.field` accepts names, lists of names, mixes of both, and a regex.

    Every parametrized form must select exactly fields "aaa" and "ccc".
    """
    df = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    struct_df = df.select(pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col"))
    res_df = struct_df.select(pl.col("struct_col").struct.field(*field))
    assert_frame_equal(res_df, df.select("aaa", "ccc"))


def test_struct_field_expand_star() -> None:
    """`struct.field("*")` expands a struct column back into all of its fields."""
    df = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    struct_df = df.select(pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col"))
    assert_frame_equal(struct_df.select(pl.col("struct_col").struct.field("*")), df)


def test_struct_unnest() -> None:
    """Same as test_struct_field_expand_star but using the unnest alias."""
    df = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    struct_df = df.select(pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col"))
    assert_frame_equal(struct_df.select(pl.col("struct_col").struct.unnest()), df)


def test_struct_field_expand_rewrite() -> None:
    """A name prefix applied after `struct.field("*")` renames every expanded field."""
    df = pl.DataFrame({"A": [1], "B": [2]})
    assert df.select(
        pl.struct(["A", "B"]).struct.field("*").name.prefix("foo_")
    ).to_dict(as_series=False) == {"foo_A": [1], "foo_B": [2]}


def test_struct_field_expansion_16410() -> None:
    """Wildcard expansion after `with_fields` reflects the updated field values.

    The original "coords" column is left untouched; the expanded "x" carries
    the square root while "y" is passed through unchanged.
    """
    q = pl.LazyFrame({"coords": [{"x": 4, "y": 4}]})

    assert q.with_columns(
        pl.col("coords").struct.with_fields(pl.field("x").sqrt()).struct.field("*")
    ).collect().to_dict(as_series=False) == {
        "coords": [{"x": 4, "y": 4}],
        "x": [2.0],
        "y": [4],
    }


def test_field_and_column_expansion() -> None:
    """Multi-column selection and field wildcard expansion compose."""
    df = pl.DataFrame({"a": [{"x": 1, "y": 2}], "b": [{"i": 3, "j": 4}]})

    assert df.select(pl.col("a", "b").struct.field("*")).to_dict(as_series=False) == {
        "x": [1],
        "y": [2],
        "i": [3],
        "j": [4],
    }


def test_struct_field_exclude_and_wildcard_expansion() -> None:
    """`pl.exclude(...)` and `pl.all()` both compose with field wildcard expansion."""
    df = pl.DataFrame({"a": [{"x": 1, "y": 2}], "b": [{"i": 3, "j": 4}]})

    # "foo" is not a column of `df`, so excluding it leaves both struct columns.
    assert df.select(pl.exclude("foo").struct.field("*")).to_dict(as_series=False) == {
        "x": [1],
        "y": [2],
        "i": [3],
        "j": [4],
    }
    assert df.select(pl.all().struct.field("*")).to_dict(as_series=False) == {
        "x": [1],
        "y": [2],
        "i": [3],
        "j": [4],
    }


def test_err_on_multiple_column_expansion() -> None:
    """Adding two multi-column selections pairs the columns positionally.

    NOTE(review): despite the "err" in the name, this test asserts a
    successful result (a + c and b + d, named after the left-hand columns),
    not an error. The name is kept so the test ID stays stable.
    """
    assert_frame_equal(
        pl.DataFrame(
            {
                "a": [1],
                "b": [2],
                "c": [3],
                "d": [4],
            }
        ).select([pl.col(["a", "b"]) + pl.col(["c", "d"])]),
        pl.DataFrame({"a": [4], "b": [6]}),
    )