CoCalc -- test_expansion.py

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_expansion.py
6939 views
1
from __future__ import annotations
2

3
from typing import Any
4

5
import pytest
6

7
import polars as pl
8
from polars.testing import assert_frame_equal
9
from tests.unit.conftest import NUMERIC_DTYPES
10

11

12
def test_regex_exclude() -> None:
    """Check that ``exclude`` removes a named column from a regex selection."""
    frame = pl.DataFrame({f"col_{idx}": [idx] for idx in range(5)})

    selected = frame.select(pl.col("^col_.*$").exclude("col_0"))

    # Every regex match survives except the explicitly excluded "col_0".
    assert selected.columns == ["col_1", "col_2", "col_3", "col_4"]
21

22

23
def test_regex_in_filter() -> None:
    """Use a regex column selection inside a folded filter predicate."""
    frame = pl.DataFrame(
        {
            "nrs": [1, 2, 3, None, 5],
            "names": ["foo", "ham", "spam", "egg", None],
            "flt": [1.0, None, 3.0, 1.0, None],
        }
    )

    # OR together the `< 3` comparison of every column the pattern selects.
    below_three = pl.col("^nrs|flt*$") < 3
    any_below_three = pl.fold(
        acc=False,
        function=lambda running, col: running | col,
        exprs=below_three,
    )

    first_row = frame.filter(any_below_three).row(0)
    assert first_row == (1, "foo", 1.0)
39

40

41
def test_regex_selection() -> None:
    """Select only the columns whose names match the ``^foo.*$`` pattern."""
    columns = {
        "foo": [1],
        "fooey": [1],
        "foobar": [1],
        "bar": [1],
    }
    lf = pl.LazyFrame(columns)

    selected_names = lf.select([pl.col("^foo.*$")]).collect_schema().names()

    # "bar" does not match and must be absent from the resulting schema.
    assert selected_names == ["foo", "fooey", "foobar"]
52

53

54
@pytest.mark.parametrize(
    ("expr", "expected"),
    [
        (pl.exclude("a"), ["b", "c"]),
        (pl.all().exclude(pl.Boolean), ["a", "b"]),
        (pl.all().exclude([pl.Boolean]), ["a", "b"]),
        (pl.all().exclude(NUMERIC_DTYPES), ["c"]),
    ],
)
def test_exclude_selection(expr: pl.Expr, expected: list[str]) -> None:
    """Excluding by name, dtype, or collection of dtypes keeps the remaining columns."""
    # "a" and "b" are integer columns, "c" is boolean.
    frame = pl.LazyFrame({"a": [1], "b": [1], "c": [True]})

    schema = frame.select(expr).collect_schema()

    assert schema.names() == expected
67

68

69
def test_struct_name_resolving_15430() -> None:
    """Struct field access by name and by index both yield a column named "b"."""
    lf = pl.LazyFrame([{"a": {"b": "c"}}])

    def extract(field_expr: pl.Expr) -> pl.DataFrame:
        # Add the extracted field, drop the source struct, and collect with
        # projection pushdown enabled.
        return (
            lf.with_columns(field_expr)
            .drop("a")
            .collect(optimizations=pl.QueryOptFlags(projection_pushdown=True))
        )

    by_name = extract(pl.col("a").struct.field("b"))
    by_index = extract(pl.col("a").struct[0])

    assert by_name["b"].item() == "c"
    assert by_index["b"].item() == "c"
    assert by_name.columns == ["b"]
    assert by_index.columns == ["b"]
87

88

89
@pytest.mark.parametrize(
    ("expr", "expected"),
    [
        (pl.all().name.prefix("agg_"), ["A", "agg_B", "agg_C"]),
        (pl.col("B", "C").name.prefix("agg_"), ["A", "agg_B", "agg_C"]),
        (pl.col("A", "C").name.prefix("agg_"), ["A", "agg_A", "agg_C"]),
    ],
)
def test_exclude_keys_in_aggregation_16170(expr: pl.Expr, expected: list[str]) -> None:
    """Check the output schema of ``group_by("A").agg(expr)`` for each selector."""
    frame = pl.DataFrame({"A": [4, 4, 3], "B": [1, 2, 3], "C": [5, 6, 7]})

    # The wildcard must skip the group key "A"; selecting "A" by name (third
    # case) still yields a prefixed "agg_A" next to the key column.
    aggregated = frame.lazy().group_by("A").agg(expr)

    assert aggregated.collect_schema().names() == expected
103

104

105
@pytest.mark.parametrize(
    "field",
    [
        ["aaa", "ccc"],
        [["aaa", "ccc"]],
        [["aaa"], "ccc"],
        [["^aa.+|cc.+$"]],
    ],
)
def test_struct_field_expand(field: Any) -> None:
    """Every accepted spelling of ``struct.field`` arguments selects "aaa" and "ccc"."""
    source = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    # Pack all four columns into one struct column, then pull fields back out.
    packed = source.select(
        pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col")
    )

    unpacked = packed.select(pl.col("struct_col").struct.field(*field))
    expected = source.select("aaa", "ccc")

    assert_frame_equal(unpacked, expected)
126

127

128
def test_struct_field_expand_star() -> None:
    """``struct.field("*")`` expands a struct back into all of its fields."""
    source = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    packed = source.select(
        pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col")
    )

    unpacked = packed.select(pl.col("struct_col").struct.field("*"))

    # Round trip: packing then expanding reproduces the original frame.
    assert_frame_equal(unpacked, source)
139

140

141
def test_struct_unnest() -> None:
    """Mirror of test_struct_field_expand_star, going through ``struct.unnest()``."""
    source = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    packed = source.select(
        pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col")
    )

    unpacked = packed.select(pl.col("struct_col").struct.unnest())

    # Round trip: packing then unnesting reproduces the original frame.
    assert_frame_equal(unpacked, source)
153

154

155
def test_struct_field_expand_rewrite() -> None:
    """A name prefix applies to every field produced by a ``"*"`` expansion."""
    frame = pl.DataFrame({"A": [1], "B": [2]})
    prefixed_fields = pl.struct(["A", "B"]).struct.field("*").name.prefix("foo_")

    result = frame.select(prefixed_fields).to_dict(as_series=False)

    assert result == {"foo_A": [1], "foo_B": [2]}
160

161

162
def test_struct_field_expansion_16410() -> None:
    """Fields expanded after ``with_fields`` carry the updated struct values."""
    lf = pl.LazyFrame({"coords": [{"x": 4, "y": 4}]})
    updated_struct = pl.col("coords").struct.with_fields(pl.field("x").sqrt())

    result = lf.with_columns(updated_struct.struct.field("*")).collect()

    # "coords" itself stays as-is; only the expanded "x" holds the square root.
    assert result.to_dict(as_series=False) == {
        "coords": [{"x": 4, "y": 4}],
        "x": [2.0],
        "y": [4],
    }
172

173

174
def test_field_and_column_expansion() -> None:
    """Expand the fields of several struct columns selected in one ``pl.col`` call."""
    frame = pl.DataFrame({"a": [{"x": 1, "y": 2}], "b": [{"i": 3, "j": 4}]})

    all_fields = frame.select(pl.col("a", "b").struct.field("*"))

    # Fields of "a" and of "b" all end up as top-level columns.
    expected = {"x": [1], "y": [2], "i": [3], "j": [4]}
    assert all_fields.to_dict(as_series=False) == expected
183

184

185
def test_struct_field_exclude_and_wildcard_expansion() -> None:
    """``pl.exclude`` and ``pl.all`` roots both support ``struct.field("*")``."""
    frame = pl.DataFrame({"a": [{"x": 1, "y": 2}], "b": [{"i": 3, "j": 4}]})
    expected = {"x": [1], "y": [2], "i": [3], "j": [4]}

    # "foo" is not a column, so excluding it selects the same columns as all().
    for root in (pl.exclude("foo"), pl.all()):
        expanded = frame.select(root.struct.field("*"))
        assert expanded.to_dict(as_series=False) == expected
200

201

202
def test_err_on_multiple_column_expansion() -> None:
    """Adding two multi-column selections pairs the columns up positionally.

    NOTE(review): the test name suggests an error was expected, but the body
    asserts that the expression succeeds -- confirm the name is just historical.
    """
    frame = pl.DataFrame({"a": [1], "b": [2], "c": [3], "d": [4]})

    summed = frame.select([pl.col(["a", "b"]) + pl.col(["c", "d"])])

    # a + c and b + d, with the output keeping the left-hand column names.
    assert_frame_equal(summed, pl.DataFrame({"a": [4], "b": [6]}))
214

215
Product

Resources

Company