Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_expansion.py
6939 views
1
from __future__ import annotations
2
3
from typing import Any
4
5
import pytest
6
7
import polars as pl
8
from polars.testing import assert_frame_equal
9
from tests.unit.conftest import NUMERIC_DTYPES
10
11
12
def test_regex_exclude() -> None:
    """Regex selection followed by ``exclude`` omits the excluded column."""
    frame = pl.DataFrame({f"col_{i}": [i] for i in range(5)})

    # The pattern matches all five columns; only "col_0" is excluded.
    selected = frame.select(pl.col("^col_.*$").exclude("col_0"))

    assert selected.columns == ["col_1", "col_2", "col_3", "col_4"]
21
22
23
def test_regex_in_filter() -> None:
    """Filter on an OR-fold over regex-selected columns compared against 3."""
    frame = pl.DataFrame(
        {
            "nrs": [1, 2, 3, None, 5],
            "names": ["foo", "ham", "spam", "egg", None],
            "flt": [1.0, None, 3.0, 1.0, None],
        }
    )

    def _either(acc: pl.Series, s: pl.Series) -> pl.Series:
        # Combine the running mask with the next column's mask.
        return acc | s

    below_three = pl.col("^nrs|flt*$") < 3
    any_below_three = pl.fold(acc=False, function=_either, exprs=below_three)

    first_match = frame.filter(any_below_three).row(0)

    assert first_match == (1, "foo", 1.0)
39
40
41
def test_regex_selection() -> None:
    """The pattern ``^foo.*$`` selects foo, fooey and foobar, but not bar."""
    frame = pl.LazyFrame({name: [1] for name in ("foo", "fooey", "foobar", "bar")})

    selected_names = frame.select([pl.col("^foo.*$")]).collect_schema().names()

    assert selected_names == ["foo", "fooey", "foobar"]
52
53
54
@pytest.mark.parametrize(
    ("expr", "expected"),
    [
        (pl.exclude("a"), ["b", "c"]),
        (pl.all().exclude(pl.Boolean), ["a", "b"]),
        (pl.all().exclude([pl.Boolean]), ["a", "b"]),
        (pl.all().exclude(NUMERIC_DTYPES), ["c"]),
    ],
)
def test_exclude_selection(expr: pl.Expr, expected: list[str]) -> None:
    """Check which columns remain after excluding by name or by dtype(s).

    The frame holds two integer columns (``a``, ``b``) and one boolean
    column (``c``); each case states the column names expected to survive.
    """
    frame = pl.LazyFrame({"a": [1], "b": [1], "c": [True]})
    schema = frame.select(expr).collect_schema()

    assert schema.names() == expected
67
68
69
def test_struct_name_resolving_15430() -> None:
    """Struct field access by name and by index both yield a column named ``b``.

    Each query extracts the field from struct column ``a``, drops ``a`` and
    collects with projection pushdown enabled.
    """
    lf = pl.LazyFrame([{"a": {"b": "c"}}])

    def _extract(field_expr: pl.Expr) -> pl.DataFrame:
        # Same pipeline for both access styles; only the field expression differs.
        return (
            lf.with_columns(field_expr)
            .drop("a")
            .collect(optimizations=pl.QueryOptFlags(projection_pushdown=True))
        )

    by_name = _extract(pl.col("a").struct.field("b"))
    by_index = _extract(pl.col("a").struct[0])

    assert by_name["b"].item() == "c"
    assert by_index["b"].item() == "c"
    assert by_name.columns == ["b"]
    assert by_index.columns == ["b"]
87
88
89
@pytest.mark.parametrize(
    ("expr", "expected"),
    [
        (pl.all().name.prefix("agg_"), ["A", "agg_B", "agg_C"]),
        (pl.col("B", "C").name.prefix("agg_"), ["A", "agg_B", "agg_C"]),
        (pl.col("A", "C").name.prefix("agg_"), ["A", "agg_A", "agg_C"]),
    ],
)
def test_exclude_keys_in_aggregation_16170(expr: pl.Expr, expected: list[str]) -> None:
    """Check the output schema of ``group_by("A").agg(expr)`` for each case.

    With ``pl.all()`` the key column ``A`` is expected not to be aggregated,
    whereas naming ``A`` explicitly is expected to produce ``agg_A``.
    """
    frame = pl.DataFrame({"A": [4, 4, 3], "B": [1, 2, 3], "C": [5, 6, 7]})

    # The wildcard must skip the group-by key column.
    aggregated = frame.lazy().group_by("A").agg(expr)

    output_names = aggregated.collect_schema().names()
    assert output_names == expected
103
104
105
@pytest.mark.parametrize(
    "field",
    [
        ["aaa", "ccc"],
        [["aaa", "ccc"]],
        [["aaa"], "ccc"],
        [["^aa.+|cc.+$"]],
    ],
)
def test_struct_field_expand(field: Any) -> None:
    """``struct.field`` called with several argument shapes yields ``aaa`` and ``ccc``.

    ``field`` is unpacked into positional arguments, so the cases cover plain
    names, a list of names, a mix of both, and a regex pattern inside a list.
    """
    source = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    packed = source.select(
        pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col")
    )

    extracted = packed.select(pl.col("struct_col").struct.field(*field))
    wanted = source.select("aaa", "ccc")

    assert_frame_equal(extracted, wanted)
126
127
128
def test_struct_field_expand_star() -> None:
    """``struct.field("*")`` expands a struct column back into the source frame."""
    source = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    packed = source.select(
        pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col")
    )

    expanded = packed.select(pl.col("struct_col").struct.field("*"))

    assert_frame_equal(expanded, source)
139
140
141
def test_struct_unnest() -> None:
    """Mirror test_struct_field_expand_star, calling ``struct.unnest()`` instead."""
    source = pl.DataFrame(
        {
            "aaa": [1, 2],
            "bbb": ["ab", "cd"],
            "ccc": [True, None],
            "ddd": [[1, 2], [3]],
        }
    )
    packed = source.select(
        pl.struct(["aaa", "bbb", "ccc", "ddd"]).alias("struct_col")
    )

    unnested = packed.select(pl.col("struct_col").struct.unnest())

    assert_frame_equal(unnested, source)
153
154
155
def test_struct_field_expand_rewrite() -> None:
    """A name prefix applied after ``struct.field("*")`` reaches every field."""
    frame = pl.DataFrame({"A": [1], "B": [2]})

    prefixed = frame.select(
        pl.struct(["A", "B"]).struct.field("*").name.prefix("foo_")
    )

    assert prefixed.to_dict(as_series=False) == {"foo_A": [1], "foo_B": [2]}
160
161
162
def test_struct_field_expansion_16410() -> None:
    """Expand all fields of a struct after ``with_fields`` rewrote field ``x``.

    The original ``coords`` column must stay untouched while the expanded
    ``x`` carries the square root and ``y`` is passed through.
    """
    lf = pl.LazyFrame({"coords": [{"x": 4, "y": 4}]})

    expanded_fields = (
        pl.col("coords").struct.with_fields(pl.field("x").sqrt()).struct.field("*")
    )
    result = lf.with_columns(expanded_fields).collect().to_dict(as_series=False)

    assert result == {
        "coords": [{"x": 4, "y": 4}],
        "x": [2.0],
        "y": [4],
    }
172
173
174
def test_field_and_column_expansion() -> None:
    """Multi-column selection combined with ``struct.field("*")`` expands both."""
    frame = pl.DataFrame({"a": [{"x": 1, "y": 2}], "b": [{"i": 3, "j": 4}]})

    flattened = frame.select(pl.col("a", "b").struct.field("*"))

    assert flattened.to_dict(as_series=False) == {
        "x": [1],
        "y": [2],
        "i": [3],
        "j": [4],
    }
183
184
185
def test_struct_field_exclude_and_wildcard_expansion() -> None:
    """``exclude``/``all`` selections followed by ``struct.field("*")`` expand fully."""
    frame = pl.DataFrame({"a": [{"x": 1, "y": 2}], "b": [{"i": 3, "j": 4}]})

    # Both selections cover every column, so they share one expected result.
    all_fields = {
        "x": [1],
        "y": [2],
        "i": [3],
        "j": [4],
    }

    via_exclude = frame.select(pl.exclude("foo").struct.field("*"))
    assert via_exclude.to_dict(as_series=False) == all_fields

    via_all = frame.select(pl.all().struct.field("*"))
    assert via_all.to_dict(as_series=False) == all_fields
200
201
202
def test_err_on_multiple_column_expansion() -> None:
    """Adding two multi-column ``col`` selections combines them position-wise.

    Despite the ``err`` in its name, this test asserts a successful result:
    ``a + c`` and ``b + d``, with the output named after the left-hand columns.
    """
    frame = pl.DataFrame(
        {
            "a": [1],
            "b": [2],
            "c": [3],
            "d": [4],
        }
    )

    summed = frame.select([pl.col(["a", "b"]) + pl.col(["c", "d"])])

    assert_frame_equal(summed, pl.DataFrame({"a": [4], "b": [6]}))
214
215