Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_expr_multi_cols.py
6939 views
1
import polars as pl
2
from polars.testing import assert_frame_equal
3
4
5
def test_exclude_name_from_dtypes() -> None:
    """Check that ``exclude`` applies to a dtype selection before suffixing.

    All String columns are selected, column ``"a"`` is excluded, and the
    remaining column is appended under a ``_foo``-suffixed name.
    """
    frame = pl.DataFrame({"a": ["a"], "b": ["b"]})
    suffixed = pl.col(pl.String).exclude("a").name.suffix("_foo")

    result = frame.with_columns(suffixed)
    expected = pl.DataFrame({"a": ["a"], "b": ["b"], "b_foo": ["b"]})

    assert_frame_equal(result, expected)
12
13
14
def test_fold_regex_expand() -> None:
    """Check that a regex selection given to ``pl.fold`` covers every match.

    The ``^y_.*$`` pattern matches ``y_1`` and ``y_2``; folding them with
    addition from ``0.0`` must yield their row-wise sum.
    """
    data = {
        "x": [0, 1, 2],
        "y_1": [1.1, 2.2, 3.3],
        "y_2": [1.0, 2.5, 3.5],
    }
    y_sum = pl.fold(
        acc=pl.lit(0.0),
        function=lambda running, column: running + column,
        exprs=pl.col("^y_.*$"),
    ).alias("y_sum")

    result = pl.DataFrame(data).with_columns(y_sum).to_dict(as_series=False)

    # The input columns pass through unchanged; only "y_sum" is new.
    assert result == {**data, "y_sum": [2.1, 4.7, 6.8]}
32
33
34
def test_arg_sort_argument_expansion() -> None:
    """Check ``sort_by``/``arg_sort`` chains on single and multi-column selections.

    Covers a single named column, a regex selection, and
    ``pl.all().exclude(...)``.
    """
    frame = pl.DataFrame(
        {
            "col1": [1, 2, 3],
            "col2": [4, 5, 6],
            "sort_order": [9, 8, 7],
        }
    )
    order = pl.col("sort_order")

    # Single column, sorted by the arg_sort of the key, with a renamed output.
    single = pl.col("col1").sort_by(order.arg_sort()).name.suffix("_suffix")
    assert frame.select(single).to_dict(as_series=False) == {
        "col1_suffix": [3, 2, 1]
    }

    # Both multi-column selectors resolve to col1 and col2 and must agree.
    expected = {"col1": [2, 1, 0], "col2": [2, 1, 0]}
    selectors = (pl.col("^col.*$"), pl.all().exclude("sort_order"))
    for selector in selectors:
        out = frame.select(selector.sort_by(order).arg_sort())
        assert out.to_dict(as_series=False) == expected
51
52
53
def test_multiple_columns_length_9137() -> None:
    """Check ``is_in`` in a group-by aggregation with a list longer than the group."""
    frame = pl.DataFrame(
        {
            "a": [1, 1],
            "b": ["c", "d"],
        }
    )

    # list is larger than groups
    cmp_list = ["a", "b", "c"]

    membership = pl.col("b").is_in(cmp_list)
    aggregated = frame.group_by("a").agg(membership)

    expected = {"a": [1], "b": [[True, False]]}
    assert aggregated.to_dict(as_series=False) == expected
70
71
72
def test_regex_in_cols() -> None:
    """Check regex expansion in ``pl.col``, alone and mixed with plain names.

    Covers one regex, two regexes, a regex plus a literal column name, and
    a regex plus a literal name followed by ``exclude``.
    """
    columns = {
        "col1": [1, 2, 3],
        "col2": [4, 5, 6],
        "val1": ["a", "b", "c"],
        "val2": ["A", "B", "C"],
    }
    frame = pl.DataFrame(columns)

    # One regex pattern.
    one_pattern = frame.select(pl.col("^col.*$").name.prefix("matched_"))
    assert one_pattern.to_dict(as_series=False) == {
        "matched_col1": [1, 2, 3],
        "matched_col2": [4, 5, 6],
    }

    # Two regex patterns; with_columns keeps the originals alongside the copies.
    two_patterns = frame.with_columns(
        pl.col("^col.*$", "^val.*$").name.prefix("matched_")
    )
    assert two_patterns.to_dict(as_series=False) == {
        **columns,
        "matched_col1": [1, 2, 3],
        "matched_col2": [4, 5, 6],
        "matched_val1": ["a", "b", "c"],
        "matched_val2": ["A", "B", "C"],
    }

    # A regex pattern combined with a literal column name.
    pattern_and_name = pl.col("^col.*$", "val1")
    prefixed = frame.select(pattern_and_name.name.prefix("matched_"))
    assert prefixed.to_dict(as_series=False) == {
        "matched_col1": [1, 2, 3],
        "matched_col2": [4, 5, 6],
        "matched_val1": ["a", "b", "c"],
    }

    # Excluding one of the regex matches from the combined selection.
    without_col2 = frame.select(pl.col("^col.*$", "val1").exclude("col2"))
    assert without_col2.to_dict(as_series=False) == {
        "col1": [1, 2, 3],
        "val1": ["a", "b", "c"],
    }
115
116