Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_over.py
8422 views
1
from typing import Any
2
3
import pytest
4
5
import polars as pl
6
from polars.testing import assert_frame_equal, assert_series_equal
7
8
9
def test_implode_explode_over_22188() -> None:
    """Multiplying by an imploded-then-exploded literal of ones keeps ``x`` intact.

    Every ``y`` group holds three rows, matching the three-element literal
    ``[1, 1, 1]``, so the windowed product is expected to equal ``x``.
    """
    df = pl.DataFrame(
        {
            "x": [1, 2, 3, 1, 2, 3, 1, 2, 3],
            "y": [2, 2, 2, 3, 3, 3, 4, 4, 4],
        }
    )
    result = df.select(
        (pl.col.x * (pl.lit(pl.Series([1, 1, 1])).implode().explode())).over(pl.col.y),
    )

    assert_series_equal(result.to_series(), df.get_column("x"))
21
22
23
def test_implode_in_over_22188() -> None:
    """``list.set_union`` with an imploded literal works inside ``over``.

    Each row forms its own ``y`` group; the single-element list in ``x`` is
    unioned with the imploded literal ``[1]``.
    """
    df = pl.DataFrame(
        {
            "x": [[1], [2], [3]],
            "y": [2, 3, 4],
        }
    ).select(pl.col.x.list.set_union(pl.lit(pl.Series([1])).implode()).over(pl.col.y))
    assert_series_equal(df.to_series(), pl.Series("x", [[1], [2, 1], [3, 1]]))
31
32
33
def test_over_no_partition_by() -> None:
    """``over`` accepts ``order_by`` alone, without any partition columns.

    The cumulative sum of ``a`` is taken in ``i`` order and the results are
    reported back in the original row order.
    """
    df = pl.DataFrame({"a": [1, 1, 2], "i": [2, 1, 3]})
    result = df.with_columns(b=pl.col("a").cum_sum().over(order_by="i"))
    expected = pl.DataFrame({"a": [1, 1, 2], "i": [2, 1, 3], "b": [2, 1, 4]})
    assert_frame_equal(result, expected)
38
39
40
def test_over_no_partition_by_no_over() -> None:
    """Calling ``over()`` with no arguments at all raises ``InvalidOperationError``."""
    df = pl.DataFrame({"a": [1, 1, 2], "i": [2, 1, 3]})
    with pytest.raises(pl.exceptions.InvalidOperationError):
        df.with_columns(b=pl.col("a").cum_sum().over())
44
45
46
def test_over_explode_22770() -> None:
    """``list.diff`` gives the same frame on ``x`` and on its exploded/joined window.

    ``x`` is exploded per ``idx`` group and gathered back with
    ``mapping_strategy="join"``; applying ``list.diff`` to that expression must
    match applying it to the original column.
    """
    df = pl.DataFrame({"x": [[1.0], [2.0]], "idx": [1, 2]})
    e = pl.col("x").list.explode().over("idx", mapping_strategy="join")

    assert_frame_equal(
        df.select(pl.col("x").list.diff()),
        df.select(e.list.diff()),
    )
54
55
56
def test_over_replace_strict_22870() -> None:
    """``replace_strict`` yields the same values with and without ``over``.

    Checked twice: first with the old/new mapping supplied as Series taken from
    a separate lookup frame, then with the mapping supplied as column
    expressions (``a`` -> ``b``) of the frame itself.
    """
    lookup = pl.DataFrame(
        {
            "cat": ["a", "b", "c"],
            "val": [102, 100, 101],
        }
    )

    df = pl.DataFrame(
        {
            "cat": ["a", "b", "a", "a", "b"],
            "data": [2, 3, 4, 5, 6],
            "a": ["a", "b", "c", "d", "e"],
            "b": [102, 100, 101, 109, 110],
        }
    )

    # Mapping given as Series from the lookup frame.
    out = (
        df.lazy()
        .select(
            pl.col("cat")
            .replace_strict(lookup["cat"], lookup["val"], default=-1)
            .alias("val"),
            pl.col("cat")
            .replace_strict(lookup["cat"], lookup["val"], default=-1)
            .over("cat")
            .alias("val_over"),
        )
        .collect()
    )
    assert_series_equal(
        out.get_column("val"), out.get_column("val_over"), check_names=False
    )

    # Mapping given as column expressions of the same frame.
    out = (
        df.lazy()
        .select(
            pl.col("cat").replace_strict(pl.col.a, pl.col.b, default=-1).alias("val"),
            pl.col("cat")
            .replace_strict(pl.col.a, pl.col.b, default=-1)
            .over("cat")
            .alias("val_over"),
        )
        .collect()
    )
    assert_series_equal(
        out.get_column("val"), out.get_column("val_over"), check_names=False
    )
104
105
106
@pytest.mark.parametrize(
    "col",
    [
        [1, 2, 3],
        [[11, 12], [21], [31]],
    ],
)
def test_implode_explode_list_over_24616(col: list[Any]) -> None:
    """``implode().explode()`` inside ``over`` returns the column unchanged.

    Runs for a flat integer column and for a list column, first with
    ``over(1)`` and then with a grouping column ``g``. The plain
    ``x.over(...)`` query is checked alongside as a baseline.
    """
    df = pl.DataFrame({"x": col})
    q = df.lazy().select(pl.col.x.implode().explode().over(1))
    q_base = df.lazy().select(pl.col.x.over(1))
    expected = df
    assert_frame_equal(q.collect(), expected)
    assert_frame_equal(q_base.collect(), expected)

    df = pl.DataFrame({"g": [10, 10, 20], "x": col})
    q = df.lazy().with_columns(pl.col.x.implode().explode().over("g"))
    q_base = df.lazy().with_columns(pl.col.x.over("g"))
    expected = df
    assert_frame_equal(q.collect(), expected)
    assert_frame_equal(q_base.collect(), expected)
127
128
129
def test_first_last_over() -> None:
    """``first``/``last`` inside ``over``, with and without ``ignore_nulls``.

    Group ``a == 1`` ends with a null and group ``a == 2`` starts with one, so
    the default and ``ignore_nulls=True`` variants give different results.
    """
    df = pl.DataFrame(
        {
            "a": [1, 1, 1, 1, 2, 2, 2, 2],
            "b": pl.Series([1, 2, 3, None, None, 4, 5, 6], dtype=pl.Int32),
        }
    )

    # Default `first`: the leading null of group 2 is kept.
    result = df.select(pl.col("b").first().over("a"))
    expected = pl.DataFrame(
        {"b": pl.Series([1, 1, 1, 1, None, None, None, None], dtype=pl.Int32)}
    )
    assert_frame_equal(result, expected)

    result = df.select(pl.col("b").first(ignore_nulls=True).over("a"))
    expected = pl.DataFrame({"b": pl.Series([1, 1, 1, 1, 4, 4, 4, 4], dtype=pl.Int32)})
    assert_frame_equal(result, expected)

    # Default `last`: the trailing null of group 1 is kept.
    result = df.select(pl.col("b").last().over("a"))
    expected = pl.DataFrame(
        {"b": pl.Series([None, None, None, None, 6, 6, 6, 6], dtype=pl.Int32)}
    )
    assert_frame_equal(result, expected)

    result = df.select(pl.col("b").last(ignore_nulls=True).over("a"))
    expected = pl.DataFrame({"b": pl.Series([3, 3, 3, 3, 6, 6, 6, 6], dtype=pl.Int32)})
    assert_frame_equal(result, expected)
156
157