Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/test_cum_count.py
6939 views
1
from __future__ import annotations
2
3
import pytest
4
5
import polars as pl
6
from polars.testing import assert_frame_equal, assert_series_equal
7
8
9
@pytest.mark.parametrize(
    ("reverse", "output"),
    [
        (False, [1, 2, 2]),
        (True, [2, 1, 0]),
    ],
)
def test_cum_count_single_arg(reverse: bool, output: list[int]) -> None:
    """Check ``pl.cum_count`` on a single column, forward and reversed.

    The input column ends with a null; the expected outputs show that the
    null entry does not add to the running count in either direction.
    """
    frame = pl.DataFrame({"a": [5, 5, None]})
    counted = frame.select(pl.cum_count("a", reverse=reverse))

    want_column = pl.Series("a", output, dtype=pl.UInt32)
    assert_frame_equal(counted, want_column.to_frame())

    # The result must carry the sortedness flag matching the direction:
    # ascending for a forward count, descending for a reversed one.
    flag_name = "SORTED_DESC" if reverse else "SORTED_ASC"
    assert counted.to_series().flags[flag_name]
16
17
18
def test_cum_count_multi_arg() -> None:
    """Check ``pl.cum_count`` when several column names are passed at once.

    Each column places its nulls differently (none, first, middle, last,
    all), and the expected running counts are listed per column.
    """
    source = {
        "a": [5, 5, 5],
        "b": [None, 5, 5],
        "c": [5, None, 5],
        "d": [5, 5, None],
        "e": [None, None, None],
    }
    expected_counts = {
        "a": [1, 2, 3],
        "b": [0, 1, 2],
        "c": [1, 1, 2],
        "d": [1, 2, 2],
        "e": [0, 0, 0],
    }

    frame = pl.DataFrame(source)
    counted = frame.select(pl.cum_count("a", "b", "c", "d", "e"))

    want = pl.DataFrame(
        [
            pl.Series(name, counts, dtype=pl.UInt32)
            for name, counts in expected_counts.items()
        ]
    )
    assert_frame_equal(counted, want)
39
40
41
def test_cum_count_multi_arg_reverse() -> None:
    """Check ``pl.cum_count`` over several columns with ``reverse=True``.

    Uses the same null layouts as the forward multi-column test; the
    expected values are the running counts taken from the last row upward.
    """
    source = {
        "a": [5, 5, 5],
        "b": [None, 5, 5],
        "c": [5, None, 5],
        "d": [5, 5, None],
        "e": [None, None, None],
    }
    expected_counts = {
        "a": [3, 2, 1],
        "b": [2, 2, 1],
        "c": [2, 1, 1],
        "d": [2, 1, 0],
        "e": [0, 0, 0],
    }

    frame = pl.DataFrame(source)
    counted = frame.select(pl.cum_count("a", "b", "c", "d", "e", reverse=True))

    want = pl.DataFrame(
        [
            pl.Series(name, counts, dtype=pl.UInt32)
            for name, counts in expected_counts.items()
        ]
    )
    assert_frame_equal(counted, want)
62
63
64
def test_cum_count() -> None:
    """Check ``Expr.cum_count`` inside a ``group_by`` aggregation.

    With ``maintain_order=True`` the groups come out in first-seen order,
    so index 0 is the "a" group (four rows) and index 1 the "b" group
    (two rows).
    """
    rows = [["a"], ["a"], ["a"], ["b"], ["b"], ["a"]]
    frame = pl.DataFrame(rows, schema=["A"], orient="row")

    running_count = pl.col("A").cum_count().alias("foo")
    grouped = frame.group_by("A", maintain_order=True).agg(running_count)

    per_group = grouped["foo"]
    assert per_group[0].to_list() == [1, 2, 3, 4]
    assert per_group[1].to_list() == [1, 2]
75
76
77
def test_series_cum_count() -> None:
    """Check ``Series.cum_count`` on a string series containing one null.

    The expected output repeats the previous count at the null position.
    """
    values = pl.Series(["x", "k", None, "d"])
    want = pl.Series([1, 2, 2, 3], dtype=pl.UInt32)

    assert_series_equal(values.cum_count(), want)
82
83