Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_rank.py
6939 views
1
import pytest
2
3
import polars as pl
4
from polars.testing import assert_frame_equal, assert_series_equal
5
6
7
def test_rank_nulls() -> None:
    """Ranking an empty or all-null Series returns the same empty / all-null list."""
    inputs = [[], [None], [None, None]]
    for values in inputs:
        # With nothing but nulls (or nothing at all) the ranks mirror the input.
        assert pl.Series(values).rank().to_list() == values
11
12
13
def test_rank_random_expr() -> None:
    """A seeded random rank over a window gives the same frame on repeated runs."""
    df = pl.from_dict(
        {"a": [1] * 5, "b": [1, 2, 3, 4, 5], "c": [200, 100, 100, 50, 100]}
    )

    def ranked() -> pl.DataFrame:
        # A fresh expression is built on every call so the two runs share nothing.
        return df.with_columns(
            pl.col("c").rank(method="random", seed=1).over("a").alias("rank")
        )

    assert_frame_equal(ranked(), ranked())
25
26
27
def test_rank_random_series() -> None:
    """Seeded random ranking of a Series resolves ties in a fixed, known order."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    # Other tests in this file assert the index dtype for integer-valued ranks,
    # so use it here too instead of hard-coding UInt32. The two are the same on
    # default builds and differ only when Polars is built with a 64-bit index.
    expected = pl.Series("a", [2, 5, 7, 3, 4, 6, 1], dtype=pl.get_index_type())

    assert_series_equal(s.rank("random", seed=1), expected)
32
33
34
def test_rank_df() -> None:
    """Check the values and dtype of `average` and `max` ranks computed via `select`."""
    df = pl.DataFrame({"a": [1, 1, 2, 2, 3]})

    # Ties share the mean of their positions and the result is a float column.
    average_expr = pl.col("a").rank(method="average").alias("b")
    average_ranks = df.select(average_expr).to_series()
    assert average_ranks.to_list() == [1.5, 1.5, 3.5, 3.5, 5.0]
    assert average_ranks.dtype == pl.Float64

    # Ties share the highest position and the result uses the index dtype.
    max_expr = pl.col("a").rank(method="max").alias("b")
    max_ranks = df.select(max_expr).to_series()
    assert max_ranks.to_list() == [2, 2, 4, 4, 5]
    assert max_ranks.dtype == pl.get_index_type()
48
49
50
@pytest.mark.parametrize("maintain_order", [False, True])
def test_rank_so_4109(maintain_order: bool) -> None:
    """Dense and average ranks computed per group inside a group-by aggregation.

    Also covers null handling: a null input value yields a null rank.
    """
    ids = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4]
    ranks = [None, 3, 2, 4, 1, 4, 3, 2, 1, None, 3, 4, 4, 1, None, 3]
    source = pl.from_dict({"id": ids, "rank": ranks}).sort(by=["id", "rank"])

    rank_col = pl.col("rank")
    result = source.group_by("id", maintain_order=maintain_order).agg(
        [
            rank_col.alias("original"),
            rank_col.rank(method="dense").alias("dense"),
            rank_col.rank(method="average").alias("average"),
        ]
    )

    # Groups 1, 3 and 4 each hold one null (first after the sort); group 2 has none.
    expected_original = [
        [None, 2, 3, 4],
        [1, 2, 3, 4],
        [None, 1, 3, 4],
        [None, 1, 3, 4],
    ]
    expected_dense = [
        [None, 1, 2, 3],
        [1, 2, 3, 4],
        [None, 1, 2, 3],
        [None, 1, 2, 3],
    ]
    expected_average = [
        [None, 1.0, 2.0, 3.0],
        [1.0, 2.0, 3.0, 4.0],
        [None, 1.0, 2.0, 3.0],
        [None, 1.0, 2.0, 3.0],
    ]
    expected = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "original": expected_original,
            "dense": expected_dense,
            "average": expected_average,
        },
        # Reuse the result's schema so the expected columns get the same dtypes.
        schema=result.schema,
    )

    # Row order is only checked when the group-by was asked to maintain it.
    assert_frame_equal(result, expected, check_row_order=maintain_order)
93
94
95
def test_rank_string_null_11252() -> None:
    """Rank a string Series with nulls: nulls stay null, the empty string ranks first."""
    values = [None, "", "z", None, "a"]
    expected = [None, 1.0, 3.0, None, 2.0]

    assert pl.Series(values).rank().to_list() == expected
98
99
100
def test_rank_series() -> None:
    """Check dense ranks (ascending and descending) and the dtypes of rank results."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    # This test already asserts the index dtype for `max` ranks; use the same
    # dtype for the `dense` expectations instead of hard-coding UInt32. The two
    # are the same on default builds and differ only with a 64-bit index.
    idx_dtype = pl.get_index_type()

    assert_series_equal(
        s.rank("dense"), pl.Series("a", [2, 3, 4, 3, 3, 4, 1], dtype=idx_dtype)
    )

    # The expression API must give the same dense ranks as the Series method.
    df = pl.DataFrame([s])
    assert df.select(pl.col("a").rank("dense"))["a"].to_list() == [2, 3, 4, 3, 3, 4, 1]

    assert_series_equal(
        s.rank("dense", descending=True),
        pl.Series("a", [3, 2, 1, 2, 2, 1, 4], dtype=idx_dtype),
    )

    assert s.rank(method="average").dtype == pl.Float64
    assert s.rank(method="max").dtype == idx_dtype
117
118