Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/test_rank.py
8431 views
1
import pytest
2
3
import polars as pl
4
from polars.testing import assert_frame_equal, assert_series_equal
5
6
7
def test_rank_nulls() -> None:
    """Ranking an empty or all-null series gives back a list equal to the input."""
    # Each case is its own expected output: nothing to rank, or only nulls.
    for values in ([], [None], [None, None]):
        assert pl.Series(values).rank().to_list() == values
11
12
13
def test_rank_random_expr() -> None:
    """A seeded random rank over a window gives the same frame when repeated."""
    frame = pl.from_dict(
        {"a": [1] * 5, "b": [1, 2, 3, 4, 5], "c": [200, 100, 100, 50, 100]}
    )

    def with_seeded_rank() -> pl.DataFrame:
        # Build the expression from scratch on every call so that both
        # evaluations are fully independent of each other.
        seeded_rank = pl.col("c").rank(method="random", seed=1).over("a")
        return frame.with_columns(seeded_rank.alias("rank"))

    first_run = with_seeded_rank()
    second_run = with_seeded_rank()
    assert_frame_equal(first_run, second_run)
25
26
27
def test_rank_random_series() -> None:
    """Pin the exact ranks produced by a random-method rank with ``seed=1``."""
    values = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    result = values.rank("random", seed=1)

    expected = pl.Series("a", [2, 5, 7, 3, 4, 6, 1], dtype=pl.get_index_type())
    assert_series_equal(result, expected)
33
34
35
def test_rank_df() -> None:
    """Check values and dtypes of ``average`` and ``max`` ranks via ``select``."""
    frame = pl.DataFrame({"a": [1, 1, 2, 2, 3]})
    column = pl.col("a")

    # Tied values share the mean of their positions; the dtype must be Float64.
    average_ranks = frame.select(column.rank(method="average").alias("b")).to_series()
    assert average_ranks.to_list() == [1.5, 1.5, 3.5, 3.5, 5.0]
    assert average_ranks.dtype == pl.Float64

    # Tied values all take the highest position; the dtype is the index type.
    max_ranks = frame.select(column.rank(method="max").alias("b")).to_series()
    assert max_ranks.to_list() == [2, 2, 4, 4, 5]
    assert max_ranks.dtype == pl.get_index_type()
49
50
51
@pytest.mark.parametrize("maintain_order", [False, True])
def test_rank_so_4109(maintain_order: bool) -> None:
    """Rank inside a ``group_by`` aggregation; also tests ranks null behavior."""
    ids = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4]
    values = [None, 3, 2, 4, 1, 4, 3, 2, 1, None, 3, 4, 4, 1, None, 3]
    source = pl.from_dict({"id": ids, "rank": values}).sort(by=["id", "rank"])

    rank_col = pl.col("rank")
    aggregations = [
        rank_col.alias("original"),
        rank_col.rank(method="dense").alias("dense"),
        rank_col.rank(method="average").alias("average"),
    ]
    result = source.group_by("id", maintain_order=maintain_order).agg(aggregations)

    # One inner list per id (1..4). In the expected data the null leads each
    # sorted group that has one, and its rank is null as well.
    expected_original = [
        [None, 2, 3, 4],
        [1, 2, 3, 4],
        [None, 1, 3, 4],
        [None, 1, 3, 4],
    ]
    expected_dense = [
        [None, 1, 2, 3],
        [1, 2, 3, 4],
        [None, 1, 2, 3],
        [None, 1, 2, 3],
    ]
    expected_average = [
        [None, 1.0, 2.0, 3.0],
        [1.0, 2.0, 3.0, 4.0],
        [None, 1.0, 2.0, 3.0],
        [None, 1.0, 2.0, 3.0],
    ]
    expected = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "original": expected_original,
            "dense": expected_dense,
            "average": expected_average,
        },
        # Reuse the result's schema so the comparison is about values only.
        schema=result.schema,
    )

    # Row order is only checked when the group_by was asked to maintain it.
    assert_frame_equal(result, expected, check_row_order=maintain_order)
94
95
96
def test_rank_string_null_11252() -> None:
    """Rank a string series that mixes nulls, an empty string and letters."""
    strings = pl.Series([None, "", "z", None, "a"])
    # Nulls stay null; the empty string ranks ahead of "a", which precedes "z".
    expected = [None, 1.0, 3.0, None, 2.0]
    assert strings.rank().to_list() == expected
99
100
101
def test_rank_series() -> None:
    """Check dense ranks (ascending and descending) and rank result dtypes."""
    series = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    index_dtype = pl.get_index_type()
    dense_ascending = [2, 3, 4, 3, 3, 4, 1]
    dense_descending = [3, 2, 1, 2, 2, 1, 4]

    # Dense ranks computed directly on the Series.
    assert_series_equal(
        series.rank("dense"),
        pl.Series("a", dense_ascending, dtype=index_dtype),
    )

    # The expression API must give the same dense ranks.
    frame = pl.DataFrame([series])
    ranked_frame = frame.select(pl.col("a").rank("dense"))
    assert ranked_frame["a"].to_list() == dense_ascending

    # Descending order: the largest value gets rank 1.
    assert_series_equal(
        series.rank("dense", descending=True),
        pl.Series("a", dense_descending, dtype=index_dtype),
    )

    assert series.rank(method="average").dtype == pl.Float64
    assert series.rank(method="max").dtype == index_dtype
119
120