Path: blob/main/py-polars/tests/unit/operations/test_rank.py
6939 views
import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal


def test_rank_nulls() -> None:
    """Ranking an empty Series gives an empty list; null inputs rank as null."""
    assert pl.Series([]).rank().to_list() == []
    assert pl.Series([None]).rank().to_list() == [None]
    assert pl.Series([None, None]).rank().to_list() == [None, None]


def test_rank_random_expr() -> None:
    """A seeded random rank over a window gives the same frame when repeated."""
    df = pl.from_dict(
        {"a": [1] * 5, "b": [1, 2, 3, 4, 5], "c": [200, 100, 100, 50, 100]}
    )

    # The same expression is evaluated twice with the same seed; the two
    # results are compared with each other rather than with fixed values.
    df_ranks1 = df.with_columns(
        pl.col("c").rank(method="random", seed=1).over("a").alias("rank")
    )
    df_ranks2 = df.with_columns(
        pl.col("c").rank(method="random", seed=1).over("a").alias("rank")
    )
    assert_frame_equal(df_ranks1, df_ranks2)


def test_rank_random_series() -> None:
    """A seeded random rank on a Series matches the pinned expected ranks."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    # Use the index dtype instead of a hard-coded UInt32, in line with the
    # dtype assertions in test_rank_df and test_rank_series.
    expected = pl.Series("a", [2, 5, 7, 3, 4, 6, 1], dtype=pl.get_index_type())
    assert_series_equal(s.rank("random", seed=1), expected)


def test_rank_df() -> None:
    """Check values and dtypes of "average" and "max" ranks via an expression."""
    df = pl.DataFrame(
        {
            "a": [1, 1, 2, 2, 3],
        }
    )

    # "average" assigns tied values the mean of their positions as Float64.
    s = df.select(pl.col("a").rank(method="average").alias("b")).to_series()
    assert s.to_list() == [1.5, 1.5, 3.5, 3.5, 5.0]
    assert s.dtype == pl.Float64

    # "max" assigns tied values their highest position, in the index dtype.
    s = df.select(pl.col("a").rank(method="max").alias("b")).to_series()
    assert s.to_list() == [2, 2, 4, 4, 5]
    assert s.dtype == pl.get_index_type()


@pytest.mark.parametrize("maintain_order", [False, True])
def test_rank_so_4109(maintain_order: bool) -> None:
    """Check dense and average ranks inside a group_by aggregation.

    NOTE(review): the name suggests a regression test for report 4109;
    confirm against the tracker.
    """
    # also tests ranks null behavior
    df = pl.from_dict(
        {
            "id": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
            "rank": [None, 3, 2, 4, 1, 4, 3, 2, 1, None, 3, 4, 4, 1, None, 3],
        }
    ).sort(by=["id", "rank"])

    df = df.group_by("id", maintain_order=maintain_order).agg(
        [
            pl.col("rank").alias("original"),
            pl.col("rank").rank(method="dense").alias("dense"),
            pl.col("rank").rank(method="average").alias("average"),
        ]
    )
    # The expected frame is built with the aggregated frame's own schema, so
    # the column dtypes match the result by construction.
    expected = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "original": [
                [None, 2, 3, 4],
                [1, 2, 3, 4],
                [None, 1, 3, 4],
                [None, 1, 3, 4],
            ],
            "dense": [
                [None, 1, 2, 3],
                [1, 2, 3, 4],
                [None, 1, 2, 3],
                [None, 1, 2, 3],
            ],
            "average": [
                [None, 1.0, 2.0, 3.0],
                [1.0, 2.0, 3.0, 4.0],
                [None, 1.0, 2.0, 3.0],
                [None, 1.0, 2.0, 3.0],
            ],
        },
        schema=df.schema,
    )

    # Row order is only asserted when the group_by was asked to maintain it.
    assert_frame_equal(df, expected, check_row_order=maintain_order)


def test_rank_string_null_11252() -> None:
    """Rank a string Series containing nulls and an empty string.

    NOTE(review): the name suggests a regression test for report 11252;
    confirm against the tracker.
    """
    rank = pl.Series([None, "", "z", None, "a"]).rank()
    assert rank.to_list() == [None, 1.0, 3.0, None, 2.0]


def test_rank_series() -> None:
    """Check dense ranks (ascending and descending) and rank result dtypes."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    # Expected ranks use the index dtype instead of a hard-coded UInt32, in
    # line with the dtype assertions at the end of this test.
    assert_series_equal(
        s.rank("dense"),
        pl.Series("a", [2, 3, 4, 3, 3, 4, 1], dtype=pl.get_index_type()),
    )

    df = pl.DataFrame([s])
    assert df.select(pl.col("a").rank("dense"))["a"].to_list() == [2, 3, 4, 3, 3, 4, 1]

    assert_series_equal(
        s.rank("dense", descending=True),
        pl.Series("a", [3, 2, 1, 2, 2, 1, 4], dtype=pl.get_index_type()),
    )

    assert s.rank(method="average").dtype == pl.Float64
    assert s.rank(method="max").dtype == pl.get_index_type()