Path: blob/main/py-polars/tests/unit/operations/test_rank.py
8431 views
import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal


def test_rank_nulls() -> None:
    """Ranking an empty or all-null Series yields an empty or all-null result."""
    assert pl.Series([]).rank().to_list() == []
    assert pl.Series([None]).rank().to_list() == [None]
    assert pl.Series([None, None]).rank().to_list() == [None, None]


def test_rank_random_expr() -> None:
    """A seeded random rank inside a window gives the same frame when repeated."""
    df = pl.from_dict(
        {"a": [1] * 5, "b": [1, 2, 3, 4, 5], "c": [200, 100, 100, 50, 100]}
    )

    # The same seeded expression is evaluated twice on purpose: the two
    # resulting frames must be equal.
    df_ranks1 = df.with_columns(
        pl.col("c").rank(method="random", seed=1).over("a").alias("rank")
    )
    df_ranks2 = df.with_columns(
        pl.col("c").rank(method="random", seed=1).over("a").alias("rank")
    )
    assert_frame_equal(df_ranks1, df_ranks2)


def test_rank_random_series() -> None:
    """A seeded random rank on a Series matches the pinned expected ranks."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])
    assert_series_equal(
        s.rank("random", seed=1),
        pl.Series("a", [2, 5, 7, 3, 4, 6, 1], dtype=pl.get_index_type()),
    )


def test_rank_df() -> None:
    """Check values and dtypes of the "average" and "max" rank methods."""
    df = pl.DataFrame(
        {
            "a": [1, 1, 2, 2, 3],
        }
    )

    # "average" is expected to produce fractional ranks as Float64.
    s = df.select(pl.col("a").rank(method="average").alias("b")).to_series()
    assert s.to_list() == [1.5, 1.5, 3.5, 3.5, 5.0]
    assert s.dtype == pl.Float64

    # "max" is expected to produce integer ranks in the index dtype.
    s = df.select(pl.col("a").rank(method="max").alias("b")).to_series()
    assert s.to_list() == [2, 2, 4, 4, 5]
    assert s.dtype == pl.get_index_type()


@pytest.mark.parametrize("maintain_order", [False, True])
def test_rank_so_4109(maintain_order: bool) -> None:
    """Rank within groups; null inputs are expected to stay null in the ranks."""
    # also tests ranks null behavior
    df = pl.from_dict(
        {
            "id": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4],
            "rank": [None, 3, 2, 4, 1, 4, 3, 2, 1, None, 3, 4, 4, 1, None, 3],
        }
    ).sort(by=["id", "rank"])

    df = df.group_by("id", maintain_order=maintain_order).agg(
        [
            pl.col("rank").alias("original"),
            pl.col("rank").rank(method="dense").alias("dense"),
            pl.col("rank").rank(method="average").alias("average"),
        ]
    )
    expected = pl.DataFrame(
        {
            "id": [1, 2, 3, 4],
            "original": [
                [None, 2, 3, 4],
                [1, 2, 3, 4],
                [None, 1, 3, 4],
                [None, 1, 3, 4],
            ],
            "dense": [
                [None, 1, 2, 3],
                [1, 2, 3, 4],
                [None, 1, 2, 3],
                [None, 1, 2, 3],
            ],
            "average": [
                [None, 1.0, 2.0, 3.0],
                [1.0, 2.0, 3.0, 4.0],
                [None, 1.0, 2.0, 3.0],
                [None, 1.0, 2.0, 3.0],
            ],
        },
        schema=df.schema,
    )

    # Row order is only compared when the group_by was asked to maintain it.
    assert_frame_equal(df, expected, check_row_order=maintain_order)


def test_rank_string_null_11252() -> None:
    """Rank a string Series containing nulls and an empty string."""
    rank = pl.Series([None, "", "z", None, "a"]).rank()
    assert rank.to_list() == [None, 1.0, 3.0, None, 2.0]


def test_rank_series() -> None:
    """Check dense ranks (ascending and descending) and result dtypes."""
    s = pl.Series("a", [1, 2, 3, 2, 2, 3, 0])

    assert_series_equal(
        s.rank("dense"),
        pl.Series("a", [2, 3, 4, 3, 3, 4, 1], dtype=pl.get_index_type()),
    )

    # The expression API must give the same dense ranks as the Series API.
    df = pl.DataFrame([s])
    assert df.select(pl.col("a").rank("dense"))["a"].to_list() == [2, 3, 4, 3, 3, 4, 1]

    assert_series_equal(
        s.rank("dense", descending=True),
        pl.Series("a", [3, 2, 1, 2, 2, 1, 4], dtype=pl.get_index_type()),
    )

    assert s.rank(method="average").dtype == pl.Float64
    assert s.rank(method="max").dtype == pl.get_index_type()