Path: blob/main/py-polars/tests/benchmark/test_group_by.py
6939 views
"""1Benchmark tests for the group-by operation.23These tests are based on the H2O.ai database benchmark.45See:6https://h2oai.github.io/db-benchmark/7"""89from __future__ import annotations1011import pytest1213import polars as pl1415pytestmark = pytest.mark.benchmark()161718def test_groupby_h2oai_q1(groupby_data: pl.DataFrame) -> None:19(20groupby_data.lazy()21.group_by("id1")22.agg(23pl.sum("v1").alias("v1_sum"),24)25.collect()26)272829def test_groupby_h2oai_q2(groupby_data: pl.DataFrame) -> None:30(31groupby_data.lazy()32.group_by("id1", "id2")33.agg(34pl.sum("v1").alias("v1_sum"),35)36.collect()37)383940def test_groupby_h2oai_q3(groupby_data: pl.DataFrame) -> None:41(42groupby_data.lazy()43.group_by("id3")44.agg(45pl.sum("v1").alias("v1_sum"),46pl.mean("v3").alias("v3_mean"),47)48.collect()49)505152def test_groupby_h2oai_q4(groupby_data: pl.DataFrame) -> None:53(54groupby_data.lazy()55.group_by("id4")56.agg(57pl.mean("v1").alias("v1_mean"),58pl.mean("v2").alias("v2_mean"),59pl.mean("v3").alias("v3_mean"),60)61.collect()62)636465def test_groupby_h2oai_q5(groupby_data: pl.DataFrame) -> None:66(67groupby_data.lazy()68.group_by("id6")69.agg(70pl.sum("v1").alias("v1_sum"),71pl.sum("v2").alias("v2_sum"),72pl.sum("v3").alias("v3_sum"),73)74.collect()75)767778def test_groupby_h2oai_q6(groupby_data: pl.DataFrame) -> None:79(80groupby_data.lazy()81.group_by("id4", "id5")82.agg(83pl.median("v3").alias("v3_median"),84pl.std("v3").alias("v3_std"),85)86.collect()87)888990def test_groupby_h2oai_q7(groupby_data: pl.DataFrame) -> None:91(92groupby_data.lazy()93.group_by("id3")94.agg((pl.max("v1") - pl.min("v2")).alias("range_v1_v2"))95.collect()96)979899def test_groupby_h2oai_q8(groupby_data: pl.DataFrame) -> None:100(101groupby_data.lazy()102.drop_nulls("v3")103.group_by("id6")104.agg(pl.col("v3").top_k(2).alias("largest2_v3"))105.explode("largest2_v3")106.collect()107)108109110def test_groupby_h2oai_q9(groupby_data: pl.DataFrame) -> None:111(112groupby_data.lazy()113.group_by("id2", "id4")114.agg((pl.corr("v1", "v2") ** 2).alias("r2"))115.collect()116)117118119def test_groupby_h2oai_q10(groupby_data: pl.DataFrame) -> None:120(121groupby_data.lazy()122.group_by("id1", "id2", "id3", "id4", "id5", "id6")123.agg(124pl.sum("v3").alias("v3_sum"),125pl.count("v1").alias("v1_count"),126)127.collect()128)129130131