Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/benchmark/test_group_by.py
6939 views
1
"""
Benchmark tests for the group-by operation.

These tests are based on the H2O.ai database benchmark.

See:
https://h2oai.github.io/db-benchmark/
"""
9
10
from __future__ import annotations
11
12
import pytest
13
14
import polars as pl
15
16
# Module-level pytest marker: applies the `benchmark` mark to every test in this file.
pytestmark = pytest.mark.benchmark()
17
18
19
def test_groupby_h2oai_q1(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 1: sum of ``v1`` within each ``id1`` group."""
    v1_sum = pl.sum("v1").alias("v1_sum")
    query = groupby_data.lazy().group_by("id1").agg(v1_sum)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
28
29
30
def test_groupby_h2oai_q2(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 2: sum of ``v1`` within each (``id1``, ``id2``) group."""
    v1_sum = pl.sum("v1").alias("v1_sum")
    query = groupby_data.lazy().group_by("id1", "id2").agg(v1_sum)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
39
40
41
def test_groupby_h2oai_q3(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 3: sum of ``v1`` and mean of ``v3`` per ``id3`` group."""
    aggregations = (
        pl.sum("v1").alias("v1_sum"),
        pl.mean("v3").alias("v3_mean"),
    )
    query = groupby_data.lazy().group_by("id3").agg(*aggregations)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
51
52
53
def test_groupby_h2oai_q4(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 4: means of ``v1``, ``v2`` and ``v3`` per ``id4`` group."""
    aggregations = (
        pl.mean("v1").alias("v1_mean"),
        pl.mean("v2").alias("v2_mean"),
        pl.mean("v3").alias("v3_mean"),
    )
    query = groupby_data.lazy().group_by("id4").agg(*aggregations)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
64
65
66
def test_groupby_h2oai_q5(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 5: sums of ``v1``, ``v2`` and ``v3`` per ``id6`` group."""
    aggregations = (
        pl.sum("v1").alias("v1_sum"),
        pl.sum("v2").alias("v2_sum"),
        pl.sum("v3").alias("v3_sum"),
    )
    query = groupby_data.lazy().group_by("id6").agg(*aggregations)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
77
78
79
def test_groupby_h2oai_q6(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 6: median and std of ``v3`` per (``id4``, ``id5``) group."""
    aggregations = (
        pl.median("v3").alias("v3_median"),
        pl.std("v3").alias("v3_std"),
    )
    query = groupby_data.lazy().group_by("id4", "id5").agg(*aggregations)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
89
90
91
def test_groupby_h2oai_q7(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 7: ``max(v1) - min(v2)`` per ``id3`` group."""
    highest_v1 = pl.max("v1")
    lowest_v2 = pl.min("v2")
    value_range = (highest_v1 - lowest_v2).alias("range_v1_v2")
    query = groupby_data.lazy().group_by("id3").agg(value_range)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
98
99
100
def test_groupby_h2oai_q8(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 8: top two ``v3`` values per ``id6`` group, exploded to rows."""
    # Rows with a null ``v3`` are removed before grouping.
    non_null_v3 = groupby_data.lazy().drop_nulls("v3")
    top_two = pl.col("v3").top_k(2).alias("largest2_v3")
    query = non_null_v3.group_by("id6").agg(top_two).explode("largest2_v3")
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
109
110
111
def test_groupby_h2oai_q9(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 9: squared ``v1``/``v2`` correlation per (``id2``, ``id4``)."""
    correlation = pl.corr("v1", "v2")
    r_squared = (correlation**2).alias("r2")
    query = groupby_data.lazy().group_by("id2", "id4").agg(r_squared)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
118
119
120
def test_groupby_h2oai_q10(groupby_data: pl.DataFrame) -> None:
    """Benchmark query 10: ``v3`` sum and ``v1`` count grouped by all six id columns."""
    group_keys = ("id1", "id2", "id3", "id4", "id5", "id6")
    aggregations = (
        pl.sum("v3").alias("v3_sum"),
        pl.count("v1").alias("v1_count"),
    )
    query = groupby_data.lazy().group_by(*group_keys).agg(*aggregations)
    # Only the execution matters for the benchmark; the result is discarded.
    query.collect()
130
131