Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/benchmark/test_with_columns.py
6939 views
1
from time import perf_counter
2
3
import pytest
4
5
import polars as pl
6
import polars.selectors as cs
7
8
9
# TODO: this is slow in streaming
@pytest.mark.may_fail_auto_streaming
@pytest.mark.slow
def test_with_columns_quadratic_19503() -> None:
    """
    Check that `with_columns` does not get disproportionately slow for a wide frame.

    A 10_000-column frame is extended once with another 10_000-column frame
    (the "slow" case) and once with only the first ``num_columns // 1_000``
    columns of that frame (the "fast" case). The test fails when the slow
    call takes more than 100 times as long as the fast call.

    Raises
    ------
    AssertionError
        If the slow/fast runtime ratio exceeds the threshold.
    """
    num_columns = 10_000
    max_ratio = 100

    df1 = pl.DataFrame({f"col_{i}": [0] for i in range(num_columns)})
    df2 = pl.DataFrame({f"feature_{i}": [0] for i in range(num_columns)})

    def _time_with_columns(rhs: pl.DataFrame) -> float:
        # Time only the `with_columns` call; building `rhs` is the caller's
        # job so that it stays outside the measured region.
        start = perf_counter()
        df1.with_columns(rhs)
        return perf_counter() - start

    slow = _time_with_columns(df2)
    fast = _time_with_columns(
        df2.select(cs.by_index(range(num_columns // 1_000)))
    )

    # Assert the relative rather than exact runtime to avoid flakiness in CI
    # We pick a threshold just low enough to pass CI without any false
    # negatives.
    #                  | 1.12.0 | 1.14.0
    # M3 Pro 11-core   | 200x   | 20x
    # EC2 c7i.4xlarge  | 150x   | 13x
    # GitHub CI runner |        | 50x
    #
    # Compare by multiplication so that a zero reading for the fast case
    # (coarse clock) cannot raise ZeroDivisionError.
    if slow > max_ratio * fast:
        ratio = slow / fast if fast > 0 else float("inf")
        msg = (
            f"with_columns on {num_columns} columns was {ratio:.1f}x slower than "
            f"on {num_columns // 1_000} columns (limit: {max_ratio}x, "
            f"slow={slow:.6f}s, fast={fast:.6f}s)"
        )
        raise AssertionError(msg)
45
46