Path: blob/main/py-polars/tests/benchmark/test_with_columns.py
6939 views
from time import perf_counter

import pytest

import polars as pl
import polars.selectors as cs


def _time_with_columns(df: pl.DataFrame, rhs: pl.DataFrame) -> float:
    """Return the wall-clock seconds taken by one ``df.with_columns(rhs)`` call."""
    start = perf_counter()
    df.with_columns(rhs)
    return perf_counter() - start


# TODO: this is slow in streaming
@pytest.mark.may_fail_auto_streaming
@pytest.mark.slow
def test_with_columns_quadratic_19503() -> None:
    """
    Check that `with_columns` does not slow down disproportionately.

    Two single-row frames with `num_columns` columns each are built. Adding
    every column of the second frame to the first is timed against adding only
    `num_columns // 1_000` of them, and the test fails when the first call
    takes more than `max_ratio` times as long as the second.

    Raises
    ------
    AssertionError
        If the slow/fast runtime ratio exceeds `max_ratio`.
    """
    num_columns = 10_000
    max_ratio = 100

    df1 = pl.DataFrame({f"col_{i}": [0] for i in range(num_columns)})
    df2 = pl.DataFrame({f"feature_{i}": [0] for i in range(num_columns)})

    # Slow case: append all `num_columns` columns of `df2`.
    slow = _time_with_columns(df1, df2)

    # Fast case: append only the first `num_columns // 1_000` columns. The
    # selection is built before the timer starts so that only `with_columns`
    # itself is measured.
    few_columns = df2.select(cs.by_index(range(num_columns // 1_000)))
    fast = _time_with_columns(df1, few_columns)

    ratio = slow / fast

    # Assert the relative rather than exact runtime to avoid flakiness in CI
    # We pick a threshold just low enough to pass CI without any false
    # negatives.
    #                    1.12.0 | 1.14.0
    # M3 Pro 11-core   | 200x   | 20x
    # EC2 c7i.4xlarge  | 150x   | 13x
    # GitHub CI runner |        | 50x
    if ratio > max_ratio:
        msg = (
            f"with_columns with {num_columns} columns was {ratio:.1f}x slower"
            f" than with {num_columns // 1_000} columns (limit: {max_ratio}x)"
        )
        raise AssertionError(msg)