Path: blob/main/py-polars/tests/unit/test_polars_import.py
from __future__ import annotations

import compileall
import subprocess
import sys
from pathlib import Path

import pytest

import polars as pl
from polars import selectors as cs

# set a maximum cutoff at 0.5 secs; note that we are typically much faster
# than this (more like ~0.07 secs, depending on hardware), but we allow a
# margin of error to account for frequent noise from slow/contended CI.
MAX_ALLOWED_IMPORT_TIME = 500_000  # << microseconds


def _import_time_from_frame(tm: pl.DataFrame) -> int:
    return int(
        tm.filter(pl.col("import").str.strip_chars() == "polars")
        .select("cumulative_time")
        .item()
    )


def _import_timings() -> bytes:
    # assemble suitable command to get polars module import timing;
    # run in a separate process to ensure clean timing results.
    cmd = f'{sys.executable} -S -X importtime -c "import polars"'
    output = subprocess.run(cmd, shell=True, capture_output=True).stderr
    if b"Traceback" in output:
        msg = f"measuring import timings failed\n\nCommand output:\n{output.decode()}"
        raise RuntimeError(msg)
    return output.replace(b"import time:", b"").strip()


def _import_timings_as_frame(n_tries: int) -> tuple[pl.DataFrame, int]:
    import_timings = []
    for _ in range(n_tries):
        df_import = (
            pl.read_csv(
                source=_import_timings(),
                separator="|",
                has_header=True,
                new_columns=["own_time", "cumulative_time", "import"],
            )
            .with_columns(cs.ends_with("_time").str.strip_chars().cast(pl.UInt32))
            .select("import", "own_time", "cumulative_time")
            .reverse()
        )
        polars_import_time = _import_time_from_frame(df_import)
        if polars_import_time < MAX_ALLOWED_IMPORT_TIME:
            return df_import, polars_import_time

        import_timings.append(df_import)

    # note: if a qualifying import time was already achieved, we won't get here.
    # if we do, let's see all the failed timings to help see what's going on:
    import_times = [_import_time_from_frame(df) for df in import_timings]
    msg = "\n".join(f"({idx}) {tm:,}μs" for idx, tm in enumerate(import_times))
    min_max = f"Min => {min(import_times):,}μs, Max => {max(import_times):,}μs"
    print(f"\nImport times achieved over {n_tries} tries:\n{min_max}\n\n{msg}")

    sorted_timing_frames = sorted(import_timings, key=_import_time_from_frame)
    return sorted_timing_frames[0], min(import_times)


@pytest.mark.skipif(sys.platform == "win32", reason="Unreliable on Windows")
@pytest.mark.debug
@pytest.mark.slow
def test_polars_import() -> None:
    # up-front compile '.py' -> '.pyc' before timing
    polars_path = Path(pl.__file__).parent
    compileall.compile_dir(polars_path, quiet=1)

    # note: reduce noise by allowing up to 'n' tries (but return immediately if/when
    # a qualifying time is achieved, so we don't waste time running unnecessary tests)
    df_import, polars_import_time = _import_timings_as_frame(n_tries=10)

    with pl.Config(
        # get a complete view of what's going on in case of failure
        tbl_rows=250,
        fmt_str_lengths=100,
        tbl_hide_dataframe_shape=True,
    ):
        # ensure that we have not broken lazy-loading (numpy, pandas, pyarrow, etc).
        lazy_modules = [
            dep for dep in pl.dependencies.__all__ if not dep.startswith("_")
        ]
        for mod in lazy_modules:
            not_imported = not df_import["import"].str.starts_with(mod).any()
            if_err = f"lazy-loading regression: found {mod!r} at import time"
            assert not_imported, f"{if_err}\n{df_import}"

        # ensure that we do not have an import speed regression.
        if polars_import_time > MAX_ALLOWED_IMPORT_TIME:
            import_time_ms = polars_import_time // 1_000
            msg = f"Possible import speed regression; took {import_time_ms}ms\n{df_import}"
            raise AssertionError(msg)