# Path: blob/main/py-polars/tests/unit/io/test_lazy_ipc.py
# 6939 views
from __future__ import annotations

import io
from typing import TYPE_CHECKING, Any

import pyarrow.ipc
import pytest

import polars as pl
from polars.interchange.protocol import CompatLevel
from polars.testing.asserts.frame import assert_frame_equal

if TYPE_CHECKING:
    from pathlib import Path


@pytest.fixture
def foods_ipc_path(io_files_path: Path) -> Path:
    """Return the path of ``foods1.ipc`` inside the ``io_files_path`` directory."""
    return io_files_path / "foods1.ipc"


def test_row_index(foods_ipc_path: Path) -> None:
    """Check row-index values for an eager read, a filtered scan, and a stacked index."""
    df = pl.read_ipc(foods_ipc_path, row_index_name="row_index", use_pyarrow=False)
    assert df["row_index"].to_list() == list(range(27))

    df = (
        pl.scan_ipc(foods_ipc_path, row_index_name="row_index")
        .filter(pl.col("category") == pl.lit("vegetables"))
        .collect()
    )

    # The surviving indices are the pre-filter positions of the matching rows.
    assert df["row_index"].to_list() == [0, 6, 11, 13, 14, 20, 25]

    df = (
        pl.scan_ipc(foods_ipc_path, row_index_name="row_index")
        .with_row_index("foo", 10)
        .filter(pl.col("category") == pl.lit("vegetables"))
        .collect()
    )

    # Same positions as above, shifted by the offset of 10 given to "foo".
    assert df["foo"].to_list() == [10, 16, 21, 23, 24, 30, 35]


def test_is_in_type_coercion(foods_ipc_path: Path) -> None:
    """Filter a scan with ``is_in`` given a tuple and a list, and check the shapes."""
    out = (
        pl.scan_ipc(foods_ipc_path)
        .filter(pl.col("category").is_in(("vegetables", "ice cream")))
        .collect()
    )
    assert out.shape == (7, 4)
    out = (
        pl.scan_ipc(foods_ipc_path)
        .select(pl.col("category").alias("cat"))
        .filter(pl.col("cat").is_in(["vegetables"]))
        .collect()
    )
    assert out.shape == (7, 1)


def test_row_index_schema(foods_ipc_path: Path) -> None:
    """Check the dtypes of the row-index column and ``category`` after a select."""
    assert (
        pl.scan_ipc(foods_ipc_path, row_index_name="id")
        .select(["id", "category"])
        .collect()
    ).dtypes == [pl.UInt32, pl.String]


def test_glob_n_rows(io_files_path: Path) -> None:
    """Scan ``foods*.ipc`` with ``n_rows=40`` and check the shape and boundary rows."""
    file_path = io_files_path / "foods*.ipc"
    df = pl.scan_ipc(file_path, n_rows=40).collect()

    # 27 rows from foods1.ipc and 13 from foods2.ipc
    assert df.shape == (40, 4)

    # take first and last rows
    assert df[[0, 39]].to_dict(as_series=False) == {
        "category": ["vegetables", "seafood"],
        "calories": [45, 146],
        "fats_g": [0.5, 6.0],
        "sugars_g": [2, 2],
    }


def test_ipc_list_arg(io_files_path: Path) -> None:
    """Scan an explicit list of two IPC files and check the combined result."""
    first = io_files_path / "foods1.ipc"
    second = io_files_path / "foods2.ipc"

    df = pl.scan_ipc(source=[first, second]).collect()
    assert df.shape == (54, 4)
    assert df.row(-1) == ("seafood", 194, 12.0, 1)
    assert df.row(0) == ("vegetables", 45, 0.5, 2)


def test_scan_ipc_local_with_async(
    monkeypatch: Any,
    io_files_path: Path,
) -> None:
    """Scan a local file with ``POLARS_FORCE_ASYNC=1`` set and check the first row."""
    monkeypatch.setenv("POLARS_VERBOSE", "1")
    monkeypatch.setenv("POLARS_FORCE_ASYNC", "1")

    assert_frame_equal(
        pl.scan_ipc(io_files_path / "foods1.ipc").head(1).collect(),
        pl.DataFrame(
            {
                "category": ["vegetables"],
                "calories": [45],
                "fats_g": [0.5],
                "sugars_g": [2],
            }
        ),
    )


def test_sink_ipc_compat_level_22930() -> None:
    """Sink with ``CompatLevel.oldest()`` on both engines and read back via pyarrow."""
    df = pl.DataFrame({"a": ["foo"]})

    f1 = io.BytesIO()
    f2 = io.BytesIO()

    df.lazy().sink_ipc(f1, compat_level=CompatLevel.oldest(), engine="in-memory")
    df.lazy().sink_ipc(f2, compat_level=CompatLevel.oldest(), engine="streaming")

    # Rewind both buffers so pyarrow reads from the start of the written data.
    f1.seek(0)
    f2.seek(0)

    t1 = pyarrow.ipc.open_file(f1)
    assert "large_string" in str(t1.schema)
    assert_frame_equal(pl.DataFrame(t1.read_all()), df)

    t2 = pyarrow.ipc.open_file(f2)
    assert "large_string" in str(t2.schema)
    assert_frame_equal(pl.DataFrame(t2.read_all()), df)


def test_scan_file_info_cache(
    capfd: Any, monkeypatch: Any, foods_ipc_path: Path
) -> None:
    """Check that explaining a cross join of two scans of one file logs a cache hit."""
    monkeypatch.setenv("POLARS_VERBOSE", "1")
    a = pl.scan_ipc(foods_ipc_path)
    b = pl.scan_ipc(foods_ipc_path)

    a.join(b, how="cross").explain()

    # The message is looked for on stderr, as captured by capfd.
    captured = capfd.readouterr().err
    assert "FILE_INFO CACHE HIT" in captured