Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/io/test_lazy_ipc.py
6939 views
1
from __future__ import annotations
2
3
import io
4
from typing import TYPE_CHECKING, Any
5
6
import pyarrow.ipc
7
import pytest
8
9
import polars as pl
10
from polars.interchange.protocol import CompatLevel
11
from polars.testing.asserts.frame import assert_frame_equal
12
13
if TYPE_CHECKING:
14
from pathlib import Path
15
16
17
@pytest.fixture
def foods_ipc_path(io_files_path: Path) -> Path:
    """Return the path of the ``foods1.ipc`` file inside ``io_files_path``."""
    return io_files_path.joinpath("foods1.ipc")
20
21
22
def test_row_index(foods_ipc_path: Path) -> None:
    """Check row-index values for an eager read and for filtered lazy scans."""
    # Eager read: the index column enumerates all 27 rows starting at zero.
    eager = pl.read_ipc(
        foods_ipc_path, row_index_name="row_index", use_pyarrow=False
    )
    assert eager["row_index"].to_list() == list(range(27))

    is_vegetable = pl.col("category") == pl.lit("vegetables")

    # Lazy scan with a filter: the expected values are the pre-filter positions.
    indexed = pl.scan_ipc(foods_ipc_path, row_index_name="row_index")
    vegetables = indexed.filter(is_vegetable).collect()
    assert vegetables["row_index"].to_list() == [0, 6, 11, 13, 14, 20, 25]

    # A second index named "foo" with offset 10, added on top of the scan's own.
    doubly_indexed = pl.scan_ipc(
        foods_ipc_path, row_index_name="row_index"
    ).with_row_index("foo", 10)
    vegetables = doubly_indexed.filter(is_vegetable).collect()
    assert vegetables["foo"].to_list() == [10, 16, 21, 23, 24, 30, 35]
42
43
44
def test_is_in_type_coercion(foods_ipc_path: Path) -> None:
    """Filter a scanned IPC file with ``is_in`` on the category column."""
    # Tuple of candidates against the original column name.
    wanted = ("vegetables", "ice cream")
    by_category = pl.scan_ipc(foods_ipc_path).filter(
        pl.col("category").is_in(wanted)
    )
    matched = by_category.collect()
    assert matched.shape == (7, 4)

    # List of candidates against the aliased column after a projection.
    only_cat = pl.scan_ipc(foods_ipc_path).select(pl.col("category").alias("cat"))
    matched = only_cat.filter(pl.col("cat").is_in(["vegetables"])).collect()
    assert matched.shape == (7, 1)
58
59
60
def test_row_index_schema(foods_ipc_path: Path) -> None:
    """Check the dtypes of the row-index and ``category`` columns after a select."""
    indexed = pl.scan_ipc(foods_ipc_path, row_index_name="id")
    selected = indexed.select(["id", "category"]).collect()
    assert selected.dtypes == [pl.UInt32, pl.String]
66
67
68
def test_glob_n_rows(io_files_path: Path) -> None:
    """Scan a glob pattern with ``n_rows=40`` and check the rows that come back."""
    pattern = io_files_path / "foods*.ipc"
    limited = pl.scan_ipc(pattern, n_rows=40).collect()

    # 27 rows from foods1.ipc and 13 from foods2.ipc
    assert limited.shape == (40, 4)

    # take first and last rows
    first_and_last = limited[[0, 39]].to_dict(as_series=False)
    assert first_and_last == {
        "category": ["vegetables", "seafood"],
        "calories": [45, 146],
        "fats_g": [0.5, 6.0],
        "sugars_g": [2, 2],
    }
82
83
84
def test_ipc_list_arg(io_files_path: Path) -> None:
    """Scan a list of two IPC files and check the shape and the edge rows."""
    sources = [io_files_path / name for name in ("foods1.ipc", "foods2.ipc")]

    combined = pl.scan_ipc(source=sources).collect()
    assert combined.shape == (54, 4)
    assert combined.row(-1) == ("seafood", 194, 12.0, 1)
    assert combined.row(0) == ("vegetables", 45, 0.5, 2)
92
93
94
def test_scan_ipc_local_with_async(
    monkeypatch: Any,
    io_files_path: Path,
) -> None:
    """Scan a local IPC file with ``POLARS_FORCE_ASYNC`` set and check row one."""
    for env_name in ("POLARS_VERBOSE", "POLARS_FORCE_ASYNC"):
        monkeypatch.setenv(env_name, "1")

    first_row = pl.scan_ipc(io_files_path / "foods1.ipc").head(1).collect()
    expected = pl.DataFrame(
        {
            "category": ["vegetables"],
            "calories": [45],
            "fats_g": [0.5],
            "sugars_g": [2],
        }
    )
    assert_frame_equal(first_row, expected)
112
113
114
def test_sink_ipc_compat_level_22930() -> None:
    """Sink with the oldest compat level on both engines and read back via pyarrow.

    For each engine the written schema must mention ``large_string`` and the
    data must round-trip to the original frame.
    """
    df = pl.DataFrame({"a": ["foo"]})

    in_memory_buf = io.BytesIO()
    streaming_buf = io.BytesIO()

    df.lazy().sink_ipc(
        in_memory_buf, compat_level=CompatLevel.oldest(), engine="in-memory"
    )
    df.lazy().sink_ipc(
        streaming_buf, compat_level=CompatLevel.oldest(), engine="streaming"
    )

    written = (in_memory_buf, streaming_buf)

    # Rewind both buffers before any of them is read back.
    for buf in written:
        buf.seek(0)

    for buf in written:
        reader = pyarrow.ipc.open_file(buf)
        assert "large_string" in str(reader.schema)
        assert_frame_equal(pl.DataFrame(reader.read_all()), df)
133
134
135
def test_scan_file_info_cache(
    capfd: Any, monkeypatch: Any, foods_ipc_path: Path
) -> None:
    """Explain a cross join of two scans of one file and expect a cache-hit log.

    With ``POLARS_VERBOSE`` set, stderr must contain ``FILE_INFO CACHE HIT``.
    """
    monkeypatch.setenv("POLARS_VERBOSE", "1")
    left = pl.scan_ipc(foods_ipc_path)
    right = pl.scan_ipc(foods_ipc_path)

    left.join(right, how="cross").explain()

    stderr_text = capfd.readouterr().err
    assert "FILE_INFO CACHE HIT" in stderr_text
146
147