Path: blob/main/py-polars/tests/unit/interop/test_interop.py
8420 views
from __future__ import annotations12import io3from datetime import date, datetime, time, timedelta, timezone4from typing import TYPE_CHECKING, Any, cast56import numpy as np7import pandas as pd8import pyarrow as pa9import pyarrow.parquet as pq10import pytest1112import polars as pl13from polars.exceptions import (14ComputeError,15DuplicateError,16InvalidOperationError,17PanicException,18UnstableWarning,19)20from polars.interchange.protocol import CompatLevel21from polars.testing import assert_frame_equal, assert_series_equal22from tests.unit.utils.pycapsule_utils import PyCapsuleStreamHolder2324if TYPE_CHECKING:25from tests.conftest import PlMonkeyPatch262728def test_arrow_list_roundtrip() -> None:29# https://github.com/pola-rs/polars/issues/106430tbl = pa.table({"a": [1], "b": [[1, 2]]})31arw = pl.from_arrow(tbl).to_arrow()3233assert arw.shape == tbl.shape34assert arw.schema.names == tbl.schema.names35for c1, c2 in zip(arw.columns, tbl.columns, strict=True):36assert c1.to_pylist() == c2.to_pylist()373839def test_arrow_null_roundtrip() -> None:40tbl = pa.table({"a": [None, None], "b": [[None, None], [None, None]]})41df = pl.from_arrow(tbl)4243if isinstance(df, pl.DataFrame):44assert df.dtypes == [pl.Null, pl.List(pl.Null)]4546arw = df.to_arrow()4748assert arw.shape == tbl.shape49assert arw.schema.names == tbl.schema.names50for c1, c2 in zip(arw.columns, tbl.columns, strict=True):51assert c1.to_pylist() == c2.to_pylist()525354def test_arrow_empty_dataframe() -> None:55# 0x0 dataframe56df = pl.DataFrame({})57tbl = pa.table({})58assert df.to_arrow() == tbl59df2 = cast("pl.DataFrame", pl.from_arrow(df.to_arrow()))60assert_frame_equal(df2, df)6162# 0 row dataframe63df = pl.DataFrame({}, schema={"a": pl.Int32})64tbl = pa.Table.from_batches([], pa.schema([pa.field("a", pa.int32())]))65assert df.to_arrow() == tbl66df2 = cast("pl.DataFrame", pl.from_arrow(df.to_arrow()))67assert df2.schema == {"a": pl.Int32}68assert df2.shape == (0, 1)697071def test_arrow_dict_to_polars() -> None:72pa_dict = pa.DictionaryArray.from_arrays(73indices=np.array([0, 1, 2, 3, 1, 0, 2, 3, 3, 2]),74dictionary=np.array(["AAA", "BBB", "CCC", "DDD"]),75).cast(pa.large_utf8())7677s = pl.Series(78name="pa_dict",79values=["AAA", "BBB", "CCC", "DDD", "BBB", "AAA", "CCC", "DDD", "DDD", "CCC"],80)81assert_series_equal(s, pl.Series("pa_dict", pa_dict))828384def test_arrow_list_chunked_array() -> None:85a = pa.array([[1, 2], [3, 4]])86ca = pa.chunked_array([a, a, a])87s = cast("pl.Series", pl.from_arrow(ca))88assert s.dtype == pl.List899091# Test that polars convert Arrays of logical types correctly to arrow92def test_arrow_array_logical() -> None:93# cast to large string and uint8 indices because polars converts to those94pa_data1 = (95pa.array(["a", "b", "c", "d"])96.dictionary_encode()97.cast(pa.dictionary(pa.uint8(), pa.large_string()))98)99pa_array_logical1 = pa.FixedSizeListArray.from_arrays(pa_data1, 2)100101s1 = pl.Series(102values=[["a", "b"], ["c", "d"]],103dtype=pl.Array(pl.Enum(["a", "b", "c", "d"]), shape=2),104)105assert s1.to_arrow() == pa_array_logical1106107pa_data2 = pa.array([date(2024, 1, 1), date(2024, 1, 2)])108pa_array_logical2 = pa.FixedSizeListArray.from_arrays(pa_data2, 1)109110s2 = pl.Series(111values=[[date(2024, 1, 1)], [date(2024, 1, 2)]],112dtype=pl.Array(pl.Date, shape=1),113)114assert s2.to_arrow() == pa_array_logical2115116117def test_from_dict() -> None:118data = {"a": [1, 2], "b": [3, 4]}119df = pl.from_dict(data)120assert df.shape == (2, 2)121for s1, s2 in zip(122list(df), [pl.Series("a", [1, 2]), pl.Series("b", [3, 4])], strict=True123):124assert_series_equal(s1, s2)125126127def test_from_dict_struct() -> None:128data: dict[str, dict[str, list[int]] | list[int]] = {129"a": {"b": [1, 3], "c": [2, 4]},130"d": [5, 6],131}132df = pl.from_dict(data)133assert df.shape == (2, 2)134assert df["a"][0] == {"b": 1, "c": 2}135assert df["a"][1] == {"b": 3, "c": 4}136assert df.schema == {"a": pl.Struct({"b": pl.Int64, "c": pl.Int64}), "d": pl.Int64}137138139def test_from_dicts() -> None:140data = [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": None}]141df = pl.from_dicts(data) # type: ignore[arg-type]142assert df.shape == (3, 2)143assert df.rows() == [(1, 4), (2, 5), (3, None)]144assert df.schema == {"a": pl.Int64, "b": pl.Int64}145146147def test_from_dict_no_inference() -> None:148schema = {"a": pl.String}149data = [{"a": "aa"}]150df = pl.from_dicts(data, schema_overrides=schema, infer_schema_length=0)151assert df.schema == schema152assert df.to_dicts() == data153154155def test_from_dicts_schema_override() -> None:156schema = {157"a": pl.String,158"b": pl.Int64,159"c": pl.List(pl.Struct({"x": pl.Int64, "y": pl.String, "z": pl.Float64})),160}161162# initial data matches the expected schema163data1 = [164{165"a": "l",166"b": i,167"c": [{"x": (j + 2), "y": "?", "z": (j % 2)} for j in range(2)],168}169for i in range(5)170]171172# extend with a mix of fields that are/not in the schema173data2 = [{"b": i + 5, "d": "ABC", "e": "DEF"} for i in range(5)]174175for n_infer in (0, 3, 5, 8, 10, 100):176df = pl.DataFrame(177data=(data1 + data2),178schema=schema, # type: ignore[arg-type]179infer_schema_length=n_infer,180)181assert df.schema == schema182assert df.rows() == [183("l", 0, [{"x": 2, "y": "?", "z": 0.0}, {"x": 3, "y": "?", "z": 1.0}]),184("l", 1, [{"x": 2, "y": "?", "z": 0.0}, {"x": 3, "y": "?", "z": 1.0}]),185("l", 2, [{"x": 2, "y": "?", "z": 0.0}, {"x": 3, "y": "?", "z": 1.0}]),186("l", 3, [{"x": 2, "y": "?", "z": 0.0}, {"x": 3, "y": "?", "z": 1.0}]),187("l", 4, [{"x": 2, "y": "?", "z": 0.0}, {"x": 3, "y": "?", "z": 1.0}]),188(None, 5, None),189(None, 6, None),190(None, 7, None),191(None, 8, None),192(None, 9, None),193]194195196def test_from_dicts_struct() -> None:197data = [{"a": {"b": 1, "c": 2}, "d": 5}, {"a": {"b": 3, "c": 4}, "d": 6}]198df = pl.from_dicts(data)199assert df.shape == (2, 2)200assert df["a"][0] == {"b": 1, "c": 2}201assert df["a"][1] == {"b": 3, "c": 4}202203# 5649204assert pl.from_dicts([{"a": [{"x": 1}]}, {"a": [{"y": 1}]}]).to_dict(205as_series=False206) == {"a": [[{"y": None, "x": 1}], [{"y": 1, "x": None}]]}207assert pl.from_dicts([{"a": [{"x": 1}, {"y": 2}]}, {"a": [{"y": 1}]}]).to_dict(208as_series=False209) == {"a": [[{"y": None, "x": 1}, {"y": 2, "x": None}], [{"y": 1, "x": None}]]}210211212def test_from_records() -> None:213data = [[1, 2, 3], [4, 5, 6]]214df = pl.from_records(data, schema=["a", "b"])215assert df.shape == (3, 2)216assert df.rows() == [(1, 4), (2, 5), (3, 6)]217218219# https://github.com/pola-rs/polars/issues/15195220@pytest.mark.parametrize(221"input",222[223pl.Series([1, 2]),224pl.Series([{"a": 1, "b": 2}]),225pl.DataFrame({"a": [1, 2], "b": [3, 4]}),226],227)228def test_from_records_non_sequence_input(input: Any) -> None:229with pytest.raises(TypeError, match="expected data of type Sequence"):230pl.from_records(input)231232233def test_from_arrow() -> None:234data = pa.table({"a": [1, 2, 3], "b": [4, 5, 6]})235df = pl.from_arrow(data)236assert df.shape == (3, 2)237assert df.rows() == [(1, 4), (2, 5), (3, 6)] # type: ignore[union-attr]238239# if not a PyArrow type, raise a TypeError240with pytest.raises(TypeError):241_ = pl.from_arrow([1, 2])242243df = pl.from_arrow(244data, schema=["a", "b"], schema_overrides={"a": pl.UInt32, "b": pl.UInt64}245)246assert df.rows() == [(1, 4), (2, 5), (3, 6)] # type: ignore[union-attr]247assert df.schema == {"a": pl.UInt32, "b": pl.UInt64} # type: ignore[union-attr]248249250def test_from_arrow_with_bigquery_metadata() -> None:251arrow_schema = pa.schema(252[253pa.field("id", pa.int64()).with_metadata(254{"ARROW:extension:name": "google:sqlType:integer"}255),256pa.field(257"misc",258pa.struct([("num", pa.int32()), ("val", pa.string())]),259).with_metadata({"ARROW:extension:name": "google:sqlType:struct"}),260]261)262arrow_tbl = pa.Table.from_pylist(263[{"id": 1, "misc": None}, {"id": 2, "misc": None}],264schema=arrow_schema,265)266267expected_data = {"id": [1, 2], "num": [None, None], "val": [None, None]}268expected_schema = {"id": pl.Int64, "num": pl.Int32, "val": pl.String}269assert_frame_equal(270pl.DataFrame(expected_data, schema=expected_schema),271pl.from_arrow(arrow_tbl).unnest("misc"), # type: ignore[union-attr]272)273274275def test_from_optional_not_available() -> None:276from polars._dependencies import _LazyModule277278# proxy module is created dynamically if the required module is not available279# (see the polars._dependencies source code for additional detail/comments)280281np = _LazyModule("numpy", module_available=False)282with pytest.raises(ImportError, match=r"np\.array requires 'numpy'"):283pl.from_numpy(np.array([[1, 2], [3, 4]]), schema=["a", "b"])284285pa = _LazyModule("pyarrow", module_available=False)286with pytest.raises(ImportError, match=r"pa\.table requires 'pyarrow'"):287pl.from_arrow(pa.table({"a": [1, 2, 3], "b": [4, 5, 6]}))288289pd = _LazyModule("pandas", module_available=False)290with pytest.raises(ImportError, match=r"pd\.Series requires 'pandas'"):291pl.from_pandas(pd.Series([1, 2, 3]))292293294def test_upcast_pyarrow_dicts() -> None:295# https://github.com/pola-rs/polars/issues/1752296tbls = [297pa.table(298{299"col_name": pa.array(300[f"value_{i}"], pa.dictionary(pa.int8(), pa.string())301)302}303)304for i in range(128)305]306307tbl = pa.concat_tables(tbls, promote_options="default")308out = cast("pl.DataFrame", pl.from_arrow(tbl))309assert out.shape == (128, 1)310assert out["col_name"][0] == "value_0"311assert out["col_name"][127] == "value_127"312313314def test_no_rechunk() -> None:315table = pa.Table.from_pydict({"x": pa.chunked_array([list("ab"), list("cd")])})316# table317assert pl.from_arrow(table, rechunk=False).n_chunks() == 2318# chunked array319assert pl.from_arrow(table["x"], rechunk=False).n_chunks() == 2320321322def test_from_empty_arrow() -> None:323df = cast("pl.DataFrame", pl.from_arrow(pa.table(pd.DataFrame({"a": [], "b": []}))))324assert df.columns == ["a", "b"]325assert df.dtypes == [pl.Float64, pl.Float64]326327# 2705328df1 = pd.DataFrame(columns=["b"], dtype=float, index=pd.Index([]))329tbl = pa.Table.from_pandas(df1)330out = cast("pl.DataFrame", pl.from_arrow(tbl))331assert out.columns == ["b", "__index_level_0__"]332assert out.dtypes == [pl.Float64, pl.Null]333tbl = pa.Table.from_pandas(df1, preserve_index=False)334out = cast("pl.DataFrame", pl.from_arrow(tbl))335assert out.columns == ["b"]336assert out.dtypes == [pl.Float64]337338# 4568339tbl = pa.table({"l": []}, schema=pa.schema([("l", pa.large_list(pa.uint8()))]))340341df = cast("pl.DataFrame", pl.from_arrow(tbl))342assert df.schema["l"] == pl.List(pl.UInt8)343344345def test_cat_int_types_3500() -> None:346# Create an enum / categorical / dictionary typed pyarrow array347# Most simply done by creating a pandas categorical series first348categorical_s = pd.Series(["a", "a", "b"], dtype="category")349pyarrow_array = pa.Array.from_pandas(categorical_s)350351# The in-memory representation of each category can either be a signed or352# unsigned 8-bit integer. Pandas uses Int8...353int_dict_type = pa.dictionary(index_type=pa.int8(), value_type=pa.utf8())354# ... while DuckDB uses UInt8355uint_dict_type = pa.dictionary(index_type=pa.uint8(), value_type=pa.utf8())356357for t in [int_dict_type, uint_dict_type]:358s = cast("pl.Series", pl.from_arrow(pyarrow_array.cast(t)))359assert_series_equal(360s, pl.Series(["a", "a", "b"]).cast(pl.Categorical), check_names=False361)362363364def test_from_pyarrow_chunked_array() -> None:365column = pa.chunked_array([[1], [2]])366series = pl.Series("column", column)367assert series.to_list() == [1, 2]368369370def test_arrow_list_null_5697() -> None:371# Create a pyarrow table with a list[null] column.372pa_table = pa.table([[[None]]], names=["mycol"])373df = pl.from_arrow(pa_table)374pa_table = df.to_arrow()375# again to polars to test the schema376assert pl.from_arrow(pa_table).schema == {"mycol": pl.List(pl.Null)} # type: ignore[union-attr]377378379def test_from_pyarrow_map() -> None:380pa_table = pa.table(381[[1, 2], [[("a", "something")], [("a", "else"), ("b", "another key")]]],382schema=pa.schema(383[("idx", pa.int16()), ("mapping", pa.map_(pa.string(), pa.string()))]384),385)386387# Convert from an empty table to trigger an ArrowSchema -> native schema388# conversion (checks that ArrowDataType::Map is handled in Rust).389pl.DataFrame(pa_table.slice(0, 0))390391result = pl.DataFrame(pa_table)392assert result.to_dict(as_series=False) == {393"idx": [1, 2],394"mapping": [395[{"key": "a", "value": "something"}],396[{"key": "a", "value": "else"}, {"key": "b", "value": "another key"}],397],398}399400401def test_from_fixed_size_binary_list() -> None:402val = [[b"63A0B1C66575DD5708E1EB2B"]]403arrow_array = pa.array(val, type=pa.list_(pa.binary(24)))404s = cast("pl.Series", pl.from_arrow(arrow_array))405assert s.dtype == pl.List(pl.Binary)406assert s.to_list() == val407408409def test_dataframe_from_repr() -> None:410# round-trip various types411frame = (412pl.LazyFrame(413{414"a": [1, 2, None],415"b": [4.5, 5.23e13, -3.12e12],416"c": ["x", "y", "z"],417"d": [True, False, True],418"e": [None, "", None],419"f": [date(2022, 7, 5), date(2023, 2, 5), date(2023, 8, 5)],420"g": [time(0, 0, 0, 1), time(12, 30, 45), time(23, 59, 59, 999000)],421"h": [422datetime(2022, 7, 5, 10, 30, 45, 4560),423datetime(2023, 10, 12, 20, 3, 8, 11),424None,425],426},427)428.with_columns(429pl.col("c").cast(pl.Categorical),430pl.col("h").cast(pl.Datetime("ns")),431)432.collect()433)434435assert frame.schema == {436"a": pl.Int64,437"b": pl.Float64,438"c": pl.Categorical(),439"d": pl.Boolean,440"e": pl.String,441"f": pl.Date,442"g": pl.Time,443"h": pl.Datetime("ns"),444}445df = cast("pl.DataFrame", pl.from_repr(repr(frame)))446assert_frame_equal(frame, df)447448# empty frame; confirm schema is inferred449df = cast(450"pl.DataFrame",451pl.from_repr(452"""453┌─────┬─────┬─────┬─────┬─────┬───────┐454│ id ┆ q1 ┆ q2 ┆ q3 ┆ q4 ┆ total │455│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │456│ str ┆ i8 ┆ i16 ┆ i32 ┆ i64 ┆ f64 │457╞═════╪═════╪═════╪═════╪═════╪═══════╡458└─────┴─────┴─────┴─────┴─────┴───────┘459"""460),461)462assert df.shape == (0, 6)463assert df.rows() == []464assert df.schema == {465"id": pl.String,466"q1": pl.Int8,467"q2": pl.Int16,468"q3": pl.Int32,469"q4": pl.Int64,470"total": pl.Float64,471}472473# empty frame with no dtypes474df = cast(475"pl.DataFrame",476pl.from_repr(477"""478┌──────┬───────┐479│ misc ┆ other │480╞══════╪═══════╡481└──────┴───────┘482"""483),484)485assert_frame_equal(df, pl.DataFrame(schema={"misc": pl.String, "other": pl.String}))486487# empty frame with a non-standard/blank 'null' in numeric col488df = cast(489"pl.DataFrame",490pl.from_repr(491"""492┌─────┬──────┐493│ c1 ┆ c2 │494│ --- ┆ --- │495│ i32 ┆ f64 │496╞═════╪══════╡497│ │ NULL │498└─────┴──────┘499"""500),501)502assert_frame_equal(503df,504pl.DataFrame(505data=[(None, None)],506schema={"c1": pl.Int32, "c2": pl.Float64},507orient="row",508),509)510511df = cast(512"pl.DataFrame",513pl.from_repr(514"""515# >>> Missing cols with old-style ellipsis, nulls, commented out516# ┌────────────┬─────┬─────┬─────┬─────┬─────┬─────┬─────┬──────┐517# │ dt ┆ c1 ┆ c2 ┆ c3 ┆ ... ┆ c96 ┆ c97 ┆ c98 ┆ c99 │518# │ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ --- ┆ --- │519# │ date ┆ i32 ┆ i32 ┆ i32 ┆ ┆ i64 ┆ i64 ┆ i64 ┆ i64 │520# ╞════════════╪═════╪═════╪═════╪═════╪═════╪═════╪═════╪══════╡521# │ 2023-03-25 ┆ 1 ┆ 2 ┆ 3 ┆ ... ┆ 96 ┆ 97 ┆ 98 ┆ 99 │522# │ 1999-12-31 ┆ 3 ┆ 6 ┆ 9 ┆ ... ┆ 288 ┆ 291 ┆ 294 ┆ null │523# │ null ┆ 9 ┆ 18 ┆ 27 ┆ ... ┆ 864 ┆ 873 ┆ 882 ┆ 891 │524# └────────────┴─────┴─────┴─────┴─────┴─────┴─────┴─────┴──────┘525"""526),527)528assert df.schema == {529"dt": pl.Date,530"c1": pl.Int32,531"c2": pl.Int32,532"c3": pl.Int32,533"c96": pl.Int64,534"c97": pl.Int64,535"c98": pl.Int64,536"c99": pl.Int64,537}538assert df.rows() == [539(date(2023, 3, 25), 1, 2, 3, 96, 97, 98, 99),540(date(1999, 12, 31), 3, 6, 9, 288, 291, 294, None),541(None, 9, 18, 27, 864, 873, 882, 891),542]543544df = cast(545"pl.DataFrame",546pl.from_repr(547"""548# >>> no dtypes:549# ┌────────────┬──────┐550# │ dt ┆ c99 │551# ╞════════════╪══════╡552# │ 2023-03-25 ┆ 99 │553# │ 1999-12-31 ┆ null │554# │ null ┆ 891 │555# └────────────┴──────┘556"""557),558)559assert df.schema == {"dt": pl.Date, "c99": pl.Int64}560assert df.rows() == [561(date(2023, 3, 25), 99),562(date(1999, 12, 31), None),563(None, 891),564]565566df = cast(567"pl.DataFrame",568pl.from_repr(569"""570In [2]: with pl.Config() as cfg:571...: pl.Config.set_tbl_formatting("UTF8_FULL", rounded_corners=True)572...: print(df)573...:574shape: (1, 5)575╭───────────┬────────────┬───┬───────┬────────────────────────────────╮576│ source_ac ┆ source_cha ┆ … ┆ ident ┆ timestamp │577│ tor_id ┆ nnel_id ┆ ┆ --- ┆ --- │578│ --- ┆ --- ┆ ┆ str ┆ datetime[μs, Asia/Tokyo] │579│ i32 ┆ i64 ┆ ┆ ┆ │580╞═══════════╪════════════╪═══╪═══════╪════════════════════════════════╡581│ 123456780 ┆ 9876543210 ┆ … ┆ a:b:c ┆ 2023-03-25 10:56:59.663053 JST │582├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤583│ … ┆ … ┆ … ┆ … ┆ … │584├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤585│ 803065983 ┆ 2055938745 ┆ … ┆ x:y:z ┆ 2023-03-25 12:38:18.050545 JST │586╰───────────┴────────────┴───┴───────┴────────────────────────────────╯587# "Een fluitje van een cent..." :)588"""589),590)591assert df.shape == (2, 4)592assert df.schema == {593"source_actor_id": pl.Int32,594"source_channel_id": pl.Int64,595"ident": pl.String,596"timestamp": pl.Datetime("us", "Asia/Tokyo"),597}598599600def test_dataframe_from_repr_24110() -> None:601df = cast(602"pl.DataFrame",603pl.from_repr("""604shape: (7, 1)605┌──────────────┐606│ time_offset │607│ --- │608│ duration[μs] │609╞══════════════╡610│ -2h │611│ 0µs │612│ 2h │613│ +2h │614└──────────────┘615"""),616)617expected = pl.DataFrame(618{619"time_offset": [620timedelta(hours=-2),621timedelta(),622timedelta(hours=2),623timedelta(hours=2),624]625},626schema={"time_offset": pl.Duration("us")},627)628assert_frame_equal(df, expected)629630631def test_dataframe_from_duckdb_repr() -> None:632df = cast(633"pl.DataFrame",634pl.from_repr(635"""636# misc streaming stats637┌────────────┬───────┬───────────────────┬───┬────────────────┬───────────────────┐638│ As Of │ Rank │ Year to Date Rank │ … │ Days In Top 10 │ Streaming Seconds │639│ date │ int32 │ varchar │ │ int16 │ int128 │640├────────────┼───────┼───────────────────┼───┼────────────────┼───────────────────┤641│ 2025-05-09 │ 1 │ 1 │ … │ 29 │ 1864939402857430 │642│ 2025-05-09 │ 2 │ 2 │ … │ 15 │ 658937443590045 │643│ 2025-05-09 │ 3 │ 3 │ … │ 9 │ 267876522242076 │644└────────────┴───────┴───────────────────┴───┴────────────────┴───────────────────┘645"""646),647)648expected = pl.DataFrame(649{650"As Of": [date(2025, 5, 9), date(2025, 5, 9), date(2025, 5, 9)],651"Rank": [1, 2, 3],652"Year to Date Rank": ["1", "2", "3"],653"Days In Top 10": [29, 15, 9],654"Streaming Seconds": [1864939402857430, 658937443590045, 267876522242076],655},656schema={657"As Of": pl.Date,658"Rank": pl.Int32,659"Year to Date Rank": pl.String,660"Days In Top 10": pl.Int16,661"Streaming Seconds": pl.Int128,662},663)664assert_frame_equal(expected, df)665666667def test_series_from_repr() -> None:668frame = (669pl.LazyFrame(670{671"a": [1, 2, None],672"b": [4.5, 5.5, 6.5],673"c": ["x", "y", "z"],674"d": [True, False, True],675"e": [None, "", None],676"f": [date(2022, 7, 5), date(2023, 2, 5), date(2023, 8, 5)],677"g": [time(0, 0, 0, 1), time(12, 30, 45), time(23, 59, 59, 999000)],678"h": [679datetime(2022, 7, 5, 10, 30, 45, 4560),680datetime(2023, 10, 12, 20, 3, 8, 11),681None,682],683},684)685.with_columns(686pl.col("c").cast(pl.Categorical),687pl.col("h").cast(pl.Datetime("ns")),688)689.collect()690)691692for col in frame.columns:693s = cast("pl.Series", pl.from_repr(repr(frame[col])))694assert_series_equal(s, frame[col])695696s = cast(697"pl.Series",698pl.from_repr(699"""700Out[3]:701shape: (3,)702Series: 's' [str]703[704"a"705…706"c"707]708"""709),710)711assert_series_equal(s, pl.Series("s", ["a", "c"]))712713s = cast(714"pl.Series",715pl.from_repr(716"""717Series: 'flt' [f32]718[719]720"""721),722)723assert_series_equal(s, pl.Series("flt", [], dtype=pl.Float32))724725s = cast(726"pl.Series",727pl.from_repr(728"""729Series: 'flt' [f64]730[731null732+inf733-inf734inf7350.0736NaN737]738>>> print("stuff")739"""740),741)742inf, nan = float("inf"), float("nan")743assert_series_equal(744s,745pl.Series(746name="flt",747dtype=pl.Float64,748values=[None, inf, -inf, inf, 0.0, nan],749),750)751752753def test_dataframe_from_repr_custom_separators() -> None:754# repr created with custom digit-grouping755# and non-default group/decimal separators756df = cast(757"pl.DataFrame",758pl.from_repr(759"""760┌───────────┬────────────┐761│ x ┆ y │762│ --- ┆ --- │763│ i32 ┆ f64 │764╞═══════════╪════════════╡765│ 123.456 ┆ -10.000,55 │766│ -9.876 ┆ 10,0 │767│ 9.999.999 ┆ 8,5e8 │768└───────────┴────────────┘769"""770),771)772assert_frame_equal(773df,774pl.DataFrame(775{776"x": [123456, -9876, 9999999],777"y": [-10000.55, 10.0, 850000000.0],778},779schema={"x": pl.Int32, "y": pl.Float64},780),781)782783784def test_sliced_struct_from_arrow() -> None:785# Create a dataset with 3 rows786tbl = pa.Table.from_arrays(787arrays=[788pa.StructArray.from_arrays(789arrays=[790pa.array([1, 2, 3], pa.int32()),791pa.array(["foo", "bar", "baz"], pa.utf8()),792],793names=["a", "b"],794)795],796names=["struct_col"],797)798799# slice the table800# check if FFI correctly reads sliced801result = cast("pl.DataFrame", pl.from_arrow(tbl.slice(1, 2)))802assert result.to_dict(as_series=False) == {803"struct_col": [{"a": 2, "b": "bar"}, {"a": 3, "b": "baz"}]804}805806result = cast("pl.DataFrame", pl.from_arrow(tbl.slice(1, 1)))807assert result.to_dict(as_series=False) == {"struct_col": [{"a": 2, "b": "bar"}]}808809810def test_from_arrow_invalid_time_zone() -> None:811arr = pa.array(812[datetime(2021, 1, 1, 0, 0, 0, 0)],813type=pa.timestamp("ns", tz="this-is-not-a-time-zone"),814)815with pytest.raises(816ComputeError, match=r"unable to parse time zone: 'this-is-not-a-time-zone'"817):818pl.from_arrow(arr)819820821@pytest.mark.parametrize(822("fixed_offset", "etc_tz"),823[824("+10:00", "Etc/GMT-10"),825("10:00", "Etc/GMT-10"),826("-10:00", "Etc/GMT+10"),827("+05:00", "Etc/GMT-5"),828("05:00", "Etc/GMT-5"),829("-05:00", "Etc/GMT+5"),830],831)832def test_from_arrow_fixed_offset(fixed_offset: str, etc_tz: str) -> None:833arr = pa.array(834[datetime(2021, 1, 1, 0, 0, 0, 0)],835type=pa.timestamp("us", tz=fixed_offset),836)837result = cast("pl.Series", pl.from_arrow(arr))838expected = pl.Series(839[datetime(2021, 1, 1, tzinfo=timezone.utc)]840).dt.convert_time_zone(etc_tz)841assert_series_equal(result, expected)842843844def test_from_avro_valid_time_zone_13032() -> None:845arr = pa.array(846[datetime(2021, 1, 1, 0, 0, 0, 0)], type=pa.timestamp("ns", tz="00:00")847)848result = cast("pl.Series", pl.from_arrow(arr))849expected = pl.Series([datetime(2021, 1, 1)], dtype=pl.Datetime("ns", "UTC"))850assert_series_equal(result, expected)851852853def test_from_numpy_different_resolution_15991() -> None:854result = pl.Series(855np.array(["2020-01-01"], dtype="datetime64[ns]"), dtype=pl.Datetime("us")856)857expected = pl.Series([datetime(2020, 1, 1)], dtype=pl.Datetime("us"))858assert_series_equal(result, expected)859860861def test_from_numpy_different_resolution_invalid() -> None:862with pytest.raises(ValueError, match="Please cast"):863pl.Series(864np.array(["2020-01-01"], dtype="datetime64[s]"), dtype=pl.Datetime("us")865)866867868def test_compat_level(plmonkeypatch: PlMonkeyPatch) -> None:869# change these if compat level bumped870plmonkeypatch.setenv("POLARS_WARN_UNSTABLE", "1")871oldest = CompatLevel.oldest()872assert oldest is CompatLevel.oldest() # test singleton873assert oldest._version == 0874with pytest.warns(UnstableWarning):875newest = CompatLevel.newest()876with pytest.warns(UnstableWarning):877assert newest is CompatLevel.newest()878assert newest._version == 1879880str_col = pl.Series(["awd"])881bin_col = pl.Series([b"dwa"])882assert str_col._newest_compat_level() == newest._version883assert isinstance(str_col.to_arrow(), pa.LargeStringArray)884assert isinstance(str_col.to_arrow(compat_level=oldest), pa.LargeStringArray)885assert isinstance(str_col.to_arrow(compat_level=newest), pa.StringViewArray)886assert isinstance(bin_col.to_arrow(), pa.LargeBinaryArray)887assert isinstance(bin_col.to_arrow(compat_level=oldest), pa.LargeBinaryArray)888assert isinstance(bin_col.to_arrow(compat_level=newest), pa.BinaryViewArray)889890df = pl.DataFrame({"str_col": str_col, "bin_col": bin_col})891assert isinstance(df.to_arrow()["str_col"][0], pa.LargeStringScalar)892assert isinstance(893df.to_arrow(compat_level=oldest)["str_col"][0], pa.LargeStringScalar894)895assert isinstance(896df.to_arrow(compat_level=newest)["str_col"][0], pa.StringViewScalar897)898assert isinstance(df.to_arrow()["bin_col"][0], pa.LargeBinaryScalar)899assert isinstance(900df.to_arrow(compat_level=oldest)["bin_col"][0], pa.LargeBinaryScalar901)902assert isinstance(903df.to_arrow(compat_level=newest)["bin_col"][0], pa.BinaryViewScalar904)905906907def test_df_pycapsule_interface() -> None:908df = pl.DataFrame(909{910"a": [1, 2, 3],911"b": ["a", "b", "c"],912"c": ["fooooooooooooooooooooo", "bar", "looooooooooooooooong string"],913}914)915916capsule_df = PyCapsuleStreamHolder(df)917out = pa.table(capsule_df)918assert df.shape == out.shape919assert df.schema.names() == out.schema.names920921schema_overrides = {"a": pl.Int128}922expected_schema = pl.Schema([("a", pl.Int128), ("b", pl.String), ("c", pl.String)])923924for arrow_obj in (925pl.from_arrow(capsule_df), # capsule926out, # table loaded from capsule927):928df_res = pl.from_arrow(arrow_obj, schema_overrides=schema_overrides)929assert expected_schema == df_res.schema # type: ignore[union-attr]930assert isinstance(df_res, pl.DataFrame)931assert df.equals(df_res)932933934def test_misaligned_nested_arrow_19097() -> None:935a = pl.Series("a", [1, 2, 3])936a = a.slice(1, 2) # by slicing we offset=1 the values937a = a.replace(2, None) # then we add a validity mask with offset=0938a = a.reshape((2, 1)) # then we make it nested939assert_series_equal(pl.Series("a", a.to_arrow()), a)940941942def test_arrow_roundtrip_lex_cat_20288() -> None:943tb = pl.Series("a", ["A", "B"], pl.Categorical()).to_frame().to_arrow()944df = pl.from_arrow(tb)945assert isinstance(df, pl.DataFrame)946dt = df.schema["a"]947assert isinstance(dt, pl.Categorical)948assert dt.ordering == "lexical"949950951def test_from_arrow_20271() -> None:952df = pl.from_arrow(953pa.table({"b": pa.DictionaryArray.from_arrays([0, 1], ["D", "E"])})954)955assert isinstance(df, pl.DataFrame)956assert_series_equal(957df.to_series(),958pl.Series("b", ["D", "E"], pl.Categorical),959)960961962def test_to_arrow_empty_chunks_20627() -> None:963df = pl.concat(2 * [pl.Series([1])]).filter(pl.Series([False, True])).to_frame()964assert df.to_arrow().shape == (1, 1)965966967def test_from_arrow_recorbatch() -> None:968n_legs = pa.array([2, 2, 4, 4, 5, 100])969animals = pa.array(970["Flamingo", "Parrot", "Dog", "Horse", "Brittle stars", "Centipede"]971)972names = ["n_legs", "animals"]973record_batch = pa.RecordBatch.from_arrays([n_legs, animals], names=names)974assert_frame_equal(975pl.DataFrame(record_batch),976pl.DataFrame({"n_legs": n_legs, "animals": animals}),977)978979980def test_from_arrow_map_containing_timestamp_23658() -> None:981arrow_tbl = pa.Table.from_pydict(982{983"column_1": [984[985{986"field_1": [987{"key": 1, "value": datetime(2025, 1, 1)},988{"key": 2, "value": datetime(2025, 1, 2)},989{"key": 2, "value": None},990]991},992{"field_1": []},993None,994]995],996},997schema=pa.schema(998[999(1000"column_1",1001pa.list_(1002pa.struct(1003[1004("field_1", pa.map_(pa.int32(), pa.timestamp("ms"))),1005]1006)1007),1008)1009]1010),1011)10121013expect = pl.DataFrame(1014{1015"column_1": [1016[1017{1018"field_1": [1019{"key": 1, "value": datetime(2025, 1, 1)},1020{"key": 2, "value": datetime(2025, 1, 2)},1021{"key": 2, "value": None},1022]1023},1024{"field_1": []},1025None,1026]1027],1028},1029schema={1030"column_1": pl.List(1031pl.Struct(1032{1033"field_1": pl.List(1034pl.Struct({"key": pl.Int32, "value": pl.Datetime("ms")})1035)1036}1037)1038)1039},1040)10411042out = pl.DataFrame(arrow_tbl)1043assert_frame_equal(out, expect)104410451046def test_schema_constructor_from_schema_capsule() -> None:1047arrow_schema = pa.schema(1048[pa.field("test", pa.map_(pa.int32(), pa.timestamp("ms")))]1049)10501051assert pl.Schema(arrow_schema) == {1052"test": pl.List(pl.Struct({"key": pl.Int32, "value": pl.Datetime("ms")}))1053}10541055# Test __arrow_c_schema__ implementation on `pl.Schema`1056assert pa.schema(pl.Schema({"x": pl.Int32})) == pa.schema(1057[pa.field("x", pa.int32())]1058)10591060arrow_schema = pa.schema([pa.field("a", pa.int32()), pa.field("a", pa.int32())])10611062with pytest.raises(1063DuplicateError,1064match="arrow schema contained duplicate name: a",1065):1066pl.Schema(arrow_schema)10671068with pytest.raises(1069ValueError,1070match=r"object passed to pl.Schema did not return struct dtype: object: pyarrow\.Field<a: int32>, dtype: Int32",1071):1072pl.Schema(pa.field("a", pa.int32()))10731074assert pl.Schema([pa.field("a", pa.int32()), pa.field("b", pa.string())]) == {1075"a": pl.Int32,1076"b": pl.String,1077}10781079with pytest.raises(1080DuplicateError,1081match=r"iterable passed to pl\.Schema contained duplicate name 'a'",1082):1083pl.Schema([pa.field("a", pa.int32()), pa.field("a", pa.int64())])108410851086def test_to_arrow_24142() -> None:1087df = pl.DataFrame({"a": object(), "b": "any string or bytes"})1088df.to_arrow(compat_level=CompatLevel.oldest())108910901091def test_pycapsule_stream_interface_all_types() -> None:1092"""Test all data types via Arrow C Stream PyCapsule interface."""1093import datetime1094from decimal import Decimal10951096df = pl.DataFrame(1097[1098pl.Series("bool", [True, False, None], dtype=pl.Boolean),1099pl.Series("int8", [1, 2, None], dtype=pl.Int8),1100pl.Series("int16", [1, 2, None], dtype=pl.Int16),1101pl.Series("int32", [1, 2, None], dtype=pl.Int32),1102pl.Series("int64", [1, 2, None], dtype=pl.Int64),1103pl.Series("uint8", [1, 2, None], dtype=pl.UInt8),1104pl.Series("uint16", [1, 2, None], dtype=pl.UInt16),1105pl.Series("uint32", [1, 2, None], dtype=pl.UInt32),1106pl.Series("uint64", [1, 2, None], dtype=pl.UInt64),1107pl.Series(1108"float32",1109[1.100000023841858, 2.200000047683716, None],1110dtype=pl.Float32,1111),1112pl.Series("float64", [1.1, 2.2, None], dtype=pl.Float64),1113pl.Series("string", ["hello", "world", None], dtype=pl.String),1114pl.Series("binary", [b"hello", b"world", None], dtype=pl.Binary),1115pl.Series(1116"decimal",1117[Decimal("1.23"), Decimal("4.56"), None],1118dtype=pl.Decimal(precision=10, scale=2),1119),1120pl.Series(1121"date",1122[datetime.date(2023, 1, 1), datetime.date(2023, 1, 2), None],1123dtype=pl.Date,1124),1125pl.Series(1126"datetime",1127[1128datetime.datetime(2023, 1, 1, 12, 0),1129datetime.datetime(2023, 1, 2, 13, 30),1130None,1131],1132dtype=pl.Datetime(time_unit="us", time_zone=None),1133),1134pl.Series(1135"time",1136[datetime.time(12, 0), datetime.time(13, 30), None],1137dtype=pl.Time,1138),1139pl.Series(1140"duration_us",1141[datetime.timedelta(days=1), datetime.timedelta(seconds=7200), None],1142dtype=pl.Duration(time_unit="us"),1143),1144pl.Series(1145"duration_ms",1146[datetime.timedelta(microseconds=100000), datetime.timedelta(0), None],1147dtype=pl.Duration(time_unit="ms"),1148),1149pl.Series(1150"duration_ns",1151[1152datetime.timedelta(seconds=1),1153datetime.timedelta(microseconds=1000),1154None,1155],1156dtype=pl.Duration(time_unit="ns"),1157),1158pl.Series(1159"categorical", ["apple", "banana", "apple"], dtype=pl.Categorical1160),1161pl.Series(1162"categorical_named",1163["apple", "banana", "apple"],1164dtype=pl.Categorical(pl.Categories(name="test")),1165),1166]1167)11681169assert_frame_equal(1170df.map_columns(1171pl.selectors.all(), lambda s: pl.Series(PyCapsuleStreamHolder(s))1172),1173df,1174)11751176assert_frame_equal(1177df.map_columns(1178pl.selectors.all(),1179lambda s: (1180pl.Series(1181PyCapsuleStreamHolder(pl.select(pl.struct(pl.lit(s))).to_series())1182)1183.struct.unnest()1184.to_series()1185),1186),1187df,1188)11891190assert_frame_equal(1191df.map_columns(1192pl.selectors.all(),1193lambda s: pl.Series(PyCapsuleStreamHolder(s.implode())).explode(),1194),1195df,1196)11971198assert_frame_equal(1199df.map_columns(1200pl.selectors.all(),1201lambda s: pl.Series(PyCapsuleStreamHolder(s.reshape((3, 1)))).reshape((3,)),1202),1203df,1204)12051206assert_frame_equal(pl.DataFrame(PyCapsuleStreamHolder(df)), df)1207assert_frame_equal(1208pl.DataFrame(PyCapsuleStreamHolder(df.select(pl.struct("*")))).unnest("*"), df1209)1210assert_frame_equal(1211pl.DataFrame(PyCapsuleStreamHolder(df.select(pl.all().implode()))).explode("*"),1212df,1213)1214assert_frame_equal(1215pl.DataFrame(PyCapsuleStreamHolder(df.select(pl.all().reshape((3, 1))))).select(1216pl.all().reshape((3,))1217),1218df,1219)122012211222def pyarrow_table_to_ipc_bytes(tbl: pa.Table) -> bytes:1223f = io.BytesIO()1224batches = tbl.to_batches()12251226with pa.ipc.new_file(f, batches[0].schema) as writer:1227for batch in batches:1228writer.write_batch(batch)12291230return f.getvalue()123112321233@pytest.mark.write_disk1234def test_month_day_nano_from_ffi_15969(plmonkeypatch: PlMonkeyPatch) -> None:1235import datetime12361237def new_interval_scalar(months: int, days: int, nanoseconds: int) -> pa.Scalar:1238return pa.scalar((months, days, nanoseconds), type=pa.month_day_nano_interval())12391240arrow_tbl = pa.Table.from_pydict(1241{1242"interval": [1243new_interval_scalar(1, 0, 0),1244new_interval_scalar(0, 1, 0),1245new_interval_scalar(0, 0, 1_000),1246new_interval_scalar(1, 1, 1_000_001_000),1247new_interval_scalar(-1, 0, 0),1248new_interval_scalar(0, -1, 0),1249new_interval_scalar(0, 0, -1_000),1250new_interval_scalar(-1, -1, -1_000_001_000),1251new_interval_scalar(3558, 0, 0),1252new_interval_scalar(-3558, 0, 0),1253new_interval_scalar(1, -1, 1_999_999_000),1254]1255},1256schema=pa.schema([pa.field("interval", pa.month_day_nano_interval())]),1257)12581259ipc_bytes = pyarrow_table_to_ipc_bytes(arrow_tbl)12601261import_err_msg = (1262"could not import from `month_day_nano_interval` type. "1263"Hint: This can be imported by setting "1264"POLARS_IMPORT_INTERVAL_AS_STRUCT=1 in the environment. "1265"Note however that this is unstable functionality "1266"that may change at any time."1267)12681269with pytest.raises(PanicException, match=import_err_msg):1270pl.scan_ipc(ipc_bytes).collect_schema()12711272with pytest.raises(PanicException, match=import_err_msg):1273pl.scan_ipc(ipc_bytes).collect()12741275with pytest.raises(PanicException, match=import_err_msg):1276pl.DataFrame(1277pa.Table.from_pydict(1278{"interval": pa.array([], type=pa.month_day_nano_interval())}1279)1280)12811282with pytest.raises(ComputeError, match=import_err_msg):1283pl.Series(pa.array([], type=pa.month_day_nano_interval()))12841285plmonkeypatch.setenv("POLARS_IMPORT_INTERVAL_AS_STRUCT", "1")12861287expect = pl.DataFrame(1288[1289pl.Series(1290"interval",1291[1292{"months": 1, "days": 0, "nanoseconds": datetime.timedelta(0)},1293{"months": 0, "days": 1, "nanoseconds": datetime.timedelta(0)},1294{1295"months": 0,1296"days": 0,1297"nanoseconds": datetime.timedelta(microseconds=1),1298},1299{1300"months": 1,1301"days": 1,1302"nanoseconds": datetime.timedelta(seconds=1, microseconds=1),1303},1304{"months": -1, "days": 0, "nanoseconds": datetime.timedelta(0)},1305{"months": 0, "days": -1, "nanoseconds": datetime.timedelta(0)},1306{1307"months": 0,1308"days": 0,1309"nanoseconds": datetime.timedelta(1310days=-1, seconds=86399, microseconds=9999991311),1312},1313{1314"months": -1,1315"days": -1,1316"nanoseconds": datetime.timedelta(1317days=-1, seconds=86398, microseconds=9999991318),1319},1320{"months": 3558, "days": 0, "nanoseconds": datetime.timedelta(0)},1321{"months": -3558, "days": 0, "nanoseconds": datetime.timedelta(0)},1322{1323"months": 1,1324"days": -1,1325"nanoseconds": datetime.timedelta(1326seconds=1, microseconds=9999991327),1328},1329],1330dtype=pl.Struct(1331{1332"months": pl.Int32,1333"days": pl.Int32,1334"nanoseconds": pl.Duration(time_unit="ns"),1335}1336),1337),1338]1339)13401341assert_frame_equal(pl.DataFrame(arrow_tbl), expect)1342assert_series_equal(1343pl.Series(arrow_tbl.column(0)).alias("interval"), expect.to_series()1344)13451346# Test IPC scan1347assert pl.scan_ipc(ipc_bytes).collect_schema() == {1348"interval": pl.Struct(1349{1350"months": pl.Int32,1351"days": pl.Int32,1352"nanoseconds": pl.Duration(time_unit="ns"),1353}1354)1355}1356assert_frame_equal(pl.scan_ipc(ipc_bytes).collect(), expect)13571358assert_frame_equal(1359pl.DataFrame(1360pa.Table.from_pydict(1361{"interval": pa.array([], type=pa.month_day_nano_interval())}1362)1363),1364pl.DataFrame(1365schema={1366"interval": pl.Struct(1367{1368"months": pl.Int32,1369"days": pl.Int32,1370"nanoseconds": pl.Duration(time_unit="ns"),1371}1372)1373}1374),1375)13761377assert_series_equal(1378pl.Series(pa.array([], type=pa.month_day_nano_interval())),1379pl.Series(1380dtype=pl.Struct(1381{1382"months": pl.Int32,1383"days": pl.Int32,1384"nanoseconds": pl.Duration(time_unit="ns"),1385}1386)1387),1388)13891390f = io.BytesIO()13911392# TODO: Add Parquet round-trip test if this starts working.1393with pytest.raises(pa.ArrowNotImplementedError):1394pq.write_table(arrow_tbl, f)139513961397def test_schema_to_arrow_15563() -> None:1398assert pl.Schema({"x": pl.String}).to_arrow() == pa.schema(1399[pa.field("x", pa.string_view())]1400)14011402assert pl.Schema({"x": pl.String}).to_arrow(1403compat_level=CompatLevel.oldest()1404) == pa.schema([pa.field("x", pa.large_string())])140514061407def test_0_width_df_roundtrip() -> None:1408assert pl.DataFrame(height=(1 << 32) - 1).to_numpy().shape == ((1 << 32) - 1, 0)1409assert pl.DataFrame(np.zeros((10, 0))).shape == (10, 0)14101411arrow_table = pl.DataFrame(height=(1 << 32) - 1).to_arrow()1412assert arrow_table.shape == ((1 << 32) - 1, 0)1413assert pl.DataFrame(arrow_table).shape == ((1 << 32) - 1, 0)14141415pandas_df = pl.DataFrame(height=(1 << 32) - 1).to_pandas()1416assert pandas_df.shape == ((1 << 32) - 1, 0)1417assert pl.DataFrame(pandas_df).shape == ((1 << 32) - 1, 0)14181419df = pl.DataFrame(height=5)14201421assert pl.DataFrame.deserialize(df.serialize()).shape == (5, 0)1422assert pl.LazyFrame.deserialize(df.lazy().serialize()).collect().shape == (5, 0)14231424for file_format in ["parquet", "ipc", "ndjson"]:1425f = io.BytesIO()1426getattr(pl.DataFrame, f"write_{file_format}")(df, f)1427f.seek(0)1428assert getattr(pl, f"read_{file_format}")(f).shape == (5, 0)14291430f = io.BytesIO()1431getattr(pl.LazyFrame, f"sink_{file_format}")(df.lazy(), f)1432f.seek(0)1433assert getattr(pl, f"scan_{file_format}")(f).collect().shape == (5, 0)14341435f = io.BytesIO()1436pl.LazyFrame().sink_csv(f)1437v = f.getvalue()1438assert v == b"\n"14391440with pytest.raises(1441InvalidOperationError,1442match=r"cannot sink 0-width DataFrame with non-zero height \(1\) to CSV",1443):1444pl.LazyFrame(height=1).sink_csv(io.BytesIO())144514461447