Path: blob/main/py-polars/tests/unit/interchange/test_dataframe.py
6939 views
from __future__ import annotations

import pytest

import polars as pl
from polars.interchange.dataframe import PolarsDataFrame
from polars.interchange.protocol import CopyNotAllowedError
from polars.testing import assert_frame_equal, assert_series_equal


def test_dataframe_dunder() -> None:
    """Check that ``__dataframe__`` keeps the frame and applies ``allow_copy``."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    dfi = PolarsDataFrame(df)

    assert_frame_equal(dfi._df, df)
    assert dfi._allow_copy is True

    dfi_new = dfi.__dataframe__(allow_copy=False)

    assert_frame_equal(dfi_new._df, df)
    assert dfi_new._allow_copy is False


def test_dataframe_dunder_nan_as_null_not_implemented() -> None:
    """Check that ``nan_as_null=True`` raises ``NotImplementedError``."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    dfi = PolarsDataFrame(df)

    # Both the polars frame and the interchange wrapper must reject the flag.
    with pytest.raises(NotImplementedError, match="has not been implemented"):
        df.__dataframe__(nan_as_null=True)

    with pytest.raises(NotImplementedError, match="has not been implemented"):
        dfi.__dataframe__(nan_as_null=True)


def test_metadata() -> None:
    """Check that ``metadata`` is an empty dict."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    dfi = PolarsDataFrame(df)
    assert dfi.metadata == {}


def test_num_columns() -> None:
    """Check that ``num_columns`` reports the column count."""
    df = pl.DataFrame({"a": [1], "b": [2]})
    dfi = PolarsDataFrame(df)
    assert dfi.num_columns() == 2


def test_num_rows() -> None:
    """Check that ``num_rows`` reports the row count."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    dfi = PolarsDataFrame(df)
    assert dfi.num_rows() == 2


def test_num_chunks() -> None:
    """Check ``num_chunks`` for a fresh frame and for an unrechunked concat."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    dfi = PolarsDataFrame(df)
    assert dfi.num_chunks() == 1

    df2 = pl.concat([df, df], rechunk=False)
    dfi2 = df2.__dataframe__()
    assert dfi2.num_chunks() == 2


def test_column_names() -> None:
    """Check that ``column_names`` returns the names in frame order."""
    df = pl.DataFrame({"a": [1], "b": [2]})
    dfi = PolarsDataFrame(df)
    assert dfi.column_names() == ["a", "b"]


def test_get_column() -> None:
    """Check that ``get_column`` selects a column by position."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    dfi = PolarsDataFrame(df)

    out = dfi.get_column(1)

    expected = pl.Series("b", [3, 4])
    assert_series_equal(out._col, expected)


def test_get_column_by_name() -> None:
    """Check that ``get_column_by_name`` selects a column by name."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4]})
    dfi = PolarsDataFrame(df)

    out = dfi.get_column_by_name("b")

    expected = pl.Series("b", [3, 4])
    assert_series_equal(out._col, expected)


def test_get_columns() -> None:
    """Check that ``get_columns`` yields every column, in order."""
    s1 = pl.Series("a", [1, 2])
    s2 = pl.Series("b", [3, 4])
    df = pl.DataFrame([s1, s2])
    dfi = PolarsDataFrame(df)

    out = list(dfi.get_columns())

    expected = [s1, s2]
    # ``zip`` stops at the shorter input, so check the count explicitly;
    # otherwise a missing column would go unnoticed.
    assert len(out) == len(expected)
    for o, e in zip(out, expected):
        assert_series_equal(o._col, e)


def test_select_columns() -> None:
    """Check that ``select_columns`` keeps the columns at the given positions."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    dfi = PolarsDataFrame(df)

    out = dfi.select_columns([0, 2])

    expected = pl.DataFrame({"a": [1, 2], "c": [5, 6]})
    assert_frame_equal(out._df, expected)


def test_select_columns_nonlist_input() -> None:
    """Check that ``select_columns`` accepts a tuple of positions."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    dfi = PolarsDataFrame(df)

    out = dfi.select_columns((2,))

    expected = pl.DataFrame({"c": [5, 6]})
    assert_frame_equal(out._df, expected)


def test_select_columns_invalid_input() -> None:
    """Check that ``select_columns`` raises ``TypeError`` for a bare int."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    dfi = PolarsDataFrame(df)

    with pytest.raises(TypeError):
        dfi.select_columns(1)  # type: ignore[arg-type]


def test_select_columns_by_name() -> None:
    """Check that ``select_columns_by_name`` keeps the named columns."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    dfi = PolarsDataFrame(df)

    out = dfi.select_columns_by_name(["a", "c"])

    expected = pl.DataFrame({"a": [1, 2], "c": [5, 6]})
    assert_frame_equal(out._df, expected)


def test_select_columns_by_name_invalid_input() -> None:
    """Check that ``select_columns_by_name`` raises ``TypeError`` for an int."""
    df = pl.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
    dfi = PolarsDataFrame(df)

    with pytest.raises(TypeError):
        dfi.select_columns_by_name(1)  # type: ignore[arg-type]


@pytest.mark.parametrize("n_chunks", [None, 2])
def test_get_chunks(n_chunks: int | None) -> None:
    """Check ``get_chunks`` against ``_get_chunks_from_col_chunks``."""
    df1 = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
    df2 = pl.DataFrame({"a": [3], "b": [6]})
    df = pl.concat([df1, df2], rechunk=False)
    dfi = PolarsDataFrame(df)

    out = list(dfi.get_chunks(n_chunks))

    expected = list(dfi._get_chunks_from_col_chunks())
    # ``zip`` stops at the shorter input, so check the count explicitly;
    # otherwise an empty or short result would pass without any comparison.
    assert len(out) == len(expected)
    for o, e in zip(out, expected):
        assert_frame_equal(o._df, e)


def test_get_chunks_invalid_input() -> None:
    """Check that ``get_chunks`` raises ``ValueError`` for 0 and for 3 chunks."""
    df1 = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
    df2 = pl.DataFrame({"a": [3], "b": [6]})
    df = pl.concat([df1, df2], rechunk=False)

    dfi = PolarsDataFrame(df)

    # ``next`` is called inside the context manager: the test expects the
    # error when the first chunk is requested.
    with pytest.raises(ValueError):
        next(dfi.get_chunks(0))

    with pytest.raises(ValueError):
        next(dfi.get_chunks(3))


def test_get_chunks_subdivided_chunks() -> None:
    """Check that asking for 4 chunks from a 2-chunk frame splits each in two."""
    df1 = pl.DataFrame({"a": [1, 2, 3], "b": [6, 7, 8]})
    df2 = pl.DataFrame({"a": [4, 5], "b": [9, 0]})
    df = pl.concat([df1, df2], rechunk=False)

    dfi = PolarsDataFrame(df)
    out = dfi.get_chunks(4)

    chunk1 = next(out)
    expected1 = pl.DataFrame({"a": [1, 2], "b": [6, 7]})
    assert_frame_equal(chunk1._df, expected1)

    chunk2 = next(out)
    expected2 = pl.DataFrame({"a": [3], "b": [8]})
    assert_frame_equal(chunk2._df, expected2)

    chunk3 = next(out)
    expected3 = pl.DataFrame({"a": [4], "b": [9]})
    assert_frame_equal(chunk3._df, expected3)

    chunk4 = next(out)
    expected4 = pl.DataFrame({"a": [5], "b": [0]})
    assert_frame_equal(chunk4._df, expected4)

    with pytest.raises(StopIteration):
        next(out)


def test_get_chunks_zero_copy_fail() -> None:
    """Check that unevenly chunked columns raise when ``allow_copy=False``."""
    col1 = pl.Series([1, 2])
    col2 = pl.concat([pl.Series([3]), pl.Series([4])], rechunk=False)
    df = pl.DataFrame({"a": col1, "b": col2})

    dfi = PolarsDataFrame(df, allow_copy=False)

    with pytest.raises(
        CopyNotAllowedError, match="unevenly chunked columns must be rechunked"
    ):
        next(dfi.get_chunks())


@pytest.mark.parametrize("allow_copy", [True, False])
def test_get_chunks_from_col_chunks_single_chunk(allow_copy: bool) -> None:
    """Check that a single-chunk frame is yielded once, as-is."""
    df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    dfi = PolarsDataFrame(df, allow_copy=allow_copy)
    out = dfi._get_chunks_from_col_chunks()

    chunk1 = next(out)
    assert_frame_equal(chunk1, df)

    with pytest.raises(StopIteration):
        next(out)


@pytest.mark.parametrize("allow_copy", [True, False])
def test_get_chunks_from_col_chunks_even_chunks(allow_copy: bool) -> None:
    """Check that evenly chunked columns yield the original chunk frames."""
    df1 = pl.DataFrame({"a": [1, 2], "b": [4, 5]})
    df2 = pl.DataFrame({"a": [3], "b": [6]})
    df = pl.concat([df1, df2], rechunk=False)

    dfi = PolarsDataFrame(df, allow_copy=allow_copy)
    out = dfi._get_chunks_from_col_chunks()

    chunk1 = next(out)
    assert_frame_equal(chunk1, df1)

    chunk2 = next(out)
    assert_frame_equal(chunk2, df2)

    with pytest.raises(StopIteration):
        next(out)


def test_get_chunks_from_col_chunks_uneven_chunks_allow_copy() -> None:
    """Check the chunks yielded for unevenly chunked columns with copies allowed."""
    col1 = pl.concat([pl.Series([1, 2]), pl.Series([3, 4, 5])], rechunk=False)
    col2 = pl.concat(
        [pl.Series([6, 7]), pl.Series([8]), pl.Series([9, 0])], rechunk=False
    )
    df = pl.DataFrame({"a": col1, "b": col2})

    dfi = PolarsDataFrame(df, allow_copy=True)
    out = dfi._get_chunks_from_col_chunks()

    expected1 = pl.DataFrame({"a": [1, 2], "b": [6, 7]})
    chunk1 = next(out)
    assert_frame_equal(chunk1, expected1)

    expected2 = pl.DataFrame({"a": [3, 4, 5], "b": [8, 9, 0]})
    chunk2 = next(out)
    assert_frame_equal(chunk2, expected2)

    with pytest.raises(StopIteration):
        next(out)


def test_get_chunks_from_col_chunks_uneven_chunks_zero_copy_fails() -> None:
    """Check that the second uneven chunk raises when ``allow_copy=False``."""
    col1 = pl.concat([pl.Series([1, 2]), pl.Series([3, 4, 5])], rechunk=False)
    col2 = pl.concat(
        [pl.Series([6, 7]), pl.Series([8]), pl.Series([9, 0])], rechunk=False
    )
    df = pl.DataFrame({"a": col1, "b": col2})

    dfi = PolarsDataFrame(df, allow_copy=False)
    out = dfi._get_chunks_from_col_chunks()

    # First chunk can be yielded zero copy
    expected1 = pl.DataFrame({"a": [1, 2], "b": [6, 7]})
    chunk1 = next(out)
    assert_frame_equal(chunk1, expected1)

    # Second chunk requires a rechunk of the second column
    with pytest.raises(CopyNotAllowedError, match="columns must be rechunked"):
        next(out)


def test_dataframe_unsupported_types() -> None:
    """Check which operations work on a frame with a list-typed column."""
    df = pl.DataFrame({"a": [[4], [5, 6]]})
    dfi = PolarsDataFrame(df)

    # Generic dataframe operations work fine
    assert dfi.num_rows() == 2

    # Certain column operations also work
    col = dfi.get_column_by_name("a")
    assert col.num_chunks() == 1

    # Error is raised when unsupported operations are requested
    with pytest.raises(ValueError, match="not supported"):
        col.dtype