Path: blob/main/py-polars/tests/unit/series/buffers/test_from_buffers.py
6940 views
from __future__ import annotations12from datetime import datetime3from zoneinfo import ZoneInfo45import pytest6from hypothesis import given78import polars as pl9from polars.exceptions import PanicException10from polars.testing import assert_series_equal11from polars.testing.parametric import series12from tests.unit.conftest import NUMERIC_DTYPES131415@given(16s=series(17allowed_dtypes=[*NUMERIC_DTYPES, pl.Boolean],18allow_chunks=False,19)20)21def test_series_from_buffers_numeric_with_validity(s: pl.Series) -> None:22validity = s.is_not_null()23result = pl.Series._from_buffers(s.dtype, data=s, validity=validity)24assert_series_equal(s, result)252627@given(28s=series(29allowed_dtypes=[*NUMERIC_DTYPES, pl.Boolean],30allow_chunks=False,31allow_null=False,32)33)34def test_series_from_buffers_numeric(s: pl.Series) -> None:35result = pl.Series._from_buffers(s.dtype, data=s)36assert_series_equal(s, result)373839@given(40s=series(41allowed_dtypes=[pl.Date, pl.Time, pl.Datetime, pl.Duration],42allow_chunks=False,43)44)45def test_series_from_buffers_temporal_with_validity(s: pl.Series) -> None:46validity = s.is_not_null()47physical = pl.Int32 if s.dtype == pl.Date else pl.Int6448data = s.cast(physical)49result = pl.Series._from_buffers(s.dtype, data=data, validity=validity)50assert_series_equal(s, result)515253def test_series_from_buffers_int() -> None:54dtype = pl.UInt1655data = pl.Series([97, 98, 99, 195], dtype=dtype)56validity = pl.Series([True, True, False, True])5758result = pl.Series._from_buffers(dtype, data=data, validity=validity)5960expected = pl.Series([97, 98, None, 195], dtype=dtype)61assert_series_equal(result, expected)626364def test_series_from_buffers_float() -> None:65dtype = pl.Float6466data = pl.Series([0.0, 1.0, -1.0, float("nan"), float("inf")], dtype=dtype)67validity = pl.Series([True, True, False, True, True])6869result = pl.Series._from_buffers(dtype, data=data, validity=validity)7071expected = pl.Series([0.0, 1.0, None, float("nan"), float("inf")], dtype=dtype)72assert_series_equal(result, expected)737475def test_series_from_buffers_boolean() -> None:76dtype = pl.Boolean77data = pl.Series([True, False, True])78validity = pl.Series([True, True, False])7980result = pl.Series._from_buffers(dtype, data=data, validity=validity)8182expected = pl.Series([True, False, None])83assert_series_equal(result, expected)848586def test_series_from_buffers_datetime() -> None:87dtype = pl.Datetime(time_zone="Europe/Amsterdam")88tzinfo = ZoneInfo("Europe/Amsterdam")89data = pl.Series(90[91datetime(2022, 2, 10, 6, tzinfo=tzinfo),92datetime(2022, 2, 11, 12, tzinfo=tzinfo),93datetime(2022, 2, 12, 18, tzinfo=tzinfo),94],95dtype=dtype,96).cast(pl.Int64)97validity = pl.Series([True, False, True])9899result = pl.Series._from_buffers(dtype, data=data, validity=validity)100101expected = pl.Series(102[103datetime(2022, 2, 10, 6, tzinfo=tzinfo),104None,105datetime(2022, 2, 12, 18, tzinfo=tzinfo),106],107dtype=dtype,108)109assert_series_equal(result, expected)110111112def test_series_from_buffers_string() -> None:113dtype = pl.String114data = pl.Series([97, 98, 99, 195, 169, 195, 162, 195, 167], dtype=pl.UInt8)115validity = pl.Series([True, True, False, True])116offsets = pl.Series([0, 1, 3, 3, 9], dtype=pl.Int64)117118result = pl.Series._from_buffers(dtype, data=[data, offsets], validity=validity)119120expected = pl.Series(["a", "bc", None, "éâç"], dtype=dtype)121assert_series_equal(result, expected)122123124def test_series_from_buffers_enum() -> None:125dtype = pl.Enum(["a", "b", "c"])126data = pl.Series([0, 1, 0, 2], dtype=pl.UInt8)127validity = pl.Series([True, True, False, True])128129result = pl.Series._from_buffers(dtype, data=data, validity=validity)130131expected = pl.Series(["a", "b", None, "c"], dtype=dtype)132assert_series_equal(result, expected)133134135def test_series_from_buffers_sliced() -> None:136dtype = pl.Int64137data = pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=dtype)138data = data[5:]139validity = pl.Series([True, True, True, True, False, True, False, False, True])140validity = validity[5:]141142result = pl.Series._from_buffers(dtype, data=data, validity=validity)143144expected = pl.Series([6, None, None, 9], dtype=dtype)145assert_series_equal(result, expected)146147148def test_series_from_buffers_unsupported_validity() -> None:149s = pl.Series([1, 2, 3])150151msg = "validity buffer must have data type Boolean, got Int64"152with pytest.raises(TypeError, match=msg):153pl.Series._from_buffers(pl.Date, data=s, validity=s)154155156def test_series_from_buffers_unsupported_offsets() -> None:157data = pl.Series([97, 98, 99, 195, 169, 195, 162, 195, 167], dtype=pl.UInt8)158offsets = pl.Series([0, 1, 3, 3, 9], dtype=pl.Int8)159160msg = "offsets buffer must have data type Int64, got Int8"161with pytest.raises(TypeError, match=msg):162pl.Series._from_buffers(pl.String, data=[data, offsets])163164165def test_series_from_buffers_offsets_do_not_match_data() -> None:166data = pl.Series([97, 98, 99, 195, 169, 195, 162, 195, 167], dtype=pl.UInt8)167offsets = pl.Series([0, 1, 3, 3, 9, 11], dtype=pl.Int64)168169msg = "offsets must not exceed the values length"170with pytest.raises(PanicException, match=msg):171pl.Series._from_buffers(pl.String, data=[data, offsets])172173174def test_series_from_buffers_no_buffers() -> None:175msg = "`data` input to `_from_buffers` must contain at least one buffer"176with pytest.raises(TypeError, match=msg):177pl.Series._from_buffers(pl.Int32, data=[])178179180