Path: blob/main/py-polars/tests/unit/constructors/test_any_value_fallbacks.py
6939 views
# TODO: Replace direct calls to fallback constructors with calls to the Series
# constructor once the Python-side logic has been updated
from __future__ import annotations

from datetime import date, datetime, time, timedelta
from decimal import Decimal as D
from typing import TYPE_CHECKING, Any

import pytest

import polars as pl
from polars._plr import PySeries
from polars._utils.wrap import wrap_s
from polars.testing import assert_frame_equal

if TYPE_CHECKING:
    from polars._typing import PolarsDataType


@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (pl.Datetime, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None]),
        (pl.Duration, [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Categorical, ["a", "b", "a", None]),
        (pl.Enum(["a", "b"]), ["a", "b", "a", None]),
        (pl.Decimal(10, 3), [D("12.345"), D("0.789"), None]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": -1, "b": "bar"}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_with_dtype_strict(
    dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Values that already match `dtype` round-trip unchanged for either `strict`."""
    result = wrap_s(
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=strict)
    )
    assert result.to_list() == values


@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [1.0, 2.0]),
        (pl.Float64, [1, 2]),
        (pl.Boolean, [0, 1]),
        (pl.Binary, ["123", "xyz"]),
        (pl.String, [b"123", b"xyz"]),
        (pl.Date, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Time, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime, [date(1970, 1, 1), date(2020, 12, 31)]),
        (pl.Datetime("ms"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime("ns"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Duration, [0, 1200]),
        (pl.Duration("ms"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Duration("ns"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Categorical, [0, 1, 0]),
        (pl.Enum(["a", "b"]), [0, 1, 0]),
        (pl.Decimal(10, 3), [100, 200]),
        (pl.Decimal(5, 3), [D("1.2345")]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 2.0, "b": "bar"}],
        ),
    ],
)
def test_fallback_with_dtype_strict_failure(
    dtype: PolarsDataType, values: list[Any]
) -> None:
    """Strict construction raises `TypeError` for values not matching `dtype`."""
    with pytest.raises(TypeError, match="unexpected value"):
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)


@pytest.mark.parametrize(
    ("dtype", "values", "expected"),
    [
        (
            pl.Int64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0, 1, 0, -1, 0, 2, 1, 5, None],
        ),
        (
            pl.Float64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0.0, 1.0, 0.0, -1.0, 0.0, 2.5, 1.0, 5.0, None],
        ),
        (
            pl.Boolean,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 1), "true"],
            [False, True, False, True, False, True, None, None],
        ),
        (
            pl.Binary,
            [b"123", "xyz", 100, True, None],
            [b"123", b"xyz", None, None, None],
        ),
        (
            pl.String,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Date,
            ["xyz", 1, 2.5, date(1970, 1, 1), datetime(2000, 1, 1, 12), True, None],
            [
                None,
                date(1970, 1, 2),
                date(1970, 1, 3),
                date(1970, 1, 1),
                date(2000, 1, 1),
                None,
                None,
            ],
        ),
        (
            pl.Time,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                time(0, 0),
                time(0, 0),
                None,
                time(12, 0),
                time(12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Datetime,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                datetime(1970, 1, 1, microsecond=1),
                datetime(1970, 1, 1, microsecond=2),
                datetime(1970, 1, 1),
                None,
                datetime(2000, 1, 1, 12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Duration,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                timedelta(microseconds=1),
                timedelta(microseconds=2),
                None,
                timedelta(hours=12),
                None,
                timedelta(hours=5),
                None,
                None,
            ],
        ),
        (
            pl.Categorical,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Enum(["a", "b"]),
            ["a", "b", "c", 1, 2, None],
            ["a", "b", None, None, None, None],
        ),
        (
            pl.Decimal(5, 3),
            [
                D("12"),
                D("1.2345"),
                # D("123456"),
                False,
                True,
                0,
                -1,
                0.0,
                2.5,
                date(1970, 1, 2),
                "5",
                "xyz",
            ],
            [
                D("12.000"),
                None,
                # None,
                None,
                None,
                D("0.000"),
                D("-1.000"),
                None,
                None,
                None,
                None,
                None,
            ],
        ),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 1_000, "b": 2.0}],
            [{"a": 1, "b": "foo"}, {"a": None, "b": "2.0"}],
        ),
    ],
)
def test_fallback_with_dtype_nonstrict(
    dtype: PolarsDataType, values: list[Any], expected: list[Any]
) -> None:
    """Non-strict construction converts each input or nulls it, as in `expected`."""
    result = wrap_s(
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=False)
    )
    assert result.to_list() == expected


@pytest.mark.parametrize(
    ("expected_dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (
            pl.Datetime("us"),
            [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None],
        ),
        (pl.Duration("us"), [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Decimal(None, 3), [D("12.345"), D("0.789"), None]),
        (pl.Decimal(None, 0), [D("12"), D("56789"), None]),
        (
            pl.Struct({"a": pl.Int64, "b": pl.String, "c": pl.Float64}),
            [{"a": 1, "b": "foo", "c": None}, {"a": -1, "b": "bar", "c": 3.0}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_without_dtype(
    expected_dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Without a dtype, homogeneous values round-trip with the expected dtype."""
    result = wrap_s(PySeries.new_from_any_values("", values, strict=strict))
    assert result.to_list() == values
    assert result.dtype == expected_dtype


@pytest.mark.parametrize(
    "values",
    [
        [1.0, 2],
        [1, 2.0],
        [False, 1],
        [b"123", "xyz"],
        ["123", b"xyz"],
        [date(1970, 1, 1), datetime(2020, 12, 31)],
        [time(0, 0), 1_000],
        [datetime(1970, 1, 1), date(2020, 12, 31)],
        [timedelta(hours=0), 1_000],
        [D("12.345"), 100],
        [D("12.345"), 3.14],
        [{"a": 1, "b": "foo"}, {"a": -1, "b": date(2020, 12, 31)}],
        [{"a": None}, {"a": 1.0}, {"a": 1}],
    ],
)
def test_fallback_without_dtype_strict_failure(values: list[Any]) -> None:
    """Without a dtype, strict construction raises `TypeError` on mixed types."""
    with pytest.raises(TypeError, match="unexpected value"):
        PySeries.new_from_any_values("", values, strict=True)


@pytest.mark.parametrize(
    ("values", "expected", "expected_dtype"),
    [
        ([True, 2], [1, 2], pl.Int64),
        ([1, 2.0], [1.0, 2.0], pl.Float64),
        ([2.0, "c"], ["2.0", "c"], pl.String),
        (
            [date(1970, 1, 1), datetime(2022, 12, 31)],
            [datetime(1970, 1, 1), datetime(2022, 12, 31)],
            pl.Datetime("us"),
        ),
        ([D("3.1415"), 2.51], [3.1415, 2.51], pl.Float64),
        ([D("3.1415"), 100], [D("3.1415"), D("100")], pl.Decimal(None, 4)),
        ([1, 2.0, b"d", date(2022, 1, 1)], [1, 2.0, b"d", date(2022, 1, 1)], pl.Object),
        (
            [
                {"a": 1, "b": "foo", "c": None},
                {"a": 2.0, "b": date(2020, 12, 31), "c": None},
            ],
            [
                {"a": 1.0, "b": "foo", "c": None},
                {"a": 2.0, "b": "2020-12-31", "c": None},
            ],
            pl.Struct({"a": pl.Float64, "b": pl.String, "c": pl.Null}),
        ),
        (
            [{"a": None}, {"a": 1.0}, {"a": 1}],
            [{"a": None}, {"a": 1.0}, {"a": 1.0}],
            pl.Struct({"a": pl.Float64}),
        ),
    ],
)
def test_fallback_without_dtype_nonstrict_mixed_types(
    values: list[Any],
    expected_dtype: PolarsDataType,
    expected: list[Any],
) -> None:
    """Without a dtype, non-strict mode converts mixed values to one result dtype."""
    # pytest binds parametrized arguments by name, so the signature order does
    # not need to match the order of the parametrize tuple above.
    result = wrap_s(PySeries.new_from_any_values("", values, strict=False))
    assert result.dtype == expected_dtype
    assert result.to_list() == expected


def test_fallback_without_dtype_large_int() -> None:
    """`2**128` raises `OverflowError` in strict mode and gives Float64 otherwise."""
    values = [1, 2**128, None]
    with pytest.raises(
        OverflowError,
        match="int value too large for Polars integer types",
    ):
        PySeries.new_from_any_values("", values, strict=True)

    result = wrap_s(PySeries.new_from_any_values("", values, strict=False))
    assert result.dtype == pl.Float64
    assert result.to_list() == [1.0, 340282366920938500000000000000000000000.0, None]


def test_fallback_with_dtype_large_int() -> None:
    """With Int128, `2**128` raises in strict mode and becomes null otherwise."""
    values = [1, 2**128, None]
    with pytest.raises(OverflowError):
        PySeries.new_from_any_values_and_dtype("", values, dtype=pl.Int128, strict=True)

    result = wrap_s(
        PySeries.new_from_any_values_and_dtype(
            "", values, dtype=pl.Int128, strict=False
        )
    )
    assert result.dtype == pl.Int128
    assert result.to_list() == [1, None, None]


def test_fallback_with_dtype_strict_failure_enum_casting() -> None:
    """Strict Enum construction raises `TypeError` for a value outside the categories."""
    dtype = pl.Enum(["a", "b"])
    values = ["a", "b", "c", None]

    with pytest.raises(TypeError, match="attempted to insert 'c'"):
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)


def test_fallback_with_dtype_strict_failure_decimal_precision() -> None:
    """Strict Decimal construction raises `TypeError` when precision is too small."""
    dtype = pl.Decimal(3, 0)
    values = [D("12345")]

    with pytest.raises(
        TypeError, match="decimal precision 3 can't fit values with 5 digits"
    ):
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)


def test_categorical_lit_18874() -> None:
    """A string literal cast to Categorical equals an explicit Categorical column."""
    # NOTE(review): the numeric suffix presumably refers to an upstream issue
    # number; confirm against the project tracker.
    assert_frame_equal(
        pl.DataFrame(
            {"a": [1, 2, 3]},
        ).with_columns(b=pl.lit("foo").cast(pl.Categorical)),
        pl.DataFrame(
            [
                pl.Series("a", [1, 2, 3]),
                pl.Series("b", ["foo"] * 3, pl.Categorical),
            ]
        ),
    )