# Path: blob/main/py-polars/tests/unit/constructors/test_any_value_fallbacks.py
#
# TODO: Replace direct calls to fallback constructors with calls to the Series
# constructor once the Python-side logic has been updated
from __future__ import annotations

from datetime import date, datetime, time, timedelta
from decimal import Decimal as D
from typing import TYPE_CHECKING, Any

import pytest
from numpy import array

import polars as pl
from polars._plr import PySeries
from polars._utils.wrap import wrap_s
from polars.testing import assert_frame_equal

if TYPE_CHECKING:
    from polars._typing import PolarsDataType


@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (pl.Datetime, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None]),
        (pl.Duration, [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Categorical, ["a", "b", "a", None]),
        (pl.Enum(["a", "b"]), ["a", "b", "a", None]),
        (pl.Decimal(10, 3), [D("12.345"), D("0.789"), None]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": -1, "b": "bar"}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_with_dtype_strict(
    dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Values that already match `dtype` round-trip unchanged, strict or not."""
    result = wrap_s(
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=strict)
    )
    assert result.to_list() == values


@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [1.0, 2.0]),
        (pl.Float64, [1, 2]),
        (pl.Boolean, [0, 1]),
        (pl.Binary, ["123", "xyz"]),
        (pl.String, [b"123", b"xyz"]),
        (pl.Date, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Time, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime, [date(1970, 1, 1), date(2020, 12, 31)]),
        (pl.Datetime("ms"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime("ns"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Duration, [0, 1200]),
        (pl.Duration("ms"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Duration("ns"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Categorical, [0, 1, 0]),
        (pl.Enum(["a", "b"]), [0, 1, 0]),
        (pl.Decimal(10, 3), [100, 200]),
        (pl.Decimal(5, 3), [D("1.2345")]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 2.0, "b": "bar"}],
        ),
    ],
)
def test_fallback_with_dtype_strict_failure(
    dtype: PolarsDataType, values: list[Any]
) -> None:
    """Strict construction raises `TypeError` for values not matching `dtype`."""
    with pytest.raises(TypeError, match="unexpected value"):
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)


@pytest.mark.parametrize(
    ("dtype", "values", "expected"),
    [
        (
            pl.Int64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0, 1, 0, -1, 0, 2, 1, 5, None],
        ),
        (
            pl.Float64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0.0, 1.0, 0.0, -1.0, 0.0, 2.5, 1.0, 5.0, None],
        ),
        (
            pl.Boolean,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 1), "true"],
            [False, True, False, True, False, True, None, None],
        ),
        (
            pl.Binary,
            [b"123", "xyz", 100, True, None],
            [b"123", b"xyz", None, None, None],
        ),
        (
            pl.String,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Date,
            ["xyz", 1, 2.5, date(1970, 1, 1), datetime(2000, 1, 1, 12), True, None],
            [
                None,
                date(1970, 1, 2),
                date(1970, 1, 3),
                date(1970, 1, 1),
                date(2000, 1, 1),
                None,
                None,
            ],
        ),
        (
            pl.Time,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                time(0, 0),
                time(0, 0),
                None,
                time(12, 0),
                time(12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Datetime,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                datetime(1970, 1, 1, microsecond=1),
                datetime(1970, 1, 1, microsecond=2),
                datetime(1970, 1, 1),
                None,
                datetime(2000, 1, 1, 12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Duration,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                timedelta(microseconds=1),
                timedelta(microseconds=2),
                None,
                timedelta(hours=12),
                None,
                timedelta(hours=5),
                None,
                None,
            ],
        ),
        (
            pl.Categorical,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Enum(["a", "b"]),
            ["a", "b", "c", 1, 2, None],
            ["a", "b", None, None, None, None],
        ),
        (
            pl.Decimal(5, 3),
            [
                D("12"),
                D("1.2345"),
                D("123456"),
                False,
                True,
                0,
                -1,
                0.0,
                2.5,
                date(1970, 1, 2),
                "5",
                "xyz",
            ],
            [
                D("12.000"),
                D("1.234"),
                None,
                None,
                None,
                D("0.000"),
                D("-1.000"),
                D("0.000"),
                D("2.500"),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 1_000, "b": 2.0}],
            [{"a": 1, "b": "foo"}, {"a": None, "b": "2.0"}],
        ),
    ],
)
def test_fallback_with_dtype_nonstrict(
    dtype: PolarsDataType, values: list[Any], expected: list[Any]
) -> None:
    """Non-strict construction with an explicit dtype yields `expected`.

    Each case pairs mixed-type input with the per-element result expected for
    `dtype`; `None` marks the elements expected to come back as null.
    """
    result = wrap_s(
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=False)
    )
    assert result.to_list() == expected


@pytest.mark.parametrize(
    ("expected_dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (
            pl.Datetime("us"),
            [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None],
        ),
        (pl.Duration("us"), [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Decimal(None, 3), [D("12.345"), D("0.789"), None]),
        (pl.Decimal(None, 0), [D("12"), D("56789"), None]),
        (
            pl.Struct({"a": pl.Int64, "b": pl.String, "c": pl.Float64}),
            [{"a": 1, "b": "foo", "c": None}, {"a": -1, "b": "bar", "c": 3.0}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_without_dtype(
    expected_dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Without a dtype, values round-trip and the inferred dtype is as expected."""
    result = wrap_s(PySeries.new_from_any_values("", values, strict=strict))
    assert result.to_list() == values
    assert result.dtype == expected_dtype


@pytest.mark.parametrize(
    "values",
    [
        [1.0, 2],
        [1, 2.0],
        [False, 1],
        [b"123", "xyz"],
        ["123", b"xyz"],
        [date(1970, 1, 1), datetime(2020, 12, 31)],
        [time(0, 0), 1_000],
        [datetime(1970, 1, 1), date(2020, 12, 31)],
        [timedelta(hours=0), 1_000],
        [D("12.345"), 100],
        [D("12.345"), 3.14],
        [{"a": 1, "b": "foo"}, {"a": -1, "b": date(2020, 12, 31)}],
        [{"a": None}, {"a": 1.0}, {"a": 1}],
    ],
)
def test_fallback_without_dtype_strict_failure(values: list[Any]) -> None:
    """Strict construction without a dtype raises `TypeError` on mixed types."""
    with pytest.raises(TypeError, match="unexpected value"):
        PySeries.new_from_any_values("", values, strict=True)


@pytest.mark.parametrize(
    ("values", "expected", "expected_dtype"),
    [
        ([True, 2], [1, 2], pl.Int64),
        ([1, 2.0], [1.0, 2.0], pl.Float64),
        ([2.0, "c"], ["2.0", "c"], pl.String),
        (
            [date(1970, 1, 1), datetime(2022, 12, 31)],
            [datetime(1970, 1, 1), datetime(2022, 12, 31)],
            pl.Datetime("us"),
        ),
        ([D("3.1415"), 2.51], [3.1415, 2.51], pl.Float64),
        ([D("3.1415"), 100], [D("3.1415"), D("100")], pl.Decimal(None, 4)),
        ([1, 2.0, b"d", date(2022, 1, 1)], [1, 2.0, b"d", date(2022, 1, 1)], pl.Object),
        (
            [
                {"a": 1, "b": "foo", "c": None},
                {"a": 2.0, "b": date(2020, 12, 31), "c": None},
            ],
            [
                {"a": 1.0, "b": "foo", "c": None},
                {"a": 2.0, "b": "2020-12-31", "c": None},
            ],
            pl.Struct({"a": pl.Float64, "b": pl.String, "c": pl.Null}),
        ),
        (
            [{"a": None}, {"a": 1.0}, {"a": 1}],
            [{"a": None}, {"a": 1.0}, {"a": 1.0}],
            pl.Struct({"a": pl.Float64}),
        ),
    ],
)
def test_fallback_without_dtype_nonstrict_mixed_types(
    values: list[Any],
    expected_dtype: PolarsDataType,
    expected: list[Any],
) -> None:
    """Non-strict construction of mixed-type input gives the expected dtype/values."""
    # pytest binds parametrized arguments by name, so the parameter order in
    # the signature does not need to match the order of the argnames tuple.
    result = wrap_s(PySeries.new_from_any_values("", values, strict=False))
    assert result.dtype == expected_dtype
    assert result.to_list() == expected


def test_fallback_without_dtype_large_int() -> None:
    """`2**128` without a dtype: strict overflows, non-strict gives Float64."""
    values = [1, 2**128, None]
    with pytest.raises(
        OverflowError,
        match="int value too large for Polars integer types",
    ):
        PySeries.new_from_any_values("", values, strict=True)

    result = wrap_s(PySeries.new_from_any_values("", values, strict=False))
    assert result.dtype == pl.Float64
    assert result.to_list() == [1.0, 340282366920938500000000000000000000000.0, None]


def test_fallback_with_dtype_large_int() -> None:
    """`2**128` with dtype Int128: strict overflows, non-strict yields null."""
    values = [1, 2**128, None]
    with pytest.raises(OverflowError):
        PySeries.new_from_any_values_and_dtype("", values, dtype=pl.Int128, strict=True)

    result = wrap_s(
        PySeries.new_from_any_values_and_dtype(
            "", values, dtype=pl.Int128, strict=False
        )
    )
    assert result.dtype == pl.Int128
    assert result.to_list() == [1, None, None]


def test_fallback_with_dtype_strict_failure_enum_casting() -> None:
    """Strict Enum construction raises `TypeError` for a value outside the categories."""
    dtype = pl.Enum(["a", "b"])
    values = ["a", "b", "c", None]

    with pytest.raises(TypeError, match="attempted to insert 'c'"):
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)


def test_fallback_with_dtype_strict_failure_decimal_precision() -> None:
    """Strict Decimal construction raises `TypeError` when precision is too small."""
    dtype = pl.Decimal(3, 0)
    values = [D("12345")]

    with pytest.raises(
        TypeError, match="decimal precision 3 can't fit values with 5 digits"
    ):
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)


def test_categorical_lit_18874() -> None:
    """A string literal cast to Categorical equals an explicit Categorical column."""
    assert_frame_equal(
        pl.DataFrame(
            {"a": [1, 2, 3]},
        ).with_columns(b=pl.lit("foo").cast(pl.Categorical)),
        pl.DataFrame(
            [
                pl.Series("a", [1, 2, 3]),
                pl.Series("b", ["foo"] * 3, pl.Categorical),
            ]
        ),
    )


@pytest.mark.parametrize(
    ("values", "expected"),
    [
        # Float64 should have ~17; Float32 ~6 digits of precision preserved
        ([0.123, 0.123456789], ["0.123", "0.123456789"]),
        ([[0.123, 0.123456789]], ["[0.123,0.123456789]"]),
        ([array([0.123, 0.123456789])], ["[0.123,0.123456789]"]),
        ([{"a": 0.123, "b": 0.123456789}], ["{0.123,0.123456789}"]),
        ([[{"a": 0.123, "b": 0.123456789}]], ["[{0.123,0.123456789}]"]),
        ([{"x": [0.1, 0.2]}, [{"y": 0.3}]], ["{[0.1,0.2]}", "[{0.3}]"]),
        (
            [None, {"a": None, "b": 1.0}, [None, 2.0]],
            [None, "{null,1.0}", "[null,2.0]"],
        ),
        ([[], {}], ["[]", "{}"]),
        ([[0.5]], ["[0.5]"]),
        ([{"a": 0.5}], ["{0.5}"]),
    ],
    ids=[
        "basic_floats",
        "nested_list",
        "nested_array",
        "basic_struct",
        "list_of_structs",
        "nested_mixed",
        "mixed_nulls",
        "empty_containers",
        "single_element_list",
        "single_element_struct",
    ],
)
def test_float_to_string_precision_25257(
    values: list[Any], expected: list[Any]
) -> None:
    """Non-strict conversion to String is compared against `expected` strings.

    The Series is built while `float_precision=1` is active; the expected
    strings carry more than one decimal digit.
    """
    # verify the conversion is decoupled from Display formatting
    with pl.Config(float_precision=1):
        s = pl.Series(values, strict=False, dtype=pl.String)

    assert (s == pl.Series(expected)).all()