CoCalc -- test_any_value

GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_any_value_fallbacks.py
⁶⁹³⁹ views
1
# TODO: Replace direct calls to fallback constructors with calls to the Series
2
# constructor once the Python-side logic has been updated
3
from __future__ import annotations
4

5
from datetime import date, datetime, time, timedelta
6
from decimal import Decimal as D
7
from typing import TYPE_CHECKING, Any
8

9
import pytest
10

11
import polars as pl
12
from polars._plr import PySeries
13
from polars._utils.wrap import wrap_s
14
from polars.testing import assert_frame_equal
15

16
if TYPE_CHECKING:
17
    from polars._typing import PolarsDataType
18

19

20
@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        # Numeric, boolean, binary and string inputs
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        # Temporal inputs
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (pl.Datetime, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None]),
        (pl.Duration, [timedelta(hours=0), timedelta(seconds=100), None]),
        # Categorical-like, decimal and struct inputs
        (pl.Categorical, ["a", "b", "a", None]),
        (pl.Enum(["a", "b"]), ["a", "b", "a", None]),
        (pl.Decimal(10, 3), [D("12.345"), D("0.789"), None]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": -1, "b": "bar"}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_with_dtype_strict(
    dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Round-trip values through the dtype-aware fallback in both strict modes."""
    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype, strict=strict
    )
    series = wrap_s(py_series)
    # The parametrized values are expected to come back unchanged.
    assert series.to_list() == values
49

50

51
@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        # Numeric, boolean, binary and string dtypes
        (pl.Int64, [1.0, 2.0]),
        (pl.Float64, [1, 2]),
        (pl.Boolean, [0, 1]),
        (pl.Binary, ["123", "xyz"]),
        (pl.String, [b"123", b"xyz"]),
        # Temporal dtypes
        (pl.Date, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Time, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime, [date(1970, 1, 1), date(2020, 12, 31)]),
        (pl.Datetime("ms"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime("ns"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Duration, [0, 1200]),
        (pl.Duration("ms"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Duration("ns"), [timedelta(hours=0), timedelta(seconds=100)]),
        # Categorical-like, decimal and struct dtypes
        (pl.Categorical, [0, 1, 0]),
        (pl.Enum(["a", "b"]), [0, 1, 0]),
        (pl.Decimal(10, 3), [100, 200]),
        (pl.Decimal(5, 3), [D("1.2345")]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 2.0, "b": "bar"}],
        ),
    ],
)
def test_fallback_with_dtype_strict_failure(
    dtype: PolarsDataType, values: list[Any]
) -> None:
    """Expect a TypeError from the strict dtype-aware fallback for each case."""
    with pytest.raises(TypeError, match="unexpected value"):
        PySeries.new_from_any_values_and_dtype("", values, dtype=dtype, strict=True)
82

83

84
@pytest.mark.parametrize(
    ("dtype", "values", "expected"),
    [
        (
            pl.Int64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0, 1, 0, -1, 0, 2, 1, 5, None],
        ),
        (
            pl.Float64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0.0, 1.0, 0.0, -1.0, 0.0, 2.5, 1.0, 5.0, None],
        ),
        (
            pl.Boolean,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 1), "true"],
            [False, True, False, True, False, True, None, None],
        ),
        (
            pl.Binary,
            [b"123", "xyz", 100, True, None],
            [b"123", b"xyz", None, None, None],
        ),
        (
            pl.String,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Date,
            ["xyz", 1, 2.5, date(1970, 1, 1), datetime(2000, 1, 1, 12), True, None],
            [
                None,
                date(1970, 1, 2),
                date(1970, 1, 3),
                date(1970, 1, 1),
                date(2000, 1, 1),
                None,
                None,
            ],
        ),
        (
            pl.Time,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                time(0, 0),
                time(0, 0),
                None,
                time(12, 0),
                time(12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Datetime,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                datetime(1970, 1, 1, microsecond=1),
                datetime(1970, 1, 1, microsecond=2),
                datetime(1970, 1, 1),
                None,
                datetime(2000, 1, 1, 12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Duration,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                timedelta(microseconds=1),
                timedelta(microseconds=2),
                None,
                timedelta(hours=12),
                None,
                timedelta(hours=5),
                None,
                None,
            ],
        ),
        (
            pl.Categorical,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Enum(["a", "b"]),
            ["a", "b", "c", 1, 2, None],
            ["a", "b", None, None, None, None],
        ),
        (
            pl.Decimal(5, 3),
            [
                D("12"),
                D("1.2345"),
                # NOTE(review): this input and its matching expectation below are
                # disabled in the original; the reason is not visible here.
                # D("123456"),
                False,
                True,
                0,
                -1,
                0.0,
                2.5,
                date(1970, 1, 2),
                "5",
                "xyz",
            ],
            [
                D("12.000"),
                None,
                # None,
                None,
                None,
                D("0.000"),
                D("-1.000"),
                None,
                None,
                None,
                None,
                None,
            ],
        ),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 1_000, "b": 2.0}],
            [{"a": 1, "b": "foo"}, {"a": None, "b": "2.0"}],
        ),
    ],
)
def test_fallback_with_dtype_nonstrict(
    dtype: PolarsDataType, values: list[Any], expected: list[Any]
) -> None:
    """Compare the non-strict dtype-aware fallback output with `expected`."""
    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype, strict=False
    )
    series = wrap_s(py_series)
    assert series.to_list() == expected
255

256

257
@pytest.mark.parametrize(
    ("expected_dtype", "values"),
    [
        # Numeric, boolean, binary and string inputs
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        # Temporal inputs
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (
            pl.Datetime("us"),
            [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None],
        ),
        (pl.Duration("us"), [timedelta(hours=0), timedelta(seconds=100), None]),
        # Decimal and struct inputs
        (pl.Decimal(None, 3), [D("12.345"), D("0.789"), None]),
        (pl.Decimal(None, 0), [D("12"), D("56789"), None]),
        (
            pl.Struct({"a": pl.Int64, "b": pl.String, "c": pl.Float64}),
            [{"a": 1, "b": "foo", "c": None}, {"a": -1, "b": "bar", "c": 3.0}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_without_dtype(
    expected_dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Check values and resulting dtype when no dtype is passed to the fallback."""
    py_series = PySeries.new_from_any_values("", values, strict=strict)
    series = wrap_s(py_series)
    # Values must come back unchanged, with the dtype listed for the case.
    assert series.to_list() == values
    assert series.dtype == expected_dtype
287

288

289
@pytest.mark.parametrize(
    "values",
    [
        # Mixed numeric / boolean inputs
        [1.0, 2],
        [1, 2.0],
        [False, 1],
        # Mixed binary / string inputs
        [b"123", "xyz"],
        ["123", b"xyz"],
        # Temporal values mixed with other types
        [date(1970, 1, 1), datetime(2020, 12, 31)],
        [time(0, 0), 1_000],
        [datetime(1970, 1, 1), date(2020, 12, 31)],
        [timedelta(hours=0), 1_000],
        # Decimals mixed with ints / floats
        [D("12.345"), 100],
        [D("12.345"), 3.14],
        # Structs whose fields differ in type between rows
        [{"a": 1, "b": "foo"}, {"a": -1, "b": date(2020, 12, 31)}],
        [{"a": None}, {"a": 1.0}, {"a": 1}],
    ],
)
def test_fallback_without_dtype_strict_failure(values: list[Any]) -> None:
    """Expect a TypeError from the strict dtype-less fallback for each case."""
    with pytest.raises(TypeError, match="unexpected value"):
        PySeries.new_from_any_values("", values, strict=True)
310

311

312
@pytest.mark.parametrize(
    ("values", "expected", "expected_dtype"),
    [
        ([True, 2], [1, 2], pl.Int64),
        ([1, 2.0], [1.0, 2.0], pl.Float64),
        ([2.0, "c"], ["2.0", "c"], pl.String),
        (
            [date(1970, 1, 1), datetime(2022, 12, 31)],
            [datetime(1970, 1, 1), datetime(2022, 12, 31)],
            pl.Datetime("us"),
        ),
        ([D("3.1415"), 2.51], [3.1415, 2.51], pl.Float64),
        ([D("3.1415"), 100], [D("3.1415"), D("100")], pl.Decimal(None, 4)),
        ([1, 2.0, b"d", date(2022, 1, 1)], [1, 2.0, b"d", date(2022, 1, 1)], pl.Object),
        (
            [
                {"a": 1, "b": "foo", "c": None},
                {"a": 2.0, "b": date(2020, 12, 31), "c": None},
            ],
            [
                {"a": 1.0, "b": "foo", "c": None},
                {"a": 2.0, "b": "2020-12-31", "c": None},
            ],
            pl.Struct({"a": pl.Float64, "b": pl.String, "c": pl.Null}),
        ),
        (
            [{"a": None}, {"a": 1.0}, {"a": 1}],
            [{"a": None}, {"a": 1.0}, {"a": 1.0}],
            pl.Struct({"a": pl.Float64}),
        ),
    ],
)
def test_fallback_without_dtype_nonstrict_mixed_types(
    values: list[Any],
    expected_dtype: PolarsDataType,
    expected: list[Any],
) -> None:
    """Check dtype and values produced by the non-strict dtype-less fallback."""
    # pytest binds parametrized arguments by name, so the signature order does
    # not need to mirror the tuple order above.
    py_series = PySeries.new_from_any_values("", values, strict=False)
    series = wrap_s(py_series)
    assert series.dtype == expected_dtype
    assert series.to_list() == expected
352

353

354
def test_fallback_without_dtype_large_int() -> None:
    """Check strict and non-strict dtype-less handling of the integer 2**128."""
    oversized = [1, 2**128, None]

    # Strict mode: an OverflowError with this message is expected.
    with pytest.raises(
        OverflowError,
        match="int value too large for Polars integer types",
    ):
        PySeries.new_from_any_values("", oversized, strict=True)

    # Non-strict mode: a Float64 series is expected instead.
    py_series = PySeries.new_from_any_values("", oversized, strict=False)
    series = wrap_s(py_series)
    assert series.dtype == pl.Float64
    assert series.to_list() == [1.0, 340282366920938500000000000000000000000.0, None]
365

366

367
def test_fallback_with_dtype_large_int() -> None:
    """Check strict and non-strict Int128 handling of the integer 2**128."""
    oversized = [1, 2**128, None]

    # Strict mode: an OverflowError is expected.
    with pytest.raises(OverflowError):
        PySeries.new_from_any_values_and_dtype(
            "", oversized, dtype=pl.Int128, strict=True
        )

    # Non-strict mode: the oversized entry is expected to come back as None.
    py_series = PySeries.new_from_any_values_and_dtype(
        "", oversized, dtype=pl.Int128, strict=False
    )
    series = wrap_s(py_series)
    assert series.dtype == pl.Int128
    assert series.to_list() == [1, None, None]
379

380

381
def test_fallback_with_dtype_strict_failure_enum_casting() -> None:
    """Expect a TypeError naming 'c' when it is not among the Enum categories."""
    enum_dtype = pl.Enum(["a", "b"])
    # "c" is not one of the categories declared on `enum_dtype`.
    labels = ["a", "b", "c", None]

    with pytest.raises(TypeError, match="attempted to insert 'c'"):
        PySeries.new_from_any_values_and_dtype("", labels, enum_dtype, strict=True)
387

388

389
def test_fallback_with_dtype_strict_failure_decimal_precision() -> None:
    """Expect a TypeError for a 5-digit value under a precision-3 Decimal dtype."""
    narrow_decimal = pl.Decimal(3, 0)
    too_wide = [D("12345")]

    with pytest.raises(
        TypeError, match="decimal precision 3 can't fit values with 5 digits"
    ):
        PySeries.new_from_any_values_and_dtype(
            "", too_wide, narrow_decimal, strict=True
        )
397

398

399
def test_categorical_lit_18874() -> None:
    """Compare a Categorical-cast string literal column with an explicit Series."""
    # Left-hand frame: column "b" comes from a literal cast to Categorical.
    result = pl.DataFrame(
        {"a": [1, 2, 3]},
    ).with_columns(b=pl.lit("foo").cast(pl.Categorical))

    # Right-hand frame: the same columns built directly as Series.
    expected = pl.DataFrame(
        [
            pl.Series("a", [1, 2, 3]),
            pl.Series("b", ["foo"] * 3, pl.Categorical),
        ]
    )

    assert_frame_equal(result, expected)
411

412
Product

Resources

Company