Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_any_value_fallbacks.py
8406 views
1
# TODO: Replace direct calls to fallback constructors with calls to the Series
# constructor once the Python-side logic has been updated
3
from __future__ import annotations
4
5
from datetime import date, datetime, time, timedelta
6
from decimal import Decimal as D
7
from typing import TYPE_CHECKING, Any
8
9
import pytest
10
from numpy import array
11
12
import polars as pl
13
from polars._plr import PySeries
14
from polars._utils.wrap import wrap_s
15
from polars.testing import assert_frame_equal
16
17
if TYPE_CHECKING:
18
from polars._typing import PolarsDataType
19
20
21
@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (pl.Datetime, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None]),
        (pl.Duration, [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Categorical, ["a", "b", "a", None]),
        (pl.Enum(["a", "b"]), ["a", "b", "a", None]),
        (pl.Decimal(10, 3), [D("12.345"), D("0.789"), None]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": -1, "b": "bar"}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_with_dtype_strict(
    dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Check that the dtype-aware fallback returns the input values unchanged.

    Every case is run with both ``strict=True`` and ``strict=False``.
    """
    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype, strict=strict
    )
    series = wrap_s(py_series)
    assert series.to_list() == values
50
51
52
@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [1.0, 2.0]),
        (pl.Float64, [1, 2]),
        (pl.Boolean, [0, 1]),
        (pl.Binary, ["123", "xyz"]),
        (pl.String, [b"123", b"xyz"]),
        (pl.Date, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Time, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime, [date(1970, 1, 1), date(2020, 12, 31)]),
        (pl.Datetime("ms"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime("ns"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Duration, [0, 1200]),
        (pl.Duration("ms"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Duration("ns"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Categorical, [0, 1, 0]),
        (pl.Enum(["a", "b"]), [0, 1, 0]),
        (pl.Decimal(10, 3), [100, 200]),
        (pl.Decimal(5, 3), [D("1.2345")]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 2.0, "b": "bar"}],
        ),
    ],
)
def test_fallback_with_dtype_strict_failure(
    dtype: PolarsDataType, values: list[Any]
) -> None:
    """Check that strict mode raises ``TypeError`` for each dtype/values pair."""
    expect_type_error = pytest.raises(TypeError, match="unexpected value")
    with expect_type_error:
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)
83
84
85
@pytest.mark.parametrize(
    ("dtype", "values", "expected"),
    [
        (
            pl.Int64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0, 1, 0, -1, 0, 2, 1, 5, None],
        ),
        (
            pl.Float64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0.0, 1.0, 0.0, -1.0, 0.0, 2.5, 1.0, 5.0, None],
        ),
        (
            pl.Boolean,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 1), "true"],
            [False, True, False, True, False, True, None, None],
        ),
        (
            pl.Binary,
            [b"123", "xyz", 100, True, None],
            [b"123", b"xyz", None, None, None],
        ),
        (
            pl.String,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Date,
            ["xyz", 1, 2.5, date(1970, 1, 1), datetime(2000, 1, 1, 12), True, None],
            [
                None,
                date(1970, 1, 2),
                date(1970, 1, 3),
                date(1970, 1, 1),
                date(2000, 1, 1),
                None,
                None,
            ],
        ),
        (
            pl.Time,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                time(0, 0),
                time(0, 0),
                None,
                time(12, 0),
                time(12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Datetime,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                datetime(1970, 1, 1, microsecond=1),
                datetime(1970, 1, 1, microsecond=2),
                datetime(1970, 1, 1),
                None,
                datetime(2000, 1, 1, 12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Duration,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                timedelta(microseconds=1),
                timedelta(microseconds=2),
                None,
                timedelta(hours=12),
                None,
                timedelta(hours=5),
                None,
                None,
            ],
        ),
        (
            pl.Categorical,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Enum(["a", "b"]),
            ["a", "b", "c", 1, 2, None],
            ["a", "b", None, None, None, None],
        ),
        (
            pl.Decimal(5, 3),
            [
                D("12"),
                D("1.2345"),
                D("123456"),
                False,
                True,
                0,
                -1,
                0.0,
                2.5,
                date(1970, 1, 2),
                "5",
                "xyz",
            ],
            [
                D("12.000"),
                D("1.234"),
                None,
                None,
                None,
                D("0.000"),
                D("-1.000"),
                D("0.000"),
                D("2.500"),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 1_000, "b": 2.0}],
            [{"a": 1, "b": "foo"}, {"a": None, "b": "2.0"}],
        ),
    ],
)
def test_fallback_with_dtype_nonstrict(
    dtype: PolarsDataType, values: list[Any], expected: list[Any]
) -> None:
    """Check the non-strict, dtype-aware fallback against the expected output.

    Each case pairs mixed-type input with the list the resulting Series is
    expected to hold; ``None`` entries in ``expected`` mark slots expected to
    come back as null.
    """
    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype, strict=False
    )
    series = wrap_s(py_series)
    assert series.to_list() == expected
256
257
258
@pytest.mark.parametrize(
    ("expected_dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (
            pl.Datetime("us"),
            [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None],
        ),
        (pl.Duration("us"), [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Decimal(None, 3), [D("12.345"), D("0.789"), None]),
        (pl.Decimal(None, 0), [D("12"), D("56789"), None]),
        (
            pl.Struct({"a": pl.Int64, "b": pl.String, "c": pl.Float64}),
            [{"a": 1, "b": "foo", "c": None}, {"a": -1, "b": "bar", "c": 3.0}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_without_dtype(
    expected_dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """Check the dtype-less fallback on inputs of a single type.

    The resulting Series must hold the input values unchanged and report
    ``expected_dtype``, in both strict and non-strict mode.
    """
    py_series = PySeries.new_from_any_values("", values, strict=strict)
    series = wrap_s(py_series)
    assert series.to_list() == values
    assert series.dtype == expected_dtype
288
289
290
@pytest.mark.parametrize(
    "values",
    [
        [1.0, 2],
        [1, 2.0],
        [False, 1],
        [b"123", "xyz"],
        ["123", b"xyz"],
        [date(1970, 1, 1), datetime(2020, 12, 31)],
        [time(0, 0), 1_000],
        [datetime(1970, 1, 1), date(2020, 12, 31)],
        [timedelta(hours=0), 1_000],
        [D("12.345"), 100],
        [D("12.345"), 3.14],
        [{"a": 1, "b": "foo"}, {"a": -1, "b": date(2020, 12, 31)}],
        [{"a": None}, {"a": 1.0}, {"a": 1}],
    ],
)
def test_fallback_without_dtype_strict_failure(values: list[Any]) -> None:
    """Check that strict mode without a dtype raises ``TypeError`` on mixed input."""
    expect_type_error = pytest.raises(TypeError, match="unexpected value")
    with expect_type_error:
        PySeries.new_from_any_values("", values, strict=True)
311
312
313
@pytest.mark.parametrize(
    ("values", "expected", "expected_dtype"),
    [
        ([True, 2], [1, 2], pl.Int64),
        ([1, 2.0], [1.0, 2.0], pl.Float64),
        ([2.0, "c"], ["2.0", "c"], pl.String),
        (
            [date(1970, 1, 1), datetime(2022, 12, 31)],
            [datetime(1970, 1, 1), datetime(2022, 12, 31)],
            pl.Datetime("us"),
        ),
        ([D("3.1415"), 2.51], [3.1415, 2.51], pl.Float64),
        ([D("3.1415"), 100], [D("3.1415"), D("100")], pl.Decimal(None, 4)),
        ([1, 2.0, b"d", date(2022, 1, 1)], [1, 2.0, b"d", date(2022, 1, 1)], pl.Object),
        (
            [
                {"a": 1, "b": "foo", "c": None},
                {"a": 2.0, "b": date(2020, 12, 31), "c": None},
            ],
            [
                {"a": 1.0, "b": "foo", "c": None},
                {"a": 2.0, "b": "2020-12-31", "c": None},
            ],
            pl.Struct({"a": pl.Float64, "b": pl.String, "c": pl.Null}),
        ),
        (
            [{"a": None}, {"a": 1.0}, {"a": 1}],
            [{"a": None}, {"a": 1.0}, {"a": 1.0}],
            pl.Struct({"a": pl.Float64}),
        ),
    ],
)
def test_fallback_without_dtype_nonstrict_mixed_types(
    values: list[Any],
    expected_dtype: PolarsDataType,
    expected: list[Any],
) -> None:
    """Check the non-strict, dtype-less fallback on mixed-type input.

    Asserts both the dtype of the resulting Series and the values it holds.
    """
    py_series = PySeries.new_from_any_values("", values, strict=False)
    series = wrap_s(py_series)
    # The dtype is checked first, matching the original assertion order.
    assert series.dtype == expected_dtype
    assert series.to_list() == expected
353
354
355
def test_fallback_without_dtype_large_int() -> None:
    """Check how the dtype-less fallback treats ``2**128``.

    Strict mode must raise ``OverflowError``; non-strict mode must produce a
    Float64 Series.
    """
    values = [1, 2**128, None]
    expect_overflow = pytest.raises(
        OverflowError,
        match="int value too large for Polars integer types",
    )
    with expect_overflow:
        PySeries.new_from_any_values("", values, strict=True)

    py_series = PySeries.new_from_any_values("", values, strict=False)
    series = wrap_s(py_series)
    assert series.dtype == pl.Float64
    assert series.to_list() == [1.0, 340282366920938500000000000000000000000.0, None]
366
367
368
def test_fallback_with_dtype_large_int() -> None:
    """Check how the Int128 fallback treats ``2**128``.

    Strict mode must raise ``OverflowError``; non-strict mode must keep the
    Int128 dtype and return ``[1, None, None]``.
    """
    values = [1, 2**128, None]
    with pytest.raises(OverflowError):
        PySeries.new_from_any_values_and_dtype("", values, dtype=pl.Int128, strict=True)

    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype=pl.Int128, strict=False
    )
    series = wrap_s(py_series)
    assert series.dtype == pl.Int128
    assert series.to_list() == [1, None, None]
380
381
382
def test_fallback_with_dtype_strict_failure_enum_casting() -> None:
    """Check that strict mode raises when a value is not among the Enum categories."""
    enum_dtype = pl.Enum(["a", "b"])
    # "c" is not one of the categories declared above.
    candidates = ["a", "b", "c", None]

    expect_type_error = pytest.raises(TypeError, match="attempted to insert 'c'")
    with expect_type_error:
        PySeries.new_from_any_values_and_dtype("", candidates, enum_dtype, strict=True)
388
389
390
def test_fallback_with_dtype_strict_failure_decimal_precision() -> None:
    """Check that strict mode raises when a Decimal has more digits than the precision."""
    narrow_decimal = pl.Decimal(3, 0)
    # Five digits against a declared precision of three.
    too_wide = [D("12345")]

    expect_type_error = pytest.raises(
        TypeError, match="decimal precision 3 can't fit values with 5 digits"
    )
    with expect_type_error:
        PySeries.new_from_any_values_and_dtype("", too_wide, narrow_decimal, strict=True)
398
399
400
def test_categorical_lit_18874() -> None:
    """Check that a Categorical-cast literal column equals an explicit Categorical Series."""
    # Column "b" comes from a string literal cast to Categorical.
    actual = pl.DataFrame(
        {"a": [1, 2, 3]},
    ).with_columns(b=pl.lit("foo").cast(pl.Categorical))

    # The same frame built from explicit Series.
    expected = pl.DataFrame(
        [
            pl.Series("a", [1, 2, 3]),
            pl.Series("b", ["foo"] * 3, pl.Categorical),
        ]
    )

    assert_frame_equal(actual, expected)
412
413
414
@pytest.mark.parametrize(
    ("values", "expected"),
    [
        # Float64 should have ~17; Float32 ~6 digits of precision preserved
        ([0.123, 0.123456789], ["0.123", "0.123456789"]),
        ([[0.123, 0.123456789]], ["[0.123,0.123456789]"]),
        ([array([0.123, 0.123456789])], ["[0.123,0.123456789]"]),
        ([{"a": 0.123, "b": 0.123456789}], ["{0.123,0.123456789}"]),
        ([[{"a": 0.123, "b": 0.123456789}]], ["[{0.123,0.123456789}]"]),
        ([{"x": [0.1, 0.2]}, [{"y": 0.3}]], ["{[0.1,0.2]}", "[{0.3}]"]),
        (
            [None, {"a": None, "b": 1.0}, [None, 2.0]],
            [None, "{null,1.0}", "[null,2.0]"],
        ),
        ([[], {}], ["[]", "{}"]),
        ([[0.5]], ["[0.5]"]),
        ([{"a": 0.5}], ["{0.5}"]),
    ],
    ids=[
        "basic_floats",
        "nested_list",
        "nested_array",
        "basic_struct",
        "list_of_structs",
        "nested_mixed",
        "mixed_nulls",
        "empty_containers",
        "single_element_list",
        "single_element_struct",
    ],
)
def test_float_to_string_precision_25257(
    values: list[Any], expected: list[Any]
) -> None:
    """Check that floats are stringified as expected under ``float_precision=1``.

    The Series is built non-strictly with ``dtype=pl.String`` while the
    display config is active, then compared against the expected strings.
    """
    # verify the conversion is decoupled from Display formatting
    with pl.Config(float_precision=1):
        stringified = pl.Series(values, strict=False, dtype=pl.String)

    matches = stringified == pl.Series(expected)
    assert matches.all()
453
454