Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_any_value_fallbacks.py
6939 views
1
# TODO: Replace direct calls to fallback constructors with calls to the Series
# constructor once the Python-side logic has been updated
3
from __future__ import annotations
4
5
from datetime import date, datetime, time, timedelta
6
from decimal import Decimal as D
7
from typing import TYPE_CHECKING, Any
8
9
import pytest
10
11
import polars as pl
12
from polars._plr import PySeries
13
from polars._utils.wrap import wrap_s
14
from polars.testing import assert_frame_equal
15
16
if TYPE_CHECKING:
17
from polars._typing import PolarsDataType
18
19
20
@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (pl.Datetime, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None]),
        (pl.Duration, [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Categorical, ["a", "b", "a", None]),
        (pl.Enum(["a", "b"]), ["a", "b", "a", None]),
        (pl.Decimal(10, 3), [D("12.345"), D("0.789"), None]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": -1, "b": "bar"}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_with_dtype_strict(
    dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """
    Check that well-typed values round-trip when an explicit dtype is given.

    Every parametrized case pairs a dtype with values of the matching Python
    type, so the constructed Series must list back the exact input values in
    both strict and non-strict mode.
    """
    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype, strict=strict
    )
    series = wrap_s(py_series)

    round_tripped = series.to_list()
    assert round_tripped == values
49
50
51
@pytest.mark.parametrize(
    ("dtype", "values"),
    [
        (pl.Int64, [1.0, 2.0]),
        (pl.Float64, [1, 2]),
        (pl.Boolean, [0, 1]),
        (pl.Binary, ["123", "xyz"]),
        (pl.String, [b"123", b"xyz"]),
        (pl.Date, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Time, [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime, [date(1970, 1, 1), date(2020, 12, 31)]),
        (pl.Datetime("ms"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Datetime("ns"), [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
        (pl.Duration, [0, 1200]),
        (pl.Duration("ms"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Duration("ns"), [timedelta(hours=0), timedelta(seconds=100)]),
        (pl.Categorical, [0, 1, 0]),
        (pl.Enum(["a", "b"]), [0, 1, 0]),
        (pl.Decimal(10, 3), [100, 200]),
        (pl.Decimal(5, 3), [D("1.2345")]),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 2.0, "b": "bar"}],
        ),
    ],
)
def test_fallback_with_dtype_strict_failure(
    dtype: PolarsDataType, values: list[Any]
) -> None:
    """
    Check that strict mode rejects values that do not match the given dtype.

    Each parametrized case is expected to raise a ``TypeError`` whose message
    contains "unexpected value" when the Series is built with ``strict=True``.
    """
    expected_error = pytest.raises(TypeError, match="unexpected value")
    with expected_error:
        PySeries.new_from_any_values_and_dtype("", values, dtype, strict=True)
82
83
84
@pytest.mark.parametrize(
    ("dtype", "values", "expected"),
    [
        (
            pl.Int64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0, 1, 0, -1, 0, 2, 1, 5, None],
        ),
        (
            pl.Float64,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 2), "5", "xyz"],
            [0.0, 1.0, 0.0, -1.0, 0.0, 2.5, 1.0, 5.0, None],
        ),
        (
            pl.Boolean,
            [False, True, 0, -1, 0.0, 2.5, date(1970, 1, 1), "true"],
            [False, True, False, True, False, True, None, None],
        ),
        (
            pl.Binary,
            [b"123", "xyz", 100, True, None],
            [b"123", b"xyz", None, None, None],
        ),
        (
            pl.String,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Date,
            ["xyz", 1, 2.5, date(1970, 1, 1), datetime(2000, 1, 1, 12), True, None],
            [
                None,
                date(1970, 1, 2),
                date(1970, 1, 3),
                date(1970, 1, 1),
                date(2000, 1, 1),
                None,
                None,
            ],
        ),
        (
            pl.Time,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                time(0, 0),
                time(0, 0),
                None,
                time(12, 0),
                time(12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Datetime,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                datetime(1970, 1, 1, microsecond=1),
                datetime(1970, 1, 1, microsecond=2),
                datetime(1970, 1, 1),
                None,
                datetime(2000, 1, 1, 12, 0),
                None,
                None,
                None,
            ],
        ),
        (
            pl.Duration,
            [
                "xyz",
                1,
                2.5,
                date(1970, 1, 1),
                time(12, 0),
                datetime(2000, 1, 1, 12),
                timedelta(hours=5),
                True,
                None,
            ],
            [
                None,
                timedelta(microseconds=1),
                timedelta(microseconds=2),
                None,
                timedelta(hours=12),
                None,
                timedelta(hours=5),
                None,
                None,
            ],
        ),
        (
            pl.Categorical,
            ["xyz", 1, 2.5, date(1970, 1, 1), True, b"123", None],
            ["xyz", "1", "2.5", "1970-01-01", "true", None, None],
        ),
        (
            pl.Enum(["a", "b"]),
            ["a", "b", "c", 1, 2, None],
            ["a", "b", None, None, None, None],
        ),
        (
            pl.Decimal(5, 3),
            # NOTE(review): the D("123456") input and its matching `None`
            # expectation below are disabled - confirm whether this case
            # should be restored.
            [
                D("12"),
                D("1.2345"),
                # D("123456"),
                False,
                True,
                0,
                -1,
                0.0,
                2.5,
                date(1970, 1, 2),
                "5",
                "xyz",
            ],
            [
                D("12.000"),
                None,
                # None,
                None,
                None,
                D("0.000"),
                D("-1.000"),
                None,
                None,
                None,
                None,
                None,
            ],
        ),
        (
            pl.Struct({"a": pl.Int8, "b": pl.String}),
            [{"a": 1, "b": "foo"}, {"a": 1_000, "b": 2.0}],
            [{"a": 1, "b": "foo"}, {"a": None, "b": "2.0"}],
        ),
    ],
)
def test_fallback_with_dtype_nonstrict(
    dtype: PolarsDataType, values: list[Any], expected: list[Any]
) -> None:
    """
    Check the non-strict conversion of mixed inputs to an explicit dtype.

    Each case lists heterogeneous input values next to the output expected
    from ``strict=False``: entries are either converted to the target dtype
    or replaced by ``None``, as spelled out in ``expected``.
    """
    py_series = PySeries.new_from_any_values_and_dtype(
        "", values, dtype, strict=False
    )
    series = wrap_s(py_series)

    actual = series.to_list()
    assert actual == expected
255
256
257
@pytest.mark.parametrize(
    ("expected_dtype", "values"),
    [
        (pl.Int64, [-1, 0, 100_000, None]),
        (pl.Float64, [-1.5, 0.0, 10.0, None]),
        (pl.Boolean, [True, False, None]),
        (pl.Binary, [b"123", b"xyz", None]),
        (pl.String, ["123", "xyz", None]),
        (pl.Date, [date(1970, 1, 1), date(2020, 12, 31), None]),
        (pl.Time, [time(0, 0), time(23, 59, 59), None]),
        (
            pl.Datetime("us"),
            [datetime(1970, 1, 1), datetime(2020, 12, 31, 23, 59, 59), None],
        ),
        (pl.Duration("us"), [timedelta(hours=0), timedelta(seconds=100), None]),
        (pl.Decimal(None, 3), [D("12.345"), D("0.789"), None]),
        (pl.Decimal(None, 0), [D("12"), D("56789"), None]),
        (
            pl.Struct({"a": pl.Int64, "b": pl.String, "c": pl.Float64}),
            [{"a": 1, "b": "foo", "c": None}, {"a": -1, "b": "bar", "c": 3.0}],
        ),
    ],
)
@pytest.mark.parametrize("strict", [True, False])
def test_fallback_without_dtype(
    expected_dtype: PolarsDataType, values: list[Any], strict: bool
) -> None:
    """
    Check dtype inference for homogeneous values when no dtype is passed.

    In both strict and non-strict mode the Series must list back the exact
    input values and report the dtype given by ``expected_dtype``.
    """
    py_series = PySeries.new_from_any_values("", values, strict=strict)
    series = wrap_s(py_series)

    assert series.to_list() == values
    assert series.dtype == expected_dtype
287
288
289
@pytest.mark.parametrize(
    "values",
    [
        [1.0, 2],
        [1, 2.0],
        [False, 1],
        [b"123", "xyz"],
        ["123", b"xyz"],
        [date(1970, 1, 1), datetime(2020, 12, 31)],
        [time(0, 0), 1_000],
        [datetime(1970, 1, 1), date(2020, 12, 31)],
        [timedelta(hours=0), 1_000],
        [D("12.345"), 100],
        [D("12.345"), 3.14],
        [{"a": 1, "b": "foo"}, {"a": -1, "b": date(2020, 12, 31)}],
        [{"a": None}, {"a": 1.0}, {"a": 1}],
    ],
)
def test_fallback_without_dtype_strict_failure(values: list[Any]) -> None:
    """
    Check that strict inference rejects inputs that mix Python types.

    Each parametrized list combines values of different types; building a
    Series from it with ``strict=True`` and no dtype is expected to raise a
    ``TypeError`` whose message contains "unexpected value".
    """
    expected_error = pytest.raises(TypeError, match="unexpected value")
    with expected_error:
        PySeries.new_from_any_values("", values, strict=True)
310
311
312
@pytest.mark.parametrize(
    ("values", "expected", "expected_dtype"),
    [
        ([True, 2], [1, 2], pl.Int64),
        ([1, 2.0], [1.0, 2.0], pl.Float64),
        ([2.0, "c"], ["2.0", "c"], pl.String),
        (
            [date(1970, 1, 1), datetime(2022, 12, 31)],
            [datetime(1970, 1, 1), datetime(2022, 12, 31)],
            pl.Datetime("us"),
        ),
        ([D("3.1415"), 2.51], [3.1415, 2.51], pl.Float64),
        ([D("3.1415"), 100], [D("3.1415"), D("100")], pl.Decimal(None, 4)),
        ([1, 2.0, b"d", date(2022, 1, 1)], [1, 2.0, b"d", date(2022, 1, 1)], pl.Object),
        (
            [
                {"a": 1, "b": "foo", "c": None},
                {"a": 2.0, "b": date(2020, 12, 31), "c": None},
            ],
            [
                {"a": 1.0, "b": "foo", "c": None},
                {"a": 2.0, "b": "2020-12-31", "c": None},
            ],
            pl.Struct({"a": pl.Float64, "b": pl.String, "c": pl.Null}),
        ),
        (
            [{"a": None}, {"a": 1.0}, {"a": 1}],
            [{"a": None}, {"a": 1.0}, {"a": 1.0}],
            pl.Struct({"a": pl.Float64}),
        ),
    ],
)
def test_fallback_without_dtype_nonstrict_mixed_types(
    values: list[Any],
    expected_dtype: PolarsDataType,
    expected: list[Any],
) -> None:
    """
    Check non-strict inference on inputs that mix Python types.

    With ``strict=False`` and no dtype, each mixed input must produce a
    Series with the dtype given by ``expected_dtype`` and the converted
    values given by ``expected``.
    """
    py_series = PySeries.new_from_any_values("", values, strict=False)
    series = wrap_s(py_series)

    # The dtype is checked first, as in the original assertion order.
    assert series.dtype == expected_dtype
    assert series.to_list() == expected
352
353
354
def test_fallback_without_dtype_large_int() -> None:
    """
    Check how a very large integer is handled when no dtype is given.

    Strict mode must raise ``OverflowError`` for ``2**128``; non-strict mode
    must produce a Float64 Series instead.
    """
    oversized = [1, 2**128, None]

    overflow = pytest.raises(
        OverflowError,
        match="int value too large for Polars integer types",
    )
    with overflow:
        PySeries.new_from_any_values("", oversized, strict=True)

    series = wrap_s(PySeries.new_from_any_values("", oversized, strict=False))
    assert series.dtype == pl.Float64
    # float(2**128) is the same double as 340282366920938500000000000000000000000.0
    assert series.to_list() == [1.0, float(2**128), None]
365
366
367
def test_fallback_with_dtype_large_int() -> None:
    """
    Check how a very large integer is handled with an explicit Int128 dtype.

    Strict mode must raise ``OverflowError`` for ``2**128``; non-strict mode
    must keep the Int128 dtype and yield ``None`` in place of that value.
    """
    oversized = [1, 2**128, None]

    with pytest.raises(OverflowError):
        PySeries.new_from_any_values_and_dtype(
            "", oversized, dtype=pl.Int128, strict=True
        )

    py_series = PySeries.new_from_any_values_and_dtype(
        "", oversized, dtype=pl.Int128, strict=False
    )
    series = wrap_s(py_series)

    assert series.dtype == pl.Int128
    assert series.to_list() == [1, None, None]
379
380
381
def test_fallback_with_dtype_strict_failure_enum_casting() -> None:
    """
    Check that strict mode rejects a string outside the Enum's categories.

    The value "c" is not one of the categories ``["a", "b"]``, and the
    expected ``TypeError`` message must mention the attempted insert of 'c'.
    """
    enum_dtype = pl.Enum(["a", "b"])
    inputs = ["a", "b", "c", None]

    rejection = pytest.raises(TypeError, match="attempted to insert 'c'")
    with rejection:
        PySeries.new_from_any_values_and_dtype("", inputs, enum_dtype, strict=True)
387
388
389
def test_fallback_with_dtype_strict_failure_decimal_precision() -> None:
    """
    Check that strict mode rejects a Decimal with too many digits.

    ``D("12345")`` has five digits while the dtype is ``Decimal(3, 0)``; the
    expected ``TypeError`` message must report that mismatch.
    """
    narrow_decimal = pl.Decimal(3, 0)
    inputs = [D("12345")]

    rejection = pytest.raises(
        TypeError, match="decimal precision 3 can't fit values with 5 digits"
    )
    with rejection:
        PySeries.new_from_any_values_and_dtype(
            "", inputs, narrow_decimal, strict=True
        )
397
398
399
def test_categorical_lit_18874() -> None:
    """
    Check a string literal cast to Categorical and added as a column.

    The frame built with ``with_columns`` must equal one constructed directly
    from an integer Series and a Categorical Series holding "foo" three times.
    """
    base = pl.DataFrame(
        {"a": [1, 2, 3]},
    )
    result = base.with_columns(b=pl.lit("foo").cast(pl.Categorical))

    expected = pl.DataFrame(
        [
            pl.Series("a", [1, 2, 3]),
            pl.Series("b", ["foo"] * 3, pl.Categorical),
        ]
    )

    assert_frame_equal(result, expected)
411
412