Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/constructors/test_series.py
6939 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, timedelta
4
from typing import TYPE_CHECKING, Any
5
6
import numpy as np
7
import pandas as pd
8
import pytest
9
10
import polars as pl
11
from polars.testing.asserts.series import assert_series_equal
12
13
if TYPE_CHECKING:
14
from polars._typing import PolarsDataType
15
16
17
def test_series_mixed_dtypes_list() -> None:
    """Strict construction from ``[[0.1, 1]]`` gives a ``List(Float64)`` Series."""
    result = pl.Series([[0.1, 1]], strict=True)
    expected_dtype = pl.List(pl.Float64)

    assert result.dtype == expected_dtype
    # The integer element comes back as a float.
    assert result.to_list() == [[0.1, 1.0]]
22
23
24
def test_series_mixed_dtypes_string() -> None:
    """Mixed list/str/int input raises by default and becomes String if non-strict."""
    mixed = [[12], "foo", 9]

    # Without ``strict=False`` the heterogeneous input must be rejected.
    with pytest.raises(TypeError, match="unexpected value"):
        pl.Series(mixed)

    lenient = pl.Series(mixed, strict=False)
    assert lenient.dtype == pl.String
    # Every element is rendered as a string.
    assert lenient.to_list() == ["[12]", "foo", "9"]
    assert lenient[1] == "foo"
34
35
36
def test_series_mixed_dtypes_object() -> None:
    """Mixed list/bytes/int input raises by default and becomes Object if non-strict."""
    mixed = [[12], b"foo", 9]

    # Without ``strict=False`` the heterogeneous input must be rejected.
    with pytest.raises(TypeError, match="unexpected value"):
        pl.Series(mixed)

    lenient = pl.Series(mixed, strict=False)
    assert lenient.dtype.is_object()
    # The original Python objects are returned unchanged.
    assert lenient.to_list() == mixed
    assert lenient[1] == b"foo"
46
47
48
# https://github.com/pola-rs/polars/issues/15139
49
@pytest.mark.parametrize("dtype", [pl.List(pl.Int64), None])
def test_sequence_of_series_with_dtype(dtype: PolarsDataType | None) -> None:
    """A list holding one integer Series builds a ``List(Int64)`` Series."""
    inner_values = [1, 2, 3]
    wrapped = pl.Series([pl.Series(inner_values)], dtype=dtype)

    # Same outcome whether the dtype is given explicitly or left as None.
    assert wrapped.to_list() == [inner_values]
    assert wrapped.dtype == pl.List(pl.Int64)
57
58
59
@pytest.mark.parametrize(
    ("values", "dtype", "expected_dtype"),
    [
        ([1, 1.0, 1], None, pl.Float64),
        ([1, 1, "1.0"], None, pl.String),
        ([1, 1.0, "1.0"], None, pl.String),
        ([True, 1], None, pl.Int64),
        ([True, 1.0], None, pl.Float64),
        ([True, 1], pl.Boolean, pl.Boolean),
        ([True, 1.0], pl.Boolean, pl.Boolean),
        ([False, "1.0"], None, pl.String),
    ],
)
def test_upcast_primitive_and_strings(
    values: list[Any], dtype: PolarsDataType | None, expected_dtype: PolarsDataType
) -> None:
    """
    Check strict rejection and non-strict upcasting of mixed primitive values.

    Parameters
    ----------
    values
        Python values of mixed primitive types (ints, floats, bools, strings).
    dtype
        Explicit dtype passed to the constructor, or None to let it be inferred.
    expected_dtype
        The dtype the Series must have when built with ``strict=False``.
    """
    # Strict mode must refuse every one of the mixed inputs.
    with pytest.raises(TypeError):
        pl.Series(values, dtype=dtype, strict=True)

    assert pl.Series(values, dtype=dtype, strict=False).dtype == expected_dtype
79
80
81
def test_preserve_decimal_precision() -> None:
    """An empty Series built with ``Decimal(None, 1)`` reports that same dtype."""
    decimal_dtype = pl.Decimal(None, 1)
    assert pl.Series(dtype=decimal_dtype).dtype == decimal_dtype
85
86
87
@pytest.mark.parametrize("dtype", [None, pl.Duration("ms")])
def test_large_timedelta(dtype: pl.DataType | None) -> None:
    """The extreme timedelta values produce a ``Duration("ms")`` Series."""
    extremes = [timedelta.min, timedelta.max]
    result = pl.Series(extremes, dtype=dtype)
    assert result.dtype == pl.Duration("ms")

    # Microsecond precision is lost: the upper bound comes back 999 microseconds
    # smaller than ``timedelta.max``.
    truncated_max = timedelta.max - timedelta(microseconds=999)
    assert result.to_list() == [timedelta.min, truncated_max]
96
97
98
def test_array_large_u64() -> None:
    """The value ``2**64 - 1`` round-trips through an ``Array(UInt64, 1)`` Series."""
    array_dtype = pl.Array(pl.UInt64, 1)
    data = [[2**64 - 1]]

    result = pl.Series(data, dtype=array_dtype)

    assert result.dtype == array_dtype
    assert result.to_list() == data
105
106
107
def test_series_init_ambiguous_datetime() -> None:
    """Build a Europe/Belgrade Series from naive 2001-10-28 02:00 in both modes."""
    time_zone = "Europe/Belgrade"
    naive_value = datetime(2001, 10, 28, 2)
    tz_dtype = pl.Datetime(time_zone=time_zone)

    # The result must equal 03:00 with the same time zone attached.
    shifted = pl.Series([datetime(2001, 10, 28, 3)])
    expected = shifted.dt.replace_time_zone(time_zone)

    # Strict and non-strict construction must agree.
    for strict in (True, False):
        result = pl.Series([naive_value], dtype=tz_dtype, strict=strict)
        assert_series_equal(result, expected)
119
120
121
def test_series_init_nonexistent_datetime() -> None:
    """Build a Europe/Amsterdam Series from naive 2024-03-31 02:30 in both modes."""
    time_zone = "Europe/Amsterdam"
    naive_value = datetime(2024, 3, 31, 2, 30)
    tz_dtype = pl.Datetime(time_zone=time_zone)

    # The result must equal 04:30 with the same time zone attached.
    shifted = pl.Series([datetime(2024, 3, 31, 4, 30)])
    expected = shifted.dt.replace_time_zone(time_zone)

    # Strict and non-strict construction must agree.
    for strict in (True, False):
        result = pl.Series([naive_value], dtype=tz_dtype, strict=strict)
        assert_series_equal(result, expected)
133
134
135
# https://github.com/pola-rs/polars/issues/15518
136
def test_series_init_np_temporal_with_nat_15518() -> None:
    """A ``NaT`` slot in a ``datetime64[D]`` array maps to a null date."""
    days = np.array(["2020-01-01", "2020-01-02", "2020-01-03"], "datetime64[D]")
    # Overwrite the middle entry with "not a time".
    days[1] = np.datetime64("NaT")

    expected = pl.Series([date(2020, 1, 1), None, date(2020, 1, 3)])
    assert_series_equal(pl.Series(days), expected)
144
145
146
def test_series_init_pandas_timestamp_18127() -> None:
    """A UTC pandas Timestamp with nanoseconds yields ``Datetime("us", "UTC")``."""
    timestamp = pd.Timestamp("2000-01-01T00:00:00.123456789", tz="UTC")
    series = pl.Series([timestamp])

    # Note: time unit is not (yet) respected, it should be Datetime('ns', 'UTC').
    assert series.dtype == pl.Datetime("us", "UTC")
150
151
152
def test_series_init_np_2d_zero_zero_shape() -> None:
    """A NumPy array of shape (0, 0) builds an empty ``Array(Float64, 0)`` Series."""
    empty_2d = np.array([]).reshape(0, 0)

    result = pl.Series("a", empty_2d)
    expected = pl.Series("a", [], pl.Array(pl.Float64, 0))

    assert_series_equal(result, expected)
158
159
160
def test_series_init_np_2d_empty() -> None:
    """A NumPy array of shape (0, 2) builds an empty ``Array(Float64, 2)`` Series."""
    empty_2d = np.array([]).reshape(0, 2)

    result = pl.Series("a", empty_2d)
    expected = pl.Series("a", [], pl.Array(pl.Float64, 2))

    assert_series_equal(result, expected)
166
167
168
def test_list_null_constructor_schema() -> None:
    """``[[]]`` resolves to ``List(Null)`` with no dtype or with bare ``pl.List``."""
    null_list = pl.List(pl.Null)

    # First with no dtype argument, then with the unparametrized List class.
    for extra_kwargs in ({}, {"dtype": pl.List}):
        assert pl.Series([[]], **extra_kwargs).dtype == null_list
172
173