Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/series/buffers/test_from_buffers.py
6940 views
1
from __future__ import annotations
2
3
from datetime import datetime
4
from zoneinfo import ZoneInfo
5
6
import pytest
7
from hypothesis import given
8
9
import polars as pl
10
from polars.exceptions import PanicException
11
from polars.testing import assert_series_equal
12
from polars.testing.parametric import series
13
from tests.unit.conftest import NUMERIC_DTYPES
14
15
16
@given(
    s=series(
        allow_chunks=False,
        allowed_dtypes=[*NUMERIC_DTYPES, pl.Boolean],
    )
)
def test_series_from_buffers_numeric_with_validity(s: pl.Series) -> None:
    """Round-trip a generated numeric/boolean series through `_from_buffers`.

    The series itself is passed as the data buffer and its non-null mask as
    the validity buffer; the rebuilt series must equal the input.
    """
    rebuilt = pl.Series._from_buffers(
        s.dtype,
        data=s,
        validity=s.is_not_null(),
    )
    assert_series_equal(s, rebuilt)
26
27
28
@given(
    s=series(
        allow_null=False,
        allow_chunks=False,
        allowed_dtypes=[*NUMERIC_DTYPES, pl.Boolean],
    )
)
def test_series_from_buffers_numeric(s: pl.Series) -> None:
    """Round-trip a generated null-free numeric/boolean series.

    No validity buffer is supplied, so the data buffer alone must be enough
    to reproduce the input series.
    """
    rebuilt = pl.Series._from_buffers(s.dtype, data=s)
    assert_series_equal(s, rebuilt)
38
39
40
@given(
    s=series(
        allow_chunks=False,
        allowed_dtypes=[pl.Date, pl.Time, pl.Datetime, pl.Duration],
    )
)
def test_series_from_buffers_temporal_with_validity(s: pl.Series) -> None:
    """Round-trip a generated temporal series via its integer representation.

    The series is cast to an integer dtype to obtain the data buffer, then
    rebuilt with the original temporal dtype and its non-null mask.
    """
    # Date values are cast to Int32; every other temporal dtype uses Int64.
    if s.dtype == pl.Date:
        physical_dtype = pl.Int32
    else:
        physical_dtype = pl.Int64

    rebuilt = pl.Series._from_buffers(
        s.dtype,
        data=s.cast(physical_dtype),
        validity=s.is_not_null(),
    )
    assert_series_equal(s, rebuilt)
52
53
54
def test_series_from_buffers_int() -> None:
    """Build a UInt16 series from a data buffer plus a validity mask."""
    uint16 = pl.UInt16
    values = pl.Series([97, 98, 99, 195], dtype=uint16)
    # The third slot is flagged invalid, so its value (99) must become null.
    mask = pl.Series([True, True, False, True])

    actual = pl.Series._from_buffers(uint16, data=values, validity=mask)

    assert_series_equal(actual, pl.Series([97, 98, None, 195], dtype=uint16))
63
64
65
def test_series_from_buffers_float() -> None:
    """Build a Float64 series, including NaN and infinity, with a validity mask."""
    float64 = pl.Float64
    nan = float("nan")
    inf = float("inf")

    values = pl.Series([0.0, 1.0, -1.0, nan, inf], dtype=float64)
    # Only the third slot (-1.0) is masked out; NaN and inf stay valid values.
    mask = pl.Series([True, True, False, True, True])

    actual = pl.Series._from_buffers(float64, data=values, validity=mask)

    wanted = pl.Series([0.0, 1.0, None, nan, inf], dtype=float64)
    assert_series_equal(actual, wanted)
74
75
76
def test_series_from_buffers_boolean() -> None:
    """Build a Boolean series where both data and validity are boolean buffers."""
    values = pl.Series([True, False, True])
    # The last slot is flagged invalid, so its value must become null.
    mask = pl.Series([True, True, False])

    actual = pl.Series._from_buffers(pl.Boolean, data=values, validity=mask)

    assert_series_equal(actual, pl.Series([True, False, None]))
85
86
87
def test_series_from_buffers_datetime() -> None:
    """Build a time-zone-aware Datetime series from an Int64 data buffer."""
    amsterdam = ZoneInfo("Europe/Amsterdam")
    dtype = pl.Datetime(time_zone="Europe/Amsterdam")

    first = datetime(2022, 2, 10, 6, tzinfo=amsterdam)
    second = datetime(2022, 2, 11, 12, tzinfo=amsterdam)
    third = datetime(2022, 2, 12, 18, tzinfo=amsterdam)

    # The data buffer is the Int64 cast of the datetimes.
    timestamps = pl.Series([first, second, third], dtype=dtype).cast(pl.Int64)
    # The middle entry is flagged invalid and must come back as null.
    mask = pl.Series([True, False, True])

    actual = pl.Series._from_buffers(dtype, data=timestamps, validity=mask)

    wanted = pl.Series([first, None, third], dtype=dtype)
    assert_series_equal(actual, wanted)
111
112
113
def test_series_from_buffers_string() -> None:
    """Build a String series from a UInt8 byte buffer and Int64 offsets."""
    # UTF-8 bytes of "a", "bc", and "éâç" (two bytes per accented character).
    raw_bytes = pl.Series(
        [97, 98, 99, 195, 169, 195, 162, 195, 167],
        dtype=pl.UInt8,
    )
    # Consecutive offsets delimit each string; the empty 3..3 slot is the null.
    boundaries = pl.Series([0, 1, 3, 3, 9], dtype=pl.Int64)
    mask = pl.Series([True, True, False, True])

    actual = pl.Series._from_buffers(
        pl.String,
        data=[raw_bytes, boundaries],
        validity=mask,
    )

    wanted = pl.Series(["a", "bc", None, "éâç"], dtype=pl.String)
    assert_series_equal(actual, wanted)
123
124
125
def test_series_from_buffers_enum() -> None:
    """Build an Enum series from UInt8 category indices and a validity mask."""
    enum_dtype = pl.Enum(["a", "b", "c"])
    # Each index selects a category; the third slot is flagged invalid below.
    indices = pl.Series([0, 1, 0, 2], dtype=pl.UInt8)
    mask = pl.Series([True, True, False, True])

    actual = pl.Series._from_buffers(enum_dtype, data=indices, validity=mask)

    wanted = pl.Series(["a", "b", None, "c"], dtype=enum_dtype)
    assert_series_equal(actual, wanted)
134
135
136
def test_series_from_buffers_sliced() -> None:
    """Build a series from data and validity buffers that are both sliced."""
    int64 = pl.Int64

    # Keep only the tail (index 5 onwards) of each underlying buffer.
    values = pl.Series([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64)[5:]
    mask = pl.Series(
        [True, True, True, True, False, True, False, False, True]
    )[5:]

    actual = pl.Series._from_buffers(int64, data=values, validity=mask)

    assert_series_equal(actual, pl.Series([6, None, None, 9], dtype=int64))
147
148
149
def test_series_from_buffers_unsupported_validity() -> None:
    """A validity buffer that is not Boolean must raise a TypeError."""
    int_series = pl.Series([1, 2, 3])
    expected_message = "validity buffer must have data type Boolean, got Int64"

    # The same integer series is deliberately passed as the validity buffer.
    with pytest.raises(TypeError, match=expected_message):
        pl.Series._from_buffers(pl.Date, data=int_series, validity=int_series)
155
156
157
def test_series_from_buffers_unsupported_offsets() -> None:
    """An offsets buffer that is not Int64 must raise a TypeError."""
    raw_bytes = pl.Series(
        [97, 98, 99, 195, 169, 195, 162, 195, 167],
        dtype=pl.UInt8,
    )
    # Int8 offsets are given where the error message says Int64 is required.
    narrow_offsets = pl.Series([0, 1, 3, 3, 9], dtype=pl.Int8)
    expected_message = "offsets buffer must have data type Int64, got Int8"

    with pytest.raises(TypeError, match=expected_message):
        pl.Series._from_buffers(pl.String, data=[raw_bytes, narrow_offsets])
164
165
166
def test_series_from_buffers_offsets_do_not_match_data() -> None:
    """Offsets pointing past the end of the byte buffer must panic."""
    raw_bytes = pl.Series(
        [97, 98, 99, 195, 169, 195, 162, 195, 167],
        dtype=pl.UInt8,
    )
    # The final offset (11) exceeds the nine bytes available in the data buffer.
    overlong_offsets = pl.Series([0, 1, 3, 3, 9, 11], dtype=pl.Int64)
    expected_message = "offsets must not exceed the values length"

    with pytest.raises(PanicException, match=expected_message):
        pl.Series._from_buffers(pl.String, data=[raw_bytes, overlong_offsets])
173
174
175
def test_series_from_buffers_no_buffers() -> None:
    """Passing an empty list of data buffers must raise a TypeError."""
    expected_message = (
        "`data` input to `_from_buffers` must contain at least one buffer"
    )

    with pytest.raises(TypeError, match=expected_message):
        pl.Series._from_buffers(pl.Int32, data=[])
179
180