Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/utils/test_utils.py
8410 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, time, timedelta
4
from typing import TYPE_CHECKING, Any
5
from zoneinfo import ZoneInfo
6
7
import numpy as np
8
import pytest
9
10
import polars as pl
11
from polars._utils.convert import (
12
date_to_int,
13
datetime_to_int,
14
parse_as_duration_string,
15
time_to_int,
16
timedelta_to_int,
17
)
18
from polars._utils.various import (
19
_in_notebook,
20
is_bool_sequence,
21
is_int_sequence,
22
is_sequence,
23
is_str_sequence,
24
parse_percentiles,
25
parse_version,
26
)
27
28
if TYPE_CHECKING:
29
from collections.abc import Sequence
30
31
from polars._typing import TimeUnit
32
33
34
@pytest.mark.parametrize(
    ("td", "expected"),
    [
        # A zero-length timedelta is expected to give an empty string.
        (timedelta(), ""),
        # Single-component durations, positive and negative.
        (timedelta(days=1), "1d"),
        (timedelta(days=-1), "-1d"),
        (timedelta(seconds=1), "1s"),
        (timedelta(seconds=-1), "-1s"),
        (timedelta(microseconds=1), "1us"),
        (timedelta(microseconds=-1), "-1us"),
        # Mixed components; a negative result carries a single leading sign.
        (timedelta(days=1, seconds=1), "1d1s"),
        (timedelta(minutes=-1, seconds=1), "-59s"),
        (timedelta(days=-1, seconds=-1), "-1d1s"),
        (timedelta(days=1, microseconds=1), "1d1us"),
        (timedelta(days=-1, microseconds=-1), "-1d1us"),
        # None and an already-formatted string are expected back unchanged.
        (None, None),
        ("1d2s", "1d2s"),
    ],
)
def test_parse_as_duration_string(
    td: timedelta | str | None, expected: str | None
) -> None:
    """Check the value `parse_as_duration_string` returns for each input."""
    duration_string = parse_as_duration_string(td)
    assert duration_string == expected
57
58
59
@pytest.mark.parametrize(
    ("d", "expected"),
    [
        (date(1999, 9, 9), 10_843),
        # The day before 1970-01-01 is expected to map to -1.
        (date(1969, 12, 31), -1),
        # Extremes of the stdlib `date` range.
        (date.min, -719_162),
        (date.max, 2_932_896),
    ],
)
def test_date_to_int(d: date, expected: int) -> None:
    """Check the integer `date_to_int` returns for each date in the table."""
    as_int = date_to_int(d)
    assert as_int == expected
70
71
72
@pytest.mark.parametrize(
    ("t", "expected"),
    [
        (time(0, 0, 1), 1_000_000_000),
        (time(20, 52, 10), 75_130_000_000_000),
        (time(20, 52, 10, 200), 75_130_000_200_000),
        # Extremes of the stdlib `time` range.
        (time.min, 0),
        (time.max, 86_399_999_999_000),
        # Noon with and without tzinfo: every case expects the same integer.
        (time(12, 0, tzinfo=None), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("UTC")), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("Asia/Shanghai")), 43_200_000_000_000),
        (time(12, 0, tzinfo=ZoneInfo("America/Chicago")), 43_200_000_000_000),
    ],
)
def test_time_to_int(t: time, expected: int) -> None:
    """Check the integer `time_to_int` returns for each time in the table."""
    as_int = time_to_int(t)
    assert as_int == expected
88
89
90
@pytest.mark.parametrize(
    "tzinfo",
    [None, ZoneInfo("UTC"), ZoneInfo("Asia/Shanghai"), ZoneInfo("America/Chicago")],
)
def test_time_to_int_with_time_zone(tzinfo: Any) -> None:
    """Check that noon converts to the same integer for every `tzinfo` tried."""
    noon = time(12, 0, tzinfo=tzinfo)
    as_int = time_to_int(noon)
    assert as_int == 43_200_000_000_000
97
98
99
@pytest.mark.parametrize(
    ("dt", "time_unit", "expected"),
    [
        # The same instant expressed in each supported time unit.
        (datetime(2121, 1, 1), "ns", 4_765_132_800_000_000_000),
        (datetime(2121, 1, 1), "us", 4_765_132_800_000_000),
        (datetime(2121, 1, 1), "ms", 4_765_132_800_000),
        # Instants just before 1970-01-01 are expected to be negative.
        (datetime(1969, 12, 31, 23, 59, 59, 999999), "us", -1),
        (datetime(1969, 12, 30, 23, 59, 59, 999999), "us", -86_400_000_001),
        # Extremes of the stdlib `datetime` range.
        (datetime.min, "ns", -62_135_596_800_000_000_000),
        (datetime.max, "ns", 253_402_300_799_999_999_000),
        (datetime.min, "ms", -62_135_596_800_000),
        (datetime.max, "ms", 253_402_300_799_999),
    ],
)
def test_datetime_to_int(dt: datetime, time_unit: TimeUnit, expected: int) -> None:
    """Check the integer `datetime_to_int` returns for each datetime and unit."""
    as_int = datetime_to_int(dt, time_unit)
    assert as_int == expected
115
116
117
@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        # Naive and UTC-aware noon are expected to give the same integer.
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=None),
            946_728_000_000_000,
        ),
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("UTC")),
            946_728_000_000_000,
        ),
        # Noon in the other zones is expected to give a different integer.
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
            946_699_200_000_000,
        ),
        (
            datetime(2000, 1, 1, 12, 0, tzinfo=ZoneInfo("America/Chicago")),
            946_749_600_000_000,
        ),
    ],
)
def test_datetime_to_int_with_time_zone(dt: datetime, expected: int) -> None:
    """Check the microsecond integer `datetime_to_int` returns per time zone."""
    as_microseconds = datetime_to_int(dt, "us")
    assert as_microseconds == expected
140
141
142
@pytest.mark.parametrize(
    ("td", "time_unit", "expected"),
    [
        # One day expressed in each supported time unit.
        (timedelta(days=1), "ns", 86_400_000_000_000),
        (timedelta(days=1), "us", 86_400_000_000),
        (timedelta(days=1), "ms", 86_400_000),
        # Extremes of the stdlib `timedelta` range.
        (timedelta.min, "ns", -86_399_999_913_600_000_000_000),
        (timedelta.max, "ns", 86_399_999_999_999_999_999_000),
        (timedelta.min, "ms", -86_399_999_913_600_000),
        (timedelta.max, "ms", 86_399_999_999_999_999),
    ],
)
def test_timedelta_to_int(td: timedelta, time_unit: TimeUnit, expected: int) -> None:
    """Check the integer `timedelta_to_int` returns for each duration and unit."""
    as_int = timedelta_to_int(td, time_unit)
    assert as_int == expected
156
157
158
def test_estimated_size() -> None:
    """Compare Series and DataFrame size estimates across the size units."""
    series = pl.Series("n", list(range(100)))
    frame = series.to_frame()

    # With no unit, "b" or "bytes", the Series estimate must equal the
    # frame's default estimate.
    series_byte_sizes = (
        series.estimated_size(),
        series.estimated_size("b"),
        series.estimated_size("bytes"),
    )
    for byte_size in series_byte_sizes:
        assert byte_size == frame.estimated_size()

    # Each larger unit must equal the next smaller unit divided by 1024.
    assert series.estimated_size("kb") == (frame.estimated_size("b") / 1024)
    assert series.estimated_size("mb") == (frame.estimated_size("kb") / 1024)
    assert series.estimated_size("gb") == (frame.estimated_size("mb") / 1024)
    assert series.estimated_size("tb") == (frame.estimated_size("gb") / 1024)

    # A unit name that is not recognised must raise.
    with pytest.raises(ValueError):
        series.estimated_size("milkshake")  # type: ignore[arg-type]
172
173
174
def test_estimated_size_sliced_list_25068() -> None:
    """Check that slicing off half of a list column at most halves the estimate."""
    df = pl.select(pl.int_range(10000).cast(pl.List(pl.Int64)))

    # Drop the first 5000 of the 10000 rows and compare against the full frame.
    sliced_size = df.slice(5000).estimated_size()
    full_size = df.estimated_size()
    assert sliced_size / full_size <= 0.5
178
179
180
@pytest.mark.parametrize(
    ("v1", "v2"),
    [
        # In every pair, v1 is expected to parse as the greater version.
        ("0.16.8", "0.16.7"),
        ("23.0.0", (3, 1000)),
        ((23, 0, 0), "3.1000"),
        (("0", "0", "2beta"), "0.0.1"),
        (("2", "5", "0", "1"), (2, 5, 0)),
    ],
)
def test_parse_version(v1: Any, v2: Any) -> None:
    """Check that parsed versions order correctly for str and tuple inputs."""
    greater = parse_version(v1)
    lesser = parse_version(v2)

    # Exercise both comparison directions.
    assert greater > lesser
    assert lesser < greater
193
194
195
@pytest.mark.slow
def test_in_notebook() -> None:
    """Check that `_in_notebook` reports a falsy value in this test run."""
    # private function, but easier to test this separately and mock it in the callers
    detected = _in_notebook()
    assert not detected
199
200
201
@pytest.mark.parametrize(
    ("percentiles", "expected", "inject_median"),
    [
        # With inject_median=True, 0.5 is expected in every result.
        (None, [0.5], True),
        (0.2, [0.2, 0.5], True),
        (0.5, [0.5], True),
        ((0.25, 0.75), [0.25, 0.5, 0.75], True),
        # Undocumented effect - percentiles get sorted.
        # Can be changed, this serves as documentation of current behaviour.
        ((0.6, 0.3), [0.3, 0.5, 0.6], True),
        # With inject_median=False, only the supplied values are expected.
        (None, [], False),
        (0.2, [0.2], False),
        (0.5, [0.5], False),
        ((0.25, 0.75), [0.25, 0.75], False),
        ((0.6, 0.3), [0.3, 0.6], False),
    ],
)
def test_parse_percentiles(
    percentiles: Sequence[float] | float | None,
    expected: Sequence[float],
    inject_median: bool,
) -> None:
    """Check the list `parse_percentiles` returns with and without the median."""
    parsed = parse_percentiles(percentiles, inject_median=inject_median)
    assert parsed == expected
224
225
226
@pytest.mark.parametrize("percentiles", [1.1, [-0.1]])
def test_parse_percentiles_errors(percentiles: Sequence[float] | float | None) -> None:
    """Expect ValueError for the percentile inputs 1.1 and [-0.1]."""
    with pytest.raises(ValueError):
        parse_percentiles(percentiles)
230
231
232
@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        # Series inputs: only a boolean Series with include_series=True passes.
        (pl.Series(["xx", "yy"]), True, False),
        (pl.Series([True, False]), False, False),
        (pl.Series([True, False]), True, True),
        # NumPy boolean arrays pass for either include_series value.
        (np.array([False, True]), False, True),
        (np.array([False, True]), True, True),
        # Plain Python inputs.
        ([True, False], False, True),
        (["xx", "yy"], False, False),
        (True, False, False),
    ],
)
def test_is_bool_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Check `is_bool_sequence`, and `is_sequence` for the positive cases."""
    outcome = is_bool_sequence(sequence, include_series=include_series)
    assert outcome == expected

    # Negative cases make no claim about `is_sequence`.
    if not expected:
        return
    assert is_sequence(sequence, include_series=include_series)
253
254
255
@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        # Series inputs: only an integer Series with include_series=True passes.
        (pl.Series(["xx", "yy"]), True, False),
        (pl.Series([123, 345]), False, False),
        (pl.Series([123, 345]), True, True),
        # NumPy integer arrays pass for either include_series value.
        (np.array([123, 345]), False, True),
        (np.array([123, 345]), True, True),
        # Plain Python inputs.
        (["xx", "yy"], False, False),
        ([123, 456], False, True),
        (123, False, False),
    ],
)
def test_is_int_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Check `is_int_sequence`, and `is_sequence` for the positive cases."""
    outcome = is_int_sequence(sequence, include_series=include_series)
    assert outcome == expected

    # Negative cases make no claim about `is_sequence`.
    if not expected:
        return
    assert is_sequence(sequence, include_series=include_series)
276
277
278
@pytest.mark.parametrize(
    ("sequence", "include_series", "expected"),
    [
        # Series inputs: only a string Series with include_series=True passes.
        (pl.Series(["xx", "yy"]), False, False),
        (pl.Series(["xx", "yy"]), True, True),
        (pl.Series([123, 345]), True, False),
        # NumPy string arrays pass for either include_series value.
        (np.array(["xx", "yy"]), False, True),
        (np.array(["xx", "yy"]), True, True),
        # Plain Python inputs; a bare string is expected to fail the check.
        (["xx", "yy"], False, True),
        ([123, 456], False, False),
        ("xx", False, False),
    ],
)
def test_is_str_sequence_check(
    sequence: Any,
    include_series: bool,
    expected: bool,
) -> None:
    """Check `is_str_sequence`, and `is_sequence` for the positive cases."""
    outcome = is_str_sequence(sequence, include_series=include_series)
    assert outcome == expected

    # Negative cases make no claim about `is_sequence`.
    if not expected:
        return
    assert is_sequence(sequence, include_series=include_series)
299
300