Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_round.py
6940 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, timedelta
4
from typing import TYPE_CHECKING
5
from zoneinfo import ZoneInfo
6
7
import hypothesis.strategies as st
8
import pytest
9
from hypothesis import given
10
11
import polars as pl
12
from polars._utils.convert import parse_as_duration_string
13
from polars.testing import assert_series_equal
14
15
if TYPE_CHECKING:
16
from polars.type_aliases import TimeUnit
17
18
19
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
def test_round_by_day_datetime(time_zone: str | None) -> None:
    """Rounding 2021-11-07 03:00 to "1d" gives midnight of the same day.

    Runs once without a time zone and once with "Asia/Kathmandu".
    """

    def single_value(moment: datetime) -> pl.Series:
        # Input and expectation are built identically so they share a dtype.
        return pl.Series([moment]).dt.replace_time_zone(time_zone)

    rounded = single_value(datetime(2021, 11, 7, 3)).dt.round("1d")
    assert_series_equal(rounded, single_value(datetime(2021, 11, 7)))
25
26
27
def test_round_ambiguous() -> None:
    """Round a Europe/London range on 2020-10-25 to "30m".

    The range is shifted by 15 minutes before rounding. The expected output
    contains 01:00 and 01:30 twice: once at +0100 and once at +0000.
    The same rounding is checked through the Series and the expression API.
    """

    def london_half_hours() -> pl.Series:
        # 00:00 through 02:00 in 30-minute steps, Europe/London.
        return pl.datetime_range(
            date(2020, 10, 25),
            datetime(2020, 10, 25, 2),
            "30m",
            eager=True,
            time_zone="Europe/London",
        )

    # Series API.
    shifted = london_half_hours().alias("datetime").dt.offset_by("15m")
    rounded = shifted.dt.round("30m")
    iso_strings = [
        "2020-10-25T00:30:00+0100",
        "2020-10-25T01:00:00+0100",
        "2020-10-25T01:30:00+0100",
        "2020-10-25T01:00:00+0000",
        "2020-10-25T01:30:00+0000",
        "2020-10-25T02:00:00+0000",
        "2020-10-25T02:30:00+0000",
    ]
    parsed = pl.Series(iso_strings).str.to_datetime()
    expected = parsed.dt.convert_time_zone("Europe/London").rename("datetime")
    assert_series_equal(rounded, expected)

    # Expression API on a DataFrame column.
    frame = pl.DataFrame({"date": london_half_hours().dt.offset_by("15m")})
    frame = frame.select(pl.col("date").dt.round("30m"))

    london = ZoneInfo("Europe/London")
    wall_clock = [(0, 30), (1, 0), (1, 30), (1, 0), (1, 30), (2, 0), (2, 30)]
    expected_rows = {
        "date": [
            datetime(2020, 10, 25, hour, minute, tzinfo=london)
            for hour, minute in wall_clock
        ]
    }
    assert frame.to_dict(as_series=False) == expected_rows
82
83
84
def test_round_by_week() -> None:
    """Compare rounding two dates to "7d" with rounding them to "1w"."""
    # Sunday and Monday
    raw_dates = pl.Series(["1998-04-12", "2022-11-28"])
    frame = pl.DataFrame({"date": raw_dates.str.strptime(pl.Date, "%Y-%m-%d")})

    date_col = pl.col("date")
    rounded = frame.select(
        date_col.dt.round("7d").alias("7d"),
        date_col.dt.round("1w").alias("1w"),
    )

    expected_rows = {
        "7d": [date(1998, 4, 9), date(2022, 12, 1)],
        "1w": [date(1998, 4, 13), date(2022, 11, 28)],
    }
    assert rounded.to_dict(as_series=False) == expected_rows
106
107
108
@given(
    datetimes=st.lists(
        st.datetimes(min_value=datetime(1960, 1, 1), max_value=datetime(1980, 1, 1)),
        min_size=1,
        max_size=3,
    ),
    every=st.timedeltas(
        min_value=timedelta(microseconds=1), max_value=timedelta(days=1)
    ).map(parse_as_duration_string),
)
def test_dt_round_fast_path_vs_slow_path(datetimes: list[datetime], every: str) -> None:
    """A scalar ``every`` and a per-row ``every`` Series must round identically."""
    values = pl.Series(datetimes)

    # Scalar duration: might use fastpath.
    via_scalar = values.dt.round(every)

    # One duration string per row: definitely uses slowpath.
    every_per_row = pl.Series([every for _ in datetimes])
    via_series = values.dt.round(every_per_row)

    assert_series_equal(via_scalar, via_series)
125
126
127
def test_round_date() -> None:
    """Round Date values by "1mo" for each pairing of column, literal and null.

    The five cases pair a date column or literal (possibly null) with an
    ``every`` column or literal (possibly null) and check null propagation.
    """

    def make_frame(last_day: int) -> pl.DataFrame:
        # Only the day of the third date differs between cases.
        return pl.DataFrame(
            {
                "a": [date(2020, 1, 1), None, date(2020, 1, last_day)],
                "b": [None, "1mo", "1mo"],
            }
        )

    every_col = pl.col("b")

    # n vs n
    rounded = make_frame(19).select(pl.col("a").dt.round(every_col))["a"]
    assert_series_equal(rounded, pl.Series("a", [None, None, date(2020, 2, 1)]))

    # n vs 1
    rounded = make_frame(3).select(pl.col("a").dt.round("1mo"))["a"]
    assert_series_equal(
        rounded, pl.Series("a", [date(2020, 1, 1), None, date(2020, 1, 1)])
    )

    # n vs missing
    null_every = pl.lit(None, dtype=pl.String)
    rounded = make_frame(3).select(pl.col("a").dt.round(null_every))["a"]
    assert_series_equal(rounded, pl.Series("a", [None, None, None], dtype=pl.Date))

    # 1 vs n
    rounded = make_frame(3).select(a=pl.date(2020, 1, 1).dt.round(every_col))["a"]
    assert_series_equal(
        rounded, pl.Series("a", [None, date(2020, 1, 1), date(2020, 1, 1)])
    )

    # missing vs n
    null_date = pl.lit(None, dtype=pl.Date)
    rounded = make_frame(3).select(a=null_date.dt.round(every_col))["a"]
    assert_series_equal(rounded, pl.Series("a", [None, None, None], dtype=pl.Date))
167
168
169
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_round_datetime_simple(time_unit: TimeUnit) -> None:
    """Round 2020-01-02 06:00 to "1mo" and to "1d" for each time unit."""
    values = pl.Series([datetime(2020, 1, 2, 6)], dtype=pl.Datetime(time_unit))

    cases = [
        ("1mo", datetime(2020, 1, 1)),
        ("1d", datetime(2020, 1, 2)),
    ]
    for every, wanted in cases:
        rounded = values.dt.round(every).item()
        assert rounded == wanted
176
177
178
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_round_datetime_w_expression(time_unit: TimeUnit) -> None:
    """Round each datetime by the duration held in the same row of column "b"."""
    rows = {
        "a": [datetime(2020, 1, 2, 6), datetime(2020, 1, 20, 21)],
        "b": ["1mo", "1d"],
    }
    frame = pl.DataFrame(rows, schema_overrides={"a": pl.Datetime(time_unit)})

    rounding = pl.col("a").dt.round(pl.col("b"))
    rounded = frame.select(rounding)["a"]

    # Row 0 is rounded by "1mo", row 1 by "1d".
    assert rounded[0] == datetime(2020, 1, 1)
    assert rounded[1] == datetime(2020, 1, 21)
187
188
189
@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [
        ("ms", 0),
        ("us", 0),
        ("ns", 0),
    ],
)
def test_round_negative_towards_epoch_18239(time_unit: TimeUnit, expected: int) -> None:
    """One unit before the epoch, rounded to two units, gives timestamp 0.

    Checked on the naive series and again after attaching "Europe/London".
    """
    every = f"2{time_unit}"
    epoch = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
    one_unit_before = epoch.dt.offset_by(f"-1{time_unit}")

    # Naive datetimes.
    naive_ns = one_unit_before.dt.round(every).dt.timestamp(time_unit="ns").item()
    assert naive_ns == expected

    # Zoned datetimes; the zone is dropped again before reading the timestamp.
    zoned = one_unit_before.dt.replace_time_zone("Europe/London")
    zoned_rounded = zoned.dt.round(every).dt.replace_time_zone(None)
    zoned_ns = zoned_rounded.dt.timestamp(time_unit="ns").item()
    assert zoned_ns == expected
210
211
212
@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [
        ("ms", 2_000_000),
        ("us", 2_000),
        ("ns", 2),
    ],
)
def test_round_positive_away_from_epoch_18239(
    time_unit: TimeUnit, expected: int
) -> None:
    """One unit after the epoch, rounded to two units, gives two units.

    ``expected`` is that result as a nanosecond timestamp. Checked on the
    naive series and again after attaching "Europe/London".
    """
    every = f"2{time_unit}"
    epoch = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
    one_unit_after = epoch.dt.offset_by(f"1{time_unit}")

    # Naive datetimes.
    naive_ns = one_unit_after.dt.round(every).dt.timestamp(time_unit="ns").item()
    assert naive_ns == expected

    # Zoned datetimes; the zone is dropped again before reading the timestamp.
    zoned = one_unit_after.dt.replace_time_zone("Europe/London")
    zoned_rounded = zoned.dt.round(every).dt.replace_time_zone(None)
    zoned_ns = zoned_rounded.dt.timestamp(time_unit="ns").item()
    assert zoned_ns == expected
235
236
237
@pytest.mark.parametrize("as_date", [False, True])
def test_round_unequal_length_22018(as_date: bool) -> None:
    """Rounding by a two-element ``every`` Series must raise ``ShapeError``.

    The values span 2001-01-01 00:00 to 01:00 in 10-minute steps, either as
    datetimes or, when ``as_date`` is true, converted to dates.
    """
    stamps = pl.datetime_range(
        datetime(2001, 1, 1), datetime(2001, 1, 1, 1), "10m", eager=True
    ).alias("datetime")
    values = stamps.dt.date() if as_date else stamps

    mismatched_every = pl.Series(["30m", "20m"])
    with pytest.raises(pl.exceptions.ShapeError):
        values.dt.round(mismatched_every)
247
248
249
def test_round_small() -> None:
    """Check ``round`` and ``round_sig_figs`` on the tiny float 1.234e-320."""
    small = 1.234e-320
    small_s = pl.Series([small])

    # Default rounding collapses the value to zero.
    assert small_s.round().item() == 0.0

    # (decimals, expected) pairs for round().
    by_decimals = [
        (320, 1e-320),
        (321, 1.2e-320),
        (322, 1.23e-320),
        (323, 1.234e-320),
        (324, small),
        (1000, small),
    ]
    for decimals, wanted in by_decimals:
        assert small_s.round(decimals).item() == wanted

    # (digits, expected) pairs for round_sig_figs().
    by_sig_figs = [
        (1, 1e-320),
        (2, 1.2e-320),
        (3, 1.23e-320),
        (4, 1.234e-320),
        (5, small),
        (1000, small),
    ]
    for digits, wanted in by_sig_figs:
        assert small_s.round_sig_figs(digits).item() == wanted
266
267
268
def test_round_big() -> None:
    """Check ``round`` and ``round_sig_figs`` on the huge float 1.234e308.

    ``round`` must return the value unchanged for every ``decimals`` tried.
    ``round_sig_figs`` is compared within a tolerance of ``big / 10**10``
    for 1 to 5 significant figures, and exactly for 100.
    """
    big = 1.234e308
    max_err = big / 10**10
    big_s = pl.Series([big])

    for decimals in (None, 1, 100):
        # round() with no argument is the first case of the original test.
        rounded = big_s.round() if decimals is None else big_s.round(decimals)
        assert rounded.item() == big

    # (digits, expected) pairs checked within max_err.
    # ``big`` has four significant figures, so five figures must also give
    # ``big`` back; this mirrors the 5-digit case in test_round_small.
    approx_cases = [
        (1, 1e308),
        (2, 1.2e308),
        (3, 1.23e308),
        (4, 1.234e308),
        (5, big),
    ]
    for digits, wanted in approx_cases:
        assert abs(big_s.round_sig_figs(digits).item() - wanted) <= max_err

    assert big_s.round_sig_figs(100).item() == big
282
283