Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_round.py
8424 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, timedelta
4
from typing import TYPE_CHECKING
5
from zoneinfo import ZoneInfo
6
7
import hypothesis.strategies as st
8
import pytest
9
from hypothesis import given
10
11
import polars as pl
12
from polars._utils.convert import parse_as_duration_string
13
from polars.testing import assert_series_equal
14
15
if TYPE_CHECKING:
16
from polars._typing import RoundMode
17
from polars.type_aliases import TimeUnit
18
19
20
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
def test_round_by_day_datetime(time_zone: str | None) -> None:
    """Rounding 03:00 by ``"1d"`` gives midnight of the same day.

    Runs once on a naive series and once with a time zone attached.
    """

    def in_zone(value: datetime) -> pl.Series:
        # Single-element series carrying the parametrized time zone.
        return pl.Series([value]).dt.replace_time_zone(time_zone)

    rounded = in_zone(datetime(2021, 11, 7, 3)).dt.round("1d")
    assert_series_equal(rounded, in_zone(datetime(2021, 11, 7)))
26
27
28
def test_round_ambiguous() -> None:
    """Round to ``"30m"`` across a UTC-offset change in Europe/London.

    The expected values move from ``+0100`` to ``+0000`` part-way through, so
    the local wall-clock times 01:00 and 01:30 each appear twice. The same
    input is checked through both the Series and the expression API.
    """

    def london_half_hours() -> pl.Series:
        # Half-hourly timestamps from 00:00 to 02:00 local time on 2020-10-25.
        return pl.datetime_range(
            date(2020, 10, 25),
            datetime(2020, 10, 25, 2),
            "30m",
            eager=True,
            time_zone="Europe/London",
        )

    # Series API: the inputs sit 15 minutes past each half hour.
    off_grid = london_half_hours().alias("datetime").dt.offset_by("15m")
    rounded = off_grid.dt.round("30m")
    iso_stamps = [
        "2020-10-25T00:30:00+0100",
        "2020-10-25T01:00:00+0100",
        "2020-10-25T01:30:00+0100",
        "2020-10-25T01:00:00+0000",
        "2020-10-25T01:30:00+0000",
        "2020-10-25T02:00:00+0000",
        "2020-10-25T02:30:00+0000",
    ]
    parsed = pl.Series(iso_stamps).str.to_datetime()
    expected = parsed.dt.convert_time_zone("Europe/London").rename("datetime")
    assert_series_equal(rounded, expected)

    # Expression API: same input, compared as Python datetimes.
    frame = pl.DataFrame({"date": london_half_hours().dt.offset_by("15m")})
    rounded_frame = frame.select(pl.col("date").dt.round("30m"))
    london = ZoneInfo("Europe/London")
    wall_clock = [(0, 30), (1, 0), (1, 30), (1, 0), (1, 30), (2, 0), (2, 30)]
    assert rounded_frame.to_dict(as_series=False) == {
        "date": [
            datetime(2020, 10, 25, hour, minute, tzinfo=london)
            for hour, minute in wall_clock
        ]
    }
83
84
85
def test_round_by_week() -> None:
    """Compare ``"7d"`` and ``"1w"`` rounding of a Sunday and a Monday."""
    # First value is a Sunday, second a Monday.
    days = pl.Series(["1998-04-12", "2022-11-28"]).str.strptime(pl.Date, "%Y-%m-%d")
    frame = pl.DataFrame({"date": days})

    # One output column per duration string, named after that string.
    rounded = frame.select(
        [pl.col("date").dt.round(every).alias(every) for every in ("7d", "1w")]
    )

    assert rounded.to_dict(as_series=False) == {
        "7d": [date(1998, 4, 9), date(2022, 12, 1)],
        "1w": [date(1998, 4, 13), date(2022, 11, 28)],
    }
107
108
109
@given(
    datetimes=st.lists(
        st.datetimes(min_value=datetime(1960, 1, 1), max_value=datetime(1980, 1, 1)),
        min_size=1,
        max_size=3,
    ),
    every=st.timedeltas(
        min_value=timedelta(microseconds=1), max_value=timedelta(days=1)
    ).map(parse_as_duration_string),
)
def test_dt_round_fast_path_vs_slow_path(datetimes: list[datetime], every: str) -> None:
    """Rounding by a scalar ``every`` must match rounding by a per-row ``every``.

    Hypothesis supplies one to three datetimes and a duration string between
    one microsecond and one day.
    """
    values = pl.Series(datetimes)
    # Same duration repeated once per row, passed as a Series.
    per_row_every = pl.Series([every for _ in datetimes])

    # The scalar form might use the fast path.
    via_scalar = values.dt.round(every)
    # The Series form definitely uses the slow path.
    via_series = values.dt.round(per_row_every)

    assert_series_equal(via_scalar, via_series)
126
127
128
def test_round_date() -> None:
    """Round ``Date`` values for each column/literal/null pairing of input and ``every``."""
    every_column = [None, "1mo", "1mo"]
    all_null = pl.Series("a", [None, None, None], dtype=pl.Date)

    # Column rounded by column.
    frame = pl.DataFrame(
        {"a": [date(2020, 1, 1), None, date(2020, 1, 19)], "b": every_column}
    )
    out = frame.select(pl.col("a").dt.round(pl.col("b")))["a"]
    assert_series_equal(out, pl.Series("a", [None, None, date(2020, 2, 1)]))

    # The remaining cases all read from the same input frame.
    frame = pl.DataFrame(
        {"a": [date(2020, 1, 1), None, date(2020, 1, 3)], "b": every_column}
    )

    # Column rounded by a single duration string.
    out = frame.select(pl.col("a").dt.round("1mo"))["a"]
    assert_series_equal(out, pl.Series("a", [date(2020, 1, 1), None, date(2020, 1, 1)]))

    # Column rounded by a null literal.
    out = frame.select(pl.col("a").dt.round(pl.lit(None, dtype=pl.String)))["a"]
    assert_series_equal(out, all_null)

    # Single literal date rounded by a column.
    out = frame.select(a=pl.date(2020, 1, 1).dt.round(pl.col("b")))["a"]
    assert_series_equal(out, pl.Series("a", [None, date(2020, 1, 1), date(2020, 1, 1)]))

    # Null literal rounded by a column.
    out = frame.select(a=pl.lit(None, dtype=pl.Date).dt.round(pl.col("b")))["a"]
    assert_series_equal(out, all_null)
168
169
170
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_round_datetime_simple(time_unit: TimeUnit) -> None:
    """Round one datetime by ``"1mo"`` and by ``"1d"`` in every time unit."""
    stamp = pl.Series([datetime(2020, 1, 2, 6)], dtype=pl.Datetime(time_unit))
    cases = (
        ("1mo", datetime(2020, 1, 1)),
        ("1d", datetime(2020, 1, 2)),
    )
    for every, want in cases:
        assert stamp.dt.round(every).item() == want
177
178
179
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_round_datetime_w_expression(time_unit: TimeUnit) -> None:
    """Round each datetime by the duration string held in the same row."""
    columns = {
        "a": [datetime(2020, 1, 2, 6), datetime(2020, 1, 20, 21)],
        "b": ["1mo", "1d"],
    }
    frame = pl.DataFrame(columns, schema_overrides={"a": pl.Datetime(time_unit)})
    rounded = frame.select(pl.col("a").dt.round(pl.col("b")))["a"]

    wanted = (datetime(2020, 1, 1), datetime(2020, 1, 21))
    for row, want in enumerate(wanted):
        assert rounded[row] == want
188
189
190
@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [("ms", 0), ("us", 0), ("ns", 0)],
)
def test_round_negative_towards_epoch_18239(time_unit: TimeUnit, expected: int) -> None:
    """One unit before the epoch, rounded by two units, lands on the epoch.

    The result is compared as a nanosecond timestamp, first on the naive
    series and then after a round trip through Europe/London.
    """
    every = f"2{time_unit}"
    epoch = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
    just_before = epoch.dt.offset_by(f"-1{time_unit}")

    naive_ns = just_before.dt.round(every).dt.timestamp(time_unit="ns").item()
    assert naive_ns == expected

    # Same rounding with a time zone attached, stripped again before comparing.
    aware_rounded = just_before.dt.replace_time_zone("Europe/London").dt.round(every)
    aware_ns = (
        aware_rounded.dt.replace_time_zone(None).dt.timestamp(time_unit="ns").item()
    )
    assert aware_ns == expected
211
212
213
@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [("ms", 2_000_000), ("us", 2_000), ("ns", 2)],
)
def test_round_positive_away_from_epoch_18239(
    time_unit: TimeUnit, expected: int
) -> None:
    """One unit after the epoch, rounded by two units, lands two units after it.

    ``expected`` is that two-unit offset expressed in nanoseconds. The check
    runs on the naive series and after a round trip through Europe/London.
    """
    every = f"2{time_unit}"
    epoch = pl.Series([datetime(1970, 1, 1)], dtype=pl.Datetime(time_unit))
    just_after = epoch.dt.offset_by(f"1{time_unit}")

    naive_ns = just_after.dt.round(every).dt.timestamp(time_unit="ns").item()
    assert naive_ns == expected

    # Same rounding with a time zone attached, stripped again before comparing.
    aware_rounded = just_after.dt.replace_time_zone("Europe/London").dt.round(every)
    aware_ns = (
        aware_rounded.dt.replace_time_zone(None).dt.timestamp(time_unit="ns").item()
    )
    assert aware_ns == expected
236
237
238
@pytest.mark.parametrize("as_date", [False, True])
def test_round_unequal_length_22018(as_date: bool) -> None:
    """A two-element ``every`` Series of mismatched length raises ``ShapeError``.

    Checked on a datetime series and, when ``as_date`` is set, on its dates.
    """
    series = pl.datetime_range(
        datetime(2001, 1, 1), datetime(2001, 1, 1, 1), "10m", eager=True
    ).alias("datetime")
    series = series.dt.date() if as_date else series

    mismatched_every = pl.Series(["30m", "20m"])
    with pytest.raises(pl.exceptions.ShapeError):
        series.dt.round(mismatched_every)
248
249
250
@pytest.mark.parametrize("mode", ["half_to_even", "half_away_from_zero"])
def test_round_small(mode: RoundMode) -> None:
    """Check ``round`` and ``round_sig_figs`` on the tiny float 1.234e-320."""
    small = 1.234e-320
    tiny = pl.Series([small])

    # With no decimals argument the value rounds to zero.
    assert tiny.round(mode=mode).item() == 0.0

    # (decimals, expected) pairs for round().
    by_decimals = [
        (320, 1e-320),
        (321, 1.2e-320),
        (322, 1.23e-320),
        (323, 1.234e-320),
        (324, small),
        (1000, small),
    ]
    for decimals, want in by_decimals:
        assert tiny.round(decimals, mode=mode).item() == want

    # (digits, expected) pairs for round_sig_figs().
    by_digits = [
        (1, 1e-320),
        (2, 1.2e-320),
        (3, 1.23e-320),
        (4, 1.234e-320),
        (5, small),
        (1000, small),
    ]
    for digits, want in by_digits:
        assert tiny.round_sig_figs(digits).item() == want
268
269
270
@pytest.mark.parametrize("mode", ["half_to_even", "half_away_from_zero"])
def test_round_big(mode: RoundMode) -> None:
    """Check ``round`` and ``round_sig_figs`` on the very large float 1.234e308.

    ``round`` must return the value unchanged for every decimals count tried.
    ``round_sig_figs`` results are compared within ``max_err`` (the value
    divided by 10**10) rather than exactly, except for the 100-digit case.
    """
    big = 1.234e308
    max_err = big / 10**10
    big_s = pl.Series([big])

    # round() leaves the value untouched for each decimals count.
    assert big_s.round(mode=mode).item() == big
    assert big_s.round(1, mode=mode).item() == big
    assert big_s.round(100, mode=mode).item() == big

    assert abs(big_s.round_sig_figs(1).item() - 1e308) <= max_err
    assert abs(big_s.round_sig_figs(2).item() - 1.2e308) <= max_err
    assert abs(big_s.round_sig_figs(3).item() - 1.23e308) <= max_err
    assert abs(big_s.round_sig_figs(4).item() - 1.234e308) <= max_err
    # Five digits is more than the literal carries, so the value is expected
    # back within tolerance. This line previously repeated the 4-digit check.
    assert abs(big_s.round_sig_figs(5).item() - big) <= max_err
    assert big_s.round_sig_figs(100).item() == big
285
286