Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/range/test_datetime_range.py
8395 views
1
from __future__ import annotations
2
3
from datetime import date, datetime, timedelta
4
from typing import TYPE_CHECKING
5
from zoneinfo import ZoneInfo
6
7
import hypothesis.strategies as st
8
import pytest
9
from hypothesis import given, settings
10
11
import polars as pl
12
from polars.datatypes import DTYPE_TEMPORAL_UNITS
13
from polars.exceptions import ComputeError, InvalidOperationError, SchemaError
14
from polars.testing import assert_frame_equal, assert_series_equal
15
16
if TYPE_CHECKING:
17
from polars._typing import ClosedInterval, PolarsDataType, TimeUnit
18
19
20
def test_datetime_range() -> None:
    """Check eager ``pl.datetime_range`` with timedelta, string and ns intervals."""
    # Interval given as a timedelta (1 day 12 hours) with ``date`` endpoints.
    step = timedelta(days=1, hours=12)
    long_range = pl.datetime_range(
        date(1985, 1, 1), date(2015, 7, 1), step, eager=True
    )
    assert len(long_range) == 7426
    assert long_range.dt[0] == datetime(1985, 1, 1)
    assert long_range.dt[1] == datetime(1985, 1, 2, 12, 0)
    assert long_range.dt[2] == datetime(1985, 1, 4, 0, 0)
    assert long_range.dt[-1] == datetime(2015, 6, 30, 12, 0)

    # The requested time unit must be reflected in the resulting dtype.
    for time_unit in DTYPE_TEMPORAL_UNITS:
        two_hourly = pl.datetime_range(
            datetime(2020, 1, 1),
            date(2020, 1, 2),
            "2h",
            time_unit=time_unit,
            eager=True,
        )
        assert two_hourly.dtype.time_unit == time_unit  # type: ignore[attr-defined]
        assert two_hourly.shape == (13,)
        assert two_hourly.dt[0] == datetime(2020, 1, 1)
        assert two_hourly.dt[-1] == datetime(2020, 1, 2)

    # Compound string interval: 17 values, 90 minutes apart, covering one day.
    ninety_minutes = pl.datetime_range(
        date(2022, 1, 1), date(2022, 1, 2), "1h30m", eager=True
    )
    first = datetime(2022, 1, 1, 0, 0)
    expected = [first + timedelta(minutes=90 * i) for i in range(17)]
    assert list(ninety_minutes) == expected

    # Nanosecond interval without an explicit time unit.
    nano_range = pl.datetime_range(
        datetime(2022, 1, 1), datetime(2022, 1, 1, 0, 1), "987456321ns", eager=True
    )
    assert len(nano_range) == 61
    assert nano_range.dtype.time_unit == "ns"  # type: ignore[attr-defined]
    assert nano_range.dt.second()[-1] == 59
    assert nano_range.cast(pl.String)[-1] == "2022-01-01 00:00:59.247379260"
71
72
73
@pytest.mark.parametrize(
    ("time_unit", "expected_micros"),
    [
        ("ms", 986000),
        ("us", 986759),
        ("ns", 986759),
        (None, 986759),
    ],
)
def test_datetime_range_precision(
    time_unit: TimeUnit | None, expected_micros: int
) -> None:
    """Check the sub-second precision of the endpoints for each time unit.

    The parametrization expects ``"ms"`` to keep only millisecond digits,
    while the other units keep the full microsecond value.
    """
    input_micros = 986759
    start = datetime(2000, 5, 30, 1, 53, 4, input_micros)
    stop = datetime(2000, 5, 31, 1, 53, 4, input_micros)

    result = pl.datetime_range(start, stop, time_unit=time_unit, eager=True)

    first, second = result[0], result[1]
    assert first == start.replace(microsecond=expected_micros)
    assert second == stop.replace(microsecond=expected_micros)
93
94
95
def test_datetime_range_invalid_time_unit() -> None:
    """An interval string with the unknown unit ``X`` must raise."""
    range_start = datetime(2021, 12, 16)
    range_end = datetime(2021, 12, 16, 3)
    with pytest.raises(InvalidOperationError, match="'x' not supported"):
        pl.datetime_range(
            start=range_start,
            end=range_end,
            interval="1X",
            eager=True,
        )
103
104
105
def test_datetime_range_interval_too_small() -> None:
    """A 1ns interval combined with an explicit ``us`` time unit must raise."""
    # start/end/interval
    expected_message = (
        "interval 1ns is too small for time unit μs and was rounded down to zero"
    )
    with pytest.raises(InvalidOperationError, match=expected_message):
        pl.datetime_range(
            start=datetime(2025, 1, 1),
            end=datetime(2025, 1, 5),
            interval="1ns",
            time_unit="us",
            eager=True,
        )
118
119
120
def test_datetime_range_output_ns_due_to_interval() -> None:
    """With no explicit time unit, a 1ns interval yields a ``ns`` Datetime range."""
    one_microsecond_later = datetime(2025, 1, 1, 0, 0, 0, 1)
    result = pl.datetime_range(
        start=datetime(2025, 1, 1),
        end=one_microsecond_later,
        interval="1ns",
        eager=True,
    )
    # One microsecond in 1ns steps, both endpoints included.
    assert result.len() == 1001
    assert result.dtype == pl.Datetime(time_unit="ns")
129
130
131
def test_datetime_range_lazy_time_zones() -> None:
    """Check a lazy ``datetime_range`` whose ``time_zone`` differs from its inputs."""
    kathmandu = ZoneInfo("Asia/Kathmandu")
    start = datetime(2020, 1, 1, tzinfo=kathmandu)
    stop = datetime(2020, 1, 2, tzinfo=kathmandu)

    # The interval is far longer than the span, so only one value is produced.
    range_expr = pl.datetime_range(
        start=start,
        end=stop,
        interval="678d",
        eager=False,
        time_zone="Pacific/Tarawa",
    )
    frame = pl.DataFrame({"start": [start], "stop": [stop]})
    result = frame.with_columns(range_expr).lazy()

    expected_columns = {
        "start": [start],
        "stop": [stop],
        "literal": [
            datetime(2020, 1, 1, 6, 15, tzinfo=ZoneInfo(key="Pacific/Tarawa"))
        ],
    }
    expected = pl.DataFrame(expected_columns).with_columns(
        pl.col("literal").dt.convert_time_zone("Pacific/Tarawa")
    )
    assert_frame_equal(result.collect(), expected)
161
162
163
def test_datetime_range_invalid_time_zone() -> None:
    """An unparsable ``time_zone`` string must raise a ``ComputeError``."""
    first_day = datetime(2001, 1, 1)
    last_day = datetime(2001, 1, 3)
    with pytest.raises(ComputeError, match="unable to parse time zone: 'foo'"):
        pl.datetime_range(first_day, last_day, time_zone="foo", eager=True)
171
172
173
def test_timezone_aware_datetime_range() -> None:
    """Check ranges built from tz-aware endpoints, and the aware/naive mix error."""
    shanghai = ZoneInfo("Asia/Shanghai")
    low = datetime(2022, 10, 17, 10, tzinfo=shanghai)
    high = datetime(2022, 11, 17, 10, tzinfo=shanghai)
    five_days = timedelta(days=5)

    result = pl.datetime_range(low, high, interval=five_days, eager=True)
    expected = [
        datetime(2022, 10, 17, 10, 0, tzinfo=shanghai),
        datetime(2022, 10, 22, 10, 0, tzinfo=shanghai),
        datetime(2022, 10, 27, 10, 0, tzinfo=shanghai),
        datetime(2022, 11, 1, 10, 0, tzinfo=shanghai),
        datetime(2022, 11, 6, 10, 0, tzinfo=shanghai),
        datetime(2022, 11, 11, 10, 0, tzinfo=shanghai),
        datetime(2022, 11, 16, 10, 0, tzinfo=shanghai),
    ]
    assert result.to_list() == expected

    # A tz-aware start combined with a naive end has no common supertype.
    naive_high = high.replace(tzinfo=None)
    with pytest.raises(SchemaError, match="failed to determine supertype"):
        pl.datetime_range(
            low,
            naive_high,
            interval=five_days,
            time_zone="UTC",
            eager=True,
        )
200
201
202
def test_tzaware_datetime_range_crossing_dst_hourly() -> None:
    """Check an hourly America/Chicago range across the repeated 01:00 hour."""
    chicago = ZoneInfo("America/Chicago")
    hourly = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 7, 2),
        "1h",
        time_zone="America/Chicago",
        eager=True,
    )
    # 01:00 appears twice: once with fold=0 and once with fold=1.
    expected = [
        datetime(2021, 11, 7, 0, 0, tzinfo=chicago),
        datetime(2021, 11, 7, 1, 0, tzinfo=chicago),
        datetime(2021, 11, 7, 1, 0, fold=1, tzinfo=chicago),
        datetime(2021, 11, 7, 2, 0, tzinfo=chicago),
    ]
    assert hourly.to_list() == expected
216
217
218
def test_tzaware_datetime_range_crossing_dst_daily() -> None:
    """Check that a 2-day America/Chicago range starting 2021-11-07 stays at midnight."""
    chicago = ZoneInfo("America/Chicago")
    every_other_day = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 11),
        "2d",
        time_zone="America/Chicago",
        eager=True,
    )
    expected = [
        datetime(2021, 11, day, 0, 0, tzinfo=chicago) for day in (7, 9, 11)
    ]
    assert every_other_day.to_list() == expected
231
232
233
def test_tzaware_datetime_range_crossing_dst_weekly() -> None:
    """Check that a weekly America/Chicago range starting 2021-11-07 stays at midnight."""
    chicago = ZoneInfo("America/Chicago")
    weekly = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 20),
        "1w",
        time_zone="America/Chicago",
        eager=True,
    )
    expected = [
        datetime(2021, 11, 7, 0, 0, tzinfo=chicago),
        datetime(2021, 11, 14, 0, 0, tzinfo=chicago),
    ]
    assert weekly.to_list() == expected
245
246
247
def test_tzaware_datetime_range_crossing_dst_monthly() -> None:
    """Check that a monthly America/Chicago range starting 2021-11-07 stays at midnight."""
    chicago = ZoneInfo("America/Chicago")
    monthly = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 12, 20),
        "1mo",
        time_zone="America/Chicago",
        eager=True,
    )
    expected = [
        datetime(2021, 11, 7, 0, 0, tzinfo=chicago),
        datetime(2021, 12, 7, 0, 0, tzinfo=chicago),
    ]
    assert monthly.to_list() == expected
259
260
261
def test_datetime_range_with_unsupported_datetimes() -> None:
    """Ambiguous or non-existent start times in the target zone must raise."""
    # Each case: (expected error pattern, start, end, time zone).
    failing_cases = [
        (
            r"datetime '2021-11-07 01:00:00' is ambiguous in time zone 'America/Chicago'",
            datetime(2021, 11, 7, 1),
            datetime(2021, 11, 7, 2),
            "America/Chicago",
        ),
        (
            r"datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Vienna'",
            datetime(2021, 3, 28, 2, 30),
            datetime(2021, 3, 28, 4),
            "Europe/Vienna",
        ),
    ]
    for pattern, range_start, range_end, zone in failing_cases:
        with pytest.raises(ComputeError, match=pattern):
            pl.datetime_range(
                range_start,
                range_end,
                "1h",
                time_zone=zone,
                eager=True,
            )
284
285
286
@pytest.mark.parametrize(
    ("closed", "expected_values"),
    [
        ("right", [datetime(2020, 2, 29), datetime(2020, 3, 31)]),
        ("left", [datetime(2020, 1, 31), datetime(2020, 2, 29)]),
        ("none", [datetime(2020, 2, 29)]),
        ("both", [datetime(2020, 1, 31), datetime(2020, 2, 29), datetime(2020, 3, 31)]),
    ],
)
def test_datetime_range_end_of_month_5441(
    closed: ClosedInterval, expected_values: list[datetime]
) -> None:
    """Check a monthly range from Jan 31 to Mar 31, 2020 for every ``closed`` mode."""
    month_end_range = pl.datetime_range(
        start=date(2020, 1, 31),
        end=date(2020, 3, 31),
        interval="1mo",
        closed=closed,
        eager=True,
    )
    microsecond_dtype = pl.Datetime("us")
    expected = pl.Series("literal", expected_values, dtype=microsecond_dtype)
    assert_series_equal(month_end_range, expected)
309
310
311
def test_datetime_range_specifying_ambiguous_11713() -> None:
    """Check ranges whose start is an ambiguous Europe/Madrid time.

    Resolving the ambiguous 02:00 start as ``"earliest"`` is expected to give
    three hourly values; resolving it as ``"latest"`` is expected to give two.
    """
    zone = "Europe/Madrid"
    ambiguous_two = pl.datetime(2023, 10, 29, 2, 0)
    end_expr = pl.datetime(2023, 10, 29, 3, 0).dt.replace_time_zone(zone)

    # Start resolved to the earliest occurrence of 02:00.
    from_earliest = pl.datetime_range(
        ambiguous_two.dt.replace_time_zone(zone, ambiguous="earliest"),
        end_expr,
        "1h",
        eager=True,
    )
    naive_three = pl.Series(
        "datetime",
        [
            datetime(2023, 10, 29, 2),
            datetime(2023, 10, 29, 2),
            datetime(2023, 10, 29, 3),
        ],
    )
    expected_from_earliest = naive_three.dt.replace_time_zone(
        zone, ambiguous=pl.Series(["earliest", "latest", "raise"])
    )
    assert_series_equal(from_earliest, expected_from_earliest)

    # Start resolved to the latest occurrence of 02:00.
    from_latest = pl.datetime_range(
        ambiguous_two.dt.replace_time_zone(zone, ambiguous="latest"),
        end_expr,
        "1h",
        eager=True,
    )
    naive_two = pl.Series(
        "datetime", [datetime(2023, 10, 29, 2), datetime(2023, 10, 29, 3)]
    )
    expected_from_latest = naive_two.dt.replace_time_zone(
        zone, ambiguous=pl.Series(["latest", "raise"])
    )
    assert_series_equal(from_latest, expected_from_latest)
343
344
345
@given(
    closed=st.sampled_from(["none", "left", "right", "both"]),
    time_unit=st.sampled_from(["ms", "us", "ns"]),
    n=st.integers(1, 10),
    size=st.integers(8, 10),
    unit=st.sampled_from(["s", "m", "h", "d", "mo"]),
    start=st.datetimes(datetime(1965, 1, 1), datetime(2100, 1, 1)),
)
@settings(max_examples=50)
@pytest.mark.benchmark
def test_datetime_range_fast_slow_paths(
    closed: ClosedInterval,
    time_unit: TimeUnit,
    n: int,
    size: int,
    unit: str,
    start: datetime,
) -> None:
    """Compare a time-zone-aware range against the equivalent naive range.

    The range built with ``time_zone="Asia/Kathmandu"`` (the "slow" result),
    once stripped of its time zone, must equal the range built without one
    (the "fast" result).
    """
    step = f"{n}{unit}"
    span = f"{n * size}{unit}"
    end = pl.select(pl.lit(start).dt.offset_by(span)).item()

    with_zone = pl.datetime_range(
        start,
        end,
        closed=closed,
        time_unit=time_unit,
        interval=step,
        time_zone="Asia/Kathmandu",
        eager=True,
    )
    result_slow = with_zone.dt.replace_time_zone(None)

    result_fast = pl.datetime_range(
        start,
        end,
        closed=closed,
        time_unit=time_unit,
        interval=step,
        eager=True,
    )
    assert_series_equal(result_slow, result_fast)
382
383
384
def test_dt_range_with_nanosecond_interval_19931() -> None:
    """A 1ns interval with an ``ms`` time unit must raise, even with a time zone."""
    expected_message = "interval 1ns is too small for time unit ms"
    with pytest.raises(InvalidOperationError, match=expected_message):
        pl.datetime_range(
            pl.date(2022, 1, 1),
            pl.date(2022, 1, 1),
            time_zone="Asia/Kathmandu",
            interval="1ns",
            time_unit="ms",
            eager=True,
        )
396
397
398
def test_datetime_range_with_nanoseconds_overflow_15735() -> None:
    """Check a 300-year range with a ``"24h"`` interval: ``us`` dtype, 109574 rows."""
    three_centuries = pl.datetime_range(
        date(2000, 1, 1), date(2300, 1, 1), "24h", eager=True
    )
    assert three_centuries.dtype == pl.Datetime("us")
    assert three_centuries.shape == (109574,)
402
403
404
# Helper function to generate output Series with expected dtype.
def to_expected(
    values: list[date] | list[datetime], dtype: PolarsDataType
) -> pl.Series:
    """Build the expected ``"literal"`` Series for ``values`` under ``dtype``.

    * ``pl.Date`` maps to a ``Datetime("us")`` Series.
    * A Datetime dtype without a time zone is used as-is.
    * A Datetime dtype with a time zone builds a naive Series in the same
      time unit and then attaches the zone with ``replace_time_zone``.
    """
    if dtype == pl.Date:
        return pl.Series("literal", values, dtype=pl.Datetime("us"))

    zone = dtype.time_zone  # type: ignore[union-attr]
    if zone is None:
        return pl.Series("literal", values, dtype=dtype)

    naive = pl.Series(
        "literal",
        values,
        dtype=pl.Datetime(dtype.time_unit),  # type: ignore[union-attr]
    )
    return naive.dt.replace_time_zone(zone)
419
420
421
# start/end/interval
422
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("us"),
        pl.Datetime("ns"),
        pl.Datetime("ms", time_zone="Asia/Kathmandu"),
        pl.Datetime("us", time_zone="Asia/Kathmandu"),
        pl.Datetime("ns", time_zone="Asia/Kathmandu"),
    ],
)
def test_datetime_range_start_end_interval_forwards(dtype: PolarsDataType) -> None:
    """Check a forward 3-day range for every ``closed`` mode and each dtype.

    Also checks that swapping ``start`` and ``end`` yields an empty range.
    """
    start = date(2025, 1, 1)
    end = date(2025, 1, 10)
    tu: TimeUnit = dtype.time_unit if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]
    tz: str = dtype.time_zone if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]

    # Expected values per ``closed`` mode, checked in this order.
    cases: list[tuple[ClosedInterval, list[date]]] = [
        ("left", [date(2025, 1, 1), date(2025, 1, 4), date(2025, 1, 7)]),
        ("right", [date(2025, 1, 4), date(2025, 1, 7), date(2025, 1, 10)]),
        ("none", [date(2025, 1, 4), date(2025, 1, 7)]),
        (
            "both",
            [date(2025, 1, 1), date(2025, 1, 4), date(2025, 1, 7), date(2025, 1, 10)],
        ),
    ]
    for closed, expected_dates in cases:
        actual = pl.datetime_range(
            start=start,
            end=end,
            interval="3d",
            closed=closed,
            eager=True,
            time_unit=tu,
            time_zone=tz,
        )
        assert_series_equal(actual, to_expected(expected_dates, dtype))

    # test wrong direction is empty
    reversed_range = pl.datetime_range(
        start=end,
        end=start,
        interval="3d",
        eager=True,
        time_unit=tu,
        time_zone=tz,
    )
    assert_series_equal(reversed_range, to_expected([], dtype))
503
504
505
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("us"),
        pl.Datetime("ns"),
        pl.Datetime("ms", time_zone="Asia/Kathmandu"),
        pl.Datetime("us", time_zone="Asia/Kathmandu"),
        pl.Datetime("ns", time_zone="Asia/Kathmandu"),
    ],
)
def test_datetime_range_expr_scalar(dtype: PolarsDataType) -> None:
    """Check ``datetime_range`` with scalar ``min``/``max`` expressions as endpoints."""
    tu: TimeUnit = dtype.time_unit if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]
    tz: str = dtype.time_zone if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]

    df = pl.DataFrame(
        {
            "a": [date(2025, 1, 3), date(2025, 1, 1)],
            "interval": ["1d", "2d"],
        }
    )
    daily_range = pl.datetime_range(
        start=pl.col("a").min(),
        end=pl.col("a").max(),
        interval="1d",
        time_unit=tu,
        time_zone=tz,
    )
    result = df.select(forward_start_end_interval=daily_range)

    expected_days = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)]
    expected = pl.DataFrame(
        {"forward_start_end_interval": to_expected(expected_days, dtype=dtype)}
    )
    assert_frame_equal(result, expected)
542
543