Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/functions/range/test_datetime_ranges.py
7884 views
1
from __future__ import annotations
2
3
from datetime import date, datetime
4
from typing import TYPE_CHECKING
5
from zoneinfo import ZoneInfo
6
7
import pytest
8
9
import polars as pl
10
from polars.testing import assert_frame_equal, assert_series_equal
11
12
if TYPE_CHECKING:
13
from polars._typing import ClosedInterval, PolarsDataType, TimeUnit
14
15
16
@pytest.mark.parametrize("low", ["start", pl.col("start")])
@pytest.mark.parametrize("high", ["stop", pl.col("stop")])
def test_datetime_ranges_lazy_with_expressions(
    low: str | pl.Expr, high: str | pl.Expr
) -> None:
    """Check that range bounds may be given as column names or expressions.

    Each bound is parametrized as either a plain column name or a
    ``pl.col`` expression; every combination must produce the same
    per-row list of daily datetimes.
    """
    first_start, first_stop = datetime(2000, 1, 1), datetime(2000, 1, 2)
    second_start, second_stop = datetime(2022, 6, 1), datetime(2022, 6, 2)
    frame = pl.DataFrame(
        {
            "start": [first_start, second_start],
            "stop": [first_stop, second_stop],
        }
    )

    ranges = pl.datetime_ranges(start=low, end=high, interval="1d").alias("dts")
    result_df = frame.with_columns(ranges)

    # The input columns are carried through untouched; "dts" holds one
    # two-element list per row (start day and stop day).
    expected = {
        "start": [first_start, second_start],
        "stop": [first_stop, second_stop],
        "dts": [
            [first_start, first_stop],
            [second_start, second_stop],
        ],
    }
    assert result_df.to_dict(as_series=False) == expected
40
41
42
@pytest.mark.parametrize(
    ("values_time_zone", "input_time_zone", "output_time_zone"),
    [
        ("Asia/Kathmandu", "Asia/Kathmandu", "Asia/Kathmandu"),
        ("Asia/Kathmandu", None, "Asia/Kathmandu"),
        (None, "Asia/Kathmandu", "Asia/Kathmandu"),
        (None, None, None),
    ],
)
@pytest.mark.parametrize(
    ("values_time_unit", "input_time_unit", "output_time_unit"),
    [
        ("ms", None, "ms"),
        ("us", None, "us"),
        ("ns", None, "ns"),
        ("ms", "ms", "ms"),
        ("us", "ms", "ms"),
        ("ns", "ms", "ms"),
        ("ms", "us", "us"),
        ("us", "us", "us"),
        ("ns", "us", "us"),
        ("ms", "ns", "ns"),
        ("us", "ns", "ns"),
        ("ns", "ns", "ns"),
    ],
)
def test_datetime_ranges_schema(
    values_time_zone: str | None,
    input_time_zone: str | None,
    output_time_zone: str | None,
    values_time_unit: TimeUnit,
    input_time_unit: TimeUnit | None,
    output_time_unit: TimeUnit,
) -> None:
    """Check the lazy schema and collected output of ``datetime_ranges``.

    The ``values_*`` parameters describe the dtype of the input columns,
    the ``input_*`` parameters are passed to ``datetime_ranges`` itself,
    and the ``output_*`` parameters give the dtype expected inside the
    resulting list column.
    """
    first_day = datetime(2020, 1, 1)
    second_day = datetime(2020, 1, 2)
    values_dtype = pl.Datetime(time_unit=values_time_unit, time_zone=values_time_zone)
    output_dtype = pl.Datetime(time_unit=output_time_unit, time_zone=output_time_zone)

    def with_values_dtype(name: str) -> pl.Expr:
        # Give a column the time zone and time unit of the input values.
        return (
            pl.col(name)
            .dt.replace_time_zone(values_time_zone)
            .dt.cast_time_unit(values_time_unit)
        )

    df = (
        pl.DataFrame({"start": [first_day], "end": [second_day]})
        .with_columns(with_values_dtype("*"))
        .lazy()
    )
    range_expr = pl.datetime_ranges(
        pl.col("start"),
        pl.col("end"),
        time_zone=input_time_zone,
        time_unit=input_time_unit,
    )
    result = df.with_columns(datetime_range=range_expr)

    # The schema must agree both before and after materialization.
    expected_schema = {
        "start": values_dtype,
        "end": values_dtype,
        "datetime_range": pl.List(output_dtype),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema

    expected_range = (
        pl.col("datetime_range")
        .explode()
        .dt.replace_time_zone(output_time_zone)
        .dt.cast_time_unit(output_time_unit)
        .implode()
    )
    expected = pl.DataFrame(
        {
            "start": [first_day],
            "end": [second_day],
            "datetime_range": [[first_day, second_day]],
        }
    ).with_columns(
        with_values_dtype("start"),
        with_values_dtype("end"),
        expected_range,
    )
    assert_frame_equal(result.collect(), expected)
123
124
125
@pytest.mark.parametrize(
    (
        "input_time_unit",
        "input_time_zone",
        "output_dtype",
        "interval",
        "expected_datetime_range",
    ),
    [
        (None, None, pl.Datetime("us"), "1s1d", ["2020-01-01", "2020-01-02 00:00:01"]),
        (None, None, pl.Datetime("us"), "1d1s", ["2020-01-01", "2020-01-02 00:00:01"]),
        (
            None,
            None,
            pl.Datetime("ns"),
            "1d1ns",
            ["2020-01-01", "2020-01-02 00:00:00.000000001"],
        ),
        ("ms", None, pl.Datetime("ms"), "1s1d", ["2020-01-01", "2020-01-02 00:00:01"]),
        ("ms", None, pl.Datetime("ms"), "1d1s", ["2020-01-01", "2020-01-02 00:00:01"]),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("us", "Asia/Kathmandu"),
            "1s1d",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("us", "Asia/Kathmandu"),
            "1d1s",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("ns", "Asia/Kathmandu"),
            "1d1ns",
            ["2020-01-01", "2020-01-02 00:00:00.000000001"],
        ),
        (
            "ms",
            "Asia/Kathmandu",
            pl.Datetime("ms", "Asia/Kathmandu"),
            "1s1d",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            "ms",
            "Asia/Kathmandu",
            pl.Datetime("ms", "Asia/Kathmandu"),
            "1d1s",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
    ],
)
def test_datetime_ranges_schema_upcasts_to_datetime(
    input_time_unit: TimeUnit | None,
    input_time_zone: str | None,
    output_dtype: PolarsDataType,
    interval: str,
    expected_datetime_range: list[str],
) -> None:
    """Check that ``Date`` bounds produce a list of ``output_dtype`` datetimes.

    The start/end columns stay ``Date`` while the generated range column is
    expected to be ``List(output_dtype)``, both in the lazy schema and after
    collecting. The same bounds are also run through the eager
    ``pl.datetime_range`` and compared against the same expected values.
    """
    range_start = date(2020, 1, 1)
    range_end = date(2020, 1, 3)

    df = pl.DataFrame({"start": [range_start], "end": [range_end]}).lazy()
    range_expr = pl.datetime_ranges(
        start=pl.col("start"),
        end=pl.col("end"),
        interval=interval,
        time_unit=input_time_unit,
        time_zone=input_time_zone,
    )
    result = df.with_columns(datetime_range=range_expr)

    expected_schema = {
        "start": pl.Date,
        "end": pl.Date,
        "datetime_range": pl.List(output_dtype),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema

    # Parse the expected strings at nanosecond precision first, then bring
    # them to the unit and zone of the expected output dtype.
    parsed = pl.Series(expected_datetime_range).str.to_datetime(time_unit="ns")
    as_output_dtype = (
        pl.col("datetime_range")
        .explode()
        .dt.cast_time_unit(output_dtype.time_unit)  # type: ignore[union-attr]
        .dt.replace_time_zone(output_dtype.time_zone)  # type: ignore[union-attr]
        .implode()
    )
    expected = pl.DataFrame(
        {
            "start": [range_start],
            "end": [range_end],
            "datetime_range": parsed.implode(),
        }
    ).with_columns(as_output_dtype)
    assert_frame_equal(result.collect(), expected)

    # The eager single-range function must agree with the per-row variant.
    result_single = pl.datetime_range(
        range_start,
        range_end,
        interval=interval,
        time_unit=input_time_unit,
        time_zone=input_time_zone,
        eager=True,
    ).alias("datetime")
    expected_single = expected["datetime_range"].explode().rename("datetime")
    assert_series_equal(result_single, expected_single)
236
237
238
def test_datetime_ranges_no_alias_schema_9037() -> None:
    """Check the schema when the ``datetime_ranges`` output is not aliased.

    The expected schema has the list of datetimes under the ``start`` name,
    with ``end`` remaining a plain datetime column.
    """
    microseconds = pl.Datetime(time_unit="us", time_zone=None)
    frame = pl.DataFrame(
        {
            "start": [datetime(2020, 1, 1)],
            "end": [datetime(2020, 1, 2)],
        }
    )
    unaliased = pl.datetime_ranges(pl.col("start"), pl.col("end"))
    result = frame.lazy().with_columns(unaliased)

    expected_schema = {
        "start": pl.List(microseconds),
        "end": microseconds,
    }
    # The lazily resolved schema and the materialized schema must match.
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema
249
250
251
def test_datetime_ranges_broadcasting() -> None:
    """Check that a scalar bound is paired with every row of a column bound.

    One range uses the column as the start with a fixed end; the other uses
    a fixed start with the column as the end.
    """
    days = [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 3)]
    first_day, last_day = days[0], days[-1]
    frame = pl.DataFrame({"datetimes": days})

    fixed_end = pl.datetime_ranges(start="datetimes", end=last_day).alias("end")
    fixed_start = pl.datetime_ranges(start=first_day, end="datetimes").alias("start")
    result = frame.select(fixed_end, fixed_start)

    row_count = len(days)
    expected = pl.DataFrame(
        {
            # Row i runs from days[i] up to the fixed last day.
            "end": [days[i:] for i in range(row_count)],
            # Row i runs from the fixed first day up to days[i].
            "start": [days[: i + 1] for i in range(row_count)],
        }
    )
    assert_frame_equal(result, expected)
280
281
282
@pytest.mark.parametrize(
    ("closed", "expected"),
    [
        (
            "both",
            [
                [
                    datetime(2025, 1, 1),
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                    datetime(2025, 1, 10),
                ],
                [
                    datetime(2025, 1, 8),
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                    datetime(2025, 1, 17),
                ],
            ],
        ),
        (
            "left",
            [
                [
                    datetime(2025, 1, 1),
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                ],
                [
                    datetime(2025, 1, 8),
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                ],
            ],
        ),
        (
            "right",
            [
                [
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                    datetime(2025, 1, 10),
                ],
                [
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                    datetime(2025, 1, 17),
                ],
            ],
        ),
        (
            "none",
            [
                [
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                ],
                [
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                ],
            ],
        ),
    ],
)
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("us"),
        pl.Datetime("ns"),
        pl.Datetime("ms", time_zone="Asia/Kathmandu"),
        pl.Datetime("us", time_zone="Asia/Kathmandu"),
        pl.Datetime("ns", time_zone="Asia/Kathmandu"),
    ],
)
def test_datetime_ranges_start_end_interval_forwards(
    closed: ClosedInterval,
    expected: list[list[datetime]],
    dtype: PolarsDataType,
) -> None:
    """Check 1d12h ranges over date bounds for each ``closed`` setting and dtype.

    ``expected`` lists the naive datetimes per row for the given ``closed``
    value. When ``dtype`` carries a time zone, the expected datetimes are
    stamped with that zone before comparison.
    """
    # Only Datetime dtypes supply a unit/zone to forward; Date passes None.
    if dtype == pl.Datetime:
        tu = dtype.time_unit  # type: ignore[union-attr]
        tz = dtype.time_zone  # type: ignore[union-attr]
    else:
        tu = tz = None

    if tz is not None:
        zone = ZoneInfo(tz)
        expected = [[moment.replace(tzinfo=zone) for moment in row] for row in expected]

    frame = pl.DataFrame(
        {
            "start": [date(2025, 1, 1), date(2025, 1, 8)],
            "end": [date(2025, 1, 10), date(2025, 1, 17)],
        }
    )
    range_expr = pl.datetime_ranges(
        start="start",
        end="end",
        interval="1d12h",
        closed=closed,
        time_unit=tu,
        time_zone=tz,
    )
    result = frame.select(dates=range_expr)

    # For the Date case the expected list elements are microsecond datetimes.
    inner_dtype = pl.Datetime("us") if dtype == pl.Date else dtype
    s_expected = pl.Series("dates", expected, dtype=pl.List(inner_dtype))
    assert_frame_equal(result, s_expected.to_frame())
412
413
414
def test_datetime_ranges_lit_combinations_start_end_interval() -> None:
    """Check ranges where one bound is a literal date and the other a column.

    Both combinations (literal start with column end, column start with
    literal end) are expected to give the same three daily datetimes per row.
    """
    first_day = date(2025, 1, 1)
    last_day = date(2025, 1, 3)
    frame = pl.DataFrame(
        {
            "start": [first_day, first_day],
            "end": [last_day, last_day],
        }
    )

    literal_start = pl.datetime_ranges(start=first_day, end="end", interval="1d")
    literal_end = pl.datetime_ranges(start="start", end=last_day, interval="1d")
    result = frame.select(start_lit=literal_start, end_lit=literal_end)

    daily = [datetime(2025, 1, day) for day in (1, 2, 3)]
    per_row = pl.Series([daily, daily], dtype=pl.List(pl.Datetime("us")))
    expected = pl.DataFrame({"start_lit": per_row, "end_lit": per_row})
    assert_frame_equal(result, expected)
434
435
436
def test_datetime_ranges_null_lit_combinations_start_end_interval() -> None:
    """Check ranges where one or both bounds are a null ``Date`` literal.

    Every combination is expected to produce a null entry per row, typed as
    a list of microsecond datetimes.
    """
    first_day = date(2025, 1, 1)
    last_day = date(2025, 1, 3)
    frame = pl.DataFrame(
        {
            "start": [first_day, first_day],
            "end": [last_day, last_day],
        }
    )
    null_date = pl.lit(None, dtype=pl.Date)

    result = frame.select(
        start_lit=pl.datetime_ranges(start=null_date, end="end", interval="1d"),
        end_lit=pl.datetime_ranges(start="start", end=null_date, interval="1d"),
        all_lit=pl.datetime_ranges(start=null_date, end=null_date, interval="1d"),
    )

    nulls = pl.Series([None, None], dtype=pl.List(pl.Datetime("us")))
    expected = pl.DataFrame(dict.fromkeys(("start_lit", "end_lit", "all_lit"), nulls))
    assert_frame_equal(result, expected)
452
453