GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py

from __future__ import annotations

from collections import OrderedDict
from datetime import date, datetime, time, timedelta
from typing import TYPE_CHECKING, Callable
from zoneinfo import ZoneInfo

import pytest
from hypothesis import given

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
from polars.exceptions import ComputeError, InvalidOperationError
from polars.testing import assert_frame_equal, assert_series_equal
from polars.testing.parametric import series

if TYPE_CHECKING:
    from polars._typing import PolarsDataType, TemporalLiteral, TimeUnit


@pytest.fixture
def series_of_int_dates() -> pl.Series:
    return pl.Series([8401, 10000, 20000, 30000], dtype=pl.Date)


@pytest.fixture
def series_of_str_dates() -> pl.Series:
    return pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"])


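# A note on the fixture above: `pl.Date` is physically an Int32 count of days
# since the Unix epoch, so the integers in `series_of_int_dates` are day
# offsets from 1970-01-01. A minimal sketch:
#
#     pl.Series([0, 8401], dtype=pl.Date).to_list()
#     # -> [date(1970, 1, 1), date(1993, 1, 1)]

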
def test_dt_to_string(series_of_int_dates: pl.Series) -> None:
    expected_str_dates = pl.Series(
        ["1993-01-01", "1997-05-19", "2024-10-04", "2052-02-20"]
    )

    assert series_of_int_dates.dtype == pl.Date
    assert_series_equal(series_of_int_dates.dt.to_string("%F"), expected_str_dates)

    # Check strftime alias as well
    assert_series_equal(series_of_int_dates.dt.strftime("%F"), expected_str_dates)


@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("millennium", pl.Series(values=[2, 2, 3, 3], dtype=pl.Int32)),
        ("century", pl.Series(values=[20, 20, 21, 21], dtype=pl.Int32)),
        ("year", pl.Series(values=[1993, 1997, 2024, 2052], dtype=pl.Int32)),
        ("iso_year", pl.Series(values=[1992, 1997, 2024, 2052], dtype=pl.Int32)),
        ("quarter", pl.Series(values=[1, 2, 4, 1], dtype=pl.Int8)),
        ("month", pl.Series(values=[1, 5, 10, 2], dtype=pl.Int8)),
        ("week", pl.Series(values=[53, 21, 40, 8], dtype=pl.Int8)),
        ("day", pl.Series(values=[1, 19, 4, 20], dtype=pl.Int8)),
        ("weekday", pl.Series(values=[5, 1, 5, 2], dtype=pl.Int8)),
        ("ordinal_day", pl.Series(values=[1, 139, 278, 51], dtype=pl.Int16)),
    ],
)
@pytest.mark.parametrize("time_zone", ["Asia/Kathmandu", None])
def test_dt_extract_datetime_component(
    unit_attr: str,
    expected: pl.Series,
    series_of_int_dates: pl.Series,
    time_zone: str | None,
) -> None:
    assert_series_equal(getattr(series_of_int_dates.dt, unit_attr)(), expected)
    assert_series_equal(
        getattr(
            series_of_int_dates.cast(pl.Datetime).dt.replace_time_zone(time_zone).dt,
            unit_attr,
        )(),
        expected,
    )


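# The time-zone branch above relies on `replace_time_zone` keeping the
# wall-clock fields unchanged: it reinterprets the same local time in the new
# zone rather than converting between zones, so every extracted component
# matches the naive case.

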
@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("hour", pl.Series(values=[0, 3], dtype=pl.Int8)),
        ("minute", pl.Series(values=[0, 20], dtype=pl.Int8)),
        ("second", pl.Series(values=[0, 10], dtype=pl.Int8)),
        ("millisecond", pl.Series(values=[0, 987], dtype=pl.Int32)),
        ("microsecond", pl.Series(values=[0, 987654], dtype=pl.Int32)),
        ("nanosecond", pl.Series(values=[0, 987654321], dtype=pl.Int32)),
    ],
)
def test_strptime_extract_times(
    unit_attr: str,
    expected: pl.Series,
    series_of_int_dates: pl.Series,
    series_of_str_dates: pl.Series,
) -> None:
    s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")

    assert_series_equal(getattr(s.dt, unit_attr)(), expected)


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
@pytest.mark.parametrize(
    ("attribute", "expected"),
    [
        ("date", date(2022, 1, 1)),
        ("time", time(23)),
    ],
)
def test_dt_date_and_time(
    attribute: str, time_zone: None | str, expected: date | time
) -> None:
    ser = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)
    result = getattr(ser.dt, attribute)().item()
    assert result == expected


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
@pytest.mark.parametrize("time_unit", ["us", "ns", "ms"])
def test_dt_replace_time_zone_none(time_zone: str | None, time_unit: TimeUnit) -> None:
    ser = (
        pl.Series([datetime(2022, 1, 1, 23)])
        .dt.cast_time_unit(time_unit)
        .dt.replace_time_zone(time_zone)
    )
    result = ser.dt.replace_time_zone(None)
    expected = datetime(2022, 1, 1, 23)
    assert result.dtype == pl.Datetime(time_unit, None)
    assert result.item() == expected


def test_dt_datetime_deprecated() -> None:
    s = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone("Asia/Kathmandu")
    with pytest.deprecated_call():
        result = s.dt.datetime()
    expected = datetime(2022, 1, 1, 23)
    assert result.dtype == pl.Datetime(time_zone=None)
    assert result.item() == expected


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
def test_local_date_sortedness(time_zone: str | None) -> None:
    # singleton
    ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()
    result = ser.dt.date()
    assert result.flags["SORTED_ASC"]

    # 2 elements
    ser = (
        pl.Series([datetime(2022, 1, 1, 23)] * 2).dt.replace_time_zone(time_zone)
    ).sort()
    result = ser.dt.date()
    assert result.flags["SORTED_ASC"]


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
def test_local_time_sortedness(time_zone: str | None) -> None:
    # singleton - always sorted
    ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()
    result = ser.dt.time()
    assert result.flags["SORTED_ASC"]

    # three elements - not sorted
    ser = (
        pl.Series(
            [
                datetime(2022, 1, 1, 23),
                datetime(2022, 1, 2, 21),
                datetime(2022, 1, 3, 22),
            ]
        ).dt.replace_time_zone(time_zone)
    ).sort()
    result = ser.dt.time()
    assert not result.flags["SORTED_ASC"]
    assert not result.flags["SORTED_DESC"]


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_local_time_before_epoch(time_unit: TimeUnit) -> None:
    ser = pl.Series([datetime(1969, 7, 21, 2, 56, 2, 123000)]).dt.cast_time_unit(
        time_unit
    )
    result = ser.dt.time().item()
    expected = time(2, 56, 2, 123000)
    assert result == expected


@pytest.mark.parametrize(
    ("time_zone", "offset", "expected"),
    [
        (None, "1d", True),
        ("Europe/London", "1d", False),
        ("UTC", "1d", True),
        (None, "1m", True),
        ("Europe/London", "1m", True),
        ("UTC", "1m", True),
        (None, "1w", True),
        ("Europe/London", "1w", False),
        ("UTC", "1w", True),
        (None, "1h", True),
        ("Europe/London", "1h", True),
        ("UTC", "1h", True),
    ],
)
def test_offset_by_sortedness(
    time_zone: str | None, offset: str, expected: bool
) -> None:
    s = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 25, 3),
        "30m",
        time_zone=time_zone,
        eager=True,
    ).sort()
    assert s.flags["SORTED_ASC"]
    assert not s.flags["SORTED_DESC"]
    result = s.dt.offset_by(offset)
    assert result.flags["SORTED_ASC"] == expected
    assert not result.flags["SORTED_DESC"]


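# Rationale for the expectations above: "1d" and "1w" are calendar-aware
# offsets whose absolute length varies across DST transitions (2020-10-25 is
# the night Europe/London leaves BST), so sortedness cannot be guaranteed for
# time-zone-aware data. Fixed-length offsets such as "1m" and "1h" always
# preserve the sorted flag.

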
def test_offset_by_invalid_duration() -> None:
    with pytest.raises(
        InvalidOperationError, match="expected leading integer in the duration string"
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("P")


def test_offset_by_missing_unit() -> None:
    with pytest.raises(
        InvalidOperationError,
        match="expected a unit to follow integer in the duration string '1'",
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("1")

    with pytest.raises(
        InvalidOperationError,
        match="expected a unit to follow integer in the duration string '1mo23d4'",
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("1mo23d4")

    with pytest.raises(
        InvalidOperationError,
        match="expected a unit to follow integer in the duration string '-2d1'",
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("-2d1")

    with pytest.raises(
        InvalidOperationError,
        match="expected a unit to follow integer in the duration string '1d2'",
    ):
        pl.DataFrame(
            {"a": [datetime(2022, 3, 20, 5, 7)] * 2, "b": ["1d", "1d2"]}
        ).select(pl.col("a").dt.offset_by(pl.col("b")))


def test_dt_datetime_date_time_invalid() -> None:
    with pytest.raises(ComputeError, match="expected Datetime or Date"):
        pl.Series([time(23)]).dt.date()
    with pytest.raises(ComputeError, match="expected Datetime or Date"):
        pl.Series([timedelta(1)]).dt.date()
    with pytest.raises(ComputeError, match="expected Datetime or Time"):
        pl.Series([timedelta(1)]).dt.time()
    with pytest.raises(ComputeError, match="expected Datetime or Time"):
        pl.Series([date(2020, 1, 1)]).dt.time()


@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        (datetime(2022, 3, 15, 3), datetime(2022, 3, 1, 3)),
        (datetime(2022, 3, 15, 3, 2, 1, 123000), datetime(2022, 3, 1, 3, 2, 1, 123000)),
        (datetime(2022, 3, 15), datetime(2022, 3, 1)),
        (datetime(2022, 3, 1), datetime(2022, 3, 1)),
    ],
)
@pytest.mark.parametrize(
    ("tzinfo", "time_zone"),
    [
        (None, None),
        (ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),
    ],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_month_start_datetime(
    dt: datetime,
    expected: datetime,
    time_unit: TimeUnit,
    tzinfo: ZoneInfo | None,
    time_zone: str | None,
) -> None:
    ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)
    result = ser.dt.month_start().item()
    assert result == expected.replace(tzinfo=tzinfo)


@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        (date(2022, 3, 15), date(2022, 3, 1)),
        (date(2022, 3, 31), date(2022, 3, 1)),
    ],
)
def test_month_start_date(dt: date, expected: date) -> None:
    ser = pl.Series([dt])
    result = ser.dt.month_start().item()
    assert result == expected


@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        (datetime(2022, 3, 15, 3), datetime(2022, 3, 31, 3)),
        (
            datetime(2022, 3, 15, 3, 2, 1, 123000),
            datetime(2022, 3, 31, 3, 2, 1, 123000),
        ),
        (datetime(2022, 3, 15), datetime(2022, 3, 31)),
        (datetime(2022, 3, 31), datetime(2022, 3, 31)),
    ],
)
@pytest.mark.parametrize(
    ("tzinfo", "time_zone"),
    [
        (None, None),
        (ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),
    ],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_month_end_datetime(
    dt: datetime,
    expected: datetime,
    time_unit: TimeUnit,
    tzinfo: ZoneInfo | None,
    time_zone: str | None,
) -> None:
    ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)
    result = ser.dt.month_end().item()
    assert result == expected.replace(tzinfo=tzinfo)


@pytest.mark.parametrize(
    ("dt", "expected"),
    [
        (date(2022, 3, 15), date(2022, 3, 31)),
        (date(2022, 3, 31), date(2022, 3, 31)),
    ],
)
def test_month_end_date(dt: date, expected: date) -> None:
    ser = pl.Series([dt])
    result = ser.dt.month_end().item()
    assert result == expected


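# As the cases above show, `month_start` and `month_end` only move the day of
# the month and keep any time-of-day component; `truncate("1mo")`, by
# contrast, would also reset the time to midnight.

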
def test_month_start_end_invalid() -> None:
    ser = pl.Series([time(1, 2, 3)])
    with pytest.raises(
        InvalidOperationError,
        match=r"`month_start` operation not supported for dtype `time` \(expected: date/datetime\)",
    ):
        ser.dt.month_start()
    with pytest.raises(
        InvalidOperationError,
        match=r"`month_end` operation not supported for dtype `time` \(expected: date/datetime\)",
    ):
        ser.dt.month_end()


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_base_utc_offset(time_unit: TimeUnit) -> None:
    ser = pl.datetime_range(
        datetime(2011, 12, 29),
        datetime(2012, 1, 1),
        "2d",
        time_zone="Pacific/Apia",
        eager=True,
    ).dt.cast_time_unit(time_unit)
    result = ser.dt.base_utc_offset().rename("base_utc_offset")
    expected = pl.Series(
        "base_utc_offset",
        [-11 * 3600 * 1000, 13 * 3600 * 1000],
        dtype=pl.Duration("ms"),
    )
    assert_series_equal(result, expected)


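# Background for the fixture above: Samoa (Pacific/Apia) skipped 2011-12-30
# entirely, moving from UTC-11 to UTC+13, so a two-day stride starting on
# 2011-12-29 straddles the change in the base (non-DST) offset.

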
def test_base_utc_offset_lazy_schema() -> None:
    ser = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    df = pl.DataFrame({"ts": ser}).lazy()
    result = df.with_columns(
        base_utc_offset=pl.col("ts").dt.base_utc_offset()
    ).collect_schema()
    expected = {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "base_utc_offset": pl.Duration(time_unit="ms"),
    }
    assert result == expected


def test_base_utc_offset_invalid() -> None:
    ser = pl.datetime_range(datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True)
    with pytest.raises(
        InvalidOperationError,
        match=r"`base_utc_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)",
    ):
        ser.dt.base_utc_offset().rename("base_utc_offset")


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_dst_offset(time_unit: TimeUnit) -> None:
    ser = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    ).dt.cast_time_unit(time_unit)
    result = ser.dt.dst_offset().rename("dst_offset")
    expected = pl.Series("dst_offset", [3_600 * 1_000, 0], dtype=pl.Duration("ms"))
    assert_series_equal(result, expected)


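# Background for the fixture above: Europe/London left BST at 02:00 on
# 2020-10-25, so midnight at the start of that day is still in DST (+1h)
# while midnight on 2020-10-26 is back on GMT (no offset).

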
def test_dst_offset_lazy_schema() -> None:
    ser = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    df = pl.DataFrame({"ts": ser}).lazy()
    result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).collect_schema()
    expected = {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "dst_offset": pl.Duration(time_unit="ms"),
    }
    assert result == expected


def test_dst_offset_invalid() -> None:
    ser = pl.datetime_range(datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True)
    with pytest.raises(
        InvalidOperationError,
        match=r"`dst_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)",
    ):
        ser.dt.dst_offset().rename("dst_offset")


@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [
        ("d", pl.Series(values=[18262, 18294], dtype=pl.Int32)),
        ("s", pl.Series(values=[1_577_836_800, 1_580_613_610], dtype=pl.Int64)),
        (
            "ms",
            pl.Series(values=[1_577_836_800_000, 1_580_613_610_987], dtype=pl.Int64),
        ),
    ],
)
def test_strptime_epoch(
    time_unit: TimeUnit,
    expected: pl.Series,
    series_of_str_dates: pl.Series,
) -> None:
    s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")

    assert_series_equal(s.dt.epoch(time_unit=time_unit), expected)


def test_strptime_fractional_seconds(series_of_str_dates: pl.Series) -> None:
    s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")

    assert_series_equal(
        s.dt.second(fractional=True),
        pl.Series([0.0, 10.987654321], dtype=pl.Float64),
    )


@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("total_days", pl.Series([1])),
        ("total_hours", pl.Series([24])),
        ("total_minutes", pl.Series([24 * 60])),
        ("total_seconds", pl.Series([3600 * 24])),
        ("total_milliseconds", pl.Series([3600 * 24 * int(1e3)])),
        ("total_microseconds", pl.Series([3600 * 24 * int(1e6)])),
        ("total_nanoseconds", pl.Series([3600 * 24 * int(1e9)])),
    ],
)
def test_duration_extract_times(
    unit_attr: str,
    expected: pl.Series,
) -> None:
    duration = pl.Series([datetime(2022, 1, 2)]) - pl.Series([datetime(2022, 1, 1)])

    assert_series_equal(getattr(duration.dt, unit_attr)(), expected)


@pytest.mark.parametrize(
    ("time_unit", "every"),
    [
        ("ms", "1h"),
        ("us", "1h0m0s"),
        ("ns", timedelta(hours=1)),
    ],
    ids=["milliseconds", "microseconds", "nanoseconds"],
)
def test_truncate(
    time_unit: TimeUnit,
    every: str | timedelta,
) -> None:
    start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2)
    s = pl.datetime_range(
        start,
        stop,
        timedelta(minutes=30),
        time_unit=time_unit,
        eager=True,
    ).alias(f"dates[{time_unit}]")

    # can pass strings and time-deltas
    out = s.dt.truncate(every)
    assert out.dt[0] == start
    assert out.dt[1] == start
    assert out.dt[2] == start + timedelta(hours=1)
    assert out.dt[3] == start + timedelta(hours=1)
    # ...
    assert out.dt[-3] == stop - timedelta(hours=1)
    assert out.dt[-2] == stop - timedelta(hours=1)
    assert out.dt[-1] == stop


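# `truncate` floors each value to the start of its window, so with 30-minute
# input steps and an hourly `every`, :00 and :30 collapse onto the same hour;
# hence the pairwise-equal assertions above.

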
def test_truncate_negative() -> None:
    """Test that truncating to a negative duration gives a helpful error message."""
    df = pl.DataFrame(
        {
            "date": [date(1895, 5, 7), date(1955, 11, 5)],
            "datetime": [datetime(1895, 5, 7), datetime(1955, 11, 5)],
            "duration": ["-1m", "1m"],
        }
    )

    with pytest.raises(
        ComputeError, match="cannot truncate a Date to a negative duration"
    ):
        df.select(pl.col("date").dt.truncate("-1m"))

    with pytest.raises(
        ComputeError, match="cannot truncate a Datetime to a negative duration"
    ):
        df.select(pl.col("datetime").dt.truncate("-1m"))

    with pytest.raises(
        ComputeError, match="cannot truncate a Date to a negative duration"
    ):
        df.select(pl.col("date").dt.truncate(pl.col("duration")))

    with pytest.raises(
        ComputeError, match="cannot truncate a Datetime to a negative duration"
    ):
        df.select(pl.col("datetime").dt.truncate(pl.col("duration")))


@pytest.mark.parametrize(
    ("time_unit", "every"),
    [
        ("ms", "1h"),
        ("us", "1h0m0s"),
        ("ns", timedelta(hours=1)),
    ],
    ids=["milliseconds", "microseconds", "nanoseconds"],
)
def test_round(
    time_unit: TimeUnit,
    every: str | timedelta,
) -> None:
    start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2)
    s = pl.datetime_range(
        start,
        stop,
        timedelta(minutes=30),
        time_unit=time_unit,
        eager=True,
    ).alias(f"dates[{time_unit}]")

    # can pass strings and time-deltas
    out = s.dt.round(every)
    assert out.dt[0] == start
    assert out.dt[1] == start + timedelta(hours=1)
    assert out.dt[2] == start + timedelta(hours=1)
    assert out.dt[3] == start + timedelta(hours=2)
    # ...
    assert out.dt[-3] == stop - timedelta(hours=1)
    assert out.dt[-2] == stop
    assert out.dt[-1] == stop


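# Unlike `truncate`, `round` snaps to the nearest window boundary (halfway
# points round up), so the :30 values land on the next hour and the series
# ends at `stop` rather than below it.

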
def test_round_expr() -> None:
    df = pl.DataFrame(
        {
            "date": [
                datetime(2022, 11, 14),
                datetime(2023, 10, 11),
                datetime(2022, 3, 20, 5, 7, 18),
                datetime(2022, 4, 3, 13, 30, 32),
                None,
                datetime(2022, 12, 1),
            ],
            "every": ["1y", "1mo", "1m", "1m", "1mo", None],
        }
    )

    output = df.select(
        all_expr=pl.col("date").dt.round(every=pl.col("every")),
        date_lit=pl.lit(datetime(2022, 4, 3, 13, 30, 32)).dt.round(
            every=pl.col("every")
        ),
        every_lit=pl.col("date").dt.round("1d"),
    )

    expected = pl.DataFrame(
        {
            "all_expr": [
                datetime(2023, 1, 1),
                datetime(2023, 10, 1),
                datetime(2022, 3, 20, 5, 7),
                datetime(2022, 4, 3, 13, 31),
                None,
                None,
            ],
            "date_lit": [
                datetime(2022, 1, 1),
                datetime(2022, 4, 1),
                datetime(2022, 4, 3, 13, 31),
                datetime(2022, 4, 3, 13, 31),
                datetime(2022, 4, 1),
                None,
            ],
            "every_lit": [
                datetime(2022, 11, 14),
                datetime(2023, 10, 11),
                datetime(2022, 3, 20),
                datetime(2022, 4, 4),
                None,
                datetime(2022, 12, 1),
            ],
        }
    )

    assert_frame_equal(output, expected)

    all_lit = pl.select(all_lit=pl.lit(datetime(2022, 3, 20, 5, 7)).dt.round("1h"))
    assert all_lit.to_dict(as_series=False) == {"all_lit": [datetime(2022, 3, 20, 5)]}


def test_round_negative() -> None:
    """Test that rounding to a negative duration gives a helpful error message."""
    with pytest.raises(
        ComputeError, match="cannot round a Date to a negative duration"
    ):
        pl.Series([date(1895, 5, 7)]).dt.round("-1m")

    with pytest.raises(
        ComputeError, match="cannot round a Datetime to a negative duration"
    ):
        pl.Series([datetime(1895, 5, 7)]).dt.round("-1m")


def test_round_invalid_duration() -> None:
    with pytest.raises(
        InvalidOperationError, match="expected leading integer in the duration string"
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.round("P")


@pytest.mark.parametrize(
    ("time_unit", "date_in_that_unit"),
    [
        ("ns", [978307200000000000, 981022089000000000]),
        ("us", [978307200000000, 981022089000000]),
        ("ms", [978307200000, 981022089000]),
    ],
    ids=["nanoseconds", "microseconds", "milliseconds"],
)
def test_cast_time_units(
    time_unit: TimeUnit,
    date_in_that_unit: list[int],
) -> None:
    dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)])

    assert dates.dt.cast_time_unit(time_unit).cast(int).to_list() == date_in_that_unit


def test_epoch_matches_timestamp() -> None:
    dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)])

    for unit in DTYPE_TEMPORAL_UNITS:
        assert_series_equal(dates.dt.epoch(unit), dates.dt.timestamp(unit))

    assert_series_equal(dates.dt.epoch("s"), dates.dt.timestamp("ms") // 1000)
    assert_series_equal(
        dates.dt.epoch("d"),
        (dates.dt.timestamp("ms") // (1000 * 3600 * 24)).cast(pl.Int32),
    )


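# Unit arithmetic behind the last two checks, as a sketch:
#
#     epoch("s") == epoch("ms") // 1_000
#     epoch("d") == epoch("ms") // 86_400_000  # 1000 ms * 3600 s * 24 h
#
# e.g. 2020-01-01T00:00:00 is 1_577_836_800_000 ms since the epoch, i.e.
# 18_262 days, matching the `test_strptime_epoch` expectations above.

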
@pytest.mark.parametrize(
    ("tzinfo", "time_zone"),
    [(None, None), (ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu")],
)
def test_date_time_combine(tzinfo: ZoneInfo | None, time_zone: str | None) -> None:
    # Define a DataFrame with columns for datetime, date, and time
    df = pl.DataFrame(
        {
            "dtm": [
                datetime(2022, 12, 31, 10, 30, 45),
                datetime(2023, 7, 5, 23, 59, 59),
            ],
            "dt": [
                date(2022, 10, 10),
                date(2022, 7, 5),
            ],
            "tm": [
                time(1, 2, 3, 456000),
                time(7, 8, 9, 101000),
            ],
        }
    )
    df = df.with_columns(pl.col("dtm").dt.replace_time_zone(time_zone))

    # Combine datetime/date with time
    df = df.select(
        pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"),  # datetime & time
        pl.col("dt").dt.combine(pl.col("tm")).alias("d2"),  # date & time
        pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"),  # date & specified time
    )

    # Assert that the new columns have the expected values and datatypes
    expected_dict = {
        "d1": [  # Time component should be overwritten by `tm` values
            datetime(2022, 12, 31, 1, 2, 3, 456000, tzinfo=tzinfo),
            datetime(2023, 7, 5, 7, 8, 9, 101000, tzinfo=tzinfo),
        ],
        "d2": [  # Both date and time components combined "as-is" into new datetime
            datetime(2022, 10, 10, 1, 2, 3, 456000),
            datetime(2022, 7, 5, 7, 8, 9, 101000),
        ],
        "d3": [  # New datetime should use specified time component
            datetime(2022, 10, 10, 4, 5, 6),
            datetime(2022, 7, 5, 4, 5, 6),
        ],
    }
    assert df.to_dict(as_series=False) == expected_dict

    expected_schema = {
        "d1": pl.Datetime("us", time_zone),
        "d2": pl.Datetime("us"),
        "d3": pl.Datetime("us"),
    }
    assert df.schema == expected_schema


def test_combine_unsupported_types() -> None:
    with pytest.raises(ComputeError, match="expected Date or Datetime, got time"):
        pl.Series([time(1, 2)]).dt.combine(time(3, 4))


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
@pytest.mark.parametrize("time_zone", ["Asia/Kathmandu", None])
def test_combine_lazy_schema_datetime(
    time_zone: str | None,
    time_unit: TimeUnit,
) -> None:
    df = pl.DataFrame({"ts": pl.Series([datetime(2020, 1, 1)])})
    df = df.with_columns(pl.col("ts").dt.replace_time_zone(time_zone))
    result = df.lazy().select(
        pl.col("ts").dt.combine(time(1, 2, 3), time_unit=time_unit)
    )
    expected_dtypes = [pl.Datetime(time_unit, time_zone)]
    assert result.collect_schema().dtypes() == expected_dtypes


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_combine_lazy_schema_date(time_unit: TimeUnit) -> None:
    df = pl.DataFrame({"ts": pl.Series([date(2020, 1, 1)])})
    result = df.lazy().select(
        pl.col("ts").dt.combine(time(1, 2, 3), time_unit=time_unit)
    )
    expected_dtypes = [pl.Datetime(time_unit, None)]
    assert result.collect_schema().dtypes() == expected_dtypes


@pytest.mark.parametrize(
    ("range_fn", "value_type", "kwargs"),
    [
        (pl.datetime_range, datetime, {"time_unit": "ns"}),
        (pl.datetime_range, datetime, {"time_unit": "ns", "time_zone": "CET"}),
        (pl.datetime_range, datetime, {"time_unit": "us"}),
        (pl.datetime_range, datetime, {"time_unit": "us", "time_zone": "CET"}),
        (pl.datetime_range, datetime, {"time_unit": "ms"}),
        (pl.datetime_range, datetime, {"time_unit": "ms", "time_zone": "CET"}),
        (pl.date_range, date, {}),
    ],
)
def test_iso_year(
    range_fn: Callable[..., pl.Series], value_type: type, kwargs: dict[str, str]
) -> None:
    assert range_fn(
        value_type(1990, 1, 1), value_type(2004, 1, 1), "1y", **kwargs, eager=True
    ).dt.iso_year().to_list() == [
        1990,
        1991,
        1992,
        1992,
        1993,
        1994,
        1996,
        1997,
        1998,
        1998,
        1999,
        2001,
        2002,
        2003,
        2004,
    ]


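# The duplicated entries (1992, 1998) and missing ones (1995, 2000) above come
# from ISO 8601 week numbering: January 1st belongs to the previous ISO year
# whenever it falls in that year's final week. For example, 1993-01-01 (a
# Friday) is still in ISO week 53 of 1992, which the `iso_year`/`week`
# fixtures at the top of this file also show.

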
@pytest.mark.parametrize(
    ("range_fn", "value_type", "kwargs"),
    [
        (pl.datetime_range, datetime, {"time_unit": "ns"}),
        (pl.datetime_range, datetime, {"time_unit": "ns", "time_zone": "CET"}),
        (pl.datetime_range, datetime, {"time_unit": "us"}),
        (pl.datetime_range, datetime, {"time_unit": "us", "time_zone": "CET"}),
        (pl.datetime_range, datetime, {"time_unit": "ms"}),
        (pl.datetime_range, datetime, {"time_unit": "ms", "time_zone": "CET"}),
        (pl.date_range, date, {}),
    ],
)
def test_is_leap_year(
    range_fn: Callable[..., pl.Series], value_type: type, kwargs: dict[str, str]
) -> None:
    assert range_fn(
        value_type(1990, 1, 1), value_type(2004, 1, 1), "1y", **kwargs, eager=True
    ).dt.is_leap_year().to_list() == [
        False,
        False,
        True,  # 1992
        False,
        False,
        False,
        True,  # 1996
        False,
        False,
        False,
        True,  # 2000
        False,
        False,
        False,
        True,  # 2004
    ]


@pytest.mark.parametrize(
    ("value_type", "time_unit", "time_zone"),
    [
        (date, None, None),
        (datetime, "ns", None),
        (datetime, "ns", "Asia/Kathmandu"),
        (datetime, "us", None),
        (datetime, "us", "Asia/Kathmandu"),
        (datetime, "ms", None),
        (datetime, "ms", "Asia/Kathmandu"),
    ],
)
@pytest.mark.parametrize(
    ("start_ymd", "end_ymd", "feb_days"),
    [
        # Non-leap year cases
        ((1900, 1, 1), (1900, 12, 1), 28),  # 1900 is divisible by 100 but not by 400
        ((2025, 1, 1), (2025, 12, 1), 28),  # 2025 is not divisible by 4
        # Leap year cases
        ((2000, 1, 1), (2000, 12, 1), 29),  # 2000 is divisible by 400
        ((2004, 1, 1), (2004, 12, 1), 29),  # 2004 is divisible by 4 but not by 100
    ],
)
def test_days_in_month(
    value_type: type,
    time_unit: str | None,
    time_zone: str | None,
    start_ymd: tuple[int, int, int],
    end_ymd: tuple[int, int, int],
    feb_days: int,
) -> None:
    assert value_type in (date, datetime)
    range_fn: Callable[..., pl.Series] = (
        pl.date_range if value_type is date else pl.datetime_range
    )
    kwargs: dict[str, str] = {}
    if time_unit is not None:
        kwargs["time_unit"] = time_unit
    if time_zone is not None:
        kwargs["time_zone"] = time_zone
    assert range_fn(
        value_type(*start_ymd), value_type(*end_ymd), "1mo", **kwargs, eager=True
    ).dt.days_in_month().to_list() == [
        31,
        feb_days,
        31,
        30,
        31,
        30,
        31,
        31,
        30,
        31,
        30,
        31,
    ]


def test_quarter() -> None:
    assert pl.datetime_range(
        datetime(2022, 1, 1), datetime(2022, 12, 1), "1mo", eager=True
    ).dt.quarter().to_list() == [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]


def test_offset_by() -> None:
    df = pl.DataFrame(
        {
            "dates": pl.datetime_range(
                datetime(2000, 1, 1), datetime(2020, 1, 1), "1y", eager=True
            )
        }
    )

    # Add two new columns to the DataFrame using the offset_by() method
    df = df.with_columns(
        df["dates"].dt.offset_by("1y").alias("date_plus_1y"),
        df["dates"].dt.offset_by("-1y2mo").alias("date_min"),
    )

    # Assert that the day of the month for all the dates in the new columns is 1
    assert (df["date_plus_1y"].dt.day() == 1).all()
    assert (df["date_min"].dt.day() == 1).all()

    # Assert that the 'date_min' column contains the expected list of dates
    expected_dates = [datetime(year, 11, 1, 0, 0) for year in range(1998, 2019)]
    assert df["date_min"].to_list() == expected_dates


@pytest.mark.parametrize("time_zone", ["US/Central", None])
def test_offset_by_crossing_dst(time_zone: str | None) -> None:
    ser = pl.Series([datetime(2021, 11, 7)]).dt.replace_time_zone(time_zone)
    result = ser.dt.offset_by("1d")
    expected = pl.Series([datetime(2021, 11, 8)]).dt.replace_time_zone(time_zone)
    assert_series_equal(result, expected)


def test_negative_offset_by_err_msg_8464() -> None:
    result = pl.Series([datetime(2022, 3, 30)]).dt.offset_by("-1mo")
    expected = pl.Series([datetime(2022, 2, 28)])
    assert_series_equal(result, expected)


def test_offset_by_truncate_sorted_flag() -> None:
    s = pl.Series([datetime(2001, 1, 1), datetime(2001, 1, 2)])
    s = s.set_sorted()

    assert s.flags["SORTED_ASC"]
    s1 = s.dt.offset_by("1d")
    assert s1.to_list() == [datetime(2001, 1, 2), datetime(2001, 1, 3)]
    assert s1.flags["SORTED_ASC"]
    s2 = s1.dt.truncate("1mo")
    assert s2.flags["SORTED_ASC"]


def test_offset_by_broadcasting() -> None:
    # test broadcast lhs
    df = pl.DataFrame(
        {
            "offset": ["1d", "10d", "3d", None],
        }
    )
    result = df.select(
        d1=pl.lit(datetime(2020, 10, 25)).dt.offset_by(pl.col("offset")),
        d2=pl.lit(datetime(2020, 10, 25))
        .dt.cast_time_unit("ms")
        .dt.offset_by(pl.col("offset")),
        d3=pl.lit(datetime(2020, 10, 25))
        .dt.replace_time_zone("Europe/London")
        .dt.offset_by(pl.col("offset")),
        d4=pl.lit(datetime(2020, 10, 25)).dt.date().dt.offset_by(pl.col("offset")),
        d5=pl.lit(None, dtype=pl.Datetime).dt.offset_by(pl.col("offset")),
    )
    expected_dict = {
        "d1": [
            datetime(2020, 10, 26),
            datetime(2020, 11, 4),
            datetime(2020, 10, 28),
            None,
        ],
        "d2": [
            datetime(2020, 10, 26),
            datetime(2020, 11, 4),
            datetime(2020, 10, 28),
            None,
        ],
        "d3": [
            datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")),
            datetime(2020, 11, 4, tzinfo=ZoneInfo("Europe/London")),
            datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")),
            None,
        ],
        "d4": [
            datetime(2020, 10, 26).date(),
            datetime(2020, 11, 4).date(),
            datetime(2020, 10, 28).date(),
            None,
        ],
        "d5": [None, None, None, None],
    }
    assert result.to_dict(as_series=False) == expected_dict

    # test broadcast rhs
    df = pl.DataFrame({"dt": [datetime(2020, 10, 25), datetime(2021, 1, 2), None]})
    result = df.select(
        d1=pl.col("dt").dt.offset_by(pl.lit("1mo3d")),
        d2=pl.col("dt").dt.cast_time_unit("ms").dt.offset_by(pl.lit("1y1mo")),
        d3=pl.col("dt")
        .dt.replace_time_zone("Europe/London")
        .dt.offset_by(pl.lit("3d")),
        d4=pl.col("dt").dt.date().dt.offset_by(pl.lit("1y1mo1d")),
    )
    expected_dict = {
        "d1": [datetime(2020, 11, 28), datetime(2021, 2, 5), None],
        "d2": [datetime(2021, 11, 25), datetime(2022, 2, 2), None],
        "d3": [
            datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")),
            datetime(2021, 1, 5, tzinfo=ZoneInfo("Europe/London")),
            None,
        ],
        "d4": [datetime(2021, 11, 26).date(), datetime(2022, 2, 3).date(), None],
    }
    assert result.to_dict(as_series=False) == expected_dict

    # test all literal
    result = df.select(d=pl.lit(datetime(2021, 11, 26)).dt.offset_by("1mo1d"))
    assert result.to_dict(as_series=False) == {"d": [datetime(2021, 12, 27)]}


def test_offset_by_expressions() -> None:
    df = pl.DataFrame(
        {
            "a": [
                datetime(2020, 10, 25),
                datetime(2021, 1, 2),
                None,
                datetime(2021, 1, 4),
                None,
            ],
            "b": ["1d", "10d", "3d", None, None],
        }
    )
    df = df.sort("a")
    result = df.select(
        c=pl.col("a").dt.offset_by(pl.col("b")),
        d=pl.col("a").dt.cast_time_unit("ms").dt.offset_by(pl.col("b")),
        e=pl.col("a").dt.replace_time_zone("Europe/London").dt.offset_by(pl.col("b")),
        f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),
    )

    expected = pl.DataFrame(
        {
            "c": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None],
            "d": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None],
            "e": [
                None,
                None,
                datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")),
                datetime(2021, 1, 12, tzinfo=ZoneInfo("Europe/London")),
                None,
            ],
            "f": [None, None, date(2020, 10, 26), date(2021, 1, 12), None],
        },
        schema_overrides={
            "d": pl.Datetime("ms"),
            "e": pl.Datetime(time_zone="Europe/London"),
        },
    )
    assert_frame_equal(result, expected)
    assert result.flags == {
        "c": {"SORTED_ASC": False, "SORTED_DESC": False},
        "d": {"SORTED_ASC": False, "SORTED_DESC": False},
        "e": {"SORTED_ASC": False, "SORTED_DESC": False},
        "f": {"SORTED_ASC": False, "SORTED_DESC": False},
    }

    # Check single-row cases
    for i in range(df.height):
        df_slice = df[i : i + 1]
        result = df_slice.select(
            c=pl.col("a").dt.offset_by(pl.col("b")),
            d=pl.col("a").dt.cast_time_unit("ms").dt.offset_by(pl.col("b")),
            e=pl.col("a")
            .dt.replace_time_zone("Europe/London")
            .dt.offset_by(pl.col("b")),
            f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),
        )
        assert_frame_equal(result, expected[i : i + 1])
        # single-row Series are always sorted
        assert result.flags == {
            "c": {"SORTED_ASC": True, "SORTED_DESC": False},
            "d": {"SORTED_ASC": True, "SORTED_DESC": False},
            "e": {"SORTED_ASC": True, "SORTED_DESC": False},
            "f": {"SORTED_ASC": True, "SORTED_DESC": False},
        }


@pytest.mark.parametrize(
    ("duration", "input_date", "expected"),
    [
        ("1mo", date(2018, 1, 31), date(2018, 2, 28)),
        ("1y", date(2024, 2, 29), date(2025, 2, 28)),
        ("1y1mo", date(2024, 1, 30), date(2025, 2, 28)),
    ],
)
def test_offset_by_saturating_8217_8474(
    duration: str, input_date: date, expected: date
) -> None:
    result = pl.Series([input_date]).dt.offset_by(duration).item()
    assert result == expected


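# Month and year offsets saturate instead of overflowing: when the target
# month is shorter than the source day-of-month, the result clamps to the
# last day of that month (Jan 31 + "1mo" -> Feb 28) rather than spilling
# into March.

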
def test_year_empty_df() -> None:
    df = pl.DataFrame(pl.Series(name="date", dtype=pl.Date))
    assert df.select(pl.col("date").dt.year()).dtypes == [pl.Int32]


def test_epoch_invalid() -> None:
    with pytest.raises(InvalidOperationError, match="not supported for dtype"):
        pl.Series([timedelta(1)]).dt.epoch()


@pytest.mark.parametrize(
    "time_unit",
    ["ms", "us", "ns"],
    ids=["milliseconds", "microseconds", "nanoseconds"],
)
def test_weekday(time_unit: TimeUnit) -> None:
    friday = pl.Series([datetime(2023, 2, 17)])

    assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 5
    assert friday.cast(pl.Date).dt.weekday()[0] == 5


@pytest.mark.parametrize(
    ("values", "expected_median"),
    [
        ([], None),
        ([None, None], None),
        ([date(2022, 1, 1)], datetime(2022, 1, 1)),
        ([date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 4)], datetime(2022, 1, 2)),
        ([date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)], datetime(2022, 1, 2)),
        ([datetime(2022, 1, 1)], datetime(2022, 1, 1)),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],
            datetime(2022, 1, 2),
        ),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 1, 2),
        ),
        ([timedelta(days=1)], timedelta(days=1)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=3)], timedelta(days=2)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=15)], timedelta(days=2)),
        ([time(hour=1)], time(hour=1)),
        ([time(hour=1), time(hour=2), time(hour=3)], time(hour=2)),
        ([time(hour=1), time(hour=2), time(hour=15)], time(hour=2)),
    ],
    ids=[
        "empty",
        "Nones",
        "single_date",
        "spread_even_date",
        "spread_skewed_date",
        "single_datetime",
        "spread_even_datetime",
        "spread_skewed_datetime",
        "single_dur",
        "spread_even_dur",
        "spread_skewed_dur",
        "single_time",
        "spread_even_time",
        "spread_skewed_time",
    ],
)
def test_median(
    values: list[TemporalLiteral | None], expected_median: TemporalLiteral | None
) -> None:
    assert pl.Series(values).median() == expected_median


@pytest.mark.parametrize(
    ("values", "expected_mean"),
    [
        ([], None),
        ([None, None], None),
        ([date(2022, 1, 1)], datetime(2022, 1, 1)),
        (
            [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 4)],
            datetime(2022, 1, 2, 8),
        ),
        (
            [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)],
            datetime(2022, 10, 16, 16, 0),
        ),
        ([datetime(2022, 1, 1)], datetime(2022, 1, 1)),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],
            datetime(2022, 1, 2),
        ),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 10, 16, 16, 0, 0),
        ),
        ([timedelta(days=1)], timedelta(days=1)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=3)], timedelta(days=2)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=15)], timedelta(days=6)),
        ([time(hour=1)], time(hour=1)),
        ([time(hour=1), time(hour=2), time(hour=3)], time(hour=2)),
        ([time(hour=1), time(hour=2), time(hour=15)], time(hour=6)),
    ],
    ids=[
        "empty",
        "Nones",
        "single_date",
        "spread_even_date",
        "spread_skewed_date",
        "single_datetime",
        "spread_even_datetime",
        "spread_skewed_datetime",
        "single_duration",
        "spread_even_duration",
        "spread_skewed_duration",
        "single_time",
        "spread_even_time",
        "spread_skewed_time",
    ],
)
def test_mean(
    values: list[TemporalLiteral | None], expected_mean: TemporalLiteral | None
) -> None:
    assert pl.Series(values).mean() == expected_mean


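# Means and medians are computed on the underlying physical representation,
# which is why a Date column can average to a time of day: the mean of
# Jan 1, Jan 2 and Jan 4 is day 0 + (0 + 1 + 3) / 3 days = Jan 2 08:00,
# returned as a Datetime.

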
@pytest.mark.parametrize(
    ("values", "expected_mean"),
    [
        ([None], None),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 10, 16, 16, 0, 0),
        ),
    ],
    ids=["None_dt", "spread_skewed_dt"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_mean_with_tu(
    values: list[datetime], expected_mean: datetime, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Datetime(time_unit)).mean() == expected_mean


@pytest.mark.parametrize(
    ("values", "expected_median"),
    [
        ([None], None),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 1, 2),
        ),
    ],
    ids=["None_dt", "spread_skewed_dt"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_median_with_tu(
    values: list[datetime], expected_median: datetime, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Datetime(time_unit)).median() == expected_median


def test_date_median_upcast() -> None:
    df = pl.DataFrame({"a": [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]})
    result = df.select(pl.col("a").median())
    expected = pl.DataFrame(
        {"a": pl.Series([datetime(2022, 1, 2)], dtype=pl.Datetime("us"))}
    )
    assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    ("values", "expected_mean"),
    [
        ([None], None),
        (
            [timedelta(days=1), timedelta(days=2), timedelta(days=15)],
            timedelta(days=6),
        ),
    ],
    ids=["None_dur", "spread_skewed_dur"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_duration_mean_with_tu(
    values: list[timedelta], expected_mean: timedelta, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Duration(time_unit)).mean() == expected_mean


@pytest.mark.parametrize(
    ("values", "expected_median"),
    [
        ([None], None),
        (
            [timedelta(days=1), timedelta(days=2), timedelta(days=15)],
            timedelta(days=2),
        ),
    ],
    ids=["None_dur", "spread_skewed_dur"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_duration_median_with_tu(
    values: list[timedelta], expected_median: timedelta, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Duration(time_unit)).median() == expected_median


def test_agg_mean_expr() -> None:
    df = pl.DataFrame(
        {
            "date": pl.Series(
                [date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 4)],
                dtype=pl.Date,
            ),
            "datetime_ms": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ms"),
            ),
            "datetime_us": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("us"),
            ),
            "datetime_ns": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ns"),
            ),
            "duration_ms": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ms"),
            ),
            "duration_us": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("us"),
            ),
            "duration_ns": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ns"),
            ),
            "time": pl.Series(
                [time(hour=1), time(hour=2), time(hour=4)],
                dtype=pl.Time,
            ),
        }
    )

    expected = pl.DataFrame(
        {
            "date": pl.Series([datetime(2023, 1, 2, 8, 0)], dtype=pl.Datetime("us")),
            "datetime_ms": pl.Series(
                [datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("ms")
            ),
            "datetime_us": pl.Series(
                [datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("us")
            ),
            "datetime_ns": pl.Series(
                [datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("ns")
            ),
            "duration_ms": pl.Series(
                [timedelta(days=2, hours=8)], dtype=pl.Duration("ms")
            ),
            "duration_us": pl.Series(
                [timedelta(days=2, hours=8)], dtype=pl.Duration("us")
            ),
            "duration_ns": pl.Series(
                [timedelta(days=2, hours=8)], dtype=pl.Duration("ns")
            ),
            "time": pl.Series([time(hour=2, minute=20)], dtype=pl.Time),
        }
    )

    assert_frame_equal(df.select(pl.all().mean()), expected)


def test_agg_median_expr() -> None:
    df = pl.DataFrame(
        {
            "date": pl.Series(
                [date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 4)],
                dtype=pl.Date,
            ),
            "datetime_ms": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ms"),
            ),
            "datetime_us": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("us"),
            ),
            "datetime_ns": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ns"),
            ),
            "duration_ms": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ms"),
            ),
            "duration_us": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("us"),
            ),
            "duration_ns": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ns"),
            ),
            "time": pl.Series(
                [time(hour=1), time(hour=2), time(hour=4)],
                dtype=pl.Time,
            ),
        }
    )

    expected = pl.DataFrame(
        {
            "date": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("us")),
            "datetime_ms": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("ms")),
            "datetime_us": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("us")),
            "datetime_ns": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("ns")),
            "duration_ms": pl.Series([timedelta(days=2)], dtype=pl.Duration("ms")),
            "duration_us": pl.Series([timedelta(days=2)], dtype=pl.Duration("us")),
            "duration_ns": pl.Series([timedelta(days=2)], dtype=pl.Duration("ns")),
            "time": pl.Series([time(hour=2)], dtype=pl.Time),
        }
    )

    assert_frame_equal(df.select(pl.all().median()), expected)


@given(
    s=series(min_size=1, max_size=10, dtype=pl.Duration),
)
@pytest.mark.skip(
    "These functions are currently bugged for large values: "
    "https://github.com/pola-rs/polars/issues/16057"
)
def test_series_duration_timeunits(
    s: pl.Series,
) -> None:
    nanos = s.dt.total_nanoseconds().to_list()
    micros = s.dt.total_microseconds().to_list()
    millis = s.dt.total_milliseconds().to_list()

    scale = {
        "ns": 1,
        "us": 1_000,
        "ms": 1_000_000,
    }
    assert nanos == [v * scale[s.dtype.time_unit] for v in s.to_physical()]  # type: ignore[attr-defined]
    assert micros == [int(v / 1_000) for v in nanos]
    assert millis == [int(v / 1_000) for v in micros]

    # special handling for the ns time unit (we may generate a microsecond-based
    # timedelta that overflows 64 bits on conversion to nanoseconds)
    lower_bound, upper_bound = -(2**63), (2**63) - 1
    if all(
        (lower_bound <= (us * 1000) <= upper_bound)
        for us in micros
        if isinstance(us, int)
    ):
        for ns, us in zip(s.dt.total_nanoseconds(), micros):
            assert ns == (us * 1000)


@given(
    s=series(min_size=1, max_size=10, dtype=pl.Datetime, allow_null=False),
)
def test_series_datetime_timeunits(
    s: pl.Series,
) -> None:
    # datetime
    assert s.to_list() == list(s)
    assert list(s.dt.millisecond()) == [v.microsecond // 1000 for v in s]
    assert list(s.dt.nanosecond()) == [v.microsecond * 1000 for v in s]
    assert list(s.dt.microsecond()) == [v.microsecond for v in s]


def test_dt_median_deprecated() -> None:
    values = [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]
    s = pl.Series(values)
    with pytest.deprecated_call():
        result = s.dt.median()
    assert result == s.median()


def test_dt_mean_deprecated() -> None:
    values = [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]
    s = pl.Series(values)
    with pytest.deprecated_call():
        result = s.dt.mean()
    assert result == s.mean()


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("ms", "EST"),
        pl.Datetime("us"),
        pl.Datetime("us", "EST"),
        pl.Datetime("ns"),
        pl.Datetime("ns", "EST"),
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        # date(1677, 9, 22),  # See test_literal_from_datetime.
        date(1970, 1, 1),
        date(2024, 2, 29),
        date(2262, 4, 11),
    ],
)
def test_literal_from_date(
    value: date,
    dtype: PolarsDataType,
) -> None:
    out = pl.select(pl.lit(value, dtype=dtype))
    assert out.schema == OrderedDict({"literal": dtype})
    if dtype == pl.Datetime:
        tz = ZoneInfo(dtype.time_zone) if dtype.time_zone is not None else None  # type: ignore[union-attr]
        value = datetime(value.year, value.month, value.day, tzinfo=tz)
    assert out.item() == value


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("ms", "EST"),
        pl.Datetime("us"),
        pl.Datetime("us", "EST"),
        pl.Datetime("ns"),
        pl.Datetime("ns", "EST"),
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        # Very old dates with a time zone like EST caused problems for the CI
        # when the IANA time zone database updated its historical offsets: a
        # mismatch between the database used by the chrono_tz crate and the one
        # used by Python (which differs from platform to platform) makes these
        # cases fail, so they have been disabled for now.
        # datetime(1677, 9, 22),
        # datetime(1677, 9, 22, tzinfo=ZoneInfo("EST")),
        datetime(1970, 1, 1),
        datetime(1970, 1, 1, tzinfo=ZoneInfo("EST")),
        datetime(2024, 2, 29),
        datetime(2024, 2, 29, tzinfo=ZoneInfo("EST")),
        datetime(2262, 4, 11),
        datetime(2262, 4, 11, tzinfo=ZoneInfo("EST")),
    ],
)
def test_literal_from_datetime(
    value: datetime,
    dtype: pl.Date | pl.Datetime,
) -> None:
    out = pl.select(pl.lit(value, dtype=dtype))
    if dtype == pl.Date:
        value = value.date()  # type: ignore[assignment]
    elif dtype.time_zone is None and value.tzinfo is not None:  # type: ignore[union-attr]
        # update the dtype with the time zone supplied in the value
        dtype = pl.Datetime(dtype.time_unit, str(value.tzinfo))  # type: ignore[union-attr]
    elif dtype.time_zone is not None and value.tzinfo is None:  # type: ignore[union-attr]
        # cast from a tz-naive datetime to a tz-aware dtype
        value = value.replace(tzinfo=ZoneInfo(dtype.time_zone))  # type: ignore[union-attr]

    assert out.schema == OrderedDict({"literal": dtype})
    assert out.item() == value


@pytest.mark.parametrize(
    "value",
    [
        time(0),
        time(hour=1),
        time(hour=16, minute=43, microsecond=500),
        time(hour=23, minute=59, second=59, microsecond=999999),
    ],
)
def test_literal_from_time(value: time) -> None:
    out = pl.select(pl.lit(value))
    assert out.schema == OrderedDict({"literal": pl.Time})
    assert out.item() == value


@pytest.mark.parametrize(
    "dtype",
    [
        None,
        pl.Duration("ms"),
        pl.Duration("us"),
        pl.Duration("ns"),
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        timedelta(0),
        timedelta(hours=1),
        timedelta(days=-99999),
        timedelta(days=99999),
    ],
)
def test_literal_from_timedelta(value: timedelta, dtype: pl.Duration | None) -> None:
    out = pl.select(pl.lit(value, dtype=dtype))
    assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")})
    assert out.item() == value