# GitHub Repository: pola-rs/polars
# Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py
from __future__ import annotations

from collections import OrderedDict
from datetime import date, datetime, time, timedelta
from typing import TYPE_CHECKING
from zoneinfo import ZoneInfo

import pytest
from hypothesis import given

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
from polars.exceptions import ComputeError, InvalidOperationError
from polars.testing import assert_frame_equal, assert_series_equal
from polars.testing.parametric import series

if TYPE_CHECKING:
    from collections.abc import Callable

    from polars._typing import PolarsDataType, TemporalLiteral, TimeUnit


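# Fixture note (added): the integer values below are days since the Unix epoch
# (1970-01-01), e.g. 8401 days -> 1993-01-01.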
@pytest.fixture
def series_of_int_dates() -> pl.Series:
    return pl.Series([8401, 10000, 20000, 30000], dtype=pl.Date)


@pytest.fixture
def series_of_str_dates() -> pl.Series:
    return pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"])


def test_dt_to_string(series_of_int_dates: pl.Series) -> None:
    expected_str_dates = pl.Series(
        ["1993-01-01", "1997-05-19", "2024-10-04", "2052-02-20"]
    )

    assert series_of_int_dates.dtype == pl.Date
    assert_series_equal(series_of_int_dates.dt.to_string("%F"), expected_str_dates)

    # Check strftime alias as well
    assert_series_equal(series_of_int_dates.dt.strftime("%F"), expected_str_dates)
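    # Note (added): "%F" is shorthand for "%Y-%m-%d" in chrono-style format strings.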


@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("millennium", pl.Series(values=[2, 2, 3, 3], dtype=pl.Int32)),
        ("century", pl.Series(values=[20, 20, 21, 21], dtype=pl.Int32)),
        ("year", pl.Series(values=[1993, 1997, 2024, 2052], dtype=pl.Int32)),
        ("iso_year", pl.Series(values=[1992, 1997, 2024, 2052], dtype=pl.Int32)),
        ("quarter", pl.Series(values=[1, 2, 4, 1], dtype=pl.Int8)),
        ("month", pl.Series(values=[1, 5, 10, 2], dtype=pl.Int8)),
        ("week", pl.Series(values=[53, 21, 40, 8], dtype=pl.Int8)),
        ("day", pl.Series(values=[1, 19, 4, 20], dtype=pl.Int8)),
        ("weekday", pl.Series(values=[5, 1, 5, 2], dtype=pl.Int8)),
        ("ordinal_day", pl.Series(values=[1, 139, 278, 51], dtype=pl.Int16)),
    ],
)
@pytest.mark.parametrize("time_zone", ["Asia/Kathmandu", None])
def test_dt_extract_datetime_component(
    unit_attr: str,
    expected: pl.Series,
    series_of_int_dates: pl.Series,
    time_zone: str | None,
) -> None:
    assert_series_equal(getattr(series_of_int_dates.dt, unit_attr)(), expected)
    assert_series_equal(
        getattr(
            series_of_int_dates.cast(pl.Datetime).dt.replace_time_zone(time_zone).dt,
            unit_attr,
        )(),
        expected,
    )


@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("hour", pl.Series(values=[0, 3], dtype=pl.Int8)),
        ("minute", pl.Series(values=[0, 20], dtype=pl.Int8)),
        ("second", pl.Series(values=[0, 10], dtype=pl.Int8)),
        ("millisecond", pl.Series(values=[0, 987], dtype=pl.Int32)),
        ("microsecond", pl.Series(values=[0, 987654], dtype=pl.Int32)),
        ("nanosecond", pl.Series(values=[0, 987654321], dtype=pl.Int32)),
    ],
)
def test_strptime_extract_times(
    unit_attr: str,
    expected: pl.Series,
    series_of_int_dates: pl.Series,
    series_of_str_dates: pl.Series,
) -> None:
    s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")

    assert_series_equal(getattr(s.dt, unit_attr)(), expected)


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
100
@pytest.mark.parametrize(
101
("attribute", "expected"),
102
[
103
("date", date(2022, 1, 1)),
104
("time", time(23)),
105
],
106
)
107
def test_dt_date_and_time(
108
attribute: str, time_zone: None | str, expected: date | time
109
) -> None:
110
ser = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)
111
result = getattr(ser.dt, attribute)().item()
112
assert result == expected
113
114
115
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])
116
@pytest.mark.parametrize("time_unit", ["us", "ns", "ms"])
117
def test_dt_replace_time_zone_none(time_zone: str | None, time_unit: TimeUnit) -> None:
118
ser = (
119
pl.Series([datetime(2022, 1, 1, 23)])
120
.dt.cast_time_unit(time_unit)
121
.dt.replace_time_zone(time_zone)
122
)
123
result = ser.dt.replace_time_zone(None)
124
expected = datetime(2022, 1, 1, 23)
125
assert result.dtype == pl.Datetime(time_unit, None)
126
assert result.item() == expected
127
128
129
def test_dt_datetime_deprecated() -> None:
130
s = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone("Asia/Kathmandu")
131
with pytest.deprecated_call():
132
result = s.dt.datetime()
133
expected = datetime(2022, 1, 1, 23)
134
assert result.dtype == pl.Datetime(time_zone=None)
135
assert result.item() == expected
136
137
@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
def test_local_date_sortedness(time_zone: str | None) -> None:
    # singleton
    ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()
    result = ser.dt.date()
    assert result.flags["SORTED_ASC"]

    # 2 elements
    ser = (
        pl.Series([datetime(2022, 1, 1, 23)] * 2).dt.replace_time_zone(time_zone)
    ).sort()
    result = ser.dt.date()
    assert result.flags["SORTED_ASC"]


@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])
def test_local_time_sortedness(time_zone: str | None) -> None:
    # singleton - always sorted
    ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()
    result = ser.dt.time()
    assert result.flags["SORTED_ASC"]

    # three elements - not sorted
    ser = (
        pl.Series(
            [
                datetime(2022, 1, 1, 23),
                datetime(2022, 1, 2, 21),
                datetime(2022, 1, 3, 22),
            ]
        ).dt.replace_time_zone(time_zone)
    ).sort()
    result = ser.dt.time()
    assert not result.flags["SORTED_ASC"]
    assert not result.flags["SORTED_DESC"]


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_local_time_before_epoch(time_unit: TimeUnit) -> None:
    ser = pl.Series([datetime(1969, 7, 21, 2, 56, 2, 123000)]).dt.cast_time_unit(
        time_unit
    )
    result = ser.dt.time().item()
    expected = time(2, 56, 2, 123000)
    assert result == expected
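    # Context (added): 1969-07-21T02:56:02 predates the Unix epoch, so the
    # underlying timestamp is negative; time extraction must still be exact.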


@pytest.mark.parametrize(
    ("time_zone", "offset", "expected"),
    [
        (None, "+1d", True),
        ("Europe/London", "1d", False),
        ("UTC", "1d", True),
        (None, "1m", True),
        ("Europe/London", "1m", True),
        ("UTC", "1m", True),
        (None, "1w", True),
        ("Europe/London", "1w", False),
        ("UTC", "+1w", True),
        (None, "1h", True),
        ("Europe/London", "1h", True),
        ("UTC", "1h", True),
    ],
)
def test_offset_by_sortedness(
    time_zone: str | None, offset: str, expected: bool
) -> None:
    s = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 25, 3),
        "30m",
        time_zone=time_zone,
        eager=True,
    ).sort()
    assert s.flags["SORTED_ASC"]
    assert not s.flags["SORTED_DESC"]
    result = s.dt.offset_by(offset)
    assert result.flags["SORTED_ASC"] == expected
    assert not result.flags["SORTED_DESC"]
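    # Context (added): 2020-10-25 is the Europe/London DST fall-back day, so
    # calendar offsets ("1d", "1w") need not preserve sortedness there, while
    # fixed offsets ("1m", "1h") and naive/UTC data stay sorted.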


@pytest.mark.parametrize("offset", ["?", "xx", "P1D", "~10d"])
def test_offset_by_invalid_duration(offset: str) -> None:
    with pytest.raises(
        InvalidOperationError,
        match="expected leading integer in the duration string",
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by(offset)


@pytest.mark.parametrize("offset", ["++1d", "+1d+1m+1s", "--1d", "-1d-1m-1s"])
def test_offset_by_invalid_duration_unary_ops(offset: str) -> None:
    op = "+" if "+" in offset else "-"
    with pytest.raises(
        InvalidOperationError,
        match=rf"duration string can only have a single '\{op}' sign",
    ):
        pl.Series([datetime(2025, 10, 3, 11, 42)]).dt.offset_by(offset)


@pytest.mark.parametrize("offset", ["1", "1mo23d4", "-2d1", "12時30分45秒"])
def test_offset_by_missing_or_invalid_unit(offset: str) -> None:
    with pytest.raises(
        InvalidOperationError,
        match=f"expected a valid unit to follow integer in the duration string '{offset}'",
    ):
        pl.Series([datetime(2025, 10, 6, 13, 45)]).dt.offset_by(offset)


def test_offset_by_missing_unit_in_expr() -> None:
    with pytest.raises(
        InvalidOperationError,
        match="expected a valid unit to follow integer in the duration string '1d2'",
    ):
        pl.DataFrame(
            {"a": [datetime(2022, 3, 20, 5, 7)] * 2, "b": ["1d", "1d2"]}
        ).select(pl.col("a").dt.offset_by(pl.col("b")))


def test_dt_datetime_date_time_invalid() -> None:
    with pytest.raises(ComputeError, match="expected Datetime or Date"):
        pl.Series([time(23)]).dt.date()
    with pytest.raises(ComputeError, match="expected Datetime or Date"):
        pl.Series([timedelta(1)]).dt.date()
    with pytest.raises(ComputeError, match="expected Datetime or Time"):
        pl.Series([timedelta(1)]).dt.time()
    with pytest.raises(ComputeError, match="expected Datetime or Time"):
        pl.Series([date(2020, 1, 1)]).dt.time()


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_base_utc_offset(time_unit: TimeUnit) -> None:
    ser = pl.datetime_range(
        datetime(2011, 12, 29),
        datetime(2012, 1, 1),
        "2d",
        time_zone="Pacific/Apia",
        eager=True,
    ).dt.cast_time_unit(time_unit)
    result = ser.dt.base_utc_offset().rename("base_utc_offset")
    expected = pl.Series(
        "base_utc_offset",
        [-11 * 3600 * 1000, 13 * 3600 * 1000],
        dtype=pl.Duration("ms"),
    )
    assert_series_equal(result, expected)
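    # Context (added): Samoa (Pacific/Apia) skipped 2011-12-30 entirely,
    # jumping from UTC-11:00 to UTC+13:00, hence the sign flip above.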


def test_base_utc_offset_lazy_schema() -> None:
    ser = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    df = pl.DataFrame({"ts": ser}).lazy()
    result = df.with_columns(
        base_utc_offset=pl.col("ts").dt.base_utc_offset()
    ).collect_schema()
    expected = {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "base_utc_offset": pl.Duration(time_unit="ms"),
    }
    assert result == expected


def test_base_utc_offset_invalid() -> None:
    ser = pl.datetime_range(datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True)
    with pytest.raises(
        InvalidOperationError,
        match=r"`base_utc_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)",
    ):
        ser.dt.base_utc_offset().rename("base_utc_offset")


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_dst_offset(time_unit: TimeUnit) -> None:
    ser = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    ).dt.cast_time_unit(time_unit)
    result = ser.dt.dst_offset().rename("dst_offset")
    expected = pl.Series("dst_offset", [3_600 * 1_000, 0], dtype=pl.Duration("ms"))
    assert_series_equal(result, expected)


def test_dst_offset_lazy_schema() -> None:
    ser = pl.datetime_range(
        datetime(2020, 10, 25),
        datetime(2020, 10, 26),
        time_zone="Europe/London",
        eager=True,
    )
    df = pl.DataFrame({"ts": ser}).lazy()
    result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).collect_schema()
    expected = {
        "ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),
        "dst_offset": pl.Duration(time_unit="ms"),
    }
    assert result == expected


def test_dst_offset_invalid() -> None:
    ser = pl.datetime_range(datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True)
    with pytest.raises(
        InvalidOperationError,
        match=r"`dst_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)",
    ):
        ser.dt.dst_offset().rename("dst_offset")


@pytest.mark.parametrize(
    ("time_unit", "expected"),
    [
        ("d", pl.Series(values=[18262, 18294], dtype=pl.Int32)),
        ("s", pl.Series(values=[1_577_836_800, 1_580_613_610], dtype=pl.Int64)),
        (
            "ms",
            pl.Series(values=[1_577_836_800_000, 1_580_613_610_987], dtype=pl.Int64),
        ),
    ],
)
def test_strptime_epoch(
    time_unit: TimeUnit,
    expected: pl.Series,
    series_of_str_dates: pl.Series,
) -> None:
    s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")

    assert_series_equal(s.dt.epoch(time_unit=time_unit), expected)


def test_strptime_fractional_seconds(series_of_str_dates: pl.Series) -> None:
    s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")

    assert_series_equal(
        s.dt.second(fractional=True),
        pl.Series([0.0, 10.987654321], dtype=pl.Float64),
    )


@pytest.mark.parametrize(
    ("unit_attr", "expected"),
    [
        ("total_days", pl.Series([1])),
        ("total_hours", pl.Series([24])),
        ("total_minutes", pl.Series([24 * 60])),
        ("total_seconds", pl.Series([3600 * 24])),
        ("total_milliseconds", pl.Series([3600 * 24 * int(1e3)])),
        ("total_microseconds", pl.Series([3600 * 24 * int(1e6)])),
        ("total_nanoseconds", pl.Series([3600 * 24 * int(1e9)])),
    ],
)
def test_duration_extract_times(
    unit_attr: str,
    expected: pl.Series,
) -> None:
    duration = pl.Series([datetime(2022, 1, 2)]) - pl.Series([datetime(2022, 1, 1)])

    assert_series_equal(getattr(duration.dt, unit_attr)(), expected)


@pytest.mark.parametrize(
    ("time_unit", "every"),
    [
        ("ms", "1h"),
        ("us", "1h0m0s"),
        ("ns", timedelta(hours=1)),
    ],
    ids=["milliseconds", "microseconds", "nanoseconds"],
)
def test_truncate(
    time_unit: TimeUnit,
    every: str | timedelta,
) -> None:
    start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2)
    s = pl.datetime_range(
        start,
        stop,
        timedelta(minutes=30),
        time_unit=time_unit,
        eager=True,
    ).alias(f"dates[{time_unit}]")

    # can pass strings and time-deltas
    out = s.dt.truncate(every)
    assert out.dt[0] == start
    assert out.dt[1] == start
    assert out.dt[2] == start + timedelta(hours=1)
    assert out.dt[3] == start + timedelta(hours=1)
    # ...
    assert out.dt[-3] == stop - timedelta(hours=1)
    assert out.dt[-2] == stop - timedelta(hours=1)
    assert out.dt[-1] == stop


def test_truncate_negative() -> None:
    """Test that truncating to a negative duration gives a helpful error message."""
    df = pl.DataFrame(
        {
            "date": [date(1895, 5, 7), date(1955, 11, 5)],
            "datetime": [datetime(1895, 5, 7), datetime(1955, 11, 5)],
            "duration": ["-1m", "1m"],
        }
    )

    with pytest.raises(
        ComputeError, match="cannot truncate a Date to a negative duration"
    ):
        df.select(pl.col("date").dt.truncate("-1m"))

    with pytest.raises(
        ComputeError, match="cannot truncate a Datetime to a negative duration"
    ):
        df.select(pl.col("datetime").dt.truncate("-1m"))

    with pytest.raises(
        ComputeError, match="cannot truncate a Date to a negative duration"
    ):
        df.select(pl.col("date").dt.truncate(pl.col("duration")))

    with pytest.raises(
        ComputeError, match="cannot truncate a Datetime to a negative duration"
    ):
        df.select(pl.col("datetime").dt.truncate(pl.col("duration")))


@pytest.mark.parametrize(
    ("time_unit", "every"),
    [
        ("ms", "1h"),
        ("us", "1h0m0s"),
        ("ns", timedelta(hours=1)),
    ],
    ids=["milliseconds", "microseconds", "nanoseconds"],
)
def test_round(
    time_unit: TimeUnit,
    every: str | timedelta,
) -> None:
    start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2)
    s = pl.datetime_range(
        start,
        stop,
        timedelta(minutes=30),
        time_unit=time_unit,
        eager=True,
    ).alias(f"dates[{time_unit}]")

    # can pass strings and time-deltas
    out = s.dt.round(every)
    assert out.dt[0] == start
    assert out.dt[1] == start + timedelta(hours=1)
    assert out.dt[2] == start + timedelta(hours=1)
    assert out.dt[3] == start + timedelta(hours=2)
    # ...
    assert out.dt[-3] == stop - timedelta(hours=1)
    assert out.dt[-2] == stop
    assert out.dt[-1] == stop


def test_round_expr() -> None:
    df = pl.DataFrame(
        {
            "date": [
                datetime(2022, 11, 14),
                datetime(2023, 10, 11),
                datetime(2022, 3, 20, 5, 7, 18),
                datetime(2022, 4, 3, 13, 30, 32),
                None,
                datetime(2022, 12, 1),
            ],
            "every": ["1y", "1mo", "1m", "1m", "1mo", None],
        }
    )

    output = df.select(
        all_expr=pl.col("date").dt.round(every=pl.col("every")),
        date_lit=pl.lit(datetime(2022, 4, 3, 13, 30, 32)).dt.round(
            every=pl.col("every")
        ),
        every_lit=pl.col("date").dt.round("1d"),
    )

    expected = pl.DataFrame(
        {
            "all_expr": [
                datetime(2023, 1, 1),
                datetime(2023, 10, 1),
                datetime(2022, 3, 20, 5, 7),
                datetime(2022, 4, 3, 13, 31),
                None,
                None,
            ],
            "date_lit": [
                datetime(2022, 1, 1),
                datetime(2022, 4, 1),
                datetime(2022, 4, 3, 13, 31),
                datetime(2022, 4, 3, 13, 31),
                datetime(2022, 4, 1),
                None,
            ],
            "every_lit": [
                datetime(2022, 11, 14),
                datetime(2023, 10, 11),
                datetime(2022, 3, 20),
                datetime(2022, 4, 4),
                None,
                datetime(2022, 12, 1),
            ],
        }
    )

    assert_frame_equal(output, expected)

    all_lit = pl.select(all_lit=pl.lit(datetime(2022, 3, 20, 5, 7)).dt.round("1h"))
    assert all_lit.to_dict(as_series=False) == {"all_lit": [datetime(2022, 3, 20, 5)]}


def test_round_negative() -> None:
    """Test that rounding to a negative duration gives a helpful error message."""
    with pytest.raises(
        ComputeError, match="cannot round a Date to a negative duration"
    ):
        pl.Series([date(1895, 5, 7)]).dt.round("-1m")

    with pytest.raises(
        ComputeError, match="cannot round a Datetime to a negative duration"
    ):
        pl.Series([datetime(1895, 5, 7)]).dt.round("-1m")


def test_round_invalid_duration() -> None:
    with pytest.raises(
        InvalidOperationError, match="expected leading integer in the duration string"
    ):
        pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.round("P")


@pytest.mark.parametrize(
    ("time_unit", "date_in_that_unit"),
    [
        ("ns", [978307200000000000, 981022089000000000]),
        ("us", [978307200000000, 981022089000000]),
        ("ms", [978307200000, 981022089000]),
    ],
    ids=["nanoseconds", "microseconds", "milliseconds"],
)
def test_cast_time_units(
    time_unit: TimeUnit,
    date_in_that_unit: list[int],
) -> None:
    dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)])

    assert dates.dt.cast_time_unit(time_unit).cast(int).to_list() == date_in_that_unit


def test_epoch_matches_timestamp() -> None:
    dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)])

    for unit in DTYPE_TEMPORAL_UNITS:
        assert_series_equal(dates.dt.epoch(unit), dates.dt.timestamp(unit))

    assert_series_equal(dates.dt.epoch("s"), dates.dt.timestamp("ms") // 1000)
    assert_series_equal(
        dates.dt.epoch("d"),
        (dates.dt.timestamp("ms") // (1000 * 3600 * 24)).cast(pl.Int32),
    )


@pytest.mark.parametrize(
    ("tzinfo", "time_zone"),
    [(None, None), (ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu")],
)
def test_date_time_combine(tzinfo: ZoneInfo | None, time_zone: str | None) -> None:
    # Define a DataFrame with columns for datetime, date, and time
    df = pl.DataFrame(
        {
            "dtm": [
                datetime(2022, 12, 31, 10, 30, 45),
                datetime(2023, 7, 5, 23, 59, 59),
            ],
            "dt": [
                date(2022, 10, 10),
                date(2022, 7, 5),
            ],
            "tm": [
                time(1, 2, 3, 456000),
                time(7, 8, 9, 101000),
            ],
        }
    )
    df = df.with_columns(pl.col("dtm").dt.replace_time_zone(time_zone))

    # Combine datetime/date with time
    df = df.select(
        pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"),  # datetime & time
        pl.col("dt").dt.combine(pl.col("tm")).alias("d2"),  # date & time
        pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"),  # date & specified time
    )

    # Assert that the new columns have the expected values and datatypes
    expected_dict = {
        "d1": [  # Time component should be overwritten by `tm` values
            datetime(2022, 12, 31, 1, 2, 3, 456000, tzinfo=tzinfo),
            datetime(2023, 7, 5, 7, 8, 9, 101000, tzinfo=tzinfo),
        ],
        "d2": [  # Both date and time components combined "as-is" into new datetime
            datetime(2022, 10, 10, 1, 2, 3, 456000),
            datetime(2022, 7, 5, 7, 8, 9, 101000),
        ],
        "d3": [  # New datetime should use specified time component
            datetime(2022, 10, 10, 4, 5, 6),
            datetime(2022, 7, 5, 4, 5, 6),
        ],
    }
    assert df.to_dict(as_series=False) == expected_dict

    expected_schema = {
        "d1": pl.Datetime("us", time_zone),
        "d2": pl.Datetime("us"),
        "d3": pl.Datetime("us"),
    }
    assert df.schema == expected_schema


def test_combine_unsupported_types() -> None:
    with pytest.raises(ComputeError, match="expected Date or Datetime, got time"):
        pl.Series([time(1, 2)]).dt.combine(time(3, 4))


@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
672
@pytest.mark.parametrize("time_zone", ["Asia/Kathmandu", None])
673
def test_combine_lazy_schema_datetime(
674
time_zone: str | None,
675
time_unit: TimeUnit,
676
) -> None:
677
df = pl.DataFrame({"ts": pl.Series([datetime(2020, 1, 1)])})
678
df = df.with_columns(pl.col("ts").dt.replace_time_zone(time_zone))
679
result = df.lazy().select(
680
pl.col("ts").dt.combine(time(1, 2, 3), time_unit=time_unit)
681
)
682
expected_dtypes = [pl.Datetime(time_unit, time_zone)]
683
assert result.collect_schema().dtypes() == expected_dtypes
684
685
686
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
687
def test_combine_lazy_schema_date(time_unit: TimeUnit) -> None:
688
df = pl.DataFrame({"ts": pl.Series([date(2020, 1, 1)])})
689
result = df.lazy().select(
690
pl.col("ts").dt.combine(time(1, 2, 3), time_unit=time_unit)
691
)
692
expected_dtypes = [pl.Datetime(time_unit, None)]
693
assert result.collect_schema().dtypes() == expected_dtypes
694
695
696
@pytest.mark.parametrize(
697
("range_fn", "value_type", "kwargs"),
698
[
699
(pl.datetime_range, datetime, {"time_unit": "ns"}),
700
(pl.datetime_range, datetime, {"time_unit": "ns", "time_zone": "CET"}),
701
(pl.datetime_range, datetime, {"time_unit": "us"}),
702
(pl.datetime_range, datetime, {"time_unit": "us", "time_zone": "CET"}),
703
(pl.datetime_range, datetime, {"time_unit": "ms"}),
704
(pl.datetime_range, datetime, {"time_unit": "ms", "time_zone": "CET"}),
705
(pl.date_range, date, {}),
706
],
707
)
708
def test_iso_year(
709
range_fn: Callable[..., pl.Series], value_type: type, kwargs: dict[str, str]
710
) -> None:
711
assert range_fn(
712
value_type(1990, 1, 1), value_type(2004, 1, 1), "1y", **kwargs, eager=True
713
).dt.iso_year().to_list() == [
714
1990,
715
1991,
716
1992,
717
1992,
718
1993,
719
1994,
720
1996,
721
1997,
722
1998,
723
1998,
724
1999,
725
2001,
726
2002,
727
2003,
728
2004,
729
]
730
731
@pytest.mark.parametrize(
    ("range_fn", "value_type", "kwargs"),
    [
        (pl.datetime_range, datetime, {"time_unit": "ns"}),
        (pl.datetime_range, datetime, {"time_unit": "ns", "time_zone": "CET"}),
        (pl.datetime_range, datetime, {"time_unit": "us"}),
        (pl.datetime_range, datetime, {"time_unit": "us", "time_zone": "CET"}),
        (pl.datetime_range, datetime, {"time_unit": "ms"}),
        (pl.datetime_range, datetime, {"time_unit": "ms", "time_zone": "CET"}),
        (pl.date_range, date, {}),
    ],
)
def test_is_leap_year(
    range_fn: Callable[..., pl.Series], value_type: type, kwargs: dict[str, str]
) -> None:
    assert range_fn(
        value_type(1990, 1, 1), value_type(2004, 1, 1), "1y", **kwargs, eager=True
    ).dt.is_leap_year().to_list() == [
        False,
        False,
        True,  # 1992
        False,
        False,
        False,
        True,  # 1996
        False,
        False,
        False,
        True,  # 2000
        False,
        False,
        False,
        True,  # 2004
    ]


@pytest.mark.parametrize(
    ("value_type", "time_unit", "time_zone"),
    [
        (date, None, None),
        (datetime, "ns", None),
        (datetime, "ns", "Asia/Kathmandu"),
        (datetime, "us", None),
        (datetime, "us", "Asia/Kathmandu"),
        (datetime, "ms", None),
        (datetime, "ms", "Asia/Kathmandu"),
    ],
)
@pytest.mark.parametrize(
    ("start_ymd", "end_ymd", "feb_days"),
    [
        # Non-leap year cases
        ((1900, 1, 1), (1900, 12, 1), 28),  # 1900 is divisible by 100 but not by 400
        ((2025, 1, 1), (2025, 12, 1), 28),  # 2025 is not divisible by 4
        # Leap year cases
        ((2000, 1, 1), (2000, 12, 1), 29),  # 2000 is divisible by 400
        ((2004, 1, 1), (2004, 12, 1), 29),  # 2004 is divisible by 4 but not by 100
    ],
)
def test_days_in_month(
    value_type: type,
    time_unit: str | None,
    time_zone: str | None,
    start_ymd: tuple[int, int, int],
    end_ymd: tuple[int, int, int],
    feb_days: int,
) -> None:
    assert value_type in (date, datetime)
    range_fn: Callable[..., pl.Series] = (
        pl.date_range if value_type is date else pl.datetime_range
    )
    kwargs: dict[str, str] = {}
    if time_unit is not None:
        kwargs["time_unit"] = time_unit
    if time_zone is not None:
        kwargs["time_zone"] = time_zone
    assert range_fn(
        value_type(*start_ymd), value_type(*end_ymd), "1mo", **kwargs, eager=True
    ).dt.days_in_month().to_list() == [
        31,
        feb_days,
        31,
        30,
        31,
        30,
        31,
        31,
        30,
        31,
        30,
        31,
    ]


def test_quarter() -> None:
    assert pl.datetime_range(
        datetime(2022, 1, 1), datetime(2022, 12, 1), "1mo", eager=True
    ).dt.quarter().to_list() == [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]


def test_offset_by() -> None:
    df = pl.DataFrame(
        {
            "dates": pl.datetime_range(
                datetime(2000, 1, 1), datetime(2020, 1, 1), "1y", eager=True
            )
        }
    )

    # Add two new columns to the DataFrame using the offset_by() method
    df = df.with_columns(
        df["dates"].dt.offset_by("1y").alias("date_plus_1y"),
        df["dates"].dt.offset_by("-1y2mo").alias("date_min"),
    )

    # Assert that the day of the month for all the dates in new columns is 1
    assert (df["date_plus_1y"].dt.day() == 1).all()
    assert (df["date_min"].dt.day() == 1).all()

    # Assert that the 'date_min' column contains the expected list of dates
    expected_dates = [datetime(year, 11, 1, 0, 0) for year in range(1998, 2019)]
    assert df["date_min"].to_list() == expected_dates


@pytest.mark.parametrize("time_zone", ["America/Chicago", None])
def test_offset_by_crossing_dst(time_zone: str | None) -> None:
    ser = pl.Series([datetime(2021, 11, 7)]).dt.replace_time_zone(time_zone)
    result = ser.dt.offset_by("1d")
    expected = pl.Series([datetime(2021, 11, 8)]).dt.replace_time_zone(time_zone)
    assert_series_equal(result, expected)


def test_negative_offset_by_err_msg_8464() -> None:
    result = pl.Series([datetime(2022, 3, 30)]).dt.offset_by("-1mo")
    expected = pl.Series([datetime(2022, 2, 28)])
    assert_series_equal(result, expected)


def test_offset_by_truncate_sorted_flag() -> None:
    s = pl.Series([datetime(2001, 1, 1), datetime(2001, 1, 2)])
    s = s.set_sorted()

    assert s.flags["SORTED_ASC"]
    s1 = s.dt.offset_by("1d")
    assert s1.to_list() == [datetime(2001, 1, 2), datetime(2001, 1, 3)]
    assert s1.flags["SORTED_ASC"]
    s2 = s1.dt.truncate("1mo")
    assert s2.flags["SORTED_ASC"]


def test_offset_by_broadcasting() -> None:
    # test broadcast lhs
    df = pl.DataFrame(
        {
            "offset": ["1d", "10d", "3d", None],
        }
    )
    result = df.select(
        d1=pl.lit(datetime(2020, 10, 25)).dt.offset_by(pl.col("offset")),
        d2=pl.lit(datetime(2020, 10, 25))
        .dt.cast_time_unit("ms")
        .dt.offset_by(pl.col("offset")),
        d3=pl.lit(datetime(2020, 10, 25))
        .dt.replace_time_zone("Europe/London")
        .dt.offset_by(pl.col("offset")),
        d4=pl.lit(datetime(2020, 10, 25)).dt.date().dt.offset_by(pl.col("offset")),
        d5=pl.lit(None, dtype=pl.Datetime).dt.offset_by(pl.col("offset")),
    )
    expected_dict = {
        "d1": [
            datetime(2020, 10, 26),
            datetime(2020, 11, 4),
            datetime(2020, 10, 28),
            None,
        ],
        "d2": [
            datetime(2020, 10, 26),
            datetime(2020, 11, 4),
            datetime(2020, 10, 28),
            None,
        ],
        "d3": [
            datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")),
            datetime(2020, 11, 4, tzinfo=ZoneInfo("Europe/London")),
            datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")),
            None,
        ],
        "d4": [
            datetime(2020, 10, 26).date(),
            datetime(2020, 11, 4).date(),
            datetime(2020, 10, 28).date(),
            None,
        ],
        "d5": [None, None, None, None],
    }
    assert result.to_dict(as_series=False) == expected_dict

    # test broadcast rhs
    df = pl.DataFrame({"dt": [datetime(2020, 10, 25), datetime(2021, 1, 2), None]})
    result = df.select(
        d1=pl.col("dt").dt.offset_by(pl.lit("1mo3d")),
        d2=pl.col("dt").dt.cast_time_unit("ms").dt.offset_by(pl.lit("1y1mo")),
        d3=pl.col("dt")
        .dt.replace_time_zone("Europe/London")
        .dt.offset_by(pl.lit("3d")),
        d4=pl.col("dt").dt.date().dt.offset_by(pl.lit("1y1mo1d")),
    )
    expected_dict = {
        "d1": [datetime(2020, 11, 28), datetime(2021, 2, 5), None],
        "d2": [datetime(2021, 11, 25), datetime(2022, 2, 2), None],
        "d3": [
            datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")),
            datetime(2021, 1, 5, tzinfo=ZoneInfo("Europe/London")),
            None,
        ],
        "d4": [datetime(2021, 11, 26).date(), datetime(2022, 2, 3).date(), None],
    }
    assert result.to_dict(as_series=False) == expected_dict

    # test all literal
    result = df.select(d=pl.lit(datetime(2021, 11, 26)).dt.offset_by("1mo1d"))
    assert result.to_dict(as_series=False) == {"d": [datetime(2021, 12, 27)]}


def test_offset_by_expressions() -> None:
    df = pl.DataFrame(
        {
            "a": [
                datetime(2020, 10, 25),
                datetime(2021, 1, 2),
                None,
                datetime(2021, 1, 4),
                None,
            ],
            "b": ["1d", "10d", "3d", None, None],
        }
    )
    df = df.sort("a")
    result = df.select(
        c=pl.col("a").dt.offset_by(pl.col("b")),
        d=pl.col("a").dt.cast_time_unit("ms").dt.offset_by(pl.col("b")),
        e=pl.col("a").dt.replace_time_zone("Europe/London").dt.offset_by(pl.col("b")),
        f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),
    )

    expected = pl.DataFrame(
        {
            "c": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None],
            "d": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None],
            "e": [
                None,
                None,
                datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")),
                datetime(2021, 1, 12, tzinfo=ZoneInfo("Europe/London")),
                None,
            ],
            "f": [None, None, date(2020, 10, 26), date(2021, 1, 12), None],
        },
        schema_overrides={
            "d": pl.Datetime("ms"),
            "e": pl.Datetime(time_zone="Europe/London"),
        },
    )
    assert_frame_equal(result, expected)
    assert result.flags == {
        "c": {"SORTED_ASC": False, "SORTED_DESC": False},
        "d": {"SORTED_ASC": False, "SORTED_DESC": False},
        "e": {"SORTED_ASC": False, "SORTED_DESC": False},
        "f": {"SORTED_ASC": False, "SORTED_DESC": False},
    }

    # Check single-row cases
    for i in range(df.height):
        df_slice = df[i : i + 1]
        result = df_slice.select(
            c=pl.col("a").dt.offset_by(pl.col("b")),
            d=pl.col("a").dt.cast_time_unit("ms").dt.offset_by(pl.col("b")),
            e=pl.col("a")
            .dt.replace_time_zone("Europe/London")
            .dt.offset_by(pl.col("b")),
            f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),
        )
        assert_frame_equal(result, expected[i : i + 1])
        # single-row Series are always sorted
        assert result.flags == {
            "c": {"SORTED_ASC": True, "SORTED_DESC": False},
            "d": {"SORTED_ASC": True, "SORTED_DESC": False},
            "e": {"SORTED_ASC": True, "SORTED_DESC": False},
            "f": {"SORTED_ASC": True, "SORTED_DESC": False},
        }


@pytest.mark.parametrize(
    ("duration", "input_date", "expected"),
    [
        ("1mo", date(2018, 1, 31), date(2018, 2, 28)),
        ("1y", date(2024, 2, 29), date(2025, 2, 28)),
        ("1y1mo", date(2024, 1, 30), date(2025, 2, 28)),
    ],
)
def test_offset_by_saturating_8217_8474(
    duration: str, input_date: date, expected: date
) -> None:
    result = pl.Series([input_date]).dt.offset_by(duration).item()
    assert result == expected
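    # Note (added): month/year offsets saturate at the end of the target month
    # (e.g. 2018-01-31 + "1mo" -> 2018-02-28) instead of overflowing or raising.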


def test_year_empty_df() -> None:
    df = pl.DataFrame(pl.Series(name="date", dtype=pl.Date))
    assert df.select(pl.col("date").dt.year()).dtypes == [pl.Int32]


def test_epoch_invalid() -> None:
    with pytest.raises(InvalidOperationError, match="not supported for dtype"):
        pl.Series([timedelta(1)]).dt.epoch()


@pytest.mark.parametrize(
    "time_unit",
    ["ms", "us", "ns"],
    ids=["milliseconds", "microseconds", "nanoseconds"],
)
def test_weekday(time_unit: TimeUnit) -> None:
    friday = pl.Series([datetime(2023, 2, 17)])

    assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 5
    assert friday.cast(pl.Date).dt.weekday()[0] == 5
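    # Note (added): weekday() uses ISO 8601 numbering (Monday=1 .. Sunday=7);
    # 2023-02-17 was a Friday.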


@pytest.mark.parametrize(
    ("values", "expected_median"),
    [
        ([], None),
        ([None, None], None),
        ([date(2022, 1, 1)], datetime(2022, 1, 1)),
        ([date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 4)], datetime(2022, 1, 2)),
        ([date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)], datetime(2022, 1, 2)),
        ([datetime(2022, 1, 1)], datetime(2022, 1, 1)),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],
            datetime(2022, 1, 2),
        ),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 1, 2),
        ),
        ([timedelta(days=1)], timedelta(days=1)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=3)], timedelta(days=2)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=15)], timedelta(days=2)),
        ([time(hour=1)], time(hour=1)),
        ([time(hour=1), time(hour=2), time(hour=3)], time(hour=2)),
        ([time(hour=1), time(hour=2), time(hour=15)], time(hour=2)),
    ],
    ids=[
        "empty",
        "Nones",
        "single_date",
        "spread_even_date",
        "spread_skewed_date",
        "single_datetime",
        "spread_even_datetime",
        "spread_skewed_datetime",
        "single_dur",
        "spread_even_dur",
        "spread_skewed_dur",
        "single_time",
        "spread_even_time",
        "spread_skewed_time",
    ],
)
def test_median(
    values: list[TemporalLiteral | None], expected_median: TemporalLiteral | None
) -> None:
    assert pl.Series(values).median() == expected_median


@pytest.mark.parametrize(
    ("values", "expected_mean"),
    [
        ([], None),
        ([None, None], None),
        ([date(2022, 1, 1)], datetime(2022, 1, 1)),
        (
            [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 4)],
            datetime(2022, 1, 2, 8),
        ),
        (
            [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)],
            datetime(2022, 10, 16, 16, 0),
        ),
        ([datetime(2022, 1, 1)], datetime(2022, 1, 1)),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],
            datetime(2022, 1, 2),
        ),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 10, 16, 16, 0, 0),
        ),
        ([timedelta(days=1)], timedelta(days=1)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=3)], timedelta(days=2)),
        ([timedelta(days=1), timedelta(days=2), timedelta(days=15)], timedelta(days=6)),
        ([time(hour=1)], time(hour=1)),
        ([time(hour=1), time(hour=2), time(hour=3)], time(hour=2)),
        ([time(hour=1), time(hour=2), time(hour=15)], time(hour=6)),
    ],
    ids=[
        "empty",
        "Nones",
        "single_date",
        "spread_even_date",
        "spread_skewed_date",
        "single_datetime",
        "spread_even_datetime",
        "spread_skewed_datetime",
        "single_duration",
        "spread_even_duration",
        "spread_skewed_duration",
        "single_time",
        "spread_even_time",
        "spread_skewed_time",
    ],
)
def test_mean(
    values: list[TemporalLiteral | None], expected_mean: TemporalLiteral | None
) -> None:
    assert pl.Series(values).mean() == expected_mean


@pytest.mark.parametrize(
    ("values", "expected_mean"),
    [
        ([None], None),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 10, 16, 16, 0, 0),
        ),
    ],
    ids=["None_dt", "spread_skewed_dt"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_mean_with_tu(
    values: list[datetime], expected_mean: datetime, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Datetime(time_unit)).mean() == expected_mean


@pytest.mark.parametrize(
    ("values", "expected_median"),
    [
        ([None], None),
        (
            [datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],
            datetime(2022, 1, 2),
        ),
    ],
    ids=["None_dt", "spread_skewed_dt"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_datetime_median_with_tu(
    values: list[datetime], expected_median: datetime, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Datetime(time_unit)).median() == expected_median


def test_date_median_upcast() -> None:
    df = pl.DataFrame({"a": [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]})
    result = df.select(pl.col("a").median())
    expected = pl.DataFrame(
        {"a": pl.Series([datetime(2022, 1, 2)], dtype=pl.Datetime("us"))}
    )
    assert_frame_equal(result, expected)
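    # Note (added): the median of a Date column upcasts to Datetime("us"),
    # since the result can fall between calendar days.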


@pytest.mark.parametrize(
    ("values", "expected_mean"),
    [
        ([None], None),
        (
            [timedelta(days=1), timedelta(days=2), timedelta(days=15)],
            timedelta(days=6),
        ),
    ],
    ids=["None_dur", "spread_skewed_dur"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_duration_mean_with_tu(
    values: list[timedelta], expected_mean: timedelta, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Duration(time_unit)).mean() == expected_mean


@pytest.mark.parametrize(
    ("values", "expected_median"),
    [
        ([None], None),
        (
            [timedelta(days=1), timedelta(days=2), timedelta(days=15)],
            timedelta(days=2),
        ),
    ],
    ids=["None_dur", "spread_skewed_dur"],
)
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
def test_duration_median_with_tu(
    values: list[timedelta], expected_median: timedelta, time_unit: TimeUnit
) -> None:
    assert pl.Series(values, dtype=pl.Duration(time_unit)).median() == expected_median


def test_agg_mean_expr() -> None:
    df = pl.DataFrame(
        {
            "date": pl.Series(
                [date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 4)],
                dtype=pl.Date,
            ),
            "datetime_ms": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ms"),
            ),
            "datetime_us": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("us"),
            ),
            "datetime_ns": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ns"),
            ),
            "duration_ms": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ms"),
            ),
            "duration_us": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("us"),
            ),
            "duration_ns": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ns"),
            ),
            "time": pl.Series(
                [time(hour=1), time(hour=2), time(hour=4)],
                dtype=pl.Time,
            ),
        }
    )

    expected = pl.DataFrame(
        {
            "date": pl.Series([datetime(2023, 1, 2, 8, 0)], dtype=pl.Datetime("us")),
            "datetime_ms": pl.Series(
                [datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("ms")
            ),
            "datetime_us": pl.Series(
                [datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("us")
            ),
            "datetime_ns": pl.Series(
                [datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("ns")
            ),
            "duration_ms": pl.Series(
                [timedelta(days=2, hours=8)], dtype=pl.Duration("ms")
            ),
            "duration_us": pl.Series(
                [timedelta(days=2, hours=8)], dtype=pl.Duration("us")
            ),
            "duration_ns": pl.Series(
                [timedelta(days=2, hours=8)], dtype=pl.Duration("ns")
            ),
            "time": pl.Series([time(hour=2, minute=20)], dtype=pl.Time),
        }
    )

    assert_frame_equal(df.select(pl.all().mean()), expected)


def test_agg_median_expr() -> None:
    df = pl.DataFrame(
        {
            "date": pl.Series(
                [date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 4)],
                dtype=pl.Date,
            ),
            "datetime_ms": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ms"),
            ),
            "datetime_us": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("us"),
            ),
            "datetime_ns": pl.Series(
                [datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],
                dtype=pl.Datetime("ns"),
            ),
            "duration_ms": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ms"),
            ),
            "duration_us": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("us"),
            ),
            "duration_ns": pl.Series(
                [timedelta(days=1), timedelta(days=2), timedelta(days=4)],
                dtype=pl.Duration("ns"),
            ),
            "time": pl.Series(
                [time(hour=1), time(hour=2), time(hour=4)],
                dtype=pl.Time,
            ),
        }
    )

    expected = pl.DataFrame(
        {
            "date": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("us")),
            "datetime_ms": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("ms")),
            "datetime_us": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("us")),
            "datetime_ns": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("ns")),
            "duration_ms": pl.Series([timedelta(days=2)], dtype=pl.Duration("ms")),
            "duration_us": pl.Series([timedelta(days=2)], dtype=pl.Duration("us")),
            "duration_ns": pl.Series([timedelta(days=2)], dtype=pl.Duration("ns")),
            "time": pl.Series([time(hour=2)], dtype=pl.Time),
        }
    )

    assert_frame_equal(df.select(pl.all().median()), expected)


@given(
    s=series(min_size=1, max_size=10, dtype=pl.Duration),
)
@pytest.mark.skip(
    "These functions are currently bugged for large values: "
    "https://github.com/pola-rs/polars/issues/16057"
)
def test_series_duration_timeunits(
    s: pl.Series,
) -> None:
    nanos = s.dt.total_nanoseconds().to_list()
    micros = s.dt.total_microseconds().to_list()
    millis = s.dt.total_milliseconds().to_list()

    scale = {
        "ns": 1,
        "us": 1_000,
        "ms": 1_000_000,
    }
    assert nanos == [v * scale[s.dtype.time_unit] for v in s.to_physical()]  # type: ignore[attr-defined]
    assert micros == [int(v / 1_000) for v in nanos]
    assert millis == [int(v / 1_000) for v in micros]

    # special handling for ns timeunit (as we may generate a microsecs-based
    # timedelta that results in 64bit overflow on conversion to nanosecs)
    lower_bound, upper_bound = -(2**63), (2**63) - 1
    if all(
        (lower_bound <= (us * 1000) <= upper_bound)
        for us in micros
        if isinstance(us, int)
    ):
        for ns, us in zip(s.dt.total_nanoseconds(), micros, strict=True):
            assert ns == (us * 1000)


@given(
    s=series(min_size=1, max_size=10, dtype=pl.Datetime, allow_null=False),
)
def test_series_datetime_timeunits(
    s: pl.Series,
) -> None:
    # datetime
    assert s.to_list() == list(s)
    assert list(s.dt.millisecond()) == [v.microsecond // 1000 for v in s]
    assert list(s.dt.nanosecond()) == [v.microsecond * 1000 for v in s]
    assert list(s.dt.microsecond()) == [v.microsecond for v in s]


def test_dt_median_deprecated() -> None:
    values = [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]
    s = pl.Series(values)
    with pytest.deprecated_call():
        result = s.dt.median()
    assert result == s.median()


def test_dt_mean_deprecated() -> None:
    values = [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]
    s = pl.Series(values)
    with pytest.deprecated_call():
        result = s.dt.mean()
    assert result == s.mean()


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("ms", "America/New_York"),
        pl.Datetime("us"),
        pl.Datetime("us", "America/New_York"),
        pl.Datetime("ns"),
        pl.Datetime("ns", "America/New_York"),
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        date(1677, 9, 22),
        date(1970, 1, 1),
        date(2024, 2, 29),
    ],
)
def test_literal_from_date(
    value: date,
    dtype: PolarsDataType,
) -> None:
    out = pl.select(pl.lit(value, dtype=dtype))
    assert out.schema == OrderedDict({"literal": dtype})
    if dtype == pl.Datetime:
        tz = ZoneInfo(dtype.time_zone) if dtype.time_zone is not None else None  # type: ignore[union-attr]
        value = datetime(value.year, value.month, value.day, tzinfo=tz)
    assert out.item() == value


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("ms", "America/New_York"),
        pl.Datetime("us"),
        pl.Datetime("us", "America/New_York"),
        pl.Datetime("ns"),
        pl.Datetime("ns", "America/New_York"),
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        datetime(1677, 9, 22),
        datetime(1677, 9, 22, tzinfo=ZoneInfo("America/New_York")),
        datetime(1970, 1, 1),
        datetime(1970, 1, 1, tzinfo=ZoneInfo("America/New_York")),
        datetime(2024, 2, 29),
        datetime(2024, 2, 29, tzinfo=ZoneInfo("America/New_York")),
    ],
)
def test_literal_from_datetime(
    value: datetime,
    dtype: pl.Date | pl.Datetime,
) -> None:
    out = pl.select(pl.lit(value, dtype=dtype))
    if dtype == pl.Date:
        value = value.date()  # type: ignore[assignment]
    elif dtype.time_zone is None and value.tzinfo is not None:  # type: ignore[union-attr]
        # update the dtype with the supplied time zone in the value
        dtype = pl.Datetime(dtype.time_unit, str(value.tzinfo))  # type: ignore[union-attr]
    elif dtype.time_zone is not None and value.tzinfo is None:  # type: ignore[union-attr]
        # cast from dt without tz to dtype with tz
        value = value.replace(tzinfo=ZoneInfo(dtype.time_zone))  # type: ignore[union-attr]

    assert out.schema == OrderedDict({"literal": dtype})
    assert out.item() == value


@pytest.mark.parametrize(
    "value",
    [
        time(0),
        time(hour=1),
        time(hour=16, minute=43, microsecond=500),
        time(hour=23, minute=59, second=59, microsecond=999999),
    ],
)
def test_literal_from_time(value: time) -> None:
    out = pl.select(pl.lit(value))
    assert out.schema == OrderedDict({"literal": pl.Time})
    assert out.item() == value


@pytest.mark.parametrize(
    "dtype",
    [
        None,
        pl.Duration("ms"),
        pl.Duration("us"),
        pl.Duration("ns"),
    ],
)
@pytest.mark.parametrize(
    "value",
    [
        timedelta(0),
        timedelta(hours=1),
        timedelta(days=-99999),
        timedelta(days=99999),
    ],
)
def test_literal_from_timedelta(value: timedelta, dtype: pl.Duration | None) -> None:
    out = pl.select(pl.lit(value, dtype=dtype))
    assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")})
    assert out.item() == value


1550
def test_out_of_range_date_year_11991() -> None:
1551
# Out-of-range dates should return null instead of wrong values or panicking
1552
# Regression test for #11991 where out-of-range dates silently returned
1553
# the input value
1554
s = pl.Series([-96_465_659]).cast(pl.Date)
1555
result = s.dt.year()
1556
# Should return null, not the input value -96465659
1557
assert result[0] is None
1558
1559
# is_leap_year should also return null for out-of-range dates
1560
result_leap = s.dt.is_leap_year()
1561
assert result_leap[0] is None
1562
1563