Path: blob/main/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py
6940 views
from __future__ import annotations12from collections import OrderedDict3from datetime import date, datetime, time, timedelta4from typing import TYPE_CHECKING, Callable5from zoneinfo import ZoneInfo67import pytest8from hypothesis import given910import polars as pl11from polars.datatypes import DTYPE_TEMPORAL_UNITS12from polars.exceptions import ComputeError, InvalidOperationError13from polars.testing import assert_frame_equal, assert_series_equal14from polars.testing.parametric import series1516if TYPE_CHECKING:17from polars._typing import PolarsDataType, TemporalLiteral, TimeUnit181920@pytest.fixture21def series_of_int_dates() -> pl.Series:22return pl.Series([8401, 10000, 20000, 30000], dtype=pl.Date)232425@pytest.fixture26def series_of_str_dates() -> pl.Series:27return pl.Series(["2020-01-01 00:00:00.000000000", "2020-02-02 03:20:10.987654321"])282930def test_dt_to_string(series_of_int_dates: pl.Series) -> None:31expected_str_dates = pl.Series(32["1993-01-01", "1997-05-19", "2024-10-04", "2052-02-20"]33)3435assert series_of_int_dates.dtype == pl.Date36assert_series_equal(series_of_int_dates.dt.to_string("%F"), expected_str_dates)3738# Check strftime alias as well39assert_series_equal(series_of_int_dates.dt.strftime("%F"), expected_str_dates)404142@pytest.mark.parametrize(43("unit_attr", "expected"),44[45("millennium", pl.Series(values=[2, 2, 3, 3], dtype=pl.Int32)),46("century", pl.Series(values=[20, 20, 21, 21], dtype=pl.Int32)),47("year", pl.Series(values=[1993, 1997, 2024, 2052], dtype=pl.Int32)),48("iso_year", pl.Series(values=[1992, 1997, 2024, 2052], dtype=pl.Int32)),49("quarter", pl.Series(values=[1, 2, 4, 1], dtype=pl.Int8)),50("month", pl.Series(values=[1, 5, 10, 2], dtype=pl.Int8)),51("week", pl.Series(values=[53, 21, 40, 8], dtype=pl.Int8)),52("day", pl.Series(values=[1, 19, 4, 20], dtype=pl.Int8)),53("weekday", pl.Series(values=[5, 1, 5, 2], dtype=pl.Int8)),54("ordinal_day", pl.Series(values=[1, 139, 278, 51], dtype=pl.Int16)),55],56)57@pytest.mark.parametrize("time_zone", ["Asia/Kathmandu", None])58def test_dt_extract_datetime_component(59unit_attr: str,60expected: pl.Series,61series_of_int_dates: pl.Series,62time_zone: str | None,63) -> None:64assert_series_equal(getattr(series_of_int_dates.dt, unit_attr)(), expected)65assert_series_equal(66getattr(67series_of_int_dates.cast(pl.Datetime).dt.replace_time_zone(time_zone).dt,68unit_attr,69)(),70expected,71)727374@pytest.mark.parametrize(75("unit_attr", "expected"),76[77("hour", pl.Series(values=[0, 3], dtype=pl.Int8)),78("minute", pl.Series(values=[0, 20], dtype=pl.Int8)),79("second", pl.Series(values=[0, 10], dtype=pl.Int8)),80("millisecond", pl.Series(values=[0, 987], dtype=pl.Int32)),81("microsecond", pl.Series(values=[0, 987654], dtype=pl.Int32)),82("nanosecond", pl.Series(values=[0, 987654321], dtype=pl.Int32)),83],84)85def test_strptime_extract_times(86unit_attr: str,87expected: pl.Series,88series_of_int_dates: pl.Series,89series_of_str_dates: pl.Series,90) -> None:91s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")9293assert_series_equal(getattr(s.dt, unit_attr)(), expected)949596@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])97@pytest.mark.parametrize(98("attribute", "expected"),99[100("date", date(2022, 1, 1)),101("time", time(23)),102],103)104def test_dt_date_and_time(105attribute: str, time_zone: None | str, expected: date | time106) -> None:107ser = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)108result = getattr(ser.dt, attribute)().item()109assert result == expected110111112@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu"])113@pytest.mark.parametrize("time_unit", ["us", "ns", "ms"])114def test_dt_replace_time_zone_none(time_zone: str | None, time_unit: TimeUnit) -> None:115ser = (116pl.Series([datetime(2022, 1, 1, 23)])117.dt.cast_time_unit(time_unit)118.dt.replace_time_zone(time_zone)119)120result = ser.dt.replace_time_zone(None)121expected = datetime(2022, 1, 1, 23)122assert result.dtype == pl.Datetime(time_unit, None)123assert result.item() == expected124125126def test_dt_datetime_deprecated() -> None:127s = pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone("Asia/Kathmandu")128with pytest.deprecated_call():129result = s.dt.datetime()130expected = datetime(2022, 1, 1, 23)131assert result.dtype == pl.Datetime(time_zone=None)132assert result.item() == expected133134135@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])136def test_local_date_sortedness(time_zone: str | None) -> None:137# singleton138ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()139result = ser.dt.date()140assert result.flags["SORTED_ASC"]141142# 2 elements143ser = (144pl.Series([datetime(2022, 1, 1, 23)] * 2).dt.replace_time_zone(time_zone)145).sort()146result = ser.dt.date()147assert result.flags["SORTED_ASC"]148149150@pytest.mark.parametrize("time_zone", [None, "Asia/Kathmandu", "UTC"])151def test_local_time_sortedness(time_zone: str | None) -> None:152# singleton - always sorted153ser = (pl.Series([datetime(2022, 1, 1, 23)]).dt.replace_time_zone(time_zone)).sort()154result = ser.dt.time()155assert result.flags["SORTED_ASC"]156157# three elements - not sorted158ser = (159pl.Series(160[161datetime(2022, 1, 1, 23),162datetime(2022, 1, 2, 21),163datetime(2022, 1, 3, 22),164]165).dt.replace_time_zone(time_zone)166).sort()167result = ser.dt.time()168assert not result.flags["SORTED_ASC"]169assert not result.flags["SORTED_DESC"]170171172@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])173def test_local_time_before_epoch(time_unit: TimeUnit) -> None:174ser = pl.Series([datetime(1969, 7, 21, 2, 56, 2, 123000)]).dt.cast_time_unit(175time_unit176)177result = ser.dt.time().item()178expected = time(2, 56, 2, 123000)179assert result == expected180181182@pytest.mark.parametrize(183("time_zone", "offset", "expected"),184[185(None, "1d", True),186("Europe/London", "1d", False),187("UTC", "1d", True),188(None, "1m", True),189("Europe/London", "1m", True),190("UTC", "1m", True),191(None, "1w", True),192("Europe/London", "1w", False),193("UTC", "1w", True),194(None, "1h", True),195("Europe/London", "1h", True),196("UTC", "1h", True),197],198)199def test_offset_by_sortedness(200time_zone: str | None, offset: str, expected: bool201) -> None:202s = pl.datetime_range(203datetime(2020, 10, 25),204datetime(2020, 10, 25, 3),205"30m",206time_zone=time_zone,207eager=True,208).sort()209assert s.flags["SORTED_ASC"]210assert not s.flags["SORTED_DESC"]211result = s.dt.offset_by(offset)212assert result.flags["SORTED_ASC"] == expected213assert not result.flags["SORTED_DESC"]214215216def test_offset_by_invalid_duration() -> None:217with pytest.raises(218InvalidOperationError, match="expected leading integer in the duration string"219):220pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("P")221222223def test_offset_by_missing_unit() -> None:224with pytest.raises(225InvalidOperationError,226match="expected a unit to follow integer in the duration string '1'",227):228pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("1")229230with pytest.raises(231InvalidOperationError,232match="expected a unit to follow integer in the duration string '1mo23d4'",233):234pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("1mo23d4")235236with pytest.raises(237InvalidOperationError,238match="expected a unit to follow integer in the duration string '-2d1'",239):240pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.offset_by("-2d1")241242with pytest.raises(243InvalidOperationError,244match="expected a unit to follow integer in the duration string '1d2'",245):246pl.DataFrame(247{"a": [datetime(2022, 3, 20, 5, 7)] * 2, "b": ["1d", "1d2"]}248).select(pl.col("a").dt.offset_by(pl.col("b")))249250251def test_dt_datetime_date_time_invalid() -> None:252with pytest.raises(ComputeError, match="expected Datetime or Date"):253pl.Series([time(23)]).dt.date()254with pytest.raises(ComputeError, match="expected Datetime or Date"):255pl.Series([timedelta(1)]).dt.date()256with pytest.raises(ComputeError, match="expected Datetime or Time"):257pl.Series([timedelta(1)]).dt.time()258with pytest.raises(ComputeError, match="expected Datetime or Time"):259pl.Series([date(2020, 1, 1)]).dt.time()260261262@pytest.mark.parametrize(263("dt", "expected"),264[265(datetime(2022, 3, 15, 3), datetime(2022, 3, 1, 3)),266(datetime(2022, 3, 15, 3, 2, 1, 123000), datetime(2022, 3, 1, 3, 2, 1, 123000)),267(datetime(2022, 3, 15), datetime(2022, 3, 1)),268(datetime(2022, 3, 1), datetime(2022, 3, 1)),269],270)271@pytest.mark.parametrize(272("tzinfo", "time_zone"),273[274(None, None),275(ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),276],277)278@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])279def test_month_start_datetime(280dt: datetime,281expected: datetime,282time_unit: TimeUnit,283tzinfo: ZoneInfo | None,284time_zone: str | None,285) -> None:286ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)287result = ser.dt.month_start().item()288assert result == expected.replace(tzinfo=tzinfo)289290291@pytest.mark.parametrize(292("dt", "expected"),293[294(date(2022, 3, 15), date(2022, 3, 1)),295(date(2022, 3, 31), date(2022, 3, 1)),296],297)298def test_month_start_date(dt: date, expected: date) -> None:299ser = pl.Series([dt])300result = ser.dt.month_start().item()301assert result == expected302303304@pytest.mark.parametrize(305("dt", "expected"),306[307(datetime(2022, 3, 15, 3), datetime(2022, 3, 31, 3)),308(309datetime(2022, 3, 15, 3, 2, 1, 123000),310datetime(2022, 3, 31, 3, 2, 1, 123000),311),312(datetime(2022, 3, 15), datetime(2022, 3, 31)),313(datetime(2022, 3, 31), datetime(2022, 3, 31)),314],315)316@pytest.mark.parametrize(317("tzinfo", "time_zone"),318[319(None, None),320(ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),321],322)323@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])324def test_month_end_datetime(325dt: datetime,326expected: datetime,327time_unit: TimeUnit,328tzinfo: ZoneInfo | None,329time_zone: str | None,330) -> None:331ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)332result = ser.dt.month_end().item()333assert result == expected.replace(tzinfo=tzinfo)334335336@pytest.mark.parametrize(337("dt", "expected"),338[339(date(2022, 3, 15), date(2022, 3, 31)),340(date(2022, 3, 31), date(2022, 3, 31)),341],342)343def test_month_end_date(dt: date, expected: date) -> None:344ser = pl.Series([dt])345result = ser.dt.month_end().item()346assert result == expected347348349def test_month_start_end_invalid() -> None:350ser = pl.Series([time(1, 2, 3)])351with pytest.raises(352InvalidOperationError,353match=r"`month_start` operation not supported for dtype `time` \(expected: date/datetime\)",354):355ser.dt.month_start()356with pytest.raises(357InvalidOperationError,358match=r"`month_end` operation not supported for dtype `time` \(expected: date/datetime\)",359):360ser.dt.month_end()361362363@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])364def test_base_utc_offset(time_unit: TimeUnit) -> None:365ser = pl.datetime_range(366datetime(2011, 12, 29),367datetime(2012, 1, 1),368"2d",369time_zone="Pacific/Apia",370eager=True,371).dt.cast_time_unit(time_unit)372result = ser.dt.base_utc_offset().rename("base_utc_offset")373expected = pl.Series(374"base_utc_offset",375[-11 * 3600 * 1000, 13 * 3600 * 1000],376dtype=pl.Duration("ms"),377)378assert_series_equal(result, expected)379380381def test_base_utc_offset_lazy_schema() -> None:382ser = pl.datetime_range(383datetime(2020, 10, 25),384datetime(2020, 10, 26),385time_zone="Europe/London",386eager=True,387)388df = pl.DataFrame({"ts": ser}).lazy()389result = df.with_columns(390base_utc_offset=pl.col("ts").dt.base_utc_offset()391).collect_schema()392expected = {393"ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),394"base_utc_offset": pl.Duration(time_unit="ms"),395}396assert result == expected397398399def test_base_utc_offset_invalid() -> None:400ser = pl.datetime_range(datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True)401with pytest.raises(402InvalidOperationError,403match=r"`base_utc_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)",404):405ser.dt.base_utc_offset().rename("base_utc_offset")406407408@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])409def test_dst_offset(time_unit: TimeUnit) -> None:410ser = pl.datetime_range(411datetime(2020, 10, 25),412datetime(2020, 10, 26),413time_zone="Europe/London",414eager=True,415).dt.cast_time_unit(time_unit)416result = ser.dt.dst_offset().rename("dst_offset")417expected = pl.Series("dst_offset", [3_600 * 1_000, 0], dtype=pl.Duration("ms"))418assert_series_equal(result, expected)419420421def test_dst_offset_lazy_schema() -> None:422ser = pl.datetime_range(423datetime(2020, 10, 25),424datetime(2020, 10, 26),425time_zone="Europe/London",426eager=True,427)428df = pl.DataFrame({"ts": ser}).lazy()429result = df.with_columns(dst_offset=pl.col("ts").dt.dst_offset()).collect_schema()430expected = {431"ts": pl.Datetime(time_unit="us", time_zone="Europe/London"),432"dst_offset": pl.Duration(time_unit="ms"),433}434assert result == expected435436437def test_dst_offset_invalid() -> None:438ser = pl.datetime_range(datetime(2020, 10, 25), datetime(2020, 10, 26), eager=True)439with pytest.raises(440InvalidOperationError,441match=r"`dst_offset` operation not supported for dtype `datetime\[μs\]` \(expected: time-zone-aware datetime\)",442):443ser.dt.dst_offset().rename("dst_offset")444445446@pytest.mark.parametrize(447("time_unit", "expected"),448[449("d", pl.Series(values=[18262, 18294], dtype=pl.Int32)),450("s", pl.Series(values=[1_577_836_800, 1_580_613_610], dtype=pl.Int64)),451(452"ms",453pl.Series(values=[1_577_836_800_000, 1_580_613_610_987], dtype=pl.Int64),454),455],456)457def test_strptime_epoch(458time_unit: TimeUnit,459expected: pl.Series,460series_of_str_dates: pl.Series,461) -> None:462s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")463464assert_series_equal(s.dt.epoch(time_unit=time_unit), expected)465466467def test_strptime_fractional_seconds(series_of_str_dates: pl.Series) -> None:468s = series_of_str_dates.str.strptime(pl.Datetime, format="%Y-%m-%d %H:%M:%S.%9f")469470assert_series_equal(471s.dt.second(fractional=True),472pl.Series([0.0, 10.987654321], dtype=pl.Float64),473)474475476@pytest.mark.parametrize(477("unit_attr", "expected"),478[479("total_days", pl.Series([1])),480("total_hours", pl.Series([24])),481("total_minutes", pl.Series([24 * 60])),482("total_seconds", pl.Series([3600 * 24])),483("total_milliseconds", pl.Series([3600 * 24 * int(1e3)])),484("total_microseconds", pl.Series([3600 * 24 * int(1e6)])),485("total_nanoseconds", pl.Series([3600 * 24 * int(1e9)])),486],487)488def test_duration_extract_times(489unit_attr: str,490expected: pl.Series,491) -> None:492duration = pl.Series([datetime(2022, 1, 2)]) - pl.Series([datetime(2022, 1, 1)])493494assert_series_equal(getattr(duration.dt, unit_attr)(), expected)495496497@pytest.mark.parametrize(498("time_unit", "every"),499[500("ms", "1h"),501("us", "1h0m0s"),502("ns", timedelta(hours=1)),503],504ids=["milliseconds", "microseconds", "nanoseconds"],505)506def test_truncate(507time_unit: TimeUnit,508every: str | timedelta,509) -> None:510start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2)511s = pl.datetime_range(512start,513stop,514timedelta(minutes=30),515time_unit=time_unit,516eager=True,517).alias(f"dates[{time_unit}]")518519# can pass strings and time-deltas520out = s.dt.truncate(every)521assert out.dt[0] == start522assert out.dt[1] == start523assert out.dt[2] == start + timedelta(hours=1)524assert out.dt[3] == start + timedelta(hours=1)525# ...526assert out.dt[-3] == stop - timedelta(hours=1)527assert out.dt[-2] == stop - timedelta(hours=1)528assert out.dt[-1] == stop529530531def test_truncate_negative() -> None:532"""Test that truncating to a negative duration gives a helpful error message."""533df = pl.DataFrame(534{535"date": [date(1895, 5, 7), date(1955, 11, 5)],536"datetime": [datetime(1895, 5, 7), datetime(1955, 11, 5)],537"duration": ["-1m", "1m"],538}539)540541with pytest.raises(542ComputeError, match="cannot truncate a Date to a negative duration"543):544df.select(pl.col("date").dt.truncate("-1m"))545546with pytest.raises(547ComputeError, match="cannot truncate a Datetime to a negative duration"548):549df.select(pl.col("datetime").dt.truncate("-1m"))550551with pytest.raises(552ComputeError, match="cannot truncate a Date to a negative duration"553):554df.select(pl.col("date").dt.truncate(pl.col("duration")))555556with pytest.raises(557ComputeError, match="cannot truncate a Datetime to a negative duration"558):559df.select(pl.col("datetime").dt.truncate(pl.col("duration")))560561562@pytest.mark.parametrize(563("time_unit", "every"),564[565("ms", "1h"),566("us", "1h0m0s"),567("ns", timedelta(hours=1)),568],569ids=["milliseconds", "microseconds", "nanoseconds"],570)571def test_round(572time_unit: TimeUnit,573every: str | timedelta,574) -> None:575start, stop = datetime(2022, 1, 1), datetime(2022, 1, 2)576s = pl.datetime_range(577start,578stop,579timedelta(minutes=30),580time_unit=time_unit,581eager=True,582).alias(f"dates[{time_unit}]")583584# can pass strings and time-deltas585out = s.dt.round(every)586assert out.dt[0] == start587assert out.dt[1] == start + timedelta(hours=1)588assert out.dt[2] == start + timedelta(hours=1)589assert out.dt[3] == start + timedelta(hours=2)590# ...591assert out.dt[-3] == stop - timedelta(hours=1)592assert out.dt[-2] == stop593assert out.dt[-1] == stop594595596def test_round_expr() -> None:597df = pl.DataFrame(598{599"date": [600datetime(2022, 11, 14),601datetime(2023, 10, 11),602datetime(2022, 3, 20, 5, 7, 18),603datetime(2022, 4, 3, 13, 30, 32),604None,605datetime(2022, 12, 1),606],607"every": ["1y", "1mo", "1m", "1m", "1mo", None],608}609)610611output = df.select(612all_expr=pl.col("date").dt.round(every=pl.col("every")),613date_lit=pl.lit(datetime(2022, 4, 3, 13, 30, 32)).dt.round(614every=pl.col("every")615),616every_lit=pl.col("date").dt.round("1d"),617)618619expected = pl.DataFrame(620{621"all_expr": [622datetime(2023, 1, 1),623datetime(2023, 10, 1),624datetime(2022, 3, 20, 5, 7),625datetime(2022, 4, 3, 13, 31),626None,627None,628],629"date_lit": [630datetime(2022, 1, 1),631datetime(2022, 4, 1),632datetime(2022, 4, 3, 13, 31),633datetime(2022, 4, 3, 13, 31),634datetime(2022, 4, 1),635None,636],637"every_lit": [638datetime(2022, 11, 14),639datetime(2023, 10, 11),640datetime(2022, 3, 20),641datetime(2022, 4, 4),642None,643datetime(2022, 12, 1),644],645}646)647648assert_frame_equal(output, expected)649650all_lit = pl.select(all_lit=pl.lit(datetime(2022, 3, 20, 5, 7)).dt.round("1h"))651assert all_lit.to_dict(as_series=False) == {"all_lit": [datetime(2022, 3, 20, 5)]}652653654def test_round_negative() -> None:655"""Test that rounding to a negative duration gives a helpful error message."""656with pytest.raises(657ComputeError, match="cannot round a Date to a negative duration"658):659pl.Series([date(1895, 5, 7)]).dt.round("-1m")660661with pytest.raises(662ComputeError, match="cannot round a Datetime to a negative duration"663):664pl.Series([datetime(1895, 5, 7)]).dt.round("-1m")665666667def test_round_invalid_duration() -> None:668with pytest.raises(669InvalidOperationError, match="expected leading integer in the duration string"670):671pl.Series([datetime(2022, 3, 20, 5, 7)]).dt.round("P")672673674@pytest.mark.parametrize(675("time_unit", "date_in_that_unit"),676[677("ns", [978307200000000000, 981022089000000000]),678("us", [978307200000000, 981022089000000]),679("ms", [978307200000, 981022089000]),680],681ids=["nanoseconds", "microseconds", "milliseconds"],682)683def test_cast_time_units(684time_unit: TimeUnit,685date_in_that_unit: list[int],686) -> None:687dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)])688689assert dates.dt.cast_time_unit(time_unit).cast(int).to_list() == date_in_that_unit690691692def test_epoch_matches_timestamp() -> None:693dates = pl.Series([datetime(2001, 1, 1), datetime(2001, 2, 1, 10, 8, 9)])694695for unit in DTYPE_TEMPORAL_UNITS:696assert_series_equal(dates.dt.epoch(unit), dates.dt.timestamp(unit))697698assert_series_equal(dates.dt.epoch("s"), dates.dt.timestamp("ms") // 1000)699assert_series_equal(700dates.dt.epoch("d"),701(dates.dt.timestamp("ms") // (1000 * 3600 * 24)).cast(pl.Int32),702)703704705@pytest.mark.parametrize(706("tzinfo", "time_zone"),707[(None, None), (ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu")],708)709def test_date_time_combine(tzinfo: ZoneInfo | None, time_zone: str | None) -> None:710# Define a DataFrame with columns for datetime, date, and time711df = pl.DataFrame(712{713"dtm": [714datetime(2022, 12, 31, 10, 30, 45),715datetime(2023, 7, 5, 23, 59, 59),716],717"dt": [718date(2022, 10, 10),719date(2022, 7, 5),720],721"tm": [722time(1, 2, 3, 456000),723time(7, 8, 9, 101000),724],725}726)727df = df.with_columns(pl.col("dtm").dt.replace_time_zone(time_zone))728729# Combine datetime/date with time730df = df.select(731pl.col("dtm").dt.combine(pl.col("tm")).alias("d1"), # datetime & time732pl.col("dt").dt.combine(pl.col("tm")).alias("d2"), # date & time733pl.col("dt").dt.combine(time(4, 5, 6)).alias("d3"), # date & specified time734)735736# Assert that the new columns have the expected values and datatypes737expected_dict = {738"d1": [ # Time component should be overwritten by `tm` values739datetime(2022, 12, 31, 1, 2, 3, 456000, tzinfo=tzinfo),740datetime(2023, 7, 5, 7, 8, 9, 101000, tzinfo=tzinfo),741],742"d2": [ # Both date and time components combined "as-is" into new datetime743datetime(2022, 10, 10, 1, 2, 3, 456000),744datetime(2022, 7, 5, 7, 8, 9, 101000),745],746"d3": [ # New datetime should use specified time component747datetime(2022, 10, 10, 4, 5, 6),748datetime(2022, 7, 5, 4, 5, 6),749],750}751assert df.to_dict(as_series=False) == expected_dict752753expected_schema = {754"d1": pl.Datetime("us", time_zone),755"d2": pl.Datetime("us"),756"d3": pl.Datetime("us"),757}758assert df.schema == expected_schema759760761def test_combine_unsupported_types() -> None:762with pytest.raises(ComputeError, match="expected Date or Datetime, got time"):763pl.Series([time(1, 2)]).dt.combine(time(3, 4))764765766@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])767@pytest.mark.parametrize("time_zone", ["Asia/Kathmandu", None])768def test_combine_lazy_schema_datetime(769time_zone: str | None,770time_unit: TimeUnit,771) -> None:772df = pl.DataFrame({"ts": pl.Series([datetime(2020, 1, 1)])})773df = df.with_columns(pl.col("ts").dt.replace_time_zone(time_zone))774result = df.lazy().select(775pl.col("ts").dt.combine(time(1, 2, 3), time_unit=time_unit)776)777expected_dtypes = [pl.Datetime(time_unit, time_zone)]778assert result.collect_schema().dtypes() == expected_dtypes779780781@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])782def test_combine_lazy_schema_date(time_unit: TimeUnit) -> None:783df = pl.DataFrame({"ts": pl.Series([date(2020, 1, 1)])})784result = df.lazy().select(785pl.col("ts").dt.combine(time(1, 2, 3), time_unit=time_unit)786)787expected_dtypes = [pl.Datetime(time_unit, None)]788assert result.collect_schema().dtypes() == expected_dtypes789790791@pytest.mark.parametrize(792("range_fn", "value_type", "kwargs"),793[794(pl.datetime_range, datetime, {"time_unit": "ns"}),795(pl.datetime_range, datetime, {"time_unit": "ns", "time_zone": "CET"}),796(pl.datetime_range, datetime, {"time_unit": "us"}),797(pl.datetime_range, datetime, {"time_unit": "us", "time_zone": "CET"}),798(pl.datetime_range, datetime, {"time_unit": "ms"}),799(pl.datetime_range, datetime, {"time_unit": "ms", "time_zone": "CET"}),800(pl.date_range, date, {}),801],802)803def test_iso_year(804range_fn: Callable[..., pl.Series], value_type: type, kwargs: dict[str, str]805) -> None:806assert range_fn(807value_type(1990, 1, 1), value_type(2004, 1, 1), "1y", **kwargs, eager=True808).dt.iso_year().to_list() == [8091990,8101991,8111992,8121992,8131993,8141994,8151996,8161997,8171998,8181998,8191999,8202001,8212002,8222003,8232004,824]825826827@pytest.mark.parametrize(828("range_fn", "value_type", "kwargs"),829[830(pl.datetime_range, datetime, {"time_unit": "ns"}),831(pl.datetime_range, datetime, {"time_unit": "ns", "time_zone": "CET"}),832(pl.datetime_range, datetime, {"time_unit": "us"}),833(pl.datetime_range, datetime, {"time_unit": "us", "time_zone": "CET"}),834(pl.datetime_range, datetime, {"time_unit": "ms"}),835(pl.datetime_range, datetime, {"time_unit": "ms", "time_zone": "CET"}),836(pl.date_range, date, {}),837],838)839def test_is_leap_year(840range_fn: Callable[..., pl.Series], value_type: type, kwargs: dict[str, str]841) -> None:842assert range_fn(843value_type(1990, 1, 1), value_type(2004, 1, 1), "1y", **kwargs, eager=True844).dt.is_leap_year().to_list() == [845False,846False,847True, # 1992848False,849False,850False,851True, # 1996852False,853False,854False,855True, # 2000856False,857False,858False,859True, # 2004860]861862863@pytest.mark.parametrize(864("value_type", "time_unit", "time_zone"),865[866(date, None, None),867(datetime, "ns", None),868(869datetime,870"ns",871"Asia/Kathmandu",872),873(datetime, "us", None),874(875datetime,876"us",877"Asia/Kathmandu",878),879(datetime, "ms", None),880(881datetime,882"ms",883"Asia/Kathmandu",884),885],886)887@pytest.mark.parametrize(888("start_ymd", "end_ymd", "feb_days"),889[890# Non-leap year cases891((1900, 1, 1), (1900, 12, 1), 28), # 1900 can be divided by 100 but not by 400892((2025, 1, 1), (2025, 12, 1), 28), # 2025 cannot be divided by 4893# Leap year cases894((2000, 1, 1), (2000, 12, 1), 29), # 2000 can be divided by 400895((2004, 1, 1), (2004, 12, 1), 29), # 2004 can be divided by 4 but not by 100896],897)898def test_days_in_month(899value_type: type,900time_unit: str | None,901time_zone: str | None,902start_ymd: tuple[int, int, int],903end_ymd: tuple[int, int, int],904feb_days: int,905) -> None:906assert value_type in (date, datetime)907range_fn: Callable[..., pl.Series] = (908pl.date_range if value_type is date else pl.datetime_range909)910kwargs: dict[str, str] = {}911if time_unit is not None:912kwargs["time_unit"] = time_unit913if time_zone is not None:914kwargs["time_zone"] = time_zone915assert range_fn(916value_type(*start_ymd), value_type(*end_ymd), "1mo", **kwargs, eager=True917).dt.days_in_month().to_list() == [91831,919feb_days,92031,92130,92231,92330,92431,92531,92630,92731,92830,92931,930]931932933def test_quarter() -> None:934assert pl.datetime_range(935datetime(2022, 1, 1), datetime(2022, 12, 1), "1mo", eager=True936).dt.quarter().to_list() == [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]937938939def test_offset_by() -> None:940df = pl.DataFrame(941{942"dates": pl.datetime_range(943datetime(2000, 1, 1), datetime(2020, 1, 1), "1y", eager=True944)945}946)947948# Add two new columns to the DataFrame using the offset_by() method949df = df.with_columns(950df["dates"].dt.offset_by("1y").alias("date_plus_1y"),951df["dates"].dt.offset_by("-1y2mo").alias("date_min"),952)953954# Assert that the day of the month for all the dates in new columns is 1955assert (df["date_plus_1y"].dt.day() == 1).all()956assert (df["date_min"].dt.day() == 1).all()957958# Assert that the 'date_min' column contains the expected list of dates959expected_dates = [datetime(year, 11, 1, 0, 0) for year in range(1998, 2019)]960assert df["date_min"].to_list() == expected_dates961962963@pytest.mark.parametrize("time_zone", ["US/Central", None])964def test_offset_by_crossing_dst(time_zone: str | None) -> None:965ser = pl.Series([datetime(2021, 11, 7)]).dt.replace_time_zone(time_zone)966result = ser.dt.offset_by("1d")967expected = pl.Series([datetime(2021, 11, 8)]).dt.replace_time_zone(time_zone)968assert_series_equal(result, expected)969970971def test_negative_offset_by_err_msg_8464() -> None:972result = pl.Series([datetime(2022, 3, 30)]).dt.offset_by("-1mo")973expected = pl.Series([datetime(2022, 2, 28)])974assert_series_equal(result, expected)975976977def test_offset_by_truncate_sorted_flag() -> None:978s = pl.Series([datetime(2001, 1, 1), datetime(2001, 1, 2)])979s = s.set_sorted()980981assert s.flags["SORTED_ASC"]982s1 = s.dt.offset_by("1d")983assert s1.to_list() == [datetime(2001, 1, 2), datetime(2001, 1, 3)]984assert s1.flags["SORTED_ASC"]985s2 = s1.dt.truncate("1mo")986assert s2.flags["SORTED_ASC"]987988989def test_offset_by_broadcasting() -> None:990# test broadcast lhs991df = pl.DataFrame(992{993"offset": ["1d", "10d", "3d", None],994}995)996result = df.select(997d1=pl.lit(datetime(2020, 10, 25)).dt.offset_by(pl.col("offset")),998d2=pl.lit(datetime(2020, 10, 25))999.dt.cast_time_unit("ms")1000.dt.offset_by(pl.col("offset")),1001d3=pl.lit(datetime(2020, 10, 25))1002.dt.replace_time_zone("Europe/London")1003.dt.offset_by(pl.col("offset")),1004d4=pl.lit(datetime(2020, 10, 25)).dt.date().dt.offset_by(pl.col("offset")),1005d5=pl.lit(None, dtype=pl.Datetime).dt.offset_by(pl.col("offset")),1006)1007expected_dict = {1008"d1": [1009datetime(2020, 10, 26),1010datetime(2020, 11, 4),1011datetime(2020, 10, 28),1012None,1013],1014"d2": [1015datetime(2020, 10, 26),1016datetime(2020, 11, 4),1017datetime(2020, 10, 28),1018None,1019],1020"d3": [1021datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")),1022datetime(2020, 11, 4, tzinfo=ZoneInfo("Europe/London")),1023datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")),1024None,1025],1026"d4": [1027datetime(2020, 10, 26).date(),1028datetime(2020, 11, 4).date(),1029datetime(2020, 10, 28).date(),1030None,1031],1032"d5": [None, None, None, None],1033}1034assert result.to_dict(as_series=False) == expected_dict10351036# test broadcast rhs1037df = pl.DataFrame({"dt": [datetime(2020, 10, 25), datetime(2021, 1, 2), None]})1038result = df.select(1039d1=pl.col("dt").dt.offset_by(pl.lit("1mo3d")),1040d2=pl.col("dt").dt.cast_time_unit("ms").dt.offset_by(pl.lit("1y1mo")),1041d3=pl.col("dt")1042.dt.replace_time_zone("Europe/London")1043.dt.offset_by(pl.lit("3d")),1044d4=pl.col("dt").dt.date().dt.offset_by(pl.lit("1y1mo1d")),1045)1046expected_dict = {1047"d1": [datetime(2020, 11, 28), datetime(2021, 2, 5), None],1048"d2": [datetime(2021, 11, 25), datetime(2022, 2, 2), None],1049"d3": [1050datetime(2020, 10, 28, tzinfo=ZoneInfo("Europe/London")),1051datetime(2021, 1, 5, tzinfo=ZoneInfo("Europe/London")),1052None,1053],1054"d4": [datetime(2021, 11, 26).date(), datetime(2022, 2, 3).date(), None],1055}1056assert result.to_dict(as_series=False) == expected_dict10571058# test all literal1059result = df.select(d=pl.lit(datetime(2021, 11, 26)).dt.offset_by("1mo1d"))1060assert result.to_dict(as_series=False) == {"d": [datetime(2021, 12, 27)]}106110621063def test_offset_by_expressions() -> None:1064df = pl.DataFrame(1065{1066"a": [1067datetime(2020, 10, 25),1068datetime(2021, 1, 2),1069None,1070datetime(2021, 1, 4),1071None,1072],1073"b": ["1d", "10d", "3d", None, None],1074}1075)1076df = df.sort("a")1077result = df.select(1078c=pl.col("a").dt.offset_by(pl.col("b")),1079d=pl.col("a").dt.cast_time_unit("ms").dt.offset_by(pl.col("b")),1080e=pl.col("a").dt.replace_time_zone("Europe/London").dt.offset_by(pl.col("b")),1081f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),1082)10831084expected = pl.DataFrame(1085{1086"c": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None],1087"d": [None, None, datetime(2020, 10, 26), datetime(2021, 1, 12), None],1088"e": [1089None,1090None,1091datetime(2020, 10, 26, tzinfo=ZoneInfo("Europe/London")),1092datetime(2021, 1, 12, tzinfo=ZoneInfo("Europe/London")),1093None,1094],1095"f": [None, None, date(2020, 10, 26), date(2021, 1, 12), None],1096},1097schema_overrides={1098"d": pl.Datetime("ms"),1099"e": pl.Datetime(time_zone="Europe/London"),1100},1101)1102assert_frame_equal(result, expected)1103assert result.flags == {1104"c": {"SORTED_ASC": False, "SORTED_DESC": False},1105"d": {"SORTED_ASC": False, "SORTED_DESC": False},1106"e": {"SORTED_ASC": False, "SORTED_DESC": False},1107"f": {"SORTED_ASC": False, "SORTED_DESC": False},1108}11091110# Check single-row cases1111for i in range(df.height):1112df_slice = df[i : i + 1]1113result = df_slice.select(1114c=pl.col("a").dt.offset_by(pl.col("b")),1115d=pl.col("a").dt.cast_time_unit("ms").dt.offset_by(pl.col("b")),1116e=pl.col("a")1117.dt.replace_time_zone("Europe/London")1118.dt.offset_by(pl.col("b")),1119f=pl.col("a").dt.date().dt.offset_by(pl.col("b")),1120)1121assert_frame_equal(result, expected[i : i + 1])1122# single-row Series are always sorted1123assert result.flags == {1124"c": {"SORTED_ASC": True, "SORTED_DESC": False},1125"d": {"SORTED_ASC": True, "SORTED_DESC": False},1126"e": {"SORTED_ASC": True, "SORTED_DESC": False},1127"f": {"SORTED_ASC": True, "SORTED_DESC": False},1128}112911301131@pytest.mark.parametrize(1132("duration", "input_date", "expected"),1133[1134("1mo", date(2018, 1, 31), date(2018, 2, 28)),1135("1y", date(2024, 2, 29), date(2025, 2, 28)),1136("1y1mo", date(2024, 1, 30), date(2025, 2, 28)),1137],1138)1139def test_offset_by_saturating_8217_8474(1140duration: str, input_date: date, expected: date1141) -> None:1142result = pl.Series([input_date]).dt.offset_by(duration).item()1143assert result == expected114411451146def test_year_empty_df() -> None:1147df = pl.DataFrame(pl.Series(name="date", dtype=pl.Date))1148assert df.select(pl.col("date").dt.year()).dtypes == [pl.Int32]114911501151def test_epoch_invalid() -> None:1152with pytest.raises(InvalidOperationError, match="not supported for dtype"):1153pl.Series([timedelta(1)]).dt.epoch()115411551156@pytest.mark.parametrize(1157"time_unit",1158["ms", "us", "ns"],1159ids=["milliseconds", "microseconds", "nanoseconds"],1160)1161def test_weekday(time_unit: TimeUnit) -> None:1162friday = pl.Series([datetime(2023, 2, 17)])11631164assert friday.dt.cast_time_unit(time_unit).dt.weekday()[0] == 51165assert friday.cast(pl.Date).dt.weekday()[0] == 5116611671168@pytest.mark.parametrize(1169("values", "expected_median"),1170[1171([], None),1172([None, None], None),1173([date(2022, 1, 1)], datetime(2022, 1, 1)),1174([date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 4)], datetime(2022, 1, 2)),1175([date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)], datetime(2022, 1, 2)),1176([datetime(2022, 1, 1)], datetime(2022, 1, 1)),1177(1178[datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],1179datetime(2022, 1, 2),1180),1181(1182[datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],1183datetime(2022, 1, 2),1184),1185([timedelta(days=1)], timedelta(days=1)),1186([timedelta(days=1), timedelta(days=2), timedelta(days=3)], timedelta(days=2)),1187([timedelta(days=1), timedelta(days=2), timedelta(days=15)], timedelta(days=2)),1188([time(hour=1)], time(hour=1)),1189([time(hour=1), time(hour=2), time(hour=3)], time(hour=2)),1190([time(hour=1), time(hour=2), time(hour=15)], time(hour=2)),1191],1192ids=[1193"empty",1194"Nones",1195"single_date",1196"spread_even_date",1197"spread_skewed_date",1198"single_datetime",1199"spread_even_datetime",1200"spread_skewed_datetime",1201"single_dur",1202"spread_even_dur",1203"spread_skewed_dur",1204"single_time",1205"spread_even_time",1206"spread_skewed_time",1207],1208)1209def test_median(1210values: list[TemporalLiteral | None], expected_median: TemporalLiteral | None1211) -> None:1212assert pl.Series(values).median() == expected_median121312141215@pytest.mark.parametrize(1216("values", "expected_mean"),1217[1218([], None),1219([None, None], None),1220([date(2022, 1, 1)], datetime(2022, 1, 1)),1221(1222[date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 4)],1223datetime(2022, 1, 2, 8),1224),1225(1226[date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)],1227datetime(2022, 10, 16, 16, 0),1228),1229([datetime(2022, 1, 1)], datetime(2022, 1, 1)),1230(1231[datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2022, 1, 3)],1232datetime(2022, 1, 2),1233),1234(1235[datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],1236datetime(2022, 10, 16, 16, 0, 0),1237),1238([timedelta(days=1)], timedelta(days=1)),1239([timedelta(days=1), timedelta(days=2), timedelta(days=3)], timedelta(days=2)),1240([timedelta(days=1), timedelta(days=2), timedelta(days=15)], timedelta(days=6)),1241([time(hour=1)], time(hour=1)),1242([time(hour=1), time(hour=2), time(hour=3)], time(hour=2)),1243([time(hour=1), time(hour=2), time(hour=15)], time(hour=6)),1244],1245ids=[1246"empty",1247"Nones",1248"single_date",1249"spread_even_date",1250"spread_skewed_date",1251"single_datetime",1252"spread_even_datetime",1253"spread_skewed_datetime",1254"single_duration",1255"spread_even_duration",1256"spread_skewed_duration",1257"single_time",1258"spread_even_time",1259"spread_skewed_time",1260],1261)1262def test_mean(1263values: list[TemporalLiteral | None], expected_mean: TemporalLiteral | None1264) -> None:1265assert pl.Series(values).mean() == expected_mean126612671268@pytest.mark.parametrize(1269("values", "expected_mean"),1270[1271([None], None),1272(1273[datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],1274datetime(2022, 10, 16, 16, 0, 0),1275),1276],1277ids=["None_dt", "spread_skewed_dt"],1278)1279@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])1280def test_datetime_mean_with_tu(1281values: list[datetime], expected_mean: datetime, time_unit: TimeUnit1282) -> None:1283assert pl.Series(values, dtype=pl.Duration(time_unit)).mean() == expected_mean128412851286@pytest.mark.parametrize(1287("values", "expected_median"),1288[1289([None], None),1290(1291[datetime(2022, 1, 1), datetime(2022, 1, 2), datetime(2024, 5, 15)],1292datetime(2022, 1, 2),1293),1294],1295ids=["None_dt", "spread_skewed_dt"],1296)1297@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])1298def test_datetime_median_with_tu(1299values: list[datetime], expected_median: datetime, time_unit: TimeUnit1300) -> None:1301assert pl.Series(values, dtype=pl.Duration(time_unit)).median() == expected_median130213031304def test_date_median_upcast() -> None:1305df = pl.DataFrame({"a": [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]})1306result = df.select(pl.col("a").median())1307expected = pl.DataFrame(1308{"a": pl.Series([datetime(2022, 1, 2)], dtype=pl.Datetime("us"))}1309)1310assert_frame_equal(result, expected)131113121313@pytest.mark.parametrize(1314("values", "expected_mean"),1315[1316([None], None),1317(1318[timedelta(days=1), timedelta(days=2), timedelta(days=15)],1319timedelta(days=6),1320),1321],1322ids=["None_dur", "spread_skewed_dur"],1323)1324@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])1325def test_duration_mean_with_tu(1326values: list[timedelta], expected_mean: timedelta, time_unit: TimeUnit1327) -> None:1328assert pl.Series(values, dtype=pl.Duration(time_unit)).mean() == expected_mean132913301331@pytest.mark.parametrize(1332("values", "expected_median"),1333[1334([None], None),1335(1336[timedelta(days=1), timedelta(days=2), timedelta(days=15)],1337timedelta(days=2),1338),1339],1340ids=["None_dur", "spread_skewed_dur"],1341)1342@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])1343def test_duration_median_with_tu(1344values: list[timedelta], expected_median: timedelta, time_unit: TimeUnit1345) -> None:1346assert pl.Series(values, dtype=pl.Duration(time_unit)).median() == expected_median134713481349def test_agg_mean_expr() -> None:1350df = pl.DataFrame(1351{1352"date": pl.Series(1353[date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 4)],1354dtype=pl.Date,1355),1356"datetime_ms": pl.Series(1357[datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],1358dtype=pl.Datetime("ms"),1359),1360"datetime_us": pl.Series(1361[datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],1362dtype=pl.Datetime("us"),1363),1364"datetime_ns": pl.Series(1365[datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],1366dtype=pl.Datetime("ns"),1367),1368"duration_ms": pl.Series(1369[timedelta(days=1), timedelta(days=2), timedelta(days=4)],1370dtype=pl.Duration("ms"),1371),1372"duration_us": pl.Series(1373[timedelta(days=1), timedelta(days=2), timedelta(days=4)],1374dtype=pl.Duration("us"),1375),1376"duration_ns": pl.Series(1377[timedelta(days=1), timedelta(days=2), timedelta(days=4)],1378dtype=pl.Duration("ns"),1379),1380"time": pl.Series(1381[time(hour=1), time(hour=2), time(hour=4)],1382dtype=pl.Time,1383),1384}1385)13861387expected = pl.DataFrame(1388{1389"date": pl.Series([datetime(2023, 1, 2, 8, 0)], dtype=pl.Datetime("us")),1390"datetime_ms": pl.Series(1391[datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("ms")1392),1393"datetime_us": pl.Series(1394[datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("us")1395),1396"datetime_ns": pl.Series(1397[datetime(2023, 1, 2, 8, 0, 0)], dtype=pl.Datetime("ns")1398),1399"duration_ms": pl.Series(1400[timedelta(days=2, hours=8)], dtype=pl.Duration("ms")1401),1402"duration_us": pl.Series(1403[timedelta(days=2, hours=8)], dtype=pl.Duration("us")1404),1405"duration_ns": pl.Series(1406[timedelta(days=2, hours=8)], dtype=pl.Duration("ns")1407),1408"time": pl.Series([time(hour=2, minute=20)], dtype=pl.Time),1409}1410)14111412assert_frame_equal(df.select(pl.all().mean()), expected)141314141415def test_agg_median_expr() -> None:1416df = pl.DataFrame(1417{1418"date": pl.Series(1419[date(2023, 1, 1), date(2023, 1, 2), date(2023, 1, 4)],1420dtype=pl.Date,1421),1422"datetime_ms": pl.Series(1423[datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],1424dtype=pl.Datetime("ms"),1425),1426"datetime_us": pl.Series(1427[datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],1428dtype=pl.Datetime("us"),1429),1430"datetime_ns": pl.Series(1431[datetime(2023, 1, 1), datetime(2023, 1, 2), datetime(2023, 1, 4)],1432dtype=pl.Datetime("ns"),1433),1434"duration_ms": pl.Series(1435[timedelta(days=1), timedelta(days=2), timedelta(days=4)],1436dtype=pl.Duration("ms"),1437),1438"duration_us": pl.Series(1439[timedelta(days=1), timedelta(days=2), timedelta(days=4)],1440dtype=pl.Duration("us"),1441),1442"duration_ns": pl.Series(1443[timedelta(days=1), timedelta(days=2), timedelta(days=4)],1444dtype=pl.Duration("ns"),1445),1446"time": pl.Series(1447[time(hour=1), time(hour=2), time(hour=4)],1448dtype=pl.Time,1449),1450}1451)14521453expected = pl.DataFrame(1454{1455"date": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("us")),1456"datetime_ms": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("ms")),1457"datetime_us": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("us")),1458"datetime_ns": pl.Series([datetime(2023, 1, 2)], dtype=pl.Datetime("ns")),1459"duration_ms": pl.Series([timedelta(days=2)], dtype=pl.Duration("ms")),1460"duration_us": pl.Series([timedelta(days=2)], dtype=pl.Duration("us")),1461"duration_ns": pl.Series([timedelta(days=2)], dtype=pl.Duration("ns")),1462"time": pl.Series([time(hour=2)], dtype=pl.Time),1463}1464)14651466assert_frame_equal(df.select(pl.all().median()), expected)146714681469@given(1470s=series(min_size=1, max_size=10, dtype=pl.Duration),1471)1472@pytest.mark.skip(1473"These functions are currently bugged for large values: "1474"https://github.com/pola-rs/polars/issues/16057"1475)1476def test_series_duration_timeunits(1477s: pl.Series,1478) -> None:1479nanos = s.dt.total_nanoseconds().to_list()1480micros = s.dt.total_microseconds().to_list()1481millis = s.dt.total_milliseconds().to_list()14821483scale = {1484"ns": 1,1485"us": 1_000,1486"ms": 1_000_000,1487}1488assert nanos == [v * scale[s.dtype.time_unit] for v in s.to_physical()] # type: ignore[attr-defined]1489assert micros == [int(v / 1_000) for v in nanos]1490assert millis == [int(v / 1_000) for v in micros]14911492# special handling for ns timeunit (as we may generate a microsecs-based1493# timedelta that results in 64bit overflow on conversion to nanosecs)1494lower_bound, upper_bound = -(2**63), (2**63) - 11495if all(1496(lower_bound <= (us * 1000) <= upper_bound)1497for us in micros1498if isinstance(us, int)1499):1500for ns, us in zip(s.dt.total_nanoseconds(), micros):1501assert ns == (us * 1000)150215031504@given(1505s=series(min_size=1, max_size=10, dtype=pl.Datetime, allow_null=False),1506)1507def test_series_datetime_timeunits(1508s: pl.Series,1509) -> None:1510# datetime1511assert s.to_list() == list(s)1512assert list(s.dt.millisecond()) == [v.microsecond // 1000 for v in s]1513assert list(s.dt.nanosecond()) == [v.microsecond * 1000 for v in s]1514assert list(s.dt.microsecond()) == [v.microsecond for v in s]151515161517def test_dt_median_deprecated() -> None:1518values = [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]1519s = pl.Series(values)1520with pytest.deprecated_call():1521result = s.dt.median()1522assert result == s.median()152315241525def test_dt_mean_deprecated() -> None:1526values = [date(2022, 1, 1), date(2022, 1, 2), date(2024, 5, 15)]1527s = pl.Series(values)1528with pytest.deprecated_call():1529result = s.dt.mean()1530assert result == s.mean()153115321533@pytest.mark.parametrize(1534"dtype",1535[1536pl.Date,1537pl.Datetime("ms"),1538pl.Datetime("ms", "EST"),1539pl.Datetime("us"),1540pl.Datetime("us", "EST"),1541pl.Datetime("ns"),1542pl.Datetime("ns", "EST"),1543],1544)1545@pytest.mark.parametrize(1546"value",1547[1548# date(1677, 9, 22), # See test_literal_from_datetime.1549date(1970, 1, 1),1550date(2024, 2, 29),1551date(2262, 4, 11),1552],1553)1554def test_literal_from_date(1555value: date,1556dtype: PolarsDataType,1557) -> None:1558out = pl.select(pl.lit(value, dtype=dtype))1559assert out.schema == OrderedDict({"literal": dtype})1560if dtype == pl.Datetime:1561tz = ZoneInfo(dtype.time_zone) if dtype.time_zone is not None else None # type: ignore[union-attr]1562value = datetime(value.year, value.month, value.day, tzinfo=tz)1563assert out.item() == value156415651566@pytest.mark.parametrize(1567"dtype",1568[1569pl.Date,1570pl.Datetime("ms"),1571pl.Datetime("ms", "EST"),1572pl.Datetime("us"),1573pl.Datetime("us", "EST"),1574pl.Datetime("ns"),1575pl.Datetime("ns", "EST"),1576],1577)1578@pytest.mark.parametrize(1579"value",1580[1581# Very old dates with a timezone like EST caused problems for the CI due1582# to the IANA timezone database updating their historical offset, so1583# these have been disabled for now. A mismatch between the timezone1584# database that chrono_tz crate uses vs. the one that Python uses (which1585# differs from platform to platform) will cause this to fail.1586# datetime(1677, 9, 22),1587# datetime(1677, 9, 22, tzinfo=ZoneInfo("EST")),1588datetime(1970, 1, 1),1589datetime(1970, 1, 1, tzinfo=ZoneInfo("EST")),1590datetime(2024, 2, 29),1591datetime(2024, 2, 29, tzinfo=ZoneInfo("EST")),1592datetime(2262, 4, 11),1593datetime(2262, 4, 11, tzinfo=ZoneInfo("EST")),1594],1595)1596def test_literal_from_datetime(1597value: datetime,1598dtype: pl.Date | pl.Datetime,1599) -> None:1600out = pl.select(pl.lit(value, dtype=dtype))1601if dtype == pl.Date:1602value = value.date() # type: ignore[assignment]1603elif dtype.time_zone is None and value.tzinfo is not None: # type: ignore[union-attr]1604# update the dtype with the supplied time zone in the value1605dtype = pl.Datetime(dtype.time_unit, str(value.tzinfo)) # type: ignore[union-attr]1606elif dtype.time_zone is not None and value.tzinfo is None: # type: ignore[union-attr]1607# cast from dt without tz to dtype with tz1608value = value.replace(tzinfo=ZoneInfo(dtype.time_zone)) # type: ignore[union-attr]16091610assert out.schema == OrderedDict({"literal": dtype})1611assert out.item() == value161216131614@pytest.mark.parametrize(1615"value",1616[1617time(0),1618time(hour=1),1619time(hour=16, minute=43, microsecond=500),1620time(hour=23, minute=59, second=59, microsecond=999999),1621],1622)1623def test_literal_from_time(value: time) -> None:1624out = pl.select(pl.lit(value))1625assert out.schema == OrderedDict({"literal": pl.Time})1626assert out.item() == value162716281629@pytest.mark.parametrize(1630"dtype",1631[1632None,1633pl.Duration("ms"),1634pl.Duration("us"),1635pl.Duration("ns"),1636],1637)1638@pytest.mark.parametrize(1639"value",1640[1641timedelta(0),1642timedelta(hours=1),1643timedelta(days=-99999),1644timedelta(days=99999),1645],1646)1647def test_literal_from_timedelta(value: time, dtype: pl.Duration | None) -> None:1648out = pl.select(pl.lit(value, dtype=dtype))1649assert out.schema == OrderedDict({"literal": dtype or pl.Duration("us")})1650assert out.item() == value165116521653