# Path: blob/main/py-polars/tests/unit/functions/range/test_datetime_ranges.py
# 7884 views
from __future__ import annotations

from datetime import date, datetime
from typing import TYPE_CHECKING
from zoneinfo import ZoneInfo

import pytest

import polars as pl
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
    from polars._typing import ClosedInterval, PolarsDataType, TimeUnit


@pytest.mark.parametrize("low", ["start", pl.col("start")])
@pytest.mark.parametrize("high", ["stop", pl.col("stop")])
def test_datetime_ranges_lazy_with_expressions(
    low: str | pl.Expr, high: str | pl.Expr
) -> None:
    """Bounds given as column names or `pl.col` expressions give the same result."""
    df = pl.DataFrame(
        {
            "start": [datetime(2000, 1, 1), datetime(2022, 6, 1)],
            "stop": [datetime(2000, 1, 2), datetime(2022, 6, 2)],
        }
    )

    result_df = df.with_columns(
        pl.datetime_ranges(start=low, end=high, interval="1d").alias("dts")
    )

    assert result_df.to_dict(as_series=False) == {
        "start": [datetime(2000, 1, 1, 0, 0), datetime(2022, 6, 1, 0, 0)],
        "stop": [datetime(2000, 1, 2, 0, 0), datetime(2022, 6, 2, 0, 0)],
        "dts": [
            [datetime(2000, 1, 1, 0, 0), datetime(2000, 1, 2, 0, 0)],
            [datetime(2022, 6, 1, 0, 0), datetime(2022, 6, 2, 0, 0)],
        ],
    }


@pytest.mark.parametrize(
    ("values_time_zone", "input_time_zone", "output_time_zone"),
    [
        ("Asia/Kathmandu", "Asia/Kathmandu", "Asia/Kathmandu"),
        ("Asia/Kathmandu", None, "Asia/Kathmandu"),
        (None, "Asia/Kathmandu", "Asia/Kathmandu"),
        (None, None, None),
    ],
)
@pytest.mark.parametrize(
    ("values_time_unit", "input_time_unit", "output_time_unit"),
    [
        ("ms", None, "ms"),
        ("us", None, "us"),
        ("ns", None, "ns"),
        ("ms", "ms", "ms"),
        ("us", "ms", "ms"),
        ("ns", "ms", "ms"),
        ("ms", "us", "us"),
        ("us", "us", "us"),
        ("ns", "us", "us"),
        ("ms", "ns", "ns"),
        ("us", "ns", "ns"),
        ("ns", "ns", "ns"),
    ],
)
def test_datetime_ranges_schema(
    values_time_zone: str | None,
    input_time_zone: str | None,
    output_time_zone: str | None,
    values_time_unit: TimeUnit,
    input_time_unit: TimeUnit | None,
    output_time_unit: TimeUnit,
) -> None:
    """
    Check the schema and values of `datetime_ranges` on Datetime columns.

    The `values_*` parameters set the time zone / time unit of the input columns,
    the `input_*` parameters are passed to `datetime_ranges`, and the `output_*`
    parameters are the time zone / time unit expected on the resulting list.
    """
    df = (
        pl.DataFrame({"start": [datetime(2020, 1, 1)], "end": [datetime(2020, 1, 2)]})
        .with_columns(
            pl.col("*")
            .dt.replace_time_zone(values_time_zone)
            .dt.cast_time_unit(values_time_unit)
        )
        .lazy()
    )
    result = df.with_columns(
        datetime_range=pl.datetime_ranges(
            pl.col("start"),
            pl.col("end"),
            time_zone=input_time_zone,
            time_unit=input_time_unit,
        )
    )
    expected_schema = {
        "start": pl.Datetime(time_unit=values_time_unit, time_zone=values_time_zone),
        "end": pl.Datetime(time_unit=values_time_unit, time_zone=values_time_zone),
        "datetime_range": pl.List(
            pl.Datetime(time_unit=output_time_unit, time_zone=output_time_zone)
        ),
    }
    # The schema resolved lazily must agree with the schema of the collected frame.
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema

    expected = pl.DataFrame(
        {
            "start": [datetime(2020, 1, 1)],
            "end": [datetime(2020, 1, 2)],
            "datetime_range": [[datetime(2020, 1, 1), datetime(2020, 1, 2)]],
        }
    ).with_columns(
        pl.col("start")
        .dt.replace_time_zone(values_time_zone)
        .dt.cast_time_unit(values_time_unit),
        pl.col("end")
        .dt.replace_time_zone(values_time_zone)
        .dt.cast_time_unit(values_time_unit),
        pl.col("datetime_range")
        .explode()
        .dt.replace_time_zone(output_time_zone)
        .dt.cast_time_unit(output_time_unit)
        .implode(),
    )
    assert_frame_equal(result.collect(), expected)


@pytest.mark.parametrize(
    (
        "input_time_unit",
        "input_time_zone",
        "output_dtype",
        "interval",
        "expected_datetime_range",
    ),
    [
        (None, None, pl.Datetime("us"), "1s1d", ["2020-01-01", "2020-01-02 00:00:01"]),
        (None, None, pl.Datetime("us"), "1d1s", ["2020-01-01", "2020-01-02 00:00:01"]),
        (
            None,
            None,
            pl.Datetime("ns"),
            "1d1ns",
            ["2020-01-01", "2020-01-02 00:00:00.000000001"],
        ),
        ("ms", None, pl.Datetime("ms"), "1s1d", ["2020-01-01", "2020-01-02 00:00:01"]),
        ("ms", None, pl.Datetime("ms"), "1d1s", ["2020-01-01", "2020-01-02 00:00:01"]),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("us", "Asia/Kathmandu"),
            "1s1d",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("us", "Asia/Kathmandu"),
            "1d1s",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("ns", "Asia/Kathmandu"),
            "1d1ns",
            ["2020-01-01", "2020-01-02 00:00:00.000000001"],
        ),
        (
            "ms",
            "Asia/Kathmandu",
            pl.Datetime("ms", "Asia/Kathmandu"),
            "1s1d",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            "ms",
            "Asia/Kathmandu",
            pl.Datetime("ms", "Asia/Kathmandu"),
            "1d1s",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
    ],
)
def test_datetime_ranges_schema_upcasts_to_datetime(
    input_time_unit: TimeUnit | None,
    input_time_zone: str | None,
    output_dtype: PolarsDataType,
    interval: str,
    expected_datetime_range: list[str],
) -> None:
    """
    Check that Date bounds produce a list of Datetime with the expected dtype.

    The same arguments are also passed to the eager `datetime_range`, which must
    return the same values as the exploded `datetime_ranges` result.
    """
    df = pl.DataFrame({"start": [date(2020, 1, 1)], "end": [date(2020, 1, 3)]}).lazy()
    result = df.with_columns(
        datetime_range=pl.datetime_ranges(
            start=pl.col("start"),
            end=pl.col("end"),
            interval=interval,
            time_unit=input_time_unit,
            time_zone=input_time_zone,
        )
    )
    expected_schema = {
        "start": pl.Date,
        "end": pl.Date,
        "datetime_range": pl.List(output_dtype),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema

    # Parse the expected strings at "ns" first, then cast to the unit and time
    # zone of the parametrized output dtype.
    expected = pl.DataFrame(
        {
            "start": [date(2020, 1, 1)],
            "end": [date(2020, 1, 3)],
            "datetime_range": pl.Series(expected_datetime_range)
            .str.to_datetime(time_unit="ns")
            .implode(),
        }
    ).with_columns(
        pl.col("datetime_range")
        .explode()
        .dt.cast_time_unit(output_dtype.time_unit)  # type: ignore[union-attr]
        .dt.replace_time_zone(output_dtype.time_zone)  # type: ignore[union-attr]
        .implode(),
    )
    assert_frame_equal(result.collect(), expected)

    # check datetime_range too
    result_single = pl.datetime_range(
        date(2020, 1, 1),
        date(2020, 1, 3),
        interval=interval,
        time_unit=input_time_unit,
        time_zone=input_time_zone,
        eager=True,
    ).alias("datetime")
    assert_series_equal(
        result_single, expected["datetime_range"].explode().rename("datetime")
    )


def test_datetime_ranges_no_alias_schema_9037() -> None:
    """Without an alias the list result is expected under the `start` column name."""
    df = pl.DataFrame(
        {"start": [datetime(2020, 1, 1)], "end": [datetime(2020, 1, 2)]}
    ).lazy()
    result = df.with_columns(pl.datetime_ranges(pl.col("start"), pl.col("end")))
    # No new column is expected: "start" becomes the list column, "end" is untouched.
    expected_schema = {
        "start": pl.List(pl.Datetime(time_unit="us", time_zone=None)),
        "end": pl.Datetime(time_unit="us", time_zone=None),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema


def test_datetime_ranges_broadcasting() -> None:
    """A scalar datetime bound is combined with every row of a column bound."""
    df = pl.DataFrame(
        {
            "datetimes": [
                datetime(2021, 1, 1),
                datetime(2021, 1, 2),
                datetime(2021, 1, 3),
            ]
        }
    )
    result = df.select(
        pl.datetime_ranges(start="datetimes", end=datetime(2021, 1, 3)).alias("end"),
        pl.datetime_ranges(start=datetime(2021, 1, 1), end="datetimes").alias("start"),
    )
    expected = pl.DataFrame(
        {
            "end": [
                [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 3)],
                [datetime(2021, 1, 2), datetime(2021, 1, 3)],
                [datetime(2021, 1, 3)],
            ],
            "start": [
                [datetime(2021, 1, 1)],
                [datetime(2021, 1, 1), datetime(2021, 1, 2)],
                [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 3)],
            ],
        }
    )
    assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    ("closed", "expected"),
    [
        (
            "both",
            [
                [
                    datetime(2025, 1, 1),
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                    datetime(2025, 1, 10),
                ],
                [
                    datetime(2025, 1, 8),
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                    datetime(2025, 1, 17),
                ],
            ],
        ),
        (
            "left",
            [
                [
                    datetime(2025, 1, 1),
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                ],
                [
                    datetime(2025, 1, 8),
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                ],
            ],
        ),
        (
            "right",
            [
                [
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                    datetime(2025, 1, 10),
                ],
                [
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                    datetime(2025, 1, 17),
                ],
            ],
        ),
        (
            "none",
            [
                [
                    datetime(2025, 1, 2, 12),
                    datetime(2025, 1, 4),
                    datetime(2025, 1, 5, 12),
                    datetime(2025, 1, 7),
                    datetime(2025, 1, 8, 12),
                ],
                [
                    datetime(2025, 1, 9, 12),
                    datetime(2025, 1, 11),
                    datetime(2025, 1, 12, 12),
                    datetime(2025, 1, 14),
                    datetime(2025, 1, 15, 12),
                ],
            ],
        ),
    ],
)
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("us"),
        pl.Datetime("ns"),
        pl.Datetime("ms", time_zone="Asia/Kathmandu"),
        pl.Datetime("us", time_zone="Asia/Kathmandu"),
        pl.Datetime("ns", time_zone="Asia/Kathmandu"),
    ],
)
def test_datetime_ranges_start_end_interval_forwards(
    closed: ClosedInterval,
    expected: list[list[datetime]],
    dtype: PolarsDataType,
) -> None:
    """
    Check each `closed` setting with a `1d12h` interval for every output dtype.

    The frame always holds Date bounds; `dtype` only supplies the `time_unit` and
    `time_zone` arguments (both `None` for `pl.Date`) and the expected list dtype.
    """
    tu = dtype.time_unit if dtype == pl.Datetime else None  # type: ignore[union-attr]
    tz = dtype.time_zone if dtype == pl.Datetime else None  # type: ignore[union-attr]
    if tz is not None:
        # The parametrized expectations are naive; attach the zone to compare.
        time_zone = ZoneInfo(tz)
        expected = [[e.replace(tzinfo=time_zone) for e in x] for x in expected]
    df = pl.DataFrame(
        {
            "start": [date(2025, 1, 1), date(2025, 1, 8)],
            "end": [date(2025, 1, 10), date(2025, 1, 17)],
        }
    )
    result = df.select(
        dates=pl.datetime_ranges(
            start="start",
            end="end",
            interval="1d12h",
            closed=closed,
            time_unit=tu,
            time_zone=tz,
        )
    )
    # With no explicit unit or zone the expected result is a list of Datetime("us").
    dt_out = pl.List(pl.Datetime("us")) if dtype == pl.Date else pl.List(dtype)
    s_expected = pl.Series("dates", expected, dtype=dt_out)
    assert_frame_equal(result, s_expected.to_frame())


def test_datetime_ranges_lit_combinations_start_end_interval() -> None:
    """A `date` literal may be used for either bound alongside a column bound."""
    df = pl.DataFrame(
        {
            "start": [date(2025, 1, 1), date(2025, 1, 1)],
            "end": [date(2025, 1, 3), date(2025, 1, 3)],
        }
    )
    result = df.select(
        start_lit=pl.datetime_ranges(start=date(2025, 1, 1), end="end", interval="1d"),
        end_lit=pl.datetime_ranges(start="start", end=date(2025, 1, 3), interval="1d"),
    )
    dt = [datetime(2025, 1, 1), datetime(2025, 1, 2), datetime(2025, 1, 3)]
    s = pl.Series([dt, dt], dtype=pl.List(pl.Datetime("us")))
    expected = pl.DataFrame(
        {
            "start_lit": s,
            "end_lit": s,
        }
    )
    assert_frame_equal(result, expected)


def test_datetime_ranges_null_lit_combinations_start_end_interval() -> None:
    """A null Date literal as either bound, or both, gives a null list per row."""
    df = pl.DataFrame(
        {
            "start": [date(2025, 1, 1), date(2025, 1, 1)],
            "end": [date(2025, 1, 3), date(2025, 1, 3)],
        }
    )
    lit_dt = pl.lit(None, dtype=pl.Date)
    result = df.select(
        start_lit=pl.datetime_ranges(start=lit_dt, end="end", interval="1d"),
        end_lit=pl.datetime_ranges(start="start", end=lit_dt, interval="1d"),
        all_lit=pl.datetime_ranges(start=lit_dt, end=lit_dt, interval="1d"),
    )
    s = pl.Series([None, None], dtype=pl.List(pl.Datetime("us")))
    expected = pl.DataFrame({"start_lit": s, "end_lit": s, "all_lit": s})
    assert_frame_equal(result, expected)