# Path: blob/main/py-polars/tests/unit/functions/range/test_datetime_range.py
# 6939 views
"""Unit tests for ``pl.datetime_range`` and ``pl.datetime_ranges``."""

from __future__ import annotations

from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING
from zoneinfo import ZoneInfo

import hypothesis.strategies as st
import pytest
from hypothesis import given, settings

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
from polars.exceptions import ComputeError, InvalidOperationError, SchemaError
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
    from polars._typing import ClosedInterval, PolarsDataType, TimeUnit


def test_datetime_range() -> None:
    """Check lengths, endpoints and values for several eager ranges."""
    result = pl.datetime_range(
        date(1985, 1, 1), date(2015, 7, 1), timedelta(days=1, hours=12), eager=True
    )
    assert len(result) == 7426
    assert result.dt[0] == datetime(1985, 1, 1)
    assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
    assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
    assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)

    # The requested time unit must be reflected in the resulting dtype.
    for time_unit in DTYPE_TEMPORAL_UNITS:
        rng = pl.datetime_range(
            datetime(2020, 1, 1),
            date(2020, 1, 2),
            "2h",
            time_unit=time_unit,
            eager=True,
        )
        assert rng.dtype.time_unit == time_unit  # type: ignore[attr-defined]
        assert rng.shape == (13,)
        assert rng.dt[0] == datetime(2020, 1, 1)
        assert rng.dt[-1] == datetime(2020, 1, 2)

    result = pl.datetime_range(date(2022, 1, 1), date(2022, 1, 2), "1h30m", eager=True)
    assert list(result) == [
        datetime(2022, 1, 1, 0, 0),
        datetime(2022, 1, 1, 1, 30),
        datetime(2022, 1, 1, 3, 0),
        datetime(2022, 1, 1, 4, 30),
        datetime(2022, 1, 1, 6, 0),
        datetime(2022, 1, 1, 7, 30),
        datetime(2022, 1, 1, 9, 0),
        datetime(2022, 1, 1, 10, 30),
        datetime(2022, 1, 1, 12, 0),
        datetime(2022, 1, 1, 13, 30),
        datetime(2022, 1, 1, 15, 0),
        datetime(2022, 1, 1, 16, 30),
        datetime(2022, 1, 1, 18, 0),
        datetime(2022, 1, 1, 19, 30),
        datetime(2022, 1, 1, 21, 0),
        datetime(2022, 1, 1, 22, 30),
        datetime(2022, 1, 2, 0, 0),
    ]

    # A nanosecond interval with no explicit time unit yields an "ns" dtype.
    result = pl.datetime_range(
        datetime(2022, 1, 1), datetime(2022, 1, 1, 0, 1), "987456321ns", eager=True
    )
    assert len(result) == 61
    assert result.dtype.time_unit == "ns"  # type: ignore[attr-defined]
    assert result.dt.second()[-1] == 59
    assert result.cast(pl.String)[-1] == "2022-01-01 00:00:59.247379260"


@pytest.mark.parametrize(
    ("time_unit", "expected_micros"),
    [
        ("ms", 986000),
        ("us", 986759),
        ("ns", 986759),
        (None, 986759),
    ],
)
def test_datetime_range_precision(
    time_unit: TimeUnit | None, expected_micros: int
) -> None:
    """Check the sub-second part of the endpoints for each time unit."""
    micros = 986759
    start = datetime(2000, 5, 30, 1, 53, 4, micros)
    stop = datetime(2000, 5, 31, 1, 53, 4, micros)
    result = pl.datetime_range(start, stop, time_unit=time_unit, eager=True)
    expected_start = start.replace(microsecond=expected_micros)
    expected_stop = stop.replace(microsecond=expected_micros)
    assert result[0] == expected_start
    assert result[1] == expected_stop


def test_datetime_range_invalid_time_unit() -> None:
    """An interval string with an unknown unit raises InvalidOperationError."""
    with pytest.raises(InvalidOperationError, match="'x' not supported"):
        pl.datetime_range(
            start=datetime(2021, 12, 16),
            end=datetime(2021, 12, 16, 3),
            interval="1X",
            eager=True,
        )


def test_datetime_range_lazy_time_zones() -> None:
    """Check a lazy range whose ``time_zone`` differs from the inputs' zone."""
    start = datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))
    stop = datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu"))
    result = (
        pl.DataFrame({"start": [start], "stop": [stop]})
        .with_columns(
            pl.datetime_range(
                start,
                stop,
                interval="678d",
                eager=False,
                time_zone="Pacific/Tarawa",
            )
        )
        .lazy()
    )
    expected = pl.DataFrame(
        {
            "start": [
                datetime(2020, 1, 1, 00, 00, tzinfo=ZoneInfo(key="Asia/Kathmandu"))
            ],
            "stop": [
                datetime(2020, 1, 2, 00, 00, tzinfo=ZoneInfo(key="Asia/Kathmandu"))
            ],
            "literal": [
                datetime(2020, 1, 1, 6, 15, tzinfo=ZoneInfo(key="Pacific/Tarawa"))
            ],
        }
    ).with_columns(pl.col("literal").dt.convert_time_zone("Pacific/Tarawa"))
    assert_frame_equal(result.collect(), expected)


@pytest.mark.parametrize("low", ["start", pl.col("start")])
@pytest.mark.parametrize("high", ["stop", pl.col("stop")])
def test_datetime_range_lazy_with_expressions(
    low: str | pl.Expr, high: str | pl.Expr
) -> None:
    """``datetime_ranges`` accepts column names and expressions as bounds."""
    df = pl.DataFrame(
        {
            "start": [datetime(2000, 1, 1), datetime(2022, 6, 1)],
            "stop": [datetime(2000, 1, 2), datetime(2022, 6, 2)],
        }
    )

    result_df = df.with_columns(
        pl.datetime_ranges(low, high, interval="1d").alias("dts")
    )

    assert result_df.to_dict(as_series=False) == {
        "start": [datetime(2000, 1, 1, 0, 0), datetime(2022, 6, 1, 0, 0)],
        "stop": [datetime(2000, 1, 2, 0, 0), datetime(2022, 6, 2, 0, 0)],
        "dts": [
            [datetime(2000, 1, 1, 0, 0), datetime(2000, 1, 2, 0, 0)],
            [datetime(2022, 6, 1, 0, 0), datetime(2022, 6, 2, 0, 0)],
        ],
    }


def test_datetime_range_invalid_time_zone() -> None:
    """An unparseable ``time_zone`` raises ComputeError."""
    with pytest.raises(ComputeError, match="unable to parse time zone: 'foo'"):
        pl.datetime_range(
            datetime(2001, 1, 1),
            datetime(2001, 1, 3),
            time_zone="foo",
            eager=True,
        )


def test_timezone_aware_datetime_range() -> None:
    """Aware bounds keep their zone; mixing aware and naive bounds raises."""
    low = datetime(2022, 10, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai"))
    high = datetime(2022, 11, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai"))

    assert pl.datetime_range(
        low, high, interval=timedelta(days=5), eager=True
    ).to_list() == [
        datetime(2022, 10, 17, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 10, 22, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 10, 27, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 1, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 6, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 11, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 16, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
    ]

    with pytest.raises(
        SchemaError,
        match="failed to determine supertype",
    ):
        pl.datetime_range(
            low,
            high.replace(tzinfo=None),
            interval=timedelta(days=5),
            time_zone="UTC",
            eager=True,
        )


def test_tzaware_datetime_range_crossing_dst_hourly() -> None:
    """Hourly US/Central range on 2021-11-07 yields 01:00 twice (second: fold=1)."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 7, 2),
        "1h",
        time_zone="US/Central",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 11, 7, 1, 0, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 11, 7, 1, 0, fold=1, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 11, 7, 2, 0, tzinfo=ZoneInfo("US/Central")),
    ]


def test_tzaware_datetime_range_crossing_dst_daily() -> None:
    """A "2d" US/Central range starting 2021-11-07 stays on local midnight."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 11),
        "2d",
        time_zone="US/Central",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 11, 9, 0, 0, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 11, 11, 0, 0, tzinfo=ZoneInfo("US/Central")),
    ]


def test_tzaware_datetime_range_crossing_dst_weekly() -> None:
    """A "1w" US/Central range starting 2021-11-07 stays on local midnight."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 20),
        "1w",
        time_zone="US/Central",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 11, 14, 0, 0, tzinfo=ZoneInfo("US/Central")),
    ]


def test_tzaware_datetime_range_crossing_dst_monthly() -> None:
    """A "1mo" US/Central range starting 2021-11-07 stays on local midnight."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 12, 20),
        "1mo",
        time_zone="US/Central",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("US/Central")),
        datetime(2021, 12, 7, 0, 0, tzinfo=ZoneInfo("US/Central")),
    ]


def test_datetime_range_with_unsupported_datetimes() -> None:
    """Ambiguous or non-existent naive bounds raise ComputeError."""
    with pytest.raises(
        ComputeError,
        match=r"datetime '2021-11-07 01:00:00' is ambiguous in time zone 'US/Central'",
    ):
        pl.datetime_range(
            datetime(2021, 11, 7, 1),
            datetime(2021, 11, 7, 2),
            "1h",
            time_zone="US/Central",
            eager=True,
        )
    with pytest.raises(
        ComputeError,
        match=r"datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Vienna'",
    ):
        pl.datetime_range(
            datetime(2021, 3, 28, 2, 30),
            datetime(2021, 3, 28, 4),
            "1h",
            time_zone="Europe/Vienna",
            eager=True,
        )


@pytest.mark.parametrize(
    ("values_time_zone", "input_time_zone", "output_time_zone"),
    [
        ("Asia/Kathmandu", "Asia/Kathmandu", "Asia/Kathmandu"),
        ("Asia/Kathmandu", None, "Asia/Kathmandu"),
        (None, "Asia/Kathmandu", "Asia/Kathmandu"),
        (None, None, None),
    ],
)
@pytest.mark.parametrize(
    ("values_time_unit", "input_time_unit", "output_time_unit"),
    [
        ("ms", None, "ms"),
        ("us", None, "us"),
        ("ns", None, "ns"),
        ("ms", "ms", "ms"),
        ("us", "ms", "ms"),
        ("ns", "ms", "ms"),
        ("ms", "us", "us"),
        ("us", "us", "us"),
        ("ns", "us", "us"),
        ("ms", "ns", "ns"),
        ("us", "ns", "ns"),
        ("ns", "ns", "ns"),
    ],
)
def test_datetime_ranges_schema(
    values_time_zone: str | None,
    input_time_zone: str | None,
    output_time_zone: str | None,
    values_time_unit: TimeUnit,
    input_time_unit: TimeUnit | None,
    output_time_unit: TimeUnit,
) -> None:
    """Lazy and collected schemas of ``datetime_ranges`` match the expected dtype."""
    df = (
        pl.DataFrame({"start": [datetime(2020, 1, 1)], "end": [datetime(2020, 1, 2)]})
        .with_columns(
            pl.col("*")
            .dt.replace_time_zone(values_time_zone)
            .dt.cast_time_unit(values_time_unit)
        )
        .lazy()
    )
    result = df.with_columns(
        datetime_range=pl.datetime_ranges(
            pl.col("start"),
            pl.col("end"),
            time_zone=input_time_zone,
            time_unit=input_time_unit,
        )
    )
    expected_schema = {
        "start": pl.Datetime(time_unit=values_time_unit, time_zone=values_time_zone),
        "end": pl.Datetime(time_unit=values_time_unit, time_zone=values_time_zone),
        "datetime_range": pl.List(
            pl.Datetime(time_unit=output_time_unit, time_zone=output_time_zone)
        ),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema

    expected = pl.DataFrame(
        {
            "start": [datetime(2020, 1, 1)],
            "end": [datetime(2020, 1, 2)],
            "datetime_range": [[datetime(2020, 1, 1), datetime(2020, 1, 2)]],
        }
    ).with_columns(
        pl.col("start")
        .dt.replace_time_zone(values_time_zone)
        .dt.cast_time_unit(values_time_unit),
        pl.col("end")
        .dt.replace_time_zone(values_time_zone)
        .dt.cast_time_unit(values_time_unit),
        pl.col("datetime_range")
        .explode()
        .dt.replace_time_zone(output_time_zone)
        .dt.cast_time_unit(output_time_unit)
        .implode(),
    )
    assert_frame_equal(result.collect(), expected)


@pytest.mark.parametrize(
    (
        "input_time_unit",
        "input_time_zone",
        "output_dtype",
        "interval",
        "expected_datetime_range",
    ),
    [
        (None, None, pl.Datetime("us"), "1s1d", ["2020-01-01", "2020-01-02 00:00:01"]),
        (None, None, pl.Datetime("us"), "1d1s", ["2020-01-01", "2020-01-02 00:00:01"]),
        (
            None,
            None,
            pl.Datetime("ns"),
            "1d1ns",
            ["2020-01-01", "2020-01-02 00:00:00.000000001"],
        ),
        ("ms", None, pl.Datetime("ms"), "1s1d", ["2020-01-01", "2020-01-02 00:00:01"]),
        ("ms", None, pl.Datetime("ms"), "1d1s", ["2020-01-01", "2020-01-02 00:00:01"]),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("us", "Asia/Kathmandu"),
            "1s1d",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("us", "Asia/Kathmandu"),
            "1d1s",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            None,
            "Asia/Kathmandu",
            pl.Datetime("ns", "Asia/Kathmandu"),
            "1d1ns",
            ["2020-01-01", "2020-01-02 00:00:00.000000001"],
        ),
        (
            "ms",
            "Asia/Kathmandu",
            pl.Datetime("ms", "Asia/Kathmandu"),
            "1s1d",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
        (
            "ms",
            "Asia/Kathmandu",
            pl.Datetime("ms", "Asia/Kathmandu"),
            "1d1s",
            ["2020-01-01", "2020-01-02 00:00:01"],
        ),
    ],
)
def test_datetime_range_schema_upcasts_to_datetime(
    input_time_unit: TimeUnit | None,
    input_time_zone: str | None,
    output_dtype: PolarsDataType,
    interval: str,
    expected_datetime_range: list[str],
) -> None:
    """Date bounds produce a Datetime result of the expected unit and zone."""
    df = pl.DataFrame({"start": [date(2020, 1, 1)], "end": [date(2020, 1, 3)]}).lazy()
    result = df.with_columns(
        datetime_range=pl.datetime_ranges(
            pl.col("start"),
            pl.col("end"),
            interval=interval,
            time_unit=input_time_unit,
            time_zone=input_time_zone,
        )
    )
    expected_schema = {
        "start": pl.Date,
        "end": pl.Date,
        "datetime_range": pl.List(output_dtype),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema

    expected = pl.DataFrame(
        {
            "start": [date(2020, 1, 1)],
            "end": [date(2020, 1, 3)],
            "datetime_range": pl.Series(expected_datetime_range)
            .str.to_datetime(time_unit="ns")
            .implode(),
        }
    ).with_columns(
        pl.col("datetime_range")
        .explode()
        .dt.cast_time_unit(output_dtype.time_unit)  # type: ignore[union-attr]
        .dt.replace_time_zone(output_dtype.time_zone)  # type: ignore[union-attr]
        .implode(),
    )
    assert_frame_equal(result.collect(), expected)

    # check datetime_range too
    result_single = pl.datetime_range(
        date(2020, 1, 1),
        date(2020, 1, 3),
        interval=interval,
        time_unit=input_time_unit,
        time_zone=input_time_zone,
        eager=True,
    ).alias("datetime")
    assert_series_equal(
        result_single, expected["datetime_range"].explode().rename("datetime")
    )


def test_datetime_ranges_no_alias_schema_9037() -> None:
    """Without an alias, the ``datetime_ranges`` output replaces ``start``."""
    df = pl.DataFrame(
        {"start": [datetime(2020, 1, 1)], "end": [datetime(2020, 1, 2)]}
    ).lazy()
    result = df.with_columns(pl.datetime_ranges(pl.col("start"), pl.col("end")))
    expected_schema = {
        "start": pl.List(pl.Datetime(time_unit="us", time_zone=None)),
        "end": pl.Datetime(time_unit="us", time_zone=None),
    }
    assert result.collect_schema() == expected_schema
    assert result.collect().schema == expected_schema


@pytest.mark.parametrize("interval", [timedelta(0), timedelta(minutes=-10)])
def test_datetime_range_invalid_interval(interval: timedelta) -> None:
    """Zero and negative intervals raise ComputeError."""
    with pytest.raises(ComputeError, match="`interval` must be positive"):
        # Pass the parametrized value so both the zero and the negative case run;
        # a hard-coded interval string would leave the parameter unused.
        pl.datetime_range(
            datetime(2000, 3, 20), datetime(2000, 3, 21), interval=interval, eager=True
        )


@pytest.mark.parametrize(
    ("closed", "expected_values"),
    [
        ("right", [datetime(2020, 2, 29), datetime(2020, 3, 31)]),
        ("left", [datetime(2020, 1, 31), datetime(2020, 2, 29)]),
        ("none", [datetime(2020, 2, 29)]),
        ("both", [datetime(2020, 1, 31), datetime(2020, 2, 29), datetime(2020, 3, 31)]),
    ],
)
def test_datetime_range_end_of_month_5441(
    closed: ClosedInterval, expected_values: list[datetime]
) -> None:
    """Check a "1mo" range starting on Jan 31 for every ``closed`` option."""
    start = date(2020, 1, 31)
    stop = date(2020, 3, 31)
    result = pl.datetime_range(start, stop, interval="1mo", closed=closed, eager=True)
    expected = pl.Series("literal", expected_values)
    assert_series_equal(result, expected)


def test_datetime_ranges_broadcasting() -> None:
    """A scalar bound is broadcast against a column bound in ``datetime_ranges``."""
    df = pl.DataFrame(
        {
            "datetimes": [
                datetime(2021, 1, 1),
                datetime(2021, 1, 2),
                datetime(2021, 1, 3),
            ]
        }
    )
    result = df.select(
        pl.datetime_ranges(start="datetimes", end=datetime(2021, 1, 3)).alias("end"),
        pl.datetime_ranges(start=datetime(2021, 1, 1), end="datetimes").alias("start"),
    )
    expected = pl.DataFrame(
        {
            "end": [
                [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 3)],
                [datetime(2021, 1, 2), datetime(2021, 1, 3)],
                [datetime(2021, 1, 3)],
            ],
            "start": [
                [datetime(2021, 1, 1)],
                [datetime(2021, 1, 1), datetime(2021, 1, 2)],
                [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 3)],
            ],
        }
    )
    assert_frame_equal(result, expected)


def test_datetime_range_specifying_ambiguous_11713() -> None:
    """Ranges starting at an ambiguous instant honour ``ambiguous`` on the start."""
    result = pl.datetime_range(
        pl.datetime(2023, 10, 29, 2, 0).dt.replace_time_zone(
            "Europe/Madrid", ambiguous="earliest"
        ),
        pl.datetime(2023, 10, 29, 3, 0).dt.replace_time_zone("Europe/Madrid"),
        "1h",
        eager=True,
    )
    expected = pl.Series(
        "datetime",
        [
            datetime(2023, 10, 29, 2),
            datetime(2023, 10, 29, 2),
            datetime(2023, 10, 29, 3),
        ],
    ).dt.replace_time_zone(
        "Europe/Madrid", ambiguous=pl.Series(["earliest", "latest", "raise"])
    )
    assert_series_equal(result, expected)
    result = pl.datetime_range(
        pl.datetime(2023, 10, 29, 2, 0).dt.replace_time_zone(
            "Europe/Madrid", ambiguous="latest"
        ),
        pl.datetime(2023, 10, 29, 3, 0).dt.replace_time_zone("Europe/Madrid"),
        "1h",
        eager=True,
    )
    expected = pl.Series(
        "datetime", [datetime(2023, 10, 29, 2), datetime(2023, 10, 29, 3)]
    ).dt.replace_time_zone("Europe/Madrid", ambiguous=pl.Series(["latest", "raise"]))
    assert_series_equal(result, expected)


@given(
    closed=st.sampled_from(["none", "left", "right", "both"]),
    time_unit=st.sampled_from(["ms", "us", "ns"]),
    n=st.integers(1, 10),
    size=st.integers(8, 10),
    unit=st.sampled_from(["s", "m", "h", "d", "mo"]),
    start=st.datetimes(datetime(1965, 1, 1), datetime(2100, 1, 1)),
)
@settings(max_examples=50)
@pytest.mark.benchmark
def test_datetime_range_fast_slow_paths(
    closed: ClosedInterval,
    time_unit: TimeUnit,
    n: int,
    size: int,
    unit: str,
    start: datetime,
) -> None:
    """A zoned range stripped of its zone equals the same range built naive."""
    end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).item()
    result_slow = pl.datetime_range(
        start,
        end,
        closed=closed,
        time_unit=time_unit,
        interval=f"{n}{unit}",
        time_zone="Asia/Kathmandu",
        eager=True,
    ).dt.replace_time_zone(None)
    result_fast = pl.datetime_range(
        start,
        end,
        closed=closed,
        time_unit=time_unit,
        interval=f"{n}{unit}",
        eager=True,
    )
    assert_series_equal(result_slow, result_fast)


def test_dt_range_with_nanosecond_interval_19931() -> None:
    """A "1ns" interval combined with time unit "ms" raises InvalidOperationError."""
    with pytest.raises(
        InvalidOperationError, match="interval 1ns is too small for time unit ms"
    ):
        pl.datetime_range(
            pl.date(2022, 1, 1),
            pl.date(2022, 1, 1),
            time_zone="Asia/Kathmandu",
            interval="1ns",
            time_unit="ms",
            eager=True,
        )


def test_datetime_range_with_nanoseconds_overflow_15735() -> None:
    """A 300-year "24h" range has dtype Datetime("us") and 109574 rows."""
    s = pl.datetime_range(date(2000, 1, 1), date(2300, 1, 1), "24h", eager=True)
    assert s.dtype == pl.Datetime("us")
    assert s.shape == (109574,)