# Path: blob/main/py-polars/tests/unit/functions/range/test_datetime_range.py
# 8395 views
#
# Unit tests for ``pl.datetime_range``: interval parsing, time units, time
# zones, DST transitions, ``closed`` handling and expression inputs.
from __future__ import annotations

from datetime import date, datetime, timedelta
from typing import TYPE_CHECKING
from zoneinfo import ZoneInfo

import hypothesis.strategies as st
import pytest
from hypothesis import given, settings

import polars as pl
from polars.datatypes import DTYPE_TEMPORAL_UNITS
from polars.exceptions import ComputeError, InvalidOperationError, SchemaError
from polars.testing import assert_frame_equal, assert_series_equal

if TYPE_CHECKING:
    from polars._typing import ClosedInterval, PolarsDataType, TimeUnit


def test_datetime_range() -> None:
    """Check eager ranges built from timedelta, string and nanosecond intervals."""
    # Interval given as a timedelta (1 day 12 hours).
    result = pl.datetime_range(
        date(1985, 1, 1), date(2015, 7, 1), timedelta(days=1, hours=12), eager=True
    )
    assert len(result) == 7426
    assert result.dt[0] == datetime(1985, 1, 1)
    assert result.dt[1] == datetime(1985, 1, 2, 12, 0)
    assert result.dt[2] == datetime(1985, 1, 4, 0, 0)
    assert result.dt[-1] == datetime(2015, 6, 30, 12, 0)

    # The requested time unit must be reflected in the output dtype.
    for time_unit in DTYPE_TEMPORAL_UNITS:
        rng = pl.datetime_range(
            datetime(2020, 1, 1),
            date(2020, 1, 2),
            "2h",
            time_unit=time_unit,
            eager=True,
        )
        assert rng.dtype.time_unit == time_unit  # type: ignore[attr-defined]
        assert rng.shape == (13,)
        assert rng.dt[0] == datetime(2020, 1, 1)
        assert rng.dt[-1] == datetime(2020, 1, 2)

    # Compound string interval ("1h30m").
    result = pl.datetime_range(date(2022, 1, 1), date(2022, 1, 2), "1h30m", eager=True)
    assert list(result) == [
        datetime(2022, 1, 1, 0, 0),
        datetime(2022, 1, 1, 1, 30),
        datetime(2022, 1, 1, 3, 0),
        datetime(2022, 1, 1, 4, 30),
        datetime(2022, 1, 1, 6, 0),
        datetime(2022, 1, 1, 7, 30),
        datetime(2022, 1, 1, 9, 0),
        datetime(2022, 1, 1, 10, 30),
        datetime(2022, 1, 1, 12, 0),
        datetime(2022, 1, 1, 13, 30),
        datetime(2022, 1, 1, 15, 0),
        datetime(2022, 1, 1, 16, 30),
        datetime(2022, 1, 1, 18, 0),
        datetime(2022, 1, 1, 19, 30),
        datetime(2022, 1, 1, 21, 0),
        datetime(2022, 1, 1, 22, 30),
        datetime(2022, 1, 2, 0, 0),
    ]

    # A nanosecond-resolution interval is expected to yield a "ns" dtype.
    result = pl.datetime_range(
        datetime(2022, 1, 1), datetime(2022, 1, 1, 0, 1), "987456321ns", eager=True
    )
    assert len(result) == 61
    assert result.dtype.time_unit == "ns"  # type: ignore[attr-defined]
    assert result.dt.second()[-1] == 59
    assert result.cast(pl.String)[-1] == "2022-01-01 00:00:59.247379260"


@pytest.mark.parametrize(
    ("time_unit", "expected_micros"),
    [
        ("ms", 986000),
        ("us", 986759),
        ("ns", 986759),
        (None, 986759),
    ],
)
def test_datetime_range_precision(
    time_unit: TimeUnit | None, expected_micros: int
) -> None:
    """Check the sub-second part of both endpoints for each ``time_unit``."""
    micros = 986759
    start = datetime(2000, 5, 30, 1, 53, 4, micros)
    stop = datetime(2000, 5, 31, 1, 53, 4, micros)
    result = pl.datetime_range(start, stop, time_unit=time_unit, eager=True)
    expected_start = start.replace(microsecond=expected_micros)
    expected_stop = stop.replace(microsecond=expected_micros)
    assert result[0] == expected_start
    assert result[1] == expected_stop


def test_datetime_range_invalid_time_unit() -> None:
    """An interval with an unknown unit ("1X") must raise InvalidOperationError."""
    with pytest.raises(InvalidOperationError, match="'x' not supported"):
        pl.datetime_range(
            start=datetime(2021, 12, 16),
            end=datetime(2021, 12, 16, 3),
            interval="1X",
            eager=True,
        )


def test_datetime_range_interval_too_small() -> None:
    """A 1ns interval combined with an explicit "us" time unit must raise."""
    # start/end/interval
    with pytest.raises(
        InvalidOperationError,
        match="interval 1ns is too small for time unit μs and was rounded down to zero",
    ):
        pl.datetime_range(
            start=datetime(2025, 1, 1),
            end=datetime(2025, 1, 5),
            interval="1ns",
            time_unit="us",
            eager=True,
        )


def test_datetime_range_output_ns_due_to_interval() -> None:
    """Without an explicit time unit, a 1ns interval yields a "ns" Datetime."""
    result = pl.datetime_range(
        start=datetime(2025, 1, 1),
        end=datetime(2025, 1, 1, 0, 0, 0, 1),
        interval="1ns",
        eager=True,
    )
    # One microsecond spanned in 1ns steps, both ends included.
    assert result.len() == 1001
    assert result.dtype == pl.Datetime(time_unit="ns")


def test_datetime_range_lazy_time_zones() -> None:
    """Check a lazy range whose ``time_zone`` differs from the inputs' zone."""
    start = datetime(2020, 1, 1, tzinfo=ZoneInfo("Asia/Kathmandu"))
    stop = datetime(2020, 1, 2, tzinfo=ZoneInfo("Asia/Kathmandu"))
    result = (
        pl.DataFrame({"start": [start], "stop": [stop]})
        .with_columns(
            pl.datetime_range(
                start=start,
                end=stop,
                # Interval far larger than the span, so only one value results.
                interval="678d",
                eager=False,
                time_zone="Pacific/Tarawa",
            )
        )
        .lazy()
    )
    expected = pl.DataFrame(
        {
            "start": [
                datetime(2020, 1, 1, 00, 00, tzinfo=ZoneInfo(key="Asia/Kathmandu"))
            ],
            "stop": [
                datetime(2020, 1, 2, 00, 00, tzinfo=ZoneInfo(key="Asia/Kathmandu"))
            ],
            "literal": [
                datetime(2020, 1, 1, 6, 15, tzinfo=ZoneInfo(key="Pacific/Tarawa"))
            ],
        }
    ).with_columns(pl.col("literal").dt.convert_time_zone("Pacific/Tarawa"))
    assert_frame_equal(result.collect(), expected)


def test_datetime_range_invalid_time_zone() -> None:
    """An unparseable ``time_zone`` string must raise ComputeError."""
    with pytest.raises(ComputeError, match="unable to parse time zone: 'foo'"):
        pl.datetime_range(
            datetime(2001, 1, 1),
            datetime(2001, 1, 3),
            time_zone="foo",
            eager=True,
        )


def test_timezone_aware_datetime_range() -> None:
    """Check tz-aware inputs, and that mixing aware and naive bounds raises."""
    low = datetime(2022, 10, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai"))
    high = datetime(2022, 11, 17, 10, tzinfo=ZoneInfo("Asia/Shanghai"))

    assert pl.datetime_range(
        low, high, interval=timedelta(days=5), eager=True
    ).to_list() == [
        datetime(2022, 10, 17, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 10, 22, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 10, 27, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 1, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 6, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 11, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
        datetime(2022, 11, 16, 10, 0, tzinfo=ZoneInfo("Asia/Shanghai")),
    ]

    # A tz-aware start with a naive end has no common supertype.
    with pytest.raises(
        SchemaError,
        match="failed to determine supertype",
    ):
        pl.datetime_range(
            low,
            high.replace(tzinfo=None),
            interval=timedelta(days=5),
            time_zone="UTC",
            eager=True,
        )


def test_tzaware_datetime_range_crossing_dst_hourly() -> None:
    """Hourly range in America/Chicago on 2021-11-07 repeats 01:00 (fold=1)."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 7, 2),
        "1h",
        time_zone="America/Chicago",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 11, 7, 1, 0, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 11, 7, 1, 0, fold=1, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 11, 7, 2, 0, tzinfo=ZoneInfo("America/Chicago")),
    ]


def test_tzaware_datetime_range_crossing_dst_daily() -> None:
    """A "2d" range starting 2021-11-07 in America/Chicago stays at midnight."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 11),
        "2d",
        time_zone="America/Chicago",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 11, 9, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 11, 11, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
    ]


def test_tzaware_datetime_range_crossing_dst_weekly() -> None:
    """A "1w" range starting 2021-11-07 in America/Chicago stays at midnight."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 11, 20),
        "1w",
        time_zone="America/Chicago",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 11, 14, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
    ]


def test_tzaware_datetime_range_crossing_dst_monthly() -> None:
    """A "1mo" range starting 2021-11-07 in America/Chicago stays at midnight."""
    result = pl.datetime_range(
        datetime(2021, 11, 7),
        datetime(2021, 12, 20),
        "1mo",
        time_zone="America/Chicago",
        eager=True,
    )
    assert result.to_list() == [
        datetime(2021, 11, 7, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
        datetime(2021, 12, 7, 0, 0, tzinfo=ZoneInfo("America/Chicago")),
    ]


def test_datetime_range_with_unsupported_datetimes() -> None:
    """Ambiguous or non-existent naive bounds in the target zone must raise."""
    with pytest.raises(
        ComputeError,
        match=r"datetime '2021-11-07 01:00:00' is ambiguous in time zone 'America/Chicago'",
    ):
        pl.datetime_range(
            datetime(2021, 11, 7, 1),
            datetime(2021, 11, 7, 2),
            "1h",
            time_zone="America/Chicago",
            eager=True,
        )
    with pytest.raises(
        ComputeError,
        match=r"datetime '2021-03-28 02:30:00' is non-existent in time zone 'Europe/Vienna'",
    ):
        pl.datetime_range(
            datetime(2021, 3, 28, 2, 30),
            datetime(2021, 3, 28, 4),
            "1h",
            time_zone="Europe/Vienna",
            eager=True,
        )


@pytest.mark.parametrize(
    ("closed", "expected_values"),
    [
        ("right", [datetime(2020, 2, 29), datetime(2020, 3, 31)]),
        ("left", [datetime(2020, 1, 31), datetime(2020, 2, 29)]),
        ("none", [datetime(2020, 2, 29)]),
        ("both", [datetime(2020, 1, 31), datetime(2020, 2, 29), datetime(2020, 3, 31)]),
    ],
)
def test_datetime_range_end_of_month_5441(
    closed: ClosedInterval, expected_values: list[datetime]
) -> None:
    """Monthly steps from Jan 31 hit month ends for every ``closed`` setting."""
    start = date(2020, 1, 31)
    stop = date(2020, 3, 31)
    result = pl.datetime_range(
        start=start,
        end=stop,
        interval="1mo",
        closed=closed,
        eager=True,
    )
    expected = pl.Series("literal", expected_values, dtype=pl.Datetime("us"))
    assert_series_equal(result, expected)


def test_datetime_range_specifying_ambiguous_11713() -> None:
    """Check ranges whose start is an ambiguous Europe/Madrid wall-clock time."""
    # Start resolved to the earliest 02:00: both occurrences of 02:00 appear.
    result = pl.datetime_range(
        pl.datetime(2023, 10, 29, 2, 0).dt.replace_time_zone(
            "Europe/Madrid", ambiguous="earliest"
        ),
        pl.datetime(2023, 10, 29, 3, 0).dt.replace_time_zone("Europe/Madrid"),
        "1h",
        eager=True,
    )
    expected = pl.Series(
        "datetime",
        [
            datetime(2023, 10, 29, 2),
            datetime(2023, 10, 29, 2),
            datetime(2023, 10, 29, 3),
        ],
    ).dt.replace_time_zone(
        "Europe/Madrid", ambiguous=pl.Series(["earliest", "latest", "raise"])
    )
    assert_series_equal(result, expected)
    # Start resolved to the latest 02:00: only that occurrence appears.
    result = pl.datetime_range(
        pl.datetime(2023, 10, 29, 2, 0).dt.replace_time_zone(
            "Europe/Madrid", ambiguous="latest"
        ),
        pl.datetime(2023, 10, 29, 3, 0).dt.replace_time_zone("Europe/Madrid"),
        "1h",
        eager=True,
    )
    expected = pl.Series(
        "datetime", [datetime(2023, 10, 29, 2), datetime(2023, 10, 29, 3)]
    ).dt.replace_time_zone("Europe/Madrid", ambiguous=pl.Series(["latest", "raise"]))
    assert_series_equal(result, expected)


@given(
    closed=st.sampled_from(["none", "left", "right", "both"]),
    time_unit=st.sampled_from(["ms", "us", "ns"]),
    n=st.integers(1, 10),
    size=st.integers(8, 10),
    unit=st.sampled_from(["s", "m", "h", "d", "mo"]),
    start=st.datetimes(datetime(1965, 1, 1), datetime(2100, 1, 1)),
)
@settings(max_examples=50)
@pytest.mark.benchmark
def test_datetime_range_fast_slow_paths(
    closed: ClosedInterval,
    time_unit: TimeUnit,
    n: int,
    size: int,
    unit: str,
    start: datetime,
) -> None:
    """Time-zone-aware and naive ranges must agree once the zone is stripped."""
    # End lies exactly ``size`` intervals after ``start``.
    end = pl.select(pl.lit(start).dt.offset_by(f"{n * size}{unit}")).item()
    # NOTE(review): "slow"/"fast" presumably name internal code paths selected
    # by the presence of a time zone; confirm against the implementation.
    result_slow = pl.datetime_range(
        start,
        end,
        closed=closed,
        time_unit=time_unit,
        interval=f"{n}{unit}",
        time_zone="Asia/Kathmandu",
        eager=True,
    ).dt.replace_time_zone(None)
    result_fast = pl.datetime_range(
        start,
        end,
        closed=closed,
        time_unit=time_unit,
        interval=f"{n}{unit}",
        eager=True,
    )
    assert_series_equal(result_slow, result_fast)


def test_dt_range_with_nanosecond_interval_19931() -> None:
    """A 1ns interval with a "ms" time unit must raise, even with a time zone."""
    with pytest.raises(
        InvalidOperationError, match="interval 1ns is too small for time unit ms"
    ):
        pl.datetime_range(
            pl.date(2022, 1, 1),
            pl.date(2022, 1, 1),
            time_zone="Asia/Kathmandu",
            interval="1ns",
            time_unit="ms",
            eager=True,
        )


def test_datetime_range_with_nanoseconds_overflow_15735() -> None:
    """A 300-year range with a "24h" interval comes back as "us" Datetime."""
    s = pl.datetime_range(date(2000, 1, 1), date(2300, 1, 1), "24h", eager=True)
    assert s.dtype == pl.Datetime("us")
    assert s.shape == (109574,)


# Helper function to generate output Series with expected dtype.
def to_expected(
    values: list[date] | list[datetime], dtype: PolarsDataType
) -> pl.Series:
    """
    Build the expected "literal" Series for a parametrized ``dtype``.

    ``pl.Date`` maps to ``Datetime("us")``. A tz-aware Datetime dtype is built
    as naive values in that time unit and then localized with
    ``replace_time_zone``. Any other dtype is passed through unchanged.
    """
    if dtype == pl.Date:
        return pl.Series("literal", values, dtype=pl.Datetime("us"))
    else:
        if (tz := dtype.time_zone) is not None:  # type: ignore[union-attr]
            return pl.Series(
                "literal",
                values,
                dtype=pl.Datetime(dtype.time_unit),  # type: ignore[union-attr]
            ).dt.replace_time_zone(tz)
        else:
            return pl.Series("literal", values, dtype=dtype)


# start/end/interval
@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("us"),
        pl.Datetime("ns"),
        pl.Datetime("ms", time_zone="Asia/Kathmandu"),
        pl.Datetime("us", time_zone="Asia/Kathmandu"),
        pl.Datetime("ns", time_zone="Asia/Kathmandu"),
    ],
)
def test_datetime_range_start_end_interval_forwards(dtype: PolarsDataType) -> None:
    """Check every ``closed`` option, plus a reversed range, for each dtype."""
    start = date(2025, 1, 1)
    end = date(2025, 1, 10)
    # For the pl.Date case no time unit / time zone is passed.
    tu: TimeUnit = dtype.time_unit if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]
    tz: str = dtype.time_zone if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]

    assert_series_equal(
        pl.datetime_range(
            start=start,
            end=end,
            interval="3d",
            closed="left",
            eager=True,
            time_unit=tu,
            time_zone=tz,
        ),
        to_expected([date(2025, 1, 1), date(2025, 1, 4), date(2025, 1, 7)], dtype),
    )
    assert_series_equal(
        pl.datetime_range(
            start=start,
            end=end,
            interval="3d",
            closed="right",
            eager=True,
            time_unit=tu,
            time_zone=tz,
        ),
        to_expected([date(2025, 1, 4), date(2025, 1, 7), date(2025, 1, 10)], dtype),
    )
    assert_series_equal(
        pl.datetime_range(
            start=start,
            end=end,
            interval="3d",
            closed="none",
            eager=True,
            time_unit=tu,
            time_zone=tz,
        ),
        to_expected([date(2025, 1, 4), date(2025, 1, 7)], dtype),
    )
    assert_series_equal(
        pl.datetime_range(
            start=start,
            end=end,
            interval="3d",
            closed="both",
            eager=True,
            time_unit=tu,
            time_zone=tz,
        ),
        to_expected(
            [date(2025, 1, 1), date(2025, 1, 4), date(2025, 1, 7), date(2025, 1, 10)],
            dtype,
        ),
    )
    # test wrong direction is empty
    assert_series_equal(
        pl.datetime_range(
            start=end,
            end=start,
            interval="3d",
            eager=True,
            time_unit=tu,
            time_zone=tz,
        ),
        to_expected([], dtype),
    )


@pytest.mark.parametrize(
    "dtype",
    [
        pl.Date,
        pl.Datetime("ms"),
        pl.Datetime("us"),
        pl.Datetime("ns"),
        pl.Datetime("ms", time_zone="Asia/Kathmandu"),
        pl.Datetime("us", time_zone="Asia/Kathmandu"),
        pl.Datetime("ns", time_zone="Asia/Kathmandu"),
    ],
)
def test_datetime_range_expr_scalar(dtype: PolarsDataType) -> None:
    """Check a range whose bounds are scalar expressions (column min/max)."""
    df = pl.DataFrame(
        {
            "a": [date(2025, 1, 3), date(2025, 1, 1)],
            "interval": ["1d", "2d"],
        }
    )
    # For the pl.Date case no time unit / time zone is passed.
    tu: TimeUnit = dtype.time_unit if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]
    tz: str = dtype.time_zone if dtype == pl.Datetime else None  # type: ignore[assignment, union-attr]
    result = df.select(
        forward_start_end_interval=pl.datetime_range(
            start=pl.col("a").min(),
            end=pl.col("a").max(),
            interval="1d",
            time_unit=tu,
            time_zone=tz,
        ),
    )
    forward = [date(2025, 1, 1), date(2025, 1, 2), date(2025, 1, 3)]
    expected = pl.DataFrame(
        {
            "forward_start_end_interval": to_expected(forward, dtype=dtype),
        }
    )
    assert_frame_equal(result, expected)