Path: blob/main/py-polars/tests/unit/functions/range/test_date_range.py
6939 views
from __future__ import annotations12from datetime import date, datetime3from typing import TYPE_CHECKING45import pandas as pd6import pytest78import polars as pl9from polars.exceptions import ComputeError, InvalidOperationError, ShapeError10from polars.testing import assert_frame_equal, assert_series_equal1112if TYPE_CHECKING:13from polars._typing import ClosedInterval141516def test_date_range() -> None:17# if low/high are both date, range is also be date _iff_ the granularity is >= 1d18result = pl.date_range(date(2022, 1, 1), date(2022, 3, 1), "1mo", eager=True)19assert result.to_list() == [date(2022, 1, 1), date(2022, 2, 1), date(2022, 3, 1)]202122def test_date_range_invalid_time_unit() -> None:23with pytest.raises(InvalidOperationError, match="'x' not supported"):24pl.date_range(25start=date(2021, 12, 16),26end=date(2021, 12, 18),27interval="1X",28eager=True,29)303132def test_date_range_lazy_with_literals() -> None:33df = pl.DataFrame({"misc": ["x"]}).with_columns(34pl.date_ranges(35date(2000, 1, 1),36date(2023, 8, 31),37interval="987d",38eager=False,39).alias("dts")40)41assert df.rows() == [42(43"x",44[45date(2000, 1, 1),46date(2002, 9, 14),47date(2005, 5, 28),48date(2008, 2, 9),49date(2010, 10, 23),50date(2013, 7, 6),51date(2016, 3, 19),52date(2018, 12, 1),53date(2021, 8, 14),54],55)56]57assert (58df.rows()[0][1]59== pd.date_range(60date(2000, 1, 1), date(2023, 12, 31), freq="987d"61).date.tolist()62)636465@pytest.mark.parametrize("low", ["start", pl.col("start")])66@pytest.mark.parametrize("high", ["stop", pl.col("stop")])67def test_date_range_lazy_with_expressions(68low: str | pl.Expr, high: str | pl.Expr69) -> None:70lf = pl.LazyFrame(71{72"start": [date(2015, 6, 30)],73"stop": [date(2022, 12, 31)],74}75)7677result = lf.with_columns(78pl.date_ranges(low, high, interval="678d", eager=False).alias("dts")79)8081assert result.collect().rows() == [82(83date(2015, 6, 30),84date(2022, 12, 31),85[86date(2015, 6, 30),87date(2017, 5, 8),88date(2019, 3, 17),89date(2021, 1, 23),90date(2022, 12, 2),91],92)93]9495df = pl.DataFrame(96{97"start": [date(2000, 1, 1), date(2022, 6, 1)],98"stop": [date(2000, 1, 2), date(2022, 6, 2)],99}100)101102result_df = df.with_columns(pl.date_ranges(low, high, interval="1d").alias("dts"))103104assert result_df.to_dict(as_series=False) == {105"start": [date(2000, 1, 1), date(2022, 6, 1)],106"stop": [date(2000, 1, 2), date(2022, 6, 2)],107"dts": [108[date(2000, 1, 1), date(2000, 1, 2)],109[date(2022, 6, 1), date(2022, 6, 2)],110],111}112113114def test_date_ranges_single_row_lazy_7110() -> None:115df = pl.DataFrame(116{117"name": ["A"],118"from": [date(2020, 1, 1)],119"to": [date(2020, 1, 2)],120}121)122result = df.with_columns(123pl.date_ranges(124start=pl.col("from"),125end=pl.col("to"),126interval="1d",127eager=False,128).alias("date_range")129)130expected = pl.DataFrame(131{132"name": ["A"],133"from": [date(2020, 1, 1)],134"to": [date(2020, 1, 2)],135"date_range": [[date(2020, 1, 1), date(2020, 1, 2)]],136}137)138assert_frame_equal(result, expected)139140141@pytest.mark.parametrize(142("closed", "expected_values"),143[144("right", [date(2020, 2, 29), date(2020, 3, 31)]),145("left", [date(2020, 1, 31), date(2020, 2, 29)]),146("none", [date(2020, 2, 29)]),147("both", [date(2020, 1, 31), date(2020, 2, 29), date(2020, 3, 31)]),148],149)150def test_date_range_end_of_month_5441(151closed: ClosedInterval, expected_values: list[date]152) -> None:153start = date(2020, 1, 31)154stop = date(2020, 3, 31)155result = pl.date_range(start, stop, interval="1mo", closed=closed, eager=True)156expected = pl.Series("literal", expected_values)157assert_series_equal(result, expected)158159160def test_date_range_name() -> None:161result_eager = pl.date_range(date(2020, 1, 1), date(2020, 1, 3), eager=True)162assert result_eager.name == "literal"163164start = pl.Series("left", [date(2020, 1, 1)])165result_lazy = pl.select(166pl.date_range(pl.lit(start).first(), date(2020, 1, 3), eager=False)167).to_series()168assert result_lazy.name == "left"169170171def test_date_ranges_eager() -> None:172start = pl.Series("start", [date(2022, 1, 1), date(2022, 1, 2)])173end = pl.Series("end", [date(2022, 1, 4), date(2022, 1, 3)])174175result = pl.date_ranges(start, end, eager=True)176177expected = pl.Series(178"start",179[180[date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3), date(2022, 1, 4)],181[date(2022, 1, 2), date(2022, 1, 3)],182],183)184assert_series_equal(result, expected)185186187def test_date_range_eager() -> None:188result = pl.date_range(date(2022, 1, 1), date(2022, 1, 3), eager=True)189expected = pl.Series(190"literal", [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]191)192assert_series_equal(result, expected)193194195def test_date_range_input_shape_empty() -> None:196empty = pl.Series(dtype=pl.Datetime)197single = pl.Series([datetime(2022, 1, 2)])198199with pytest.raises(ShapeError):200pl.date_range(empty, single, eager=True)201with pytest.raises(ShapeError):202pl.date_range(single, empty, eager=True)203with pytest.raises(ShapeError):204pl.date_range(empty, empty, eager=True)205206207def test_date_range_input_shape_multiple_values() -> None:208single = pl.Series([datetime(2022, 1, 2)])209multiple = pl.Series([datetime(2022, 1, 3), datetime(2022, 1, 4)])210211with pytest.raises(ShapeError):212pl.date_range(multiple, single, eager=True)213with pytest.raises(ShapeError):214pl.date_range(single, multiple, eager=True)215with pytest.raises(ShapeError):216pl.date_range(multiple, multiple, eager=True)217218219def test_date_range_start_later_than_end() -> None:220result = pl.date_range(date(2000, 3, 20), date(2000, 3, 5), eager=True)221expected = pl.Series("literal", dtype=pl.Date)222assert_series_equal(result, expected)223224225def test_date_range_24h_interval_raises() -> None:226with pytest.raises(227ComputeError,228match="`interval` input for `date_range` must consist of full days",229):230pl.date_range(date(2022, 1, 1), date(2022, 1, 3), interval="24h", eager=True)231232233def test_long_date_range_12461() -> None:234result = pl.date_range(date(1900, 1, 1), date(2300, 1, 1), "1d", eager=True)235assert result[0] == date(1900, 1, 1)236assert result[-1] == date(2300, 1, 1)237assert (result.diff()[1:].dt.total_days() == 1).all()238239240def test_date_ranges_broadcasting() -> None:241df = pl.DataFrame({"dates": [date(2021, 1, 1), date(2021, 1, 2), date(2021, 1, 3)]})242result = df.select(243pl.date_ranges(start="dates", end=date(2021, 1, 3)).alias("end"),244pl.date_ranges(start=date(2021, 1, 1), end="dates").alias("start"),245)246expected = pl.DataFrame(247{248"end": [249[date(2021, 1, 1), date(2021, 1, 2), date(2021, 1, 3)],250[date(2021, 1, 2), date(2021, 1, 3)],251[date(2021, 1, 3)],252],253"start": [254[date(2021, 1, 1)],255[date(2021, 1, 1), date(2021, 1, 2)],256[date(2021, 1, 1), date(2021, 1, 2), date(2021, 1, 3)],257],258}259)260assert_frame_equal(result, expected)261262263def test_date_ranges_broadcasting_fail() -> None:264start = pl.Series([date(2021, 1, 1), date(2021, 1, 2), date(2021, 1, 3)])265end = pl.Series([date(2021, 1, 2), date(2021, 1, 3)])266267with pytest.raises(268ComputeError, match=r"lengths of `start` \(3\) and `end` \(2\) do not match"269):270pl.date_ranges(start, end, eager=True)271272273def test_date_range_datetime_input() -> None:274result = pl.date_range(275datetime(2022, 1, 1, 12), datetime(2022, 1, 3), interval="1d", eager=True276)277expected = pl.Series(278"literal", [date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]279)280assert_series_equal(result, expected)281282283def test_date_ranges_datetime_input() -> None:284result = pl.date_ranges(285datetime(2022, 1, 1, 12), datetime(2022, 1, 3), interval="1d", eager=True286)287expected = pl.Series(288"literal", [[date(2022, 1, 1), date(2022, 1, 2), date(2022, 1, 3)]]289)290assert_series_equal(result, expected)291292293def test_date_range_with_subclass_18470_18447() -> None:294class MyAmazingDate(date):295pass296297class MyAmazingDatetime(datetime):298pass299300result = pl.datetime_range(301MyAmazingDate(2020, 1, 1), MyAmazingDatetime(2020, 1, 2), eager=True302)303expected = pl.Series("literal", [datetime(2020, 1, 1), datetime(2020, 1, 2)])304assert_series_equal(result, expected)305306307