Path: blob/main/py-polars/tests/unit/operations/rolling/test_rolling.py
8458 views
from __future__ import annotations12import random3from datetime import date, datetime, time, timedelta4from typing import TYPE_CHECKING5from zoneinfo import ZoneInfo67import hypothesis.strategies as st8import numpy as np9import pytest10from hypothesis import assume, given11from numpy import nan1213import polars as pl14from polars._utils.convert import parse_as_duration_string15from polars.exceptions import ComputeError, InvalidOperationError16from polars.meta.index_type import get_index_type17from polars.testing import assert_frame_equal, assert_series_equal18from polars.testing.parametric import column, dataframes, series19from polars.testing.parametric.strategies.dtype import _time_units20from tests.unit.conftest import INTEGER_DTYPES, NUMERIC_DTYPES, TEMPORAL_DTYPES2122if TYPE_CHECKING:23from collections.abc import Callable2425from hypothesis.strategies import SearchStrategy2627from polars._typing import (28ClosedInterval,29PolarsDataType,30QuantileMethod,31RankMethod,32TimeUnit,33)343536@pytest.fixture37def example_df() -> pl.DataFrame:38return pl.DataFrame(39{40"dt": [41datetime(2021, 1, 1),42datetime(2021, 1, 2),43datetime(2021, 1, 4),44datetime(2021, 1, 5),45datetime(2021, 1, 7),46],47"values": pl.arange(0, 5, eager=True),48}49)505152@pytest.mark.parametrize(53"period",54["1d", "2d", "3d", timedelta(days=1), timedelta(days=2), timedelta(days=3)],55)56@pytest.mark.parametrize("closed", ["left", "right", "none", "both"])57def test_rolling_kernels_and_rolling(58example_df: pl.DataFrame, period: str | timedelta, closed: ClosedInterval59) -> None:60out1 = example_df.set_sorted("dt").select(61pl.col("dt"),62# this differs from group_by aggregation because the empty window is63# null here64# where the sum aggregation of an empty set is 065pl.col("values")66.rolling_sum_by("dt", period, closed=closed)67.fill_null(0)68.alias("sum"),69pl.col("values").rolling_var_by("dt", period, closed=closed).alias("var"),70pl.col("values").rolling_mean_by("dt", period, closed=closed).alias("mean"),71pl.col("values").rolling_std_by("dt", period, closed=closed).alias("std"),72pl.col("values")73.rolling_quantile_by("dt", period, quantile=0.2, closed=closed)74.alias("quantile"),75)76out2 = (77example_df.set_sorted("dt")78.rolling("dt", period=period, closed=closed)79.agg(80[81pl.col("values").sum().alias("sum"),82pl.col("values").var().alias("var"),83pl.col("values").mean().alias("mean"),84pl.col("values").std().alias("std"),85pl.col("values").quantile(quantile=0.2).alias("quantile"),86]87)88)89assert_frame_equal(out1, out2)909192@pytest.mark.parametrize(93"period",94["1d", "2d", "3d", timedelta(days=1), timedelta(days=2), timedelta(days=3)],95)96@pytest.mark.parametrize("closed", ["right", "both"])97def test_rolling_rank_kernels_and_rolling(98example_df: pl.DataFrame, period: str | timedelta, closed: ClosedInterval99) -> None:100out1 = example_df.set_sorted("dt").select(101pl.col("dt"),102pl.col("values").rolling_rank_by("dt", period, closed=closed).alias("rank"),103)104out2 = (105example_df.set_sorted("dt")106.rolling("dt", period=period, closed=closed)107.agg([pl.col("values").rank().last().alias("rank")])108)109assert_frame_equal(out1, out2)110111112@pytest.mark.parametrize("closed", ["left", "none"])113def test_rolling_rank_needs_closed_right(114example_df: pl.DataFrame, closed: ClosedInterval115) -> None:116pat = r"`rolling_rank_by` window needs to be closed on the right side \(i.e., `closed` must be `right` or `both`\)"117with pytest.raises(InvalidOperationError, match=pat):118example_df.set_sorted("dt").select(119pl.col("values").rolling_rank_by("dt", "2d", closed=closed).alias("rank"),120)121122123@pytest.mark.parametrize(124("offset", "closed", "expected_values"),125[126pytest.param(127"-1d",128"left",129[[1], [1, 2], [2, 3], [3, 4]],130id="partial lookbehind, left",131),132pytest.param(133"-1d",134"right",135[[1, 2], [2, 3], [3, 4], [4]],136id="partial lookbehind, right",137),138pytest.param(139"-1d",140"both",141[[1, 2], [1, 2, 3], [2, 3, 4], [3, 4]],142id="partial lookbehind, both",143),144pytest.param(145"-1d",146"none",147[[1], [2], [3], [4]],148id="partial lookbehind, none",149),150pytest.param(151"-2d",152"left",153[[], [1], [1, 2], [2, 3]],154id="full lookbehind, left",155),156pytest.param(157"-3d",158"left",159[[], [], [1], [1, 2]],160id="full lookbehind, offset > period, left",161),162pytest.param(163"-3d",164"right",165[[], [1], [1, 2], [2, 3]],166id="full lookbehind, right",167),168pytest.param(169"-3d",170"both",171[[], [1], [1, 2], [1, 2, 3]],172id="full lookbehind, both",173),174pytest.param(175"-2d",176"none",177[[], [1], [2], [3]],178id="full lookbehind, none",179),180pytest.param(181"-3d",182"none",183[[], [], [1], [2]],184id="full lookbehind, offset > period, none",185),186],187)188def test_rolling_negative_offset(189offset: str, closed: ClosedInterval, expected_values: list[list[int]]190) -> None:191df = pl.DataFrame(192{193"ts": pl.datetime_range(194datetime(2021, 1, 1), datetime(2021, 1, 4), "1d", eager=True195),196"value": [1, 2, 3, 4],197}198)199result = df.rolling("ts", period="2d", offset=offset, closed=closed).agg(200pl.col("value")201)202expected = pl.DataFrame(203{204"ts": pl.datetime_range(205datetime(2021, 1, 1), datetime(2021, 1, 4), "1d", eager=True206),207"value": expected_values,208}209)210assert_frame_equal(result, expected)211212213def test_rolling_skew() -> None:214s = pl.Series([1, 2, 3, 3, 2, 10, 8])215assert s.rolling_skew(window_size=4, bias=True).to_list() == pytest.approx(216[217None,218None,219None,220-0.49338220021815865,2210.0,2221.097025449363867,2230.09770939201338157,224]225)226227assert s.rolling_skew(window_size=4, bias=False).to_list() == pytest.approx(228[229None,230None,231None,232-0.8545630383279711,2330.0,2341.9001038154942962,2350.16923763134384154,236]237)238239240def test_rolling_kurtosis() -> None:241s = pl.Series([1, 2, 3, 3, 2, 10, 8])242assert s.rolling_kurtosis(window_size=4, bias=True).to_list() == pytest.approx(243[244None,245None,246None,247-1.371900826446281,248-1.9999999999999991,249-0.7055324211778693,250-1.7878967572797346,251]252)253assert s.rolling_kurtosis(254window_size=4, bias=True, fisher=False255).to_list() == pytest.approx(256[257None,258None,259None,2601.628099173553719,2611.0000000000000009,2622.2944675788221307,2631.2121032427202654,264]265)266267268@pytest.mark.parametrize("time_zone", [None, "America/Chicago"])269@pytest.mark.parametrize(270("rolling_fn", "expected_values", "expected_dtype"),271[272("rolling_mean_by", [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], pl.Float64),273("rolling_sum_by", [1, 2, 3, 4, 5, 6], pl.Int64),274("rolling_min_by", [1, 2, 3, 4, 5, 6], pl.Int64),275("rolling_max_by", [1, 2, 3, 4, 5, 6], pl.Int64),276("rolling_std_by", [None, None, None, None, None, None], pl.Float64),277("rolling_var_by", [None, None, None, None, None, None], pl.Float64),278("rolling_rank_by", [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], pl.Float64),279],280)281def test_rolling_crossing_dst(282time_zone: str | None,283rolling_fn: str,284expected_values: list[int | None | float],285expected_dtype: PolarsDataType,286) -> None:287ts = pl.datetime_range(288datetime(2021, 11, 5), datetime(2021, 11, 10), "1d", time_zone="UTC", eager=True289).dt.replace_time_zone(time_zone)290df = pl.DataFrame({"ts": ts, "value": [1, 2, 3, 4, 5, 6]})291292result = df.with_columns(293getattr(pl.col("value"), rolling_fn)(by="ts", window_size="1d", closed="right")294)295296expected = pl.DataFrame(297{"ts": ts, "value": expected_values}, schema_overrides={"value": expected_dtype}298)299assert_frame_equal(result, expected)300301302def test_rolling_by_invalid() -> None:303df = pl.DataFrame(304{"a": [1, 2, 3], "b": [4, 5, 6]}, schema_overrides={"a": pl.Int16}305).sort("a")306msg = "unsupported data type: i16 for temporal/index column, expected UInt64, UInt32, Int64, Int32, Datetime, Date, Duration, or Time"307with pytest.raises(InvalidOperationError, match=msg):308df.select(pl.col("b").rolling_min_by("a", "2i"))309df = pl.DataFrame({"a": [1, 2, 3], "b": [date(2020, 1, 1)] * 3}).sort("b")310msg = "`window_size` duration may not be a parsed integer"311with pytest.raises(InvalidOperationError, match=msg):312df.select(pl.col("a").rolling_min_by("b", "2i"))313314315def test_rolling_infinity() -> None:316s = pl.Series("col", ["-inf", "5", "5"]).cast(pl.Float64)317s = s.rolling_mean(2)318expected = pl.Series("col", [None, "-inf", "5"]).cast(pl.Float64)319assert_series_equal(s, expected)320321322def test_rolling_by_non_temporal_window_size() -> None:323df = pl.DataFrame(324{"a": [4, 5, 6], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]}325).sort("a", "b")326msg = "`window_size` duration may not be a parsed integer"327with pytest.raises(InvalidOperationError, match=msg):328df.with_columns(pl.col("a").rolling_sum_by("b", "2i", closed="left"))329330331@pytest.mark.parametrize(332"dtype",333[334pl.UInt8,335pl.Int64,336pl.Float32,337pl.Float64,338pl.Time,339pl.Date,340pl.Datetime("ms"),341pl.Datetime("us"),342pl.Datetime("ns"),343pl.Datetime("ns", "Asia/Kathmandu"),344pl.Duration("ms"),345pl.Duration("us"),346pl.Duration("ns"),347],348)349def test_rolling_extrema(dtype: PolarsDataType) -> None:350# sorted data and nulls flags trigger different kernels351df = (352(353pl.DataFrame(354{355"col1": pl.int_range(0, 7, eager=True),356"col2": pl.int_range(0, 7, eager=True).reverse(),357}358)359)360.with_columns(361pl.when(pl.int_range(0, pl.len(), eager=False) < 2)362.then(None)363.otherwise(pl.all())364.name.keep()365.name.suffix("_nulls")366)367.cast(dtype)368)369370expected = {371"col1": [None, None, 0, 1, 2, 3, 4],372"col2": [None, None, 4, 3, 2, 1, 0],373"col1_nulls": [None, None, None, None, 2, 3, 4],374"col2_nulls": [None, None, None, None, 2, 1, 0],375}376result = df.select([pl.all().rolling_min(3)])377assert result.to_dict(as_series=False) == {378k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()379}380381expected = {382"col1": [None, None, 2, 3, 4, 5, 6],383"col2": [None, None, 6, 5, 4, 3, 2],384"col1_nulls": [None, None, None, None, 4, 5, 6],385"col2_nulls": [None, None, None, None, 4, 3, 2],386}387result = df.select([pl.all().rolling_max(3)])388assert result.to_dict(as_series=False) == {389k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()390}391392# shuffled data triggers other kernels393df = df.select([pl.all().shuffle(seed=0)])394expected = {395"col1": [None, None, 0, 0, 4, 1, 1],396"col2": [None, None, 1, 1, 0, 0, 0],397"col1_nulls": [None, None, None, None, 4, None, None],398"col2_nulls": [None, None, None, None, 0, None, None],399}400result = df.select([pl.all().rolling_min(3)])401assert result.to_dict(as_series=False) == {402k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()403}404result = df.select([pl.all().rolling_max(3)])405406expected = {407"col1": [None, None, 5, 5, 6, 6, 6],408"col2": [None, None, 6, 6, 2, 5, 5],409"col1_nulls": [None, None, None, None, 6, None, None],410"col2_nulls": [None, None, None, None, 2, None, None],411}412assert result.to_dict(as_series=False) == {413k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()414}415416417@pytest.mark.parametrize(418"dtype",419[420pl.UInt8,421pl.Int64,422pl.Float32,423pl.Float64,424pl.Time,425pl.Date,426pl.Datetime("ms"),427pl.Datetime("us"),428pl.Datetime("ns"),429pl.Datetime("ns", "Asia/Kathmandu"),430pl.Duration("ms"),431pl.Duration("us"),432pl.Duration("ns"),433],434)435def test_rolling_group_by_extrema(dtype: PolarsDataType) -> None:436# ensure we hit different branches so create437438df = pl.DataFrame(439{440"col1": pl.arange(0, 7, eager=True).reverse(),441}442).with_columns(443pl.col("col1").reverse().alias("index"),444pl.col("col1").cast(dtype),445)446447expected = {448"col1_list": pl.Series(449[450[6],451[6, 5],452[6, 5, 4],453[5, 4, 3],454[4, 3, 2],455[3, 2, 1],456[2, 1, 0],457],458dtype=pl.List(dtype),459).to_list(),460"col1_min": pl.Series([6, 5, 4, 3, 2, 1, 0], dtype=dtype).to_list(),461"col1_max": pl.Series([6, 6, 6, 5, 4, 3, 2], dtype=dtype).to_list(),462"col1_first": pl.Series([6, 6, 6, 5, 4, 3, 2], dtype=dtype).to_list(),463"col1_last": pl.Series([6, 5, 4, 3, 2, 1, 0], dtype=dtype).to_list(),464}465result = (466df.rolling(467index_column="index",468period="3i",469)470.agg(471[472pl.col("col1").name.suffix("_list"),473pl.col("col1").min().name.suffix("_min"),474pl.col("col1").max().name.suffix("_max"),475pl.col("col1").first().alias("col1_first"),476pl.col("col1").last().alias("col1_last"),477]478)479.select(["col1_list", "col1_min", "col1_max", "col1_first", "col1_last"])480)481assert result.to_dict(as_series=False) == expected482483# ascending order484485df = pl.DataFrame(486{487"col1": pl.arange(0, 7, eager=True),488}489).with_columns(490pl.col("col1").alias("index"),491pl.col("col1").cast(dtype),492)493494result = (495df.rolling(496index_column="index",497period="3i",498)499.agg(500[501pl.col("col1").name.suffix("_list"),502pl.col("col1").min().name.suffix("_min"),503pl.col("col1").max().name.suffix("_max"),504pl.col("col1").first().alias("col1_first"),505pl.col("col1").last().alias("col1_last"),506]507)508.select(["col1_list", "col1_min", "col1_max", "col1_first", "col1_last"])509)510expected = {511"col1_list": pl.Series(512[513[0],514[0, 1],515[0, 1, 2],516[1, 2, 3],517[2, 3, 4],518[3, 4, 5],519[4, 5, 6],520],521dtype=pl.List(dtype),522).to_list(),523"col1_min": pl.Series([0, 0, 0, 1, 2, 3, 4], dtype=dtype).to_list(),524"col1_max": pl.Series([0, 1, 2, 3, 4, 5, 6], dtype=dtype).to_list(),525"col1_first": pl.Series([0, 0, 0, 1, 2, 3, 4], dtype=dtype).to_list(),526"col1_last": pl.Series([0, 1, 2, 3, 4, 5, 6], dtype=dtype).to_list(),527}528assert result.to_dict(as_series=False) == expected529530# shuffled data.531df = pl.DataFrame(532{533"col1": pl.arange(0, 7, eager=True).shuffle(1),534}535).with_columns(536pl.col("col1").cast(dtype),537pl.col("col1").sort().alias("index"),538)539540result = (541df.rolling(542index_column="index",543period="3i",544)545.agg(546[547pl.col("col1").min().name.suffix("_min"),548pl.col("col1").max().name.suffix("_max"),549pl.col("col1").name.suffix("_list"),550]551)552.select(["col1_list", "col1_min", "col1_max"])553)554expected = {555"col1_list": pl.Series(556[557[4],558[4, 2],559[4, 2, 5],560[2, 5, 1],561[5, 1, 6],562[1, 6, 0],563[6, 0, 3],564],565dtype=pl.List(dtype),566).to_list(),567"col1_min": pl.Series([4, 2, 2, 1, 1, 0, 0], dtype=dtype).to_list(),568"col1_max": pl.Series([4, 4, 5, 5, 6, 6, 6], dtype=dtype).to_list(),569}570assert result.to_dict(as_series=False) == expected571572573def test_rolling_slice_pushdown() -> None:574df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "a", "b"], "c": [1, 3, 5]}).lazy()575df = (576df.sort("a")577.rolling(578"a",579group_by="b",580period="2i",581)582.agg([(pl.col("c") - pl.col("c").shift(fill_value=0)).sum().alias("c")])583)584assert df.head(2).collect().to_dict(as_series=False) == {585"b": ["a", "a"],586"a": [1, 2],587"c": [1, 3],588}589590591def test_overlapping_groups_4628() -> None:592df = pl.DataFrame(593{594"index": [1, 2, 3, 4, 5, 6],595"val": [10, 20, 40, 70, 110, 160],596}597)598assert (599df.rolling(index_column=pl.col("index").set_sorted(), period="3i").agg(600[601pl.col("val").diff(n=1).alias("val.diff"),602(pl.col("val") - pl.col("val").shift(1)).alias("val - val.shift"),603]604)605).to_dict(as_series=False) == {606"index": [1, 2, 3, 4, 5, 6],607"val.diff": [608[None],609[None, 10],610[None, 10, 20],611[None, 20, 30],612[None, 30, 40],613[None, 40, 50],614],615"val - val.shift": [616[None],617[None, 10],618[None, 10, 20],619[None, 20, 30],620[None, 30, 40],621[None, 40, 50],622],623}624625626def test_rolling_skew_lagging_null_5179() -> None:627s = pl.Series([None, 3, 4, 1, None, None, None, None, 3, None, 5, 4, 7, 2, 1, None])628result = s.rolling_skew(3, min_samples=1).fill_nan(-1.0)629expected = pl.Series(630[631None,632-1.0,6330.0,634-0.3818017741606059,6350.0,636-1.0,637None,638None,639-1.0,640-1.0,6410.0,6420.0,6430.38180177416060695,6440.23906314692954517,6450.6309038567106234,6460.0,647]648)649assert_series_equal(result, expected, check_names=False)650651652def test_rolling_var_numerical_stability_5197() -> None:653s = pl.Series([*[1.2] * 4, *[3.3] * 7])654res = s.to_frame("a").with_columns(pl.col("a").rolling_var(5))[:, 0].to_list()655assert res[4:] == pytest.approx(656[6570.882,6581.3229999999999997,6591.3229999999999997,6600.8819999999999983,6610.0,6620.0,6630.0,664]665)666assert res[:4] == [None] * 4667668669def test_rolling_iter() -> None:670df = pl.DataFrame(671{672"date": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 5)],673"a": [1, 2, 2],674"b": [4, 5, 6],675}676).set_sorted("date")677678# Without 'by' argument679result1 = [680(name[0], data.shape)681for name, data in df.rolling(index_column="date", period="2d")682]683expected1 = [684(date(2020, 1, 1), (1, 3)),685(date(2020, 1, 2), (2, 3)),686(date(2020, 1, 5), (1, 3)),687]688assert result1 == expected1689690# With 'by' argument691result2 = [692(name, data.shape)693for name, data in df.rolling(index_column="date", period="2d", group_by="a")694]695expected2 = [696((1, date(2020, 1, 1)), (1, 3)),697((2, date(2020, 1, 2)), (1, 3)),698((2, date(2020, 1, 5)), (1, 3)),699]700assert result2 == expected2701702703def test_rolling_negative_period() -> None:704df = pl.DataFrame({"ts": [datetime(2020, 1, 1)], "value": [1]}).with_columns(705pl.col("ts").set_sorted()706)707with pytest.raises(708ComputeError, match="rolling window period should be strictly positive"709):710df.rolling("ts", period="-1d", offset="-1d").agg(pl.col("value"))711with pytest.raises(712ComputeError, match="rolling window period should be strictly positive"713):714df.lazy().rolling("ts", period="-1d", offset="-1d").agg(715pl.col("value")716).collect()717with pytest.raises(718InvalidOperationError, match="`window_size` must be strictly positive"719):720df.select(721pl.col("value").rolling_min_by("ts", window_size="-1d", closed="left")722)723with pytest.raises(724InvalidOperationError, match="`window_size` must be strictly positive"725):726df.lazy().select(727pl.col("value").rolling_min_by("ts", window_size="-1d", closed="left")728).collect()729730731def test_rolling_skew_window_offset() -> None:732assert (pl.arange(0, 20, eager=True) ** 2).rolling_skew(20)[733-1734] == 0.6612545648596286735736737def test_rolling_cov_corr() -> None:738df = pl.DataFrame({"x": [3, 3, 3, 5, 8], "y": [3, 4, 4, 4, 8]})739740res = df.select(741pl.rolling_cov("x", "y", window_size=3).alias("cov"),742pl.rolling_corr("x", "y", window_size=3).alias("corr"),743).to_dict(as_series=False)744assert res["cov"][2:] == pytest.approx([0.0, 0.0, 5.333333333333336])745assert res["corr"][2:] == pytest.approx([nan, 0.0, 0.9176629354822473], nan_ok=True)746assert res["cov"][:2] == [None] * 2747assert res["corr"][:2] == [None] * 2748749750def test_rolling_cov_corr_nulls() -> None:751df1 = pl.DataFrame(752{"a": [1.06, 1.07, 0.93, 0.78, 0.85], "lag_a": [1.0, 1.06, 1.07, 0.93, 0.78]}753)754df2 = pl.DataFrame(755{756"a": [1.0, 1.06, 1.07, 0.93, 0.78, 0.85],757"lag_a": [None, 1.0, 1.06, 1.07, 0.93, 0.78],758}759)760761val_1 = df1.select(762pl.rolling_corr("a", "lag_a", window_size=10, min_samples=5, ddof=1)763)764val_2 = df2.select(765pl.rolling_corr("a", "lag_a", window_size=10, min_samples=5, ddof=1)766)767768df1_expected = pl.DataFrame({"a": [None, None, None, None, 0.62204709]})769df2_expected = pl.DataFrame({"a": [None, None, None, None, None, 0.62204709]})770771assert_frame_equal(val_1, df1_expected, abs_tol=0.0000001)772assert_frame_equal(val_2, df2_expected, abs_tol=0.0000001)773774val_1 = df1.select(775pl.rolling_cov("a", "lag_a", window_size=10, min_samples=5, ddof=1)776)777val_2 = df2.select(778pl.rolling_cov("a", "lag_a", window_size=10, min_samples=5, ddof=1)779)780781df1_expected = pl.DataFrame({"a": [None, None, None, None, 0.009445]})782df2_expected = pl.DataFrame({"a": [None, None, None, None, None, 0.009445]})783784assert_frame_equal(val_1, df1_expected, abs_tol=0.0000001)785assert_frame_equal(val_2, df2_expected, abs_tol=0.0000001)786787788@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])789def test_rolling_empty_window_9406(time_unit: TimeUnit) -> None:790datecol = pl.Series(791"d",792[datetime(2019, 1, x) for x in [16, 17, 18, 22, 23]],793dtype=pl.Datetime(time_unit=time_unit, time_zone=None),794).set_sorted()795rawdata = pl.Series("x", [1.1, 1.2, 1.3, 1.15, 1.25], dtype=pl.Float64)796rmin = pl.Series("x", [None, 1.1, 1.1, None, 1.15], dtype=pl.Float64)797rmax = pl.Series("x", [None, 1.1, 1.2, None, 1.15], dtype=pl.Float64)798df = pl.DataFrame([datecol, rawdata])799800assert_frame_equal(801pl.DataFrame([datecol, rmax]),802df.select(803pl.col("d"),804pl.col("x").rolling_max_by("d", window_size="3d", closed="left"),805),806)807assert_frame_equal(808pl.DataFrame([datecol, rmin]),809df.select(810pl.col("d"),811pl.col("x").rolling_min_by("d", window_size="3d", closed="left"),812),813)814815816def test_rolling_weighted_quantile_10031() -> None:817assert_series_equal(818pl.Series([1, 2]).rolling_median(window_size=2, weights=[0, 1]),819pl.Series([None, 2.0]),820)821822assert_series_equal(823pl.Series([1, 2, 3, 5]).rolling_quantile(0.7, "linear", 3, [0.1, 0.3, 0.6]),824pl.Series([None, None, 2.55, 4.1]),825)826827assert_series_equal(828pl.Series([1, 2, 3, 5, 8]).rolling_quantile(8290.7, "linear", 4, [0.1, 0.2, 0, 0.3]830),831pl.Series([None, None, None, 3.5, 5.5]),832)833834835def test_rolling_meta_eq_10101() -> None:836assert pl.col("A").rolling_sum(10).meta.eq(pl.col("A").rolling_sum(10)) is True837838839def test_rolling_aggregations_unsorted_raise_10991() -> None:840df = pl.DataFrame(841{842"dt": [datetime(2020, 1, 3), datetime(2020, 1, 1), datetime(2020, 1, 2)],843"val": [1, 2, 3],844}845)846result = df.with_columns(roll=pl.col("val").rolling_sum_by("dt", "2d"))847expected = pl.DataFrame(848{849"dt": [datetime(2020, 1, 3), datetime(2020, 1, 1), datetime(2020, 1, 2)],850"val": [1, 2, 3],851"roll": [4, 2, 5],852}853)854assert_frame_equal(result, expected)855result = (856df.with_row_index()857.sort("dt")858.with_columns(roll=pl.col("val").rolling_sum_by("dt", "2d"))859.sort("index")860.drop("index")861)862assert_frame_equal(result, expected)863864865def test_rolling_aggregations_with_over_11225() -> None:866start = datetime(2001, 1, 1)867868df_temporal = pl.DataFrame(869{870"date": [start + timedelta(days=k) for k in range(5)],871"group": ["A"] * 2 + ["B"] * 3,872}873).with_row_index()874875df_temporal = df_temporal.sort("group", "date")876877result = df_temporal.with_columns(878rolling_row_mean=pl.col("index")879.rolling_mean_by(880by="date",881window_size="2d",882closed="left",883)884.over("group")885)886expected = pl.DataFrame(887{888"index": [0, 1, 2, 3, 4],889"date": pl.datetime_range(date(2001, 1, 1), date(2001, 1, 5), eager=True),890"group": ["A", "A", "B", "B", "B"],891"rolling_row_mean": [None, 0.0, None, 2.0, 2.5],892},893schema_overrides={"index": pl.get_index_type()},894)895assert_frame_equal(result, expected)896897898@pytest.mark.parametrize("dtype", INTEGER_DTYPES)899def test_rolling_ints(dtype: PolarsDataType) -> None:900s = pl.Series("a", [1, 2, 3, 2, 1], dtype=dtype)901assert_series_equal(902s.rolling_min(2), pl.Series("a", [None, 1, 2, 2, 1], dtype=dtype)903)904assert_series_equal(905s.rolling_max(2), pl.Series("a", [None, 2, 3, 3, 2], dtype=dtype)906)907assert_series_equal(908s.rolling_sum(2),909pl.Series(910"a",911[None, 3, 5, 5, 3],912dtype=(913pl.Int64 if dtype in [pl.Int8, pl.UInt8, pl.Int16, pl.UInt16] else dtype914),915),916)917assert_series_equal(s.rolling_mean(2), pl.Series("a", [None, 1.5, 2.5, 2.5, 1.5]))918919assert s.rolling_std(2).to_list()[1] == pytest.approx(0.7071067811865476)920assert s.rolling_var(2).to_list()[1] == pytest.approx(0.5)921assert s.rolling_std(2, ddof=0).to_list()[1] == pytest.approx(0.5)922assert s.rolling_var(2, ddof=0).to_list()[1] == pytest.approx(0.25)923924assert_series_equal(925s.rolling_median(4), pl.Series("a", [None, None, None, 2, 2], dtype=pl.Float64)926)927assert_series_equal(928s.rolling_quantile(0, "nearest", 3),929pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64),930)931assert_series_equal(932s.rolling_quantile(0, "lower", 3),933pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64),934)935assert_series_equal(936s.rolling_quantile(0, "higher", 3),937pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64),938)939assert s.rolling_skew(4).null_count() == 3940941942def test_rolling_floats() -> None:943# 3099944# test if we maintain proper dtype945for dt in [pl.Float32, pl.Float64]:946result = pl.Series([1, 2, 3], dtype=dt).rolling_min(2, weights=[0.1, 0.2])947expected = pl.Series([None, 0.1, 0.2], dtype=dt)948assert_series_equal(result, expected)949950df = pl.DataFrame({"val": [1.0, 2.0, 3.0, np.nan, 5.0, 6.0, 7.0]})951952for e in [953pl.col("val").rolling_min(window_size=3),954pl.col("val").rolling_max(window_size=3),955]:956out = df.with_columns(e).to_series()957assert out.null_count() == 2958assert np.isnan(out.to_numpy()).sum() == 5959960expected_values = [None, None, 2.0, 3.0, 5.0, 6.0, 6.0]961assert (962df.with_columns(pl.col("val").rolling_median(window_size=3))963.to_series()964.to_list()965== expected_values966)967assert (968df.with_columns(pl.col("val").rolling_quantile(0.5, window_size=3))969.to_series()970.to_list()971== expected_values972)973974nan = float("nan")975s = pl.Series("a", [11.0, 2.0, 9.0, nan, 8.0])976assert_series_equal(977s.rolling_sum(3),978pl.Series("a", [None, None, 22.0, nan, nan]),979)980981982def test_rolling_std_nulls_min_samples_1_20076() -> None:983result = pl.Series([1, 2, None, 4]).rolling_std(3, min_samples=1)984expected = pl.Series(985[None, 0.7071067811865476, 0.7071067811865476, 1.4142135623730951]986)987assert_series_equal(result, expected)988989990@pytest.mark.parametrize(991("bools", "window", "expected"),992[993(994[[True, False, True]],9952,996[[None, 1, 1]],997),998(999[[True, False, True, True, False, False, False, True, True]],10004,1001[[None, None, None, 3, 2, 2, 1, 1, 2]],1002),1003],1004)1005def test_rolling_eval_boolean_list(1006bools: list[list[bool]], window: int, expected: list[list[int]]1007) -> None:1008for accessor, dtype in (1009("list", pl.List(pl.Boolean)),1010("arr", pl.Array(pl.Boolean, shape=len(bools[0]))),1011):1012s = pl.Series(name="bools", values=bools, dtype=dtype)1013res = getattr(s, accessor).eval(pl.element().rolling_sum(window)).to_list()1014assert res == expected101510161017def test_rolling_by_date() -> None:1018df = pl.DataFrame(1019{1020"dt": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1021"val": [1, 2, 3],1022}1023).sort("dt")10241025result = df.with_columns(roll=pl.col("val").rolling_sum_by("dt", "2d"))1026expected = df.with_columns(roll=pl.Series([1, 3, 5]))1027assert_frame_equal(result, expected)102810291030@pytest.mark.parametrize("dtype", [pl.Int64, pl.Int32, pl.UInt64, pl.UInt32])1031def test_rolling_by_integer(dtype: PolarsDataType) -> None:1032df = (1033pl.DataFrame({"val": [1, 2, 3]})1034.with_row_index()1035.with_columns(pl.col("index").cast(dtype))1036)1037result = df.with_columns(roll=pl.col("val").rolling_sum_by("index", "2i"))1038expected = df.with_columns(roll=pl.Series([1, 3, 5]))1039assert_frame_equal(result, expected)104010411042@pytest.mark.parametrize("dtype", INTEGER_DTYPES)1043def test_rolling_sum_by_integer(dtype: PolarsDataType) -> None:1044lf = (1045pl.LazyFrame({"a": [1, 2, 3]}, schema={"a": dtype})1046.with_row_index()1047.select(pl.col("a").rolling_sum_by("index", "2i"))1048)1049result = lf.collect()1050expected_dtype = (1051pl.Int64 if dtype in [pl.Int8, pl.UInt8, pl.Int16, pl.UInt16] else dtype1052)1053expected = pl.DataFrame({"a": [1, 3, 5]}, schema={"a": expected_dtype})1054assert_frame_equal(result, expected)1055assert lf.collect_schema() == expected.schema105610571058def test_rolling_nanoseconds_11003() -> None:1059df = pl.DataFrame(1060{1061"dt": [1062"2020-01-01T00:00:00.000000000",1063"2020-01-01T00:00:00.000000100",1064"2020-01-01T00:00:00.000000200",1065],1066"val": [1, 2, 3],1067}1068)1069df = df.with_columns(pl.col("dt").str.to_datetime(time_unit="ns")).set_sorted("dt")1070result = df.with_columns(pl.col("val").rolling_sum_by("dt", "500ns"))1071expected = df.with_columns(val=pl.Series([1, 3, 6]))1072assert_frame_equal(result, expected)107310741075def test_rolling_by_1mo_saturating_12216() -> None:1076df = pl.DataFrame(1077{1078"date": [1079date(2020, 6, 29),1080date(2020, 6, 30),1081date(2020, 7, 30),1082date(2020, 7, 31),1083date(2020, 8, 1),1084],1085"val": [1, 2, 3, 4, 5],1086}1087).set_sorted("date")1088result = df.rolling(index_column="date", period="1mo").agg(vals=pl.col("val"))1089expected = pl.DataFrame(1090{1091"date": [1092date(2020, 6, 29),1093date(2020, 6, 30),1094date(2020, 7, 30),1095date(2020, 7, 31),1096date(2020, 8, 1),1097],1098"vals": [[1], [1, 2], [3], [3, 4], [3, 4, 5]],1099}1100)1101assert_frame_equal(result, expected)11021103# check with `closed='both'` against DuckDB output1104result = df.rolling(index_column="date", period="1mo", closed="both").agg(1105vals=pl.col("val")1106)1107expected = pl.DataFrame(1108{1109"date": [1110date(2020, 6, 29),1111date(2020, 6, 30),1112date(2020, 7, 30),1113date(2020, 7, 31),1114date(2020, 8, 1),1115],1116"vals": [[1], [1, 2], [2, 3], [2, 3, 4], [3, 4, 5]],1117}1118)1119assert_frame_equal(result, expected)112011211122def test_index_expr_with_literal() -> None:1123df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}).sort("a")1124out = df.rolling(index_column=(5 * pl.col("a")).set_sorted(), period="2i").agg(1125pl.col("b")1126)1127expected = pl.DataFrame({"literal": [5, 10, 15], "b": [["a"], ["b"], ["c"]]})1128assert_frame_equal(out, expected)112911301131def test_index_expr_output_name_12244() -> None:1132df = pl.DataFrame({"A": [1, 2, 3]})11331134out = df.rolling(pl.int_range(0, pl.len()), period="2i").agg("A")1135assert out.to_dict(as_series=False) == {1136"literal": [0, 1, 2],1137"A": [[1], [1, 2], [2, 3]],1138}113911401141def test_rolling_median() -> None:1142for n in range(10, 25):1143array = np.random.randint(0, 20, n)1144for k in [3, 5, 7]:1145a = pl.Series(array)1146assert_series_equal(1147a.rolling_median(k), pl.from_pandas(a.to_pandas().rolling(k).median())1148)114911501151@pytest.mark.slow1152def test_rolling_median_2() -> None:1153np.random.seed(12)1154n = 10001155df = pl.DataFrame({"x": np.random.normal(0, 1, n)})1156# this can differ because simd sizes and non-associativity of floats.1157assert df.select(1158pl.col("x").rolling_median(window_size=10).sum()1159).item() == pytest.approx(5.139429061527812)1160assert df.select(1161pl.col("x").rolling_median(window_size=100).sum()1162).item() == pytest.approx(26.60506093611384)116311641165@pytest.mark.parametrize(1166("dates", "closed", "expected"),1167[1168(1169[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1170"right",1171[None, 3, 5],1172),1173(1174[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1175"left",1176[None, None, 3],1177),1178(1179[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1180"both",1181[None, 3, 6],1182),1183(1184[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1185"none",1186[None, None, None],1187),1188(1189[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 4)],1190"right",1191[None, 3, None],1192),1193(1194[date(2020, 1, 1), date(2020, 1, 3), date(2020, 1, 4)],1195"right",1196[None, None, 5],1197),1198(1199[date(2020, 1, 1), date(2020, 1, 3), date(2020, 1, 5)],1200"right",1201[None, None, None],1202),1203],1204)1205def test_rolling_min_samples(1206dates: list[date], closed: ClosedInterval, expected: list[int]1207) -> None:1208df = pl.DataFrame({"date": dates, "value": [1, 2, 3]}).sort("date")1209result = df.select(1210pl.col("value").rolling_sum_by(1211"date", window_size="2d", min_samples=2, closed=closed1212)1213)["value"]1214assert_series_equal(result, pl.Series("value", expected, pl.Int64))12151216# Starting with unsorted data1217result = (1218df.sort("date", descending=True)1219.with_columns(1220pl.col("value").rolling_sum_by(1221"date", window_size="2d", min_samples=2, closed=closed1222)1223)1224.sort("date")["value"]1225)1226assert_series_equal(result, pl.Series("value", expected, pl.Int64))122712281229def test_rolling_returns_scalar_15656() -> None:1230df = pl.DataFrame(1231{1232"a": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1233"b": [4, 5, 6],1234"c": [1, 2, 3],1235}1236)1237result = df.group_by("c").agg(pl.col("b").rolling_mean_by("a", "2d")).sort("c")1238expected = pl.DataFrame({"c": [1, 2, 3], "b": [[4.0], [5.0], [6.0]]})1239assert_frame_equal(result, expected)124012411242def test_rolling_invalid() -> None:1243df = pl.DataFrame(1244{1245"values": [1, 4],1246"times": [datetime(2020, 1, 3), datetime(2020, 1, 1)],1247},1248)1249with pytest.raises(1250InvalidOperationError, match="duration may not be a parsed integer"1251):1252(1253df.sort("times")1254.rolling("times", period="3000i")1255.agg(pl.col("values").sum().alias("sum"))1256)1257with pytest.raises(1258InvalidOperationError, match="duration must be a parsed integer"1259):1260(1261df.with_row_index()1262.rolling("index", period="3000d")1263.agg(pl.col("values").sum().alias("sum"))1264)126512661267def test_by_different_length() -> None:1268df = pl.DataFrame({"b": [1]})1269with pytest.raises(InvalidOperationError, match="must be the same length"):1270df.select(1271pl.col("b").rolling_max_by(pl.Series([datetime(2020, 1, 1)] * 2), "1d")1272)127312741275def test_incorrect_nulls_16246() -> None:1276df = pl.concat(1277[1278pl.DataFrame({"a": [datetime(2020, 1, 1)], "b": [1]}),1279pl.DataFrame({"a": [datetime(2021, 1, 1)], "b": [1]}),1280],1281rechunk=False,1282)1283result = df.select(pl.col("b").rolling_max_by("a", "1d"))1284expected = pl.DataFrame({"b": [1, 1]})1285assert_frame_equal(result, expected)128612871288def test_rolling_with_dst() -> None:1289df = pl.DataFrame(1290{"a": [datetime(2020, 10, 26, 1), datetime(2020, 10, 26)], "b": [1, 2]}1291).with_columns(pl.col("a").dt.replace_time_zone("Europe/London"))1292result = df.select(pl.col("b").rolling_sum_by("a", "1d"))1293expected = pl.DataFrame({"b": [3, 2]})1294assert_frame_equal(result, expected)12951296result = df.sort("a").select(pl.col("b").rolling_sum_by("a", "1d"))1297expected = pl.DataFrame({"b": [2, 3]})1298assert_frame_equal(result, expected)129913001301def interval_defs() -> SearchStrategy[ClosedInterval]:1302closed: list[ClosedInterval] = ["left", "right", "both", "none"]1303return st.sampled_from(closed)130413051306@given(1307period=st.timedeltas(1308min_value=timedelta(microseconds=0), max_value=timedelta(days=1000)1309).map(parse_as_duration_string),1310offset=st.timedeltas(1311min_value=timedelta(days=-1000), max_value=timedelta(days=1000)1312).map(parse_as_duration_string),1313closed=interval_defs(),1314data=st.data(),1315time_unit=_time_units(),1316)1317def test_rolling_parametric(1318period: str,1319offset: str,1320closed: ClosedInterval,1321data: st.DataObject,1322time_unit: TimeUnit,1323) -> None:1324assume(period != "")1325dataframe = data.draw(1326dataframes(1327[1328column(1329"ts",1330strategy=st.datetimes(1331min_value=datetime(2000, 1, 1),1332max_value=datetime(2001, 1, 1),1333),1334dtype=pl.Datetime(time_unit),1335),1336column(1337"value",1338strategy=st.integers(min_value=-100, max_value=100),1339dtype=pl.Int64,1340),1341],1342min_size=1,1343)1344)1345df = dataframe.sort("ts")1346result = df.rolling("ts", period=period, offset=offset, closed=closed).agg(1347pl.col("value")1348)13491350expected_dict: dict[str, list[object]] = {"ts": [], "value": []}1351for ts, _ in df.iter_rows():1352window = df.filter(1353pl.col("ts").is_between(1354pl.lit(ts, dtype=pl.Datetime(time_unit)).dt.offset_by(offset),1355pl.lit(ts, dtype=pl.Datetime(time_unit))1356.dt.offset_by(offset)1357.dt.offset_by(period),1358closed=closed,1359)1360)1361value = window["value"].to_list()1362expected_dict["ts"].append(ts)1363expected_dict["value"].append(value)1364expected = pl.DataFrame(expected_dict).select(1365pl.col("ts").cast(pl.Datetime(time_unit)),1366pl.col("value").cast(pl.List(pl.Int64)),1367)1368assert_frame_equal(result, expected)136913701371@given(1372window_size=st.timedeltas(1373min_value=timedelta(microseconds=0), max_value=timedelta(days=2)1374).map(parse_as_duration_string),1375closed=interval_defs(),1376data=st.data(),1377time_unit=_time_units(),1378aggregation=st.sampled_from(1379[1380"min",1381"max",1382"mean",1383"sum",1384"std",1385"var",1386"median",1387]1388),1389)1390def test_rolling_aggs(1391window_size: str,1392closed: ClosedInterval,1393data: st.DataObject,1394time_unit: TimeUnit,1395aggregation: str,1396) -> None:1397assume(window_size != "")13981399# Testing logic can be faulty when window is more precise than time unit1400# https://github.com/pola-rs/polars/issues/117541401assume(not (time_unit == "ms" and "us" in window_size))14021403dataframe = data.draw(1404dataframes(1405[1406column(1407"ts",1408strategy=st.datetimes(1409min_value=datetime(2000, 1, 1),1410max_value=datetime(2001, 1, 1),1411),1412dtype=pl.Datetime(time_unit),1413),1414column(1415"value",1416strategy=st.integers(min_value=-100, max_value=100),1417dtype=pl.Int64,1418),1419],1420)1421)1422df = dataframe.sort("ts")1423func = f"rolling_{aggregation}_by"1424result = df.with_columns(1425getattr(pl.col("value"), func)("ts", window_size=window_size, closed=closed)1426)1427result_from_unsorted = dataframe.with_columns(1428getattr(pl.col("value"), func)("ts", window_size=window_size, closed=closed)1429).sort("ts")14301431expected_dict: dict[str, list[object]] = {"ts": [], "value": []}1432for ts, _ in df.iter_rows():1433window = df.filter(1434pl.col("ts").is_between(1435pl.lit(ts, dtype=pl.Datetime(time_unit)).dt.offset_by(1436f"-{window_size}"1437),1438pl.lit(ts, dtype=pl.Datetime(time_unit)),1439closed=closed,1440)1441)1442expected_dict["ts"].append(ts)1443if window.is_empty():1444expected_dict["value"].append(None)1445else:1446value = getattr(window["value"], aggregation)()1447expected_dict["value"].append(value)1448expected = pl.DataFrame(expected_dict).select(1449pl.col("ts").cast(pl.Datetime(time_unit)),1450pl.col("value").cast(result["value"].dtype),1451)1452assert_frame_equal(result, expected)1453assert_frame_equal(result_from_unsorted, expected)145414551456def test_window_size_validation() -> None:1457df = pl.DataFrame({"x": [1.0]})14581459with pytest.raises(OverflowError, match=r"can't convert negative int to unsigned"):1460df.with_columns(trailing_min=pl.col("x").rolling_min(window_size=-3))146114621463def test_rolling_empty_21032() -> None:1464df = pl.DataFrame(schema={"a": pl.Datetime("ms"), "b": pl.Int64()})14651466result = df.rolling(index_column="a", period=timedelta(days=2)).agg(1467pl.col("b").sum()1468)1469assert_frame_equal(result, df)14701471result = df.rolling(1472index_column="a", period=timedelta(days=2), offset=timedelta(days=3)1473).agg(pl.col("b").sum())1474assert_frame_equal(result, df)147514761477def test_rolling_offset_agg_15122() -> None:1478df = pl.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 1, 2, 3]})14791480result = df.rolling(index_column="b", period="1i", offset="0i", group_by="a").agg(1481window=pl.col("b")1482)1483expected = df.with_columns(window=pl.Series([[2], [3], [], [2], [3], []]))1484assert_frame_equal(result, expected)14851486result = df.rolling(index_column="b", period="1i", offset="1i", group_by="a").agg(1487window=pl.col("b")1488)1489expected = df.with_columns(window=pl.Series([[3], [], [], [3], [], []]))1490assert_frame_equal(result, expected)149114921493def test_rolling_sum_stability_11146() -> None:1494data_frame = pl.DataFrame(1495{1496"value": [14970.0,1498290.57,1499107.0,1500172.0,1501124.25,1502304.0,1503379.5,1504347.35,15051516.41,1506386.12,1507226.5,1508294.62,1509125.5,15100.0,15110.0,15120.0,15130.0,15140.0,15150.0,15160.0,15170.0,1518]1519}1520)1521assert (1522data_frame.with_columns(1523pl.col("value").rolling_mean(window_size=8, min_samples=1).alias("test_col")1524)["test_col"][-1]1525== 0.01526)152715281529def test_rolling() -> None:1530df = pl.DataFrame(1531{1532"n": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10],1533"col1": ["A", "B"] * 11,1534}1535)15361537assert df.rolling("n", period="1i", group_by="col1").agg().to_dict(1538as_series=False1539) == {1540"col1": [1541"A",1542"A",1543"A",1544"A",1545"A",1546"A",1547"A",1548"A",1549"A",1550"A",1551"A",1552"B",1553"B",1554"B",1555"B",1556"B",1557"B",1558"B",1559"B",1560"B",1561"B",1562"B",1563],1564"n": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],1565}156615671568@pytest.mark.parametrize(1569"method",1570["nearest", "higher", "lower", "midpoint", "linear", "equiprobable"],1571)1572def test_rolling_quantile_with_nulls_22781(method: QuantileMethod) -> None:1573lf = pl.LazyFrame(1574{1575"index": [0, 1, 2, 3, 4, 5, 6, 7, 8],1576"a": [None, None, 1.0, None, None, 1.0, 1.0, None, None],1577}1578)1579out = (1580lf.rolling("index", period="2i")1581.agg(pl.col("a").quantile(0.5, interpolation=method))1582.collect()1583)1584expected = pl.Series("a", [None, None, 1.0, 1.0, None, 1.0, 1.0, 1.0, None])1585assert_series_equal(out["a"], expected)158615871588def test_rolling_quantile_nearest_23392() -> None:1589base = range(11)1590s = pl.Series(base)15911592shuffle_base = list(base)1593random.shuffle(shuffle_base)1594s_shuffled = pl.Series(shuffle_base)15951596for q in np.arange(0, 1.0, 0.02, dtype=float):1597out = s.rolling_quantile(q, interpolation="nearest", window_size=11)15981599# explicit:1600expected = pl.Series([None] * 10 + [float(round(q * 10.0))])1601assert_series_equal(out, expected)16021603# equivalence:1604equiv = s.quantile(q, interpolation="nearest")1605assert out.last() == equiv16061607# shuffled:1608out = s_shuffled.rolling_quantile(q, interpolation="nearest", window_size=11)1609assert_series_equal(out, expected)161016111612def test_rolling_quantile_temporals() -> None:1613tz = ZoneInfo("Asia/Tokyo")1614dt = pl.Datetime("ms", "Asia/Tokyo")1615# We use ms to verify that the correct time unit is propagating.1616lf = pl.LazyFrame(1617{1618"date": [date(2025, 1, x) for x in range(1, 6)],1619"datetime": [datetime(2025, 1, x) for x in range(1, 6)],1620"datetime_tu_tz": pl.Series(1621[datetime(2025, 1, x, tzinfo=tz) for x in range(1, 6)], dtype=dt1622),1623"duration": pl.Series(1624[timedelta(hours=x) for x in range(1, 6)], dtype=pl.Duration("ms")1625),1626"time": [time(hour=x) for x in range(1, 6)],1627}1628)1629result = lf.select(1630rolling_date=pl.col("date").rolling_quantile(1631quantile=0.5, window_size=4, interpolation="linear"1632),1633rolling_datetime=pl.col("datetime").rolling_quantile(1634quantile=0.5, window_size=4, interpolation="linear"1635),1636rolling_datetime_tu_tz=pl.col("datetime_tu_tz").rolling_quantile(1637quantile=0.5, window_size=4, interpolation="linear"1638),1639rolling_duration=pl.col("duration").rolling_quantile(1640quantile=0.5, window_size=4, interpolation="linear"1641),1642rolling_time=pl.col("time").rolling_quantile(1643quantile=0.5, window_size=4, interpolation="linear"1644),1645)1646expected = pl.DataFrame(1647{1648"rolling_date": pl.Series(1649[None, None, None, datetime(2025, 1, 2, 12), datetime(2025, 1, 3, 12)],1650dtype=pl.Datetime,1651),1652"rolling_datetime": pl.Series(1653[None, None, None, datetime(2025, 1, 2, 12), datetime(2025, 1, 3, 12)]1654),1655"rolling_datetime_tu_tz": pl.Series(1656[1657None,1658None,1659None,1660datetime(2025, 1, 2, 12, tzinfo=tz),1661datetime(2025, 1, 3, 12, tzinfo=tz),1662],1663dtype=dt,1664),1665"rolling_duration": pl.Series(1666[None, None, None, timedelta(hours=2.5), timedelta(hours=3.5)],1667dtype=pl.Duration("ms"),1668),1669"rolling_time": [1670None,1671None,1672None,1673time(hour=2, minute=30),1674time(hour=3, minute=30),1675],1676}1677)1678assert result.collect_schema() == pl.Schema(1679{ # type: ignore[arg-type]1680"rolling_date": pl.Datetime("us"),1681"rolling_datetime": pl.Datetime("us"),1682"rolling_datetime_tu_tz": dt,1683"rolling_duration": pl.Duration("ms"),1684"rolling_time": pl.Time,1685}1686)1687assert_frame_equal(result.collect(), expected)168816891690def test_rolling_agg_quantile_temporal() -> None:1691tz = ZoneInfo("Asia/Tokyo")1692dt = pl.Datetime("ms", "Asia/Tokyo")1693# We use ms to verify that the correct time unit is propagating.1694lf = pl.LazyFrame(1695{1696"index": [1, 2, 3, 4, 5],1697"int": [1, 2, 3, 4, 5],1698"date": [date(2025, 1, x) for x in range(1, 6)],1699"datetime": [datetime(2025, 1, x) for x in range(1, 6)],1700"datetime_tu_tz": pl.Series(1701[datetime(2025, 1, x, tzinfo=tz) for x in range(1, 6)], dtype=dt1702),1703"duration": pl.Series(1704[timedelta(hours=x) for x in range(1, 6)], dtype=pl.Duration("ms")1705),1706"time": [time(hour=x) for x in range(1, 6)],1707}1708)17091710# Using rolling.agg()1711result1 = lf.rolling("index", period="4i").agg(1712rolling_int=pl.col("int").quantile(0.5, "linear"),1713rolling_date=pl.col("date").quantile(0.5, "linear"),1714rolling_datetime=pl.col("datetime").quantile(0.5, "linear"),1715rolling_datetime_tu_tz=pl.col("datetime_tu_tz").quantile(0.5, "linear"),1716rolling_duration=pl.col("duration").quantile(0.5, "linear"),1717rolling_time=pl.col("time").quantile(0.5, "linear"),1718)1719# Using rolling_quantile_by()1720result2 = lf.select(1721"index",1722rolling_int=pl.col("int").rolling_quantile_by(1723"index", window_size="4i", quantile=0.5, interpolation="linear"1724),1725rolling_date=pl.col("date").rolling_quantile_by(1726"index", window_size="4i", quantile=0.5, interpolation="linear"1727),1728rolling_datetime=pl.col("datetime").rolling_quantile_by(1729"index", window_size="4i", quantile=0.5, interpolation="linear"1730),1731rolling_datetime_tu_tz=pl.col("datetime_tu_tz").rolling_quantile_by(1732"index", window_size="4i", quantile=0.5, interpolation="linear"1733),1734rolling_duration=pl.col("duration").rolling_quantile_by(1735"index", window_size="4i", quantile=0.5, interpolation="linear"1736),1737rolling_time=pl.col("time").rolling_quantile_by(1738"index", window_size="4i", quantile=0.5, interpolation="linear"1739),1740)1741expected = pl.DataFrame(1742{1743"index": [1, 2, 3, 4, 5],1744"rolling_int": [1.0, 1.5, 2.0, 2.5, 3.5],1745"rolling_date": pl.Series(1746[1747datetime(2025, 1, 1),1748datetime(2025, 1, 1, 12),1749datetime(2025, 1, 2),1750datetime(2025, 1, 2, 12),1751datetime(2025, 1, 3, 12),1752]1753),1754"rolling_datetime": pl.Series(1755[1756datetime(2025, 1, 1),1757datetime(2025, 1, 1, 12),1758datetime(2025, 1, 2),1759datetime(2025, 1, 2, 12),1760datetime(2025, 1, 3, 12),1761]1762),1763"rolling_datetime_tu_tz": pl.Series(1764[1765datetime(2025, 1, 1, tzinfo=tz),1766datetime(2025, 1, 1, 12, tzinfo=tz),1767datetime(2025, 1, 2, tzinfo=tz),1768datetime(2025, 1, 2, 12, tzinfo=tz),1769datetime(2025, 1, 3, 12, tzinfo=tz),1770],1771dtype=dt,1772),1773"rolling_duration": pl.Series(1774[1775timedelta(hours=1),1776timedelta(hours=1.5),1777timedelta(hours=2),1778timedelta(hours=2.5),1779timedelta(hours=3.5),1780],1781dtype=pl.Duration("ms"),1782),1783"rolling_time": [1784time(hour=1),1785time(hour=1, minute=30),1786time(hour=2),1787time(hour=2, minute=30),1788time(hour=3, minute=30),1789],1790}1791)1792expected_schema = pl.Schema(1793{ # type: ignore[arg-type]1794"index": pl.Int64,1795"rolling_int": pl.Float64,1796"rolling_date": pl.Datetime("us"),1797"rolling_datetime": pl.Datetime("us"),1798"rolling_datetime_tu_tz": dt,1799"rolling_duration": pl.Duration("ms"),1800"rolling_time": pl.Time,1801}1802)1803assert result1.collect_schema() == expected_schema1804assert result2.collect_schema() == expected_schema1805assert_frame_equal(result1.collect(), expected)1806assert_frame_equal(result2.collect(), expected)180718081809def test_rolling_quantile_nearest_kernel_23392() -> None:1810df = pl.DataFrame(1811{1812"dt": [1813datetime(2021, 1, 1),1814datetime(2021, 1, 2),1815datetime(2021, 1, 4),1816datetime(2021, 1, 5),1817datetime(2021, 1, 7),1818],1819"values": pl.arange(0, 5, eager=True),1820}1821)1822# values (period="3d", quantile=0.7) are chosen to trigger index rounding1823out = (1824df.set_sorted("dt")1825.rolling("dt", period="3d", closed="both")1826.agg([pl.col("values").quantile(quantile=0.7).alias("quantile")])1827.select("quantile")1828)1829expected = pl.DataFrame({"quantile": [0.0, 1.0, 1.0, 2.0, 3.0]})1830assert_frame_equal(out, expected)183118321833def test_rolling_quantile_nearest_with_nulls_23932() -> None:1834lf = pl.LazyFrame(1835{1836"index": [0, 1, 2, 3, 4, 5, 6],1837"a": [None, None, 1.0, 2.0, 3.0, None, None],1838}1839)1840# values (period="3i", quantile=0.7) are chosen to trigger index rounding1841out = (1842lf.rolling("index", period="3i")1843.agg(pl.col("a").quantile(0.7, interpolation="nearest"))1844.collect()1845)1846expected = pl.Series("a", [None, None, 1.0, 2.0, 2.0, 3.0, 3.0])1847assert_series_equal(out["a"], expected)184818491850def test_wtd_min_periods_less_window() -> None:1851df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}).with_columns(1852pl.col("a")1853.rolling_mean(1854window_size=3, weights=[0.25, 0.5, 0.25], min_samples=2, center=True1855)1856.alias("kernel_mean")1857)18581859expected = pl.DataFrame(1860{"a": [1, 2, 3, 4, 5], "kernel_mean": [1.333333, 2, 3, 4, 4.666667]}1861)18621863assert_frame_equal(df, expected)18641865df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}).with_columns(1866pl.col("a")1867.rolling_sum(1868window_size=3, weights=[0.25, 0.5, 0.25], min_samples=2, center=True1869)1870.alias("kernel_sum")1871)1872expected = pl.DataFrame(1873{"a": [1, 2, 3, 4, 5], "kernel_sum": [1.0, 2.0, 3.0, 4.0, 3.5]}1874)18751876df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}).with_columns(1877pl.col("a")1878.rolling_mean(1879window_size=3, weights=[0.2, 0.3, 0.5], min_samples=2, center=False1880)1881.alias("kernel_mean")1882)18831884expected = pl.DataFrame(1885{"a": [1, 2, 3, 4, 5], "kernel_mean": [None, 1.625, 2.3, 3.3, 4.3]}1886)18871888assert_frame_equal(df, expected)18891890df = pl.DataFrame({"a": [1, 2]}).with_columns(1891pl.col("a")1892.rolling_mean(1893window_size=3, weights=[0.25, 0.5, 0.25], min_samples=2, center=True1894)1895.alias("kernel_mean")1896)18971898# Handle edge case where the window size is larger than the number of elements1899expected = pl.DataFrame({"a": [1, 2], "kernel_mean": [1.333333, 1.666667]})1900assert_frame_equal(df, expected)19011902df = pl.DataFrame({"a": [1, 2]}).with_columns(1903pl.col("a")1904.rolling_mean(1905window_size=3, weights=[0.25, 0.25, 0.5], min_samples=1, center=False1906)1907.alias("kernel_mean")1908)19091910expected = pl.DataFrame({"a": [1, 2], "kernel_mean": [1.0, 2 * 2 / 3 + 1 * 1 / 3]})19111912df = pl.DataFrame({"a": [1]}).with_columns(1913pl.col("a")1914.rolling_sum(19156, center=True, min_samples=0, weights=[1, 10, 100, 1000, 10_000, 100_000]1916)1917.alias("kernel_sum")1918)1919expected = pl.DataFrame({"a": [1], "kernel_sum": [1000.0]})1920assert_frame_equal(df, expected)192119221923def test_rolling_median_23480() -> None:1924vals = [None] * 17 + [3262645.8, 856191.4, 1635379.0, 34707156.0]1925evals = [None] * 19 + [1635379.0, (3262645.8 + 1635379.0) / 2]1926out = pl.DataFrame({"a": vals}).select(1927r15=pl.col("a").rolling_median(15, min_samples=3),1928r17=pl.col("a").rolling_median(17, min_samples=3),1929)1930expected = pl.DataFrame({"r15": evals, "r17": evals})1931assert_frame_equal(out, expected)193219331934@pytest.mark.slow1935@pytest.mark.parametrize("with_nulls", [True, False])1936def test_rolling_sum_non_finite_23115(with_nulls: bool) -> None:1937values: list[float | None] = [19380.0,1939float("nan"),1940float("inf"),1941-float("inf"),194242.0,1943-3.0,1944]1945if with_nulls:1946values.append(None)1947data = random.choices(values, k=1000)1948naive = [1949(1950sum(0 if x is None else x for x in data[max(0, i + 1 - 4) : i + 1])1951if sum(x is not None for x in data[max(0, i + 1 - 4) : i + 1]) >= 21952else None1953)1954for i in range(1000)1955]1956assert_series_equal(pl.Series(data).rolling_sum(4, min_samples=2), pl.Series(naive))195719581959@pytest.mark.parametrize(1960("method", "out_dtype"),1961[1962("average", pl.Float64),1963("min", get_index_type()),1964("max", get_index_type()),1965("dense", get_index_type()),1966],1967)1968@given(1969s=series(1970name="a",1971allowed_dtypes=NUMERIC_DTYPES + TEMPORAL_DTYPES + [pl.Boolean],1972min_size=1,1973max_size=50,1974),1975window_size=st.integers(1, 50),1976)1977def test_rolling_rank(1978s: pl.Series,1979window_size: int,1980method: RankMethod,1981out_dtype: pl.DataType,1982) -> None:1983df = pl.DataFrame({"a": s})1984expected = (1985df.with_row_index()1986.with_columns(1987a=pl.col("a")1988.rank(method=method)1989.rolling(index_column="index", period=f"{window_size}i")1990.list.last()1991.cast(out_dtype)1992)1993.drop("index")1994)1995actual = df.lazy().select(1996pl.col("a").rolling_rank(1997window_size=window_size, method=method, seed=0, min_samples=11998)1999)2000assert actual.collect_schema() == actual.collect().schema2001assert_frame_equal(actual.collect(), expected)200220032004@pytest.mark.parametrize("center", [False, True])2005@given(2006s=series(2007name="a",2008allowed_dtypes=NUMERIC_DTYPES + TEMPORAL_DTYPES + [pl.Boolean],2009min_size=1,2010max_size=50,2011),2012window_size=st.integers(1, 50),2013)2014def test_rolling_rank_method_random(2015s: pl.Series, window_size: int, center: bool2016) -> None:2017df = pl.DataFrame({"a": s})2018actual = df.lazy().with_columns(2019lo=pl.col("a").rolling_rank(2020window_size=window_size, method="min", center=center2021),2022hi=pl.col("a").rolling_rank(2023window_size=window_size, method="max", center=center2024),2025random=pl.col("a").rolling_rank(2026window_size=window_size,2027method="random",2028center=center,2029),2030)20312032assert actual.collect_schema() == actual.collect().schema, (2033f"expected {actual.collect_schema()}, got {actual.collect().schema}"2034)2035assert (2036actual.select(2037(2038(pl.col("lo") <= pl.col("random")) & (pl.col("random") <= pl.col("hi"))2039).all()2040)2041.collect()2042.item()2043)204420452046@pytest.mark.parametrize("op", [pl.Expr.rolling_mean, pl.Expr.rolling_median])2047def test_rolling_mean_median_temporals(op: Callable[..., pl.Expr]) -> None:2048tz = ZoneInfo("Asia/Tokyo")2049# We use ms to verify that the correct time unit is propagating.2050dt = pl.Datetime("ms", "Asia/Tokyo")2051lf = pl.LazyFrame(2052{2053"int": [1, 2, 3, 4, 5],2054"date": [date(2025, 1, x) for x in range(1, 6)],2055"datetime": [datetime(2025, 1, x) for x in range(1, 6)],2056"datetime_tu_tz": pl.Series(2057[datetime(2025, 1, x, tzinfo=tz) for x in range(1, 6)], dtype=dt2058),2059"duration": pl.Series(2060[timedelta(hours=x) for x in range(1, 6)], dtype=pl.Duration("ms")2061),2062"time": [time(hour=x) for x in range(1, 6)],2063}2064)2065result = lf.select(2066rolling_date=op(pl.col("date"), window_size=4),2067rolling_datetime=op(pl.col("datetime"), window_size=4),2068rolling_datetime_tu_tz=op(pl.col("datetime_tu_tz"), window_size=4),2069rolling_duration=op(pl.col("duration"), window_size=4),2070rolling_time=op(pl.col("time"), window_size=4),2071)2072expected = pl.DataFrame(2073{2074"rolling_date": pl.Series(2075[None, None, None, datetime(2025, 1, 2, 12), datetime(2025, 1, 3, 12)],2076dtype=pl.Datetime,2077),2078"rolling_datetime": pl.Series(2079[None, None, None, datetime(2025, 1, 2, 12), datetime(2025, 1, 3, 12)]2080),2081"rolling_datetime_tu_tz": pl.Series(2082[2083None,2084None,2085None,2086datetime(2025, 1, 2, 12, tzinfo=tz),2087datetime(2025, 1, 3, 12, tzinfo=tz),2088],2089dtype=dt,2090),2091"rolling_duration": pl.Series(2092[None, None, None, timedelta(hours=2.5), timedelta(hours=3.5)],2093dtype=pl.Duration("ms"),2094),2095"rolling_time": [2096None,2097None,2098None,2099time(hour=2, minute=30),2100time(hour=3, minute=30),2101],2102}2103)2104assert result.collect_schema() == pl.Schema(2105{ # type: ignore[arg-type]2106"rolling_date": pl.Datetime("us"),2107"rolling_datetime": pl.Datetime("us"),2108"rolling_datetime_tu_tz": dt,2109"rolling_duration": pl.Duration("ms"),2110"rolling_time": pl.Time,2111}2112)2113assert_frame_equal(result.collect(), expected)211421152116@pytest.mark.parametrize(2117"op",2118[2119(pl.Expr.mean, pl.Expr.rolling_mean_by),2120(pl.Expr.median, pl.Expr.rolling_median_by),2121],2122)2123def test_rolling_agg_mean_median_temporal(2124op: tuple[Callable[..., pl.Expr], Callable[..., pl.Expr]],2125) -> None:2126tz = ZoneInfo("Asia/Tokyo")2127# We use ms to verify that the correct time unit is propagating.2128dt = pl.Datetime("ms", "Asia/Tokyo")2129lf = pl.LazyFrame(2130{2131"index": [1, 2, 3, 4, 5],2132"int": [1, 2, 3, 4, 5],2133"date": [date(2025, 1, x) for x in range(1, 6)],2134"datetime": [datetime(2025, 1, x) for x in range(1, 6)],2135"datetime_tu_tz": pl.Series(2136[datetime(2025, 1, x, tzinfo=tz) for x in range(1, 6)], dtype=dt2137),2138"duration": pl.Series(2139[timedelta(hours=x) for x in range(1, 6)], dtype=pl.Duration("ms")2140),2141"time": [time(hour=x) for x in range(1, 6)],2142}2143)21442145# Using rolling.agg()2146result1 = lf.rolling("index", period="4i").agg(2147rolling_int=op[0](pl.col("int")),2148rolling_date=op[0](pl.col("date")),2149rolling_datetime=op[0](pl.col("datetime")),2150rolling_datetime_tu_tz=op[0](pl.col("datetime_tu_tz")),2151rolling_duration=op[0](pl.col("duration")),2152rolling_time=op[0](pl.col("time")),2153)2154# Using rolling_quantile_by()2155result2 = lf.select(2156"index",2157rolling_int=op[1](pl.col("int"), "index", window_size="4i"),2158rolling_date=op[1](pl.col("date"), "index", window_size="4i"),2159rolling_datetime=op[1](pl.col("datetime"), "index", window_size="4i"),2160rolling_datetime_tu_tz=op[1](2161pl.col("datetime_tu_tz"), "index", window_size="4i"2162),2163rolling_duration=op[1](pl.col("duration"), "index", window_size="4i"),2164rolling_time=op[1](pl.col("time"), "index", window_size="4i"),2165)2166expected = pl.DataFrame(2167{2168"index": [1, 2, 3, 4, 5],2169"rolling_int": [1.0, 1.5, 2.0, 2.5, 3.5],2170"rolling_date": pl.Series(2171[2172datetime(2025, 1, 1),2173datetime(2025, 1, 1, 12),2174datetime(2025, 1, 2),2175datetime(2025, 1, 2, 12),2176datetime(2025, 1, 3, 12),2177]2178),2179"rolling_datetime": pl.Series(2180[2181datetime(2025, 1, 1),2182datetime(2025, 1, 1, 12),2183datetime(2025, 1, 2),2184datetime(2025, 1, 2, 12),2185datetime(2025, 1, 3, 12),2186]2187),2188"rolling_datetime_tu_tz": pl.Series(2189[2190datetime(2025, 1, 1, tzinfo=tz),2191datetime(2025, 1, 1, 12, tzinfo=tz),2192datetime(2025, 1, 2, tzinfo=tz),2193datetime(2025, 1, 2, 12, tzinfo=tz),2194datetime(2025, 1, 3, 12, tzinfo=tz),2195],2196dtype=dt,2197),2198"rolling_duration": pl.Series(2199[2200timedelta(hours=1),2201timedelta(hours=1.5),2202timedelta(hours=2),2203timedelta(hours=2.5),2204timedelta(hours=3.5),2205],2206dtype=pl.Duration("ms"),2207),2208"rolling_time": [2209time(hour=1),2210time(hour=1, minute=30),2211time(hour=2),2212time(hour=2, minute=30),2213time(hour=3, minute=30),2214],2215}2216)2217expected_schema = pl.Schema(2218{ # type: ignore[arg-type]2219"index": pl.Int64,2220"rolling_int": pl.Float64,2221"rolling_date": pl.Datetime("us"),2222"rolling_datetime": pl.Datetime("us"),2223"rolling_datetime_tu_tz": dt,2224"rolling_duration": pl.Duration("ms"),2225"rolling_time": pl.Time,2226}2227)2228assert result1.collect_schema() == expected_schema2229assert result2.collect_schema() == expected_schema2230assert_frame_equal(result1.collect(), expected)2231assert_frame_equal(result2.collect(), expected)223222332234@pytest.mark.parametrize(2235("df", "expected"),2236[2237(2238pl.DataFrame(2239{"a": [1, 2, 3, 4], "offset": [0, 0, 0, 0], "len": [3, 1, 2, 1]}2240),2241pl.DataFrame({"a": [6, 2, 7, 4]}),2242),2243(2244pl.DataFrame(2245{2246"a": [1, 2, 3, 4, 5, 6],2247"offset": [0, 0, 2, 0, 0, 0],2248"len": [3, 1, 3, 3, 1, 1],2249}2250),2251pl.DataFrame({"a": [6, 2, 11, 15, 5, 6]}),2252),2253(2254pl.DataFrame(2255{"a": [1, 2, 3, None], "offset": [0, 0, 0, 0], "len": [3, 1, 2, 1]}2256),2257pl.DataFrame({"a": [6, 2, 3, 0]}),2258),2259(2260pl.DataFrame(2261{2262"a": [1, 2, 3, 4, 5, None],2263"offset": [0, 0, 2, 0, 0, 0],2264"len": [3, 1, 3, 3, 1, 1],2265}2266),2267pl.DataFrame({"a": [6, 2, 5, 9, 5, 0]}),2268),2269],2270)2271def test_rolling_agg_sum_varying_slice_25434(2272df: pl.DataFrame, expected: pl.DataFrame2273) -> None:2274out = df.with_row_index().select(2275pl.col("a")2276.slice(pl.col("offset").first(), pl.col("len").first())2277.sum()2278.rolling("index", period=f"{df.height}i", offset="0i", closed="left")2279)2280assert_frame_equal(out, expected)228122822283@pytest.mark.parametrize("with_nulls", [True, False])2284def test_rolling_agg_sum_varying_slice_fuzz(with_nulls: bool) -> None:2285n = 10002286max_rand = 1022872288def opt_null(n: int) -> int | None:2289return None if random.randint(0, max_rand) == max_rand and with_nulls else n22902291df = pl.DataFrame(2292{2293"a": [opt_null(i) for i in range(n)],2294"offset": [random.randint(0, max_rand) for _ in range(n)],2295"length": [random.randint(0, max_rand) for _ in range(n)],2296}2297)22982299out = df.with_row_index().select(2300pl.col("a")2301.slice(pl.col("offset").first(), pl.col("length").first())2302.sum()2303.rolling("index", period=f"{df.height}i", offset="0i", closed="left")2304)23052306out = out.select(pl.col("a").fill_null(0))2307df = df.with_columns(pl.col("a").fill_null(0))23082309(a, offset, length) = (2310df["a"].to_list(),2311df["offset"].to_list(),2312df["length"].to_list(),2313)2314expected = [sum(a[i + offset[i] : i + offset[i] + length[i]]) for i in range(n)]2315assert_frame_equal(out, pl.DataFrame({"a": expected}))231623172318def test_rolling_midpoint_25793() -> None:2319df = pl.DataFrame({"i": [1, 2, 3, 4], "x": [1, 2, 3, 4]})23202321out = df.select(2322pl.col.x.quantile(0.5, interpolation="midpoint").rolling("i", period="4i")2323)2324expected = pl.DataFrame({"x": [1.0, 1.5, 2.0, 2.5]})2325assert_frame_equal(out, expected)23262327out = df.select(2328pl.col.x.cumulative_eval(pl.element().quantile(0.5, interpolation="midpoint"))2329)2330assert_frame_equal(out, expected)233123322333def test_rolling_rank_closed_left_26147() -> None:2334df = pl.DataFrame(2335{2336"date": [datetime(2025, 1, 1), datetime(2025, 1, 1)],2337"x": [0, 1],2338"x_flipped": [1, 0],2339}2340)2341actual = df.with_columns(2342x_ranked=pl.col("x").rolling_rank_by("date", "2d"),2343x_flipped_ranked=pl.col("x_flipped").rolling_rank_by("date", "2d"),2344)2345expected = df.with_columns(2346x_ranked=pl.Series([1.0, 2.0]),2347x_flipped_ranked=pl.Series([2.0, 1.0]),2348)2349assert_frame_equal(actual, expected)235023512352