Path: blob/main/py-polars/tests/unit/operations/rolling/test_rolling.py
6940 views
from __future__ import annotations12import random3import sys4from datetime import date, datetime, timedelta5from typing import TYPE_CHECKING67import hypothesis.strategies as st8import numpy as np9import pytest10from hypothesis import assume, given11from numpy import nan1213import polars as pl14from polars._utils.convert import parse_as_duration_string15from polars.exceptions import ComputeError, InvalidOperationError16from polars.testing import assert_frame_equal, assert_series_equal17from polars.testing.parametric import column, dataframes18from polars.testing.parametric.strategies.dtype import _time_units19from tests.unit.conftest import INTEGER_DTYPES2021if TYPE_CHECKING:22from hypothesis.strategies import SearchStrategy2324from polars._typing import (25ClosedInterval,26PolarsDataType,27QuantileMethod,28TimeUnit,29)303132@pytest.fixture33def example_df() -> pl.DataFrame:34return pl.DataFrame(35{36"dt": [37datetime(2021, 1, 1),38datetime(2021, 1, 2),39datetime(2021, 1, 4),40datetime(2021, 1, 5),41datetime(2021, 1, 7),42],43"values": pl.arange(0, 5, eager=True),44}45)464748@pytest.mark.parametrize(49"period",50["1d", "2d", "3d", timedelta(days=1), timedelta(days=2), timedelta(days=3)],51)52@pytest.mark.parametrize("closed", ["left", "right", "none", "both"])53def test_rolling_kernels_and_rolling(54example_df: pl.DataFrame, period: str | timedelta, closed: ClosedInterval55) -> None:56out1 = example_df.set_sorted("dt").select(57pl.col("dt"),58# this differs from group_by aggregation because the empty window is59# null here60# where the sum aggregation of an empty set is 061pl.col("values")62.rolling_sum_by("dt", period, closed=closed)63.fill_null(0)64.alias("sum"),65pl.col("values").rolling_var_by("dt", period, closed=closed).alias("var"),66pl.col("values").rolling_mean_by("dt", period, closed=closed).alias("mean"),67pl.col("values").rolling_std_by("dt", period, closed=closed).alias("std"),68pl.col("values")69.rolling_quantile_by("dt", period, quantile=0.2, closed=closed)70.alias("quantile"),71)72out2 = (73example_df.set_sorted("dt")74.rolling("dt", period=period, closed=closed)75.agg(76[77pl.col("values").sum().alias("sum"),78pl.col("values").var().alias("var"),79pl.col("values").mean().alias("mean"),80pl.col("values").std().alias("std"),81pl.col("values").quantile(quantile=0.2).alias("quantile"),82]83)84)85assert_frame_equal(out1, out2)868788@pytest.mark.parametrize(89("offset", "closed", "expected_values"),90[91pytest.param(92"-1d",93"left",94[[1], [1, 2], [2, 3], [3, 4]],95id="partial lookbehind, left",96),97pytest.param(98"-1d",99"right",100[[1, 2], [2, 3], [3, 4], [4]],101id="partial lookbehind, right",102),103pytest.param(104"-1d",105"both",106[[1, 2], [1, 2, 3], [2, 3, 4], [3, 4]],107id="partial lookbehind, both",108),109pytest.param(110"-1d",111"none",112[[1], [2], [3], [4]],113id="partial lookbehind, none",114),115pytest.param(116"-2d",117"left",118[[], [1], [1, 2], [2, 3]],119id="full lookbehind, left",120),121pytest.param(122"-3d",123"left",124[[], [], [1], [1, 2]],125id="full lookbehind, offset > period, left",126),127pytest.param(128"-3d",129"right",130[[], [1], [1, 2], [2, 3]],131id="full lookbehind, right",132),133pytest.param(134"-3d",135"both",136[[], [1], [1, 2], [1, 2, 3]],137id="full lookbehind, both",138),139pytest.param(140"-2d",141"none",142[[], [1], [2], [3]],143id="full lookbehind, none",144),145pytest.param(146"-3d",147"none",148[[], [], [1], [2]],149id="full lookbehind, offset > period, none",150),151],152)153def test_rolling_negative_offset(154offset: str, closed: ClosedInterval, expected_values: list[list[int]]155) -> None:156df = pl.DataFrame(157{158"ts": pl.datetime_range(159datetime(2021, 1, 1), datetime(2021, 1, 4), "1d", eager=True160),161"value": [1, 2, 3, 4],162}163)164result = df.rolling("ts", period="2d", offset=offset, closed=closed).agg(165pl.col("value")166)167expected = pl.DataFrame(168{169"ts": pl.datetime_range(170datetime(2021, 1, 1), datetime(2021, 1, 4), "1d", eager=True171),172"value": expected_values,173}174)175assert_frame_equal(result, expected)176177178def test_rolling_skew() -> None:179s = pl.Series([1, 2, 3, 3, 2, 10, 8])180assert s.rolling_skew(window_size=4, bias=True).to_list() == pytest.approx(181[182None,183None,184None,185-0.49338220021815865,1860.0,1871.097025449363867,1880.09770939201338157,189]190)191192assert s.rolling_skew(window_size=4, bias=False).to_list() == pytest.approx(193[194None,195None,196None,197-0.8545630383279711,1980.0,1991.9001038154942962,2000.16923763134384154,201]202)203204205def test_rolling_kurtosis() -> None:206s = pl.Series([1, 2, 3, 3, 2, 10, 8])207assert s.rolling_kurtosis(window_size=4, bias=True).to_list() == pytest.approx(208[209None,210None,211None,212-1.371900826446281,213-1.9999999999999991,214-0.7055324211778693,215-1.7878967572797346,216]217)218assert s.rolling_kurtosis(219window_size=4, bias=True, fisher=False220).to_list() == pytest.approx(221[222None,223None,224None,2251.628099173553719,2261.0000000000000009,2272.2944675788221307,2281.2121032427202654,229]230)231232233@pytest.mark.parametrize("time_zone", [None, "US/Central"])234@pytest.mark.parametrize(235("rolling_fn", "expected_values", "expected_dtype"),236[237("rolling_mean_by", [None, 1.0, 2.0, 3.0, 4.0, 5.0], pl.Float64),238("rolling_sum_by", [None, 1, 2, 3, 4, 5], pl.Int64),239("rolling_min_by", [None, 1, 2, 3, 4, 5], pl.Int64),240("rolling_max_by", [None, 1, 2, 3, 4, 5], pl.Int64),241("rolling_std_by", [None, None, None, None, None, None], pl.Float64),242("rolling_var_by", [None, None, None, None, None, None], pl.Float64),243],244)245def test_rolling_crossing_dst(246time_zone: str | None,247rolling_fn: str,248expected_values: list[int | None | float],249expected_dtype: PolarsDataType,250) -> None:251ts = pl.datetime_range(252datetime(2021, 11, 5), datetime(2021, 11, 10), "1d", time_zone="UTC", eager=True253).dt.replace_time_zone(time_zone)254df = pl.DataFrame({"ts": ts, "value": [1, 2, 3, 4, 5, 6]})255256result = df.with_columns(257getattr(pl.col("value"), rolling_fn)(by="ts", window_size="1d", closed="left")258)259260expected = pl.DataFrame(261{"ts": ts, "value": expected_values}, schema_overrides={"value": expected_dtype}262)263assert_frame_equal(result, expected)264265266def test_rolling_by_invalid() -> None:267df = pl.DataFrame(268{"a": [1, 2, 3], "b": [4, 5, 6]}, schema_overrides={"a": pl.Int16}269).sort("a")270msg = "unsupported data type: i16 for temporal/index column, expected UInt64, UInt32, Int64, Int32, Datetime, Date, Duration, or Time"271with pytest.raises(InvalidOperationError, match=msg):272df.select(pl.col("b").rolling_min_by("a", "2i"))273df = pl.DataFrame({"a": [1, 2, 3], "b": [date(2020, 1, 1)] * 3}).sort("b")274msg = "`window_size` duration may not be a parsed integer"275with pytest.raises(InvalidOperationError, match=msg):276df.select(pl.col("a").rolling_min_by("b", "2i"))277278279def test_rolling_infinity() -> None:280s = pl.Series("col", ["-inf", "5", "5"]).cast(pl.Float64)281s = s.rolling_mean(2)282expected = pl.Series("col", [None, "-inf", "5"]).cast(pl.Float64)283assert_series_equal(s, expected)284285286def test_rolling_by_non_temporal_window_size() -> None:287df = pl.DataFrame(288{"a": [4, 5, 6], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]}289).sort("a", "b")290msg = "`window_size` duration may not be a parsed integer"291with pytest.raises(InvalidOperationError, match=msg):292df.with_columns(pl.col("a").rolling_sum_by("b", "2i", closed="left"))293294295@pytest.mark.parametrize(296"dtype",297[298pl.UInt8,299pl.Int64,300pl.Float32,301pl.Float64,302pl.Time,303pl.Date,304pl.Datetime("ms"),305pl.Datetime("us"),306pl.Datetime("ns"),307pl.Datetime("ns", "Asia/Kathmandu"),308pl.Duration("ms"),309pl.Duration("us"),310pl.Duration("ns"),311],312)313def test_rolling_extrema(dtype: PolarsDataType) -> None:314# sorted data and nulls flags trigger different kernels315df = (316(317pl.DataFrame(318{319"col1": pl.int_range(0, 7, eager=True),320"col2": pl.int_range(0, 7, eager=True).reverse(),321}322)323)324.with_columns(325pl.when(pl.int_range(0, pl.len(), eager=False) < 2)326.then(None)327.otherwise(pl.all())328.name.keep()329.name.suffix("_nulls")330)331.cast(dtype)332)333334expected = {335"col1": [None, None, 0, 1, 2, 3, 4],336"col2": [None, None, 4, 3, 2, 1, 0],337"col1_nulls": [None, None, None, None, 2, 3, 4],338"col2_nulls": [None, None, None, None, 2, 1, 0],339}340result = df.select([pl.all().rolling_min(3)])341assert result.to_dict(as_series=False) == {342k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()343}344345expected = {346"col1": [None, None, 2, 3, 4, 5, 6],347"col2": [None, None, 6, 5, 4, 3, 2],348"col1_nulls": [None, None, None, None, 4, 5, 6],349"col2_nulls": [None, None, None, None, 4, 3, 2],350}351result = df.select([pl.all().rolling_max(3)])352assert result.to_dict(as_series=False) == {353k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()354}355356# shuffled data triggers other kernels357df = df.select([pl.all().shuffle(seed=0)])358expected = {359"col1": [None, None, 0, 0, 4, 1, 1],360"col2": [None, None, 1, 1, 0, 0, 0],361"col1_nulls": [None, None, None, None, 4, None, None],362"col2_nulls": [None, None, None, None, 0, None, None],363}364result = df.select([pl.all().rolling_min(3)])365assert result.to_dict(as_series=False) == {366k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()367}368result = df.select([pl.all().rolling_max(3)])369370expected = {371"col1": [None, None, 5, 5, 6, 6, 6],372"col2": [None, None, 6, 6, 2, 5, 5],373"col1_nulls": [None, None, None, None, 6, None, None],374"col2_nulls": [None, None, None, None, 2, None, None],375}376assert result.to_dict(as_series=False) == {377k: pl.Series(v, dtype=dtype).to_list() for k, v in expected.items()378}379380381@pytest.mark.parametrize(382"dtype",383[384pl.UInt8,385pl.Int64,386pl.Float32,387pl.Float64,388pl.Time,389pl.Date,390pl.Datetime("ms"),391pl.Datetime("us"),392pl.Datetime("ns"),393pl.Datetime("ns", "Asia/Kathmandu"),394pl.Duration("ms"),395pl.Duration("us"),396pl.Duration("ns"),397],398)399def test_rolling_group_by_extrema(dtype: PolarsDataType) -> None:400# ensure we hit different branches so create401402df = pl.DataFrame(403{404"col1": pl.arange(0, 7, eager=True).reverse(),405}406).with_columns(407pl.col("col1").reverse().alias("index"),408pl.col("col1").cast(dtype),409)410411expected = {412"col1_list": pl.Series(413[414[6],415[6, 5],416[6, 5, 4],417[5, 4, 3],418[4, 3, 2],419[3, 2, 1],420[2, 1, 0],421],422dtype=pl.List(dtype),423).to_list(),424"col1_min": pl.Series([6, 5, 4, 3, 2, 1, 0], dtype=dtype).to_list(),425"col1_max": pl.Series([6, 6, 6, 5, 4, 3, 2], dtype=dtype).to_list(),426"col1_first": pl.Series([6, 6, 6, 5, 4, 3, 2], dtype=dtype).to_list(),427"col1_last": pl.Series([6, 5, 4, 3, 2, 1, 0], dtype=dtype).to_list(),428}429result = (430df.rolling(431index_column="index",432period="3i",433)434.agg(435[436pl.col("col1").name.suffix("_list"),437pl.col("col1").min().name.suffix("_min"),438pl.col("col1").max().name.suffix("_max"),439pl.col("col1").first().alias("col1_first"),440pl.col("col1").last().alias("col1_last"),441]442)443.select(["col1_list", "col1_min", "col1_max", "col1_first", "col1_last"])444)445assert result.to_dict(as_series=False) == expected446447# ascending order448449df = pl.DataFrame(450{451"col1": pl.arange(0, 7, eager=True),452}453).with_columns(454pl.col("col1").alias("index"),455pl.col("col1").cast(dtype),456)457458result = (459df.rolling(460index_column="index",461period="3i",462)463.agg(464[465pl.col("col1").name.suffix("_list"),466pl.col("col1").min().name.suffix("_min"),467pl.col("col1").max().name.suffix("_max"),468pl.col("col1").first().alias("col1_first"),469pl.col("col1").last().alias("col1_last"),470]471)472.select(["col1_list", "col1_min", "col1_max", "col1_first", "col1_last"])473)474expected = {475"col1_list": pl.Series(476[477[0],478[0, 1],479[0, 1, 2],480[1, 2, 3],481[2, 3, 4],482[3, 4, 5],483[4, 5, 6],484],485dtype=pl.List(dtype),486).to_list(),487"col1_min": pl.Series([0, 0, 0, 1, 2, 3, 4], dtype=dtype).to_list(),488"col1_max": pl.Series([0, 1, 2, 3, 4, 5, 6], dtype=dtype).to_list(),489"col1_first": pl.Series([0, 0, 0, 1, 2, 3, 4], dtype=dtype).to_list(),490"col1_last": pl.Series([0, 1, 2, 3, 4, 5, 6], dtype=dtype).to_list(),491}492assert result.to_dict(as_series=False) == expected493494# shuffled data.495df = pl.DataFrame(496{497"col1": pl.arange(0, 7, eager=True).shuffle(1),498}499).with_columns(500pl.col("col1").cast(dtype),501pl.col("col1").sort().alias("index"),502)503504result = (505df.rolling(506index_column="index",507period="3i",508)509.agg(510[511pl.col("col1").min().name.suffix("_min"),512pl.col("col1").max().name.suffix("_max"),513pl.col("col1").name.suffix("_list"),514]515)516.select(["col1_list", "col1_min", "col1_max"])517)518expected = {519"col1_list": pl.Series(520[521[4],522[4, 2],523[4, 2, 5],524[2, 5, 1],525[5, 1, 6],526[1, 6, 0],527[6, 0, 3],528],529dtype=pl.List(dtype),530).to_list(),531"col1_min": pl.Series([4, 2, 2, 1, 1, 0, 0], dtype=dtype).to_list(),532"col1_max": pl.Series([4, 4, 5, 5, 6, 6, 6], dtype=dtype).to_list(),533}534assert result.to_dict(as_series=False) == expected535536537def test_rolling_slice_pushdown() -> None:538df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "a", "b"], "c": [1, 3, 5]}).lazy()539df = (540df.sort("a")541.rolling(542"a",543group_by="b",544period="2i",545)546.agg([(pl.col("c") - pl.col("c").shift(fill_value=0)).sum().alias("c")])547)548assert df.head(2).collect().to_dict(as_series=False) == {549"b": ["a", "a"],550"a": [1, 2],551"c": [1, 3],552}553554555def test_overlapping_groups_4628() -> None:556df = pl.DataFrame(557{558"index": [1, 2, 3, 4, 5, 6],559"val": [10, 20, 40, 70, 110, 160],560}561)562assert (563df.rolling(index_column=pl.col("index").set_sorted(), period="3i").agg(564[565pl.col("val").diff(n=1).alias("val.diff"),566(pl.col("val") - pl.col("val").shift(1)).alias("val - val.shift"),567]568)569).to_dict(as_series=False) == {570"index": [1, 2, 3, 4, 5, 6],571"val.diff": [572[None],573[None, 10],574[None, 10, 20],575[None, 20, 30],576[None, 30, 40],577[None, 40, 50],578],579"val - val.shift": [580[None],581[None, 10],582[None, 10, 20],583[None, 20, 30],584[None, 30, 40],585[None, 40, 50],586],587}588589590@pytest.mark.skipif(sys.platform == "win32", reason="Minor numerical diff")591def test_rolling_skew_lagging_null_5179() -> None:592s = pl.Series([None, 3, 4, 1, None, None, None, None, 3, None, 5, 4, 7, 2, 1, None])593result = s.rolling_skew(3, min_samples=1).fill_nan(-1.0)594expected = pl.Series(595[596None,597-1.0,5980.0,599-0.3818017741606059,6000.0,601-1.0,602None,603None,604-1.0,605-1.0,6060.0,6070.0,6080.38180177416060695,6090.23906314692954517,6100.6309038567106234,6110.0,612]613)614assert_series_equal(result, expected, check_names=False)615616617def test_rolling_var_numerical_stability_5197() -> None:618s = pl.Series([*[1.2] * 4, *[3.3] * 7])619res = s.to_frame("a").with_columns(pl.col("a").rolling_var(5))[:, 0].to_list()620assert res[4:] == pytest.approx(621[6220.882,6231.3229999999999997,6241.3229999999999997,6250.8819999999999983,6260.0,6270.0,6280.0,629]630)631assert res[:4] == [None] * 4632633634def test_rolling_iter() -> None:635df = pl.DataFrame(636{637"date": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 5)],638"a": [1, 2, 2],639"b": [4, 5, 6],640}641).set_sorted("date")642643# Without 'by' argument644result1 = [645(name[0], data.shape)646for name, data in df.rolling(index_column="date", period="2d")647]648expected1 = [649(date(2020, 1, 1), (1, 3)),650(date(2020, 1, 2), (2, 3)),651(date(2020, 1, 5), (1, 3)),652]653assert result1 == expected1654655# With 'by' argument656result2 = [657(name, data.shape)658for name, data in df.rolling(index_column="date", period="2d", group_by="a")659]660expected2 = [661((1, date(2020, 1, 1)), (1, 3)),662((2, date(2020, 1, 2)), (1, 3)),663((2, date(2020, 1, 5)), (1, 3)),664]665assert result2 == expected2666667668def test_rolling_negative_period() -> None:669df = pl.DataFrame({"ts": [datetime(2020, 1, 1)], "value": [1]}).with_columns(670pl.col("ts").set_sorted()671)672with pytest.raises(673ComputeError, match="rolling window period should be strictly positive"674):675df.rolling("ts", period="-1d", offset="-1d").agg(pl.col("value"))676with pytest.raises(677ComputeError, match="rolling window period should be strictly positive"678):679df.lazy().rolling("ts", period="-1d", offset="-1d").agg(680pl.col("value")681).collect()682with pytest.raises(683InvalidOperationError, match="`window_size` must be strictly positive"684):685df.select(686pl.col("value").rolling_min_by("ts", window_size="-1d", closed="left")687)688with pytest.raises(689InvalidOperationError, match="`window_size` must be strictly positive"690):691df.lazy().select(692pl.col("value").rolling_min_by("ts", window_size="-1d", closed="left")693).collect()694695696def test_rolling_skew_window_offset() -> None:697assert (pl.arange(0, 20, eager=True) ** 2).rolling_skew(20)[698-1699] == 0.6612545648596286700701702def test_rolling_cov_corr() -> None:703df = pl.DataFrame({"x": [3, 3, 3, 5, 8], "y": [3, 4, 4, 4, 8]})704705res = df.select(706pl.rolling_cov("x", "y", window_size=3).alias("cov"),707pl.rolling_corr("x", "y", window_size=3).alias("corr"),708).to_dict(as_series=False)709assert res["cov"][2:] == pytest.approx([0.0, 0.0, 5.333333333333336])710assert res["corr"][2:] == pytest.approx([nan, 0.0, 0.9176629354822473], nan_ok=True)711assert res["cov"][:2] == [None] * 2712assert res["corr"][:2] == [None] * 2713714715def test_rolling_cov_corr_nulls() -> None:716df1 = pl.DataFrame(717{"a": [1.06, 1.07, 0.93, 0.78, 0.85], "lag_a": [1.0, 1.06, 1.07, 0.93, 0.78]}718)719df2 = pl.DataFrame(720{721"a": [1.0, 1.06, 1.07, 0.93, 0.78, 0.85],722"lag_a": [None, 1.0, 1.06, 1.07, 0.93, 0.78],723}724)725726val_1 = df1.select(727pl.rolling_corr("a", "lag_a", window_size=10, min_samples=5, ddof=1)728)729val_2 = df2.select(730pl.rolling_corr("a", "lag_a", window_size=10, min_samples=5, ddof=1)731)732733df1_expected = pl.DataFrame({"a": [None, None, None, None, 0.62204709]})734df2_expected = pl.DataFrame({"a": [None, None, None, None, None, 0.62204709]})735736assert_frame_equal(val_1, df1_expected, abs_tol=0.0000001)737assert_frame_equal(val_2, df2_expected, abs_tol=0.0000001)738739val_1 = df1.select(740pl.rolling_cov("a", "lag_a", window_size=10, min_samples=5, ddof=1)741)742val_2 = df2.select(743pl.rolling_cov("a", "lag_a", window_size=10, min_samples=5, ddof=1)744)745746df1_expected = pl.DataFrame({"a": [None, None, None, None, 0.009445]})747df2_expected = pl.DataFrame({"a": [None, None, None, None, None, 0.009445]})748749assert_frame_equal(val_1, df1_expected, abs_tol=0.0000001)750assert_frame_equal(val_2, df2_expected, abs_tol=0.0000001)751752753@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])754def test_rolling_empty_window_9406(time_unit: TimeUnit) -> None:755datecol = pl.Series(756"d",757[datetime(2019, 1, x) for x in [16, 17, 18, 22, 23]],758dtype=pl.Datetime(time_unit=time_unit, time_zone=None),759).set_sorted()760rawdata = pl.Series("x", [1.1, 1.2, 1.3, 1.15, 1.25], dtype=pl.Float64)761rmin = pl.Series("x", [None, 1.1, 1.1, None, 1.15], dtype=pl.Float64)762rmax = pl.Series("x", [None, 1.1, 1.2, None, 1.15], dtype=pl.Float64)763df = pl.DataFrame([datecol, rawdata])764765assert_frame_equal(766pl.DataFrame([datecol, rmax]),767df.select(768pl.col("d"),769pl.col("x").rolling_max_by("d", window_size="3d", closed="left"),770),771)772assert_frame_equal(773pl.DataFrame([datecol, rmin]),774df.select(775pl.col("d"),776pl.col("x").rolling_min_by("d", window_size="3d", closed="left"),777),778)779780781def test_rolling_weighted_quantile_10031() -> None:782assert_series_equal(783pl.Series([1, 2]).rolling_median(window_size=2, weights=[0, 1]),784pl.Series([None, 2.0]),785)786787assert_series_equal(788pl.Series([1, 2, 3, 5]).rolling_quantile(0.7, "linear", 3, [0.1, 0.3, 0.6]),789pl.Series([None, None, 2.55, 4.1]),790)791792assert_series_equal(793pl.Series([1, 2, 3, 5, 8]).rolling_quantile(7940.7, "linear", 4, [0.1, 0.2, 0, 0.3]795),796pl.Series([None, None, None, 3.5, 5.5]),797)798799800def test_rolling_meta_eq_10101() -> None:801assert pl.col("A").rolling_sum(10).meta.eq(pl.col("A").rolling_sum(10)) is True802803804def test_rolling_aggregations_unsorted_raise_10991() -> None:805df = pl.DataFrame(806{807"dt": [datetime(2020, 1, 3), datetime(2020, 1, 1), datetime(2020, 1, 2)],808"val": [1, 2, 3],809}810)811result = df.with_columns(roll=pl.col("val").rolling_sum_by("dt", "2d"))812expected = pl.DataFrame(813{814"dt": [datetime(2020, 1, 3), datetime(2020, 1, 1), datetime(2020, 1, 2)],815"val": [1, 2, 3],816"roll": [4, 2, 5],817}818)819assert_frame_equal(result, expected)820result = (821df.with_row_index()822.sort("dt")823.with_columns(roll=pl.col("val").rolling_sum_by("dt", "2d"))824.sort("index")825.drop("index")826)827assert_frame_equal(result, expected)828829830def test_rolling_aggregations_with_over_11225() -> None:831start = datetime(2001, 1, 1)832833df_temporal = pl.DataFrame(834{835"date": [start + timedelta(days=k) for k in range(5)],836"group": ["A"] * 2 + ["B"] * 3,837}838).with_row_index()839840df_temporal = df_temporal.sort("group", "date")841842result = df_temporal.with_columns(843rolling_row_mean=pl.col("index")844.rolling_mean_by(845by="date",846window_size="2d",847closed="left",848)849.over("group")850)851expected = pl.DataFrame(852{853"index": [0, 1, 2, 3, 4],854"date": pl.datetime_range(date(2001, 1, 1), date(2001, 1, 5), eager=True),855"group": ["A", "A", "B", "B", "B"],856"rolling_row_mean": [None, 0.0, None, 2.0, 2.5],857},858schema_overrides={"index": pl.UInt32},859)860assert_frame_equal(result, expected)861862863@pytest.mark.parametrize("dtype", INTEGER_DTYPES)864def test_rolling_ints(dtype: PolarsDataType) -> None:865s = pl.Series("a", [1, 2, 3, 2, 1], dtype=dtype)866assert_series_equal(867s.rolling_min(2), pl.Series("a", [None, 1, 2, 2, 1], dtype=dtype)868)869assert_series_equal(870s.rolling_max(2), pl.Series("a", [None, 2, 3, 3, 2], dtype=dtype)871)872assert_series_equal(873s.rolling_sum(2),874pl.Series(875"a",876[None, 3, 5, 5, 3],877dtype=(878pl.Int64 if dtype in [pl.Int8, pl.UInt8, pl.Int16, pl.UInt16] else dtype879),880),881)882assert_series_equal(s.rolling_mean(2), pl.Series("a", [None, 1.5, 2.5, 2.5, 1.5]))883884assert s.rolling_std(2).to_list()[1] == pytest.approx(0.7071067811865476)885assert s.rolling_var(2).to_list()[1] == pytest.approx(0.5)886assert s.rolling_std(2, ddof=0).to_list()[1] == pytest.approx(0.5)887assert s.rolling_var(2, ddof=0).to_list()[1] == pytest.approx(0.25)888889assert_series_equal(890s.rolling_median(4), pl.Series("a", [None, None, None, 2, 2], dtype=pl.Float64)891)892assert_series_equal(893s.rolling_quantile(0, "nearest", 3),894pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64),895)896assert_series_equal(897s.rolling_quantile(0, "lower", 3),898pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64),899)900assert_series_equal(901s.rolling_quantile(0, "higher", 3),902pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64),903)904assert s.rolling_skew(4).null_count() == 3905906907def test_rolling_floats() -> None:908# 3099909# test if we maintain proper dtype910for dt in [pl.Float32, pl.Float64]:911result = pl.Series([1, 2, 3], dtype=dt).rolling_min(2, weights=[0.1, 0.2])912expected = pl.Series([None, 0.1, 0.2], dtype=dt)913assert_series_equal(result, expected)914915df = pl.DataFrame({"val": [1.0, 2.0, 3.0, np.nan, 5.0, 6.0, 7.0]})916917for e in [918pl.col("val").rolling_min(window_size=3),919pl.col("val").rolling_max(window_size=3),920]:921out = df.with_columns(e).to_series()922assert out.null_count() == 2923assert np.isnan(out.to_numpy()).sum() == 5924925expected_values = [None, None, 2.0, 3.0, 5.0, 6.0, 6.0]926assert (927df.with_columns(pl.col("val").rolling_median(window_size=3))928.to_series()929.to_list()930== expected_values931)932assert (933df.with_columns(pl.col("val").rolling_quantile(0.5, window_size=3))934.to_series()935.to_list()936== expected_values937)938939nan = float("nan")940s = pl.Series("a", [11.0, 2.0, 9.0, nan, 8.0])941assert_series_equal(942s.rolling_sum(3),943pl.Series("a", [None, None, 22.0, nan, nan]),944)945946947def test_rolling_std_nulls_min_samples_1_20076() -> None:948result = pl.Series([1, 2, None, 4]).rolling_std(3, min_samples=1)949expected = pl.Series(950[None, 0.7071067811865476, 0.7071067811865476, 1.4142135623730951]951)952assert_series_equal(result, expected)953954955def test_rolling_by_date() -> None:956df = pl.DataFrame(957{958"dt": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],959"val": [1, 2, 3],960}961).sort("dt")962963result = df.with_columns(roll=pl.col("val").rolling_sum_by("dt", "2d"))964expected = df.with_columns(roll=pl.Series([1, 3, 5]))965assert_frame_equal(result, expected)966967968@pytest.mark.parametrize("dtype", [pl.Int64, pl.Int32, pl.UInt64, pl.UInt32])969def test_rolling_by_integer(dtype: PolarsDataType) -> None:970df = (971pl.DataFrame({"val": [1, 2, 3]})972.with_row_index()973.with_columns(pl.col("index").cast(dtype))974)975result = df.with_columns(roll=pl.col("val").rolling_sum_by("index", "2i"))976expected = df.with_columns(roll=pl.Series([1, 3, 5]))977assert_frame_equal(result, expected)978979980@pytest.mark.parametrize("dtype", INTEGER_DTYPES)981def test_rolling_sum_by_integer(dtype: PolarsDataType) -> None:982lf = (983pl.LazyFrame({"a": [1, 2, 3]}, schema={"a": dtype})984.with_row_index()985.select(pl.col("a").rolling_sum_by("index", "2i"))986)987result = lf.collect()988expected_dtype = (989pl.Int64 if dtype in [pl.Int8, pl.UInt8, pl.Int16, pl.UInt16] else dtype990)991expected = pl.DataFrame({"a": [1, 3, 5]}, schema={"a": expected_dtype})992assert_frame_equal(result, expected)993assert lf.collect_schema() == expected.schema994995996def test_rolling_nanoseconds_11003() -> None:997df = pl.DataFrame(998{999"dt": [1000"2020-01-01T00:00:00.000000000",1001"2020-01-01T00:00:00.000000100",1002"2020-01-01T00:00:00.000000200",1003],1004"val": [1, 2, 3],1005}1006)1007df = df.with_columns(pl.col("dt").str.to_datetime(time_unit="ns")).set_sorted("dt")1008result = df.with_columns(pl.col("val").rolling_sum_by("dt", "500ns"))1009expected = df.with_columns(val=pl.Series([1, 3, 6]))1010assert_frame_equal(result, expected)101110121013def test_rolling_by_1mo_saturating_12216() -> None:1014df = pl.DataFrame(1015{1016"date": [1017date(2020, 6, 29),1018date(2020, 6, 30),1019date(2020, 7, 30),1020date(2020, 7, 31),1021date(2020, 8, 1),1022],1023"val": [1, 2, 3, 4, 5],1024}1025).set_sorted("date")1026result = df.rolling(index_column="date", period="1mo").agg(vals=pl.col("val"))1027expected = pl.DataFrame(1028{1029"date": [1030date(2020, 6, 29),1031date(2020, 6, 30),1032date(2020, 7, 30),1033date(2020, 7, 31),1034date(2020, 8, 1),1035],1036"vals": [[1], [1, 2], [3], [3, 4], [3, 4, 5]],1037}1038)1039assert_frame_equal(result, expected)10401041# check with `closed='both'` against DuckDB output1042result = df.rolling(index_column="date", period="1mo", closed="both").agg(1043vals=pl.col("val")1044)1045expected = pl.DataFrame(1046{1047"date": [1048date(2020, 6, 29),1049date(2020, 6, 30),1050date(2020, 7, 30),1051date(2020, 7, 31),1052date(2020, 8, 1),1053],1054"vals": [[1], [1, 2], [2, 3], [2, 3, 4], [3, 4, 5]],1055}1056)1057assert_frame_equal(result, expected)105810591060def test_index_expr_with_literal() -> None:1061df = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]}).sort("a")1062out = df.rolling(index_column=(5 * pl.col("a")).set_sorted(), period="2i").agg(1063pl.col("b")1064)1065expected = pl.DataFrame({"literal": [5, 10, 15], "b": [["a"], ["b"], ["c"]]})1066assert_frame_equal(out, expected)106710681069def test_index_expr_output_name_12244() -> None:1070df = pl.DataFrame({"A": [1, 2, 3]})10711072out = df.rolling(pl.int_range(0, pl.len()), period="2i").agg("A")1073assert out.to_dict(as_series=False) == {1074"literal": [0, 1, 2],1075"A": [[1], [1, 2], [2, 3]],1076}107710781079def test_rolling_median() -> None:1080for n in range(10, 25):1081array = np.random.randint(0, 20, n)1082for k in [3, 5, 7]:1083a = pl.Series(array)1084assert_series_equal(1085a.rolling_median(k), pl.from_pandas(a.to_pandas().rolling(k).median())1086)108710881089@pytest.mark.slow1090def test_rolling_median_2() -> None:1091np.random.seed(12)1092n = 10001093df = pl.DataFrame({"x": np.random.normal(0, 1, n)})1094# this can differ because simd sizes and non-associativity of floats.1095assert df.select(1096pl.col("x").rolling_median(window_size=10).sum()1097).item() == pytest.approx(5.139429061527812)1098assert df.select(1099pl.col("x").rolling_median(window_size=100).sum()1100).item() == pytest.approx(26.60506093611384)110111021103@pytest.mark.parametrize(1104("dates", "closed", "expected"),1105[1106(1107[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1108"right",1109[None, 3, 5],1110),1111(1112[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1113"left",1114[None, None, 3],1115),1116(1117[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1118"both",1119[None, 3, 6],1120),1121(1122[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1123"none",1124[None, None, None],1125),1126(1127[date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 4)],1128"right",1129[None, 3, None],1130),1131(1132[date(2020, 1, 1), date(2020, 1, 3), date(2020, 1, 4)],1133"right",1134[None, None, 5],1135),1136(1137[date(2020, 1, 1), date(2020, 1, 3), date(2020, 1, 5)],1138"right",1139[None, None, None],1140),1141],1142)1143def test_rolling_min_samples(1144dates: list[date], closed: ClosedInterval, expected: list[int]1145) -> None:1146df = pl.DataFrame({"date": dates, "value": [1, 2, 3]}).sort("date")1147result = df.select(1148pl.col("value").rolling_sum_by(1149"date", window_size="2d", min_samples=2, closed=closed1150)1151)["value"]1152assert_series_equal(result, pl.Series("value", expected, pl.Int64))11531154# Starting with unsorted data1155result = (1156df.sort("date", descending=True)1157.with_columns(1158pl.col("value").rolling_sum_by(1159"date", window_size="2d", min_samples=2, closed=closed1160)1161)1162.sort("date")["value"]1163)1164assert_series_equal(result, pl.Series("value", expected, pl.Int64))116511661167def test_rolling_returns_scalar_15656() -> None:1168df = pl.DataFrame(1169{1170"a": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)],1171"b": [4, 5, 6],1172"c": [1, 2, 3],1173}1174)1175result = df.group_by("c").agg(pl.col("b").rolling_mean_by("a", "2d")).sort("c")1176expected = pl.DataFrame({"c": [1, 2, 3], "b": [[4.0], [5.0], [6.0]]})1177assert_frame_equal(result, expected)117811791180def test_rolling_invalid() -> None:1181df = pl.DataFrame(1182{1183"values": [1, 4],1184"times": [datetime(2020, 1, 3), datetime(2020, 1, 1)],1185},1186)1187with pytest.raises(1188InvalidOperationError, match="duration may not be a parsed integer"1189):1190(1191df.sort("times")1192.rolling("times", period="3000i")1193.agg(pl.col("values").sum().alias("sum"))1194)1195with pytest.raises(1196InvalidOperationError, match="duration must be a parsed integer"1197):1198(1199df.with_row_index()1200.rolling("index", period="3000d")1201.agg(pl.col("values").sum().alias("sum"))1202)120312041205def test_by_different_length() -> None:1206df = pl.DataFrame({"b": [1]})1207with pytest.raises(InvalidOperationError, match="must be the same length"):1208df.select(1209pl.col("b").rolling_max_by(pl.Series([datetime(2020, 1, 1)] * 2), "1d")1210)121112121213def test_incorrect_nulls_16246() -> None:1214df = pl.concat(1215[1216pl.DataFrame({"a": [datetime(2020, 1, 1)], "b": [1]}),1217pl.DataFrame({"a": [datetime(2021, 1, 1)], "b": [1]}),1218],1219rechunk=False,1220)1221result = df.select(pl.col("b").rolling_max_by("a", "1d"))1222expected = pl.DataFrame({"b": [1, 1]})1223assert_frame_equal(result, expected)122412251226def test_rolling_with_dst() -> None:1227df = pl.DataFrame(1228{"a": [datetime(2020, 10, 26, 1), datetime(2020, 10, 26)], "b": [1, 2]}1229).with_columns(pl.col("a").dt.replace_time_zone("Europe/London"))1230with pytest.raises(ComputeError, match="is ambiguous"):1231df.select(pl.col("b").rolling_sum_by("a", "1d"))1232with pytest.raises(ComputeError, match="is ambiguous"):1233df.sort("a").select(pl.col("b").rolling_sum_by("a", "1d"))123412351236def interval_defs() -> SearchStrategy[ClosedInterval]:1237closed: list[ClosedInterval] = ["left", "right", "both", "none"]1238return st.sampled_from(closed)123912401241@given(1242period=st.timedeltas(1243min_value=timedelta(microseconds=0), max_value=timedelta(days=1000)1244).map(parse_as_duration_string),1245offset=st.timedeltas(1246min_value=timedelta(days=-1000), max_value=timedelta(days=1000)1247).map(parse_as_duration_string),1248closed=interval_defs(),1249data=st.data(),1250time_unit=_time_units(),1251)1252def test_rolling_parametric(1253period: str,1254offset: str,1255closed: ClosedInterval,1256data: st.DataObject,1257time_unit: TimeUnit,1258) -> None:1259assume(period != "")1260dataframe = data.draw(1261dataframes(1262[1263column(1264"ts",1265strategy=st.datetimes(1266min_value=datetime(2000, 1, 1),1267max_value=datetime(2001, 1, 1),1268),1269dtype=pl.Datetime(time_unit),1270),1271column(1272"value",1273strategy=st.integers(min_value=-100, max_value=100),1274dtype=pl.Int64,1275),1276],1277min_size=1,1278)1279)1280df = dataframe.sort("ts")1281result = df.rolling("ts", period=period, offset=offset, closed=closed).agg(1282pl.col("value")1283)12841285expected_dict: dict[str, list[object]] = {"ts": [], "value": []}1286for ts, _ in df.iter_rows():1287window = df.filter(1288pl.col("ts").is_between(1289pl.lit(ts, dtype=pl.Datetime(time_unit)).dt.offset_by(offset),1290pl.lit(ts, dtype=pl.Datetime(time_unit))1291.dt.offset_by(offset)1292.dt.offset_by(period),1293closed=closed,1294)1295)1296value = window["value"].to_list()1297expected_dict["ts"].append(ts)1298expected_dict["value"].append(value)1299expected = pl.DataFrame(expected_dict).select(1300pl.col("ts").cast(pl.Datetime(time_unit)),1301pl.col("value").cast(pl.List(pl.Int64)),1302)1303assert_frame_equal(result, expected)130413051306@given(1307window_size=st.timedeltas(1308min_value=timedelta(microseconds=0), max_value=timedelta(days=2)1309).map(parse_as_duration_string),1310closed=interval_defs(),1311data=st.data(),1312time_unit=_time_units(),1313aggregation=st.sampled_from(1314[1315"min",1316"max",1317"mean",1318"sum",1319"std",1320"var",1321"median",1322]1323),1324)1325def test_rolling_aggs(1326window_size: str,1327closed: ClosedInterval,1328data: st.DataObject,1329time_unit: TimeUnit,1330aggregation: str,1331) -> None:1332assume(window_size != "")13331334# Testing logic can be faulty when window is more precise than time unit1335# https://github.com/pola-rs/polars/issues/117541336assume(not (time_unit == "ms" and "us" in window_size))13371338dataframe = data.draw(1339dataframes(1340[1341column(1342"ts",1343strategy=st.datetimes(1344min_value=datetime(2000, 1, 1),1345max_value=datetime(2001, 1, 1),1346),1347dtype=pl.Datetime(time_unit),1348),1349column(1350"value",1351strategy=st.integers(min_value=-100, max_value=100),1352dtype=pl.Int64,1353),1354],1355)1356)1357df = dataframe.sort("ts")1358func = f"rolling_{aggregation}_by"1359result = df.with_columns(1360getattr(pl.col("value"), func)("ts", window_size=window_size, closed=closed)1361)1362result_from_unsorted = dataframe.with_columns(1363getattr(pl.col("value"), func)("ts", window_size=window_size, closed=closed)1364).sort("ts")13651366expected_dict: dict[str, list[object]] = {"ts": [], "value": []}1367for ts, _ in df.iter_rows():1368window = df.filter(1369pl.col("ts").is_between(1370pl.lit(ts, dtype=pl.Datetime(time_unit)).dt.offset_by(1371f"-{window_size}"1372),1373pl.lit(ts, dtype=pl.Datetime(time_unit)),1374closed=closed,1375)1376)1377expected_dict["ts"].append(ts)1378if window.is_empty():1379expected_dict["value"].append(None)1380else:1381value = getattr(window["value"], aggregation)()1382expected_dict["value"].append(value)1383expected = pl.DataFrame(expected_dict).select(1384pl.col("ts").cast(pl.Datetime(time_unit)),1385pl.col("value").cast(result["value"].dtype),1386)1387assert_frame_equal(result, expected)1388assert_frame_equal(result_from_unsorted, expected)138913901391def test_rolling_by_nulls() -> None:1392df = pl.DataFrame({"a": [1, None], "b": [1, 2]})1393with pytest.raises(1394InvalidOperationError, match="not yet supported for series with null values"1395):1396df.select(pl.col("a").rolling_min_by("b", "2i"))1397with pytest.raises(1398InvalidOperationError, match="not yet supported for series with null values"1399):1400df.select(pl.col("b").rolling_min_by("a", "2i"))140114021403def test_window_size_validation() -> None:1404df = pl.DataFrame({"x": [1.0]})14051406with pytest.raises(OverflowError, match=r"can't convert negative int to unsigned"):1407df.with_columns(trailing_min=pl.col("x").rolling_min(window_size=-3))140814091410def test_rolling_empty_21032() -> None:1411df = pl.DataFrame(schema={"a": pl.Datetime("ms"), "b": pl.Int64()})14121413result = df.rolling(index_column="a", period=timedelta(days=2)).agg(1414pl.col("b").sum()1415)1416assert_frame_equal(result, df)14171418result = df.rolling(1419index_column="a", period=timedelta(days=2), offset=timedelta(days=3)1420).agg(pl.col("b").sum())1421assert_frame_equal(result, df)142214231424def test_rolling_offset_agg_15122() -> None:1425df = pl.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [1, 2, 3, 1, 2, 3]})14261427result = df.rolling(index_column="b", period="1i", offset="0i", group_by="a").agg(1428window=pl.col("b")1429)1430expected = df.with_columns(window=pl.Series([[2], [3], [], [2], [3], []]))1431assert_frame_equal(result, expected)14321433result = df.rolling(index_column="b", period="1i", offset="1i", group_by="a").agg(1434window=pl.col("b")1435)1436expected = df.with_columns(window=pl.Series([[3], [], [], [3], [], []]))1437assert_frame_equal(result, expected)143814391440def test_rolling_sum_stability_11146() -> None:1441data_frame = pl.DataFrame(1442{1443"value": [14440.0,1445290.57,1446107.0,1447172.0,1448124.25,1449304.0,1450379.5,1451347.35,14521516.41,1453386.12,1454226.5,1455294.62,1456125.5,14570.0,14580.0,14590.0,14600.0,14610.0,14620.0,14630.0,14640.0,1465]1466}1467)1468assert (1469data_frame.with_columns(1470pl.col("value").rolling_mean(window_size=8, min_samples=1).alias("test_col")1471)["test_col"][-1]1472== 0.01473)147414751476def test_rolling() -> None:1477df = pl.DataFrame(1478{1479"n": [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10],1480"col1": ["A", "B"] * 11,1481}1482)14831484assert df.rolling("n", period="1i", group_by="col1").agg().to_dict(1485as_series=False1486) == {1487"col1": [1488"A",1489"A",1490"A",1491"A",1492"A",1493"A",1494"A",1495"A",1496"A",1497"A",1498"A",1499"B",1500"B",1501"B",1502"B",1503"B",1504"B",1505"B",1506"B",1507"B",1508"B",1509"B",1510],1511"n": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10],1512}151315141515@pytest.mark.parametrize(1516"method",1517["nearest", "higher", "lower", "midpoint", "linear", "equiprobable"],1518)1519def test_rolling_quantile_with_nulls_22781(method: QuantileMethod) -> None:1520lf = pl.LazyFrame(1521{1522"index": [0, 1, 2, 3, 4, 5, 6, 7, 8],1523"a": [None, None, 1.0, None, None, 1.0, 1.0, None, None],1524}1525)1526out = (1527lf.rolling("index", period="2i")1528.agg(pl.col("a").quantile(0.5, interpolation=method))1529.collect()1530)1531expected = pl.Series("a", [None, None, 1.0, 1.0, None, 1.0, 1.0, 1.0, None])1532assert_series_equal(out["a"], expected)153315341535def test_rolling_quantile_nearest_23392() -> None:1536base = range(11)1537s = pl.Series(base)15381539shuffle_base = list(base)1540random.shuffle(shuffle_base)1541s_shuffled = pl.Series(shuffle_base)15421543for q in np.arange(0, 1.0, 0.02, dtype=float):1544out = s.rolling_quantile(q, interpolation="nearest", window_size=11)15451546# explicit:1547expected = pl.Series([None] * 10 + [float(round(q * 10.0))])1548assert_series_equal(out, expected)15491550# equivalence:1551equiv = s.quantile(q, interpolation="nearest")1552assert out.last() == equiv15531554# shuffled:1555out = s_shuffled.rolling_quantile(q, interpolation="nearest", window_size=11)1556assert_series_equal(out, expected)155715581559def test_rolling_quantile_nearest_kernel_23392() -> None:1560df = pl.DataFrame(1561{1562"dt": [1563datetime(2021, 1, 1),1564datetime(2021, 1, 2),1565datetime(2021, 1, 4),1566datetime(2021, 1, 5),1567datetime(2021, 1, 7),1568],1569"values": pl.arange(0, 5, eager=True),1570}1571)1572# values (period="3d", quantile=0.7) are chosen to trigger index rounding1573out = (1574df.set_sorted("dt")1575.rolling("dt", period="3d", closed="both")1576.agg([pl.col("values").quantile(quantile=0.7).alias("quantile")])1577.select("quantile")1578)1579expected = pl.DataFrame({"quantile": [0.0, 1.0, 1.0, 2.0, 3.0]})1580assert_frame_equal(out, expected)158115821583def test_rolling_quantile_nearest_with_nulls_23932() -> None:1584lf = pl.LazyFrame(1585{1586"index": [0, 1, 2, 3, 4, 5, 6],1587"a": [None, None, 1.0, 2.0, 3.0, None, None],1588}1589)1590# values (period="3i", quantile=0.7) are chosen to trigger index rounding1591out = (1592lf.rolling("index", period="3i")1593.agg(pl.col("a").quantile(0.7, interpolation="nearest"))1594.collect()1595)1596expected = pl.Series("a", [None, None, 1.0, 2.0, 2.0, 3.0, 3.0])1597assert_series_equal(out["a"], expected)159815991600def test_wtd_min_periods_less_window() -> None:1601df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}).with_columns(1602pl.col("a")1603.rolling_mean(1604window_size=3, weights=[0.25, 0.5, 0.25], min_samples=2, center=True1605)1606.alias("kernel_mean")1607)16081609expected = pl.DataFrame(1610{"a": [1, 2, 3, 4, 5], "kernel_mean": [1.333333, 2, 3, 4, 4.666667]}1611)16121613assert_frame_equal(df, expected)16141615df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}).with_columns(1616pl.col("a")1617.rolling_sum(1618window_size=3, weights=[0.25, 0.5, 0.25], min_samples=2, center=True1619)1620.alias("kernel_sum")1621)1622expected = pl.DataFrame(1623{"a": [1, 2, 3, 4, 5], "kernel_sum": [1.0, 2.0, 3.0, 4.0, 3.5]}1624)16251626df = pl.DataFrame({"a": [1, 2, 3, 4, 5]}).with_columns(1627pl.col("a")1628.rolling_mean(1629window_size=3, weights=[0.2, 0.3, 0.5], min_samples=2, center=False1630)1631.alias("kernel_mean")1632)16331634expected = pl.DataFrame(1635{"a": [1, 2, 3, 4, 5], "kernel_mean": [None, 1.625, 2.3, 3.3, 4.3]}1636)16371638assert_frame_equal(df, expected)16391640df = pl.DataFrame({"a": [1, 2]}).with_columns(1641pl.col("a")1642.rolling_mean(1643window_size=3, weights=[0.25, 0.5, 0.25], min_samples=2, center=True1644)1645.alias("kernel_mean")1646)16471648# Handle edge case where the window size is larger than the number of elements1649expected = pl.DataFrame({"a": [1, 2], "kernel_mean": [1.333333, 1.666667]})1650assert_frame_equal(df, expected)16511652df = pl.DataFrame({"a": [1, 2]}).with_columns(1653pl.col("a")1654.rolling_mean(1655window_size=3, weights=[0.25, 0.25, 0.5], min_samples=1, center=False1656)1657.alias("kernel_mean")1658)16591660expected = pl.DataFrame({"a": [1, 2], "kernel_mean": [1.0, 2 * 2 / 3 + 1 * 1 / 3]})16611662df = pl.DataFrame({"a": [1]}).with_columns(1663pl.col("a")1664.rolling_sum(16656, center=True, min_samples=0, weights=[1, 10, 100, 1000, 10_000, 100_000]1666)1667.alias("kernel_sum")1668)1669expected = pl.DataFrame({"a": [1], "kernel_sum": [1000.0]})1670assert_frame_equal(df, expected)167116721673def test_rolling_median_23480() -> None:1674vals = [None] * 17 + [3262645.8, 856191.4, 1635379.0, 34707156.0]1675evals = [None] * 19 + [1635379.0, (3262645.8 + 1635379.0) / 2]1676out = pl.DataFrame({"a": vals}).select(1677r15=pl.col("a").rolling_median(15, min_samples=3),1678r17=pl.col("a").rolling_median(17, min_samples=3),1679)1680expected = pl.DataFrame({"r15": evals, "r17": evals})1681assert_frame_equal(out, expected)168216831684@pytest.mark.slow1685@pytest.mark.parametrize("with_nulls", [True, False])1686def test_rolling_sum_non_finite_23115(with_nulls: bool) -> None:1687values: list[float | None] = [16880.0,1689float("nan"),1690float("inf"),1691-float("inf"),169242.0,1693-3.0,1694]1695if with_nulls:1696values.append(None)1697data = random.choices(values, k=1000)1698naive = [1699sum(0 if x is None else x for x in data[max(0, i + 1 - 4) : i + 1])1700if sum(x is not None for x in data[max(0, i + 1 - 4) : i + 1]) >= 21701else None1702for i in range(1000)1703]1704assert_series_equal(pl.Series(data).rolling_sum(4, min_samples=2), pl.Series(naive))170517061707