# Path: blob/main/py-polars/tests/unit/operations/test_join_asof.py
# 8424 views
from __future__ import annotations12import itertools3import math4import random5import warnings6from datetime import date, datetime, timedelta7from typing import TYPE_CHECKING, Any89import numpy as np10import pytest11from hypothesis import given1213import polars as pl14from polars.exceptions import DuplicateError, InvalidOperationError15from polars.testing import assert_frame_equal16from polars.testing.parametric.strategies.core import dataframes1718if TYPE_CHECKING:19from polars._typing import AsofJoinStrategy, PolarsIntegerType202122def test_asof_join_singular_right_11966() -> None:23df = pl.DataFrame({"id": [1, 2, 3], "time": [0.9, 2.1, 2.8]}).sort("time")24lookup = pl.DataFrame({"time": [2.0], "value": [100]}).sort("time")25joined = df.join_asof(lookup, on="time", strategy="nearest")26expected = pl.DataFrame(27{"id": [1, 2, 3], "time": [0.9, 2.1, 2.8], "value": [100, 100, 100]}28)29assert_frame_equal(joined, expected)303132def test_asof_join_inline_cast_6438() -> None:33df_trades = pl.DataFrame(34{35"time": [36datetime(2020, 1, 1, 9, 1, 0),37datetime(2020, 1, 1, 9, 1, 0),38datetime(2020, 1, 1, 9, 3, 0),39datetime(2020, 1, 1, 9, 6, 0),40],41"stock": ["A", "B", "B", "C"],42"trade": [101, 299, 301, 500],43}44)4546df_quotes = pl.DataFrame(47{48"time": [49datetime(2020, 1, 1, 9, 0, 0),50datetime(2020, 1, 1, 9, 2, 0),51datetime(2020, 1, 1, 9, 3, 0),52datetime(2020, 1, 1, 9, 6, 0),53],54"stock": ["A", "B", "C", "A"],55"quote": [100, 300, 501, 102],56}57).with_columns([pl.col("time").dt.cast_time_unit("ns")])5859assert df_trades.join_asof(60df_quotes, on=pl.col("time").cast(pl.Datetime("ns")).set_sorted(), by="stock"61).to_dict(as_series=False) == {62"time": [63datetime(2020, 1, 1, 9, 1),64datetime(2020, 1, 1, 9, 1),65datetime(2020, 1, 1, 9, 3),66datetime(2020, 1, 1, 9, 6),67],68"time_right": [69datetime(2020, 1, 1, 9, 0),70None,71datetime(2020, 1, 1, 9, 2),72datetime(2020, 1, 1, 9, 3),73],74"stock": ["A", "B", "B", "C"],75"trade": [101, 299, 301, 500],76"quote": [100, 
None, 300, 501],77}787980def test_asof_join_projection_resolution_4606() -> None:81a = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).lazy()82b = pl.DataFrame({"a": [1], "b": [2], "d": [4]}).lazy()83joined_tbl = a.join_asof(b, on=pl.col("a").set_sorted(), by="b")84assert joined_tbl.group_by("a").agg(85[pl.col("c").sum().alias("c")]86).collect().columns == ["a", "c"]878889def test_asof_join_schema_5211() -> None:90df1 = pl.DataFrame({"today": [1, 2]})9192df2 = pl.DataFrame({"next_friday": [1, 2]})9394assert (95df1.lazy()96.join_asof(97df2.lazy(), left_on="today", right_on="next_friday", strategy="forward"98)99.collect_schema()100) == {"today": pl.Int64, "next_friday": pl.Int64}101102103def test_asof_join_schema_5684() -> None:104df_a = (105pl.DataFrame(106{107"id": [1],108"a": [1],109"b": [1],110}111)112.lazy()113.set_sorted("a")114)115116df_b = (117pl.DataFrame(118{119"id": [1, 1, 2],120"b": [-3, -3, 6],121}122)123.lazy()124.set_sorted("b")125)126127q = (128df_a.join_asof(df_b, by="id", left_on="a", right_on="b")129.drop("b")130.join_asof(df_b, by="id", left_on="a", right_on="b")131.drop("b")132)133134projected_result = q.select(pl.all()).collect()135result = q.collect()136137assert_frame_equal(projected_result, result)138assert (139q.collect_schema()140== projected_result.schema141== {"id": pl.Int64, "a": pl.Int64, "b_right": pl.Int64}142)143144145def test_join_asof_mismatched_dtypes() -> None:146# test 'on' dtype mismatch147df1 = pl.DataFrame(148{"a": pl.Series([1, 2, 3], dtype=pl.Int64), "b": ["a", "b", "c"]}149)150df2 = pl.DataFrame(151{"a": pl.Series([1.0, 2.0, 3.0], dtype=pl.Float64), "c": ["d", "e", "f"]}152)153154with pytest.raises(155pl.exceptions.SchemaError, match="datatypes of join keys don't match"156):157df1.join_asof(df2, on="a", strategy="forward")158159# test 'by' dtype mismatch160df1 = pl.DataFrame(161{162"time": pl.date_range(date(2018, 1, 1), date(2018, 1, 8), eager=True),163"group": pl.Series([1, 1, 1, 1, 2, 2, 2, 2], dtype=pl.Int32),164"value": 
[0, 0, None, None, 2, None, 1, None],165}166)167df2 = pl.DataFrame(168{169"time": pl.date_range(date(2018, 1, 1), date(2018, 1, 8), eager=True),170"group": pl.Series([1, 1, 1, 1, 2, 2, 2, 2], dtype=pl.Int64),171"value": [0, 0, None, None, 2, None, 1, None],172}173)174175with pytest.raises(176pl.exceptions.ComputeError, match="mismatching dtypes in 'by' parameter"177):178df1.join_asof(df2, on="time", by="group", strategy="forward")179180181def test_join_asof_floats() -> None:182df1 = pl.DataFrame({"a": [1.0, 2.0, 3.0], "b": ["lrow1", "lrow2", "lrow3"]})183df2 = pl.DataFrame({"a": [0.59, 1.49, 2.89], "b": ["rrow1", "rrow2", "rrow3"]})184185result = df1.join_asof(df2, on=pl.col("a").set_sorted(), strategy="backward")186expected = {187"a": [1.0, 2.0, 3.0],188"b": ["lrow1", "lrow2", "lrow3"],189"a_right": [0.59, 1.49, 2.89],190"b_right": ["rrow1", "rrow2", "rrow3"],191}192assert result.to_dict(as_series=False) == expected193194# with by argument195# 5740196df1 = pl.DataFrame(197{"b": np.linspace(0, 5, 7), "c": ["x" if i < 4 else "y" for i in range(7)]}198)199df2 = pl.DataFrame(200{201"val": [0.0, 2.5, 2.6, 2.7, 3.4, 4.0, 5.0],202"c": ["x", "x", "x", "y", "y", "y", "y"],203}204).with_columns(pl.col("val").alias("b").set_sorted())205assert df1.set_sorted("b").join_asof(df2, on=pl.col("b"), by="c").to_dict(206as_series=False207) == {208"b": [2090.0,2100.8333333333333334,2111.6666666666666667,2122.5,2133.3333333333333335,2144.166666666666667,2155.0,216],217"c": ["x", "x", "x", "x", "y", "y", "y"],218"val": [0.0, 0.0, 0.0, 2.5, 2.7, 4.0, 5.0],219}220221222def test_join_asof_tolerance() -> None:223df_trades = pl.DataFrame(224{225"time": [226datetime(2020, 1, 1, 9, 0, 1),227datetime(2020, 1, 1, 9, 0, 1),228datetime(2020, 1, 1, 9, 0, 3),229datetime(2020, 1, 1, 9, 0, 6),230],231"stock": ["A", "B", "B", "C"],232"trade": [101, 299, 301, 500],233}234).set_sorted("time")235236df_quotes = pl.DataFrame(237{238"time": [239datetime(2020, 1, 1, 9, 0, 0),240datetime(2020, 1, 1, 9, 0, 
2),241datetime(2020, 1, 1, 9, 0, 4),242datetime(2020, 1, 1, 9, 0, 6),243],244"stock": ["A", "B", "C", "A"],245"quote": [100, 300, 501, 102],246}247).set_sorted("time")248249assert df_trades.join_asof(250df_quotes, on="time", by="stock", tolerance="2s"251).to_dict(as_series=False) == {252"time": [253datetime(2020, 1, 1, 9, 0, 1),254datetime(2020, 1, 1, 9, 0, 1),255datetime(2020, 1, 1, 9, 0, 3),256datetime(2020, 1, 1, 9, 0, 6),257],258"stock": ["A", "B", "B", "C"],259"trade": [101, 299, 301, 500],260"quote": [100, None, 300, 501],261}262263assert df_trades.join_asof(264df_quotes, on="time", by="stock", tolerance="1s"265).to_dict(as_series=False) == {266"time": [267datetime(2020, 1, 1, 9, 0, 1),268datetime(2020, 1, 1, 9, 0, 1),269datetime(2020, 1, 1, 9, 0, 3),270datetime(2020, 1, 1, 9, 0, 6),271],272"stock": ["A", "B", "B", "C"],273"trade": [101, 299, 301, 500],274"quote": [100, None, 300, None],275}276277for invalid_tolerance, match in [278("foo", "expected leading integer"),279([None], "could not extract number"),280]:281with pytest.raises(pl.exceptions.PolarsError, match=match):282df_trades.join_asof(283df_quotes,284on="time",285by="stock",286tolerance=invalid_tolerance, # type: ignore[arg-type]287)288289290def test_join_asof_tolerance_forward() -> None:291df_quotes = pl.DataFrame(292{293"time": [294datetime(2020, 1, 1, 9, 0, 0),295datetime(2020, 1, 1, 9, 0, 2),296datetime(2020, 1, 1, 9, 0, 4),297datetime(2020, 1, 1, 9, 0, 6),298datetime(2020, 1, 1, 9, 0, 7),299],300"stock": ["A", "B", "C", "A", "D"],301"quote": [100, 300, 501, 102, 10],302}303).set_sorted("time")304305df_trades = pl.DataFrame(306{307"time": [308datetime(2020, 1, 1, 9, 0, 2),309datetime(2020, 1, 1, 9, 0, 1),310datetime(2020, 1, 1, 9, 0, 3),311datetime(2020, 1, 1, 9, 0, 6),312datetime(2020, 1, 1, 9, 0, 7),313],314"stock": ["A", "B", "B", "C", "D"],315"trade": [101, 299, 301, 500, 10],316}317).set_sorted("time")318319assert df_quotes.join_asof(320df_trades, on="time", by="stock", tolerance="2s", 
strategy="forward"321).to_dict(as_series=False) == {322"time": [323datetime(2020, 1, 1, 9, 0, 0),324datetime(2020, 1, 1, 9, 0, 2),325datetime(2020, 1, 1, 9, 0, 4),326datetime(2020, 1, 1, 9, 0, 6),327datetime(2020, 1, 1, 9, 0, 7),328],329"stock": ["A", "B", "C", "A", "D"],330"quote": [100, 300, 501, 102, 10],331"trade": [101, 301, 500, None, 10],332}333334assert df_quotes.join_asof(335df_trades, on="time", by="stock", tolerance="1s", strategy="forward"336).to_dict(as_series=False) == {337"time": [338datetime(2020, 1, 1, 9, 0, 0),339datetime(2020, 1, 1, 9, 0, 2),340datetime(2020, 1, 1, 9, 0, 4),341datetime(2020, 1, 1, 9, 0, 6),342datetime(2020, 1, 1, 9, 0, 7),343],344"stock": ["A", "B", "C", "A", "D"],345"quote": [100, 300, 501, 102, 10],346"trade": [None, 301, None, None, 10],347}348349# Sanity check that this gives us equi-join350assert df_quotes.join_asof(351df_trades, on="time", by="stock", tolerance="0s", strategy="forward"352).to_dict(as_series=False) == {353"time": [354datetime(2020, 1, 1, 9, 0, 0),355datetime(2020, 1, 1, 9, 0, 2),356datetime(2020, 1, 1, 9, 0, 4),357datetime(2020, 1, 1, 9, 0, 6),358datetime(2020, 1, 1, 9, 0, 7),359],360"stock": ["A", "B", "C", "A", "D"],361"quote": [100, 300, 501, 102, 10],362"trade": [None, None, None, None, 10],363}364365366def test_join_asof_projection() -> None:367df1 = pl.DataFrame(368{369"df1_date": [20221011, 20221012, 20221013, 20221014, 20221016],370"df1_col1": ["foo", "bar", "foo", "bar", "foo"],371"key": ["a", "b", "b", "a", "b"],372}373).set_sorted("df1_date")374375df2 = pl.DataFrame(376{377"df2_date": [20221012, 20221015, 20221018],378"df2_col1": ["1", "2", "3"],379"key": ["a", "b", "b"],380}381).set_sorted("df2_date")382383assert (384(385df1.lazy().join_asof(df2.lazy(), left_on="df1_date", right_on="df2_date")386).select([pl.col("df2_date"), "df1_date"])387).collect().to_dict(as_series=False) == {388"df2_date": [None, 20221012, 20221012, 20221012, 20221015],389"df1_date": [20221011, 20221012, 20221013, 20221014, 
20221016],390}391assert (392df1.lazy().join_asof(393df2.lazy(), by="key", left_on="df1_date", right_on="df2_date"394)395).select(["df2_date", "df1_date"]).collect().to_dict(as_series=False) == {396"df2_date": [None, None, None, 20221012, 20221015],397"df1_date": [20221011, 20221012, 20221013, 20221014, 20221016],398}399400401def test_asof_join_by_logical_types() -> None:402dates = (403pl.datetime_range(404datetime(2022, 1, 1), datetime(2022, 1, 2), interval="2h", eager=True405)406.cast(pl.Datetime("ns"))407.head(9)408)409x = pl.DataFrame({"a": dates, "b": map(float, range(9)), "c": ["1", "2", "3"] * 3})410411result = x.join_asof(x, on=pl.col("b").set_sorted(), by=["c", "a"])412413expected = {414"a": [415datetime(2022, 1, 1, 0, 0),416datetime(2022, 1, 1, 2, 0),417datetime(2022, 1, 1, 4, 0),418datetime(2022, 1, 1, 6, 0),419datetime(2022, 1, 1, 8, 0),420datetime(2022, 1, 1, 10, 0),421datetime(2022, 1, 1, 12, 0),422datetime(2022, 1, 1, 14, 0),423datetime(2022, 1, 1, 16, 0),424],425"b": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],426"c": ["1", "2", "3", "1", "2", "3", "1", "2", "3"],427"b_right": [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],428}429assert result.to_dict(as_series=False) == expected430431432def test_join_asof_projection_7481() -> None:433ldf1 = pl.DataFrame({"a": [1, 2, 2], "b": "bleft"}).lazy().set_sorted("a")434ldf2 = pl.DataFrame({"a": 2, "b": [1, 2, 2]}).lazy().set_sorted("b")435436assert (437ldf1.join_asof(ldf2, left_on="a", right_on="b").select("a", "b")438).collect().to_dict(as_series=False) == {439"a": [1, 2, 2],440"b": ["bleft", "bleft", "bleft"],441}442443444def test_asof_join_sorted_by_group(capsys: Any) -> None:445df1 = pl.DataFrame(446{447"key": ["a", "a", "a", "b", "b", "b"],448"asof_key": [2.0, 1.0, 3.0, 1.0, 2.0, 3.0],449"a": [102, 101, 103, 104, 105, 106],450}451).sort(by=["key", "asof_key"])452453df2 = pl.DataFrame(454{455"key": ["a", "a", "a", "b", "b", "b"],456"asof_key": [0.9, 1.9, 2.9, 0.9, 1.9, 2.9],457"b": [201, 202, 203, 204, 
205, 206],458}459).sort(by=["key", "asof_key"])460461expected = pl.DataFrame(462[463pl.Series("key", ["a", "a", "a", "b", "b", "b"], dtype=pl.String),464pl.Series("asof_key", [1.0, 2.0, 3.0, 1.0, 2.0, 3.0], dtype=pl.Float64),465pl.Series("a", [101, 102, 103, 104, 105, 106], dtype=pl.Int64),466pl.Series("b", [201, 202, 203, 204, 205, 206], dtype=pl.Int64),467]468)469470out = df1.join_asof(df2, on="asof_key", by="key")471assert_frame_equal(out, expected)472473_, err = capsys.readouterr()474assert "is not explicitly sorted" not in err475476477def test_asof_join_nearest() -> None:478# Generic join_asof479df1 = pl.DataFrame(480{481"asof_key": [-1, 1, 2, 4, 6],482"a": [1, 2, 3, 4, 5],483}484).sort(by="asof_key")485486df2 = pl.DataFrame(487{488"asof_key": [-1, 2, 4, 5],489"b": [1, 2, 3, 4],490}491).sort(by="asof_key")492493expected = pl.DataFrame(494{"asof_key": [-1, 1, 2, 4, 6], "a": [1, 2, 3, 4, 5], "b": [1, 2, 2, 3, 4]}495)496497out = df1.join_asof(df2, on="asof_key", strategy="nearest")498assert_frame_equal(out, expected)499500# Edge case: last item of right matches multiples on left501df1 = pl.DataFrame(502{503"asof_key": [9, 9, 10, 10, 10],504"a": [1, 2, 3, 4, 5],505}506).set_sorted("asof_key")507df2 = pl.DataFrame(508{509"asof_key": [1, 2, 3, 10],510"b": [1, 2, 3, 4],511}512).set_sorted("asof_key")513expected = pl.DataFrame(514{515"asof_key": [9, 9, 10, 10, 10],516"a": [1, 2, 3, 4, 5],517"b": [4, 4, 4, 4, 4],518}519)520521out = df1.join_asof(df2, on="asof_key", strategy="nearest")522assert_frame_equal(out, expected)523524525@pytest.mark.slow526@pytest.mark.parametrize("allow_exact_matches", [True, False])527def test_asof_join_nearest_reference(allow_exact_matches: bool) -> None:528def asof_join_nearest_reference(529df_left: pl.DataFrame, df_right: pl.DataFrame530) -> pl.DataFrame:531schema = {"key": pl.Int32, "value": pl.Int32, "value_right": pl.Int32}532result = pl.DataFrame(schema=schema)533for left_row in df_left.iter_rows():534cross_product_sorted = 
(535pl.DataFrame(536{537"key": pl.Series([left_row[0]] * len(df_right), dtype=pl.Int32),538"value": pl.Series(539[left_row[1]] * len(df_right), dtype=pl.Int32540),541"key_right": df_right["key"],542"value_right": df_right["value"],543},544)545.with_row_index()546.filter(547pl.when(allow_exact_matches)548.then(pl.lit(True))549.otherwise(pl.col("key") != pl.col("key_right"))550)551.sort(552(pl.col("key") - pl.col("key_right")).abs(),553-pl.col("index").cast(pl.Int32),554)555.drop("index", "key_right")556)557if len(cross_product_sorted) == 0:558result = result.vstack(559pl.DataFrame([left_row + (None,)], schema=schema, orient="row"),560)561else:562best_match = cross_product_sorted[0]563result = result.vstack(best_match)564return result565566test_dfs = []567rng = random.Random()568for n_a, n_b, n_c, n_d in itertools.product([0, 1, 2], repeat=4):569a = rng.randint(0, 10)570b = rng.randint(0, 10)571c = rng.randint(0, 10)572d = rng.randint(0, 10)573keys = [a] * n_a + [b] * n_b + [c] * n_c + [d] * n_d574values = [rng.randint(0, 100000) for _ in keys]575576df = pl.DataFrame(577{"key": keys, "value": values}, schema={"key": pl.Int32, "value": pl.Int32}578).sort(by="key")579test_dfs.append(df)580581for df_left, df_right in itertools.product(test_dfs, repeat=2):582expected = asof_join_nearest_reference(df_left, df_right)583actual = df_left.join_asof(584df_right,585on="key",586strategy="nearest",587allow_exact_matches=allow_exact_matches,588)589assert_frame_equal(actual, expected)590591592def test_asof_join_nearest_with_tolerance() -> None:593a = b = [1, 2, 3, 4, 5]594595nones = pl.Series([None, None, None, None, None], dtype=pl.Int64)596597# Case 1: complete miss598df1 = pl.DataFrame({"asof_key": [1, 2, 3, 4, 5], "a": a}).set_sorted("asof_key")599df2 = pl.DataFrame(600{601"asof_key": [7, 8, 9, 10, 11],602"b": b,603}604).set_sorted("asof_key")605expected = df1.with_columns(nones.alias("b"))606out = df1.join_asof(df2, on="asof_key", strategy="nearest", 
tolerance=1)607assert_frame_equal(out, expected)608609# Case 2: complete miss in other direction610df1 = pl.DataFrame({"asof_key": [7, 8, 9, 10, 11], "a": a}).set_sorted("asof_key")611df2 = pl.DataFrame(612{613"asof_key": [1, 2, 3, 4, 5],614"b": b,615}616).set_sorted("asof_key")617expected = df1.with_columns(nones.alias("b"))618out = df1.join_asof(df2, on="asof_key", strategy="nearest", tolerance=1)619assert_frame_equal(out, expected)620621# Case 3: match first item622df1 = pl.DataFrame({"asof_key": [1, 2, 3, 4, 5], "a": a}).set_sorted("asof_key")623df2 = pl.DataFrame(624{625"asof_key": [6, 7, 8, 9, 10],626"b": b,627}628).set_sorted("asof_key")629out = df1.join_asof(df2, on="asof_key", strategy="nearest", tolerance=1)630expected = df1.with_columns(pl.Series([None, None, None, None, 1]).alias("b"))631assert_frame_equal(out, expected)632633# Case 4: match last item634df1 = pl.DataFrame({"asof_key": [1, 2, 3, 4, 5], "a": a}).set_sorted("asof_key")635df2 = pl.DataFrame(636{637"asof_key": [-4, -3, -2, -1, 0],638"b": b,639}640).set_sorted("asof_key")641out = df1.join_asof(df2, on="asof_key", strategy="nearest", tolerance=1)642expected = df1.with_columns(pl.Series([5, None, None, None, None]).alias("b"))643assert_frame_equal(out, expected)644645# Case 5: match multiples, pick closer646df1 = pl.DataFrame(647{"asof_key": pl.Series([1, 2, 3, 4, 5], dtype=pl.Float64), "a": a}648).set_sorted("asof_key")649df2 = pl.DataFrame(650{651"asof_key": [0.0, 2.0, 2.4, 3.4, 10.0],652"b": b,653}654).set_sorted("asof_key")655out = df1.join_asof(df2, on="asof_key", strategy="nearest", tolerance=1)656expected = df1.with_columns(pl.Series([2, 2, 4, 4, None]).alias("b"))657assert_frame_equal(out, expected)658659# Case 6: use 0 tolerance660df1 = pl.DataFrame(661{"asof_key": pl.Series([1, 2, 3, 4, 5], dtype=pl.Float64), "a": a}662).set_sorted("asof_key")663df2 = pl.DataFrame(664{665"asof_key": [0.0, 2.0, 2.4, 3.4, 10.0],666"b": b,667}668).set_sorted("asof_key")669out = df1.join_asof(df2, 
on="asof_key", strategy="nearest", tolerance=0)670expected = df1.with_columns(pl.Series([None, 2, None, None, None]).alias("b"))671assert_frame_equal(out, expected)672673# Case 7: test with datetime674df1 = pl.DataFrame(675{676"asof_key": pl.Series(677[678datetime(2023, 1, 1),679datetime(2023, 1, 2),680datetime(2023, 1, 3),681datetime(2023, 1, 4),682datetime(2023, 1, 6),683]684),685"a": a,686}687).set_sorted("asof_key")688df2 = pl.DataFrame(689{690"asof_key": pl.Series(691[692datetime(2022, 1, 1),693datetime(2022, 1, 2),694datetime(2022, 1, 3),695datetime(6962023, 1, 2, 21, 30, 0697), # should match with 2023-01-02, 2023-01-03, and 2021-01-04698datetime(2023, 1, 7),699]700),701"b": b,702}703).set_sorted("asof_key")704out = df1.join_asof(df2, on="asof_key", strategy="nearest", tolerance="1d4h")705expected = df1.with_columns(pl.Series([None, 4, 4, 4, 5]).alias("b"))706assert_frame_equal(out, expected)707708# Case 8: test using timedelta tolerance709out = df1.join_asof(710df2, on="asof_key", strategy="nearest", tolerance=timedelta(days=1, hours=4)711)712assert_frame_equal(out, expected)713714# Case #9: last item is closest match715df1 = pl.DataFrame(716{717"asof_key_left": [10.00001, 20.0, 30.0],718}719).set_sorted("asof_key_left")720df2 = pl.DataFrame(721{722"asof_key_right": [10.00001, 20.0001, 29.0],723}724).set_sorted("asof_key_right")725out = df1.join_asof(726df2,727left_on="asof_key_left",728right_on="asof_key_right",729strategy="nearest",730tolerance=0.5,731)732expected = pl.DataFrame(733{734"asof_key_left": [10.00001, 20.0, 30.0],735"asof_key_right": [10.00001, 20.0001, None],736}737)738assert_frame_equal(out, expected)739740741def test_asof_join_nearest_by() -> None:742# Generic join_asof743df1 = pl.DataFrame(744{745"asof_key": [-1, 1, 2, 6, 1],746"group": [1, 1, 1, 2, 2],747"a": [1, 2, 3, 2, 5],748}749).sort(by=["group", "asof_key"])750751df2 = pl.DataFrame(752{753"asof_key": [-1, 2, 5, 1],754"group": [1, 1, 2, 2],755"b": [1, 2, 3, 
4],756}757).sort(by=["group", "asof_key"])758759expected = pl.DataFrame(760{761"asof_key": [-1, 1, 2, 6, 1],762"group": [1, 1, 1, 2, 2],763"a": [1, 2, 3, 5, 2],764"b": [1, 2, 2, 4, 3],765}766).sort(by=["group", "asof_key"])767768# Edge case: last item of right matches multiples on left769df1 = pl.DataFrame(770{771"asof_key": [9, 9, 10, 10, 10],772"group": [1, 1, 1, 2, 2],773"a": [1, 2, 3, 2, 5],774}775).sort(by=["group", "asof_key"])776777df2 = pl.DataFrame(778{779"asof_key": [-1, 1, 1, 10],780"group": [1, 1, 2, 2],781"b": [1, 2, 3, 4],782}783).sort(by=["group", "asof_key"])784785expected = pl.DataFrame(786{787"asof_key": [9, 9, 10, 10, 10],788"group": [1, 1, 1, 2, 2],789"a": [1, 2, 3, 2, 5],790"b": [2, 2, 2, 4, 4],791}792)793794out = df1.join_asof(df2, on="asof_key", by="group", strategy="nearest")795assert_frame_equal(out, expected)796797a = pl.DataFrame(798{799"code": [676, 35, 676, 676, 676],800"time": [364360, 364370, 364380, 365400, 367440],801}802)803b = pl.DataFrame(804{805"code": [676, 676, 35, 676, 676],806"time": [364000, 365000, 365000, 366000, 367000],807"price": [1.0, 2.0, 50, 3.0, None],808}809)810811expected = pl.DataFrame(812{813"code": [676, 35, 676, 676, 676],814"time": [364360, 364370, 364380, 365400, 367440],815"price": [1.0, 50.0, 1.0, 2.0, None],816}817)818819out = a.join_asof(b, by="code", on="time", strategy="nearest")820assert_frame_equal(out, expected)821822# last item is closest match823df1 = pl.DataFrame(824{825"a": [1, 1, 1],826"asof_key_left": [10.00001, 20.0, 30.0],827}828).set_sorted("asof_key_left")829df2 = pl.DataFrame(830{831"a": [1, 1, 1],832"asof_key_right": [10.00001, 20.0001, 29.0],833}834).set_sorted("asof_key_right")835out = df1.join_asof(836df2,837left_on="asof_key_left",838right_on="asof_key_right",839by="a",840strategy="nearest",841)842expected = pl.DataFrame(843{844"a": [1, 1, 1],845"asof_key_left": [10.00001, 20.0, 30.0],846"asof_key_right": [10.00001, 20.0001, 29.0],847}848)849assert_frame_equal(out, 
expected)850851852def test_asof_join_nearest_by_with_tolerance() -> None:853df1 = pl.DataFrame(854{855"group": [8561,8571,8581,8591,8601,8612,8622,8632,8642,8652,8663,8673,8683,8693,8703,8714,8724,8734,8744,8754,8765,8775,8785,8795,8805,8816,8826,8836,8846,8856,886],887"asof_key": pl.Series(888[8891,8902,8913,8924,8935,8947,8958,8969,89710,89811,8991,9002,9013,9024,9035,9041,9052,9063,9074,9085,9091,9102,9113,9124,9135,9141,9152,9163,9174,9185,919],920dtype=pl.Float32,921),922"a": [9231,9242,9253,9264,9275,9281,9292,9303,9314,9325,9331,9342,9353,9364,9375,9381,9392,9403,9414,9425,9431,9442,9453,9464,9475,9481,9492,9503,9514,9525,953],954}955)956957df2 = pl.DataFrame(958{959"group": [9601,9611,9621,9631,9641,9652,9662,9672,9682,9692,9703,9713,9723,9733,9743,9754,9764,9774,9784,9794,9805,9815,9825,9835,9845,9856,9866,9876,9886,9896,990],991"asof_key": pl.Series(992[9937,9948,9959,99610,99711,9981,9992,10003,10014,10025,10036,10047,10058,10069,100710,10085,1009-3,1010-2,1011-1,10120,10130,10142,10152.4,10163.4,101710,1018-3,10193,10208,10219,102210,1023],1024dtype=pl.Float32,1025),1026"b": [10271,10282,10293,10304,10315,10321,10332,10343,10354,10365,10371,10382,10393,10404,10415,10421,10432,10443,10454,10465,10471,10482,10493,10504,10515,10521,10532,10543,10554,10565,1057],1058}1059)10601061expected = df1.with_columns(1062pl.Series(1063[1064None,1065None,1066None,1067None,1068None,1069None,1070None,1071None,1072None,1073None,1074None,1075None,1076None,1077None,10781,10795,1080None,1081None,10821,10831,10842,10852,10864,10874,1088None,1089None,10902,10912,10922,1093None,1094]1095).alias("b")1096)1097df1 = df1.sort(by=["group", "asof_key"])1098df2 = df2.sort(by=["group", "asof_key"])1099expected = expected.sort(by=["group", "a"])11001101out = df1.join_asof(1102df2, by="group", on="asof_key", strategy="nearest", tolerance=1.01103).sort(by=["group", "a"])1104assert_frame_equal(out, expected)11051106# last item is closest match1107df1 = pl.DataFrame(1108{1109"a": [1, 1, 
1],1110"asof_key_left": [10.00001, 20.0, 30.0],1111}1112).set_sorted("asof_key_left")1113df2 = pl.DataFrame(1114{1115"a": [1, 1, 1],1116"asof_key_right": [10.00001, 20.0001, 29.0],1117}1118).set_sorted("asof_key_right")1119out = df1.join_asof(1120df2,1121left_on="asof_key_left",1122right_on="asof_key_right",1123by="a",1124strategy="nearest",1125tolerance=0.5,1126)1127expected = pl.DataFrame(1128{1129"a": [1, 1, 1],1130"asof_key_left": [10.00001, 20.0, 30.0],1131"asof_key_right": [10.00001, 20.0001, None],1132}1133)1134assert_frame_equal(out, expected)113511361137def test_asof_join_nearest_by_date() -> None:1138df1 = pl.DataFrame(1139{1140"asof_key": [1141date(2019, 12, 30),1142date(2020, 1, 1),1143date(2020, 1, 2),1144date(2020, 1, 6),1145date(2020, 1, 1),1146],1147"group": [1, 1, 1, 2, 2],1148"a": [1, 2, 3, 2, 5],1149}1150).sort(by=["group", "asof_key"])11511152df2 = pl.DataFrame(1153{1154"asof_key": [1155date(2020, 1, 1),1156date(2020, 1, 2),1157date(2020, 1, 5),1158date(2020, 1, 1),1159],1160"group": [1, 1, 2, 2],1161"b": [1, 2, 3, 4],1162}1163).sort(by=["group", "asof_key"])11641165expected = pl.DataFrame(1166{1167"asof_key": [1168date(2019, 12, 30),1169date(2020, 1, 1),1170date(2020, 1, 2),1171date(2020, 1, 6),1172date(2020, 1, 1),1173],1174"group": [1, 1, 1, 2, 2],1175"a": [1, 2, 3, 2, 5],1176"b": [1, 1, 2, 3, 4],1177}1178).sort(by=["group", "asof_key"])11791180out = df1.join_asof(df2, on="asof_key", by="group", strategy="nearest")1181assert_frame_equal(out, expected)118211831184@pytest.mark.may_fail_auto_streaming # See #18927.1185def test_asof_join_string() -> None:1186left = pl.DataFrame({"x": [None, "a", "b", "c", None, "d", None]}).set_sorted("x")1187right = pl.DataFrame({"x": ["apple", None, "chutney"], "y": [0, 1, 2]}).set_sorted(1188"x"1189)1190forward = left.join_asof(right, on="x", strategy="forward")1191backward = left.join_asof(right, on="x", strategy="backward")1192forward_expected = pl.DataFrame(1193{1194"x": [None, "a", "b", "c", None, "d", 
None],1195"y": [None, 0, 2, 2, None, None, None],1196}1197)1198backward_expected = pl.DataFrame(1199{1200"x": [None, "a", "b", "c", None, "d", None],1201"y": [None, None, 0, 0, None, 2, None],1202}1203)1204assert_frame_equal(forward, forward_expected)1205assert_frame_equal(backward, backward_expected)120612071208def test_join_asof_by_argument_parsing() -> None:1209df1 = pl.DataFrame(1210{1211"n": [10, 20, 30, 40, 50, 60],1212"id1": [0, 0, 3, 3, 5, 5],1213"id2": [1, 2, 1, 2, 1, 2],1214"x": ["a", "b", "c", "d", "e", "f"],1215}1216).sort(by="n")12171218df2 = pl.DataFrame(1219{1220"n": [25, 8, 5, 23, 15, 35],1221"id1": [0, 0, 3, 3, 5, 5],1222"id2": [1, 2, 1, 2, 1, 2],1223"y": ["A", "B", "C", "D", "E", "F"],1224}1225).sort(by="n")12261227# any sequency for by argument is allowed, so we should see the same results here1228by_list = df1.join_asof(df2, on="n", by=["id1", "id2"])1229by_tuple = df1.join_asof(df2, on="n", by=("id1", "id2"))1230assert_frame_equal(by_list, by_tuple)12311232# same for using the by_left and by_right kwargs1233by_list2 = df1.join_asof(1234df2, on="n", by_left=["id1", "id2"], by_right=["id1", "id2"]1235)1236by_tuple2 = df1.join_asof(1237df2, on="n", by_left=("id1", "id2"), by_right=("id1", "id2")1238)1239assert_frame_equal(by_list2, by_list)1240assert_frame_equal(by_tuple2, by_list)124112421243def test_join_asof_invalid_args() -> None:1244df1 = pl.DataFrame(1245{1246"a": [1, 2, 3],1247"b": [1, 2, 3],1248}1249).set_sorted("a")1250df2 = pl.DataFrame(1251{1252"a": [1, 2, 3],1253"c": [1, 2, 3],1254}1255).set_sorted("a")12561257with pytest.raises(TypeError, match="expected `on` to be str or Expr, got 'list'"):1258df1.join_asof(df2, on=["a"]) # type: ignore[arg-type]1259with pytest.raises(1260TypeError, match="expected `left_on` to be str or Expr, got 'list'"1261):1262df1.join_asof(df2, left_on=["a"], right_on="a") # type: ignore[arg-type]1263with pytest.raises(1264TypeError, match="expected `right_on` to be str or Expr, got 
'list'"1265):1266df1.join_asof(df2, left_on="a", right_on=["a"]) # type: ignore[arg-type]126712681269def test_join_as_of_by_schema() -> None:1270a = pl.DataFrame({"a": [1], "b": [2], "c": [3]}).lazy()1271b = pl.DataFrame({"a": [1], "b": [2], "d": [4]}).lazy()1272q = a.join_asof(b, on=pl.col("a").set_sorted(), by="b")1273assert q.collect_schema().names() == q.collect().columns127412751276def test_asof_join_by_schema() -> None:1277# different `by` names.1278df1 = pl.DataFrame({"on1": 0, "by1": 0})1279df2 = pl.DataFrame({"on1": 0, "by2": 0})12801281q = df1.lazy().join_asof(1282df2.lazy(),1283on="on1",1284by_left="by1",1285by_right="by2",1286)12871288assert q.collect_schema() == q.collect().schema128912901291def test_raise_invalid_by_arg_13020() -> None:1292df1 = pl.DataFrame({"asOfDate": [date(2020, 1, 1)]})1293df2 = pl.DataFrame(1294{1295"endityId": [date(2020, 1, 1)],1296"eventDate": ["A"],1297}1298)1299with pytest.raises(pl.exceptions.InvalidOperationError, match="expected both"):1300df1.sort("asOfDate").join_asof(1301df2.sort("eventDate"),1302left_on="asOfDate",1303right_on="eventDate",1304by_left=None,1305by_right=["entityId"],1306)130713081309def test_join_asof_no_exact_matches() -> None:1310trades = pl.DataFrame(1311{1312"time": [1313"2016-05-25 13:30:00.023",1314"2016-05-25 13:30:00.038",1315"2016-05-25 13:30:00.048",1316"2016-05-25 13:30:00.048",1317"2016-05-25 13:30:00.048",1318],1319"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],1320"price": [51.95, 51.95, 720.77, 720.92, 98.0],1321"quantity": [75, 155, 100, 100, 100],1322}1323).with_columns(pl.col("time").str.to_datetime())13241325quotes = pl.DataFrame(1326{1327"time": [1328"2016-05-25 13:30:00.023",1329"2016-05-25 13:30:00.023",1330"2016-05-25 13:30:00.030",1331"2016-05-25 13:30:00.041",1332"2016-05-25 13:30:00.048",1333"2016-05-25 13:30:00.049",1334"2016-05-25 13:30:00.072",1335"2016-05-25 13:30:00.075",1336],1337"ticker": ["GOOG", "MSFT", "MSFT", "MSFT", "GOOG", "AAPL", "GOOG", "MSFT"],1338"bid": 
[720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01],1339"ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03],1340}1341).with_columns(pl.col("time").str.to_datetime())13421343assert trades.join_asof(1344quotes, on="time", by="ticker", tolerance="10ms", allow_exact_matches=False1345).to_dict(as_series=False) == {1346"time": [1347datetime(2016, 5, 25, 13, 30, 0, 23000),1348datetime(2016, 5, 25, 13, 30, 0, 38000),1349datetime(2016, 5, 25, 13, 30, 0, 48000),1350datetime(2016, 5, 25, 13, 30, 0, 48000),1351datetime(2016, 5, 25, 13, 30, 0, 48000),1352],1353"ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"],1354"price": [51.95, 51.95, 720.77, 720.92, 98.0],1355"quantity": [75, 155, 100, 100, 100],1356"bid": [None, 51.97, None, None, None],1357"ask": [None, 51.98, None, None, None],1358}135913601361@pytest.mark.parametrize("strategy", ["backward", "forward", "nearest"])1362@given(1363df_left=dataframes(cols=1, allowed_dtypes=pl.Int32),1364df_right=dataframes(cols=1, allowed_dtypes=pl.Int32),1365)1366def test_join_asof_no_exact_matches_parametric(1367strategy: AsofJoinStrategy, df_left: pl.DataFrame, df_right: pl.DataFrame1368) -> None:1369df_left = df_left.sort("col0")1370df_right = df_right.sort("col0")13711372out = df_left.join_asof(1373df_right,1374on="col0",1375strategy=strategy,1376suffix="_right",1377coalesce=False,1378allow_exact_matches=False,1379)13801381for l_val, r_val in zip(1382out["col0"],1383out["col0_right"],1384strict=False,1385):1386assert l_val != r_val or r_val is None138713881389def test_join_asof_not_sorted() -> None:1390df = pl.DataFrame({"a": [1, 1, 1, 2, 2, 2], "b": [2, 1, 3, 1, 2, 3]})1391with pytest.raises(InvalidOperationError, match="is not sorted"):1392df.join_asof(df, on="b")13931394# When 'by' is provided, we do not check sortedness, but a warning is received1395with pytest.warns(1396UserWarning,1397match="Sortedness of columns cannot be checked when 'by' groups provided",1398):1399df.join_asof(df, on="b", 
by="a")14001401# When sortedness is False, we should get no warning1402with warnings.catch_warnings(record=True) as w:1403df.join_asof(df, on="b", check_sortedness=False)1404df.join_asof(df, on="b", by="a", check_sortedness=False)1405assert len(w) == 0 # no warnings caught140614071408@pytest.mark.parametrize(1409"dtypes",1410[1411(pl.Int64, pl.Int64),1412(pl.Int64, pl.UInt64),1413(pl.Int64, pl.Int128),1414(pl.UInt64, pl.Int64),1415(pl.UInt64, pl.UInt64),1416(pl.UInt64, pl.Int128),1417(pl.UInt64, pl.UInt128),1418(pl.Int128, pl.Int64),1419(pl.Int128, pl.UInt64),1420(pl.Int128, pl.Int128),1421(pl.UInt128, pl.UInt64),1422(pl.UInt128, pl.UInt128),1423],1424)1425@pytest.mark.parametrize("swap", [False, True])1426@pytest.mark.parametrize("strategy", ["backward", "forward", "nearest"])1427def test_join_asof_large_int_21276(1428dtypes: tuple[PolarsIntegerType, PolarsIntegerType],1429swap: bool,1430strategy: AsofJoinStrategy,1431) -> None:1432left_dtype, right_dtype = reversed(dtypes) if swap else dtypes1433large_int64 = 1608129000134000123 # it only happen when "on" column is large1434left = pl.DataFrame({"ts": pl.Series([large_int64 + 2], dtype=left_dtype)})1435right = pl.DataFrame(1436{1437"ts": pl.Series([large_int64 + 1, large_int64 + 3], dtype=right_dtype),1438"value": [111, 333],1439}1440)1441result = left.join_asof(right, on="ts", strategy=strategy)1442idx = 0 if strategy == "backward" else 11443expected = pl.DataFrame(1444{1445"ts": left["ts"],1446"value": right["value"].gather(idx),1447}1448)1449assert_frame_equal(result, expected)145014511452@pytest.mark.parametrize("by", ["constant", None])1453def test_join_asof_slice_23583(by: str | None) -> None:1454lhs = pl.LazyFrame(1455{1456"index": [0],1457"constant": 0,1458"date": [date(2025, 1, 1)],1459},1460).set_sorted("date")14611462rhs = pl.LazyFrame(1463{1464"index": [0, 1],1465"constant": 0,1466"date": [date(1970, 1, 1), date(2025, 1, 1)],1467},1468).set_sorted("date")14691470q = (1471lhs.join_asof(rhs, on="date", 
by=by, check_sortedness=False)1472.head(1)1473.select(pl.exclude("constant_right"))1474)14751476expect = pl.DataFrame(1477{1478"index": [0],1479"constant": 0,1480"date": [date(2025, 1, 1)],1481"index_right": [1],1482},1483)14841485assert_frame_equal(q.collect(optimizations=pl.QueryOptFlags.none()), expect)1486assert_frame_equal(q.collect(), expect)148714881489def test_join_asof_23751() -> None:1490a = pl.DataFrame(1491[1492pl.Series([1, 2, 3, 4, 5]).alias("index") * int(1e10),1493pl.Series([1, -1, 1, 1, -1]).alias("side"),1494]1495)14961497b = pl.DataFrame(1498[1499pl.Series([0, 1, 1, 3, 3, 5]).alias("index_right").cast(pl.UInt64)1500* int(1e10),1501pl.Series([-1, 1, -1, 1, 1, -1]).alias("side"),1502pl.Series([0, 10, 20, 30, 40, 50]).alias("value"),1503]1504)15051506assert a.join_asof(b, left_on="index", right_on="index_right", by="side").to_dict(1507as_series=False1508) == {1509"index": [10000000000, 20000000000, 30000000000, 40000000000, 50000000000],1510"side": [1, -1, 1, 1, -1],1511"index_right": [151210000000000,151310000000000,151430000000000,151530000000000,151650000000000,1517],1518"value": [10, 20, 40, 40, 50],1519}152015211522def test_join_asof_nosuffix_dup_col_23834() -> None:1523a = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})1524b = pl.DataFrame({"b": [1, 2, 3], "c": [9, 10, 11]})1525with pytest.raises(DuplicateError):1526a.join_asof(b, left_on="a", right_on="b", suffix="")152715281529def test_join_asof_planner_schema_24000() -> None:1530a = pl.DataFrame([pl.Series("index", [1, 2, 3]) * 10])1531b = pl.DataFrame(1532[1533pl.Series("value", [10, 20, 30]),1534pl.Series("index_right", [1, 2, 3]).cast(pl.UInt64) * 10,1535]1536)1537q = a.lazy().join_asof(b.lazy(), left_on="index", right_on="index_right")15381539assert q.collect().schema == q.collect_schema()15401541b = pl.DataFrame(1542[1543pl.Series("index_right", [1, 2, 3]).cast(pl.UInt64) * 10,1544pl.Series("value", [10, 20, 30]),1545]1546)1547q = a.lazy().join_asof(b.lazy(), left_on="index", 
right_on="index_right")15481549assert q.collect().schema == q.collect_schema()155015511552@pytest.mark.parametrize(1553"dtype",1554[pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64, pl.Int8, pl.Int16, pl.Int32, pl.Int64],1555)1556def test_join_asof_int_dtypes_24383(dtype: PolarsIntegerType) -> None:1557lf1 = pl.LazyFrame(1558{1559"id": pl.Series([1], dtype=dtype),1560"date": pl.Series([date(2025, 12, 31)], dtype=pl.Date),1561}1562)15631564lf2 = pl.LazyFrame(1565{1566"id": pl.Series([1], dtype=dtype),1567"date": pl.Series([date(2025, 12, 31)], dtype=pl.Date),1568"value": pl.Series([2.5], dtype=pl.Float32),1569}1570)15711572result = lf1.join_asof(1573other=lf2,1574on="date",1575by="id",1576check_sortedness=False,1577)1578expected = pl.DataFrame(1579{1580"id": pl.Series([1], dtype=dtype),1581"date": pl.Series([date(2025, 12, 31)], dtype=pl.Date),1582"value": pl.Series([2.5], dtype=pl.Float32),1583}1584)1585assert result.collect_schema() == expected.schema1586assert_frame_equal(result.collect(), expected)158715881589@pytest.mark.parametrize("by", [None, "one"])1590def test_join_asof_on_cast_expr_24999(by: str | None) -> None:1591q = pl.LazyFrame({"x": date(2025, 10, 13), "one": 1}).join_asof(1592pl.LazyFrame({"x_right": datetime(2025, 10, 10, 2), "one": 1}),1593left_on=pl.col("x").cast(pl.Datetime),1594right_on=pl.col("x_right"),1595tolerance=timedelta(days=100),1596by=by,1597)15981599expect = pl.DataFrame(1600{1601"x": date(2025, 10, 13),1602"one": 1,1603"x_right": datetime(2025, 10, 10, 2),1604}1605)16061607if by is None:1608expect = expect.with_columns(pl.col("one").alias("one_right"))16091610assert_frame_equal(q.collect(), expect)161116121613@pytest.mark.parametrize(1614("left", "right", "expected_right"),1615[1616([], [], []),1617([1], [2], [2]),1618([], [1, 2], []),1619([1, 2], [], [None, None]),1620([1, 2], [1, 2], [2, 1]),1621([1, 1], [1, 1], [None, None]),1622([1, 2, 2, 2, 3], [1, 2, 2, 2, 3], [2, 3, 3, 3, 2]),1623],1624)1625def 
test_join_asof_nearest_no_exact_matches_25468(1626left: list[int],1627right: list[int],1628expected_right: list[int],1629) -> None:1630df_left = pl.DataFrame({"a": left}, schema={"a": pl.Int32})1631df_right = pl.DataFrame({"a": right}, schema={"a": pl.Int32})1632expected_df = pl.DataFrame(1633{"a": left, "a_right": expected_right},1634schema={"a": pl.Int32, "a_right": pl.Int32},1635)1636result = df_left.join_asof(1637df_right, on="a", strategy="nearest", coalesce=False, allow_exact_matches=False1638)1639assert_frame_equal(result, expected_df)164016411642@pytest.mark.parametrize("strategy", ["backward", "nearest"])1643def test_join_asof_nans(strategy: AsofJoinStrategy) -> None:1644df_left = pl.LazyFrame(1645{1646"time": [0.0, 6.0, math.nan, math.nan],1647"value": [100, 101, 102, 103],1648}1649)1650df_right = pl.LazyFrame(1651{1652"time": [0.0, 5.0, 10.0, 42.0],1653"value": [100, 200, 300, 400],1654}1655)1656actual = df_left.join_asof(1657df_right,1658on="time",1659strategy=strategy,1660).collect()1661expected = pl.DataFrame(1662{1663"time": [0.0, 6.0, math.nan, math.nan],1664"value": [100, 101, 102, 103],1665"value_right": [100, 200, 400, 400],1666}1667)1668assert_frame_equal(actual, expected)166916701671