# Path: blob/main/py-polars/tests/unit/lazyframe/test_with_context.py
# (web viewer metadata: 6939 views)
from datetime import datetime

import pytest

import polars as pl
from polars.testing import assert_frame_equal


@pytest.mark.may_fail_cloud  # reason: with_context
def test_with_context() -> None:
    """Check expressions that reference a column from a context frame.

    Covers a successful select that combines a main-frame column with the
    first value of a context column, and a select of two columns with
    different lengths that is expected to raise ``ShapeError`` on collect.
    """
    df_a = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "c", None]}).lazy()
    df_b = pl.DataFrame({"c": ["foo", "ham"]})

    # `with_context` is expected to emit a deprecation warning on each call.
    with pytest.deprecated_call():
        result = df_a.with_context(df_b.lazy()).select(
            pl.col("b") + pl.col("c").first()
        )
    assert result.collect().to_dict(as_series=False) == {"b": ["afoo", "cfoo", None]}

    with pytest.deprecated_call():
        context = df_a.with_context(df_b.lazy())
    # "a" has 3 rows and "c" has 2, so selecting both must fail.
    with pytest.raises(pl.exceptions.ShapeError):
        context.select("a", "c").collect()


# https://github.com/pola-rs/polars/issues/5867
@pytest.mark.may_fail_cloud  # reason: with_context
def test_with_context_ignore_5867() -> None:
    """Check a group_by aggregation on a frame with an unused context frame.

    The aggregation never references the context frame's column; the result
    must equal the plain per-category sum.
    """
    outer = pl.LazyFrame({"OtherCol": [1, 2, 3, 4]})
    with pytest.deprecated_call():
        lf = pl.LazyFrame(
            {"Category": [1, 1, 2, 2], "Counts": [1, 2, 3, 4]}
        ).with_context(outer)

    result = lf.group_by("Category", maintain_order=True).agg(pl.col("Counts").sum())

    expected = pl.LazyFrame({"Category": [1, 2], "Counts": [3, 7]})
    assert_frame_equal(result, expected)


@pytest.mark.may_fail_cloud  # reason: with_context
def test_predicate_pushdown_with_context_11014() -> None:
    """Check an ``is_in`` filter against a context column with pushdown on.

    The filter on the main frame refers to a column of the context frame and
    the query is collected with ``predicate_pushdown=True``.
    """
    df1 = pl.LazyFrame(
        {
            "df1_c1": [1, 2, 3],
            "df1_c2": [2, 3, 4],
        }
    )

    df2 = pl.LazyFrame(
        {
            "df2_c1": [2, 3, 4],
            "df2_c2": [3, 4, 5],
        }
    )

    with pytest.deprecated_call():
        out = (
            df1.with_context(df2)
            .filter(pl.col("df1_c1").is_in(pl.col("df2_c1")))
            .collect(optimizations=pl.QueryOptFlags(predicate_pushdown=True))
        )

    assert out.to_dict(as_series=False) == {"df1_c1": [2, 3], "df1_c2": [3, 4]}


@pytest.mark.may_fail_cloud  # reason: with_context
def test_no_cse_in_with_context() -> None:
    """Check a gather/search_sorted lookup across main and context frames.

    For each ``timestamp`` in the main frame, the rows of ``date_start`` and
    ``label`` from the context frame are gathered at the index returned by
    ``search_sorted`` minus one.

    NOTE(review): going by the test name, this presumably guards against
    common-subexpression elimination breaking ``with_context`` queries;
    confirm against the originating issue.
    """
    df1 = pl.DataFrame(
        {
            "timestamp": [
                datetime(2023, 1, 1, 0, 0),
                datetime(2023, 5, 1, 0, 0),
                datetime(2023, 10, 1, 0, 0),
            ],
            "value": [2, 5, 9],
        }
    )
    df2 = pl.DataFrame(
        {
            "date_start": [
                datetime(2022, 12, 31, 0, 0),
                datetime(2023, 1, 2, 0, 0),
            ],
            "date_end": [
                datetime(2023, 4, 30, 0, 0),
                datetime(2023, 5, 5, 0, 0),
            ],
            "label": [0, 1],
        }
    )

    with pytest.deprecated_call():
        context = df1.lazy().with_context(df2.lazy())

    assert (
        context.select(
            pl.col("date_start", "label").gather(
                pl.col("date_start").search_sorted(pl.col("timestamp")) - 1
            ),
        )
    ).collect().to_dict(as_series=False) == {
        "date_start": [
            datetime(2022, 12, 31, 0, 0),
            datetime(2023, 1, 2, 0, 0),
            datetime(2023, 1, 2, 0, 0),
        ],
        "label": [0, 1, 1],
    }