Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/lazyframe/test_with_context.py
6939 views
1
from datetime import datetime
2
3
import pytest
4
5
import polars as pl
6
from polars.testing import assert_frame_equal
7
8
9
@pytest.mark.may_fail_cloud  # reason: with_context
def test_with_context() -> None:
    """Exercise `LazyFrame.with_context` for a valid and an invalid selection.

    A single value taken from the context frame (`c.first()`) can be combined
    with a column of the primary frame, whereas selecting the whole context
    column next to a primary column of a different length raises a ShapeError.
    """
    primary = pl.DataFrame({"a": [1, 2, 3], "b": ["a", "c", None]}).lazy()
    extra = pl.DataFrame({"c": ["foo", "ham"]})

    # Every `with_context` call is expected to emit a deprecation warning.
    with pytest.deprecated_call():
        with_extra = primary.with_context(extra.lazy())
        concatenated = with_extra.select(pl.col("b") + pl.col("c").first())

    expected = {"b": ["afoo", "cfoo", None]}
    assert concatenated.collect().to_dict(as_series=False) == expected

    with pytest.deprecated_call():
        mismatched = primary.with_context(extra.lazy())

    # "a" holds three rows while the context column "c" holds only two.
    with pytest.raises(pl.exceptions.ShapeError):
        mismatched.select("a", "c").collect()
24
25
26
# https://github.com/pola-rs/polars/issues/5867
@pytest.mark.may_fail_cloud  # reason: with_context
def test_with_context_ignore_5867() -> None:
    """Check that an unused context frame does not disturb a group-by.

    The context frame only contributes `OtherCol`, which the aggregation never
    references; the grouped sums must match those of the primary frame alone.
    """
    unused_context = pl.LazyFrame({"OtherCol": [1, 2, 3, 4]})

    # Attaching the context is expected to emit a deprecation warning.
    with pytest.deprecated_call():
        counts = pl.LazyFrame({"Category": [1, 1, 2, 2], "Counts": [1, 2, 3, 4]})
        lf = counts.with_context(unused_context)

    total_per_category = pl.col("Counts").sum()
    grouped = lf.group_by("Category", maintain_order=True)
    result = grouped.agg(total_per_category)

    assert_frame_equal(
        result,
        pl.LazyFrame({"Category": [1, 2], "Counts": [3, 7]}),
    )
39
40
41
@pytest.mark.may_fail_cloud  # reason: with_context
def test_predicate_pushdown_with_context_11014() -> None:
    """Filter on a context-frame column with predicate pushdown enabled.

    Rows of the primary frame are kept when `df1_c1` is contained in the
    context column `df2_c1`; only the primary frame's columns are returned.
    NOTE(review): "11014" in the name presumably refers to an issue number.
    """
    primary = pl.LazyFrame({"df1_c1": [1, 2, 3], "df1_c2": [2, 3, 4]})
    lookup = pl.LazyFrame({"df2_c1": [2, 3, 4], "df2_c2": [3, 4, 5]})

    # The whole query is built and collected inside the warning-capturing
    # block, since `with_context` is expected to emit a deprecation warning.
    with pytest.deprecated_call():
        combined = primary.with_context(lookup)
        in_lookup = pl.col("df1_c1").is_in(pl.col("df2_c1"))
        pushdown_on = pl.QueryOptFlags(predicate_pushdown=True)
        out = combined.filter(in_lookup).collect(optimizations=pushdown_on)

    expected = {"df1_c1": [2, 3], "df1_c2": [3, 4]}
    assert out.to_dict(as_series=False) == expected
65
66
67
@pytest.mark.may_fail_cloud  # reason: with_context
def test_no_cse_in_with_context() -> None:
    """Gather rows of a context frame using positions derived from the primary.

    For every `timestamp` of the primary frame, `search_sorted` against the
    context column `date_start` (minus one) yields the row index used to gather
    `date_start` and `label` from the context frame.
    NOTE(review): the name suggests this guards against common-subexpression
    elimination being applied in a `with_context` query; confirm.
    """
    events = pl.DataFrame(
        {
            "timestamp": [
                datetime(2023, 1, 1, 0, 0),
                datetime(2023, 5, 1, 0, 0),
                datetime(2023, 10, 1, 0, 0),
            ],
            "value": [2, 5, 9],
        }
    )
    periods = pl.DataFrame(
        {
            "date_start": [
                datetime(2022, 12, 31, 0, 0),
                datetime(2023, 1, 2, 0, 0),
            ],
            "date_end": [
                datetime(2023, 4, 30, 0, 0),
                datetime(2023, 5, 5, 0, 0),
            ],
            "label": [0, 1],
        }
    )

    # Attaching the context is expected to emit a deprecation warning.
    with pytest.deprecated_call():
        context = events.lazy().with_context(periods.lazy())

    # Index into the context frame: insertion position of each timestamp
    # within `date_start`, shifted back by one.
    period_idx = pl.col("date_start").search_sorted(pl.col("timestamp")) - 1
    matched = context.select(pl.col("date_start", "label").gather(period_idx))

    expected = {
        "date_start": [
            datetime(2022, 12, 31, 0, 0),
            datetime(2023, 1, 2, 0, 0),
            datetime(2023, 1, 2, 0, 0),
        ],
        "label": [0, 1, 1],
    }
    assert matched.collect().to_dict(as_series=False) == expected
110
111