Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/py-polars/tests/unit/test_cwc.py
6939 views
1
# Tests for the cluster WITH_COLUMNS (CWC) optimization pass
2
3
import polars as pl
4
5
6
def test_basic_cwc() -> None:
    """Three `with_columns` calls that only read `a` appear as one expression list."""
    base = pl.LazyFrame({"a": [1, 2]})
    lf = (
        base.with_columns(pl.col("a").alias("b") * 2)
        .with_columns(pl.col("a").alias("c") * 3)
        .with_columns(pl.col("a").alias("d") * 4)
    )

    plan = lf.explain()
    # All three expressions are expected side by side in a single list.
    clustered = """[[(col("a")) * (2)].alias("b"), [(col("a")) * (3)].alias("c"), [(col("a")) * (4)].alias("d")]"""

    assert clustered in plan
18
19
20
def test_disable_cwc() -> None:
    """With `cluster_with_columns=False` every expression keeps its own list."""
    lf = pl.LazyFrame({"a": [1, 2]})
    lf = lf.with_columns(pl.col("a").alias("b") * 2)
    lf = lf.with_columns(pl.col("a").alias("c") * 3)
    lf = lf.with_columns(pl.col("a").alias("d") * 4)

    flags = pl.QueryOptFlags(cluster_with_columns=False)
    plan = lf.explain(optimizations=flags)

    # Each fragment is a complete single-expression list, closing bracket included.
    separate_lists = (
        """[[(col("a")) * (2)].alias("b")]""",
        """[[(col("a")) * (3)].alias("c")]""",
        """[[(col("a")) * (4)].alias("d")]""",
    )
    for fragment in separate_lists:
        assert fragment in plan
33
34
35
def test_refuse_with_deps() -> None:
    """A chain where each step reads the previous step's output stays as separate lists."""
    lf = pl.LazyFrame({"a": [1, 2]})
    lf = lf.with_columns(pl.col("a").alias("b") * 2)
    lf = lf.with_columns(pl.col("b").alias("c") * 3)
    lf = lf.with_columns(pl.col("c").alias("d") * 4)

    plan = lf.explain()

    # b -> c -> d: every expression is expected alone in its own list.
    separate_lists = (
        """[[(col("a")) * (2)].alias("b")]""",
        """[[(col("b")) * (3)].alias("c")]""",
        """[[(col("c")) * (4)].alias("d")]""",
    )
    for fragment in separate_lists:
        assert fragment in plan
48
49
50
def test_partial_deps() -> None:
    """Expressions reading `a` end up in one list, expressions reading `b` in another."""
    middle_step = (
        pl.col("a").alias("c") * 3,
        pl.col("b").alias("d") * 4,
        pl.col("a").alias("e") * 5,
    )
    lf = (
        pl.LazyFrame({"a": [1, 2]})
        .with_columns(pl.col("a").alias("b") * 2)
        .with_columns(*middle_step)
        .with_columns(pl.col("b").alias("f") * 6)
    )

    plan = lf.explain()

    reads_b = """[[(col("b")) * (4)].alias("d"), [(col("b")) * (6)].alias("f")]"""
    reads_a = """[[(col("a")) * (2)].alias("b"), [(col("a")) * (3)].alias("c"), [(col("a")) * (5)].alias("e")]"""

    assert reads_b in plan
    assert reads_a in plan
71
72
73
def test_swap_remove() -> None:
    """Check both the collected result and the plan when `c` is listed between `b`-readers."""
    lf = (
        pl.LazyFrame({"a": [1, 2]})
        .with_columns(pl.col("a").alias("b") * 2)
        .with_columns(
            pl.col("b").alias("f") * 6,
            pl.col("a").alias("c") * 3,
            pl.col("b").alias("d") * 4,
            pl.col("b").alias("e") * 5,
        )
    )
    plan = lf.explain()

    # Expected output lists the columns in the order the query declares them.
    expected = pl.DataFrame(
        {
            "a": [1, 2],
            "b": [2, 4],
            "f": [12, 24],
            "c": [3, 6],
            "d": [8, 16],
            "e": [10, 20],
        }
    )
    assert lf.collect().equals(expected)

    reads_b = """[[(col("b")) * (6)].alias("f"), [(col("b")) * (4)].alias("d"), [(col("b")) * (5)].alias("e")]"""
    reads_a = """[[(col("a")) * (2)].alias("b"), [(col("a")) * (3)].alias("c")]"""

    assert reads_b in plan
    assert reads_a in plan
    # The plan is also expected to contain a simple projection node.
    assert """simple π""" in plan
107
108
109
def test_try_remove_simple_project() -> None:
    """The `simple π` node is absent or present depending on the order `c` and `d` are declared."""
    reads_a = """[[(col("a")) * (2)].alias("b"), [(col("a")) * (4)].alias("d")]"""
    reads_b = """[[(col("b")) * (3)].alias("c")]"""

    # Case 1: `d` is declared before `c`; no simple projection is expected.
    d_then_c = (
        pl.LazyFrame({"a": [1, 2]})
        .with_columns(pl.col("a").alias("b") * 2)
        .with_columns(pl.col("a").alias("d") * 4, pl.col("b").alias("c") * 3)
    )
    plan = d_then_c.explain()

    assert reads_a in plan
    assert reads_b in plan
    assert """simple π""" not in plan

    # Case 2: `c` is declared before `d`; a simple projection is expected.
    c_then_d = (
        pl.LazyFrame({"a": [1, 2]})
        .with_columns(pl.col("a").alias("b") * 2)
        .with_columns(pl.col("b").alias("c") * 3, pl.col("a").alias("d") * 4)
    )
    plan = c_then_d.explain()

    assert reads_a in plan
    assert reads_b in plan
    assert """simple π""" in plan
137
138
139
def test_cwc_with_internal_aliases() -> None:
    """An alias nested inside an expression does not keep `c` and `d` in separate lists."""
    # The inner `.alias("b")` sits inside `any_horizontal`; the output column is `c`.
    inner_aliased = pl.any_horizontal((pl.col("a") == 2).alias("b")).alias("c")
    lf = (
        pl.LazyFrame({"a": [1, 2], "b": [3, 4]})
        .with_columns(inner_aliased)
        .with_columns(pl.col("b").alias("d") * 3)
    )

    plan = lf.explain()
    clustered = """[[(col("a")) == (2)].alias("c"), [(col("b")) * (3)].alias("d")]"""

    assert clustered in plan
151
152
153
def test_read_of_pushed_column_16436() -> None:
    """Smoke test: the query only has to collect; no result is asserted.

    NOTE(review): the numeric suffix presumably refers to an issue number - confirm.
    """
    frame = pl.DataFrame(
        {
            "x": [1.12, 2.21, 4.2, 3.21],
            "y": [2.11, 3.32, 2.1, 6.12],
        }
    )

    ratio = (pl.col("y") / pl.col("x")).alias("z")
    # The second step overwrites `z` while also reading it.
    finite_ratio = (
        pl.when(pl.col("z").is_infinite()).then(0).otherwise(pl.col("z")).alias("z")
    )

    query = frame.lazy().with_columns(ratio).with_columns(finite_ratio).fill_nan(0)
    query.collect()
170
171
172
def test_multiple_simple_projections_16435() -> None:
    """Smoke test: a chain of column copies and literals only has to collect.

    NOTE(review): the numeric suffix presumably refers to an issue number - confirm.
    """
    lf = pl.DataFrame({"a": [1]}).lazy()

    lf = lf.with_columns(b=pl.col("a"))
    lf = lf.with_columns(c=pl.col("b"))
    lf = lf.with_columns(l2a=pl.lit(2))
    lf = lf.with_columns(l2b=pl.col("l2a"))
    lf = lf.with_columns(m=pl.lit(3))

    lf.collect()
184
185
186
def test_reverse_order() -> None:
    """Smoke test: a query whose last step swaps `a` and `b` only has to collect."""
    lf = pl.LazyFrame({"a": [1], "b": [2]})

    lf = lf.with_columns(a=pl.col("a"), b=pl.col("b"), c=pl.col("a") * pl.col("b"))
    lf = lf.with_columns(x=pl.col("a"), y=pl.col("b"))
    # The final step assigns each of `a` and `b` from the other.
    lf = lf.with_columns(b=pl.col("a"), a=pl.col("b"))

    lf.collect()
196
197
198
def test_realias_of_unread_column_16530() -> None:
    """`y` is overwritten before anything reads it; the plan has a single WITH_COLUMNS.

    NOTE(review): the numeric suffix presumably refers to an issue number - confirm.
    """
    lf = pl.LazyFrame({"x": [True]})
    lf = lf.with_columns(x=pl.lit(False))
    lf = lf.with_columns(y=~pl.col("x"))
    lf = lf.with_columns(y=pl.lit(False))

    plan = lf.explain()
    expected = pl.DataFrame({"x": [False], "y": [False]})

    assert plan.count("WITH_COLUMNS") == 1
    assert lf.collect().equals(expected)
210
211
212
def test_realias_with_dependencies() -> None:
    """`y` is overwritten in the same step that reads it; the plan has three WITH_COLUMNS."""
    lf = pl.LazyFrame({"x": [True]})
    lf = lf.with_columns(x=pl.lit(False))
    lf = lf.with_columns(y=~pl.col("x"))
    # `z` reads `y` in the same call that replaces `y`.
    lf = lf.with_columns(y=pl.lit(False), z=pl.col("y") | True)

    plan = lf.explain()
    expected = pl.DataFrame({"x": [False], "y": [False], "z": [True]})

    assert plan.count("WITH_COLUMNS") == 3
    assert lf.collect().equals(expected)
224
225
226
def test_refuse_pushdown_with_aliases() -> None:
    """`y` starts as a literal and is re-aliased while `z` reads it; two WITH_COLUMNS remain."""
    lf = pl.LazyFrame({"x": [True]})
    lf = lf.with_columns(x=pl.lit(False))
    lf = lf.with_columns(y=pl.lit(True))
    # `z` reads `y` in the same call that replaces `y`.
    lf = lf.with_columns(y=pl.lit(False), z=pl.col("y") | True)

    plan = lf.explain()
    expected = pl.DataFrame({"x": [False], "y": [False], "z": [True]})

    assert plan.count("WITH_COLUMNS") == 2
    assert lf.collect().equals(expected)
238
239
240
def test_neighbour_live_expr() -> None:
    """`x` is replaced in the same step in which `z` reads it; one WITH_COLUMNS is expected."""
    lf = pl.LazyFrame({"x": [True]})
    lf = lf.with_columns(y=pl.lit(False))
    # The expected frame has z == True although x becomes False in this same call.
    lf = lf.with_columns(x=pl.lit(False), z=pl.col("x") | False)

    plan = lf.explain()
    expected = pl.DataFrame({"x": [False], "y": [False], "z": [True]})

    assert plan.count("WITH_COLUMNS") == 1
    assert lf.collect().equals(expected)
251
252