Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/predicate_queries.rs
6939 views
1
use super::*;
2
3
// Two stacked filters on the aliased column "bar" must be combined and pushed
// down to the scan, leaving no standalone `Filter` node in the optimized plan.
#[test]
#[cfg(feature = "parquet")]
fn test_multiple_roots() -> PolarsResult<()> {
    let mut lp_arena = Arena::with_capacity(8);
    let mut expr_arena = Arena::with_capacity(16);

    // This produces a predicate with two root columns; the test checks that
    // multiple roots are handled.
    let query = scan_foods_parquet(false)
        .select([col("calories").alias("bar")])
        .filter(col("bar").gt(lit(45i32)))
        .filter(col("bar").lt(lit(110i32)));

    // Also check that all predicates are combined and pushed down ...
    let plan_root = query.clone().optimize(&mut lp_arena, &mut expr_arena)?;
    assert!(predicate_at_scan(query));

    // ... and that no filter node remains in the optimized plan.
    let has_filter_node = lp_arena
        .iter(plan_root)
        .any(|(_, node)| matches!(node, IR::Filter { .. }));
    assert!(!has_filter_node);

    Ok(())
}
28
29
// Regression test for issue 2472: filter on a column ("age") that is derived
// from a categorical column via a string extract, both when the derived column
// is added with `with_column` and when it is the only selected column.
#[test]
#[cfg(all(feature = "is_in", feature = "strings", feature = "dtype-categorical"))]
fn test_issue_2472() -> PolarsResult<()> {
    let groups = df![
        "group" => [
            "54360-2001-0-20020312-4-1",
            "39444-2020-0-20210418-4-1",
            "68398-2020-0-20201216-4-1",
            "30910-2020-0-20210223-4-1",
            "71060-2020-0-20210315-4-1",
            "47959-2020-0-20210305-4-1",
            "63212-2018-0-20181007-2-2",
            "61465-2018-0-20181018-2-2",
        ]
    ]?;
    let categorical = groups
        .lazy()
        .with_column(col("group").cast(DataType::from_categories(Categories::global())));

    // Capture group 2 of the pattern, cast to Int32 and exposed as "age".
    let age_expr = col("group")
        .cast(DataType::String)
        .str()
        .extract(lit(r"(\d+-){4}(\w+)-"), 2)
        .cast(DataType::Int32)
        .alias("age");
    let age_is_two = col("age").is_in(lit(Series::new("".into(), [2i32])), false);

    // Variant 1: the derived column is appended next to "group".
    let appended = categorical
        .clone()
        .with_column(age_expr.clone())
        .filter(age_is_two.clone())
        .collect()?;
    assert_eq!(appended.shape(), (2, 2));

    // Variant 2: the derived column is the only projected column.
    let projected = categorical
        .select([age_expr])
        .filter(age_is_two)
        .collect()?;
    assert_eq!(projected.shape(), (2, 1));

    Ok(())
}
68
69
// A map over column "A" should not influence a predicate on a different
// column ("B"), since maps should not depend on previous values; the predicate
// is therefore expected to end up at the scan.
#[test]
fn test_pass_unrelated_apply() -> PolarsResult<()> {
    let null_mask_of_a = col("A").map(
        |column| Ok(column.is_null().into_column()),
        |_, field| Ok(Field::new(field.name().clone(), DataType::Boolean)),
    );

    let query = fruits_cars()
        .lazy()
        .with_column(null_mask_of_a)
        .filter(col("B").gt(lit(10i32)));

    assert!(predicate_at_scan(query));

    Ok(())
}
86
87
// Regression test for issue 2470: the binary expression in the predicate led
// to an incorrect pushdown because the right-hand side was not checked against
// the schema ("foo" only exists after the select).
#[test]
fn filter_added_column_issue_2470() -> PolarsResult<()> {
    let predicate = col("A").gt(lit(2i32)).and(col("foo").is_null());

    let result = fruits_cars()
        .lazy()
        .select([col("A"), lit(NULL).alias("foo")])
        .filter(predicate)
        .collect()?;
    assert_eq!(result.shape(), (3, 2));

    Ok(())
}
102
103
// A frame-level `map` whose allowed optimizations exclude predicate pushdown
// must keep the subsequent filter from reaching the scan, while the query
// still yields the expected rows.
#[test]
fn filter_blocked_by_map() -> PolarsResult<()> {
    // Default optimizations with the predicate-pushdown flag cleared.
    let flags_without_pushdown = OptFlags::default() & !OptFlags::PREDICATE_PUSHDOWN;

    let query = fruits_cars()
        .lazy()
        .map(Ok, flags_without_pushdown, None, None)
        .filter(col("A").gt(lit(2i32)));

    let pushed_to_scan = predicate_at_scan(query.clone());
    assert!(!pushed_to_scan);

    let result = query.collect()?;
    assert_eq!(result.shape(), (3, 4));

    Ok(())
}
119
120
// A filter on "date" placed after the column has been parsed from strings
// must not be pushed to the scan; the test also checks the filtered shape.
#[test]
#[cfg(all(feature = "temporal", feature = "strings"))]
fn test_strptime_block_predicate() -> PolarsResult<()> {
    let raw = df![
        "date" => ["2021-01-01", "2021-01-02"]
    ]?;

    // Midnight of 2021-01-01, used as the exclusive lower bound.
    let cutoff = NaiveDate::from_ymd_opt(2021, 1, 1)
        .unwrap()
        .and_hms_opt(0, 0, 0)
        .unwrap();

    let query = raw
        .lazy()
        .with_column(col("date").str().to_date(StrptimeOptions::default()))
        .filter(col("date").gt(cutoff.lit()));

    let pushed_to_scan = predicate_at_scan(query.clone());
    assert!(!pushed_to_scan);

    let result = query.collect()?;
    assert_eq!(result.shape(), (1, 1));

    Ok(())
}
146
147
// An `is_null` filter on a column that was cast from strings to Int32 must not
// be pushed to the scan; the test expects all three rows to pass the filter.
#[test]
fn test_strict_cast_predicate_pushdown() -> PolarsResult<()> {
    let letters = df![
        "a" => ["a", "b", "c"]
    ]?;

    let query = letters
        .lazy()
        .with_column(col("a").cast(DataType::Int32))
        .filter(col("a").is_null());

    let pushed_to_scan = predicate_at_scan(query.clone());
    assert!(!pushed_to_scan);

    let result = query.collect()?;
    assert_eq!(result.shape(), (3, 1));
    Ok(())
}
163
164
// #2602: filtering on nulls that only exist because of a left join must give
// the same result with and without predicate pushdown.
#[test]
fn test_filter_nulls_created_by_join() -> PolarsResult<()> {
    let left = df![
        "key" => ["foo", "bar"],
        "bar" => [1, 2]
    ]?;

    let right = df![
        "key"=> ["bar"]
    ]?
    .lazy()
    .with_column(lit(true).alias("flag"));

    // Only the "foo" row has no match on the right, so its "flag" is null.
    let expected = df![
        "key" => ["foo"],
        "bar" => [1],
        "flag" => &[None, Some(true)][0..1]
    ]?;

    let unmatched = left
        .lazy()
        .join(right, [col("key")], [col("key")], JoinType::Left.into())
        .filter(col("flag").is_null());

    // Default optimizations.
    let with_pushdown = unmatched.clone().collect()?;
    assert!(with_pushdown.equals_missing(&expected));

    // Same query with predicate pushdown switched off.
    let without_pushdown = unmatched.with_predicate_pushdown(false).collect()?;
    assert!(without_pushdown.equals_missing(&expected));

    Ok(())
}
201
202
// Filtering on nulls in a column that was cast from empty strings to Int32,
// combined with a predicate on an untouched column, must return the expected
// single row.
#[test]
fn test_filter_null_creation_by_cast() -> PolarsResult<()> {
    let input = df![
        "int" => [1, 2, 3],
        "empty" => ["", "", ""]
    ]?;

    let predicate = col("empty").is_null().and(col("int").eq(lit(3i32)));
    let result = input
        .lazy()
        .with_column(col("empty").cast(DataType::Int32).alias("empty"))
        .filter(predicate)
        .collect()?;

    // The slice keeps only the leading `None`, giving a one-row null Int32 column.
    let expected = df![
        "int" => [3],
        "empty" => &[None, Some(1i32)][..1]
    ]?;
    assert!(result.equals_missing(&expected));

    Ok(())
}
223
224
// Issue 4788: a predicate on "x" after a self-join with suffix "_" should be
// applied at a scan and must still produce the expected joined rows.
#[test]
#[cfg(feature = "cse")]
fn test_predicate_on_join_suffix_4788() -> PolarsResult<()> {
    let frame = df![
        "x" => [1, 2],
        "y" => [1, 1],
    ]?
    .lazy();

    // Self-join on "y"; clashing right-hand columns get the "_" suffix.
    let joined = frame
        .clone()
        .join_builder()
        .with(frame)
        .left_on([col("y")])
        .right_on([col("y")])
        .suffix("_")
        .finish();

    let query = joined
        .filter(col("x").eq(1))
        .with_comm_subplan_elim(false);

    // The left-hand side should have a predicate.
    assert!(predicate_at_scan(query.clone()));

    let expected = df![
        "x" => [1, 1],
        "y" => [1, 1],
        "x_" => [1, 2],
    ]?;
    let result = query.collect()?;
    assert_eq!(result, expected);

    Ok(())
}
253
254
// Issue 7247: predicates on a join key column of an inner join, checked
// against the collected result.
#[test]
fn test_push_join_col_predicates_to_both_sides_7247() -> PolarsResult<()> {
    let left = df! {
        "a" => ["a1", "a2"],
        "b" => ["b1", "b2"],
    }?;
    let right = df! {
        "a" => ["a1", "a1", "a2"],
        "b2" => ["b1", "b1", "b2"],
        "c" => ["a1", "c", "a2"]
    }?;

    let joined = left.lazy().join(
        right.lazy(),
        [col("a"), col("b")],
        [col("a"), col("b2")],
        JoinArgs::new(JoinType::Inner),
    );

    // Two separate filters: one against a literal, one against another column.
    let key_is_a1 = col("a").eq(lit("a1"));
    let key_equals_c = col("a").eq(col("c"));
    let query = joined.filter(key_is_a1).filter(key_equals_c);

    // NOTE(review): the result of `predicate_at_all_scans` is discarded here,
    // unlike the `assert!(predicate_at_scan(..))` calls in sibling tests.
    // Confirm whether the helper asserts internally or whether its return
    // value should be asserted.
    predicate_at_all_scans(query.clone());

    let result = query.collect()?;
    let expected = df![
        "a" => ["a1"],
        "b" => ["b1"],
        "c" => ["a1"],
    ]?;
    assert_eq!(result, expected);
    Ok(())
}
286
287
// Issue 12565: a predicate on a join key column of a semi join, checked
// against the collected result.
#[test]
#[cfg(feature = "semi_anti_join")]
fn test_push_join_col_predicates_to_both_sides_semi_12565() -> PolarsResult<()> {
    let left = df! {
        "a" => ["a1", "a2"],
        "b" => ["b1", "b2"],
    }?;
    let right = df! {
        "a" => ["a1", "a1", "a2"],
        "b2" => ["b1", "b1", "b2"],
        "c" => ["a1", "c", "a2"]
    }?;

    let joined = left.lazy().join(
        right.lazy(),
        [col("a"), col("b")],
        [col("a"), col("b2")],
        JoinArgs::new(JoinType::Semi),
    );
    let query = joined.filter(col("a").eq(lit("a1")));

    // NOTE(review): the result of `predicate_at_all_scans` is discarded here,
    // unlike the `assert!(predicate_at_scan(..))` calls in sibling tests.
    // Confirm whether the helper asserts internally or whether its return
    // value should be asserted.
    predicate_at_all_scans(query.clone());

    let result = query.collect()?;
    let expected = df![
        "a" => ["a1"],
        "b" => ["b1"],
    ]?;
    assert_eq!(result, expected);
    Ok(())
}
317
318