Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/projection_queries.rs
8483 views
1
use polars_ops::frame::JoinCoalesce;
2
3
use super::*;
4
5
/// Joins a filtered frame back onto `weight` using the `_sire` suffix, drops the `sireid`
/// column, and checks that the collected result has shape `(1, 3)`.
#[test]
fn test_join_suffix_and_drop() -> PolarsResult<()> {
    let weight_df = df![
        "id" => [1, 2, 3, 4, 5, 0],
        "wgt" => [4.32, 5.23, 2.33, 23.399, 392.2, 0.0]
    ]?;
    let weight = weight_df.lazy();

    let ped_df = df![
        "id"=> [1, 2, 3, 4, 5],
        "sireid"=> [0, 0, 1, 3, 3]
    ]?;
    let ped = ped_df.lazy();

    // Keep only the row whose `id` is 2, then attach its `ped` record.
    let only_id_two = weight.clone().filter(col("id").eq(lit(2i32)));
    let sumry = only_id_two.inner_join(ped, "id", "id");

    // Second join: look up `sireid` in `weight`; clashing right-hand names get `_sire`.
    let with_sire = sumry
        .join_builder()
        .with(weight)
        .left_on([col("sireid")])
        .right_on([col("id")])
        .suffix("_sire")
        .finish();

    let out = with_sire.drop(cols(["sireid"])).collect()?;

    let expected_shape = (1, 3);
    assert_eq!(out.shape(), expected_shape);

    Ok(())
}
38
39
/// Cross-joins `food` with `drink` and projects renamed columns plus a summed price,
/// then compares the collected frame with the expected four-row result.
#[test]
#[cfg(feature = "cross_join")]
fn test_cross_join_pd() -> PolarsResult<()> {
    let food = df![
        "name"=> ["Omelette", "Fried Egg"],
        "price" => [8, 5]
    ]?;

    let drink = df![
        "name" => ["Orange Juice", "Tea"],
        "price" => [5, 4]
    ]?;

    // The projection refers to the right-hand columns through the `_right` suffix.
    let projection = [
        col("name").alias("food"),
        col("name_right").alias("beverage"),
        (col("price") + col("price_right")).alias("total"),
    ];

    let out = food
        .lazy()
        .cross_join(drink.lazy(), None)
        .select(projection)
        .collect()?;

    let expected = df![
        "food" => ["Omelette", "Omelette", "Fried Egg", "Fried Egg"],
        "beverage" => ["Orange Juice", "Tea", "Orange Juice", "Tea"],
        "total" => [13, 12, 10, 9]
    ]?;

    assert!(out.equals(&expected));
    Ok(())
}
68
69
/// Adds a row index named `index`, selects it together with `x * 3`, and compares the
/// collected frame against the expected `index`/`x` columns.
#[test]
fn test_row_number_pd() -> PolarsResult<()> {
    let input = df![
        "x" => [1, 2, 3],
        "y" => [3, 2, 1],
    ]?;

    let tripled_x = col("x") * lit(3i32);
    let out = input
        .lazy()
        .with_row_index("index", None)
        .select([col("index"), tripled_x])
        .collect()?;

    let expected = df![
        "index" => [0 as IdxSize, 1, 2],
        "x" => [3i32, 6, 9]
    ]?;

    assert!(out.equals(&expected));

    Ok(())
}
91
92
/// Inner-joins a CSV scan with a limited projection of the same scan, once with and once
/// without common-subplan elimination, and checks the output column names each time.
#[test]
#[cfg(feature = "cse")]
fn scan_join_same_file() -> PolarsResult<()> {
    let lf = LazyCsvReader::new(PlRefPath::new(FOODS_CSV)).finish()?;
    let expected_columns = ["category", "calories", "fats_g", "sugars_g"];

    for cse in [true, false] {
        // Right-hand side: only the `category` column, limited to 5 rows.
        let partial = lf.clone().select([col("category")]).limit(5);

        let out = lf
            .clone()
            .join(
                partial,
                [col("category")],
                [col("category")],
                JoinType::Inner.into(),
            )
            .with_comm_subplan_elim(cse)
            .collect()?;

        assert_eq!(out.get_column_names(), &expected_columns);
    }
    Ok(())
}
116
117
/// Passes a regex column selector (`^b_a_\d$`) to `concat_str` with `;` as separator and
/// checks the resulting `concatenated` column.
#[test]
#[cfg(all(feature = "regex", feature = "concat_str"))]
fn concat_str_regex_expansion() -> PolarsResult<()> {
    let frame = df![
        "a"=> [1, 1, 1],
        "b_a_1"=> ["a--", "", ""],
        "b_a_2"=> ["", "b--", ""],
        "b_a_3"=> ["", "", "c--"]
    ]?;

    let concatenated = concat_str([col(r"^b_a_\d$")], ";", false).alias("concatenated");
    let out = frame.lazy().select([concatenated]).collect()?;

    let expected = Column::new("concatenated".into(), ["a--;;", ";b--;", ";;c--"]);
    assert_eq!(out.column("concatenated")?, &expected);

    Ok(())
}
138
139
/// Checks that every join in the optimized plan reports `should_coalesce()` when the
/// query asked for `KeepColumns` but only selects `a` and `b` afterwards.
#[test]
fn test_coalesce_toggle_projection_pushdown() -> PolarsResult<()> {
    let left = df![
        "a" => [1],
        "b" => [2]
    ]?
    .lazy();

    let right = df![
        "a" => [1],
        "c" => [2]
    ]?
    .lazy();

    // Explicitly request a non-coalescing left join.
    let join_args = JoinArgs {
        how: JoinType::Left,
        coalesce: JoinCoalesce::KeepColumns,
        ..Default::default()
    };

    let plan = left
        .join(right, [col("a")], [col("a")], join_args)
        .select([col("a"), col("b")])
        .to_alp_optimized()?;

    let root = plan.lp_top;
    let arena = plan.lp_arena;

    // The optimizer is expected to have switched coalescing on for every join node.
    for (_, ir) in arena.iter(root) {
        if let IR::Join { options, .. } = ir {
            assert!(options.args.should_coalesce());
        }
    }

    Ok(())
}
176
177
/// Non-strict horizontal concat followed by `select("d")`: the scan of `df_a` must be
/// projected down to one column, and the collected `d` column is padded with a null.
#[test]
fn test_select_hconcat_pushdown_non_strict_25263() -> PolarsResult<()> {
    let df_a = df![
        "a" => [1, 2, 2],
        "b" => [4, 5, 6],
    ]?
    .lazy();

    let df_b = df![
        "d" => [1, 2],
    ]?
    .lazy();

    // Non-strict mode: one column of `df_a` is still read so that the concat output
    // keeps the correct height.
    let concatenated = concat_lf_horizontal([df_a, df_b], Default::default())?;
    let lf = concatenated.select([col("d")]);
    let plan = lf.clone().to_alp_optimized()?;

    let root = plan.lp_top;
    let arena = plan.lp_arena;

    for (_, ir) in arena.iter(root) {
        match ir {
            // Only the scan of `df_a` (the one whose schema has column `a`) is checked:
            // projection pushdown must have reduced it to a single output column.
            IR::DataFrameScan {
                schema,
                output_schema,
                ..
            } if schema.contains("a") => {
                assert_eq!(output_schema.as_ref().unwrap().len(), 1);
            },
            _ => {},
        }
    }

    let out = lf.collect()?;
    let expected = df![
        "d" => [Some(1), Some(2), None]
    ]?;
    assert_eq!(out, expected);

    Ok(())
}
223
224
/// Strict horizontal concat followed by `select("d")`: no `DataFrameScan` whose schema
/// contains `a` may remain in the optimized plan, and the collected `d` column is unchanged.
#[test]
fn test_select_hconcat_pushdown_strict_25263() -> PolarsResult<()> {
    let df_a = df![
        "a" => [1, 2],
        "b" => [4, 5],
    ]?
    .lazy();

    let df_b = df![
        "d" => [1, 2],
    ]?
    .lazy();

    // Strict mode: nothing needs to be read from `df_a`.
    let strict_options = HConcatOptions {
        strict: true,
        ..Default::default()
    };
    let lf = concat_lf_horizontal([df_a, df_b], strict_options)?.select([col("d")]);
    let plan = lf.clone().to_alp_optimized()?;

    let root = plan.lp_top;
    let arena = plan.lp_arena;

    for (_, ir) in arena.iter(root) {
        if let IR::DataFrameScan { schema, .. } = ir {
            // A scan whose schema still has column `a` means `df_a` was read after all.
            assert!(
                !schema.contains("a"),
                "should not have read any columns from `df_a`"
            );
        }
    }

    let out = lf.collect()?;
    let expected = df![
        "d" => [Some(1), Some(2)]
    ]?;
    assert_eq!(out, expected);

    Ok(())
}
272
273