GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/queries.rs
1
#[cfg(feature = "diff")]
2
use polars_core::series::ops::NullBehavior;
3
4
use super::*;
5
6
#[test]
7
fn test_lazy_with_column() {
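// get_df() presumably yields a 5-column sample frame; adding the "foo" literal should bring the width to 6.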
8
let df = get_df()
9
.lazy()
10
.with_column(lit(10).alias("foo"))
11
.collect()
12
.unwrap();
13
assert_eq!(df.width(), 6);
14
assert!(df.column("foo").is_ok());
15
}
16
17
#[test]
18
fn test_lazy_exec() {
19
let df = get_df();
20
let _new = df
21
.clone()
22
.lazy()
23
.select([col("sepal_width"), col("variety")])
24
.sort(["sepal_width"], Default::default())
25
.collect();
26
27
let new = df
28
.lazy()
29
.filter(not(col("sepal_width").lt(lit(3.5))))
30
.collect()
31
.unwrap();
32
33
let check = new.column("sepal_width").unwrap().f64().unwrap().gt(3.4);
34
assert!(check.all())
35
}
36
37
#[test]
38
fn test_lazy_alias() {
39
let df = get_df();
40
let new = df
41
.lazy()
42
.select([col("sepal_width").alias("petals"), col("sepal_width")])
43
.collect()
44
.unwrap();
45
assert_eq!(new.get_column_names(), &["petals", "sepal_width"]);
46
}
47
48
#[test]
49
#[cfg(feature = "pivot")]
50
fn test_lazy_unpivot() {
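// Unpivot melts the "on" columns into variable/value rows; filtering to "sepal_length" should leave
// one row per original row (7, presumably the sample frame height) and the 3 selected columns.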
51
let df = get_df();
52
53
let args = UnpivotArgsDSL {
54
on: Some(by_name(["sepal_length", "sepal_width"], true, false)),
55
index: by_name(["petal_width", "petal_length"], true, false),
56
variable_name: None,
57
value_name: None,
58
};
59
60
let out = df
61
.lazy()
62
.unpivot(args)
63
.filter(col("variable").eq(lit("sepal_length")))
64
.select([col("variable"), col("petal_width"), col("value")])
65
.collect()
66
.unwrap();
67
assert_eq!(out.shape(), (7, 3));
68
}
69
70
#[test]
71
fn test_lazy_drop_nulls() {
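// drop_nulls(None) drops every row containing a null in any column, so only the first row survives.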
72
let df = df! {
73
"foo" => &[Some(1), None, Some(3)],
74
"bar" => &[Some(1), Some(2), None]
75
}
76
.unwrap();
77
78
let new = df.lazy().drop_nulls(None).collect().unwrap();
79
let out = df! {
80
"foo" => &[Some(1)],
81
"bar" => &[Some(1)]
82
}
83
.unwrap();
84
assert!(new.equals(&out));
85
}
86
87
#[test]
88
fn test_lazy_udf() {
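// map applies an eager UDF elementwise; the first sepal_width value (presumably 3.5) times 200.0 gives 700.0.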
89
let df = get_df();
90
let new = df
91
.lazy()
92
.select([col("sepal_width").map(|s| Ok(s * 200.0), |_, f| Ok(f.clone()))])
93
.collect()
94
.unwrap();
95
assert_eq!(
96
new.column("sepal_width").unwrap().f64().unwrap().get(0),
97
Some(700.0)
98
);
99
}
100
101
#[test]
102
fn test_lazy_is_null() {
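// sepal_width apparently contains no nulls: is_null keeps nothing and is_not_null keeps everything,
// while the group_by result implies the sample frame holds a single variety.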
103
let df = get_df();
104
let new = df
105
.clone()
106
.lazy()
107
.filter(col("sepal_width").is_null())
108
.collect()
109
.unwrap();
110
111
assert_eq!(new.height(), 0);
112
113
let new = df
114
.clone()
115
.lazy()
116
.filter(col("sepal_width").is_not_null())
117
.collect()
118
.unwrap();
119
assert_eq!(new.height(), df.height());
120
121
let new = df
122
.lazy()
123
.group_by([col("variety")])
124
.agg([col("sepal_width").min()])
125
.collect()
126
.unwrap();
127
128
assert_eq!(new.shape(), (1, 2));
129
}
130
131
#[test]
132
fn test_lazy_pushdown_through_agg() {
133
// An aggregation changes the schema names; check that the pushdown still succeeds.
134
let df = get_df();
135
let new = df
136
.lazy()
137
.group_by([col("variety")])
138
.agg([
139
col("sepal_length").min(),
140
col("petal_length").min().alias("foo"),
141
])
142
.select([col("foo")])
143
// second selection is to test if optimizer can handle that
144
.select([col("foo").alias("bar")])
145
.collect()
146
.unwrap();
147
148
assert_eq!(new.shape(), (1, 1));
149
let bar = new.column("bar").unwrap();
150
assert_eq!(bar.get(0).unwrap(), AnyValue::Float64(1.3));
151
}
152
153
#[test]
154
fn test_lazy_shift() {
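// shift(2) moves values down by two positions and fills the leading slots with null, so index 0 is None.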
155
let df = get_df();
156
let new = df
157
.lazy()
158
.select([col("sepal_width").alias("foo").shift(lit(2))])
159
.collect()
160
.unwrap();
161
assert_eq!(new.column("foo").unwrap().f64().unwrap().get(0), None);
162
}
163
164
#[test]
165
fn test_shift_and_fill() -> PolarsResult<()> {
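// A negative period shifts values towards the start; the vacated trailing slot is filled with the literal 5.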
166
let out = df![
167
"a" => [1, 2, 3]
168
]?
169
.lazy()
170
.select([col("a").shift_and_fill(lit(-1), lit(5))])
171
.collect()?;
172
173
let out = out.column("a")?;
174
assert_eq!(Vec::from(out.i32()?), &[Some(2), Some(3), Some(5)]);
175
Ok(())
176
}
177
178
#[test]
179
fn test_shift_and_fill_non_numeric() -> PolarsResult<()> {
180
let out = df![
181
"bool" => [true, false, true],
182
]?
183
.lazy()
184
.select([col("bool").shift_and_fill(1, true)])
185
.collect()?;
186
187
let out = out.column("bool")?;
188
assert_eq!(
189
Vec::from(out.bool()?),
190
&[Some(true), Some(true), Some(false)]
191
);
192
Ok(())
193
}
194
195
#[test]
196
fn test_lazy_ternary_and_predicates() {
197
let df = get_df();
198
// test if this runs. This failed because is_not_null changes the schema name, so we
199
// really need to check the root column
200
let ldf = df
201
.clone()
202
.lazy()
203
.with_column(lit(3).alias("foo"))
204
.filter(col("foo").is_not_null());
205
let _new = ldf.collect().unwrap();
206
207
let ldf = df
208
.lazy()
209
.with_column(
210
when(col("sepal_length").lt(lit(5.0)))
211
.then(
212
lit(3), // is another type on purpose to check type coercion
213
)
214
.otherwise(col("sepal_width"))
215
.alias("foo"),
216
)
217
.filter(col("foo").gt(lit(3.0)));
218
219
let new = ldf.collect().unwrap();
220
let length = new.column("sepal_length").unwrap();
221
assert_eq!(
222
length,
223
&Column::new("sepal_length".into(), &[5.1f64, 5.0, 5.4])
224
);
225
assert_eq!(new.shape(), (3, 6));
226
}
227
228
#[test]
229
fn test_lazy_binary_ops() {
230
let df = df!("a" => &[1, 2, 3, 4, 5, ]).unwrap();
231
let new = df
232
.lazy()
233
.select([col("a").eq(lit(2)).alias("foo")])
234
.collect()
235
.unwrap();
236
assert_eq!(
237
new.column("foo")
238
.unwrap()
239
.as_materialized_series()
240
.sum::<i32>()
241
.unwrap(),
242
1
243
);
244
}
245
246
#[test]
247
fn test_lazy_query_2() {
248
let df = load_df();
249
let ldf = df
250
.lazy()
251
.with_column(col("a").map(|s| Ok(s * 2), |_, f| Ok(f.clone())))
252
.filter(col("a").lt(lit(2)))
253
.select([col("b"), col("a")]);
254
255
let new = ldf.collect().unwrap();
256
assert_eq!(new.shape(), (0, 2));
257
}
258
259
#[test]
260
#[cfg(feature = "csv")]
261
fn test_lazy_query_3() {
262
// This query checks that the schema of the scan is not changed by the aggregation.
263
let _ = scan_foods_csv()
264
.group_by([col("calories")])
265
.agg([col("fats_g").max()])
266
.collect()
267
.unwrap();
268
}
269
270
#[test]
271
fn test_lazy_query_4() -> PolarsResult<()> {
272
let df = df! {
273
"uid" => [0, 0, 0, 1, 1, 1],
274
"day" => [1, 2, 3, 1, 2, 3],
275
"cumcases" => [10, 12, 15, 25, 30, 41]
276
}
277
.unwrap();
278
279
let base_df = df.lazy();
280
281
let out = base_df
282
.clone()
283
.group_by([col("uid")])
284
.agg([
285
col("day").alias("day"),
286
col("cumcases")
287
.apply(|s: Column| &s - &(s.shift(1)), |_, f| Ok(f.clone()))
288
.alias("diff_cases"),
289
])
290
.explode(
291
by_name(["day", "diff_cases"], true, false),
292
ExplodeOptions {
293
empty_as_null: true,
294
keep_nulls: true,
295
},
296
)
297
.join(
298
base_df,
299
[col("uid"), col("day")],
300
[col("uid"), col("day")],
301
JoinType::Inner.into(),
302
)
303
.collect()
304
.unwrap();
305
assert_eq!(
306
Vec::from(out.column("diff_cases").unwrap().i32().unwrap()),
307
&[None, Some(2), Some(3), None, Some(5), Some(11)]
308
);
309
310
Ok(())
311
}
312
313
#[test]
314
fn test_lazy_query_5() {
315
// if this one fails, the list builder probably does not handle offsets correctly
316
let df = df! {
317
"uid" => [0, 0, 0, 1, 1, 1],
318
"day" => [1, 2, 4, 1, 2, 3],
319
"cumcases" => [10, 12, 15, 25, 30, 41]
320
}
321
.unwrap();
322
323
let out = df
324
.lazy()
325
.group_by([col("uid")])
326
.agg([col("day").head(Some(2))])
327
.collect()
328
.unwrap();
329
let s = out
330
.select_at_idx(1)
331
.unwrap()
332
.list()
333
.unwrap()
334
.get_as_series(0)
335
.unwrap();
336
assert_eq!(s.len(), 2);
337
let s = out
338
.select_at_idx(1)
339
.unwrap()
340
.list()
341
.unwrap()
342
.get_as_series(0)
343
.unwrap();
344
assert_eq!(s.len(), 2);
345
}
346
347
#[test]
348
#[cfg(feature = "is_in")]
349
fn test_lazy_query_8() -> PolarsResult<()> {
350
// https://github.com/pola-rs/polars/issues/842
351
let df = df![
352
"A" => [1, 2, 3],
353
"B" => [1, 2, 3],
354
"C" => [1, 2, 3],
355
"D" => [1, 2, 3],
356
"E" => [1, 2, 3]
357
]?;
358
359
let mut selection = vec![];
360
361
for &c in &["A", "B", "C", "D", "E"] {
362
let e = when(col(c).is_in(col("E"), false))
363
.then(col("A"))
364
.otherwise(Null {}.lit())
365
.alias(c);
366
selection.push(e);
367
}
368
369
let out = df
370
.lazy()
371
.select(selection)
372
.filter(col("D").gt(lit(1)))
373
.collect()?;
374
assert_eq!(out.shape(), (2, 5));
375
Ok(())
376
}
377
378
#[test]
379
fn test_lazy_query_9() -> PolarsResult<()> {
380
// https://github.com/pola-rs/polars/issues/958
381
let cities = df![
382
"Cities.City"=> ["Moscow", "Berlin", "Paris","Hamburg", "Lyon", "Novosibirsk"],
383
"Cities.Population"=> [11.92, 3.645, 2.161, 1.841, 0.513, 1.511],
384
"Cities.Country"=> ["Russia", "Germany", "France", "Germany", "France", "Russia"]
385
]?;
386
387
let sales = df![
388
"Sales.City"=> ["Moscow", "Berlin", "Paris", "Moscow", "Berlin", "Paris", "Moscow", "Berlin", "Paris"],
389
"Sales.Item"=> ["Item A", "Item A","Item A",
390
"Item B", "Item B","Item B",
391
"Item C", "Item C","Item C"],
392
"Sales.Amount"=> [200, 180, 100,
393
3, 30, 20,
394
90, 130, 125]
395
]?;
396
397
let out = sales
398
.lazy()
399
.join(
400
cities.lazy(),
401
[col("Sales.City")],
402
[col("Cities.City")],
403
JoinType::Inner.into(),
404
)
405
.group_by([col("Cities.Country")])
406
.agg([col("Sales.Amount").sum().alias("sum")])
407
.sort(["sum"], Default::default())
408
.collect()?;
409
let vals = out
410
.column("sum")?
411
.i32()?
412
.into_no_null_iter()
413
.collect::<Vec<_>>();
414
assert_eq!(vals, &[245, 293, 340]);
415
Ok(())
416
}
417
418
#[test]
419
#[cfg(all(
420
feature = "temporal",
421
feature = "dtype-datetime",
422
feature = "dtype-date",
423
feature = "dtype-duration"
424
))]
425
fn test_lazy_query_10() {
426
use chrono::Duration as ChronoDuration;
427
let date = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap();
428
let x = DatetimeChunked::from_naive_datetime(
429
"x".into(),
430
[
431
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 0, 0).unwrap()),
432
NaiveDateTime::new(date, NaiveTime::from_hms_opt(13, 0, 0).unwrap()),
433
NaiveDateTime::new(date, NaiveTime::from_hms_opt(14, 0, 0).unwrap()),
434
],
435
TimeUnit::Nanoseconds,
436
)
437
.into_column();
438
let y = DatetimeChunked::from_naive_datetime(
439
"y".into(),
440
[
441
NaiveDateTime::new(date, NaiveTime::from_hms_opt(11, 0, 0).unwrap()),
442
NaiveDateTime::new(date, NaiveTime::from_hms_opt(11, 0, 0).unwrap()),
443
NaiveDateTime::new(date, NaiveTime::from_hms_opt(11, 0, 0).unwrap()),
444
],
445
TimeUnit::Nanoseconds,
446
)
447
.into_column();
448
let df = DataFrame::new_infer_height(vec![x, y]).unwrap();
449
let out = df
450
.lazy()
451
.select(&[(col("x") - col("y")).alias("z")])
452
.collect()
453
.unwrap();
454
let z = DurationChunked::from_duration(
455
"z".into(),
456
[
457
ChronoDuration::try_hours(1).unwrap(),
458
ChronoDuration::try_hours(2).unwrap(),
459
ChronoDuration::try_hours(3).unwrap(),
460
],
461
TimeUnit::Nanoseconds,
462
)
463
.into_column();
464
assert!(out.column("z").unwrap().equals(&z));
465
let x = DatetimeChunked::from_naive_datetime(
466
"x".into(),
467
[
468
NaiveDateTime::new(date, NaiveTime::from_hms_opt(2, 0, 0).unwrap()),
469
NaiveDateTime::new(date, NaiveTime::from_hms_opt(3, 0, 0).unwrap()),
470
NaiveDateTime::new(date, NaiveTime::from_hms_opt(4, 0, 0).unwrap()),
471
],
472
TimeUnit::Milliseconds,
473
)
474
.into_column();
475
let y = DatetimeChunked::from_naive_datetime(
476
"y".into(),
477
[
478
NaiveDateTime::new(date, NaiveTime::from_hms_opt(1, 0, 0).unwrap()),
479
NaiveDateTime::new(date, NaiveTime::from_hms_opt(1, 0, 0).unwrap()),
480
NaiveDateTime::new(date, NaiveTime::from_hms_opt(1, 0, 0).unwrap()),
481
],
482
TimeUnit::Nanoseconds,
483
)
484
.into_column();
485
let df = DataFrame::new_infer_height(vec![x, y]).unwrap();
486
let out = df
487
.lazy()
488
.select(&[(col("x") - col("y")).alias("z")])
489
.collect()
490
.unwrap();
491
assert!(
492
out.column("z")
493
.unwrap()
494
.equals(&z.cast(&DataType::Duration(TimeUnit::Milliseconds)).unwrap())
495
);
496
}
497
498
#[test]
499
#[cfg(all(
500
feature = "temporal",
501
feature = "dtype-date",
502
feature = "dtype-datetime"
503
))]
504
fn test_lazy_query_7() {
505
let date = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap();
506
let dates = [
507
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 0, 0).unwrap()),
508
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 1, 0).unwrap()),
509
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 2, 0).unwrap()),
510
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 3, 0).unwrap()),
511
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 4, 0).unwrap()),
512
NaiveDateTime::new(date, NaiveTime::from_hms_opt(12, 5, 0).unwrap()),
513
];
514
let data = vec![Some(1.), Some(2.), Some(3.), Some(4.), None, None];
515
let df = DataFrame::new_infer_height(vec![
516
DatetimeChunked::from_naive_datetime("date".into(), dates, TimeUnit::Nanoseconds)
517
.into_column(),
518
Column::new("data".into(), data),
519
])
520
.unwrap();
521
// this tests that predicate pushdown does not interfere with the shifted data.
522
let out = df
523
.lazy()
524
.with_column(col("data").shift(lit(-1)).alias("output"))
525
.with_column(col("output").shift(lit(2)).alias("shifted"))
526
.filter(col("date").gt(lit(NaiveDateTime::new(
527
date,
528
NaiveTime::from_hms_opt(12, 2, 0).unwrap(),
529
))))
530
.collect()
531
.unwrap();
532
let a = out
533
.column("shifted")
534
.unwrap()
535
.as_materialized_series()
536
.sum::<f64>()
537
.unwrap()
538
- 7.0;
539
assert!(a < 0.01 && a > -0.01);
540
}
541
542
#[test]
543
fn test_lazy_shift_and_fill_all() {
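// shift(1) introduces a leading null, which fill_null then replaces with 0.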
544
let data = &[1, 2, 3];
545
let df = DataFrame::new_infer_height(vec![Column::new("data".into(), data)]).unwrap();
546
let out = df
547
.lazy()
548
.with_column(col("data").shift(lit(1)).fill_null(lit(0)).alias("output"))
549
.collect()
550
.unwrap();
551
assert_eq!(
552
Vec::from(out.column("output").unwrap().i32().unwrap()),
553
vec![Some(0), Some(1), Some(2)]
554
);
555
}
556
557
#[test]
558
fn test_lazy_shift_operation_no_filter() {
559
// check that the predicate pushdown optimization does not fail
560
let df = df! {
561
"a" => &[1, 2, 3],
562
"b" => &[1, 2, 3]
563
}
564
.unwrap();
565
df.lazy()
566
.with_column(col("b").shift(lit(1)).alias("output"))
567
.collect()
568
.unwrap();
569
}
570
571
#[test]
572
fn test_simplify_expr() {
573
// Test that an expression containing literals is simplified
574
let df = get_df();
575
576
let plan = df
577
.lazy()
578
.select(&[lit(1.0) + lit(1.0) + col("sepal_width")])
579
.logical_plan;
580
581
let mut expr_arena = Arena::new();
582
let mut lp_arena = Arena::new();
583
584
#[allow(const_item_mutation)]
585
let lp_top = to_alp(
586
plan,
587
&mut expr_arena,
588
&mut lp_arena,
589
&mut OptFlags::SIMPLIFY_EXPR,
590
)
591
.unwrap();
592
593
assert!(matches!(
594
lp_arena.get(lp_top),
595
IR::Select { expr, .. } if matches!(expr_arena.get(expr[0].node()), AExpr::BinaryExpr{ left, ..} if matches!(expr_arena.get(*left), &AExpr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(2.0)))))
596
));
597
}
598
599
#[test]
600
fn test_lazy_wildcard() {
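// col("*") should expand to all three columns; in the aggregation below the group key "b"
// must be excluded from the wildcard expansion, giving 5 output columns.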
601
let df = load_df();
602
let new = df.clone().lazy().select([col("*")]).collect().unwrap();
603
assert_eq!(new.shape(), (5, 3));
604
605
let new = df
606
.lazy()
607
.group_by([col("b")])
608
.agg([
609
col("*").sum().name().suffix(""),
610
col("*").first().name().suffix("_first"),
611
])
612
.collect()
613
.unwrap();
614
assert_eq!(new.shape(), (3, 5)); // Should exclude b from wildcard aggregations.
615
}
616
617
#[test]
618
fn test_lazy_reverse() {
619
let df = load_df();
620
assert!(
621
df.clone()
622
.lazy()
623
.reverse()
624
.collect()
625
.unwrap()
626
.equals_missing(&df.reverse())
627
)
628
}
629
630
#[test]
631
fn test_lazy_fill_null() {
632
let df = df! {
633
"a" => &[None, Some(2.0)],
634
"b" => &[Some(1.0), None]
635
}
636
.unwrap();
637
let out = df.lazy().fill_null(lit(10.0)).collect().unwrap();
638
let correct = df! {
639
"a" => &[Some(10.0), Some(2.0)],
640
"b" => &[Some(1.0), Some(10.0)]
641
}
642
.unwrap();
643
assert!(out.equals(&correct));
644
assert_eq!(out.get_column_names(), vec!["a", "b"])
645
}
646
647
#[test]
648
fn test_lazy_double_projection() {
649
let df = df! {
650
"foo" => &[1, 2, 3]
651
}
652
.unwrap();
653
df.lazy()
654
.select([col("foo").alias("bar")])
655
.select([col("bar")])
656
.collect()
657
.unwrap();
658
}
659
660
#[test]
661
fn test_type_coercion() {
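// Multiplying an integer column by a float column should insert a cast on the integer side only;
// the checks on the IR below verify exactly that.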
662
let df = df! {
663
"foo" => &[1, 2, 3],
664
"bar" => &[1.0, 2.0, 3.0]
665
}
666
.unwrap();
667
668
let lp = df.lazy().select([col("foo") * col("bar")]).logical_plan;
669
670
let mut expr_arena = Arena::new();
671
let mut lp_arena = Arena::new();
672
let lp_top = to_alp(lp, &mut expr_arena, &mut lp_arena, &mut OptFlags::default()).unwrap();
673
674
if let IR::Select { expr, .. } = lp_arena.get(lp_top) {
675
if let AExpr::BinaryExpr { left, right, .. } = expr_arena.get(expr[0].node()) {
676
assert!(matches!(expr_arena.get(*left), AExpr::Cast { .. }));
677
// bar is already a float, so it does not have to be coerced
678
assert!(matches!(expr_arena.get(*right), AExpr::Column { .. }));
679
} else {
680
panic!()
681
}
682
};
683
}
684
685
#[test]
686
#[cfg(feature = "csv")]
687
fn test_lazy_partition_agg() {
688
let df = df! {
689
"foo" => &[1, 1, 2, 2, 3],
690
"bar" => &[1.0, 1.0, 2.0, 2.0, 3.0]
691
}
692
.unwrap();
693
694
let out = df
695
.lazy()
696
.group_by([col("foo")])
697
.agg([col("bar").mean()])
698
.sort(["foo"], Default::default())
699
.collect()
700
.unwrap();
701
702
assert_eq!(
703
Vec::from(out.column("bar").unwrap().f64().unwrap()),
704
&[Some(1.0), Some(2.0), Some(3.0)]
705
);
706
707
let out = scan_foods_csv()
708
.group_by([col("category")])
709
.agg([col("calories")])
710
.sort(["category"], Default::default())
711
.collect()
712
.unwrap();
713
let cat_agg_list = out.select_at_idx(1).unwrap();
714
let fruit_series = cat_agg_list.list().unwrap().get_as_series(0).unwrap();
715
let fruit_list = fruit_series.i64().unwrap();
716
assert_eq!(
717
Vec::from(fruit_list),
718
&[
719
Some(60),
720
Some(30),
721
Some(50),
722
Some(30),
723
Some(60),
724
Some(130),
725
Some(50),
726
]
727
)
728
}
729
730
#[test]
731
fn test_lazy_group_by_apply() {
732
let df = fruits_cars();
733
734
df.lazy()
735
.group_by([col("fruits")])
736
.agg([col("cars").apply(
737
|s: Column| Ok(Column::new("".into(), &[s.len() as u32])),
738
|_, f| Ok(Field::new(f.name().clone(), DataType::UInt32)),
739
)])
740
.collect()
741
.unwrap();
742
}
743
744
#[test]
745
fn test_lazy_shift_and_fill() {
746
let df = df! {
747
"A" => &[1, 2, 3, 4, 5],
748
"B" => &[5, 4, 3, 2, 1]
749
}
750
.unwrap();
751
let out = df
752
.clone()
753
.lazy()
754
.with_column(col("A").shift_and_fill(lit(2), col("B").mean()))
755
.collect()
756
.unwrap();
757
assert_eq!(out.column("A").unwrap().null_count(), 0);
758
759
// shift from the other side
760
let out = df
761
.clone()
762
.lazy()
763
.with_column(col("A").shift_and_fill(lit(-2), col("B").mean()))
764
.collect()
765
.unwrap();
766
assert_eq!(out.column("A").unwrap().null_count(), 0);
767
768
let out = df
769
.lazy()
770
.shift_and_fill(lit(-1), col("B").std(1))
771
.collect()
772
.unwrap();
773
assert_eq!(out.column("A").unwrap().null_count(), 0);
774
}
775
776
#[test]
777
fn test_lazy_group_by() {
778
let df = df! {
779
"a" => &[Some(1.0), None, Some(3.0), Some(4.0), Some(5.0)],
780
"groups" => &["a", "a", "b", "c", "c"]
781
}
782
.unwrap();
783
784
let out = df
785
.lazy()
786
.group_by([col("groups")])
787
.agg([col("a").mean()])
788
.sort(["a"], Default::default())
789
.collect()
790
.unwrap();
791
792
assert_eq!(out.column("a").unwrap().f64().unwrap().get(0), Some(1.0));
793
}
794
795
#[test]
796
fn test_lazy_tail() {
797
let df = df! {
798
"A" => &[1, 2, 3, 4, 5],
799
"B" => &[5, 4, 3, 2, 1]
800
}
801
.unwrap();
802
803
let _out = df.lazy().tail(3).collect().unwrap();
804
}
805
806
#[test]
807
fn test_lazy_group_by_sort() {
808
let df = df! {
809
"a" => ["a", "b", "a", "b", "b", "c"],
810
"b" => [1, 2, 3, 4, 5, 6]
811
}
812
.unwrap();
813
814
let out = df
815
.clone()
816
.lazy()
817
.group_by([col("a")])
818
.agg([col("b").sort(Default::default()).first()])
819
.collect()
820
.unwrap()
821
.sort(["a"], Default::default())
822
.unwrap();
823
824
assert_eq!(
825
Vec::from(out.column("b").unwrap().i32().unwrap()),
826
[Some(1), Some(2), Some(6)]
827
);
828
829
let out = df
830
.lazy()
831
.group_by([col("a")])
832
.agg([col("b").sort(Default::default()).last()])
833
.collect()
834
.unwrap()
835
.sort(["a"], Default::default())
836
.unwrap();
837
838
assert_eq!(
839
Vec::from(out.column("b").unwrap().i32().unwrap()),
840
[Some(3), Some(5), Some(6)]
841
);
842
}
843
844
#[test]
845
fn test_lazy_group_by_sort_by() {
846
let df = df! {
847
"a" => ["a", "a", "a", "b", "b", "c"],
848
"b" => [1, 2, 3, 4, 5, 6],
849
"c" => [6, 1, 4, 3, 2, 1]
850
}
851
.unwrap();
852
853
let out = df
854
.lazy()
855
.group_by([col("a")])
856
.agg([col("b")
857
.sort_by(
858
[col("c")],
859
SortMultipleOptions::default().with_order_descending(true),
860
)
861
.first()])
862
.collect()
863
.unwrap()
864
.sort(["a"], Default::default())
865
.unwrap();
866
867
assert_eq!(
868
Vec::from(out.column("b").unwrap().i32().unwrap()),
869
[Some(1), Some(4), Some(6)]
870
);
871
}
872
873
#[test]
874
#[cfg(feature = "dtype-datetime")]
875
fn test_lazy_group_by_cast() {
876
let df = df! {
877
"a" => ["a", "a", "a", "b", "b", "c"],
878
"b" => [1, 2, 3, 4, 5, 6]
879
}
880
.unwrap();
881
882
// test that it runs in the group_by context
883
let _out = df
884
.lazy()
885
.group_by([col("a")])
886
.agg([col("b")
887
.mean()
888
.cast(DataType::Datetime(TimeUnit::Nanoseconds, None))])
889
.collect()
890
.unwrap();
891
}
892
893
#[test]
894
fn test_lazy_group_by_binary_expr() {
895
let df = df! {
896
"a" => ["a", "a", "a", "b", "b", "c"],
897
"b" => [1, 2, 3, 4, 5, 6]
898
}
899
.unwrap();
900
901
// test that it runs in the group_by context
902
let out = df
903
.lazy()
904
.group_by([col("a")])
905
.agg([col("b").mean() * lit(2)])
906
.sort(["a"], Default::default())
907
.collect()
908
.unwrap();
909
assert_eq!(
910
Vec::from(out.column("b").unwrap().f64().unwrap()),
911
[Some(4.0), Some(9.0), Some(12.0)]
912
);
913
}
914
915
#[test]
916
fn test_lazy_group_by_filter() -> PolarsResult<()> {
917
let df = df! {
918
"a" => ["a", "a", "a", "b", "b", "c"],
919
"b" => [1, 2, 3, 4, 5, 6]
920
}?;
921
922
// We test that the filters work in the group_by context
923
// and that the aggregations can deal with empty sets
924
925
let out = df
926
.lazy()
927
.group_by([col("a")])
928
.agg([
929
col("b").filter(col("a").eq(lit("a"))).sum().alias("b_sum"),
930
col("b")
931
.filter(col("a").eq(lit("a")))
932
.first()
933
.alias("b_first"),
934
col("b")
935
.filter(col("a").eq(lit("e")))
936
.mean()
937
.alias("b_mean"),
938
col("b")
939
.filter(col("a").eq(lit("a")))
940
.last()
941
.alias("b_last"),
942
])
943
.sort(["a"], SortMultipleOptions::default())
944
.collect()?;
945
946
assert_eq!(
947
Vec::from(out.column("b_sum").unwrap().i32().unwrap()),
948
[Some(6), Some(0), Some(0)]
949
);
950
assert_eq!(
951
Vec::from(out.column("b_first").unwrap().i32().unwrap()),
952
[Some(1), None, None]
953
);
954
assert_eq!(
955
Vec::from(out.column("b_mean").unwrap().f64().unwrap()),
956
[None, None, None]
957
);
958
assert_eq!(
959
Vec::from(out.column("b_last").unwrap().i32().unwrap()),
960
[Some(3), None, None]
961
);
962
963
Ok(())
964
}
965
966
#[test]
967
fn test_group_by_projection_pd_same_column() -> PolarsResult<()> {
968
// this query failed when projection pushdown was enabled
969
970
let a = || {
971
let df = df![
972
"col1" => ["a", "ab", "abc"],
973
"col2" => [1, 2, 3]
974
]
975
.unwrap();
976
977
df.lazy()
978
.select([col("col1").alias("foo"), col("col2").alias("bar")])
979
};
980
981
let out = a()
982
.left_join(a(), col("foo"), col("foo"))
983
.select([col("bar")])
984
.collect()?;
985
986
let a = out.column("bar")?.i32()?;
987
assert_eq!(Vec::from(a), &[Some(1), Some(2), Some(3)]);
988
989
Ok(())
990
}
991
992
#[test]
993
fn test_group_by_sort_slice() -> PolarsResult<()> {
994
let df = df![
995
"groups" => [1, 2, 2, 3, 3, 3],
996
"vals" => [1, 5, 6, 3, 9, 8]
997
]?;
998
// get the largest two values per group
999
1000
// expected:
1001
// group values
1002
// 1 1
1003
// 2 6, 5
1004
// 3 9, 8
1005
1006
let out1 = df
1007
.clone()
1008
.lazy()
1009
.sort(
1010
["vals"],
1011
SortMultipleOptions::default().with_order_descending(true),
1012
)
1013
.group_by([col("groups")])
1014
.agg([col("vals").head(Some(2)).alias("foo")])
1015
.sort(["groups"], Default::default())
1016
.collect()?;
1017
1018
let out2 = df
1019
.lazy()
1020
.group_by([col("groups")])
1021
.agg([col("vals")
1022
.sort(SortOptions::default().with_order_descending(true))
1023
.head(Some(2))
1024
.alias("foo")])
1025
.sort(["groups"], Default::default())
1026
.collect()?;
1027
1028
assert!(out1.column("foo")?.equals(out2.column("foo")?));
1029
Ok(())
1030
}
1031
1032
#[test]
1033
#[cfg(feature = "cum_agg")]
1034
fn test_group_by_cum_sum() -> PolarsResult<()> {
1035
let df = df![
1036
"groups" => [1, 2, 2, 3, 3, 3],
1037
"vals" => [1, 5, 6, 3, 9, 8]
1038
]?;
1039
1040
let out = df
1041
.lazy()
1042
.group_by([col("groups")])
1043
.agg([col("vals").cum_sum(false)])
1044
.sort(["groups"], Default::default())
1045
.collect()?;
1046
1047
assert_eq!(
1048
Vec::from(
1049
out.column("vals")?
1050
.explode(ExplodeOptions {
1051
empty_as_null: true,
1052
keep_nulls: true
1053
})?
1054
.i32()?
1055
),
1056
[1, 5, 11, 3, 12, 20]
1057
.iter()
1058
.copied()
1059
.map(Some)
1060
.collect::<Vec<_>>()
1061
);
1062
1063
Ok(())
1064
}
1065
1066
#[test]
1067
#[cfg(feature = "range")]
1068
fn test_arg_sort_multiple() -> PolarsResult<()> {
1069
let df = df![
1070
"int" => [1, 2, 3, 1, 2],
1071
"flt" => [3.0, 2.0, 1.0, 2.0, 1.0],
1072
"str" => ["a", "a", "a", "b", "b"]
1073
]?;
1074
1075
let out = df
1076
.clone()
1077
.lazy()
1078
.select([arg_sort_by(
1079
[col("int"), col("flt")],
1080
SortMultipleOptions::default().with_order_descending_multi([true, false]),
1081
)])
1082
.collect()?;
1083
1084
assert_eq!(
1085
Vec::from(out.column("int")?.idx()?),
1086
[2, 4, 1, 3, 0]
1087
.iter()
1088
.copied()
1089
.map(Some)
1090
.collect::<Vec<_>>()
1091
);
1092
1093
// check if this runs
1094
let _out = df
1095
.lazy()
1096
.select([arg_sort_by(
1097
[col("str"), col("flt")],
1098
SortMultipleOptions::default().with_order_descending_multi([true, false]),
1099
)])
1100
.collect()?;
1101
Ok(())
1102
}
1103
1104
#[test]
1105
fn test_multiple_explode() -> PolarsResult<()> {
1106
let df = df![
1107
"a" => [0, 1, 2, 0, 2],
1108
"b" => [5, 4, 3, 2, 1],
1109
"c" => [2, 3, 4, 1, 5]
1110
]?;
1111
1112
let out = df
1113
.lazy()
1114
.group_by([col("a")])
1115
.agg([col("b").alias("b_list"), col("c").alias("c_list")])
1116
.explode(
1117
by_name(["c_list", "b_list"], true, false),
1118
ExplodeOptions {
1119
empty_as_null: true,
1120
keep_nulls: true,
1121
},
1122
)
1123
.collect()?;
1124
assert_eq!(out.shape(), (5, 3));
1125
1126
Ok(())
1127
}
1128
1129
#[test]
1130
fn test_filter_and_alias() -> PolarsResult<()> {
1131
let df = df![
1132
"a" => [0, 1, 2, 0, 2]
1133
]?;
1134
1135
let out = df
1136
.lazy()
1137
.with_column(col("a").pow(2.0).alias("a_squared"))
1138
.filter(col("a_squared").gt(lit(1)).and(col("a").gt(lit(1))))
1139
.collect()?;
1140
1141
let expected = df![
1142
"a" => [2, 2],
1143
"a_squared" => [4.0, 4.0]
1144
]?;
1145
assert!(out.equals(&expected));
1146
Ok(())
1147
}
1148
1149
#[test]
1150
fn test_filter_lit() {
1151
// see https://github.com/pola-rs/polars/issues/790
1152
// failed due to broadcasting filters and splitting threads.
1153
let iter = (0..100).map(|i| ('A'..='Z').nth(i % 26).unwrap().to_string());
1154
let a = Series::from_iter(iter).into_column();
1155
let df = DataFrame::new_infer_height([a].into()).unwrap();
1156
1157
let out = df.lazy().filter(lit(true)).collect().unwrap();
1158
assert_eq!(out.shape(), (100, 1));
1159
}
1160
1161
#[test]
1162
fn test_ternary_null() -> PolarsResult<()> {
1163
let df = df![
1164
"a" => ["a", "b", "c"]
1165
]?;
1166
1167
let out = df
1168
.lazy()
1169
.select([when(col("a").eq(lit("c")))
1170
.then(Null {}.lit())
1171
.otherwise(col("a"))
1172
.alias("foo")])
1173
.collect()?;
1174
1175
assert_eq!(
1176
out.column("foo")?.is_null().into_iter().collect::<Vec<_>>(),
1177
&[Some(false), Some(false), Some(true)]
1178
);
1179
Ok(())
1180
}
1181
1182
#[test]
1183
fn test_fill_forward() -> PolarsResult<()> {
1184
let df = df![
1185
"a" => ["a", "b", "a"],
1186
"b" => [Some(1), None, None]
1187
]?;
1188
1189
let out = df
1190
.lazy()
1191
.select([col("b")
1192
.fill_null_with_strategy(FillNullStrategy::Forward(FillNullLimit::None))
1193
.over_with_options(Some([col("a")]), None, WindowMapping::Join)?])
1194
.collect()?;
1195
let agg = out.column("b")?.list()?;
1196
1197
let a: Series = agg.get_as_series(0).unwrap();
1198
assert!(a.equals(&Series::new("b".into(), &[1, 1])));
1199
let a: Series = agg.get_as_series(2).unwrap();
1200
assert!(a.equals(&Series::new("b".into(), &[1, 1])));
1201
let a: Series = agg.get_as_series(1).unwrap();
1202
assert_eq!(a.null_count(), 1);
1203
Ok(())
1204
}
1205
1206
#[cfg(feature = "cross_join")]
1207
#[test]
1208
fn test_cross_join() -> PolarsResult<()> {
1209
let df1 = df![
1210
"a" => ["a", "b", "a"],
1211
"b" => [Some(1), None, None]
1212
]?;
1213
1214
let df2 = df![
1215
"a" => [1, 2],
1216
"b" => [None, Some(12)]
1217
]?;
1218
1219
let out = df1.lazy().cross_join(df2.lazy(), None).collect()?;
1220
assert_eq!(out.shape(), (6, 4));
1221
Ok(())
1222
}
1223
1224
#[test]
1225
fn test_select_empty_df() -> PolarsResult<()> {
1226
// https://github.com/pola-rs/polars/issues/1056
1227
let df1 = df![
1228
"a" => [1, 2, 3],
1229
"b" => [1, 2, 3]
1230
]?;
1231
1232
let out = df1
1233
.lazy()
1234
.filter(col("a").eq(lit(0))) // this will lead to an empty frame
1235
.select([col("a"), lit(1).alias("c")])
1236
.collect()?;
1237
1238
assert_eq!(out.column("a")?.len(), 0);
1239
assert_eq!(out.column("c")?.len(), 0);
1240
1241
Ok(())
1242
}
1243
1244
#[test]
1245
fn test_keep_name() -> PolarsResult<()> {
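// name().keep() should restore the original column names, overriding the intermediate "bar" aliases.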
1246
let df = df![
1247
"a" => [1, 2, 3],
1248
"b" => [1, 2, 3]
1249
]?;
1250
1251
let out = df
1252
.lazy()
1253
.select([
1254
col("a").alias("bar").name().keep(),
1255
col("b").alias("bar").name().keep(),
1256
])
1257
.collect()?;
1258
1259
assert_eq!(out.get_column_names(), &["a", "b"]);
1260
Ok(())
1261
}
1262
1263
#[test]
1264
fn test_exclude() -> PolarsResult<()> {
1265
let df = df![
1266
"a" => [1, 2, 3],
1267
"b" => [1, 2, 3],
1268
"c" => [1, 2, 3]
1269
]?;
1270
1271
let out = df
1272
.lazy()
1273
.select([all().exclude_cols(["b"]).as_expr()])
1274
.collect()?;
1275
1276
assert_eq!(out.get_column_names(), &["a", "c"]);
1277
Ok(())
1278
}
1279
1280
#[test]
1281
#[cfg(feature = "regex")]
1282
fn test_regex_selection() -> PolarsResult<()> {
1283
let df = df![
1284
"anton" => [1, 2, 3],
1285
"arnold schwars" => [1, 2, 3],
1286
"annie" => [1, 2, 3]
1287
]?;
1288
1289
let out = df.lazy().select([col("^a.*o.*$")]).collect()?;
1290
1291
assert_eq!(out.get_column_names(), &["anton", "arnold schwars"]);
1292
Ok(())
1293
}
1294
1295
#[test]
1296
fn test_sort_by() -> PolarsResult<()> {
1297
let df = df![
1298
"a" => [1, 2, 3, 4, 5],
1299
"b" => [1, 1, 1, 2, 2],
1300
"c" => [2, 3, 1, 2, 1]
1301
]?;
1302
1303
// evaluate
1304
let out = df
1305
.clone()
1306
.lazy()
1307
.select([col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default())])
1308
.collect()?;
1309
1310
let a = out.column("a")?;
1311
assert_eq!(
1312
Vec::from(a.i32().unwrap()),
1313
&[Some(3), Some(1), Some(2), Some(5), Some(4)]
1314
);
1315
1316
// aggregate
1317
let out = df
1318
.clone()
1319
.lazy()
1320
.group_by_stable([col("b")])
1321
.agg([col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default())])
1322
.collect()?;
1323
let a = out.column("a")?.explode(ExplodeOptions {
1324
empty_as_null: true,
1325
keep_nulls: true,
1326
})?;
1327
assert_eq!(
1328
Vec::from(a.i32().unwrap()),
1329
&[Some(3), Some(1), Some(2), Some(5), Some(4)]
1330
);
1331
1332
// evaluate_on_groups
1333
let out = df
1334
.lazy()
1335
.group_by_stable([col("b")])
1336
.agg([col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default())])
1337
.collect()?;
1338
1339
let a = out.column("a")?.explode(ExplodeOptions {
1340
empty_as_null: true,
1341
keep_nulls: true,
1342
})?;
1343
assert_eq!(
1344
Vec::from(a.i32().unwrap()),
1345
&[Some(3), Some(1), Some(2), Some(5), Some(4)]
1346
);
1347
1348
Ok(())
1349
}
1350
1351
#[test]
1352
fn test_filter_after_shift_in_groups() -> PolarsResult<()> {
1353
let df = fruits_cars();
1354
1355
let out = df
1356
.lazy()
1357
.select([
1358
col("fruits"),
1359
col("B")
1360
.shift(lit(1))
1361
.filter(col("B").shift(lit(1)).gt(lit(4)))
1362
.over_with_options(Some([col("fruits")]), None, WindowMapping::Join)?
1363
.alias("filtered"),
1364
])
1365
.collect()?;
1366
1367
assert_eq!(
1368
out.column("filtered")?
1369
.list()?
1370
.get_as_series(0)
1371
.unwrap()
1372
.i32()?
1373
.get(0)
1374
.unwrap(),
1375
5
1376
);
1377
assert_eq!(
1378
out.column("filtered")?
1379
.list()?
1380
.get_as_series(1)
1381
.unwrap()
1382
.i32()?
1383
.get(0)
1384
.unwrap(),
1385
5
1386
);
1387
assert_eq!(
1388
out.column("filtered")?
1389
.list()?
1390
.get_as_series(2)
1391
.unwrap()
1392
.len(),
1393
0
1394
);
1395
1396
Ok(())
1397
}
1398
1399
#[test]
1400
fn test_lazy_ternary_predicate_pushdown() -> PolarsResult<()> {
1401
let df = df![
1402
"a" => &[10, 1, 2, 3]
1403
]?;
1404
1405
let out = df
1406
.lazy()
1407
.select([when(col("a").eq(lit(10)))
1408
.then(Null {}.lit())
1409
.otherwise(col("a"))])
1410
.drop_nulls(None)
1411
.collect()?;
1412
1413
assert_eq!(
1414
Vec::from(out.columns()[0].i32()?),
1415
&[Some(1), Some(2), Some(3)]
1416
);
1417
1418
Ok(())
1419
}
1420
1421
#[test]
1422
#[cfg(feature = "dtype-categorical")]
1423
fn test_categorical_addition() -> PolarsResult<()> {
1424
let df = fruits_cars();
1425
1426
// test that this arithmetic operation works with String and Categorical
1427
let out = df
1428
.lazy()
1429
.select([
1430
col("fruits").cast(DataType::from_categories(Categories::global())),
1431
col("cars").cast(DataType::from_categories(Categories::global())),
1432
])
1433
.select([(col("fruits") + lit(" ") + col("cars")).alias("foo")])
1434
.collect()?;
1435
1436
assert_eq!(out.column("foo")?.str()?.get(0).unwrap(), "banana beetle");
1437
1438
Ok(())
1439
}
1440
1441
#[test]
1442
fn test_error_duplicate_names() {
1443
let df = fruits_cars();
1444
assert!(df.lazy().select([col("*"), col("*")]).collect().is_err());
1445
}
1446
1447
#[test]
1448
fn test_filter_count() -> PolarsResult<()> {
1449
let df = fruits_cars();
1450
let out = df
1451
.lazy()
1452
.select([col("fruits")
1453
.filter(col("fruits").eq(lit("banana")))
1454
.count()])
1455
.collect()?;
1456
assert_eq!(out.column("fruits")?.idx()?.get(0), Some(3));
1457
Ok(())
1458
}
1459
1460
#[test]
1461
#[cfg(feature = "dtype-i16")]
1462
fn test_group_by_small_ints() -> PolarsResult<()> {
1463
let df = df![
1464
"id_32" => [1i32, 2],
1465
"id_16" => [1i16, 2]
1466
]?;
1467
1468
// https://github.com/pola-rs/polars/issues/1255
1469
let out = df
1470
.lazy()
1471
.group_by([col("id_16"), col("id_32")])
1472
.agg([col("id_16").sum().alias("foo")])
1473
.sort(
1474
["foo"],
1475
SortMultipleOptions::default().with_order_descending(true),
1476
)
1477
.collect()?;
1478
1479
assert_eq!(Vec::from(out.column("foo")?.i64()?), &[Some(2), Some(1)]);
1480
Ok(())
1481
}
1482
1483
#[test]
1484
fn test_when_then_schema() -> PolarsResult<()> {
1485
let df = fruits_cars();
1486
1487
let schema = df
1488
.lazy()
1489
.select([when(col("A").gt(lit(1)))
1490
.then(Null {}.lit())
1491
.otherwise(col("A"))])
1492
.collect_schema();
1493
assert_ne!(schema?.get_at_index(0).unwrap().1, &DataType::Null);
1494
1495
Ok(())
1496
}
1497
1498
#[test]
1499
fn test_singleton_broadcast() -> PolarsResult<()> {
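// A unit-length literal selected next to a full column should broadcast to the frame height instead of staying length 1.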
1500
let df = fruits_cars();
1501
let out = df
1502
.lazy()
1503
.select([col("fruits"), lit(1).alias("foo")])
1504
.collect()?;
1505
1506
assert!(out.column("foo")?.len() > 1);
1507
Ok(())
1508
}
1509
1510
#[test]
1511
fn test_list_in_select_context() -> PolarsResult<()> {
1512
let s = Column::new("a".into(), &[1, 2, 3]);
1513
let mut builder = get_list_builder(s.dtype(), s.len(), 1, s.name().clone());
1514
builder.append_series(s.as_materialized_series()).unwrap();
1515
let expected = builder.finish().into_column();
1516
1517
let df = DataFrame::new_infer_height(vec![s])?;
1518
1519
let out = df.lazy().select([col("a").implode()]).collect()?;
1520
1521
let s = out.column("a")?;
1522
assert!(s.equals(&expected));
1523
1524
Ok(())
1525
}
1526
1527
#[test]
1528
#[cfg(feature = "round_series")]
1529
fn test_round_after_agg() -> PolarsResult<()> {
1530
let df = fruits_cars();
1531
1532
let out = df
1533
.lazy()
1534
.group_by([col("fruits")])
1535
.agg([col("A")
1536
.cast(DataType::Float32)
1537
.mean()
1538
.round(2, polars_ops::series::RoundMode::default())
1539
.alias("foo")])
1540
.collect()?;
1541
1542
assert!(out.column("foo")?.f32().is_ok());
1543
1544
let df = df![
1545
"groups" => ["pigeon",
1546
"rabbit",
1547
"rabbit",
1548
"Chris",
1549
"pigeon",
1550
"fast",
1551
"fast",
1552
"pigeon",
1553
"rabbit",
1554
"Chris"],
1555
"b" => [5409, 4848, 4864, 3540, 8103, 3083, 8575, 9963, 8809, 5425],
1556
"c" => [0.4517241160719615,
1557
0.2551467646274673,
1558
0.8682045191407308,
1559
0.9925316385786037,
1560
0.5392027792928116,
1561
0.7633847828107002,
1562
0.7967295231651537,
1563
0.01444779067224733,
1564
0.23807484087472652,
1565
0.10985868798350984]
1566
]?;
1567
1568
let out = df
1569
.lazy()
1570
.group_by_stable([col("groups")])
1571
.agg([((col("b") * col("c")).sum() / col("b").sum())
1572
.round(2, polars_ops::series::RoundMode::default())
1573
.alias("foo")])
1574
.collect()?;
1575
1576
let out = out.column("foo")?;
1577
let out = out.f64()?;
1578
1579
assert_eq!(
1580
Vec::from(out),
1581
&[Some(0.3), Some(0.41), Some(0.46), Some(0.79)]
1582
);
1583
1584
Ok(())
1585
}
1586
1587
#[test]
1588
#[cfg(feature = "dtype-date")]
1589
fn test_fill_nan() -> PolarsResult<()> {
1590
let s0 = Column::new("date".into(), &[1, 2, 3]).cast(&DataType::Date)?;
1591
let s1 = Column::new("float".into(), &[Some(1.0), Some(f32::NAN), Some(3.0)]);
1592
1593
let df = DataFrame::new_infer_height(vec![s0, s1])?;
1594
let out = df.lazy().fill_nan(Null {}.lit()).collect()?;
1595
let out = out.column("float")?;
1596
assert_eq!(Vec::from(out.f32()?), &[Some(1.0), None, Some(3.0)]);
1597
1598
Ok(())
1599
}
1600
1601
#[test]
1602
#[cfg(feature = "regex")]
1603
fn test_exclude_regex() -> PolarsResult<()> {
1604
let df = fruits_cars();
1605
let out = df
1606
.lazy()
1607
.select([(all() - Selector::Matches("^(fruits|cars)$".into())).as_expr()])
1608
.collect()?;
1609
1610
assert_eq!(out.get_column_names(), &["A", "B"]);
1611
Ok(())
1612
}
1613
1614
#[test]
1615
#[cfg(feature = "rank")]
1616
fn test_group_by_rank() -> PolarsResult<()> {
1617
let df = fruits_cars();
1618
let out = df
1619
.lazy()
1620
.group_by_stable([col("cars")])
1621
.agg([col("B").rank(
1622
RankOptions {
1623
method: RankMethod::Dense,
1624
..Default::default()
1625
},
1626
None,
1627
)])
1628
.collect()?;
1629
1630
let out = out.column("B")?;
1631
let out = out.list()?.get_as_series(1).unwrap();
1632
let out = out.idx()?;
1633
1634
assert_eq!(Vec::from(out), &[Some(1)]);
1635
Ok(())
1636
}
1637
1638
#[test]
1639
pub fn test_select_by_dtypes() -> PolarsResult<()> {
1640
let df = df![
1641
"bools" => [true, false, true],
1642
"ints" => [1, 2, 3],
1643
"strings" => ["a", "b", "c"],
1644
"floats" => [1.0, 2.0, 3.0f32]
1645
]?;
1646
let out = df
1647
.lazy()
1648
.select([dtype_cols([DataType::Float32, DataType::String])
1649
.as_selector()
1650
.as_expr()])
1651
.collect()?;
1652
assert_eq!(out.dtypes(), &[DataType::String, DataType::Float32]);
1653
1654
Ok(())
1655
}
1656
1657
#[test]
1658
fn test_binary_expr() -> PolarsResult<()> {
1659
// test that schema name computation does not panic
1660
let df = fruits_cars();
1661
let _ = df.lazy().select([col("A").neq(lit(1))]).collect()?;
1662
1663
// test type coercion
1664
// https://github.com/pola-rs/polars/issues/1649
1665
let df = df!(
1666
"nrs"=> [Some(1i64), Some(2), Some(3), None, Some(5)],
1667
"random"=> [0.1f64, 0.6, 0.2, 0.6, 0.3]
1668
)?;
1669
1670
let other = when(col("random").gt(lit(0.5)))
1671
.then(lit(2))
1672
.otherwise(col("random"))
1673
.alias("other");
1674
let out = df.lazy().select([other * col("nrs").sum()]).collect()?;
1675
assert_eq!(out.dtypes(), &[DataType::Float64]);
1676
Ok(())
1677
}
1678
1679
#[test]
1680
fn test_single_group_result() -> PolarsResult<()> {
1681
// the arg_sort should not auto explode
1682
let df = df![
1683
"a" => [1, 2],
1684
"b" => [1, 1]
1685
]?;
1686
1687
let out = df
1688
.lazy()
1689
.select([col("a").arg_sort(false, false).over([col("a")])])
1690
.collect()?;
1691
1692
let a = out.column("a")?.idx()?;
1693
assert_eq!(Vec::from(a), &[Some(0), Some(0)]);
1694
1695
Ok(())
1696
}
1697
1698
#[test]
1699
#[cfg(feature = "rank")]
1700
fn test_single_ranked_group() -> PolarsResult<()> {
1701
// tests type consistency of rank algorithm
1702
let df = df!["group" => [1, 2, 2],
1703
"value"=> [100, 50, 10]
1704
]?;
1705
1706
let out = df
1707
.lazy()
1708
.with_columns([col("value")
1709
.rank(
1710
RankOptions {
1711
method: RankMethod::Average,
1712
..Default::default()
1713
},
1714
None,
1715
)
1716
.over_with_options(Some([col("group")]), None, WindowMapping::Join)?])
1717
.collect()?;
1718
1719
let out = out.column("value")?.explode(ExplodeOptions {
1720
empty_as_null: true,
1721
keep_nulls: true,
1722
})?;
1723
let out = out.f64()?;
1724
assert_eq!(
1725
Vec::from(out),
1726
&[Some(1.0), Some(2.0), Some(1.0), Some(2.0), Some(1.0)]
1727
);
1728
1729
Ok(())
1730
}
1731
1732
#[test]
1733
#[cfg(feature = "diff")]
1734
fn empty_df() -> PolarsResult<()> {
1735
let df = fruits_cars();
1736
let df = df.filter(&BooleanChunked::full("".into(), false, df.height()))?;
1737
1738
df.lazy()
1739
.select([
1740
col("A").shift(lit(1)).alias("1"),
1741
col("A").shift_and_fill(lit(1), lit(1)).alias("2"),
1742
col("A").shift_and_fill(lit(-1), lit(1)).alias("3"),
1743
col("A").fill_null(lit(1)).alias("4"),
1744
col("A").cum_count(false).alias("5"),
1745
col("A").diff(lit(1), NullBehavior::Ignore).alias("6"),
1746
col("A").cum_max(false).alias("7"),
1747
col("A").cum_min(false).alias("8"),
1748
])
1749
.collect()?;
1750
1751
Ok(())
1752
}
1753
1754
#[test]
1755
#[cfg(feature = "abs")]
1756
fn test_apply_flatten() -> PolarsResult<()> {
1757
let df = df![
1758
"A"=> [1.1435, 2.223456, 3.44732, -1.5234, -2.1238, -3.2923],
1759
"B"=> ["a", "b", "a", "b", "a", "b"]
1760
]?;
1761
1762
let out = df
1763
.lazy()
1764
.group_by_stable([col("B")])
1765
.agg([col("A").abs().sum().alias("A_sum")])
1766
.collect()?;
1767
1768
let out = out.column("A_sum")?;
1769
assert_eq!(out.get(0)?, AnyValue::Float64(6.71462));
1770
assert_eq!(out.get(1)?, AnyValue::Float64(7.039156));
1771
1772
Ok(())
1773
}
1774
1775
#[test]
1776
#[cfg(feature = "is_in")]
1777
fn test_is_in() -> PolarsResult<()> {
1778
let df = fruits_cars();
1779
1780
// this will be executed by apply
1781
let out = df
1782
.clone()
1783
.lazy()
1784
.group_by_stable([col("fruits")])
1785
.agg([col("cars").is_in(
1786
col("cars").filter(col("cars").eq(lit("beetle"))).implode(),
1787
false,
1788
)])
1789
.collect()?;
1790
let out = out.column("cars").unwrap();
1791
let out = out.explode(ExplodeOptions {
1792
empty_as_null: true,
1793
keep_nulls: true,
1794
})?;
1795
let out = out.bool().unwrap();
1796
assert_eq!(
1797
Vec::from(out),
1798
&[Some(true), Some(false), Some(true), Some(true), Some(true)]
1799
);
1800
1801
// this will be executed by map
1802
let out = df
1803
.lazy()
1804
.group_by_stable([col("fruits")])
1805
.agg([col("cars").is_in(
1806
lit(Series::new("a".into(), ["beetle", "vw"])).implode(),
1807
false,
1808
)])
1809
.collect()?;
1810
1811
let out = out.column("cars").unwrap();
1812
let out = out.explode(ExplodeOptions {
1813
empty_as_null: true,
1814
keep_nulls: true,
1815
})?;
1816
let out = out.bool().unwrap();
1817
assert_eq!(
1818
Vec::from(out),
1819
&[Some(true), Some(false), Some(true), Some(true), Some(true)]
1820
);
1821
1822
Ok(())
1823
}
1824
1825
#[test]
1826
fn test_partitioned_gb_1() -> PolarsResult<()> {
1827
// don't move these to integration tests
1828
// keep these dtypes
1829
let out = df![
1830
"keys" => [1, 1, 1, 1, 2],
1831
"vals" => ["a", "b", "c", "a", "a"]
1832
]?
1833
.lazy()
1834
.group_by([col("keys")])
1835
.agg([
1836
(col("vals").eq(lit("a"))).sum().alias("eq_a"),
1837
(col("vals").eq(lit("b"))).sum().alias("eq_b"),
1838
])
1839
.sort(["keys"], Default::default())
1840
.collect()?;
1841
1842
assert!(out.equals(&df![
1843
"keys" => [1, 2],
1844
"eq_a" => [2 as IdxSize, 1],
1845
"eq_b" => [1 as IdxSize, 0],
1846
]?));
1847
1848
Ok(())
1849
}
1850
1851
#[test]
1852
fn test_partitioned_gb_count() -> PolarsResult<()> {
1853
// don't move these to integration tests
1854
let out = df![
1855
"col" => (0..100).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
1856
]?
1857
.lazy()
1858
.group_by([col("col")])
1859
.agg([
1860
// we make sure to alias with a different name
1861
len().alias("counted"),
1862
col("col").count().alias("count2"),
1863
])
1864
.collect()?;
1865
1866
assert!(out.equals(&df![
1867
"col" => [0],
1868
"counted" => [100 as IdxSize],
1869
"count2" => [100 as IdxSize],
1870
]?));
1871
1872
Ok(())
1873
}
1874
1875
#[test]
1876
fn test_partitioned_gb_mean() -> PolarsResult<()> {
1877
// don't move these to integration tests
1878
let out = df![
1879
"key" => (0..100).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
1880
]?
1881
.lazy()
1882
.with_columns([lit("a").alias("str"), lit(1).alias("int")])
1883
.group_by([col("key")])
1884
.agg([
1885
col("str").mean().alias("mean_str"),
1886
col("int").mean().alias("mean_int"),
1887
])
1888
.collect()?;
1889
1890
assert_eq!(out.shape(), (1, 3));
1891
let str_col = out.column("mean_str")?;
1892
assert_eq!(str_col.get(0)?, AnyValue::Null);
1893
let int_col = out.column("mean_int")?;
1894
assert_eq!(int_col.get(0)?, AnyValue::Float64(1.0));
1895
1896
Ok(())
1897
}
1898
1899
#[test]
1900
fn test_partitioned_gb_binary() -> PolarsResult<()> {
1901
// don't move these to integration tests
1902
let df = df![
1903
"col" => (0..20).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
1904
]?;
1905
1906
let out = df
1907
.clone()
1908
.lazy()
1909
.group_by([col("col")])
1910
.agg([(col("col") + lit(10)).sum().alias("sum")])
1911
.collect()?;
1912
1913
assert!(out.equals(&df![
1914
"col" => [0],
1915
"sum" => [200],
1916
]?));
1917
1918
let out = df
1919
.lazy()
1920
.group_by([col("col")])
1921
.agg([(col("col").cast(DataType::Float32) + lit(10.0))
1922
.sum()
1923
.alias("sum")])
1924
.collect()?;
1925
1926
assert!(out.equals(&df![
1927
"col" => [0],
1928
"sum" => [200.0_f32],
1929
]?));
1930
1931
Ok(())
1932
}
1933
1934
#[test]
1935
fn test_partitioned_gb_ternary() -> PolarsResult<()> {
1936
// don't move these to integration tests
1937
let df = df![
1938
"col" => (0..20).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
1939
"val" => (0..20).map(Some).collect::<Int32Chunked>().into_series(),
1940
]?;
1941
1942
let out = df
1943
.lazy()
1944
.group_by([col("col")])
1945
.agg([when(col("val").gt(lit(10)))
1946
.then(lit(1))
1947
.otherwise(lit(0))
1948
.sum()
1949
.alias("sum")])
1950
.collect()?;
1951
1952
assert!(out.equals(&df![
1953
"col" => [0],
1954
"sum" => [9],
1955
]?));
1956
1957
Ok(())
1958
}
1959
1960
#[test]
1961
fn test_sort_maintain_order_true() -> PolarsResult<()> {
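// With maintain_order the sort must be stable for equal keys, so slicing the first three rows
// keeps "A", "B", "C" in their original order.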
1962
let q = df![
1963
"A" => [1, 1, 1, 1],
1964
"B" => ["A", "B", "C", "D"],
1965
]?
1966
.lazy();
1967
1968
let res = q
1969
.sort_by_exprs(
1970
[col("A")],
1971
SortMultipleOptions::default()
1972
.with_maintain_order(true)
1973
.with_nulls_last(true),
1974
)
1975
.slice(0, 3)
1976
.collect()?;
1977
assert!(res.equals(&df![
1978
"A" => [1, 1, 1],
1979
"B" => ["A", "B", "C"],
1980
]?));
1981
Ok(())
1982
}
1983
1984
#[test]
1985
fn test_over_with_options_empty_join() -> PolarsResult<()> {
1986
let empty_df = DataFrame::new_infer_height(vec![
1987
Series::new_empty("a".into(), &DataType::Int32).into(),
1988
Series::new_empty("b".into(), &DataType::Int32).into(),
1989
])?;
1990
1991
let empty_df_out = empty_df
1992
.lazy()
1993
.select([col("b").over_with_options(
1994
Some([col("a")]),
1995
Option::None,
1996
WindowMapping::Join,
1997
)?])
1998
.collect()?;
1999
2000
let f1: Field = Field::new("b".into(), DataType::List(Box::new(DataType::Int32)));
2001
let sc: Schema = Schema::from_iter(vec![f1]);
2002
2003
assert_eq!(&**empty_df_out.schema(), &sc);
2004
2005
Ok(())
2006
}
2007
2008
#[test]
2009
#[cfg(feature = "serde")]
2010
fn test_named_udfs() -> PolarsResult<()> {
2011
use polars_plan::dsl::named_serde::{ExprRegistry, set_named_serde_registry};
2012
2013
let lf = DataFrame::new_infer_height(vec![Column::new("a".into(), vec![1, 2, 3, 4])])?.lazy();
2014
2015
struct X;
2016
impl ExprRegistry for X {
2017
fn get_function(&self, name: &str, payload: &[u8]) -> Option<Arc<dyn AnonymousColumnsUdf>> {
2018
assert_eq!(name, "test-function");
2019
assert_eq!(payload, b"check");
2020
Some(Arc::new(BaseColumnUdf::new(
2021
|c: &mut [Column]| Ok(std::mem::take(&mut c[0]) * 2),
2022
|_: &Schema, f: &[Field]| Ok(f[0].clone()),
2023
)))
2024
}
2025
}
2026
2027
set_named_serde_registry(Arc::new(X) as _);
2028
2029
let expr = Expr::AnonymousFunction {
2030
input: vec![Expr::Column("a".into())],
2031
function: LazySerde::Named {
2032
name: "test-function".into(),
2033
payload: Some(bytes::Bytes::from("check")),
2034
value: None,
2035
},
2036
options: FunctionOptions::default(),
2037
fmt_str: Box::new("test".into()),
2038
};
2039
2040
assert_eq!(
2041
lf.select(&[expr]).collect()?,
2042
DataFrame::new_infer_height(vec![Column::new("a".into(), vec![2, 4, 6, 8])])?,
2043
);
2044
2045
Ok(())
2046
}
2047
2048