// Source: GitHub repository pola-rs/polars
// Path: blob/main/crates/polars-lazy/src/tests/queries.rs
1
#[cfg(feature = "diff")]
2
use polars_core::series::ops::NullBehavior;
3
4
use super::*;
5
6
/// Adding a literal column through the lazy API appends it to the collected frame.
#[test]
fn test_lazy_with_column() {
    let foo = lit(10).alias("foo");
    let out = get_df().lazy().with_column(foo).collect().unwrap();

    // The result is expected to be six columns wide, the new "foo" column included.
    assert_eq!(out.width(), 6);
    assert!(out.column("foo").is_ok());
}
16
17
/// Executes a select + sort query and a negated-filter query on the same frame.
#[test]
fn test_lazy_exec() {
    let df = get_df();

    // This query is only checked for successful execution. Unwrap the result so
    // that a failure actually fails the test instead of being silently dropped.
    let _new = df
        .clone()
        .lazy()
        .select([col("sepal_width"), col("variety")])
        .sort(["sepal_width"], Default::default())
        .collect()
        .unwrap();

    let new = df
        .lazy()
        .filter(not(col("sepal_width").lt(lit(3.5))))
        .collect()
        .unwrap();

    // Every row that survives `not(sepal_width < 3.5)` must be greater than 3.4.
    let check = new.column("sepal_width").unwrap().f64().unwrap().gt(3.4);
    assert!(check.all())
}
36
37
/// Selecting a column twice, once under an alias, yields both names in order.
#[test]
fn test_lazy_alias() {
    let projection = [col("sepal_width").alias("petals"), col("sepal_width")];
    let out = get_df().lazy().select(projection).collect().unwrap();

    assert_eq!(out.get_column_names(), &["petals", "sepal_width"]);
}
47
48
/// Unpivots two sepal columns and keeps only the rows of one variable.
#[test]
#[cfg(feature = "pivot")]
fn test_lazy_unpivot() {
    let unpivot_args = UnpivotArgsDSL {
        on: by_name(["sepal_length", "sepal_width"], true),
        index: by_name(["petal_width", "petal_length"], true),
        variable_name: None,
        value_name: None,
    };
    let only_sepal_length = col("variable").eq(lit("sepal_length"));

    let long = get_df()
        .lazy()
        .unpivot(unpivot_args)
        .filter(only_sepal_length)
        .select([col("variable"), col("petal_width"), col("value")])
        .collect()
        .unwrap();

    assert_eq!(long.shape(), (7, 3));
}
69
70
/// `drop_nulls(None)` keeps only the rows without a null in any column.
#[test]
fn test_lazy_drop_nulls() {
    let expected = df! {
        "foo" => &[Some(1)],
        "bar" => &[Some(1)]
    }
    .unwrap();

    let input = df! {
        "foo" => &[Some(1), None, Some(3)],
        "bar" => &[Some(1), Some(2), None]
    }
    .unwrap();

    let cleaned = input.lazy().drop_nulls(None).collect().unwrap();
    assert!(cleaned.equals(&expected));
}
86
87
/// Applies a user-defined map that scales a column by 200 and checks the first value.
#[test]
fn test_lazy_udf() {
    let scaled = col("sepal_width").map(|s| Ok(s * 200.0), |_, f| Ok(f.clone()));
    let out = get_df().lazy().select([scaled]).collect().unwrap();

    let first = out.column("sepal_width").unwrap().f64().unwrap().get(0);
    assert_eq!(first, Some(700.0));
}
100
101
/// Filters on `is_null` / `is_not_null` and runs a group-by `min` on the same frame.
#[test]
fn test_lazy_is_null() {
    let df = get_df();
    let total_rows = df.height();

    // No row is expected to have a null "sepal_width".
    let nulls = df
        .clone()
        .lazy()
        .filter(col("sepal_width").is_null())
        .collect()
        .unwrap();
    assert_eq!(nulls.height(), 0);

    // Consequently the not-null filter keeps every row.
    let non_nulls = df
        .clone()
        .lazy()
        .filter(col("sepal_width").is_not_null())
        .collect()
        .unwrap();
    assert_eq!(non_nulls.height(), total_rows);

    let grouped = df
        .lazy()
        .group_by([col("variety")])
        .agg([col("sepal_width").min()])
        .collect()
        .unwrap();
    assert_eq!(grouped.shape(), (1, 2));
}
130
131
/// An aggregation changes the schema names; checks that pushdown through it succeeds.
#[test]
fn test_lazy_pushdown_through_agg() {
    let aggregations = [
        col("sepal_length").min(),
        col("petal_length").min().alias("foo"),
    ];

    let out = get_df()
        .lazy()
        .group_by([col("variety")])
        .agg(aggregations)
        .select([col("foo")])
        // The second selection tests whether the optimizer can handle a chained rename.
        .select([col("foo").alias("bar")])
        .collect()
        .unwrap();

    assert_eq!(out.shape(), (1, 1));
    let first = out.column("bar").unwrap().get(0).unwrap();
    assert_eq!(first, AnyValue::Float64(1.3));
}
152
153
/// Shifting an aliased column by two leaves a null in the first slot.
#[test]
fn test_lazy_shift() {
    let shifted = col("sepal_width").alias("foo").shift(lit(2));
    let out = get_df().lazy().select([shifted]).collect().unwrap();

    let first = out.column("foo").unwrap().f64().unwrap().get(0);
    assert_eq!(first, None);
}
163
164
/// A negative shift with a fill value pads the tail of the column.
#[test]
fn test_shift_and_fill() -> PolarsResult<()> {
    let input = df![
        "a" => [1, 2, 3]
    ]?;

    let shifted = input
        .lazy()
        .select([col("a").shift_and_fill(lit(-1), lit(5))])
        .collect()?;

    let values = Vec::from(shifted.column("a")?.i32()?);
    assert_eq!(values, &[Some(2), Some(3), Some(5)]);
    Ok(())
}
177
178
/// `shift_and_fill` also works on a boolean column with a boolean fill value.
#[test]
fn test_shift_and_fill_non_numeric() -> PolarsResult<()> {
    let input = df![
        "bool" => [true, false, true],
    ]?;

    let shifted = input
        .lazy()
        .select([col("bool").shift_and_fill(1, true)])
        .collect()?;

    let values = Vec::from(shifted.column("bool")?.bool()?);
    assert_eq!(values, &[Some(true), Some(true), Some(false)]);
    Ok(())
}
194
195
/// Combines `when/then/otherwise` with filters on the freshly created column.
#[test]
fn test_lazy_ternary_and_predicates() {
    let df = get_df();

    // Test if this runs. This failed because `is_not_null` changes the schema name,
    // so the root column really needs to be checked.
    let with_literal = df
        .clone()
        .lazy()
        .with_column(lit(3).alias("foo"))
        .filter(col("foo").is_not_null());
    let _ = with_literal.collect().unwrap();

    // `lit(3)` is another type on purpose, to check type coercion.
    let foo = when(col("sepal_length").lt(lit(5.0)))
        .then(lit(3))
        .otherwise(col("sepal_width"))
        .alias("foo");
    let out = df
        .lazy()
        .with_column(foo)
        .filter(col("foo").gt(lit(3.0)))
        .collect()
        .unwrap();

    let expected = Column::new("sepal_length".into(), &[5.1f64, 5.0, 5.4]);
    assert_eq!(out.column("sepal_length").unwrap(), &expected);
    assert_eq!(out.shape(), (3, 6));
}
227
228
/// An equality comparison against a literal matches exactly one of the five rows.
#[test]
fn test_lazy_binary_ops() {
    let input = df!("a" => &[1, 2, 3, 4, 5]).unwrap();
    let is_two = col("a").eq(lit(2)).alias("foo");
    let out = input.lazy().select([is_two]).collect().unwrap();

    // Summing the boolean mask counts the matching rows.
    let hits = out
        .column("foo")
        .unwrap()
        .as_materialized_series()
        .sum::<i32>()
        .unwrap();
    assert_eq!(hits, 1);
}
245
246
/// Filters on a column after a user-defined map has doubled it; no row is expected to remain.
#[test]
fn test_lazy_query_2() {
    let doubled = col("a").map(|s| Ok(s * 2), |_, f| Ok(f.clone()));
    let out = load_df()
        .lazy()
        .with_column(doubled)
        .filter(col("a").lt(lit(2)))
        .select([col("b"), col("a")])
        .collect()
        .unwrap();

    assert_eq!(out.shape(), (0, 2));
}
258
259
/// Checks that the schema of a CSV scan is not changed by an aggregation.
#[test]
#[cfg(feature = "csv")]
fn test_lazy_query_3() {
    let query = scan_foods_csv()
        .group_by([col("calories")])
        .agg([col("fats_g").max()]);

    // Only successful execution matters here; the output is discarded.
    let _ = query.collect().unwrap();
}
269
270
/// Computes per-group differences in an aggregation, explodes, and joins back on the keys.
#[test]
fn test_lazy_query_4() -> PolarsResult<()> {
    let base = df! {
        "uid" => [0, 0, 0, 1, 1, 1],
        "day" => [1, 2, 3, 1, 2, 3],
        "cumcases" => [10, 12, 15, 25, 30, 41]
    }
    .unwrap()
    .lazy();

    // Difference between each value and its predecessor within the group.
    let diff_cases = col("cumcases")
        .apply(|s: Column| &s - &(s.shift(1)), |_, f| Ok(f.clone()))
        .alias("diff_cases");

    let joined = base
        .clone()
        .group_by([col("uid")])
        .agg([col("day").alias("day"), diff_cases])
        .explode(by_name(["day", "diff_cases"], true))
        .join(
            base,
            [col("uid"), col("day")],
            [col("uid"), col("day")],
            JoinType::Inner.into(),
        )
        .collect()
        .unwrap();

    let diffs = Vec::from(joined.column("diff_cases").unwrap().i32().unwrap());
    assert_eq!(diffs, &[None, Some(2), Some(3), None, Some(5), Some(11)]);

    Ok(())
}
306
307
/// Takes the first two "day" values per "uid" group and checks every group's list.
///
/// If this one fails, the list builder probably does not handle offsets.
#[test]
fn test_lazy_query_5() {
    let df = df! {
        "uid" => [0, 0, 0, 1, 1, 1],
        "day" => [1, 2, 4, 1, 2, 3],
        "cumcases" => [10, 12, 15, 25, 30, 41]
    }
    .unwrap();

    let out = df
        .lazy()
        .group_by([col("uid")])
        .agg([col("day").head(Some(2))])
        .collect()
        .unwrap();

    // "uid" has two distinct values, so there must be two groups. Asserting this
    // keeps the loop below from passing vacuously.
    assert_eq!(out.height(), 2);

    // Check the list of every group. Previously index 0 was inspected twice,
    // which left the second group (the one stored at a non-zero offset) untested.
    let heads = out.select_at_idx(1).unwrap().list().unwrap();
    for group_idx in 0..out.height() {
        let s = heads.get_as_series(group_idx).unwrap();
        assert_eq!(s.len(), 2);
    }
}
340
341
/// Regression test for https://github.com/pola-rs/polars/issues/842.
///
/// Builds one `when(is_in)/then/otherwise` expression per column and filters the result.
#[test]
#[cfg(feature = "is_in")]
fn test_lazy_query_8() -> PolarsResult<()> {
    let input = df![
        "A" => [1, 2, 3],
        "B" => [1, 2, 3],
        "C" => [1, 2, 3],
        "D" => [1, 2, 3],
        "E" => [1, 2, 3]
    ]?;

    let selection: Vec<_> = ["A", "B", "C", "D", "E"]
        .iter()
        .map(|&name| {
            when(col(name).is_in(col("E"), false))
                .then(col("A"))
                .otherwise(Null {}.lit())
                .alias(name)
        })
        .collect();

    let filtered = input
        .lazy()
        .select(selection)
        .filter(col("D").gt(lit(1)))
        .collect()?;

    assert_eq!(filtered.shape(), (2, 5));
    Ok(())
}
371
372
/// Regression test for https://github.com/pola-rs/polars/issues/958.
///
/// Joins sales to cities, sums the amounts per country and checks the sorted totals.
#[test]
fn test_lazy_query_9() -> PolarsResult<()> {
    let cities = df![
        "Cities.City"=> ["Moscow", "Berlin", "Paris","Hamburg", "Lyon", "Novosibirsk"],
        "Cities.Population"=> [11.92, 3.645, 2.161, 1.841, 0.513, 1.511],
        "Cities.Country"=> ["Russia", "Germany", "France", "Germany", "France", "Russia"]
    ]?;

    let sales = df![
        "Sales.City"=> ["Moscow", "Berlin", "Paris", "Moscow", "Berlin", "Paris", "Moscow", "Berlin", "Paris"],
        "Sales.Item"=> ["Item A", "Item A","Item A",
                       "Item B", "Item B","Item B",
                       "Item C", "Item C","Item C"],
        "Sales.Amount"=> [200, 180, 100,
                        3, 30, 20,
                        90, 130, 125]
    ]?;

    let sales_with_city = sales.lazy().join(
        cities.lazy(),
        [col("Sales.City")],
        [col("Cities.City")],
        JoinType::Inner.into(),
    );

    let totals = sales_with_city
        .group_by([col("Cities.Country")])
        .agg([col("Sales.Amount").sum().alias("sum")])
        .sort(["sum"], Default::default())
        .collect()?;

    let sums: Vec<_> = totals.column("sum")?.i32()?.into_no_null_iter().collect();
    assert_eq!(sums, &[245, 293, 340]);
    Ok(())
}
411
412
/// Subtracting two datetime columns yields a duration column, both for matching
/// time units and for mixed (milliseconds vs. nanoseconds) units.
#[test]
#[cfg(all(
    feature = "temporal",
    feature = "dtype-datetime",
    feature = "dtype-date",
    feature = "dtype-duration"
))]
fn test_lazy_query_10() {
    use chrono::Duration as ChronoDuration;

    let date = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap();
    // Datetime on `date` at the given full hour.
    let at_hour =
        |hour: u32| NaiveDateTime::new(date, NaiveTime::from_hms_opt(hour, 0, 0).unwrap());

    // Case 1: both operands use nanoseconds.
    let x = DatetimeChunked::from_naive_datetime(
        "x".into(),
        [at_hour(12), at_hour(13), at_hour(14)],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    let y = DatetimeChunked::from_naive_datetime(
        "y".into(),
        [at_hour(11), at_hour(11), at_hour(11)],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    let out = DataFrame::new(vec![x, y])
        .unwrap()
        .lazy()
        .select(&[(col("x") - col("y")).alias("z")])
        .collect()
        .unwrap();

    let z = DurationChunked::from_duration(
        "z".into(),
        [
            ChronoDuration::try_hours(1).unwrap(),
            ChronoDuration::try_hours(2).unwrap(),
            ChronoDuration::try_hours(3).unwrap(),
        ],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    assert!(out.column("z").unwrap().equals(&z));

    // Case 2: "x" uses milliseconds while "y" stays in nanoseconds.
    let x = DatetimeChunked::from_naive_datetime(
        "x".into(),
        [at_hour(2), at_hour(3), at_hour(4)],
        TimeUnit::Milliseconds,
    )
    .into_column();
    let y = DatetimeChunked::from_naive_datetime(
        "y".into(),
        [at_hour(1), at_hour(1), at_hour(1)],
        TimeUnit::Nanoseconds,
    )
    .into_column();
    let out = DataFrame::new(vec![x, y])
        .unwrap()
        .lazy()
        .select(&[(col("x") - col("y")).alias("z")])
        .collect()
        .unwrap();

    // The result is compared against the same durations cast to milliseconds.
    let z_ms = z.cast(&DataType::Duration(TimeUnit::Milliseconds)).unwrap();
    assert!(out.column("z").unwrap().equals(&z_ms));
}
491
492
/// Tests that predicate pushdown does not interfere with shifted data.
#[test]
#[cfg(all(
    feature = "temporal",
    feature = "dtype-date",
    feature = "dtype-datetime"
))]
fn test_lazy_query_7() {
    let day = NaiveDate::from_ymd_opt(2021, 3, 5).unwrap();
    // Datetime on `day` at 12:<minute>:00.
    let at_minute =
        |minute: u32| NaiveDateTime::new(day, NaiveTime::from_hms_opt(12, minute, 0).unwrap());

    let dates = [
        at_minute(0),
        at_minute(1),
        at_minute(2),
        at_minute(3),
        at_minute(4),
        at_minute(5),
    ];
    let data = vec![Some(1.), Some(2.), Some(3.), Some(4.), None, None];
    let df = DataFrame::new(vec![
        DatetimeChunked::from_naive_datetime("date".into(), dates, TimeUnit::Nanoseconds)
            .into_column(),
        Column::new("data".into(), data),
    ])
    .unwrap();

    let out = df
        .lazy()
        .with_column(col("data").shift(lit(-1)).alias("output"))
        .with_column(col("output").shift(lit(2)).alias("shifted"))
        .filter(col("date").gt(lit(at_minute(2))))
        .collect()
        .unwrap();

    // The shifted values that survive the filter are expected to sum to 7.
    let deviation = out
        .column("shifted")
        .unwrap()
        .as_materialized_series()
        .sum::<f64>()
        .unwrap()
        - 7.0;
    assert!(deviation.abs() < 0.01);
}
535
536
/// A shift followed by `fill_null` replaces the introduced null with the fill value.
#[test]
fn test_lazy_shift_and_fill_all() {
    let values = &[1, 2, 3];
    let input = DataFrame::new(vec![Column::new("data".into(), values)]).unwrap();

    let shifted = col("data").shift(lit(1)).fill_null(lit(0)).alias("output");
    let out = input.lazy().with_column(shifted).collect().unwrap();

    let output = Vec::from(out.column("output").unwrap().i32().unwrap());
    assert_eq!(output, vec![Some(0), Some(1), Some(2)]);
}
550
551
/// Checks that predicate pushdown optimization does not fail on a plain shift.
#[test]
fn test_lazy_shift_operation_no_filter() {
    let input = df! {
        "a" => &[1, 2, 3],
        "b" => &[1, 2, 3]
    }
    .unwrap();

    let shifted = col("b").shift(lit(1)).alias("output");
    input.lazy().with_column(shifted).collect().unwrap();
}
564
565
/// Tests that an expression containing only literals is simplified (`1.0 + 1.0` -> `2.0`).
#[test]
fn test_simplify_expr() {
    let plan = get_df()
        .lazy()
        .select(&[lit(1.0) + lit(1.0) + col("sepal_width")])
        .logical_plan;

    let mut expr_arena = Arena::new();
    let mut lp_arena = Arena::new();

    #[allow(const_item_mutation)]
    let lp_top = to_alp(
        plan,
        &mut expr_arena,
        &mut lp_arena,
        &mut OptFlags::SIMPLIFY_EXPR,
    )
    .unwrap();

    // The root must be a select whose first expression is a binary expression
    // with the folded float literal 2.0 on its left-hand side.
    let left_is_folded = match lp_arena.get(lp_top) {
        IR::Select { expr, .. } => match expr_arena.get(expr[0].node()) {
            AExpr::BinaryExpr { left, .. } => matches!(
                expr_arena.get(*left),
                &AExpr::Literal(LiteralValue::Dyn(DynLiteralValue::Float(2.0)))
            ),
            _ => false,
        },
        _ => false,
    };
    assert!(left_is_folded);
}
592
593
/// Wildcard selection in a plain projection and inside group-by aggregations.
#[test]
fn test_lazy_wildcard() {
    let df = load_df();

    let everything = df.clone().lazy().select([col("*")]).collect().unwrap();
    assert_eq!(everything.shape(), (5, 3));

    let aggregations = [
        col("*").sum().name().suffix(""),
        col("*").first().name().suffix("_first"),
    ];
    let grouped = df
        .lazy()
        .group_by([col("b")])
        .agg(aggregations)
        .collect()
        .unwrap();

    // Should exclude b from wildcard aggregations.
    assert_eq!(grouped.shape(), (3, 5));
}
610
611
/// The lazy `reverse` matches the eager `DataFrame::reverse`.
#[test]
fn test_lazy_reverse() {
    let df = load_df();
    let expected = df.reverse();

    let reversed = df.lazy().reverse().collect().unwrap();
    assert!(reversed.equals_missing(&expected))
}
623
624
/// Frame-level `fill_null` replaces the nulls in every column and keeps the column names.
#[test]
fn test_lazy_fill_null() {
    let expected = df! {
        "a" => &[Some(10.0), Some(2.0)],
        "b" => &[Some(1.0), Some(10.0)]
    }
    .unwrap();

    let input = df! {
        "a" => &[None, Some(2.0)],
        "b" => &[Some(1.0), None]
    }
    .unwrap();

    let filled = input.lazy().fill_null(lit(10.0)).collect().unwrap();
    assert!(filled.equals(&expected));
    assert_eq!(filled.get_column_names(), vec!["a", "b"])
}
640
641
/// Two chained projections, the second selecting the alias from the first, must execute.
#[test]
fn test_lazy_double_projection() {
    let input = df! {
        "foo" => &[1, 2, 3]
    }
    .unwrap();

    let query = input
        .lazy()
        .select([col("foo").alias("bar")])
        .select([col("bar")]);
    query.collect().unwrap();
}
653
654
/// Multiplying an integer column by a float column must insert a cast on the
/// integer side only.
#[test]
fn test_type_coercion() {
    let df = df! {
        "foo" => &[1, 2, 3],
        "bar" => &[1.0, 2.0, 3.0]
    }
    .unwrap();

    let lp = df.lazy().select([col("foo") * col("bar")]).logical_plan;

    let mut expr_arena = Arena::new();
    let mut lp_arena = Arena::new();
    let lp_top = to_alp(lp, &mut expr_arena, &mut lp_arena, &mut OptFlags::default()).unwrap();

    // Both levels panic on an unexpected shape. Previously a root that was not a
    // `Select` made the test pass without checking anything.
    match lp_arena.get(lp_top) {
        IR::Select { expr, .. } => match expr_arena.get(expr[0].node()) {
            AExpr::BinaryExpr { left, right, .. } => {
                assert!(matches!(expr_arena.get(*left), AExpr::Cast { .. }));
                // bar is already float, does not have to be coerced
                assert!(matches!(expr_arena.get(*right), AExpr::Column { .. }));
            },
            _ => panic!("expected a binary expression as the first projected expression"),
        },
        _ => panic!("expected `IR::Select` at the root of the plan"),
    }
}
678
679
/// Group-by aggregations on an in-memory frame (mean) and on a CSV scan (implicit list).
#[test]
#[cfg(feature = "csv")]
fn test_lazy_partition_agg() {
    let input = df! {
        "foo" => &[1, 1, 2, 2, 3],
        "bar" => &[1.0, 1.0, 2.0, 2.0, 3.0]
    }
    .unwrap();

    let means = input
        .lazy()
        .group_by([col("foo")])
        .agg([col("bar").mean()])
        .sort(["foo"], Default::default())
        .collect()
        .unwrap();

    let bar = Vec::from(means.column("bar").unwrap().f64().unwrap());
    assert_eq!(bar, &[Some(1.0), Some(2.0), Some(3.0)]);

    let per_category = scan_foods_csv()
        .group_by([col("category")])
        .agg([col("calories")])
        .sort(["category"], Default::default())
        .collect()
        .unwrap();

    // Inspect the calories list of the first category after sorting.
    let calories_lists = per_category.select_at_idx(1).unwrap();
    let first_category = calories_lists.list().unwrap().get_as_series(0).unwrap();
    let calories = Vec::from(first_category.i64().unwrap());
    let expected = [
        Some(60),
        Some(30),
        Some(50),
        Some(30),
        Some(60),
        Some(130),
        Some(50),
    ];
    assert_eq!(calories, &expected)
}
723
724
/// A user-defined `apply` returning the group length must run in a group-by context.
#[test]
fn test_lazy_group_by_apply() {
    let group_len = col("cars").apply(
        |s: Column| Ok(Column::new("".into(), &[s.len() as u32])),
        |_, f| Ok(Field::new(f.name().clone(), DataType::UInt32)),
    );

    fruits_cars()
        .lazy()
        .group_by([col("fruits")])
        .agg([group_len])
        .collect()
        .unwrap();
}
737
738
/// `shift_and_fill` with an aggregated fill expression leaves no nulls, for shifts
/// in either direction and for the frame-level variant.
#[test]
fn test_lazy_shift_and_fill() {
    let df = df! {
        "A" => &[1, 2, 3, 4, 5],
        "B" => &[5, 4, 3, 2, 1]
    }
    .unwrap();

    let forward = df
        .clone()
        .lazy()
        .with_column(col("A").shift_and_fill(lit(2), col("B").mean()))
        .collect()
        .unwrap();
    assert_eq!(forward.column("A").unwrap().null_count(), 0);

    // shift from the other side
    let backward = df
        .clone()
        .lazy()
        .with_column(col("A").shift_and_fill(lit(-2), col("B").mean()))
        .collect()
        .unwrap();
    assert_eq!(backward.column("A").unwrap().null_count(), 0);

    // Frame-level variant.
    let whole_frame = df
        .lazy()
        .shift_and_fill(lit(-1), col("B").std(1))
        .collect()
        .unwrap();
    assert_eq!(whole_frame.column("A").unwrap().null_count(), 0);
}
769
770
/// Group-by mean over a column that contains a null; checks the smallest mean.
#[test]
fn test_lazy_group_by() {
    let input = df! {
        "a" => &[Some(1.0), None, Some(3.0), Some(4.0), Some(5.0)],
        "groups" => &["a", "a", "b", "c", "c"]
    }
    .unwrap();

    let means = input
        .lazy()
        .group_by([col("groups")])
        .agg([col("a").mean()])
        .sort(["a"], Default::default())
        .collect()
        .unwrap();

    let smallest = means.column("a").unwrap().f64().unwrap().get(0);
    assert_eq!(smallest, Some(1.0));
}
788
789
/// `tail` on a lazy frame must execute without error.
#[test]
fn test_lazy_tail() {
    let input = df! {
        "A" => &[1, 2, 3, 4, 5],
        "B" => &[5, 4, 3, 2, 1]
    }
    .unwrap();

    let last_three = input.lazy().tail(3);
    let _ = last_three.collect().unwrap();
}
799
800
/// Sorting inside a group-by aggregation, then taking the first / last value per group.
#[test]
fn test_lazy_group_by_sort() {
    let df = df! {
        "a" => ["a", "b", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }
    .unwrap();

    let smallest = df
        .clone()
        .lazy()
        .group_by([col("a")])
        .agg([col("b").sort(Default::default()).first()])
        .collect()
        .unwrap()
        .sort(["a"], Default::default())
        .unwrap();
    let firsts = Vec::from(smallest.column("b").unwrap().i32().unwrap());
    assert_eq!(firsts, [Some(1), Some(2), Some(6)]);

    let largest = df
        .lazy()
        .group_by([col("a")])
        .agg([col("b").sort(Default::default()).last()])
        .collect()
        .unwrap()
        .sort(["a"], Default::default())
        .unwrap();
    let lasts = Vec::from(largest.column("b").unwrap().i32().unwrap());
    assert_eq!(lasts, [Some(3), Some(5), Some(6)]);
}
837
838
/// `sort_by` another column (descending) inside a group-by, then take the first value.
#[test]
fn test_lazy_group_by_sort_by() {
    let input = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6],
        "c" => [6, 1, 4, 3, 2, 1]
    }
    .unwrap();

    let descending = SortMultipleOptions::default().with_order_descending(true);
    let b_at_largest_c = col("b").sort_by([col("c")], descending).first();

    let out = input
        .lazy()
        .group_by([col("a")])
        .agg([b_at_largest_c])
        .collect()
        .unwrap()
        .sort(["a"], Default::default())
        .unwrap();

    let b = Vec::from(out.column("b").unwrap().i32().unwrap());
    assert_eq!(b, [Some(1), Some(4), Some(6)]);
}
866
867
/// Tests that a cast on an aggregated expression runs in a group-by context.
#[test]
#[cfg(feature = "dtype-datetime")]
fn test_lazy_group_by_cast() {
    let input = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }
    .unwrap();

    let mean_as_datetime = col("b")
        .mean()
        .cast(DataType::Datetime(TimeUnit::Nanoseconds, None));

    // Only successful execution is checked.
    let _ = input
        .lazy()
        .group_by([col("a")])
        .agg([mean_as_datetime])
        .collect()
        .unwrap();
}
886
887
/// Tests that a binary expression on an aggregate runs in a group-by context.
#[test]
fn test_lazy_group_by_binary_expr() {
    let input = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }
    .unwrap();

    let doubled_mean = col("b").mean() * lit(2);
    let out = input
        .lazy()
        .group_by([col("a")])
        .agg([doubled_mean])
        .sort(["a"], Default::default())
        .collect()
        .unwrap();

    let b = Vec::from(out.column("b").unwrap().f64().unwrap());
    assert_eq!(b, [Some(4.0), Some(9.0), Some(12.0)]);
}
908
909
/// Tests that filters work in the group-by context and that the aggregations
/// can deal with empty sets.
#[test]
fn test_lazy_group_by_filter() -> PolarsResult<()> {
    let input = df! {
        "a" => ["a", "a", "a", "b", "b", "c"],
        "b" => [1, 2, 3, 4, 5, 6]
    }?;

    // "b" restricted to the rows whose "a" equals `key`.
    let b_where_a_is = |key: &'static str| col("b").filter(col("a").eq(lit(key)));

    let out = input
        .lazy()
        .group_by([col("a")])
        .agg([
            b_where_a_is("a").sum().alias("b_sum"),
            b_where_a_is("a").first().alias("b_first"),
            // "e" does not occur in "a", so this filter is empty in every group.
            b_where_a_is("e").mean().alias("b_mean"),
            b_where_a_is("a").last().alias("b_last"),
        ])
        .sort(["a"], SortMultipleOptions::default())
        .collect()?;

    let b_sum = Vec::from(out.column("b_sum").unwrap().i32().unwrap());
    assert_eq!(b_sum, [Some(6), Some(0), Some(0)]);

    let b_first = Vec::from(out.column("b_first").unwrap().i32().unwrap());
    assert_eq!(b_first, [Some(1), None, None]);

    let b_mean = Vec::from(out.column("b_mean").unwrap().f64().unwrap());
    assert_eq!(b_mean, [None, None, None]);

    let b_last = Vec::from(out.column("b_last").unwrap().i32().unwrap());
    assert_eq!(b_last, [Some(3), None, None]);

    Ok(())
}
959
960
/// Self-join of an aliased projection; this query failed when projection pushdown was enabled.
#[test]
fn test_group_by_projection_pd_same_column() -> PolarsResult<()> {
    // Builds a fresh lazy frame with both columns renamed.
    let renamed = || {
        df![
            "col1" => ["a", "ab", "abc"],
            "col2" => [1, 2, 3]
        ]
        .unwrap()
        .lazy()
        .select([col("col1").alias("foo"), col("col2").alias("bar")])
    };

    let joined = renamed()
        .left_join(renamed(), col("foo"), col("foo"))
        .select([col("bar")])
        .collect()?;

    let bar = joined.column("bar")?.i32()?;
    assert_eq!(Vec::from(bar), &[Some(1), Some(2), Some(3)]);

    Ok(())
}
985
986
/// Gets the largest two values per group in two ways and checks that they agree.
///
/// expected:
/// group       values
/// 1           1
/// 2           6, 5
/// 3           9, 8
#[test]
fn test_group_by_sort_slice() -> PolarsResult<()> {
    let df = df![
        "groups" => [1, 2, 2, 3, 3, 3],
        "vals" => [1, 5, 6, 3, 9, 8]
    ]?;

    // Variant 1: sort the whole frame first, then take the head of each group.
    let sorted_then_grouped = df
        .clone()
        .lazy()
        .sort(
            ["vals"],
            SortMultipleOptions::default().with_order_descending(true),
        )
        .group_by([col("groups")])
        .agg([col("vals").head(Some(2)).alias("foo")])
        .sort(["groups"], Default::default())
        .collect()?;

    // Variant 2: sort within each group, then take the head.
    let top_two = col("vals")
        .sort(SortOptions::default().with_order_descending(true))
        .head(Some(2))
        .alias("foo");
    let sorted_in_groups = df
        .lazy()
        .group_by([col("groups")])
        .agg([top_two])
        .sort(["groups"], Default::default())
        .collect()?;

    let lhs = sorted_then_grouped.column("foo")?;
    let rhs = sorted_in_groups.column("foo")?;
    assert!(lhs.equals(rhs));
    Ok(())
}
1025
1026
/// Cumulative sum evaluated per group.
#[test]
#[cfg(feature = "cum_agg")]
fn test_group_by_cum_sum() -> PolarsResult<()> {
    let input = df![
        "groups" => [1, 2, 2, 3, 3, 3],
        "vals" => [1, 5, 6, 3, 9, 8]
    ]?;

    let out = input
        .lazy()
        .group_by([col("groups")])
        .agg([col("vals").cum_sum(false)])
        .sort(["groups"], Default::default())
        .collect()?;

    // Flatten the per-group lists before comparing.
    let running = Vec::from(out.column("vals")?.explode(false)?.i32()?);
    let expected = vec![Some(1), Some(5), Some(11), Some(3), Some(12), Some(20)];
    assert_eq!(running, expected);

    Ok(())
}
1052
1053
/// `arg_sort_by` over two columns with mixed sort directions.
#[test]
#[cfg(feature = "range")]
fn test_arg_sort_multiple() -> PolarsResult<()> {
    let df = df![
        "int" => [1, 2, 3, 1, 2],
        "flt" => [3.0, 2.0, 1.0, 2.0, 1.0],
        "str" => ["a", "a", "a", "b", "b"]
    ]?;

    let by_int_then_flt = arg_sort_by(
        [col("int"), col("flt")],
        SortMultipleOptions::default().with_order_descending_multi([true, false]),
    );
    let out = df.clone().lazy().select([by_int_then_flt]).collect()?;

    let indices = Vec::from(out.column("int")?.idx()?);
    let expected = vec![Some(2), Some(4), Some(1), Some(3), Some(0)];
    assert_eq!(indices, expected);

    // check if this runs
    let by_str_then_flt = arg_sort_by(
        [col("str"), col("flt")],
        SortMultipleOptions::default().with_order_descending_multi([true, false]),
    );
    let _ = df.lazy().select([by_str_then_flt]).collect()?;
    Ok(())
}
1090
1091
/// Exploding two list columns at once restores the original number of rows.
#[test]
fn test_multiple_explode() -> PolarsResult<()> {
    let input = df![
        "a" => [0, 1, 2, 0, 2],
        "b" => [5, 4, 3, 2, 1],
        "c" => [2, 3, 4, 1, 5]
    ]?;

    let lists = input
        .lazy()
        .group_by([col("a")])
        .agg([col("b").alias("b_list"), col("c").alias("c_list")]);
    let exploded = lists
        .explode(by_name(["c_list", "b_list"], true))
        .collect()?;

    assert_eq!(exploded.shape(), (5, 3));

    Ok(())
}
1109
1110
/// Filters on both an aliased derived column and the column it was derived from.
#[test]
fn test_filter_and_alias() -> PolarsResult<()> {
    let expected = df![
        "a" => [2, 2],
        "a_squared" => [4.0, 4.0]
    ]?;

    let input = df![
        "a" => [0, 1, 2, 0, 2]
    ]?;

    let predicate = col("a_squared").gt(lit(1)).and(col("a").gt(lit(1)));
    let out = input
        .lazy()
        .with_column(col("a").pow(2.0).alias("a_squared"))
        .filter(predicate)
        .collect()?;

    assert!(out.equals(&expected));
    Ok(())
}
1129
1130
/// Regression test, see https://github.com/pola-rs/polars/issues/790.
///
/// Failed due to broadcasting filters and splitting threads.
#[test]
fn test_filter_lit() {
    // 100 single-letter strings cycling through 'A'..='Z'.
    let letters = (0..100).map(|i| ('A'..='Z').nth(i % 26).unwrap().to_string());
    let column = Series::from_iter(letters).into_column();
    let input = DataFrame::new([column].into()).unwrap();

    let kept = input.lazy().filter(lit(true)).collect().unwrap();
    assert_eq!(kept.shape(), (100, 1));
}
1141
1142
/// A null literal in the `then` branch produces a null exactly where the predicate holds.
#[test]
fn test_ternary_null() -> PolarsResult<()> {
    let input = df![
        "a" => ["a", "b", "c"]
    ]?;

    let foo = when(col("a").eq(lit("c")))
        .then(Null {}.lit())
        .otherwise(col("a"))
        .alias("foo");
    let out = input.lazy().select([foo]).collect()?;

    let null_mask: Vec<_> = out.column("foo")?.is_null().into_iter().collect();
    assert_eq!(null_mask, &[Some(false), Some(false), Some(true)]);
    Ok(())
}
1162
1163
/// Forward fill evaluated as a window expression with `WindowMapping::Join`.
#[test]
fn test_fill_forward() -> PolarsResult<()> {
    let input = df![
        "a" => ["a", "b", "a"],
        "b" => [Some(1), None, None]
    ]?;

    let filled = col("b")
        .fill_null_with_strategy(FillNullStrategy::Forward(FillNullLimit::None))
        .over_with_options(Some([col("a")]), None, WindowMapping::Join)?;
    let out = input.lazy().select([filled]).collect()?;
    let agg = out.column("b")?.list()?;

    // Rows 0 and 2 are both expected to hold the forward-filled list [1, 1].
    let expected = Series::new("b".into(), &[1, 1]);
    for idx in [0, 2] {
        let a: Series = agg.get_as_series(idx).unwrap();
        assert!(a.equals(&expected));
    }

    // Row 1 is expected to keep its single null.
    let a: Series = agg.get_as_series(1).unwrap();
    assert_eq!(a.null_count(), 1);
    Ok(())
}
1186
1187
/// A cross join of a 3-row and a 2-row frame yields 6 rows and 4 columns.
#[cfg(feature = "cross_join")]
#[test]
fn test_cross_join() -> PolarsResult<()> {
    let left = df![
        "a" => ["a", "b", "a"],
        "b" => [Some(1), None, None]
    ]?;
    let right = df![
        "a" => [1, 2],
        "b" => [None, Some(12)]
    ]?;

    let product = left.lazy().cross_join(right.lazy(), None).collect()?;
    assert_eq!(product.shape(), (6, 4));
    Ok(())
}
1204
1205
/// Regression test for https://github.com/pola-rs/polars/issues/1056.
///
/// Selecting a literal next to a column on an empty frame must yield empty columns.
#[test]
fn test_select_empty_df() -> PolarsResult<()> {
    let input = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3]
    ]?;

    // No row has a == 0, so this filter leads to an empty frame.
    let matches_nothing = col("a").eq(lit(0));
    let out = input
        .lazy()
        .filter(matches_nothing)
        .select([col("a"), lit(1).alias("c")])
        .collect()?;

    for name in ["a", "c"] {
        assert_eq!(out.column(name)?.len(), 0);
    }

    Ok(())
}
1224
1225
/// `name().keep()` restores the original column name after an alias.
#[test]
fn test_keep_name() -> PolarsResult<()> {
    let input = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3]
    ]?;

    let projection = [
        col("a").alias("bar").name().keep(),
        col("b").alias("bar").name().keep(),
    ];
    let out = input.lazy().select(projection).collect()?;

    assert_eq!(out.get_column_names(), &["a", "b"]);
    Ok(())
}
1243
1244
/// Selecting everything except one column by name.
#[test]
fn test_exclude() -> PolarsResult<()> {
    let input = df![
        "a" => [1, 2, 3],
        "b" => [1, 2, 3],
        "c" => [1, 2, 3]
    ]?;

    let all_but_b = all().exclude_cols(["b"]).as_expr();
    let out = input.lazy().select([all_but_b]).collect()?;

    assert_eq!(out.get_column_names(), &["a", "c"]);
    Ok(())
}
1260
1261
/// A `^...$` column name is treated as a regex over the column names.
#[test]
#[cfg(feature = "regex")]
fn test_regex_selection() -> PolarsResult<()> {
    let input = df![
        "anton" => [1, 2, 3],
        "arnold schwars" => [1, 2, 3],
        "annie" => [1, 2, 3]
    ]?;

    let pattern = col("^a.*o.*$");
    let out = input.lazy().select([pattern]).collect()?;

    assert_eq!(out.get_column_names(), &["anton", "arnold schwars"]);
    Ok(())
}
1275
1276
/// `sort_by` on multiple columns, in a plain selection and in a stable group-by.
#[test]
fn test_sort_by() -> PolarsResult<()> {
    let df = df![
        "a" => [1, 2, 3, 4, 5],
        "b" => [1, 1, 1, 2, 2],
        "c" => [2, 3, 1, 2, 1]
    ]?;

    // The same ordering of "a" is expected in every context below.
    let expected: [Option<i32>; 5] = [Some(3), Some(1), Some(2), Some(5), Some(4)];
    let a_by_b_then_c =
        || col("a").sort_by([col("b"), col("c")], SortMultipleOptions::default());

    // evaluate
    let out = df.clone().lazy().select([a_by_b_then_c()]).collect()?;
    let a = out.column("a")?;
    assert_eq!(Vec::from(a.i32().unwrap()), &expected);

    // aggregate
    let out = df
        .clone()
        .lazy()
        .group_by_stable([col("b")])
        .agg([a_by_b_then_c()])
        .collect()?;
    let a = out.column("a")?.explode(false)?;
    assert_eq!(Vec::from(a.i32().unwrap()), &expected);

    // evaluate_on_groups
    let out = df
        .lazy()
        .group_by_stable([col("b")])
        .agg([a_by_b_then_c()])
        .collect()?;
    let a = out.column("a")?.explode(false)?;
    assert_eq!(Vec::from(a.i32().unwrap()), &expected);

    Ok(())
}
1325
1326
// Shifts "B" by one, filters on the shifted values being > 4 inside a window
// over "fruits" (join mapping), and inspects the resulting per-row lists.
#[test]
fn test_filter_after_shift_in_groups() -> PolarsResult<()> {
    let df = fruits_cars();

    let shifted_b = || col("B").shift(lit(1));
    let filtered_expr = shifted_b()
        .filter(shifted_b().gt(lit(4)))
        .over_with_options(Some([col("fruits")]), None, WindowMapping::Join)?
        .alias("filtered");

    let out = df
        .lazy()
        .select([col("fruits"), filtered_expr])
        .collect()?;

    let lists = out.column("filtered")?.list()?;

    // The first two rows must each start with the value 5.
    for row in [0, 1] {
        let values = lists.get_as_series(row).unwrap();
        assert_eq!(values.i32()?.get(0).unwrap(), 5);
    }

    // The third row must be an empty list.
    assert_eq!(lists.get_as_series(2).unwrap().len(), 0);

    Ok(())
}
1373
1374
// Replaces the value 10 with null via when/then/otherwise, drops nulls, and
// expects the remaining values 1, 2, 3.
#[test]
fn test_lazy_ternary_predicate_pushdown() -> PolarsResult<()> {
    let frame = df![
        "a" => &[10, 1, 2, 3]
    ]?;

    let null_out_ten = when(col("a").eq(lit(10)))
        .then(Null {}.lit())
        .otherwise(col("a"));

    let out = frame
        .lazy()
        .select([null_out_ten])
        .drop_nulls(None)
        .collect()?;

    let first_column = &out.get_columns()[0];
    assert_eq!(
        Vec::from(first_column.i32()?),
        &[Some(1), Some(2), Some(3)]
    );

    Ok(())
}
1395
1396
// Casts "fruits" and "cars" to a global categorical dtype and then
// concatenates them with a string literal in between.
#[test]
#[cfg(feature = "dtype-categorical")]
fn test_categorical_addition() -> PolarsResult<()> {
    let df = fruits_cars();

    let global_categorical = || DataType::from_categories(Categories::global());

    // test if we can do that arithmetic operation with String and Categorical
    let as_categoricals = [
        col("fruits").cast(global_categorical()),
        col("cars").cast(global_categorical()),
    ];
    let joined = (col("fruits") + lit(" ") + col("cars")).alias("foo");

    let out = df
        .lazy()
        .select(as_categoricals)
        .select([joined])
        .collect()?;

    let first = out.column("foo")?.str()?.get(0).unwrap();
    assert_eq!(first, "banana beetle");

    Ok(())
}
1415
1416
// Selecting the wildcard twice must make `collect` return an error.
#[test]
fn test_error_duplicate_names() {
    let df = fruits_cars();
    let result = df.lazy().select([col("*"), col("*")]).collect();
    assert!(result.is_err());
}
1421
1422
// Counts the "fruits" entries equal to "banana"; the expected count is 3.
#[test]
fn test_filter_count() -> PolarsResult<()> {
    let df = fruits_cars();

    let banana_count = col("fruits")
        .filter(col("fruits").eq(lit("banana")))
        .count();
    let out = df.lazy().select([banana_count]).collect()?;

    let counted = out.column("fruits")?.idx()?.get(0);
    assert_eq!(counted, Some(3));
    Ok(())
}
1434
1435
// Groups by an i16 and an i32 key, sums the i16 column, and checks the result
// after sorting descending.
#[test]
#[cfg(feature = "dtype-i16")]
fn test_group_by_small_ints() -> PolarsResult<()> {
    let frame = df![
        "id_32" => [1i32, 2],
        "id_16" => [1i16, 2]
    ]?;

    let descending = SortMultipleOptions::default().with_order_descending(true);

    // https://github.com/pola-rs/polars/issues/1255
    let out = frame
        .lazy()
        .group_by([col("id_16"), col("id_32")])
        .agg([col("id_16").sum().alias("foo")])
        .sort(["foo"], descending)
        .collect()?;

    let sums = out.column("foo")?.i64()?;
    assert_eq!(Vec::from(sums), &[Some(2), Some(1)]);
    Ok(())
}
1457
1458
// The schema of a when/then/otherwise with a null `then` branch must not
// resolve its first field to `DataType::Null`.
#[test]
fn test_when_then_schema() -> PolarsResult<()> {
    let df = fruits_cars();

    let ternary = when(col("A").gt(lit(1)))
        .then(Null {}.lit())
        .otherwise(col("A"));

    let schema = df.lazy().select([ternary]).collect_schema()?;
    let (_, dtype) = schema.get_at_index(0).unwrap();
    assert_ne!(dtype, &DataType::Null);

    Ok(())
}
1472
1473
// A literal selected next to a real column must come out longer than one row.
#[test]
fn test_singleton_broadcast() -> PolarsResult<()> {
    let df = fruits_cars();

    let projection = [col("fruits"), lit(1).alias("foo")];
    let out = df.lazy().select(projection).collect()?;

    let foo_len = out.column("foo")?.len();
    assert!(foo_len > 1);
    Ok(())
}
1484
1485
// `implode` in a select must equal a list column built by hand from the same
// input via a list builder.
#[test]
fn test_list_in_select_context() -> PolarsResult<()> {
    let input = Column::new("a".into(), &[1, 2, 3]);

    // Build the expected single-row list column manually.
    let mut list_builder =
        get_list_builder(input.dtype(), input.len(), 1, input.name().clone());
    list_builder
        .append_series(input.as_materialized_series())
        .unwrap();
    let expected = list_builder.finish().into_column();

    let frame = DataFrame::new(vec![input])?;
    let out = frame.lazy().select([col("a").implode()]).collect()?;

    let imploded = out.column("a")?;
    assert!(imploded.equals(&expected));

    Ok(())
}
1501
1502
// Rounds aggregated values: first checks that a Float32 mean stays readable as
// f32 after rounding, then checks a rounded weighted mean per group.
#[test]
#[cfg(feature = "round_series")]
fn test_round_after_agg() -> PolarsResult<()> {
    let df = fruits_cars();

    let rounded_mean = col("A")
        .cast(DataType::Float32)
        .mean()
        .round(2, polars_ops::series::RoundMode::default())
        .alias("foo");

    let means = df
        .lazy()
        .group_by([col("fruits")])
        .agg([rounded_mean])
        .collect()?;

    assert!(means.column("foo")?.f32().is_ok());

    let frame = df![
        "groups" => [
            "pigeon",
            "rabbit",
            "rabbit",
            "Chris",
            "pigeon",
            "fast",
            "fast",
            "pigeon",
            "rabbit",
            "Chris",
        ],
        "b" => [5409, 4848, 4864, 3540, 8103, 3083, 8575, 9963, 8809, 5425],
        "c" => [
            0.4517241160719615,
            0.2551467646274673,
            0.8682045191407308,
            0.9925316385786037,
            0.5392027792928116,
            0.7633847828107002,
            0.7967295231651537,
            0.01444779067224733,
            0.23807484087472652,
            0.10985868798350984,
        ]
    ]?;

    // sum(b * c) / sum(b), rounded to two decimals
    let weighted_mean = ((col("b") * col("c")).sum() / col("b").sum())
        .round(2, polars_ops::series::RoundMode::default())
        .alias("foo");

    let grouped = frame
        .lazy()
        .group_by_stable([col("groups")])
        .agg([weighted_mean])
        .collect()?;

    let foo = grouped.column("foo")?;
    let foo = foo.f64()?;

    assert_eq!(
        Vec::from(foo),
        &[Some(0.3), Some(0.41), Some(0.46), Some(0.79)]
    );

    Ok(())
}
1561
1562
// `fill_nan` with a null literal on a frame with a date and a float column:
// the NaN in the float column is expected to become null.
#[test]
#[cfg(feature = "dtype-date")]
fn test_fill_nan() -> PolarsResult<()> {
    let dates = Column::new("date".into(), &[1, 2, 3]).cast(&DataType::Date)?;
    let floats = Column::new("float".into(), &[Some(1.0), Some(f32::NAN), Some(3.0)]);

    let frame = DataFrame::new(vec![dates, floats])?;
    let filled = frame.lazy().fill_nan(Null {}.lit()).collect()?;

    let float_col = filled.column("float")?;
    assert_eq!(Vec::from(float_col.f32()?), &[Some(1.0), None, Some(3.0)]);

    Ok(())
}
1575
1576
// Subtracting a regex selector for "fruits"/"cars" from `all()` is expected to
// leave columns "A" and "B".
#[test]
#[cfg(feature = "regex")]
fn test_exclude_regex() -> PolarsResult<()> {
    let df = fruits_cars();

    let remaining = all() - Selector::Matches("^(fruits|cars)$".into());
    let out = df.lazy().select([remaining.as_expr()]).collect()?;

    assert_eq!(out.get_column_names(), &["A", "B"]);
    Ok(())
}
1588
1589
// Dense-ranks "B" inside each "cars" group and checks the ranks of the second
// group.
#[test]
#[cfg(feature = "rank")]
fn test_group_by_rank() -> PolarsResult<()> {
    let df = fruits_cars();

    let dense = RankOptions {
        method: RankMethod::Dense,
        ..Default::default()
    };

    let ranked = df
        .lazy()
        .group_by_stable([col("cars")])
        .agg([col("B").rank(dense, None)])
        .collect()?;

    let ranks = ranked.column("B")?;
    let second_group = ranks.list()?.get_as_series(1).unwrap();
    let second_group = second_group.idx()?;

    assert_eq!(Vec::from(second_group), &[Some(1)]);
    Ok(())
}
1612
1613
// Selecting by dtype (Float32 and String) is expected to return the string
// column followed by the float column.
#[test]
pub fn test_select_by_dtypes() -> PolarsResult<()> {
    let frame = df![
        "bools" => [true, false, true],
        "ints" => [1, 2, 3],
        "strings" => ["a", "b", "c"],
        "floats" => [1.0, 2.0, 3.0f32]
    ]?;

    let by_dtype = dtype_cols([DataType::Float32, DataType::String])
        .as_selector()
        .as_expr();
    let out = frame.lazy().select([by_dtype]).collect()?;

    assert_eq!(out.dtypes(), &[DataType::String, DataType::Float32]);

    Ok(())
}
1631
1632
// Two binary-expression checks: a comparison that must simply collect, and a
// multiplication whose result dtype must be Float64.
#[test]
fn test_binary_expr() -> PolarsResult<()> {
    // test panic in schema names
    let fruits = fruits_cars();
    let not_one = col("A").neq(lit(1));
    let _ = fruits.lazy().select([not_one]).collect()?;

    // test type coercion
    // https://github.com/pola-rs/polars/issues/1649
    let frame = df!(
        "nrs"=> [Some(1i64), Some(2), Some(3), None, Some(5)],
        "random"=> [0.1f64, 0.6, 0.2, 0.6, 0.3]
    )?;

    let other = when(col("random").gt(lit(0.5)))
        .then(lit(2))
        .otherwise(col("random"))
        .alias("other");
    let product = other * col("nrs").sum();

    let out = frame.lazy().select([product]).collect()?;
    assert_eq!(out.dtypes(), &[DataType::Float64]);
    Ok(())
}
1653
1654
// `arg_sort` evaluated in a window over "a" must return one index per row.
#[test]
fn test_single_group_result() -> PolarsResult<()> {
    // the arg_sort should not auto explode
    let frame = df![
        "a" => [1, 2],
        "b" => [1, 1]
    ]?;

    let windowed = col("a").arg_sort(false, false).over([col("a")]);
    let out = frame.lazy().select([windowed]).collect()?;

    let indices = out.column("a")?.idx()?;
    assert_eq!(Vec::from(indices), &[Some(0), Some(0)]);

    Ok(())
}
1672
1673
// Average-ranks "value" in a window over "group" (join mapping) and checks the
// exploded f64 ranks.
#[test]
#[cfg(feature = "rank")]
fn test_single_ranked_group() -> PolarsResult<()> {
    // tests type consistency of rank algorithm
    let frame = df![
        "group" => [1, 2, 2],
        "value"=> [100, 50, 10]
    ]?;

    let average = RankOptions {
        method: RankMethod::Average,
        ..Default::default()
    };
    let ranked_value = col("value")
        .rank(average, None)
        .over_with_options(Some([col("group")]), None, WindowMapping::Join)?;

    let out = frame.lazy().with_columns([ranked_value]).collect()?;

    let ranks = out.column("value")?.explode(false)?;
    let ranks = ranks.f64()?;
    assert_eq!(
        Vec::from(ranks),
        &[Some(1.0), Some(2.0), Some(1.0), Some(2.0), Some(1.0)]
    );

    Ok(())
}
1703
1704
// Runs a set of shift / fill / cumulative / diff expressions on a frame that
// was filtered with an all-false mask; the query only has to collect.
#[test]
#[cfg(feature = "diff")]
fn empty_df() -> PolarsResult<()> {
    let full = fruits_cars();
    let all_false = BooleanChunked::full("".into(), false, full.height());
    let emptied = full.filter(&all_false)?;

    let projections = [
        col("A").shift(lit(1)).alias("1"),
        col("A").shift_and_fill(lit(1), lit(1)).alias("2"),
        col("A").shift_and_fill(lit(-1), lit(1)).alias("3"),
        col("A").fill_null(lit(1)).alias("4"),
        col("A").cum_count(false).alias("5"),
        col("A").diff(lit(1), NullBehavior::Ignore).alias("6"),
        col("A").cum_max(false).alias("7"),
        col("A").cum_min(false).alias("8"),
    ];

    // Only successful execution matters; the result itself is discarded.
    emptied.lazy().select(projections).collect()?;

    Ok(())
}
1725
1726
// Sums the absolute values of "A" per "B" group and checks both group totals.
#[test]
#[cfg(feature = "abs")]
fn test_apply_flatten() -> PolarsResult<()> {
    let frame = df![
        "A"=> [1.1435, 2.223456, 3.44732, -1.5234, -2.1238, -3.2923],
        "B"=> ["a", "b", "a", "b", "a", "b"]
    ]?;

    let abs_sum = col("A").abs().sum().alias("A_sum");
    let grouped = frame
        .lazy()
        .group_by_stable([col("B")])
        .agg([abs_sum])
        .collect()?;

    let totals = grouped.column("A_sum")?;
    assert_eq!(totals.get(0)?, AnyValue::Float64(6.71462));
    assert_eq!(totals.get(1)?, AnyValue::Float64(7.039156));

    Ok(())
}
1746
1747
// Checks `is_in` inside a stable group-by on "fruits" with two kinds of
// right-hand sides: a filtered column and a literal series. Both must give the
// same boolean result.
#[test]
#[cfg(feature = "is_in")]
fn test_is_in() -> PolarsResult<()> {
    let df = fruits_cars();

    let expected: [Option<bool>; 5] =
        [Some(true), Some(false), Some(true), Some(true), Some(true)];

    // this will be executed by apply
    let beetles = col("cars")
        .filter(col("cars").eq(lit("beetle")))
        .implode();
    let by_column = df
        .clone()
        .lazy()
        .group_by_stable([col("fruits")])
        .agg([col("cars").is_in(beetles, false)])
        .collect()?;
    let flags = by_column.column("cars").unwrap();
    let flags = flags.explode(false)?;
    let flags = flags.bool().unwrap();
    assert_eq!(Vec::from(flags), &expected);

    // this will be executed by map
    let candidates = lit(Series::new("a".into(), ["beetle", "vw"])).implode();
    let by_literal = df
        .lazy()
        .group_by_stable([col("fruits")])
        .agg([col("cars").is_in(candidates, false)])
        .collect()?;

    let flags = by_literal.column("cars").unwrap();
    let flags = flags.explode(false)?;
    let flags = flags.bool().unwrap();
    assert_eq!(Vec::from(flags), &expected);

    Ok(())
}
1790
1791
// Group-by on "keys" that counts, per key, how many "vals" equal "a" and "b".
#[test]
fn test_partitioned_gb_1() -> PolarsResult<()> {
    // don't move these to integration tests
    // keep these dtypes
    let frame = df![
        "keys" => [1, 1, 1, 1, 2],
        "vals" => ["a", "b", "c", "a", "a"]
    ]?;

    let count_a = (col("vals").eq(lit("a"))).sum().alias("eq_a");
    let count_b = (col("vals").eq(lit("b"))).sum().alias("eq_b");

    let out = frame
        .lazy()
        .group_by([col("keys")])
        .agg([count_a, count_b])
        .sort(["keys"], Default::default())
        .collect()?;

    let expected = df![
        "keys" => [1, 2],
        "eq_a" => [2 as IdxSize, 1],
        "eq_b" => [1 as IdxSize, 0],
    ]?;
    assert!(out.equals(&expected));

    Ok(())
}
1816
1817
// Group-by over 100 identical keys; both `len()` and `count()` must report 100.
#[test]
fn test_partitioned_gb_count() -> PolarsResult<()> {
    // don't move these to integration tests
    let frame = df![
        "col" => (0..100).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
    ]?;

    // we make sure to alias with a different name
    let aggregations = [
        len().alias("counted"),
        col("col").count().alias("count2"),
    ];

    let out = frame
        .lazy()
        .group_by([col("col")])
        .agg(aggregations)
        .collect()?;

    let expected = df![
        "col" => [0],
        "counted" => [100 as IdxSize],
        "count2" => [100 as IdxSize],
    ]?;
    assert!(out.equals(&expected));

    Ok(())
}
1840
1841
// Group-by mean over a string literal column and an int literal column: the
// string mean is expected to be null and the int mean 1.0.
#[test]
fn test_partitioned_gb_mean() -> PolarsResult<()> {
    // don't move these to integration tests
    let frame = df![
        "key" => (0..100).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
    ]?;

    let means = [
        col("str").mean().alias("mean_str"),
        col("int").mean().alias("mean_int"),
    ];

    let out = frame
        .lazy()
        .with_columns([lit("a").alias("str"), lit(1).alias("int")])
        .group_by([col("key")])
        .agg(means)
        .collect()?;

    assert_eq!(out.shape(), (1, 3));

    let mean_str = out.column("mean_str")?;
    assert_eq!(mean_str.get(0)?, AnyValue::Null);

    let mean_int = out.column("mean_int")?;
    assert_eq!(mean_int.get(0)?, AnyValue::Float64(1.0));

    Ok(())
}
1864
1865
// Group-by sum of a binary expression (`col + literal`), once on integers and
// once after casting to Float32.
#[test]
fn test_partitioned_gb_binary() -> PolarsResult<()> {
    // don't move these to integration tests
    let frame = df![
        "col" => (0..20).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
    ]?;

    // integer variant
    let int_sum = (col("col") + lit(10)).sum().alias("sum");
    let out = frame
        .clone()
        .lazy()
        .group_by([col("col")])
        .agg([int_sum])
        .collect()?;

    let expected = df![
        "col" => [0],
        "sum" => [200],
    ]?;
    assert!(out.equals(&expected));

    // Float32 variant
    let float_sum = (col("col").cast(DataType::Float32) + lit(10.0))
        .sum()
        .alias("sum");
    let out = frame
        .lazy()
        .group_by([col("col")])
        .agg([float_sum])
        .collect()?;

    let expected = df![
        "col" => [0],
        "sum" => [200.0_f32],
    ]?;
    assert!(out.equals(&expected));

    Ok(())
}
1899
1900
// Group-by sum of a when/then/otherwise flag: of the values 0..20, nine are
// greater than 10, so the expected sum is 9.
#[test]
fn test_partitioned_gb_ternary() -> PolarsResult<()> {
    // don't move these to integration tests
    let frame = df![
        "col" => (0..20).map(|_| Some(0)).collect::<Int32Chunked>().into_series(),
        "val" => (0..20).map(Some).collect::<Int32Chunked>().into_series(),
    ]?;

    let above_ten = when(col("val").gt(lit(10)))
        .then(lit(1))
        .otherwise(lit(0))
        .sum()
        .alias("sum");

    let out = frame
        .lazy()
        .group_by([col("col")])
        .agg([above_ten])
        .collect()?;

    let expected = df![
        "col" => [0],
        "sum" => [9],
    ]?;
    assert!(out.equals(&expected));

    Ok(())
}
1925
1926
// Sorting on an all-equal key with `maintain_order(true)` followed by a slice
// must return the first three rows in their original order.
#[test]
fn test_sort_maintain_order_true() -> PolarsResult<()> {
    let query = df![
        "A" => [1, 1, 1, 1],
        "B" => ["A", "B", "C", "D"],
    ]?
    .lazy();

    let stable_sort = SortMultipleOptions::default()
        .with_maintain_order(true)
        .with_nulls_last(true);

    let result = query
        .sort_by_exprs([col("A")], stable_sort)
        .slice(0, 3)
        .collect()?;

    let expected = df![
        "A" => [1, 1, 1],
        "B" => ["A", "B", "C"],
    ]?;
    assert!(result.equals(&expected));
    Ok(())
}
1949
1950
// A join-mapped window over an empty frame must still yield a schema where "b"
// is a list of Int32.
#[test]
fn test_over_with_options_empty_join() -> PolarsResult<()> {
    let empty_df = DataFrame::new(vec![
        Series::new_empty("a".into(), &DataType::Int32).into(),
        Series::new_empty("b".into(), &DataType::Int32).into(),
    ])?;

    let windowed_b = col("b").over_with_options(Some([col("a")]), None, WindowMapping::Join)?;
    let empty_df_out = empty_df.lazy().select([windowed_b]).collect()?;

    let list_of_i32 = DataType::List(Box::new(DataType::Int32));
    let expected_field: Field = Field::new("b".into(), list_of_i32);
    let expected_schema: Schema = Schema::from_iter(vec![expected_field]);

    assert_eq!(&**empty_df_out.schema(), &expected_schema);

    Ok(())
}
1973
1974
// Registers a named-UDF registry whose function doubles its first input
// column, then runs an anonymous function expression that refers to it by name
// and payload.
#[test]
#[cfg(feature = "serde")]
fn test_named_udfs() -> PolarsResult<()> {
    use polars_plan::dsl::named_serde::{ExprRegistry, set_named_serde_registry};

    let lf = DataFrame::new(vec![Column::new("a".into(), vec![1, 2, 3, 4])])?.lazy();

    struct DoublingRegistry;

    impl ExprRegistry for DoublingRegistry {
        fn get_function(&self, name: &str, payload: &[u8]) -> Option<Arc<dyn AnonymousColumnsUdf>> {
            // The lookup must arrive with exactly the name and payload used below.
            assert_eq!(name, "test-function");
            assert_eq!(payload, b"check");

            let doubling_udf = BaseColumnUdf::new(
                |c: &mut [Column]| Ok(std::mem::take(&mut c[0]) * 2),
                |_: &Schema, f: &[Field]| Ok(f[0].clone()),
            );
            Some(Arc::new(doubling_udf))
        }
    }

    set_named_serde_registry(Arc::new(DoublingRegistry) as _);

    let named_call = Expr::AnonymousFunction {
        input: vec![Expr::Column("a".into())],
        function: LazySerde::Named {
            name: "test-function".into(),
            payload: Some(bytes::Bytes::from("check")),
            value: None,
        },
        options: FunctionOptions::default(),
        fmt_str: Box::new("test".into()),
    };

    let actual = lf.select(&[named_call]).collect()?;
    let expected = DataFrame::new(vec![Column::new("a".into(), vec![2, 4, 6, 8])])?;
    assert_eq!(actual, expected);

    Ok(())
}
2013
2014