Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/aggregations.rs
6939 views
1
use polars_ops::prelude::ListNameSpaceImpl;
2
use polars_utils::unitvec;
3
4
use super::*;
5
6
#[test]
#[cfg(feature = "dtype-datetime")]
fn test_agg_list_type() -> PolarsResult<()> {
    // Three-element nanosecond datetime series without a time zone.
    let datetimes = Series::new("foo".into(), &[1, 2, 3])
        .cast(&DataType::Datetime(TimeUnit::Nanoseconds, None))?;

    // A single group that covers all three rows.
    let groups = GroupsType::Idx(vec![(0, unitvec![0, 1, 2])].into());
    // SAFETY: the group indices 0, 1 and 2 are all in bounds for the
    // three-element series built above (presumably the contract of
    // `agg_list` -- confirm against its documentation).
    let aggregated = unsafe { datetimes.agg_list(&groups) };

    // The aggregated list must keep the exact datetime dtype as its inner type.
    let keeps_inner_dtype = matches!(
        aggregated.dtype(),
        DataType::List(inner)
            if matches!(&**inner, DataType::Datetime(TimeUnit::Nanoseconds, None))
    );
    assert!(keeps_inner_dtype);

    Ok(())
}
24
25
#[test]
fn test_agg_exprs() -> PolarsResult<()> {
    // Smoke test: a binary expression followed by a mapped function, used as
    // an aggregation inside a stable group-by.
    let doubled = (lit(1) - col("A"))
        .map(|s| Ok(&s * 2), |_, f| Ok(f.clone()))
        .alias("foo");

    let grouped = fruits_cars()
        .lazy()
        .group_by_stable([col("cars")])
        .agg([doubled])
        .collect()?;

    // Each group's result is a list; check the per-group list lengths.
    let lengths = grouped.column("foo")?.list()?.lst_lengths();
    assert_eq!(Vec::from(&lengths), &[Some(4), Some(1)]);
    Ok(())
}
43
44
#[test]
fn test_agg_unique_first() -> PolarsResult<()> {
    // Groups: g=1 -> v in {1, 2}, g=2 -> {2}, g=3 -> {3}, g=4 -> {4}.
    let df = df![
        "g"=> [1, 1, 2, 2, 3, 4, 1],
        "v"=> [1, 2, 2, 2, 3, 4, 1],
    ]?;

    let out = df
        .lazy()
        .group_by_stable([col("g")])
        .agg([
            col("v").unique().first().alias("v_first"),
            col("v")
                .unique()
                .sort(Default::default())
                .first()
                .alias("true_first"),
            col("v").unique().implode(),
        ])
        .collect()?;

    let a = out.column("v_first")?;
    let a = a.as_materialized_series().sum::<i32>().unwrap();
    // Can be either: `unique` does not guarantee order, so group 1 may yield
    // 1 or 2 as its first value (1 + 2 + 3 + 4 = 10, or 2 + 2 + 3 + 4 = 11).
    assert!(a == 10 || a == 11);

    let a = out.column("true_first")?;
    let a = a.as_materialized_series().sum::<i32>().unwrap();
    // Deterministic: the unique values are sorted before taking the first,
    // so group 1 always yields 1 and the sum is exactly 10.
    assert_eq!(a, 10);

    Ok(())
}
77
78
#[test]
#[cfg(feature = "cum_agg")]
fn test_cum_sum_agg_as_key() -> PolarsResult<()> {
    let df = df![
        "depth" => &[0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        "soil" => &["peat", "peat", "peat", "silt", "silt", "silt", "sand", "sand", "peat", "peat"]
    ]?;

    // Checks that the grouper accepts a complex expression as its key.
    // The key compares each soil value with the previous row (the first row
    // is compared with the first soil value) and takes a cumulative sum of
    // the "changed" flags, so each consecutive run of equal soil gets its
    // own key.
    let run_key = col("soil")
        .neq(col("soil").shift_and_fill(lit(1), col("soil").first()))
        .cum_sum(false)
        .alias("key");

    let out = df
        .lazy()
        .group_by([run_key])
        .agg([col("depth").max().name().keep()])
        .sort(["depth"], Default::default())
        .collect()?;

    let keys = Vec::from(out.column("key")?.u32()?);
    assert_eq!(keys, &[Some(0), Some(1), Some(2), Some(3)]);

    let max_depths = Vec::from(out.column("depth")?.i32()?);
    assert_eq!(max_depths, &[Some(2), Some(5), Some(7), Some(9)]);

    Ok(())
}
108
109
#[test]
#[cfg(feature = "moment")]
fn test_auto_skew_kurtosis_agg() -> PolarsResult<()> {
    // Aggregate skew and kurtosis of "B" per fruit group.
    let moments = fruits_cars()
        .lazy()
        .group_by([col("fruits")])
        .agg([
            col("B").skew(false).alias("bskew"),
            col("B").kurtosis(false, false).alias("bkurt"),
        ])
        .collect()?;

    // Both aggregations must produce Float64 columns.
    for name in ["bskew", "bkurt"] {
        assert!(matches!(moments.column(name)?.dtype(), DataType::Float64));
    }

    Ok(())
}
128
129
#[test]
fn test_auto_list_agg() -> PolarsResult<()> {
    let df = fruits_cars();
    // The expression under test, rebuilt for each query.
    let shifted = || col("B").shift_and_fill(lit(-1), lit(-1));

    // Aliased shift_and_fill in a group-by aggregation must yield a list column.
    let aliased = df
        .clone()
        .lazy()
        .group_by([col("fruits")])
        .agg([shifted().alias("foo")])
        .collect()?;
    assert!(matches!(aliased.column("foo")?.dtype(), DataType::List(_)));

    // Same aggregation without the alias: only checks that the query runs.
    let _unaliased = df
        .clone()
        .lazy()
        .group_by([col("fruits")])
        .agg([shifted()])
        .collect()?;

    // The same expression in a `select`, with an alias: only checks that it runs.
    let _selected_aliased = df
        .clone()
        .lazy()
        .select([shifted().alias("foo")])
        .collect()?;

    // And in a `select` without an alias.
    let _selected = df.lazy().select([shifted()]).collect()?;
    Ok(())
}
164
#[test]
#[cfg(feature = "rolling_window")]
fn test_power_in_agg_list1() -> PolarsResult<()> {
    // Tests that the group tuples are correctly updated after a flat apply
    // on a final aggregation.
    let rolling_min_a = || {
        col("A").rolling_min(RollingOptionsFixedWindow {
            window_size: 1,
            ..Default::default()
        })
    };
    let descending = SortMultipleOptions::default().with_order_descending(true);

    let out = fruits_cars()
        .lazy()
        .group_by([col("fruits")])
        .agg([
            rolling_min_a().alias("input"),
            rolling_min_a().pow(2.0).alias("foo"),
        ])
        .sort(["fruits"], descending)
        .collect()?;

    // Inspect the squared values of the first group after the descending sort.
    let first_group = out.column("foo")?.list()?.get_as_series(0).unwrap();
    let squared = first_group.f64()?;
    assert_eq!(Vec::from(squared), &[Some(1.0), Some(4.0), Some(25.0)]);

    Ok(())
}
202
203
#[test]
#[cfg(feature = "rolling_window")]
fn test_power_in_agg_list2() -> PolarsResult<()> {
    // Tests that the group tuples are correctly updated after a flat apply
    // on evaluate_on_groups.
    let window = RollingOptionsFixedWindow {
        window_size: 2,
        min_periods: 2,
        ..Default::default()
    };
    let sum_of_squares = col("A").rolling_min(window).pow(2.0).sum().alias("foo");
    let descending = SortMultipleOptions::default().with_order_descending(true);

    let out = fruits_cars()
        .lazy()
        .group_by([col("fruits")])
        .agg([sum_of_squares])
        .sort(["fruits"], descending)
        .collect()?;

    let totals = out.column("foo")?.f64()?;
    assert_eq!(Vec::from(totals), &[Some(5.0), Some(9.0)]);

    Ok(())
}
233
#[test]
fn test_binary_agg_context_0() -> PolarsResult<()> {
    let df = df![
        "groups" => [1, 1, 2, 2, 3, 3],
        "vals" => [1, 2, 3, 4, 5, 6]
    ]?;

    // The predicate is an aggregate (first value of the group != 1), while
    // both branches repeat a literal once per row of the group.
    let out = df
        .lazy()
        .group_by_stable([col("groups")])
        .agg([when(col("vals").first().neq(lit(1)))
            .then(repeat(lit("a"), len()))
            .otherwise(repeat(lit("b"), len()))
            .alias("foo")])
        .collect()?;

    let out = out.column("foo")?;
    let out = out.explode(false)?;
    let out = out.str()?;
    // Only group 1 starts with 1, so it takes the `otherwise` branch ("b");
    // groups 2 and 3 take the `then` branch ("a").
    assert_eq!(
        Vec::from(out),
        &[
            Some("b"),
            Some("b"),
            Some("a"),
            Some("a"),
            Some("a"),
            Some("a")
        ]
    );
    Ok(())
}
267
268
// just like binary expression, this must be changed. This can work
269
#[test]
fn test_binary_agg_context_1() -> PolarsResult<()> {
    let df = df![
        "groups" => [1, 1, 2, 2, 3, 3],
        "vals" => [1, 13, 3, 87, 1, 6]
    ]?;

    // groups
    // 1 => [1, 13]
    // 2 => [3, 87]
    // 3 => [1, 6]

    // Element-wise predicate combined with an aggregated `then` branch.
    let out = df
        .clone()
        .lazy()
        .group_by_stable([col("groups")])
        .agg([when(col("vals").eq(lit(1)))
            .then(col("vals").sum())
            .otherwise(lit(90))
            .alias("vals")])
        .collect()?;

    // if vals == 1 then sum(vals) else 90
    // [14, 90]
    // [90, 90]
    // [7, 90]
    let out = out.column("vals")?;
    let out = out.explode(false)?;
    let out = out.i32()?;
    assert_eq!(
        Vec::from(out),
        &[Some(14), Some(90), Some(90), Some(90), Some(7), Some(90)]
    );

    // Same predicate, but now the aggregate sits in the `otherwise` branch.
    let out = df
        .lazy()
        .group_by_stable([col("groups")])
        .agg([when(col("vals").eq(lit(1)))
            .then(lit(90))
            .otherwise(col("vals").sum())
            .alias("vals")])
        .collect()?;

    // if vals == 1 then 90 else sum(vals)
    // [90, 14]
    // [90, 90]  (group 2 has no 1; its sum 3 + 87 happens to be 90 as well)
    // [90, 7]
    let out = out.column("vals")?;
    let out = out.explode(false)?;
    let out = out.i32()?;
    assert_eq!(
        Vec::from(out),
        &[Some(90), Some(14), Some(90), Some(90), Some(90), Some(7)]
    );

    Ok(())
}
326
327
#[test]
fn test_binary_agg_context_2() -> PolarsResult<()> {
    let df = df![
        "groups" => [1, 1, 2, 2, 3, 3],
        "vals" => [1, 2, 3, 4, 5, 6]
    ]?;

    // This is complex because one side of the binary operation is aggregated
    // first, while the other side is still the full group.

    let out = df
        .clone()
        .lazy()
        .group_by_stable([col("groups")])
        .agg([(col("vals").first() - col("vals")).alias("vals")])
        .collect()?;

    // first(vals) - vals, per group:
    // 1 - [1, 2] = [0, -1]
    // 3 - [3, 4] = [0, -1]
    // 5 - [5, 6] = [0, -1]
    let out = out.column("vals")?;
    let out = out.explode(false)?;
    let out = out.i32()?;
    assert_eq!(
        Vec::from(out),
        &[Some(0), Some(-1), Some(0), Some(-1), Some(0), Some(-1)]
    );

    // Same, but now we reverse the lhs / rhs.
    let out = df
        .lazy()
        .group_by_stable([col("groups")])
        .agg([((col("vals")) - col("vals").first()).alias("vals")])
        .collect()?;

    // vals - first(vals), per group:
    // [1, 2] - 1 = [0, 1]
    // [3, 4] - 3 = [0, 1]
    // [5, 6] - 5 = [0, 1]
    let out = out.column("vals")?;
    let out = out.explode(false)?;
    let out = out.i32()?;
    assert_eq!(
        Vec::from(out),
        &[Some(0), Some(1), Some(0), Some(1), Some(0), Some(1)]
    );

    Ok(())
}
374
375
#[test]
fn test_binary_agg_context_3() -> PolarsResult<()> {
    // Per "cars" group: subtract the group's first "A" from every "A" and
    // keep only the last difference.
    let last_minus_first = (col("A") - col("A").first()).last().alias("last");

    let grouped = fruits_cars()
        .lazy()
        .group_by_stable([col("cars")])
        .agg([last_minus_first])
        .collect()?;

    let last = grouped.column("last")?;
    assert_eq!(last.get(0)?, AnyValue::Int32(4));
    assert_eq!(last.get(1)?, AnyValue::Int32(0));

    Ok(())
}
391
392
#[test]
fn test_shift_elementwise_issue_2509() -> PolarsResult<()> {
    let df = df![
        "x"=> [0, 0, 0, 1, 1, 1, 2, 2, 2],
        "y"=> [0, 10, 20, 0, 10, 20, 0, 10, 20]
    ]?;

    let grouped = df
        .lazy()
        // Don't use maintain order here! That hides the bug
        .group_by([col("x")])
        .agg(&[(col("y").shift(lit(-1)) + col("x")).alias("sum")])
        .sort(["x"], Default::default())
        .collect()?;

    let exploded = grouped.explode(["sum"])?;
    let sums = exploded.column("sum")?;

    // First two groups (x = 0 and x = 1): "y" shifted by -1 within the group
    // plus the group's "x", leaving a null in each group's last slot.
    let expected = [
        AnyValue::Int32(10),
        AnyValue::Int32(20),
        AnyValue::Null,
        AnyValue::Int32(11),
        AnyValue::Int32(21),
        AnyValue::Null,
    ];
    for (row, want) in expected.into_iter().enumerate() {
        assert_eq!(sums.get(row)?, want);
    }

    Ok(())
}
417
418
#[test]
fn take_aggregations() -> PolarsResult<()> {
    let df = df![
        "user" => ["lucy", "bob", "bob", "lucy", "tim"],
        "book" => ["c", "b", "a", "a", "a"],
        "count" => [3, 1, 2, 1, 1]
    ]?;

    // `get` with a computed index: per user, the book at the position of the
    // highest count.
    let favourites = df
        .clone()
        .lazy()
        .group_by([col("user")])
        .agg([col("book").get(col("count").arg_max()).alias("fav_book")])
        .sort(["user"], Default::default())
        .collect()?;

    let fav_book = favourites.column("fav_book")?;
    assert_eq!(fav_book.get(0)?, AnyValue::String("a"));
    assert_eq!(fav_book.get(1)?, AnyValue::String("c"));
    assert_eq!(fav_book.get(2)?, AnyValue::String("a"));

    // `gather` with a computed index list.
    // Keep the `head`: it exercises slice correctness.
    let top_two_idx = col("count").arg_sort(true, false).head(Some(2));
    let ranked = df
        .clone()
        .lazy()
        .group_by([col("user")])
        .agg([col("book").gather(top_two_idx).alias("ordered")])
        .sort(["user"], Default::default())
        .collect()?;

    let exploded = ranked.column("ordered")?.explode(false)?;
    let books = exploded.str()?.into_no_null_iter().collect::<Vec<_>>();
    assert_eq!(books, ["a", "b", "c", "a", "a"]);

    // `get` with a literal index: the first book of every user.
    let firsts = df
        .lazy()
        .group_by([col("user")])
        .agg([col("book").get(lit(0)).alias("take_lit")])
        .sort(["user"], Default::default())
        .collect()?;

    let first_books = firsts
        .column("take_lit")?
        .str()?
        .into_no_null_iter()
        .collect::<Vec<_>>();
    assert_eq!(first_books, ["b", "c", "a"]);

    Ok(())
}
471
#[test]
fn test_take_consistency() -> PolarsResult<()> {
    let df = fruits_cars();
    // First entry of the arg-sort of "A"; rebuilt for each query below.
    let first_sorted_idx = || col("A").arg_sort(true, false).get(lit(0));

    // Select context: a single index over the whole column.
    let selected = df.clone().lazy().select([first_sorted_idx()]).collect()?;
    let idx = selected.column("A")?.idx()?;
    assert_eq!(idx.get(0), Some(4));

    // Group-by context: one index per "cars" group.
    let grouped = df
        .clone()
        .lazy()
        .group_by_stable([col("cars")])
        .agg([first_sorted_idx()])
        .collect()?;
    let per_group_idx = grouped.column("A")?.idx()?;
    assert_eq!(Vec::from(per_group_idx), &[Some(3), Some(0)]);

    // The index expression must give the same result when aliased and when
    // nested inside another `get` in the same aggregation.
    let combined = df
        .lazy()
        .group_by_stable([col("cars")])
        .agg([
            col("A"),
            first_sorted_idx().alias("1"),
            col("A").get(first_sorted_idx()).alias("2"),
        ])
        .collect()?;

    let values = combined.column("2")?.i32()?;
    assert_eq!(Vec::from(values), &[Some(5), Some(2)]);

    let indices = combined.column("1")?.idx()?;
    assert_eq!(Vec::from(indices), &[Some(3), Some(0)]);

    Ok(())
}
517
518
#[test]
fn test_take_in_groups() -> PolarsResult<()> {
    // First "B" of each "fruits" partition, evaluated as a window expression.
    let first_b_per_fruit = col("B")
        .get(lit(0u32))
        .over([col("fruits")])
        .alias("taken");

    let out = fruits_cars()
        .lazy()
        .sort(["fruits"], Default::default())
        .select([first_b_per_fruit])
        .collect()?;

    let taken = Vec::from(out.column("taken")?.i32()?);
    assert_eq!(taken, &[Some(3), Some(3), Some(5), Some(5), Some(5)]);
    Ok(())
}
534
535
#[test]
fn test_anonymous_function_returns_scalar_all_null_20679() {
    use std::sync::Arc;

    // Reduces a column to a length-1 scalar column that holds its first value
    // and keeps the input dtype.
    fn first_value_as_scalar(column: Column) -> PolarsResult<Column> {
        let dtype = column.dtype().clone();
        let first = column.get(0)?.into_static();
        Ok(Column::new_scalar("".into(), Scalar::new(dtype, first), 1))
    }

    // Key column "a" plus an all-null column "b" of dtype Null.
    let keys = Column::new("a".into(), &[0, 0, 1]);
    let nulls = Column::new_scalar("b".into(), Scalar::new(DataType::Null, AnyValue::Null), 3);
    let df = DataFrame::new(vec![keys, nulls]).unwrap();

    // The UDF consumes its first input column; its output field is declared
    // to be the same as the first input field.
    let udf = BaseColumnUdf::new(
        move |cols: &mut [Column]| first_value_as_scalar(std::mem::take(&mut cols[0])),
        |_: &Schema, fields: &[Field]| Ok(fields[0].clone()),
    );

    let reduce_b = Expr::AnonymousFunction {
        input: vec![col("b")],
        function: LazySerde::Deserialized(SpecialEq::new(Arc::new(udf))),
        options: FunctionOptions::aggregation(),
        fmt_str: Box::new(PlSmallStr::EMPTY),
    };

    let grouped_df = df
        .lazy()
        .group_by([col("a")])
        .agg([reduce_b])
        .collect()
        .unwrap();

    // The aggregated column must still have the Null dtype.
    assert_eq!(grouped_df.get_columns()[1].dtype(), &DataType::Null);
}
571
572