Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/frame/pivot.rs
6939 views
1
//! Module containing implementation of the pivot operation.
2
//!
3
//! Polars lazy does not implement a pivot because it is impossible to know the schema without
4
//! materializing the whole dataset. This makes a pivot quite a terrible operation for performant
5
//! workflows. An optimization can never be pushed down past a pivot.
6
//!
7
//! We can do a pivot on an eager `DataFrame` as that is already materialized. The code for the
8
//! pivot is here, because we want to be able to pass expressions to the pivot operation.
9
//!
10
11
use polars_core::frame::group_by::expr::PhysicalAggExpr;
12
use polars_core::prelude::*;
13
use polars_ops::pivot::PivotAgg;
14
15
use crate::physical_plan::exotic::{contains_column_refs, prepare_expression_for_context};
16
use crate::prelude::*;
17
18
/// Newtype around an [`Expr`] that lets the expression act as the aggregation
/// of a pivot: this module implements `PhysicalAggExpr` for it and wraps it in
/// a `PivotAgg` before handing it to the pivot implementation.
pub struct PivotExpr(Expr);
19
20
impl PivotExpr {
21
pub fn from_expr(expr: Expr) -> Self {
22
PivotExpr(expr)
23
}
24
}
25
26
impl PhysicalAggExpr for PivotExpr {
27
fn evaluate_on_groups(&self, df: &DataFrame, groups: &GroupPositions) -> PolarsResult<Series> {
28
let state = ExecutionState::new();
29
let dtype = df.get_columns()[0].dtype();
30
let phys_expr = prepare_expression_for_context(
31
PlSmallStr::EMPTY,
32
&self.0,
33
dtype,
34
Context::Aggregation,
35
)?;
36
phys_expr
37
.evaluate_on_groups(df, groups, &state)
38
.map(|mut ac| ac.aggregated().take_materialized_series())
39
}
40
41
fn root_name(&self) -> PolarsResult<&PlSmallStr> {
42
Ok(PlSmallStr::EMPTY_REF)
43
}
44
}
45
46
pub fn pivot<I0, I1, I2, S0, S1, S2>(
47
df: &DataFrame,
48
on: I0,
49
index: Option<I1>,
50
values: Option<I2>,
51
sort_columns: bool,
52
agg_expr: Option<Expr>,
53
// used as separator/delimiter in generated column names.
54
separator: Option<&str>,
55
) -> PolarsResult<DataFrame>
56
where
57
I0: IntoIterator<Item = S0>,
58
I1: IntoIterator<Item = S1>,
59
I2: IntoIterator<Item = S2>,
60
S0: Into<PlSmallStr>,
61
S1: Into<PlSmallStr>,
62
S2: Into<PlSmallStr>,
63
{
64
// we are strict:
65
// agg_expr can only access data as generated by the pivot operation through pl.element()
66
if agg_expr.as_ref().is_some_and(contains_column_refs) {
67
polars_bail!(InvalidOperation: "explicit column references are not allowed in aggregate_function");
68
}
69
70
let agg_expr = agg_expr.map(|ae| PivotAgg(Arc::new(PivotExpr(ae))));
71
polars_ops::pivot::pivot(df, on, index, values, sort_columns, agg_expr, separator)
72
}
73
74
pub fn pivot_stable<I0, I1, I2, S0, S1, S2>(
75
df: &DataFrame,
76
on: I0,
77
index: Option<I1>,
78
values: Option<I2>,
79
sort_columns: bool,
80
agg_expr: Option<Expr>,
81
// used as separator/delimiter in generated column names.
82
separator: Option<&str>,
83
) -> PolarsResult<DataFrame>
84
where
85
I0: IntoIterator<Item = S0>,
86
I1: IntoIterator<Item = S1>,
87
I2: IntoIterator<Item = S2>,
88
S0: Into<PlSmallStr>,
89
S1: Into<PlSmallStr>,
90
S2: Into<PlSmallStr>,
91
{
92
// we are strict:
93
// agg_expr can only access data as generated by the pivot operation through pl.element()
94
if agg_expr.as_ref().is_some_and(contains_column_refs) {
95
polars_bail!(InvalidOperation: "explicit column references are not allowed in aggregate_function");
96
}
97
98
let agg_expr = agg_expr.map(|ae| PivotAgg(Arc::new(PivotExpr(ae))));
99
polars_ops::pivot::pivot_stable(df, on, index, values, sort_columns, agg_expr, separator)
100
}
101
102