Path: blob/main/crates/polars-plan/src/plans/aexpr/properties/order_sensitive.rs
6940 views
//! Order sensitivity is a property of an operation that says that if the inputs' rows are1//! reordered before the operation takes place, the operation returns different results even when2//! not regarding order.3//!4//! Operations that are not order sensitive are a superset of *row separable* operations.5//! Usually operations that are not row separable and not order sensitive (e.g. aggregations) need6//! to see the whole data before being about to provide the correct output.7//!8//! Below is a formal definition.9//!10//! A shuffle of a series A with seed S is given as:11//!12//! Shuffle(A, S) =13//! CONCAT(A[P[i]] for i in [0, |A|))14//! where15//! P is a seeded by S random permutation of all integers in [0, |A|)16//!17//! An operation f(A1, ..., An) is order sensitive i.f.f.18//!19//! sort(f(A1, ..., An)) != sort(f(Shuffle(A1, S), ..., Shuffle(An, S))) for any seed S2021use super::super::*;22use crate::plans::IRAggExpr;2324pub fn is_order_sensitive(aexpr: &AExpr, arena: &Arena<AExpr>) -> bool {25is_order_sensitive_amortized(aexpr, arena, &mut Vec::new())26}2728pub fn is_order_sensitive_amortized(29aexpr: &AExpr,30arena: &Arena<AExpr>,31stack: &mut Vec<Node>,32) -> bool {33if is_order_sensitive_top_level(aexpr) {34return true;35}3637stack.clear();38aexpr.inputs_rev(stack);3940while let Some(node) = stack.pop() {41let aexpr = arena.get(node);42if is_order_sensitive_top_level(aexpr) {43return true;44}45aexpr.inputs_rev(stack);46}4748false49}5051pub fn is_order_sensitive_top_level(aexpr: &AExpr) -> bool {52match aexpr {53AExpr::Explode {54expr: _,55skip_empty: _,56} => true,57AExpr::Column(_) => false,58AExpr::Literal(lv) => !lv.is_scalar(),59AExpr::BinaryExpr {60left: _,61op: _,62right: _,63} => false,64AExpr::Cast {65expr: _,66dtype: _,67options: _,68} => false,69AExpr::Sort {70expr: _,71options: _,72} => true,73AExpr::Gather {74expr: _,75idx: _,76returns_scalar: _,77} => true,78AExpr::SortBy {79expr: _,80by: _,81sort_options: _,82} => true,83AExpr::Filter { input: _, by: _ } => false,84AExpr::Agg(agg) => is_order_sensitive_agg_top_level(agg),85AExpr::Ternary {86predicate: _,87truthy: _,88falsy: _,89} => false,9091AExpr::AnonymousFunction {92input: _,93function: _,94options,95fmt_str: _,96}97| AExpr::Function {98input: _,99function: _,100options,101} => !options.is_elementwise(),102AExpr::Eval {103expr: _,104evaluation: _,105variant,106} => !variant.is_elementwise(),107AExpr::Window {108function: _,109partition_by: _,110order_by: _,111options: _,112} => true,113AExpr::Slice {114input: _,115offset: _,116length: _,117} => true,118AExpr::Len => false,119}120}121122pub fn is_order_sensitive_agg_top_level(agg: &IRAggExpr) -> bool {123match agg {124IRAggExpr::Min {125input: _,126propagate_nans: _,127} => false,128IRAggExpr::Max {129input: _,130propagate_nans: _,131} => false,132IRAggExpr::Median(_) => false,133IRAggExpr::NUnique(_) => false,134IRAggExpr::First(_) => true,135IRAggExpr::Last(_) => true,136IRAggExpr::Mean(_) => false,137IRAggExpr::Implode(_) => true,138IRAggExpr::Quantile {139expr: _,140quantile: _,141method: _,142} => false,143IRAggExpr::Sum(_) => false,144IRAggExpr::Count {145input: _,146include_nulls: _,147} => false,148IRAggExpr::Std(_, _) => false,149IRAggExpr::Var(_, _) => false,150IRAggExpr::AggGroups(_) => true,151}152}153154155