Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/aexpr/properties/order_sensitive.rs
6940 views
1
//! Order sensitivity is a property of an operation that says that if the inputs' rows are
2
//! reordered before the operation takes place, the operation returns different results even when
3
//! not regarding order.
4
//!
5
//! Operations that are not order sensitive are a superset of *row separable* operations.
6
//! Usually operations that are not row separable and not order sensitive (e.g. aggregations) need
7
//! to see the whole data before being about to provide the correct output.
8
//!
9
//! Below is a formal definition.
10
//!
11
//! A shuffle of a series A with seed S is given as:
12
//!
13
//! Shuffle(A, S) =
14
//! CONCAT(A[P[i]] for i in [0, |A|))
15
//! where
16
//! P is a seeded by S random permutation of all integers in [0, |A|)
17
//!
18
//! An operation f(A1, ..., An) is order sensitive i.f.f.
19
//!
20
//! sort(f(A1, ..., An)) != sort(f(Shuffle(A1, S), ..., Shuffle(An, S))) for any seed S
21
22
use super::super::*;
23
use crate::plans::IRAggExpr;
24
25
pub fn is_order_sensitive(aexpr: &AExpr, arena: &Arena<AExpr>) -> bool {
26
is_order_sensitive_amortized(aexpr, arena, &mut Vec::new())
27
}
28
29
pub fn is_order_sensitive_amortized(
30
aexpr: &AExpr,
31
arena: &Arena<AExpr>,
32
stack: &mut Vec<Node>,
33
) -> bool {
34
if is_order_sensitive_top_level(aexpr) {
35
return true;
36
}
37
38
stack.clear();
39
aexpr.inputs_rev(stack);
40
41
while let Some(node) = stack.pop() {
42
let aexpr = arena.get(node);
43
if is_order_sensitive_top_level(aexpr) {
44
return true;
45
}
46
aexpr.inputs_rev(stack);
47
}
48
49
false
50
}
51
52
pub fn is_order_sensitive_top_level(aexpr: &AExpr) -> bool {
53
match aexpr {
54
AExpr::Explode {
55
expr: _,
56
skip_empty: _,
57
} => true,
58
AExpr::Column(_) => false,
59
AExpr::Literal(lv) => !lv.is_scalar(),
60
AExpr::BinaryExpr {
61
left: _,
62
op: _,
63
right: _,
64
} => false,
65
AExpr::Cast {
66
expr: _,
67
dtype: _,
68
options: _,
69
} => false,
70
AExpr::Sort {
71
expr: _,
72
options: _,
73
} => true,
74
AExpr::Gather {
75
expr: _,
76
idx: _,
77
returns_scalar: _,
78
} => true,
79
AExpr::SortBy {
80
expr: _,
81
by: _,
82
sort_options: _,
83
} => true,
84
AExpr::Filter { input: _, by: _ } => false,
85
AExpr::Agg(agg) => is_order_sensitive_agg_top_level(agg),
86
AExpr::Ternary {
87
predicate: _,
88
truthy: _,
89
falsy: _,
90
} => false,
91
92
AExpr::AnonymousFunction {
93
input: _,
94
function: _,
95
options,
96
fmt_str: _,
97
}
98
| AExpr::Function {
99
input: _,
100
function: _,
101
options,
102
} => !options.is_elementwise(),
103
AExpr::Eval {
104
expr: _,
105
evaluation: _,
106
variant,
107
} => !variant.is_elementwise(),
108
AExpr::Window {
109
function: _,
110
partition_by: _,
111
order_by: _,
112
options: _,
113
} => true,
114
AExpr::Slice {
115
input: _,
116
offset: _,
117
length: _,
118
} => true,
119
AExpr::Len => false,
120
}
121
}
122
123
pub fn is_order_sensitive_agg_top_level(agg: &IRAggExpr) -> bool {
124
match agg {
125
IRAggExpr::Min {
126
input: _,
127
propagate_nans: _,
128
} => false,
129
IRAggExpr::Max {
130
input: _,
131
propagate_nans: _,
132
} => false,
133
IRAggExpr::Median(_) => false,
134
IRAggExpr::NUnique(_) => false,
135
IRAggExpr::First(_) => true,
136
IRAggExpr::Last(_) => true,
137
IRAggExpr::Mean(_) => false,
138
IRAggExpr::Implode(_) => true,
139
IRAggExpr::Quantile {
140
expr: _,
141
quantile: _,
142
method: _,
143
} => false,
144
IRAggExpr::Sum(_) => false,
145
IRAggExpr::Count {
146
input: _,
147
include_nulls: _,
148
} => false,
149
IRAggExpr::Std(_, _) => false,
150
IRAggExpr::Var(_, _) => false,
151
IRAggExpr::AggGroups(_) => true,
152
}
153
}
154
155