Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/conversion/stack_opt.rs
8420 views
1
use std::borrow::Borrow;
2
3
use self::type_check::TypeCheckRule;
4
use super::*;
5
use crate::constants::{get_pl_element_name, get_pl_structfields_name};
6
7
/// Applies expression simplification and type coercion during conversion to IR.
8
pub struct ConversionOptimizer {
9
scratch: Vec<(Node, usize)>,
10
schemas: Vec<Schema>,
11
12
simplify: Option<SimplifyExprRule>,
13
coerce: Option<TypeCoercionRule>,
14
check: Option<TypeCheckRule>,
15
// IR's can be cached in the DSL.
16
// But if they are used multiple times in DSL (e.g. concat/join)
17
// then it can occur that we take a slot multiple times.
18
// So we keep track of the arena versions used and allow only
19
// one unique IR cache to be reused.
20
pub(super) used_arenas: PlHashSet<u32>,
21
}
22
23
struct ExtendVec<'a> {
24
out: &'a mut Vec<(Node, usize)>,
25
schema_idx: usize,
26
}
27
impl Extend<Node> for ExtendVec<'_> {
28
fn extend<T: IntoIterator<Item = Node>>(&mut self, iter: T) {
29
self.out
30
.extend(iter.into_iter().map(|n| (n, self.schema_idx)))
31
}
32
}
33
34
impl ConversionOptimizer {
35
pub fn new(simplify: bool, type_coercion: bool, type_check: bool) -> Self {
36
let simplify = if simplify {
37
Some(SimplifyExprRule {})
38
} else {
39
None
40
};
41
42
let coerce = if type_coercion {
43
Some(TypeCoercionRule {})
44
} else {
45
None
46
};
47
48
let check = if type_check {
49
Some(TypeCheckRule)
50
} else {
51
None
52
};
53
54
ConversionOptimizer {
55
scratch: Vec::with_capacity(8),
56
schemas: Vec::new(),
57
simplify,
58
coerce,
59
check,
60
used_arenas: Default::default(),
61
}
62
}
63
64
pub fn push_scratch(&mut self, expr: Node, expr_arena: &Arena<AExpr>) {
65
self.scratch.push((expr, 0));
66
// traverse all subexpressions and add to the stack
67
let expr = unsafe { expr_arena.get_unchecked(expr) };
68
69
expr.inputs_rev_strict(&mut ExtendVec {
70
out: &mut self.scratch,
71
schema_idx: 0,
72
});
73
}
74
75
pub fn fill_scratch<I, N>(&mut self, exprs: I, expr_arena: &Arena<AExpr>)
76
where
77
I: IntoIterator<Item = N>,
78
N: Borrow<Node>,
79
{
80
for e in exprs {
81
let node = *e.borrow();
82
self.push_scratch(node, expr_arena);
83
}
84
}
85
86
/// Optimizes the expressions in the scratch space. This should be called after filling the
87
/// scratch space with the expressions that you want to optimize.
88
pub fn optimize_exprs(
89
&mut self,
90
expr_arena: &mut Arena<AExpr>,
91
ir_arena: &mut Arena<IR>,
92
current_ir_node: Node,
93
// Use the schema of `current_ir_node` instead of its input when resolving expr fields.
94
use_current_node_schema: bool,
95
) -> PolarsResult<()> {
96
// Different from the stack-opt in the optimizer phase, this does a single pass until fixed point per expression.
97
98
if let Some(rule) = &mut self.check {
99
while let Some(x) = rule.optimize_plan(ir_arena, expr_arena, current_ir_node)? {
100
ir_arena.replace(current_ir_node, x);
101
}
102
}
103
104
// process the expressions on the stack and apply optimizations.
105
let schema = if use_current_node_schema {
106
ir_arena.get(current_ir_node).schema(ir_arena)
107
} else {
108
get_input_schema(ir_arena, current_ir_node)
109
};
110
let plan = ir_arena.get(current_ir_node);
111
let mut ctx = OptimizeExprContext {
112
in_filter: matches!(plan, IR::Filter { .. }),
113
has_inputs: !get_input(ir_arena, current_ir_node).is_empty(),
114
..Default::default()
115
};
116
#[cfg(feature = "python")]
117
{
118
use crate::dsl::python_dsl::PythonScanSource;
119
ctx.in_pyarrow_scan = matches!(plan, IR::PythonScan { options } if options.python_source == PythonScanSource::Pyarrow);
120
ctx.in_io_plugin = matches!(plan, IR::PythonScan { options } if options.python_source == PythonScanSource::IOPlugin);
121
};
122
123
self.schemas.clear();
124
while let Some((current_expr_node, schema_idx)) = self.scratch.pop() {
125
let expr = unsafe { expr_arena.get_unchecked(current_expr_node) };
126
127
if expr.is_leaf() {
128
continue;
129
}
130
131
// Evaluation expressions still need to do rules on the evaluation expression but the
132
// schema is not the same and it is not concluded in the inputs.
133
if let AExpr::Eval {
134
expr,
135
evaluation,
136
variant,
137
} = expr
138
{
139
let schema = if schema_idx == 0 {
140
&schema
141
} else {
142
&self.schemas[schema_idx - 1]
143
};
144
let expr = expr_arena
145
.get(*expr)
146
.to_dtype(&ToFieldContext::new(expr_arena, schema))?;
147
148
let element_dtype = variant.element_dtype(&expr)?;
149
let mut schema = schema.clone();
150
schema.insert(get_pl_element_name(), element_dtype.clone());
151
self.schemas.push(schema);
152
self.scratch.push((*evaluation, self.schemas.len()));
153
}
154
155
// Similar for StructEval
156
// Effectively, we are mimicking in-order processing traversal logic (left > parent > right).
157
#[cfg(feature = "dtype-struct")]
158
if let AExpr::StructEval { expr, evaluation } = expr {
159
let schema = if schema_idx == 0 {
160
&schema
161
} else {
162
&self.schemas[schema_idx - 1]
163
};
164
let struct_dtype = expr_arena
165
.get(*expr)
166
.to_dtype(&ToFieldContext::new(expr_arena, schema))?;
167
168
let mut schema = schema.clone();
169
schema.insert(get_pl_structfields_name(), struct_dtype);
170
self.schemas.push(schema);
171
for e in evaluation {
172
self.scratch.push((e.node(), self.schemas.len()))
173
}
174
}
175
176
let schema = if schema_idx == 0 {
177
&schema
178
} else {
179
&self.schemas[schema_idx - 1]
180
};
181
182
if let Some(rule) = &mut self.simplify {
183
while let Some(x) =
184
rule.optimize_expr(expr_arena, current_expr_node, schema, ctx)?
185
{
186
expr_arena.replace(current_expr_node, x);
187
}
188
}
189
if let Some(rule) = &mut self.coerce {
190
while let Some(x) =
191
rule.optimize_expr(expr_arena, current_expr_node, schema, ctx)?
192
{
193
expr_arena.replace(current_expr_node, x);
194
}
195
}
196
197
let expr = unsafe { expr_arena.get_unchecked(current_expr_node) };
198
// traverse subexpressions and add to the stack
199
expr.inputs_rev_strict(&mut ExtendVec {
200
out: &mut self.scratch,
201
schema_idx,
202
});
203
}
204
205
Ok(())
206
}
207
}
208
209