Path: blob/main/crates/polars-plan/src/plans/conversion/stack_opt.rs
8420 views
use std::borrow::Borrow;12use self::type_check::TypeCheckRule;3use super::*;4use crate::constants::{get_pl_element_name, get_pl_structfields_name};56/// Applies expression simplification and type coercion during conversion to IR.7pub struct ConversionOptimizer {8scratch: Vec<(Node, usize)>,9schemas: Vec<Schema>,1011simplify: Option<SimplifyExprRule>,12coerce: Option<TypeCoercionRule>,13check: Option<TypeCheckRule>,14// IR's can be cached in the DSL.15// But if they are used multiple times in DSL (e.g. concat/join)16// then it can occur that we take a slot multiple times.17// So we keep track of the arena versions used and allow only18// one unique IR cache to be reused.19pub(super) used_arenas: PlHashSet<u32>,20}2122struct ExtendVec<'a> {23out: &'a mut Vec<(Node, usize)>,24schema_idx: usize,25}26impl Extend<Node> for ExtendVec<'_> {27fn extend<T: IntoIterator<Item = Node>>(&mut self, iter: T) {28self.out29.extend(iter.into_iter().map(|n| (n, self.schema_idx)))30}31}3233impl ConversionOptimizer {34pub fn new(simplify: bool, type_coercion: bool, type_check: bool) -> Self {35let simplify = if simplify {36Some(SimplifyExprRule {})37} else {38None39};4041let coerce = if type_coercion {42Some(TypeCoercionRule {})43} else {44None45};4647let check = if type_check {48Some(TypeCheckRule)49} else {50None51};5253ConversionOptimizer {54scratch: Vec::with_capacity(8),55schemas: Vec::new(),56simplify,57coerce,58check,59used_arenas: Default::default(),60}61}6263pub fn push_scratch(&mut self, expr: Node, expr_arena: &Arena<AExpr>) {64self.scratch.push((expr, 0));65// traverse all subexpressions and add to the stack66let expr = unsafe { expr_arena.get_unchecked(expr) };6768expr.inputs_rev_strict(&mut ExtendVec {69out: &mut self.scratch,70schema_idx: 0,71});72}7374pub fn fill_scratch<I, N>(&mut self, exprs: I, expr_arena: &Arena<AExpr>)75where76I: IntoIterator<Item = N>,77N: Borrow<Node>,78{79for e in exprs {80let node = *e.borrow();81self.push_scratch(node, expr_arena);82}83}8485/// Optimizes the expressions in the scratch space. This should be called after filling the86/// scratch space with the expressions that you want to optimize.87pub fn optimize_exprs(88&mut self,89expr_arena: &mut Arena<AExpr>,90ir_arena: &mut Arena<IR>,91current_ir_node: Node,92// Use the schema of `current_ir_node` instead of its input when resolving expr fields.93use_current_node_schema: bool,94) -> PolarsResult<()> {95// Different from the stack-opt in the optimizer phase, this does a single pass until fixed point per expression.9697if let Some(rule) = &mut self.check {98while let Some(x) = rule.optimize_plan(ir_arena, expr_arena, current_ir_node)? {99ir_arena.replace(current_ir_node, x);100}101}102103// process the expressions on the stack and apply optimizations.104let schema = if use_current_node_schema {105ir_arena.get(current_ir_node).schema(ir_arena)106} else {107get_input_schema(ir_arena, current_ir_node)108};109let plan = ir_arena.get(current_ir_node);110let mut ctx = OptimizeExprContext {111in_filter: matches!(plan, IR::Filter { .. }),112has_inputs: !get_input(ir_arena, current_ir_node).is_empty(),113..Default::default()114};115#[cfg(feature = "python")]116{117use crate::dsl::python_dsl::PythonScanSource;118ctx.in_pyarrow_scan = matches!(plan, IR::PythonScan { options } if options.python_source == PythonScanSource::Pyarrow);119ctx.in_io_plugin = matches!(plan, IR::PythonScan { options } if options.python_source == PythonScanSource::IOPlugin);120};121122self.schemas.clear();123while let Some((current_expr_node, schema_idx)) = self.scratch.pop() {124let expr = unsafe { expr_arena.get_unchecked(current_expr_node) };125126if expr.is_leaf() {127continue;128}129130// Evaluation expressions still need to do rules on the evaluation expression but the131// schema is not the same and it is not concluded in the inputs.132if let AExpr::Eval {133expr,134evaluation,135variant,136} = expr137{138let schema = if schema_idx == 0 {139&schema140} else {141&self.schemas[schema_idx - 1]142};143let expr = expr_arena144.get(*expr)145.to_dtype(&ToFieldContext::new(expr_arena, schema))?;146147let element_dtype = variant.element_dtype(&expr)?;148let mut schema = schema.clone();149schema.insert(get_pl_element_name(), element_dtype.clone());150self.schemas.push(schema);151self.scratch.push((*evaluation, self.schemas.len()));152}153154// Similar for StructEval155// Effectively, we are mimicking in-order processing traversal logic (left > parent > right).156#[cfg(feature = "dtype-struct")]157if let AExpr::StructEval { expr, evaluation } = expr {158let schema = if schema_idx == 0 {159&schema160} else {161&self.schemas[schema_idx - 1]162};163let struct_dtype = expr_arena164.get(*expr)165.to_dtype(&ToFieldContext::new(expr_arena, schema))?;166167let mut schema = schema.clone();168schema.insert(get_pl_structfields_name(), struct_dtype);169self.schemas.push(schema);170for e in evaluation {171self.scratch.push((e.node(), self.schemas.len()))172}173}174175let schema = if schema_idx == 0 {176&schema177} else {178&self.schemas[schema_idx - 1]179};180181if let Some(rule) = &mut self.simplify {182while let Some(x) =183rule.optimize_expr(expr_arena, current_expr_node, schema, ctx)?184{185expr_arena.replace(current_expr_node, x);186}187}188if let Some(rule) = &mut self.coerce {189while let Some(x) =190rule.optimize_expr(expr_arena, current_expr_node, schema, ctx)?191{192expr_arena.replace(current_expr_node, x);193}194}195196let expr = unsafe { expr_arena.get_unchecked(current_expr_node) };197// traverse subexpressions and add to the stack198expr.inputs_rev_strict(&mut ExtendVec {199out: &mut self.scratch,200schema_idx,201});202}203204Ok(())205}206}207208209