Path: blob/main/crates/polars-plan/src/plans/optimizer/join_utils.rs
8420 views
#![allow(unused)]1use polars_core::error::{PolarsResult, polars_bail};2use polars_core::schema::*;3use polars_utils::arena::{Arena, Node};4use polars_utils::pl_str::PlSmallStr;56use super::{AExpr, aexpr_to_leaf_names_iter};7use crate::plans::visitor::{AexprNode, RewriteRecursion, RewritingVisitor, TreeWalker};8use crate::plans::{ExprIR, OutputName};910/// Join origin of an expression11#[derive(Debug, Clone, PartialEq, Copy)]12#[repr(u8)]13pub(crate) enum ExprOrigin {14// Note: BitOr is implemented on this struct that relies on this exact u815// repr layout (i.e. treated as a bitfield).16//17/// Utilizes no columns18None = 0b00,19/// Utilizes columns from the left side of the join20Left = 0b10,21/// Utilizes columns from the right side of the join22Right = 0b01,23/// Utilizes columns from both sides of the join24#[expect(unused)]25Both = 0b11,26}2728impl ExprOrigin {29/// Errors with ColumnNotFound if a column cannot be found on either side.30///31/// Note, for right-joins an `is_coalesced_to_right` function must be passed32/// to properly identify coalesced key columns as originating from the `ExprOrigin::Right`.33/// Otherwise they will be identified as `ExprOrigin::Left`.34pub(crate) fn get_expr_origin(35root: Node,36expr_arena: &Arena<AExpr>,37left_schema: &Schema,38right_schema: &Schema,39suffix: &str,40// On a coalescing right-join this needs to be passed to properly identify41// the origin of right table columns.42is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,43) -> PolarsResult<ExprOrigin> {44aexpr_to_leaf_names_iter(root, expr_arena).try_fold(45ExprOrigin::None,46|acc_origin, column_name| {47Ok(acc_origin48| Self::get_column_origin(49column_name,50left_schema,51right_schema,52suffix,53is_coalesced_to_right,54)?)55},56)57}5859/// Errors with ColumnNotFound if a column cannot be found on either side.60///61/// Note, for right-joins an `is_coalesced_to_right` function must be passed62/// to properly identify coalesced key columns as originating from the `ExprOrigin::Right`.63/// Otherwise they will be identified as `ExprOrigin::Left`.64pub(crate) fn get_column_origin(65column_name: &str,66left_schema: &Schema,67right_schema: &Schema,68suffix: &str,69is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,70) -> PolarsResult<ExprOrigin> {71Ok(72if left_schema.contains(column_name)73&& !is_coalesced_to_right.is_some_and(|f| f(column_name))74{75ExprOrigin::Left76} else if right_schema.contains(column_name)77|| column_name78.strip_suffix(suffix)79.is_some_and(|x| right_schema.contains(x))80{81ExprOrigin::Right82} else {83polars_bail!(ColumnNotFound: "{column_name}")84},85)86}87}8889impl std::ops::BitOr for ExprOrigin {90type Output = ExprOrigin;9192fn bitor(self, rhs: Self) -> Self::Output {93unsafe { std::mem::transmute::<u8, ExprOrigin>(self as u8 | rhs as u8) }94}95}9697impl std::ops::BitOrAssign for ExprOrigin {98fn bitor_assign(&mut self, rhs: Self) {99*self = *self | rhs;100}101}102103pub(super) fn remove_suffix<'a>(104expr: &mut ExprIR,105expr_arena: &mut Arena<AExpr>,106schema_rhs: &'a Schema,107suffix: &'a str,108) {109let schema = schema_rhs;110// Using AexprNode::rewrite() ensures we do not mutate any nodes in-place. The nodes may be111// used in other locations and mutating them will cause really confusing bugs, such as112// https://github.com/pola-rs/polars/issues/20831.113let node = AexprNode::new(expr.node())114.rewrite(&mut RemoveSuffix { schema, suffix }, expr_arena)115.unwrap()116.node();117118expr.set_node(node);119120if let OutputName::ColumnLhs(colname) = expr.output_name_inner() {121if colname.ends_with(suffix) && !schema.contains(colname.as_str()) {122let name = PlSmallStr::from(&colname[..colname.len() - suffix.len()]);123expr.set_columnlhs(name);124}125}126127struct RemoveSuffix<'a> {128schema: &'a Schema,129suffix: &'a str,130}131132impl RewritingVisitor for RemoveSuffix<'_> {133type Node = AexprNode;134type Arena = Arena<AExpr>;135136fn pre_visit(137&mut self,138node: &Self::Node,139arena: &mut Self::Arena,140) -> polars_core::prelude::PolarsResult<crate::prelude::visitor::RewriteRecursion> {141let AExpr::Column(colname) = arena.get(node.node()) else {142return Ok(RewriteRecursion::NoMutateAndContinue);143};144145if !colname.ends_with(self.suffix) || self.schema.contains(colname.as_str()) {146return Ok(RewriteRecursion::NoMutateAndContinue);147}148149Ok(RewriteRecursion::MutateAndContinue)150}151152fn mutate(153&mut self,154node: Self::Node,155arena: &mut Self::Arena,156) -> polars_core::prelude::PolarsResult<Self::Node> {157let AExpr::Column(colname) = arena.get(node.node()) else {158unreachable!();159};160161// Safety: Checked in pre_visit()162Ok(AexprNode::new(arena.add(AExpr::Column(PlSmallStr::from(163&colname[..colname.len() - self.suffix.len()],164)))))165}166}167}168169170