Path: blob/main/crates/polars-plan/src/plans/optimizer/join_utils.rs
6940 views
use polars_core::error::{PolarsResult, polars_bail};1use polars_core::schema::*;2use polars_utils::arena::{Arena, Node};3use polars_utils::pl_str::PlSmallStr;45use super::{AExpr, aexpr_to_leaf_names_iter};6use crate::plans::visitor::{AexprNode, RewriteRecursion, RewritingVisitor, TreeWalker};7use crate::plans::{ExprIR, OutputName};89/// Join origin of an expression10#[derive(Debug, Clone, PartialEq, Copy)]11#[repr(u8)]12pub(crate) enum ExprOrigin {13// Note: BitOr is implemented on this struct that relies on this exact u814// repr layout (i.e. treated as a bitfield).15//16/// Utilizes no columns17None = 0b00,18/// Utilizes columns from the left side of the join19Left = 0b10,20/// Utilizes columns from the right side of the join21Right = 0b01,22/// Utilizes columns from both sides of the join23#[expect(unused)]24Both = 0b11,25}2627impl ExprOrigin {28/// Errors with ColumnNotFound if a column cannot be found on either side.29///30/// The origin of coalesced join columns will be `Left`, except for right-joins.31/// For coalescing right-joins, `is_coalesced_to_right` must be passed to32/// properly identify the origin.33pub(crate) fn get_expr_origin(34root: Node,35expr_arena: &Arena<AExpr>,36left_schema: &Schema,37right_schema: &Schema,38suffix: &str,39// On a coalescing right-join this needs to be passed to properly identify40// the origin of right table columns.41is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,42) -> PolarsResult<ExprOrigin> {43aexpr_to_leaf_names_iter(root, expr_arena).try_fold(44ExprOrigin::None,45|acc_origin, column_name| {46Ok(acc_origin47| Self::get_column_origin(48&column_name,49left_schema,50right_schema,51suffix,52is_coalesced_to_right,53)?)54},55)56}5758/// Errors with ColumnNotFound if a column cannot be found on either side.59///60/// The origin of coalesced join columns will be `Left`, except for right-joins.61/// For coalescing right-joins, `is_coalesced_to_right` must be passed to62/// properly identify the origin.63pub(crate) fn get_column_origin(64column_name: &str,65left_schema: &Schema,66right_schema: &Schema,67suffix: &str,68is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,69) -> PolarsResult<ExprOrigin> {70Ok(71if left_schema.contains(column_name)72&& !is_coalesced_to_right.is_some_and(|f| f(column_name))73{74ExprOrigin::Left75} else if right_schema.contains(column_name)76|| column_name77.strip_suffix(suffix)78.is_some_and(|x| right_schema.contains(x))79{80ExprOrigin::Right81} else {82polars_bail!(ColumnNotFound: "{}", column_name)83},84)85}86}8788impl std::ops::BitOr for ExprOrigin {89type Output = ExprOrigin;9091fn bitor(self, rhs: Self) -> Self::Output {92unsafe { std::mem::transmute::<u8, ExprOrigin>(self as u8 | rhs as u8) }93}94}9596impl std::ops::BitOrAssign for ExprOrigin {97fn bitor_assign(&mut self, rhs: Self) {98*self = *self | rhs;99}100}101102pub(super) fn remove_suffix<'a>(103expr: &mut ExprIR,104expr_arena: &mut Arena<AExpr>,105schema_rhs: &'a Schema,106suffix: &'a str,107) {108let schema = schema_rhs;109// Using AexprNode::rewrite() ensures we do not mutate any nodes in-place. The nodes may be110// used in other locations and mutating them will cause really confusing bugs, such as111// https://github.com/pola-rs/polars/issues/20831.112let node = AexprNode::new(expr.node())113.rewrite(&mut RemoveSuffix { schema, suffix }, expr_arena)114.unwrap()115.node();116117expr.set_node(node);118119if let OutputName::ColumnLhs(colname) = expr.output_name_inner() {120if colname.ends_with(suffix) && !schema.contains(colname.as_str()) {121let name = PlSmallStr::from(&colname[..colname.len() - suffix.len()]);122expr.set_columnlhs(name);123}124}125126struct RemoveSuffix<'a> {127schema: &'a Schema,128suffix: &'a str,129}130131impl RewritingVisitor for RemoveSuffix<'_> {132type Node = AexprNode;133type Arena = Arena<AExpr>;134135fn pre_visit(136&mut self,137node: &Self::Node,138arena: &mut Self::Arena,139) -> polars_core::prelude::PolarsResult<crate::prelude::visitor::RewriteRecursion> {140let AExpr::Column(colname) = arena.get(node.node()) else {141return Ok(RewriteRecursion::NoMutateAndContinue);142};143144if !colname.ends_with(self.suffix) || self.schema.contains(colname.as_str()) {145return Ok(RewriteRecursion::NoMutateAndContinue);146}147148Ok(RewriteRecursion::MutateAndContinue)149}150151fn mutate(152&mut self,153node: Self::Node,154arena: &mut Self::Arena,155) -> polars_core::prelude::PolarsResult<Self::Node> {156let AExpr::Column(colname) = arena.get(node.node()) else {157unreachable!();158};159160// Safety: Checked in pre_visit()161Ok(AexprNode::new(arena.add(AExpr::Column(PlSmallStr::from(162&colname[..colname.len() - self.suffix.len()],163)))))164}165}166}167168pub(super) fn split_suffix<'a>(name: &'a str, suffix: &str) -> &'a str {169let (original, _) = name.split_at(name.len() - suffix.len());170original171}172173174