Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/optimizer/join_utils.rs
8420 views
1
#![allow(unused)]
2
use polars_core::error::{PolarsResult, polars_bail};
3
use polars_core::schema::*;
4
use polars_utils::arena::{Arena, Node};
5
use polars_utils::pl_str::PlSmallStr;
6
7
use super::{AExpr, aexpr_to_leaf_names_iter};
8
use crate::plans::visitor::{AexprNode, RewriteRecursion, RewritingVisitor, TreeWalker};
9
use crate::plans::{ExprIR, OutputName};
10
11
/// Which side(s) of a join an expression takes its columns from.
//
// Note: the `BitOr` impl for this enum depends on this exact `u8` representation.
// The discriminants are treated as a two-bit field: the high bit marks the left
// side and the low bit marks the right side.
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub(crate) enum ExprOrigin {
    /// The expression uses no columns at all.
    None = 0,
    /// The expression uses columns from the left side of the join.
    Left = 1 << 1,
    /// The expression uses columns from the right side of the join.
    Right = 1 << 0,
    /// The expression uses columns from both sides of the join.
    #[expect(unused)]
    Both = (1 << 1) | (1 << 0),
}
28
29
impl ExprOrigin {
30
/// Errors with ColumnNotFound if a column cannot be found on either side.
31
///
32
/// Note, for right-joins an `is_coalesced_to_right` function must be passed
33
/// to properly identify coalesced key columns as originating from the `ExprOrigin::Right`.
34
/// Otherwise they will be identified as `ExprOrigin::Left`.
35
pub(crate) fn get_expr_origin(
36
root: Node,
37
expr_arena: &Arena<AExpr>,
38
left_schema: &Schema,
39
right_schema: &Schema,
40
suffix: &str,
41
// On a coalescing right-join this needs to be passed to properly identify
42
// the origin of right table columns.
43
is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,
44
) -> PolarsResult<ExprOrigin> {
45
aexpr_to_leaf_names_iter(root, expr_arena).try_fold(
46
ExprOrigin::None,
47
|acc_origin, column_name| {
48
Ok(acc_origin
49
| Self::get_column_origin(
50
column_name,
51
left_schema,
52
right_schema,
53
suffix,
54
is_coalesced_to_right,
55
)?)
56
},
57
)
58
}
59
60
/// Errors with ColumnNotFound if a column cannot be found on either side.
61
///
62
/// Note, for right-joins an `is_coalesced_to_right` function must be passed
63
/// to properly identify coalesced key columns as originating from the `ExprOrigin::Right`.
64
/// Otherwise they will be identified as `ExprOrigin::Left`.
65
pub(crate) fn get_column_origin(
66
column_name: &str,
67
left_schema: &Schema,
68
right_schema: &Schema,
69
suffix: &str,
70
is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,
71
) -> PolarsResult<ExprOrigin> {
72
Ok(
73
if left_schema.contains(column_name)
74
&& !is_coalesced_to_right.is_some_and(|f| f(column_name))
75
{
76
ExprOrigin::Left
77
} else if right_schema.contains(column_name)
78
|| column_name
79
.strip_suffix(suffix)
80
.is_some_and(|x| right_schema.contains(x))
81
{
82
ExprOrigin::Right
83
} else {
84
polars_bail!(ColumnNotFound: "{column_name}")
85
},
86
)
87
}
88
}
89
90
impl std::ops::BitOr for ExprOrigin {
    type Output = ExprOrigin;

    /// Combines two origins by OR-ing their `u8` discriminants, e.g.
    /// `Left | Right` yields `Both` and `None | x` yields `x`.
    fn bitor(self, rhs: Self) -> Self::Output {
        let bits = (self as u8) | (rhs as u8);
        // SAFETY: `ExprOrigin` is `#[repr(u8)]` and declares a variant for each of
        // 0b00, 0b01, 0b10 and 0b11. OR-ing two valid discriminants can only
        // produce one of those four values, so `bits` is a valid `ExprOrigin`.
        unsafe { std::mem::transmute::<u8, ExprOrigin>(bits) }
    }
}
97
98
impl std::ops::BitOrAssign for ExprOrigin {
99
fn bitor_assign(&mut self, rhs: Self) {
100
*self = *self | rhs;
101
}
102
}
103
104
pub(super) fn remove_suffix<'a>(
105
expr: &mut ExprIR,
106
expr_arena: &mut Arena<AExpr>,
107
schema_rhs: &'a Schema,
108
suffix: &'a str,
109
) {
110
let schema = schema_rhs;
111
// Using AexprNode::rewrite() ensures we do not mutate any nodes in-place. The nodes may be
112
// used in other locations and mutating them will cause really confusing bugs, such as
113
// https://github.com/pola-rs/polars/issues/20831.
114
let node = AexprNode::new(expr.node())
115
.rewrite(&mut RemoveSuffix { schema, suffix }, expr_arena)
116
.unwrap()
117
.node();
118
119
expr.set_node(node);
120
121
if let OutputName::ColumnLhs(colname) = expr.output_name_inner() {
122
if colname.ends_with(suffix) && !schema.contains(colname.as_str()) {
123
let name = PlSmallStr::from(&colname[..colname.len() - suffix.len()]);
124
expr.set_columnlhs(name);
125
}
126
}
127
128
struct RemoveSuffix<'a> {
129
schema: &'a Schema,
130
suffix: &'a str,
131
}
132
133
impl RewritingVisitor for RemoveSuffix<'_> {
134
type Node = AexprNode;
135
type Arena = Arena<AExpr>;
136
137
fn pre_visit(
138
&mut self,
139
node: &Self::Node,
140
arena: &mut Self::Arena,
141
) -> polars_core::prelude::PolarsResult<crate::prelude::visitor::RewriteRecursion> {
142
let AExpr::Column(colname) = arena.get(node.node()) else {
143
return Ok(RewriteRecursion::NoMutateAndContinue);
144
};
145
146
if !colname.ends_with(self.suffix) || self.schema.contains(colname.as_str()) {
147
return Ok(RewriteRecursion::NoMutateAndContinue);
148
}
149
150
Ok(RewriteRecursion::MutateAndContinue)
151
}
152
153
fn mutate(
154
&mut self,
155
node: Self::Node,
156
arena: &mut Self::Arena,
157
) -> polars_core::prelude::PolarsResult<Self::Node> {
158
let AExpr::Column(colname) = arena.get(node.node()) else {
159
unreachable!();
160
};
161
162
// Safety: Checked in pre_visit()
163
Ok(AexprNode::new(arena.add(AExpr::Column(PlSmallStr::from(
164
&colname[..colname.len() - self.suffix.len()],
165
)))))
166
}
167
}
168
}
169
170