Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/optimizer/join_utils.rs
6940 views
1
use polars_core::error::{PolarsResult, polars_bail};
2
use polars_core::schema::*;
3
use polars_utils::arena::{Arena, Node};
4
use polars_utils::pl_str::PlSmallStr;
5
6
use super::{AExpr, aexpr_to_leaf_names_iter};
7
use crate::plans::visitor::{AexprNode, RewriteRecursion, RewritingVisitor, TreeWalker};
8
use crate::plans::{ExprIR, OutputName};
9
10
/// Which input(s) of a join an expression draws its columns from.
///
/// The discriminants are bit flags: the high bit marks the left input and the
/// low bit marks the right input.
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(u8)]
pub(crate) enum ExprOrigin {
    // NOTE: the `BitOr` impl for this enum ORs the raw `u8` discriminants
    // together, so the exact values below (and the `u8` repr) must not change:
    // the enum is effectively a two-bit bitfield.
    //
    /// The expression reads no columns at all.
    None = 0b00,
    /// The expression reads columns from the left side of the join.
    Left = 0b10,
    /// The expression reads columns from the right side of the join.
    Right = 0b01,
    /// The expression reads columns from both sides of the join.
    ///
    /// This variant is not constructed by name (hence the lint expectation);
    /// it is the bit pattern obtained by OR-ing `Left` with `Right`.
    #[expect(unused)]
    Both = 0b11,
}
27
28
impl ExprOrigin {
29
/// Errors with ColumnNotFound if a column cannot be found on either side.
30
///
31
/// The origin of coalesced join columns will be `Left`, except for right-joins.
32
/// For coalescing right-joins, `is_coalesced_to_right` must be passed to
33
/// properly identify the origin.
34
pub(crate) fn get_expr_origin(
35
root: Node,
36
expr_arena: &Arena<AExpr>,
37
left_schema: &Schema,
38
right_schema: &Schema,
39
suffix: &str,
40
// On a coalescing right-join this needs to be passed to properly identify
41
// the origin of right table columns.
42
is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,
43
) -> PolarsResult<ExprOrigin> {
44
aexpr_to_leaf_names_iter(root, expr_arena).try_fold(
45
ExprOrigin::None,
46
|acc_origin, column_name| {
47
Ok(acc_origin
48
| Self::get_column_origin(
49
&column_name,
50
left_schema,
51
right_schema,
52
suffix,
53
is_coalesced_to_right,
54
)?)
55
},
56
)
57
}
58
59
/// Errors with ColumnNotFound if a column cannot be found on either side.
60
///
61
/// The origin of coalesced join columns will be `Left`, except for right-joins.
62
/// For coalescing right-joins, `is_coalesced_to_right` must be passed to
63
/// properly identify the origin.
64
pub(crate) fn get_column_origin(
65
column_name: &str,
66
left_schema: &Schema,
67
right_schema: &Schema,
68
suffix: &str,
69
is_coalesced_to_right: Option<&dyn Fn(&str) -> bool>,
70
) -> PolarsResult<ExprOrigin> {
71
Ok(
72
if left_schema.contains(column_name)
73
&& !is_coalesced_to_right.is_some_and(|f| f(column_name))
74
{
75
ExprOrigin::Left
76
} else if right_schema.contains(column_name)
77
|| column_name
78
.strip_suffix(suffix)
79
.is_some_and(|x| right_schema.contains(x))
80
{
81
ExprOrigin::Right
82
} else {
83
polars_bail!(ColumnNotFound: "{}", column_name)
84
},
85
)
86
}
87
}
88
89
/// Combines two origins by OR-ing their bit flags, e.g. `Left | Right` is `Both`.
impl std::ops::BitOr for ExprOrigin {
    type Output = ExprOrigin;

    fn bitor(self, rhs: Self) -> Self::Output {
        let bits = self as u8 | rhs as u8;
        // SAFETY: `ExprOrigin` is `#[repr(u8)]` and declares a variant for each
        // of the values 0b00, 0b01, 0b10 and 0b11. OR-ing two of those values
        // always yields one of the same four values, so `bits` is a valid
        // discriminant.
        unsafe { std::mem::transmute::<u8, ExprOrigin>(bits) }
    }
}
96
97
impl std::ops::BitOrAssign for ExprOrigin {
98
fn bitor_assign(&mut self, rhs: Self) {
99
*self = *self | rhs;
100
}
101
}
102
103
pub(super) fn remove_suffix<'a>(
104
expr: &mut ExprIR,
105
expr_arena: &mut Arena<AExpr>,
106
schema_rhs: &'a Schema,
107
suffix: &'a str,
108
) {
109
let schema = schema_rhs;
110
// Using AexprNode::rewrite() ensures we do not mutate any nodes in-place. The nodes may be
111
// used in other locations and mutating them will cause really confusing bugs, such as
112
// https://github.com/pola-rs/polars/issues/20831.
113
let node = AexprNode::new(expr.node())
114
.rewrite(&mut RemoveSuffix { schema, suffix }, expr_arena)
115
.unwrap()
116
.node();
117
118
expr.set_node(node);
119
120
if let OutputName::ColumnLhs(colname) = expr.output_name_inner() {
121
if colname.ends_with(suffix) && !schema.contains(colname.as_str()) {
122
let name = PlSmallStr::from(&colname[..colname.len() - suffix.len()]);
123
expr.set_columnlhs(name);
124
}
125
}
126
127
struct RemoveSuffix<'a> {
128
schema: &'a Schema,
129
suffix: &'a str,
130
}
131
132
impl RewritingVisitor for RemoveSuffix<'_> {
133
type Node = AexprNode;
134
type Arena = Arena<AExpr>;
135
136
fn pre_visit(
137
&mut self,
138
node: &Self::Node,
139
arena: &mut Self::Arena,
140
) -> polars_core::prelude::PolarsResult<crate::prelude::visitor::RewriteRecursion> {
141
let AExpr::Column(colname) = arena.get(node.node()) else {
142
return Ok(RewriteRecursion::NoMutateAndContinue);
143
};
144
145
if !colname.ends_with(self.suffix) || self.schema.contains(colname.as_str()) {
146
return Ok(RewriteRecursion::NoMutateAndContinue);
147
}
148
149
Ok(RewriteRecursion::MutateAndContinue)
150
}
151
152
fn mutate(
153
&mut self,
154
node: Self::Node,
155
arena: &mut Self::Arena,
156
) -> polars_core::prelude::PolarsResult<Self::Node> {
157
let AExpr::Column(colname) = arena.get(node.node()) else {
158
unreachable!();
159
};
160
161
// Safety: Checked in pre_visit()
162
Ok(AexprNode::new(arena.add(AExpr::Column(PlSmallStr::from(
163
&colname[..colname.len() - self.suffix.len()],
164
)))))
165
}
166
}
167
}
168
169
pub(super) fn split_suffix<'a>(name: &'a str, suffix: &str) -> &'a str {
170
let (original, _) = name.split_at(name.len() - suffix.len());
171
original
172
}
173
174