Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-plan/src/plans/optimizer/collect_members.rs
6940 views
1
use std::hash::BuildHasher;
2
3
use super::*;
4
5
/// Cheap duplicate-source detector.
///
/// Only a `u64` hash of every inserted scan node is retained, together with the
/// number of insertions. Because distinct nodes can hash to the same value, a
/// duplicate may be reported where there is none (false positive).
#[cfg(feature = "cse")]
#[derive(Default)]
struct UniqueScans {
    /// Hashes of the scan nodes inserted so far.
    ids: PlHashSet<u64>,
    /// Total number of insertions, including repeated hashes.
    count: usize,
}
13
14
#[cfg(feature = "cse")]
impl UniqueScans {
    /// Records one scan node.
    ///
    /// The node is hashed through its `hashable_and_cmp` view, using the hasher
    /// that belongs to `ids`, and the resulting hash is added to the set.
    /// `count` is incremented on every call, whether or not the hash was
    /// already present.
    fn insert(&mut self, node: Node, lp_arena: &Arena<IR>, expr_arena: &Arena<AExpr>) {
        let scan = IRNode::new(node);
        // Compute the hash first so the set is only borrowed mutably for the insert.
        let digest = self
            .ids
            .hasher()
            .hash_one(scan.hashable_and_cmp(lp_arena, expr_arena));
        self.ids.insert(digest);
        self.count += 1;
    }
}
26
27
pub(super) struct MemberCollector {
28
pub(crate) has_joins_or_unions: bool,
29
pub(crate) has_sink_multiple: bool,
30
pub(crate) has_cache: bool,
31
pub(crate) has_ext_context: bool,
32
pub(crate) has_filter_with_join_input: bool,
33
pub(crate) has_distinct: bool,
34
pub(crate) has_sort: bool,
35
pub(crate) has_group_by: bool,
36
#[cfg(feature = "cse")]
37
scans: UniqueScans,
38
}
39
40
impl MemberCollector {
41
pub(super) fn new() -> Self {
42
Self {
43
has_joins_or_unions: false,
44
has_sink_multiple: false,
45
has_cache: false,
46
has_ext_context: false,
47
has_filter_with_join_input: false,
48
has_distinct: false,
49
has_sort: false,
50
has_group_by: false,
51
#[cfg(feature = "cse")]
52
scans: UniqueScans::default(),
53
}
54
}
55
pub(super) fn collect(&mut self, root: Node, lp_arena: &Arena<IR>, _expr_arena: &Arena<AExpr>) {
56
use IR::*;
57
for (_node, alp) in lp_arena.iter(root) {
58
match alp {
59
SinkMultiple { .. } => self.has_sink_multiple = true,
60
Join { .. } | Union { .. } => self.has_joins_or_unions = true,
61
Filter { input, .. } => {
62
self.has_filter_with_join_input |= matches!(lp_arena.get(*input), Join { options, .. } if options.args.how.is_cross())
63
},
64
Distinct { .. } => {
65
self.has_distinct = true;
66
},
67
GroupBy { .. } => {
68
self.has_group_by = true;
69
},
70
Sort { .. } => {
71
self.has_sort = true;
72
},
73
Cache { .. } => self.has_cache = true,
74
ExtContext { .. } => self.has_ext_context = true,
75
#[cfg(feature = "cse")]
76
Scan { .. } => {
77
self.scans.insert(_node, lp_arena, _expr_arena);
78
},
79
HConcat { .. } => {
80
self.has_joins_or_unions = true;
81
},
82
#[cfg(feature = "cse")]
83
DataFrameScan { .. } => {
84
self.scans.insert(_node, lp_arena, _expr_arena);
85
},
86
#[cfg(all(feature = "cse", feature = "python"))]
87
PythonScan { .. } => {
88
self.scans.insert(_node, lp_arena, _expr_arena);
89
},
90
_ => {},
91
}
92
}
93
}
94
95
#[cfg(feature = "cse")]
96
pub(super) fn has_duplicate_scans(&self) -> bool {
97
self.scans.count != self.scans.ids.len()
98
}
99
}
100
101