Path: blob/main/crates/polars-plan/src/plans/optimizer/collect_members.rs
6940 views
use std::hash::BuildHasher;12use super::*;34// Utility to cheaply check if we have duplicate sources.5// This may have false positives.6#[cfg(feature = "cse")]7#[derive(Default)]8struct UniqueScans {9ids: PlHashSet<u64>,10count: usize,11}1213#[cfg(feature = "cse")]14impl UniqueScans {15fn insert(&mut self, node: Node, lp_arena: &Arena<IR>, expr_arena: &Arena<AExpr>) {16let alp_node = IRNode::new(node);17self.ids.insert(18self.ids19.hasher()20.hash_one(alp_node.hashable_and_cmp(lp_arena, expr_arena)),21);22self.count += 1;23}24}2526pub(super) struct MemberCollector {27pub(crate) has_joins_or_unions: bool,28pub(crate) has_sink_multiple: bool,29pub(crate) has_cache: bool,30pub(crate) has_ext_context: bool,31pub(crate) has_filter_with_join_input: bool,32pub(crate) has_distinct: bool,33pub(crate) has_sort: bool,34pub(crate) has_group_by: bool,35#[cfg(feature = "cse")]36scans: UniqueScans,37}3839impl MemberCollector {40pub(super) fn new() -> Self {41Self {42has_joins_or_unions: false,43has_sink_multiple: false,44has_cache: false,45has_ext_context: false,46has_filter_with_join_input: false,47has_distinct: false,48has_sort: false,49has_group_by: false,50#[cfg(feature = "cse")]51scans: UniqueScans::default(),52}53}54pub(super) fn collect(&mut self, root: Node, lp_arena: &Arena<IR>, _expr_arena: &Arena<AExpr>) {55use IR::*;56for (_node, alp) in lp_arena.iter(root) {57match alp {58SinkMultiple { .. } => self.has_sink_multiple = true,59Join { .. } | Union { .. } => self.has_joins_or_unions = true,60Filter { input, .. } => {61self.has_filter_with_join_input |= matches!(lp_arena.get(*input), Join { options, .. } if options.args.how.is_cross())62},63Distinct { .. } => {64self.has_distinct = true;65},66GroupBy { .. } => {67self.has_group_by = true;68},69Sort { .. } => {70self.has_sort = true;71},72Cache { .. } => self.has_cache = true,73ExtContext { .. } => self.has_ext_context = true,74#[cfg(feature = "cse")]75Scan { .. } => {76self.scans.insert(_node, lp_arena, _expr_arena);77},78HConcat { .. } => {79self.has_joins_or_unions = true;80},81#[cfg(feature = "cse")]82DataFrameScan { .. } => {83self.scans.insert(_node, lp_arena, _expr_arena);84},85#[cfg(all(feature = "cse", feature = "python"))]86PythonScan { .. } => {87self.scans.insert(_node, lp_arena, _expr_arena);88},89_ => {},90}91}92}9394#[cfg(feature = "cse")]95pub(super) fn has_duplicate_scans(&self) -> bool {96self.scans.count != self.scans.ids.len()97}98}99100101